Index: head/bin/df/df.c =================================================================== --- head/bin/df/df.c (revision 122536) +++ head/bin/df/df.c (revision 122537) @@ -1,587 +1,589 @@ /* * Copyright (c) 1980, 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1990, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)df.c 8.9 (Berkeley) 5/8/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" #define UNITS_SI 1 #define UNITS_2 2 #define KILO_SZ(n) (n) #define MEGA_SZ(n) ((n) * (n)) #define GIGA_SZ(n) ((n) * (n) * (n)) #define TERA_SZ(n) ((n) * (n) * (n) * (n)) #define PETA_SZ(n) ((n) * (n) * (n) * (n) * (n)) #define KILO_2_SZ (KILO_SZ(1024ULL)) #define MEGA_2_SZ (MEGA_SZ(1024ULL)) #define GIGA_2_SZ (GIGA_SZ(1024ULL)) #define TERA_2_SZ (TERA_SZ(1024ULL)) #define PETA_2_SZ (PETA_SZ(1024ULL)) #define KILO_SI_SZ (KILO_SZ(1000ULL)) #define MEGA_SI_SZ (MEGA_SZ(1000ULL)) #define GIGA_SI_SZ (GIGA_SZ(1000ULL)) #define TERA_SI_SZ (TERA_SZ(1000ULL)) #define PETA_SI_SZ (PETA_SZ(1000ULL)) /* Maximum widths of various fields. 
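A minimal standalone sketch (not part of this diff) of how the nested size macros above expand: the base-2 table is built from powers of 1024ULL and the SI table from powers of 1000ULL, so GIGA_2_SZ is 1024^3 while GIGA_SI_SZ is 1000^3.

/* Sketch: expansion of the unit-size macros defined above. */
#include <stdio.h>
#include <inttypes.h>

#define KILO_SZ(n) (n)
#define MEGA_SZ(n) ((n) * (n))
#define GIGA_SZ(n) ((n) * (n) * (n))

int
main(void)
{
	printf("GIGA_2_SZ  = %ju\n", (uintmax_t)GIGA_SZ(1024ULL)); /* 1073741824 */
	printf("GIGA_SI_SZ = %ju\n", (uintmax_t)GIGA_SZ(1000ULL)); /* 1000000000 */
	return (0);
}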
*/ struct maxwidths { size_t mntfrom; size_t total; size_t used; size_t avail; size_t iused; size_t ifree; }; static uintmax_t vals_si [] = { 1, KILO_SI_SZ, MEGA_SI_SZ, GIGA_SI_SZ, TERA_SI_SZ, PETA_SI_SZ }; static uintmax_t vals_base2[] = { 1, KILO_2_SZ, MEGA_2_SZ, GIGA_2_SZ, TERA_2_SZ, PETA_2_SZ }; static uintmax_t *valp; typedef enum { NONE, KILO, MEGA, GIGA, TERA, PETA, UNIT_MAX } unit_t; static unit_t unitp [] = { NONE, KILO, MEGA, GIGA, TERA, PETA }; static char *getmntpt(const char *); -static size_t longwidth(long); +static size_t int64width(int64_t); static char *makenetvfslist(void); -static void prthuman(const struct statfs *, size_t); +static void prthuman(const struct statfs *, int64_t); static void prthumanval(double); static void prtstat(struct statfs *, struct maxwidths *); static size_t regetmntinfo(struct statfs **, long, const char **); static unit_t unit_adjust(double *); static void update_maxwidths(struct maxwidths *, const struct statfs *); static void usage(void); static __inline u_int max(u_int a, u_int b) { return (a > b ? a : b); } static int aflag = 0, hflag, iflag, nflag; static struct ufs_args mdev; int main(int argc, char *argv[]) { struct stat stbuf; struct statfs statfsbuf, *mntbuf; struct maxwidths maxwidths; const char *fstype; char *mntpath, *mntpt; const char **vfslist; size_t i, mntsize; int ch, rv; fstype = "ufs"; vfslist = NULL; while ((ch = getopt(argc, argv, "abgHhiklmnPt:")) != -1) switch (ch) { case 'a': aflag = 1; break; case 'b': /* FALLTHROUGH */ case 'P': putenv("BLOCKSIZE=512"); hflag = 0; break; case 'g': putenv("BLOCKSIZE=1g"); hflag = 0; break; case 'H': hflag = UNITS_SI; valp = vals_si; break; case 'h': hflag = UNITS_2; valp = vals_base2; break; case 'i': iflag = 1; break; case 'k': putenv("BLOCKSIZE=1k"); hflag = 0; break; case 'l': if (vfslist != NULL) errx(1, "-l and -t are mutually exclusive."); vfslist = makevfslist(makenetvfslist()); break; case 'm': putenv("BLOCKSIZE=1m"); hflag = 0; break; case 'n': nflag = 1; break; case 't': if (vfslist != NULL) errx(1, "only one -t option may be specified"); fstype = optarg; vfslist = makevfslist(optarg); break; case '?': default: usage(); } argc -= optind; argv += optind; mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); bzero(&maxwidths, sizeof(maxwidths)); for (i = 0; i < mntsize; i++) update_maxwidths(&maxwidths, &mntbuf[i]); rv = 0; if (!*argv) { mntsize = regetmntinfo(&mntbuf, mntsize, vfslist); bzero(&maxwidths, sizeof(maxwidths)); for (i = 0; i < mntsize; i++) update_maxwidths(&maxwidths, &mntbuf[i]); for (i = 0; i < mntsize; i++) { if (aflag || (mntbuf[i].f_flags & MNT_IGNORE) == 0) prtstat(&mntbuf[i], &maxwidths); } exit(rv); } for (; *argv; argv++) { if (stat(*argv, &stbuf) < 0) { if ((mntpt = getmntpt(*argv)) == 0) { warn("%s", *argv); rv = 1; continue; } } else if (S_ISCHR(stbuf.st_mode)) { if ((mntpt = getmntpt(*argv)) == 0) { mdev.fspec = *argv; mntpath = strdup("/tmp/df.XXXXXX"); if (mntpath == NULL) { warn("strdup failed"); rv = 1; continue; } mntpt = mkdtemp(mntpath); if (mntpt == NULL) { warn("mkdtemp(\"%s\") failed", mntpath); rv = 1; free(mntpath); continue; } if (mount(fstype, mntpt, MNT_RDONLY, &mdev) != 0) { warn("%s", *argv); rv = 1; (void)rmdir(mntpt); free(mntpath); continue; } else if (statfs(mntpt, &statfsbuf) == 0) { statfsbuf.f_mntonname[0] = '\0'; prtstat(&statfsbuf, &maxwidths); } else { warn("%s", *argv); rv = 1; } (void)unmount(mntpt, 0); (void)rmdir(mntpt); free(mntpath); continue; } } else mntpt = *argv; /* * Statfs does not take a `wait' flag, so we cannot * 
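The no-argument path of main() above hangs entirely off getmntinfo(3); a small standalone sketch (not part of the diff) of that call, using MNT_NOWAIT as df does so that cached, possibly stale statistics come back without stalling on an unresponsive filesystem.

/* Sketch: the getmntinfo()/MNT_NOWAIT pattern df is built around. */
#include <sys/param.h>
#include <sys/ucred.h>
#include <sys/mount.h>
#include <stdio.h>

int
main(void)
{
	struct statfs *mntbuf;
	int i, mntsize;

	mntsize = getmntinfo(&mntbuf, MNT_NOWAIT);
	for (i = 0; i < mntsize; i++)
		printf("%s on %s (%s)\n", mntbuf[i].f_mntfromname,
		    mntbuf[i].f_mntonname, mntbuf[i].f_fstypename);
	return (mntsize == 0);
}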
implement nflag here. */ if (statfs(mntpt, &statfsbuf) < 0) { warn("%s", mntpt); rv = 1; continue; } /* * Check to make sure the arguments we've been given are * satisfied. Return an error if we have been asked to * list a mount point that does not match the other args * we've been given (-l, -t, etc.). */ if (checkvfsname(statfsbuf.f_fstypename, vfslist)) { rv = 1; continue; } if (argc == 1) { bzero(&maxwidths, sizeof(maxwidths)); update_maxwidths(&maxwidths, &statfsbuf); } prtstat(&statfsbuf, &maxwidths); } return (rv); } static char * getmntpt(const char *name) { size_t mntsize, i; struct statfs *mntbuf; mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) { if (!strcmp(mntbuf[i].f_mntfromname, name)) return (mntbuf[i].f_mntonname); } return (0); } /* * Make a pass over the file system info in ``mntbuf'' filtering out * file system types not in vfslist and possibly re-stating to get * current (not cached) info. Returns the new count of valid statfs bufs. */ static size_t regetmntinfo(struct statfs **mntbufp, long mntsize, const char **vfslist) { int i, j; struct statfs *mntbuf; if (vfslist == NULL) return (nflag ? mntsize : getmntinfo(mntbufp, MNT_WAIT)); mntbuf = *mntbufp; for (j = 0, i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; if (!nflag) (void)statfs(mntbuf[i].f_mntonname,&mntbuf[j]); else if (i != j) mntbuf[j] = mntbuf[i]; j++; } return (j); } /* * Output in "human-readable" format. Uses 3 digits max and puts * unit suffixes at the end. Makes output compact and easy to read, * especially on huge disks. * */ static unit_t unit_adjust(double *val) { double abval; unit_t unit; int unit_sz; abval = fabs(*val); unit_sz = abval ? ilogb(abval) / 10 : 0; if (unit_sz >= (int)UNIT_MAX) { unit = NONE; } else { unit = unitp[unit_sz]; *val /= (double)valp[unit_sz]; } return (unit); } static void -prthuman(const struct statfs *sfsp, size_t used) +prthuman(const struct statfs *sfsp, int64_t used) { prthumanval((double)sfsp->f_blocks * (double)sfsp->f_bsize); prthumanval((double)used * (double)sfsp->f_bsize); prthumanval((double)sfsp->f_bavail * (double)sfsp->f_bsize); } static void prthumanval(double bytes) { unit_t unit; unit = unit_adjust(&bytes); if (bytes == 0) (void)printf(" 0B"); else if (bytes > 10) (void)printf(" %5.0f%c", bytes, "BKMGTPE"[unit]); else (void)printf(" %5.1f%c", bytes, "BKMGTPE"[unit]); } /* * Convert statfs returned file system size into BLOCKSIZE units. * Attempts to avoid overflow for large file systems. */ #define fsbtoblk(num, fsbs, bs) \ (((fsbs) != 0 && (fsbs) < (bs)) ? \ (num) / ((bs) / (fsbs)) : (num) * ((fsbs) / (bs))) /* * Print out status about a file system. 
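The heart of unit_adjust() above is the ilogb(abval) / 10 expression: ilogb() returns floor(log2(x)), so dividing by 10 counts how many whole factors of 2^10 (1024) fit in the value, and that count indexes both unitp[] and valp[]. A standalone sketch (not part of the diff) of the same idea:

/* Sketch: picking a unit the way unit_adjust() does. */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double vals[4] = { 512.0, 2048.0, 3.5e6, 7.2e9 };
	const char units[] = "BKMGTPE";
	int i, idx;

	for (i = 0; i < 4; i++) {
		idx = ilogb(vals[i]) / 10;	/* 0=B, 1=K, 2=M, 3=G, ... */
		printf("%14.0f -> %6.1f%c\n", vals[i],
		    vals[i] / pow(1024.0, idx), units[idx]);
	}
	return (0);
}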
*/ static void prtstat(struct statfs *sfsp, struct maxwidths *mwp) { - static long blocksize; + static u_long blocksize; static int headerlen, timesthrough = 0; static const char *header; - size_t used, availblks, inodes; + int64_t used, availblks, inodes; if (++timesthrough == 1) { mwp->mntfrom = max(mwp->mntfrom, strlen("Filesystem")); if (hflag) { header = " Size"; mwp->total = mwp->used = mwp->avail = strlen(header); } else { header = getbsize(&headerlen, &blocksize); mwp->total = max(mwp->total, (u_int)headerlen); } mwp->used = max(mwp->used, strlen("Used")); mwp->avail = max(mwp->avail, strlen("Avail")); (void)printf("%-*s %-*s %*s %*s Capacity", (u_int)mwp->mntfrom, "Filesystem", (u_int)mwp->total, header, (u_int)mwp->used, "Used", (u_int)mwp->avail, "Avail"); if (iflag) { mwp->iused = max(mwp->iused, strlen(" iused")); mwp->ifree = max(mwp->ifree, strlen("ifree")); (void)printf(" %*s %*s %%iused", (u_int)mwp->iused - 2, "iused", (u_int)mwp->ifree, "ifree"); } (void)printf(" Mounted on\n"); } (void)printf("%-*s", (u_int)mwp->mntfrom, sfsp->f_mntfromname); used = sfsp->f_blocks - sfsp->f_bfree; availblks = sfsp->f_bavail + used; if (hflag) { prthuman(sfsp, used); } else { - (void)printf(" %*ld %*ld %*ld", - (u_int)mwp->total, fsbtoblk(sfsp->f_blocks, sfsp->f_bsize, blocksize), - (u_int)mwp->used, fsbtoblk(used, sfsp->f_bsize, blocksize), - (u_int)mwp->avail, fsbtoblk(sfsp->f_bavail, sfsp->f_bsize, - blocksize)); + (void)printf(" %*qd %*qd %*qd", + (u_int)mwp->total, + (intmax_t)fsbtoblk(sfsp->f_blocks, sfsp->f_bsize, blocksize), + (u_int)mwp->used, + (intmax_t)fsbtoblk(used, sfsp->f_bsize, blocksize), + (u_int)mwp->avail, + (intmax_t)fsbtoblk(sfsp->f_bavail, sfsp->f_bsize, blocksize)); } (void)printf(" %5.0f%%", availblks == 0 ? 100.0 : (double)used / (double)availblks * 100.0); if (iflag) { inodes = sfsp->f_files; used = inodes - sfsp->f_ffree; - (void)printf(" %*lu %*lu %4.0f%% ", - (u_int)mwp->iused, (u_long)used, - (u_int)mwp->ifree, sfsp->f_ffree, - inodes == 0 ? 100.0 : (double)used / (double)inodes * 100.0); + (void)printf(" %*qd %*qd %4.0f%% ", + (u_int)mwp->iused, (intmax_t)used, + (u_int)mwp->ifree, (intmax_t)sfsp->f_ffree, + inodes == 0 ? 100.0 : (double)used / (double)inodes * 100.0); } else (void)printf(" "); (void)printf(" %s\n", sfsp->f_mntonname); } /* * Update the maximum field-width information in `mwp' based on * the file system specified by `sfsp'. 
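prtstat() derives the Capacity column from used = f_blocks - f_bfree and availblks = f_bavail + used; because f_bavail excludes the reserved blocks (and can even go negative once the reserve is dipped into), the percentage can legitimately exceed 100. A small sketch (not part of the diff) of that arithmetic with hypothetical numbers:

/* Sketch: the Capacity computation used by prtstat(). */
#include <stdio.h>
#include <stdint.h>

static double
capacity(int64_t blocks, int64_t bfree, int64_t bavail)
{
	int64_t used = blocks - bfree;
	int64_t availblks = bavail + used;

	return (availblks == 0 ? 100.0 : (double)used / availblks * 100.0);
}

int
main(void)
{
	/* hypothetical: 1000 blocks, 80 free, only 30 available to users */
	printf("%.0f%%\n", capacity(1000, 80, 30));	/* prints 97% */
	return (0);
}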
*/ static void update_maxwidths(struct maxwidths *mwp, const struct statfs *sfsp) { - static long blocksize = 0; + static u_long blocksize = 0; int dummy; if (blocksize == 0) getbsize(&dummy, &blocksize); mwp->mntfrom = max(mwp->mntfrom, strlen(sfsp->f_mntfromname)); - mwp->total = max(mwp->total, longwidth(fsbtoblk(sfsp->f_blocks, + mwp->total = max(mwp->total, int64width( + fsbtoblk((int64_t)sfsp->f_blocks, sfsp->f_bsize, blocksize))); + mwp->used = max(mwp->used, int64width(fsbtoblk((int64_t)sfsp->f_blocks - + (int64_t)sfsp->f_bfree, sfsp->f_bsize, blocksize))); + mwp->avail = max(mwp->avail, int64width(fsbtoblk(sfsp->f_bavail, sfsp->f_bsize, blocksize))); - mwp->used = max(mwp->used, longwidth(fsbtoblk(sfsp->f_blocks - - sfsp->f_bfree, sfsp->f_bsize, blocksize))); - mwp->avail = max(mwp->avail, longwidth(fsbtoblk(sfsp->f_bavail, - sfsp->f_bsize, blocksize))); - mwp->iused = max(mwp->iused, longwidth(sfsp->f_files - + mwp->iused = max(mwp->iused, int64width((int64_t)sfsp->f_files - sfsp->f_ffree)); - mwp->ifree = max(mwp->ifree, longwidth(sfsp->f_ffree)); + mwp->ifree = max(mwp->ifree, int64width(sfsp->f_ffree)); } /* Return the width in characters of the specified long. */ static size_t -longwidth(long val) +int64width(int64_t val) { size_t len; len = 0; /* Negative or zero values require one extra digit. */ if (val <= 0) { val = -val; len++; } while (val > 0) { len++; val /= 10; } return (len); } static void usage(void) { (void)fprintf(stderr, "usage: df [-b | -H | -h | -k | -m | -P] [-ailn] [-t type] [file | filesystem ...]\n"); exit(EX_USAGE); } static char * makenetvfslist(void) { char *str, *strptr, **listptr; struct xvfsconf *xvfsp, *keep_xvfsp; size_t buflen; int cnt, i, maxvfsconf; if (sysctlbyname("vfs.conflist", NULL, &buflen, NULL, 0) < 0) { warn("sysctl(vfs.conflist)"); return (NULL); } xvfsp = malloc(buflen); if (xvfsp == NULL) { warnx("malloc failed"); return (NULL); } keep_xvfsp = xvfsp; if (sysctlbyname("vfs.conflist", xvfsp, &buflen, NULL, 0) < 0) { warn("sysctl(vfs.conflist)"); free(keep_xvfsp); return (NULL); } maxvfsconf = buflen / sizeof(struct xvfsconf); if ((listptr = malloc(sizeof(char*) * maxvfsconf)) == NULL) { warnx("malloc failed"); free(keep_xvfsp); return (NULL); } for (cnt = 0, i = 0; i < maxvfsconf; i++) { if (xvfsp->vfc_flags & VFCF_NETWORK) { listptr[cnt++] = strdup(xvfsp->vfc_name); if (listptr[cnt-1] == NULL) { warnx("malloc failed"); free(listptr); free(keep_xvfsp); return (NULL); } } xvfsp++; } if (cnt == 0 || (str = malloc(sizeof(char) * (32 * cnt + cnt + 2))) == NULL) { if (cnt > 0) warnx("malloc failed"); free(listptr); free(keep_xvfsp); return (NULL); } *str = 'n'; *(str + 1) = 'o'; for (i = 0, strptr = str + 2; i < cnt; i++, strptr++) { strncpy(strptr, listptr[i], 32); strptr += strlen(listptr[i]); *strptr = ','; free(listptr[i]); } *(--strptr) = NULL; free(keep_xvfsp); free(listptr); return (str); } Index: head/sys/kern/syscalls.master =================================================================== --- head/sys/kern/syscalls.master (revision 122536) +++ head/sys/kern/syscalls.master (revision 122537) @@ -1,643 +1,645 @@ $FreeBSD$ ; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94 ; ; System call name/number master file. ; Processed to created init_sysent.c, syscalls.c and syscall.h. 
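A quick standalone check (not part of the diff) of the int64width() helper introduced above: zero and negative values get one extra character, matching the '-' sign or the lone zero digit in the printed column.

#include <stdint.h>
#include <stdio.h>

static size_t
int64width(int64_t val)
{
	size_t len = 0;

	if (val <= 0) {			/* room for '-' or the single '0' */
		val = -val;
		len++;
	}
	while (val > 0) {
		len++;
		val /= 10;
	}
	return (len);
}

int
main(void)
{
	printf("%zu %zu %zu\n", int64width(0), int64width(-5),
	    int64width(1234));		/* prints: 1 2 4 */
	return (0);
}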
; Columns: number [M]type nargs namespc name alt{name,tag,rtyp}/comments ; number system call number, must be in order ; type one of [M]STD, [M]OBSOL, [M]UNIMPL, [M]COMPAT, [M]CPT_NOA, ; [M]LIBCOMPAT, [M]NODEF, [M]NOARGS, [M]NOPROTO, [M]NOIMPL, ; [M]NOSTD, [M]COMPAT4 ; namespc one of POSIX, BSD, NOHIDE ; name psuedo-prototype of syscall routine ; If one of the following alts is different, then all appear: ; altname name of system call if different ; alttag name of args struct tag if different from [o]`name'"_args" ; altrtyp return type if not int (bogus - syscalls always return int) ; for UNIMPL/OBSOL, name continues with comments ; types: ; [M] e.g. like MSTD -- means the system call is MP-safe. If no ; M prefix is used, the syscall wrapper will obtain the Giant ; lock for the syscall. ; STD always included ; COMPAT included on COMPAT #ifdef ; COMPAT4 included on COMPAT4 #ifdef (FreeBSD 4 compat) ; LIBCOMPAT included on COMPAT #ifdef, and placed in syscall.h ; OBSOL obsolete, not included in system, only specifies name ; UNIMPL not implemented, placeholder only ; NOSTD implemented but as a lkm that can be statically ; compiled in sysent entry will be filled with lkmsys ; so the SYSCALL_MODULE macro works ; ; Please copy any additions and changes to the following compatability tables: ; sys/ia64/ia32/syscalls.master (take a best guess) ; [other 64 bit platforms with an alternate 32 bit syscall table go here too] ; #ifdef's, etc. may be included, and are copied to the output files. #include #include #include ; Reserved/unimplemented system calls in the range 0-150 inclusive ; are reserved for use in future Berkeley releases. ; Additional system calls implemented in vendor and other ; redistributions should be placed in the reserved range at the end ; of the current calls. 0 STD NOHIDE { int nosys(void); } syscall nosys_args int 1 MSTD NOHIDE { void sys_exit(int rval); } exit sys_exit_args void 2 MSTD POSIX { int fork(void); } 3 MSTD POSIX { ssize_t read(int fd, void *buf, size_t nbyte); } 4 MSTD POSIX { ssize_t write(int fd, const void *buf, size_t nbyte); } 5 STD POSIX { int open(char *path, int flags, int mode); } ; XXX should be { int open(const char *path, int flags, ...); } ; but we're not ready for `const' or varargs. ; XXX man page says `mode_t mode'. 6 MSTD POSIX { int close(int fd); } 7 MSTD BSD { int wait4(int pid, int *status, int options, \ struct rusage *rusage); } wait4 wait_args int 8 COMPAT BSD { int creat(char *path, int mode); } 9 STD POSIX { int link(char *path, char *link); } 10 STD POSIX { int unlink(char *path); } 11 OBSOL NOHIDE execv 12 STD POSIX { int chdir(char *path); } 13 STD BSD { int fchdir(int fd); } 14 STD POSIX { int mknod(char *path, int mode, int dev); } 15 STD POSIX { int chmod(char *path, int mode); } 16 STD POSIX { int chown(char *path, int uid, int gid); } 17 MSTD BSD { int obreak(char *nsize); } break obreak_args int -18 STD BSD { int getfsstat(struct statfs *buf, long bufsize, \ +18 COMPAT4 BSD { int getfsstat(struct ostatfs *buf, long bufsize, \ int flags); } 19 COMPAT POSIX { long lseek(int fd, long offset, int whence); } 20 MSTD POSIX { pid_t getpid(void); } 21 STD BSD { int mount(char *type, char *path, int flags, \ caddr_t data); } ; XXX `path' should have type `const char *' but we're not ready for that. 
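The switch of entry 18 to COMPAT4 goes with the widened statfs structure (the old layout survives as ostatfs for FreeBSD 4 binaries). A userland sketch (not part of the diff) of statfs(2) as seen after this change, printing the counters through uintmax_t/intmax_t casts since they may no longer fit in a long:

#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>
#include <stdint.h>

int
main(int argc, char *argv[])
{
	struct statfs sfs;

	if (argc < 2 || statfs(argv[1], &sfs) != 0) {
		perror("statfs");
		return (1);
	}
	printf("%s: %ju blocks of %ju bytes, %jd available\n",
	    sfs.f_mntonname, (uintmax_t)sfs.f_blocks,
	    (uintmax_t)sfs.f_bsize, (intmax_t)sfs.f_bavail);
	return (0);
}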
22 STD BSD { int unmount(char *path, int flags); } 23 MSTD POSIX { int setuid(uid_t uid); } 24 MSTD POSIX { uid_t getuid(void); } 25 MSTD POSIX { uid_t geteuid(void); } 26 MSTD BSD { int ptrace(int req, pid_t pid, caddr_t addr, \ int data); } 27 MSTD BSD { int recvmsg(int s, struct msghdr *msg, int flags); } 28 MSTD BSD { int sendmsg(int s, caddr_t msg, int flags); } 29 MSTD BSD { int recvfrom(int s, caddr_t buf, size_t len, \ int flags, caddr_t from, int *fromlenaddr); } 30 MSTD BSD { int accept(int s, caddr_t name, int *anamelen); } 31 MSTD BSD { int getpeername(int fdes, caddr_t asa, int *alen); } 32 MSTD BSD { int getsockname(int fdes, caddr_t asa, int *alen); } 33 STD POSIX { int access(char *path, int flags); } 34 STD BSD { int chflags(char *path, int flags); } 35 STD BSD { int fchflags(int fd, int flags); } 36 STD BSD { int sync(void); } 37 MSTD POSIX { int kill(int pid, int signum); } 38 COMPAT POSIX { int stat(char *path, struct ostat *ub); } 39 MSTD POSIX { pid_t getppid(void); } 40 COMPAT POSIX { int lstat(char *path, struct ostat *ub); } 41 MSTD POSIX { int dup(u_int fd); } 42 MSTD POSIX { int pipe(void); } 43 MSTD POSIX { gid_t getegid(void); } 44 MSTD BSD { int profil(caddr_t samples, size_t size, \ size_t offset, u_int scale); } 45 MSTD BSD { int ktrace(const char *fname, int ops, int facs, \ int pid); } 46 MCOMPAT POSIX { int sigaction(int signum, struct osigaction *nsa, \ struct osigaction *osa); } 47 MSTD POSIX { gid_t getgid(void); } 48 MCOMPAT POSIX { int sigprocmask(int how, osigset_t mask); } ; XXX note nonstandard (bogus) calling convention - the libc stub passes ; us the mask, not a pointer to it, and we return the old mask as the ; (int) return value. 49 MSTD BSD { int getlogin(char *namebuf, u_int namelen); } 50 MSTD BSD { int setlogin(char *namebuf); } 51 MSTD BSD { int acct(char *path); } 52 MCOMPAT POSIX { int sigpending(void); } 53 MSTD BSD { int sigaltstack(stack_t *ss, stack_t *oss); } 54 MSTD POSIX { int ioctl(int fd, u_long com, caddr_t data); } 55 MSTD BSD { int reboot(int opt); } 56 STD POSIX { int revoke(char *path); } 57 STD POSIX { int symlink(char *path, char *link); } 58 STD POSIX { int readlink(char *path, char *buf, int count); } 59 MSTD POSIX { int execve(char *fname, char **argv, char **envv); } 60 MSTD POSIX { int umask(int newmask); } umask umask_args int 61 STD BSD { int chroot(char *path); } 62 MCOMPAT POSIX { int fstat(int fd, struct ostat *sb); } 63 MCOMPAT BSD { int getkerninfo(int op, char *where, size_t *size, \ int arg); } getkerninfo getkerninfo_args int 64 MCOMPAT BSD { int getpagesize(void); } \ getpagesize getpagesize_args int 65 MSTD BSD { int msync(void *addr, size_t len, int flags); } 66 MSTD BSD { int vfork(void); } 67 OBSOL NOHIDE vread 68 OBSOL NOHIDE vwrite 69 MSTD BSD { int sbrk(int incr); } 70 MSTD BSD { int sstk(int incr); } 71 MCOMPAT BSD { int mmap(void *addr, int len, int prot, \ int flags, int fd, long pos); } 72 MSTD BSD { int ovadvise(int anom); } vadvise ovadvise_args int 73 MSTD BSD { int munmap(void *addr, size_t len); } 74 MSTD BSD { int mprotect(const void *addr, size_t len, int prot); } 75 MSTD BSD { int madvise(void *addr, size_t len, int behav); } 76 OBSOL NOHIDE vhangup 77 OBSOL NOHIDE vlimit 78 MSTD BSD { int mincore(const void *addr, size_t len, \ char *vec); } 79 MSTD POSIX { int getgroups(u_int gidsetsize, gid_t *gidset); } 80 MSTD POSIX { int setgroups(u_int gidsetsize, gid_t *gidset); } 81 MSTD POSIX { int getpgrp(void); } 82 MSTD POSIX { int setpgid(int pid, int pgid); } 83 MSTD BSD { int 
setitimer(u_int which, struct itimerval *itv, \ struct itimerval *oitv); } 84 MCOMPAT BSD { int wait(void); } 85 MSTD BSD { int swapon(char *name); } 86 MSTD BSD { int getitimer(u_int which, struct itimerval *itv); } 87 MCOMPAT BSD { int gethostname(char *hostname, u_int len); } \ gethostname gethostname_args int 88 MCOMPAT BSD { int sethostname(char *hostname, u_int len); } \ sethostname sethostname_args int 89 MSTD BSD { int getdtablesize(void); } 90 MSTD POSIX { int dup2(u_int from, u_int to); } 91 UNIMPL BSD getdopt 92 MSTD POSIX { int fcntl(int fd, int cmd, long arg); } ; XXX should be { int fcntl(int fd, int cmd, ...); } ; but we're not ready for varargs. 93 MSTD BSD { int select(int nd, fd_set *in, fd_set *ou, \ fd_set *ex, struct timeval *tv); } 94 UNIMPL BSD setdopt 95 STD POSIX { int fsync(int fd); } 96 MSTD BSD { int setpriority(int which, int who, int prio); } 97 MSTD BSD { int socket(int domain, int type, int protocol); } 98 MSTD BSD { int connect(int s, caddr_t name, int namelen); } 99 MCPT_NOA BSD { int accept(int s, caddr_t name, int *anamelen); } \ accept accept_args int 100 MSTD BSD { int getpriority(int which, int who); } 101 MCOMPAT BSD { int send(int s, caddr_t buf, int len, int flags); } 102 MCOMPAT BSD { int recv(int s, caddr_t buf, int len, int flags); } 103 MCOMPAT BSD { int sigreturn(struct osigcontext *sigcntxp); } 104 MSTD BSD { int bind(int s, caddr_t name, int namelen); } 105 MSTD BSD { int setsockopt(int s, int level, int name, \ caddr_t val, int valsize); } 106 MSTD BSD { int listen(int s, int backlog); } 107 OBSOL NOHIDE vtimes 108 MCOMPAT BSD { int sigvec(int signum, struct sigvec *nsv, \ struct sigvec *osv); } 109 MCOMPAT BSD { int sigblock(int mask); } 110 MCOMPAT BSD { int sigsetmask(int mask); } 111 MCOMPAT POSIX { int sigsuspend(osigset_t mask); } ; XXX note nonstandard (bogus) calling convention - the libc stub passes ; us the mask, not a pointer to it. 
112 MCOMPAT BSD { int sigstack(struct sigstack *nss, \ struct sigstack *oss); } 113 MCOMPAT BSD { int recvmsg(int s, struct omsghdr *msg, int flags); } 114 MCOMPAT BSD { int sendmsg(int s, caddr_t msg, int flags); } 115 OBSOL NOHIDE vtrace 116 MSTD BSD { int gettimeofday(struct timeval *tp, \ struct timezone *tzp); } 117 MSTD BSD { int getrusage(int who, struct rusage *rusage); } 118 MSTD BSD { int getsockopt(int s, int level, int name, \ caddr_t val, int *avalsize); } 119 UNIMPL NOHIDE resuba (BSD/OS 2.x) 120 MSTD BSD { int readv(int fd, struct iovec *iovp, u_int iovcnt); } 121 MSTD BSD { int writev(int fd, struct iovec *iovp, \ u_int iovcnt); } 122 MSTD BSD { int settimeofday(struct timeval *tv, \ struct timezone *tzp); } 123 STD BSD { int fchown(int fd, int uid, int gid); } 124 STD BSD { int fchmod(int fd, int mode); } 125 MCPT_NOA BSD { int recvfrom(int s, caddr_t buf, size_t len, \ int flags, caddr_t from, int *fromlenaddr); } \ recvfrom recvfrom_args int 126 MSTD BSD { int setreuid(int ruid, int euid); } 127 MSTD BSD { int setregid(int rgid, int egid); } 128 STD POSIX { int rename(char *from, char *to); } 129 COMPAT BSD { int truncate(char *path, long length); } 130 COMPAT BSD { int ftruncate(int fd, long length); } 131 MSTD BSD { int flock(int fd, int how); } 132 STD POSIX { int mkfifo(char *path, int mode); } 133 MSTD BSD { int sendto(int s, caddr_t buf, size_t len, \ int flags, caddr_t to, int tolen); } 134 MSTD BSD { int shutdown(int s, int how); } 135 MSTD BSD { int socketpair(int domain, int type, int protocol, \ int *rsv); } 136 STD POSIX { int mkdir(char *path, int mode); } 137 STD POSIX { int rmdir(char *path); } 138 STD BSD { int utimes(char *path, struct timeval *tptr); } 139 OBSOL NOHIDE 4.2 sigreturn 140 MSTD BSD { int adjtime(struct timeval *delta, \ struct timeval *olddelta); } 141 MCOMPAT BSD { int getpeername(int fdes, caddr_t asa, int *alen); } 142 MCOMPAT BSD { long gethostid(void); } 143 MCOMPAT BSD { int sethostid(long hostid); } 144 MCOMPAT BSD { int getrlimit(u_int which, struct orlimit *rlp); } 145 MCOMPAT BSD { int setrlimit(u_int which, struct orlimit *rlp); } 146 MCOMPAT BSD { int killpg(int pgid, int signum); } 147 MSTD POSIX { int setsid(void); } 148 STD BSD { int quotactl(char *path, int cmd, int uid, \ caddr_t arg); } 149 MCOMPAT BSD { int quota(void); } 150 MCPT_NOA BSD { int getsockname(int fdec, caddr_t asa, int *alen); }\ getsockname getsockname_args int ; Syscalls 151-180 inclusive are reserved for vendor-specific ; system calls. (This includes various calls added for compatibity ; with other Unix variants.) ; Some of these calls are now supported by BSD... 151 UNIMPL NOHIDE sem_lock (BSD/OS 2.x) 152 UNIMPL NOHIDE sem_wakeup (BSD/OS 2.x) 153 UNIMPL NOHIDE asyncdaemon (BSD/OS 2.x) 154 UNIMPL NOHIDE nosys ; 155 is initialized by the NFS code, if present. 
155 MNOIMPL BSD { int nfssvc(int flag, caddr_t argp); } 156 COMPAT BSD { int getdirentries(int fd, char *buf, u_int count, \ long *basep); } -157 STD BSD { int statfs(char *path, struct statfs *buf); } -158 STD BSD { int fstatfs(int fd, struct statfs *buf); } +157 COMPAT4 BSD { int statfs(char *path, struct ostatfs *buf); } +158 COMPAT4 BSD { int fstatfs(int fd, struct ostatfs *buf); } 159 UNIMPL NOHIDE nosys 160 UNIMPL NOHIDE nosys 161 STD BSD { int getfh(char *fname, struct fhandle *fhp); } 162 MSTD BSD { int getdomainname(char *domainname, int len); } 163 MSTD BSD { int setdomainname(char *domainname, int len); } 164 MSTD BSD { int uname(struct utsname *name); } 165 MSTD BSD { int sysarch(int op, char *parms); } 166 MSTD BSD { int rtprio(int function, pid_t pid, \ struct rtprio *rtp); } 167 UNIMPL NOHIDE nosys 168 UNIMPL NOHIDE nosys ; 169 is initialized by the SYSVSEM code if present or loaded 169 MNOSTD BSD { int semsys(int which, int a2, int a3, int a4, \ int a5); } ; 169 is initialized by the SYSVMSG code if present or loaded ; XXX should be { int semsys(int which, ...); } 170 MNOSTD BSD { int msgsys(int which, int a2, int a3, int a4, \ int a5, int a6); } ; 169 is initialized by the SYSVSHM code if present or loaded ; XXX should be { int msgsys(int which, ...); } 171 MNOSTD BSD { int shmsys(int which, int a2, int a3, int a4); } ; XXX should be { int shmsys(int which, ...); } 172 UNIMPL NOHIDE nosys 173 MSTD POSIX { ssize_t pread(int fd, void *buf, size_t nbyte, \ int pad, off_t offset); } 174 MSTD POSIX { ssize_t pwrite(int fd, const void *buf, \ size_t nbyte, int pad, off_t offset); } 175 UNIMPL NOHIDE nosys 176 MSTD BSD { int ntp_adjtime(struct timex *tp); } 177 UNIMPL NOHIDE sfork (BSD/OS 2.x) 178 UNIMPL NOHIDE getdescriptor (BSD/OS 2.x) 179 UNIMPL NOHIDE setdescriptor (BSD/OS 2.x) 180 UNIMPL NOHIDE nosys ; Syscalls 181-199 are used by/reserved for BSD 181 MSTD POSIX { int setgid(gid_t gid); } 182 MSTD BSD { int setegid(gid_t egid); } 183 MSTD BSD { int seteuid(uid_t euid); } 184 UNIMPL BSD lfs_bmapv 185 UNIMPL BSD lfs_markv 186 UNIMPL BSD lfs_segclean 187 UNIMPL BSD lfs_segwait 188 STD POSIX { int stat(char *path, struct stat *ub); } 189 MSTD POSIX { int fstat(int fd, struct stat *sb); } 190 STD POSIX { int lstat(char *path, struct stat *ub); } 191 STD POSIX { int pathconf(char *path, int name); } 192 MSTD POSIX { int fpathconf(int fd, int name); } 193 UNIMPL NOHIDE nosys 194 MSTD BSD { int getrlimit(u_int which, \ struct rlimit *rlp); } \ getrlimit __getrlimit_args int 195 MSTD BSD { int setrlimit(u_int which, \ struct rlimit *rlp); } \ setrlimit __setrlimit_args int 196 STD BSD { int getdirentries(int fd, char *buf, u_int count, \ long *basep); } 197 MSTD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \ int flags, int fd, int pad, off_t pos); } 198 STD NOHIDE { int nosys(void); } __syscall __syscall_args int 199 STD POSIX { off_t lseek(int fd, int pad, off_t offset, \ int whence); } 200 STD BSD { int truncate(char *path, int pad, off_t length); } 201 STD BSD { int ftruncate(int fd, int pad, off_t length); } 202 MSTD BSD { int __sysctl(int *name, u_int namelen, void *old, \ size_t *oldlenp, void *new, size_t newlen); } \ __sysctl sysctl_args int ; properly, __sysctl should be a NOHIDE, but making an exception ; here allows to avoid one in libc/sys/Makefile.inc. 
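Entries 157/158 keep the old statfs/fstatfs numbers for FreeBSD 4 binaries as COMPAT4 wrappers over ostatfs. A hedged sketch of the narrowing such a compatibility handler has to perform; the struct layouts and helper names here are illustrative only, not the kernel's:

#include <stdint.h>
#include <limits.h>

/* illustrative layouts; the real statfs/ostatfs live in <sys/mount.h> */
struct new_sfs { uint64_t f_blocks, f_bfree; int64_t f_bavail; };
struct old_sfs { long f_blocks, f_bfree, f_bavail; };

static long
narrow(int64_t v)
{
	if (v > LONG_MAX)
		return (LONG_MAX);
	if (v < LONG_MIN)
		return (LONG_MIN);
	return ((long)v);
}

static void
cvt_old(const struct new_sfs *nsp, struct old_sfs *osp)
{
	/* unsigned 64-bit counters are clamped before narrowing */
	osp->f_blocks = narrow(nsp->f_blocks > INT64_MAX ? INT64_MAX :
	    (int64_t)nsp->f_blocks);
	osp->f_bfree = narrow(nsp->f_bfree > INT64_MAX ? INT64_MAX :
	    (int64_t)nsp->f_bfree);
	osp->f_bavail = narrow(nsp->f_bavail);
}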
203 MSTD BSD { int mlock(const void *addr, size_t len); } 204 MSTD BSD { int munlock(const void *addr, size_t len); } 205 STD BSD { int undelete(char *path); } 206 STD BSD { int futimes(int fd, struct timeval *tptr); } 207 MSTD BSD { int getpgid(pid_t pid); } 208 UNIMPL NOHIDE newreboot (NetBSD) 209 MSTD BSD { int poll(struct pollfd *fds, u_int nfds, \ int timeout); } ; ; The following are reserved for loadable syscalls ; 210 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 211 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 212 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 213 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 214 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 215 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 216 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 217 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 218 NODEF NOHIDE lkmnosys lkmnosys nosys_args int 219 NODEF NOHIDE lkmnosys lkmnosys nosys_args int ; ; The following were introduced with NetBSD/4.4Lite-2 ; They are initialized by thier respective modules/sysinits 220 MNOSTD BSD { int __semctl(int semid, int semnum, int cmd, \ union semun *arg); } 221 MNOSTD BSD { int semget(key_t key, int nsems, int semflg); } 222 MNOSTD BSD { int semop(int semid, struct sembuf *sops, \ size_t nsops); } 223 UNIMPL NOHIDE semconfig 224 MNOSTD BSD { int msgctl(int msqid, int cmd, \ struct msqid_ds *buf); } 225 MNOSTD BSD { int msgget(key_t key, int msgflg); } 226 MNOSTD BSD { int msgsnd(int msqid, const void *msgp, size_t msgsz, \ int msgflg); } 227 MNOSTD BSD { int msgrcv(int msqid, void *msgp, size_t msgsz, \ long msgtyp, int msgflg); } 228 MNOSTD BSD { int shmat(int shmid, const void *shmaddr, int shmflg); } 229 MNOSTD BSD { int shmctl(int shmid, int cmd, \ struct shmid_ds *buf); } 230 MNOSTD BSD { int shmdt(const void *shmaddr); } 231 MNOSTD BSD { int shmget(key_t key, size_t size, int shmflg); } ; 232 MSTD POSIX { int clock_gettime(clockid_t clock_id, \ struct timespec *tp); } 233 MSTD POSIX { int clock_settime(clockid_t clock_id, \ const struct timespec *tp); } 234 MSTD POSIX { int clock_getres(clockid_t clock_id, \ struct timespec *tp); } 235 UNIMPL NOHIDE timer_create 236 UNIMPL NOHIDE timer_delete 237 UNIMPL NOHIDE timer_settime 238 UNIMPL NOHIDE timer_gettime 239 UNIMPL NOHIDE timer_getoverrun 240 MSTD POSIX { int nanosleep(const struct timespec *rqtp, \ struct timespec *rmtp); } 241 UNIMPL NOHIDE nosys 242 UNIMPL NOHIDE nosys 243 UNIMPL NOHIDE nosys 244 UNIMPL NOHIDE nosys 245 UNIMPL NOHIDE nosys 246 UNIMPL NOHIDE nosys 247 UNIMPL NOHIDE nosys 248 UNIMPL NOHIDE nosys 249 UNIMPL NOHIDE nosys ; syscall numbers initially used in OpenBSD 250 MSTD BSD { int minherit(void *addr, size_t len, int inherit); } 251 MSTD BSD { int rfork(int flags); } 252 MSTD BSD { int openbsd_poll(struct pollfd *fds, u_int nfds, \ int timeout); } 253 MSTD BSD { int issetugid(void); } 254 STD BSD { int lchown(char *path, int uid, int gid); } 255 UNIMPL NOHIDE nosys 256 UNIMPL NOHIDE nosys 257 UNIMPL NOHIDE nosys 258 UNIMPL NOHIDE nosys 259 UNIMPL NOHIDE nosys 260 UNIMPL NOHIDE nosys 261 UNIMPL NOHIDE nosys 262 UNIMPL NOHIDE nosys 263 UNIMPL NOHIDE nosys 264 UNIMPL NOHIDE nosys 265 UNIMPL NOHIDE nosys 266 UNIMPL NOHIDE nosys 267 UNIMPL NOHIDE nosys 268 UNIMPL NOHIDE nosys 269 UNIMPL NOHIDE nosys 270 UNIMPL NOHIDE nosys 271 UNIMPL NOHIDE nosys 272 STD BSD { int getdents(int fd, char *buf, size_t count); } 273 UNIMPL NOHIDE nosys 274 STD BSD { int lchmod(char *path, mode_t mode); } 275 NOPROTO BSD { int lchown(char *path, uid_t uid, gid_t gid); } netbsd_lchown 
lchown_args int 276 STD BSD { int lutimes(char *path, struct timeval *tptr); } 277 MNOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int 278 STD BSD { int nstat(char *path, struct nstat *ub); } 279 MSTD BSD { int nfstat(int fd, struct nstat *sb); } 280 STD BSD { int nlstat(char *path, struct nstat *ub); } 281 UNIMPL NOHIDE nosys 282 UNIMPL NOHIDE nosys 283 UNIMPL NOHIDE nosys 284 UNIMPL NOHIDE nosys 285 UNIMPL NOHIDE nosys 286 UNIMPL NOHIDE nosys 287 UNIMPL NOHIDE nosys 288 UNIMPL NOHIDE nosys 289 UNIMPL NOHIDE nosys 290 UNIMPL NOHIDE nosys 291 UNIMPL NOHIDE nosys 292 UNIMPL NOHIDE nosys 293 UNIMPL NOHIDE nosys 294 UNIMPL NOHIDE nosys 295 UNIMPL NOHIDE nosys 296 UNIMPL NOHIDE nosys ; XXX 297 is 300 in NetBSD -297 STD BSD { int fhstatfs(const struct fhandle *u_fhp, struct statfs *buf); } +297 COMPAT4 BSD { int fhstatfs(const struct fhandle *u_fhp, struct ostatfs *buf); } 298 STD BSD { int fhopen(const struct fhandle *u_fhp, int flags); } 299 STD BSD { int fhstat(const struct fhandle *u_fhp, struct stat *sb); } ; syscall numbers for FreeBSD 300 MSTD BSD { int modnext(int modid); } 301 MSTD BSD { int modstat(int modid, struct module_stat* stat); } 302 MSTD BSD { int modfnext(int modid); } 303 MSTD BSD { int modfind(const char *name); } 304 MSTD BSD { int kldload(const char *file); } 305 MSTD BSD { int kldunload(int fileid); } 306 MSTD BSD { int kldfind(const char *file); } 307 MSTD BSD { int kldnext(int fileid); } 308 MSTD BSD { int kldstat(int fileid, struct kld_file_stat* stat); } 309 MSTD BSD { int kldfirstmod(int fileid); } 310 MSTD BSD { int getsid(pid_t pid); } 311 MSTD BSD { int setresuid(uid_t ruid, uid_t euid, uid_t suid); } 312 MSTD BSD { int setresgid(gid_t rgid, gid_t egid, gid_t sgid); } 313 OBSOL NOHIDE signanosleep 314 NOSTD BSD { int aio_return(struct aiocb *aiocbp); } 315 NOSTD BSD { int aio_suspend(struct aiocb * const * aiocbp, int nent, const struct timespec *timeout); } 316 NOSTD BSD { int aio_cancel(int fd, struct aiocb *aiocbp); } 317 NOSTD BSD { int aio_error(struct aiocb *aiocbp); } 318 NOSTD BSD { int aio_read(struct aiocb *aiocbp); } 319 NOSTD BSD { int aio_write(struct aiocb *aiocbp); } 320 NOSTD BSD { int lio_listio(int mode, struct aiocb * const *acb_list, int nent, struct sigevent *sig); } 321 MSTD BSD { int yield(void); } 322 OBSOL NOHIDE thr_sleep 323 OBSOL NOHIDE thr_wakeup 324 MSTD BSD { int mlockall(int how); } 325 MSTD BSD { int munlockall(void); } 326 STD BSD { int __getcwd(u_char *buf, u_int buflen); } 327 MSTD POSIX { int sched_setparam (pid_t pid, const struct sched_param *param); } 328 MSTD POSIX { int sched_getparam (pid_t pid, struct sched_param *param); } 329 MSTD POSIX { int sched_setscheduler (pid_t pid, int policy, const struct sched_param *param); } 330 MSTD POSIX { int sched_getscheduler (pid_t pid); } 331 MSTD POSIX { int sched_yield (void); } 332 MSTD POSIX { int sched_get_priority_max (int policy); } 333 MSTD POSIX { int sched_get_priority_min (int policy); } 334 MSTD POSIX { int sched_rr_get_interval (pid_t pid, struct timespec *interval); } 335 MSTD BSD { int utrace(const void *addr, size_t len); } 336 MCOMPAT4 BSD { int sendfile(int fd, int s, off_t offset, size_t nbytes, \ struct sf_hdtr *hdtr, off_t *sbytes, int flags); } 337 STD BSD { int kldsym(int fileid, int cmd, void *data); } 338 MSTD BSD { int jail(struct jail *jail); } 339 UNIMPL BSD pioctl 340 MSTD POSIX { int sigprocmask(int how, const sigset_t *set, \ sigset_t *oset); } 341 MSTD POSIX { int sigsuspend(const sigset_t *sigmask); } 342 
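Entry 297 gets the same COMPAT4 treatment for the file-handle variant. A usage sketch (not part of the diff) of the pair it belongs to: getfh(2), which is restricted to the superuser, produces a handle that fhstatfs(2) can later turn back into filesystem statistics:

#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>
#include <stdint.h>

int
main(int argc, char *argv[])
{
	struct fhandle fh;
	struct statfs sfs;

	if (argc < 2 || getfh(argv[1], &fh) != 0 || fhstatfs(&fh, &sfs) != 0) {
		perror("getfh/fhstatfs");
		return (1);
	}
	printf("%s: %ju blocks free\n", sfs.f_mntonname, (uintmax_t)sfs.f_bfree);
	return (0);
}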
MCOMPAT4 POSIX { int sigaction(int sig, const struct sigaction *act, \ struct sigaction *oact); } 343 MSTD POSIX { int sigpending(sigset_t *set); } 344 MCOMPAT4 BSD { int sigreturn(const struct ucontext4 *sigcntxp); } 345 MSTD NOHIDE { int sigtimedwait(const sigset_t *set, \ siginfo_t *info, const struct timespec *timeout); } 346 MSTD NOHIDE { int sigwaitinfo(const sigset_t *set, \ siginfo_t *info); } 347 MSTD BSD { int __acl_get_file(const char *path, \ acl_type_t type, struct acl *aclp); } 348 MSTD BSD { int __acl_set_file(const char *path, \ acl_type_t type, struct acl *aclp); } 349 MSTD BSD { int __acl_get_fd(int filedes, acl_type_t type, \ struct acl *aclp); } 350 MSTD BSD { int __acl_set_fd(int filedes, acl_type_t type, \ struct acl *aclp); } 351 MSTD BSD { int __acl_delete_file(const char *path, \ acl_type_t type); } 352 MSTD BSD { int __acl_delete_fd(int filedes, acl_type_t type); } 353 MSTD BSD { int __acl_aclcheck_file(const char *path, \ acl_type_t type, struct acl *aclp); } 354 MSTD BSD { int __acl_aclcheck_fd(int filedes, acl_type_t type, \ struct acl *aclp); } 355 STD BSD { int extattrctl(const char *path, int cmd, \ const char *filename, int attrnamespace, \ const char *attrname); } 356 STD BSD { int extattr_set_file(const char *path, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 357 STD BSD { ssize_t extattr_get_file(const char *path, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 358 STD BSD { int extattr_delete_file(const char *path, \ int attrnamespace, const char *attrname); } 359 NOSTD BSD { int aio_waitcomplete(struct aiocb **aiocbp, struct timespec *timeout); } 360 MSTD BSD { int getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); } 361 MSTD BSD { int getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); } 362 MSTD BSD { int kqueue(void); } 363 MSTD BSD { int kevent(int fd, \ const struct kevent *changelist, int nchanges, \ struct kevent *eventlist, int nevents, \ const struct timespec *timeout); } 364 UNIMPL BSD __cap_get_proc 365 UNIMPL BSD __cap_set_proc 366 UNIMPL BSD __cap_get_fd 367 UNIMPL BSD __cap_get_file 368 UNIMPL BSD __cap_set_fd 369 UNIMPL BSD __cap_set_file 370 NODEF NOHIDE lkmressys lkmressys nosys_args int 371 STD BSD { int extattr_set_fd(int fd, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 372 STD BSD { ssize_t extattr_get_fd(int fd, int attrnamespace, \ const char *attrname, void *data, size_t nbytes); } 373 STD BSD { int extattr_delete_fd(int fd, int attrnamespace, \ const char *attrname); } 374 MSTD BSD { int __setugid(int flag); } 375 NOIMPL BSD { int nfsclnt(int flag, caddr_t argp); } 376 STD BSD { int eaccess(char *path, int flags); } 377 UNIMPL BSD afs_syscall 378 STD BSD { int nmount(struct iovec *iovp, unsigned int iovcnt, \ int flags); } 379 MSTD BSD { int kse_exit(void); } 380 MSTD BSD { int kse_wakeup(struct kse_mailbox *mbx); } 381 STD BSD { int kse_create(struct kse_mailbox *mbx, \ int newgroup); } 382 MSTD BSD { int kse_thr_interrupt(struct kse_thr_mailbox *tmbx, int cmd, long data); } 383 MSTD BSD { int kse_release(struct timespec *timeout); } 384 MSTD BSD { int __mac_get_proc(struct mac *mac_p); } 385 MSTD BSD { int __mac_set_proc(struct mac *mac_p); } 386 MSTD BSD { int __mac_get_fd(int fd, struct mac *mac_p); } 387 MSTD BSD { int __mac_get_file(const char *path_p, \ struct mac *mac_p); } 388 MSTD BSD { int __mac_set_fd(int fd, struct mac *mac_p); } 389 MSTD BSD { int __mac_set_file(const char *path_p, \ struct mac *mac_p); } 390 STD BSD { int kenv(int 
what, const char *name, char *value, \ int len); } 391 STD BSD { int lchflags(const char *path, int flags); } 392 STD BSD { int uuidgen(struct uuid *store, int count); } 393 MSTD BSD { int sendfile(int fd, int s, off_t offset, size_t nbytes, \ struct sf_hdtr *hdtr, off_t *sbytes, int flags); } 394 MSTD BSD { int mac_syscall(const char *policy, int call, \ void *arg); } -395 UNIMPL NOHIDE nosys -396 UNIMPL NOHIDE nosys -397 UNIMPL NOHIDE nosys -398 UNIMPL NOHIDE nosys +395 STD BSD { int getfsstat(struct statfs *buf, long bufsize, \ + int flags); } +396 STD BSD { int statfs(char *path, struct statfs *buf); } +397 STD BSD { int fstatfs(int fd, struct statfs *buf); } +398 STD BSD { int fhstatfs(const struct fhandle *u_fhp, \ + struct statfs *buf); } 399 UNIMPL NOHIDE nosys 400 MNOSTD BSD { int ksem_close(semid_t id); } 401 MNOSTD BSD { int ksem_post(semid_t id); } 402 MNOSTD BSD { int ksem_wait(semid_t id); } 403 MNOSTD BSD { int ksem_trywait(semid_t id); } 404 MNOSTD BSD { int ksem_init(semid_t *idp, unsigned int value); } 405 MNOSTD BSD { int ksem_open(semid_t *idp, const char *name, \ int oflag, mode_t mode, unsigned int value); } 406 MNOSTD BSD { int ksem_unlink(const char *name); } 407 MNOSTD BSD { int ksem_getvalue(semid_t id, int *val); } 408 MNOSTD BSD { int ksem_destroy(semid_t id); } 409 MSTD BSD { int __mac_get_pid(pid_t pid, struct mac *mac_p); } 410 MSTD BSD { int __mac_get_link(const char *path_p, \ struct mac *mac_p); } 411 MSTD BSD { int __mac_set_link(const char *path_p, \ struct mac *mac_p); } 412 STD BSD { int extattr_set_link(const char *path, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 413 STD BSD { ssize_t extattr_get_link(const char *path, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 414 STD BSD { int extattr_delete_link(const char *path, \ int attrnamespace, const char *attrname); } 415 MSTD BSD { int __mac_execve(char *fname, char **argv, \ char **envv, struct mac *mac_p); } 416 MSTD POSIX { int sigaction(int sig, const struct sigaction *act, \ struct sigaction *oact); } 417 MSTD BSD { int sigreturn(const struct __ucontext *sigcntxp); } 418 UNIMPL BSD __xstat 419 UNIMPL BSD __xfstat 420 UNIMPL BSD __xlstat 421 MSTD BSD { int getcontext(struct __ucontext *ucp); } 422 MSTD BSD { int setcontext(const struct __ucontext *ucp); } 423 MSTD BSD { int swapcontext(struct __ucontext *oucp, \ const struct __ucontext *ucp); } 424 MSTD BSD { int swapoff(const char *name); } 425 MSTD BSD { int __acl_get_link(const char *path, \ acl_type_t type, struct acl *aclp); } 426 MSTD BSD { int __acl_set_link(const char *path, \ acl_type_t type, struct acl *aclp); } 427 MSTD BSD { int __acl_delete_link(const char *path, \ acl_type_t type); } 428 MSTD BSD { int __acl_aclcheck_link(const char *path, \ acl_type_t type, struct acl *aclp); } 429 MSTD NOHIDE { int sigwait(const sigset_t *set, int *sig); } 430 MSTD BSD { int thr_create(ucontext_t *ctx, thr_id_t *id, int flags); } 431 MSTD BSD { void thr_exit(void); } 432 MSTD BSD { int thr_self(thr_id_t *id); } 433 MSTD BSD { int thr_kill(thr_id_t id, int sig); } 434 MSTD BSD { int _umtx_lock(struct umtx *umtx); } 435 MSTD BSD { int _umtx_unlock(struct umtx *umtx); } 436 MSTD BSD { int jail_attach(int jid); } 437 STD BSD { ssize_t extattr_list_fd(int fd, int attrnamespace, \ void *data, size_t nbytes); } 438 STD BSD { ssize_t extattr_list_file(const char *path, \ int attrnamespace, void *data, size_t nbytes); } 439 STD BSD { ssize_t extattr_list_link(const char *path, \ int attrnamespace, 
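The four new slots 395-398 carry the statfs family forward with the enlarged structure, while the old numbers stay behind as COMPAT4 entries. A usage sketch (not part of the diff) of the getfsstat(2) pattern: a NULL buffer yields the number of mounted filesystems, and a second call fills a buffer of that size:

#include <sys/param.h>
#include <sys/ucred.h>
#include <sys/mount.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct statfs *buf;
	int i, n;

	n = getfsstat(NULL, 0, MNT_NOWAIT);	/* count only */
	if (n <= 0)
		return (1);
	buf = malloc((size_t)n * sizeof(*buf));
	if (buf == NULL)
		return (1);
	n = getfsstat(buf, n * sizeof(*buf), MNT_NOWAIT);
	for (i = 0; i < n; i++)
		printf("%s %s\n", buf[i].f_fstypename, buf[i].f_mntonname);
	free(buf);
	return (0);
}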
void *data, size_t nbytes); } ; Please copy any additions and changes to the following compatability tables: ; sys/ia64/ia32/syscalls.master (take a best guess) ; [other 64 bit platforms with an alternate 32 bit syscall table go here too] Index: head/sys/kern/vfs_bio.c =================================================================== --- head/sys/kern/vfs_bio.c (revision 122536) +++ head/sys/kern/vfs_bio.c (revision 122537) @@ -1,3812 +1,3812 @@ /* * Copyright (c) 1994,1997 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. */ /* * this file contains a new buffer I/O scheme implementing a coherent * VM object and buffer cache scheme. Pains have been taken to make * sure that the performance degradation associated with schemes such * as this is not realized. * * Author: John S. Dyson * Significant help during the development and debugging phases * had been provided by David Greenman, also of the FreeBSD core team. * * see man buf(9) for more info. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_directio.h" #include "opt_swap.h" static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer"); struct bio_ops bioops; /* I/O operation notification */ struct buf_ops buf_ops_bio = { "buf_ops_bio", bwrite }; /* * XXX buf is global because kern_shutdown.c and ffs_checkoverlap has * carnal knowledge of buffers. This knowledge should be moved to vfs_bio.c. 
*/ struct buf *buf; /* buffer header pool */ static struct proc *bufdaemonproc; static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m); static void vfs_clean_pages(struct buf * bp); static void vfs_setdirty(struct buf *bp); static void vfs_vmio_release(struct buf *bp); static void vfs_backgroundwritedone(struct buf *bp); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); static int flushbufqueues(int flushdeps); static void buf_daemon(void); void bremfreel(struct buf * bp); int vmiodirenable = TRUE; SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, "Use the VM system for directory writes"); int runningbufspace; SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, "Amount of presently outstanding async buffer io"); static int bufspace; SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, "KVA memory used for bufs"); static int maxbufspace; SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0, "Maximum allowed value of bufspace (including buf_daemon)"); static int bufmallocspace; SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, "Amount of malloced memory for buffers"); static int maxbufmallocspace; SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, "Maximum amount of malloced memory for buffers"); static int lobufspace; SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0, "Minimum amount of buffers we want to have"); static int hibufspace; SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0, "Maximum allowed value of bufspace (excluding buf_daemon)"); static int bufreusecnt; SYSCTL_INT(_vfs, OID_AUTO, bufreusecnt, CTLFLAG_RW, &bufreusecnt, 0, "Number of times we have reused a buffer"); static int buffreekvacnt; SYSCTL_INT(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, 0, "Number of times we have freed the KVA space from some buffer"); static int bufdefragcnt; SYSCTL_INT(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, 0, "Number of times we have had to repeat buffer allocation to defragment"); static int lorunningspace; SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0, "Minimum preferred space used for in-progress I/O"); static int hirunningspace; SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0, "Maximum amount of space to use for in-progress I/O"); static int dirtybufferflushes; SYSCTL_INT(_vfs, OID_AUTO, dirtybufferflushes, CTLFLAG_RW, &dirtybufferflushes, 0, "Number of bdwrite to bawrite conversions to limit dirty buffers"); static int altbufferflushes; SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes, 0, "Number of fsync flushes to limit dirty buffers"); static int recursiveflushes; SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes, 0, "Number of flushes skipped due to being recursive"); static int numdirtybuffers; SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0, "Number of buffers that are dirty (has unwritten changes) at the moment"); static int lodirtybuffers; SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0, "How many buffers we want to have free before bufdaemon can sleep"); static int hidirtybuffers; SYSCTL_INT(_vfs, 
OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0, "When the number of dirty buffers is considered severe"); static int dirtybufthresh; SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh, 0, "Number of bdwrite to bawrite conversions to clear dirty buffers"); static int numfreebuffers; SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, "Number of free buffers"); static int lofreebuffers; SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, "XXX Unused"); static int hifreebuffers; SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, "XXX Complicatedly unused"); static int getnewbufcalls; SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, &getnewbufcalls, 0, "Number of calls to getnewbuf"); static int getnewbufrestarts; SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0, "Number of times getnewbuf has had to restart a buffer aquisition"); static int dobkgrdwrite = 1; SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, "Do background writes (honoring the BV_BKGRDWRITE flag)?"); /* * Wakeup point for bufdaemon, as well as indicator of whether it is already * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it * is idling. */ static int bd_request; /* * This lock synchronizes access to bd_request. */ static struct mtx bdlock; /* * bogus page -- for I/O to/from partially complete buffers * this is a temporary solution to the problem, but it is not * really that bad. it would be better to split the buffer * for input in the case of buffers partially already in memory, * but the code is intricate enough already. */ vm_page_t bogus_page; /* * Synchronization (sleep/wakeup) variable for active buffer space requests. * Set when wait starts, cleared prior to wakeup(). * Used in runningbufwakeup() and waitrunningbufspace(). */ static int runningbufreq; /* * This lock protects the runningbufreq and synchronizes runningbufwakeup and * waitrunningbufspace(). */ static struct mtx rbreqlock; /* * Synchronization (sleep/wakeup) variable for buffer requests. * Can contain the VFS_BIO_NEED flags defined below; setting/clearing is done * by and/or. * Used in numdirtywakeup(), bufspacewakeup(), bufcountwakeup(), bwillwrite(), * getnewbuf(), and getblk(). */ static int needsbuffer; /* * Lock that protects needsbuffer and the sleeps/wakeups surrounding it. */ static struct mtx nblock; /* * Lock that protects against bwait()/bdone()/B_DONE races. */ static struct mtx bdonelock; /* * Definitions for the buffer free lists. */ #define BUFFER_QUEUES 5 /* number of free buffer queues */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_CLEAN 1 /* non-B_DELWRI buffers */ #define QUEUE_DIRTY 2 /* B_DELWRI buffers */ #define QUEUE_EMPTYKVA 3 /* empty buffer headers w/KVA assignment */ #define QUEUE_EMPTY 4 /* empty buffer headers */ /* Queues for free buffers with various properties */ static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } }; /* Lock for the bufqueues */ static struct mtx bqlock; /* * Single global constant for BUF_WMESG, to avoid getting multiple references. * buf_wmesg is referred from macros. 
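Each SYSCTL_INT(_vfs, ...) declaration above surfaces as a vfs.* OID, so the buffer-cache accounting can be watched from userland. A small sketch (not part of the diff) reading two of them with sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int bufspace, numdirtybuffers;
	size_t len = sizeof(int);

	if (sysctlbyname("vfs.bufspace", &bufspace, &len, NULL, 0) != 0)
		return (1);
	len = sizeof(int);
	if (sysctlbyname("vfs.numdirtybuffers", &numdirtybuffers, &len, NULL, 0) != 0)
		return (1);
	printf("bufspace=%d numdirtybuffers=%d\n", bufspace, numdirtybuffers);
	return (0);
}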
*/ const char *buf_wmesg = BUF_WMESG; #define VFS_BIO_NEED_ANY 0x01 /* any freeable buffer */ #define VFS_BIO_NEED_DIRTYFLUSH 0x02 /* waiting for dirty buffer flush */ #define VFS_BIO_NEED_FREE 0x04 /* wait for free bufs, hi hysteresis */ #define VFS_BIO_NEED_BUFSPACE 0x08 /* wait for buf space, lo hysteresis */ #ifdef DIRECTIO extern void ffs_rawread_setup(void); #endif /* DIRECTIO */ /* * numdirtywakeup: * * If someone is blocked due to there being too many dirty buffers, * and numdirtybuffers is now reasonable, wake them up. */ static __inline void numdirtywakeup(int level) { if (numdirtybuffers <= level) { mtx_lock(&nblock); if (needsbuffer & VFS_BIO_NEED_DIRTYFLUSH) { needsbuffer &= ~VFS_BIO_NEED_DIRTYFLUSH; wakeup(&needsbuffer); } mtx_unlock(&nblock); } } /* * bufspacewakeup: * * Called when buffer space is potentially available for recovery. * getnewbuf() will block on this flag when it is unable to free * sufficient buffer space. Buffer space becomes recoverable when * bp's get placed back in the queues. */ static __inline void bufspacewakeup(void) { /* * If someone is waiting for BUF space, wake them up. Even * though we haven't freed the kva space yet, the waiting * process will be able to now. */ mtx_lock(&nblock); if (needsbuffer & VFS_BIO_NEED_BUFSPACE) { needsbuffer &= ~VFS_BIO_NEED_BUFSPACE; wakeup(&needsbuffer); } mtx_unlock(&nblock); } /* * runningbufwakeup() - in-progress I/O accounting. * */ static __inline void runningbufwakeup(struct buf *bp) { if (bp->b_runningbufspace) { atomic_subtract_int(&runningbufspace, bp->b_runningbufspace); bp->b_runningbufspace = 0; mtx_lock(&rbreqlock); if (runningbufreq && runningbufspace <= lorunningspace) { runningbufreq = 0; wakeup(&runningbufreq); } mtx_unlock(&rbreqlock); } } /* * bufcountwakeup: * * Called when a buffer has been added to one of the free queues to * account for the buffer and to wakeup anyone waiting for free buffers. * This typically occurs when large amounts of metadata are being handled * by the buffer cache ( else buffer space runs out first, usually ). */ static __inline void bufcountwakeup(void) { atomic_add_int(&numfreebuffers, 1); mtx_lock(&nblock); if (needsbuffer) { needsbuffer &= ~VFS_BIO_NEED_ANY; if (numfreebuffers >= hifreebuffers) needsbuffer &= ~VFS_BIO_NEED_FREE; wakeup(&needsbuffer); } mtx_unlock(&nblock); } /* * waitrunningbufspace() * * runningbufspace is a measure of the amount of I/O currently * running. This routine is used in async-write situations to * prevent creating huge backups of pending writes to a device. * Only asynchronous writes are governed by this function. * * Reads will adjust runningbufspace, but will not block based on it. * The read load has a side effect of reducing the allowed write load. * * This does NOT turn an async write into a sync write. It waits * for earlier writes to complete and generally returns before the * caller's write has reached the device. */ static __inline void waitrunningbufspace(void) { mtx_lock(&rbreqlock); while (runningbufspace > hirunningspace) { ++runningbufreq; msleep(&runningbufreq, &rbreqlock, PVM, "wdrain", 0); } mtx_unlock(&rbreqlock); } /* * vfs_buf_test_cache: * * Called when a buffer is extended. This function clears the B_CACHE * bit if the newly extended portion of the buffer does not contain * valid data. 
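waitrunningbufspace() and runningbufwakeup() above form a classic high/low watermark throttle around runningbufspace, built from a mutex plus msleep()/wakeup(). A userland pthreads analogue (not part of the diff; names and numbers are illustrative) of the same shape:

#include <pthread.h>

static pthread_mutex_t rblock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t rbcond = PTHREAD_COND_INITIALIZER;
static long runningspace;
static const long lorunning = 512 * 1024, hirunning = 1024 * 1024;

static void
running_start(long iosize)		/* async write queued */
{
	pthread_mutex_lock(&rblock);
	runningspace += iosize;
	pthread_mutex_unlock(&rblock);
}

static void
running_done(long iosize)		/* I/O completion path */
{
	pthread_mutex_lock(&rblock);
	runningspace -= iosize;
	if (runningspace <= lorunning)	/* back under the low watermark */
		pthread_cond_broadcast(&rbcond);
	pthread_mutex_unlock(&rblock);
}

static void
wait_running_space(void)		/* called before issuing another async write */
{
	pthread_mutex_lock(&rblock);
	while (runningspace > hirunning)
		pthread_cond_wait(&rbcond, &rblock);
	pthread_mutex_unlock(&rblock);
}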
*/ static __inline__ void vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m) { GIANT_REQUIRED; VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if (bp->b_flags & B_CACHE) { int base = (foff + off) & PAGE_MASK; if (vm_page_is_valid(m, base, size) == 0) bp->b_flags &= ~B_CACHE; } } /* Wake up the buffer deamon if necessary */ static __inline__ void bd_wakeup(int dirtybuflevel) { mtx_lock(&bdlock); if (bd_request == 0 && numdirtybuffers >= dirtybuflevel) { bd_request = 1; wakeup(&bd_request); } mtx_unlock(&bdlock); } /* * bd_speedup - speedup the buffer cache flushing code */ static __inline__ void bd_speedup(void) { bd_wakeup(1); } /* * Calculating buffer cache scaling values and reserve space for buffer * headers. This is called during low level kernel initialization and * may be called more then once. We CANNOT write to the memory area * being reserved at this time. */ caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) { /* * physmem_est is in pages. Convert it to kilobytes (assumes * PAGE_SIZE is >= 1K) */ physmem_est = physmem_est * (PAGE_SIZE / 1024); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/20 of our ram over 64MB. When auto-sizing * the buffer cache we limit the eventual kva reservation to * maxbcache bytes. * * factor represents the 1/4 x ram conversion. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / 1024; nbuf = 50; if (physmem_est > 4096) nbuf += min((physmem_est - 4096) / factor, 65536 / factor); if (physmem_est > 65536) nbuf += (physmem_est - 65536) * 2 / (factor * 5); if (maxbcache && nbuf > maxbcache / BKVASIZE) nbuf = maxbcache / BKVASIZE; } #if 0 /* * Do not allow the buffer_map to be more then 1/2 the size of the * kernel_map. */ if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2)) { nbuf = (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2); printf("Warning: nbufs capped at %d\n", nbuf); } #endif /* * swbufs are used as temporary holders for I/O, such as paging I/O. * We have no less then 16 and no more then 256. */ nswbuf = max(min(nbuf/4, 256), 16); #ifdef NSWBUF_MIN if (nswbuf < NSWBUF_MIN) nswbuf = NSWBUF_MIN; #endif #ifdef DIRECTIO ffs_rawread_setup(); #endif /* * Reserve space for the buffer cache buffers */ swbuf = (void *)v; v = (caddr_t)(swbuf + nswbuf); buf = (void *)v; v = (caddr_t)(buf + nbuf); return(v); } /* Initialize the buffer subsystem. Called before use of any buffers. 
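kern_vfs_bio_buffer_alloc() above sizes nbuf from the memory estimate: roughly 1/4 of the first 64 MB of RAM and 1/20 of the rest, expressed in BKVASIZE chunks. A worked sketch (not part of the diff) of that formula, assuming BKVASIZE is 16384 bytes, the usual value but an assumption here:

#include <stdio.h>

#define BKVASIZE 16384
#define MIN(a, b) ((a) < (b) ? (a) : (b))

static long
size_nbuf(long physmem_kb)
{
	long factor = 4 * BKVASIZE / 1024;	/* 64 */
	long nbuf = 50;

	if (physmem_kb > 4096)
		nbuf += MIN((physmem_kb - 4096) / factor, 65536 / factor);
	if (physmem_kb > 65536)
		nbuf += (physmem_kb - 65536) * 2 / (factor * 5);
	return (nbuf);
}

int
main(void)
{
	printf("%ld\n", size_nbuf(256 * 1024));	/* 256 MB of RAM -> 2302 buffers */
	return (0);
}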
*/ void bufinit(void) { struct buf *bp; int i; GIANT_REQUIRED; mtx_init(&bqlock, "buf queue lock", NULL, MTX_DEF); mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF); mtx_init(&nblock, "needsbuffer lock", NULL, MTX_DEF); mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF); mtx_init(&bdonelock, "bdone lock", NULL, MTX_DEF); /* next, make a null set of free lists */ for (i = 0; i < BUFFER_QUEUES; i++) TAILQ_INIT(&bufqueues[i]); /* finally, initialize each buffer header and stick on empty q */ for (i = 0; i < nbuf; i++) { bp = &buf[i]; bzero(bp, sizeof *bp); bp->b_flags = B_INVAL; /* we're just an empty header */ bp->b_dev = NODEV; bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_EMPTY; bp->b_vflags = 0; bp->b_xflags = 0; LIST_INIT(&bp->b_dep); BUF_LOCKINIT(bp); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); } /* * maxbufspace is the absolute maximum amount of buffer space we are * allowed to reserve in KVM and in real terms. The absolute maximum * is nominally used by buf_daemon. hibufspace is the nominal maximum * used by most other processes. The differential is required to * ensure that buf_daemon is able to run when other processes might * be blocked waiting for buffer space. * * maxbufspace is based on BKVASIZE. Allocating buffers larger then * this may result in KVM fragmentation which is not handled optimally * by the system. */ maxbufspace = nbuf * BKVASIZE; hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10); lobufspace = hibufspace - MAXBSIZE; lorunningspace = 512 * 1024; hirunningspace = 1024 * 1024; /* * Limit the amount of malloc memory since it is wired permanently into * the kernel space. Even though this is accounted for in the buffer * allocation, we don't want the malloced region to grow uncontrolled. * The malloc scheme improves memory utilization significantly on average * (small) directories. */ maxbufmallocspace = hibufspace / 20; /* * Reduce the chance of a deadlock occuring by limiting the number * of delayed-write dirty buffers we allow to stack up. */ hidirtybuffers = nbuf / 4 + 20; dirtybufthresh = hidirtybuffers * 9 / 10; numdirtybuffers = 0; /* * To support extreme low-memory systems, make sure hidirtybuffers cannot * eat up all available buffer space. This occurs when our minimum cannot * be met. We try to size hidirtybuffers to 3/4 our buffer space assuming * BKVASIZE'd (8K) buffers. */ while (hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) { hidirtybuffers >>= 1; } lodirtybuffers = hidirtybuffers / 2; /* * Try to keep the number of free buffers in the specified range, * and give special processes (e.g. like buf_daemon) access to an * emergency reserve. */ lofreebuffers = nbuf / 18 + 5; hifreebuffers = 2 * lofreebuffers; numfreebuffers = nbuf; /* * Maximum number of async ops initiated per buf_daemon loop. This is * somewhat of a hack at the moment, we really need to limit ourselves * based on the number of bytes of I/O in-transit that were initiated * from buf_daemon. */ bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED); } /* * bfreekva() - free the kva allocation for a buffer. * * Must be called at splbio() or higher as this is the only locking for * buffer_map. * * Since this call frees up buffer space, we call bufspacewakeup(). 
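*
* The main consumer is getnewbuf() below, which calls bfreekva() for
* example when it defragments buffer_map and when it discards a buffer
* while bufspace is at or above hibufspace.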
*/ static void bfreekva(struct buf * bp) { GIANT_REQUIRED; if (bp->b_kvasize) { atomic_add_int(&buffreekvacnt, 1); atomic_subtract_int(&bufspace, bp->b_kvasize); vm_map_delete(buffer_map, (vm_offset_t) bp->b_kvabase, (vm_offset_t) bp->b_kvabase + bp->b_kvasize ); bp->b_kvasize = 0; bufspacewakeup(); } } /* * bremfree: * * Remove the buffer from the appropriate free list. */ void bremfree(struct buf * bp) { mtx_lock(&bqlock); bremfreel(bp); mtx_unlock(&bqlock); } void bremfreel(struct buf * bp) { int s = splbio(); int old_qindex = bp->b_qindex; GIANT_REQUIRED; if (bp->b_qindex != QUEUE_NONE) { KASSERT(BUF_REFCNT(bp) == 1, ("bremfree: bp %p not locked",bp)); TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); bp->b_qindex = QUEUE_NONE; } else { if (BUF_REFCNT(bp) <= 1) panic("bremfree: removing a buffer not on a queue"); } /* * Fixup numfreebuffers count. If the buffer is invalid or not * delayed-write, and it was on the EMPTY, LRU, or AGE queues, * the buffer was free and we must decrement numfreebuffers. */ if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0) { switch(old_qindex) { case QUEUE_DIRTY: case QUEUE_CLEAN: case QUEUE_EMPTY: case QUEUE_EMPTYKVA: atomic_subtract_int(&numfreebuffers, 1); break; default: break; } } splx(s); } /* * Get a buffer with the specified data. Look in the cache first. We * must clear BIO_ERROR and B_INVAL prior to initiating I/O. If B_CACHE * is set, the buffer is valid and we do not have to do anything ( see * getblk() ). This is really just a special case of breadn(). */ int bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred, struct buf ** bpp) { return (breadn(vp, blkno, size, 0, 0, 0, cred, bpp)); } /* * Operates like bread, but also starts asynchronous I/O on * read-ahead blocks. We must clear BIO_ERROR and B_INVAL prior * to initiating I/O . If B_CACHE is set, the buffer is valid * and we do not have to do anything. */ int breadn(struct vnode * vp, daddr_t blkno, int size, daddr_t * rablkno, int *rabsize, int cnt, struct ucred * cred, struct buf ** bpp) { struct buf *bp, *rabp; int i; int rv = 0, readwait = 0; *bpp = bp = getblk(vp, blkno, size, 0, 0, 0); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curthread != PCPU_GET(idlethread)) curthread->td_proc->p_stats->p_ru.ru_inblock++; bp->b_iocmd = BIO_READ; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; if (bp->b_rcred == NOCRED && cred != NOCRED) bp->b_rcred = crhold(cred); vfs_busy_pages(bp, 0); bp->b_iooffset = dbtob(bp->b_blkno); if (vp->v_type == VCHR) VOP_SPECSTRATEGY(vp, bp); else VOP_STRATEGY(vp, bp); ++readwait; } for (i = 0; i < cnt; i++, rablkno++, rabsize++) { if (inmem(vp, *rablkno)) continue; rabp = getblk(vp, *rablkno, *rabsize, 0, 0, 0); if ((rabp->b_flags & B_CACHE) == 0) { if (curthread != PCPU_GET(idlethread)) curthread->td_proc->p_stats->p_ru.ru_inblock++; rabp->b_flags |= B_ASYNC; rabp->b_flags &= ~B_INVAL; rabp->b_ioflags &= ~BIO_ERROR; rabp->b_iocmd = BIO_READ; if (rabp->b_rcred == NOCRED && cred != NOCRED) rabp->b_rcred = crhold(cred); vfs_busy_pages(rabp, 0); BUF_KERNPROC(rabp); rabp->b_iooffset = dbtob(rabp->b_blkno); if (vp->v_type == VCHR) VOP_SPECSTRATEGY(vp, rabp); else VOP_STRATEGY(vp, rabp); } else { brelse(rabp); } } if (readwait) { rv = bufwait(bp); } return (rv); } /* * Write, release buffer on completion. (Done by iodone * if async). Do not bother writing anything if the buffer * is invalid. * * Note that we set B_CACHE here, indicating that buffer is * fully valid and thus cacheable. 
This is true even of NFS * now so we set it generally. This could be set either here * or in biodone() since the I/O is synchronous. We put it * here. */ int bwrite(struct buf * bp) { int oldflags, s; struct buf *newbp; if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } oldflags = bp->b_flags; if (BUF_REFCNT(bp) == 0) panic("bwrite: buffer is not busy???"); s = splbio(); /* * If a background write is already in progress, delay * writing this block if it is asynchronous. Otherwise * wait for the background write to complete. */ VI_LOCK(bp->b_vp); if (bp->b_vflags & BV_BKGRDINPROG) { if (bp->b_flags & B_ASYNC) { VI_UNLOCK(bp->b_vp); splx(s); bdwrite(bp); return (0); } bp->b_vflags |= BV_BKGRDWAIT; msleep(&bp->b_xflags, VI_MTX(bp->b_vp), PRIBIO, "bwrbg", 0); if (bp->b_vflags & BV_BKGRDINPROG) panic("bwrite: still writing"); } VI_UNLOCK(bp->b_vp); /* Mark the buffer clean */ bundirty(bp); /* * If this buffer is marked for background writing and we * do not have to wait for it, make a copy and write the * copy so as to leave this buffer ready for further use. * * This optimization eats a lot of memory. If we have a page * or buffer shortfall we can't do it. */ if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC) && !vm_page_count_severe() && !buf_dirty_count_severe()) { if (bp->b_iodone != NULL) { printf("bp->b_iodone = %p\n", bp->b_iodone); panic("bwrite: need chained iodone"); } /* get a new block */ newbp = geteblk(bp->b_bufsize); /* * set it to be identical to the old block. We have to * set b_lblkno and BKGRDMARKER before calling bgetvp() * to avoid confusing the splay tree and gbincore(). */ memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); newbp->b_lblkno = bp->b_lblkno; newbp->b_xflags |= BX_BKGRDMARKER; VI_LOCK(bp->b_vp); bp->b_vflags |= BV_BKGRDINPROG; bgetvp(bp->b_vp, newbp); VI_UNLOCK(bp->b_vp); newbp->b_blkno = bp->b_blkno; newbp->b_offset = bp->b_offset; newbp->b_iodone = vfs_backgroundwritedone; newbp->b_flags |= B_ASYNC; newbp->b_flags &= ~B_INVAL; /* move over the dependencies */ if (LIST_FIRST(&bp->b_dep) != NULL) buf_movedeps(bp, newbp); /* * Initiate write on the copy, release the original to * the B_LOCKED queue so that it cannot go away until * the background write completes. If not locked it could go * away and then be reconstituted while it was being written. * If the reconstituted buffer were written, we could end up * with two background copies being written at the same time. */ bqrelse(bp); bp = newbp; } bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_flags |= B_WRITEINPROG | B_CACHE; bp->b_iocmd = BIO_WRITE; VI_LOCK(bp->b_vp); bp->b_vp->v_numoutput++; VI_UNLOCK(bp->b_vp); vfs_busy_pages(bp, 1); /* * Normal bwrites pipeline writes */ bp->b_runningbufspace = bp->b_bufsize; atomic_add_int(&runningbufspace, bp->b_runningbufspace); if (curthread != PCPU_GET(idlethread)) curthread->td_proc->p_stats->p_ru.ru_oublock++; splx(s); if (oldflags & B_ASYNC) BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); if (bp->b_vp->v_type == VCHR) VOP_SPECSTRATEGY(bp->b_vp, bp); else VOP_STRATEGY(bp->b_vp, bp); if ((oldflags & B_ASYNC) == 0) { int rtval = bufwait(bp); brelse(bp); return (rtval); } else { /* * don't allow the async write to saturate the I/O * system. We will not deadlock here because * we are blocking waiting for I/O that is already in-progress * to complete. We do not block here if it is the update * or syncer daemon trying to clean up as that can lead * to deadlock. 
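*
* The throttle is waitrunningbufspace(): it sleeps while runningbufspace
* exceeds hirunningspace (1MB as set in bufinit()), and runningbufwakeup()
* wakes it once completing writes bring runningbufspace back down to
* lorunningspace.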
*/ if (curthread->td_proc != bufdaemonproc && curthread->td_proc != updateproc) waitrunningbufspace(); } return (0); } /* * Complete a background write started from bwrite. */ static void vfs_backgroundwritedone(bp) struct buf *bp; { struct buf *origbp; /* * Find the original buffer that we are writing. */ VI_LOCK(bp->b_vp); if ((origbp = gbincore(bp->b_vp, bp->b_lblkno)) == NULL) panic("backgroundwritedone: lost buffer"); /* * Clear the BV_BKGRDINPROG flag in the original buffer * and awaken it if it is waiting for the write to complete. * If BV_BKGRDINPROG is not set in the original buffer it must * have been released and re-instantiated - which is not legal. */ KASSERT((origbp->b_vflags & BV_BKGRDINPROG), ("backgroundwritedone: lost buffer2")); origbp->b_vflags &= ~BV_BKGRDINPROG; if (origbp->b_vflags & BV_BKGRDWAIT) { origbp->b_vflags &= ~BV_BKGRDWAIT; wakeup(&origbp->b_xflags); } VI_UNLOCK(bp->b_vp); /* * Process dependencies then return any unfinished ones. */ if (LIST_FIRST(&bp->b_dep) != NULL) buf_complete(bp); if (LIST_FIRST(&bp->b_dep) != NULL) buf_movedeps(bp, origbp); /* * This buffer is marked B_NOCACHE, so when it is released * by biodone, it will be tossed. We mark it with BIO_READ * to avoid biodone doing a second vwakeup. */ bp->b_flags |= B_NOCACHE; bp->b_iocmd = BIO_READ; bp->b_flags &= ~(B_CACHE | B_DONE); bp->b_iodone = 0; bufdone(bp); } /* * Delayed write. (Buffer is marked dirty). Do not bother writing * anything if the buffer is marked invalid. * * Note that since the buffer must be completely valid, we can safely * set B_CACHE. In fact, we have to set B_CACHE here rather then in * biodone() in order to prevent getblk from writing the buffer * out synchronously. */ void bdwrite(struct buf * bp) { struct thread *td = curthread; struct vnode *vp; struct buf *nbp; GIANT_REQUIRED; if (BUF_REFCNT(bp) == 0) panic("bdwrite: buffer is not busy"); if (bp->b_flags & B_INVAL) { brelse(bp); return; } /* * If we have too many dirty buffers, don't create any more. * If we are wildly over our limit, then force a complete * cleanup. Otherwise, just keep the situation from getting * out of control. Note that we have to avoid a recursive * disaster and not try to clean up after our own cleanup! */ vp = bp->b_vp; VI_LOCK(vp); if (td->td_pflags & TDP_COWINPROGRESS) { recursiveflushes++; } else if (vp != NULL && vp->v_dirtybufcnt > dirtybufthresh + 10) { VI_UNLOCK(vp); (void) VOP_FSYNC(vp, td->td_ucred, MNT_NOWAIT, td); VI_LOCK(vp); altbufferflushes++; } else if (vp != NULL && vp->v_dirtybufcnt > dirtybufthresh) { /* * Try to find a buffer to flush. */ TAILQ_FOREACH(nbp, &vp->v_dirtyblkhd, b_vnbufs) { if ((nbp->b_vflags & BV_BKGRDINPROG) || buf_countdeps(nbp, 0) || BUF_LOCK(nbp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) continue; if (bp == nbp) panic("bdwrite: found ourselves"); VI_UNLOCK(vp); if (nbp->b_flags & B_CLUSTEROK) { vfs_bio_awrite(nbp); } else { bremfree(nbp); bawrite(nbp); } VI_LOCK(vp); dirtybufferflushes++; break; } } VI_UNLOCK(vp); bdirty(bp); /* * Set B_CACHE, indicating that the buffer is fully valid. This is * true even of NFS now. */ bp->b_flags |= B_CACHE; /* * This bmap keeps the system from needing to do the bmap later, * perhaps when the system is attempting to do a sync. Since it * is likely that the indirect block -- or whatever other datastructure * that the filesystem needs is still in memory now, it is a good * thing to do this. Note also, that if the pageout daemon is * requesting a sync -- there might not be enough memory to do * the bmap then... 
So, this is important to do. */ if (vp->v_type != VCHR && bp->b_lblkno == bp->b_blkno) { VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } /* * Set the *dirty* buffer range based upon the VM system dirty pages. */ vfs_setdirty(bp); /* * We need to do this here to satisfy the vnode_pager and the * pageout daemon, so that it thinks that the pages have been * "cleaned". Note that since the pages are in a delayed write * buffer -- the VFS layer "will" see that the pages get written * out on the next sync, or perhaps the cluster will be completed. */ vfs_clean_pages(bp); bqrelse(bp); /* * Wakeup the buffer flushing daemon if we have a lot of dirty * buffers (midpoint between our recovery point and our stall * point). */ bd_wakeup((lodirtybuffers + hidirtybuffers) / 2); /* * note: we cannot initiate I/O from a bdwrite even if we wanted to, * due to the softdep code. */ } /* * bdirty: * * Turn buffer into delayed write request. We must clear BIO_READ and * B_RELBUF, and we must set B_DELWRI. We reassign the buffer to * itself to properly update it in the dirty/clean lists. We mark it * B_DONE to ensure that any asynchronization of the buffer properly * clears B_DONE ( else a panic will occur later ). * * bdirty() is kinda like bdwrite() - we have to clear B_INVAL which * might have been set pre-getblk(). Unlike bwrite/bdwrite, bdirty() * should only be called if the buffer is known-good. * * Since the buffer is not on a queue, we do not update the numfreebuffers * count. * * Must be called at splbio(). * The buffer must be on QUEUE_NONE. */ void bdirty(bp) struct buf *bp; { KASSERT(bp->b_qindex == QUEUE_NONE, ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex)); bp->b_flags &= ~(B_RELBUF); bp->b_iocmd = BIO_WRITE; if ((bp->b_flags & B_DELWRI) == 0) { bp->b_flags |= B_DONE | B_DELWRI; reassignbuf(bp, bp->b_vp); atomic_add_int(&numdirtybuffers, 1); bd_wakeup((lodirtybuffers + hidirtybuffers) / 2); } } /* * bundirty: * * Clear B_DELWRI for buffer. * * Since the buffer is not on a queue, we do not update the numfreebuffers * count. * * Must be called at splbio(). * The buffer must be on QUEUE_NONE. */ void bundirty(bp) struct buf *bp; { KASSERT(bp->b_qindex == QUEUE_NONE, ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex)); if (bp->b_flags & B_DELWRI) { bp->b_flags &= ~B_DELWRI; reassignbuf(bp, bp->b_vp); atomic_subtract_int(&numdirtybuffers, 1); numdirtywakeup(lodirtybuffers); } /* * Since it is now being written, we can clear its deferred write flag. */ bp->b_flags &= ~B_DEFERRED; } /* * bawrite: * * Asynchronous write. Start output on a buffer, but do not wait for * it to complete. The buffer is released when the output completes. * * bwrite() ( or the VOP routine anyway ) is responsible for handling * B_INVAL buffers. Not us. */ void bawrite(struct buf * bp) { bp->b_flags |= B_ASYNC; (void) BUF_WRITE(bp); } /* * bwillwrite: * * Called prior to the locking of any vnodes when we are expecting to * write. We do not want to starve the buffer cache with too many * dirty buffers so we block here. By blocking prior to the locking * of any vnodes we attempt to avoid the situation where a locked vnode * prevents the various system daemons from flushing related buffers. 
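*
* Typical usage (illustrative): a code path that is about to dirty
* buffers, such as a file write, calls bwillwrite() before taking any
* vnode locks; if numdirtybuffers has reached hidirtybuffers the thread
* sleeps here, holding no vnode locks, until buf_daemon has flushed
* enough dirty buffers.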
*/ void bwillwrite(void) { if (numdirtybuffers >= hidirtybuffers) { int s; mtx_lock(&Giant); s = splbio(); mtx_lock(&nblock); while (numdirtybuffers >= hidirtybuffers) { bd_wakeup(1); needsbuffer |= VFS_BIO_NEED_DIRTYFLUSH; msleep(&needsbuffer, &nblock, (PRIBIO + 4), "flswai", 0); } splx(s); mtx_unlock(&nblock); mtx_unlock(&Giant); } } /* * Return true if we have too many dirty buffers. */ int buf_dirty_count_severe(void) { return(numdirtybuffers >= hidirtybuffers); } /* * brelse: * * Release a busy buffer and, if requested, free its resources. The * buffer will be stashed in the appropriate bufqueue[] allowing it * to be accessed later as a cache entity or reused for other purposes. */ void brelse(struct buf * bp) { int s; GIANT_REQUIRED; KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); s = splbio(); if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) && !(bp->b_flags & B_INVAL)) { /* * Failed write, redirty. Must clear BIO_ERROR to prevent * pages from being scrapped. If B_INVAL is set then * this case is not run and the next case is run to * destroy the buffer. B_INVAL can occur if the buffer * is outside the range supported by the underlying device. */ bp->b_ioflags &= ~BIO_ERROR; bdirty(bp); } else if ((bp->b_flags & (B_NOCACHE | B_INVAL)) || (bp->b_ioflags & BIO_ERROR) || bp->b_iocmd == BIO_DELETE || (bp->b_bufsize <= 0)) { /* * Either a failed I/O or we were asked to free or not * cache the buffer. */ bp->b_flags |= B_INVAL; if (LIST_FIRST(&bp->b_dep) != NULL) buf_deallocate(bp); if (bp->b_flags & B_DELWRI) { atomic_subtract_int(&numdirtybuffers, 1); numdirtywakeup(lodirtybuffers); } bp->b_flags &= ~(B_DELWRI | B_CACHE); if ((bp->b_flags & B_VMIO) == 0) { if (bp->b_bufsize) allocbuf(bp, 0); if (bp->b_vp) brelvp(bp); } } /* * We must clear B_RELBUF if B_DELWRI is set. If vfs_vmio_release() * is called with B_DELWRI set, the underlying pages may wind up * getting freed causing a previous write (bdwrite()) to get 'lost' * because pages associated with a B_DELWRI bp are marked clean. * * We still allow the B_INVAL case to call vfs_vmio_release(), even * if B_DELWRI is set. * * If B_DELWRI is not set we may have to set B_RELBUF if we are low * on pages to return pages to the VM page queues. */ if (bp->b_flags & B_DELWRI) bp->b_flags &= ~B_RELBUF; else if (vm_page_count_severe()) { /* * XXX This lock may not be necessary since BKGRDINPROG * cannot be set while we hold the buf lock, it can only be * cleared if it is already pending. */ if (bp->b_vp) { VI_LOCK(bp->b_vp); if (!(bp->b_vflags & BV_BKGRDINPROG)) bp->b_flags |= B_RELBUF; VI_UNLOCK(bp->b_vp); } else bp->b_flags |= B_RELBUF; } /* * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer * constituted, not even NFS buffers now. Two flags effect this. If * B_INVAL, the struct buf is invalidated but the VM object is kept * around ( i.e. so it is trivial to reconstitute the buffer later ). * * If BIO_ERROR or B_NOCACHE is set, pages in the VM object will be * invalidated. BIO_ERROR cannot be set for a failed write unless the * buffer is also B_INVAL because it hits the re-dirtying code above. * * Normally we can do this whether a buffer is B_DELWRI or not. If * the buffer is an NFS buffer, it is tracking piecemeal writes or * the commit state and we cannot afford to lose the buffer. If the * buffer has a background write in progress, we need to keep it * around to prevent it from being reconstituted and starting a second * background write. 
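*
* Concretely, the test below skips the per-page rundown only for a
* B_DELWRI buffer whose vnode lives on a network filesystem (VFCF_NETWORK)
* and is not a disk device; every other VMIO buffer goes through the
* normal page invalidation/release handling.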
*/ if ((bp->b_flags & B_VMIO) && !(bp->b_vp->v_mount != NULL && (bp->b_vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 && !vn_isdisk(bp->b_vp, NULL) && (bp->b_flags & B_DELWRI)) ) { int i, j, resid; vm_page_t m; off_t foff; vm_pindex_t poff; vm_object_t obj; struct vnode *vp; vp = bp->b_vp; obj = bp->b_object; /* * Get the base offset and length of the buffer. Note that * in the VMIO case if the buffer block size is not * page-aligned then b_data pointer may not be page-aligned. * But our b_pages[] array *IS* page aligned. * * block sizes less then DEV_BSIZE (usually 512) are not * supported due to the page granularity bits (m->valid, * m->dirty, etc...). * * See man buf(9) for more information */ resid = bp->b_bufsize; foff = bp->b_offset; VM_OBJECT_LOCK(obj); for (i = 0; i < bp->b_npages; i++) { int had_bogus = 0; m = bp->b_pages[i]; vm_page_lock_queues(); vm_page_flag_clear(m, PG_ZERO); vm_page_unlock_queues(); /* * If we hit a bogus page, fixup *all* the bogus pages * now. */ if (m == bogus_page) { poff = OFF_TO_IDX(bp->b_offset); had_bogus = 1; for (j = i; j < bp->b_npages; j++) { vm_page_t mtmp; mtmp = bp->b_pages[j]; if (mtmp == bogus_page) { mtmp = vm_page_lookup(obj, poff + j); if (!mtmp) { panic("brelse: page missing\n"); } bp->b_pages[j] = mtmp; } } if ((bp->b_flags & B_INVAL) == 0) { pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } m = bp->b_pages[i]; } if ((bp->b_flags & B_NOCACHE) || (bp->b_ioflags & BIO_ERROR)) { int poffset = foff & PAGE_MASK; int presid = resid > (PAGE_SIZE - poffset) ? (PAGE_SIZE - poffset) : resid; KASSERT(presid >= 0, ("brelse: extra page")); vm_page_lock_queues(); vm_page_set_invalid(m, poffset, presid); vm_page_unlock_queues(); if (had_bogus) printf("avoided corruption bug in bogus_page/brelse code\n"); } resid -= PAGE_SIZE - (foff & PAGE_MASK); foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } VM_OBJECT_UNLOCK(obj); if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } else if (bp->b_flags & B_VMIO) { if (bp->b_flags & (B_INVAL | B_RELBUF)) { vfs_vmio_release(bp); } } if (bp->b_qindex != QUEUE_NONE) panic("brelse: free buffer onto another queue???"); if (BUF_REFCNT(bp) > 1) { /* do not release to free list */ BUF_UNLOCK(bp); splx(s); return; } /* enqueue */ mtx_lock(&bqlock); /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_flags |= B_INVAL; bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA); if (bp->b_vflags & BV_BKGRDINPROG) panic("losing buffer 1"); if (bp->b_kvasize) { bp->b_qindex = QUEUE_EMPTYKVA; } else { bp->b_qindex = QUEUE_EMPTY; } TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist); bp->b_dev = NODEV; /* buffers with junk contents */ } else if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) || (bp->b_ioflags & BIO_ERROR)) { bp->b_flags |= B_INVAL; bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA); if (bp->b_vflags & BV_BKGRDINPROG) panic("losing buffer 2"); bp->b_qindex = QUEUE_CLEAN; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist); bp->b_dev = NODEV; /* remaining buffers */ } else { if (bp->b_flags & B_DELWRI) bp->b_qindex = QUEUE_DIRTY; else bp->b_qindex = QUEUE_CLEAN; if (bp->b_flags & B_AGE) TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist); else TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); } mtx_unlock(&bqlock); /* * If B_INVAL and B_DELWRI is set, clear B_DELWRI. We have already * placed the buffer on the correct queue. We must also disassociate * the device and vnode for a B_INVAL buffer so gbincore() doesn't * find it. 
*/ if (bp->b_flags & B_INVAL) { if (bp->b_flags & B_DELWRI) bundirty(bp); if (bp->b_vp) brelvp(bp); } /* * Fixup numfreebuffers count. The bp is on an appropriate queue * unless locked. We then bump numfreebuffers if it is not B_DELWRI. * We've already handled the B_INVAL case ( B_DELWRI will be clear * if B_INVAL is set ). */ if (!(bp->b_flags & B_DELWRI)) bufcountwakeup(); /* * Something we can maybe free or reuse */ if (bp->b_bufsize || bp->b_kvasize) bufspacewakeup(); bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | B_DIRECT); if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) panic("brelse: not dirty"); /* unlock */ BUF_UNLOCK(bp); splx(s); } /* * Release a buffer back to the appropriate queue but do not try to free * it. The buffer is expected to be used again soon. * * bqrelse() is used by bdwrite() to requeue a delayed write, and used by * biodone() to requeue an async I/O on completion. It is also used when * known good buffers need to be requeued but we think we may need the data * again soon. * * XXX we should be able to leave the B_RELBUF hint set on completion. */ void bqrelse(struct buf * bp) { int s; s = splbio(); KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); if (bp->b_qindex != QUEUE_NONE) panic("bqrelse: free buffer onto another queue???"); if (BUF_REFCNT(bp) > 1) { /* do not release to free list */ BUF_UNLOCK(bp); splx(s); return; } mtx_lock(&bqlock); /* buffers with stale but valid contents */ if (bp->b_flags & B_DELWRI) { bp->b_qindex = QUEUE_DIRTY; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist); } else { /* * XXX This lock may not be necessary since BKGRDINPROG * cannot be set while we hold the buf lock, it can only be * cleared if it is already pending. */ VI_LOCK(bp->b_vp); if (!vm_page_count_severe() || bp->b_vflags & BV_BKGRDINPROG) { VI_UNLOCK(bp->b_vp); bp->b_qindex = QUEUE_CLEAN; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist); } else { /* * We are too low on memory, we have to try to free * the buffer (most importantly: the wired pages * making up its backing store) *now*. */ VI_UNLOCK(bp->b_vp); mtx_unlock(&bqlock); splx(s); brelse(bp); return; } } mtx_unlock(&bqlock); if ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI)) bufcountwakeup(); /* * Something we can maybe free or reuse. */ if (bp->b_bufsize && !(bp->b_flags & B_DELWRI)) bufspacewakeup(); bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) panic("bqrelse: not dirty"); /* unlock */ BUF_UNLOCK(bp); splx(s); } /* Give pages used by the bp back to the VM system (where possible) */ static void vfs_vmio_release(bp) struct buf *bp; { int i; vm_page_t m; GIANT_REQUIRED; if (bp->b_object != NULL) VM_OBJECT_LOCK(bp->b_object); vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; /* * In order to keep page LRU ordering consistent, put * everything on the inactive queue. */ vm_page_unwire(m, 0); /* * We don't mess with busy pages, it is * the responsibility of the process that * busied the pages to deal with them. */ if ((m->flags & PG_BUSY) || (m->busy != 0)) continue; if (m->wire_count == 0) { vm_page_flag_clear(m, PG_ZERO); /* * Might as well free the page if we can and it has * no valid data. 
We also free the page if the * buffer was used for direct I/O */ if ((bp->b_flags & B_ASYNC) == 0 && !m->valid && m->hold_count == 0) { vm_page_busy(m); pmap_remove_all(m); vm_page_free(m); } else if (bp->b_flags & B_DIRECT) { vm_page_try_to_free(m); } else if (vm_page_count_severe()) { vm_page_try_to_cache(m); } } } vm_page_unlock_queues(); if (bp->b_object != NULL) VM_OBJECT_UNLOCK(bp->b_object); pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); if (bp->b_bufsize) { bufspacewakeup(); bp->b_bufsize = 0; } bp->b_npages = 0; bp->b_flags &= ~B_VMIO; if (bp->b_vp) brelvp(bp); } /* * Check to see if a block at a particular lbn is available for a clustered * write. */ static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno) { struct buf *bpa; int match; match = 0; /* If the buf isn't in core skip it */ if ((bpa = gbincore(vp, lblkno)) == NULL) return (0); /* If the buf is busy we don't want to wait for it */ if (BUF_LOCK(bpa, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) return (0); /* Only cluster with valid clusterable delayed write buffers */ if ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) != (B_DELWRI | B_CLUSTEROK)) goto done; if (bpa->b_bufsize != size) goto done; /* * Check to see if it is in the expected place on disk and that the * block has been mapped. */ if ((bpa->b_blkno != bpa->b_lblkno) && (bpa->b_blkno == blkno)) match = 1; done: BUF_UNLOCK(bpa); return (match); } /* * vfs_bio_awrite: * * Implement clustered async writes for clearing out B_DELWRI buffers. * This is much better then the old way of writing only one buffer at * a time. Note that we may not be presented with the buffers in the * correct order, so we search for the cluster in both directions. */ int vfs_bio_awrite(struct buf * bp) { int i; int j; daddr_t lblkno = bp->b_lblkno; struct vnode *vp = bp->b_vp; int s; int ncl; int nwritten; int size; int maxcl; s = splbio(); /* * right now we support clustered writing only to regular files. If * we find a clusterable block we could be in the middle of a cluster * rather then at the beginning. */ if ((vp->v_type == VREG) && (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; VI_LOCK(vp); for (i = 1; i < maxcl; i++) if (vfs_bio_clcheck(vp, size, lblkno + i, bp->b_blkno + ((i * size) >> DEV_BSHIFT)) == 0) break; for (j = 1; i + j <= maxcl && j <= lblkno; j++) if (vfs_bio_clcheck(vp, size, lblkno - j, bp->b_blkno - ((j * size) >> DEV_BSHIFT)) == 0) break; VI_UNLOCK(vp); --j; ncl = i + j; /* * this is a possible cluster write */ if (ncl != 1) { BUF_UNLOCK(bp); nwritten = cluster_wbuild(vp, size, lblkno - j, ncl); splx(s); return nwritten; } } bremfree(bp); bp->b_flags |= B_ASYNC; splx(s); /* * default (old) behavior, writing out only one block * * XXX returns b_bufsize instead of b_bcount for nwritten? */ nwritten = bp->b_bufsize; (void) BUF_WRITE(bp); return nwritten; } /* * getnewbuf: * * Find and initialize a new buffer header, freeing up existing buffers * in the bufqueues as necessary. The new buffer is returned locked. * * Important: B_INVAL is not set. If the caller wishes to throw the * buffer away, the caller must set B_INVAL prior to calling brelse(). 
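*
* A minimal caller sketch (illustrative; getblk() and geteblk() below are
* the real callers):
*
*	bp = getnewbuf(slpflag, slptimeo, size, maxsize);
*	if (bp == NULL)
*		...			(sleep was interrupted; retry or fail)
*	...
*	bp->b_flags |= B_INVAL;		(only if throwing the buffer away)
*	brelse(bp);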
* * We block if: * We have insufficient buffer headers * We have insufficient buffer space * buffer_map is too fragmented ( space reservation fails ) * If we have to flush dirty buffers ( but we try to avoid this ) * * To avoid VFS layer recursion we do not flush dirty buffers ourselves. * Instead we ask the buf daemon to do it for us. We attempt to * avoid piecemeal wakeups of the pageout daemon. */ static struct buf * getnewbuf(int slpflag, int slptimeo, int size, int maxsize) { struct buf *bp; struct buf *nbp; int defrag = 0; int nqindex; static int flushingbufs; GIANT_REQUIRED; /* * We can't afford to block since we might be holding a vnode lock, * which may prevent system daemons from running. We deal with * low-memory situations by proactively returning memory and running * async I/O rather then sync I/O. */ atomic_add_int(&getnewbufcalls, 1); atomic_subtract_int(&getnewbufrestarts, 1); restart: atomic_add_int(&getnewbufrestarts, 1); /* * Setup for scan. If we do not have enough free buffers, * we setup a degenerate case that immediately fails. Note * that if we are specially marked process, we are allowed to * dip into our reserves. * * The scanning sequence is nominally: EMPTY->EMPTYKVA->CLEAN * * We start with EMPTYKVA. If the list is empty we backup to EMPTY. * However, there are a number of cases (defragging, reusing, ...) * where we cannot backup. */ mtx_lock(&bqlock); nqindex = QUEUE_EMPTYKVA; nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]); if (nbp == NULL) { /* * If no EMPTYKVA buffers and we are either * defragging or reusing, locate a CLEAN buffer * to free or reuse. If bufspace useage is low * skip this step so we can allocate a new buffer. */ if (defrag || bufspace >= lobufspace) { nqindex = QUEUE_CLEAN; nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); } /* * If we could not find or were not allowed to reuse a * CLEAN buffer, check to see if it is ok to use an EMPTY * buffer. We can only use an EMPTY buffer if allocating * its KVA would not otherwise run us out of buffer space. */ if (nbp == NULL && defrag == 0 && bufspace + maxsize < hibufspace) { nqindex = QUEUE_EMPTY; nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); } } /* * Run scan, possibly freeing data and/or kva mappings on the fly * depending. */ while ((bp = nbp) != NULL) { int qindex = nqindex; /* * Calculate next bp ( we can only use it if we do not block * or do other fancy things ). */ if ((nbp = TAILQ_NEXT(bp, b_freelist)) == NULL) { switch(qindex) { case QUEUE_EMPTY: nqindex = QUEUE_EMPTYKVA; if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]))) break; /* FALLTHROUGH */ case QUEUE_EMPTYKVA: nqindex = QUEUE_CLEAN; if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]))) break; /* FALLTHROUGH */ case QUEUE_CLEAN: /* * nbp is NULL. */ break; } } if (bp->b_vp) { VI_LOCK(bp->b_vp); if (bp->b_vflags & BV_BKGRDINPROG) { VI_UNLOCK(bp->b_vp); continue; } VI_UNLOCK(bp->b_vp); } /* * Sanity Checks */ KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp)); /* * Note: we no longer distinguish between VMIO and non-VMIO * buffers. */ KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex)); /* * If we are defragging then we need a buffer with * b_kvasize != 0. XXX this situation should no longer * occur, if defrag is non-zero the buffer's b_kvasize * should also be non-zero at this point. XXX */ if (defrag && bp->b_kvasize == 0) { printf("Warning: defrag empty buffer %p\n", bp); continue; } /* * Start freeing the bp. This is somewhat involved. 
nbp * remains valid only for QUEUE_EMPTY[KVA] bp's. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) panic("getnewbuf: locked buf"); bremfreel(bp); mtx_unlock(&bqlock); if (qindex == QUEUE_CLEAN) { if (bp->b_flags & B_VMIO) { bp->b_flags &= ~B_ASYNC; vfs_vmio_release(bp); } if (bp->b_vp) brelvp(bp); } /* * NOTE: nbp is now entirely invalid. We can only restart * the scan from this point on. * * Get the rest of the buffer freed up. b_kva* is still * valid after this operation. */ if (bp->b_rcred != NOCRED) { crfree(bp->b_rcred); bp->b_rcred = NOCRED; } if (bp->b_wcred != NOCRED) { crfree(bp->b_wcred); bp->b_wcred = NOCRED; } if (LIST_FIRST(&bp->b_dep) != NULL) buf_deallocate(bp); if (bp->b_vflags & BV_BKGRDINPROG) panic("losing buffer 3"); if (bp->b_bufsize) allocbuf(bp, 0); bp->b_flags = 0; bp->b_ioflags = 0; bp->b_xflags = 0; bp->b_vflags = 0; bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; bp->b_offset = NOOFFSET; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; bp->b_npages = 0; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_magic = B_MAGIC_BIO; bp->b_op = &buf_ops_bio; bp->b_object = NULL; LIST_INIT(&bp->b_dep); /* * If we are defragging then free the buffer. */ if (defrag) { bp->b_flags |= B_INVAL; bfreekva(bp); brelse(bp); defrag = 0; goto restart; } /* * If we are overcomitted then recover the buffer and its * KVM space. This occurs in rare situations when multiple * processes are blocked in getnewbuf() or allocbuf(). */ if (bufspace >= hibufspace) flushingbufs = 1; if (flushingbufs && bp->b_kvasize != 0) { bp->b_flags |= B_INVAL; bfreekva(bp); brelse(bp); goto restart; } if (bufspace < lobufspace) flushingbufs = 0; break; } /* * If we exhausted our list, sleep as appropriate. We may have to * wakeup various daemons and write out some dirty buffers. * * Generally we are sleeping due to insufficient buffer space. */ if (bp == NULL) { int flags; char *waitmsg; mtx_unlock(&bqlock); if (defrag) { flags = VFS_BIO_NEED_BUFSPACE; waitmsg = "nbufkv"; } else if (bufspace >= hibufspace) { waitmsg = "nbufbs"; flags = VFS_BIO_NEED_BUFSPACE; } else { waitmsg = "newbuf"; flags = VFS_BIO_NEED_ANY; } bd_speedup(); /* heeeelp */ mtx_lock(&nblock); needsbuffer |= flags; while (needsbuffer & flags) { if (msleep(&needsbuffer, &nblock, (PRIBIO + 4) | slpflag, waitmsg, slptimeo)) { mtx_unlock(&nblock); return (NULL); } } mtx_unlock(&nblock); } else { /* * We finally have a valid bp. We aren't quite out of the * woods, we still have to reserve kva space. In order * to keep fragmentation sane we only allocate kva in * BKVASIZE chunks. */ maxsize = (maxsize + BKVAMASK) & ~BKVAMASK; if (maxsize != bp->b_kvasize) { vm_offset_t addr = 0; bfreekva(bp); if (vm_map_findspace(buffer_map, vm_map_min(buffer_map), maxsize, &addr)) { /* * Uh oh. Buffer map is to fragmented. We * must defragment the map. */ atomic_add_int(&bufdefragcnt, 1); defrag = 1; bp->b_flags |= B_INVAL; brelse(bp); goto restart; } if (addr) { vm_map_insert(buffer_map, NULL, 0, addr, addr + maxsize, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); bp->b_kvabase = (caddr_t) addr; bp->b_kvasize = maxsize; atomic_add_int(&bufspace, bp->b_kvasize); atomic_add_int(&bufreusecnt, 1); } } bp->b_saveaddr = bp->b_kvabase; bp->b_data = bp->b_saveaddr; } return(bp); } /* * buf_daemon: * * buffer flushing daemon. Buffers are normally flushed by the * update daemon but if it cannot keep up this process starts to * take the load in an attempt to prevent getnewbuf() from blocking. 
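*
* The daemon is kicked via bd_wakeup()/bd_speedup(): for example,
* getnewbuf() calls bd_speedup() before sleeping on needsbuffer, and
* bdwrite() calls bd_wakeup() with the midpoint between lodirtybuffers
* and hidirtybuffers as the trigger level.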
*/ static struct kproc_desc buf_kp = { "bufdaemon", buf_daemon, &bufdaemonproc }; SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp) static void buf_daemon() { int s; mtx_lock(&Giant); /* * This process needs to be suspended prior to shutdown sync. */ EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, bufdaemonproc, SHUTDOWN_PRI_LAST); /* * This process is allowed to take the buffer cache to the limit */ s = splbio(); mtx_lock(&bdlock); for (;;) { bd_request = 0; mtx_unlock(&bdlock); kthread_suspend_check(bufdaemonproc); /* * Do the flush. Limit the amount of in-transit I/O we * allow to build up, otherwise we would completely saturate * the I/O system. Wakeup any waiting processes before we * normally would so they can run in parallel with our drain. */ while (numdirtybuffers > lodirtybuffers) { if (flushbufqueues(0) == 0) { /* * Could not find any buffers without rollback * dependencies, so just write the first one * in the hopes of eventually making progress. */ flushbufqueues(1); break; } waitrunningbufspace(); numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2); } /* * Only clear bd_request if we have reached our low water * mark. The buf_daemon normally waits 1 second and * then incrementally flushes any dirty buffers that have * built up, within reason. * * If we were unable to hit our low water mark and couldn't * find any flushable buffers, we sleep half a second. * Otherwise we loop immediately. */ mtx_lock(&bdlock); if (numdirtybuffers <= lodirtybuffers) { /* * We reached our low water mark, reset the * request and sleep until we are needed again. * The sleep is just so the suspend code works. */ bd_request = 0; msleep(&bd_request, &bdlock, PVM, "psleep", hz); } else { /* * We couldn't find any flushable dirty buffers but * still have too many dirty buffers, we * have to sleep and try again. (rare) */ msleep(&bd_request, &bdlock, PVM, "qsleep", hz / 10); } } } /* * flushbufqueues: * * Try to flush a buffer in the dirty queue. We must be careful to * free up B_INVAL buffers instead of write them, which NFS is * particularly sensitive to. */ int flushwithdeps = 0; SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps, 0, "Number of buffers flushed with dependecies that require rollbacks"); static int flushbufqueues(int flushdeps) { struct thread *td = curthread; struct vnode *vp; struct mount *mp; struct buf *bp; int hasdeps; mtx_lock(&bqlock); TAILQ_FOREACH(bp, &bufqueues[QUEUE_DIRTY], b_freelist) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) continue; KASSERT((bp->b_flags & B_DELWRI), ("unexpected clean buffer %p", bp)); VI_LOCK(bp->b_vp); if ((bp->b_vflags & BV_BKGRDINPROG) != 0) { VI_UNLOCK(bp->b_vp); BUF_UNLOCK(bp); continue; } VI_UNLOCK(bp->b_vp); if (bp->b_flags & B_INVAL) { bremfreel(bp); mtx_unlock(&bqlock); brelse(bp); return (1); } if (LIST_FIRST(&bp->b_dep) != NULL && buf_countdeps(bp, 0)) { if (flushdeps == 0) { BUF_UNLOCK(bp); continue; } hasdeps = 1; } else hasdeps = 0; /* * We must hold the lock on a vnode before writing * one of its buffers. Otherwise we may confuse, or * in the case of a snapshot vnode, deadlock the * system. * * The lock order here is the reverse of the normal * of vnode followed by buf lock. This is ok because * the NOWAIT will prevent deadlock. 
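*
* In other words, the buf lock is already held here, so the vnode is only
* locked with LK_NOWAIT below; if that fails the buffer is unlocked and
* skipped rather than waited for.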
*/ vp = bp->b_vp; if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { BUF_UNLOCK(bp); continue; } if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) == 0) { mtx_unlock(&bqlock); vfs_bio_awrite(bp); vn_finished_write(mp); VOP_UNLOCK(vp, 0, td); flushwithdeps += hasdeps; return (1); } vn_finished_write(mp); BUF_UNLOCK(bp); } mtx_unlock(&bqlock); return (0); } /* * Check to see if a block is currently memory resident. */ struct buf * incore(struct vnode * vp, daddr_t blkno) { struct buf *bp; int s = splbio(); VI_LOCK(vp); bp = gbincore(vp, blkno); VI_UNLOCK(vp); splx(s); return (bp); } /* * Returns true if no I/O is needed to access the * associated VM object. This is like incore except * it also hunts around in the VM system for the data. */ int inmem(struct vnode * vp, daddr_t blkno) { vm_object_t obj; vm_offset_t toff, tinc, size; vm_page_t m; vm_ooffset_t off; GIANT_REQUIRED; ASSERT_VOP_LOCKED(vp, "inmem"); if (incore(vp, blkno)) return 1; if (vp->v_mount == NULL) return 0; if (VOP_GETVOBJECT(vp, &obj) != 0 || (vp->v_vflag & VV_OBJBUF) == 0) return 0; size = PAGE_SIZE; if (size > vp->v_mount->mnt_stat.f_iosize) size = vp->v_mount->mnt_stat.f_iosize; off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize; VM_OBJECT_LOCK(obj); for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { m = vm_page_lookup(obj, OFF_TO_IDX(off + toff)); if (!m) goto notinmem; tinc = size; if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK)) tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK); if (vm_page_is_valid(m, (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0) goto notinmem; } VM_OBJECT_UNLOCK(obj); return 1; notinmem: VM_OBJECT_UNLOCK(obj); return (0); } /* * vfs_setdirty: * * Sets the dirty range for a buffer based on the status of the dirty * bits in the pages comprising the buffer. * * The range is limited to the size of the buffer. * * This routine is primarily used by NFS, but is generalized for the * B_VMIO case. */ static void vfs_setdirty(struct buf *bp) { int i; vm_object_t object; GIANT_REQUIRED; /* * Degenerate case - empty buffer */ if (bp->b_bufsize == 0) return; /* * We qualify the scan for modified pages on whether the * object has been flushed yet. The OBJ_WRITEABLE flag * is not cleared simply by protecting pages off. */ if ((bp->b_flags & B_VMIO) == 0) return; object = bp->b_pages[0]->object; VM_OBJECT_LOCK(object); if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY)) printf("Warning: object %p writeable but not mightbedirty\n", object); if (!(object->flags & OBJ_WRITEABLE) && (object->flags & OBJ_MIGHTBEDIRTY)) printf("Warning: object %p mightbedirty but not writeable\n", object); if (object->flags & (OBJ_MIGHTBEDIRTY|OBJ_CLEANING)) { vm_offset_t boffset; vm_offset_t eoffset; vm_page_lock_queues(); /* * test the pages to see if they have been modified directly * by users through the VM system. */ for (i = 0; i < bp->b_npages; i++) { vm_page_flag_clear(bp->b_pages[i], PG_ZERO); vm_page_test_dirty(bp->b_pages[i]); } /* * Calculate the encompassing dirty range, boffset and eoffset, * (eoffset - boffset) bytes. */ for (i = 0; i < bp->b_npages; i++) { if (bp->b_pages[i]->dirty) break; } boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); for (i = bp->b_npages - 1; i >= 0; --i) { if (bp->b_pages[i]->dirty) { break; } } eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); vm_page_unlock_queues(); /* * Fit it to the buffer. 
*/ if (eoffset > bp->b_bcount) eoffset = bp->b_bcount; /* * If we have a good dirty range, merge with the existing * dirty range. */ if (boffset < eoffset) { if (bp->b_dirtyoff > boffset) bp->b_dirtyoff = boffset; if (bp->b_dirtyend < eoffset) bp->b_dirtyend = eoffset; } } VM_OBJECT_UNLOCK(object); } /* * getblk: * * Get a block given a specified block and offset into a file/device. * The buffers B_DONE bit will be cleared on return, making it almost * ready for an I/O initiation. B_INVAL may or may not be set on * return. The caller should clear B_INVAL prior to initiating a * READ. * * For a non-VMIO buffer, B_CACHE is set to the opposite of B_INVAL for * an existing buffer. * * For a VMIO buffer, B_CACHE is modified according to the backing VM. * If getblk()ing a previously 0-sized invalid buffer, B_CACHE is set * and then cleared based on the backing VM. If the previous buffer is * non-0-sized but invalid, B_CACHE will be cleared. * * If getblk() must create a new buffer, the new buffer is returned with * both B_INVAL and B_CACHE clear unless it is a VMIO buffer, in which * case it is returned with B_INVAL clear and B_CACHE set based on the * backing VM. * * getblk() also forces a BUF_WRITE() for any B_DELWRI buffer whos * B_CACHE bit is clear. * * What this means, basically, is that the caller should use B_CACHE to * determine whether the buffer is fully valid or not and should clear * B_INVAL prior to issuing a read. If the caller intends to validate * the buffer by loading its data area with something, the caller needs * to clear B_INVAL. If the caller does this without issuing an I/O, * the caller should set B_CACHE ( as an optimization ), else the caller * should issue the I/O and biodone() will set B_CACHE if the I/O was * a write attempt or if it was a successfull read. If the caller * intends to issue a READ, the caller must clear B_INVAL and BIO_ERROR * prior to issuing the READ. biodone() will *not* clear B_INVAL. */ struct buf * getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags) { struct buf *bp; int s; int error; ASSERT_VOP_LOCKED(vp, "getblk"); if (size > MAXBSIZE) panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE); s = splbio(); loop: /* * Block if we are low on buffers. Certain processes are allowed * to completely exhaust the buffer cache. * * If this check ever becomes a bottleneck it may be better to * move it into the else, when gbincore() fails. At the moment * it isn't a problem. * * XXX remove if 0 sections (clean this up after its proven) */ if (numfreebuffers == 0) { if (curthread == PCPU_GET(idlethread)) return NULL; mtx_lock(&nblock); needsbuffer |= VFS_BIO_NEED_ANY; mtx_unlock(&nblock); } VI_LOCK(vp); if ((bp = gbincore(vp, blkno))) { int lockflags; /* * Buffer is in-core. If the buffer is not busy, it must * be on a queue. */ lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK; if (flags & GB_LOCK_NOWAIT) lockflags |= LK_NOWAIT; error = BUF_TIMELOCK(bp, lockflags, VI_MTX(vp), "getblk", slpflag, slptimeo); /* * If we slept and got the lock we have to restart in case * the buffer changed identities. */ if (error == ENOLCK) goto loop; /* We timed out or were interrupted. */ else if (error) return (NULL); /* * The buffer is locked. B_CACHE is cleared if the buffer is * invalid. Otherwise, for a non-VMIO buffer, B_CACHE is set * and for a VMIO buffer B_CACHE is adjusted according to the * backing VM cache. 
*/ if (bp->b_flags & B_INVAL) bp->b_flags &= ~B_CACHE; else if ((bp->b_flags & (B_VMIO | B_INVAL)) == 0) bp->b_flags |= B_CACHE; bremfree(bp); /* * check for size inconsistancies for non-VMIO case. */ if (bp->b_bcount != size) { if ((bp->b_flags & B_VMIO) == 0 || (size > bp->b_kvasize)) { if (bp->b_flags & B_DELWRI) { bp->b_flags |= B_NOCACHE; BUF_WRITE(bp); } else { if ((bp->b_flags & B_VMIO) && (LIST_FIRST(&bp->b_dep) == NULL)) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bp->b_flags |= B_NOCACHE; BUF_WRITE(bp); } } goto loop; } } /* * If the size is inconsistant in the VMIO case, we can resize * the buffer. This might lead to B_CACHE getting set or * cleared. If the size has not changed, B_CACHE remains * unchanged from its previous state. */ if (bp->b_bcount != size) allocbuf(bp, size); KASSERT(bp->b_offset != NOOFFSET, ("getblk: no buffer offset")); /* * A buffer with B_DELWRI set and B_CACHE clear must * be committed before we can return the buffer in * order to prevent the caller from issuing a read * ( due to B_CACHE not being set ) and overwriting * it. * * Most callers, including NFS and FFS, need this to * operate properly either because they assume they * can issue a read if B_CACHE is not set, or because * ( for example ) an uncached B_DELWRI might loop due * to softupdates re-dirtying the buffer. In the latter * case, B_CACHE is set after the first write completes, * preventing further loops. * NOTE! b*write() sets B_CACHE. If we cleared B_CACHE * above while extending the buffer, we cannot allow the * buffer to remain with B_CACHE set after the write * completes or it will represent a corrupt state. To * deal with this we set B_NOCACHE to scrap the buffer * after the write. * * We might be able to do something fancy, like setting * B_CACHE in bwrite() except if B_DELWRI is already set, * so the below call doesn't set B_CACHE, but that gets real * confusing. This is much easier. */ if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) { bp->b_flags |= B_NOCACHE; BUF_WRITE(bp); goto loop; } splx(s); bp->b_flags &= ~B_DONE; } else { int bsize, maxsize, vmio; off_t offset; /* * Buffer is not in-core, create new buffer. The buffer * returned by getnewbuf() is locked. Note that the returned * buffer is also considered valid (not marked B_INVAL). */ VI_UNLOCK(vp); /* * If the user does not want us to create the buffer, bail out * here. */ if (flags & GB_NOCREAT) { splx(s); return NULL; } if (vn_isdisk(vp, NULL)) bsize = DEV_BSIZE; else if (vp->v_mountedhere) bsize = vp->v_mountedhere->mnt_stat.f_iosize; else if (vp->v_mount) bsize = vp->v_mount->mnt_stat.f_iosize; else bsize = size; offset = blkno * bsize; vmio = (VOP_GETVOBJECT(vp, NULL) == 0) && (vp->v_vflag & VV_OBJBUF); maxsize = vmio ? size + (offset & PAGE_MASK) : size; maxsize = imax(maxsize, bsize); if ((bp = getnewbuf(slpflag, slptimeo, size, maxsize)) == NULL) { if (slpflag || slptimeo) { splx(s); return NULL; } goto loop; } /* * This code is used to make sure that a buffer is not * created while the getnewbuf routine is blocked. * This can be a problem whether the vnode is locked or not. * If the buffer is created out from under us, we have to * throw away the one we just created. There is now window * race because we are safely running at splbio() from the * point of the duplicate buffer creation through to here, * and we've locked the buffer. 
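*
* If a duplicate buffer did appear, the gbincore() check below catches it;
* the buffer we just allocated is marked B_INVAL, released with brelse(),
* and the lookup is retried.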
* * Note: this must occur before we associate the buffer * with the vp especially considering limitations in * the splay tree implementation when dealing with duplicate * lblkno's. */ VI_LOCK(vp); if (gbincore(vp, blkno)) { VI_UNLOCK(vp); bp->b_flags |= B_INVAL; brelse(bp); goto loop; } /* * Insert the buffer into the hash, so that it can * be found by incore. */ bp->b_blkno = bp->b_lblkno = blkno; bp->b_offset = offset; bgetvp(vp, bp); VI_UNLOCK(vp); /* * set B_VMIO bit. allocbuf() the buffer bigger. Since the * buffer size starts out as 0, B_CACHE will be set by * allocbuf() for the VMIO case prior to it testing the * backing store for validity. */ if (vmio) { bp->b_flags |= B_VMIO; #if defined(VFS_BIO_DEBUG) if (vp->v_type != VREG) printf("getblk: vmioing file type %d???\n", vp->v_type); #endif VOP_GETVOBJECT(vp, &bp->b_object); } else { bp->b_flags &= ~B_VMIO; bp->b_object = NULL; } allocbuf(bp, size); splx(s); bp->b_flags &= ~B_DONE; } KASSERT(BUF_REFCNT(bp) == 1, ("getblk: bp %p not locked",bp)); return (bp); } /* * Get an empty, disassociated buffer of given size. The buffer is initially * set to B_INVAL. */ struct buf * geteblk(int size) { struct buf *bp; int s; int maxsize; maxsize = (size + BKVAMASK) & ~BKVAMASK; s = splbio(); while ((bp = getnewbuf(0, 0, size, maxsize)) == 0) continue; splx(s); allocbuf(bp, size); bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */ KASSERT(BUF_REFCNT(bp) == 1, ("geteblk: bp %p not locked",bp)); return (bp); } /* * This code constitutes the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated * VM object (in the case of VMIO operations). This code is able to * resize a buffer up or down. * * Note that this code is tricky, and has many complications to resolve * deadlock or inconsistant data situations. Tread lightly!!! * There are B_CACHE and B_DELWRI interactions that must be dealt with by * the caller. Calling this code willy nilly can result in the loss of data. * * allocbuf() only adjusts B_CACHE for VMIO buffers. getblk() deals with * B_CACHE for the non-VMIO case. */ int allocbuf(struct buf *bp, int size) { int newbsize, mbsize; int i; GIANT_REQUIRED; if (BUF_REFCNT(bp) == 0) panic("allocbuf: buffer not busy"); if (bp->b_kvasize < size) panic("allocbuf: buffer too small"); if ((bp->b_flags & B_VMIO) == 0) { caddr_t origbuf; int origbufsize; /* * Just get anonymous memory from the kernel. Don't * mess with B_CACHE. */ mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); if (bp->b_flags & B_MALLOC) newbsize = mbsize; else newbsize = round_page(size); if (newbsize < bp->b_bufsize) { /* * malloced buffers are not shrunk */ if (bp->b_flags & B_MALLOC) { if (newbsize) { bp->b_bcount = size; } else { free(bp->b_data, M_BIOBUF); if (bp->b_bufsize) { atomic_subtract_int( &bufmallocspace, bp->b_bufsize); bufspacewakeup(); bp->b_bufsize = 0; } bp->b_saveaddr = bp->b_kvabase; bp->b_data = bp->b_saveaddr; bp->b_bcount = 0; bp->b_flags &= ~B_MALLOC; } return 1; } vm_hold_free_pages( bp, (vm_offset_t) bp->b_data + newbsize, (vm_offset_t) bp->b_data + bp->b_bufsize); } else if (newbsize > bp->b_bufsize) { /* * We only use malloced memory on the first allocation. * and revert to page-allocated memory when the buffer * grows. */ /* * There is a potential smp race here that could lead * to bufmallocspace slightly passing the max. It * is probably extremely rare and not worth worrying * over. 
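*
* The malloc()-backed path below is only used for a buffer's very first
* allocation, when the request is at most half a page and the global
* malloced-buffer space is still under maxbufmallocspace; anything larger,
* or a buffer that later grows, uses the page-backed vm_hold_load_pages()
* scheme instead.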
*/ if ( (bufmallocspace < maxbufmallocspace) && (bp->b_bufsize == 0) && (mbsize <= PAGE_SIZE/2)) { bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK); bp->b_bufsize = mbsize; bp->b_bcount = size; bp->b_flags |= B_MALLOC; atomic_add_int(&bufmallocspace, mbsize); return 1; } origbuf = NULL; origbufsize = 0; /* * If the buffer is growing on its other-than-first allocation, * then we revert to the page-allocation scheme. */ if (bp->b_flags & B_MALLOC) { origbuf = bp->b_data; origbufsize = bp->b_bufsize; bp->b_data = bp->b_kvabase; if (bp->b_bufsize) { atomic_subtract_int(&bufmallocspace, bp->b_bufsize); bufspacewakeup(); bp->b_bufsize = 0; } bp->b_flags &= ~B_MALLOC; newbsize = round_page(newbsize); } vm_hold_load_pages( bp, (vm_offset_t) bp->b_data + bp->b_bufsize, (vm_offset_t) bp->b_data + newbsize); if (origbuf) { bcopy(origbuf, bp->b_data, origbufsize); free(origbuf, M_BIOBUF); } } } else { int desiredpages; newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); desiredpages = (size == 0) ? 0 : num_pages((bp->b_offset & PAGE_MASK) + newbsize); if (bp->b_flags & B_MALLOC) panic("allocbuf: VMIO buffer can't be malloced"); /* * Set B_CACHE initially if buffer is 0 length or will become * 0-length. */ if (size == 0 || bp->b_bufsize == 0) bp->b_flags |= B_CACHE; if (newbsize < bp->b_bufsize) { /* * DEV_BSIZE aligned new buffer size is less then the * DEV_BSIZE aligned existing buffer size. Figure out * if we have to remove any pages. */ if (desiredpages < bp->b_npages) { vm_page_t m; vm_page_lock_queues(); for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it * is the responsibility of * vnode_pager_setsize */ m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); while (vm_page_sleep_if_busy(m, TRUE, "biodep")) vm_page_lock_queues(); bp->b_pages[i] = NULL; vm_page_unwire(m, 0); } vm_page_unlock_queues(); pmap_qremove((vm_offset_t) trunc_page((vm_offset_t)bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); bp->b_npages = desiredpages; } } else if (size > bp->b_bcount) { /* * We are growing the buffer, possibly in a * byte-granular fashion. */ struct vnode *vp; vm_object_t obj; vm_offset_t toff; vm_offset_t tinc; /* * Step 1, bring in the VM pages from the object, * allocating them if necessary. We must clear * B_CACHE if these pages are not valid for the * range covered by the buffer. */ vp = bp->b_vp; obj = bp->b_object; VM_OBJECT_LOCK(obj); while (bp->b_npages < desiredpages) { vm_page_t m; vm_pindex_t pi; pi = OFF_TO_IDX(bp->b_offset) + bp->b_npages; if ((m = vm_page_lookup(obj, pi)) == NULL) { /* * note: must allocate system pages * since blocking here could intefere * with paging I/O, no matter which * process we are. */ m = vm_page_alloc(obj, pi, VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); if (m == NULL) { atomic_add_int(&vm_pageout_deficit, desiredpages - bp->b_npages); VM_OBJECT_UNLOCK(obj); VM_WAIT; VM_OBJECT_LOCK(obj); } else { vm_page_lock_queues(); vm_page_wakeup(m); vm_page_unlock_queues(); bp->b_flags &= ~B_CACHE; bp->b_pages[bp->b_npages] = m; ++bp->b_npages; } continue; } /* * We found a page. If we have to sleep on it, * retry because it might have gotten freed out * from under us. * * We can only test PG_BUSY here. Blocking on * m->busy might lead to a deadlock: * * vm_fault->getpages->cluster_read->allocbuf * */ vm_page_lock_queues(); if (vm_page_sleep_if_busy(m, FALSE, "pgtblk")) continue; /* * We have a good page. Should we wakeup the * page daemon? 
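*
* The heuristic below wakes the pagedaemon when the page we found is
* sitting on a PQ_CACHE queue and the system's free plus cached page
* counts have dropped below their combined minimum targets.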
*/ if ((curproc != pageproc) && ((m->queue - m->pc) == PQ_CACHE) && ((cnt.v_free_count + cnt.v_cache_count) < (cnt.v_free_min + cnt.v_cache_min))) { pagedaemon_wakeup(); } vm_page_flag_clear(m, PG_ZERO); vm_page_wire(m); vm_page_unlock_queues(); bp->b_pages[bp->b_npages] = m; ++bp->b_npages; } /* * Step 2. We've loaded the pages into the buffer, * we have to figure out if we can still have B_CACHE * set. Note that B_CACHE is set according to the * byte-granular range ( bcount and size ), new the * aligned range ( newbsize ). * * The VM test is against m->valid, which is DEV_BSIZE * aligned. Needless to say, the validity of the data * needs to also be DEV_BSIZE aligned. Note that this * fails with NFS if the server or some other client * extends the file's EOF. If our buffer is resized, * B_CACHE may remain set! XXX */ toff = bp->b_bcount; tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK); while ((bp->b_flags & B_CACHE) && toff < size) { vm_pindex_t pi; if (tinc > (size - toff)) tinc = size - toff; pi = ((bp->b_offset & PAGE_MASK) + toff) >> PAGE_SHIFT; vfs_buf_test_cache( bp, bp->b_offset, toff, tinc, bp->b_pages[pi] ); toff += tinc; tinc = PAGE_SIZE; } VM_OBJECT_UNLOCK(obj); /* * Step 3, fixup the KVM pmap. Remember that * bp->b_data is relative to bp->b_offset, but * bp->b_offset may be offset into the first page. */ bp->b_data = (caddr_t) trunc_page((vm_offset_t)bp->b_data); pmap_qenter( (vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages ); bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | (vm_offset_t)(bp->b_offset & PAGE_MASK)); } } if (newbsize < bp->b_bufsize) bufspacewakeup(); bp->b_bufsize = newbsize; /* actual buffer allocation */ bp->b_bcount = size; /* requested buffer size */ return 1; } void biodone(struct bio *bp) { mtx_lock(&bdonelock); bp->bio_flags |= BIO_DONE; if (bp->bio_done == NULL) wakeup(bp); mtx_unlock(&bdonelock); if (bp->bio_done != NULL) bp->bio_done(bp); } /* * Wait for a BIO to finish. * * XXX: resort to a timeout for now. The optimal locking (if any) for this * case is not yet clear. */ int biowait(struct bio *bp, const char *wchan) { mtx_lock(&bdonelock); while ((bp->bio_flags & BIO_DONE) == 0) msleep(bp, &bdonelock, PRIBIO, wchan, hz / 10); mtx_unlock(&bdonelock); if (bp->bio_error != 0) return (bp->bio_error); if (!(bp->bio_flags & BIO_ERROR)) return (0); return (EIO); } void biofinish(struct bio *bp, struct devstat *stat, int error) { if (error) { bp->bio_error = error; bp->bio_flags |= BIO_ERROR; } if (stat != NULL) devstat_end_transaction_bio(stat, bp); biodone(bp); } /* * bufwait: * * Wait for buffer I/O completion, returning error status. The buffer * is left locked and B_DONE on return. B_EINTR is converted into an EINTR * error and cleared. */ int bufwait(register struct buf * bp) { int s; s = splbio(); if (bp->b_iocmd == BIO_READ) bwait(bp, PRIBIO, "biord"); else bwait(bp, PRIBIO, "biowr"); splx(s); if (bp->b_flags & B_EINTR) { bp->b_flags &= ~B_EINTR; return (EINTR); } if (bp->b_ioflags & BIO_ERROR) { return (bp->b_error ? bp->b_error : EIO); } else { return (0); } } /* * Call back function from struct bio back up to struct buf. * The corresponding initialization lives in sys/conf.h:DEV_STRATEGY(). */ static void bufdonebio(struct bio *bp) { /* Device drivers may or may not hold giant, hold it here. 
*/ mtx_lock(&Giant); bufdone(bp->bio_caller2); mtx_unlock(&Giant); } void dev_strategy(struct buf *bp) { if ((!bp->b_iocmd) || (bp->b_iocmd & (bp->b_iocmd - 1))) panic("b_iocmd botch"); if (bp->b_flags & B_PHYS) bp->b_io.bio_offset = bp->b_offset; else bp->b_io.bio_offset = dbtob(bp->b_blkno); bp->b_io.bio_done = bufdonebio; bp->b_io.bio_caller2 = bp; (*devsw(bp->b_io.bio_dev)->d_strategy)(&bp->b_io); } /* * bufdone: * * Finish I/O on a buffer, optionally calling a completion function. * This is usually called from an interrupt so process blocking is * not allowed. * * biodone is also responsible for setting B_CACHE in a B_VMIO bp. * In a non-VMIO bp, B_CACHE will be set on the next getblk() * assuming B_INVAL is clear. * * For the VMIO case, we set B_CACHE if the op was a read and no * read error occured, or if the op was a write. B_CACHE is never * set if the buffer is invalid or otherwise uncacheable. * * biodone does not mess with B_INVAL, allowing the I/O routine or the * initiator to leave B_INVAL set to brelse the buffer out of existance * in the biodone routine. */ void bufdone(struct buf *bp) { int s; void (*biodone)(struct buf *); GIANT_REQUIRED; s = splbio(); KASSERT(BUF_REFCNT(bp) > 0, ("biodone: bp %p not busy %d", bp, BUF_REFCNT(bp))); KASSERT(!(bp->b_flags & B_DONE), ("biodone: bp %p already done", bp)); bp->b_flags |= B_DONE; runningbufwakeup(bp); if (bp->b_iocmd == BIO_DELETE) { brelse(bp); splx(s); return; } if (bp->b_iocmd == BIO_WRITE) { vwakeup(bp); } /* call optional completion function if requested */ if (bp->b_iodone != NULL) { biodone = bp->b_iodone; bp->b_iodone = NULL; (*biodone) (bp); splx(s); return; } if (LIST_FIRST(&bp->b_dep) != NULL) buf_complete(bp); if (bp->b_flags & B_VMIO) { int i; vm_ooffset_t foff; vm_page_t m; vm_object_t obj; int iosize; struct vnode *vp = bp->b_vp; obj = bp->b_object; #if defined(VFS_BIO_DEBUG) mp_fixme("usecount and vflag accessed without locks."); if (vp->v_usecount == 0) { panic("biodone: zero vnode ref count"); } if ((vp->v_vflag & VV_OBJBUF) == 0) { panic("biodone: vnode is not setup for merged cache"); } #endif foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("biodone: no buffer offset")); VM_OBJECT_LOCK(obj); #if defined(VFS_BIO_DEBUG) if (obj->paging_in_progress < bp->b_npages) { printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", obj->paging_in_progress, bp->b_npages); } #endif /* * Set B_CACHE if the op was a normal read and no error * occured. B_CACHE is set for writes in the b*write() * routines. */ iosize = bp->b_bcount - bp->b_resid; if (bp->b_iocmd == BIO_READ && !(bp->b_flags & (B_INVAL|B_NOCACHE)) && !(bp->b_ioflags & BIO_ERROR)) { bp->b_flags |= B_CACHE; } vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { int bogusflag = 0; int resid; resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff; if (resid > iosize) resid = iosize; /* * cleanup bogus pages, restoring the originals */ m = bp->b_pages[i]; if (m == bogus_page) { bogusflag = 1; m = vm_page_lookup(obj, OFF_TO_IDX(foff)); if (m == NULL) panic("biodone: page disappeared!"); bp->b_pages[i] = m; pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } #if defined(VFS_BIO_DEBUG) if (OFF_TO_IDX(foff) != m->pindex) { printf( "biodone: foff(%jd)/m->pindex(%ju) mismatch\n", (intmax_t)foff, (uintmax_t)m->pindex); } #endif /* * In the write case, the valid and clean bits are * already changed correctly ( see bdwrite() ), so we * only need to do this here in the read case. 
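 *
 * As a worked example of the per-page walk above (values invented
 * for illustration): with bp->b_offset = 0x1e00 and a 4K PAGE_SIZE,
 * the first iteration computes
 *
 *	resid = ((0x1e00 + 0x1000) & ~0xfff) - 0x1e00 = 0x200
 *
 * i.e. only the tail of the first page belongs to this buffer; every
 * later iteration advances foff to a page boundary and covers a full
 * 0x1000 bytes, except possibly the last, which is clipped to
 * whatever remains of iosize.
 *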
*/ if ((bp->b_iocmd == BIO_READ) && !bogusflag && resid > 0) { vfs_page_set_valid(bp, foff, i, m); } vm_page_flag_clear(m, PG_ZERO); /* * when debugging new filesystems or buffer I/O methods, this * is the most common error that pops up. if you see this, you * have not set the page busy flag correctly!!! */ if (m->busy == 0) { printf("biodone: page busy < 0, " "pindex: %d, foff: 0x(%x,%x), " "resid: %d, index: %d\n", (int) m->pindex, (int)(foff >> 32), (int) foff & 0xffffffff, resid, i); if (!vn_isdisk(vp, NULL)) - printf(" iosize: %ld, lblkno: %jd, flags: 0x%x, npages: %d\n", - bp->b_vp->v_mount->mnt_stat.f_iosize, + printf(" iosize: %jd, lblkno: %jd, flags: 0x%x, npages: %d\n", + (intmax_t)bp->b_vp->v_mount->mnt_stat.f_iosize, (intmax_t) bp->b_lblkno, bp->b_flags, bp->b_npages); else printf(" VDEV, lblkno: %jd, flags: 0x%x, npages: %d\n", (intmax_t) bp->b_lblkno, bp->b_flags, bp->b_npages); printf(" valid: 0x%lx, dirty: 0x%lx, wired: %d\n", (u_long)m->valid, (u_long)m->dirty, m->wire_count); panic("biodone: page busy < 0\n"); } vm_page_io_finish(m); vm_object_pip_subtract(obj, 1); foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; iosize -= resid; } vm_page_unlock_queues(); vm_object_pip_wakeupn(obj, 0); VM_OBJECT_UNLOCK(obj); } /* * For asynchronous completions, release the buffer now. The brelse * will do a wakeup there if necessary - so no need to do a wakeup * here in the async case. The sync case always needs to do a wakeup. */ if (bp->b_flags & B_ASYNC) { if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_RELBUF)) || (bp->b_ioflags & BIO_ERROR)) brelse(bp); else bqrelse(bp); } else { bdone(bp); } splx(s); } /* * This routine is called in lieu of iodone in the case of * incomplete I/O. This keeps the busy status for pages * consistant. */ void vfs_unbusy_pages(struct buf * bp) { int i; GIANT_REQUIRED; runningbufwakeup(bp); if (bp->b_flags & B_VMIO) { vm_object_t obj; obj = bp->b_object; VM_OBJECT_LOCK(obj); vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; if (m == bogus_page) { m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i); if (!m) { panic("vfs_unbusy_pages: page missing\n"); } bp->b_pages[i] = m; pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } vm_object_pip_subtract(obj, 1); vm_page_flag_clear(m, PG_ZERO); vm_page_io_finish(m); } vm_page_unlock_queues(); vm_object_pip_wakeupn(obj, 0); VM_OBJECT_UNLOCK(obj); } } /* * vfs_page_set_valid: * * Set the valid bits in a page based on the supplied offset. The * range is restricted to the buffer's size. * * This routine is typically called after a read completes. */ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m) { vm_ooffset_t soff, eoff; mtx_assert(&vm_page_queue_mtx, MA_OWNED); /* * Start and end offsets in buffer. eoff - soff may not cross a * page boundry or cross the end of the buffer. The end of the * buffer, in this case, is our file EOF, not the allocation size * of the buffer. */ soff = off; eoff = (off + PAGE_SIZE) & ~(off_t)PAGE_MASK; if (eoff > bp->b_offset + bp->b_bcount) eoff = bp->b_offset + bp->b_bcount; /* * Set valid range. This is typically the entire buffer and thus the * entire page. */ if (eoff > soff) { vm_page_set_validclean( m, (vm_offset_t) (soff & PAGE_MASK), (vm_offset_t) (eoff - soff) ); } } /* * This routine is called before a device strategy routine. * It is used to tell the VM system that paging I/O is in * progress, and treat the pages associated with the buffer * almost as being PG_BUSY. 
Also the object paging_in_progress * flag is handled to make sure that the object doesn't become * inconsistant. * * Since I/O has not been initiated yet, certain buffer flags * such as BIO_ERROR or B_INVAL may be in an inconsistant state * and should be ignored. */ void vfs_busy_pages(struct buf * bp, int clear_modify) { int i, bogus; if (bp->b_flags & B_VMIO) { vm_object_t obj; vm_ooffset_t foff; obj = bp->b_object; foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_busy_pages: no buffer offset")); vfs_setdirty(bp); VM_OBJECT_LOCK(obj); retry: vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; if (vm_page_sleep_if_busy(m, FALSE, "vbpage")) goto retry; } bogus = 0; for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; vm_page_flag_clear(m, PG_ZERO); if ((bp->b_flags & B_CLUSTER) == 0) { vm_object_pip_add(obj, 1); vm_page_io_start(m); } /* * When readying a buffer for a read ( i.e * clear_modify == 0 ), it is important to do * bogus_page replacement for valid pages in * partially instantiated buffers. Partially * instantiated buffers can, in turn, occur when * reconstituting a buffer from its VM backing store * base. We only have to do this if B_CACHE is * clear ( which causes the I/O to occur in the * first place ). The replacement prevents the read * I/O from overwriting potentially dirty VM-backed * pages. XXX bogus page replacement is, uh, bogus. * It may not work properly with small-block devices. * We need to find a better way. */ pmap_remove_all(m); if (clear_modify) vfs_page_set_valid(bp, foff, i, m); else if (m->valid == VM_PAGE_BITS_ALL && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; bogus++; } foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } vm_page_unlock_queues(); VM_OBJECT_UNLOCK(obj); if (bogus) pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } } /* * Tell the VM system that the pages associated with this buffer * are clean. This is used for delayed writes where the data is * going to go to disk eventually without additional VM intevention. * * Note that while we only really need to clean through to b_bcount, we * just go ahead and clean through to b_bufsize. */ static void vfs_clean_pages(struct buf * bp) { int i; if (bp->b_flags & B_VMIO) { vm_ooffset_t foff; foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_clean_pages: no buffer offset")); VM_OBJECT_LOCK(bp->b_object); vm_page_lock_queues(); for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; vm_ooffset_t noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; vm_ooffset_t eoff = noff; if (eoff > bp->b_offset + bp->b_bufsize) eoff = bp->b_offset + bp->b_bufsize; vfs_page_set_valid(bp, foff, i, m); /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */ foff = noff; } vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_object); } } /* * vfs_bio_set_validclean: * * Set the range within the buffer to valid and clean. The range is * relative to the beginning of the buffer, b_offset. Note that b_offset * itself may be offset from the beginning of the first page. * */ void vfs_bio_set_validclean(struct buf *bp, int base, int size) { if (bp->b_flags & B_VMIO) { int i; int n; /* * Fixup base to be relative to beginning of first page. * Set initial n to be the maximum number of bytes in the * first page that can be validated. 
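 *
 * For instance (illustrative values only): with 4K pages, if
 * (bp->b_offset & PAGE_MASK) == 0x200 and the caller passes
 * base = 0x300, size = 0x1800, then
 *
 *	base = 0x300 + 0x200 = 0x500
 *	n    = 0x1000 - 0x500 = 0xb00
 *
 * so the first page (i = base / PAGE_SIZE = 0) gets bytes
 * [0x500, 0x1000) marked valid and clean, and the loop continues
 * into page 1 with the remaining 0xd00 bytes and n reset to
 * PAGE_SIZE.
 *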
*/ base += (bp->b_offset & PAGE_MASK); n = PAGE_SIZE - (base & PAGE_MASK); VM_OBJECT_LOCK(bp->b_object); vm_page_lock_queues(); for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { vm_page_t m = bp->b_pages[i]; if (n > size) n = size; vm_page_set_validclean(m, base & PAGE_MASK, n); base += n; size -= n; n = PAGE_SIZE; } vm_page_unlock_queues(); VM_OBJECT_UNLOCK(bp->b_object); } } /* * vfs_bio_clrbuf: * * clear a buffer. This routine essentially fakes an I/O, so we need * to clear BIO_ERROR and B_INVAL. * * Note that while we only theoretically need to clear through b_bcount, * we go ahead and clear through b_bufsize. */ void vfs_bio_clrbuf(struct buf *bp) { int i, mask = 0; caddr_t sa, ea; GIANT_REQUIRED; if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) { bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; if (bp->b_object != NULL) VM_OBJECT_LOCK(bp->b_object); if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) && (bp->b_offset & PAGE_MASK) == 0) { mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1; if (bp->b_pages[0] != bogus_page) VM_OBJECT_LOCK_ASSERT(bp->b_pages[0]->object, MA_OWNED); if ((bp->b_pages[0]->valid & mask) == mask) goto unlock; if (((bp->b_pages[0]->flags & PG_ZERO) == 0) && ((bp->b_pages[0]->valid & mask) == 0)) { bzero(bp->b_data, bp->b_bufsize); bp->b_pages[0]->valid |= mask; goto unlock; } } ea = sa = bp->b_data; for(i=0;i<bp->b_npages;i++,sa=ea) { int j = ((vm_offset_t)sa & PAGE_MASK) / DEV_BSIZE; ea = (caddr_t)trunc_page((vm_offset_t)sa + PAGE_SIZE); ea = (caddr_t)(vm_offset_t)ulmin( (u_long)(vm_offset_t)ea, (u_long)(vm_offset_t)bp->b_data + bp->b_bufsize); mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j; if (bp->b_pages[i] != bogus_page) VM_OBJECT_LOCK_ASSERT(bp->b_pages[i]->object, MA_OWNED); if ((bp->b_pages[i]->valid & mask) == mask) continue; if ((bp->b_pages[i]->valid & mask) == 0) { if ((bp->b_pages[i]->flags & PG_ZERO) == 0) { bzero(sa, ea - sa); } } else { for (; sa < ea; sa += DEV_BSIZE, j++) { if (((bp->b_pages[i]->flags & PG_ZERO) == 0) && (bp->b_pages[i]->valid & (1<<j)) == 0) bzero(sa, DEV_BSIZE); } } bp->b_pages[i]->valid |= mask; vm_page_lock_queues(); vm_page_flag_clear(bp->b_pages[i], PG_ZERO); vm_page_unlock_queues(); } unlock: if (bp->b_object != NULL) VM_OBJECT_UNLOCK(bp->b_object); bp->b_resid = 0; } else { clrbuf(bp); } } /* * vm_hold_load_pages and vm_hold_free_pages get pages into * a buffers address space. The pages are anonymous and are * not associated with a file object. */ static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; to = round_page(to); from = round_page(from); index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; VM_OBJECT_LOCK(kernel_object); for (pg = from; pg < to; pg += PAGE_SIZE, index++) { tryagain: /* * note: must allocate system pages since blocking here * could intefere with paging I/O, no matter which * process we are.
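 *
 * (Informal gloss: VM_ALLOC_SYSTEM lets the allocation dig deeper
 * into the free-page reserve than an ordinary VM_ALLOC_NORMAL
 * request would, and VM_ALLOC_WIRED hands the page back already
 * wired, so the only remaining failure mode is the VM_WAIT/tryagain
 * loop below.  The pindex passed in,
 * (pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT, simply names the page
 * by its offset within kernel_object, i.e. by its kernel virtual
 * address.)
 *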
*/ p = vm_page_alloc(kernel_object, ((pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); if (!p) { atomic_add_int(&vm_pageout_deficit, (to - pg) >> PAGE_SHIFT); VM_OBJECT_UNLOCK(kernel_object); VM_WAIT; VM_OBJECT_LOCK(kernel_object); goto tryagain; } p->valid = VM_PAGE_BITS_ALL; pmap_qenter(pg, &p, 1); bp->b_pages[index] = p; vm_page_lock_queues(); vm_page_wakeup(p); vm_page_unlock_queues(); } VM_OBJECT_UNLOCK(kernel_object); bp->b_npages = index; } /* Return pages associated with this buf to the vm system */ static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index, newnpages; GIANT_REQUIRED; from = round_page(from); to = round_page(to); newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; VM_OBJECT_LOCK(kernel_object); for (pg = from; pg < to; pg += PAGE_SIZE, index++) { p = bp->b_pages[index]; if (p && (index < bp->b_npages)) { if (p->busy) { printf( "vm_hold_free_pages: blkno: %jd, lblkno: %jd\n", (intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno); } bp->b_pages[index] = NULL; pmap_qremove(pg, 1); vm_page_lock_queues(); vm_page_busy(p); vm_page_unwire(p, 0); vm_page_free(p); vm_page_unlock_queues(); } } VM_OBJECT_UNLOCK(kernel_object); bp->b_npages = newnpages; } /* * Map an IO request into kernel virtual address space. * * All requests are (re)mapped into kernel VA space. * Notice that we use b_bufsize for the size of the buffer * to be mapped. b_bcount might be modified by the driver. * * Note that even if the caller determines that the address space should * be valid, a race or a smaller-file mapped into a larger space may * actually cause vmapbuf() to fail, so all callers of vmapbuf() MUST * check the return value. */ int vmapbuf(struct buf *bp) { caddr_t addr, kva; vm_prot_t prot; int pidx, i; struct vm_page *m; struct pmap *pmap = &curproc->p_vmspace->vm_pmap; GIANT_REQUIRED; if (bp->b_bufsize < 0) return (-1); prot = (bp->b_iocmd == BIO_READ) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ; for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data), pidx = 0; addr < bp->b_data + bp->b_bufsize; addr += PAGE_SIZE, pidx++) { /* * Do the vm_fault if needed; do the copy-on-write thing * when reading stuff off device into memory. * * NOTE! Must use pmap_extract() because addr may be in * the userland address space, and kextract is only guarenteed * to work for the kernland address space (see: sparc64 port). */ retry: if (vm_fault_quick(addr >= bp->b_data ? addr : bp->b_data, prot) < 0) { vm_page_lock_queues(); for (i = 0; i < pidx; ++i) { vm_page_unhold(bp->b_pages[i]); bp->b_pages[i] = NULL; } vm_page_unlock_queues(); return(-1); } m = pmap_extract_and_hold(pmap, (vm_offset_t)addr, prot); if (m == NULL) goto retry; bp->b_pages[pidx] = m; } if (pidx > btoc(MAXPHYS)) panic("vmapbuf: mapped more than MAXPHYS"); pmap_qenter((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx); kva = bp->b_saveaddr; bp->b_npages = pidx; bp->b_saveaddr = bp->b_data; bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK); return(0); } /* * Free the io map PTEs associated with this IO operation. * We also invalidate the TLB entries and restore the original b_addr. 
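 *
 * The usual pairing, sketched informally (a physio(9)-style caller;
 * error handling omitted and names invented):
 *
 *	bp->b_data    = user_buffer;	-- userland address
 *	bp->b_bufsize = user_length;
 *	if (vmapbuf(bp) < 0)		-- fault in, hold, remap into KVA
 *		return (EFAULT);
 *	(hand bp to the driver and wait for the I/O to complete)
 *	vunmapbuf(bp);			-- drop the mappings, unhold the
 *					   pages, restore the original b_data
 *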
*/ void vunmapbuf(struct buf *bp) { int pidx; int npages; GIANT_REQUIRED; npages = bp->b_npages; pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); vm_page_lock_queues(); for (pidx = 0; pidx < npages; pidx++) vm_page_unhold(bp->b_pages[pidx]); vm_page_unlock_queues(); bp->b_data = bp->b_saveaddr; } void bdone(struct buf *bp) { mtx_lock(&bdonelock); bp->b_flags |= B_DONE; wakeup(bp); mtx_unlock(&bdonelock); } void bwait(struct buf *bp, u_char pri, const char *wchan) { mtx_lock(&bdonelock); while ((bp->b_flags & B_DONE) == 0) msleep(bp, &bdonelock, pri, wchan, 0); mtx_unlock(&bdonelock); } #include "opt_ddb.h" #ifdef DDB #include /* DDB command to show buffer data */ DB_SHOW_COMMAND(buffer, db_show_buffer) { /* get args */ struct buf *bp = (struct buf *)addr; if (!have_addr) { db_printf("usage: show buffer \n"); return; } db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS); db_printf( "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n" "b_dev = (%d,%d), b_data = %p, b_blkno = %jd\n", bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid, major(bp->b_dev), minor(bp->b_dev), bp->b_data, (intmax_t)bp->b_blkno); if (bp->b_npages) { int i; db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages); for (i = 0; i < bp->b_npages; i++) { vm_page_t m; m = bp->b_pages[i]; db_printf("(%p, 0x%lx, 0x%lx)", (void *)m->object, (u_long)m->pindex, (u_long)VM_PAGE_TO_PHYS(m)); if ((i + 1) < bp->b_npages) db_printf(","); } db_printf("\n"); } } #endif /* DDB */ Index: head/sys/kern/vfs_cluster.c =================================================================== --- head/sys/kern/vfs_cluster.c (revision 122536) +++ head/sys/kern/vfs_cluster.c (revision 122537) @@ -1,1035 +1,1035 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * Modifications/enhancements: * Copyright (c) 1995 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_debug_cluster.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(CLUSTERDEBUG) static int rcluster= 0; SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, "Debug VFS clustering code"); #endif static MALLOC_DEFINE(M_SEGMENT, "cluster_save buffer", "cluster_save buffer"); static struct cluster_save * cluster_collectbufs(struct vnode *vp, struct buf *last_bp); static struct buf * cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, struct buf *fbp); static int write_behind = 1; SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0, "Cluster write-behind; 0: disable, 1: enable, 2: backed off"); static int read_max = 8; SYSCTL_INT(_vfs, OID_AUTO, read_max, CTLFLAG_RW, &read_max, 0, "Cluster read-ahead max block count"); /* Page expended to mark partially backed buffers */ extern vm_page_t bogus_page; /* * Number of physical bufs (pbufs) this subsystem is allowed. * Manipulated by vm_pager.c */ extern int cluster_pbuf_freecnt; /* * Read data to a buf, including read-ahead if we find this to be beneficial. * cluster_read replaces bread. */ int cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) struct vnode *vp; u_quad_t filesize; daddr_t lblkno; long size; struct ucred *cred; long totread; int seqcount; struct buf **bpp; { struct buf *bp, *rbp, *reqbp; daddr_t blkno, origblkno; int maxra, racluster; int error, ncontig; int i; error = 0; /* * Try to limit the amount of read-ahead by a few * ad-hoc parameters. This needs work!!! */ racluster = vp->v_mount->mnt_iosize_max / size; maxra = seqcount; maxra = min(read_max, maxra); maxra = min(nbuf/8, maxra); if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize) maxra = (filesize / size) - lblkno; /* * get the requested block */ *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, 0); origblkno = lblkno; /* * if it is in the cache, then check to see if the reads have been * sequential. If they have, then try some read-ahead, otherwise * back-off on prospective read-aheads. */ if (bp->b_flags & B_CACHE) { if (!seqcount) { return 0; } else if ((bp->b_flags & B_RAM) == 0) { return 0; } else { int s; bp->b_flags &= ~B_RAM; /* * We do the spl here so that there is no window * between the incore and the b_usecount increment * below. We opt to keep the spl out of the loop * for efficiency. */ s = splbio(); VI_LOCK(vp); for (i = 1; i < maxra; i++) { /* * Stop if the buffer does not exist or it * is invalid (about to go away?) */ rbp = gbincore(vp, lblkno+i); if (rbp == NULL || (rbp->b_flags & B_INVAL)) break; /* * Set another read-ahead mark so we know * to check again. 
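 *
 * Example (made-up values): with racluster = 4 and maxra = 8 the
 * scan below re-marks the already-cached read-ahead buffers at
 * i = 3 and i = 7, i.e. every racluster'th buffer, with the last
 * buffer scanned (i = maxra - 1) always marked.  A later sequential
 * read that hits one of those B_RAM buffers then triggers another
 * round of read-ahead instead of silently running off the end of
 * what was read ahead this time.
 *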
*/ if (((i % racluster) == (racluster - 1)) || (i == (maxra - 1))) rbp->b_flags |= B_RAM; } VI_UNLOCK(vp); splx(s); if (i >= maxra) { return 0; } lblkno += i; } reqbp = bp = NULL; /* * If it isn't in the cache, then get a chunk from * disk if sequential, otherwise just get the block. */ } else { off_t firstread = bp->b_offset; int nblks; KASSERT(bp->b_offset != NOOFFSET, ("cluster_read: no buffer offset")); ncontig = 0; /* * Compute the total number of blocks that we should read * synchronously. */ if (firstread + totread > filesize) totread = filesize - firstread; nblks = howmany(totread, size); if (nblks > racluster) nblks = racluster; /* * Now compute the number of contiguous blocks. */ if (nblks > 1) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); /* * If this failed to map just do the original block. */ if (error || blkno == -1) ncontig = 0; } /* * If we have contiguous data available do a cluster * otherwise just read the requested block. */ if (ncontig) { /* Account for our first block. */ ncontig = min(ncontig + 1, nblks); if (ncontig < nblks) nblks = ncontig; bp = cluster_rbuild(vp, filesize, lblkno, blkno, size, nblks, bp); lblkno += (bp->b_bufsize / size); } else { bp->b_flags |= B_RAM; bp->b_iocmd = BIO_READ; lblkno += 1; } } /* * handle the synchronous read so that it is available ASAP. */ if (bp) { if ((bp->b_flags & B_CLUSTER) == 0) { vfs_busy_pages(bp, 0); } bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL) BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); error = VOP_STRATEGY(vp, bp); curproc->p_stats->p_ru.ru_inblock++; if (error) return (error); } /* * If we have been doing sequential I/O, then do some read-ahead. */ while (lblkno < (origblkno + maxra)) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); if (error) break; if (blkno == -1) break; /* * We could throttle ncontig here by maxra but we might as * well read the data if it is contiguous. We're throttled * by racluster anyway. */ if (ncontig) { ncontig = min(ncontig + 1, racluster); rbp = cluster_rbuild(vp, filesize, lblkno, blkno, size, ncontig, NULL); lblkno += (rbp->b_bufsize / size); if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } } else { rbp = getblk(vp, lblkno, size, 0, 0, 0); lblkno += 1; if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } rbp->b_flags |= B_ASYNC | B_RAM; rbp->b_iocmd = BIO_READ; rbp->b_blkno = blkno; } if (rbp->b_flags & B_CACHE) { rbp->b_flags &= ~B_ASYNC; bqrelse(rbp); continue; } if ((rbp->b_flags & B_CLUSTER) == 0) { vfs_busy_pages(rbp, 0); } rbp->b_flags &= ~B_INVAL; rbp->b_ioflags &= ~BIO_ERROR; if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL) BUF_KERNPROC(rbp); rbp->b_iooffset = dbtob(rbp->b_blkno); (void) VOP_STRATEGY(vp, rbp); curproc->p_stats->p_ru.ru_inblock++; } if (reqbp) return (bufwait(reqbp)); else return (error); } /* * If blocks are contiguous on disk, use this to provide clustered * read ahead. We will read as many blocks as possible sequentially * and then parcel them up into logical blocks in the buffer hash table. 
*/ static struct buf * cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) struct vnode *vp; u_quad_t filesize; daddr_t lbn; daddr_t blkno; long size; int run; struct buf *fbp; { struct buf *bp, *tbp; daddr_t bn; int i, inc, j; GIANT_REQUIRED; KASSERT(size == vp->v_mount->mnt_stat.f_iosize, - ("cluster_rbuild: size %ld != filesize %ld\n", - size, vp->v_mount->mnt_stat.f_iosize)); + ("cluster_rbuild: size %ld != filesize %jd\n", + size, (intmax_t)vp->v_mount->mnt_stat.f_iosize)); /* * avoid a division */ while ((u_quad_t) size * (lbn + run) > filesize) { --run; } if (fbp) { tbp = fbp; tbp->b_iocmd = BIO_READ; } else { tbp = getblk(vp, lbn, size, 0, 0, 0); if (tbp->b_flags & B_CACHE) return tbp; tbp->b_flags |= B_ASYNC | B_RAM; tbp->b_iocmd = BIO_READ; } tbp->b_blkno = blkno; if( (tbp->b_flags & B_MALLOC) || ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) ) return tbp; bp = trypbuf(&cluster_pbuf_freecnt); if (bp == 0) return tbp; /* * We are synthesizing a buffer out of vm_page_t's, but * if the block size is not page aligned then the starting * address may not be either. Inherit the b_data offset * from the original buffer. */ bp->b_data = (char *)((vm_offset_t)bp->b_data | ((vm_offset_t)tbp->b_data & PAGE_MASK)); bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO; bp->b_iocmd = BIO_READ; bp->b_iodone = cluster_callback; bp->b_blkno = blkno; bp->b_lblkno = lbn; bp->b_offset = tbp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("cluster_rbuild: no buffer offset")); pbgetvp(vp, bp); TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; inc = btodb(size); for (bn = blkno, i = 0; i < run; ++i, bn += inc) { if (i != 0) { if ((bp->b_npages * PAGE_SIZE) + round_page(size) > vp->v_mount->mnt_iosize_max) { break; } tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT); /* Don't wait around for locked bufs. */ if (tbp == NULL) break; /* * Stop scanning if the buffer is fully valid * (marked B_CACHE), or locked (may be doing a * background write), or if the buffer is not * VMIO backed. The clustering code can only deal * with VMIO-backed buffers. */ VI_LOCK(bp->b_vp); if ((tbp->b_vflags & BV_BKGRDINPROG) || (tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { VI_UNLOCK(bp->b_vp); bqrelse(tbp); break; } VI_UNLOCK(bp->b_vp); /* * The buffer must be completely invalid in order to * take part in the cluster. If it is partially valid * then we stop. */ VM_OBJECT_LOCK(tbp->b_object); for (j = 0;j < tbp->b_npages; j++) { VM_OBJECT_LOCK_ASSERT(tbp->b_pages[j]->object, MA_OWNED); if (tbp->b_pages[j]->valid) break; } VM_OBJECT_UNLOCK(tbp->b_object); if (j != tbp->b_npages) { bqrelse(tbp); break; } /* * Set a read-ahead mark as appropriate */ if ((fbp && (i == 1)) || (i == (run - 1))) tbp->b_flags |= B_RAM; /* * Set the buffer up for an async read (XXX should * we do this only if we do not wind up brelse()ing?). * Set the block number if it isn't set, otherwise * if it is make sure it matches the block number we * expect. 
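 *
 * (Gloss: a buffer whose b_blkno still equals b_lblkno has never
 * been translated by VOP_BMAP, so it is safe to assign it the
 * physical block we expect, bn = blkno + i * btodb(size).  If it has
 * already been translated and disagrees with bn, the file is not
 * actually contiguous here, so the buffer is released and the
 * cluster is terminated at the previous block.)
 *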
*/ tbp->b_flags |= B_ASYNC; tbp->b_iocmd = BIO_READ; if (tbp->b_blkno == tbp->b_lblkno) { tbp->b_blkno = bn; } else if (tbp->b_blkno != bn) { brelse(tbp); break; } } /* * XXX fbp from caller may not be B_ASYNC, but we are going * to biodone() it in cluster_callback() anyway */ BUF_KERNPROC(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); VM_OBJECT_LOCK(tbp->b_object); vm_page_lock_queues(); for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; vm_page_io_start(m); vm_object_pip_add(m->object, 1); if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages-1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) tbp->b_pages[j] = bogus_page; } vm_page_unlock_queues(); VM_OBJECT_UNLOCK(tbp->b_object); /* * XXX shouldn't this be += size for both, like in * cluster_wbuild()? * * Don't inherit tbp->b_bufsize as it may be larger due to * a non-page-aligned size. Instead just aggregate using * 'size'. */ if (tbp->b_bcount != size) printf("warning: tbp->b_bcount wrong %ld vs %ld\n", tbp->b_bcount, size); if (tbp->b_bufsize != size) printf("warning: tbp->b_bufsize wrong %ld vs %ld\n", tbp->b_bufsize, size); bp->b_bcount += size; bp->b_bufsize += size; } /* * Fully valid pages in the cluster are already good and do not need * to be re-read from disk. Replace the page with bogus_page */ VM_OBJECT_LOCK(bp->b_object); for (j = 0; j < bp->b_npages; j++) { VM_OBJECT_LOCK_ASSERT(bp->b_pages[j]->object, MA_OWNED); if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) { bp->b_pages[j] = bogus_page; } } VM_OBJECT_UNLOCK(bp->b_object); if (bp->b_bufsize > bp->b_kvasize) panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); bp->b_kvasize = bp->b_bufsize; pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); return (bp); } /* * Cleanup after a clustered read or write. * This is complicated by the fact that any of the buffers might have * extra memory (if there were no empty buffer headers at allocbuf time) * that we will need to shift around. */ void cluster_callback(bp) struct buf *bp; { struct buf *nbp, *tbp; int error = 0; GIANT_REQUIRED; /* * Must propogate errors to all the components. */ if (bp->b_ioflags & BIO_ERROR) error = bp->b_error; pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); /* * Move memory from the large cluster buffer into the component * buffers and mark IO as done on these. */ for (tbp = TAILQ_FIRST(&bp->b_cluster.cluster_head); tbp; tbp = nbp) { nbp = TAILQ_NEXT(&tbp->b_cluster, cluster_entry); if (error) { tbp->b_ioflags |= BIO_ERROR; tbp->b_error = error; } else { tbp->b_dirtyoff = tbp->b_dirtyend = 0; tbp->b_flags &= ~B_INVAL; tbp->b_ioflags &= ~BIO_ERROR; /* * XXX the bdwrite()/bqrelse() issued during * cluster building clears B_RELBUF (see bqrelse() * comment). If direct I/O was specified, we have * to restore it here to allow the buffer and VM * to be freed. */ if (tbp->b_flags & B_DIRECT) tbp->b_flags |= B_RELBUF; } bufdone(tbp); } relpbuf(bp, &cluster_pbuf_freecnt); } /* * cluster_wbuild_wb: * * Implement modified write build for cluster. 
* * write_behind = 0 write behind disabled * write_behind = 1 write behind normal (default) * write_behind = 2 write behind backed-off */ static __inline int cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len) { int r = 0; switch(write_behind) { case 2: if (start_lbn < len) break; start_lbn -= len; /* FALLTHROUGH */ case 1: r = cluster_wbuild(vp, size, start_lbn, len); /* FALLTHROUGH */ default: /* FALLTHROUGH */ break; } return(r); } /* * Do clustered write for FFS. * * Three cases: * 1. Write is not sequential (write asynchronously) * Write is sequential: * 2. beginning of cluster - begin cluster * 3. middle of a cluster - add to cluster * 4. end of a cluster - asynchronously write cluster */ void cluster_write(bp, filesize, seqcount) struct buf *bp; u_quad_t filesize; int seqcount; { struct vnode *vp; daddr_t lbn; int maxclen, cursize; int lblocksize; int async; vp = bp->b_vp; if (vp->v_type == VREG) { async = vp->v_mount->mnt_flag & MNT_ASYNC; lblocksize = vp->v_mount->mnt_stat.f_iosize; } else { async = 0; lblocksize = bp->b_bufsize; } lbn = bp->b_lblkno; KASSERT(bp->b_offset != NOOFFSET, ("cluster_write: no buffer offset")); /* Initialize vnode to beginning of file. */ if (lbn == 0) vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) { maxclen = vp->v_mount->mnt_iosize_max / lblocksize - 1; if (vp->v_clen != 0) { /* * Next block is not sequential. * * If we are not writing at end of file, the process * seeked to another point in the file since its last * write, or we have reached our maximum cluster size, * then push the previous cluster. Otherwise try * reallocating to make it sequential. * * Change to algorithm: only push previous cluster if * it was sequential from the point of view of the * seqcount heuristic, otherwise leave the buffer * intact so we can potentially optimize the I/O * later on in the buf_daemon or update daemon * flush. */ cursize = vp->v_lastw - vp->v_cstart + 1; if (((u_quad_t) bp->b_offset + lblocksize) != filesize || lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { if (!async && seqcount > 0) { cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, cursize); } } else { struct buf **bpp, **endbp; struct cluster_save *buflist; buflist = cluster_collectbufs(vp, bp); endbp = &buflist->bs_children [buflist->bs_nchildren - 1]; if (VOP_REALLOCBLKS(vp, buflist)) { /* * Failed, push the previous cluster * if *really* writing sequentially * in the logical file (seqcount > 1), * otherwise delay it in the hopes that * the low level disk driver can * optimize the write ordering. */ for (bpp = buflist->bs_children; bpp < endbp; bpp++) brelse(*bpp); free(buflist, M_SEGMENT); if (seqcount > 1) { cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, cursize); } } else { /* * Succeeded, keep building cluster. */ for (bpp = buflist->bs_children; bpp <= endbp; bpp++) bdwrite(*bpp); free(buflist, M_SEGMENT); vp->v_lastw = lbn; vp->v_lasta = bp->b_blkno; return; } } } /* * Consider beginning a cluster. If at end of file, make * cluster as large as possible, otherwise find size of * existing cluster. 
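 *
 * Informal summary of the code below: when the block being written
 * ends at the current end of file (or the buffer already has a disk
 * address), VOP_BMAP is skipped and maxclen keeps its maximal value
 * (mnt_iosize_max / lblocksize - 1), so the cluster may grow as
 * large as the device allows.  Otherwise VOP_BMAP reports how many
 * blocks past lbn are physically contiguous; if the block cannot be
 * mapped at all, the buffer is pushed out with bawrite() and no
 * cluster is started.  v_clen then records how many further
 * sequential writes we are willing to gather: zero contiguity on a
 * non-async mount also means bawrite(), while in the common case the
 * buffer is delayed with bdwrite() and becomes the first block
 * (v_cstart) of the new cluster.
 *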
*/ if ((vp->v_type == VREG) && ((u_quad_t) bp->b_offset + lblocksize) != filesize && (bp->b_blkno == bp->b_lblkno) && (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) || bp->b_blkno == -1)) { bawrite(bp); vp->v_clen = 0; vp->v_lasta = bp->b_blkno; vp->v_cstart = lbn + 1; vp->v_lastw = lbn; return; } vp->v_clen = maxclen; if (!async && maxclen == 0) { /* I/O not contiguous */ vp->v_cstart = lbn + 1; bawrite(bp); } else { /* Wait for rest of cluster */ vp->v_cstart = lbn; bdwrite(bp); } } else if (lbn == vp->v_cstart + vp->v_clen) { /* * At end of cluster, write it out if seqcount tells us we * are operating sequentially, otherwise let the buf or * update daemon handle it. */ bdwrite(bp); if (seqcount > 1) cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, vp->v_clen + 1); vp->v_clen = 0; vp->v_cstart = lbn + 1; } else if (vm_page_count_severe()) { /* * We are low on memory, get it going NOW */ bawrite(bp); } else { /* * In the middle of a cluster, so just delay the I/O for now. */ bdwrite(bp); } vp->v_lastw = lbn; vp->v_lasta = bp->b_blkno; } /* * This is an awful lot like cluster_rbuild...wish they could be combined. * The last lbn argument is the current block on which I/O is being * performed. Check to see that it doesn't fall in the middle of * the current block (if last_bp == NULL). */ int cluster_wbuild(vp, size, start_lbn, len) struct vnode *vp; long size; daddr_t start_lbn; int len; { struct buf *bp, *tbp; int i, j, s; int totalwritten = 0; int dbsize = btodb(size); GIANT_REQUIRED; while (len > 0) { s = splbio(); /* * If the buffer is not delayed-write (i.e. dirty), or it * is delayed-write but either locked or inval, it cannot * partake in the clustered write. */ VI_LOCK(vp); if ((tbp = gbincore(vp, start_lbn)) == NULL || (tbp->b_vflags & BV_BKGRDINPROG)) { VI_UNLOCK(vp); ++start_lbn; --len; splx(s); continue; } if (BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, VI_MTX(vp))) { ++start_lbn; --len; splx(s); continue; } if ((tbp->b_flags & (B_INVAL | B_DELWRI)) != B_DELWRI) { BUF_UNLOCK(tbp); ++start_lbn; --len; splx(s); continue; } bremfree(tbp); tbp->b_flags &= ~B_DONE; splx(s); /* * Extra memory in the buffer, punt on this buffer. * XXX we could handle this in most cases, but we would * have to push the extra memory down to after our max * possible cluster size and then potentially pull it back * up if the cluster was terminated prematurely--too much * hassle. */ if (((tbp->b_flags & (B_CLUSTEROK | B_MALLOC | B_VMIO)) != (B_CLUSTEROK | B_VMIO)) || (tbp->b_bcount != tbp->b_bufsize) || (tbp->b_bcount != size) || (len == 1) || ((bp = getpbuf(&cluster_pbuf_freecnt)) == NULL)) { totalwritten += tbp->b_bufsize; bawrite(tbp); ++start_lbn; --len; continue; } /* * We got a pbuf to make the cluster in. * so initialise it. */ TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_magic = tbp->b_magic; bp->b_op = tbp->b_op; bp->b_bufsize = 0; bp->b_npages = 0; if (tbp->b_wcred != NOCRED) bp->b_wcred = crhold(tbp->b_wcred); bp->b_blkno = tbp->b_blkno; bp->b_lblkno = tbp->b_lblkno; bp->b_offset = tbp->b_offset; /* * We are synthesizing a buffer out of vm_page_t's, but * if the block size is not page aligned then the starting * address may not be either. Inherit the b_data offset * from the original buffer. 
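 *
 * Concretely (example values only): with 4K pages and a 2K block
 * size, tbp->b_data may begin 0x800 bytes into its first page.  The
 * pbuf's KVA is page aligned, so OR-ing in (tbp->b_data & PAGE_MASK)
 * gives the cluster buffer the same 0x800 starting offset; once
 * pmap_qenter() maps the shared pages, byte 0 of the cluster buffer
 * and byte 0 of the original buffer refer to the same physical
 * location.
 *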
*/ bp->b_data = (char *)((vm_offset_t)bp->b_data | ((vm_offset_t)tbp->b_data & PAGE_MASK)); bp->b_flags |= B_CLUSTER | (tbp->b_flags & (B_VMIO | B_NEEDCOMMIT)); bp->b_iodone = cluster_callback; pbgetvp(vp, bp); /* * From this location in the file, scan forward to see * if there are buffers with adjacent data that need to * be written as well. */ for (i = 0; i < len; ++i, ++start_lbn) { if (i != 0) { /* If not the first buffer */ s = splbio(); /* * If the adjacent data is not even in core it * can't need to be written. */ VI_LOCK(vp); if ((tbp = gbincore(vp, start_lbn)) == NULL || (tbp->b_vflags & BV_BKGRDINPROG)) { VI_UNLOCK(vp); splx(s); break; } /* * If it IS in core, but has different * characteristics, or is locked (which * means it could be undergoing a background * I/O or be in a weird state), then don't * cluster with it. */ if (BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, VI_MTX(vp))) { splx(s); break; } if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK | B_INVAL | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_CLUSTEROK | (bp->b_flags & (B_VMIO | B_NEEDCOMMIT))) || tbp->b_wcred != bp->b_wcred) { BUF_UNLOCK(tbp); splx(s); break; } /* * Check that the combined cluster * would make sense with regard to pages * and would not be too large */ if ((tbp->b_bcount != size) || ((bp->b_blkno + (dbsize * i)) != tbp->b_blkno) || ((tbp->b_npages + bp->b_npages) > (vp->v_mount->mnt_iosize_max / PAGE_SIZE))) { BUF_UNLOCK(tbp); splx(s); break; } /* * Ok, it's passed all the tests, * so remove it from the free list * and mark it busy. We will use it. */ bremfree(tbp); tbp->b_flags &= ~B_DONE; splx(s); } /* end of code for non-first buffers only */ /* check for latent dependencies to be handled */ if ((LIST_FIRST(&tbp->b_dep)) != NULL) { tbp->b_iocmd = BIO_WRITE; buf_start(tbp); } /* * If the IO is via the VM then we do some * special VM hackery (yuck). Since the buffer's * block size may not be page-aligned it is possible * for a page to be shared between two buffers. We * have to get rid of the duplication when building * the cluster. */ if (tbp->b_flags & B_VMIO) { vm_page_t m; if (i != 0) { /* if not first buffer */ for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; if (m->flags & PG_BUSY) { bqrelse(tbp); goto finishcluster; } } } if (tbp->b_object != NULL) VM_OBJECT_LOCK(tbp->b_object); vm_page_lock_queues(); for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; vm_page_io_start(m); vm_object_pip_add(m->object, 1); if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages - 1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } } vm_page_unlock_queues(); if (tbp->b_object != NULL) VM_OBJECT_UNLOCK(tbp->b_object); } bp->b_bcount += size; bp->b_bufsize += size; s = splbio(); bundirty(tbp); tbp->b_flags &= ~B_DONE; tbp->b_ioflags &= ~BIO_ERROR; tbp->b_flags |= B_ASYNC; tbp->b_iocmd = BIO_WRITE; reassignbuf(tbp, tbp->b_vp); /* put on clean list */ VI_LOCK(tbp->b_vp); ++tbp->b_vp->v_numoutput; VI_UNLOCK(tbp->b_vp); splx(s); BUF_KERNPROC(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); } finishcluster: pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *) bp->b_pages, bp->b_npages); if (bp->b_bufsize > bp->b_kvasize) panic( "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); bp->b_kvasize = bp->b_bufsize; totalwritten += bp->b_bufsize; bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bufsize; bawrite(bp); len -= i; } return totalwritten; } /* * Collect together all the buffers in a cluster. 
* Plus add one additional buffer. */ static struct cluster_save * cluster_collectbufs(vp, last_bp) struct vnode *vp; struct buf *last_bp; { struct cluster_save *buflist; struct buf *bp; daddr_t lbn; int i, len; len = vp->v_lastw - vp->v_cstart + 1; buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), M_SEGMENT, M_WAITOK); buflist->bs_nchildren = 0; buflist->bs_children = (struct buf **) (buflist + 1); for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) { (void) bread(vp, lbn, last_bp->b_bcount, NOCRED, &bp); buflist->bs_children[i] = bp; if (bp->b_blkno == bp->b_lblkno) VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } buflist->bs_children[i] = bp = last_bp; if (bp->b_blkno == bp->b_lblkno) VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); buflist->bs_nchildren = i + 1; return (buflist); } Index: head/sys/kern/vfs_extattr.c =================================================================== --- head/sys/kern/vfs_extattr.c (revision 122536) +++ head/sys/kern/vfs_extattr.c (revision 122537) @@ -1,4508 +1,4791 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 */ #include __FBSDID("$FreeBSD$"); /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int chroot_refuse_vdir_fds(struct filedesc *fdp); static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); static int setfown(struct thread *td, struct vnode *, uid_t, gid_t); static int setfmode(struct thread *td, struct vnode *, int); static int setfflags(struct thread *td, struct vnode *, int); static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes, struct thread *td); int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *); int (*softdep_fsync_hook)(struct vnode *); /* * The module initialization routine for POSIX asynchronous I/O will * set this to the version of AIO that it implements. (Zero means * that it is not implemented.) This value is used here by pathconf() * and in kern_descrip.c by fpathconf(). */ int async_io_version; /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(td, uap) struct thread *td; struct sync_args *uap; { struct mount *mp, *nmp; int asyncflag; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, ((td != NULL) ? td->td_ucred : NOCRED), td); mp->mnt_flag |= asyncflag; vn_finished_write(mp); } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* XXX PRISON: could be per prison flag */ static int prison_quotas; #if 0 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); #endif /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(td, uap) struct thread *td; register struct quotactl_args /* { char *path; int cmd; int uid; caddr_t arg; } */ *uap; { struct mount *mp; int error; struct nameidata nd; if (jailed(td->td_ucred) && !prison_quotas) return (EPERM); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH); vrele(nd.ni_vp); if (error) return (error); error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td); vn_finished_write(mp); return (error); } /* * Get filesystem statistics. 
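 *
 * From userland this is reached as the statfs(2) system call; a
 * minimal, illustrative caller (needs <sys/param.h>, <sys/mount.h>
 * and <stdio.h>) looks like:
 *
 *	struct statfs fs;
 *
 *	if (statfs("/usr", &fs) == 0)
 *		printf("%s on %s: %jd of %jd blocks free\n",
 *		    fs.f_mntfromname, fs.f_mntonname,
 *		    (intmax_t)fs.f_bfree, (intmax_t)fs.f_blocks);
 *
 * Note that for an unprivileged caller the kernel hands back a copy
 * with f_fsid zeroed, as done at the bottom of the function below.
 *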
*/ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(td, uap) struct thread *td; register struct statfs_args /* { char *path; struct statfs *buf; } */ *uap; { - register struct mount *mp; - register struct statfs *sp; + struct mount *mp; + struct statfs *sp, sb; int error; struct nameidata nd; - struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) return (error); #endif + /* + * Set these in case the underlying filesystem fails to do so. + */ + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); if (error) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout(sp, uap->buf, sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(td, uap) struct thread *td; register struct fstatfs_args /* { int fd; struct statfs *buf; } */ *uap; { struct file *fp; struct mount *mp; - register struct statfs *sp; + struct statfs *sp, sb; int error; - struct statfs sb; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); mp = fp->f_vnode->v_mount; fdrop(fp, td); if (mp == NULL) return (EBADF); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) return (error); #endif sp = &mp->mnt_stat; + /* + * Set these in case the underlying filesystem fails to do so. + */ + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); if (error) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout(sp, uap->buf, sizeof(*sp))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(td, uap) struct thread *td; register struct getfsstat_args /* { struct statfs *buf; long bufsize; int flags; } */ *uap; { - register struct mount *mp, *nmp; - register struct statfs *sp; + struct mount *mp, *nmp; + struct statfs *sp, sb; caddr_t sfsp; long count, maxcount, error; maxcount = uap->bufsize / sizeof(struct statfs); sfsp = (caddr_t)uap->buf; count = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { #ifdef MAC if (mac_check_mount_stat(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #endif if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* + * Set these in case the underlying filesystem + * fails to do so. + */ + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + /* * If MNT_NOWAIT or MNT_LAZY is specified, do not * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY * overrides MNT_WAIT. 
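 *
 * (In practice this is what keeps getfsstat(2) -- and hence
 * getmntinfo(3) with MNT_NOWAIT, as used by df(1) -- cheap: with
 * MNT_NOWAIT and without MNT_WAIT the cached mnt_stat is copied out
 * as-is instead of calling down into every filesystem.)
 *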
*/ if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || (uap->flags & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, td))) { mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); continue; } - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (suser(td)) { + bcopy(sp, &sb, sizeof(sb)); + sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; + sp = &sb; + } error = copyout(sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, td); return (error); } sfsp += sizeof(*sp); } count++; mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); if (sfsp && count > maxcount) td->td_retval[0] = maxcount; else td->td_retval[0] = count; return (0); } +#ifdef COMPAT_FREEBSD4 /* + * Get old format filesystem statistics. + */ +static void cvtstatfs(struct thread *, struct statfs *, struct ostatfs *); + +#ifndef _SYS_SYSPROTO_H_ +struct freebsd4_statfs_args { + char *path; + struct ostatfs *buf; +}; +#endif +/* ARGSUSED */ +int +freebsd4_statfs(td, uap) + struct thread *td; + struct freebsd4_statfs_args /* { + char *path; + struct ostatfs *buf; + } */ *uap; +{ + struct mount *mp; + struct statfs *sp; + struct ostatfs osb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); + if ((error = namei(&nd)) != 0) + return (error); + mp = nd.ni_vp->v_mount; + sp = &mp->mnt_stat; + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(nd.ni_vp); +#ifdef MAC + error = mac_check_mount_stat(td->td_ucred, mp); + if (error) + return (error); +#endif + error = VFS_STATFS(mp, sp, td); + if (error) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + cvtstatfs(td, sp, &osb); + return (copyout(&osb, uap->buf, sizeof(osb))); +} + +/* + * Get filesystem statistics. + */ +#ifndef _SYS_SYSPROTO_H_ +struct freebsd4_fstatfs_args { + int fd; + struct ostatfs *buf; +}; +#endif +/* ARGSUSED */ +int +freebsd4_fstatfs(td, uap) + struct thread *td; + struct freebsd4_fstatfs_args /* { + int fd; + struct ostatfs *buf; + } */ *uap; +{ + struct file *fp; + struct mount *mp; + struct statfs *sp; + struct ostatfs osb; + int error; + + if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) + return (error); + mp = fp->f_vnode->v_mount; + fdrop(fp, td); + if (mp == NULL) + return (EBADF); +#ifdef MAC + error = mac_check_mount_stat(td->td_ucred, mp); + if (error) + return (error); +#endif + sp = &mp->mnt_stat; + error = VFS_STATFS(mp, sp, td); + if (error) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + cvtstatfs(td, sp, &osb); + return (copyout(&osb, uap->buf, sizeof(osb))); +} + +/* + * Get statistics on all filesystems. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct freebsd4_getfsstat_args { + struct ostatfs *buf; + long bufsize; + int flags; +}; +#endif +int +freebsd4_getfsstat(td, uap) + struct thread *td; + register struct freebsd4_getfsstat_args /* { + struct ostatfs *buf; + long bufsize; + int flags; + } */ *uap; +{ + struct mount *mp, *nmp; + struct statfs *sp; + struct ostatfs osb; + caddr_t sfsp; + long count, maxcount, error; + + maxcount = uap->bufsize / sizeof(struct ostatfs); + sfsp = (caddr_t)uap->buf; + count = 0; + mtx_lock(&mountlist_mtx); + for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { +#ifdef MAC + if (mac_check_mount_stat(td->td_ucred, mp) != 0) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } +#endif + if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if (sfsp && count < maxcount) { + sp = &mp->mnt_stat; + /* + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. + */ + if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || + (uap->flags & MNT_WAIT)) && + (error = VFS_STATFS(mp, sp, td))) { + mtx_lock(&mountlist_mtx); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, td); + continue; + } + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + cvtstatfs(td, sp, &osb); + error = copyout(&osb, sfsp, sizeof(osb)); + if (error) { + vfs_unbusy(mp, td); + return (error); + } + sfsp += sizeof(osb); + } + count++; + mtx_lock(&mountlist_mtx); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, td); + } + mtx_unlock(&mountlist_mtx); + if (sfsp && count > maxcount) + td->td_retval[0] = maxcount; + else + td->td_retval[0] = count; + return (0); +} + +/* + * Implement fstatfs() for (NFS) file handles. + */ +#ifndef _SYS_SYSPROTO_H_ +struct freebsd4_fhstatfs_args { + struct fhandle *u_fhp; + struct ostatfs *buf; +}; +#endif +int +freebsd4_fhstatfs(td, uap) + struct thread *td; + struct freebsd4_fhstatfs_args /* { + struct fhandle *u_fhp; + struct ostatfs *buf; + } */ *uap; +{ + struct statfs *sp; + struct mount *mp; + struct vnode *vp; + struct ostatfs osb; + fhandle_t fh; + int error; + + /* + * Must be super user + */ + error = suser(td); + if (error) + return (error); + + if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) + return (error); + + if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) + return (ESTALE); + if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) + return (error); + mp = vp->v_mount; + sp = &mp->mnt_stat; + vput(vp); +#ifdef MAC + error = mac_check_mount_stat(td->td_ucred, mp); + if (error) + return (error); +#endif + if ((error = VFS_STATFS(mp, sp, td)) != 0) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + cvtstatfs(td, sp, &osb); + return (copyout(&osb, uap->buf, sizeof(osb))); +} + +/* + * Convert a new format statfs structure to an old format statfs structure. 
+ */ +static void +cvtstatfs(td, nsp, osp) + struct thread *td; + struct statfs *nsp; + struct ostatfs *osp; +{ + + bzero(osp, sizeof(*osp)); + osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX); + osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); + osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX); + osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX); + osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX); + osp->f_files = MIN(nsp->f_files, LONG_MAX); + osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); + osp->f_owner = nsp->f_owner; + osp->f_type = nsp->f_type; + osp->f_flags = nsp->f_flags; + osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); + osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); + osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); + osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); + bcopy(nsp->f_fstypename, osp->f_fstypename, + MIN(MFSNAMELEN, OMNAMELEN)); + bcopy(nsp->f_mntonname, osp->f_mntonname, + MIN(MFSNAMELEN, OMNAMELEN)); + bcopy(nsp->f_mntfromname, osp->f_mntfromname, + MIN(MFSNAMELEN, OMNAMELEN)); + if (suser(td)) { + osp->f_fsid.val[0] = osp->f_fsid.val[1] = 0; + } else { + osp->f_fsid = nsp->f_fsid; + } +} +#endif /* COMPAT_FREEBSD4 */ + +/* * Change current working directory to a given file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(td, uap) struct thread *td; struct fchdir_args /* { int fd; } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int error; if ((error = getvnode(fdp, uap->fd, &fp)) != 0) return (error); vp = fp->f_vnode; VREF(vp); fdrop(fp, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; #ifdef MAC else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) { } #endif else error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, td)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, td); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, td); FILEDESC_LOCK(fdp); vpold = fdp->fd_cdir; fdp->fd_cdir = vp; FILEDESC_UNLOCK(fdp); vrele(vpold); return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(td, uap) struct thread *td; struct chdir_args /* { char *path; } */ *uap; { return (kern_chdir(td, uap->path, UIO_USERSPACE)); } int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) { register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; struct vnode *vp; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); if ((error = change_dir(nd.ni_vp, td)) != 0) { vput(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } VOP_UNLOCK(nd.ni_vp, 0, td); NDFREE(&nd, NDF_ONLY_PNBUF); FILEDESC_LOCK(fdp); vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; FILEDESC_UNLOCK(fdp); vrele(vp); return (0); } /* * Helper function for raised chroot(2) security function: Refuse if * any filedescriptors are open directories. 
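For illustration only (not part of this change): cvtstatfs() above caps each 64-bit counter at LONG_MAX before copying it into the old long-sized field of struct ostatfs. A minimal sketch of that clamping idea; clamp_to_long() is a hypothetical helper name, not something added by this diff:

#include <limits.h>
#include <stdint.h>

/* Cap a 64-bit counter so it still fits the old ABI's long-sized field. */
static long
clamp_to_long(uint64_t v)
{
	return (v > LONG_MAX ? LONG_MAX : (long)v);
}

Writing osp->f_blocks = clamp_to_long(nsp->f_blocks) behaves the same as the MIN(nsp->f_blocks, LONG_MAX) expressions used above.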
*/ static int chroot_refuse_vdir_fds(fdp) struct filedesc *fdp; { struct vnode *vp; struct file *fp; int fd; FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { fp = fget_locked(fdp, fd); if (fp == NULL) continue; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vp->v_type == VDIR) return (EPERM); } } return (0); } /* * This sysctl determines if we will allow a process to chroot(2) if it * has a directory open: * 0: disallowed for all processes. * 1: allowed for processes that were not already chroot(2)'ed. * 2: allowed for all processes. */ static int chroot_allow_open_directories = 1; SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, &chroot_allow_open_directories, 0, ""); /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(td, uap) struct thread *td; struct chroot_args /* { char *path; } */ *uap; { int error; struct nameidata nd; error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td); mtx_lock(&Giant); error = namei(&nd); if (error) goto error; if ((error = change_dir(nd.ni_vp, td)) != 0) goto e_vunlock; #ifdef MAC if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp))) goto e_vunlock; #endif VOP_UNLOCK(nd.ni_vp, 0, td); error = change_root(nd.ni_vp, td); vrele(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); mtx_unlock(&Giant); return (error); e_vunlock: vput(nd.ni_vp); error: mtx_unlock(&Giant); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Common routine for chroot and chdir. Callers must provide a locked vnode * instance. */ int change_dir(vp, td) struct vnode *vp; struct thread *td; { int error; ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); if (vp->v_type != VDIR) return (ENOTDIR); #ifdef MAC error = mac_check_vnode_chdir(td->td_ucred, vp); if (error) return (error); #endif error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); return (error); } /* * Common routine for kern_chroot() and jail_attach(). The caller is * responsible for invoking suser() and mac_check_chroot() to authorize this * operation. */ int change_root(vp, td) struct vnode *vp; struct thread *td; { struct filedesc *fdp; struct vnode *oldvp; int error; mtx_assert(&Giant, MA_OWNED); fdp = td->td_proc->p_fd; FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); if (error) { FILEDESC_UNLOCK(fdp); return (error); } } oldvp = fdp->fd_rdir; fdp->fd_rdir = vp; VREF(fdp->fd_rdir); if (!fdp->fd_jdir) { fdp->fd_jdir = vp; VREF(fdp->fd_jdir); } FILEDESC_UNLOCK(fdp); vrele(oldvp); return (0); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. 
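For illustration only (not part of this change): the kern.chroot_allow_open_directories knob declared above can be read from userland with sysctlbyname(3); a minimal sketch:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int val;
	size_t len = sizeof(val);

	if (sysctlbyname("kern.chroot_allow_open_directories", &val, &len,
	    NULL, 0) != 0)
		return (1);
	/* 0: never, 1: only if not already chrooted, 2: always. */
	printf("chroot_allow_open_directories = %d\n", val);
	return (0);
}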
*/ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(td, uap) struct thread *td; register struct open_args /* { char *path; int flags; int mode; } */ *uap; { return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); } int kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, int mode) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct vattr vat; struct mount *mp; int cmode; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; if ((flags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(flags); error = falloc(td, &nfp, &indx); if (error) return (error); /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, indx); if (error) { /* * If the vn_open replaced the method vector, something * wonderous happened deep below and we just pass it up * pretending we know what we do. */ if (error == ENXIO && fp->f_ops != &badfileops) { fdrop(fp, td); td->td_retval[0] = indx; return (0); } /* * release our own reference */ fdrop(fp, td); /* * handle special fdopen() case. bleh. dupfdopen() is * responsible for dropping the old contents of ofiles[indx] * if it succeeds. */ if ((error == ENODEV || error == ENXIO) && td->td_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) { td->td_retval[0] = indx; return (0); } /* * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. */ FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); if (error == ERESTART) error = EINTR; return (error); } td->td_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; /* * There should be 2 references on the file, one from the descriptor * table, and one for us. * * Handle the case where someone closed the file (via its file * descriptor) while we were blocked. The end result should look * like opening the file succeeded but it was immediately closed. */ FILEDESC_LOCK(fdp); FILE_LOCK(fp); if (fp->f_count == 1) { KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); FILEDESC_UNLOCK(fdp); FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); fdrop(fp, td); td->td_retval[0] = indx; return 0; } fp->f_vnode = vp; fp->f_data = vp; fp->f_flag = flags & FMASK; fp->f_ops = &vnops; fp->f_seqcount = 1; fp->f_type = (vp->v_type == VFIFO ? 
DTYPE_FIFO : DTYPE_VNODE); FILEDESC_UNLOCK(fdp); FILE_UNLOCK(fp); /* assert that vn_open created a backing object if one is needed */ KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, ("open: vmio vnode has no backing object after vn_open")); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) goto bad; fp->f_flag |= FHASLOCK; } if (flags & O_TRUNC) { if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto bad; VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); VATTR_NULL(&vat); vat.va_size = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); #ifdef MAC error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp); if (error == 0) #endif error = VOP_SETATTR(vp, &vat, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); if (error) goto bad; } /* * Release our private reference, leaving the one associated with * the descriptor table intact. */ fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); fdrop(fp, td); return (error); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(td, uap) struct thread *td; register struct ocreat_args /* { char *path; int mode; } */ *uap; { struct open_args /* { char *path; int flags; int mode; } */ nuap; nuap.path = uap->path; nuap.mode = uap->mode; nuap.flags = O_WRONLY | O_CREAT | O_TRUNC; return (open(td, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. 
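For illustration only (not part of this change): the O_EXLOCK/O_SHLOCK handling in kern_open() above takes an advisory flock-style lock at open time, and with O_NONBLOCK the open fails instead of waiting for the lock. A minimal userland sketch; /tmp/lockfile is only an example path:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd;

	fd = open("/tmp/lockfile", O_RDWR | O_CREAT | O_EXLOCK | O_NONBLOCK,
	    0644);
	if (fd == -1) {
		perror("open");	/* EAGAIN if another process holds the lock */
		return (1);
	}
	/* ... exclusive access while fd stays open ... */
	close(fd);
	return (0);
}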
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(td, uap) struct thread *td; register struct mknod_args /* { char *path; int mode; int dev; } */ *uap; { return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } int kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, int dev) { struct vnode *vp; struct mount *mp; struct vattr vattr; int error; int whiteout = 0; struct nameidata nd; switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: error = suser(td); break; default: error = suser_cred(td->td_ucred, PRISON_ROOT); break; } if (error) return (error); restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (EEXIST); } else { VATTR_NULL(&vattr); FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); vattr.va_rdev = dev; whiteout = 0; switch (mode & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC if (error == 0 && !whiteout) error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (!error) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); if (whiteout) error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. 
*/ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(td, uap) struct thread *td; register struct mkfifo_args /* { char *path; int mode; } */ *uap; { return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) { struct mount *mp; struct vattr vattr; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VFIFO; FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(td, uap) struct thread *td; register struct link_args /* { char *path; char *link; } */ *uap; { return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); } int kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) { struct vnode *vp; struct mount *mp; struct nameidata nd; int error; bwillwrite(); NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vp->v_type == VDIR) { vrele(vp); return (EPERM); /* POSIX */ } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { vrele(nd.ni_vp); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); error = EEXIST; } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td)) == 0) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); #ifdef MAC error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error == 0) #endif error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0, td); vput(nd.ni_dvp); } NDFREE(&nd, NDF_ONLY_PNBUF); } vrele(vp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. 
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(td, uap) struct thread *td; register struct symlink_args /* { char *path; char *link; } */ *uap; { return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); } int kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) { struct mount *mp; struct vattr vattr; char *syspath; int error; struct nameidata nd; if (segflg == UIO_SYSSPACE) { syspath = path; } else { syspath = uma_zalloc(namei_zone, M_WAITOK); if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0) goto out; } restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); error = EEXIST; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); #ifdef MAC vattr.va_type = VLNK; error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out2; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); if (error == 0) vput(nd.ni_vp); #ifdef MAC out2: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: if (segflg != UIO_SYSSPACE) uma_zfree(namei_zone, syspath); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(td, uap) struct thread *td; register struct undelete_args /* { char *path; } */ *uap; { int error; struct mount *mp; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp) vrele(nd.ni_vp); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(td, uap) struct thread *td; struct unlink_args /* { char *path; } */ *uap; { return (kern_unlink(td, uap->path, UIO_USERSPACE)); } int kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. 
* * XXX: can this only be a VDIR case? */ if (vp->v_vflag & VV_ROOT) error = EBUSY; } if (error == 0) { if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: #endif vn_finished_write(mp); } NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(td, uap) struct thread *td; register struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ *uap; { struct ucred *cred = td->td_ucred; struct file *fp; struct vnode *vp; struct vattr vattr; off_t offset; int error, noneg; if ((error = fget(td, uap->fd, &fp)) != 0) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { fdrop(fp, td); return (ESPIPE); } vp = fp->f_vnode; noneg = (vp->v_type != VCHR); offset = uap->offset; switch (uap->whence) { case L_INCR: if (noneg && (fp->f_offset < 0 || (offset > 0 && fp->f_offset > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += fp->f_offset; break; case L_XTND: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_GETATTR(vp, &vattr, cred, td); VOP_UNLOCK(vp, 0, td); if (error) break; if (noneg && (vattr.va_size > OFF_MAX || (offset > 0 && vattr.va_size > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += vattr.va_size; break; case L_SET: break; default: error = EINVAL; } if (error == 0 && noneg && offset < 0) error = EINVAL; if (error != 0) { fdrop(fp, td); return (error); } fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; fdrop(fp, td); return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(td, uap) struct thread *td; register struct olseek_args /* { int fd; long offset; int whence; } */ *uap; { struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ nuap; int error; nuap.fd = uap->fd; nuap.offset = uap->offset; nuap.whence = uap->whence; error = lseek(td, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions using passed credentials. */ static int vn_access(vp, user_flags, cred, td) struct vnode *vp; int user_flags; struct ucred *cred; struct thread *td; { int error, flags; /* Flags == 0 means only check for existence. */ error = 0; if (user_flags) { flags = 0; if (user_flags & R_OK) flags |= VREAD; if (user_flags & W_OK) flags |= VWRITE; if (user_flags & X_OK) flags |= VEXEC; #ifdef MAC error = mac_check_vnode_access(cred, vp, flags); if (error) return (error); #endif if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, td); } return (error); } /* * Check access permissions using "real" credentials. 
*/ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(td, uap) struct thread *td; register struct access_args /* { char *path; int flags; } */ *uap; { return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags)); } int kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags) { struct ucred *cred, *tmpcred; register struct vnode *vp; int error; struct nameidata nd; /* * Create and modify a temporary credential instead of one that * is potentially shared. This could also mess up socket * buffer accounting which can run in an interrupt context. * * XXX - Depending on how "threads" are finally implemented, it * may be better to explicitly pass the credential to namei() * rather than to modify the potentially shared process structure. */ cred = td->td_ucred; tmpcred = crdup(cred); tmpcred->cr_uid = cred->cr_ruid; tmpcred->cr_groups[0] = cred->cr_rgid; td->td_ucred = tmpcred; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td); if ((error = namei(&nd)) != 0) goto out1; vp = nd.ni_vp; error = vn_access(vp, flags, tmpcred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); out1: td->td_ucred = cred; crfree(tmpcred); return (error); } /* * Check access permissions using "effective" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct eaccess_args { char *path; int flags; }; #endif int eaccess(td, uap) struct thread *td; register struct eaccess_args /* { char *path; int flags; } */ *uap; { struct nameidata nd; struct vnode *vp; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_access(vp, uap->flags, td->td_ucred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(td, uap) struct thread *td; register struct ostat_args /* { char *path; struct ostat *ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout(&osb, uap->ub, sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(td, uap) struct thread *td; register struct olstat_args /* { char *path; struct ostat *ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout(&osb, uap->ub, sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. 
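For illustration only (not part of this change): access(2) checks permissions against the real uid/gid via the temporary credential built in kern_access() above, while eaccess(2) uses the effective credential; the two only give different answers in a set-id program. A minimal sketch; /etc/master.passwd is just an example of a root-only file:

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "/etc/master.passwd";

	printf("access():  %s\n", access(path, R_OK) == 0 ? "ok" : "denied");
	printf("eaccess(): %s\n", eaccess(path, R_OK) == 0 ? "ok" : "denied");
	return (0);
}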
*/ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(td, uap) struct thread *td; register struct stat_args /* { char *path; struct stat *ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; #ifdef LOOKUP_SHARED NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); #else NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); #endif if ((error = namei(&nd)) != 0) return (error); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); if (error) return (error); error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(td, uap) struct thread *td; register struct lstat_args /* { char *path; struct stat *ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (error) return (error); error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } /* * Implementation of the NetBSD stat() function. * XXX This should probably be collapsed with the FreeBSD version, * as the differences are only due to vn_stat() clearing spares at * the end of the structures. vn_stat could be split to avoid this, * and thus collapse the following to close to zero code. */ void cvtnstat(sb, nsb) struct stat *sb; struct nstat *nsb; { bzero(nsb, sizeof *nsb); nsb->st_dev = sb->st_dev; nsb->st_ino = sb->st_ino; nsb->st_mode = sb->st_mode; nsb->st_nlink = sb->st_nlink; nsb->st_uid = sb->st_uid; nsb->st_gid = sb->st_gid; nsb->st_rdev = sb->st_rdev; nsb->st_atimespec = sb->st_atimespec; nsb->st_mtimespec = sb->st_mtimespec; nsb->st_ctimespec = sb->st_ctimespec; nsb->st_size = sb->st_size; nsb->st_blocks = sb->st_blocks; nsb->st_blksize = sb->st_blksize; nsb->st_flags = sb->st_flags; nsb->st_gen = sb->st_gen; nsb->st_birthtimespec = sb->st_birthtimespec; } #ifndef _SYS_SYSPROTO_H_ struct nstat_args { char *path; struct nstat *ub; }; #endif /* ARGSUSED */ int nstat(td, uap) struct thread *td; register struct nstat_args /* { char *path; struct nstat *ub; } */ *uap; { struct stat sb; struct nstat nsb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); vput(nd.ni_vp); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } /* * NetBSD lstat. 
Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int nlstat(td, uap) struct thread *td; register struct nlstat_args /* { char *path; struct nstat *ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nstat nsb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); vput(vp); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(td, uap) struct thread *td; register struct pathconf_args /* { char *path; int name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); /* If asynchronous I/O is available, it works for all files. */ if (uap->name == _PC_ASYNC_IO) td->td_retval[0] = async_io_version; else error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(td, uap) struct thread *td; register struct readlink_args /* { char *path; char *buf; int count; } */ *uap; { return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->count)); } int kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, int count) { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; #ifdef MAC error = mac_check_vnode_readlink(td->td_ucred, vp); if (error) { vput(vp); return (error); } #endif if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; auio.uio_resid = count; error = VOP_READLINK(vp, &auio, td->td_ucred); } vput(vp); td->td_retval[0] = count - auio.uio_resid; return (error); } /* * Common implementation code for chflags() and fchflags(). */ static int setfflags(td, vp, flags) struct thread *td; struct vnode *vp; int flags; { int error; struct mount *mp; struct vattr vattr; /* * Prevent non-root users from setting flags on devices. When * a device is reused, users can retain ownership of the device * if they are allowed to set flags and programs assume that * chown can't fail when done as root. 
*/ if (vp->v_type == VCHR || vp->v_type == VBLK) { error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) return (error); } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_flags = flags; #ifdef MAC error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(td, uap) struct thread *td; register struct chflags_args /* { char *path; int flags; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, uap->flags); vrele(nd.ni_vp); return error; } /* * Same as chflags() but doesn't follow symlinks. */ int lchflags(td, uap) struct thread *td; register struct lchflags_args /* { char *path; int flags; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, uap->flags); vrele(nd.ni_vp); return error; } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(td, uap) struct thread *td; register struct fchflags_args /* { int fd; int flags; } */ *uap; { struct file *fp; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); error = setfflags(td, fp->f_vnode, uap->flags); fdrop(fp, td); return (error); } /* * Common implementation code for chmod(), lchmod() and fchmod(). */ static int setfmode(td, vp, mode) struct thread *td; struct vnode *vp; int mode; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; #ifdef MAC error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(td, uap) struct thread *td; register struct chmod_args /* { char *path; int mode; } */ *uap; { return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, mode); vrele(nd.ni_vp); return error; } /* * Change mode of a file given path name (don't follow links.) 
*/ #ifndef _SYS_SYSPROTO_H_ struct lchmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int lchmod(td, uap) struct thread *td; register struct lchmod_args /* { char *path; int mode; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, uap->mode); vrele(nd.ni_vp); return error; } /* * Change mode of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(td, uap) struct thread *td; register struct fchmod_args /* { int fd; int mode; } */ *uap; { struct file *fp; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); error = setfmode(td, fp->f_vnode, uap->mode); fdrop(fp, td); return (error); } /* * Common implementation for chown(), lchown(), and fchown() */ static int setfown(td, vp, uid, gid) struct thread *td; struct vnode *vp; uid_t uid; gid_t gid; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_uid = uid; vattr.va_gid = gid; #ifdef MAC error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid, vattr.va_gid); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(td, uap) struct thread *td; register struct chown_args /* { char *path; int uid; int gid; } */ *uap; { return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); } int kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid) { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, uid, gid); vrele(nd.ni_vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(td, uap) struct thread *td; register struct lchown_args /* { char *path; int uid; int gid; } */ *uap; { return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); } int kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid) { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, uid, gid); vrele(nd.ni_vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(td, uap) struct thread *td; register struct fchown_args /* { int fd; int uid; int gid; } */ *uap; { struct file *fp; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); error = setfown(td, fp->f_vnode, uap->uid, uap->gid); fdrop(fp, td); return (error); } /* * Common implementation code for utimes(), lutimes(), and futimes(). 
*/ static int getutimes(usrtvp, tvpseg, tsp) const struct timeval *usrtvp; enum uio_seg tvpseg; struct timespec *tsp; { struct timeval tv[2]; const struct timeval *tvp; int error; if (usrtvp == NULL) { microtime(&tv[0]); TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); tsp[1] = tsp[0]; } else { if (tvpseg == UIO_SYSSPACE) { tvp = usrtvp; } else { if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) return (error); tvp = tv; } TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); } return 0; } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int setutimes(td, vp, ts, numtimes, nullflag) struct thread *td; struct vnode *vp; const struct timespec *ts; int numtimes; int nullflag; { int error, setbirthtime; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); setbirthtime = 0; if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 && timespeccmp(&ts[1], &vattr.va_birthtime, < )) setbirthtime = 1; VATTR_NULL(&vattr); vattr.va_atime = ts[0]; vattr.va_mtime = ts[1]; if (setbirthtime) vattr.va_birthtime = ts[1]; if (numtimes > 2) vattr.va_birthtime = ts[2]; if (nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; #ifdef MAC error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime, vattr.va_mtime); #endif if (error == 0) error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(td, uap) struct thread *td; register struct utimes_args /* { char *path; struct timeval *tptr; } */ *uap; { return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; int error; struct nameidata nd; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct lutimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int lutimes(td, uap) struct thread *td; register struct lutimes_args /* { char *path; struct timeval *tptr; } */ *uap; { return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; int error; struct nameidata nd; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. 
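For illustration only (not part of this change): passing a NULL timeval pointer makes getutimes() above substitute the current time and setutimes() set VA_UTIMES_NULL, so the call behaves like touch(1). A minimal userland sketch; /tmp/somefile is only an example path:

#include <sys/time.h>
#include <stdio.h>

int
main(void)
{
	if (utimes("/tmp/somefile", NULL) != 0) {
		perror("utimes");
		return (1);
	}
	return (0);
}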
*/ #ifndef _SYS_SYSPROTO_H_ struct futimes_args { int fd; struct timeval *tptr; }; #endif /* ARGSUSED */ int futimes(td, uap) struct thread *td; register struct futimes_args /* { int fd; struct timeval *tptr; } */ *uap; { return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); } int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0) return (error); error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); fdrop(fp, td); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(td, uap) struct thread *td; register struct truncate_args /* { char *path; int pad; off_t length; } */ *uap; { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { VATTR_NULL(&vattr); vattr.va_size = length; error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); } vput(vp); vn_finished_write(mp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(td, uap) struct thread *td; register struct ftruncate_args /* { int fd; int pad; off_t length; } */ *uap; { struct mount *mp; struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FWRITE) == 0) { fdrop(fp, td); return (EINVAL); } vp = fp->f_vnode; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { fdrop(fp, td); return (error); } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = uap->length; error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); fdrop(fp, td); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(td, uap) struct thread *td; register struct otruncate_args /* { char *path; long length; } */ *uap; { struct truncate_args /* { char *path; int pad; off_t length; } */ nuap; nuap.path = uap->path; nuap.length = uap->length; return (truncate(td, &nuap)); } /* * Truncate a file given a file descriptor. 
*/ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(td, uap) struct thread *td; register struct oftruncate_args /* { int fd; long length; } */ *uap; { struct ftruncate_args /* { int fd; int pad; off_t length; } */ nuap; nuap.fd = uap->fd; nuap.length = uap->length; return (ftruncate(td, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(td, uap) struct thread *td; struct fsync_args /* { int fd; } */ *uap; { struct vnode *vp; struct mount *mp; struct file *fp; vm_object_t obj; int error; GIANT_REQUIRED; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vp = fp->f_vnode; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { fdrop(fp, td); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (VOP_GETVOBJECT(vp, &obj) == 0) { VM_OBJECT_LOCK(obj); vm_object_page_clean(obj, 0, 0, 0); VM_OBJECT_UNLOCK(obj); } error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td); if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) && softdep_fsync_hook != NULL) error = (*softdep_fsync_hook)(vp); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); fdrop(fp, td); return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(td, uap) struct thread *td; register struct rename_args /* { char *from; char *to; } */ *uap; { return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); } int kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) { struct mount *mp = NULL; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; bwillwrite(); #ifdef MAC NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg, from, td); #else NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td); #endif if ((error = namei(&fromnd)) != 0) return (error); #ifdef MAC error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd); VOP_UNLOCK(fromnd.ni_dvp, 0, td); VOP_UNLOCK(fromnd.ni_vp, 0, td); #endif fvp = fromnd.ni_vp; if (error == 0) error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); if (error != 0) { NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, pathseg, to, td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If the source is the same as the destination (that is, if they * are links to the same vnode), then there is nothing to do. 
*/ if (fvp == tvp) error = -1; #ifdef MAC else error = mac_check_vnode_rename_to(td->td_ucred, tdvp, tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); #endif out: if (!error) { VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); out1: vn_finished_write(mp); if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(td, uap) struct thread *td; register struct mkdir_args /* { char *path; int mode; } */ *uap; { return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); /* * XXX namei called with LOCKPARENT but not LOCKLEAF has * the strange behaviour of leaving the vnode unlocked * if the target is the same vnode as the parent. */ if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VDIR; FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(td, uap) struct thread *td; struct rmdir_args /* { char *path; } */ *uap; { return (kern_rmdir(td, uap->path, UIO_USERSPACE)); } int kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. 
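For illustration only (not part of this change): when source and target resolve to the same vnode, kern_rename() above takes the error = -1 shortcut and rename(2) returns success without calling VOP_RENAME(), so renaming one hard link over another link to the same file leaves both names in place (the POSIX-mandated behavior). A minimal sketch; the names "a" and "b" are only examples:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd;

	if ((fd = open("a", O_WRONLY | O_CREAT, 0644)) == -1)
		return (1);
	close(fd);
	if (link("a", "b") == -1)	/* "a" and "b" now share one vnode */
		return (1);
	if (rename("a", "b") == 0)	/* no-op: both names still exist */
		printf("rename of two links to the same file succeeded\n");
	return (0);
}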
*/ if (vp->v_vflag & VV_ROOT) { error = EBUSY; goto out; } #ifdef MAC error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error) goto out; #endif if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(td, uap) struct thread *td; register struct ogetdirentries_args /* { int fd; char *buf; u_int count; long *basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; /* XXX arbitrary sanity limit on `count'. */ if (uap->count > 64 * 1024) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; unionread: if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error) { VOP_UNLOCK(vp, 0, td); fdrop(fp, td); return (error); } #endif # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = uap->count; MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = uap->count - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return (error); } if (uap->count == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; if (error) { fdrop(fp, td); return (error); } } /* * XXX We could delay dropping the lock above but * union_dircheckp complicates things. 
*/ vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td); if ((vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; fp->f_offset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0, td); } error = copyout(&loff, uap->basep, sizeof(long)); fdrop(fp, td); td->td_retval[0] = uap->count - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(td, uap) struct thread *td; register struct getdirentries_args /* { int fd; char *buf; u_int count; long *basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; unionread: if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return (error); } if (uap->count == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; if (error) { fdrop(fp, td); return (error); } } /* * XXX We could delay dropping the lock above but * union_dircheckp complicates things. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if ((vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; fp->f_offset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0, td); } if (uap->basep != NULL) { error = copyout(&loff, uap->basep, sizeof(long)); } td->td_retval[0] = uap->count - auio.uio_resid; fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getdents_args { int fd; char *buf; size_t count; }; #endif int getdents(td, uap) struct thread *td; register struct getdents_args /* { int fd; char *buf; u_int count; } */ *uap; { struct getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return getdirentries(td, &ap); } /* * Set the mode mask for creation of filesystem nodes. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(td, uap) struct thread *td; struct umask_args /* { int newmask; } */ *uap; { register struct filedesc *fdp; FILEDESC_LOCK(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = uap->newmask & ALLPERMS; FILEDESC_UNLOCK(td->td_proc->p_fd); return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. 
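As a point of reference for the getdirentries() implementation above (an illustrative sketch, not part of this change): userland consumes the same packed, variable-length struct dirent records and advances through them by d_reclen, just as the COMPAT_43 shim does in-kernel. The buffer size and error handling below are arbitrary.

#include <sys/types.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
list_names(const char *path)
{
	char buf[4096], *cp;
	long base;
	int fd, n;

	if ((fd = open(path, O_RDONLY)) < 0)
		return (-1);
	/* Each call yields packed struct dirent records; walk them by d_reclen. */
	while ((n = getdirentries(fd, buf, sizeof(buf), &base)) > 0) {
		for (cp = buf; cp < buf + n; ) {
			struct dirent *dp = (struct dirent *)cp;

			if (dp->d_reclen == 0)
				break;		/* defensive, as the kernel loop is */
			if (dp->d_fileno != 0)
				printf("%.*s\n", (int)dp->d_namlen, dp->d_name);
			cp += dp->d_reclen;
		}
	}
	(void)close(fd);
	return (n < 0 ? -1 : 0);
}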
*/ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(td, uap) struct thread *td; register struct revoke_args /* { char *path; } */ *uap; { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VCHR) { vput(vp); return (EINVAL); } #ifdef MAC error = mac_check_vnode_revoke(td->td_ucred, vp); if (error) { vput(vp); return (error); } #endif error = VOP_GETATTR(vp, &vattr, td->td_ucred, td); if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, td); if (td->td_ucred->cr_uid != vattr.va_uid) { error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) goto out; } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto out; if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); vn_finished_write(mp); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. * The file entry is locked upon returning. */ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { int error; struct file *fp; fp = NULL; if (fdp == NULL) error = EBADF; else { FILEDESC_LOCK(fdp); if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) error = EBADF; else if (fp->f_vnode == NULL) { fp = NULL; error = EINVAL; } else { fhold(fp); error = 0; } FILEDESC_UNLOCK(fdp); } *fpp = fp; return (error); } /* * Get (NFS) file handle */ #ifndef _SYS_SYSPROTO_H_ struct getfh_args { char *fname; fhandle_t *fhp; }; #endif int getfh(td, uap) struct thread *td; register struct getfh_args *uap; { struct nameidata nd; fhandle_t fh; register struct vnode *vp; int error; /* * Must be super user */ error = suser(td); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fh.fh_fid); vput(vp); if (error) return (error); error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } /* * syscall for the rpc.lockd to use to translate a NFS file handle into * an open descriptor. * * warning: do not remove the suser() call or this becomes one giant * security hole. */ #ifndef _SYS_SYSPROTO_H_ struct fhopen_args { const struct fhandle *u_fhp; int flags; }; #endif int fhopen(td, uap) struct thread *td; struct fhopen_args /* { const struct fhandle *u_fhp; int flags; } */ *uap; { struct proc *p = td->td_proc; struct mount *mp; struct vnode *vp; struct fhandle fhp; struct vattr vat; struct vattr *vap = &vat; struct flock lf; struct file *fp; register struct filedesc *fdp = p->p_fd; int fmode, mode, error, type; struct file *nfp; int indx; /* * Must be super user */ error = suser(td); if (error) return (error); fmode = FFLAGS(uap->flags); /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); if (error) return(error); /* find the mount point */ mp = vfs_getvfs(&fhp.fh_fsid); if (mp == NULL) return (ESTALE); /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); if (error) return (error); /* * from now on we have to make sure not * to forget about the vnode * any error that causes an abort must vput(vp) * just set error = err and 'goto bad;'. 
*/ /* * from vn_open */ if (vp->v_type == VLNK) { error = EMLINK; goto bad; } if (vp->v_type == VSOCK) { error = EOPNOTSUPP; goto bad; } mode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) { error = EISDIR; goto bad; } error = vn_writechk(vp); if (error) goto bad; mode |= VWRITE; } if (fmode & FREAD) mode |= VREAD; if (fmode & O_APPEND) mode |= VAPPEND; #ifdef MAC error = mac_check_vnode_open(td->td_ucred, vp, mode); if (error) goto bad; #endif if (mode) { error = VOP_ACCESS(vp, mode, td->td_ucred, td); if (error) goto bad; } if (fmode & O_TRUNC) { VOP_UNLOCK(vp, 0, td); /* XXX */ if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */ #ifdef MAC /* * We don't yet have fp->f_cred, so use td->td_ucred, which * should be right. */ error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp); if (error == 0) { #endif VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, td->td_ucred, td); #ifdef MAC } #endif vn_finished_write(mp); if (error) goto bad; } error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1); if (error) goto bad; /* * Make sure that a VM object is created for VMIO support. */ if (vn_canvmio(vp) == TRUE) { if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0) goto bad; } if (fmode & FWRITE) vp->v_writecount++; /* * end of vn_open code */ if ((error = falloc(td, &nfp, &indx)) != 0) { if (fmode & FWRITE) vp->v_writecount--; goto bad; } /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; nfp->f_vnode = vp; nfp->f_data = vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; nfp->f_type = DTYPE_VNODE; if (fmode & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (fmode & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, td); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { /* * The lock request failed. Normally close the * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. */ FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); /* * release our private reference */ fdrop(fp, td); return(error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0)) vfs_object_create(vp, td, td->td_ucred); VOP_UNLOCK(vp, 0, td); fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: vput(vp); return (error); } /* * Stat an (NFS) file handle. */ #ifndef _SYS_SYSPROTO_H_ struct fhstat_args { struct fhandle *u_fhp; struct stat *sb; }; #endif int fhstat(td, uap) struct thread *td; register struct fhstat_args /* { struct fhandle *u_fhp; struct stat *sb; } */ *uap; { struct stat sb; fhandle_t fh; struct mount *mp; struct vnode *vp; int error; /* * Must be super user */ error = suser(td); if (error) return (error); error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) return (ESTALE); if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) return (error); error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); vput(vp); if (error) return (error); error = copyout(&sb, uap->sb, sizeof(sb)); return (error); } /* * Implement fstatfs() for (NFS) file handles. 
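For context on fhopen() and fhstat() above (a hedged sketch, not part of this change): the handle they consume is the one getfh() produces, so a privileged daemon in the mould of rpc.lockd can translate a path to an opaque fhandle_t once and later reopen the file by handle without walking the name space again. Both calls fail for non-root, as the suser() checks above enforce.

#include <sys/param.h>
#include <sys/mount.h>
#include <fcntl.h>

int
reopen_by_handle(const char *path)
{
	fhandle_t fh;
	int fd;

	if (getfh(path, &fh) != 0)		/* superuser only */
		return (-1);
	/* Possibly much later, or in another process: reopen by handle. */
	if ((fd = fhopen(&fh, O_RDWR)) < 0)	/* also superuser only */
		return (-1);
	return (fd);
}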
*/ #ifndef _SYS_SYSPROTO_H_ struct fhstatfs_args { struct fhandle *u_fhp; struct statfs *buf; }; #endif int fhstatfs(td, uap) struct thread *td; struct fhstatfs_args /* { struct fhandle *u_fhp; struct statfs *buf; } */ *uap; { - struct statfs *sp; + struct statfs *sp, sb; struct mount *mp; struct vnode *vp; - struct statfs sb; fhandle_t fh; int error; /* * Must be super user */ error = suser(td); if (error) return (error); if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) return (ESTALE); if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) return (error); mp = vp->v_mount; sp = &mp->mnt_stat; vput(vp); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) return (error); #endif + /* + * Set these in case the underlying filesystem fails to do so. + */ + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if ((error = VFS_STATFS(mp, sp, td)) != 0) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout(sp, uap->buf, sizeof(*sp))); } /* * Syscall to push extended attribute configuration information into the * VFS. Accepts a path, which it converts to a mountpoint, as well as * a command (int cmd), and attribute name and misc data. For now, the * attribute name is left in userspace for consumption by the VFS_op. * It will probably be changed to be copied into sysspace by the * syscall in the future, once issues with various consumers of the * attribute code have raised their hands. * * Currently this is used only by UFS Extended Attributes. */ int extattrctl(td, uap) struct thread *td; struct extattrctl_args /* { const char *path; int cmd; const char *filename; int attrnamespace; const char *attrname; } */ *uap; { struct vnode *filename_vp; struct nameidata nd; struct mount *mp, *mp_writable; char attrname[EXTATTR_MAXNAMELEN]; int error; /* * uap->attrname is not always defined. We check again later when we * invoke the VFS call so as to pass in NULL there if needed. */ if (uap->attrname != NULL) { error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); } /* * uap->filename is not always defined. If it is, grab a vnode lock, * which VFS_EXTATTRCTL() will later release. */ filename_vp = NULL; if (uap->filename != NULL) { NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->filename, td); error = namei(&nd); if (error) return (error); filename_vp = nd.ni_vp; NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK); } /* uap->path is always defined. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) { if (filename_vp != NULL) vput(filename_vp); return (error); } mp = nd.ni_vp->v_mount; error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH); NDFREE(&nd, 0); if (error) { if (filename_vp != NULL) vput(filename_vp); return (error); } error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace, uap->attrname != NULL ? attrname : NULL, td); vn_finished_write(mp_writable); /* * VFS_EXTATTRCTL will have unlocked, but not de-ref'd, * filename_vp, so vrele it if it is defined. 
*/ if (filename_vp != NULL) vrele(filename_vp); return (error); } /*- * Set a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct mount *mp; struct uio auio; struct iovec aiov; ssize_t cnt; int error; error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; cnt = nbytes; #ifdef MAC error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace, attrname, &auio); if (error) goto done; #endif error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, td->td_ucred, td); cnt -= auio.uio_resid; td->td_retval[0] = cnt; done: VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_set_fd(td, uap) struct thread *td; struct extattr_set_fd_args /* { int fd; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); error = extattr_set_vp(fp->f_vnode, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); return (error); } int extattr_set_file(td, uap) struct thread *td; struct extattr_set_file_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } int extattr_set_link(td, uap) struct thread *td; struct extattr_set_link_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } /*- * Get a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". 
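The extattr_get_vp() routine introduced above implements a probe mode described in its body below: a NULL data pointer asks only for the attribute's current size. A hedged userland sketch of the usual two-call pattern built on that behaviour (the namespace choice and helper name are illustrative):

#include <sys/types.h>
#include <sys/extattr.h>
#include <stdlib.h>

void *
read_attr(const char *path, const char *name, ssize_t *lenp)
{
	ssize_t len;
	void *buf;

	/* Probe: a NULL buffer returns only the attribute's current size. */
	if ((len = extattr_get_file(path, EXTATTR_NAMESPACE_USER, name,
	    NULL, 0)) < 0)
		return (NULL);
	if ((buf = malloc(len)) == NULL)
		return (NULL);
	/* Fetch the data into the buffer we just sized. */
	if ((len = extattr_get_file(path, EXTATTR_NAMESPACE_USER, name,
	    buf, len)) < 0) {
		free(buf);
		return (NULL);
	}
	*lenp = len;
	return (buf);
}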
* Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct uio auio, *auiop; struct iovec aiov; ssize_t cnt; size_t size, *sizep; int error; /* * XXX: Temporary API compatibility for applications that know * about this hack ("" means list), but haven't been updated * for the extattr_list_*() system calls yet. This will go * away for FreeBSD 5.3. */ if (strlen(attrname) == 0) return (extattr_list_vp(vp, attrnamespace, data, nbytes, td)); VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* * Slightly unusual semantics: if the user provides a NULL data * pointer, they don't want to receive the data, just the * maximum read length. */ auiop = NULL; sizep = NULL; cnt = 0; if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auiop = &auio; cnt = nbytes; } else sizep = &size; #ifdef MAC error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace, attrname, &auio); if (error) goto done; #endif error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep, td->td_ucred, td); if (auiop != NULL) { cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else td->td_retval[0] = size; done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_get_fd(td, uap) struct thread *td; struct extattr_get_fd_args /* { int fd; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); error = extattr_get_vp(fp->f_vnode, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); return (error); } int extattr_get_file(td, uap) struct thread *td; struct extattr_get_file_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } int extattr_get_link(td, uap) struct thread *td; struct extattr_get_link_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } /* * extattr_delete_vp(): Delete a named extended attribute on a file or * directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer 
"attrname", proc "p" * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname, struct thread *td) { struct mount *mp; int error; error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); #ifdef MAC error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace, attrname); if (error) goto done; #endif error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred, td); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred, td); #ifdef MAC done: #endif VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_delete_fd(td, uap) struct thread *td; struct extattr_delete_fd_args /* { int fd; int attrnamespace; const char *attrname; } */ *uap; { struct file *fp; struct vnode *vp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vp = fp->f_vnode; error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td); fdrop(fp, td); return (error); } int extattr_delete_file(td, uap) struct thread *td; struct extattr_delete_file_args /* { const char *path; int attrnamespace; const char *attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); return(error); } int extattr_delete_link(td, uap) struct thread *td; struct extattr_delete_link_args /* { const char *path; int attrnamespace; const char *attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); return(error); } /*- * Retrieve a list of extended attributes on a file or directory. * * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace", * userspace buffer pointer "data", buffer length "nbytes", * thread "td". 
* Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes, struct thread *td) { struct uio auio, *auiop; size_t size, *sizep; struct iovec aiov; ssize_t cnt; int error; VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); auiop = NULL; sizep = NULL; cnt = 0; if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auiop = &auio; cnt = nbytes; } else sizep = &size; #ifdef MAC error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace); if (error) goto done; #endif error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep, td->td_ucred, td); if (auiop != NULL) { cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else td->td_retval[0] = size; done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_list_fd(td, uap) struct thread *td; struct extattr_list_fd_args /* { int fd; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct file *fp; int error; error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data, uap->nbytes, td); fdrop(fp, td); return (error); } int extattr_list_file(td, uap) struct thread*td; struct extattr_list_file_args /* { const char *path; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } int extattr_list_link(td, uap) struct thread*td; struct extattr_list_link_args /* { const char *path; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } Index: head/sys/kern/vfs_syscalls.c =================================================================== --- head/sys/kern/vfs_syscalls.c (revision 122536) +++ head/sys/kern/vfs_syscalls.c (revision 122537) @@ -1,4508 +1,4791 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 */ #include __FBSDID("$FreeBSD$"); /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int chroot_refuse_vdir_fds(struct filedesc *fdp); static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); static int setfown(struct thread *td, struct vnode *, uid_t, gid_t); static int setfmode(struct thread *td, struct vnode *, int); static int setfflags(struct thread *td, struct vnode *, int); static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes, struct thread *td); int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *); int (*softdep_fsync_hook)(struct vnode *); /* * The module initialization routine for POSIX asynchronous I/O will * set this to the version of AIO that it implements. (Zero means * that it is not implemented.) This value is used here by pathconf() * and in kern_descrip.c by fpathconf(). */ int async_io_version; /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(td, uap) struct thread *td; struct sync_args *uap; { struct mount *mp, *nmp; int asyncflag; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, ((td != NULL) ? 
td->td_ucred : NOCRED), td); mp->mnt_flag |= asyncflag; vn_finished_write(mp); } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* XXX PRISON: could be per prison flag */ static int prison_quotas; #if 0 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); #endif /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(td, uap) struct thread *td; register struct quotactl_args /* { char *path; int cmd; int uid; caddr_t arg; } */ *uap; { struct mount *mp; int error; struct nameidata nd; if (jailed(td->td_ucred) && !prison_quotas) return (EPERM); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH); vrele(nd.ni_vp); if (error) return (error); error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td); vn_finished_write(mp); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(td, uap) struct thread *td; register struct statfs_args /* { char *path; struct statfs *buf; } */ *uap; { - register struct mount *mp; - register struct statfs *sp; + struct mount *mp; + struct statfs *sp, sb; int error; struct nameidata nd; - struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) return (error); #endif + /* + * Set these in case the underlying filesystem fails to do so. + */ + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); if (error) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout(sp, uap->buf, sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(td, uap) struct thread *td; register struct fstatfs_args /* { int fd; struct statfs *buf; } */ *uap; { struct file *fp; struct mount *mp; - register struct statfs *sp; + struct statfs *sp, sb; int error; - struct statfs sb; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); mp = fp->f_vnode->v_mount; fdrop(fp, td); if (mp == NULL) return (EBADF); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) return (error); #endif sp = &mp->mnt_stat; + /* + * Set these in case the underlying filesystem fails to do so. + */ + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); if (error) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout(sp, uap->buf, sizeof(*sp))); } /* * Get statistics on all filesystems. 
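The statfs() and fstatfs() hunks above now pre-seed f_version, f_namemax and f_flags before calling VFS_STATFS(), so a filesystem that has not been taught about the enlarged structure still returns sane values, and f_fsid is zeroed for callers without superuser privilege. Seen from userland the new fields simply appear in struct statfs; a minimal, illustrative consumer (field formatting is arbitrary):

#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

int
show_fs(const char *path)
{
	struct statfs sf;

	if (statfs(path, &sf) != 0)
		return (-1);
	/* f_version and f_namemax are now always filled in by the kernel. */
	printf("%s on %s (%s): namemax %lu, statfs version 0x%x\n",
	    sf.f_mntfromname, sf.f_mntonname, sf.f_fstypename,
	    (unsigned long)sf.f_namemax, (unsigned int)sf.f_version);
	return (0);
}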
*/ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(td, uap) struct thread *td; register struct getfsstat_args /* { struct statfs *buf; long bufsize; int flags; } */ *uap; { - register struct mount *mp, *nmp; - register struct statfs *sp; + struct mount *mp, *nmp; + struct statfs *sp, sb; caddr_t sfsp; long count, maxcount, error; maxcount = uap->bufsize / sizeof(struct statfs); sfsp = (caddr_t)uap->buf; count = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { #ifdef MAC if (mac_check_mount_stat(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #endif if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* + * Set these in case the underlying filesystem + * fails to do so. + */ + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + /* * If MNT_NOWAIT or MNT_LAZY is specified, do not * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY * overrides MNT_WAIT. */ if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || (uap->flags & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, td))) { mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); continue; } - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (suser(td)) { + bcopy(sp, &sb, sizeof(sb)); + sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; + sp = &sb; + } error = copyout(sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, td); return (error); } sfsp += sizeof(*sp); } count++; mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); if (sfsp && count > maxcount) td->td_retval[0] = maxcount; else td->td_retval[0] = count; return (0); } +#ifdef COMPAT_FREEBSD4 /* + * Get old format filesystem statistics. + */ +static void cvtstatfs(struct thread *, struct statfs *, struct ostatfs *); + +#ifndef _SYS_SYSPROTO_H_ +struct freebsd4_statfs_args { + char *path; + struct ostatfs *buf; +}; +#endif +/* ARGSUSED */ +int +freebsd4_statfs(td, uap) + struct thread *td; + struct freebsd4_statfs_args /* { + char *path; + struct ostatfs *buf; + } */ *uap; +{ + struct mount *mp; + struct statfs *sp; + struct ostatfs osb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); + if ((error = namei(&nd)) != 0) + return (error); + mp = nd.ni_vp->v_mount; + sp = &mp->mnt_stat; + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(nd.ni_vp); +#ifdef MAC + error = mac_check_mount_stat(td->td_ucred, mp); + if (error) + return (error); +#endif + error = VFS_STATFS(mp, sp, td); + if (error) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + cvtstatfs(td, sp, &osb); + return (copyout(&osb, uap->buf, sizeof(osb))); +} + +/* + * Get filesystem statistics. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct freebsd4_fstatfs_args { + int fd; + struct ostatfs *buf; +}; +#endif +/* ARGSUSED */ +int +freebsd4_fstatfs(td, uap) + struct thread *td; + struct freebsd4_fstatfs_args /* { + int fd; + struct ostatfs *buf; + } */ *uap; +{ + struct file *fp; + struct mount *mp; + struct statfs *sp; + struct ostatfs osb; + int error; + + if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) + return (error); + mp = fp->f_vnode->v_mount; + fdrop(fp, td); + if (mp == NULL) + return (EBADF); +#ifdef MAC + error = mac_check_mount_stat(td->td_ucred, mp); + if (error) + return (error); +#endif + sp = &mp->mnt_stat; + error = VFS_STATFS(mp, sp, td); + if (error) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + cvtstatfs(td, sp, &osb); + return (copyout(&osb, uap->buf, sizeof(osb))); +} + +/* + * Get statistics on all filesystems. + */ +#ifndef _SYS_SYSPROTO_H_ +struct freebsd4_getfsstat_args { + struct ostatfs *buf; + long bufsize; + int flags; +}; +#endif +int +freebsd4_getfsstat(td, uap) + struct thread *td; + register struct freebsd4_getfsstat_args /* { + struct ostatfs *buf; + long bufsize; + int flags; + } */ *uap; +{ + struct mount *mp, *nmp; + struct statfs *sp; + struct ostatfs osb; + caddr_t sfsp; + long count, maxcount, error; + + maxcount = uap->bufsize / sizeof(struct ostatfs); + sfsp = (caddr_t)uap->buf; + count = 0; + mtx_lock(&mountlist_mtx); + for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { +#ifdef MAC + if (mac_check_mount_stat(td->td_ucred, mp) != 0) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } +#endif + if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if (sfsp && count < maxcount) { + sp = &mp->mnt_stat; + /* + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. + */ + if (((uap->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || + (uap->flags & MNT_WAIT)) && + (error = VFS_STATFS(mp, sp, td))) { + mtx_lock(&mountlist_mtx); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, td); + continue; + } + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + cvtstatfs(td, sp, &osb); + error = copyout(&osb, sfsp, sizeof(osb)); + if (error) { + vfs_unbusy(mp, td); + return (error); + } + sfsp += sizeof(osb); + } + count++; + mtx_lock(&mountlist_mtx); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, td); + } + mtx_unlock(&mountlist_mtx); + if (sfsp && count > maxcount) + td->td_retval[0] = maxcount; + else + td->td_retval[0] = count; + return (0); +} + +/* + * Implement fstatfs() for (NFS) file handles. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct freebsd4_fhstatfs_args { + struct fhandle *u_fhp; + struct ostatfs *buf; +}; +#endif +int +freebsd4_fhstatfs(td, uap) + struct thread *td; + struct freebsd4_fhstatfs_args /* { + struct fhandle *u_fhp; + struct ostatfs *buf; + } */ *uap; +{ + struct statfs *sp; + struct mount *mp; + struct vnode *vp; + struct ostatfs osb; + fhandle_t fh; + int error; + + /* + * Must be super user + */ + error = suser(td); + if (error) + return (error); + + if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) + return (error); + + if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) + return (ESTALE); + if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) + return (error); + mp = vp->v_mount; + sp = &mp->mnt_stat; + vput(vp); +#ifdef MAC + error = mac_check_mount_stat(td->td_ucred, mp); + if (error) + return (error); +#endif + if ((error = VFS_STATFS(mp, sp, td)) != 0) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + cvtstatfs(td, sp, &osb); + return (copyout(&osb, uap->buf, sizeof(osb))); +} + +/* + * Convert a new format statfs structure to an old format statfs structure. + */ +static void +cvtstatfs(td, nsp, osp) + struct thread *td; + struct statfs *nsp; + struct ostatfs *osp; +{ + + bzero(osp, sizeof(*osp)); + osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX); + osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); + osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX); + osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX); + osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX); + osp->f_files = MIN(nsp->f_files, LONG_MAX); + osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); + osp->f_owner = nsp->f_owner; + osp->f_type = nsp->f_type; + osp->f_flags = nsp->f_flags; + osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); + osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); + osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); + osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); + bcopy(nsp->f_fstypename, osp->f_fstypename, + MIN(MFSNAMELEN, OMNAMELEN)); + bcopy(nsp->f_mntonname, osp->f_mntonname, + MIN(MFSNAMELEN, OMNAMELEN)); + bcopy(nsp->f_mntfromname, osp->f_mntfromname, + MIN(MFSNAMELEN, OMNAMELEN)); + if (suser(td)) { + osp->f_fsid.val[0] = osp->f_fsid.val[1] = 0; + } else { + osp->f_fsid = nsp->f_fsid; + } +} +#endif /* COMPAT_FREEBSD4 */ + +/* * Change current working directory to a given file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(td, uap) struct thread *td; struct fchdir_args /* { int fd; } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int error; if ((error = getvnode(fdp, uap->fd, &fp)) != 0) return (error); vp = fp->f_vnode; VREF(vp); fdrop(fp, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; #ifdef MAC else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) { } #endif else error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, td)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, td); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, td); FILEDESC_LOCK(fdp); vpold = fdp->fd_cdir; fdp->fd_cdir = vp; FILEDESC_UNLOCK(fdp); vrele(vpold); return (0); } /* * Change current working directory (``.''). 
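cvtstatfs() above is the COMPAT_FREEBSD4 shim: each new 64-bit counter is clamped to LONG_MAX with MIN() before landing in the old long-sized field, the name strings are copied into the shorter old buffers, and f_fsid is zeroed unless the caller is superuser. The clamping idiom, reduced to a small, hedged illustration with a hypothetical value:

#include <sys/param.h>	/* MIN() */
#include <limits.h>
#include <stdint.h>

/* Illustrative only: a 64-bit block count degrades silently in the old ABI. */
static long
clamp_to_long(uint64_t v)
{

	return ((long)MIN(v, (uint64_t)LONG_MAX));
}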
*/ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(td, uap) struct thread *td; struct chdir_args /* { char *path; } */ *uap; { return (kern_chdir(td, uap->path, UIO_USERSPACE)); } int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) { register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; struct vnode *vp; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); if ((error = change_dir(nd.ni_vp, td)) != 0) { vput(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } VOP_UNLOCK(nd.ni_vp, 0, td); NDFREE(&nd, NDF_ONLY_PNBUF); FILEDESC_LOCK(fdp); vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; FILEDESC_UNLOCK(fdp); vrele(vp); return (0); } /* * Helper function for raised chroot(2) security function: Refuse if * any filedescriptors are open directories. */ static int chroot_refuse_vdir_fds(fdp) struct filedesc *fdp; { struct vnode *vp; struct file *fp; int fd; FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { fp = fget_locked(fdp, fd); if (fp == NULL) continue; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vp->v_type == VDIR) return (EPERM); } } return (0); } /* * This sysctl determines if we will allow a process to chroot(2) if it * has a directory open: * 0: disallowed for all processes. * 1: allowed for processes that were not already chroot(2)'ed. * 2: allowed for all processes. */ static int chroot_allow_open_directories = 1; SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, &chroot_allow_open_directories, 0, ""); /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(td, uap) struct thread *td; struct chroot_args /* { char *path; } */ *uap; { int error; struct nameidata nd; error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td); mtx_lock(&Giant); error = namei(&nd); if (error) goto error; if ((error = change_dir(nd.ni_vp, td)) != 0) goto e_vunlock; #ifdef MAC if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp))) goto e_vunlock; #endif VOP_UNLOCK(nd.ni_vp, 0, td); error = change_root(nd.ni_vp, td); vrele(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); mtx_unlock(&Giant); return (error); e_vunlock: vput(nd.ni_vp); error: mtx_unlock(&Giant); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Common routine for chroot and chdir. Callers must provide a locked vnode * instance. */ int change_dir(vp, td) struct vnode *vp; struct thread *td; { int error; ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); if (vp->v_type != VDIR) return (ENOTDIR); #ifdef MAC error = mac_check_vnode_chdir(td->td_ucred, vp); if (error) return (error); #endif error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); return (error); } /* * Common routine for kern_chroot() and jail_attach(). The caller is * responsible for invoking suser() and mac_check_chroot() to authorize this * operation. 
*/ int change_root(vp, td) struct vnode *vp; struct thread *td; { struct filedesc *fdp; struct vnode *oldvp; int error; mtx_assert(&Giant, MA_OWNED); fdp = td->td_proc->p_fd; FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); if (error) { FILEDESC_UNLOCK(fdp); return (error); } } oldvp = fdp->fd_rdir; fdp->fd_rdir = vp; VREF(fdp->fd_rdir); if (!fdp->fd_jdir) { fdp->fd_jdir = vp; VREF(fdp->fd_jdir); } FILEDESC_UNLOCK(fdp); vrele(oldvp); return (0); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. */ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(td, uap) struct thread *td; register struct open_args /* { char *path; int flags; int mode; } */ *uap; { return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); } int kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, int mode) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct vattr vat; struct mount *mp; int cmode; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; if ((flags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(flags); error = falloc(td, &nfp, &indx); if (error) return (error); /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, indx); if (error) { /* * If the vn_open replaced the method vector, something * wonderous happened deep below and we just pass it up * pretending we know what we do. */ if (error == ENXIO && fp->f_ops != &badfileops) { fdrop(fp, td); td->td_retval[0] = indx; return (0); } /* * release our own reference */ fdrop(fp, td); /* * handle special fdopen() case. bleh. dupfdopen() is * responsible for dropping the old contents of ofiles[indx] * if it succeeds. */ if ((error == ENODEV || error == ENXIO) && td->td_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) { td->td_retval[0] = indx; return (0); } /* * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. */ FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); if (error == ERESTART) error = EINTR; return (error); } td->td_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; /* * There should be 2 references on the file, one from the descriptor * table, and one for us. * * Handle the case where someone closed the file (via its file * descriptor) while we were blocked. The end result should look * like opening the file succeeded but it was immediately closed. */ FILEDESC_LOCK(fdp); FILE_LOCK(fp); if (fp->f_count == 1) { KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); FILEDESC_UNLOCK(fdp); FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); fdrop(fp, td); td->td_retval[0] = indx; return 0; } fp->f_vnode = vp; fp->f_data = vp; fp->f_flag = flags & FMASK; fp->f_ops = &vnops; fp->f_seqcount = 1; fp->f_type = (vp->v_type == VFIFO ? 
DTYPE_FIFO : DTYPE_VNODE); FILEDESC_UNLOCK(fdp); FILE_UNLOCK(fp); /* assert that vn_open created a backing object if one is needed */ KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, ("open: vmio vnode has no backing object after vn_open")); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) goto bad; fp->f_flag |= FHASLOCK; } if (flags & O_TRUNC) { if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto bad; VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); VATTR_NULL(&vat); vat.va_size = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); #ifdef MAC error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp); if (error == 0) #endif error = VOP_SETATTR(vp, &vat, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); if (error) goto bad; } /* * Release our private reference, leaving the one associated with * the descriptor table intact. */ fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); fdrop(fp, td); return (error); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(td, uap) struct thread *td; register struct ocreat_args /* { char *path; int mode; } */ *uap; { struct open_args /* { char *path; int flags; int mode; } */ nuap; nuap.path = uap->path; nuap.mode = uap->mode; nuap.flags = O_WRONLY | O_CREAT | O_TRUNC; return (open(td, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. 
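kern_open() above also covers BSD's open-time locking: O_EXLOCK and O_SHLOCK are translated into an flock-style VOP_ADVLOCK() request, which waits unless FNONBLOCK was given, and FHASLOCK is recorded so the lock is dropped on close. A small userland sketch of the non-blocking variant (path handling is illustrative):

#include <fcntl.h>

int
open_locked(const char *path)
{
	int fd;

	/* Exclusive advisory lock taken atomically with the open itself. */
	fd = open(path, O_RDWR | O_EXLOCK | O_NONBLOCK);
	if (fd < 0)
		return (-1);	/* EAGAIN when another process holds the lock */
	return (fd);
}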
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(td, uap) struct thread *td; register struct mknod_args /* { char *path; int mode; int dev; } */ *uap; { return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } int kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, int dev) { struct vnode *vp; struct mount *mp; struct vattr vattr; int error; int whiteout = 0; struct nameidata nd; switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: error = suser(td); break; default: error = suser_cred(td->td_ucred, PRISON_ROOT); break; } if (error) return (error); restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (EEXIST); } else { VATTR_NULL(&vattr); FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); vattr.va_rdev = dev; whiteout = 0; switch (mode & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC if (error == 0 && !whiteout) error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (!error) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); if (whiteout) error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. 
*/ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(td, uap) struct thread *td; register struct mkfifo_args /* { char *path; int mode; } */ *uap; { return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) { struct mount *mp; struct vattr vattr; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VFIFO; FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(td, uap) struct thread *td; register struct link_args /* { char *path; char *link; } */ *uap; { return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); } int kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) { struct vnode *vp; struct mount *mp; struct nameidata nd; int error; bwillwrite(); NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, segflg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vp->v_type == VDIR) { vrele(vp); return (EPERM); /* POSIX */ } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { vrele(nd.ni_vp); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); error = EEXIST; } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td)) == 0) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); #ifdef MAC error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error == 0) #endif error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0, td); vput(nd.ni_dvp); } NDFREE(&nd, NDF_ONLY_PNBUF); } vrele(vp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. 
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(td, uap) struct thread *td; register struct symlink_args /* { char *path; char *link; } */ *uap; { return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); } int kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) { struct mount *mp; struct vattr vattr; char *syspath; int error; struct nameidata nd; if (segflg == UIO_SYSSPACE) { syspath = path; } else { syspath = uma_zalloc(namei_zone, M_WAITOK); if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0) goto out; } restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | NOOBJ | SAVENAME, segflg, link, td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); error = EEXIST; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); #ifdef MAC vattr.va_type = VLNK; error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out2; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); if (error == 0) vput(nd.ni_vp); #ifdef MAC out2: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: if (segflg != UIO_SYSSPACE) uma_zfree(namei_zone, syspath); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(td, uap) struct thread *td; register struct undelete_args /* { char *path; } */ *uap; { int error; struct mount *mp; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp) vrele(nd.ni_vp); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(td, uap) struct thread *td; struct unlink_args /* { char *path; } */ *uap; { return (kern_unlink(td, uap->path, UIO_USERSPACE)); } int kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT|LOCKLEAF, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. 
* * XXX: can this only be a VDIR case? */ if (vp->v_vflag & VV_ROOT) error = EBUSY; } if (error == 0) { if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: #endif vn_finished_write(mp); } NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(td, uap) struct thread *td; register struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ *uap; { struct ucred *cred = td->td_ucred; struct file *fp; struct vnode *vp; struct vattr vattr; off_t offset; int error, noneg; if ((error = fget(td, uap->fd, &fp)) != 0) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { fdrop(fp, td); return (ESPIPE); } vp = fp->f_vnode; noneg = (vp->v_type != VCHR); offset = uap->offset; switch (uap->whence) { case L_INCR: if (noneg && (fp->f_offset < 0 || (offset > 0 && fp->f_offset > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += fp->f_offset; break; case L_XTND: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_GETATTR(vp, &vattr, cred, td); VOP_UNLOCK(vp, 0, td); if (error) break; if (noneg && (vattr.va_size > OFF_MAX || (offset > 0 && vattr.va_size > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += vattr.va_size; break; case L_SET: break; default: error = EINVAL; } if (error == 0 && noneg && offset < 0) error = EINVAL; if (error != 0) { fdrop(fp, td); return (error); } fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; fdrop(fp, td); return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(td, uap) struct thread *td; register struct olseek_args /* { int fd; long offset; int whence; } */ *uap; { struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ nuap; int error; nuap.fd = uap->fd; nuap.offset = uap->offset; nuap.whence = uap->whence; error = lseek(td, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions using passed credentials. */ static int vn_access(vp, user_flags, cred, td) struct vnode *vp; int user_flags; struct ucred *cred; struct thread *td; { int error, flags; /* Flags == 0 means only check for existence. */ error = 0; if (user_flags) { flags = 0; if (user_flags & R_OK) flags |= VREAD; if (user_flags & W_OK) flags |= VWRITE; if (user_flags & X_OK) flags |= VEXEC; #ifdef MAC error = mac_check_vnode_access(cred, vp, flags); if (error) return (error); #endif if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, td); } return (error); } /* * Check access permissions using "real" credentials. 
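 *
 * (Illustrative sketch, not part of this file: access(2) answers the
 * question for the caller's real uid and gid, which is why kern_access()
 * below installs a crdup()ed credential for the lookup.  Assuming
 * <unistd.h> and <stdio.h>, a set-uid program might probe
 *
 *	if (access("/var/log/messages", R_OK | W_OK) == 0)
 *		printf("invoking user may read and write\n");
 *
 * Nothing is opened; this is purely a permission check.)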
*/ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(td, uap) struct thread *td; register struct access_args /* { char *path; int flags; } */ *uap; { return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags)); } int kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags) { struct ucred *cred, *tmpcred; register struct vnode *vp; int error; struct nameidata nd; /* * Create and modify a temporary credential instead of one that * is potentially shared. This could also mess up socket * buffer accounting which can run in an interrupt context. * * XXX - Depending on how "threads" are finally implemented, it * may be better to explicitly pass the credential to namei() * rather than to modify the potentially shared process structure. */ cred = td->td_ucred; tmpcred = crdup(cred); tmpcred->cr_uid = cred->cr_ruid; tmpcred->cr_groups[0] = cred->cr_rgid; td->td_ucred = tmpcred; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td); if ((error = namei(&nd)) != 0) goto out1; vp = nd.ni_vp; error = vn_access(vp, flags, tmpcred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); out1: td->td_ucred = cred; crfree(tmpcred); return (error); } /* * Check access permissions using "effective" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct eaccess_args { char *path; int flags; }; #endif int eaccess(td, uap) struct thread *td; register struct eaccess_args /* { char *path; int flags; } */ *uap; { struct nameidata nd; struct vnode *vp; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_access(vp, uap->flags, td->td_ucred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(td, uap) struct thread *td; register struct ostat_args /* { char *path; struct ostat *ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout(&osb, uap->ub, sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(td, uap) struct thread *td; register struct olstat_args /* { char *path; struct ostat *ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout(&osb, uap->ub, sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. 
*/ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(td, uap) struct thread *td; register struct stat_args /* { char *path; struct stat *ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; #ifdef LOOKUP_SHARED NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); #else NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); #endif if ((error = namei(&nd)) != 0) return (error); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); if (error) return (error); error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(td, uap) struct thread *td; register struct lstat_args /* { char *path; struct stat *ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (error) return (error); error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } /* * Implementation of the NetBSD stat() function. * XXX This should probably be collapsed with the FreeBSD version, * as the differences are only due to vn_stat() clearing spares at * the end of the structures. vn_stat could be split to avoid this, * and thus collapse the following to close to zero code. */ void cvtnstat(sb, nsb) struct stat *sb; struct nstat *nsb; { bzero(nsb, sizeof *nsb); nsb->st_dev = sb->st_dev; nsb->st_ino = sb->st_ino; nsb->st_mode = sb->st_mode; nsb->st_nlink = sb->st_nlink; nsb->st_uid = sb->st_uid; nsb->st_gid = sb->st_gid; nsb->st_rdev = sb->st_rdev; nsb->st_atimespec = sb->st_atimespec; nsb->st_mtimespec = sb->st_mtimespec; nsb->st_ctimespec = sb->st_ctimespec; nsb->st_size = sb->st_size; nsb->st_blocks = sb->st_blocks; nsb->st_blksize = sb->st_blksize; nsb->st_flags = sb->st_flags; nsb->st_gen = sb->st_gen; nsb->st_birthtimespec = sb->st_birthtimespec; } #ifndef _SYS_SYSPROTO_H_ struct nstat_args { char *path; struct nstat *ub; }; #endif /* ARGSUSED */ int nstat(td, uap) struct thread *td; register struct nstat_args /* { char *path; struct nstat *ub; } */ *uap; { struct stat sb; struct nstat nsb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); vput(nd.ni_vp); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } /* * NetBSD lstat. 
Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct nlstat_args { char *path; struct nstat *ub; }; #endif /* ARGSUSED */ int nlstat(td, uap) struct thread *td; register struct nlstat_args /* { char *path; struct nstat *ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nstat nsb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); vput(vp); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(td, uap) struct thread *td; register struct pathconf_args /* { char *path; int name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); /* If asynchronous I/O is available, it works for all files. */ if (uap->name == _PC_ASYNC_IO) td->td_retval[0] = async_io_version; else error = VOP_PATHCONF(nd.ni_vp, uap->name, td->td_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(td, uap) struct thread *td; register struct readlink_args /* { char *path; char *buf; int count; } */ *uap; { return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->count)); } int kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, int count) { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; #ifdef MAC error = mac_check_vnode_readlink(td->td_ucred, vp); if (error) { vput(vp); return (error); } #endif if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; auio.uio_resid = count; error = VOP_READLINK(vp, &auio, td->td_ucred); } vput(vp); td->td_retval[0] = count - auio.uio_resid; return (error); } /* * Common implementation code for chflags() and fchflags(). */ static int setfflags(td, vp, flags) struct thread *td; struct vnode *vp; int flags; { int error; struct mount *mp; struct vattr vattr; /* * Prevent non-root users from setting flags on devices. When * a device is reused, users can retain ownership of the device * if they are allowed to set flags and programs assume that * chown can't fail when done as root.
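 *
 * (Illustrative, not part of this revision: setfflags() is the common code
 * behind chflags(2), lchflags(2) and fchflags(2).  An ordinary file owner,
 * assuming <sys/stat.h>, <unistd.h> and <err.h>, might call
 *
 *	if (chflags("/home/user/core", UF_NODUMP) == -1)
 *		warn("chflags");
 *
 * while flag changes on device nodes are reserved to the superuser just
 * below.)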
*/ if (vp->v_type == VCHR || vp->v_type == VBLK) { error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) return (error); } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_flags = flags; #ifdef MAC error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(td, uap) struct thread *td; register struct chflags_args /* { char *path; int flags; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, uap->flags); vrele(nd.ni_vp); return error; } /* * Same as chflags() but doesn't follow symlinks. */ int lchflags(td, uap) struct thread *td; register struct lchflags_args /* { char *path; int flags; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, uap->flags); vrele(nd.ni_vp); return error; } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(td, uap) struct thread *td; register struct fchflags_args /* { int fd; int flags; } */ *uap; { struct file *fp; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); error = setfflags(td, fp->f_vnode, uap->flags); fdrop(fp, td); return (error); } /* * Common implementation code for chmod(), lchmod() and fchmod(). */ static int setfmode(td, vp, mode) struct thread *td; struct vnode *vp; int mode; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; #ifdef MAC error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(td, uap) struct thread *td; register struct chmod_args /* { char *path; int mode; } */ *uap; { return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, mode); vrele(nd.ni_vp); return error; } /* * Change mode of a file given path name (don't follow links.) 
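 *
 * (Illustrative sketch, not part of this file: lchmod(2) differs from
 * chmod(2) only in the NOFOLLOW lookup below.  Given a symbolic link
 * /tmp/scratch pointing at /var/tmp, and assuming <sys/stat.h>,
 *
 *	lchmod("/tmp/scratch", 0600);	changes the link itself
 *	chmod("/tmp/scratch", 0600);	follows the link to /var/tmp
 *
 * Both end up in setfmode() above.)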
*/ #ifndef _SYS_SYSPROTO_H_ struct lchmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int lchmod(td, uap) struct thread *td; register struct lchmod_args /* { char *path; int mode; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, uap->mode); vrele(nd.ni_vp); return error; } /* * Change mode of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(td, uap) struct thread *td; register struct fchmod_args /* { int fd; int mode; } */ *uap; { struct file *fp; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); error = setfmode(td, fp->f_vnode, uap->mode); fdrop(fp, td); return (error); } /* * Common implementation for chown(), lchown(), and fchown() */ static int setfown(td, vp, uid, gid) struct thread *td; struct vnode *vp; uid_t uid; gid_t gid; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_uid = uid; vattr.va_gid = gid; #ifdef MAC error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid, vattr.va_gid); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(td, uap) struct thread *td; register struct chown_args /* { char *path; int uid; int gid; } */ *uap; { return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); } int kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid) { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, uid, gid); vrele(nd.ni_vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(td, uap) struct thread *td; register struct lchown_args /* { char *path; int uid; int gid; } */ *uap; { return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); } int kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid) { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, uid, gid); vrele(nd.ni_vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(td, uap) struct thread *td; register struct fchown_args /* { int fd; int uid; int gid; } */ *uap; { struct file *fp; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); error = setfown(td, fp->f_vnode, uap->uid, uap->gid); fdrop(fp, td); return (error); } /* * Common implementation code for utimes(), lutimes(), and futimes(). 
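 *
 * (Illustrative, not part of this revision: getutimes() below implements
 * the utimes(2) convention that a NULL times pointer means "now".
 * Assuming <sys/time.h>, both of the following reach setutimes():
 *
 *	struct timeval tv[2] = { { 0, 0 }, { 0, 0 } };
 *
 *	utimes("/tmp/f", NULL);		stamp with the current time
 *	utimes("/tmp/f", tv);		stamp with the epoch
 *
 * The NULL case also sets VA_UTIMES_NULL so the filesystem may apply its
 * relaxed permission check.)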
*/ static int getutimes(usrtvp, tvpseg, tsp) const struct timeval *usrtvp; enum uio_seg tvpseg; struct timespec *tsp; { struct timeval tv[2]; const struct timeval *tvp; int error; if (usrtvp == NULL) { microtime(&tv[0]); TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); tsp[1] = tsp[0]; } else { if (tvpseg == UIO_SYSSPACE) { tvp = usrtvp; } else { if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) return (error); tvp = tv; } TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); } return 0; } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int setutimes(td, vp, ts, numtimes, nullflag) struct thread *td; struct vnode *vp; const struct timespec *ts; int numtimes; int nullflag; { int error, setbirthtime; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); setbirthtime = 0; if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 && timespeccmp(&ts[1], &vattr.va_birthtime, < )) setbirthtime = 1; VATTR_NULL(&vattr); vattr.va_atime = ts[0]; vattr.va_mtime = ts[1]; if (setbirthtime) vattr.va_birthtime = ts[1]; if (numtimes > 2) vattr.va_birthtime = ts[2]; if (nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; #ifdef MAC error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime, vattr.va_mtime); #endif if (error == 0) error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return error; } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(td, uap) struct thread *td; register struct utimes_args /* { char *path; struct timeval *tptr; } */ *uap; { return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; int error; struct nameidata nd; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct lutimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int lutimes(td, uap) struct thread *td; register struct lutimes_args /* { char *path; struct timeval *tptr; } */ *uap; { return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; int error; struct nameidata nd; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. 
*/ #ifndef _SYS_SYSPROTO_H_ struct futimes_args { int fd; struct timeval *tptr; }; #endif /* ARGSUSED */ int futimes(td, uap) struct thread *td; register struct futimes_args /* { int fd; struct timeval *tptr; } */ *uap; { return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); } int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0) return (error); error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); fdrop(fp, td); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(td, uap) struct thread *td; register struct truncate_args /* { char *path; int pad; off_t length; } */ *uap; { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { VATTR_NULL(&vattr); vattr.va_size = length; error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); } vput(vp); vn_finished_write(mp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(td, uap) struct thread *td; register struct ftruncate_args /* { int fd; int pad; off_t length; } */ *uap; { struct mount *mp; struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FWRITE) == 0) { fdrop(fp, td); return (EINVAL); } vp = fp->f_vnode; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { fdrop(fp, td); return (error); } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = uap->length; error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); fdrop(fp, td); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(td, uap) struct thread *td; register struct otruncate_args /* { char *path; long length; } */ *uap; { struct truncate_args /* { char *path; int pad; off_t length; } */ nuap; nuap.path = uap->path; nuap.length = uap->length; return (truncate(td, &nuap)); } /* * Truncate a file given a file descriptor. 
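 *
 * (Illustrative, not part of this revision: the old and new entry points
 * funnel into the same code.  A current caller, assuming <unistd.h> and
 * <err.h>, would write
 *
 *	if (ftruncate(fd, 0) == -1)
 *		warn("ftruncate");
 *
 * which requires a descriptor opened for writing, exactly the FWRITE
 * check made in ftruncate() above.)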
*/ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(td, uap) struct thread *td; register struct oftruncate_args /* { int fd; long length; } */ *uap; { struct ftruncate_args /* { int fd; int pad; off_t length; } */ nuap; nuap.fd = uap->fd; nuap.length = uap->length; return (ftruncate(td, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(td, uap) struct thread *td; struct fsync_args /* { int fd; } */ *uap; { struct vnode *vp; struct mount *mp; struct file *fp; vm_object_t obj; int error; GIANT_REQUIRED; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vp = fp->f_vnode; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { fdrop(fp, td); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (VOP_GETVOBJECT(vp, &obj) == 0) { VM_OBJECT_LOCK(obj); vm_object_page_clean(obj, 0, 0, 0); VM_OBJECT_UNLOCK(obj); } error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td); if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) && softdep_fsync_hook != NULL) error = (*softdep_fsync_hook)(vp); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); fdrop(fp, td); return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(td, uap) struct thread *td; register struct rename_args /* { char *from; char *to; } */ *uap; { return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); } int kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) { struct mount *mp = NULL; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; bwillwrite(); #ifdef MAC NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART, pathseg, from, td); #else NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, pathseg, from, td); #endif if ((error = namei(&fromnd)) != 0) return (error); #ifdef MAC error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd); VOP_UNLOCK(fromnd.ni_dvp, 0, td); VOP_UNLOCK(fromnd.ni_vp, 0, td); #endif fvp = fromnd.ni_vp; if (error == 0) error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); if (error != 0) { NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, pathseg, to, td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If the source is the same as the destination (that is, if they * are links to the same vnode), then there is nothing to do. 
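 *
 * (Illustrative, not part of this file: this is the POSIX "same file"
 * rule.  Assuming <stdio.h> and <unistd.h>,
 *
 *	link("a", "b");
 *	rename("a", "b");
 *
 * leaves both names in place and reports success; the error value of -1
 * set just below is turned back into 0 at the out1 label.)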
*/ if (fvp == tvp) error = -1; #ifdef MAC else error = mac_check_vnode_rename_to(td->td_ucred, tdvp, tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); #endif out: if (!error) { VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); out1: vn_finished_write(mp); if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(td, uap) struct thread *td; register struct mkdir_args /* { char *path; int mode; } */ *uap; { return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME, segflg, path, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); /* * XXX namei called with LOCKPARENT but not LOCKLEAF has * the strange behaviour of leaving the vnode unlocked * if the target is the same vnode as the parent. */ if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VDIR; FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(td, uap) struct thread *td; struct rmdir_args /* { char *path; } */ *uap; { return (kern_rmdir(td, uap->path, UIO_USERSPACE)); } int kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. 
*/ if (vp->v_vflag & VV_ROOT) { error = EBUSY; goto out; } #ifdef MAC error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error) goto out; #endif if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(td, uap) struct thread *td; register struct ogetdirentries_args /* { int fd; char *buf; u_int count; long *basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; /* XXX arbitrary sanity limit on `count'. */ if (uap->count > 64 * 1024) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; unionread: if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error) { VOP_UNLOCK(vp, 0, td); fdrop(fp, td); return (error); } #endif # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = uap->count; MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = uap->count - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return (error); } if (uap->count == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; if (error) { fdrop(fp, td); return (error); } } /* * XXX We could delay dropping the lock above but * union_dircheckp complicates things. 
*/ vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td); if ((vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; fp->f_offset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0, td); } error = copyout(&loff, uap->basep, sizeof(long)); fdrop(fp, td); td->td_retval[0] = uap->count - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(td, uap) struct thread *td; register struct getdirentries_args /* { int fd; char *buf; u_int count; long *basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; unionread: if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return (error); } if (uap->count == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; if (error) { fdrop(fp, td); return (error); } } /* * XXX We could delay dropping the lock above but * union_dircheckp complicates things. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if ((vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; fp->f_offset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0, td); } if (uap->basep != NULL) { error = copyout(&loff, uap->basep, sizeof(long)); } td->td_retval[0] = uap->count - auio.uio_resid; fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getdents_args { int fd; char *buf; size_t count; }; #endif int getdents(td, uap) struct thread *td; register struct getdents_args /* { int fd; char *buf; u_int count; } */ *uap; { struct getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return getdirentries(td, &ap); } /* * Set the mode mask for creation of filesystem nodes. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(td, uap) struct thread *td; struct umask_args /* { int newmask; } */ *uap; { register struct filedesc *fdp; FILEDESC_LOCK(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = uap->newmask & ALLPERMS; FILEDESC_UNLOCK(td->td_proc->p_fd); return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. 
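 *
 * (Illustrative sketch, not part of this revision: revoke(2) is typically
 * applied to terminal devices so that stale opens are detached before the
 * device is reused.  Assuming <unistd.h> and <err.h>,
 *
 *	if (revoke("/dev/ttyv1") == -1)
 *		warn("revoke");
 *
 * Only character devices are accepted, and a caller who does not own the
 * node needs superuser privilege, as checked below.)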
*/ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(td, uap) struct thread *td; register struct revoke_args /* { char *path; } */ *uap; { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VCHR) { vput(vp); return (EINVAL); } #ifdef MAC error = mac_check_vnode_revoke(td->td_ucred, vp); if (error) { vput(vp); return (error); } #endif error = VOP_GETATTR(vp, &vattr, td->td_ucred, td); if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, td); if (td->td_ucred->cr_uid != vattr.va_uid) { error = suser_cred(td->td_ucred, PRISON_ROOT); if (error) goto out; } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto out; if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); vn_finished_write(mp); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. * The file entry is locked upon returning. */ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { int error; struct file *fp; fp = NULL; if (fdp == NULL) error = EBADF; else { FILEDESC_LOCK(fdp); if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) error = EBADF; else if (fp->f_vnode == NULL) { fp = NULL; error = EINVAL; } else { fhold(fp); error = 0; } FILEDESC_UNLOCK(fdp); } *fpp = fp; return (error); } /* * Get (NFS) file handle */ #ifndef _SYS_SYSPROTO_H_ struct getfh_args { char *fname; fhandle_t *fhp; }; #endif int getfh(td, uap) struct thread *td; register struct getfh_args *uap; { struct nameidata nd; fhandle_t fh; register struct vnode *vp; int error; /* * Must be super user */ error = suser(td); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fh.fh_fid); vput(vp); if (error) return (error); error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } /* * syscall for the rpc.lockd to use to translate a NFS file handle into * an open descriptor. * * warning: do not remove the suser() call or this becomes one giant * security hole. */ #ifndef _SYS_SYSPROTO_H_ struct fhopen_args { const struct fhandle *u_fhp; int flags; }; #endif int fhopen(td, uap) struct thread *td; struct fhopen_args /* { const struct fhandle *u_fhp; int flags; } */ *uap; { struct proc *p = td->td_proc; struct mount *mp; struct vnode *vp; struct fhandle fhp; struct vattr vat; struct vattr *vap = &vat; struct flock lf; struct file *fp; register struct filedesc *fdp = p->p_fd; int fmode, mode, error, type; struct file *nfp; int indx; /* * Must be super user */ error = suser(td); if (error) return (error); fmode = FFLAGS(uap->flags); /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); if (error) return(error); /* find the mount point */ mp = vfs_getvfs(&fhp.fh_fsid); if (mp == NULL) return (ESTALE); /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); if (error) return (error); /* * from now on we have to make sure not * to forget about the vnode * any error that causes an abort must vput(vp) * just set error = err and 'goto bad;'. 
*/ /* * from vn_open */ if (vp->v_type == VLNK) { error = EMLINK; goto bad; } if (vp->v_type == VSOCK) { error = EOPNOTSUPP; goto bad; } mode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) { error = EISDIR; goto bad; } error = vn_writechk(vp); if (error) goto bad; mode |= VWRITE; } if (fmode & FREAD) mode |= VREAD; if (fmode & O_APPEND) mode |= VAPPEND; #ifdef MAC error = mac_check_vnode_open(td->td_ucred, vp, mode); if (error) goto bad; #endif if (mode) { error = VOP_ACCESS(vp, mode, td->td_ucred, td); if (error) goto bad; } if (fmode & O_TRUNC) { VOP_UNLOCK(vp, 0, td); /* XXX */ if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); return (error); } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */ #ifdef MAC /* * We don't yet have fp->f_cred, so use td->td_ucred, which * should be right. */ error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp); if (error == 0) { #endif VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, td->td_ucred, td); #ifdef MAC } #endif vn_finished_write(mp); if (error) goto bad; } error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1); if (error) goto bad; /* * Make sure that a VM object is created for VMIO support. */ if (vn_canvmio(vp) == TRUE) { if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0) goto bad; } if (fmode & FWRITE) vp->v_writecount++; /* * end of vn_open code */ if ((error = falloc(td, &nfp, &indx)) != 0) { if (fmode & FWRITE) vp->v_writecount--; goto bad; } /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; nfp->f_vnode = vp; nfp->f_data = vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; nfp->f_type = DTYPE_VNODE; if (fmode & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (fmode & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, td); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { /* * The lock request failed. Normally close the * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. */ FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; FILEDESC_UNLOCK(fdp); fdrop(fp, td); } else FILEDESC_UNLOCK(fdp); /* * release our private reference */ fdrop(fp, td); return(error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0)) vfs_object_create(vp, td, td->td_ucred); VOP_UNLOCK(vp, 0, td); fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: vput(vp); return (error); } /* * Stat an (NFS) file handle. */ #ifndef _SYS_SYSPROTO_H_ struct fhstat_args { struct fhandle *u_fhp; struct stat *sb; }; #endif int fhstat(td, uap) struct thread *td; register struct fhstat_args /* { struct fhandle *u_fhp; struct stat *sb; } */ *uap; { struct stat sb; fhandle_t fh; struct mount *mp; struct vnode *vp; int error; /* * Must be super user */ error = suser(td); if (error) return (error); error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) return (ESTALE); if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) return (error); error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); vput(vp); if (error) return (error); error = copyout(&sb, uap->sb, sizeof(sb)); return (error); } /* * Implement fstatfs() for (NFS) file handles. 
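 *
 * (Illustrative, not part of this revision: the file-handle syscalls exist
 * for privileged NFS helpers such as rpc.lockd.  A superuser caller,
 * assuming <sys/param.h>, <sys/mount.h> and <err.h>, might do
 *
 *	fhandle_t fh;
 *	struct statfs sfs;
 *
 *	if (getfh("/export/data", &fh) == -1 || fhstatfs(&fh, &sfs) == -1)
 *		err(1, "file handle statfs");
 *
 * The change below pre-fills f_version, f_namemax and f_flags so the
 * returned structure is sane even if the filesystem does not set them.)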
*/ #ifndef _SYS_SYSPROTO_H_ struct fhstatfs_args { struct fhandle *u_fhp; struct statfs *buf; }; #endif int fhstatfs(td, uap) struct thread *td; struct fhstatfs_args /* { struct fhandle *u_fhp; struct statfs *buf; } */ *uap; { - struct statfs *sp; + struct statfs *sp, sb; struct mount *mp; struct vnode *vp; - struct statfs sb; fhandle_t fh; int error; /* * Must be super user */ error = suser(td); if (error) return (error); if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) return (ESTALE); if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) return (error); mp = vp->v_mount; sp = &mp->mnt_stat; vput(vp); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) return (error); #endif + /* + * Set these in case the underlying filesystem fails to do so. + */ + sp->f_version = STATFS_VERSION; + sp->f_namemax = NAME_MAX; + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if ((error = VFS_STATFS(mp, sp, td)) != 0) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout(sp, uap->buf, sizeof(*sp))); } /* * Syscall to push extended attribute configuration information into the * VFS. Accepts a path, which it converts to a mountpoint, as well as * a command (int cmd), and attribute name and misc data. For now, the * attribute name is left in userspace for consumption by the VFS_op. * It will probably be changed to be copied into sysspace by the * syscall in the future, once issues with various consumers of the * attribute code have raised their hands. * * Currently this is used only by UFS Extended Attributes. */ int extattrctl(td, uap) struct thread *td; struct extattrctl_args /* { const char *path; int cmd; const char *filename; int attrnamespace; const char *attrname; } */ *uap; { struct vnode *filename_vp; struct nameidata nd; struct mount *mp, *mp_writable; char attrname[EXTATTR_MAXNAMELEN]; int error; /* * uap->attrname is not always defined. We check again later when we * invoke the VFS call so as to pass in NULL there if needed. */ if (uap->attrname != NULL) { error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); } /* * uap->filename is not always defined. If it is, grab a vnode lock, * which VFS_EXTATTRCTL() will later release. */ filename_vp = NULL; if (uap->filename != NULL) { NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->filename, td); error = namei(&nd); if (error) return (error); filename_vp = nd.ni_vp; NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK); } /* uap->path is always defined. */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) { if (filename_vp != NULL) vput(filename_vp); return (error); } mp = nd.ni_vp->v_mount; error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH); NDFREE(&nd, 0); if (error) { if (filename_vp != NULL) vput(filename_vp); return (error); } error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace, uap->attrname != NULL ? attrname : NULL, td); vn_finished_write(mp_writable); /* * VFS_EXTATTRCTL will have unlocked, but not de-ref'd, * filename_vp, so vrele it if it is defined. 
*/ if (filename_vp != NULL) vrele(filename_vp); return (error); } /*- * Set a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct mount *mp; struct uio auio; struct iovec aiov; ssize_t cnt; int error; error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; cnt = nbytes; #ifdef MAC error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace, attrname, &auio); if (error) goto done; #endif error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, td->td_ucred, td); cnt -= auio.uio_resid; td->td_retval[0] = cnt; done: VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_set_fd(td, uap) struct thread *td; struct extattr_set_fd_args /* { int fd; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); error = extattr_set_vp(fp->f_vnode, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); return (error); } int extattr_set_file(td, uap) struct thread *td; struct extattr_set_file_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } int extattr_set_link(td, uap) struct thread *td; struct extattr_set_link_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } /*- * Get a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". 
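 *
 * (Illustrative, not part of this file: the userspace entry points are
 * extattr_get_file(2) and friends.  Assuming <sys/types.h> and
 * <sys/extattr.h>, and an arbitrary example attribute named "md5",
 *
 *	char sum[33];
 *	ssize_t n = extattr_get_file("/tmp/f", EXTATTR_NAMESPACE_USER,
 *	    "md5", sum, sizeof(sum));
 *
 * and passing a NULL buffer instead returns only the attribute's size,
 * the "slightly unusual semantics" handled below.)
 *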
* Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct uio auio, *auiop; struct iovec aiov; ssize_t cnt; size_t size, *sizep; int error; /* * XXX: Temporary API compatibility for applications that know * about this hack ("" means list), but haven't been updated * for the extattr_list_*() system calls yet. This will go * away for FreeBSD 5.3. */ if (strlen(attrname) == 0) return (extattr_list_vp(vp, attrnamespace, data, nbytes, td)); VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* * Slightly unusual semantics: if the user provides a NULL data * pointer, they don't want to receive the data, just the * maximum read length. */ auiop = NULL; sizep = NULL; cnt = 0; if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auiop = &auio; cnt = nbytes; } else sizep = &size; #ifdef MAC error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace, attrname, &auio); if (error) goto done; #endif error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep, td->td_ucred, td); if (auiop != NULL) { cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else td->td_retval[0] = size; done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_get_fd(td, uap) struct thread *td; struct extattr_get_fd_args /* { int fd; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); error = extattr_get_vp(fp->f_vnode, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); return (error); } int extattr_get_file(td, uap) struct thread *td; struct extattr_get_file_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } int extattr_get_link(td, uap) struct thread *td; struct extattr_get_link_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } /* * extattr_delete_vp(): Delete a named extended attribute on a file or * directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer 
"attrname", proc "p" * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname, struct thread *td) { struct mount *mp; int error; error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); #ifdef MAC error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace, attrname); if (error) goto done; #endif error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred, td); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred, td); #ifdef MAC done: #endif VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_delete_fd(td, uap) struct thread *td; struct extattr_delete_fd_args /* { int fd; int attrnamespace; const char *attrname; } */ *uap; { struct file *fp; struct vnode *vp; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vp = fp->f_vnode; error = extattr_delete_vp(vp, uap->attrnamespace, attrname, td); fdrop(fp, td); return (error); } int extattr_delete_file(td, uap) struct thread *td; struct extattr_delete_file_args /* { const char *path; int attrnamespace; const char *attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); return(error); } int extattr_delete_link(td, uap) struct thread *td; struct extattr_delete_link_args /* { const char *path; int attrnamespace; const char *attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); return(error); } /*- * Retrieve a list of extended attributes on a file or directory. * * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace", * userspace buffer pointer "data", buffer length "nbytes", * thread "td". 
* Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes, struct thread *td) { struct uio auio, *auiop; size_t size, *sizep; struct iovec aiov; ssize_t cnt; int error; VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); auiop = NULL; sizep = NULL; cnt = 0; if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auiop = &auio; cnt = nbytes; } else sizep = &size; #ifdef MAC error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace); if (error) goto done; #endif error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep, td->td_ucred, td); if (auiop != NULL) { cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else td->td_retval[0] = size; done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_list_fd(td, uap) struct thread *td; struct extattr_list_fd_args /* { int fd; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct file *fp; int error; error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data, uap->nbytes, td); fdrop(fp, td); return (error); } int extattr_list_file(td, uap) struct thread*td; struct extattr_list_file_args /* { const char *path; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } int extattr_list_link(td, uap) struct thread*td; struct extattr_list_link_args /* { const char *path; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data, uap->nbytes, td); vrele(nd.ni_vp); return (error); } Index: head/sys/sys/mount.h =================================================================== --- head/sys/sys/mount.h (revision 122536) +++ head/sys/sys/mount.h (revision 122537) @@ -1,550 +1,577 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mount.h 8.21 (Berkeley) 5/20/95 * $FreeBSD$ */ #ifndef _SYS_MOUNT_H_ #define _SYS_MOUNT_H_ #include #include #ifdef _KERNEL #include #include #include #endif typedef struct fsid { int32_t val[2]; } fsid_t; /* filesystem id type */ /* * File identifier. * These are unique per filesystem on a single machine. */ #define MAXFIDSZ 16 struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_reserved; /* force longword alignment */ char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* * filesystem statistics */ +#define MFSNAMELEN 16 /* length of type name including null */ +#define MNAMELEN 88 /* size of on/from name bufs */ +#define STATFS_VERSION 0x20030518 /* current version number */ +struct statfs { + uint32_t f_version; /* structure version number */ + uint32_t f_type; /* type of filesystem */ + uint64_t f_flags; /* copy of mount exported flags */ + uint64_t f_bsize; /* filesystem fragment size */ + uint64_t f_iosize; /* optimal transfer block size */ + uint64_t f_blocks; /* total data blocks in filesystem */ + uint64_t f_bfree; /* free blocks in filesystem */ + int64_t f_bavail; /* free blocks avail to non-superuser */ + uint64_t f_files; /* total file nodes in filesystem */ + int64_t f_ffree; /* free nodes avail to non-superuser */ + uint64_t f_syncwrites; /* count of sync writes since mount */ + uint64_t f_asyncwrites; /* count of async writes since mount */ + uint64_t f_syncreads; /* count of sync reads since mount */ + uint64_t f_asyncreads; /* count of async reads since mount */ + uint64_t f_spare[10]; /* unused spare */ + uint32_t f_namemax; /* maximum filename length */ + uid_t f_owner; /* user that mounted the filesystem */ + fsid_t f_fsid; /* filesystem id */ + char f_charspare[80]; /* spare string space */ + char f_fstypename[MFSNAMELEN]; /* filesystem type name */ + char f_mntfromname[MNAMELEN]; /* mounted filesystem */ + char f_mntonname[MNAMELEN]; /* directory on which mounted */ +}; -#define MFSNAMELEN 16 /* length of fs type name, including null */ -#define MNAMELEN (88 - 2 * sizeof(long)) /* size of on/from name bufs */ +#ifdef _KERNEL +#define OMFSNAMELEN 16 /* length of fs type name, including null */ +#define OMNAMELEN (88 - 2 * sizeof(long)) /* size of on/from name bufs */ /* XXX getfsstat.2 is out of date with write and read counter changes here. */ /* XXX statfs.2 is out of date with read counter changes here. 
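The replacement statfs layout above moves every counter to a fixed-width type and stamps the structure with STATFS_VERSION. A short userland sketch that reads it back through statfs(2) and prints a few of the widened fields; the path is illustrative:

#include <sys/param.h>
#include <sys/mount.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        struct statfs sfs;

        if (statfs("/", &sfs) == -1)
                err(1, "statfs");

        /* f_version should carry STATFS_VERSION (0x20030518). */
        printf("version:  0x%08x\n", (unsigned int)sfs.f_version);
        printf("fs:       %s on %s from %s\n", sfs.f_fstypename,
            sfs.f_mntonname, sfs.f_mntfromname);
        printf("fragment: %ju bytes\n", (uintmax_t)sfs.f_bsize);
        printf("blocks:   %ju total, %ju free, %jd avail\n",
            (uintmax_t)sfs.f_blocks, (uintmax_t)sfs.f_bfree,
            (intmax_t)sfs.f_bavail);
        return (0);
}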
*/ -struct statfs { +struct ostatfs { long f_spare2; /* placeholder */ long f_bsize; /* fundamental filesystem block size */ long f_iosize; /* optimal transfer block size */ long f_blocks; /* total data blocks in filesystem */ long f_bfree; /* free blocks in fs */ long f_bavail; /* free blocks avail to non-superuser */ long f_files; /* total file nodes in filesystem */ long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* filesystem id */ uid_t f_owner; /* user that mounted the filesystem */ int f_type; /* type of filesystem */ int f_flags; /* copy of mount exported flags */ long f_syncwrites; /* count of sync writes since mount */ long f_asyncwrites; /* count of async writes since mount */ - char f_fstypename[MFSNAMELEN]; /* fs type name */ - char f_mntonname[MNAMELEN]; /* directory on which mounted */ + char f_fstypename[OMFSNAMELEN]; /* fs type name */ + char f_mntonname[OMNAMELEN]; /* directory on which mounted */ long f_syncreads; /* count of sync reads since mount */ long f_asyncreads; /* count of async reads since mount */ short f_spares1; /* unused spare */ - char f_mntfromname[MNAMELEN];/* mounted filesystem */ + char f_mntfromname[OMNAMELEN];/* mounted filesystem */ short f_spares2; /* unused spare */ /* * XXX on machines where longs are aligned to 8-byte boundaries, there * is an unnamed int32_t here. This spare was after the apparent end * of the struct until we bit off the read counters from f_mntonname. */ long f_spare[2]; /* unused spare */ }; -#ifdef _KERNEL #define MMAXOPTIONLEN 65536 /* maximum length of a mount option */ TAILQ_HEAD(vnodelst, vnode); TAILQ_HEAD(vfsoptlist, vfsopt); struct vfsopt { TAILQ_ENTRY(vfsopt) link; char *name; void *value; int len; }; /* * Structure per mounted filesystem. Each mounted filesystem has an * array of operations and an instance record. The filesystems are * put on a doubly linked list. * * NOTE: mnt_nvnodelist and mnt_reservedvnlist. At the moment vnodes * are linked into mnt_nvnodelist. At some point in the near future the * vnode list will be split into a 'dirty' and 'clean' list. mnt_nvnodelist * will become the dirty list and mnt_reservedvnlist will become the 'clean' * list. Filesystem kld's syncing code should remain compatible since * they only need to scan the dirty vnode list (nvnodelist -> dirtyvnodelist). 
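The ostatfs layout retained above for binary compatibility is built from plain long fields, so its size and padding depend on the ABI; that is why the old name-buffer length was defined as (88 - 2 * sizeof(long)) and why the unnamed int32_t of padding mentioned in its comment appears only on machines with 8-byte longs. A throwaway sketch that makes the dependency visible; it defines nothing from the real headers:

#include <stdio.h>

int
main(void)
{
        /* 80 bytes on ILP32 ABIs, 72 bytes on LP64 ABIs. */
        printf("sizeof(long) = %zu\n", sizeof(long));
        printf("old-style name buffer: %zu bytes\n",
            88 - 2 * sizeof(long));
        return (0);
}

The new structure sidesteps the problem with uint64_t/int64_t counters and a constant MNAMELEN of 88.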
*/ struct mount { TAILQ_ENTRY(mount) mnt_list; /* mount list */ struct vfsops *mnt_op; /* operations on fs */ struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ struct vnode *mnt_syncer; /* syncer vnode */ struct vnodelst mnt_nvnodelist; /* list of vnodes this mount */ struct vnodelst mnt_reservedvnlist; /* (future) dirty vnode list */ struct lock mnt_lock; /* mount structure lock */ struct mtx mnt_mtx; /* mount structure interlock */ int mnt_writeopcount; /* write syscalls in progress */ int mnt_flag; /* flags shared with user */ struct vfsoptlist *mnt_opt; /* current mount options */ struct vfsoptlist *mnt_optnew; /* new options passed to fs */ int mnt_kern_flag; /* kernel only flags */ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ struct ucred *mnt_cred; /* credentials of mounter */ qaddr_t mnt_data; /* private data */ time_t mnt_time; /* last time written*/ int mnt_iosize_max; /* max size for clusters, etc */ struct netexport *mnt_export; /* export list */ struct label *mnt_mntlabel; /* MAC label for the mount */ struct label *mnt_fslabel; /* MAC label for the fs */ int mnt_nvnodelistsize; /* # of vnodes on this mount */ }; #define MNT_ILOCK(mp) mtx_lock(&(mp)->mnt_mtx) #define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx) #endif /* _KERNEL */ /* * User specifiable flags. */ #define MNT_RDONLY 0x00000001 /* read only filesystem */ #define MNT_SYNCHRONOUS 0x00000002 /* filesystem written synchronously */ #define MNT_NOEXEC 0x00000004 /* can't exec from filesystem */ #define MNT_NOSUID 0x00000008 /* don't honor setuid bits on fs */ #define MNT_NODEV 0x00000010 /* don't interpret special files */ #define MNT_UNION 0x00000020 /* union with underlying filesystem */ #define MNT_ASYNC 0x00000040 /* filesystem written asynchronously */ #define MNT_SUIDDIR 0x00100000 /* special handling of SUID on dirs */ #define MNT_SOFTDEP 0x00200000 /* soft updates being done */ #define MNT_NOSYMFOLLOW 0x00400000 /* do not follow symlinks */ #define MNT_JAILDEVFS 0x02000000 /* jail-friendly DEVFS behaviour */ #define MNT_MULTILABEL 0x04000000 /* MAC support for individual objects */ #define MNT_ACLS 0x08000000 /* ACL support enabled */ #define MNT_NOATIME 0x10000000 /* disable update of file access time */ #define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */ #define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */ /* * NFS export related mount flags. */ #define MNT_EXRDONLY 0x00000080 /* exported read only */ #define MNT_EXPORTED 0x00000100 /* filesystem is exported */ #define MNT_DEFEXPORTED 0x00000200 /* exported to the world */ #define MNT_EXPORTANON 0x00000400 /* use anon uid mapping for everyone */ #define MNT_EXKERB 0x00000800 /* exported with Kerberos uid mapping */ #define MNT_EXPUBLIC 0x20000000 /* public export (WebNFS) */ /* * Flags set by internal operations, * but visible to the user. * XXX some of these are not quite right.. (I've never seen the root flag set) */ #define MNT_LOCAL 0x00001000 /* filesystem is stored locally */ #define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */ #define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */ #define MNT_USER 0x00008000 /* mounted by a user */ #define MNT_IGNORE 0x00800000 /* do not show entry in df */ /* * Mask of flags that are visible to statfs(). * XXX I think that this could now become (~(MNT_CMDFLAGS)) * but the 'mount' program may need changing to handle this. 
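The user-visible flags above are what statfs() reports in f_flags (filtered through MNT_VISFLAGMASK, defined just below). A small sketch of testing them from userland; the path is illustrative:

#include <sys/param.h>
#include <sys/mount.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
        struct statfs sfs;

        if (statfs("/tmp", &sfs) == -1)
                err(1, "statfs");

        printf("%s is %s", sfs.f_mntonname,
            (sfs.f_flags & MNT_LOCAL) ? "local" : "not local");
        if (sfs.f_flags & MNT_RDONLY)
                printf(", read-only");
        if (sfs.f_flags & MNT_NOSUID)
                printf(", nosuid");
        printf("\n");
        return (0);
}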
*/ #define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ MNT_NOSUID | MNT_NODEV | MNT_UNION | \ MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ MNT_JAILDEVFS | MNT_MULTILABEL | MNT_ACLS) /* Mask of flags that can be updated. */ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | \ MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | \ MNT_NOATIME | \ MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_JAILDEVFS | \ MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | \ MNT_ACLS ) /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. * XXX These are not STATES and really should be somewhere else. */ #define MNT_UPDATE 0x00010000 /* not a real mount, just an update */ #define MNT_DELEXPORT 0x00020000 /* delete export host lists */ #define MNT_RELOAD 0x00040000 /* reload filesystem data */ #define MNT_FORCE 0x00080000 /* force unmount or readonly change */ #define MNT_SNAPSHOT 0x01000000 /* snapshot the filesystem */ #define MNT_BYFSID 0x08000000 /* specify filesystem by ID. */ #define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \ MNT_FORCE | MNT_SNAPSHOT | MNT_BYFSID) /* * Internal filesystem control flags stored in mnt_kern_flag. * * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed * past the mount point. This keeps the subtree stable during mounts * and unmounts. * * MNTK_UNMOUNTF permits filesystems to detect a forced unmount while * dounmount() is still waiting to lock the mountpoint. This allows * the filesystem to cancel operations that might otherwise deadlock * with the unmount attempt (used by NFS). */ #define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ #define MNTK_SUSPEND 0x08000000 /* request write suspension */ #define MNTK_SUSPENDED 0x10000000 /* write operations are suspended */ /* * Sysctl CTL_VFS definitions. * * Second level identifier specifies which filesystem. Second level * identifier VFS_VFSCONF returns information about all filesystems. * Second level identifier VFS_GENERIC is non-terminal. */ #define VFS_VFSCONF 0 /* get configured filesystems */ #define VFS_GENERIC 0 /* generic filesystem information */ /* * Third level identifiers for VFS_GENERIC are given below; third * level identifiers for specific filesystems are given in their * mount specific header files. */ #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ /* * Flags for various system call interfaces. * * waitfor flags to vfs_sync() and getfsstat() */ #define MNT_WAIT 1 /* synchronously wait for I/O to complete */ #define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ #define MNT_LAZY 3 /* push data not written by filesystem syncer */ /* * Generic file handle */ struct fhandle { fsid_t fh_fsid; /* Filesystem id of mount point */ struct fid fh_fid; /* Filesys specific id */ }; typedef struct fhandle fhandle_t; /* * Export arguments for local filesystem mount calls. 
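fhandle_t just above is the generic file handle consumed by the handle-based syscalls declared near the end of this header. A hedged sketch of using them; the path is illustrative, and getfh()/fhopen() normally require superuser privilege:

#include <sys/param.h>
#include <sys/mount.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct statfs sfs;
        fhandle_t fh;
        int fd;

        /* Translate a path into an fsid + fid pair. */
        if (getfh("/var/log/messages", &fh) == -1)
                err(1, "getfh");

        /* Re-open the file by handle, without using the path again. */
        if ((fd = fhopen(&fh, O_RDONLY)) == -1)
                err(1, "fhopen");

        /* Statistics for the filesystem that holds the handle. */
        if (fhstatfs(&fh, &sfs) == -1)
                err(1, "fhstatfs");
        printf("handle lives on %s\n", sfs.f_mntonname);

        close(fd);
        return (0);
}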
*/ struct export_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct xucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ u_char ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ u_char ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ }; /* * Structure holding information for a publicly exported filesystem * (WebNFS). Currently the specs allow just for one such filesystem. */ struct nfs_public { int np_valid; /* Do we hold valid information */ fhandle_t np_handle; /* Filehandle for pub fs (internal) */ struct mount *np_mount; /* Mountpoint of exported fs */ char *np_index; /* Index file */ }; /* * Filesystem configuration information. One of these exists for each * type of filesystem supported by the kernel. These are searched at * mount time to identify the requested filesystem. */ struct vfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsoptdecl *vfc_opts; /* mount options */ struct vfsconf *vfc_next; /* next in list */ }; /* Userland version of the struct vfsconf. */ struct xvfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsconf *vfc_next; /* next in list */ }; struct ovfsconf { void *vfc_vfsops; char vfc_name[32]; int vfc_index; int vfc_refcount; int vfc_flags; }; /* * NB: these flags refer to IMPLEMENTATION properties, not properties of * any actual mounts; i.e., it does not make sense to change the flags. */ #define VFCF_STATIC 0x00010000 /* statically compiled into kernel */ #define VFCF_NETWORK 0x00020000 /* may get data over the network */ #define VFCF_READONLY 0x00040000 /* writes are not implemented */ #define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ #define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode*/ struct iovec; struct uio; #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MOUNT); #endif extern int maxvfsconf; /* highest defined filesystem type */ extern int nfs_mount_type; /* vfc_typenum for nfs, or -1 */ extern struct vfsconf *vfsconf; /* head of list of filesystem types */ /* * Operations supported on mounted filesystem. 
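struct xvfsconf above is the userland view of a configured filesystem type; getvfsbyname(3), prototyped at the end of this header, fills one in by name. A brief sketch, assuming a ufs filesystem type is configured:

#include <sys/param.h>
#include <sys/mount.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
        struct xvfsconf vfc;

        if (getvfsbyname("ufs", &vfc) == -1)
                err(1, "getvfsbyname");

        printf("%s: typenum %d, %d mount(s), flags 0x%x\n",
            vfc.vfc_name, vfc.vfc_typenum, vfc.vfc_refcount,
            vfc.vfc_flags);
        return (0);
}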
*/ struct mount_args; struct nameidata; typedef int vfs_mount_t(struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct thread *td); typedef int vfs_start_t(struct mount *mp, int flags, struct thread *td); typedef int vfs_unmount_t(struct mount *mp, int mntflags, struct thread *td); typedef int vfs_root_t(struct mount *mp, struct vnode **vpp); typedef int vfs_quotactl_t(struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct thread *td); typedef int vfs_statfs_t(struct mount *mp, struct statfs *sbp, struct thread *td); typedef int vfs_sync_t(struct mount *mp, int waitfor, struct ucred *cred, struct thread *td); typedef int vfs_vget_t(struct mount *mp, ino_t ino, int flags, struct vnode **vpp); typedef int vfs_fhtovp_t(struct mount *mp, struct fid *fhp, struct vnode **vpp); typedef int vfs_checkexp_t(struct mount *mp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp); typedef int vfs_vptofh_t(struct vnode *vp, struct fid *fhp); typedef int vfs_init_t(struct vfsconf *); typedef int vfs_uninit_t(struct vfsconf *); typedef int vfs_extattrctl_t(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname, struct thread *td); typedef int vfs_nmount_t(struct mount *mp, struct nameidata *ndp, struct thread *td); struct vfsops { vfs_mount_t *vfs_mount; vfs_start_t *vfs_start; vfs_unmount_t *vfs_unmount; vfs_root_t *vfs_root; vfs_quotactl_t *vfs_quotactl; vfs_statfs_t *vfs_statfs; vfs_sync_t *vfs_sync; vfs_vget_t *vfs_vget; vfs_fhtovp_t *vfs_fhtovp; vfs_checkexp_t *vfs_checkexp; vfs_vptofh_t *vfs_vptofh; vfs_init_t *vfs_init; vfs_uninit_t *vfs_uninit; vfs_extattrctl_t *vfs_extattrctl; /* Additions below are not binary compatible with 5.0 and below. */ vfs_nmount_t *vfs_nmount; }; #define VFS_NMOUNT(MP, NDP, P) (*(MP)->mnt_op->vfs_nmount)(MP, NDP, P) #define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ (*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P) #define VFS_START(MP, FLAGS, P) (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P) #define VFS_UNMOUNT(MP, FORCE, P) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE, P) #define VFS_ROOT(MP, VPP) (*(MP)->mnt_op->vfs_root)(MP, VPP) #define VFS_QUOTACTL(MP,C,U,A,P) (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P) #define VFS_STATFS(MP, SBP, P) (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P) #define VFS_SYNC(MP, WAIT, C, P) (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P) #define VFS_VGET(MP, INO, FLAGS, VPP) \ (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP) #define VFS_FHTOVP(MP, FIDP, VPP) \ (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, VPP) #define VFS_VPTOFH(VP, FIDP) (*(VP)->v_mount->mnt_op->vfs_vptofh)(VP, FIDP) #define VFS_CHECKEXP(MP, NAM, EXFLG, CRED) \ (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED) #define VFS_EXTATTRCTL(MP, C, FN, NS, N, P) \ (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N, P) #include #define VFS_SET(vfsops, fsname, flags) \ static struct vfsconf fsname ## _vfsconf = { \ &vfsops, \ #fsname, \ -1, \ 0, \ flags \ }; \ static moduledata_t fsname ## _mod = { \ #fsname, \ vfs_modevent, \ & fsname ## _vfsconf \ }; \ DECLARE_MODULE(fsname, fsname ## _mod, SI_SUB_VFS, SI_ORDER_MIDDLE) extern char *mountrootfsname; /* * exported vnode operations */ int dounmount(struct mount *, int, struct thread *td); int kernel_mount(struct iovec *iovp, unsigned int iovcnt, int flags); int kernel_vmount(int flags, ...); int vfs_getopt(struct vfsoptlist *, const char *, void **, int *); int vfs_copyopt(struct vfsoptlist *, const char *, void *, int); int vfs_mount(struct thread *td, const char *type, char *path, int flags, void 
*data); int vfs_setpublicfs /* set publicly exported fs */ (struct mount *, struct netexport *, struct export_args *); int vfs_lock(struct mount *); /* lock a vfs */ void vfs_msync(struct mount *, int); void vfs_unlock(struct mount *); /* unlock a vfs */ int vfs_busy(struct mount *, int, struct mtx *, struct thread *td); int vfs_export /* process mount export info */ (struct mount *, struct export_args *); struct netcred *vfs_export_lookup /* lookup host in fs export list */ (struct mount *, struct sockaddr *); int vfs_allocate_syncvnode(struct mount *); void vfs_getnewfsid(struct mount *); dev_t vfs_getrootfsid(struct mount *); struct mount *vfs_getvfs(fsid_t *); /* return vfs given fsid */ int vfs_modevent(module_t, int, void *); int vfs_mountedon(struct vnode *); /* is a vfs mounted on vp */ void vfs_mountroot(void); /* mount our root filesystem */ int vfs_rootmountalloc(char *, char *, struct mount **); void vfs_mount_destroy(struct mount *, struct thread *); void vfs_unbusy(struct mount *, struct thread *td); void vfs_unmountall(void); int vfs_register(struct vfsconf *); int vfs_unregister(struct vfsconf *); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx mountlist_mtx; extern struct nfs_public nfs_pub; /* * Declarations for these vfs default operations are located in * kern/vfs_default.c, they should be used instead of making "dummy" * functions or casting entries in the VFS op table to "enopnotsupp()". */ vfs_start_t vfs_stdstart; vfs_root_t vfs_stdroot; vfs_quotactl_t vfs_stdquotactl; vfs_statfs_t vfs_stdstatfs; vfs_sync_t vfs_stdsync; vfs_sync_t vfs_stdnosync; vfs_vget_t vfs_stdvget; vfs_fhtovp_t vfs_stdfhtovp; vfs_checkexp_t vfs_stdcheckexp; vfs_vptofh_t vfs_stdvptofh; vfs_init_t vfs_stdinit; vfs_uninit_t vfs_stduninit; vfs_extattrctl_t vfs_stdextattrctl; /* XXX - these should be indirect functions!!! */ int softdep_fsync(struct vnode *); int softdep_process_worklist(struct mount *); #else /* !_KERNEL */ #include struct stat; __BEGIN_DECLS int fhopen(const struct fhandle *, int); int fhstat(const struct fhandle *, struct stat *); int fhstatfs(const struct fhandle *, struct statfs *); int fstatfs(int, struct statfs *); int getfh(const char *, fhandle_t *); int getfsstat(struct statfs *, long, int); int getmntinfo(struct statfs **, int); int mount(const char *, const char *, int, void *); int nmount(struct iovec *, u_int, int); int statfs(const char *, struct statfs *); int unmount(const char *, int); /* C library stuff */ void endvfsent(void); struct ovfsconf *getvfsbytype(int); struct ovfsconf *getvfsent(void); int getvfsbyname(const char *, struct xvfsconf *); void setvfsent(int); int vfsisloadable(const char *); int vfsload(const char *); __END_DECLS #endif /* _KERNEL */ #endif /* !_SYS_MOUNT_H_ */ Index: head/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c (revision 122536) +++ head/sys/ufs/ffs/ffs_vfsops.c (revision 122537) @@ -1,1549 +1,1560 @@ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include "opt_quota.h" #include "opt_ufs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include uma_zone_t uma_inode, uma_ufs1, uma_ufs2; static int ffs_sbupdate(struct ufsmount *, int); int ffs_reload(struct mount *,struct ucred *,struct thread *); static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, ufs2_daddr_t); static void ffs_oldfscompat_write(struct fs *, struct ufsmount *); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; static vfs_extattrctl_t ffs_extattrctl; static struct vfsops ufs_vfsops = { .vfs_extattrctl = ffs_extattrctl, .vfs_fhtovp = ffs_fhtovp, .vfs_init = ffs_init, .vfs_mount = ffs_mount, .vfs_quotactl = ufs_quotactl, .vfs_root = ufs_root, .vfs_start = ufs_start, .vfs_statfs = ffs_statfs, .vfs_sync = ffs_sync, .vfs_uninit = ffs_uninit, .vfs_unmount = ffs_unmount, .vfs_vget = ffs_vget, .vfs_vptofh = ffs_vptofh, }; VFS_SET(ufs_vfsops, ufs, 0); /* * ffs_mount * * Called when mounting local physical media * * PARAMETERS: * mountroot * mp mount point structure * path NULL (flag for root mount!!!) * data * ndp * p process (user credentials check [statfs]) * * mount * mp mount point structure * path path to mount point * data pointer to argument struct in user space * ndp mount point namei() return (used for * credentials on reload), reused to look * up block device. * p process (user credentials check) * * RETURNS: 0 Success * !0 error number (errno.h) * * LOCK STATE: * * ENTRY * mount point is locked * EXIT * mount point is locked * * NOTES: * A NULL path can be used for a flag since the mount * system call will fail with EFAULT in copyinstr in * namei() if it is a genuine NULL from the user. 
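The data argument described above arrives from userland as a struct ufs_args, whose fspec names the block device to mount. A hedged sketch of the matching mount(2) call; the device node and mount point are illustrative, and struct ufs_args is assumed to come from <ufs/ufs/ufsmount.h>:

#include <sys/param.h>
#include <sys/mount.h>

#include <ufs/ufs/ufsmount.h>

#include <err.h>
#include <string.h>

int
main(void)
{
        struct ufs_args args;

        memset(&args, 0, sizeof(args));
        args.fspec = "/dev/ad0s1d";     /* block device carrying the fs */

        /* Read-only mount of a UFS filesystem on an existing directory. */
        if (mount("ufs", "/mnt", MNT_RDONLY, &args) == -1)
                err(1, "mount");
        return (0);
}

Passing MNT_UPDATE (optionally with MNT_RELOAD or MNT_FORCE) instead re-enters ffs_mount() on the update path shown below.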
*/ int ffs_mount(mp, path, data, ndp, td) struct mount *mp; /* mount struct pointer*/ char *path; /* path to mount point*/ caddr_t data; /* arguments to FS specific mount*/ struct nameidata *ndp; /* mount point credentials*/ struct thread *td; /* process requesting mount*/ { size_t size; struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; struct fs *fs; int error, flags; mode_t accessmode; if (uma_inode == NULL) { uma_inode = uma_zcreate("FFS inode", sizeof(struct inode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs1 = uma_zcreate("FFS1 dinode", sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs2 = uma_zcreate("FFS2 dinode", sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } /* * Use NULL path to indicate we are mounting the root filesystem. */ if (path == NULL) { if ((error = bdevvp(rootdev, &rootvp))) { printf("ffs_mountroot: can't find rootvp\n"); return (error); } if ((error = ffs_mountfs(rootvp, mp, td)) != 0) return (error); (void)VFS_STATFS(mp, &mp->mnt_stat, td); return (0); } /* * Mounting non-root filesystem or updating a filesystem */ if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0) return (error); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); /* * Flush any dirty data. */ if ((error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) != 0) { vn_finished_write(mp); return (error); } /* * Check for and optionally get rid of files open * for writing. */ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (mp->mnt_flag & MNT_SOFTDEP) { error = softdep_flushfiles(mp, flags, td); } else { error = ffs_flushfiles(mp, flags, td); } if (error) { vn_finished_write(mp); return (error); } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("%s: %s: blocks %jd files %d\n", fs->fs_fsmnt, "update error", (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } fs->fs_ronly = 1; if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) fs->fs_clean = 1; if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) { fs->fs_ronly = 0; fs->fs_clean = 0; vn_finished_write(mp); return (error); } vn_finished_write(mp); } if ((mp->mnt_flag & MNT_RELOAD) && (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0) return (error); if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (suser(td)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td)) != 0) { VOP_UNLOCK(devvp, 0, td); return (error); } VOP_UNLOCK(devvp, 0, td); } fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if ((mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & FS_NEEDSFSCK) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not %s\n", fs->fs_fsmnt, "properly dismounted"); } else { printf( "WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", fs->fs_fsmnt); return (EPERM); } } if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); fs->fs_ronly = 0; fs->fs_clean = 0; if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) { vn_finished_write(mp); return (error); } /* check to see if we need to start softdep */ if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ vn_finished_write(mp); return (error); } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); vn_finished_write(mp); } /* * Soft updates is incompatible with "async", * so if we are doing softupdates stop the user * from setting the async flag in an update. * Softdep_mount() clears it in an initial mount * or ro->rw remount. */ if (mp->mnt_flag & MNT_SOFTDEP) mp->mnt_flag &= ~MNT_ASYNC; /* * If not updating name, process export requests. */ if (args.fspec == 0) return (vfs_export(mp, &args.export)); /* * If this is a snapshot request, take the snapshot. */ if (mp->mnt_flag & MNT_SNAPSHOT) return (ffs_snapshot(mp, args.fspec)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td); if ((error = namei(ndp)) != 0) return (error); NDFREE(ndp, NDF_ONLY_PNBUF); devvp = ndp->ni_vp; if (!vn_isdisk(devvp, &error)) { vrele(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (suser(td)) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){ vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, td); } if (mp->mnt_flag & MNT_UPDATE) { /* * Update only * * If it's not the same vnode, or at least the same device * then it's not correct. */ if (devvp != ump->um_devvp && devvp->v_rdev != ump->um_devvp->v_rdev) error = EINVAL; /* needs translation */ vrele(devvp); if (error) return (error); } else { /* * New mount * * We need the name for the mount point (also used for * "last mounted on") copied in. If an error occurs, * the mount point is discarded by the upper level code. * Note that vfs_mount() populates f_mntonname for us. */ if ((error = ffs_mountfs(devvp, mp, td)) != 0) { vrele(devvp); return (error); } } /* * Save "mounted from" device name info for mount point (NULL pad). */ copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); /* * Initialize filesystem stat information in mount struct. */ (void)VFS_STATFS(mp, &mp->mnt_stat, td); return (0); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ int ffs_reload(mp, cred, td) struct mount *mp; struct ucred *cred; struct thread *td; { struct vnode *vp, *nvp, *devvp; struct inode *ip; void *space; struct buf *bp; struct fs *fs, *newfs; ufs2_daddr_t sblockloc; int i, blks, size, error; int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); error = vinvalbuf(devvp, 0, cred, td, 0, 0); VOP_UNLOCK(devvp, 0, td); if (error) panic("ffs_reload: dirty1"); /* * Only VMIO the backing device if the backing device is a real * block device. */ if (vn_isdisk(devvp, NULL)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); vfs_object_create(devvp, td, td->td_ucred); VOP_UNLOCK(devvp, 0, td); } /* * Step 2: re-read superblock from disk. */ fs = VFSTOUFS(mp)->um_fs; if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize, NOCRED, &bp)) != 0) return (error); newfs = (struct fs *)bp->b_data; if ((newfs->fs_magic != FS_UFS1_MAGIC && newfs->fs_magic != FS_UFS2_MAGIC) || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ newfs->fs_csp = fs->fs_csp; newfs->fs_maxcluster = fs->fs_maxcluster; newfs->fs_contigdirs = fs->fs_contigdirs; newfs->fs_active = fs->fs_active; sblockloc = fs->fs_sblockloc; bcopy(newfs, fs, (u_int)fs->fs_sbsize); brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("%s: reload pending error: blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } /* * Step 3: re-read summary information from disk. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, space, (u_int)size); space = (char *)space + size; brelse(bp); } /* * We no longer know anything about clusters per cylinder group. */ if (fs->fs_contigsumsize > 0) { lp = fs->fs_maxcluster; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } loop: MNT_ILOCK(mp); for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { if (vp->v_mount != mp) { MNT_IUNLOCK(mp); goto loop; } nvp = TAILQ_NEXT(vp, v_nmntvnodes); VI_LOCK(vp); if (vp->v_iflag & VI_XLOCK) { VI_UNLOCK(vp); continue; } MNT_IUNLOCK(mp); /* * Step 4: invalidate all inactive vnodes. */ if (vp->v_usecount == 0) { vgonel(vp, td); goto loop; } /* * Step 5: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { goto loop; } if (vinvalbuf(vp, 0, cred, td, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { VOP_UNLOCK(vp, 0, td); vrele(vp); return (error); } ffs_load_inode(bp, ip, fs, ip->i_number); ip->i_effnlink = ip->i_nlink; brelse(bp); VOP_UNLOCK(vp, 0, td); vrele(vp); MNT_ILOCK(mp); } MNT_IUNLOCK(mp); return (0); } /* * Possible superblock locations ordered from most to least likely. 
*/ static int sblock_try[] = SBLOCKSEARCH; /* * Common code for mount and mountroot */ static int ffs_mountfs(devvp, mp, td) struct vnode *devvp; struct mount *mp; struct thread *td; { struct ufsmount *ump; struct buf *bp; struct fs *fs; dev_t dev; void *space; ufs2_daddr_t sblockloc; int error, i, blks, size, ronly; int32_t *lp; struct ucred *cred; size_t strsize; int ncount; dev = devvp->v_rdev; cred = td ? td->td_ucred : NOCRED; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); if (ncount > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0); VOP_UNLOCK(devvp, 0, td); if (error) return (error); /* * Only VMIO the backing device if the backing device is a real * block device. * Note that it is optional that the backing device be VMIOed. This * increases the opportunity for metadata caching. */ if (vn_isdisk(devvp, NULL)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); vfs_object_create(devvp, td, cred); VOP_UNLOCK(devvp, 0, td); } ronly = (mp->mnt_flag & MNT_RDONLY) != 0; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td); /* * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem * XXX: is subsequently remounted, so open it FREAD|FWRITE from the * XXX: start to avoid getting trashed later on. */ #ifdef notyet error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td, -1); #else error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td, -1); #endif VOP_UNLOCK(devvp, 0, td); if (error) return (error); if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; bp = NULL; ump = NULL; fs = NULL; sblockloc = 0; /* * Try reading the superblock in each of its possible locations. */ for (i = 0; sblock_try[i] != -1; i++) { if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, cred, &bp)) != 0) goto out; fs = (struct fs *)bp->b_data; sblockloc = sblock_try[i]; if ((fs->fs_magic == FS_UFS1_MAGIC || (fs->fs_magic == FS_UFS2_MAGIC && (fs->fs_sblockloc == sblockloc || (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) && fs->fs_bsize <= MAXBSIZE && fs->fs_bsize >= sizeof(struct fs)) break; brelse(bp); bp = NULL; } if (sblock_try[i] == -1) { error = EINVAL; /* XXX needs translation */ goto out; } fs->fs_fmod = 0; fs->fs_flags &= ~FS_INDEXDIRS; /* no support for directory indicies */ fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if (ronly || (mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & FS_NEEDSFSCK) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf( "WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", fs->fs_fsmnt); error = EPERM; goto out; } if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) && (mp->mnt_flag & MNT_FORCE)) { printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("%s: mount pending error: blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_fstype = UFS1; ump->um_balloc = ffs_balloc_ufs1; } else { ump->um_fstype = UFS2; ump->um_balloc = ffs_balloc_ufs2; } ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; ump->um_ifree = ffs_ifree; bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBLOCKSIZE) bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); bp = NULL; fs = ump->um_fs; ffs_oldfscompat_read(fs, ump, sblockloc); fs->fs_ronly = ronly; size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); size += fs->fs_ncg * sizeof(u_int8_t); space = malloc((u_long)size, M_UFSMNT, M_WAITOK); fs->fs_csp = space; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, cred, &bp)) != 0) { free(fs->fs_csp, M_UFSMNT); goto out; } bcopy(bp->b_data, space, (u_int)size); space = (char *)space + size; brelse(bp); bp = NULL; } if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; space = lp; } size = fs->fs_ncg * sizeof(u_int8_t); fs->fs_contigdirs = (u_int8_t *)space; bzero(fs->fs_contigdirs, size); fs->fs_active = NULL; mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || vfs_getvfs(&mp->mnt_stat.f_fsid)) vfs_getnewfsid(mp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; mp->mnt_flag |= MNT_LOCAL; if ((fs->fs_flags & FS_MULTILABEL) != 0) #ifdef MAC mp->mnt_flag |= MNT_MULTILABEL; #else printf( "WARNING: %s: multilabel flag on fs but no MAC support\n", fs->fs_fsmnt); #endif if ((fs->fs_flags & FS_ACLS) != 0) #ifdef UFS_ACL mp->mnt_flag |= MNT_ACLS; #else printf( "WARNING: %s: ACLs flag on fs but no ACLs support\n", fs->fs_fsmnt); #endif ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; #ifdef UFS_EXTATTR ufs_extattr_uepm_init(&ump->um_extattr); #endif devvp->v_rdev->si_mountpoint = mp; /* * Set FS local "last mounted on" information (NULL pad) */ copystr( mp->mnt_stat.f_mntonname, /* mount point*/ fs->fs_fsmnt, /* copy area*/ sizeof(fs->fs_fsmnt) - 1, /* max size*/ &strsize); /* real size*/ bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); if( mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. 
*/ mp->mnt_time = fs->fs_time; } if (ronly == 0) { if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { free(fs->fs_csp, M_UFSMNT); goto out; } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } #ifdef UFS_EXTATTR #ifdef UFS_EXTATTR_AUTOSTART /* * * Auto-starting does the following: * - check for /.attribute in the fs, and extattr_start if so * - for each file in .attribute, enable that file with * an attribute of the same name. * Not clear how to report errors -- probably eat them. * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". */ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ return (0); out: devvp->v_rdev->si_mountpoint = NULL; if (bp) brelse(bp); /* XXX: see comment above VOP_OPEN */ #ifdef notyet (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td); #else (void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td); #endif if (ump) { free(ump->um_fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } #include int bigcgs = 0; SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); /* * Sanity checks for loading old filesystem superblocks. * See ffs_oldfscompat_write below for unwound actions. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ static void ffs_oldfscompat_read(fs, ump, sblockloc) struct fs *fs; struct ufsmount *ump; ufs2_daddr_t sblockloc; { off_t maxfilesize; /* * If not yet done, update fs_flags location and value of fs_sblockloc. */ if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { fs->fs_flags = fs->fs_old_flags; fs->fs_old_flags |= FS_FLAGS_UPDATED; fs->fs_sblockloc = sblockloc; } /* * If not yet done, update UFS1 superblock with new wider fields. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) { fs->fs_maxbsize = fs->fs_bsize; fs->fs_time = fs->fs_old_time; fs->fs_size = fs->fs_old_size; fs->fs_dsize = fs->fs_old_dsize; fs->fs_csaddr = fs->fs_old_csaddr; fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_old_inodefmt < FS_44INODEFMT) { fs->fs_maxfilesize = (u_quad_t) 1LL << 39; fs->fs_qbmask = ~fs->fs_bmask; fs->fs_qfmask = ~fs->fs_fmask; } if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_savedmaxfilesize = fs->fs_maxfilesize; maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; if (fs->fs_maxfilesize > maxfilesize) fs->fs_maxfilesize = maxfilesize; } /* Compatibility for old filesystems */ if (fs->fs_avgfilesize <= 0) fs->fs_avgfilesize = AVFILESIZ; if (fs->fs_avgfpdir <= 0) fs->fs_avgfpdir = AFPDIR; if (bigcgs) { fs->fs_save_cgsize = fs->fs_cgsize; fs->fs_cgsize = fs->fs_bsize; } } /* * Unwinding superblock updates for old filesystems. * See ffs_oldfscompat_read above for details. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ static void ffs_oldfscompat_write(fs, ump) struct fs *fs; struct ufsmount *ump; { /* * Copy back UFS2 updated fields that UFS1 inspects. 
*/ if (fs->fs_magic == FS_UFS1_MAGIC) { fs->fs_old_time = fs->fs_time; fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; fs->fs_maxfilesize = ump->um_savedmaxfilesize; } if (bigcgs) { fs->fs_cgsize = fs->fs_save_cgsize; fs->fs_save_cgsize = 0; } } /* * unmount system call */ int ffs_unmount(mp, mntflags, td) struct mount *mp; int mntflags; struct thread *td; { struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, flags; flags = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } #ifdef UFS_EXTATTR if ((error = ufs_extattr_stop(mp, td))) { if (error != EOPNOTSUPP) printf("ffs_unmount: ufs_extattr_stop returned %d\n", error); } else { ufs_extattr_uepm_destroy(&ump->um_extattr); } #endif if (mp->mnt_flag & MNT_SOFTDEP) { if ((error = softdep_flushfiles(mp, flags, td)) != 0) return (error); } else { if ((error = ffs_flushfiles(mp, flags, td)) != 0) return (error); } fs = ump->um_fs; if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("%s: unmount pending error: blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if (fs->fs_ronly == 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT); if (error) { fs->fs_clean = 0; return (error); } } ump->um_devvp->v_rdev->si_mountpoint = NULL; vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0); /* XXX: see comment above VOP_OPEN */ #ifdef notyet error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, NOCRED, td); #else error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td); #endif vrele(ump->um_devvp); free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ int ffs_flushfiles(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct ufsmount *ump; int error; ump = VFSTOUFS(mp); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, 0, SKIPSYSTEM|flags); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(td, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } #endif ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles"); if (ump->um_devvp->v_vflag & VV_COPYONWRITE) { if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0) return (error); ffs_snapshot_unmount(mp); /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } /* * Flush all the files. */ if ((error = vflush(mp, 0, flags)) != 0) return (error); /* * Flush filesystem metadata. */ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td); VOP_UNLOCK(ump->um_devvp, 0, td); return (error); } /* * Get filesystem statistics. 
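The ffs_statfs() handler that follows fills in the per-mount statistics; consumers then derive byte and percentage figures from the block counts. A hedged sketch of that arithmetic using getmntinfo(3); note that f_bsize is the fragment size here and that f_bavail can go negative once the minfree reserve is being consumed:

#include <sys/param.h>
#include <sys/mount.h>

#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        struct statfs *mntbuf;
        int64_t used, avail;
        double pct;
        int i, n;

        /* MNT_NOWAIT returns cached statistics without querying each fs. */
        if ((n = getmntinfo(&mntbuf, MNT_NOWAIT)) == 0)
                err(1, "getmntinfo");

        for (i = 0; i < n; i++) {
                used = mntbuf[i].f_blocks - mntbuf[i].f_bfree;
                avail = mntbuf[i].f_bavail;     /* may be negative */
                pct = (used + avail) > 0 ?
                    100.0 * used / (used + avail) : 100.0;
                printf("%-20s %ju fragments, %.0f%% used\n",
                    mntbuf[i].f_mntonname, (uintmax_t)mntbuf[i].f_blocks,
                    pct);
        }
        return (0);
}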
*/ int ffs_statfs(mp, sbp, td) struct mount *mp; struct statfs *sbp; struct thread *td; { struct ufsmount *ump; struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) panic("ffs_statfs"); + sbp->f_version = STATFS_VERSION; sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_bavail = freespace(fs, fs->fs_minfree) + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; + sbp->f_namemax = NAME_MAX; if (sbp != &mp->mnt_stat) { + sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; sbp->f_type = mp->mnt_vfc->vfc_typenum; + sbp->f_syncwrites = mp->mnt_stat.f_syncwrites; + sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites; + sbp->f_syncreads = mp->mnt_stat.f_syncreads; + sbp->f_asyncreads = mp->mnt_stat.f_asyncreads; + sbp->f_owner = mp->mnt_stat.f_owner; + sbp->f_fsid = mp->mnt_stat.f_fsid; + bcopy((caddr_t)mp->mnt_stat.f_fstypename, + (caddr_t)&sbp->f_fstypename[0], MFSNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ int ffs_sync(mp, waitfor, cred, td) struct mount *mp; int waitfor; struct ucred *cred; struct thread *td; { struct vnode *nvp, *vp, *devvp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, count, wait, lockreq, allerror = 0; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ffs_sync: rofs mod"); } /* * Write back each (modified) inode. */ wait = 0; lockreq = LK_EXCLUSIVE | LK_NOWAIT; if (waitfor == MNT_WAIT) { wait = 1; lockreq = LK_EXCLUSIVE; } lockreq |= LK_INTERLOCK; MNT_ILOCK(mp); loop: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; /* * Depend on the mntvnode_slock to keep things stable enough * for a quick test. Since there might be hundreds of * thousands of vnodes, we cannot afford even a subroutine * call unless there's a good chance that we have work to do. */ nvp = TAILQ_NEXT(vp, v_nmntvnodes); VI_LOCK(vp); if (vp->v_iflag & VI_XLOCK) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if (vp->v_type == VNON || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && TAILQ_EMPTY(&vp->v_dirtyblkhd))) { VI_UNLOCK(vp); continue; } MNT_IUNLOCK(mp); if ((error = vget(vp, lockreq, td)) != 0) { MNT_ILOCK(mp); if (error == ENOENT) goto loop; continue; } if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(vp, 0, td); vrele(vp); MNT_ILOCK(mp); if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp) goto loop; } MNT_IUNLOCK(mp); /* * Force stale filesystem control information to be flushed. 
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			MNT_ILOCK(mp);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	devvp = ump->um_devvp;
	VI_LOCK(devvp);
	if (waitfor != MNT_LAZY &&
	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0, td);
		if (allerror == 0 && waitfor == MNT_WAIT) {
			MNT_ILOCK(mp);
			goto loop;
		}
	} else
		VI_UNLOCK(devvp);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
		allerror = error;
	return (allerror);
}

int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	struct thread *td = curthread;		/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such a rare case as simultaneous creation of a
	 * vnode for the same ino by different processes.  We just allow
	 * them to race and check later to decide who wins.  Let the race
	 * begin!
	 */
	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
		return (error);
	if (*vpp != NULL)
		return (0);

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK);

	/* Allocate a new vnode/inode. */
	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * FFS supports recursive locking.
	 */
	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif
	/*
	 * Exclusively lock the vnode before adding it to the hash.  Note
	 * that we must not release nor downgrade the lock (despite what
	 * the flags argument says) until it is fully initialized.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);

	/*
	 * Atomically (in terms of ufs_hash operations) check the hash for
	 * a duplicate of the vnode being created and add it to the hash.
	 * If a duplicate vnode was found, it will be vget()ed from the
	 * hash for us.
	 */
	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/* We lost the race, then throw away our vnode and return existing */
	if (*vpp != NULL) {
		vput(vp);
		return (0);
	}

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization.
	 */
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP(ip, i_gen) = ip->i_gen;
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_associate_vnode_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - call ffs_vget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 * - check that the given client host has export rights and return
 *   those rights via exflagsp and credanonp
 */
int
ffs_fhtovp(mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	struct ufid *ufhp;
	struct fs *fs;

	ufhp = (struct ufid *)fhp;
	fs = VFSTOUFS(mp)->um_fs;
	if (ufhp->ufid_ino < ROOTINO ||
	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
		return (ESTALE);
	return (ufs_fhtovp(mp, ufhp, vpp));
}

/*
 * Vnode pointer to File handle
 */
/* ARGSUSED */
int
ffs_vptofh(vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{
	struct inode *ip;
	struct ufid *ufhp;

	ip = VTOI(vp);
	ufhp = (struct ufid *)fhp;
	ufhp->ufid_len = sizeof(struct ufid);
	ufhp->ufid_ino = ip->i_number;
	ufhp->ufid_gen = ip->i_gen;
	return (0);
}

/*
 * Initialize the filesystem.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{

	softdep_initialize();
	return (ufs_init(vfsp));
}

/*
 * Undo the work of ffs_init().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int ret;

	ret = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (ret);
}

/*
 * Write a superblock and associated information back to disk.
 */
static int
ffs_sbupdate(mp, waitfor)
	struct ufsmount *mp;
	int waitfor;
{
	struct fs *fs = mp->um_fs;
	struct buf *bp;
	int blks;
	void *space;
	int i, size, error, allerror = 0;

	if (fs->fs_ronly == 1 &&
	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
	    (MNT_RDONLY | MNT_UPDATE))
		panic("ffs_sbupdate: write read-only filesystem");
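	/*
	 * Added note: the cylinder group summary area (fs_csp) is written
	 * back in full blocks of fs_frag fragments starting at fs_csaddr,
	 * with the final chunk trimmed to the remaining fragments.  If any
	 * synchronous summary write fails, ffs_sbupdate() returns before
	 * the superblock itself is written.
	 */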
	/*
	 * First write back the summary information.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
		    size, 0, 0, 0);
		bcopy(space, bp->b_data, (u_int)size);
		space = (char *)space + size;
		if (waitfor != MNT_WAIT)
			bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			allerror = error;
	}
	/*
	 * Now write back the superblock itself. If any errors occurred
	 * up to this point, then fail so that the superblock avoids
	 * being written out as clean.
	 */
	if (allerror)
		return (allerror);
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
		printf("%s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
		fs->fs_sblockloc = SBLOCK_UFS1;
	}
	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
		printf("%s: correcting fs_sblockloc from %jd to %d\n",
		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
		fs->fs_sblockloc = SBLOCK_UFS2;
	}
	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc),
	    (int)fs->fs_sbsize, 0, 0, 0);
	fs->fs_fmod = 0;
	fs->fs_time = time_second;
	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
	if (waitfor != MNT_WAIT)
		bawrite(bp);
	else if ((error = bwrite(bp)) != 0)
		allerror = error;
	return (allerror);
}

static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{

#ifdef UFS_EXTATTR
	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td));
#else
	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td));
#endif
}

static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
		uma_zfree(uma_ufs1, ip->i_din1);
	else if (ip->i_din2 != NULL)
		uma_zfree(uma_ufs2, ip->i_din2);
	uma_zfree(uma_inode, ip);
}