Index: projects/clang1000-import/include/stdlib.h
===================================================================
--- projects/clang1000-import/include/stdlib.h	(revision 357389)
+++ projects/clang1000-import/include/stdlib.h	(revision 357390)
@@ -1,355 +1,359 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)stdlib.h	8.5 (Berkeley) 5/19/95
  * $FreeBSD$
  */
 
 #ifndef _STDLIB_H_
 #define	_STDLIB_H_
 
 #include <sys/cdefs.h>
 #include <sys/_null.h>
 #include <sys/_types.h>
 
 __NULLABILITY_PRAGMA_PUSH
 
 #if __BSD_VISIBLE
 #ifndef _RUNE_T_DECLARED
 typedef	__rune_t	rune_t;
 #define	_RUNE_T_DECLARED
 #endif
 #endif
 
 #ifndef _SIZE_T_DECLARED
 typedef	__size_t	size_t;
 #define	_SIZE_T_DECLARED
 #endif
 
 #ifndef	__cplusplus
 #ifndef _WCHAR_T_DECLARED
 typedef	___wchar_t	wchar_t;
 #define	_WCHAR_T_DECLARED
 #endif
 #endif
 
 typedef struct {
 	int	quot;		/* quotient */
 	int	rem;		/* remainder */
 } div_t;
 
 typedef struct {
 	long	quot;
 	long	rem;
 } ldiv_t;
 
 #define	EXIT_FAILURE	1
 #define	EXIT_SUCCESS	0
 
-#define	RAND_MAX	0x7ffffffd
+/*
+ * I.e., INT_MAX; rand(3) returns a signed integer but must produce output in
+ * the range [0, RAND_MAX], so half of the possible output range is unused.
+ */
+#define	RAND_MAX	0x7fffffff
 
 __BEGIN_DECLS
 #ifdef _XLOCALE_H_
 #include <xlocale/_stdlib.h>
 #endif
 extern int __mb_cur_max;
 extern int ___mb_cur_max(void);
 #define	MB_CUR_MAX	((size_t)___mb_cur_max())
 
 _Noreturn void	 abort(void);
 int	 abs(int) __pure2;
 int	 atexit(void (* _Nonnull)(void));
 double	 atof(const char *);
 int	 atoi(const char *);
 long	 atol(const char *);
 void	*bsearch(const void *, const void *, size_t,
 	    size_t, int (*)(const void * _Nonnull, const void *));
 void	*calloc(size_t, size_t) __malloc_like __result_use_check
 	     __alloc_size2(1, 2);
 div_t	 div(int, int) __pure2;
 _Noreturn void	 exit(int);
 void	 free(void *);
 char	*getenv(const char *);
 long	 labs(long) __pure2;
 ldiv_t	 ldiv(long, long) __pure2;
 void	*malloc(size_t) __malloc_like __result_use_check __alloc_size(1);
 int	 mblen(const char *, size_t);
 size_t	 mbstowcs(wchar_t * __restrict , const char * __restrict, size_t);
 int	 mbtowc(wchar_t * __restrict, const char * __restrict, size_t);
 void	 qsort(void *, size_t, size_t,
 	    int (* _Nonnull)(const void *, const void *));
 int	 rand(void);
 void	*realloc(void *, size_t) __result_use_check __alloc_size(2);
 void	 srand(unsigned);
 double	 strtod(const char * __restrict, char ** __restrict);
 float	 strtof(const char * __restrict, char ** __restrict);
 long	 strtol(const char * __restrict, char ** __restrict, int);
 long double
 	 strtold(const char * __restrict, char ** __restrict);
 unsigned long
 	 strtoul(const char * __restrict, char ** __restrict, int);
 int	 system(const char *);
 int	 wctomb(char *, wchar_t);
 size_t	 wcstombs(char * __restrict, const wchar_t * __restrict, size_t);
 
 /*
  * Functions added in C99 which we make conditionally available in the
  * BSD^C89 namespace if the compiler supports `long long'.
  * The #if test is more complicated than it ought to be because
  * __BSD_VISIBLE implies __ISO_C_VISIBLE == 1999 *even if* `long long'
  * is not supported in the compilation environment (which therefore means
  * that it can't really be ISO C99).
  *
  * (The only other extension made by C99 in thie header is _Exit().)
  */
 #if __ISO_C_VISIBLE >= 1999 || defined(__cplusplus)
 #ifdef __LONG_LONG_SUPPORTED
 /* LONGLONG */
 typedef struct {
 	long long quot;
 	long long rem;
 } lldiv_t;
 
 /* LONGLONG */
 long long
 	 atoll(const char *);
 /* LONGLONG */
 long long
 	 llabs(long long) __pure2;
 /* LONGLONG */
 lldiv_t	 lldiv(long long, long long) __pure2;
 /* LONGLONG */
 long long
 	 strtoll(const char * __restrict, char ** __restrict, int);
 /* LONGLONG */
 unsigned long long
 	 strtoull(const char * __restrict, char ** __restrict, int);
 #endif /* __LONG_LONG_SUPPORTED */
 
 _Noreturn void	 _Exit(int);
 #endif /* __ISO_C_VISIBLE >= 1999 */
 
 /*
  * If we're in a mode greater than C99, expose C11 functions.
  */
 #if __ISO_C_VISIBLE >= 2011 || __cplusplus >= 201103L
 void *	aligned_alloc(size_t, size_t) __malloc_like __alloc_align(1)
 	    __alloc_size(2);
 int	at_quick_exit(void (*)(void));
 _Noreturn void
 	quick_exit(int);
 #endif /* __ISO_C_VISIBLE >= 2011 */
 /*
  * Extensions made by POSIX relative to C.
  */
 #if __POSIX_VISIBLE >= 199506 || __XSI_VISIBLE
 char	*realpath(const char * __restrict, char * __restrict);
 #endif
 #if __POSIX_VISIBLE >= 199506
 int	 rand_r(unsigned *);			/* (TSF) */
 #endif
 #if __POSIX_VISIBLE >= 200112
 int	 posix_memalign(void **, size_t, size_t); /* (ADV) */
 int	 setenv(const char *, const char *, int);
 int	 unsetenv(const char *);
 #endif
 
 #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE
 int	 getsubopt(char **, char *const *, char **);
 #ifndef _MKDTEMP_DECLARED
 char	*mkdtemp(char *);
 #define	_MKDTEMP_DECLARED
 #endif
 #ifndef _MKSTEMP_DECLARED
 int	 mkstemp(char *);
 #define	_MKSTEMP_DECLARED
 #endif
 #endif /* __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE */
 
 /*
  * The only changes to the XSI namespace in revision 6 were the deletion
  * of the ttyslot() and valloc() functions, which FreeBSD never declared
  * in this header.  For revision 7, ecvt(), fcvt(), and gcvt(), which
  * FreeBSD also does not have, and mktemp(), are to be deleted.
  */
 #if __XSI_VISIBLE
 /* XXX XSI requires pollution from <sys/wait.h> here.  We'd rather not. */
 long	 a64l(const char *);
 double	 drand48(void);
 /* char	*ecvt(double, int, int * __restrict, int * __restrict); */
 double	 erand48(unsigned short[3]);
 /* char	*fcvt(double, int, int * __restrict, int * __restrict); */
 /* char	*gcvt(double, int, int * __restrict, int * __restrict); */
 int	 grantpt(int);
 char	*initstate(unsigned int, char *, size_t);
 long	 jrand48(unsigned short[3]);
 char	*l64a(long);
 void	 lcong48(unsigned short[7]);
 long	 lrand48(void);
 #if !defined(_MKTEMP_DECLARED) && (__BSD_VISIBLE || __XSI_VISIBLE <= 600)
 char	*mktemp(char *);
 #define	_MKTEMP_DECLARED
 #endif
 long	 mrand48(void);
 long	 nrand48(unsigned short[3]);
 int	 posix_openpt(int);
 char	*ptsname(int);
 int	 putenv(char *);
 long	 random(void);
 unsigned short
 	*seed48(unsigned short[3]);
 char	*setstate(/* const */ char *);
 void	 srand48(long);
 void	 srandom(unsigned int);
 int	 unlockpt(int);
 #endif /* __XSI_VISIBLE */
 
 #if __BSD_VISIBLE
 extern const char *malloc_conf;
 extern void (*malloc_message)(void *, const char *);
 
 /*
  * The alloca() function can't be implemented in C, and on some
  * platforms it can't be implemented at all as a callable function.
  * The GNU C compiler provides a built-in alloca() which we can use.
  * On platforms where alloca() is not in libc, programs which use it
  * will fail to link when compiled with non-GNU compilers.
  */
 #if __GNUC__ >= 2 || defined(__INTEL_COMPILER)
 #undef  alloca	/* some GNU bits try to get cute and define this on their own */
 #define alloca(sz) __builtin_alloca(sz)
 #endif
 
 void	 abort2(const char *, int, void **) __dead2;
 __uint32_t
 	 arc4random(void);
 void	 arc4random_buf(void *, size_t);
 __uint32_t 
 	 arc4random_uniform(__uint32_t);
 
 #ifdef __BLOCKS__
 int	 atexit_b(void (^ _Nonnull)(void));
 void	*bsearch_b(const void *, const void *, size_t,
 	    size_t, int (^ _Nonnull)(const void *, const void *));
 #endif
 char	*getbsize(int *, long *);
 					/* getcap(3) functions */
 char	*cgetcap(char *, const char *, int);
 int	 cgetclose(void);
 int	 cgetent(char **, char **, const char *);
 int	 cgetfirst(char **, char **);
 int	 cgetmatch(const char *, const char *);
 int	 cgetnext(char **, char **);
 int	 cgetnum(char *, const char *, long *);
 int	 cgetset(const char *);
 int	 cgetstr(char *, const char *, char **);
 int	 cgetustr(char *, const char *, char **);
 
 int	 daemon(int, int);
 int	 daemonfd(int, int);
 char	*devname(__dev_t, __mode_t);
 char	*devname_r(__dev_t, __mode_t, char *, int);
 char	*fdevname(int);
 char	*fdevname_r(int, char *, int);
 int	 getloadavg(double [], int);
 const char *
 	 getprogname(void);
 
 int	 heapsort(void *, size_t, size_t,
 	    int (* _Nonnull)(const void *, const void *));
 #ifdef __BLOCKS__
 int	 heapsort_b(void *, size_t, size_t,
 	    int (^ _Nonnull)(const void *, const void *));
 void	 qsort_b(void *, size_t, size_t,
 	    int (^ _Nonnull)(const void *, const void *));
 #endif
 int	 l64a_r(long, char *, int);
 int	 mergesort(void *, size_t, size_t, int (*)(const void *, const void *));
 #ifdef __BLOCKS__
 int	 mergesort_b(void *, size_t, size_t, int (^)(const void *, const void *));
 #endif
 int	 mkostemp(char *, int);
 int	 mkostemps(char *, int, int);
 int	 mkostempsat(int, char *, int, int);
 void	 qsort_r(void *, size_t, size_t, void *,
 	    int (*)(void *, const void *, const void *));
 int	 radixsort(const unsigned char **, int, const unsigned char *,
 	    unsigned);
 void	*reallocarray(void *, size_t, size_t) __result_use_check
 	    __alloc_size2(2, 3);
 void	*reallocf(void *, size_t) __result_use_check __alloc_size(2);
 int	 rpmatch(const char *);
 void	 setprogname(const char *);
 int	 sradixsort(const unsigned char **, int, const unsigned char *,
 	    unsigned);
 void	 srandomdev(void);
 long long
 	strtonum(const char *, long long, long long, const char **);
 
 /* Deprecated interfaces, to be removed. */
 __int64_t
 	 strtoq(const char *, char **, int);
 __uint64_t
 	 strtouq(const char *, char **, int);
 
 extern char *suboptarg;			/* getsubopt(3) external variable */
 #endif /* __BSD_VISIBLE */
 
 #if __EXT1_VISIBLE
 
 #ifndef _RSIZE_T_DEFINED
 #define _RSIZE_T_DEFINED
 typedef size_t rsize_t;
 #endif
 
 #ifndef _ERRNO_T_DEFINED
 #define _ERRNO_T_DEFINED
 typedef int errno_t;
 #endif
 
 /* K.3.6 */
 typedef void (*constraint_handler_t)(const char * __restrict,
     void * __restrict, errno_t);
 /* K.3.6.1.1 */
 constraint_handler_t set_constraint_handler_s(constraint_handler_t handler);
 /* K.3.6.1.2 */
 _Noreturn void abort_handler_s(const char * __restrict, void * __restrict,
     errno_t);
 /* K3.6.1.3 */
 void ignore_handler_s(const char * __restrict, void * __restrict, errno_t);
 /* K.3.6.3.2 */
 errno_t	 qsort_s(void *, rsize_t, rsize_t,
     int (*)(const void *, const void *, void *), void *);
 #endif /* __EXT1_VISIBLE */
 
 __END_DECLS
 __NULLABILITY_PRAGMA_POP
 
 #endif /* !_STDLIB_H_ */
Index: projects/clang1000-import/lib/libc/stdlib/Symbol.map
===================================================================
--- projects/clang1000-import/lib/libc/stdlib/Symbol.map	(revision 357389)
+++ projects/clang1000-import/lib/libc/stdlib/Symbol.map	(revision 357390)
@@ -1,136 +1,136 @@
 /*
  * $FreeBSD$
  */
 
 FBSD_1.0 {
 	_Exit;
 	a64l;
 	abort;
 	abs;
 	atexit;
 	__cxa_atexit;
 	__cxa_finalize;
 	atof;
 	atoi;
 	atol;
 	atoll;
 	bsearch;
 	div;
 	__isthreaded;
 	exit;
 	getenv;
 	opterr;
 	optind;
 	optopt;
 	optreset;
 	optarg;
 	getopt;
 	getopt_long;
 	getopt_long_only;
 	suboptarg;
 	getsubopt;
 	grantpt;
 	ptsname;
 	unlockpt;
 	hcreate;
 	hdestroy;
 	hsearch;
 	heapsort;
 	imaxabs;
 	imaxdiv;
 	insque;
 	l64a;
 	l64a_r;
 	labs;
 	ldiv;
 	llabs;
 	lldiv;
 	lsearch;
 	lfind;
 	mergesort;
 	putenv;
 	qsort_r;
 	qsort;
 	radixsort;
 	sradixsort;
 	rand_r;
-	rand;
-	srand;
 	srandom;
 	srandomdev;
 	initstate;
 	setstate;
 	random;
 	reallocf;
 	realpath;
 	remque;
 	setenv;
 	unsetenv;
 	strfmon;
 	strtoimax;
 	strtol;
 	strtoll;
 	strtonum;
 	strtoq;
 	strtoul;
 	strtoull;
 	strtoumax;
 	strtouq;
 	system;
 	tdelete;
 	tfind;
 	tsearch;
 	twalk;
 };
 
 FBSD_1.3 {
 	at_quick_exit;
 	atof_l;
 	atoi_l;
 	atol_l;
 	atoll_l;
 	quick_exit;
 	strtod_l;
 	strtof_l;
 	strtoimax_l;
 	strtol_l;
 	strtold_l;
 	strtoll_l;
 	strtoq_l;
 	strtoul_l;
 	strtoull_l;
 	strtoumax_l;
 	strtouq_l;
 };
 
 FBSD_1.4 {
 	atexit_b;
 	bsearch_b;
 	heapsort_b;
 	mergesort_b;
 	qsort_b;
 	hcreate_r;
 	hdestroy_r;
 	hsearch_r;
 	reallocarray;
 };
 
 FBSD_1.5 {
 	__cxa_thread_atexit;
 	__cxa_thread_atexit_impl;
 	abort_handler_s;
 	ignore_handler_s;
 	set_constraint_handler_s;
 };
 
 FBSD_1.6 {
 	qsort_s;
+	rand;
+	srand;
 };
 
 FBSDprivate_1.0 {
 	__system;
 	_system;
 	__libc_system;
 	__cxa_thread_call_dtors;
 	__libc_atexit;
 };
Index: projects/clang1000-import/lib/libc/stdlib/rand.3
===================================================================
--- projects/clang1000-import/lib/libc/stdlib/rand.3	(revision 357389)
+++ projects/clang1000-import/lib/libc/stdlib/rand.3	(revision 357390)
@@ -1,119 +1,149 @@
 .\" Copyright (c) 1990, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" This code is derived from software contributed to Berkeley by
 .\" the American National Standards Committee X3, on Information
 .\" Processing Systems.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)rand.3	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd December 14, 2019
+.Dd February 1, 2020
 .Dt RAND 3
 .Os
 .Sh NAME
 .Nm rand ,
 .Nm srand ,
 .Nm rand_r
 .Nd bad random number generator
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In stdlib.h
 .Ft void
 .Fn srand "unsigned seed"
 .Ft int
 .Fn rand void
 .Ft int
 .Fn rand_r "unsigned *ctx"
 .Sh DESCRIPTION
 .Bf -symbolic
 The functions described in this manual page are not cryptographically
 secure.
 Applications which require unpredictable random numbers should use
 .Xr arc4random 3
 instead.
 .Ef
 .Pp
-These interfaces are obsoleted by
-.Xr random 3 .
-.Pp
 The
 .Fn rand
 function computes a sequence of pseudo-random integers in the range
 of 0 to
-.Dv RAND_MAX
-(as defined by the header file
-.In stdlib.h ) .
+.Dv RAND_MAX ,
+inclusive.
 .Pp
 The
 .Fn srand
-function sets its argument
+function seeds the algorithm with the
 .Fa seed
-as the seed for a new sequence of
-pseudo-random numbers to be returned by
-.Fn rand .
-These sequences are repeatable by calling
+parameter.
+Repeatable sequences of
+.Fn rand
+output may be obtained by calling
 .Fn srand
-with the same seed value.
+with the same
+.Fa seed .
+.Fn rand
+is implicitly initialized as if
+.Fn srand "1"
+had been invoked explicitly.
 .Pp
-If no
-.Fa seed
-value is provided, the functions are automatically
-seeded with a value of 1.
-.Pp
-The
+In
+.Fx 13 ,
+.Fn rand
+is implemented using the same 128-byte state LFSR generator algorithm as
+.Xr random 3 .
+However, the legacy
 .Fn rand_r
-function
-provides the same functionality as
-.Fn rand .
-A pointer to the context value
-.Fa ctx
-must be supplied by the caller.
-.Pp
-For better generator quality, use
-.Xr random 3
-or
-.Xr lrand48 3 .
+function is not (and cannot be, because of its limited
+.Fa *ctx
+size).
+.Fn rand_r
+implements the historical, poor-quality Park-Miller 32-bit LCG and should not
+be used in new designs.
+.Sh IMPLEMENTATION NOTES
+Since
+.Fx 13 ,
+.Fn rand
+is implemented with the same generator as
+.Xr random 3 ,
+so the low-order bits should no longer be significantly worse than the
+high-order bits.
 .Sh SEE ALSO
 .Xr arc4random 3 ,
-.Xr lrand48 3 ,
 .Xr random 3 ,
 .Xr random 4
 .Sh STANDARDS
 The
 .Fn rand
 and
 .Fn srand
 functions
 conform to
 .St -isoC .
 .Pp
 The
 .Fn rand_r
-function is marked as obsolescent in POSIX and may be removed in a future
-revision of the standard.
+function is not part of
+.St -isoC
+and is marked obsolescent in
+.St -p1003.1-2008 .
+It may be removed in a future revision of POSIX.
+.Sh CAVEATS
+Prior to
+.Fx 13 ,
+.Fn rand
+used the historical Park-Miller generator with 32 bits of state and produced
+poor quality output, especially in the lower bits.
+.Fn rand
+in earlier versions of
+.Fx ,
+as well as other standards-conforming implementations, may continue to produce
+poor quality output.
+.Pp
+.Em These functions should not be used in portable applications that want a
+.Em high quality or high performance pseudorandom number generator .
+One possible replacement,
+.Xr random 3 ,
+is portable to Linux \(em but it is not especially fast, nor standardized.
+.Pp
+If broader portability or better performance is desired, any of the widely
+available and permissively licensed SFC64/32, JSF64/32, PCG64/32, or SplitMix64
+algorithm implementations may be embedded in your application.
+These algorithms have the benefit of requiring less space than
+.Xr random 3
+and being quite fast (when implemented inline in a header).
Index: projects/clang1000-import/lib/libc/stdlib/rand.c
===================================================================
--- projects/clang1000-import/lib/libc/stdlib/rand.c	(revision 357389)
+++ projects/clang1000-import/lib/libc/stdlib/rand.c	(revision 357390)
@@ -1,114 +1,167 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Posix rand_r function added May 1999 by Wes Peters <wes@softweyr.com>.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char sccsid[] = "@(#)rand.c	8.1 (Berkeley) 6/14/93";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "namespace.h"
 #include <sys/param.h>
 #include <sys/sysctl.h>
+#include <assert.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include <syslog.h>
 #include "un-namespace.h"
 
+#include "random.h"
+
+/*
+ * Implement rand(3), the standard C PRNG API, using the non-standard but
+ * higher quality random(3) implementation and the same size 128-byte state
+ * LFSR as the random(3) default.
+ *
+ * It turns out there are portable applications that want a PRNG but are too
+ * lazy to use better-but-nonstandard interfaces like random(3), when
+ * available, and too lazy to import higher-quality and faster PRNGs into their
+ * codebase (such as any of SFC, JSF, 128-bit LCGs, PCG, or Splitmix64).
+ *
+ * Since we're stuck with rand(3) due to the C standard, we can at least have
+ * it produce a relatively good PRNG sequence using our existing random(3)
+ * LFSR.  The random(3) design is not particularly fast nor compact, but it has
+ * the advantage of being the one already in the tree.
+ */
+static struct __random_state *rand3_state;
+
+static void
+initialize_rand3(void)
+{
+	int error;
+
+	rand3_state = allocatestate(TYPE_3);
+	error = initstate_r(rand3_state, 1, rand3_state->rst_randtbl, BREAK_3);
+	assert(error == 0);
+}
+
+int
+rand(void)
+{
+	if (rand3_state == NULL)
+		initialize_rand3();
+	return ((int)random_r(rand3_state));
+}
+
+void
+srand(unsigned seed)
+{
+	if (rand3_state == NULL)
+		initialize_rand3();
+	srandom_r(rand3_state, seed);
+}
+
+/*
+ * FreeBSD 12 and prior compatibility implementation of rand(3).
+ */
 static int
 do_rand(unsigned long *ctx)
 {
 /*
  * Compute x = (7^5 * x) mod (2^31 - 1)
  * without overflowing 31 bits:
  *      (2^31 - 1) = 127773 * (7^5) + 2836
  * From "Random number generators: good ones are hard to find",
  * Park and Miller, Communications of the ACM, vol. 31, no. 10,
  * October 1988, p. 1195.
  */
 	long hi, lo, x;
 
 	/* Transform to [1, 0x7ffffffe] range. */
 	x = (*ctx % 0x7ffffffe) + 1;
 	hi = x / 127773;
 	lo = x % 127773;
 	x = 16807 * lo - 2836 * hi;
 	if (x < 0)
 		x += 0x7fffffff;
 	/* Transform to [0, 0x7ffffffd] range. */
 	x--;
 	*ctx = x;
 	return (x);
 }
 
-
+/*
+ * rand_r(3) cannot be improved: a single unsigned is too little state.
+ */
 int
 rand_r(unsigned *ctx)
 {
 	u_long val;
 	int r;
 
 	val = *ctx;
 	r = do_rand(&val);
 	*ctx = (unsigned)val;
 	return (r);
 }
 
-
 static u_long next = 1;
 
+int __rand_fbsd12(void);
 int
-rand(void)
+__rand_fbsd12(void)
 {
 	return (do_rand(&next));
 }
+__sym_compat(rand, __rand_fbsd12, FBSD_1.0);
 
+void __srand_fbsd12(unsigned seed);
 void
-srand(unsigned seed)
+__srand_fbsd12(unsigned seed)
 {
 	next = seed;
 }
-
+__sym_compat(srand, __srand_fbsd12, FBSD_1.0);
 
 void __sranddev_fbsd12(void);
 void
 __sranddev_fbsd12(void)
 {
 	static bool warned = false;
 
 	if (!warned) {
 		syslog(LOG_DEBUG, "Deprecated function sranddev() called");
 		warned = true;
 	}
 }
 __sym_compat(sranddev, __sranddev_fbsd12, FBSD_1.0);
Index: projects/clang1000-import/lib/libc/stdlib/random.3
===================================================================
--- projects/clang1000-import/lib/libc/stdlib/random.3	(revision 357389)
+++ projects/clang1000-import/lib/libc/stdlib/random.3	(revision 357390)
@@ -1,182 +1,178 @@
 .\" Copyright (c) 1983, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)random.3	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd January 20, 2020
+.Dd February 1, 2020
 .Dt RANDOM 3
 .Os
 .Sh NAME
 .Nm random ,
 .Nm srandom ,
 .Nm srandomdev ,
 .Nm initstate ,
 .Nm setstate
 .Nd non-cryptographic pseudorandom number generator; routines for changing generators
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In stdlib.h
 .Ft long
 .Fn random void
 .Ft void
 .Fn srandom "unsigned int seed"
 .Ft void
 .Fn srandomdev void
 .Ft char *
 .Fn initstate "unsigned int seed" "char *state" "size_t n"
 .Ft char *
 .Fn setstate "char *state"
 .Sh DESCRIPTION
 .Bf -symbolic
 The functions described in this manual page are not secure.
 Applications which require unpredictable random numbers should use
 .Xr arc4random 3
 instead.
 .Ef
 .Pp
 Unless initialized with less than 32 bytes of state, the
 .Fn random
 function
 uses a non-linear additive feedback random number generator employing a
 default table of size 31 long integers to return successive pseudo-random
 numbers in the range from 0 to
 .if t 2\u\s731\s10\d\(mi1.
 .if n (2**31)\(mi1.
 The period of this random number generator is very large, approximately
 .if t 16\(mu(2\u\s731\s10\d\(mi1).
 .if n 16*((2**31)\(mi1).
 .Pp
 If initialized with less than 32 bytes of state,
 .Fn random
-uses the same poor-quality Park-Miller LCG as
-.Xr rand 3 .
+uses the poor-quality 32-bit Park-Miller LCG.
 .Pp
 The
 .Fn random
 and
 .Fn srandom
 functions are analagous to
 .Xr rand 3
 and
 .Xr srand 3 .
-The difference is that
-.Xr rand 3
-is a worse pseudo-random number generator.
 .Pp
 Like
 .Xr rand 3 ,
 .Fn random
 is implicitly initialized as if
 .Fn srandom "1"
 had been invoked explicitly.
 .Pp
 The
 .Fn srandomdev
 routine initializes the state array using random numbers obtained from the
 kernel.
 This can generate states which are impossible to reproduce by calling
 .Fn srandom ,
 because the succeeding terms in the state buffer are no longer derived from the
 Park-Miller LCG algorithm applied to a fixed seed.
 .Pp
 The
 .Fn initstate
 routine initializes the provided state array of
 .Vt uint32_t
 values and uses it in future
 .Fn random
 invocations.
 (Despite the
 .Vt char *
 type of
 .Fa state ,
 the underlying object must be a naturally aligned array of 32-bit values.)
 The size of the state array (in bytes) is used by
 .Fn initstate
 to decide how sophisticated a random number generator it should use \(em the
 more state, the better the random numbers will be.
 (Current "optimal" values for the amount of state information are
 8, 32, 64, 128, and 256 bytes; other amounts will be rounded down to
 the nearest known amount.
 Using less than 8 bytes will cause an error.)
 The
 .Fa seed
 is used as in
 .Fn srandom .
 The
 .Fn initstate
 function
 returns a pointer to the previous state information array.
 .Pp
 The
 .Fn setstate
 routine switches
 .Fn random
 to using the provided state.
 It returns a pointer to the previous state.
 .Pp
 Once a state array has been initialized, it may be restarted at a
 different point either by calling
 .Fn initstate
 (with the desired seed, the state array, and its size) or by calling
 both
 .Fn setstate
 (with the state array) and
 .Fn srandom
 (with the desired seed).
 The advantage of calling both
 .Fn setstate
 and
 .Fn srandom
 is that the size of the state array does not have to be remembered after
 it is initialized.
 .Pp
 With 256 bytes of state information, the period of the random number
 generator is greater than
 .if t 2\u\s769\s10\d,
 .if n 2**69
 which should be sufficient for most purposes.
 .Sh DIAGNOSTICS
 If
 .Fn initstate
 is called with less than 8 bytes of state information, or if
 .Fn setstate
 detects that the state information has been garbled,
 NULL is returned.
 .Sh SEE ALSO
 .Xr arc4random 3 ,
 .Xr lrand48 3 ,
 .Xr rand 3 ,
 .Xr random 4
 .Sh HISTORY
 These
 functions appeared in
 .Bx 4.2 .
 .Sh AUTHORS
 .An Earl T. Cohen
Index: projects/clang1000-import/lib/libc/stdlib/random.c
===================================================================
--- projects/clang1000-import/lib/libc/stdlib/random.c	(revision 357389)
+++ projects/clang1000-import/lib/libc/stdlib/random.c	(revision 357390)
@@ -1,526 +1,507 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1983, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char sccsid[] = "@(#)random.c	8.2 (Berkeley) 5/19/95";
 #endif /* LIBC_SCCS and not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "namespace.h"
 #include <sys/param.h>
 #include <sys/sysctl.h>
 #include <errno.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include "un-namespace.h"
 
 #include "random.h"
 
 /*
  * random.c:
  *
  * An improved random number generation package.  In addition to the standard
  * rand()/srand() like interface, this package also has a special state info
  * interface.  The initstate() routine is called with a seed, an array of
  * bytes, and a count of how many bytes are being passed in; this array is
  * then initialized to contain information for random number generation with
  * that much state information.  Good sizes for the amount of state
  * information are 32, 64, 128, and 256 bytes.  The state can be switched by
  * calling the setstate() routine with the same array as was initialized
  * with initstate().  By default, the package runs with 128 bytes of state
  * information and generates far better random numbers than a linear
  * congruential generator.  If the amount of state information is less than
  * 32 bytes, a simple linear congruential R.N.G. is used.
  *
  * Internally, the state information is treated as an array of uint32_t's; the
  * zeroeth element of the array is the type of R.N.G. being used (small
  * integer); the remainder of the array is the state information for the
  * R.N.G.  Thus, 32 bytes of state information will give 7 ints worth of
  * state information, which will allow a degree seven polynomial.  (Note:
  * the zeroeth word of state information also has some other information
  * stored in it -- see setstate() for details).
  *
  * The random number generation technique is a linear feedback shift register
  * approach, employing trinomials (since there are fewer terms to sum up that
  * way).  In this approach, the least significant bit of all the numbers in
  * the state table will act as a linear feedback shift register, and will
  * have period 2^deg - 1 (where deg is the degree of the polynomial being
  * used, assuming that the polynomial is irreducible and primitive).  The
  * higher order bits will have longer periods, since their values are also
  * influenced by pseudo-random carries out of the lower bits.  The total
  * period of the generator is approximately deg*(2**deg - 1); thus doubling
  * the amount of state information has a vast influence on the period of the
  * generator.  Note: the deg*(2**deg - 1) is an approximation only good for
  * large deg, when the period of the shift is the dominant factor.
  * With deg equal to seven, the period is actually much longer than the
  * 7*(2**7 - 1) predicted by this formula.
  *
  * Modified 28 December 1994 by Jacob S. Rosenberg.
  * The following changes have been made:
  * All references to the type u_int have been changed to unsigned long.
  * All references to type int have been changed to type long.  Other
  * cleanups have been made as well.  A warning for both initstate and
  * setstate has been inserted to the effect that on Sparc platforms
  * the 'arg_state' variable must be forced to begin on word boundaries.
  * This can be easily done by casting a long integer array to char *.
  * The overall logic has been left STRICTLY alone.  This software was
  * tested on both a VAX and Sun SparcStation with exactly the same
  * results.  The new version and the original give IDENTICAL results.
  * The new version is somewhat faster than the original.  As the
  * documentation says:  "By default, the package runs with 128 bytes of
  * state information and generates far better random numbers than a linear
  * congruential generator.  If the amount of state information is less than
  * 32 bytes, a simple linear congruential R.N.G. is used."  For a buffer of
  * 128 bytes, this new version runs about 19 percent faster and for a 16
  * byte buffer it is about 5 percent faster.
  */
 
-/*
- * For each of the currently supported random number generators, we have a
- * break value on the amount of state information (you need at least this
- * many bytes of state info to support this random number generator), a degree
- * for the polynomial (actually a trinomial) that the R.N.G. is based on, and
- * the separation between the two lower order coefficients of the trinomial.
- */
-#define	TYPE_0		0		/* linear congruential */
-#define	BREAK_0		8
-#define	DEG_0		0
-#define	SEP_0		0
-
-#define	TYPE_1		1		/* x**7 + x**3 + 1 */
-#define	BREAK_1		32
-#define	DEG_1		7
-#define	SEP_1		3
-
-#define	TYPE_2		2		/* x**15 + x + 1 */
-#define	BREAK_2		64
-#define	DEG_2		15
-#define	SEP_2		1
-
-#define	TYPE_3		3		/* x**31 + x**3 + 1 */
-#define	BREAK_3		128
-#define	DEG_3		31
-#define	SEP_3		3
-
-#define	TYPE_4		4		/* x**63 + x + 1 */
-#define	BREAK_4		256
-#define	DEG_4		63
-#define	SEP_4		1
-
-/*
- * Array versions of the above information to make code run faster --
- * relies on fact that TYPE_i == i.
- */
-#define	MAX_TYPES	5		/* max number of types above */
-
 #define NSHUFF 50       /* to drop some "seed -> 1st value" linearity */
 
 static const int degrees[MAX_TYPES] =	{ DEG_0, DEG_1, DEG_2, DEG_3, DEG_4 };
-static const int seps [MAX_TYPES] =	{ SEP_0, SEP_1, SEP_2, SEP_3, SEP_4 };
+static const int seps[MAX_TYPES] =	{ SEP_0, SEP_1, SEP_2, SEP_3, SEP_4 };
+static const int breaks[MAX_TYPES] = {
+	BREAK_0, BREAK_1, BREAK_2, BREAK_3, BREAK_4
+};
 
 /*
  * Initially, everything is set up as if from:
  *
  *	initstate(1, randtbl, 128);
  *
  * Note that this initialization takes advantage of the fact that srandom()
  * advances the front and rear pointers 10*rand_deg times, and hence the
  * rear pointer which starts at 0 will also end up at zero; thus the zeroeth
  * element of the state information, which contains info about the current
  * position of the rear pointer is just
  *
  *	MAX_TYPES * (rptr - state) + TYPE_3 == TYPE_3.
  */
 static struct __random_state implicit = {
 	/* Word 0 is the generator type; the DEG_3 (31) words after it are the state. */
 	.rst_randtbl = {
 		TYPE_3,
 		0x2cf41758, 0x27bb3711, 0x4916d4d1, 0x7b02f59f, 0x9b8e28eb, 0xc0e80269,
 		0x696f5c16, 0x878f1ff5, 0x52d9c07f, 0x916a06cd, 0xb50b3a20, 0x2776970a,
 		0xee4eb2a6, 0xe94640ec, 0xb1d65612, 0x9d1ed968, 0x1043f6b7, 0xa3432a76,
 		0x17eacbb9, 0x3c09e2eb, 0x4f8c2b3,  0x708a1f57, 0xee341814, 0x95d0e4d2,
 		0xb06f216c, 0x8bd2e72e, 0x8f7c38d7, 0xcfc6a8fc, 0x2a59495,  0xa20d2a69,
 		0xe29d12d1
 	},
 
 	/*
 	 * fptr and rptr are two pointers into the state info, a front and a rear
 	 * pointer.  These two pointers are always rand_sep places apart, as they
 	 * cycle cyclically through the state information.  (Yes, this does mean we
 	 * could get away with just one pointer, but the code for random() is more
 	 * efficient this way).  The pointers are left positioned as they would be
 	 * from the call
 	 *
 	 *	initstate(1, randtbl, 128);
 	 *
 	 * (The position of the rear pointer, rptr, is really 0 (as explained above
 	 * in the initialization of randtbl) because the state table pointer is set
 	 * to point to randtbl[1] (as explained below).)
 	 */
 	.rst_fptr = &implicit.rst_randtbl[SEP_3 + 1],
 	.rst_rptr = &implicit.rst_randtbl[1],
 
 	/*
 	 * The following things are the pointer to the state information table, the
 	 * type of the current generator, the degree of the current polynomial being
 	 * used, and the separation between the two pointers.  Note that for efficiency
 	 * of random(), we remember the first location of the state information, not
 	 * the zeroeth.  Hence it is valid to access state[-1], which is used to
 	 * store the type of the R.N.G.  Also, we remember the last location, since
 	 * this is more efficient than indexing every time to find the address of
 	 * the last element to see if the front and rear pointers have wrapped.
 	 */
 	.rst_state = &implicit.rst_randtbl[1],
 	.rst_type = TYPE_3,
 	.rst_deg = DEG_3,
 	.rst_sep = SEP_3,
 	/* One past the last state word: rst_state + DEG_3. */
 	.rst_end_ptr = &implicit.rst_randtbl[DEG_3 + 1],
 };
 
 /*
  * This is the same low quality PRNG used in rand(3) in FreeBSD 12 and prior.
  * It may be sufficient for distributing bits and expanding a small seed
  * integer into a larger state.
  */
 static inline uint32_t
 parkmiller32(uint32_t ctx)
 {
 	/*
 	 * One step of the Park-Miller "minimal standard" generator:
 	 *
 	 *	x' = (7^5 * x) mod (2^31 - 1)
 	 *
 	 * evaluated without overflowing 31 bits by using the decomposition
 	 *
 	 *	(2^31 - 1) = 127773 * (7^5) + 2836
 	 *
 	 * From "Random number generators: good ones are hard to find",
 	 * Park and Miller, Communications of the ACM, vol. 31, no. 10,
 	 * October 1988, p. 1195.
 	 */
 	const int32_t mod = 0x7fffffff;		/* 2^31 - 1 */
 	const int32_t mul = 16807;		/* 7^5 */
 	const int32_t quo = 127773;		/* mod / mul */
 	const int32_t rem = 2836;		/* mod % mul */
 	int32_t seed, next;
 
 	/* Fold the 32-bit input into the range [1, 0x7ffffffe]. */
 	seed = (int32_t)(ctx % 0x7ffffffe) + 1;
 
 	next = mul * (seed % quo) - rem * (seed / quo);
 	if (next < 0)
 		next += mod;
 
 	/* Shift the result back down to the range [0, 0x7ffffffd]. */
 	return ((uint32_t)(next - 1));
 }
 
 /*
  * srandom:
  *
  * Initialize the random number generator based on the given seed.  If the
  * type is the trivial no-state-information type, just remember the seed.
  * Otherwise, initializes state[] based on the given "seed" via a linear
  * congruential generator.  Then, the pointers are set to known locations
  * that are exactly rand_sep places apart.  Lastly, it cycles the state
  * information a given number of times to get rid of any initial dependencies
  * introduced by the L.C.R.N.G.  Note that the initialization of randtbl[]
  * for default usage relies on values produced by this routine.
  */
 void
 srandom_r(struct __random_state *estate, unsigned x)
 {
 	uint32_t *tbl = estate->rst_state;
 	int idx, warmup;
 
 	/* The seed always lands in the first state word. */
 	tbl[0] = (uint32_t)x;
 
 	/* TYPE_0 only needs the fixed number of discarded outputs. */
 	warmup = NSHUFF;
 	if (estate->rst_type != TYPE_0) {
 		/* Expand the seed across the table with the Park-Miller LCG. */
 		for (idx = 1; idx < estate->rst_deg; idx++)
 			tbl[idx] = parkmiller32(tbl[idx - 1]);
 
 		/* Rear pointer at word 0, front pointer rst_sep words ahead. */
 		estate->rst_rptr = tbl;
 		estate->rst_fptr = tbl + estate->rst_sep;
 		warmup = 10 * estate->rst_deg;
 	}
 
 	/* Discard the first outputs to wash out the seeding pattern. */
 	while (warmup-- > 0)
 		(void)random_r(estate);
 }
 
 /* Seed the file-scope `implicit' generator state via srandom_r(). */
 void
 srandom(unsigned x)
 {
 	srandom_r(&implicit, x);
 }
 
 /*
  * srandomdev:
  *
  * Many programs choose the seed value in a totally predictable manner.
  * This often causes problems.  We seed the generator using pseudo-random
  * data from the kernel.
  *
  * Note that this particular seeding procedure can generate states
  * which are impossible to reproduce by calling srandom() with any
  * value, since the succeeding terms in the state buffer are no longer
  * derived from the LC algorithm applied to a fixed seed.
  */
 void
 srandomdev_r(struct __random_state *estate)
 {
 	int mib[2] = { CTL_KERN, KERN_ARND };
 	size_t nwords, want, got;
 
 	/* TYPE_0 keeps one word of state; the others keep rst_deg words. */
 	nwords = (estate->rst_type == TYPE_0) ? 1 : estate->rst_deg;
 	want = nwords * sizeof(estate->rst_state[0]);
 	got = want;
 
 	if (sysctl(mib, 2, estate->rst_state, &got, NULL, 0) == -1 ||
 	    got != want) {
 		/*
 		 * The sysctl cannot fail. If it does fail on some FreeBSD
 		 * derivative or after some future change, just abort so that
 		 * the problem will be found and fixed. abort is not normally
 		 * suitable for a library but makes sense here.
 		 */
 		abort();
 	}
 
 	/* The trivial generator has no front/rear pointers to reset. */
 	if (estate->rst_type == TYPE_0)
 		return;
 
 	estate->rst_rptr = &estate->rst_state[0];
 	estate->rst_fptr = &estate->rst_state[estate->rst_sep];
 }
 
 /* Reseed the file-scope `implicit' generator state via srandomdev_r(). */
 void
 srandomdev(void)
 {
 	srandomdev_r(&implicit);
 }
 
 /*
  * initstate_r:
  *
  * Initialize the state information in the given array of n bytes for future
  * random number generation.  Based on the number of bytes we are given, and
  * the break values for the different R.N.G.'s, we choose the best (largest)
  * one we can and set things up for it.  srandom() is then called to
  * initialize the state information.
  *
  * Returns zero on success, or an error number on failure.
  *
  * Note: There is no need for a setstate_r(); just use a new context.
  */
 int
 initstate_r(struct __random_state *estate, unsigned seed, uint32_t *arg_state,
     size_t sz)
 {
 	int type, deg, sep;
 
 	/* Not even enough room for the linear congruential generator. */
 	if (sz < BREAK_0)
 		return (EINVAL);
 
 	/* Walk down from the largest generator to the first one that fits. */
 	if (sz >= BREAK_4) {
 		type = TYPE_4;
 		deg = DEG_4;
 		sep = SEP_4;
 	} else if (sz >= BREAK_3) {
 		type = TYPE_3;
 		deg = DEG_3;
 		sep = SEP_3;
 	} else if (sz >= BREAK_2) {
 		type = TYPE_2;
 		deg = DEG_2;
 		sep = SEP_2;
 	} else if (sz >= BREAK_1) {
 		type = TYPE_1;
 		deg = DEG_1;
 		sep = SEP_1;
 	} else {
 		type = TYPE_0;
 		deg = DEG_0;
 		sep = SEP_0;
 	}
 
 	estate->rst_type = type;
 	estate->rst_deg = deg;
 	estate->rst_sep = sep;
 
 	/* Word 0 of the caller's array is skipped; state begins at word 1. */
 	estate->rst_state = arg_state + 1;
 	estate->rst_end_ptr = estate->rst_state + deg;
 
 	srandom_r(estate, seed);
 	return (0);
 }
 
 /*
  * initstate:
  *
  * Note: the first thing we do is save the current state, if any, just like
  * setstate() so that it doesn't matter when initstate is called.
  *
  * Note that on return from initstate_r(), we set state[-1] to be the type
  * multiplexed with the current value of the rear pointer; this is so
  * successive calls to initstate() won't lose this information and will be able
  * to restart with setstate().
  *
  * Returns a pointer to the old state.
  *
  * Despite the misleading "char *" type, arg_state must alias an array of
  * 32-bit unsigned integer values.  Naturally, such an array is 32-bit aligned.
  * Usually objects are naturally aligned to at least 32-bits on all platforms,
  * but if you treat the provided 'state' as char* you may inadvertently
  * misalign it.  Don't do that.
  */
 char *
 initstate(unsigned int seed, char *arg_state, size_t n)
 {
 	uint32_t *words = (uint32_t *)arg_state;
 	/* Captured up front: initstate_r() repoints implicit.rst_state. */
 	uint32_t *prev = &implicit.rst_state[-1];
 
 	/*
 	 * Record the generator type, and for the non-trivial generators the
 	 * rear pointer offset as well, in word 0 of the state being replaced.
 	 */
 	*prev = (implicit.rst_type == TYPE_0) ? implicit.rst_type :
 	    MAX_TYPES * (implicit.rst_rptr - implicit.rst_state) +
 	    implicit.rst_type;
 
 	if (initstate_r(&implicit, seed, words, n) != 0)
 		return (NULL);
 
 	/* Record the same information in word 0 of the new state array. */
 	words[0] = (implicit.rst_type == TYPE_0) ? implicit.rst_type :
 	    MAX_TYPES * (implicit.rst_rptr - implicit.rst_state) +
 	    implicit.rst_type;
 
 	return ((char *)prev);
 }
 
 /*
  * setstate:
  *
  * Restore the state from the given state array.
  *
  * Note: it is important that we also remember the locations of the pointers
  * in the current state information, and restore the locations of the pointers
  * from the old state information.  This is done by multiplexing the pointer
  * location into the zeroeth word of the state information.
  *
  * Note that due to the order in which things are done, it is OK to call
  * setstate() with the same state as the current state.
  *
  * Returns a pointer to the old state information.
  *
  * Note: The Sparc platform requires that arg_state begin on an int
  * word boundary; otherwise a bus error will occur. Even so, lint will
  * complain about mis-alignment, but you should disregard these messages.
  */
 char *
 setstate(char *arg_state)
 {
 	uint32_t *incoming = (uint32_t *)arg_state;
 	uint32_t *outgoing = &implicit.rst_state[-1];
 	/*
 	 * Decode word 0 of the incoming array before anything is written, so
 	 * that passing the current state back in still works.
 	 */
 	uint32_t tag = incoming[0];
 	uint32_t type = tag % MAX_TYPES;
 	uint32_t rear = tag / MAX_TYPES;
 
 	/* A rear offset outside the table means the state was garbled. */
 	if (type != TYPE_0 && rear >= degrees[type])
 		return (NULL);
 
 	/* Save type (and rear offset, if any) into the outgoing state. */
 	*outgoing = (implicit.rst_type == TYPE_0) ? implicit.rst_type :
 	    MAX_TYPES * (implicit.rst_rptr - implicit.rst_state) +
 	    implicit.rst_type;
 
 	/* Switch the implicit generator over to the incoming array. */
 	implicit.rst_type = type;
 	implicit.rst_deg = degrees[type];
 	implicit.rst_sep = seps[type];
 	implicit.rst_state = incoming + 1;
 	implicit.rst_end_ptr = implicit.rst_state + implicit.rst_deg;
 
 	if (type != TYPE_0) {
 		implicit.rst_rptr = implicit.rst_state + rear;
 		implicit.rst_fptr = implicit.rst_state +
 		    (rear + implicit.rst_sep) % implicit.rst_deg;
 	}
 	return ((char *)outgoing);
 }
 
 /*
  * random:
  *
  * If we are using the trivial TYPE_0 R.N.G., just do the old linear
  * congruential bit.  Otherwise, we do our fancy trinomial stuff, which is
  * the same in all the other cases due to all the global variables that have
  * been set up.  The basic operation is to add the number at the rear pointer
  * into the one at the front pointer.  Then both pointers are advanced to
  * the next location cyclically in the table.  The value returned is the sum
  * generated, reduced to 31 bits by throwing away the "least random" low bit.
  *
  * Note: the code takes advantage of the fact that both the front and
  * rear pointers can't wrap on the same call by not testing the rear
  * pointer if the front one has wrapped.
  *
  * Returns a 31-bit random number.
  */
 long
 random_r(struct __random_state *estate)
 {
 	uint32_t *front, *rear, *limit;
 	uint32_t out;
 
 	/* Trivial generator: one Park-Miller step on the single state word. */
 	if (estate->rst_type == TYPE_0) {
 		out = parkmiller32(estate->rst_state[0]);
 		estate->rst_state[0] = out;
 		return ((long)out);
 	}
 
 	front = estate->rst_fptr;
 	rear = estate->rst_rptr;
 	limit = estate->rst_end_ptr;
 
 	/* Add the rear word into the front word and drop the low bit. */
 	*front += *rear;
 	out = *front >> 1;
 
 	/*
 	 * Advance both pointers.  Only one of them can reach the end of the
 	 * table on a given call, so the rear pointer is tested only when the
 	 * front pointer did not wrap.
 	 */
 	front++;
 	rear++;
 	if (front >= limit)
 		front = estate->rst_state;
 	else if (rear >= limit)
 		rear = estate->rst_state;
 
 	estate->rst_fptr = front;
 	estate->rst_rptr = rear;
 	return ((long)out);
 }
 
 /* Draw the next value from the file-scope `implicit' state via random_r(). */
 long
 random(void)
 {
 	return (random_r(&implicit));
+}
+
+/*
+ * allocatestate:
+ *
+ * Allocate memory for a generator context of the requested type: the size
+ * of struct __random_state plus breaks[type] additional bytes (presumably
+ * backing the trailing rst_randtbl[] member -- confirm against callers).
+ * A type below TYPE_1 aborts; a type above MAX_TYPES - 1 is clamped to
+ * MAX_TYPES - 1.
+ *
+ * Returns whatever malloc() returns, so the result may be NULL and the
+ * memory is not initialized by this function.
+ */
+struct __random_state *
+allocatestate(unsigned type)
+{
+	size_t asize;
+
+	/* No point using this interface to get the Park-Miller LCG. */
+	if (type < TYPE_1)
+		abort();
+	/* Clamp to widest supported variant. */
+	if (type > (MAX_TYPES - 1))
+		type = (MAX_TYPES - 1);
+
+	/* Fixed header plus the byte count the chosen generator needs. */
+	asize = sizeof(struct __random_state) + (size_t)breaks[type];
+	return (malloc(asize));
 }
Index: projects/clang1000-import/lib/libc/stdlib/random.h
===================================================================
--- projects/clang1000-import/lib/libc/stdlib/random.h	(revision 357389)
+++ projects/clang1000-import/lib/libc/stdlib/random.h	(revision 357390)
@@ -1,46 +1,85 @@
 /*-
  * Copyright 2020 Conrad Meyer <cem@FreeBSD.org>.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #pragma once
 
+/*
+ * For each of the currently supported random number generators, we have a
+ * break value on the amount of state information (you need at least this
+ * many bytes of state info to support this random number generator), a degree
+ * for the polynomial (actually a trinomial) that the R.N.G. is based on, and
+ * the separation between the two lower order coefficients of the trinomial.
+ */
+#define	TYPE_0		0		/* linear congruential */
+#define	BREAK_0		8
+#define	DEG_0		0
+#define	SEP_0		0
+
+#define	TYPE_1		1		/* x**7 + x**3 + 1 */
+#define	BREAK_1		32
+#define	DEG_1		7
+#define	SEP_1		3
+
+#define	TYPE_2		2		/* x**15 + x + 1 */
+#define	BREAK_2		64
+#define	DEG_2		15
+#define	SEP_2		1
+
+#define	TYPE_3		3		/* x**31 + x**3 + 1 */
+#define	BREAK_3		128
+#define	DEG_3		31
+#define	SEP_3		3
+
+#define	TYPE_4		4		/* x**63 + x + 1 */
+#define	BREAK_4		256
+#define	DEG_4		63
+#define	SEP_4		1
+
+/*
+ * Array versions of the above information to make code run faster --
+ * relies on fact that TYPE_i == i.
+ */
+#define	MAX_TYPES	5		/* max number of types above */
+
 /* A full instance of the random(3) generator. */
 struct __random_state {
 	uint32_t	*rst_fptr;	/* front pointer into the state words */
 	uint32_t	*rst_rptr;	/* rear pointer into the state words */
 	uint32_t	*rst_state;	/* first state word; rst_state[-1] holds the type */
 	int		rst_type;	/* generator in use, one of TYPE_0..TYPE_4 */
 	int		rst_deg;	/* degree of the polynomial (number of state words) */
 	int		rst_sep;	/* separation between front and rear pointers */
 	uint32_t	*rst_end_ptr;	/* &rst_state[rst_deg], used for wrap checks */
 	/* Flexible array member must be last. */
 	uint32_t	rst_randtbl[];
 };
 
+struct __random_state *allocatestate(unsigned type);
 int initstate_r(struct __random_state *, unsigned, uint32_t *, size_t);
 long random_r(struct __random_state *);
 void srandom_r(struct __random_state *, unsigned);
 void srandomdev_r(struct __random_state *);
Index: projects/clang1000-import/share/man/man4/hwpstate_intel.4
===================================================================
--- projects/clang1000-import/share/man/man4/hwpstate_intel.4	(revision 357389)
+++ projects/clang1000-import/share/man/man4/hwpstate_intel.4	(revision 357390)
@@ -1,89 +1,97 @@
 .\"
 .\" Copyright (c) 2019 Intel Corporation
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 22, 2020
+.Dd February 1, 2020
 .Dt HWPSTATE_INTEL 4
 .Os
 .Sh NAME
 .Nm hwpstate_intel
 .Nd Intel Speed Shift Technology driver
 .Sh SYNOPSIS
 To compile this driver into your kernel
 place the following line in your kernel
 configuration file:
 .Bd -ragged -offset indent
 .Cd "device cpufreq"
 .Ed
 .Sh DESCRIPTION
 The
 .Nm
 driver provides support for hardware-controlled performance states on Intel
 platforms, also known as Intel Speed Shift Technology.
 .Sh LOADER TUNABLES
 .Bl -tag -width indent
 .It Va hint.hwpstate_intel.0.disabled
 Can be used to disable
 .Nm ,
 allowing other compatible drivers to manage performance states, like
 .Xr est 4 .
-.Pq default 0
+Defaults to
+.Dv Qq 0
+(enabled).
+.It Va machdep.hwpstate_pkg_ctrl
+Selects between package-level control (the default) and per-core control.
+.Dv Qq 1
+selects package-level control and
+.Dv Qq 0
+selects core-level control.
 .El
 .Sh SYSCTL VARIABLES
 The following
 .Xr sysctl 8
 values are available
 .Bl -tag -width indent
 .It Va dev.hwpstate_intel.%d.\%desc
 Describes the attached driver.
 .It dev.hwpstate_intel.0.%desc: Intel Speed Shift
 .It Va dev.hwpstate_intel.%d.\%driver
 Driver in use, always hwpstate_intel.
 .It dev.hwpstate_intel.0.%driver: hwpstate_intel
 .It Va dev.hwpstate_intel.%d.\%parent
 .It dev.hwpstate_intel.0.%parent: cpu0
 The CPU that is exposing these frequencies.
 For example
 .Va cpu0 .
 .It Va dev.hwpstate_intel.%d.epp
 Energy/Performance Preference.
 Valid values range from 0 to 100.
 Setting this field conveys a hint to the hardware regarding a preference towards
 performance (at value 0), energy efficiency (at value 100), or somewhere in
 between.
 .It dev.hwpstate_intel.0.epp: 0
 .El
 .Sh COMPATIBILITY
 .Nm
 is only found on supported Intel CPUs.
 .Sh SEE ALSO
 .Xr cpufreq 4
 .Rs
 .%T "Intel 64 and IA-32 Architectures Software Developer Manuals"
 .%U "http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html"
 .Re
 .Sh AUTHORS
 This manual page was written by
 .An D Scott Phillips Aq Mt scottph@FreeBSD.org .
Index: projects/clang1000-import/sys/compat/linux/linux_getcwd.c
===================================================================
--- projects/clang1000-import/sys/compat/linux/linux_getcwd.c	(revision 357389)
+++ projects/clang1000-import/sys/compat/linux/linux_getcwd.c	(revision 357390)
@@ -1,87 +1,84 @@
 /* $OpenBSD: linux_getcwd.c,v 1.2 2001/05/16 12:50:21 ho Exp $ */
 /* $NetBSD: vfs_getcwd.c,v 1.3.2.3 1999/07/11 10:24:09 sommerfeld Exp $ */
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
  *
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
  * Copyright (c) 2015 The FreeBSD Foundation
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Bill Sommerfeld.
  *
  * Portions of this software were developed by Edward Tomasz Napierala
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
-#include <sys/syscallsubr.h>
+#include <sys/vnode.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 
 #ifdef COMPAT_LINUX32
 #include <machine/../linux32/linux.h>
 #include <machine/../linux32/linux32_proto.h>
 #else
 #include <machine/../linux/linux.h>
 #include <machine/../linux/linux_proto.h>
 #endif
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_util.h>
 
 /*
  * Find pathname of process's current directory.
  */
 int
-linux_getcwd(struct thread *td, struct linux_getcwd_args *args)
+linux_getcwd(struct thread *td, struct linux_getcwd_args *uap)
 {
-	char *path;
-	int error, lenused;
+	char *buf, *retbuf;
+	size_t buflen;
+	int error;
 
-	/*
-	 * Linux returns ERANGE instead of EINVAL.
-	 */
-	if (args->bufsize < 2)
+	buflen = uap->bufsize;
+	if (__predict_false(buflen < 2))
 		return (ERANGE);
+	if (buflen > LINUX_PATH_MAX)
+		buflen = LINUX_PATH_MAX;
 
-	path = malloc(LINUX_PATH_MAX, M_LINUX, M_WAITOK);
-
-	error = kern___getcwd(td, path, UIO_SYSSPACE, args->bufsize,
-	    LINUX_PATH_MAX);
+	buf = malloc(buflen, M_TEMP, M_WAITOK);
+	error = vn_getcwd(td, buf, &retbuf, &buflen);
 	if (error == 0) {
-		lenused = strlen(path) + 1;
-		error = copyout(path, args->buf, lenused);
+		error = copyout(retbuf, uap->buf, buflen);
 		if (error == 0)
-			td->td_retval[0] = lenused;
+			td->td_retval[0] = buflen;
 	}
-
-	free(path, M_LINUX);
+	free(buf, M_TEMP);
 	return (error);
 }
Index: projects/clang1000-import/sys/dev/tpm/tpm_crb.c
===================================================================
--- projects/clang1000-import/sys/dev/tpm/tpm_crb.c	(revision 357389)
+++ projects/clang1000-import/sys/dev/tpm/tpm_crb.c	(revision 357390)
@@ -1,420 +1,421 @@
 /*-
  * Copyright (c) 2018 Stormshield.
  * Copyright (c) 2018 Semihalf.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "tpm20.h"
 
 /*
  * CRB register space as defined in
  * TCG_PC_Client_Platform_TPM_Profile_PTP_2.0_r1.03_v22
  */
 #define	TPM_LOC_STATE			0x0
 #define	TPM_LOC_CTRL			0x8
 #define	TPM_LOC_STS			0xC
 #define	TPM_CRB_INTF_ID			0x30
 #define	TPM_CRB_CTRL_EXT		0x38
 #define	TPM_CRB_CTRL_REQ		0x40
 #define	TPM_CRB_CTRL_STS		0x44
 #define	TPM_CRB_CTRL_CANCEL		0x48
 #define	TPM_CRB_CTRL_START		0x4C
 #define	TPM_CRB_INT_ENABLE		0x50
 #define	TPM_CRB_INT_STS			0x54
 #define	TPM_CRB_CTRL_CMD_SIZE		0x58
 #define	TPM_CRB_CTRL_CMD_LADDR		0x5C
 #define	TPM_CRB_CTRL_CMD_HADDR		0x60
 #define	TPM_CRB_CTRL_RSP_SIZE		0x64
 #define	TPM_CRB_CTRL_RSP_ADDR		0x68
 #define	TPM_CRB_CTRL_RSP_HADDR		0x6c
 #define	TPM_CRB_DATA_BUFFER		0x80
 
 #define	TPM_LOC_STATE_ESTB		BIT(0)
 #define	TPM_LOC_STATE_ASSIGNED		BIT(1)
 #define	TPM_LOC_STATE_ACTIVE_MASK	0x9C
 #define	TPM_LOC_STATE_VALID		BIT(7)
 
 #define	TPM_CRB_INTF_ID_TYPE_CRB	0x1
 #define	TPM_CRB_INTF_ID_TYPE		0x7
 
 #define	TPM_LOC_CTRL_REQUEST		BIT(0)
 #define	TPM_LOC_CTRL_RELINQUISH		BIT(1)
 
 #define	TPM_CRB_CTRL_REQ_GO_READY	BIT(0)
 #define	TPM_CRB_CTRL_REQ_GO_IDLE	BIT(1)
 
 #define	TPM_CRB_CTRL_STS_ERR_BIT	BIT(0)
 #define	TPM_CRB_CTRL_STS_IDLE_BIT	BIT(1)
 
-#define	TPM_CRB_CTRL_CANCEL_CMD		BIT(0)
+#define	TPM_CRB_CTRL_CANCEL_CMD		0x1
+#define	TPM_CRB_CTRL_CANCEL_CLEAR	0x0
 
 #define	TPM_CRB_CTRL_START_CMD		BIT(0)
 
 #define	TPM_CRB_INT_ENABLE_BIT		BIT(31)
 
 struct tpmcrb_sc {
 	struct tpm_sc	base;
 	bus_size_t	cmd_off;
 	bus_size_t	rsp_off;
 	size_t		cmd_buf_size;
 	size_t		rsp_buf_size;
 };
 
 
 int tpmcrb_transmit(struct tpm_sc *sc, size_t size);
 
 static int tpmcrb_acpi_probe(device_t dev);
 static int tpmcrb_attach(device_t dev);
 static int tpmcrb_detach(device_t dev);
 
 static ACPI_STATUS tpmcrb_fix_buff_offsets(ACPI_RESOURCE *res, void *arg);
 
 static bool tpm_wait_for_u32(struct tpm_sc *sc, bus_size_t off,
     uint32_t mask, uint32_t val, int32_t timeout);
 static bool tpmcrb_request_locality(struct tpm_sc *sc, int locality);
 static void tpmcrb_relinquish_locality(struct tpm_sc *sc);
 static bool tpmcrb_cancel_cmd(struct tpm_sc *sc);
 
 char *tpmcrb_ids[] = {"MSFT0101", NULL};
 
 static int
 tpmcrb_acpi_probe(device_t dev)
 {
 	int err;
 	ACPI_TABLE_TPM23 *tbl;
 	ACPI_STATUS status;
 	err = ACPI_ID_PROBE(device_get_parent(dev), dev, tpmcrb_ids, NULL);
 	if (err > 0)
 		return (err);
 	/*Find TPM2 Header*/
 	status = AcpiGetTable(ACPI_SIG_TPM2, 1, (ACPI_TABLE_HEADER **) &tbl);
 	if(ACPI_FAILURE(status) ||
 	   tbl->StartMethod != TPM2_START_METHOD_CRB)
 		err = ENXIO;
 
 	device_set_desc(dev, "Trusted Platform Module 2.0, CRB mode");
 	return (err);
 }
 
 static ACPI_STATUS
 tpmcrb_fix_buff_offsets(ACPI_RESOURCE *res, void *arg)
 {
 	struct tpmcrb_sc *crb_sc;
 	size_t length;
 	uint32_t base_addr;
 
 	crb_sc = (struct tpmcrb_sc *)arg;
 
 	if (res->Type != ACPI_RESOURCE_TYPE_FIXED_MEMORY32)
 		return (AE_OK);
 
 	base_addr = res->Data.FixedMemory32.Address;
 	length = res->Data.FixedMemory32.AddressLength;
 
 	if (crb_sc->cmd_off > base_addr && crb_sc->cmd_off < base_addr + length)
 		crb_sc->cmd_off -= base_addr;
 	if (crb_sc->rsp_off > base_addr && crb_sc->rsp_off < base_addr + length)
 		crb_sc->rsp_off -= base_addr;
 
 	return (AE_OK);
 }
 
 static int
 tpmcrb_attach(device_t dev)
 {
 	struct tpmcrb_sc *crb_sc;
 	struct tpm_sc *sc;
 	ACPI_HANDLE handle;
 	ACPI_STATUS status;
 	int result;
 
 	crb_sc = device_get_softc(dev);
 	sc = &crb_sc->base;
 	handle = acpi_get_handle(dev);
 
 	sc->dev = dev;
 
 	sc->mem_rid = 0;
 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid,
 					     RF_ACTIVE);
 	if (sc->mem_res == NULL)
 		return (ENXIO);
 
 	if(!tpmcrb_request_locality(sc, 0)) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    sc->mem_rid, sc->mem_res);
 		return (ENXIO);
 	}
 
 	/*
 	 * Disable all interrupts for now, since I don't have a device that
 	 * works in CRB mode and supports them.
 	 */
 	AND4(sc, TPM_CRB_INT_ENABLE, ~TPM_CRB_INT_ENABLE_BIT);
 	sc->interrupts = false;
 
 	/*
 	 * Read addresses of Tx/Rx buffers and their sizes. Note that they
 	 * can be implemented by a single buffer. Also for some reason CMD
 	 * addr is stored in two 4 byte neighboring registers, whereas RSP is
 	 * stored in a single 8 byte one.
 	 */
 #ifdef __amd64__
 	crb_sc->rsp_off = RD8(sc, TPM_CRB_CTRL_RSP_ADDR);
 #else
 	crb_sc->rsp_off = RD4(sc, TPM_CRB_CTRL_RSP_ADDR);
 	crb_sc->rsp_off |= ((uint64_t) RD4(sc, TPM_CRB_CTRL_RSP_HADDR) << 32);
 #endif
 	crb_sc->cmd_off = RD4(sc, TPM_CRB_CTRL_CMD_LADDR);
 	crb_sc->cmd_off |= ((uint64_t) RD4(sc, TPM_CRB_CTRL_CMD_HADDR) << 32);
 	crb_sc->cmd_buf_size = RD4(sc, TPM_CRB_CTRL_CMD_SIZE);
 	crb_sc->rsp_buf_size = RD4(sc, TPM_CRB_CTRL_RSP_SIZE);
 
 	tpmcrb_relinquish_locality(sc);
 
 	/* Emulator returns address in acpi space instead of an offset */
 	status = AcpiWalkResources(handle, "_CRS", tpmcrb_fix_buff_offsets,
 		    (void *)crb_sc);
 	if (ACPI_FAILURE(status)) {
 		tpmcrb_detach(dev);
 		return (ENXIO);
 	}
 
 	if (crb_sc->rsp_off == crb_sc->cmd_off) {
 		/*
 		 * If Tx/Rx buffers are implemented as one they have to be of
 		 * same size
 		 */
 		if (crb_sc->cmd_buf_size != crb_sc->rsp_buf_size) {
 			device_printf(sc->dev,
 			    "Overlapping Tx/Rx buffers have different sizes\n");
 			tpmcrb_detach(dev);
 			return (ENXIO);
 		}
 	}
 
 	sc->transmit = tpmcrb_transmit;
 
 	result = tpm20_init(sc);
 	if (result != 0)
 		tpmcrb_detach(dev);
 
 	return (result);
 }
 
 static int
 tpmcrb_detach(device_t dev)
 {
 	struct tpm_sc *sc;
 
 	sc = device_get_softc(dev);
 	tpm20_release(sc);
 
 	if (sc->mem_res != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    sc->mem_rid, sc->mem_res);
 
 	return (0);
 }
 
 static bool
 tpm_wait_for_u32(struct tpm_sc *sc, bus_size_t off, uint32_t mask, uint32_t val,
     int32_t timeout)
 {
 
 	/* Check for condition */
 	if ((RD4(sc, off) & mask) == val)
 		return (true);
 
 	while (timeout > 0) {
 		if ((RD4(sc, off) & mask) == val)
 			return (true);
 
 		pause("TPM in polling mode", 1);
 		timeout -= tick;
 	}
 	return (false);
 }
 
 static bool
 tpmcrb_request_locality(struct tpm_sc *sc, int locality)
 {
 	uint32_t mask;
 
 	/* Currently we only support Locality 0 */
 	if (locality != 0)
 		return (false);
 
 	mask = TPM_LOC_STATE_VALID | TPM_LOC_STATE_ASSIGNED;
 
 	OR4(sc, TPM_LOC_CTRL, TPM_LOC_CTRL_REQUEST);
 	if (!tpm_wait_for_u32(sc, TPM_LOC_STATE, mask, mask, TPM_TIMEOUT_C))
 		return (false);
 
 	return (true);
 }
 
 static void
 tpmcrb_relinquish_locality(struct tpm_sc *sc)
 {
 
 	OR4(sc, TPM_LOC_CTRL, TPM_LOC_CTRL_RELINQUISH);
 }
 
 static bool
 tpmcrb_cancel_cmd(struct tpm_sc *sc)
 {
 	uint32_t mask = ~0;
 
 	WR4(sc, TPM_CRB_CTRL_CANCEL, TPM_CRB_CTRL_CANCEL_CMD);
 	if (!tpm_wait_for_u32(sc, TPM_CRB_CTRL_START,
 		    mask, ~mask, TPM_TIMEOUT_B)) {
 		device_printf(sc->dev,
 		    "Device failed to cancel command\n");
 		return (false);
 	}
 
-	WR4(sc, TPM_CRB_CTRL_CANCEL, ~TPM_CRB_CTRL_CANCEL_CMD);
+	WR4(sc, TPM_CRB_CTRL_CANCEL, TPM_CRB_CTRL_CANCEL_CLEAR);
 	return (true);
 }
 
 int
 tpmcrb_transmit(struct tpm_sc *sc, size_t length)
 {
 	struct tpmcrb_sc *crb_sc;
 	uint32_t mask, curr_cmd;
 	int timeout, bytes_available;
 
 	crb_sc = (struct tpmcrb_sc *)sc;
 
 	sx_assert(&sc->dev_lock, SA_XLOCKED);
 
 	if (length > crb_sc->cmd_buf_size) {
 		device_printf(sc->dev,
 		    "Requested transfer is bigger than buffer size\n");
 		return (E2BIG);
 	}
 
 	if (RD4(sc, TPM_CRB_CTRL_STS) & TPM_CRB_CTRL_STS_ERR_BIT) {
 		device_printf(sc->dev,
 		    "Device has Error bit set\n");
 		return (EIO);
 	}
 	if (!tpmcrb_request_locality(sc, 0)) {
 		device_printf(sc->dev,
 		    "Failed to obtain locality\n");
 		return (EIO);
 	}
 	/* Clear cancellation bit */
-	WR4(sc, TPM_CRB_CTRL_CANCEL, ~TPM_CRB_CTRL_CANCEL_CMD);
+	WR4(sc, TPM_CRB_CTRL_CANCEL, TPM_CRB_CTRL_CANCEL_CLEAR);
 
 	/* Switch device to idle state if necessary */
 	if (!(RD4(sc, TPM_CRB_CTRL_STS) & TPM_CRB_CTRL_STS_IDLE_BIT)) {
 		OR4(sc, TPM_CRB_CTRL_REQ, TPM_CRB_CTRL_REQ_GO_IDLE);
 
 		mask = TPM_CRB_CTRL_STS_IDLE_BIT;
 		if (!tpm_wait_for_u32(sc, TPM_CRB_CTRL_STS,
 			    mask, mask, TPM_TIMEOUT_C)) {
 			device_printf(sc->dev,
 			    "Failed to transition to idle state\n");
 			return (EIO);
 		}
 	}
 	/* Switch to ready state */
 	OR4(sc, TPM_CRB_CTRL_REQ, TPM_CRB_CTRL_REQ_GO_READY);
 
 	mask = TPM_CRB_CTRL_REQ_GO_READY;
 	if (!tpm_wait_for_u32(sc, TPM_CRB_CTRL_STS,
 		    mask, !mask, TPM_TIMEOUT_C)) {
 		device_printf(sc->dev,
 		    "Failed to transition to ready state\n");
 		return (EIO);
 	}
 
 	/*
 	 * Calculate timeout for current command.
 	 * Command code is passed in bytes 6-10.
 	 */
 	curr_cmd = be32toh(*(uint32_t *) (&sc->buf[6]));
 	timeout = tpm20_get_timeout(curr_cmd);
 
 	/* Send command and tell device to process it. */
 	bus_write_region_stream_1(sc->mem_res, crb_sc->cmd_off,
 	    sc->buf, length);
 	bus_barrier(sc->mem_res, crb_sc->cmd_off,
 	    length, BUS_SPACE_BARRIER_WRITE);
 
 	WR4(sc, TPM_CRB_CTRL_START, TPM_CRB_CTRL_START_CMD);
 	bus_barrier(sc->mem_res, TPM_CRB_CTRL_START,
 	    4, BUS_SPACE_BARRIER_WRITE);
 
 	mask = ~0;
 	if (!tpm_wait_for_u32(sc, TPM_CRB_CTRL_START, mask, ~mask, timeout)) {
 		device_printf(sc->dev,
 		    "Timeout while waiting for device to process cmd\n");
 		if (!tpmcrb_cancel_cmd(sc))
 			return (EIO);
 	}
 
 	/* Read response header. Length is passed in bytes 2 - 6. */
 	bus_read_region_stream_1(sc->mem_res, crb_sc->rsp_off,
 	    sc->buf, TPM_HEADER_SIZE);
 	bytes_available = be32toh(*(uint32_t *) (&sc->buf[2]));
 
 	if (bytes_available > TPM_BUFSIZE || bytes_available < TPM_HEADER_SIZE) {
 		device_printf(sc->dev,
 		    "Incorrect response size: %d\n",
 		    bytes_available);
 		return (EIO);
 	}
 
 	bus_read_region_stream_1(sc->mem_res, crb_sc->rsp_off + TPM_HEADER_SIZE,
 	      &sc->buf[TPM_HEADER_SIZE], bytes_available - TPM_HEADER_SIZE);
 
 	OR4(sc, TPM_CRB_CTRL_REQ, TPM_CRB_CTRL_REQ_GO_IDLE);
 
 	tpmcrb_relinquish_locality(sc);
 	sc->pending_data_length = bytes_available;
 
 	return (0);
 }
 
 /* ACPI Driver */
 static device_method_t	tpmcrb_methods[] = {
 	DEVMETHOD(device_probe,		tpmcrb_acpi_probe),
 	DEVMETHOD(device_attach,	tpmcrb_attach),
 	DEVMETHOD(device_detach,	tpmcrb_detach),
 	DEVMETHOD(device_shutdown,	tpm20_shutdown),
 	DEVMETHOD(device_suspend,	tpm20_suspend),
 	{0, 0}
 };
 static driver_t	tpmcrb_driver = {
 	"tpmcrb", tpmcrb_methods, sizeof(struct tpmcrb_sc),
 };
 
 devclass_t tpmcrb_devclass;
 DRIVER_MODULE(tpmcrb, acpi, tpmcrb_driver, tpmcrb_devclass, 0, 0);
Index: projects/clang1000-import/sys/dev/tpm/tpm_tis.c
===================================================================
--- projects/clang1000-import/sys/dev/tpm/tpm_tis.c	(revision 357389)
+++ projects/clang1000-import/sys/dev/tpm/tpm_tis.c	(revision 357390)
@@ -1,507 +1,506 @@
 /*-
  * Copyright (c) 2018 Stormshield.
  * Copyright (c) 2018 Semihalf.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "tpm20.h"
 
 /*
  * TIS register space as defined in
  * TCG_PC_Client_Platform_TPM_Profile_PTP_2.0_r1.03_v22
  */
 #define	TPM_ACCESS			0x0
 #define	TPM_INT_ENABLE			0x8
 #define	TPM_INT_VECTOR			0xc
 #define	TPM_INT_STS			0x10
 #define	TPM_INTF_CAPS			0x14
 #define	TPM_STS				0x18
 #define	TPM_DATA_FIFO			0x24
 #define	TPM_INTF_ID			0x30
 #define	TPM_XDATA_FIFO			0x80
 #define	TPM_DID_VID			0xF00
 #define	TPM_RID				0xF04
 
 #define	TPM_ACCESS_LOC_REQ		BIT(1)
 #define	TPM_ACCESS_LOC_Seize		BIT(3)
 #define	TPM_ACCESS_LOC_ACTIVE		BIT(5)
 #define	TPM_ACCESS_LOC_RELINQUISH	BIT(5)
 #define	TPM_ACCESS_VALID		BIT(7)
 
 #define	TPM_INT_ENABLE_GLOBAL_ENABLE	BIT(31)
 #define	TPM_INT_ENABLE_CMD_RDY		BIT(7)
 #define	TPM_INT_ENABLE_LOC_CHANGE	BIT(2)
 #define	TPM_INT_ENABLE_STS_VALID	BIT(1)
 #define	TPM_INT_ENABLE_DATA_AVAIL	BIT(0)
 
 #define	TPM_INT_STS_CMD_RDY		BIT(7)
 #define	TPM_INT_STS_LOC_CHANGE		BIT(2)
 #define	TPM_INT_STS_VALID		BIT(1)
 #define	TPM_INT_STS_DATA_AVAIL		BIT(0)
 
 #define	TPM_INTF_CAPS_VERSION		0x70000000
 #define	TPM_INTF_CAPS_TPM20		0x30000000
 
 #define	TPM_STS_VALID			BIT(7)
 #define	TPM_STS_CMD_RDY			BIT(6)
 #define	TPM_STS_CMD_START		BIT(5)
 #define	TPM_STS_DATA_AVAIL		BIT(4)
 #define	TPM_STS_DATA_EXPECTED		BIT(3)
 #define	TPM_STS_BURST_MASK		0xFFFF00
 #define	TPM_STS_BURST_OFFSET		0x8
 
 static int tpmtis_transmit(struct tpm_sc *sc, size_t length);
 
 static int tpmtis_acpi_probe(device_t dev);
 static int tpmtis_attach(device_t dev);
 static int tpmtis_detach(device_t dev);
 
 static void tpmtis_intr_handler(void *arg);
 
 static ACPI_STATUS tpmtis_get_SIRQ_channel(ACPI_RESOURCE *res, void *arg);
 static bool tpmtis_setup_intr(struct tpm_sc *sc);
 
 static bool tpmtis_read_bytes(struct tpm_sc *sc, size_t count, uint8_t *buf);
 static bool tpmtis_write_bytes(struct tpm_sc *sc, size_t count, uint8_t *buf);
 static bool tpmtis_request_locality(struct tpm_sc *sc, int locality);
 static void tpmtis_relinquish_locality(struct tpm_sc *sc);
 static bool tpmtis_go_ready(struct tpm_sc *sc);
 
 static bool tpm_wait_for_u32(struct tpm_sc *sc, bus_size_t off,
     uint32_t mask, uint32_t val, int32_t timeout);
 
 static uint16_t tpmtis_wait_for_burst(struct tpm_sc *sc);
 
 char *tpmtis_ids[] = {"MSFT0101", NULL};
 
 static int
 tpmtis_acpi_probe(device_t dev)
 {
 	int err;
 	ACPI_TABLE_TPM23 *tbl;
 	ACPI_STATUS status;
 
 	err = ACPI_ID_PROBE(device_get_parent(dev), dev, tpmtis_ids, NULL);
 	if (err > 0)
 		return (err);
 	/*Find TPM2 Header*/
 	status = AcpiGetTable(ACPI_SIG_TPM2, 1, (ACPI_TABLE_HEADER **) &tbl);
 	if(ACPI_FAILURE(status) ||
 	   tbl->StartMethod != TPM2_START_METHOD_TIS)
 	    err = ENXIO;
 
 	device_set_desc(dev, "Trusted Platform Module 2.0, FIFO mode");
 	return (err);
 }
 
 static int
 tpmtis_attach(device_t dev)
 {
 	struct tpm_sc *sc;
 	int result;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 
 	sc->mem_rid = 0;
 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid,
 		    RF_ACTIVE);
 	if (sc->mem_res == NULL)
 		return (ENXIO);
 
 	sc->irq_rid = 0;
 	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irq_rid,
 		    RF_ACTIVE | RF_SHAREABLE);
 	if (sc->irq_res != NULL) {
 		if (bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE,
 		    NULL, tpmtis_intr_handler, sc, &sc->intr_cookie))
 			sc->interrupts = false;
 		else
 			sc->interrupts = tpmtis_setup_intr(sc);
 	} else {
 		sc->interrupts = false;
 	}
 
 	sc->intr_type = -1;
 
 	sc->transmit = tpmtis_transmit;
 
 	result = tpm20_init(sc);
 	if (result != 0)
 		tpmtis_detach(dev);
 
 	return (result);
 }
 
 static int
 tpmtis_detach(device_t dev)
 {
 	struct tpm_sc *sc;
 
 	sc = device_get_softc(dev);
 	tpm20_release(sc);
 
 	if (sc->intr_cookie != NULL)
 		bus_teardown_intr(dev, sc->irq_res, sc->intr_cookie);
 
 	if (sc->irq_res != NULL)
 		bus_release_resource(dev, SYS_RES_IRQ,
 		    sc->irq_rid, sc->irq_res);
 
 	if (sc->mem_res != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    sc->mem_rid, sc->mem_res);
 
 	return (0);
 }
 
 static ACPI_STATUS
 tpmtis_get_SIRQ_channel(ACPI_RESOURCE *res, void *arg)
 {
 	struct tpm_sc *sc;
 	uint8_t channel;
 
 	sc = (struct tpm_sc *)arg;
 
 	switch (res->Type) {
 	case ACPI_RESOURCE_TYPE_IRQ:
 		channel = res->Data.Irq.Interrupts[0];
 		break;
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
 		channel = res->Data.ExtendedIrq.Interrupts[0];
 		break;
 	default:
 		return (AE_OK);
 	}
 
 	WR1(sc, TPM_INT_VECTOR, channel);
 	return (AE_OK);
 }
 
 static bool
 tpmtis_setup_intr(struct tpm_sc *sc)
 {
 	ACPI_STATUS status;
 	ACPI_HANDLE handle;
 	uint32_t irq_mask;
 
 	handle = acpi_get_handle(sc->dev);
 
 	if(!tpmtis_request_locality(sc, 0))
 		return (false);
 
 	irq_mask = RD4(sc, TPM_INT_ENABLE);
 	irq_mask |= TPM_INT_ENABLE_GLOBAL_ENABLE |
 	    TPM_INT_ENABLE_DATA_AVAIL |
 	    TPM_INT_ENABLE_LOC_CHANGE |
 	    TPM_INT_ENABLE_CMD_RDY |
 	    TPM_INT_ENABLE_STS_VALID;
 	WR4(sc, TPM_INT_ENABLE, irq_mask);
 
 	status = AcpiWalkResources(handle, "_CRS",
 	    tpmtis_get_SIRQ_channel, (void *)sc);
 
 	tpmtis_relinquish_locality(sc);
 
 	return (ACPI_SUCCESS(status));
 }
 
 static void
 tpmtis_intr_handler(void *arg)
 {
 	struct tpm_sc *sc;
 	uint32_t status;
 
 	sc = (struct tpm_sc *)arg;
 	status = RD4(sc, TPM_INT_STS);
 
 	WR4(sc, TPM_INT_STS, status);
 	if (sc->intr_type != -1 && sc->intr_type & status)
 		wakeup(sc);
 }
 
 static bool
 tpm_wait_for_u32(struct tpm_sc *sc, bus_size_t off, uint32_t mask, uint32_t val,
     int32_t timeout)
 {
 
 	/* Check for condition */
 	if ((RD4(sc, off) & mask) == val)
 		return (true);
 
 	/* If interrupts are enabled sleep for timeout duration */
 	if(sc->interrupts && sc->intr_type != -1) {
 		tsleep(sc, PWAIT, "TPM WITH INTERRUPTS", timeout / tick);
 
 		sc->intr_type = -1;
 		return ((RD4(sc, off) & mask) == val);
 	}
 
 	/* If we don't have interrupts poll the device every tick */
 	while (timeout > 0) {
 		if ((RD4(sc, off) & mask) == val)
 			return (true);
 
 		pause("TPM POLLING", 1);
 		timeout -= tick;
 	}
 	return (false);
 }
 
 static uint16_t
 tpmtis_wait_for_burst(struct tpm_sc *sc)
 {
 	int timeout;
 	uint16_t burst_count;
 
 	timeout = TPM_TIMEOUT_A;
 
 	while (timeout-- > 0) {
 		burst_count = (RD4(sc, TPM_STS) & TPM_STS_BURST_MASK) >>
 		    TPM_STS_BURST_OFFSET;
 		if (burst_count > 0)
 			break;
 
 		DELAY(1);
 	}
 	return (burst_count);
 }
 
 static bool
 tpmtis_read_bytes(struct tpm_sc *sc, size_t count, uint8_t *buf)
 {
 	uint16_t burst_count;
 
 	while (count > 0) {
 		burst_count = tpmtis_wait_for_burst(sc);
 		if (burst_count == 0)
 			return (false);
 
 		burst_count = MIN(burst_count, count);
 		count -= burst_count;
 
 		while (burst_count-- > 0)
 			*buf++ = RD1(sc, TPM_DATA_FIFO);
 	}
 
 	return (true);
 }
 
 static bool
 tpmtis_write_bytes(struct tpm_sc *sc, size_t count, uint8_t *buf)
 {
 	uint16_t burst_count;
 
 	while (count > 0) {
 		burst_count = tpmtis_wait_for_burst(sc);
 		if (burst_count == 0)
 			return (false);
 
 		burst_count = MIN(burst_count, count);
 		count -= burst_count;
 
 		while (burst_count-- > 0)
 			WR1(sc, TPM_DATA_FIFO, *buf++);
 	}
 
 	return (true);
 }
 
 
 static bool
 tpmtis_request_locality(struct tpm_sc *sc, int locality)
 {
 	uint8_t mask;
 	int timeout;
 
 	/* Currently we only support Locality 0 */
 	if (locality != 0)
 		return (false);
 
 	mask = TPM_ACCESS_LOC_ACTIVE | TPM_ACCESS_VALID;
 	timeout = TPM_TIMEOUT_A;
 	sc->intr_type = TPM_INT_STS_LOC_CHANGE;
 
 	WR1(sc, TPM_ACCESS, TPM_ACCESS_LOC_REQ);
 	bus_barrier(sc->mem_res, TPM_ACCESS, 1, BUS_SPACE_BARRIER_WRITE);
 	if(sc->interrupts) {
 		tsleep(sc, PWAIT, "TPMLOCREQUEST with INTR", timeout / tick);
 		return ((RD1(sc, TPM_ACCESS) & mask) == mask);
 	} else  {
 		while(timeout > 0) {
 			if ((RD1(sc, TPM_ACCESS) & mask) == mask)
 				return (true);
 
 			pause("TPMLOCREQUEST POLLING", 1);
 			timeout -= tick;
 		}
 	}
 
 	return (false);
 }
 
 static void
 tpmtis_relinquish_locality(struct tpm_sc *sc)
 {
 
 	/*
 	 * Interrupts can only be cleared when a locality is active.
 	 * Clear them now in case interrupt handler didn't make it in time.
 	 */
 	if(sc->interrupts)
 		AND4(sc, TPM_INT_STS, RD4(sc, TPM_INT_STS));
 
 	OR1(sc, TPM_ACCESS, TPM_ACCESS_LOC_RELINQUISH);
 }
 
 static bool
 tpmtis_go_ready(struct tpm_sc *sc)
 {
 	uint32_t mask;
 
 	mask = TPM_STS_CMD_RDY;
 	sc->intr_type = TPM_INT_STS_CMD_RDY;
 
-	OR4(sc, TPM_STS, TPM_STS_CMD_RDY);
+	WR4(sc, TPM_STS, TPM_STS_CMD_RDY);
 	bus_barrier(sc->mem_res, TPM_STS, 4, BUS_SPACE_BARRIER_WRITE);
 	if (!tpm_wait_for_u32(sc, TPM_STS, mask, mask, TPM_TIMEOUT_B))
 		return (false);
 
-	AND4(sc, TPM_STS, ~TPM_STS_CMD_RDY);
 	return (true);
 }
 
 static int
 tpmtis_transmit(struct tpm_sc *sc, size_t length)
 {
 	size_t bytes_available;
 	uint32_t mask, curr_cmd;
 	int timeout;
 
 	sx_assert(&sc->dev_lock, SA_XLOCKED);
 
 	if (!tpmtis_request_locality(sc, 0)) {
 		device_printf(sc->dev,
 		    "Failed to obtain locality\n");
 		return (EIO);
 	}
 	if (!tpmtis_go_ready(sc)) {
 		device_printf(sc->dev,
 		    "Failed to switch to ready state\n");
 		return (EIO);
 	}
 	if (!tpmtis_write_bytes(sc, length, sc->buf)) {
 		device_printf(sc->dev,
 		    "Failed to write cmd to device\n");
 		return (EIO);
 	}
 
 	mask = TPM_STS_VALID;
 	sc->intr_type = TPM_INT_STS_VALID;
 	if (!tpm_wait_for_u32(sc, TPM_STS, mask, mask, TPM_TIMEOUT_C)) {
 		device_printf(sc->dev,
 		    "Timeout while waiting for valid bit\n");
 		return (EIO);
 	}
 	if (RD4(sc, TPM_STS) & TPM_STS_DATA_EXPECTED) {
 		device_printf(sc->dev,
 		    "Device expects more data even though we already"
 		    " sent everything we had\n");
 		return (EIO);
 	}
 
 	/*
 	 * Calculate timeout for current command.
 	 * Command code is passed in bytes 6-10.
 	 */
 	curr_cmd = be32toh(*(uint32_t *) (&sc->buf[6]));
 	timeout = tpm20_get_timeout(curr_cmd);
 
 	WR4(sc, TPM_STS, TPM_STS_CMD_START);
 	bus_barrier(sc->mem_res, TPM_STS, 4, BUS_SPACE_BARRIER_WRITE);
 
 	mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID;
 	sc->intr_type = TPM_INT_STS_DATA_AVAIL;
 	if (!tpm_wait_for_u32(sc, TPM_STS, mask, mask, timeout)) {
 		device_printf(sc->dev,
 		    "Timeout while waiting for device to process cmd\n");
 		/*
 		 * Switching to ready state also cancels processing
 		 * current command
 		 */
 		if (!tpmtis_go_ready(sc))
 			return (EIO);
 
 		/*
 		 * After canceling a command we should get a response,
 		 * check if there is one.
 		 */
 		sc->intr_type = TPM_INT_STS_DATA_AVAIL;
 		if (!tpm_wait_for_u32(sc, TPM_STS, mask, mask, TPM_TIMEOUT_C))
 			return (EIO);
 	}
 	/* Read response header. Length is passed in bytes 2 - 6. */
 	if(!tpmtis_read_bytes(sc, TPM_HEADER_SIZE, sc->buf)) {
 		device_printf(sc->dev,
 		    "Failed to read response header\n");
 		return (EIO);
 	}
 	bytes_available = be32toh(*(uint32_t *) (&sc->buf[2]));
 
 	if (bytes_available > TPM_BUFSIZE || bytes_available < TPM_HEADER_SIZE) {
 		device_printf(sc->dev,
 		    "Incorrect response size: %zu\n",
 		    bytes_available);
 		return (EIO);
 	}
 	if(!tpmtis_read_bytes(sc, bytes_available - TPM_HEADER_SIZE,
 	    &sc->buf[TPM_HEADER_SIZE])) {
 		device_printf(sc->dev,
 		    "Failed to read response\n");
 		return (EIO);
 	}
 	tpmtis_relinquish_locality(sc);
 	sc->pending_data_length = bytes_available;
 
 	return (0);
 }
 
 /* ACPI Driver */
 static device_method_t tpmtis_methods[] = {
 	DEVMETHOD(device_probe,		tpmtis_acpi_probe),
 	DEVMETHOD(device_attach,	tpmtis_attach),
 	DEVMETHOD(device_detach,	tpmtis_detach),
 	DEVMETHOD(device_shutdown,	tpm20_shutdown),
 	DEVMETHOD(device_suspend,	tpm20_suspend),
 	{0, 0}
 };
 static driver_t	tpmtis_driver = {
 	"tpmtis", tpmtis_methods, sizeof(struct tpm_sc),
 };
 
 devclass_t tpmtis_devclass;
 DRIVER_MODULE(tpmtis, acpi, tpmtis_driver, tpmtis_devclass, 0, 0);
Index: projects/clang1000-import/sys/fs/devfs/devfs_vnops.c
===================================================================
--- projects/clang1000-import/sys/fs/devfs/devfs_vnops.c	(revision 357389)
+++ projects/clang1000-import/sys/fs/devfs/devfs_vnops.c	(revision 357390)
@@ -1,1982 +1,1982 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2000-2004
  *	Poul-Henning Kamp.  All rights reserved.
  * Copyright (c) 1989, 1992-1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software donated to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
  *
  * $FreeBSD$
  */
 
 /*
  * TODO:
  *	mkdir: want it ?
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/ttycom.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 static struct vop_vector devfs_vnodeops;
 static struct vop_vector devfs_specops;
 static struct fileops devfs_ops_f;
 
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 
 static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");
 
 struct mtx	devfs_de_interlock;
 MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
 struct sx	clone_drain_lock;
 SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock");
 struct mtx	cdevpriv_mtx;
 MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);
 
 SYSCTL_DECL(_vfs_devfs);
 
 static int devfs_dotimes;
 SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
     &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");
 
 /*
  * Update devfs node timestamp.  Note that updates are unlocked and
  * stat(2) could see partially updated times.
  */
 static void
 devfs_timestamp(struct timespec *tsp)
 {
 	time_t ts;
 
 	if (devfs_dotimes) {
 		vfs_timestamp(tsp);
 	} else {
 		ts = time_second;
 		if (tsp->tv_sec != ts) {
 			tsp->tv_sec = ts;
 			tsp->tv_nsec = 0;
 		}
 	}
 }
 
 static int
 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp,
     int *ref)
 {
 
 	*dswp = devvn_refthread(fp->f_vnode, devp, ref);
 	if (*devp != fp->f_data) {
 		if (*dswp != NULL)
 			dev_relthread(*devp, *ref);
 		return (ENXIO);
 	}
 	KASSERT((*devp)->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
 	if (*dswp == NULL)
 		return (ENXIO);
 	curthread->td_fpop = fp;
 	return (0);
 }
 
 int
 devfs_get_cdevpriv(void **datap)
 {
 	struct file *fp;
 	struct cdev_privdata *p;
 	int error;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return (EBADF);
 	p = fp->f_cdevpriv;
 	if (p != NULL) {
 		error = 0;
 		*datap = p->cdpd_data;
 	} else
 		error = ENOENT;
 	return (error);
 }
 
 int
 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr)
 {
 	struct file *fp;
 	struct cdev_priv *cdp;
 	struct cdev_privdata *p;
 	int error;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return (ENOENT);
 	cdp = cdev2priv((struct cdev *)fp->f_data);
 	p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK);
 	p->cdpd_data = priv;
 	p->cdpd_dtr = priv_dtr;
 	p->cdpd_fp = fp;
 	mtx_lock(&cdevpriv_mtx);
 	if (fp->f_cdevpriv == NULL) {
 		LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list);
 		fp->f_cdevpriv = p;
 		mtx_unlock(&cdevpriv_mtx);
 		error = 0;
 	} else {
 		mtx_unlock(&cdevpriv_mtx);
 		free(p, M_CDEVPDATA);
 		error = EBUSY;
 	}
 	return (error);
 }
 
 void
 devfs_destroy_cdevpriv(struct cdev_privdata *p)
 {
 
 	mtx_assert(&cdevpriv_mtx, MA_OWNED);
 	KASSERT(p->cdpd_fp->f_cdevpriv == p,
 	    ("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p));
 	p->cdpd_fp->f_cdevpriv = NULL;
 	LIST_REMOVE(p, cdpd_list);
 	mtx_unlock(&cdevpriv_mtx);
 	(p->cdpd_dtr)(p->cdpd_data);
 	free(p, M_CDEVPDATA);
 }
 
 static void
 devfs_fpdrop(struct file *fp)
 {
 	struct cdev_privdata *p;
 
 	mtx_lock(&cdevpriv_mtx);
 	if ((p = fp->f_cdevpriv) == NULL) {
 		mtx_unlock(&cdevpriv_mtx);
 		return;
 	}
 	devfs_destroy_cdevpriv(p);
 }
 
 void
 devfs_clear_cdevpriv(void)
 {
 	struct file *fp;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return;
 	devfs_fpdrop(fp);
 }
 
 /*
  * Run devfs_populate() on the mount that vp belongs to.
  *
  * vp must be locked on entry.  The vnode lock is dropped around the
  * devfs_populate() call and re-taken in the state reported by
  * VOP_ISLOCKED() beforehand, which is why the vnode and its dirent are
  * re-validated afterwards.
  *
  * On success devfs_populate_vp() returns 0 with dmp->dm_lock held
  * (exclusively).  On failure it returns ERESTART with dmp->dm_lock
  * released; that happens when the last hold on the mount was dropped,
  * when the vnode is doomed, or when the dirent is flagged DE_DOOMED.
  */
 static int
 devfs_populate_vp(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	int locked;
 
 	ASSERT_VOP_LOCKED(vp, "devfs_populate_vp");
 
 	dmp = VFSTODEVFS(vp->v_mount);
 	/* Remember the current lock state so it can be restored below. */
 	locked = VOP_ISLOCKED(vp);
 
 	sx_xlock(&dmp->dm_lock);
 	/* Hold the mount structure while the vnode lock is not held. */
 	DEVFS_DMP_HOLD(dmp);
 
 	/* Can't call devfs_populate() with the vnode lock held. */
 	VOP_UNLOCK(vp);
 	devfs_populate(dmp);
 
 	/* dm_lock is released while the vnode lock is re-acquired. */
 	sx_xunlock(&dmp->dm_lock);
 	vn_lock(vp, locked | LK_RETRY);
 	sx_xlock(&dmp->dm_lock);
 	if (DEVFS_DMP_DROP(dmp)) {
 		/* Ours was the last hold: finish the unmount. */
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 		return (ERESTART);
 	}
 	if (VN_IS_DOOMED(vp)) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ERESTART);
 	}
 	de = vp->v_data;
 	KASSERT(de != NULL,
 	    ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
 	if ((de->de_flags & DE_DOOMED) != 0) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ERESTART);
 	}
 
 	return (0);
 }
 
 /*
  * VOP_VPTOCNP: translate a vnode into its name and parent directory.
  *
  * The dirent name of ap->a_vp is copied (without a terminating NUL) into
  * ap->a_buf so that it ends at offset *ap->a_buflen, and *ap->a_buflen is
  * lowered to the start of the copied name.  The parent's vnode is returned
  * referenced in *ap->a_vpp.  For the root directory of the mount the vnode
  * itself is returned referenced and the buffer is left untouched.
  *
  * Returns 0 on success, the error from devfs_populate_vp(), ENOENT when
  * the vnode is neither VCHR nor VDIR or no parent dirent/vnode exists, and
  * ENOMEM when the name does not fit in front of *ap->a_buflen.
  */
 static int
 devfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode **dvp = ap->a_vpp;
 	struct devfs_mount *dmp;
 	char *buf = ap->a_buf;
-	int *buflen = ap->a_buflen;
+	size_t *buflen = ap->a_buflen;
 	struct devfs_dirent *dd, *de;
 	int i, error;
 
 	dmp = VFSTODEVFS(vp->v_mount);
 
 	/* On success this leaves dm_lock held; it is dropped at "finished". */
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	if (vp->v_type != VCHR && vp->v_type != VDIR) {
 		error = ENOENT;
 		goto finished;
 	}
 
 	dd = vp->v_data;
 	if (vp->v_type == VDIR && dd == dmp->dm_rootdir) {
 		/* The root of the mount is reported as its own parent. */
 		*dvp = vp;
 		vref(*dvp);
 		goto finished;
 	}
 
 	/* The name is placed right in front of the current *buflen. */
 	i = *buflen;
 	i -= dd->de_dirent->d_namlen;
 	if (i < 0) {
 		error = ENOMEM;
 		goto finished;
 	}
 	bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen);
 	*buflen = i;
 	de = devfs_parent_dirent(dd);
 	if (de == NULL) {
 		error = ENOENT;
 		goto finished;
 	}
 	mtx_lock(&devfs_de_interlock);
 	*dvp = de->de_vnode;
 	if (*dvp != NULL) {
 		/*
 		 * Take a hold under the vnode interlock first, then turn it
 		 * into a use reference once no interlock is held any more.
 		 */
 		VI_LOCK(*dvp);
 		mtx_unlock(&devfs_de_interlock);
 		vholdl(*dvp);
 		VI_UNLOCK(*dvp);
 		vref(*dvp);
 		vdrop(*dvp);
 	} else {
 		mtx_unlock(&devfs_de_interlock);
 		error = ENOENT;
 	}
 finished:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 /*
  * Build the path of dd relative to the mountpoint, optionally followed by
  * the component named by cnp.  The string is assembled back to front in
  * buf, which must be able to hold SPECNAMELEN + 1 bytes, and a pointer to
  * its first character inside buf is returned.
  *
  * With a NULL cnp no trailing '/' is appended to the resulting path.
  * NULL is returned when the path does not fit or a parent dirent cannot
  * be found.  The caller must hold dmp->dm_lock.
  */
 char *
 devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
     struct componentname *cnp)
 {
 	struct devfs_dirent *cur;
 	int pos;
 
 	sx_assert(&dmp->dm_lock, SA_LOCKED);
 
 	/* Terminate first; everything else is prepended in front of it. */
 	pos = SPECNAMELEN;
 	buf[pos] = '\0';
 	if (cnp != NULL) {
 		pos -= cnp->cn_namelen;
 		if (pos < 0)
 			return (NULL);
 		bcopy(cnp->cn_nameptr, buf + pos, cnp->cn_namelen);
 	}
 
 	/* Walk up towards the root, prepending one component per step. */
 	cur = dd;
 	while (cur != dmp->dm_rootdir) {
 		/* A separator is needed unless the buffer is still empty. */
 		if (cnp != NULL || pos < SPECNAMELEN) {
 			if (--pos < 0)
 				return (NULL);
 			buf[pos] = '/';
 		}
 		pos -= cur->de_dirent->d_namlen;
 		if (pos < 0)
 			return (NULL);
 		bcopy(cur->de_dirent->d_name, buf + pos,
 		    cur->de_dirent->d_namlen);
 		cur = devfs_parent_dirent(cur);
 		if (cur == NULL)
 			return (NULL);
 	}
 	return (buf + pos);
 }
 
 /*
  * Drop the holds on de and dmp that devfs_allocv() took, freeing whichever
  * object lost its last hold.
  *
  * Return value:
  *   0 - the dirent is not doomed; dm_lock is released only if drop_dm_lock
  *       is non-zero.
  *   1 - the dirent is flagged DE_DOOMED; dm_lock has been released.
  *   2 - the mount structure lost its last hold as well; dm_lock has been
  *       released and devfs_unmount_final() has been called.
  */
 static int
 devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
 	struct devfs_dirent *de)
 {
 	int not_found;
 
 	not_found = 0;
 	/* Sample the flag before the dirent may be freed below. */
 	if (de->de_flags & DE_DOOMED)
 		not_found = 1;
 	if (DEVFS_DE_DROP(de)) {
 		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
 		devfs_dirent_free(de);
 	}
 	if (DEVFS_DMP_DROP(dmp)) {
 		KASSERT(not_found == 1,
 			("DEVFS mount struct freed before dirent"));
 		not_found = 2;
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 	}
 	/* In case 2 the lock is already released; do not touch dmp again. */
 	if (not_found == 1 || (drop_dm_lock && not_found != 2))
 		sx_unlock(&dmp->dm_lock);
 	return (not_found);
 }
 
 /*
  * Destructor passed to insmntque1() by devfs_allocv(); arg is the dirent
  * the vnode was being set up for.  Breaks the vnode <-> dirent link under
  * devfs_de_interlock, then disposes of the vnode with vgone() and vput().
  */
 static void
 devfs_insmntque_dtr(struct vnode *vp, void *arg)
 {
 	struct devfs_dirent *dirent = arg;
 
 	mtx_lock(&devfs_de_interlock);
 	vp->v_data = NULL;
 	dirent->de_vnode = NULL;
 	mtx_unlock(&devfs_de_interlock);
 
 	vgone(vp);
 	vput(vp);
 }
 
 /*
  * Return in *vpp a vnode for dirent de on mount mp.  If the dirent already
  * has a vnode it is acquired with vget() using lockmode; otherwise a new
  * vnode is created and returned exclusively locked.
  *
  * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
  * it on return.
  *
  * Returns 0 on success, ENOENT when the dirent is (or becomes) doomed or
  * its cdev is not CDP_ACTIVE, or the error from getnewvnode() or
  * insmntque1().
  */
 int
 devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
     struct vnode **vpp)
 {
 	int error;
 	struct vnode *vp;
 	struct cdev *dev;
 	struct devfs_mount *dmp;
 	struct cdevsw *dsw;
 
 	dmp = VFSTODEVFS(mp);
 	if (de->de_flags & DE_DOOMED) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ENOENT);
 	}
 loop:
 	/*
 	 * Hold both objects; every exit path (and every retry) below drops
 	 * these holds again through devfs_allocv_drop_refs().
 	 */
 	DEVFS_DE_HOLD(de);
 	DEVFS_DMP_HOLD(dmp);
 	mtx_lock(&devfs_de_interlock);
 	vp = de->de_vnode;
 	if (vp != NULL) {
 		/* Existing vnode: get it with dm_lock released. */
 		VI_LOCK(vp);
 		mtx_unlock(&devfs_de_interlock);
 		sx_xunlock(&dmp->dm_lock);
 		vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
 		sx_xlock(&dmp->dm_lock);
 		if (devfs_allocv_drop_refs(0, dmp, de)) {
 			/* Dirent doomed meanwhile; dm_lock already dropped. */
 			vput(vp);
 			return (ENOENT);
 		}
 		else if (VN_IS_DOOMED(vp)) {
 			/*
 			 * The vnode was reclaimed while dm_lock was not held.
 			 * Unhook it if it is still attached and start over.
 			 */
 			mtx_lock(&devfs_de_interlock);
 			if (de->de_vnode == vp) {
 				de->de_vnode = NULL;
 				vp->v_data = NULL;
 			}
 			mtx_unlock(&devfs_de_interlock);
 			vput(vp);
 			goto loop;
 		}
 		sx_xunlock(&dmp->dm_lock);
 		*vpp = vp;
 		return (0);
 	}
 	mtx_unlock(&devfs_de_interlock);
 	if (de->de_dirent->d_type == DT_CHR) {
 		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
 			devfs_allocv_drop_refs(1, dmp, de);
 			return (ENOENT);
 		}
 		dev = &de->de_cdp->cdp_c;
 	} else {
 		dev = NULL;
 	}
 	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
 	if (error != 0) {
 		devfs_allocv_drop_refs(1, dmp, de);
 		printf("devfs_allocv: failed to allocate new vnode\n");
 		return (error);
 	}
 
 	/* Derive the vnode type from the dirent type. */
 	if (de->de_dirent->d_type == DT_CHR) {
 		vp->v_type = VCHR;
 		VI_LOCK(vp);
 		dev_lock();
 		dev_refl(dev);
 		/* XXX: v_rdev should be protect by vnode lock */
 		vp->v_rdev = dev;
 		KASSERT(vp->v_usecount == 1,
 		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
 		dev->si_usecount++;
 		/* Special casing of ttys for deadfs.  Probably redundant. */
 		dsw = dev->si_devsw;
 		if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
 			vp->v_vflag |= VV_ISTTY;
 		dev_unlock();
 		VI_UNLOCK(vp);
 		if ((dev->si_flags & SI_ETERNAL) != 0)
 			vp->v_vflag |= VV_ETERNALDEV;
 		/* Character devices use the devfs_specops vector. */
 		vp->v_op = &devfs_specops;
 	} else if (de->de_dirent->d_type == DT_DIR) {
 		vp->v_type = VDIR;
 	} else if (de->de_dirent->d_type == DT_LNK) {
 		vp->v_type = VLNK;
 	} else {
 		vp->v_type = VBAD;
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
 	VN_LOCK_ASHARE(vp);
 	/* Link vnode and dirent to each other under the interlock. */
 	mtx_lock(&devfs_de_interlock);
 	vp->v_data = de;
 	de->de_vnode = vp;
 	mtx_unlock(&devfs_de_interlock);
 	/* On failure devfs_insmntque_dtr() undoes the link set up above. */
 	error = insmntque1(vp, mp, devfs_insmntque_dtr, de);
 	if (error != 0) {
 		(void) devfs_allocv_drop_refs(1, dmp, de);
 		return (error);
 	}
 	if (devfs_allocv_drop_refs(0, dmp, de)) {
 		vput(vp);
 		return (ENOENT);
 	}
 #ifdef MAC
 	mac_devfs_vnode_associate(mp, de, vp);
 #endif
 	sx_xunlock(&dmp->dm_lock);
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * VOP_ACCESS: check the requested access mode against the mode, owner and
  * group stored in the dirent (for a directory, in its de_dir entry).
  *
  * An EACCES result from vaccess() is overridden with success when the
  * calling process has a controlling terminal and this node is it.
  */
 static int
 devfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *de = vp->v_data;
 	struct proc *p;
 	int rv;
 
 	if (vp->v_type == VDIR)
 		de = de->de_dir;
 
 	rv = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
 	    ap->a_accmode, ap->a_cred, NULL);
 	/* Success and every failure other than EACCES are final. */
 	if (rv != EACCES)
 		return (rv);
 
 	/* We do, however, allow access to the controlling terminal */
 	p = ap->a_td->td_proc;
 	PROC_LOCK(p);
 	if ((p->p_flag & P_CONTROLT) != 0 &&
 	    p->p_session->s_ttydp == de->de_cdp)
 		rv = 0;
 	PROC_UNLOCK(p);
 	return (rv);
 }
 
 /*
  * devfs_close() ORs FLASTCLOSE and FREVOKE into the fflag value it hands
  * to d_close(), so neither bit may collide with FMASK or FCNTLFLAGS.
  */
 _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0,
     "devfs-only flag reuse failed");
 
 /*
  * VOP_CLOSE for device vnodes.
  *
  * Releases the session's reference on a controlling terminal when this is
  * the last other user, then decides whether the close must be passed to
  * the driver's d_close(): always for a doomed vnode (with FREVOKE and
  * FNONBLOCK set) and for D_TRACKCLOSE drivers, otherwise only on the last
  * close.  d_close() is called with the vnode unlocked but held.
  *
  * Returns 0 when nothing had to be done, ENXIO when the cdevsw could not
  * be referenced, or the value returned by d_close().
  */
 static int
 devfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp, *oldvp;
 	struct thread *td = ap->a_td;
 	struct proc *p;
 	struct cdev *dev = vp->v_rdev;
 	struct cdevsw *dsw;
 	int dflags, error, ref, vp_locked;
 
 	/*
 	 * XXX: Don't call d_close() if we were called because of
 	 * XXX: insmntque1() failure.
 	 */
 	if (vp->v_data == NULL)
 		return (0);
 
 	/*
 	 * Hack: a tty device that is a controlling terminal
 	 * has a reference from the session structure.
 	 * We cannot easily tell that a character device is
 	 * a controlling terminal, unless it is the closing
 	 * process' controlling terminal.  In that case,
 	 * if the reference count is 2 (this last descriptor
 	 * plus the session), release the reference from the session.
 	 */
 	if (vp->v_usecount == 2 && td != NULL) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 		if (vp == p->p_session->s_ttyvp) {
 			PROC_UNLOCK(p);
 			oldvp = NULL;
 			sx_xlock(&proctree_lock);
 			/* Re-check now that proctree_lock is held. */
 			if (vp == p->p_session->s_ttyvp) {
 				SESS_LOCK(p->p_session);
 				VI_LOCK(vp);
 				if (vp->v_usecount == 2 && vcount(vp) == 1 &&
 				    !VN_IS_DOOMED(vp)) {
 					p->p_session->s_ttyvp = NULL;
 					p->p_session->s_ttydp = NULL;
 					oldvp = vp;
 				}
 				VI_UNLOCK(vp);
 				SESS_UNLOCK(p->p_session);
 			}
 			sx_xunlock(&proctree_lock);
 			/* Release the session's reference outside the locks. */
 			if (oldvp != NULL)
 				vrele(oldvp);
 		} else
 			PROC_UNLOCK(p);
 	}
 	/*
 	 * We do not want to really close the device if it
 	 * is still in use unless we are trying to close it
 	 * forcibly. Since every use (buffer, vnode, swap, cmap)
 	 * holds a reference to the vnode, and because we mark
 	 * any other vnodes that alias this device, when the
 	 * sum of the reference counts on all the aliased
 	 * vnodes descends to one, we are on last close.
 	 */
 	dsw = dev_refthread(dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	dflags = 0;
 	VI_LOCK(vp);
 	if (vp->v_usecount == 1 && vcount(vp) == 1)
 		dflags |= FLASTCLOSE;
 	if (VN_IS_DOOMED(vp)) {
 		/* Forced close. */
 		dflags |= FREVOKE | FNONBLOCK;
 	} else if (dsw->d_flags & D_TRACKCLOSE) {
 		/* Keep device updated on status. */
 	} else if ((dflags & FLASTCLOSE) == 0) {
 		/* Not the last close and nobody is tracking: nothing to do. */
 		VI_UNLOCK(vp);
 		dev_relthread(dev, ref);
 		return (0);
 	}
 	/* Hold the vnode across the unlocked d_close() call. */
 	vholdnz(vp);
 	VI_UNLOCK(vp);
 	vp_locked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
 	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
 	dev_relthread(dev, ref);
 	/* Restore the lock state the caller entered with. */
 	vn_lock(vp, vp_locked | LK_RETRY);
 	vdrop(vp);
 	return (error);
 }
 
 /*
  * fo_close for devfs files: run the generic vnode close with fp installed
  * as the thread's current file, then destroy any private data still
  * attached to fp.  Returns the result of the vnode close.
  */
 static int
 devfs_close_f(struct file *fp, struct thread *td)
 {
 	struct file *saved_fpop;
 	int rv;
 
 	/*
 	 * NB: td may be NULL if this descriptor is closed due to
 	 * garbage collection from a closed UNIX domain socket.
 	 * That is why curthread, not td, carries td_fpop here.
 	 */
 	saved_fpop = curthread->td_fpop;
 	curthread->td_fpop = fp;
 	rv = vnops.fo_close(fp, td);
 	curthread->td_fpop = saved_fpop;
 
 	/*
 	 * The f_cdevpriv cannot be assigned non-NULL value while we
 	 * are destroying the file.
 	 */
 	if (fp->f_cdevpriv == NULL)
 		return (rv);
 	devfs_fpdrop(fp);
 	return (rv);
 }
 
 /*
  * VOP_GETATTR: fill *ap->a_vap from the dirent (for a directory, from its
  * de_dir entry) and, for character devices, from the cdev's timestamps.
  *
  * Returns the error from devfs_populate_vp() if that fails, otherwise 0.
  */
 static int
 devfs_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	struct cdev *dev;
 	struct timeval boottime;
 	int error;
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	/* dm_lock is only needed for the populate step; drop it right away. */
 	dmp = VFSTODEVFS(vp->v_mount);
 	sx_xunlock(&dmp->dm_lock);
 
 	de = vp->v_data;
 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
 	if (vp->v_type == VDIR) {
 		de = de->de_dir;
 		KASSERT(de != NULL,
 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
 	}
 	vap->va_uid = de->de_uid;
 	vap->va_gid = de->de_gid;
 	vap->va_mode = de->de_mode;
 	/* Size: link target length, DEV_BSIZE for directories, else 0. */
 	if (vp->v_type == VLNK)
 		vap->va_size = strlen(de->de_symlink);
 	else if (vp->v_type == VDIR)
 		vap->va_size = vap->va_bytes = DEV_BSIZE;
 	else
 		vap->va_size = 0;
 	if (vp->v_type != VDIR)
 		vap->va_bytes = 0;
 	vap->va_blocksize = DEV_BSIZE;
 	vap->va_type = vp->v_type;
 
 	getboottime(&boottime);
 	/*
 	 * fix(): a timestamp whose tv_sec is at most 3600 is replaced, in
 	 * place in the dirent/cdev, by the boot time.
 	 */
 #define fix(aa)							\
 	do {							\
 		if ((aa).tv_sec <= 3600) {			\
 			(aa).tv_sec = boottime.tv_sec;		\
 			(aa).tv_nsec = boottime.tv_usec * 1000; \
 		}						\
 	} while (0)
 
 	if (vp->v_type != VCHR)  {
 		fix(de->de_atime);
 		vap->va_atime = de->de_atime;
 		fix(de->de_mtime);
 		vap->va_mtime = de->de_mtime;
 		fix(de->de_ctime);
 		vap->va_ctime = de->de_ctime;
 	} else {
 		/* Character devices keep their timestamps in the cdev. */
 		dev = vp->v_rdev;
 		fix(dev->si_atime);
 		vap->va_atime = dev->si_atime;
 		fix(dev->si_mtime);
 		vap->va_mtime = dev->si_mtime;
 		fix(dev->si_ctime);
 		vap->va_ctime = dev->si_ctime;
 
 		vap->va_rdev = cdev2priv(dev)->cdp_inode;
 	}
 	vap->va_gen = 0;
 	vap->va_flags = 0;
 	vap->va_filerev = 0;
 	vap->va_nlink = de->de_links;
 	vap->va_fileid = de->de_inode;
 
 	return (error);
 }
 
 /*
  * fo_ioctl for devfs files: forward to the generic vnode ioctl with fp
  * installed as the thread's current file (td_fpop) for the duration of
  * the call.
  */
 /* ARGSUSED */
 static int
 devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred,
     struct thread *td)
 {
 	struct file *saved_fpop = td->td_fpop;
 	int rv;
 
 	td->td_fpop = fp;
 	rv = vnops.fo_ioctl(fp, com, data, cred, td);
 	td->td_fpop = saved_fpop;
 	return (rv);
 }
 
 /*
  * Return the user buffer pointer stored in a FIODGNAME ioctl argument.
  *
  * fgnp points at the ioctl argument and com selects its layout: struct
  * fiodgname_arg for FIODGNAME or, when COMPAT_FREEBSD32 is configured,
  * struct fiodgname_arg32 for FIODGNAME_32, whose buf member is widened
  * through uintptr_t.  Any other command is a caller bug and panics.
  */
 void *
 fiodgname_buf_get_ptr(void *fgnp, u_long com)
 {
 	union {
 		struct fiodgname_arg	fgn;
 #ifdef COMPAT_FREEBSD32
 		struct fiodgname_arg32	fgn32;
 #endif
 	} *fgnup;
 
 	fgnup = fgnp;
 	switch (com) {
 	case FIODGNAME:
 		return (fgnup->fgn.buf);
 #ifdef COMPAT_FREEBSD32
 	case FIODGNAME_32:
 		return ((void *)(uintptr_t)fgnup->fgn32.buf);
 #endif
 	default:
 		/* com is unsigned long, so it is printed with %lu. */
 		panic("Unhandled ioctl command %lu", com);
 	}
 }
 
 /*
  * VOP_IOCTL for device vnodes.
  *
  * FIODTYPE and FIODGNAME (plus FIODGNAME_32 under COMPAT_FREEBSD32) are
  * answered here; every other command goes to the driver's d_ioctl().
  * ENOIOCTL from the driver is reported as ENOTTY.  After a successful
  * TIOCSCTTY the session's controlling-terminal vnode is switched to this
  * vnode.
  *
  * Returns ENXIO when the cdevsw cannot be referenced, EINVAL when the
  * FIODGNAME buffer is too small for the name, or the handler's result.
  */
 static int
 devfs_ioctl(struct vop_ioctl_args *ap)
 {
 	struct fiodgname_arg *fgn;
 	struct vnode *vpold, *vp;
 	struct cdevsw *dsw;
 	struct thread *td;
 	struct cdev *dev;
 	int error, ref, i;
 	const char *p;
 	u_long com;
 
 	vp = ap->a_vp;
 	com = ap->a_command;
 	td = ap->a_td;
 
 	dsw = devvn_refthread(vp, &dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));
 
 	switch (com) {
 	case FIODTYPE:
 		/* Report the device type bits of the driver flags. */
 		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
 		error = 0;
 		break;
 	case FIODGNAME:
 #ifdef	COMPAT_FREEBSD32
 	case FIODGNAME_32:
 #endif
 		/* Copy out the device name, including its terminating NUL. */
 		fgn = ap->a_data;
 		p = devtoname(dev);
 		i = strlen(p) + 1;
 		if (i > fgn->len)
 			error = EINVAL;
 		else
 			error = copyout(p, fiodgname_buf_get_ptr(fgn, com), i);
 		break;
 	default:
 		error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
 	}
 
 	dev_relthread(dev, ref);
 	if (error == ENOIOCTL)
 		error = ENOTTY;
 
 	if (error == 0 && com == TIOCSCTTY) {
 		/*
 		 * Do nothing if reassigning same control tty, or if the
 		 * control tty has already disappeared.  If it disappeared,
 		 * it's because we were racing with TIOCNOTTY.  TIOCNOTTY
 		 * already took care of releasing the old vnode and we have
 		 * nothing left to do.
 		 */
 		sx_slock(&proctree_lock);
 		if (td->td_proc->p_session->s_ttyvp == vp ||
 		    td->td_proc->p_session->s_ttyp == NULL) {
 			sx_sunlock(&proctree_lock);
 			return (0);
 		}
 
 		/* Reference the new vnode before publishing it. */
 		vpold = td->td_proc->p_session->s_ttyvp;
 		VREF(vp);
 		SESS_LOCK(td->td_proc->p_session);
 		td->td_proc->p_session->s_ttyvp = vp;
 		td->td_proc->p_session->s_ttydp = cdev2priv(dev);
 		SESS_UNLOCK(td->td_proc->p_session);
 
 		sx_sunlock(&proctree_lock);
 
 		/* Get rid of reference to old control tty */
 		if (vpold)
 			vrele(vpold);
 	}
 	return (error);
 }
 
 /*
  * fo_kqfilter for devfs files: validate fp with devfs_fp_check() and pass
  * the knote on to the driver's d_kqfilter().  The thread's previous
  * td_fpop value is put back once the driver has returned.
  */
 /* ARGSUSED */
 static int
 devfs_kqfilter_f(struct file *fp, struct knote *kn)
 {
 	struct thread *td = curthread;
 	struct file *saved_fpop = td->td_fpop;
 	struct cdevsw *dsw;
 	struct cdev *dev;
 	int ref, rv;
 
 	rv = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (rv != 0)
 		return (rv);
 
 	rv = dsw->d_kqfilter(dev, kn);
 	td->td_fpop = saved_fpop;
 	dev_relthread(dev, ref);
 	return (rv);
 }
 
 /*
  * Decide whether thread td may see dirent de, based on prison_check() of
  * the thread's credential against the credential stored in the cdev.
  *
  * Returns 0 when the entry is visible: it has no cdev, the cdev has no
  * credential, prison_check() passes, or the device is the calling
  * process's controlling terminal.  Otherwise the prison_check() error is
  * returned.
  */
 static inline int
 devfs_prison_check(struct devfs_dirent *de, struct thread *td)
 {
 	struct cdev_priv *cdp = de->de_cdp;
 	struct ucred *devcred;
 	struct proc *p;
 	int rv;
 
 	if (cdp == NULL)
 		return (0);
 	devcred = cdp->cdp_c.si_cred;
 	if (devcred == NULL)
 		return (0);
 
 	rv = prison_check(td->td_ucred, devcred);
 	if (rv == 0)
 		return (0);
 
 	/* We do, however, allow access to the controlling terminal */
 	p = td->td_proc;
 	PROC_LOCK(p);
 	if ((p->p_flag & P_CONTROLT) != 0 && p->p_session->s_ttydp == cdp)
 		rv = 0;
 	PROC_UNLOCK(p);
 	return (rv);
 }
 
 /*
  * Worker for devfs_lookup(): resolve the component described by ap->a_cnp
  * inside directory ap->a_dvp.
  *
  * Entered with the mount's dm_lock held.  *dm_unlock is set to 0 on the
  * paths where dm_lock has already been released before returning (after
  * devfs_allocv(), a failed re-populate, or a dropped mount); when it is
  * left untouched the caller still has to release dm_lock.
  *
  * Returns 0 with *ap->a_vpp set on success, EJUSTRETURN when a missing or
  * whited-out last component may be created (CREATE or RENAME with
  * LOCKPARENT or WANTPARENT), or an errno value.
  */
 static int
 devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct thread *td;
 	struct devfs_dirent *de, *dd;
 	struct devfs_dirent **dde;
 	struct devfs_mount *dmp;
 	struct mount *mp;
 	struct cdev *cdev;
 	int error, flags, nameiop, dvplocked;
 	char specname[SPECNAMELEN + 1], *pname;
 
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dvp = ap->a_dvp;
 	pname = cnp->cn_nameptr;
 	td = cnp->cn_thread;
 	flags = cnp->cn_flags;
 	nameiop = cnp->cn_nameiop;
 	mp = dvp->v_mount;
 	dmp = VFSTODEVFS(mp);
 	dd = dvp->v_data;
 	*vpp = NULLVP;
 
 	/* Renaming is not supported. */
 	if ((flags & ISLASTCN) && nameiop == RENAME)
 		return (EOPNOTSUPP);
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
 		return (EIO);
 
 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
 	if (error)
 		return (error);
 
 	/* "." resolves to the directory itself. */
 	if (cnp->cn_namelen == 1 && *pname == '.') {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		*vpp = dvp;
 		VREF(dvp);
 		return (0);
 	}
 
 	/* ".." resolves to the parent dirent's vnode. */
 	if (flags & ISDOTDOT) {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		de = devfs_parent_dirent(dd);
 		if (de == NULL)
 			return (ENOENT);
 		/* dvp is unlocked while the parent vnode is obtained. */
 		dvplocked = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp);
 		error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK,
 		    vpp);
 		*dm_unlock = 0;
 		vn_lock(dvp, dvplocked | LK_RETRY);
 		return (error);
 	}
 
 	dd = dvp->v_data;
 	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
 	while (de == NULL) {	/* While(...) so we can use break */
 
 		if (nameiop == DELETE)
 			return (ENOENT);
 
 		/*
 		 * OK, we didn't have an entry for the name we were asked for
 		 * so we try to see if anybody can create it on demand.
 		 */
 		pname = devfs_fqpn(specname, dmp, dd, cnp);
 		if (pname == NULL)
 			break;
 
 		/* Invoke the dev_clone handlers with dm_lock released. */
 		cdev = NULL;
 		DEVFS_DMP_HOLD(dmp);
 		sx_xunlock(&dmp->dm_lock);
 		sx_slock(&clone_drain_lock);
 		EVENTHANDLER_INVOKE(dev_clone,
 		    td->td_ucred, pname, strlen(pname), &cdev);
 		sx_sunlock(&clone_drain_lock);
 
 		if (cdev == NULL)
 			sx_xlock(&dmp->dm_lock);
 		else if (devfs_populate_vp(dvp) != 0) {
 			/*
 			 * Re-populate failed and left dm_lock released; take
 			 * it again just to drop our hold on the mount.
 			 */
 			*dm_unlock = 0;
 			sx_xlock(&dmp->dm_lock);
 			if (DEVFS_DMP_DROP(dmp)) {
 				sx_xunlock(&dmp->dm_lock);
 				devfs_unmount_final(dmp);
 			} else
 				sx_xunlock(&dmp->dm_lock);
 			dev_rel(cdev);
 			return (ENOENT);
 		}
 		if (DEVFS_DMP_DROP(dmp)) {
 			*dm_unlock = 0;
 			sx_xunlock(&dmp->dm_lock);
 			devfs_unmount_final(dmp);
 			if (cdev != NULL)
 				dev_rel(cdev);
 			return (ENOENT);
 		}
 
 		if (cdev == NULL)
 			break;
 
 		/* Pick up the dirent the new cdev has on this mount, if any. */
 		dev_lock();
 		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
 		if (dde != NULL && *dde != NULL)
 			de = *dde;
 		dev_unlock();
 		dev_rel(cdev);
 		break;
 	}
 
 	if (de == NULL || de->de_flags & DE_WHITEOUT) {
 		if ((nameiop == CREATE || nameiop == RENAME) &&
 		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
 			cnp->cn_flags |= SAVENAME;
 			return (EJUSTRETURN);
 		}
 		return (ENOENT);
 	}
 
 	/* Entries hidden by the prison check are reported as absent. */
 	if (devfs_prison_check(de, td))
 		return (ENOENT);
 
 	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			return (error);
 		/*
 		 * NOTE(review): *vpp was set to NULLVP at the top and is not
 		 * assigned on this path, so this branch looks unreachable --
 		 * confirm.
 		 */
 		if (*vpp == dvp) {
 			VREF(dvp);
 			*vpp = dvp;
 			return (0);
 		}
 	}
 	/* devfs_allocv() drops dm_lock on return. */
 	error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, vpp);
 	*dm_unlock = 0;
 	return (error);
 }
 
 /*
  * VOP_LOOKUP: populate the mount, then let devfs_lookupx() do the work.
  * dm_lock is released here unless devfs_lookupx() reports, by clearing
  * dm_unlock, that it has already been released.
  *
  * A populate failure is reported as ENOTDIR.
  */
 static int
 devfs_lookup(struct vop_lookup_args *ap)
 {
 	struct devfs_mount *dmp;
 	int dm_unlock, rv;
 
 	if (devfs_populate_vp(ap->a_dvp) != 0)
 		return (ENOTDIR);
 
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
 	dm_unlock = 1;
 	rv = devfs_lookupx(ap, &dm_unlock);
 	if (dm_unlock == 1)
 		sx_xunlock(&dmp->dm_lock);
 	return (rv);
 }
 
 /*
  * VOP_MKNOD: bring back a whited-out entry.
  *
  * Looks in the directory for an entry with the requested name that is
  * flagged DE_WHITEOUT, clears the flag and returns a vnode for it through
  * devfs_allocv().  Only VCHR requests are accepted.
  *
  * Returns EOPNOTSUPP for any other vnode type, ENOENT when no matching
  * entry exists or the matching entry is not a whiteout, or the result of
  * devfs_allocv().
  */
 static int
 devfs_mknod(struct vop_mknod_args *ap)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct devfs_dirent *dd, *de;
 	struct devfs_mount *dmp;
 	int error;
 
 	/*
 	 * The only type of node we should be creating here is a
 	 * character device, for anything else return EOPNOTSUPP.
 	 */
 	if (ap->a_vap->va_type != VCHR)
 		return (EOPNOTSUPP);
 	dvp = ap->a_dvp;
 	dmp = VFSTODEVFS(dvp->v_mount);
 
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dd = dvp->v_data;
 
 	error = ENOENT;
 	sx_xlock(&dmp->dm_lock);
 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
 			continue;
 		/* Skip character devices whose cdev is not active. */
 		if (de->de_dirent->d_type == DT_CHR &&
 		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
 			continue;
 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
 		    de->de_dirent->d_namlen) != 0)
 			continue;
 		/* Name matches: usable only if it is a whiteout. */
 		if (de->de_flags & DE_WHITEOUT)
 			break;
 		goto notfound;
 	}
 	/* de is NULL when the list was exhausted without a match. */
 	if (de == NULL)
 		goto notfound;
 	de->de_flags &= ~DE_WHITEOUT;
 	/* devfs_allocv() drops dm_lock on return. */
 	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
 	return (error);
 notfound:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 /*
  * VOP_OPEN for device vnodes: call the driver's d_fdopen() when it has
  * one, otherwise d_open(), with the vnode unlocked and ap->a_fp installed
  * as the thread's current file.  On success a struct file that still uses
  * badfileops is initialised with devfs_ops_f.
  *
  * Returns ENXIO for VBLK vnodes, for vnodes without a cdev, when the
  * cdevsw cannot be referenced, and when the driver has d_fdopen() but no
  * struct file was supplied.  ERESTART from the driver is turned into
  * EINTR; other driver errors are returned as they are.
  */
 /* ARGSUSED */
 static int
 devfs_open(struct vop_open_args *ap)
 {
 	struct thread *td = ap->a_td;
 	struct vnode *vp = ap->a_vp;
 	struct cdev *dev = vp->v_rdev;
 	struct file *fp = ap->a_fp;
 	int error, ref, vlocked;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (vp->v_type == VBLK)
 		return (ENXIO);
 
 	if (dev == NULL)
 		return (ENXIO);
 
 	/* Make this field valid before any I/O in d_open. */
 	if (dev->si_iosize_max == 0)
 		dev->si_iosize_max = DFLTPHYS;
 
 	dsw = dev_refthread(dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	if (fp == NULL && dsw->d_fdopen != NULL) {
 		dev_relthread(dev, ref);
 		return (ENXIO);
 	}
 
 	/* The driver is called with the vnode unlocked. */
 	vlocked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 
 	fpop = td->td_fpop;
 	td->td_fpop = fp;
 	if (fp != NULL) {
 		fp->f_data = dev;
 		fp->f_vnode = vp;
 	}
 	if (dsw->d_fdopen != NULL)
 		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
 	else
 		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
 	/* Clean up any cdevpriv upon error. */
 	if (error != 0)
 		devfs_clear_cdevpriv();
 	td->td_fpop = fpop;
 
 	/* Restore the lock state the caller entered with. */
 	vn_lock(vp, vlocked | LK_RETRY);
 	dev_relthread(dev, ref);
 	if (error != 0) {
 		if (error == ERESTART)
 			error = EINTR;
 		return (error);
 	}
 
 #if 0	/* /dev/console */
 	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
 #else
 	if (fp == NULL)
 		return (error);
 #endif
 	/* Only take over files the driver has not set up itself. */
 	if (fp->f_ops == &badfileops)
 		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
 	return (error);
 }
 
 /*
  * VOP_PATHCONF: report the configurable limits of a devfs node in
  * *ap->a_retval.  The terminal limits (_PC_MAX_CANON, _PC_MAX_INPUT and
  * _PC_VDISABLE) fail with EINVAL on vnodes not flagged VV_ISTTY; names
  * not handled here are passed on to vop_stdpathconf().
  */
 static int
 devfs_pathconf(struct vop_pathconf_args *ap)
 {
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		break;
 	case _PC_LINK_MAX:
 		*ap->a_retval = INT_MAX;
 		break;
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		break;
 	case _PC_MAX_CANON:
 		if ((ap->a_vp->v_vflag & VV_ISTTY) == 0)
 			return (EINVAL);
 		*ap->a_retval = MAX_CANON;
 		break;
 	case _PC_MAX_INPUT:
 		if ((ap->a_vp->v_vflag & VV_ISTTY) == 0)
 			return (EINVAL);
 		*ap->a_retval = MAX_INPUT;
 		break;
 	case _PC_VDISABLE:
 		if ((ap->a_vp->v_vflag & VV_ISTTY) == 0)
 			return (EINVAL);
 		*ap->a_retval = _POSIX_VDISABLE;
 		break;
 	case _PC_MAC_PRESENT:
 #ifdef MAC
 		/*
 		 * If MAC is enabled, devfs automatically supports
 		 * trivial non-persistent label storage.
 		 */
 		*ap->a_retval = 1;
 #else
 		*ap->a_retval = 0;
 #endif
 		break;
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		break;
 	default:
 		return (vop_stdpathconf(ap));
 	}
 	return (0);
 }
 
 /*
  * fo_poll for devfs files: poll through the driver's d_poll() when
  * devfs_fp_check() accepts fp, and fall back to the generic vnode poll
  * when it does not.
  */
 /* ARGSUSED */
 static int
 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
 {
 	struct file *saved_fpop = td->td_fpop;
 	struct cdevsw *dsw;
 	struct cdev *dev;
 	int ref, revents;
 
 	if (devfs_fp_check(fp, &dev, &dsw, &ref) != 0)
 		return (vnops.fo_poll(fp, events, cred, td));
 
 	revents = dsw->d_poll(dev, events, td);
 	td->td_fpop = saved_fpop;
 	dev_relthread(dev, ref);
 	return (revents);
 }
 
 /*
  * Print out the contents of a special device vnode.
  */
 static int
 devfs_print(struct vop_print_args *ap)
 {
 
 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
 	return (0);
 }
 
 /*
  * fo_read for devfs files: read through the driver's d_read(), falling
  * back to the generic vnode read when devfs_fp_check() rejects fp.
  *
  * Requests larger than DEVFS_IOSIZE_MAX fail with EINVAL.  The device's
  * access time is updated when data was transferred, or when a non-empty
  * request completed without error.
  */
 static int
 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
     int flags, struct thread *td)
 {
 	struct cdev *dev;
 	int ioflag, error, ref;
 	ssize_t resid;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_read(fp, uio, cred, flags, td);
 		return (error);
 	}
 	/* Remember the request size to detect whether anything was read. */
 	resid = uio->uio_resid;
 	/* Pass O_NONBLOCK through; O_DIRECT additionally sets IO_DIRECT. */
 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
 	if (ioflag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 
 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 	error = dsw->d_read(dev, uio, ioflag);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
 		devfs_timestamp(&dev->si_atime);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 
 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
 	return (error);
 }
 
 /*
  * VOP_READDIR for devfs directories.
  *
  * Walks the directory's dirent list and hands each visible entry at or
  * beyond uio_offset to vfs_read_dirent().  Entries flagged DE_COVERED or
  * DE_WHITEOUT, and entries rejected by devfs_prison_check(), are skipped.
  * The walk stops at the first record that no longer fits in uio_resid;
  * uio_offset is then set to the offset reached.
  *
  * Returns ENOTDIR for non-directories, EINVAL for a negative offset, EIO
  * when devfs_populate_vp() fails, or the vfs_read_dirent() result.
  */
 static int
 devfs_readdir(struct vop_readdir_args *ap)
 {
 	int error;
 	struct uio *uio;
 	struct dirent *dp;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	off_t off;
 	int *tmp_ncookies = NULL;
 
 	if (ap->a_vp->v_type != VDIR)
 		return (ENOTDIR);
 
 	uio = ap->a_uio;
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	/*
 	 * XXX: This is a temporary hack to get around this filesystem not
 	 * supporting cookies. We store the location of the ncookies pointer
 	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
 	 * and set the number of cookies to 0. We then set the pointer to
 	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
 	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
 	 * pointer to its original location before returning to the caller.
 	 */
 	if (ap->a_ncookies != NULL) {
 		tmp_ncookies = ap->a_ncookies;
 		*ap->a_ncookies = 0;
 		ap->a_ncookies = NULL;
 	}
 
 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
 	if (devfs_populate_vp(ap->a_vp) != 0) {
 		if (tmp_ncookies != NULL)
 			ap->a_ncookies = tmp_ncookies;
 		return (EIO);
 	}
 	error = 0;
 	de = ap->a_vp->v_data;
 	off = 0;
 	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
 		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
 		if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
 			continue;
 		if (devfs_prison_check(dd, uio->uio_td))
 			continue;
 		/* For a subdirectory the inode comes from its de_dir entry. */
 		if (dd->de_dirent->d_type == DT_DIR)
 			de = dd->de_dir;
 		else
 			de = dd;
 		dp = dd->de_dirent;
 		MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp));
 		if (dp->d_reclen > uio->uio_resid)
 			break;
 		dp->d_fileno = de->de_inode;
 		/* NOTE: d_off is the offset for the *next* entry. */
 		dp->d_off = off + dp->d_reclen;
 		/* Entries in front of the requested offset are only counted. */
 		if (off >= uio->uio_offset) {
 			error = vfs_read_dirent(ap, dp, off);
 			if (error)
 				break;
 		}
 		off += dp->d_reclen;
 	}
 	sx_xunlock(&dmp->dm_lock);
 	uio->uio_offset = off;
 
 	/*
 	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
 	 * place.
 	 */
 	if (tmp_ncookies != NULL)
 		ap->a_ncookies = tmp_ncookies;
 
 	return (error);
 }
 
 /*
  * VOP_READLINK: copy the link target stored in the dirent (de_symlink,
  * without its terminating NUL) to the caller's uio.
  */
 static int
 devfs_readlink(struct vop_readlink_args *ap)
 {
 	struct devfs_dirent *de = ap->a_vp->v_data;
 	char *target = de->de_symlink;
 
 	return (uiomove(target, strlen(target), ap->a_uio));
 }
 
 /*
  * VOP_RECLAIM: break the link between the vnode and its dirent, if the
  * vnode still has one, under devfs_de_interlock.  Always returns 0.
  */
 static int
 devfs_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *dirent;
 
 	mtx_lock(&devfs_de_interlock);
 	dirent = vp->v_data;
 	if (dirent != NULL) {
 		dirent->de_vnode = NULL;
 		vp->v_data = NULL;
 	}
 	mtx_unlock(&devfs_de_interlock);
 	return (0);
 }
 
 /*
  * VOP_RECLAIM for character-device vnodes: perform the common
  * devfs_reclaim() work, then detach the vnode from its cdev and release
  * the device with dev_rel().  Always returns 0.
  */
 static int
 devfs_reclaim_vchr(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp;
 	struct cdev *dev;
 
 	vp = ap->a_vp;
 	MPASS(vp->v_type == VCHR);
 
 	devfs_reclaim(ap);
 
 	VI_LOCK(vp);
 	dev_lock();
 	dev = vp->v_rdev;
 	vp->v_rdev = NULL;
 	/* Take one use off the device if the vnode is still in use. */
 	if (dev != NULL)
 		dev->si_usecount -= (vp->v_usecount > 0);
 	dev_unlock();
 	VI_UNLOCK(vp);
 	/* dev_rel() is called after both locks have been released. */
 	if (dev != NULL)
 		dev_rel(dev);
 	return (0);
 }
 
 /*
  * VOP_REMOVE: remove a devfs entry.
  *
  * An entry without a cdev (de_cdp == NULL) is unlinked from its directory
  * and deleted; when it is a symlink, an entry of the same name that is
  * still present has its DE_COVERED flag cleared.  An entry backed by a
  * cdev is only flagged DE_WHITEOUT.  Both vnodes must be exclusively
  * locked on entry and are locked again on return.  Always returns 0.
  */
 static int
 devfs_remove(struct vop_remove_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de, *de_covered;
 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
 
 	ASSERT_VOP_ELOCKED(dvp, "devfs_remove");
 	ASSERT_VOP_ELOCKED(vp, "devfs_remove");
 
 	sx_xlock(&dmp->dm_lock);
 	dd = ap->a_dvp->v_data;
 	de = vp->v_data;
 	if (de->de_cdp == NULL) {
 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
 		if (de->de_dirent->d_type == DT_LNK) {
 			/* Uncover an entry this symlink was shadowing. */
 			de_covered = devfs_find(dd, de->de_dirent->d_name,
 			    de->de_dirent->d_namlen, 0);
 			if (de_covered != NULL)
 				de_covered->de_flags &= ~DE_COVERED;
 		}
 		/* We need to unlock dvp because devfs_delete() may lock it. */
 		VOP_UNLOCK(vp);
 		if (dvp != vp)
 			VOP_UNLOCK(dvp);
 		devfs_delete(dmp, de, 0);
 		sx_xunlock(&dmp->dm_lock);
 		/* Re-lock in parent-then-child order. */
 		if (dvp != vp)
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	} else {
 		/* Device-backed entries are hidden rather than deleted. */
 		de->de_flags |= DE_WHITEOUT;
 		sx_xunlock(&dmp->dm_lock);
 	}
 	return (0);
 }
 
 /*
  * Revoke is called on a tty when a terminal session ends.  The vnode
  * is orphaned by setting v_op to deadfs so we need to let go of it
  * as well so that we create a new one next time around.
  *
  * Besides ap->a_vp itself, every vnode reachable through the cdev's
  * per-mount dirents (cdp_dirents[]) is vgone()'d.  cdp_inuse is raised
  * for the duration; if it drops back to zero and the cdev is no longer
  * CDP_ACTIVE, the cdev is taken off cdevp_list and released.
  * Only REVOKEALL is supported.  Always returns 0.
  */
 static int
 devfs_revoke(struct vop_revoke_args *ap)
 {
 	struct vnode *vp = ap->a_vp, *vp2;
 	struct cdev *dev;
 	struct cdev_priv *cdp;
 	struct devfs_dirent *de;
 	u_int i;
 
 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
 
 	dev = vp->v_rdev;
 	cdp = cdev2priv(dev);
  
 	dev_lock();
 	cdp->cdp_inuse++;
 	dev_unlock();
 
 	/* Hold the vnode so it stays around across vgone(). */
 	vhold(vp);
 	vgone(vp);
 	vdrop(vp);
 
 	VOP_UNLOCK(vp);
  loop:
 	/*
 	 * Each pass looks for one dirent that still has a vnode, revokes
 	 * that vnode and rescans; the loop ends when a full scan finds none.
 	 */
 	for (;;) {
 		mtx_lock(&devfs_de_interlock);
 		dev_lock();
 		vp2 = NULL;
 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
 			de = cdp->cdp_dirents[i];
 			if (de == NULL)
 				continue;
 
 			vp2 = de->de_vnode;
 			if (vp2 != NULL) {
 				dev_unlock();
 				VI_LOCK(vp2);
 				mtx_unlock(&devfs_de_interlock);
 				/* vget() failed: restart the whole scan. */
 				if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK,
 				    curthread))
 					goto loop;
 				vhold(vp2);
 				vgone(vp2);
 				vdrop(vp2);
 				vput(vp2);
 				break;
 			} 
 		}
 		/* A vnode was handled with both locks dropped; scan again. */
 		if (vp2 != NULL) {
 			continue;
 		}
 		dev_unlock();
 		mtx_unlock(&devfs_de_interlock);
 		break;
 	}
 	dev_lock();
 	cdp->cdp_inuse--;
 	if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
 		TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
 		dev_unlock();
 		dev_rel(&cdp->cdp_c);
 	} else
 		dev_unlock();
 
 	/* Return with the vnode exclusively locked. */
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	return (0);
 }
 
 /*
  * VOP_IOCTL handler that forwards the command to devfs_rules_ioctl().
  *
  * The vnode is share-locked only long enough to check that it is not
  * doomed and to take dm_lock; the mount is then populated and the rules
  * ioctl runs under dm_lock.
  *
  * Returns EBADF for a doomed vnode, ENOENT when the mount lost its last
  * hold during populate, or the devfs_rules_ioctl() result.
  */
 static int
 devfs_rioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp;
 	struct devfs_mount *dmp;
 	int error;
 
 	vp = ap->a_vp;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	if (VN_IS_DOOMED(vp)) {
 		VOP_UNLOCK(vp);
 		return (EBADF);
 	}
 	dmp = VFSTODEVFS(vp->v_mount);
 	sx_xlock(&dmp->dm_lock);
 	VOP_UNLOCK(vp);
 	/* Hold the mount across devfs_populate(). */
 	DEVFS_DMP_HOLD(dmp);
 	devfs_populate(dmp);
 	if (DEVFS_DMP_DROP(dmp)) {
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 		return (ENOENT);
 	}
 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 static int
 devfs_rread(struct vop_read_args *ap)
 {
 
 	if (ap->a_vp->v_type != VDIR)
 		return (EINVAL);
 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
 }
 
 static int
 devfs_setattr(struct vop_setattr_args *ap)
 {
 	struct devfs_dirent *de;
 	struct vattr *vap;
 	struct vnode *vp;
 	struct thread *td;
 	int c, error;
 	uid_t uid;
 	gid_t gid;
 
 	vap = ap->a_vap;
 	vp = ap->a_vp;
 	td = curthread;
 	if ((vap->va_type != VNON) ||
 	    (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) ||
 	    (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) ||
 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
 	    (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) ||
 	    (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	de = vp->v_data;
 	if (vp->v_type == VDIR)
 		de = de->de_dir;
 
 	c = 0;
 	if (vap->va_uid == (uid_t)VNOVAL)
 		uid = de->de_uid;
 	else
 		uid = vap->va_uid;
 	if (vap->va_gid == (gid_t)VNOVAL)
 		gid = de->de_gid;
 	else
 		gid = vap->va_gid;
 	if (uid != de->de_uid || gid != de->de_gid) {
 		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
 			error = priv_check(td, PRIV_VFS_CHOWN);
 			if (error != 0)
 				goto ret;
 		}
 		de->de_uid = uid;
 		de->de_gid = gid;
 		c = 1;
 	}
 
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (ap->a_cred->cr_uid != de->de_uid) {
 			error = priv_check(td, PRIV_VFS_ADMIN);
 			if (error != 0)
 				goto ret;
 		}
 		de->de_mode = vap->va_mode;
 		c = 1;
 	}
 
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		error = vn_utimes_perm(vp, vap, ap->a_cred, td);
 		if (error != 0)
 			goto ret;
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			if (vp->v_type == VCHR)
 				vp->v_rdev->si_atime = vap->va_atime;
 			else
 				de->de_atime = vap->va_atime;
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			if (vp->v_type == VCHR)
 				vp->v_rdev->si_mtime = vap->va_mtime;
 			else
 				de->de_mtime = vap->va_mtime;
 		}
 		c = 1;
 	}
 
 	if (c) {
 		if (vp->v_type == VCHR)
 			vfs_timestamp(&vp->v_rdev->si_ctime);
 		else
 			vfs_timestamp(&de->de_mtime);
 	}
 
 ret:
 	sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock);
 	return (error);
 }
 
 #ifdef MAC
 /*
  * Apply a new MAC label to a devfs vnode and pass the change on to the
  * devfs entry behind it through mac_devfs_update().  Always returns 0.
  */
 static int
 devfs_setlabel(struct vop_setlabel_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *dirent = vp->v_data;
 
 	mac_vnode_relabel(ap->a_cred, vp, ap->a_label);
 	mac_devfs_update(vp->v_mount, dirent, vp);
 	return (0);
 }
 #endif
 
 static int
 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
 {
 
 	return (vnops.fo_stat(fp, sb, cred, td));
 }
 
 /*
  * Create a user symlink in a devfs directory.
  *
  * Requires PRIV_DEVFS_SYMLINK.  Returns ENOENT if devfs_populate_vp()
  * fails, EEXIST if a user-created entry of the same name already exists,
  * and otherwise the result of devfs_allocv() for the new entry.
  *
  * NOTE(review): dm_lock is not taken in this function but is released on
  * the EEXIST path; presumably devfs_populate_vp() returns with it held
  * and devfs_allocv() drops it on the normal path -- confirm.
  */
 static int
 devfs_symlink(struct vop_symlink_args *ap)
 {
 	int i, error;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de, *de_covered, *de_dotdot;
 	struct devfs_mount *dmp;
 
 	error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
 	if (error)
 		return(error);
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
 	if (devfs_populate_vp(ap->a_dvp) != 0)
 		return (ENOENT);
 
 	/* Build the new entry: root-owned, mode 0755, flagged as user-made. */
 	dd = ap->a_dvp->v_data;
 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
 	de->de_flags = DE_USER;
 	de->de_uid = 0;
 	de->de_gid = 0;
 	de->de_mode = 0755;
 	de->de_inode = alloc_unr(devfs_inos);
 	de->de_dir = dd;
 	de->de_dirent->d_type = DT_LNK;
 	/* Keep a private copy of the target, including its terminating NUL. */
 	i = strlen(ap->a_target) + 1;
 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
 	bcopy(ap->a_target, de->de_symlink, i);
 #ifdef MAC
 	mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
 #endif
 	/*
 	 * A user entry of the same name is a conflict; any other existing
 	 * entry is marked as covered by the new symlink.
 	 */
 	de_covered = devfs_find(dd, de->de_dirent->d_name,
 	    de->de_dirent->d_namlen, 0);
 	if (de_covered != NULL) {
 		if ((de_covered->de_flags & DE_USER) != 0) {
 			devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
 			sx_xunlock(&dmp->dm_lock);
 			return (EEXIST);
 		}
 		KASSERT((de_covered->de_flags & DE_COVERED) == 0,
 		    ("devfs_symlink: entry %p already covered", de_covered));
 		de_covered->de_flags |= DE_COVERED;
 	}
 
 	/* Link the new entry into the directory right after "..". */
 	de_dotdot = TAILQ_FIRST(&dd->de_dlist);		/* "." */
 	de_dotdot = TAILQ_NEXT(de_dotdot, de_list);	/* ".." */
 	TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list);
 	devfs_dir_ref_de(dmp, dd);
 	devfs_rules_apply(dmp, de);
 
 	return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp));
 }
 
 /* fo_truncate for devfs files: defers entirely to the vnops file operations. */
 static int
 devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred,
     struct thread *td)
 {
 	int error;
 
 	error = vnops.fo_truncate(fp, length, cred, td);
 	return (error);
 }
 
 /*
  * fo_write for devfs files: hand the write to the driver's d_write method.
  *
  * Requests larger than DEVFS_IOSIZE_MAX fail with EINVAL.  If
  * devfs_fp_check() fails, the write is passed to vnops.fo_write instead.
  * Otherwise the d_write() result is returned.
  */
 static int
 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
     int flags, struct thread *td)
 {
 	struct cdev *dev;
 	int error, ioflag, ref;
 	ssize_t resid;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	/* Remember td_fpop so that it can be restored after the call. */
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_write(fp, uio, cred, flags, td);
 		return (error);
 	}
 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
 	/* Drivers get the fcntl flags, with IO_DIRECT added for O_DIRECT. */
 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
 	if (ioflag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 
 	resid = uio->uio_resid;
 
 	error = dsw->d_write(dev, uio, ioflag);
 	/*
 	 * Update the timestamps when data was consumed, or when a non-empty
 	 * request completed without error.
 	 */
 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
 		devfs_timestamp(&dev->si_ctime);
 		dev->si_mtime = dev->si_ctime;
 	}
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 
 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
 	return (error);
 }
 
 /*
  * fo_mmap for devfs files: map a character device into "map".
  *
  * The maximum protection is derived from the mount flags, the open mode
  * of the file and cap_maxprot.  vm_mmap_cdev() then supplies the VM object
  * that vm_mmap_object() maps.  Returns EACCES when the requested
  * protection is not compatible with how the file was opened (or with a
  * MNT_NOEXEC mount), otherwise the first error from devfs_fp_check(),
  * vm_mmap_cdev() or vm_mmap_object().
  */
 static int
 devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
     vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
     struct thread *td)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	struct mount *mp;
 	struct vnode *vp;
 	struct file *fpop;
 	vm_object_t object;
 	vm_prot_t maxprot;
 	int error, ref;
 
 	vp = fp->f_vnode;
 
 	/*
 	 * Ensure that file and memory protections are
 	 * compatible.
 	 */
 	mp = vp->v_mount;
 	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
 		maxprot = VM_PROT_NONE;
 		if ((prot & VM_PROT_EXECUTE) != 0)
 			return (EACCES);
 	} else
 		maxprot = VM_PROT_EXECUTE;
 	if ((fp->f_flag & FREAD) != 0)
 		maxprot |= VM_PROT_READ;
 	else if ((prot & VM_PROT_READ) != 0)
 		return (EACCES);
 
 	/*
 	 * If we are sharing potential changes via MAP_SHARED and we
 	 * are trying to get write permission although we opened it
 	 * without asking for it, bail out.
 	 *
 	 * Note that most character devices always share mappings.
 	 * The one exception is that D_MMAP_ANON devices
 	 * (i.e. /dev/zero) permit private writable mappings.
 	 *
 	 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests
 	 * as well as updating maxprot to permit writing for
 	 * D_MMAP_ANON devices rather than doing that here.
 	 */
 	if ((flags & MAP_SHARED) != 0) {
 		if ((fp->f_flag & FWRITE) != 0)
 			maxprot |= VM_PROT_WRITE;
 		else if ((prot & VM_PROT_WRITE) != 0)
 			return (EACCES);
 	}
 	maxprot &= cap_maxprot;
 
 	/* Remember td_fpop so that it can be restored after the call. */
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0)
 		return (error);
 
 	error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
 	    &object);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	if (error != 0)
 		return (error);
 
 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
 	    foff, FALSE, td);
 	/* The object obtained above is released again if mapping it failed. */
 	if (error != 0)
 		vm_object_deallocate(object);
 	return (error);
 }
 
 /*
  * Translate a cdev into the number stored in cdp_inode of its private
  * data; a NULL cdev maps to NODEV.
  */
 dev_t
 dev2udev(struct cdev *x)
 {
 	dev_t udev;
 
 	if (x != NULL)
 		udev = cdev2priv(x)->cdp_inode;
 	else
 		udev = NODEV;
 	return (udev);
 }
 
 /*
  * File operations vector for devfs.  read, write, truncate, ioctl, poll,
  * kqfilter, stat, close and mmap go to the devfs_*_f handlers; the
  * remaining operations use the generic vn_* routines.
  */
 static struct fileops devfs_ops_f = {
 	.fo_read =	devfs_read_f,
 	.fo_write =	devfs_write_f,
 	.fo_truncate =	devfs_truncate_f,
 	.fo_ioctl =	devfs_ioctl_f,
 	.fo_poll =	devfs_poll_f,
 	.fo_kqfilter =	devfs_kqfilter_f,
 	.fo_stat =	devfs_stat_f,
 	.fo_close =	devfs_close_f,
 	.fo_chmod =	vn_chmod,
 	.fo_chown =	vn_chown,
 	.fo_sendfile =	vn_sendfile,
 	.fo_seek =	vn_seek,
 	.fo_fill_kinfo = vn_fill_kinfo,
 	.fo_mmap =	devfs_mmap_f,
 	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
 /*
  * Vops for non-CHR vnodes in /dev.  Operations not listed here fall back
  * to default_vnodeops.
  */
 static struct vop_vector devfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		devfs_access,
 	.vop_getattr =		devfs_getattr,
 	.vop_ioctl =		devfs_rioctl,
 	.vop_lookup =		devfs_lookup,
 	.vop_mknod =		devfs_mknod,
 	.vop_pathconf =		devfs_pathconf,
 	.vop_read =		devfs_rread,
 	.vop_readdir =		devfs_readdir,
 	.vop_readlink =		devfs_readlink,
 	.vop_reclaim =		devfs_reclaim,
 	.vop_remove =		devfs_remove,
 	.vop_revoke =		devfs_revoke,
 	.vop_setattr =		devfs_setattr,
 #ifdef MAC
 	.vop_setlabel =		devfs_setlabel,
 #endif
 	.vop_symlink =		devfs_symlink,
 	.vop_vptocnp =		devfs_vptocnp,
 };
 VFS_VOP_VECTOR_REGISTER(devfs_vnodeops);
 
 /*
  * Vops for VCHR vnodes in /dev.  Operations that make no sense on a
  * device node are wired to VOP_PANIC, and vnode-level read/write/poll go
  * to the dead_* handlers.  Operations not listed here fall back to
  * default_vnodeops.
  */
 static struct vop_vector devfs_specops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		devfs_access,
 	.vop_bmap =		VOP_PANIC,
 	.vop_close =		devfs_close,
 	.vop_create =		VOP_PANIC,
 	.vop_fsync =		vop_stdfsync,
 	.vop_getattr =		devfs_getattr,
 	.vop_ioctl =		devfs_ioctl,
 	.vop_link =		VOP_PANIC,
 	.vop_mkdir =		VOP_PANIC,
 	.vop_mknod =		VOP_PANIC,
 	.vop_open =		devfs_open,
 	.vop_pathconf =		devfs_pathconf,
 	.vop_poll =		dead_poll,
 	.vop_print =		devfs_print,
 	.vop_read =		dead_read,
 	.vop_readdir =		VOP_PANIC,
 	.vop_readlink =		VOP_PANIC,
 	.vop_reallocblks =	VOP_PANIC,
 	.vop_reclaim =		devfs_reclaim_vchr,
 	.vop_remove =		devfs_remove,
 	.vop_rename =		VOP_PANIC,
 	.vop_revoke =		devfs_revoke,
 	.vop_rmdir =		VOP_PANIC,
 	.vop_setattr =		devfs_setattr,
 #ifdef MAC
 	.vop_setlabel =		devfs_setlabel,
 #endif
 	.vop_strategy =		VOP_PANIC,
 	.vop_symlink =		VOP_PANIC,
 	.vop_vptocnp =		devfs_vptocnp,
 	.vop_write =		dead_write,
 };
 VFS_VOP_VECTOR_REGISTER(devfs_specops);
 
 /*
  * Our calling convention to the device drivers used to be that we passed
  * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_*
  * flags instead, since that's what open(), close() and ioctl() take and
  * we don't really want vnode.h in device drivers.
  * We solved the source compatibility problem by redefining some vnode
  * flags to be the same as the fcntl ones and by sending down the bitwise
  * OR of the respective fcntl/vnode flags.  These CTASSERTs make sure
  * nobody pulls the rug out from under this.
  */
 CTASSERT(O_NONBLOCK == IO_NDELAY);
 CTASSERT(O_FSYNC == IO_SYNC);
Index: projects/clang1000-import/sys/fs/pseudofs/pseudofs_vnops.c
===================================================================
--- projects/clang1000-import/sys/fs/pseudofs/pseudofs_vnops.c	(revision 357389)
+++ projects/clang1000-import/sys/fs/pseudofs/pseudofs_vnops.c	(revision 357390)
@@ -1,1098 +1,1098 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_pseudofs.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/ctype.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 
 #include <fs/pseudofs/pseudofs.h>
 #include <fs/pseudofs/pseudofs_internal.h>
 
 /*
  * Consistency checks between a vnode's type and the type of the pfs_node
  * behind it.  Each macro expands to a KASSERT() whose message names the
  * calling function.
  */
 
 /* The node behind a VDIR vnode must be a root, dir or procdir node. */
 #define KASSERT_PN_IS_DIR(pn)						\
 	KASSERT((pn)->pn_type == pfstype_root ||			\
 	    (pn)->pn_type == pfstype_dir ||				\
 	    (pn)->pn_type == pfstype_procdir,				\
 	    ("%s(): VDIR vnode refers to non-directory pfs_node", __func__))
 
 /* The node behind a VREG vnode must be a file node. */
 #define KASSERT_PN_IS_FILE(pn)						\
 	KASSERT((pn)->pn_type == pfstype_file,				\
 	    ("%s(): VREG vnode refers to non-file pfs_node", __func__))
 
 /* The node behind a VLNK vnode must be a symlink node. */
 #define KASSERT_PN_IS_LINK(pn)						\
 	KASSERT((pn)->pn_type == pfstype_symlink,			\
 	    ("%s(): VLNK vnode refers to non-link pfs_node", __func__))
 
 /*
  * Compute the file number reported for a node.  Nodes that are not tied
  * to a process (pid == NO_PID) use their allocated fileno unchanged; for
  * the others the pid is folded in as fileno * NO_PID + pid.
  */
 static uint32_t
 pn_fileno(struct pfs_node *pn, pid_t pid)
 {
 
 	KASSERT(pn->pn_fileno > 0,
 	    ("%s(): no fileno allocated", __func__));
 	if (pid == NO_PID)
 		return (pn->pn_fileno);
 	return (pn->pn_fileno * NO_PID + pid);
 }
 
 /*
  * Decide whether node "pn", seen in the context of process "proc", is
  * visible to thread "td".  The process must be locked by the caller.
  *
  * Returns 1 if visible and 0 otherwise.  A NULL process, a process with
  * P_WEXIT set, one that p_cansee() rejects, or one that the node's own
  * visibility callback rejects, is not visible.
  */
 static int
 pfs_visible_proc(struct thread *td, struct pfs_node *pn, struct proc *proc)
 {
 
 	if (proc == NULL)
 		return (0);
 
 	PROC_LOCK_ASSERT(proc, MA_OWNED);
 
 	if ((proc->p_flag & P_WEXIT) != 0)
 		return (0);
 	if (p_cansee(td, proc) != 0)
 		return (0);
 	if (pn->pn_vis != NULL && !pn_vis(td, proc, pn))
 		return (0);
 	return (1);
 }
 
 /*
  * Check whether node "pn" for process "pid" is visible to thread "td".
  *
  * Returns 1 if visible and 0 otherwise.  If "p" is not NULL it is set to
  * NULL, except that on a positive answer for a real pid it receives the
  * process, which is then still locked and must be unlocked by the caller.
  * Nodes without a process (pid == NO_PID) are always visible.
  */
 static int
 pfs_visible(struct thread *td, struct pfs_node *pn, pid_t pid,
     struct proc **p)
 {
 	struct proc *target;
 
 	PFS_TRACE(("%s (pid: %d, req: %d)",
 	    pn->pn_name, pid, td->td_proc->p_pid));
 
 	if (p != NULL)
 		*p = NULL;
 	if (pid == NO_PID)
 		PFS_RETURN (1);
 
 	target = pfind(pid);
 	if (target == NULL)
 		PFS_RETURN (0);
 
 	if (!pfs_visible_proc(td, pn, target)) {
 		PROC_UNLOCK(target);
 		PFS_RETURN (0);
 	}
 
 	/* Visible: pass the locked process on, or unlock it if unwanted. */
 	if (p == NULL)
 		PROC_UNLOCK(target);
 	else
 		*p = target;
 	PFS_RETURN (1);
 }
 
 /*
  * Find the process with the given pid and place a hold on it.
  *
  * Returns 1 and stores the held, unlocked process in *p on success.
  * Returns 0, leaving *p untouched, if no such process is found or if it
  * has P_WEXIT set.
  */
 static int
 pfs_lookup_proc(pid_t pid, struct proc **p)
 {
 	struct proc *found;
 
 	found = pfind(pid);
 	if (found == NULL)
 		return (0);
 	if ((found->p_flag & P_WEXIT) == 0) {
 		_PHOLD(found);
 		PROC_UNLOCK(found);
 		*p = found;
 		return (1);
 	}
 	PROC_UNLOCK(found);
 	return (0);
 }
 
 /*
  * Verify permissions
  */
 static int
 pfs_access(struct vop_access_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct vattr vattr;
 	int error;
 
 	PFS_TRACE(("%s", pvd->pvd_pn->pn_name));
 	(void)pvd;
 
 	error = VOP_GETATTR(vn, &vattr, va->a_cred);
 	if (error)
 		PFS_RETURN (error);
 	error = vaccess(vn->v_type, vattr.va_mode, vattr.va_uid,
 	    vattr.va_gid, va->a_accmode, va->a_cred, NULL);
 	PFS_RETURN (error);
 }
 
 /*
  * Close a file or directory.  The node's close handler, if it has one, is
  * invoked on the last close only; in every other case this is a no-op
  * returning 0.
  */
 static int
 pfs_close(struct vop_close_args *va)
 {
 	struct vnode *vp;
 	struct pfs_vdata *pvd;
 	struct pfs_node *pn;
 	struct proc *proc;
 	int error;
 
 	vp = va->a_vp;
 	pvd = vp->v_data;
 	pn = pvd->pvd_pn;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	/* Not the last reference, or nothing to call: done. */
 	if (vrefcnt(vp) > 1 || pn->pn_close == NULL)
 		PFS_RETURN (0);
 
 	/* Process-bound nodes hand the (locked) process to the handler. */
 	proc = (pvd->pvd_pid != NO_PID) ? pfind(pvd->pvd_pid) : NULL;
 
 	error = pn_close(va->a_td, proc, pn);
 
 	if (proc != NULL)
 		PROC_UNLOCK(proc);
 
 	PFS_RETURN (error);
 }
 
 /*
  * Get file attributes
  */
 static int
 pfs_getattr(struct vop_getattr_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct vattr *vap = va->a_vap;
 	struct proc *proc;
 	int error = 0;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (ENOENT);
 
 	vap->va_type = vn->v_type;
 	vap->va_fileid = pn_fileno(pn, pvd->pvd_pid);
 	vap->va_flags = 0;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_bytes = vap->va_size = 0;
 	vap->va_filerev = 0;
 	vap->va_fsid = vn->v_mount->mnt_stat.f_fsid.val[0];
 	vap->va_nlink = 1;
 	nanotime(&vap->va_ctime);
 	vap->va_atime = vap->va_mtime = vap->va_ctime;
 
 	switch (pn->pn_type) {
 	case pfstype_procdir:
 	case pfstype_root:
 	case pfstype_dir:
 #if 0
 		pfs_lock(pn);
 		/* compute link count */
 		pfs_unlock(pn);
 #endif
 		vap->va_mode = 0555;
 		break;
 	case pfstype_file:
 	case pfstype_symlink:
 		vap->va_mode = 0444;
 		break;
 	default:
 		printf("shouldn't be here!\n");
 		vap->va_mode = 0;
 		break;
 	}
 
 	if (proc != NULL) {
 		vap->va_uid = proc->p_ucred->cr_ruid;
 		vap->va_gid = proc->p_ucred->cr_rgid;
 	} else {
 		vap->va_uid = 0;
 		vap->va_gid = 0;
 	}
 
 	if (pn->pn_attr != NULL)
 		error = pn_attr(curthread, proc, pn, vap);
 
 	if(proc != NULL)
 		PROC_UNLOCK(proc);
 
 	PFS_RETURN (error);
 }
 
 /*
  * Perform an ioctl on a pseudofs file by calling the node's ioctl
  * handler.
  *
  * The vnode is locked shared here and unlocked again on every return
  * path.  Returns EBADF for a doomed vnode, EINVAL for anything but a
  * regular file, ENOTTY if the node has no ioctl handler, EIO if the node
  * is no longer visible to the caller, and otherwise the handler's result.
  */
 static int
 pfs_ioctl(struct vop_ioctl_args *va)
 {
 	struct vnode *vn;
 	struct pfs_vdata *pvd;
 	struct pfs_node *pn;
 	struct proc *proc;
 	int error;
 
 	vn = va->a_vp;
 	vn_lock(vn, LK_SHARED | LK_RETRY);
 	/* v_data is only looked at after the doomed check has passed. */
 	if (VN_IS_DOOMED(vn)) {
 		VOP_UNLOCK(vn);
 		return (EBADF);
 	}
 	pvd = vn->v_data;
 	pn = pvd->pvd_pn;
 
 	PFS_TRACE(("%s: %lx", pn->pn_name, va->a_command));
 	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VREG) {
 		VOP_UNLOCK(vn);
 		PFS_RETURN (EINVAL);
 	}
 	KASSERT_PN_IS_FILE(pn);
 
 	if (pn->pn_ioctl == NULL) {
 		VOP_UNLOCK(vn);
 		PFS_RETURN (ENOTTY);
 	}
 
 	/*
 	 * This is necessary because process' privileges may
 	 * have changed since the open() call.
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) {
 		VOP_UNLOCK(vn);
 		PFS_RETURN (EIO);
 	}
 
 	/* proc, when not NULL, stays locked across the handler call. */
 	error = pn_ioctl(curthread, proc, pn, va->a_command, va->a_data);
 
 	if (proc != NULL)
 		PROC_UNLOCK(proc);
 
 	VOP_UNLOCK(vn);
 	PFS_RETURN (error);
 }
 
 /*
  * Perform getextattr by calling the node's getextattr handler.
  *
  * Returns EIO if the node is not visible to the caller, EOPNOTSUPP if the
  * node has no handler, and otherwise the handler's result.
  */
 static int
 pfs_getextattr(struct vop_getextattr_args *va)
 {
 	struct vnode *vp;
 	struct pfs_vdata *pvd;
 	struct pfs_node *pn;
 	struct proc *proc;
 	int error;
 
 	vp = va->a_vp;
 	pvd = vp->v_data;
 	pn = pvd->pvd_pn;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	/*
 	 * Visibility is checked again on every call, since either process'
 	 * privileges may have changed since the open() call.
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
 
 	if (pn->pn_getextattr != NULL)
 		error = pn_getextattr(curthread, proc, pn,
 		    va->a_attrnamespace, va->a_name, va->a_uio,
 		    va->a_size, va->a_cred);
 	else
 		error = EOPNOTSUPP;
 
 	if (proc != NULL)
 		PROC_UNLOCK(proc);
 
 	PFS_RETURN (error);
 }
 
 /*
  * Convert a vnode to its component name.
  *
  * The name of the node (the pid in decimal for a procdir) is copied to
  * the end of the free space in a_buf, i.e. so that it ends at offset
  * *a_buflen, and *a_buflen is lowered to the start of the name.  A vnode
  * for the parent node is returned in *a_vpp.  For the root directory
  * nothing is copied and the vnode itself is returned with a hold.
  *
  * Returns ENOMEM if the name does not fit, or the error from vfs_busy()
  * or pfs_vncache_alloc().
  */
 static int
 pfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode **dvp = ap->a_vpp;
 	struct pfs_vdata *pvd = vp->v_data;
 	struct pfs_node *pd = pvd->pvd_pn;
 	struct pfs_node *pn;
 	struct mount *mp;
 	char *buf = ap->a_buf;
-	int *buflen = ap->a_buflen;
+	size_t *buflen = ap->a_buflen;
 	char pidbuf[PFS_NAMELEN];
 	pid_t pid = pvd->pvd_pid;
 	int len, i, error, locked;
 
 	/* i tracks the offset in buf at which the name will start. */
 	i = *buflen;
 	error = 0;
 
 	pfs_lock(pd);
 
 	if (vp->v_type == VDIR && pd->pn_type == pfstype_root) {
 		*dvp = vp;
 		vhold(*dvp);
 		pfs_unlock(pd);
 		PFS_RETURN (0);
 	} else if (vp->v_type == VDIR && pd->pn_type == pfstype_procdir) {
 		/* A procdir is named after the pid it stands for. */
 		len = snprintf(pidbuf, sizeof(pidbuf), "%d", pid);
 		i -= len;
 		if (i < 0) {
 			error = ENOMEM;
 			goto failed;
 		}
 		bcopy(pidbuf, buf + i, len);
 	} else {
 		len = strlen(pd->pn_name);
 		i -= len;
 		if (i < 0) {
 			error = ENOMEM;
 			goto failed;
 		}
 		bcopy(pd->pn_name, buf + i, len);
 	}
 
 	pn = pd->pn_parent;
 	pfs_unlock(pd);
 
 	mp = vp->v_mount;
 	error = vfs_busy(mp, 0);
 	if (error)
 		return (error);
 
 	/*
 	 * vp is held by caller.
 	 */
 	/* Drop vp's lock while the parent vnode is obtained; retake it after. */
 	locked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 
 	error = pfs_vncache_alloc(mp, dvp, pn, pid);
 	if (error) {
 		vn_lock(vp, locked | LK_RETRY);
 		vfs_unbusy(mp);
 		PFS_RETURN(error);
 	}
 
 	/* Only now, on success, is the caller's buffer length updated. */
 	*buflen = i;
 	VOP_UNLOCK(*dvp);
 	vn_lock(vp, locked | LK_RETRY);
 	vfs_unbusy(mp);
 
 	PFS_RETURN (0);
 failed:
 	pfs_unlock(pd);
 	PFS_RETURN(error);
 }
 
 /*
  * Look up a file or directory.
  *
  * Resolves the component name in a_cnp inside directory a_dvp and returns
  * a vnode for it in *a_vpp.  Handles ".", "..", names of child nodes and,
  * when the directory has a procdir child, all-digit names taken as pids.
  *
  * Returns ENOTDIR if a_dvp is not a directory, EOPNOTSUPP for DELETE or
  * RENAME of the last component, EIO for ".." of the root, ENOENT when
  * nothing visible matches, or an error from VOP_ACCESS() or
  * pfs_vncache_alloc().
  */
 static int
 pfs_lookup(struct vop_cachedlookup_args *va)
 {
 	struct vnode *vn = va->a_dvp;
 	struct vnode **vpp = va->a_vpp;
 	struct componentname *cnp = va->a_cnp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pd = pvd->pvd_pn;
 	struct pfs_node *pn, *pdn = NULL;
 	struct mount *mp;
 	pid_t pid = pvd->pvd_pid;
 	char *pname;
 	int error, i, namelen, visible;
 
 	PFS_TRACE(("%.*s", (int)cnp->cn_namelen, cnp->cn_nameptr));
 	pfs_assert_not_owned(pd);
 
 	if (vn->v_type != VDIR)
 		PFS_RETURN (ENOTDIR);
 	KASSERT_PN_IS_DIR(pd);
 
 	error = VOP_ACCESS(vn, VEXEC, cnp->cn_cred, cnp->cn_thread);
 	if (error)
 		PFS_RETURN (error);
 
 	/*
 	 * Don't support DELETE or RENAME.  CREATE is supported so
 	 * that O_CREAT will work, but the lookup will still fail if
 	 * the file does not exist.
 	 */
 	if ((cnp->cn_flags & ISLASTCN) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		PFS_RETURN (EOPNOTSUPP);
 
 	/* shortcut: check if the name is too long */
 	if (cnp->cn_namelen >= PFS_NAMELEN)
 		PFS_RETURN (ENOENT);
 
 	/* check that parent directory is visible... */
 	if (!pfs_visible(curthread, pd, pvd->pvd_pid, NULL))
 		PFS_RETURN (ENOENT);
 
 	/* self */
 	namelen = cnp->cn_namelen;
 	pname = cnp->cn_nameptr;
 	if (namelen == 1 && pname[0] == '.') {
 		pn = pd;
 		*vpp = vn;
 		VREF(vn);
 		PFS_RETURN (0);
 	}
 
 	mp = vn->v_mount;
 
 	/* parent */
 	if (cnp->cn_flags & ISDOTDOT) {
 		if (pd->pn_type == pfstype_root)
 			PFS_RETURN (EIO);
 		/*
 		 * The directory is unlocked while the parent vnode is
 		 * obtained, so the mount is busied first.  If that cannot be
 		 * done without waiting, wait with the vnode unlocked and
 		 * recheck the vnode afterwards.
 		 */
 		error = vfs_busy(mp, MBF_NOWAIT);
 		if (error != 0) {
 			vfs_ref(mp);
 			VOP_UNLOCK(vn);
 			error = vfs_busy(mp, 0);
 			vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
 			vfs_rel(mp);
 			if (error != 0)
 				PFS_RETURN(ENOENT);
 			if (VN_IS_DOOMED(vn)) {
 				vfs_unbusy(mp);
 				PFS_RETURN(ENOENT);
 			}
 		}
 		VOP_UNLOCK(vn);
 		KASSERT(pd->pn_parent != NULL,
 		    ("%s(): non-root directory has no parent", __func__));
 		/*
 		 * This one is tricky.  Descendents of procdir nodes
 		 * inherit their parent's process affinity, but
 		 * there's no easy reverse mapping.  For simplicity,
 		 * we assume that if this node is a procdir, its
 		 * parent isn't (which is correct as long as
 		 * descendents of procdir nodes are never procdir
 		 * nodes themselves)
 		 */
 		if (pd->pn_type == pfstype_procdir)
 			pid = NO_PID;
 		pfs_lock(pd);
 		pn = pd->pn_parent;
 		pfs_unlock(pd);
 		goto got_pnode;
 	}
 
 	pfs_lock(pd);
 
 	/* named node */
 	/* While scanning, also remember a procdir child for the pid case. */
 	for (pn = pd->pn_nodes; pn != NULL; pn = pn->pn_next)
 		if (pn->pn_type == pfstype_procdir)
 			pdn = pn;
 		else if (pn->pn_name[namelen] == '\0' &&
 		    bcmp(pname, pn->pn_name, namelen) == 0) {
 			pfs_unlock(pd);
 			goto got_pnode;
 		}
 
 	/* process dependent node */
 	if ((pn = pdn) != NULL) {
 		pid = 0;
 		/* Parse the name as a decimal pid; stop once past PID_MAX. */
 		for (pid = 0, i = 0; i < namelen && isdigit(pname[i]); ++i)
 			if ((pid = pid * 10 + pname[i] - '0') > PID_MAX)
 				break;
 		/* Accept only if the whole name was consumed as digits. */
 		if (i == cnp->cn_namelen) {
 			pfs_unlock(pd);
 			goto got_pnode;
 		}
 	}
 
 	pfs_unlock(pd);
 
 	PFS_RETURN (ENOENT);
 
  got_pnode:
 	pfs_assert_not_owned(pd);
 	pfs_assert_not_owned(pn);
 	visible = pfs_visible(curthread, pn, pid, NULL);
 	if (!visible) {
 		error = ENOENT;
 		goto failed;
 	}
 
 	error = pfs_vncache_alloc(mp, vpp, pn, pid);
 	if (error)
 		goto failed;
 
 	/* For "..", undo the busy and relock the directory unlocked above. */
 	if (cnp->cn_flags & ISDOTDOT) {
 		vfs_unbusy(mp);
 		vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
 		if (VN_IS_DOOMED(vn)) {
 			vput(*vpp);
 			*vpp = NULL;
 			PFS_RETURN(ENOENT);
 		}
 	}
 	if (cnp->cn_flags & MAKEENTRY && !VN_IS_DOOMED(vn))
 		cache_enter(vn, *vpp, cnp);
 	PFS_RETURN (0);
  failed:
 	if (cnp->cn_flags & ISDOTDOT) {
 		vfs_unbusy(mp);
 		vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
 		*vpp = NULL;
 	}
 	PFS_RETURN(error);
 }
 
 /*
  * Open a file or directory.
  *
  * Returns EPERM if the mode check below fails, EOPNOTSUPP if O_SHLOCK or
  * O_EXLOCK was requested, and 0 otherwise.
  */
 static int
 pfs_open(struct vop_open_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	int mode = va->a_mode;
 
 	PFS_TRACE(("%s (mode 0x%x)", pn->pn_name, mode));
 	pfs_assert_not_owned(pn);
 
 	/* check if the requested mode is permitted */
 	/*
 	 * NOTE(review): both halves of each test look at the open mode; the
 	 * node's pn_flags are not consulted.  If FREAD/FWRITE have the same
 	 * bit values as PFS_RD/PFS_WR this check can never fire -- confirm
 	 * whether pn->pn_flags was intended, keeping in mind that directory
 	 * nodes may not carry PFS_RD.
 	 */
 	if (((mode & FREAD) && !(mode & PFS_RD)) ||
 	    ((mode & FWRITE) && !(mode & PFS_WR)))
 		PFS_RETURN (EPERM);
 
 	/* we don't support locking */
 	if ((mode & O_SHLOCK) || (mode & O_EXLOCK))
 		PFS_RETURN (EOPNOTSUPP);
 
 	PFS_RETURN (0);
 }
 
 /*
  * Read from a file.
  *
  * The node's fill handler produces the data.  PFS_RAWRD nodes get the uio
  * directly; for all others the handler fills an sbuf, from which the
  * requested range is then copied out.  The vnode is unlocked while the
  * handler runs and relocked before returning.
  *
  * Returns EINVAL for a non-regular vnode or an invalid offset/length,
  * EBADF if the node is not readable, EIO if it has no fill handler, is no
  * longer visible, or the sbuf cannot be allocated, and otherwise the
  * result of the handler or of the copy-out.
  */
 static int
 pfs_read(struct vop_read_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct uio *uio = va->a_uio;
 	struct proc *proc;
 	struct sbuf *sb = NULL;
 	int error, locked;
 	off_t buflen;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VREG)
 		PFS_RETURN (EINVAL);
 	KASSERT_PN_IS_FILE(pn);
 
 	if (!(pn->pn_flags & PFS_RD))
 		PFS_RETURN (EBADF);
 
 	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	/*
 	 * This is necessary because either process' privileges may
 	 * have changed since the open() call.
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
 	/* Swap the process lock for a hold, released with PRELE() at "ret". */
 	if (proc != NULL) {
 		_PHOLD(proc);
 		PROC_UNLOCK(proc);
 	}
 
 	/* Hold the vnode and drop its lock while the fill handler runs. */
 	vhold(vn);
 	locked = VOP_ISLOCKED(vn);
 	VOP_UNLOCK(vn);
 
 	if (pn->pn_flags & PFS_RAWRD) {
 		PFS_TRACE(("%zd resid", uio->uio_resid));
 		error = pn_fill(curthread, proc, pn, NULL, uio);
 		PFS_TRACE(("%zd resid", uio->uio_resid));
 		goto ret;
 	}
 
 	/* Reject negative values and an offset + length that would overflow. */
 	if (uio->uio_resid < 0 || uio->uio_offset < 0 ||
 	    uio->uio_resid > OFF_MAX - uio->uio_offset) {
 		error = EINVAL;
 		goto ret;
 	}
 	/* The sbuf must reach the end of the request, capped at MAXPHYS. */
 	buflen = uio->uio_offset + uio->uio_resid;
 	if (buflen > MAXPHYS)
 		buflen = MAXPHYS;
 
 	sb = sbuf_new(sb, NULL, buflen + 1, 0);
 	if (sb == NULL) {
 		error = EIO;
 		goto ret;
 	}
 
 	error = pn_fill(curthread, proc, pn, sb, uio);
 
 	if (error) {
 		sbuf_delete(sb);
 		goto ret;
 	}
 
 	/*
 	 * XXX: If the buffer overflowed, sbuf_len() will not return
 	 * the data length. Then just use the full length because an
 	 * overflowed sbuf must be full.
 	 */
 	if (sbuf_finish(sb) == 0)
 		buflen = sbuf_len(sb);
 	error = uiomove_frombuf(sbuf_data(sb), buflen, uio);
 	sbuf_delete(sb);
 ret:
 	vn_lock(vn, locked | LK_RETRY);
 	vdrop(vn);
 	if (proc != NULL)
 		PRELE(proc);
 	PFS_RETURN (error);
 }
 
 /*
  * Iterate through directory entries.
  *
  * Advances the cursor (*pn, *p) to the next entry of directory "pd" that
  * is visible to "td".  Start with both set to NULL.  A procdir node
  * stands for one entry per process on the allproc list, with *p walking
  * that list.  "proc" is the process the directory itself belongs to, or
  * NULL; it is used for the visibility check of ordinary nodes.
  *
  * Returns 0 with the cursor on the next entry, or -1 when there are no
  * more entries.  allproc_lock must be held shared by the caller;
  * pfs_assert_owned() additionally checks the caller's ownership of pd.
  */
 static int
 pfs_iterate(struct thread *td, struct proc *proc, struct pfs_node *pd,
 	    struct pfs_node **pn, struct proc **p)
 {
 	int visible;
 
 	sx_assert(&allproc_lock, SX_SLOCKED);
 	pfs_assert_owned(pd);
  again:
 	if (*pn == NULL) {
 		/* first node */
 		*pn = pd->pn_nodes;
 	} else if ((*pn)->pn_type != pfstype_procdir) {
 		/* next node */
 		*pn = (*pn)->pn_next;
 	}
 	if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) {
 		/* next process */
 		if (*p == NULL)
 			*p = LIST_FIRST(&allproc);
 		else
 			*p = LIST_NEXT(*p, p_list);
 		/* out of processes: next node */
 		if (*p == NULL)
 			*pn = (*pn)->pn_next;
 		else
 			PROC_LOCK(*p);
 	}
 
 	if ((*pn) == NULL)
 		return (-1);
 
 	/*
 	 * *p was locked above just for this check.  "proc" is not locked
 	 * here, so the caller must pass it in locked.
 	 */
 	if (*p != NULL) {
 		visible = pfs_visible_proc(td, *pn, *p);
 		PROC_UNLOCK(*p);
 	} else if (proc != NULL) {
 		visible = pfs_visible_proc(td, *pn, proc);
 	} else {
 		visible = 1;
 	}
 	if (!visible)
 		goto again;
 
 	return (0);
 }
 
 /*
  * Directory entry list: pfs_readdir() collects the entries it is going to
  * return on a tail queue of these and copies them out afterwards.
  */
 struct pfsentry {
 	STAILQ_ENTRY(pfsentry)	link;	/* list linkage */
 	struct dirent		entry;	/* the entry handed to the caller */
 };
 STAILQ_HEAD(pfsdirentlist, pfsentry);
 
 /*
  * Return directory entries.
  */
 static int
 pfs_readdir(struct vop_readdir_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pd = pvd->pvd_pn;
 	pid_t pid = pvd->pvd_pid;
 	struct proc *p, *proc;
 	struct pfs_node *pn;
 	struct uio *uio;
 	struct pfsentry *pfsent, *pfsent2;
 	struct pfsdirentlist lst;
 	off_t offset;
 	int error, i, resid;
 
 	STAILQ_INIT(&lst);
 	error = 0;
 	KASSERT(pd->pn_info == vn->v_mount->mnt_data,
 	    ("%s(): pn_info does not match mountpoint", __func__));
 	PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid));
 	pfs_assert_not_owned(pd);
 
 	if (vn->v_type != VDIR)
 		PFS_RETURN (ENOTDIR);
 	KASSERT_PN_IS_DIR(pd);
 	uio = va->a_uio;
 
 	/* only allow reading entire entries */
 	offset = uio->uio_offset;
 	resid = uio->uio_resid;
 	if (offset < 0 || offset % PFS_DELEN != 0 ||
 	    (resid && resid < PFS_DELEN))
 		PFS_RETURN (EINVAL);
 	if (resid == 0)
 		PFS_RETURN (0);
 
 	proc = NULL;
 	if (pid != NO_PID && !pfs_lookup_proc(pid, &proc))
 		PFS_RETURN (ENOENT);
 
 	sx_slock(&allproc_lock);
 	pfs_lock(pd);
 
 	KASSERT(pid == NO_PID || proc != NULL,
 	    ("%s(): no process for pid %lu", __func__, (unsigned long)pid));
 
 	if (pid != NO_PID) {
 		PROC_LOCK(proc);
 
 		/* check if the directory is visible to the caller */
 		if (!pfs_visible_proc(curthread, pd, proc)) {
 			_PRELE(proc);
 			PROC_UNLOCK(proc);
 			sx_sunlock(&allproc_lock);
 			pfs_unlock(pd);
 			PFS_RETURN (ENOENT);
 		}
 	}
 
 	/* skip unwanted entries */
 	for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) {
 		if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) {
 			/* nothing left... */
 			if (proc != NULL) {
 				_PRELE(proc);
 				PROC_UNLOCK(proc);
 			}
 			pfs_unlock(pd);
 			sx_sunlock(&allproc_lock);
 			PFS_RETURN (0);
 		}
 	}
 
 	/* fill in entries */
 	while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 &&
 	    resid >= PFS_DELEN) {
 		if ((pfsent = malloc(sizeof(struct pfsentry), M_IOV,
 		    M_NOWAIT | M_ZERO)) == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		pfsent->entry.d_reclen = PFS_DELEN;
 		pfsent->entry.d_fileno = pn_fileno(pn, pid);
 		/* PFS_DELEN was picked to fit PFS_NAMLEN */
 		for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i)
 			pfsent->entry.d_name[i] = pn->pn_name[i];
 		pfsent->entry.d_namlen = i;
 		/* NOTE: d_off is the offset of the *next* entry. */
 		pfsent->entry.d_off = offset + PFS_DELEN;
 		switch (pn->pn_type) {
 		case pfstype_procdir:
 			KASSERT(p != NULL,
 			    ("reached procdir node with p == NULL"));
 			pfsent->entry.d_namlen = snprintf(pfsent->entry.d_name,
 			    PFS_NAMELEN, "%d", p->p_pid);
 			/* fall through */
 		case pfstype_root:
 		case pfstype_dir:
 		case pfstype_this:
 		case pfstype_parent:
 			pfsent->entry.d_type = DT_DIR;
 			break;
 		case pfstype_file:
 			pfsent->entry.d_type = DT_REG;
 			break;
 		case pfstype_symlink:
 			pfsent->entry.d_type = DT_LNK;
 			break;
 		default:
 			panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type);
 		}
 		PFS_TRACE(("%s", pfsent->entry.d_name));
 		dirent_terminate(&pfsent->entry);
 		STAILQ_INSERT_TAIL(&lst, pfsent, link);
 		offset += PFS_DELEN;
 		resid -= PFS_DELEN;
 	}
 	if (proc != NULL) {
 		_PRELE(proc);
 		PROC_UNLOCK(proc);
 	}
 	pfs_unlock(pd);
 	sx_sunlock(&allproc_lock);
 	i = 0;
 	STAILQ_FOREACH_SAFE(pfsent, &lst, link, pfsent2) {
 		if (error == 0)
 			error = uiomove(&pfsent->entry, PFS_DELEN, uio);
 		free(pfsent, M_IOV);
 		i++;
 	}
 	PFS_TRACE(("%ju bytes", (uintmax_t)(i * PFS_DELEN)));
 	PFS_RETURN (error);
 }
 
 /*
  * Read a symbolic link.
  *
  * The node's fill handler writes the link target into an sbuf backed by
  * an on-stack buffer of PATH_MAX bytes, which is then copied out.  The
  * vnode is unlocked while the handler runs.
  *
  * Returns EINVAL if the vnode is not a symlink, EIO if the node has no
  * fill handler or its process is gone or exiting, ENAMETOOLONG if
  * sbuf_finish() fails, and otherwise the result of the handler or of the
  * copy-out.
  */
 static int
 pfs_readlink(struct vop_readlink_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct uio *uio = va->a_uio;
 	struct proc *proc = NULL;
 	char buf[PATH_MAX];
 	struct sbuf sb;
 	int error, locked;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VLNK)
 		PFS_RETURN (EINVAL);
 	KASSERT_PN_IS_LINK(pn);
 
 	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	/* For a process-bound node, hold the process across the handler. */
 	if (pvd->pvd_pid != NO_PID) {
 		if ((proc = pfind(pvd->pvd_pid)) == NULL)
 			PFS_RETURN (EIO);
 		if (proc->p_flag & P_WEXIT) {
 			PROC_UNLOCK(proc);
 			PFS_RETURN (EIO);
 		}
 		_PHOLD(proc);
 		PROC_UNLOCK(proc);
 	}
 	/* Hold the vnode and drop its lock while the fill handler runs. */
 	vhold(vn);
 	locked = VOP_ISLOCKED(vn);
 	VOP_UNLOCK(vn);
 
 	/* sbuf_new() can't fail with a static buffer */
 	sbuf_new(&sb, buf, sizeof buf, 0);
 
 	error = pn_fill(curthread, proc, pn, &sb, NULL);
 
 	if (proc != NULL)
 		PRELE(proc);
 	vn_lock(vn, locked | LK_RETRY);
 	vdrop(vn);
 
 	if (error) {
 		sbuf_delete(&sb);
 		PFS_RETURN (error);
 	}
 
 	if (sbuf_finish(&sb) != 0) {
 		sbuf_delete(&sb);
 		PFS_RETURN (ENAMETOOLONG);
 	}
 
 	error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio);
 	sbuf_delete(&sb);
 	PFS_RETURN (error);
 }
 
 /*
  * Reclaim a vnode: the work is done by pfs_vncache_free(), whose result
  * is returned.
  */
 static int
 pfs_reclaim(struct vop_reclaim_args *va)
 {
 	struct vnode *vp;
 	struct pfs_vdata *vdata;
 	struct pfs_node *node;
 
 	vp = va->a_vp;
 	vdata = vp->v_data;
 	node = vdata->pvd_pn;
 
 	PFS_TRACE(("%s", node->pn_name));
 	pfs_assert_not_owned(node);
 
 	return (pfs_vncache_free(vp));
 }
 
 /*
  * Set attributes.  Nothing can be changed on a pseudofs node, so the
  * request is accepted and ignored; the result is always 0.
  */
 static int
 pfs_setattr(struct vop_setattr_args *va)
 {
 	struct vnode *vp;
 	struct pfs_vdata *vdata;
 	struct pfs_node *node;
 
 	vp = va->a_vp;
 	vdata = vp->v_data;
 	node = vdata->pvd_pn;
 
 	PFS_TRACE(("%s", node->pn_name));
 	pfs_assert_not_owned(node);
 
 	/* Unchangeable attributes are ignored without complaint. */
 	PFS_RETURN (0);
 }
 
 /*
  * Write to a file.
  *
  * The node's fill handler consumes the data.  PFS_RAWWR nodes get the uio
  * directly; for all others the data is first gathered into an sbuf with
  * sbuf_uionew().
  *
  * Returns EINVAL for a non-regular vnode, EBADF if the node is not
  * writable, EIO if it has no fill handler or is no longer visible, and
  * otherwise the result of sbuf_uionew() or of the handler.
  */
 static int
 pfs_write(struct vop_write_args *va)
 {
 	struct vnode *vp;
 	struct pfs_vdata *pvd;
 	struct pfs_node *pn;
 	struct uio *uio;
 	struct proc *proc;
 	struct sbuf sb;
 	int error;
 
 	vp = va->a_vp;
 	pvd = vp->v_data;
 	pn = pvd->pvd_pn;
 	uio = va->a_uio;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	if (vp->v_type != VREG)
 		PFS_RETURN (EINVAL);
 	KASSERT_PN_IS_FILE(pn);
 
 	if ((pn->pn_flags & PFS_WR) == 0)
 		PFS_RETURN (EBADF);
 
 	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	/*
 	 * Visibility is checked again on every call, since either process'
 	 * privileges may have changed since the open() call.
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
 	/* Swap the process lock for a hold, released at "out". */
 	if (proc != NULL) {
 		_PHOLD(proc);
 		PROC_UNLOCK(proc);
 	}
 
 	if (pn->pn_flags & PFS_RAWWR) {
 		error = pn_fill(curthread, proc, pn, NULL, uio);
 		goto out;
 	}
 
 	sbuf_uionew(&sb, uio, &error);
 	if (error)
 		goto out;
 
 	error = pn_fill(curthread, proc, pn, &sb, uio);
 	sbuf_delete(&sb);
 out:
 	if (proc != NULL)
 		PRELE(proc);
 	PFS_RETURN (error);
 }
 
 /*
  * Vnode operations for pseudofs.  Lookups go through vfs_cache_lookup(),
  * with pfs_lookup as the cachedlookup operation.  Operations that would
  * create, remove or rename entries are wired to VOP_EOPNOTSUPP, and
  * anything not listed here falls back to default_vnodeops.
  */
 struct vop_vector pfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		pfs_access,
 	.vop_cachedlookup =	pfs_lookup,
 	.vop_close =		pfs_close,
 	.vop_create =		VOP_EOPNOTSUPP,
 	.vop_getattr =		pfs_getattr,
 	.vop_getextattr =	pfs_getextattr,
 	.vop_ioctl =		pfs_ioctl,
 	.vop_link =		VOP_EOPNOTSUPP,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_mkdir =		VOP_EOPNOTSUPP,
 	.vop_mknod =		VOP_EOPNOTSUPP,
 	.vop_open =		pfs_open,
 	.vop_read =		pfs_read,
 	.vop_readdir =		pfs_readdir,
 	.vop_readlink =		pfs_readlink,
 	.vop_reclaim =		pfs_reclaim,
 	.vop_remove =		VOP_EOPNOTSUPP,
 	.vop_rename =		VOP_EOPNOTSUPP,
 	.vop_rmdir =		VOP_EOPNOTSUPP,
 	.vop_setattr =		pfs_setattr,
 	.vop_symlink =		VOP_EOPNOTSUPP,
 	.vop_vptocnp =		pfs_vptocnp,
 	.vop_write =		pfs_write,
 	/* XXX I've probably forgotten a few that need VOP_EOPNOTSUPP */
 };
 VFS_VOP_VECTOR_REGISTER(pfs_vnodeops);
Index: projects/clang1000-import/sys/fs/tmpfs/tmpfs_vnops.c
===================================================================
--- projects/clang1000-import/sys/fs/tmpfs/tmpfs_vnops.c	(revision 357389)
+++ projects/clang1000-import/sys/fs/tmpfs/tmpfs_vnops.c	(revision 357390)
@@ -1,1648 +1,1648 @@
 /*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
  *
  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
  * 2005 program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * tmpfs vnode interface.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/limits.h>
 #include <sys/lockf.h>
 #include <sys/lock.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 
 #include <fs/tmpfs/tmpfs_vnops.h>
 #include <fs/tmpfs/tmpfs.h>
 
 SYSCTL_DECL(_vfs_tmpfs);
 
 /*
  * Running total of rename restarts, exported read-only (CTLFLAG_RD) as
  * "rename_restarts" under the _vfs_tmpfs sysctl node.  The counter is
  * declared volatile, so __DEVOLATILE is used to hand its address to
  * SYSCTL_INT.  It is added to by tmpfs_rename_relock().
  */
 static volatile int tmpfs_rename_restarts;
 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
     __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
     "Times rename had to restart due to lock contention");
 
 /*
  * Allocation callback passed to vn_vget_ino_gen() by tmpfs_lookup1().
  * It forwards all of its arguments unchanged to tmpfs_alloc_vp(); 'arg' is
  * the tmpfs node for which a vnode is wanted.
  */
 static int
 tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
     struct vnode **rvp)
 {
 	int error;
 
 	error = tmpfs_alloc_vp(mp, arg, lkflags, rvp);
 	return (error);
 }
 
 /*
  * Look up the component described by cnp in the directory vnode dvp and
  * hand the resulting vnode back through *vpp.  Shared implementation for
  * tmpfs_cached_lookup() and tmpfs_lookup().
  *
  * Three cases are handled: ".." (ISDOTDOT), "." and an ordinary name.
  * When an ordinary name is missing (or is a whiteout) and this is the last
  * component of a CREATE/RENAME lookup, or of a DELETE with DOWHITEOUT on a
  * whiteout, EJUSTRETURN is returned after a VWRITE access check on dvp.
  *
  * Returns 0 on success or an errno value / EJUSTRETURN otherwise.  The
  * final MPASS asserts that *vpp is non-null and locked exactly when the
  * return value is 0.
  */
 static int
 tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
 	struct tmpfs_node *dnode, *pnode;
 	struct tmpfs_mount *tm;
 	int error;
 
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	*vpp = NULLVP;
 
 	/* Check accessibility of requested node as a first step. */
 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
 	if (error != 0)
 		goto out;
 
 	/* We cannot be requesting the parent directory of the root node. */
 	MPASS(IMPLIES(dnode->tn_type == VDIR &&
 	    dnode->tn_dir.tn_parent == dnode,
 	    !(cnp->cn_flags & ISDOTDOT)));
 
 	TMPFS_ASSERT_LOCKED(dnode);
 	/* A NULL parent marks a removed directory (see tmpfs_rmdir()). */
 	if (dnode->tn_dir.tn_parent == NULL) {
 		error = ENOENT;
 		goto out;
 	}
 	if (cnp->cn_flags & ISDOTDOT) {
 		/*
 		 * Take a reference on the parent node around the
 		 * vn_vget_ino_gen() call and drop it again afterwards.
 		 */
 		tm = VFS_TO_TMPFS(dvp->v_mount);
 		pnode = dnode->tn_dir.tn_parent;
 		tmpfs_ref_node(pnode);
 		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
 		    pnode, cnp->cn_lkflags, vpp);
 		tmpfs_free_node(tm, pnode);
 		if (error != 0)
 			goto out;
 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 		/* "." resolves to the directory itself. */
 		VREF(dvp);
 		*vpp = dvp;
 		error = 0;
 	} else {
 		de = tmpfs_dir_lookup(dnode, NULL, cnp);
 		/* An entry without a node is a whiteout. */
 		if (de != NULL && de->td_node == NULL)
 			cnp->cn_flags |= ISWHITEOUT;
 		if (de == NULL || de->td_node == NULL) {
 			/*
 			 * The entry was not found in the directory.
 			 * This is OK if we are creating or renaming an
 			 * entry and are working on the last component of
 			 * the path name.
 			 *
 			 * NOTE(review): the backslash after "CREATE ||"
 			 * below is a stray line continuation; it is
 			 * harmless but could be removed.
 			 */
 			if ((cnp->cn_flags & ISLASTCN) &&
 			    (cnp->cn_nameiop == CREATE || \
 			    cnp->cn_nameiop == RENAME ||
 			    (cnp->cn_nameiop == DELETE &&
 			    cnp->cn_flags & DOWHITEOUT &&
 			    cnp->cn_flags & ISWHITEOUT))) {
 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
 				    cnp->cn_thread);
 				if (error != 0)
 					goto out;
 
 				/*
 				 * Keep the component name in the buffer for
 				 * future uses.
 				 */
 				cnp->cn_flags |= SAVENAME;
 
 				error = EJUSTRETURN;
 			} else
 				error = ENOENT;
 		} else {
 			struct tmpfs_node *tnode;
 
 			/*
 			 * The entry was found, so get its associated
 			 * tmpfs_node.
 			 */
 			tnode = de->td_node;
 
 			/*
 			 * If we are not at the last path component and
 			 * found a non-directory or non-link entry (which
 			 * may itself be pointing to a directory), raise
 			 * an error.
 			 */
 			if ((tnode->tn_type != VDIR &&
 			    tnode->tn_type != VLNK) &&
 			    !(cnp->cn_flags & ISLASTCN)) {
 				error = ENOTDIR;
 				goto out;
 			}
 
 			/*
 			 * If we are deleting or renaming the entry, keep
 			 * track of its tmpfs_dirent so that it can be
 			 * easily deleted later.
 			 */
 			if ((cnp->cn_flags & ISLASTCN) &&
 			    (cnp->cn_nameiop == DELETE ||
 			    cnp->cn_nameiop == RENAME)) {
 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
 				    cnp->cn_thread);
 				if (error != 0)
 					goto out;
 
 				/* Allocate a new vnode on the matching entry. */
 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
 				    cnp->cn_lkflags, vpp);
 				if (error != 0)
 					goto out;
 
 				/*
 				 * In a directory with S_ISTXT set, refuse
 				 * with EPERM unless the VADMIN check
 				 * succeeds (returns 0) on either the
 				 * directory or the entry itself.
 				 */
 				if ((dnode->tn_mode & S_ISTXT) &&
 				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
 				  cnp->cn_thread) && VOP_ACCESS(*vpp, VADMIN,
 				  cnp->cn_cred, cnp->cn_thread)) {
 					error = EPERM;
 					vput(*vpp);
 					*vpp = NULL;
 					goto out;
 				}
 				cnp->cn_flags |= SAVENAME;
 			} else {
 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
 				    cnp->cn_lkflags, vpp);
 				if (error != 0)
 					goto out;
 			}
 		}
 	}
 
 	/*
 	 * Store the result of this lookup in the cache.  Avoid this if the
 	 * request was for creation, as it does not improve timings on
 	 * empirical tests.
 	 *
 	 * This point is also reached with error set to ENOENT or
 	 * EJUSTRETURN, in which case *vpp is still NULLVP.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
 		cache_enter(dvp, *vpp, cnp);
 
 out:
 	/*
 	 * If there were no errors, *vpp cannot be null and it must be
 	 * locked.
 	 */
 	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));
 
 	return (error);
 }
 
 /*
  * Cached-lookup handler: unpack the vop_cachedlookup_args and let
  * tmpfs_lookup1() do the work.
  */
 static int
 tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 
 	return (tmpfs_lookup1(dvp, vpp, cnp));
 }
 
 /*
  * Lookup handler: unpack the vop_lookup_args and let tmpfs_lookup1() do
  * the work.
  */
 static int
 tmpfs_lookup(struct vop_lookup_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 
 	return (tmpfs_lookup1(dvp, vpp, cnp));
 }
 
 static int
 tmpfs_create(struct vop_create_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 	int error;
 
 	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
 
 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
 	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
 		cache_enter(dvp, *vpp, cnp);
 	return (error);
 }
 
 /*
  * Create a block device, character device or FIFO node.  Any other
  * requested type is rejected with EINVAL.
  */
 static int
 tmpfs_mknod(struct vop_mknod_args *v)
 {
 	struct vattr *vap;
 
 	vap = v->a_vap;
 	switch (vap->va_type) {
 	case VBLK:
 	case VCHR:
 	case VFIFO:
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	return (tmpfs_alloc_file(v->a_dvp, v->a_vpp, vap, v->a_cnp, NULL));
 }
 
 /*
  * Open handler.
  *
  * Returns ENOENT for a node whose link count has dropped below one, EPERM
  * for a write without O_APPEND on an append-only node, and 0 otherwise
  * after calling vnode_create_vobject().
  */
 static int
 tmpfs_open(struct vop_open_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	int error, fmode;
 
 	vp = v->a_vp;
 	fmode = v->a_mode;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/*
 	 * The file is still active but all its names have been removed
 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
 	 * it is about to die.
 	 */
 	if (node->tn_links < 1)
 		return (ENOENT);
 
 	if ((node->tn_flags & APPEND) == 0 ||
 	    (fmode & (FWRITE | O_APPEND)) != FWRITE) {
 		error = 0;
 		/* For regular files, the call below is nop. */
 		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
 		    OBJ_DEAD) == 0, ("dead object"));
 		vnode_create_vobject(vp, node->tn_size, v->a_td);
 	} else {
 		/* Append-only file opened for plain writing. */
 		error = EPERM;
 	}
 
 	MPASS(VOP_ISLOCKED(vp));
 	return (error);
 }
 
 /*
  * Close handler: bring the node times up to date via tmpfs_update().
  * Always returns 0.
  */
 static int
 tmpfs_close(struct vop_close_args *v)
 {
 
 	tmpfs_update(v->a_vp);
 	return (0);
 }
 
 int
 tmpfs_access(struct vop_access_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	accmode_t accmode = v->a_accmode;
 	struct ucred *cred = v->a_cred;
 	mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH;
 	int error;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/*
 	 * Common case path lookup.
 	 */
 	if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x))
 		return (0);
 
 	switch (vp->v_type) {
 	case VDIR:
 		/* FALLTHROUGH */
 	case VLNK:
 		/* FALLTHROUGH */
 	case VREG:
 		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
 			error = EROFS;
 			goto out;
 		}
 		break;
 
 	case VBLK:
 		/* FALLTHROUGH */
 	case VCHR:
 		/* FALLTHROUGH */
 	case VSOCK:
 		/* FALLTHROUGH */
 	case VFIFO:
 		break;
 
 	default:
 		error = EINVAL;
 		goto out;
 	}
 
 	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
 		error = EPERM;
 		goto out;
 	}
 
 	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
 	    node->tn_gid, accmode, cred, NULL);
 
 out:
 	MPASS(VOP_ISLOCKED(vp));
 
 	return error;
 }
 
 int
 tmpfs_getattr(struct vop_getattr_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct vattr *vap = v->a_vap;
 	vm_object_t obj;
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	tmpfs_update(vp);
 
 	vap->va_type = vp->v_type;
 	vap->va_mode = node->tn_mode;
 	vap->va_nlink = node->tn_links;
 	vap->va_uid = node->tn_uid;
 	vap->va_gid = node->tn_gid;
 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
 	vap->va_fileid = node->tn_id;
 	vap->va_size = node->tn_size;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_atime = node->tn_atime;
 	vap->va_mtime = node->tn_mtime;
 	vap->va_ctime = node->tn_ctime;
 	vap->va_birthtime = node->tn_birthtime;
 	vap->va_gen = node->tn_gen;
 	vap->va_flags = node->tn_flags;
 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
 		node->tn_rdev : NODEV;
 	if (vp->v_type == VREG) {
 		obj = node->tn_reg.tn_aobj;
 		vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
 	} else
 		vap->va_bytes = node->tn_size;
 	vap->va_filerev = 0;
 
 	return 0;
 }
 
 /*
  * Apply the attributes set in *a_vap to the node behind a_vp.
  *
  * Fails with EINVAL if any attribute that cannot be set is present.
  * Otherwise flags, size, owner/group, mode and times are applied in that
  * order, stopping at the first failure.  tmpfs_update() is called on the
  * way out whether or not an error occurred.
  */
 int
 tmpfs_setattr(struct vop_setattr_args *v)
 {
 	struct vnode *vp;
 	struct vattr *vap;
 	struct ucred *cred;
 	struct thread *td;
 	int error;
 
 	vp = v->a_vp;
 	vap = v->a_vap;
 	cred = v->a_cred;
 	td = curthread;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	error = 0;
 
 	/* Abort if any unsettable attribute is given. */
 	if (vap->va_type != VNON ||
 	    vap->va_nlink != VNOVAL ||
 	    vap->va_fsid != VNOVAL ||
 	    vap->va_fileid != VNOVAL ||
 	    vap->va_blocksize != VNOVAL ||
 	    vap->va_gen != VNOVAL ||
 	    vap->va_rdev != VNOVAL ||
 	    vap->va_bytes != VNOVAL) {
 		error = EINVAL;
 		goto done;
 	}
 
 	if (vap->va_flags != VNOVAL) {
 		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
 		if (error != 0)
 			goto done;
 	}
 
 	if (vap->va_size != VNOVAL) {
 		error = tmpfs_chsize(vp, vap->va_size, cred, td);
 		if (error != 0)
 			goto done;
 	}
 
 	if (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL) {
 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
 		if (error != 0)
 			goto done;
 	}
 
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
 		if (error != 0)
 			goto done;
 	}
 
 	if ((vap->va_atime.tv_sec != VNOVAL &&
 	    vap->va_atime.tv_nsec != VNOVAL) ||
 	    (vap->va_mtime.tv_sec != VNOVAL &&
 	    vap->va_mtime.tv_nsec != VNOVAL) ||
 	    (vap->va_birthtime.tv_sec != VNOVAL &&
 	    vap->va_birthtime.tv_nsec != VNOVAL))
 		error = tmpfs_chtimes(vp, vap, cred, td);
 
 done:
 	/*
 	 * Update the node times.  We give preference to the error codes
 	 * generated by this function rather than the ones that may arise
 	 * from tmpfs_update.
 	 */
 	tmpfs_update(vp);
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	return (error);
 }
 
 /*
  * Read handler for regular files.
  *
  * Returns EISDIR for anything that is not a regular file and EINVAL for a
  * negative offset.  Otherwise marks the node accessed and copies data out
  * of the node's backing object with uiomove_object().
  */
 static int
 tmpfs_read(struct vop_read_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct uio *uio = v->a_uio;
 	struct tmpfs_node *node;
 
 	if (vp->v_type != VREG)
 		return (EISDIR);
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	node = VP_TO_TMPFS_NODE(vp);
 	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), node, TMPFS_NODE_ACCESSED);
 
 	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
 }
 
 /*
  * Write handler for regular files.
  *
  * Returns EINVAL for a negative offset or a non-regular vnode and 0 for a
  * zero-length request.  With IO_APPEND the offset is moved to the current
  * file size.  EFBIG is returned when the write would end beyond the
  * mount's tm_maxfilesize or when vn_rlimit_fsize() objects.
  *
  * If the write extends the file, the file is grown first; if the data copy
  * then fails, the file is resized back to its original size.
  */
 static int
 tmpfs_write(struct vop_write_args *v)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_node *node;
 	off_t oldsize;
 	int error, ioflag;
 
 	vp = v->a_vp;
 	uio = v->a_uio;
 	ioflag = v->a_ioflag;
 	error = 0;
 	node = VP_TO_TMPFS_NODE(vp);
 	/* Remembered so a failed write can be rolled back below. */
 	oldsize = node->tn_size;
 
 	if (uio->uio_offset < 0 || vp->v_type != VREG)
 		return (EINVAL);
 	if (uio->uio_resid == 0)
 		return (0);
 	if (ioflag & IO_APPEND)
 		uio->uio_offset = node->tn_size;
 	if (uio->uio_offset + uio->uio_resid >
 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
 		return (EFBIG);
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 		return (EFBIG);
 	/* Grow the file up front when the write ends past the current EOF. */
 	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
 		    FALSE);
 		if (error != 0)
 			goto out;
 	}
 
 	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
 	/* The status bits are set even when the copy reported an error. */
 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
 	    TMPFS_NODE_CHANGED;
 	/*
 	 * Clear set-id bits unless priv_check_cred() returns 0 for
 	 * PRIV_VFS_RETAINSUGID.
 	 */
 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
 		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID))
 			node->tn_mode &= ~(S_ISUID | S_ISGID);
 	}
 	/* Undo any growth if the copy failed; the result is ignored. */
 	if (error != 0)
 		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
 
 out:
 	/* Success implies everything was consumed; failure implies no growth. */
 	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
 	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
 
 	return (error);
 }
 
 /*
  * Fsync handler: refresh the modification-time state with
  * tmpfs_check_mtime() and then tmpfs_update().  Always returns 0.
  */
 static int
 tmpfs_fsync(struct vop_fsync_args *v)
 {
 	struct vnode *vp;
 
 	vp = v->a_vp;
 	MPASS(VOP_ISLOCKED(vp));
 
 	tmpfs_check_mtime(vp);
 	tmpfs_update(vp);
 
 	return (0);
 }
 
 static int
 tmpfs_remove(struct vop_remove_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(dvp));
 	MPASS(VOP_ISLOCKED(vp));
 
 	if (vp->v_type == VDIR) {
 		error = EISDIR;
 		goto out;
 	}
 
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
 	MPASS(de != NULL);
 
 	/* Files marked as immutable or append-only cannot be deleted. */
 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
 	    (dnode->tn_flags & APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* Remove the entry from the directory; as it is a file, we do not
 	 * have to change the number of hard links of the directory. */
 	tmpfs_dir_detach(dvp, de);
 	if (v->a_cnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
 	 * reclaimed. */
 	tmpfs_free_dirent(tmp, de);
 
 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
 	error = 0;
 
 out:
 
 	return error;
 }
 
 /*
  * Create a hard link named a_cnp in directory a_tdvp to the node behind
  * a_vp.
  *
  * Returns EMLINK when the node is already at TMPFS_LINK_MAX, EPERM when it
  * is IMMUTABLE or APPEND, or the error from tmpfs_alloc_dirent().  On
  * success the new entry is attached and the node times are updated.
  */
 static int
 tmpfs_link(struct vop_link_args *v)
 {
 	struct vnode *dvp, *vp;
 	struct componentname *cnp;
 	struct tmpfs_dirent *de;
 	struct tmpfs_node *node;
 	int error;
 
 	dvp = v->a_tdvp;
 	vp = v->a_vp;
 	cnp = v->a_cnp;
 
 	MPASS(VOP_ISLOCKED(dvp));
 	MPASS(cnp->cn_flags & HASBUF);
 	MPASS(dvp != vp); /* XXX When can this be false? */
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/*
 	 * Ensure that we do not overflow the maximum number of links
 	 * imposed by the system.
 	 */
 	MPASS(node->tn_links <= TMPFS_LINK_MAX);
 	if (node->tn_links == TMPFS_LINK_MAX)
 		return (EMLINK);
 
 	/* We cannot create links of files marked immutable or append-only. */
 	if ((node->tn_flags & (IMMUTABLE | APPEND)) != 0)
 		return (EPERM);
 
 	/* Allocate a new directory entry to represent the node. */
 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
 	if (error != 0)
 		return (error);
 
 	/* Insert the new directory entry into the appropriate directory. */
 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 	tmpfs_dir_attach(dvp, de);
 
 	/* vp link count has changed, so update node times. */
 	node->tn_status |= TMPFS_NODE_CHANGED;
 	tmpfs_update(vp);
 
 	return (0);
 }
 
 /*
  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
  * fail to acquire any lock in the path we will drop all held locks,
  * acquire the new lock in a blocking fashion, and then release it and
  * restart the rename.  This acquire/release step ensures that we do not
  * spin on a lock waiting for release.  On error release all vnode locks
  * and decrement references the way tmpfs_rename() would do.
  *
  * fdvp/tdvp are the source and target directories; *fvpp and *tvpp are
  * the source and target vnodes and may be replaced by freshly resolved
  * vnodes (*tvpp may become NULL).  fcnp/tcnp are the component names used
  * to re-resolve them.
  *
  * Returns 0 on success, otherwise an errno value after the releout path
  * has vrele()d fdvp, *fvpp, tdvp and (if set) *tvpp.  The number of passes
  * through the relock label is added to tmpfs_rename_restarts either way.
  */
 static int
 tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
     struct vnode *tdvp, struct vnode **tvpp,
     struct componentname *fcnp, struct componentname *tcnp)
 {
 	struct vnode *nvp;
 	struct mount *mp;
 	struct tmpfs_dirent *de;
 	int error, restarts = 0;
 
 	/* Start from a state where this function holds no vnode locks. */
 	VOP_UNLOCK(tdvp);
 	if (*tvpp != NULL && *tvpp != tdvp)
 		VOP_UNLOCK(*tvpp);
 	mp = fdvp->v_mount;
 
 relock:
 	restarts += 1;
 	error = vn_lock(fdvp, LK_EXCLUSIVE);
 	if (error)
 		goto releout;
 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		/* Wait for tdvp without holding fdvp, then start over. */
 		VOP_UNLOCK(fdvp);
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error)
 			goto releout;
 		VOP_UNLOCK(tdvp);
 		goto relock;
 	}
 	/*
 	 * Re-resolve fvp to be certain it still exists and fetch the
 	 * correct vnode.
 	 */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
 	if (de == NULL) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
 			error = EINVAL;
 		else
 			error = ENOENT;
 		goto releout;
 	}
 	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 	if (error != 0) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if (error != EBUSY)
 			goto releout;
 		/*
 		 * NOTE(review): de->td_node is read here after both
 		 * directory locks were dropped -- confirm the entry cannot
 		 * be freed in the meantime.
 		 */
 		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
 		if (error != 0)
 			goto releout;
 		VOP_UNLOCK(nvp);
 		/*
 		 * Concurrent rename race.
 		 */
 		if (nvp == tdvp) {
 			vrele(nvp);
 			error = EINVAL;
 			goto releout;
 		}
 		/* Swap in the newly resolved source vnode and retry. */
 		vrele(*fvpp);
 		*fvpp = nvp;
 		goto relock;
 	}
 	vrele(*fvpp);
 	*fvpp = nvp;
 	VOP_UNLOCK(*fvpp);
 	/*
 	 * Re-resolve tvp and acquire the vnode lock if present.
 	 */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
 	/*
 	 * If tvp disappeared we just carry on.
 	 */
 	if (de == NULL && *tvpp != NULL) {
 		vrele(*tvpp);
 		*tvpp = NULL;
 	}
 	/*
 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
 	 * if the non-blocking acquire fails.
 	 */
 	if (de != NULL) {
 		nvp = NULL;
 		error = tmpfs_alloc_vp(mp, de->td_node,
 		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 		if (*tvpp != NULL)
 			vrele(*tvpp);
 		/* On failure this stores the NULL assigned above. */
 		*tvpp = nvp;
 		if (error != 0) {
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			if (error != EBUSY)
 				goto releout;
 			/*
 			 * NOTE(review): unlike the fvp path above, the
 			 * vnode obtained below is neither stored in *tvpp
 			 * nor vrele()d before "goto relock" / "goto
 			 * releout" -- confirm its reference is not leaked.
 			 * de->td_node is also read after the directory
 			 * locks were dropped.
 			 */
 			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
 			    &nvp);
 			if (error != 0)
 				goto releout;
 			VOP_UNLOCK(nvp);
 			/*
 			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
 			 */
 			if (nvp == fdvp) {
 				error = ENOTEMPTY;
 				goto releout;
 			}
 			goto relock;
 		}
 	}
 	tmpfs_rename_restarts += restarts;
 
 	return (0);
 
 releout:
 	/* Every path reaching here has already dropped the vnode locks. */
 	vrele(fdvp);
 	vrele(*fvpp);
 	vrele(tdvp);
 	if (*tvpp != NULL)
 		vrele(*tvpp);
 	tmpfs_rename_restarts += restarts;
 
 	return (error);
 }
 
 /*
  * Rename a_fvp (entry a_fcnp in directory a_fdvp) to the name a_tcnp in
  * directory a_tdvp, replacing a_tvp if it is non-NULL.
  *
  * On entry tdvp is asserted to be locked, and tvp too when present; fdvp
  * is locked here (possibly via tmpfs_rename_relock(), which may replace
  * fvp and tvp).  On every path through the "out" label the target vnodes
  * are released with vput()/vrele() and the source vnodes with vrele().
  * When tmpfs_rename_relock() fails, its error is returned directly, since
  * that function has already released the vnodes.
  *
  * Returns 0 on success (including the fvp == tvp no-op) or an errno value:
  * EXDEV, EINVAL, ENOENT, EPERM, ENOTEMPTY, ENOTDIR or EISDIR.
  */
 static int
 tmpfs_rename(struct vop_rename_args *v)
 {
 	struct vnode *fdvp = v->a_fdvp;
 	struct vnode *fvp = v->a_fvp;
 	struct componentname *fcnp = v->a_fcnp;
 	struct vnode *tdvp = v->a_tdvp;
 	struct vnode *tvp = v->a_tvp;
 	struct componentname *tcnp = v->a_tcnp;
 	char *newname;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *fdnode;
 	struct tmpfs_node *fnode;
 	struct tmpfs_node *tnode;
 	struct tmpfs_node *tdnode;
 	int error;
 
 	MPASS(VOP_ISLOCKED(tdvp));
 	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
 	MPASS(fcnp->cn_flags & HASBUF);
 	MPASS(tcnp->cn_flags & HASBUF);
 
 	/*
 	 * Disallow cross-device renames.
 	 * XXX Why isn't this done by the caller?
 	 */
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
 		error = EXDEV;
 		goto out;
 	}
 
 	/* If source and target are the same file, there is nothing to do. */
 	if (fvp == tvp) {
 		error = 0;
 		goto out;
 	}
 
 	/*
 	 * If we need to move the directory between entries, lock the
 	 * source so that we can safely operate on it.
 	 */
 	if (fdvp != tdvp && fdvp != tvp) {
 		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 			/*
 			 * Could not get fdvp without blocking; fall back to
 			 * the relock helper, which may update fvp and tvp.
 			 */
 			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
 			    fcnp, tcnp);
 			if (error != 0)
 				return (error);
 			ASSERT_VOP_ELOCKED(fdvp,
 			    "tmpfs_rename: fdvp not locked");
 			ASSERT_VOP_ELOCKED(tdvp,
 			    "tmpfs_rename: tdvp not locked");
 			if (tvp != NULL)
 				ASSERT_VOP_ELOCKED(tvp,
 				    "tmpfs_rename: tvp not locked");
 			if (fvp == tvp) {
 				error = 0;
 				goto out_locked;
 			}
 		}
 	}
 
 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
 	tdnode = VP_TO_TMPFS_DIR(tdvp);
 	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
 	fdnode = VP_TO_TMPFS_DIR(fdvp);
 	fnode = VP_TO_TMPFS_NODE(fvp);
 	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
 
 	/*
 	 * Entry can disappear before we lock fdvp,
 	 * also avoid manipulating '.' and '..' entries.
 	 */
 	if (de == NULL) {
 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
 			error = EINVAL;
 		else
 			error = ENOENT;
 		goto out_locked;
 	}
 	MPASS(de->td_node == fnode);
 
 	/*
 	 * If re-naming a directory to another preexisting directory
 	 * ensure that the target directory is empty so that its
 	 * removal causes no side effects.
 	 * Kern_rename guarantees the destination to be a directory
 	 * if the source is one.
 	 */
 	if (tvp != NULL) {
 		MPASS(tnode != NULL);
 
 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
 			error = EPERM;
 			goto out_locked;
 		}
 
 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
 			if (tnode->tn_size > 0) {
 				error = ENOTEMPTY;
 				goto out_locked;
 			}
 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
 			error = ENOTDIR;
 			goto out_locked;
 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
 			error = EISDIR;
 			goto out_locked;
 		} else {
 			MPASS(fnode->tn_type != VDIR &&
 				tnode->tn_type != VDIR);
 		}
 	}
 
 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
 	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
 		error = EPERM;
 		goto out_locked;
 	}
 
 	/*
 	 * Ensure that we have enough memory to hold the new name, if it
 	 * has to be changed.
 	 *
 	 * From here on, every error path must free newname.
 	 */
 	if (fcnp->cn_namelen != tcnp->cn_namelen ||
 	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
 		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
 	} else
 		newname = NULL;
 
 	/*
 	 * If the node is being moved to another directory, we have to do
 	 * the move.
 	 */
 	if (fdnode != tdnode) {
 		/*
 		 * In case we are moving a directory, we have to adjust its
 		 * parent to point to the new parent.
 		 */
 		if (de->td_node->tn_type == VDIR) {
 			struct tmpfs_node *n;
 
 			/*
 			 * Ensure the target directory is not a child of the
 			 * directory being moved.  Otherwise, we'd end up
 			 * with stale nodes.
 			 */
 			n = tdnode;
 			/*
 			 * TMPFS_LOCK guarantees that no nodes are freed while
 			 * traversing the list. Nodes can only be marked as
 			 * removed: tn_parent == NULL.
 			 */
 			TMPFS_LOCK(tmp);
 			TMPFS_NODE_LOCK(n);
 			/*
 			 * Walk up from tdnode until a node that is its own
 			 * parent is reached.  The current node n is held
 			 * locked at the top of each iteration.
 			 */
 			while (n != n->tn_dir.tn_parent) {
 				struct tmpfs_node *parent;
 
 				if (n == fnode) {
 					TMPFS_NODE_UNLOCK(n);
 					TMPFS_UNLOCK(tmp);
 					error = EINVAL;
 					if (newname != NULL)
 						    free(newname, M_TMPFSNAME);
 					goto out_locked;
 				}
 				parent = n->tn_dir.tn_parent;
 				TMPFS_NODE_UNLOCK(n);
 				if (parent == NULL) {
 					n = NULL;
 					break;
 				}
 				TMPFS_NODE_LOCK(parent);
 				if (parent->tn_dir.tn_parent == NULL) {
 					TMPFS_NODE_UNLOCK(parent);
 					n = NULL;
 					break;
 				}
 				n = parent;
 			}
 			TMPFS_UNLOCK(tmp);
 			/* n == NULL: a removed ancestor was found. */
 			if (n == NULL) {
 				error = EINVAL;
 				if (newname != NULL)
 					    free(newname, M_TMPFSNAME);
 				goto out_locked;
 			}
 			/* Drop the lock still held on the last node visited. */
 			TMPFS_NODE_UNLOCK(n);
 
 			/* Adjust the parent pointer. */
 			TMPFS_VALIDATE_DIR(fnode);
 			TMPFS_NODE_LOCK(de->td_node);
 			de->td_node->tn_dir.tn_parent = tdnode;
 			TMPFS_NODE_UNLOCK(de->td_node);
 
 			/*
 			 * As a result of changing the target of the '..'
 			 * entry, the link count of the source and target
 			 * directories has to be adjusted.
 			 */
 			TMPFS_NODE_LOCK(tdnode);
 			TMPFS_ASSERT_LOCKED(tdnode);
 			tdnode->tn_links++;
 			TMPFS_NODE_UNLOCK(tdnode);
 
 			TMPFS_NODE_LOCK(fdnode);
 			TMPFS_ASSERT_LOCKED(fdnode);
 			fdnode->tn_links--;
 			TMPFS_NODE_UNLOCK(fdnode);
 		}
 	}
 
 	/*
 	 * Do the move: just remove the entry from the source directory
 	 * and insert it into the target one.
 	 */
 	tmpfs_dir_detach(fdvp, de);
 
 	if (fcnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(fdvp, fcnp);
 	if (tcnp->cn_flags & ISWHITEOUT)
 		tmpfs_dir_whiteout_remove(tdvp, tcnp);
 
 	/*
 	 * If the name has changed, we need to make it effective by changing
 	 * it in the directory entry.
 	 */
 	if (newname != NULL) {
 		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
 
 		/*
 		 * The entry takes ownership of newname; presumably
 		 * tmpfs_dirent_init() then fills it from tcnp -- confirm.
 		 */
 		free(de->ud.td_name, M_TMPFSNAME);
 		de->ud.td_name = newname;
 		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
 
 		fnode->tn_status |= TMPFS_NODE_CHANGED;
 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
 	}
 
 	/*
 	 * If we are overwriting an entry, we have to remove the old one
 	 * from the target directory.
 	 */
 	if (tvp != NULL) {
 		struct tmpfs_dirent *tde;
 
 		/* Remove the old entry from the target directory. */
 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
 		tmpfs_dir_detach(tdvp, tde);
 
 		/*
 		 * Free the directory entry we just deleted.  Note that the
 		 * node referred by it will not be removed until the vnode is
 		 * really reclaimed.
 		 */
 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
 	}
 
 	tmpfs_dir_attach(tdvp, de);
 
 	if (tmpfs_use_nc(fvp)) {
 		cache_purge(fvp);
 		if (tvp != NULL)
 			cache_purge(tvp);
 		cache_purge_negative(tdvp);
 	}
 
 	error = 0;
 
 out_locked:
 	/* Mirrors the condition under which fdvp was locked above. */
 	if (fdvp != tdvp && fdvp != tvp)
 		VOP_UNLOCK(fdvp);
 
 out:
 	/*
 	 * Release target nodes.
 	 * XXX: I don't understand when tdvp can be the same as tvp, but
 	 * other code takes care of this...
 	 */
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp != NULL)
 		vput(tvp);
 
 	/* Release source nodes. */
 	vrele(fdvp);
 	vrele(fvp);
 
 	return (error);
 }
 
 /*
  * Create a directory named a_cnp in a_dvp by delegating to
  * tmpfs_alloc_file().  The requested type is asserted to be VDIR.
  */
 static int
 tmpfs_mkdir(struct vop_mkdir_args *v)
 {
 	struct vattr *vap;
 
 	vap = v->a_vap;
 	MPASS(vap->va_type == VDIR);
 
 	return (tmpfs_alloc_file(v->a_dvp, v->a_vpp, vap, v->a_cnp, NULL));
 }
 
 static int
 tmpfs_rmdir(struct vop_rmdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(dvp));
 	MPASS(VOP_ISLOCKED(vp));
 
 	tmp = VFS_TO_TMPFS(dvp->v_mount);
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_DIR(vp);
 
 	/* Directories with more than two entries ('.' and '..') cannot be
 	 * removed. */
 	 if (node->tn_size > 0) {
 		 error = ENOTEMPTY;
 		 goto out;
 	 }
 
 	if ((dnode->tn_flags & APPEND)
 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* This invariant holds only if we are not trying to remove "..".
 	  * We checked for that above so this is safe now. */
 	MPASS(node->tn_dir.tn_parent == dnode);
 
 	/* Get the directory entry associated with node (vp).  This was
 	 * filled by tmpfs_lookup while looking up the entry. */
 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
 	MPASS(TMPFS_DIRENT_MATCHES(de,
 	    v->a_cnp->cn_nameptr,
 	    v->a_cnp->cn_namelen));
 
 	/* Check flags to see if we are allowed to remove the directory. */
 	if ((dnode->tn_flags & APPEND) != 0 ||
 	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
 		error = EPERM;
 		goto out;
 	}
 
 
 	/* Detach the directory entry from the directory (dnode). */
 	tmpfs_dir_detach(dvp, de);
 	if (v->a_cnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
 
 	/* No vnode should be allocated for this entry from this point */
 	TMPFS_NODE_LOCK(node);
 	node->tn_links--;
 	node->tn_dir.tn_parent = NULL;
 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
 	    TMPFS_NODE_MODIFIED;
 
 	TMPFS_NODE_UNLOCK(node);
 
 	TMPFS_NODE_LOCK(dnode);
 	dnode->tn_links--;
 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
 	    TMPFS_NODE_MODIFIED;
 	TMPFS_NODE_UNLOCK(dnode);
 
 	if (tmpfs_use_nc(dvp)) {
 		cache_purge(dvp);
 		cache_purge(vp);
 	}
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
 	 * reclaimed. */
 	tmpfs_free_dirent(tmp, de);
 
 	/* Release the deleted vnode (will destroy the node, notify
 	 * interested parties and clean it from the cache). */
 
 	dnode->tn_status |= TMPFS_NODE_CHANGED;
 	tmpfs_update(dvp);
 
 	error = 0;
 
 out:
 	return error;
 }
 
 /*
  * Create a symbolic link named a_cnp in a_dvp pointing at a_target.
  * The requested type is forced to VLNK (or asserted, once "notyet" is
  * enabled) before delegating to tmpfs_alloc_file().
  */
 static int
 tmpfs_symlink(struct vop_symlink_args *v)
 {
 	struct vattr *vap;
 	const char *target;
 
 	vap = v->a_vap;
 	target = v->a_target;
 
 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
 	MPASS(vap->va_type == VLNK);
 #else
 	vap->va_type = VLNK;
 #endif
 
 	return (tmpfs_alloc_file(v->a_dvp, v->a_vpp, vap, v->a_cnp, target));
 }
 
 static int
 tmpfs_readdir(struct vop_readdir_args *va)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_mount *tm;
 	struct tmpfs_node *node;
 	u_long **cookies;
 	int *eofflag, *ncookies;
 	ssize_t startresid;
 	int error, maxcookies;
 
 	vp = va->a_vp;
 	uio = va->a_uio;
 	eofflag = va->a_eofflag;
 	cookies = va->a_cookies;
 	ncookies = va->a_ncookies;
 
 	/* This operation only makes sense on directory nodes. */
 	if (vp->v_type != VDIR)
 		return ENOTDIR;
 
 	maxcookies = 0;
 	node = VP_TO_TMPFS_DIR(vp);
 	tm = VFS_TO_TMPFS(vp->v_mount);
 
 	startresid = uio->uio_resid;
 
 	/* Allocate cookies for NFS and compat modules. */
 	if (cookies != NULL && ncookies != NULL) {
 		maxcookies = howmany(node->tn_size,
 		    sizeof(struct tmpfs_dirent)) + 2;
 		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
 		    M_WAITOK);
 		*ncookies = 0;
 	}
 
 	if (cookies == NULL)
 		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
 	else
 		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
 		    ncookies);
 
 	/* Buffer was filled without hitting EOF. */
 	if (error == EJUSTRETURN)
 		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
 
 	if (error != 0 && cookies != NULL && ncookies != NULL) {
 		free(*cookies, M_TEMP);
 		*cookies = NULL;
 		*ncookies = 0;
 	}
 
 	if (eofflag != NULL)
 		*eofflag =
 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
 
 	return error;
 }
 
 /*
  * VOP_READLINK handler: copy the stored link target (tn_link) into a_uio.
  *
  * At most MIN(tn_size, uio_resid) bytes are moved, and the node is flagged
  * TMPFS_NODE_ACCESSED whether or not the copy succeeded.  Returns the
  * uiomove() result.
  */
 static int
 tmpfs_readlink(struct vop_readlink_args *v)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_node *node;
 	int error;
 
 	vp = v->a_vp;
 	uio = v->a_uio;
 
 	MPASS(uio->uio_offset == 0);
 	MPASS(vp->v_type == VLNK);
 
 	node = VP_TO_TMPFS_NODE(vp);
 	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
 	    uio);
 	tmpfs_set_status(VFS_TO_TMPFS(vp->v_mount), node, TMPFS_NODE_ACCESSED);
 
 	return (error);
 }
 
 /*
  * VOP_INACTIVE handler.  A node that still has links only gets its mtime
  * checked via tmpfs_check_mtime(); a node with no links left has its vnode
  * passed to vrecycle().  Always returns 0.
  */
 static int
 tmpfs_inactive(struct vop_inactive_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
 
 	if (node->tn_links != 0)
 		tmpfs_check_mtime(vp);
 	else
 		vrecycle(vp);
 	return (0);
 }
 
 /*
  * VOP_NEED_INACTIVE handler: returns 1 when the node has no links left, or
  * when it is a regular file whose VM object's generation differs from its
  * cleangeneration; returns 0 otherwise.
  */
 static int
 tmpfs_need_inactive(struct vop_need_inactive_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
 	struct vm_object *obj;
 
 	if (node->tn_links == 0)
 		return (1);
 
 	if (vp->v_type == VREG) {
 		obj = vp->v_object;
 		if (obj->generation != obj->cleangeneration)
 			return (1);
 	}
 
 	return (0);
 }
 
 /*
  * VOP_RECLAIM handler: detach the tmpfs node from the vnode being reclaimed.
  *
  * For regular files the VM object is torn down through
  * tmpfs_destroy_vobject(); the namecache is purged when tmpfs_use_nc() says
  * the mount uses it.  The vnode/node association is dropped with
  * tmpfs_free_vp() under the node lock, and the node itself is freed when it
  * has no links left and is not in the TMPFS_VNODE_ALLOCATING state.
  * Always returns 0.
  */
 int
 tmpfs_reclaim(struct vop_reclaim_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 
 	if (vp->v_type == VREG)
 		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
 	vp->v_object = NULL;
 	if (tmpfs_use_nc(vp))
 		cache_purge(vp);
 
 	TMPFS_NODE_LOCK(node);
 	tmpfs_free_vp(vp);
 
 	/* If the node referenced by this vnode was deleted by the user,
 	 * we must free its associated data structures (now that the vnode
 	 * is being reclaimed). */
 	if (node->tn_links == 0 &&
 	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
 		/* The node lock is released before tmpfs_free_node() runs. */
 		TMPFS_NODE_UNLOCK(node);
 		tmpfs_free_node(tmp, node);
 	} else
 		TMPFS_NODE_UNLOCK(node);
 
 	/* tmpfs_free_vp() is expected to have cleared v_data. */
 	MPASS(vp->v_data == NULL);
 	return 0;
 }
 
 /*
  * VOP_PRINT handler: dump the tmpfs node behind a_vp with printf().
  *
  * Prints the node pointer, flags, link count, mode, owner, group, size and
  * status; for FIFOs fifo_printinfo() is called as well.  Always returns 0.
  */
 int
 tmpfs_print(struct vop_print_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/*
 	 * The link count is passed as uintmax_t, so it is printed with the
 	 * matching unsigned conversion (%ju) rather than %jd.
 	 */
 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %ju\n",
 	    node, node->tn_flags, (uintmax_t)node->tn_links);
 	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
 	    node->tn_mode, node->tn_uid, node->tn_gid,
 	    (intmax_t)node->tn_size, node->tn_status);
 
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 
 	printf("\n");
 
 	return 0;
 }
 
 /*
  * VOP_PATHCONF handler: answer pathconf(2)-style queries for a tmpfs vnode.
  *
  * The value for a_name is stored through a_retval.  _PC_PIPE_BUF is only
  * answered for directories and FIFOs (EINVAL otherwise); names not handled
  * here are forwarded to vop_stdpathconf().  Returns 0 or an errno value.
  */
 int
 tmpfs_pathconf(struct vop_pathconf_args *v)
 {
 	long *retval;
 	int error;
 
 	retval = v->a_retval;
 	error = 0;
 
 	switch (v->a_name) {
 	case _PC_LINK_MAX:
 		*retval = TMPFS_LINK_MAX;
 		break;
 
 	case _PC_NAME_MAX:
 		*retval = NAME_MAX;
 		break;
 
 	case _PC_PIPE_BUF:
 		if (v->a_vp->v_type != VDIR && v->a_vp->v_type != VFIFO)
 			error = EINVAL;
 		else
 			*retval = PIPE_BUF;
 		break;
 
 	/* These three options are all reported as 1. */
 	case _PC_CHOWN_RESTRICTED:
 	case _PC_NO_TRUNC:
 	case _PC_SYNC_IO:
 		*retval = 1;
 		break;
 
 	case _PC_FILESIZEBITS:
 		*retval = 64;
 		break;
 
 	default:
 		error = vop_stdpathconf(v);
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * VOP_VPTOFH handler: fill the caller's file handle (viewed as a struct
  * tmpfs_fid) with the handle length and the node's id and generation.
  * Always returns 0.
  */
 static int
 tmpfs_vptofh(struct vop_vptofh_args *ap)
 {
 	struct tmpfs_fid *tfhp = (struct tmpfs_fid *)ap->a_fhp;
 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(ap->a_vp);
 
 	tfhp->tf_len = sizeof(*tfhp);
 	tfhp->tf_id = node->tn_id;
 	tfhp->tf_gen = node->tn_gen;
 
 	return (0);
 }
 
 /*
  * VOP_WHITEOUT handler, dispatched on a_flags:
  *   LOOKUP - nothing to do, returns 0.
  *   CREATE - if the name already exists, returns 0 when the entry has no
  *            node attached (td_node == NULL) and EEXIST otherwise; if it
  *            does not exist, adds a whiteout entry.
  *   DELETE - removes the whiteout entry, returns 0.
  * Any other value panics.
  */
 static int
 tmpfs_whiteout(struct vop_whiteout_args *ap)
 {
 	struct vnode *dvp;
 	struct componentname *cnp;
 	struct tmpfs_dirent *de;
 
 	dvp = ap->a_dvp;
 	cnp = ap->a_cnp;
 
 	if (ap->a_flags == LOOKUP)
 		return (0);
 	if (ap->a_flags == DELETE) {
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 		return (0);
 	}
 	if (ap->a_flags != CREATE)
 		panic("tmpfs_whiteout: unknown op");
 
 	/* CREATE */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
 	if (de == NULL)
 		return (tmpfs_dir_whiteout_add(dvp, cnp));
 	if (de->td_node == NULL)
 		return (0);
 	return (EEXIST);
 }
 
 static int
 tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
     struct tmpfs_dirent **pde)
 {
 	struct tmpfs_dir_cursor dc;
 	struct tmpfs_dirent *de;
 
 	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
 	     de = tmpfs_dir_next(tnp, &dc)) {
 		if (de->td_node == tn) {
 			*pde = de;
 			return (0);
 		}
 	}
 	return (ENOENT);
 }
 
 /*
  * Helper for tmpfs_vptocnp(): obtain the vnode of candidate parent tnp in
  * *dvp and, if tnp really has an entry for tn, copy that entry's name into
  * buf.
  *
  * The name is placed so that it ends at offset *buflen in buf, and *buflen
  * is lowered by the name length.  Returns 0 on success, ENOMEM when the
  * name does not fit in front of *buflen, the vn_vget_ino_gen() error, or
  * ENOENT from tmpfs_vptocnp_dir().
  *
  * On success *dvp is unlocked (when it differs from vp) and handed back to
  * the caller.  On failure after the vnode was obtained it is released with
  * vput(), or with vrele() when it is vp itself.
  */
 static int
 tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
-    struct tmpfs_node *tnp, char *buf, int *buflen, struct vnode **dvp)
+    struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
 {
 	struct tmpfs_dirent *de;
 	int error, i;
 
 	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
 	    dvp);
 	if (error != 0)
 		return (error);
 	error = tmpfs_vptocnp_dir(tn, tnp, &de);
 	if (error == 0) {
 		/* Compute the new start offset in a signed int to detect underflow. */
 		i = *buflen;
 		i -= de->td_namelen;
 		if (i < 0) {
 			error = ENOMEM;
 		} else {
 			bcopy(de->ud.td_name, buf + i, de->td_namelen);
 			*buflen = i;
 		}
 	}
 	if (error == 0) {
 		if (vp != *dvp)
 			VOP_UNLOCK(*dvp);
 	} else {
 		if (vp != *dvp)
 			vput(*dvp);
 		else
 			vrele(vp);
 	}
 	return (error);
 }
 
 /*
  * VOP_VPTOCNP handler: find a parent directory of a_vp and the name under
  * which a_vp is entered there.
  *
  * The parent vnode is returned through a_vpp and the name is written into
  * a_buf by tmpfs_vptocnp_fill(), which also updates *a_buflen.
  *
  * Directories use their recorded tn_parent.  For other node types every
  * directory node on the mount's tm_nodes_used list is examined until one
  * containing an entry for the node is found.  Returns 0 on success and
  * ENOENT when no parent is found, when a directory has no parent, or when
  * vp is found doomed after a failed fill attempt.
  */
 static int
 tmpfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp, **dvp;
 	struct tmpfs_node *tn, *tnp, *tnp1;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tm;
 	char *buf;
-	int *buflen;
+	size_t *buflen;
 	int error;
 
 	vp = ap->a_vp;
 	dvp = ap->a_vpp;
 	buf = ap->a_buf;
 	buflen = ap->a_buflen;
 
 	tm = VFS_TO_TMPFS(vp->v_mount);
 	tn = VP_TO_TMPFS_NODE(vp);
 	if (tn->tn_type == VDIR) {
 		/* Directories know their parent; no list scan is needed. */
 		tnp = tn->tn_dir.tn_parent;
 		if (tnp == NULL)
 			return (ENOENT);
 		/* Hold a reference on the parent across the fill call. */
 		tmpfs_ref_node(tnp);
 		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
 		    buflen, dvp);
 		tmpfs_free_node(tm, tnp);
 		return (error);
 	}
 restart:
 	TMPFS_LOCK(tm);
 	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
 		if (tnp->tn_type != VDIR)
 			continue;
 		TMPFS_NODE_LOCK(tnp);
 		tmpfs_ref_node_locked(tnp);
 
 		/*
 		 * tn_vnode cannot be instantiated while we hold the
 		 * node lock, so the directory cannot be changed while
 		 * we iterate over it.  Do this to avoid instantiating
 		 * vnode for directories which cannot point to our
 		 * node.
 		 */
 		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
 		    &de) : 0;
 
 		if (error == 0) {
 			/*
 			 * Candidate parent: drop both locks before calling
 			 * the fill helper, which obtains the vnode.
 			 */
 			TMPFS_NODE_UNLOCK(tnp);
 			TMPFS_UNLOCK(tm);
 			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
 			    dvp);
 			if (error == 0) {
 				tmpfs_free_node(tm, tnp);
 				return (0);
 			}
 			if (VN_IS_DOOMED(vp)) {
 				tmpfs_free_node(tm, tnp);
 				return (ENOENT);
 			}
 			/* Not this directory: retake the locks and go on. */
 			TMPFS_LOCK(tm);
 			TMPFS_NODE_LOCK(tnp);
 		}
 		/*
 		 * Drop the reference taken above.  A true return restarts
 		 * the scan from the list head; otherwise the next element
 		 * is re-read from tnp before its lock is released.
 		 */
 		if (tmpfs_free_node_locked(tm, tnp, false)) {
 			goto restart;
 		} else {
 			KASSERT(tnp->tn_refcount > 0,
 			    ("node %p refcount zero", tnp));
 			tnp1 = LIST_NEXT(tnp, tn_entries);
 			TMPFS_NODE_UNLOCK(tnp);
 		}
 	}
 	TMPFS_UNLOCK(tm);
 	return (ENOENT);
 }
 
 /*
  * Vnode operations vector used for files stored in a tmpfs file system.
  *
  * Lookups go through vfs_cache_lookup with tmpfs_cached_lookup as the
  * cached-lookup hook; vop_bmap is VOP_EOPNOTSUPP.  vop_default points at
  * default_vnodeops (presumably the fallback for operations not listed
  * here -- confirm against the VFS core).
  */
 struct vop_vector tmpfs_vnodeop_entries = {
 	.vop_default =			&default_vnodeops,
 	.vop_lookup =			vfs_cache_lookup,
 	.vop_cachedlookup =		tmpfs_cached_lookup,
 	.vop_create =			tmpfs_create,
 	.vop_mknod =			tmpfs_mknod,
 	.vop_open =			tmpfs_open,
 	.vop_close =			tmpfs_close,
 	.vop_access =			tmpfs_access,
 	.vop_getattr =			tmpfs_getattr,
 	.vop_setattr =			tmpfs_setattr,
 	.vop_read =			tmpfs_read,
 	.vop_write =			tmpfs_write,
 	.vop_fsync =			tmpfs_fsync,
 	.vop_remove =			tmpfs_remove,
 	.vop_link =			tmpfs_link,
 	.vop_rename =			tmpfs_rename,
 	.vop_mkdir =			tmpfs_mkdir,
 	.vop_rmdir =			tmpfs_rmdir,
 	.vop_symlink =			tmpfs_symlink,
 	.vop_readdir =			tmpfs_readdir,
 	.vop_readlink =			tmpfs_readlink,
 	.vop_inactive =			tmpfs_inactive,
 	.vop_need_inactive =		tmpfs_need_inactive,
 	.vop_reclaim =			tmpfs_reclaim,
 	.vop_print =			tmpfs_print,
 	.vop_pathconf =			tmpfs_pathconf,
 	.vop_vptofh =			tmpfs_vptofh,
 	.vop_whiteout =			tmpfs_whiteout,
 	.vop_bmap =			VOP_EOPNOTSUPP,
 	.vop_vptocnp =			tmpfs_vptocnp,
 	.vop_lock1 =			vop_lock,
 	.vop_unlock = 			vop_unlock,
 	.vop_islocked = 		vop_islocked,
 };
 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);
 
 /*
  * Same vector for mounts which do not use namecache.
  *
  * Only vop_lookup is replaced (tmpfs_lookup instead of vfs_cache_lookup);
  * vop_default points at tmpfs_vnodeop_entries, which presumably supplies
  * every other operation.
  */
 struct vop_vector tmpfs_vnodeop_nonc_entries = {
 	.vop_default =			&tmpfs_vnodeop_entries,
 	.vop_lookup =			tmpfs_lookup,
 };
 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);
Index: projects/clang1000-import/sys/kern/kern_sig.c
===================================================================
--- projects/clang1000-import/sys/kern/kern_sig.c	(revision 357389)
+++ projects/clang1000-import/sys/kern/kern_sig.c	(revision 357390)
@@ -1,3914 +1,3916 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_sig.c	8.7 (Berkeley) 4/18/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/ctype.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/acct.h>
 #include <sys/bus.h>
 #include <sys/capsicum.h>
 #include <sys/compressor.h>
 #include <sys/condvar.h>
 #include <sys/event.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/ktrace.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/procdesc.h>
 #include <sys/ptrace.h>
 #include <sys/posix4.h>
 #include <sys/pioctl.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sdt.h>
 #include <sys/sbuf.h>
 #include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/timers.h>
 #include <sys/unistd.h>
 #include <sys/wait.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 #include <sys/jail.h>
 
 #include <machine/cpu.h>
 
 #include <security/audit/audit.h>
 
 #define	ONSIG	32		/* NSIG for osig* syscalls.  XXX. */
 
 /* Static tracing probes in the "proc" provider for signal events. */
 SDT_PROVIDER_DECLARE(proc);
 SDT_PROBE_DEFINE3(proc, , , signal__send,
     "struct thread *", "struct proc *", "int");
 SDT_PROBE_DEFINE2(proc, , , signal__clear,
     "int", "ksiginfo_t *");
 SDT_PROBE_DEFINE3(proc, , , signal__discard,
     "struct thread *", "struct proc *", "int");
 
 /* Forward declarations of file-local helpers. */
 static int	coredump(struct thread *);
 static int	killpg1(struct thread *td, int sig, int pgid, int all,
 		    ksiginfo_t *ksi);
 static int	issignal(struct thread *td);
 static int	sigprop(int sig);
 static void	tdsigwakeup(struct thread *, int, sig_t, int);
 static int	sig_suspend_threads(struct thread *, struct proc *, int);
 static int	filt_sigattach(struct knote *kn);
 static void	filt_sigdetach(struct knote *kn);
 static int	filt_signal(struct knote *kn, long hint);
 static struct thread *sigtd(struct proc *p, int sig, int prop);
 static void	sigqueue_start(void);
 
 /* UMA zone backing ksiginfo_t allocations; NULL until sigqueue_start(). */
 static uma_zone_t	ksiginfo_zone = NULL;
 /* Filter operations for signal knotes, built from the filt_sig* helpers. */
 struct filterops sig_filtops = {
 	.f_isfd = 0,
 	.f_attach = filt_sigattach,
 	.f_detach = filt_sigdetach,
 	.f_event = filt_signal,
 };
 
 /* kern.logsigexit: log abnormal signal exits (default on). */
 static int	kern_logsigexit = 1;
 SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW,
     &kern_logsigexit, 0,
     "Log processes quitting on abnormal signals to syslog(3)");
 
 /* kern.forcesigexit (default on). */
 static int	kern_forcesigexit = 1;
 SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW,
     &kern_forcesigexit, 0, "Force trap signal to be handled");
 
 /* kern.sigqueue.*: limits and counters for queued signals. */
 static SYSCTL_NODE(_kern, OID_AUTO, sigqueue, CTLFLAG_RW, 0,
     "POSIX real time signal");
 
 /* Upper bound checked against p_pendingcnt in sigqueue_add(). */
 static int	max_pending_per_proc = 128;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, max_pending_per_proc, CTLFLAG_RW,
     &max_pending_per_proc, 0, "Max pending signals per proc");
 
 /* Count handed to uma_prealloc() for ksiginfo_zone in sigqueue_start(). */
 static int	preallocate_siginfo = 1024;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, preallocate, CTLFLAG_RDTUN,
     &preallocate_siginfo, 0, "Preallocated signal memory size");
 
 /* Bumped by sigqueue_add() when the per-process pending limit is hit. */
 static int	signal_overflow = 0;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, overflow, CTLFLAG_RD,
     &signal_overflow, 0, "Number of signals overflew");
 
 /* Bumped by sigqueue_add() when ksiginfo_alloc() returns NULL. */
 static int	signal_alloc_fail = 0;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, alloc_fail, CTLFLAG_RD,
     &signal_alloc_fail, 0, "signals failed to be allocated");
 
 /* kern.lognosys (default off). */
 static int	kern_lognosys = 0;
 SYSCTL_INT(_kern, OID_AUTO, lognosys, CTLFLAG_RWTUN, &kern_lognosys, 0,
     "Log invalid syscalls");
 
 /* Run sigqueue_start() during system initialisation. */
 SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL);
 
 /*
  * Policy -- Can ucred cr1 send SIGIO to process cr2?
  * Should use cr_cansignal() once cr_cansignal() allows SIGIO and SIGURG
  * in the right situations.
  *
  * True when cr1's effective uid is 0, or when any pairing of cr1's
  * real/effective uid with cr2's real/effective uid matches.  Both
  * arguments are evaluated several times, so they must be free of side
  * effects.
  */
 #define CANSIGIO(cr1, cr2) \
 	((cr1)->cr_uid == 0 || \
 	    (cr1)->cr_ruid == (cr2)->cr_ruid || \
 	    (cr1)->cr_uid == (cr2)->cr_ruid || \
 	    (cr1)->cr_ruid == (cr2)->cr_uid || \
 	    (cr1)->cr_uid == (cr2)->cr_uid)
 
 /* kern.sugid_coredump: no explicit initialiser, so it starts at 0. */
 static int	sugid_coredump;
 SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN,
     &sugid_coredump, 0, "Allow setuid and setgid processes to dump core");
 
 /* kern.capmode_coredump: no explicit initialiser, so it starts at 0. */
 static int	capmode_coredump;
 SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN,
     &capmode_coredump, 0, "Allow processes in capability mode to dump core");
 
 /* kern.coredump: global switch, on by default. */
 static int	do_coredump = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW,
 	&do_coredump, 0, "Enable/Disable coredumps");
 
 /* kern.nodump_coredump: off by default. */
 static int	set_core_nodump_flag = 0;
 SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag,
 	0, "Enable setting the NODUMP flag on coredump files");
 
 /* kern.coredump_devctl: off by default. */
 static int	coredump_devctl = 0;
 SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl,
 	0, "Generate a devctl notification when processes coredump");
 
 /*
  * Signal properties and actions.
  * The array below categorizes the signals and their default actions
  * according to the following properties:
  */
 #define	SIGPROP_KILL		0x01	/* terminates process by default */
 #define	SIGPROP_CORE		0x02	/* ditto and coredumps */
 #define	SIGPROP_STOP		0x04	/* suspend process */
 #define	SIGPROP_TTYSTOP		0x08	/* ditto, from tty */
 #define	SIGPROP_IGNORE		0x10	/* ignore by default */
 #define	SIGPROP_CONT		0x20	/* continue if suspended */
 #define	SIGPROP_CANTMASK	0x40	/* non-maskable, catchable */
 
 /*
  * Indexed by signal number and read through sigprop().  Slots without an
  * explicit initializer (index 0 and any signal not listed) hold 0, i.e. no
  * properties.
  */
 static int sigproptbl[NSIG] = {
 	[SIGHUP] =	SIGPROP_KILL,
 	[SIGINT] =	SIGPROP_KILL,
 	[SIGQUIT] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGILL] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGTRAP] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGABRT] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGEMT] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGFPE] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGKILL] =	SIGPROP_KILL,
 	[SIGBUS] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGSEGV] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGSYS] =	SIGPROP_KILL | SIGPROP_CORE,
 	[SIGPIPE] =	SIGPROP_KILL,
 	[SIGALRM] =	SIGPROP_KILL,
 	[SIGTERM] =	SIGPROP_KILL,
 	[SIGURG] =	SIGPROP_IGNORE,
 	[SIGSTOP] =	SIGPROP_STOP,
 	[SIGTSTP] =	SIGPROP_STOP | SIGPROP_TTYSTOP,
 	[SIGCONT] =	SIGPROP_IGNORE | SIGPROP_CONT,
 	[SIGCHLD] =	SIGPROP_IGNORE,
 	[SIGTTIN] =	SIGPROP_STOP | SIGPROP_TTYSTOP,
 	[SIGTTOU] =	SIGPROP_STOP | SIGPROP_TTYSTOP,
 	[SIGIO] =	SIGPROP_IGNORE,
 	[SIGXCPU] =	SIGPROP_KILL,
 	[SIGXFSZ] =	SIGPROP_KILL,
 	[SIGVTALRM] =	SIGPROP_KILL,
 	[SIGPROF] =	SIGPROP_KILL,
 	[SIGWINCH] =	SIGPROP_IGNORE,
 	[SIGINFO] =	SIGPROP_IGNORE,
 	[SIGUSR1] =	SIGPROP_KILL,
 	[SIGUSR2] =	SIGPROP_KILL,
 };
 
 static void reschedule_signals(struct proc *p, sigset_t block, int flags);
 
 /*
  * SYSINIT hook: create the ksiginfo UMA zone, preallocate
  * preallocate_siginfo items in it, and publish the realtime-signal
  * configuration values through p31b_setcfg().
  */
 static void
 sigqueue_start(void)
 {
 	ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t),
 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_prealloc(ksiginfo_zone, preallocate_siginfo);
 	p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS);
 	p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1);
 	p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc);
 }
 
 /*
  * Allocate a zeroed ksiginfo_t from ksiginfo_zone.
  *
  * M_NOWAIT is added to the allocation flags when `wait' is zero.  Returns
  * NULL when the zone has not been created yet, and otherwise whatever
  * uma_zalloc() returns.
  */
 ksiginfo_t *
 ksiginfo_alloc(int wait)
 {
 	int flags;
 
 	if (ksiginfo_zone == NULL)
 		return (NULL);
 
 	flags = wait ? M_ZERO : (M_ZERO | M_NOWAIT);
 	return ((ksiginfo_t *)uma_zalloc(ksiginfo_zone, flags));
 }
 
 /* Return ksi to ksiginfo_zone unconditionally (KSI_EXT is not checked). */
 void
 ksiginfo_free(ksiginfo_t *ksi)
 {
 	uma_zfree(ksiginfo_zone, ksi);
 }
 
 /*
  * Free ksi back to ksiginfo_zone unless it is flagged KSI_EXT.
  * Returns 1 if the item was freed, 0 if it was left alone.
  */
 static __inline int
 ksiginfo_tryfree(ksiginfo_t *ksi)
 {
 	if ((ksi->ksi_flags & KSI_EXT) != 0)
 		return (0);
 
 	uma_zfree(ksiginfo_zone, ksi);
 	return (1);
 }
 
 /*
  * Initialise a signal queue: empty all three signal sets and the ksiginfo
  * list, record the owning process p (which may be NULL, as for the
  * temporary worklist in sigqueue_delete_set_proc()), and mark the queue
  * SQ_INIT.
  */
 void
 sigqueue_init(sigqueue_t *list, struct proc *p)
 {
 	SIGEMPTYSET(list->sq_signals);
 	SIGEMPTYSET(list->sq_kill);
 	SIGEMPTYSET(list->sq_ptrace);
 	TAILQ_INIT(&list->sq_list);
 	list->sq_proc = p;
 	list->sq_flags = SQ_INIT;
 }
 
 /*
  * Get a signal's ksiginfo.
  *
  * Consumes one pending instance of signo from sq.  An instance recorded in
  * sq_ptrace is taken first (and si is flagged KSI_PTRACE), then one in
  * sq_kill, then the first matching ksiginfo on the list, whose contents
  * are copied into si.  The sq_signals bit is cleared only when no further
  * instance of signo remains.  si->ksi_signo is always set to signo.
  *
  * Return:
  *	0	-	signal not found
  *	others	-	signal number
  */
 static int
 sigqueue_get(sigqueue_t *sq, int signo, ksiginfo_t *si)
 {
 	struct proc *p = sq->sq_proc;
 	struct ksiginfo *ksi, *next;
 	/* Number of pending instances seen so far; only 0, 1 and ">1" matter. */
 	int count = 0;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	if (!SIGISMEMBER(sq->sq_signals, signo))
 		return (0);
 
 	if (SIGISMEMBER(sq->sq_ptrace, signo)) {
 		count++;
 		SIGDELSET(sq->sq_ptrace, signo);
 		si->ksi_flags |= KSI_PTRACE;
 	}
 	if (SIGISMEMBER(sq->sq_kill, signo)) {
 		count++;
 		/* Consume the sq_kill instance only if ptrace did not supply one. */
 		if (count == 1)
 			SIGDELSET(sq->sq_kill, signo);
 	}
 
 	TAILQ_FOREACH_SAFE(ksi, &sq->sq_list, ksi_link, next) {
 		if (ksi->ksi_signo == signo) {
 			/* Dequeue a list entry only if nothing was consumed yet. */
 			if (count == 0) {
 				TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 				ksi->ksi_sigq = NULL;
 				ksiginfo_copy(ksi, si);
 				if (ksiginfo_tryfree(ksi) && p != NULL)
 					p->p_pendingcnt--;
 			}
 			/* A second instance means signo stays pending; stop. */
 			if (++count > 1)
 				break;
 		}
 	}
 
 	if (count <= 1)
 		SIGDELSET(sq->sq_signals, signo);
 	si->ksi_signo = signo;
 	return (signo);
 }
 
 /*
  * Unlink ksi from the signal queue it is currently on, if any.
  *
  * The owning process's pending count is decremented unless ksi is flagged
  * KSI_EXT.  The signal's bit in sq_signals is cleared when no other list
  * entry carries the same signal number and the signal is recorded in
  * neither sq_kill nor sq_ptrace.  A NULL ksi, or one that is on no queue,
  * is ignored.
  */
 void
 sigqueue_take(ksiginfo_t *ksi)
 {
 	struct ksiginfo *other;
 	struct proc *p;
 	sigqueue_t *sq;
 
 	if (ksi == NULL)
 		return;
 	sq = ksi->ksi_sigq;
 	if (sq == NULL)
 		return;
 
 	p = sq->sq_proc;
 	TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 	ksi->ksi_sigq = NULL;
 	if (p != NULL && (ksi->ksi_flags & KSI_EXT) == 0)
 		p->p_pendingcnt--;
 
 	/* Another queued instance of this signal keeps the bit set. */
 	TAILQ_FOREACH(other, &sq->sq_list, ksi_link) {
 		if (other->ksi_signo == ksi->ksi_signo)
 			return;
 	}
 
 	if (SIGISMEMBER(sq->sq_kill, ksi->ksi_signo))
 		return;
 	if (SIGISMEMBER(sq->sq_ptrace, ksi->ksi_signo))
 		return;
 	SIGDELSET(sq->sq_signals, ksi->ksi_signo);
 }
 
 /*
  * Record signal signo as pending on queue sq.
  *
  * Depending on the arguments the signal is recorded in one of these ways:
  *  - only as a bit in sq_kill (SIGKILL, SIGSTOP, si == NULL, or the
  *    ksiginfo zone does not exist yet);
  *  - by linking si itself into the list (KSI_INS set);
  *  - by linking a freshly allocated copy of si into the list.
  * KSI_HEAD selects insertion at the head instead of the tail.
  *
  * When a copy cannot be queued (per-process limit reached or allocation
  * failure) the signal is still recorded as a bit in sq_ptrace (KSI_PTRACE)
  * or in sq_kill (KSI_TRAP set, or KSI_SIGQ clear) and 0 is returned.
  * Otherwise EAGAIN is returned and sq_signals is left untouched.
  *
  * Returns 0 on success, EAGAIN as described above.
  */
 static int
 sigqueue_add(sigqueue_t *sq, int signo, ksiginfo_t *si)
 {
 	struct proc *p = sq->sq_proc;
 	struct ksiginfo *ksi;
 	int ret = 0;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	/*
 	 * SIGKILL/SIGSTOP cannot be caught or masked, so take the fast path
 	 * for these signals.
 	 */
 	if (signo == SIGKILL || signo == SIGSTOP || si == NULL) {
 		SIGADDSET(sq->sq_kill, signo);
 		goto out_set_bit;
 	}
 
 	/* directly insert the ksi, don't copy it */
 	if (si->ksi_flags & KSI_INS) {
 		if (si->ksi_flags & KSI_HEAD)
 			TAILQ_INSERT_HEAD(&sq->sq_list, si, ksi_link);
 		else
 			TAILQ_INSERT_TAIL(&sq->sq_list, si, ksi_link);
 		si->ksi_sigq = sq;
 		goto out_set_bit;
 	}
 
 	/* No zone to allocate from yet: remember the signal as a bit only. */
 	if (__predict_false(ksiginfo_zone == NULL)) {
 		SIGADDSET(sq->sq_kill, signo);
 		goto out_set_bit;
 	}
 
 	if (p != NULL && p->p_pendingcnt >= max_pending_per_proc) {
 		signal_overflow++;
 		ret = EAGAIN;
 	} else if ((ksi = ksiginfo_alloc(0)) == NULL) {
 		signal_alloc_fail++;
 		ret = EAGAIN;
 	} else {
 		if (p != NULL)
 			p->p_pendingcnt++;
 		ksiginfo_copy(si, ksi);
 		ksi->ksi_signo = signo;
 		if (si->ksi_flags & KSI_HEAD)
 			TAILQ_INSERT_HEAD(&sq->sq_list, ksi, ksi_link);
 		else
 			TAILQ_INSERT_TAIL(&sq->sq_list, ksi, ksi_link);
 		ksi->ksi_sigq = sq;
 	}
 
 	/* Queueing failed: fall back to a bare pending bit where allowed. */
 	if (ret != 0) {
 		if ((si->ksi_flags & KSI_PTRACE) != 0) {
 			SIGADDSET(sq->sq_ptrace, signo);
 			ret = 0;
 			goto out_set_bit;
 		} else if ((si->ksi_flags & KSI_TRAP) != 0 ||
 		    (si->ksi_flags & KSI_SIGQ) == 0) {
 			SIGADDSET(sq->sq_kill, signo);
 			ret = 0;
 			goto out_set_bit;
 		}
 		return (ret);
 	}
 
 out_set_bit:
 	SIGADDSET(sq->sq_signals, signo);
 	return (ret);
 }
 
 /*
  * Drop everything pending on sq: unlink every ksiginfo (freeing those that
  * are not KSI_EXT and adjusting the owner's pending count for each one
  * freed) and empty all three signal sets.  When the queue belongs to a
  * process, that process's lock must be held.
  */
 void
 sigqueue_flush(sigqueue_t *sq)
 {
 	struct proc *owner;
 	ksiginfo_t *head;
 
 	owner = sq->sq_proc;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	if (owner != NULL)
 		PROC_LOCK_ASSERT(owner, MA_OWNED);
 
 	while (!TAILQ_EMPTY(&sq->sq_list)) {
 		head = TAILQ_FIRST(&sq->sq_list);
 		TAILQ_REMOVE(&sq->sq_list, head, ksi_link);
 		head->ksi_sigq = NULL;
 		if (ksiginfo_tryfree(head) && owner != NULL)
 			owner->p_pendingcnt--;
 	}
 
 	SIGEMPTYSET(sq->sq_signals);
 	SIGEMPTYSET(sq->sq_kill);
 	SIGEMPTYSET(sq->sq_ptrace);
 }
 
 /*
  * Move every pending signal that is a member of *set from queue src to
  * queue dst: the queued ksiginfo entries (appended at dst's tail, with the
  * owners' pending counts adjusted) as well as the corresponding bits of
  * sq_kill, sq_ptrace and sq_signals.
  */
 static void
 sigqueue_move_set(sigqueue_t *src, sigqueue_t *dst, const sigset_t *set)
 {
 	struct proc *srcproc, *dstproc;
 	ksiginfo_t *cur, *nxt;
 	sigset_t moved;
 
 	KASSERT(src->sq_flags & SQ_INIT, ("src sigqueue not inited"));
 	KASSERT(dst->sq_flags & SQ_INIT, ("dst sigqueue not inited"));
 	srcproc = src->sq_proc;
 	dstproc = dst->sq_proc;
 
 	/* Relink the matching siginfo entries onto the target list. */
 	TAILQ_FOREACH_SAFE(cur, &src->sq_list, ksi_link, nxt) {
 		if (!SIGISMEMBER(*set, cur->ksi_signo))
 			continue;
 		TAILQ_REMOVE(&src->sq_list, cur, ksi_link);
 		if (srcproc != NULL)
 			srcproc->p_pendingcnt--;
 		TAILQ_INSERT_TAIL(&dst->sq_list, cur, ksi_link);
 		cur->ksi_sigq = dst;
 		if (dstproc != NULL)
 			dstproc->p_pendingcnt++;
 	}
 
 	/* Transfer the selected bits of each pending set in turn. */
 	moved = src->sq_kill;
 	SIGSETAND(moved, *set);
 	SIGSETOR(dst->sq_kill, moved);
 	SIGSETNAND(src->sq_kill, moved);
 
 	moved = src->sq_ptrace;
 	SIGSETAND(moved, *set);
 	SIGSETOR(dst->sq_ptrace, moved);
 	SIGSETNAND(src->sq_ptrace, moved);
 
 	moved = src->sq_signals;
 	SIGSETAND(moved, *set);
 	SIGSETOR(dst->sq_signals, moved);
 	SIGSETNAND(src->sq_signals, moved);
 }
 
 #if 0
 /*
  * Compiled out.  Single-signal wrapper around sigqueue_move_set(): moves
  * signo from src to dst.
  */
 static void
 sigqueue_move(sigqueue_t *src, sigqueue_t *dst, int signo)
 {
 	sigset_t set;
 
 	SIGEMPTYSET(set);
 	SIGADDSET(set, signo);
 	sigqueue_move_set(src, dst, &set);
 }
 #endif
 
 /*
  * Discard every pending signal in *set from queue sq: matching ksiginfo
  * entries are unlinked (and freed unless KSI_EXT, with the owner's pending
  * count adjusted for each one freed), and the bits are cleared from
  * sq_kill, sq_ptrace and sq_signals.
  */
 static void
 sigqueue_delete_set(sigqueue_t *sq, const sigset_t *set)
 {
 	struct proc *owner;
 	ksiginfo_t *cur, *nxt;
 
 	owner = sq->sq_proc;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("src sigqueue not inited"));
 
 	/* Remove siginfo queue */
 	TAILQ_FOREACH_SAFE(cur, &sq->sq_list, ksi_link, nxt) {
 		if (!SIGISMEMBER(*set, cur->ksi_signo))
 			continue;
 		TAILQ_REMOVE(&sq->sq_list, cur, ksi_link);
 		cur->ksi_sigq = NULL;
 		if (ksiginfo_tryfree(cur) && owner != NULL)
 			owner->p_pendingcnt--;
 	}
 
 	SIGSETNAND(sq->sq_kill, *set);
 	SIGSETNAND(sq->sq_ptrace, *set);
 	SIGSETNAND(sq->sq_signals, *set);
 }
 
 /*
  * Discard all pending instances of a single signal from queue sq.  Builds
  * a one-element set and defers to sigqueue_delete_set().
  */
 void
 sigqueue_delete(sigqueue_t *sq, int signo)
 {
 	sigset_t set;
 
 	SIGEMPTYSET(set);
 	SIGADDSET(set, signo);
 	sigqueue_delete_set(sq, &set);
 }
 
 /*
  * Remove a set of signals for a process.
  *
  * The matching signals are first moved from the process queue and from
  * every thread's queue onto a temporary, ownerless worklist, which is then
  * flushed.  The process lock must be held.
  */
 static void
 sigqueue_delete_set_proc(struct proc *p, const sigset_t *set)
 {
 	sigqueue_t worklist;
 	struct thread *td0;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sigqueue_init(&worklist, NULL);
 	sigqueue_move_set(&p->p_sigqueue, &worklist, set);
 
 	FOREACH_THREAD_IN_PROC(p, td0)
 		sigqueue_move_set(&td0->td_sigqueue, &worklist, set);
 
 	sigqueue_flush(&worklist);
 }
 
 /*
  * Discard all pending instances of signo from process p and its threads,
  * via sigqueue_delete_set_proc() with a one-element set.
  */
 void
 sigqueue_delete_proc(struct proc *p, int signo)
 {
 	sigset_t set;
 
 	SIGEMPTYSET(set);
 	SIGADDSET(set, signo);
 	sigqueue_delete_set_proc(p, &set);
 }
 
 /*
  * Discard any pending SIGSTOP, SIGTSTP, SIGTTIN and SIGTTOU from process p
  * and its threads.
  */
 static void
 sigqueue_delete_stopmask_proc(struct proc *p)
 {
 	sigset_t set;
 
 	SIGEMPTYSET(set);
 	SIGADDSET(set, SIGSTOP);
 	SIGADDSET(set, SIGTSTP);
 	SIGADDSET(set, SIGTTIN);
 	SIGADDSET(set, SIGTTOU);
 	sigqueue_delete_set_proc(p, &set);
 }
 
 /*
  * Determine signal that should be delivered to thread td, the current
  * thread, 0 if none.  If there is a pending stop signal with default
  * action, the process stops in issignal().
  *
  * The caller must hold the process lock and the sigacts mutex and must not
  * hold the thread lock.
  */
 int
 cursig(struct thread *td)
 {
 	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 	mtx_assert(&td->td_proc->p_sigacts->ps_mtx, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_NOTOWNED);
 
 	if (!SIGPENDING(td))
 		return (0);
 	return (issignal(td));
 }
 
 /*
  * Arrange for ast() to handle unmasked pending signals on return to user
  * mode.  This must be called whenever a signal is added to td_sigqueue or
  * unmasked in td_sigmask.
  *
  * Nothing is done when SIGPENDING(td) is false.  The process lock must be
  * held.
  */
 void
 signotify(struct thread *td)
 {
 
 	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 
 	if (!SIGPENDING(td))
 		return;
 
 	thread_lock(td);
 	td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING;
 	thread_unlock(td);
 }
 
 /*
  * Returns 1 (true) if altstack is configured for the thread, and the
  * passed stack bottom address falls into the altstack range.  Handles
  * the 43 compat special case where the alt stack size is zero.
  *
  * The range tested is [ss_sp, ss_sp + ss_size) of the current thread's
  * td_sigstk.
  */
 int
 sigonstack(size_t sp)
 {
 	struct thread *td = curthread;
 	size_t base;
 
 	if ((td->td_pflags & TDP_ALTSTACK) == 0)
 		return (0);
 #if defined(COMPAT_43)
 	if (SV_PROC_FLAG(td->td_proc, SV_AOUT) && td->td_sigstk.ss_size == 0)
 		return ((td->td_sigstk.ss_flags & SS_ONSTACK) != 0);
 #endif
 	base = (size_t)td->td_sigstk.ss_sp;
 	if (sp < base)
 		return (0);
 	return (sp < td->td_sigstk.ss_size + base);
 }
 
 /*
  * Look up the SIGPROP_* bits for signal number sig in sigproptbl.
  * Returns 0 for sig <= 0 or sig beyond the end of the table.
  */
 static __inline int
 sigprop(int sig)
 {
 
 	if (sig <= 0 || sig >= nitems(sigproptbl))
 		return (0);
 	return (sigproptbl[sig]);
 }
 
 /*
  * Find the first set bit in a signal set.
  *
  * Scans the __bits words in order and returns ffs() of the first non-zero
  * word plus 32 for each word skipped, or 0 when every word is zero.
  */
 int
 sig_ffs(sigset_t *set)
 {
 	int word;
 
 	word = 0;
 	while (word < _SIG_WORDS) {
 		if (set->__bits[word])
 			return (ffs(set->__bits[word]) + (word * 32));
 		word++;
 	}
 	return (0);
 }
 
 static bool
 sigact_flag_test(const struct sigaction *act, int flag)
 {
 
 	/*
 	 * SA_SIGINFO is reset when signal disposition is set to
 	 * ignore or default.  Other flags are kept according to user
 	 * settings.
 	 */
 	return ((act->sa_flags & flag) != 0 && (flag != SA_SIGINFO ||
 	    ((__sighandler_t *)act->sa_sigaction != SIG_IGN &&
 	    (__sighandler_t *)act->sa_sigaction != SIG_DFL)));
 }
 
 /*
  * kern_sigaction
  * sigaction
  * freebsd4_sigaction
  * osigaction
  *
  * Query and/or change the disposition of signal `sig' for td's process.
  *
  * If oact is non-NULL it is filled with the current disposition: handler,
  * catch mask, and SA_* flags rebuilt from the sigacts sets.  If act is
  * non-NULL the new disposition is installed.  Both steps run with the
  * process lock and ps_mtx held.  `flags' is tested for KSA_FREEBSD4 and
  * KSA_OSIGSET in the compat sections at the end.
  *
  * Returns 0 on success, or EINVAL when sig is not valid, when act names a
  * real handler together with unknown sa_flags bits, or when a non-default
  * action is requested for SIGKILL or SIGSTOP.
  */
 int
 kern_sigaction(struct thread *td, int sig, const struct sigaction *act,
     struct sigaction *oact, int flags)
 {
 	struct sigacts *ps;
 	struct proc *p = td->td_proc;
 
 	if (!_SIG_VALID(sig))
 		return (EINVAL);
 	/* Unknown flag bits are rejected only for real handlers. */
 	if (act != NULL && act->sa_handler != SIG_DFL &&
 	    act->sa_handler != SIG_IGN && (act->sa_flags & ~(SA_ONSTACK |
 	    SA_RESTART | SA_RESETHAND | SA_NOCLDSTOP | SA_NODEFER |
 	    SA_NOCLDWAIT | SA_SIGINFO)) != 0)
 		return (EINVAL);
 
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	if (oact) {
 		/* Report the current disposition. */
 		memset(oact, 0, sizeof(*oact));
 		oact->sa_mask = ps->ps_catchmask[_SIG_IDX(sig)];
 		if (SIGISMEMBER(ps->ps_sigonstack, sig))
 			oact->sa_flags |= SA_ONSTACK;
 		/* ps_sigintr holds the inverse of SA_RESTART. */
 		if (!SIGISMEMBER(ps->ps_sigintr, sig))
 			oact->sa_flags |= SA_RESTART;
 		if (SIGISMEMBER(ps->ps_sigreset, sig))
 			oact->sa_flags |= SA_RESETHAND;
 		if (SIGISMEMBER(ps->ps_signodefer, sig))
 			oact->sa_flags |= SA_NODEFER;
 		if (SIGISMEMBER(ps->ps_siginfo, sig)) {
 			oact->sa_flags |= SA_SIGINFO;
 			oact->sa_sigaction =
 			    (__siginfohandler_t *)ps->ps_sigact[_SIG_IDX(sig)];
 		} else
 			oact->sa_handler = ps->ps_sigact[_SIG_IDX(sig)];
 		if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDSTOP)
 			oact->sa_flags |= SA_NOCLDSTOP;
 		if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDWAIT)
 			oact->sa_flags |= SA_NOCLDWAIT;
 	}
 	if (act) {
 		/* SIGKILL and SIGSTOP only accept the default action. */
 		if ((sig == SIGKILL || sig == SIGSTOP) &&
 		    act->sa_handler != SIG_DFL) {
 			mtx_unlock(&ps->ps_mtx);
 			PROC_UNLOCK(p);
 			return (EINVAL);
 		}
 
 		/*
 		 * Change setting atomically.
 		 */
 
 		ps->ps_catchmask[_SIG_IDX(sig)] = act->sa_mask;
 		SIG_CANTMASK(ps->ps_catchmask[_SIG_IDX(sig)]);
 		if (sigact_flag_test(act, SA_SIGINFO)) {
 			ps->ps_sigact[_SIG_IDX(sig)] =
 			    (__sighandler_t *)act->sa_sigaction;
 			SIGADDSET(ps->ps_siginfo, sig);
 		} else {
 			ps->ps_sigact[_SIG_IDX(sig)] = act->sa_handler;
 			SIGDELSET(ps->ps_siginfo, sig);
 		}
 		/* Mirror each SA_* flag into its per-signal set. */
 		if (!sigact_flag_test(act, SA_RESTART))
 			SIGADDSET(ps->ps_sigintr, sig);
 		else
 			SIGDELSET(ps->ps_sigintr, sig);
 		if (sigact_flag_test(act, SA_ONSTACK))
 			SIGADDSET(ps->ps_sigonstack, sig);
 		else
 			SIGDELSET(ps->ps_sigonstack, sig);
 		if (sigact_flag_test(act, SA_RESETHAND))
 			SIGADDSET(ps->ps_sigreset, sig);
 		else
 			SIGDELSET(ps->ps_sigreset, sig);
 		if (sigact_flag_test(act, SA_NODEFER))
 			SIGADDSET(ps->ps_signodefer, sig);
 		else
 			SIGDELSET(ps->ps_signodefer, sig);
 		/* SIGCHLD-only state is kept in ps_flag. */
 		if (sig == SIGCHLD) {
 			if (act->sa_flags & SA_NOCLDSTOP)
 				ps->ps_flag |= PS_NOCLDSTOP;
 			else
 				ps->ps_flag &= ~PS_NOCLDSTOP;
 			if (act->sa_flags & SA_NOCLDWAIT) {
 				/*
 				 * Paranoia: since SA_NOCLDWAIT is implemented
 				 * by reparenting the dying child to PID 1 (and
 				 * trust it to reap the zombie), PID 1 itself
 				 * is forbidden to set SA_NOCLDWAIT.
 				 */
 				if (p->p_pid == 1)
 					ps->ps_flag &= ~PS_NOCLDWAIT;
 				else
 					ps->ps_flag |= PS_NOCLDWAIT;
 			} else
 				ps->ps_flag &= ~PS_NOCLDWAIT;
 			if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN)
 				ps->ps_flag |= PS_CLDSIGIGN;
 			else
 				ps->ps_flag &= ~PS_CLDSIGIGN;
 		}
 		/*
 		 * Set bit in ps_sigignore for signals that are set to SIG_IGN,
 		 * and for signals set to SIG_DFL where the default is to
 		 * ignore. However, don't put SIGCONT in ps_sigignore, as we
 		 * have to restart the process.
 		 */
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    (sigprop(sig) & SIGPROP_IGNORE &&
 		     ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)) {
 			/* never to be seen again */
 			sigqueue_delete_proc(p, sig);
 			if (sig != SIGCONT)
 				/* easier in psignal */
 				SIGADDSET(ps->ps_sigignore, sig);
 			SIGDELSET(ps->ps_sigcatch, sig);
 		} else {
 			SIGDELSET(ps->ps_sigignore, sig);
 			if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)
 				SIGDELSET(ps->ps_sigcatch, sig);
 			else
 				SIGADDSET(ps->ps_sigcatch, sig);
 		}
 		/*
 		 * The compat sets below mark the signal only when a real
 		 * handler was installed through the matching compat entry
 		 * point.
 		 */
 #ifdef COMPAT_FREEBSD4
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL ||
 		    (flags & KSA_FREEBSD4) == 0)
 			SIGDELSET(ps->ps_freebsd4, sig);
 		else
 			SIGADDSET(ps->ps_freebsd4, sig);
 #endif
 #ifdef COMPAT_43
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL ||
 		    (flags & KSA_OSIGSET) == 0)
 			SIGDELSET(ps->ps_osigset, sig);
 		else
 			SIGADDSET(ps->ps_osigset, sig);
 #endif
 	}
 	mtx_unlock(&ps->ps_mtx);
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigaction_args {
 	int	sig;
 	struct	sigaction *act;
 	struct	sigaction *oact;
 };
 #endif
 int
 sys_sigaction(struct thread *td, struct sigaction_args *uap)
 {
 	struct sigaction act, oact;
 	struct sigaction *actp, *oactp;
 	int error;
 
 	actp = (uap->act != NULL) ? &act : NULL;
 	oactp = (uap->oact != NULL) ? &oact : NULL;
 	if (actp) {
 		error = copyin(uap->act, actp, sizeof(act));
 		if (error)
 			return (error);
 	}
 	error = kern_sigaction(td, uap->sig, actp, oactp, 0);
 	if (oactp && !error)
 		error = copyout(oactp, uap->oact, sizeof(oact));
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_sigaction_args {
 	int	sig;
 	struct	sigaction *act;
 	struct	sigaction *oact;
 };
 #endif
 int
 freebsd4_sigaction(struct thread *td, struct freebsd4_sigaction_args *uap)
 {
 	struct sigaction act, oact;
 	struct sigaction *actp, *oactp;
 	int error;
 
 	actp = (uap->act != NULL) ? &act : NULL;
 	oactp = (uap->oact != NULL) ? &oact : NULL;
 	if (actp) {
 		error = copyin(uap->act, actp, sizeof(act));
 		if (error)
 			return (error);
 	}
 	error = kern_sigaction(td, uap->sig, actp, oactp, KSA_FREEBSD4);
 	if (oactp && !error)
 		error = copyout(oactp, uap->oact, sizeof(oact));
 	return (error);
 }
 #endif	/* COMPAT_FREEBSD4 */
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 #ifndef _SYS_SYSPROTO_H_
 struct osigaction_args {
 	int	signum;
 	struct	osigaction *nsa;
 	struct	osigaction *osa;
 };
 #endif
 int
 osigaction(struct thread *td, struct osigaction_args *uap)
 {
 	struct osigaction sa;
 	struct sigaction nsa, osa;
 	struct sigaction *nsap, *osap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	nsap = (uap->nsa != NULL) ? &nsa : NULL;
 	osap = (uap->osa != NULL) ? &osa : NULL;
 
 	if (nsap) {
 		error = copyin(uap->nsa, &sa, sizeof(sa));
 		if (error)
 			return (error);
 		nsap->sa_handler = sa.sa_handler;
 		nsap->sa_flags = sa.sa_flags;
 		OSIG2SIG(sa.sa_mask, nsap->sa_mask);
 	}
 	error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET);
 	if (osap && !error) {
 		sa.sa_handler = osap->sa_handler;
 		sa.sa_flags = osap->sa_flags;
 		SIG2OSIG(osap->sa_mask, sa.sa_mask);
 		error = copyout(&sa, uap->osa, sizeof(sa));
 	}
 	return (error);
 }
 
 #if !defined(__i386__)
 /*
  * Shared stub: forward the call to nosys() so that each platform does
  * not have to replicate the same stub.
  */
 int
 osigreturn(struct thread *td, struct osigreturn_args *uap)
 {
 	struct nosys_args *nargs;
 
 	nargs = (struct nosys_args *)uap;
 	return (nosys(td, nargs));
 }
 #endif
 #endif /* COMPAT_43 */
 
 /*
  * Initialize signal state for process 0: every signal whose default
  * property is "ignore" is added to ps_sigignore, with the exception of
  * SIGCONT.  Takes and releases both the process lock and ps_mtx.
  */
 void
 siginit(struct proc *p)
 {
 	struct sigacts *ps;
 	int sig;
 
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	for (sig = 1; sig <= NSIG; sig++) {
 		if ((sigprop(sig) & SIGPROP_IGNORE) == 0 || sig == SIGCONT)
 			continue;
 		SIGADDSET(ps->ps_sigignore, sig);
 	}
 	mtx_unlock(&ps->ps_mtx);
 	PROC_UNLOCK(p);
 }
 
 /*
  * Reset specified signal to the default disposition.
  */
 static void
 sigdflt(struct sigacts *ps, int sig)
 {
 
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	SIGDELSET(ps->ps_sigcatch, sig);
 	if ((sigprop(sig) & SIGPROP_IGNORE) != 0 && sig != SIGCONT)
 		SIGADDSET(ps->ps_sigignore, sig);
 	ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 	SIGDELSET(ps->ps_siginfo, sig);
 }
 
 /*
  * Reset signals for an exec of the specified process.
  *
  * The caller must already hold the process lock (asserted below);
  * ps_mtx is acquired and released inside this function.  The function
  * operates on curthread and asserts that it belongs to `p'.
  */
 void
 execsigs(struct proc *p)
 {
 	sigset_t osigignore;
 	struct sigacts *ps;
 	int sig;
 	struct thread *td;
 
 	/*
 	 * Reset caught signals.  Held signals remain held
 	 * through td_sigmask (unless they were caught,
 	 * and are now ignored by default).
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	sig_drop_caught(p);
 
 	/*
 	 * As CloudABI processes cannot modify signal handlers, fully
 	 * reset all signals to their default behavior. Do ignore
 	 * SIGPIPE, as it would otherwise be impossible to recover from
 	 * writes to broken pipes and sockets.
 	 */
 	if (SV_PROC_ABI(p) == SV_ABI_CLOUDABI) {
 		/*
 		 * Walk a private copy of the ignore set so that sigdflt()
 		 * can modify ps->ps_sigignore while we iterate.
 		 */
 		osigignore = ps->ps_sigignore;
 		while (SIGNOTEMPTY(osigignore)) {
 			sig = sig_ffs(&osigignore);
 			SIGDELSET(osigignore, sig);
 			if (sig != SIGPIPE)
 				sigdflt(ps, sig);
 		}
 		SIGADDSET(ps->ps_sigignore, SIGPIPE);
 	}
 
 	/*
 	 * Reset stack state to the user stack.
 	 * Clear set of signals caught on the signal stack.
 	 */
 	td = curthread;
 	MPASS(td->td_proc == p);
 	td->td_sigstk.ss_flags = SS_DISABLE;
 	td->td_sigstk.ss_size = 0;
 	td->td_sigstk.ss_sp = 0;
 	td->td_pflags &= ~TDP_ALTSTACK;
 	/*
 	 * Reset no zombies if child dies flag as Solaris does.
 	 */
 	ps->ps_flag &= ~(PS_NOCLDWAIT | PS_CLDSIGIGN);
 	/* An explicitly ignored SIGCHLD goes back to the default action. */
 	if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN)
 		ps->ps_sigact[_SIG_IDX(SIGCHLD)] = SIG_DFL;
 	mtx_unlock(&ps->ps_mtx);
 }
 
 /*
  * kern_sigprocmask()
  *
  *	Manipulate signal mask.
  *
  *	td	thread whose td_sigmask is examined and/or changed.
  *	how	SIG_BLOCK, SIG_UNBLOCK or SIG_SETMASK; only consulted
  *		when `set' is non-NULL.
  *	set	signals to apply, or NULL to leave the mask unchanged.
  *		Note that SIG_CANTMASK() is applied to *set in place for
  *		SIG_BLOCK and SIG_SETMASK.
  *	oset	if non-NULL, receives the mask as it was on entry.  It is
  *		written before `how' is validated, so it is filled in
  *		even when EINVAL is returned.
  *	flags	SIGPROCMASK_PROC_LOCKED: the caller already holds the
  *		process lock and it is left held on return.
  *		SIGPROCMASK_PS_LOCKED: the caller holds ps_mtx (only
  *		asserted here; also passed on to reschedule_signals()).
  *		SIGPROCMASK_OLD: for SIG_SETMASK, install the new mask
  *		with SIGSETLO() instead of a full assignment.
  *
  *	Returns 0 on success or EINVAL for an unrecognized `how'.
  */
 int
 kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset,
     int flags)
 {
 	sigset_t new_block, oset1;
 	struct proc *p;
 	int error;
 
 	p = td->td_proc;
 	if ((flags & SIGPROCMASK_PROC_LOCKED) != 0)
 		PROC_LOCK_ASSERT(p, MA_OWNED);
 	else
 		PROC_LOCK(p);
 	mtx_assert(&p->p_sigacts->ps_mtx, (flags & SIGPROCMASK_PS_LOCKED) != 0
 	    ? MA_OWNED : MA_NOTOWNED);
 	if (oset != NULL)
 		*oset = td->td_sigmask;
 
 	error = 0;
 	if (set != NULL) {
 		switch (how) {
 		case SIG_BLOCK:
 			SIG_CANTMASK(*set);
 			oset1 = td->td_sigmask;
 			SIGSETOR(td->td_sigmask, *set);
 			/* new_block = signals blocked now but not before. */
 			new_block = td->td_sigmask;
 			SIGSETNAND(new_block, oset1);
 			break;
 		case SIG_UNBLOCK:
 			/*
 			 * Unblocking cannot newly block anything, so the
 			 * rescheduling step below is skipped.
 			 */
 			SIGSETNAND(td->td_sigmask, *set);
 			signotify(td);
 			goto out;
 		case SIG_SETMASK:
 			SIG_CANTMASK(*set);
 			oset1 = td->td_sigmask;
 			if (flags & SIGPROCMASK_OLD)
 				SIGSETLO(td->td_sigmask, *set);
 			else
 				td->td_sigmask = *set;
 			new_block = td->td_sigmask;
 			SIGSETNAND(new_block, oset1);
 			signotify(td);
 			break;
 		default:
 			error = EINVAL;
 			goto out;
 		}
 
 		/*
 		 * The new_block set contains signals that were not previously
 		 * blocked, but are blocked now.
 		 *
 		 * In case we block any signal that was not previously blocked
 		 * for td, and process has the signal pending, try to schedule
 		 * signal delivery to some thread that does not block the
 		 * signal, possibly waking it up.
 		 */
 		if (p->p_numthreads != 1)
 			reschedule_signals(p, new_block, flags);
 	}
 
 out:
 	/* Only drop the process lock if it was taken here. */
 	if (!(flags & SIGPROCMASK_PROC_LOCKED))
 		PROC_UNLOCK(p);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigprocmask_args {
 	int	how;
 	const sigset_t *set;
 	sigset_t *oset;
 };
 #endif
 int
 sys_sigprocmask(struct thread *td, struct sigprocmask_args *uap)
 {
 	sigset_t set, oset;
 	sigset_t *setp, *osetp;
 	int error;
 
 	setp = (uap->set != NULL) ? &set : NULL;
 	osetp = (uap->oset != NULL) ? &oset : NULL;
 	if (setp) {
 		error = copyin(uap->set, setp, sizeof(set));
 		if (error)
 			return (error);
 	}
 	error = kern_sigprocmask(td, uap->how, setp, osetp, 0);
 	if (osetp && !error) {
 		error = copyout(osetp, uap->oset, sizeof(oset));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 #ifndef _SYS_SYSPROTO_H_
 struct osigprocmask_args {
 	int	how;
 	osigset_t mask;
 };
 #endif
 int
 osigprocmask(struct thread *td, struct osigprocmask_args *uap)
 {
 	sigset_t set, oset;
 	int error;
 
 	OSIG2SIG(uap->mask, set);
 	error = kern_sigprocmask(td, uap->how, &set, &oset, 1);
 	SIG2OSIG(oset, td->td_retval[0]);
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 int
 sys_sigwait(struct thread *td, struct sigwait_args *uap)
 {
 	ksiginfo_t ksi;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error) {
 		td->td_retval[0] = error;
 		return (0);
 	}
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error) {
 		if (error == EINTR && td->td_proc->p_osrel < P_OSREL_SIGWAIT)
 			error = ERESTART;
 		if (error == ERESTART)
 			return (error);
 		td->td_retval[0] = error;
 		return (0);
 	}
 
 	error = copyout(&ksi.ksi_signo, uap->sig, sizeof(ksi.ksi_signo));
 	td->td_retval[0] = error;
 	return (0);
 }
 
 int
 sys_sigtimedwait(struct thread *td, struct sigtimedwait_args *uap)
 {
 	struct timespec ts;
 	struct timespec *timeout;
 	sigset_t set;
 	ksiginfo_t ksi;
 	int error;
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts, sizeof(ts));
 		if (error)
 			return (error);
 
 		timeout = &ts;
 	} else
 		timeout = NULL;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, timeout);
 	if (error)
 		return (error);
 
 	if (uap->info)
 		error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t));
 
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 int
 sys_sigwaitinfo(struct thread *td, struct sigwaitinfo_args *uap)
 {
 	ksiginfo_t ksi;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error)
 		return (error);
 
 	if (uap->info)
 		error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t));
 
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 /*
  * Record `si' in td->td_si and clear td_si.si_signo in every other
  * thread of td's process.
  */
 static void
 proc_td_siginfo_capture(struct thread *td, siginfo_t *si)
 {
 	struct thread *other;
 
 	FOREACH_THREAD_IN_PROC(td->td_proc, other) {
 		if (other != td) {
 			other->td_si.si_signo = 0;
 			continue;
 		}
 		other->td_si = *si;
 	}
 }
 
 /*
  * Wait for one of the signals in `waitset' to become pending for the
  * thread or its process and dequeue it into `ksi'.
  *
  *	td	waiting thread.
  *	waitset	signals to wait for (passed by value; SIG_CANTMASK() is
  *		applied to the local copy).
  *	ksi	initialized here and filled in with the dequeued signal.
  *	timeout	relative timeout, or NULL to wait indefinitely.
  *
  * The signals in waitset are temporarily removed from td_sigmask for
  * the duration of the wait and the saved mask is restored afterwards.
  *
  * Returns 0 when a signal was dequeued, EINVAL when the timeout's
  * tv_nsec is out of range (checked only after looking for pending
  * signals), EAGAIN when the timeout expires, EINTR when syscall tracing
  * was enabled during the wait, or the (possibly translated) msleep()
  * error.
  */
 int
 kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi,
 	struct timespec *timeout)
 {
 	struct sigacts *ps;
 	sigset_t saved_mask, new_block;
 	struct proc *p;
 	int error, sig, timo, timevalid = 0;
 	struct timespec rts, ets, ts;
 	struct timeval tv;
 	bool traced;
 
 	p = td->td_proc;
 	error = 0;
 	ets.tv_sec = 0;
 	ets.tv_nsec = 0;
 	traced = false;
 
 	/* Compute the absolute (uptime based) deadline in ets, if valid. */
 	if (timeout != NULL) {
 		if (timeout->tv_nsec >= 0 && timeout->tv_nsec < 1000000000) {
 			timevalid = 1;
 			getnanouptime(&rts);
 			timespecadd(&rts, timeout, &ets);
 		}
 	}
 	ksiginfo_init(ksi);
 	/* Some signals can not be waited for. */
 	SIG_CANTMASK(waitset);
 	ps = p->p_sigacts;
 	PROC_LOCK(p);
 	saved_mask = td->td_sigmask;
 	SIGSETNAND(td->td_sigmask, waitset);
 	for (;;) {
 		mtx_lock(&ps->ps_mtx);
 		sig = cursig(td);
 		mtx_unlock(&ps->ps_mtx);
 		KASSERT(sig >= 0, ("sig %d", sig));
 		if (sig != 0 && SIGISMEMBER(waitset, sig)) {
 			/* Try the thread queue first, then the process queue. */
 			if (sigqueue_get(&td->td_sigqueue, sig, ksi) != 0 ||
 			    sigqueue_get(&p->p_sigqueue, sig, ksi) != 0) {
 				error = 0;
 				break;
 			}
 		}
 
 		/* An error left over from the previous msleep() ends the wait. */
 		if (error != 0)
 			break;
 
 		/*
 		 * POSIX says this must be checked after looking for pending
 		 * signals.
 		 */
 		if (timeout != NULL) {
 			if (!timevalid) {
 				error = EINVAL;
 				break;
 			}
 			getnanouptime(&rts);
 			if (timespeccmp(&rts, &ets, >=)) {
 				error = EAGAIN;
 				break;
 			}
 			/* Convert the remaining time into ticks for msleep(). */
 			timespecsub(&ets, &rts, &ts);
 			TIMESPEC_TO_TIMEVAL(&tv, &ts);
 			timo = tvtohz(&tv);
 		} else {
 			timo = 0;
 		}
 
 		if (traced) {
 			error = EINTR;
 			break;
 		}
 
 		error = msleep(ps, &p->p_mtx, PPAUSE|PCATCH, "sigwait", timo);
 
 		if (timeout != NULL) {
 			if (error == ERESTART) {
 				/* Timeout can not be restarted. */
 				error = EINTR;
 			} else if (error == EAGAIN) {
 				/* We will calculate timeout by ourself. */
 				error = 0;
 			}
 		}
 
 		/*
 		 * If PTRACE_SCE or PTRACE_SCX were set after
 		 * userspace entered the syscall, return spurious
 		 * EINTR after wait was done.  Only do this as last
 		 * resort after rechecking for possible queued signals
 		 * and expired timeouts.
 		 */
 		if (error == 0 && (p->p_ptevents & PTRACE_SYSCALL) != 0)
 			traced = true;
 	}
 
 	/*
 	 * new_block = signals that were unblocked for the wait and become
 	 * blocked again once the saved mask is restored.
 	 */
 	new_block = saved_mask;
 	SIGSETNAND(new_block, td->td_sigmask);
 	td->td_sigmask = saved_mask;
 	/*
 	 * Fewer signals can be delivered to us, reschedule signal
 	 * notification.
 	 */
 	if (p->p_numthreads != 1)
 		reschedule_signals(p, new_block, 0);
 
 	if (error == 0) {
 		SDT_PROBE2(proc, , , signal__clear, sig, ksi);
 
 		if (ksi->ksi_code == SI_TIMER)
 			itimer_accept(p, ksi->ksi_timerid, ksi);
 
 #ifdef KTRACE
 		if (KTRPOINT(td, KTR_PSIG)) {
 			sig_t action;
 
 			mtx_lock(&ps->ps_mtx);
 			action = ps->ps_sigact[_SIG_IDX(sig)];
 			mtx_unlock(&ps->ps_mtx);
 			ktrpsig(sig, action, &td->td_sigmask, ksi->ksi_code);
 		}
 #endif
 		/* A dequeued SIGKILL is acted upon right here. */
 		if (sig == SIGKILL) {
 			proc_td_siginfo_capture(td, &ksi->ksi_info);
 			sigexit(td, sig);
 		}
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigpending_args {
 	sigset_t	*set;
 };
 #endif
 /*
  * sigpending system call entry point: copy out the union of the
  * signals recorded in the process queue and in the calling thread's
  * queue.  The snapshot is taken under the process lock.
  */
 int
 sys_sigpending(struct thread *td, struct sigpending_args *uap)
 {
 	struct proc *p;
 	sigset_t pending;
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	pending = td->td_sigqueue.sq_signals;
 	SIGSETOR(pending, p->p_sigqueue.sq_signals);
 	PROC_UNLOCK(p);
 	return (copyout(&pending, uap->set, sizeof(sigset_t)));
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 #ifndef _SYS_SYSPROTO_H_
 struct osigpending_args {
 	int	dummy;
 };
 #endif
 /*
  * Old-style sigpending: the union of the process and thread pending
  * sets is converted with SIG2OSIG() and returned in td_retval[0].
  * Always returns 0.
  */
 int
 osigpending(struct thread *td, struct osigpending_args *uap)
 {
 	struct proc *p;
 	sigset_t pending;
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	pending = td->td_sigqueue.sq_signals;
 	SIGSETOR(pending, p->p_sigqueue.sq_signals);
 	PROC_UNLOCK(p);
 	SIG2OSIG(pending, td->td_retval[0]);
 	return (0);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_43)
 /*
  * Generalized interface signal handler, 4.3-compatible.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct osigvec_args {
 	int	signum;
 	struct	sigvec *nsv;
 	struct	sigvec *osv;
 };
 #endif
 /* ARGSUSED */
 int
 osigvec(struct thread *td, struct osigvec_args *uap)
 {
 	struct sigvec vec;
 	struct sigaction nsa, osa;
 	struct sigaction *nsap, *osap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 	nsap = (uap->nsv != NULL) ? &nsa : NULL;
 	osap = (uap->osv != NULL) ? &osa : NULL;
 	if (nsap) {
 		error = copyin(uap->nsv, &vec, sizeof(vec));
 		if (error)
 			return (error);
 		nsap->sa_handler = vec.sv_handler;
 		OSIG2SIG(vec.sv_mask, nsap->sa_mask);
 		nsap->sa_flags = vec.sv_flags;
 		nsap->sa_flags ^= SA_RESTART;	/* opposite of SV_INTERRUPT */
 	}
 	error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET);
 	if (osap && !error) {
 		vec.sv_handler = osap->sa_handler;
 		SIG2OSIG(osap->sa_mask, vec.sv_mask);
 		vec.sv_flags = osap->sa_flags;
 		vec.sv_flags &= ~SA_NOCLDWAIT;
 		vec.sv_flags ^= SA_RESTART;
 		error = copyout(&vec, uap->osv, sizeof(vec));
 	}
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct osigblock_args {
 	int	mask;
 };
 #endif
 /*
  * Old-style sigblock: add the signals in uap->mask to the thread's
  * blocked set and return the previous mask, in old format, through
  * td_retval[0].  The result of kern_sigprocmask() is not examined;
  * the function always returns 0.
  */
 int
 osigblock(struct thread *td, struct osigblock_args *uap)
 {
 	sigset_t newset, oldset;
 
 	OSIG2SIG(uap->mask, newset);
 	(void)kern_sigprocmask(td, SIG_BLOCK, &newset, &oldset, 0);
 	SIG2OSIG(oldset, td->td_retval[0]);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct osigsetmask_args {
 	int	mask;
 };
 #endif
 /*
  * Old-style sigsetmask: install uap->mask as the thread's signal mask
  * and return the previous mask, in old format, through td_retval[0].
  * The result of kern_sigprocmask() is not examined; the function
  * always returns 0.
  */
 int
 osigsetmask(struct thread *td, struct osigsetmask_args *uap)
 {
 	sigset_t newset, oldset;
 
 	OSIG2SIG(uap->mask, newset);
 	(void)kern_sigprocmask(td, SIG_SETMASK, &newset, &oldset, 0);
 	SIG2OSIG(oldset, td->td_retval[0]);
 	return (0);
 }
 #endif /* COMPAT_43 */
 
 /*
  * Suspend calling thread until signal, providing mask to be set in the
  * meantime.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct sigsuspend_args {
 	const sigset_t *sigmask;
 };
 #endif
 /* ARGSUSED */
 int
 sys_sigsuspend(struct thread *td, struct sigsuspend_args *uap)
 {
 	sigset_t mask;
 	int error;
 
 	error = copyin(uap->sigmask, &mask, sizeof(mask));
 	if (error)
 		return (error);
 	return (kern_sigsuspend(td, mask));
 }
 
 /*
  * Install `mask' as the thread's signal mask and sleep until a signal
  * has been posted to the thread via postsig() (or syscall tracing has
  * been enabled on the process).
  *
  * The previous mask is stashed in td_oldsigmask and TDP_OLDMASK is set.
  * The syscall return value is preset to EINTR, td_errno is set to EINTR
  * with TDP_NERRNO, and the function always returns EJUSTRETURN.
  */
 int
 kern_sigsuspend(struct thread *td, sigset_t mask)
 {
 	struct proc *p = td->td_proc;
 	int has_sig, sig;
 
 	/*
 	 * When returning from sigsuspend, we want
 	 * the old mask to be restored after the
 	 * signal handler has finished.  Thus, we
 	 * save it here and mark the sigacts structure
 	 * to indicate this.
 	 */
 	PROC_LOCK(p);
 	kern_sigprocmask(td, SIG_SETMASK, &mask, &td->td_oldsigmask,
 	    SIGPROCMASK_PROC_LOCKED);
 	td->td_pflags |= TDP_OLDMASK;
 
 	/*
 	 * Process signals now. Otherwise, we can get spurious wakeup
 	 * due to signal entered process queue, but delivered to other
 	 * thread. But sigsuspend should return only on signal
 	 * delivery.
 	 */
 	(p->p_sysent->sv_set_syscall_retval)(td, EINTR);
 	for (has_sig = 0; !has_sig;) {
 		/* Keep sleeping for as long as msleep() reports success. */
 		while (msleep(&p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH, "pause",
 			0) == 0)
 			/* void */;
 		thread_suspend_check(0);
 		mtx_lock(&p->p_sigacts->ps_mtx);
 		/* Post every signal cursig() reports, summing postsig() results. */
 		while ((sig = cursig(td)) != 0) {
 			KASSERT(sig >= 0, ("sig %d", sig));
 			has_sig += postsig(sig);
 		}
 		mtx_unlock(&p->p_sigacts->ps_mtx);
 
 		/*
 		 * If PTRACE_SCE or PTRACE_SCX were set after
 		 * userspace entered the syscall, return spurious
 		 * EINTR.
 		 */
 		if ((p->p_ptevents & PTRACE_SYSCALL) != 0)
 			has_sig += 1;
 	}
 	PROC_UNLOCK(p);
 	td->td_errno = EINTR;
 	td->td_pflags |= TDP_NERRNO;
 	return (EJUSTRETURN);
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 /*
  * Compatibility sigsuspend call for old binaries.  Note nonstandard calling
  * convention: libc stub passes mask, not pointer, to save a copyin.
  * The old-format mask is widened with OSIG2SIG() and passed on to
  * kern_sigsuspend().
  */
 #ifndef _SYS_SYSPROTO_H_
 struct osigsuspend_args {
 	osigset_t mask;
 };
 #endif
 /* ARGSUSED */
 int
 osigsuspend(struct thread *td, struct osigsuspend_args *uap)
 {
 	sigset_t newmask;
 
 	OSIG2SIG(uap->mask, newmask);
 	return (kern_sigsuspend(td, newmask));
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_43)
 #ifndef _SYS_SYSPROTO_H_
 struct osigstack_args {
 	struct	sigstack *nss;
 	struct	sigstack *oss;
 };
 #endif
 /* ARGSUSED */
 int
 osigstack(struct thread *td, struct osigstack_args *uap)
 {
 	struct sigstack nss, oss;
 	int error = 0;
 
 	if (uap->nss != NULL) {
 		error = copyin(uap->nss, &nss, sizeof(nss));
 		if (error)
 			return (error);
 	}
 	oss.ss_sp = td->td_sigstk.ss_sp;
 	oss.ss_onstack = sigonstack(cpu_getstack(td));
 	if (uap->nss != NULL) {
 		td->td_sigstk.ss_sp = nss.ss_sp;
 		td->td_sigstk.ss_size = 0;
 		td->td_sigstk.ss_flags |= nss.ss_onstack & SS_ONSTACK;
 		td->td_pflags |= TDP_ALTSTACK;
 	}
 	if (uap->oss != NULL)
 		error = copyout(&oss, uap->oss, sizeof(oss));
 
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigaltstack_args {
 	stack_t	*ss;
 	stack_t	*oss;
 };
 #endif
 /* ARGSUSED */
 int
 sys_sigaltstack(struct thread *td, struct sigaltstack_args *uap)
 {
 	stack_t ss, oss;
 	int error;
 
 	if (uap->ss != NULL) {
 		error = copyin(uap->ss, &ss, sizeof(ss));
 		if (error)
 			return (error);
 	}
 	error = kern_sigaltstack(td, (uap->ss != NULL) ? &ss : NULL,
 	    (uap->oss != NULL) ? &oss : NULL);
 	if (error)
 		return (error);
 	if (uap->oss != NULL)
 		error = copyout(&oss, uap->oss, sizeof(stack_t));
 	return (error);
 }
 
 /*
  * Query and/or change the alternate signal stack of `td'.
  *
  *	oss	if non-NULL, receives the current td_sigstk with ss_flags
  *		rewritten to SS_DISABLE (no alternate stack enabled),
  *		SS_ONSTACK (currently running on it) or 0.
  *	ss	if non-NULL, the new setting to install.
  *
  * Returns EPERM when a change is requested while running on the
  * alternate stack, EINVAL when ss_flags has bits other than SS_DISABLE,
  * ENOMEM when the new stack is smaller than sv_minsigstksz, and 0
  * otherwise.  *oss is filled in before `ss' is validated.
  */
 int
 kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss)
 {
 	struct proc *p;
 	int onaltstack;
 
 	p = td->td_proc;
 	onaltstack = sigonstack(cpu_getstack(td));
 
 	if (oss != NULL) {
 		*oss = td->td_sigstk;
 		if ((td->td_pflags & TDP_ALTSTACK) == 0)
 			oss->ss_flags = SS_DISABLE;
 		else if (onaltstack)
 			oss->ss_flags = SS_ONSTACK;
 		else
 			oss->ss_flags = 0;
 	}
 
 	if (ss == NULL)
 		return (0);
 	if (onaltstack)
 		return (EPERM);
 	if ((ss->ss_flags & ~SS_DISABLE) != 0)
 		return (EINVAL);
 	if ((ss->ss_flags & SS_DISABLE) != 0) {
 		td->td_pflags &= ~TDP_ALTSTACK;
 		return (0);
 	}
 	if (ss->ss_size < p->p_sysent->sv_minsigstksz)
 		return (ENOMEM);
 
 	td->td_sigstk = *ss;
 	td->td_pflags |= TDP_ALTSTACK;
 	return (0);
 }
 
 /*
  * State shared between killpg1() and killpg1_sendsig() while a signal
  * is delivered to a set of processes.
  */
 struct killpg1_ctx {
 	struct thread *td;	/* sending thread, used for permission checks */
 	ksiginfo_t *ksi;	/* siginfo passed on to pksignal() */
 	int sig;		/* signal number; 0 means check only */
 	bool sent;		/* p_cansignal() succeeded for some process */
 	bool found;		/* some process did not yield ESRCH */
 	int ret;		/* first error other than ESRCH/EPERM, or 0 */
 };
 
 /*
  * Try to signal a single process on behalf of killpg1() and fold the
  * outcome into `arg'.
  *
  * Processes with pid <= 1, system processes, processes still in
  * PRS_NEW and (when `notself' is set) the sender's own process are
  * skipped without touching `arg'.  A signal number of 0 only performs
  * the permission check.
  */
 static void
 killpg1_sendsig(struct proc *p, bool notself, struct killpg1_ctx *arg)
 {
 	int err;
 
 	if (p->p_pid <= 1 || (p->p_flag & P_SYSTEM) != 0 ||
 	    (notself && p == arg->td->td_proc) || p->p_state == PRS_NEW)
 		return;
 
 	PROC_LOCK(p);
 	err = p_cansignal(arg->td, p, arg->sig);
 	if (err == 0 && arg->sig != 0)
 		pksignal(p, arg->sig, arg->ksi);
 	PROC_UNLOCK(p);
 
 	switch (err) {
 	case 0:
 		arg->found = true;
 		arg->sent = true;
 		break;
 	case ESRCH:
 		/* Treated as if the process was not there at all. */
 		break;
 	case EPERM:
 		arg->found = true;
 		break;
 	default:
 		arg->found = true;
 		/* Remember only the first such error. */
 		if (arg->ret == 0)
 			arg->ret = err;
 		break;
 	}
 }
 
 /*
  * Common code for kill process group/broadcast kill.
  *
  *	td	sending thread.
  *	sig	signal number (0 performs permission checks only).
  *	pgid	target process group; 0 selects the sender's own group.
  *		Ignored when `all' is set.
  *	all	non-zero to broadcast to every process in the system
  *		except the sender's own.
  *	ksi	siginfo handed to each delivery.
  *
  * Returns 0 if at least one process could be signalled, the first error
  * other than ESRCH/EPERM reported by p_cansignal(), EPERM if candidate
  * processes were found but none could be signalled, or ESRCH if no
  * candidate (or no such process group) was found.
  */
 static int
 killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi)
 {
 	struct proc *p;
 	struct pgrp *pgrp;
 	struct killpg1_ctx arg;
 
 	arg.td = td;
 	arg.ksi = ksi;
 	arg.sig = sig;
 	arg.sent = false;
 	arg.found = false;
 	arg.ret = 0;
 	if (all) {
 		/*
 		 * broadcast
 		 */
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			killpg1_sendsig(p, true, &arg);
 		}
 		sx_sunlock(&allproc_lock);
 	} else {
 		sx_slock(&proctree_lock);
 		if (pgid == 0) {
 			/*
 			 * zero pgid means send to my process group.
 			 */
 			pgrp = td->td_proc->p_pgrp;
 			PGRP_LOCK(pgrp);
 		} else {
 			/*
 			 * NOTE(review): pgfind() presumably returns the
 			 * group locked, matching the PGRP_UNLOCK() below --
 			 * confirm against its definition.
 			 */
 			pgrp = pgfind(pgid);
 			if (pgrp == NULL) {
 				sx_sunlock(&proctree_lock);
 				return (ESRCH);
 			}
 		}
 		sx_sunlock(&proctree_lock);
 		LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
 			killpg1_sendsig(p, false, &arg);
 		}
 		PGRP_UNLOCK(pgrp);
 	}
 	MPASS(arg.ret != 0 || arg.found || !arg.sent);
 	/* Nothing signalled and no hard error: pick EPERM or ESRCH. */
 	if (arg.ret == 0 && !arg.sent)
 		arg.ret = arg.found ? EPERM : ESRCH;
 	return (arg.ret);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct kill_args {
 	int	pid;
 	int	signum;
 };
 #endif
 /*
  * kill system call entry point: unpack the arguments and defer to
  * kern_kill(), returning its result unchanged.
  */
 /* ARGSUSED */
 int
 sys_kill(struct thread *td, struct kill_args *uap)
 {
 
 	return (kern_kill(td, uap->pid, uap->signum));
 }
 
 /*
  * Send `signum' on behalf of `td' to the target selected by `pid':
  *
  *	pid > 0		that single process;
  *	pid == 0	the sender's own process group;
  *	pid == -1	broadcast;
  *	pid < -1	the process group -pid.
  *
  * A signal number of 0 performs the permission check only.  Returns
  * ECAPMODE when a capability-mode process targets anything but its own
  * pid, EINVAL for a signal number above _SIG_MAXSIG, ESRCH when the
  * single target does not exist, otherwise the result of the permission
  * check or of killpg1().
  */
 int
 kern_kill(struct thread *td, pid_t pid, int signum)
 {
 	ksiginfo_t ksi;
 	struct proc *target;
 	int error;
 
 	/*
 	 * A process in capability mode can send signals only to itself.
 	 * The main rationale behind this is that abort(3) is implemented as
 	 * kill(getpid(), SIGABRT).
 	 */
 	if (IN_CAPABILITY_MODE(td) && pid != td->td_proc->p_pid)
 		return (ECAPMODE);
 
 	AUDIT_ARG_SIGNUM(signum);
 	AUDIT_ARG_PID(pid);
 	if ((u_int)signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	ksiginfo_init(&ksi);
 	ksi.ksi_signo = signum;
 	ksi.ksi_code = SI_USER;
 	ksi.ksi_pid = td->td_proc->p_pid;
 	ksi.ksi_uid = td->td_ucred->cr_ruid;
 
 	if (pid > 0) {
 		/* Single process target. */
 		target = pfind_any(pid);
 		if (target == NULL)
 			return (ESRCH);
 		AUDIT_ARG_PROCESS(target);
 		error = p_cansignal(td, target, signum);
 		if (error == 0 && signum != 0)
 			pksignal(target, signum, &ksi);
 		PROC_UNLOCK(target);
 		return (error);
 	}
 
 	/* Broadcast. */
 	if (pid == -1)
 		return (killpg1(td, signum, 0, 1, &ksi));
 	/* The sender's own process group. */
 	if (pid == 0)
 		return (killpg1(td, signum, 0, 0, &ksi));
 	/* Explicit process group, given as a negative number. */
 	return (killpg1(td, signum, -pid, 0, &ksi));
 }
 
 int
 sys_pdkill(struct thread *td, struct pdkill_args *uap)
 {
 	struct proc *p;
 	int error;
 
 	AUDIT_ARG_SIGNUM(uap->signum);
 	AUDIT_ARG_FD(uap->fd);
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	error = procdesc_find(td, uap->fd, &cap_pdkill_rights, &p);
 	if (error)
 		return (error);
 	AUDIT_ARG_PROCESS(p);
 	error = p_cansignal(td, p, uap->signum);
 	if (error == 0 && uap->signum)
 		kern_psignal(p, uap->signum);
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 #if defined(COMPAT_43)
 #ifndef _SYS_SYSPROTO_H_
 struct okillpg_args {
 	int	pgid;
 	int	signum;
 };
 #endif
 /*
  * Old-style killpg entry point: send uap->signum to the process group
  * uap->pgid through killpg1(), with a siginfo marked SI_USER and
  * carrying the sender's pid and real uid.  Returns EINVAL for a signal
  * number above _SIG_MAXSIG.
  */
 /* ARGSUSED */
 int
 okillpg(struct thread *td, struct okillpg_args *uap)
 {
 	ksiginfo_t user_ksi;
 
 	AUDIT_ARG_SIGNUM(uap->signum);
 	AUDIT_ARG_PID(uap->pgid);
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	ksiginfo_init(&user_ksi);
 	user_ksi.ksi_code = SI_USER;
 	user_ksi.ksi_signo = uap->signum;
 	user_ksi.ksi_uid = td->td_ucred->cr_ruid;
 	user_ksi.ksi_pid = td->td_proc->p_pid;
 	return (killpg1(td, uap->signum, uap->pgid, 0, &user_ksi));
 }
 #endif /* COMPAT_43 */
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigqueue_args {
 	pid_t pid;
 	int signum;
 	/* union sigval */ void *value;
 };
 #endif
 int
 sys_sigqueue(struct thread *td, struct sigqueue_args *uap)
 {
 	union sigval sv;
 
 	sv.sival_ptr = uap->value;
 
 	return (kern_sigqueue(td, uap->pid, uap->signum, &sv));
 }
 
 /*
  * Queue `signum' with the accompanying `value' to the single process
  * `pid'.
  *
  * Returns EINVAL for a signal number above _SIG_MAXSIG or a
  * non-positive pid, ESRCH when the process cannot be found, the
  * permission-check error, or the result of pksignal().  A signal
  * number of 0 only performs the permission check.
  */
 int
 kern_sigqueue(struct thread *td, pid_t pid, int signum, union sigval *value)
 {
 	ksiginfo_t ksi;
 	struct proc *target;
 	int error;
 
 	if ((u_int)signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	/*
 	 * Specification says sigqueue can only send signal to
 	 * single process.
 	 */
 	if (pid <= 0)
 		return (EINVAL);
 
 	target = pfind_any(pid);
 	if (target == NULL)
 		return (ESRCH);
 
 	error = p_cansignal(td, target, signum);
 	if (error != 0 || signum == 0)
 		goto out;
 
 	ksiginfo_init(&ksi);
 	ksi.ksi_flags = KSI_SIGQ;
 	ksi.ksi_signo = signum;
 	ksi.ksi_code = SI_QUEUE;
 	ksi.ksi_pid = td->td_proc->p_pid;
 	ksi.ksi_uid = td->td_ucred->cr_ruid;
 	ksi.ksi_value = *value;
 	error = pksignal(target, ksi.ksi_signo, &ksi);
 out:
 	PROC_UNLOCK(target);
 	return (error);
 }
 
 /*
  * Send a signal to the process group identified by `pgid'.  A pgid of
  * 0, or one for which no group is found, is silently ignored.
  */
 void
 gsignal(int pgid, int sig, ksiginfo_t *ksi)
 {
 	struct pgrp *pgrp;
 
 	if (pgid == 0)
 		return;
 
 	sx_slock(&proctree_lock);
 	pgrp = pgfind(pgid);
 	sx_sunlock(&proctree_lock);
 	if (pgrp == NULL)
 		return;
 
 	pgsignal(pgrp, sig, 0, ksi);
 	PGRP_UNLOCK(pgrp);
 }
 
 /*
  * Send a signal to every PRS_NORMAL member of a process group.  If
  * checkctty is non-zero, limit delivery to members which have a
  * controlling terminal (P_CONTROLT).  A NULL pgrp is a no-op; otherwise
  * the caller must hold the process group lock.
  */
 void
 pgsignal(struct pgrp *pgrp, int sig, int checkctty, ksiginfo_t *ksi)
 {
 	struct proc *member;
 
 	if (pgrp == NULL)
 		return;
 
 	PGRP_LOCK_ASSERT(pgrp, MA_OWNED);
 	LIST_FOREACH(member, &pgrp->pg_members, p_pglist) {
 		PROC_LOCK(member);
 		if (member->p_state == PRS_NORMAL &&
 		    (checkctty == 0 || (member->p_flag & P_CONTROLT) != 0))
 			pksignal(member, sig, ksi);
 		PROC_UNLOCK(member);
 	}
 }
 
 /*
  * Recalculate the signal mask and reset the signal disposition after
  * usermode frame for delivery is formed.  Should be called after
  * mach-specific routine, because sysent->sv_sendsig() needs correct
  * ps_siginfo and signal mask.
  *
  * Bumps the thread's ru_nsignals counter, blocks the handler's catch
  * mask (plus the signal itself unless it is in ps_signodefer) and, if
  * the signal is in ps_sigreset, returns it to the default disposition.
  * Both the process lock and ps_mtx are expected to be held.
  */
 static void
 postsig_done(int sig, struct thread *td, struct sigacts *ps)
 {
 	sigset_t blocked;
 
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	td->td_ru.ru_nsignals++;
 
 	blocked = ps->ps_catchmask[_SIG_IDX(sig)];
 	if (!SIGISMEMBER(ps->ps_signodefer, sig))
 		SIGADDSET(blocked, sig);
 	kern_sigprocmask(td, SIG_BLOCK, &blocked, NULL,
 	    SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED);
 
 	if (SIGISMEMBER(ps->ps_sigreset, sig))
 		sigdflt(ps, sig);
 }
 
 /*
  * Send a signal caused by a trap to the current thread.  If it will be
  * caught immediately, deliver it with correct code.  Otherwise, post it
  * normally.
  *
  * "Caught immediately" means: the process is not traced, the signal is
  * in ps_sigcatch, and the thread does not have it masked.  In that case
  * the handler frame is built right here via sv_sendsig(); otherwise the
  * signal is queued with tdsendsignal().  The process lock and ps_mtx
  * are taken and released inside this function.
  */
 void
 trapsignal(struct thread *td, ksiginfo_t *ksi)
 {
 	struct sigacts *ps;
 	struct proc *p;
 	int sig;
 	int code;
 
 	p = td->td_proc;
 	sig = ksi->ksi_signo;
 	code = ksi->ksi_code;
 	KASSERT(_SIG_VALID(sig), ("invalid signal"));
 
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) &&
 	    !SIGISMEMBER(td->td_sigmask, sig)) {
 #ifdef KTRACE
 		if (KTRPOINT(curthread, KTR_PSIG))
 			ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)],
 			    &td->td_sigmask, code);
 #endif
 		(*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)],
 				ksi, &td->td_sigmask);
 		/* Mask/disposition bookkeeping must follow sv_sendsig(). */
 		postsig_done(sig, td, ps);
 		mtx_unlock(&ps->ps_mtx);
 	} else {
 		/*
 		 * Avoid a possible infinite loop if the thread
 		 * masking the signal or process is ignoring the
 		 * signal.
 		 */
 		if (kern_forcesigexit &&
 		    (SIGISMEMBER(td->td_sigmask, sig) ||
 		     ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN)) {
 			/* Force the signal back to an unmasked SIG_DFL. */
 			SIGDELSET(td->td_sigmask, sig);
 			SIGDELSET(ps->ps_sigcatch, sig);
 			SIGDELSET(ps->ps_sigignore, sig);
 			ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 		}
 		mtx_unlock(&ps->ps_mtx);
 		p->p_sig = sig;		/* XXX to verify code */
 		tdsendsignal(p, td, sig, ksi);
 	}
 	PROC_UNLOCK(p);
 }
 
 /*
  * Choose the thread of `p' that should receive `sig'.
  *
  * Preference order: the current thread, if it belongs to `p' and does
  * not mask the signal; otherwise the first thread of `p' that does not
  * mask it; otherwise the first thread of `p'.  The process lock must
  * be held.  `prop' is not used.
  */
 static struct thread *
 sigtd(struct proc *p, int sig, int prop)
 {
 	struct thread *td;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * Check if current thread can handle the signal without
 	 * switching context to another thread.
 	 */
 	if (curproc == p && !SIGISMEMBER(curthread->td_sigmask, sig))
 		return (curthread);
 
 	FOREACH_THREAD_IN_PROC(p, td) {
 		if (!SIGISMEMBER(td->td_sigmask, sig))
 			return (td);
 	}
 
 	/* Every thread masks the signal; fall back to the first one. */
 	return (FIRST_THREAD_IN_PROC(p));
 }
 
 /*
  * Send the signal to the process.  If the signal has an action, the action
  * is usually performed by the target process rather than the caller; we add
  * the signal to the set of pending signals for the process.
  *
  * Exceptions:
  *   o When a stop signal is sent to a sleeping process that takes the
  *     default action, the process is stopped without awakening it.
  *   o SIGCONT restarts stopped processes (or puts them back to sleep)
  *     regardless of the signal action (eg, blocked or ignored).
  *
  * Other ignored signals are discarded immediately.
  *
  * NB: This function may be entered from the debugger via the "kill" DDB
  * command.  There is little that can be done to mitigate the possibly messy
  * side effects of this unwise possibility.
  *
  * This wrapper builds a siginfo marked SI_KERNEL and hands it to
  * tdsendsignal() with no specific target thread; the result is
  * discarded.
  */
 void
 kern_psignal(struct proc *p, int sig)
 {
 	ksiginfo_t kernel_ksi;
 
 	ksiginfo_init(&kernel_ksi);
 	kernel_ksi.ksi_code = SI_KERNEL;
 	kernel_ksi.ksi_signo = sig;
 	(void)tdsendsignal(p, NULL, sig, &kernel_ksi);
 }
 
 /*
  * Send `sig' with the caller-supplied siginfo `ksi' to process `p',
  * letting tdsendsignal() choose the receiving thread (td == NULL).
  * Returns tdsendsignal()'s result.
  */
 int
 pksignal(struct proc *p, int sig, ksiginfo_t *ksi)
 {
 
 	return (tdsendsignal(p, NULL, sig, ksi));
 }
 
 /*
  * Utility function for finding a thread to send signal event to.
  *
  * For SIGEV_THREAD_ID notifications the thread is looked up with
  * tdfind() and stored in *ttd; ESRCH is returned if it is not found.
  * For any other notification type *ttd is set to NULL and the process
  * lock is acquired before returning 0.
  * NOTE(review): tdfind() presumably returns with the process locked as
  * well, so that both success paths leave `p' locked -- confirm.
  */
 int
 sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **ttd)
 {
 	struct thread *target;
 
 	if (sigev->sigev_notify != SIGEV_THREAD_ID) {
 		*ttd = NULL;
 		PROC_LOCK(p);
 		return (0);
 	}
 
 	target = tdfind(sigev->sigev_notify_thread_id, p->p_pid);
 	if (target == NULL)
 		return (ESRCH);
 	*ttd = target;
 	return (0);
 }
 
 /*
  * Send `sig' to the specific thread `td' with a siginfo marked
  * SI_KERNEL.  The result of tdsendsignal() is discarded.
  */
 void
 tdsignal(struct thread *td, int sig)
 {
 	ksiginfo_t kernel_ksi;
 
 	ksiginfo_init(&kernel_ksi);
 	kernel_ksi.ksi_code = SI_KERNEL;
 	kernel_ksi.ksi_signo = sig;
 	(void)tdsendsignal(td->td_proc, td, sig, &kernel_ksi);
 }
 
 /*
  * Send `sig' with the caller-supplied siginfo `ksi' to the specific
  * thread `td'.  The result of tdsendsignal() is discarded.
  */
 void
 tdksignal(struct thread *td, int sig, ksiginfo_t *ksi)
 {
 
 	(void) tdsendsignal(td->td_proc, td, sig, ksi);
 }
 
 /*
  * Post signal sig to process p, or to the specific thread td when td is
  * not NULL.
  *
  * The caller must hold the process lock, and td (if given) must belong
  * to p.  When td is NULL a target thread is chosen with sigtd() and the
  * signal is queued on the process-wide queue; otherwise it is queued on
  * td's own queue.  ksi, when not NULL, carries the signal information
  * and must not already be on a queue.
  *
  * Returns 0 when the signal was queued or deliberately discarded
  * (zombie target, ignored signal, or a tty stop with default action sent
  * to a member of an orphaned process group), otherwise the error
  * returned by sigqueue_add().  An invalid signal number panics.
  */
 int
 tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
 {
 	sig_t action;
 	sigqueue_t *sigqueue;
 	int prop;
 	struct sigacts *ps;
 	int intrval;
 	int ret = 0;
 	int wakeup_swapper;
 
 	MPASS(td == NULL || p == td->td_proc);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (!_SIG_VALID(sig))
 		panic("%s(): invalid signal %d", __func__, sig);
 
 	KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("%s: ksi on queue", __func__));
 
 	/*
 	 * IEEE Std 1003.1-2001: return success when killing a zombie.
 	 */
 	if (p->p_state == PRS_ZOMBIE) {
 		if (ksi && (ksi->ksi_flags & KSI_INS))
 			ksiginfo_tryfree(ksi);
 		return (ret);
 	}
 
 	ps = p->p_sigacts;
 	KNOTE_LOCKED(p->p_klist, NOTE_SIGNAL | sig);
 	prop = sigprop(sig);
 
 	/*
 	 * Process-directed signals go on the process queue and a thread
 	 * is picked to handle them; thread-directed signals go on that
 	 * thread's queue.
 	 */
 	if (td == NULL) {
 		td = sigtd(p, sig, prop);
 		sigqueue = &p->p_sigqueue;
 	} else
 		sigqueue = &td->td_sigqueue;
 
 	SDT_PROBE3(proc, , , signal__send, td, p, sig);
 
 	/*
 	 * If the signal is being ignored,
 	 * then we forget about it immediately.
 	 * (Note: we don't set SIGCONT in ps_sigignore,
 	 * and if it is set to SIG_IGN,
 	 * action will be SIG_DFL here.)
 	 */
 	mtx_lock(&ps->ps_mtx);
 	if (SIGISMEMBER(ps->ps_sigignore, sig)) {
 		SDT_PROBE3(proc, , , signal__discard, td, p, sig);
 
 		mtx_unlock(&ps->ps_mtx);
 		if (ksi && (ksi->ksi_flags & KSI_INS))
 			ksiginfo_tryfree(ksi);
 		return (ret);
 	}
 	/* Classify the disposition: held (masked), caught, or default. */
 	if (SIGISMEMBER(td->td_sigmask, sig))
 		action = SIG_HOLD;
 	else if (SIGISMEMBER(ps->ps_sigcatch, sig))
 		action = SIG_CATCH;
 	else
 		action = SIG_DFL;
 	/* Error value handed to sleepq_abort() for an interrupted sleep. */
 	if (SIGISMEMBER(ps->ps_sigintr, sig))
 		intrval = EINTR;
 	else
 		intrval = ERESTART;
 	mtx_unlock(&ps->ps_mtx);
 
 	if (prop & SIGPROP_CONT)
 		sigqueue_delete_stopmask_proc(p);
 	else if (prop & SIGPROP_STOP) {
 		/*
 		 * If sending a tty stop signal to a member of an orphaned
 		 * process group, discard the signal here if the action
 		 * is default; don't stop the process below if sleeping,
 		 * and don't clear any pending SIGCONT.
 		 */
 		if ((prop & SIGPROP_TTYSTOP) &&
 		    (p->p_pgrp->pg_jobc == 0) &&
 		    (action == SIG_DFL)) {
 			if (ksi && (ksi->ksi_flags & KSI_INS))
 				ksiginfo_tryfree(ksi);
 			return (ret);
 		}
 		/* A stop signal cancels any pending SIGCONT. */
 		sigqueue_delete_proc(p, SIGCONT);
 		if (p->p_flag & P_CONTINUED) {
 			p->p_flag &= ~P_CONTINUED;
 			PROC_LOCK(p->p_pptr);
 			sigqueue_take(p->p_ksi);
 			PROC_UNLOCK(p->p_pptr);
 		}
 	}
 
 	ret = sigqueue_add(sigqueue, sig, ksi);
 	if (ret != 0)
 		return (ret);
 	signotify(td);
 	/*
 	 * Defer further processing for signals which are held,
 	 * except that stopped processes must be continued by SIGCONT.
 	 */
 	if (action == SIG_HOLD &&
 	    !((prop & SIGPROP_CONT) && (p->p_flag & P_STOPPED_SIG)))
 		return (ret);
 
 	/* SIGKILL: Remove procfs STOPEVENTs. */
 	if (sig == SIGKILL) {
 		/* from procfs_ioctl.c: PIOCBIC */
 		p->p_stops = 0;
 		/* from procfs_ioctl.c: PIOCCONT */
 		p->p_step = 0;
 		wakeup(&p->p_step);
 	}
 	wakeup_swapper = 0;
 
 	/*
 	 * Some signals have a process-wide effect and a per-thread
 	 * component.  Most processing occurs when the process next
 	 * tries to cross the user boundary, however there are some
 	 * times when processing needs to be done immediately, such as
 	 * waking up threads so that they can cross the user boundary.
 	 * We try to do the per-process part here.
 	 */
 	if (P_SHOULDSTOP(p)) {
 		KASSERT(!(p->p_flag & P_WEXIT),
 		    ("signal to stopped but exiting process"));
 		if (sig == SIGKILL) {
 			/*
 			 * If traced process is already stopped,
 			 * then no further action is necessary.
 			 */
 			if (p->p_flag & P_TRACED)
 				goto out;
 			/*
 			 * SIGKILL sets process running.
 			 * It will die elsewhere.
 			 * All threads must be restarted.
 			 */
 			p->p_flag &= ~P_STOPPED_SIG;
 			goto runfast;
 		}
 
 		if (prop & SIGPROP_CONT) {
 			/*
 			 * If traced process is already stopped,
 			 * then no further action is necessary.
 			 */
 			if (p->p_flag & P_TRACED)
 				goto out;
 			/*
 			 * If SIGCONT is default (or ignored), we continue the
 			 * process but don't leave the signal in sigqueue as
 			 * it has no further action.  If SIGCONT is held, we
 			 * continue the process and leave the signal in
 			 * sigqueue.  If the process catches SIGCONT, let it
 			 * handle the signal itself.  If it isn't waiting on
 			 * an event, it goes back to run state.
 			 * Otherwise, process goes back to sleep state.
 			 */
 			p->p_flag &= ~P_STOPPED_SIG;
 			PROC_SLOCK(p);
 			if (p->p_numthreads == p->p_suspcount) {
 				/*
 				 * Every thread is suspended: report the
 				 * continue to the parent.  The spin lock is
 				 * dropped around the parent notification
 				 * and retaken afterwards.
 				 */
 				PROC_SUNLOCK(p);
 				p->p_flag |= P_CONTINUED;
 				p->p_xsig = SIGCONT;
 				PROC_LOCK(p->p_pptr);
 				childproc_continued(p);
 				PROC_UNLOCK(p->p_pptr);
 				PROC_SLOCK(p);
 			}
 			if (action == SIG_DFL) {
 				thread_unsuspend(p);
 				PROC_SUNLOCK(p);
 				sigqueue_delete(sigqueue, sig);
 				goto out;
 			}
 			if (action == SIG_CATCH) {
 				/*
 				 * The process wants to catch it so it needs
 				 * to run at least one thread, but which one?
 				 */
 				PROC_SUNLOCK(p);
 				goto runfast;
 			}
 			/*
 			 * The signal is not ignored or caught.
 			 */
 			thread_unsuspend(p);
 			PROC_SUNLOCK(p);
 			goto out;
 		}
 
 		if (prop & SIGPROP_STOP) {
 			/*
 			 * If traced process is already stopped,
 			 * then no further action is necessary.
 			 */
 			if (p->p_flag & P_TRACED)
 				goto out;
 			/*
 			 * Already stopped, don't need to stop again
 			 * (If we did the shell could get confused).
 			 * Just make sure the signal STOP bit set.
 			 */
 			p->p_flag |= P_STOPPED_SIG;
 			sigqueue_delete(sigqueue, sig);
 			goto out;
 		}
 
 		/*
 		 * All other kinds of signals:
 		 * If a thread is sleeping interruptibly, simulate a
 		 * wakeup so that when it is continued it will be made
 		 * runnable and can look at the signal.  However, don't make
 		 * the PROCESS runnable, leave it stopped.
 		 * It may run a bit until it hits a thread_suspend_check().
 		 */
 		PROC_SLOCK(p);
 		thread_lock(td);
 		/*
 		 * NOTE(review): only the non-abort branch unlocks the
 		 * thread explicitly, so sleepq_abort() presumably releases
 		 * the thread lock itself -- confirm against sleepqueue(9).
 		 */
 		if (TD_CAN_ABORT(td))
 			wakeup_swapper = sleepq_abort(td, intrval);
 		else
 			thread_unlock(td);
 		PROC_SUNLOCK(p);
 		goto out;
 		/*
 		 * Mutexes are short lived. Threads waiting on them will
 		 * hit thread_suspend_check() soon.
 		 */
 	} else if (p->p_state == PRS_NORMAL) {
 		if (p->p_flag & P_TRACED || action == SIG_CATCH) {
 			tdsigwakeup(td, sig, action, intrval);
 			goto out;
 		}
 
 		MPASS(action == SIG_DFL);
 
 		if (prop & SIGPROP_STOP) {
 			if (p->p_flag & (P_PPWAIT|P_WEXIT))
 				goto out;
 			p->p_flag |= P_STOPPED_SIG;
 			p->p_xsig = sig;
 			PROC_SLOCK(p);
 			wakeup_swapper = sig_suspend_threads(td, p, 1);
 			if (p->p_numthreads == p->p_suspcount) {
 				/*
 				 * only thread sending signal to another
 				 * process can reach here, if thread is sending
 				 * signal to its process, because thread does
 				 * not suspend itself here, p_numthreads
 				 * should never be equal to p_suspcount.
 				 */
 				thread_stopped(p);
 				PROC_SUNLOCK(p);
 				sigqueue_delete_proc(p, p->p_xsig);
 			} else
 				PROC_SUNLOCK(p);
 			goto out;
 		}
 	} else {
 		/* Not in "NORMAL" state. discard the signal. */
 		sigqueue_delete(sigqueue, sig);
 		goto out;
 	}
 
 	/*
 	 * The process is not stopped so we need to apply the signal to all the
 	 * running threads.
 	 */
 runfast:
 	tdsigwakeup(td, sig, action, intrval);
 	PROC_SLOCK(p);
 	thread_unsuspend(p);
 	PROC_SUNLOCK(p);
 out:
 	/* If we jump here, proc slock should not be owned. */
 	PROC_SLOCK_ASSERT(p, MA_NOTOWNED);
 	if (wakeup_swapper)
 		kick_proc0();
 
 	return (ret);
 }
 
 /*
  * The force of a signal has been directed against a single
  * thread.  We need to see what we can do about knocking it
  * out of any sleep it may be in etc.
  *
  * Called with the process lock held.  The process spin lock and the
  * thread lock are taken here and are not held on return.  action is
  * the disposition computed by the caller and intrval is the error value
  * handed to sleepq_abort() when an interruptible sleep is aborted.
  */
 static void
 tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval)
 {
 	struct proc *p = td->td_proc;
 	int prop, wakeup_swapper;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	prop = sigprop(sig);
 
 	PROC_SLOCK(p);
 	thread_lock(td);
 	/*
 	 * Bring the priority of a thread up if we want it to get
 	 * killed in this lifetime.  Be careful to avoid bumping the
 	 * priority of the idle thread, since we still allow to signal
 	 * kernel processes.
 	 */
 	if (action == SIG_DFL && (prop & SIGPROP_KILL) != 0 &&
 	    td->td_priority > PUSER && !TD_IS_IDLETHREAD(td))
 		sched_prio(td, PUSER);
 	if (TD_ON_SLEEPQ(td)) {
 		/*
 		 * If thread is sleeping uninterruptibly
 		 * we can't interrupt the sleep... the signal will
 		 * be noticed when the process returns through
 		 * trap() or syscall().
 		 */
 		if ((td->td_flags & TDF_SINTR) == 0)
 			goto out;
 		/*
 		 * If SIGCONT is default (or ignored) and process is
 		 * asleep, we are finished; the process should not
 		 * be awakened.
 		 */
 		if ((prop & SIGPROP_CONT) && action == SIG_DFL) {
 			thread_unlock(td);
 			PROC_SUNLOCK(p);
 			sigqueue_delete(&p->p_sigqueue, sig);
 			/*
 			 * It may be on either list in this state.
 			 * Remove from both for now.
 			 */
 			sigqueue_delete(&td->td_sigqueue, sig);
 			return;
 		}
 
 		/*
 		 * Don't awaken a sleeping thread for SIGSTOP if the
 		 * STOP signal is deferred.
 		 */
 		if ((prop & SIGPROP_STOP) != 0 && (td->td_flags & (TDF_SBDRY |
 		    TDF_SERESTART | TDF_SEINTR)) == TDF_SBDRY)
 			goto out;
 
 		/*
 		 * Give low priority threads a better chance to run.
 		 */
 		if (td->td_priority > PUSER && !TD_IS_IDLETHREAD(td))
 			sched_prio(td, PUSER);
 
 		/*
 		 * NOTE(review): there is no thread_unlock() on this path,
 		 * so sleepq_abort() presumably releases the thread lock --
 		 * confirm against sleepqueue(9).
 		 */
 		wakeup_swapper = sleepq_abort(td, intrval);
 		PROC_SUNLOCK(p);
 		if (wakeup_swapper)
 			kick_proc0();
 		return;
 	}
 
 	/*
 	 * Other states do nothing with the signal immediately,
 	 * other than kicking ourselves if we are running.
 	 * It will either never be noticed, or noticed very soon.
 	 */
 #ifdef SMP
 	if (TD_IS_RUNNING(td) && td != curthread)
 		forward_signal(td);
 #endif
 
 out:
 	/* Common exit for paths that still hold both locks. */
 	PROC_SUNLOCK(p);
 	thread_unlock(td);
 }
 
 /*
  * Flag every thread of process p for an AST and a suspension check.
  * Threads in an interruptible sleep (or swapped out) are either
  * suspended on the spot or, when stops are deferred with an
  * interrupting mode, have their sleep aborted.  Other threads that are
  * not yet suspended get a forward_signal() on SMP when running.
  *
  * Both the process lock and the process spin lock must be held.
  * When sending is zero, td must be curthread.
  *
  * Returns the OR of the sleepq_abort() results, i.e. non-zero when the
  * caller should kick the swapper.
  */
 static int
 sig_suspend_threads(struct thread *td, struct proc *p, int sending)
 {
 	struct thread *td2;
 	int wakeup_swapper;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	MPASS(sending || td == curthread);
 
 	wakeup_swapper = 0;
 	FOREACH_THREAD_IN_PROC(p, td2) {
 		thread_lock(td2);
 		td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
 		if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) &&
 		    (td2->td_flags & TDF_SINTR)) {
 			if (td2->td_flags & TDF_SBDRY) {
 				/*
 				 * Once a thread is asleep with
 				 * TDF_SBDRY and without TDF_SERESTART
 				 * or TDF_SEINTR set, it should never
 				 * become suspended due to this check.
 				 */
 				KASSERT(!TD_IS_SUSPENDED(td2),
 				    ("thread with deferred stops suspended"));
 				if (TD_SBDRY_INTR(td2)) {
 					/*
 					 * NOTE(review): the continue skips
 					 * thread_unlock(), so sleepq_abort()
 					 * presumably drops the thread lock
 					 * -- confirm against sleepqueue(9).
 					 */
 					wakeup_swapper |= sleepq_abort(td2,
 					    TD_SBDRY_ERRNO(td2));
 					continue;
 				}
 			} else if (!TD_IS_SUSPENDED(td2))
 				thread_suspend_one(td2);
 		} else if (!TD_IS_SUSPENDED(td2)) {
 			/*
 			 * TDF_ASTPENDING was already set unconditionally
 			 * at the top of the loop body, so this assignment
 			 * is redundant as written.
 			 */
 			if (sending || td != td2)
 				td2->td_flags |= TDF_ASTPENDING;
 #ifdef SMP
 			if (TD_IS_RUNNING(td2) && td2 != td)
 				forward_signal(td2);
 #endif
 		}
 		thread_unlock(td2);
 	}
 	return (wakeup_swapper);
 }
 
 /*
  * Stop the process for an event deemed interesting to the debugger. If si is
  * non-NULL, this is a signal exchange; the new signal requested by the
  * debugger will be returned for handling. If si is NULL, this is some other
  * type of interesting event. The debugger may request a signal be delivered in
  * that case as well, however it will be deferred until it can be handled.
  *
  * Must be called with the process lock held on a process that is not
  * exiting (P_WEXIT clear).
  *
  * Returns 0 when the stop is skipped because the process is in
  * single-threaded exit and this is not a thread-exit event, or when the
  * replacement signal was directed at a different thread.  Otherwise
  * returns td->td_xsig as left by the debugger (SIGKILL if the process
  * was killed while stopped).
  */
 int
 ptracestop(struct thread *td, int sig, ksiginfo_t *si)
 {
 	struct proc *p = td->td_proc;
 	struct thread *td2;
 	ksiginfo_t ksi;
 	int prop;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(!(p->p_flag & P_WEXIT), ("Stopping exiting process"));
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
 	    &p->p_mtx.lock_object, "Stopping for traced signal");
 
 	td->td_xsig = sig;
 
 	/*
 	 * Signals that were themselves injected through ptrace
 	 * (KSI_PTRACE) do not stop the thread again.
 	 */
 	if (si == NULL || (si->ksi_flags & KSI_PTRACE) == 0) {
 		td->td_dbgflags |= TDB_XSIG;
 		CTR4(KTR_PTRACE, "ptracestop: tid %d (pid %d) flags %#x sig %d",
 		    td->td_tid, p->p_pid, td->td_dbgflags, sig);
 		PROC_SLOCK(p);
 		while ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_XSIG)) {
 			if (P_KILLED(p)) {
 				/*
 				 * Ensure that, if we've been PT_KILLed, the
 				 * exit status reflects that. Another thread
 				 * may also be in ptracestop(), having just
 				 * received the SIGKILL, but this thread was
 				 * unsuspended first.
 				 */
 				td->td_dbgflags &= ~TDB_XSIG;
 				td->td_xsig = SIGKILL;
 				p->p_ptevents = 0;
 				break;
 			}
 			if (p->p_flag & P_SINGLE_EXIT &&
 			    !(td->td_dbgflags & TDB_EXIT)) {
 				/*
 				 * Ignore ptrace stops except for thread exit
 				 * events when the process exits.
 				 */
 				td->td_dbgflags &= ~TDB_XSIG;
 				PROC_SUNLOCK(p);
 				return (0);
 			}
 
 			/*
 			 * Make wait(2) work.  Ensure that right after the
 			 * attach, the thread which was decided to become the
 			 * leader of attach gets reported to the waiter.
 			 * Otherwise, just avoid overwriting another thread's
 			 * assignment to p_xthread.  If another thread has
 			 * already set p_xthread, the current thread will get
 			 * a chance to report itself upon the next iteration.
 			 */
 			if ((td->td_dbgflags & TDB_FSTP) != 0 ||
 			    ((p->p_flag2 & P2_PTRACE_FSTP) == 0 &&
 			    p->p_xthread == NULL)) {
 				p->p_xsig = sig;
 				p->p_xthread = td;
 
 				/*
 				 * If we are on sleepqueue already,
 				 * let sleepqueue code decide if it
 				 * needs to go sleep after attach.
 				 */
 				if (td->td_wchan == NULL)
 					td->td_dbgflags &= ~TDB_FSTP;
 
 				p->p_flag2 &= ~P2_PTRACE_FSTP;
 				p->p_flag |= P_STOPPED_SIG | P_STOPPED_TRACE;
 				sig_suspend_threads(td, p, 0);
 			}
 			if ((td->td_dbgflags & TDB_STOPATFORK) != 0) {
 				td->td_dbgflags &= ~TDB_STOPATFORK;
 			}
 stopme:
 			/*
 			 * Suspend this thread.  After it resumes, a thread
 			 * that is still marked TDB_SUSPEND goes straight
 			 * back to sleep via the stopme label unless the
 			 * process is exiting.
 			 */
 			thread_suspend_switch(td, p);
 			if (p->p_xthread == td)
 				p->p_xthread = NULL;
 			if (!(p->p_flag & P_TRACED))
 				break;
 			if (td->td_dbgflags & TDB_SUSPEND) {
 				if (p->p_flag & P_SINGLE_EXIT)
 					break;
 				goto stopme;
 			}
 		}
 		PROC_SUNLOCK(p);
 	}
 
 	if (si != NULL && sig == td->td_xsig) {
 		/* Parent wants us to take the original signal unchanged. */
 		si->ksi_flags |= KSI_HEAD;
 		if (sigqueue_add(&td->td_sigqueue, sig, si) != 0)
 			si->ksi_signo = 0;
 	} else if (td->td_xsig != 0) {
 		/*
 		 * If parent wants us to take a new signal, then it will leave
 		 * it in td->td_xsig; otherwise we just look for signals again.
 		 */
 		ksiginfo_init(&ksi);
 		ksi.ksi_signo = td->td_xsig;
 		ksi.ksi_flags |= KSI_PTRACE;
 		prop = sigprop(td->td_xsig);
 		td2 = sigtd(p, td->td_xsig, prop);
 		tdsendsignal(p, td2, td->td_xsig, &ksi);
 		/* The signal went to another thread; nothing for us to take. */
 		if (td != td2)
 			return (0);
 	}
 
 	return (td->td_xsig);
 }
 
 /*
  * For each signal that is both in block and pending on the process
  * queue of p, pick a thread with sigtd(), notify it, and wake it up
  * when the process is traced or the signal is caught and not masked by
  * that thread.
  *
  * The process lock must be held.  flags tells whether the caller already
  * holds ps_mtx (SIGPROCMASK_PS_LOCKED); if not, it is taken around each
  * disposition check.
  */
 static void
 reschedule_signals(struct proc *p, sigset_t block, int flags)
 {
 	struct sigacts *sa;
 	struct thread *target;
 	int intrval, signo, take_ps_lock;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sa = p->p_sigacts;
 	take_ps_lock = (flags & SIGPROCMASK_PS_LOCKED) == 0;
 	mtx_assert(&sa->ps_mtx, take_ps_lock ? MA_NOTOWNED : MA_OWNED);
 	if (SIGISEMPTY(p->p_siglist))
 		return;
 
 	/* Only signals that are actually pending are of interest. */
 	SIGSETAND(block, p->p_siglist);
 	for (;;) {
 		signo = sig_ffs(&block);
 		if (signo == 0)
 			break;
 		SIGDELSET(block, signo);
 		target = sigtd(p, signo, 0);
 		signotify(target);
 		if (take_ps_lock)
 			mtx_lock(&sa->ps_mtx);
 		if ((p->p_flag & P_TRACED) != 0 ||
 		    (SIGISMEMBER(sa->ps_sigcatch, signo) &&
 		    !SIGISMEMBER(target->td_sigmask, signo))) {
 			intrval = SIGISMEMBER(sa->ps_sigintr, signo) ?
 			    EINTR : ERESTART;
 			tdsigwakeup(target, signo, SIG_CATCH, intrval);
 		}
 		if (take_ps_lock)
 			mtx_unlock(&sa->ps_mtx);
 	}
 }
 
 /*
  * Signal cleanup for thread td: flush its private signal queue and,
  * unless it is the only thread in the process, block every signal in
  * its mask and hand pending process signals it had unblocked over to
  * other threads.  The process lock must be held.
  */
 void
 tdsigcleanup(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	sigset_t wasopen;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sigqueue_flush(&td->td_sigqueue);
 	if (p->p_numthreads != 1) {
 		/*
 		 * This thread can no longer handle signals; tell the
 		 * signal posting code so by filling its mask.
 		 *
 		 * The set of signals it did not block is computed first,
 		 * so that other threads which can take them are woken:
 		 * this thread might already have been selected for
 		 * delivery.
 		 */
 		SIGFILLSET(wasopen);
 		SIGSETNAND(wasopen, td->td_sigmask);
 		SIGFILLSET(td->td_sigmask);
 		reschedule_signals(p, wasopen, 0);
 	}
 }
 
 /*
  * Extract the stop-deferral bits (TDF_SBDRY, TDF_SEINTR, TDF_SERESTART)
  * from a td_flags value.  The interrupt/restart modifiers are only
  * meaningful together with TDF_SBDRY, which is asserted.
  */
 static int
 sigdeferstop_curr_flags(int cflags)
 {
 	int stopflags;
 
 	stopflags = cflags & (TDF_SBDRY | TDF_SEINTR | TDF_SERESTART);
 	MPASS((stopflags & TDF_SBDRY) != 0 ||
 	    (stopflags & (TDF_SEINTR | TDF_SERESTART)) == 0);
 	return (stopflags);
 }
 
 /*
  * Switch the current thread's stop-deferral mode to the one requested
  * by mode (one of the SIGDEFERSTOP_* values; anything else panics).
  *
  * Returns the previous deferral flags, to be handed back to
  * sigallowstop(), or SIGDEFERSTOP_VAL_NCHG when the request did not
  * change anything.
  *
  * TDF_SBDRY, TDF_SEINTR and TDF_SERESTART are only ever set and cleared
  * by the owning thread, which is why they can be read here without
  * taking the thread lock; the lock is taken only for the update.
  */
 int
 sigdeferstop_impl(int mode)
 {
 	struct thread *self;
 	int oldmode, newmode;
 
 	self = curthread;
 	oldmode = sigdeferstop_curr_flags(self->td_flags);
 	switch (mode) {
 	case SIGDEFERSTOP_NOP:
 		/* Keeping the current mode is never a change. */
 		return (SIGDEFERSTOP_VAL_NCHG);
 	case SIGDEFERSTOP_OFF:
 		newmode = 0;
 		break;
 	case SIGDEFERSTOP_SILENT:
 		newmode = oldmode | TDF_SBDRY;
 		newmode &= ~(TDF_SEINTR | TDF_SERESTART);
 		break;
 	case SIGDEFERSTOP_EINTR:
 		newmode = oldmode | TDF_SBDRY | TDF_SEINTR;
 		newmode &= ~TDF_SERESTART;
 		break;
 	case SIGDEFERSTOP_ERESTART:
 		newmode = oldmode | TDF_SBDRY | TDF_SERESTART;
 		newmode &= ~TDF_SEINTR;
 		break;
 	default:
 		panic("sigdeferstop: invalid mode %x", mode);
 		break;
 	}
 	if (newmode == oldmode)
 		return (SIGDEFERSTOP_VAL_NCHG);
 
 	thread_lock(self);
 	self->td_flags = (self->td_flags & ~oldmode) | newmode;
 	thread_unlock(self);
 	return (oldmode);
 }
 
 /*
  * Put the current thread's stop-deferral flags back to prev, the value
  * previously returned by sigdeferstop_impl().  This usually re-enables
  * delivery of SIGSTOP.  A stop that was posted in the meantime is not
  * acted on here; the thread suspends later, from ast() or from its next
  * interruptible sleep.
  */
 void
 sigallowstop_impl(int prev)
 {
 	struct thread *self;
 	int current;
 
 	KASSERT(prev != SIGDEFERSTOP_VAL_NCHG, ("failed sigallowstop"));
 	KASSERT((prev & ~(TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)) == 0,
 	    ("sigallowstop: incorrect previous mode %x", prev));
 
 	self = curthread;
 	current = sigdeferstop_curr_flags(self->td_flags);
 	if (current == prev)
 		return;
 
 	thread_lock(self);
 	self->td_flags = (self->td_flags & ~current) | prev;
 	thread_unlock(self);
 }
 
 /*
  * If the current process has received a signal (should be caught or cause
  * termination, should interrupt current syscall), return the signal number.
  * Stop signals with default action are processed immediately, then cleared;
  * they aren't returned.  This is checked after each entry to the system for
  * a syscall or trap (though this can usually be done without calling issignal
  * by checking the pending signal masks in cursig.) The normal call
  * sequence is
  *
  *	while (sig = cursig(curthread))
  *		postsig(sig);
  *
  * Called with both the process lock and ps_mtx held; ps_mtx is dropped
  * and retaken around stopevent(), ptracestop() and the default-action
  * stop.
  *
  * Returns 0 when no deliverable signal is pending, the signal number
  * when one should be handed to postsig(), and -1 when a default-action
  * stop signal is pending but TD_SBDRY_INTR() is true for the thread,
  * in which case the signal is left queued.
  */
 static int
 issignal(struct thread *td)
 {
 	struct proc *p;
 	struct sigacts *ps;
 	struct sigqueue *queue;
 	sigset_t sigpending;
 	ksiginfo_t ksi;
 	int prop, sig, traced;
 
 	p = td->td_proc;
 	ps = p->p_sigacts;
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	for (;;) {
 		traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG);
 
 		/* Pending = (thread queue | process queue) & ~thread mask. */
 		sigpending = td->td_sigqueue.sq_signals;
 		SIGSETOR(sigpending, p->p_sigqueue.sq_signals);
 		SIGSETNAND(sigpending, td->td_sigmask);
 
 		/*
 		 * Stop signals are not considered while the process has
 		 * P_PPWAIT set or the thread defers stops silently
 		 * (TDF_SBDRY without an interrupt/restart modifier).
 		 */
 		if ((p->p_flag & P_PPWAIT) != 0 || (td->td_flags &
 		    (TDF_SBDRY | TDF_SERESTART | TDF_SEINTR)) == TDF_SBDRY)
 			SIG_STOPSIGMASK(sigpending);
 		if (SIGISEMPTY(sigpending))	/* no signal to send */
 			return (0);
 		if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED &&
 		    (p->p_flag2 & P2_PTRACE_FSTP) != 0 &&
 		    SIGISMEMBER(sigpending, SIGSTOP)) {
 			/*
 			 * If debugger just attached, always consume
 			 * SIGSTOP from ptrace(PT_ATTACH) first, to
 			 * execute the debugger attach ritual in
 			 * order.
 			 */
 			sig = SIGSTOP;
 			td->td_dbgflags |= TDB_FSTP;
 		} else {
 			sig = sig_ffs(&sigpending);
 		}
 
 		if (p->p_stops & S_SIG) {
 			mtx_unlock(&ps->ps_mtx);
 			stopevent(p, S_SIG, sig);
 			mtx_lock(&ps->ps_mtx);
 		}
 
 		/*
 		 * We should see pending but ignored signals
 		 * only if P_TRACED was on when they were posted.
 		 */
 		if (SIGISMEMBER(ps->ps_sigignore, sig) && (traced == 0)) {
 			sigqueue_delete(&td->td_sigqueue, sig);
 			sigqueue_delete(&p->p_sigqueue, sig);
 			continue;
 		}
 		if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED) {
 			/*
 			 * If traced, always stop.
 			 * Remove old signal from queue before the stop.
 			 * XXX shrug off debugger, it causes siginfo to
 			 * be thrown away.
 			 */
 			/*
 			 * queue remembers which queue the signal came from
 			 * so that it can be put back there if tracing
 			 * ends during the stop.
 			 */
 			queue = &td->td_sigqueue;
 			ksiginfo_init(&ksi);
 			if (sigqueue_get(queue, sig, &ksi) == 0) {
 				queue = &p->p_sigqueue;
 				sigqueue_get(queue, sig, &ksi);
 			}
 			td->td_si = ksi.ksi_info;
 
 			mtx_unlock(&ps->ps_mtx);
 			sig = ptracestop(td, sig, &ksi);
 			mtx_lock(&ps->ps_mtx);
 
 			td->td_si.si_signo = 0;
 
 			/*
 			 * Keep looking if the debugger discarded or
 			 * replaced the signal.
 			 */
 			if (sig == 0)
 				continue;
 
 			/*
 			 * If the signal became masked, re-queue it.
 			 */
 			if (SIGISMEMBER(td->td_sigmask, sig)) {
 				ksi.ksi_flags |= KSI_HEAD;
 				sigqueue_add(&p->p_sigqueue, sig, &ksi);
 				continue;
 			}
 
 			/*
 			 * If the traced bit got turned off, requeue
 			 * the signal and go back up to the top to
 			 * rescan signals.  This ensures that p_sig*
 			 * and p_sigact are consistent.
 			 */
 			if ((p->p_flag & P_TRACED) == 0) {
 				ksi.ksi_flags |= KSI_HEAD;
 				sigqueue_add(queue, sig, &ksi);
 				continue;
 			}
 		}
 
 		prop = sigprop(sig);
 
 		/*
 		 * Decide whether the signal should be returned.
 		 * Return the signal's number, or fall through
 		 * to clear it from the pending mask.
 		 */
 		switch ((intptr_t)p->p_sigacts->ps_sigact[_SIG_IDX(sig)]) {
 
 		case (intptr_t)SIG_DFL:
 			/*
 			 * Don't take default actions on system processes.
 			 */
 			if (p->p_pid <= 1) {
 #ifdef DIAGNOSTIC
 				/*
 				 * Are you sure you want to ignore SIGSEGV
 				 * in init? XXX
 				 */
 				printf("Process (pid %lu) got signal %d\n",
 					(u_long)p->p_pid, sig);
 #endif
 				break;		/* == ignore */
 			}
 			/*
 			 * If there is a pending stop signal to process with
 			 * default action, stop here, then clear the signal.
 			 * Traced or exiting processes should ignore stops.
 			 * Additionally, a member of an orphaned process group
 			 * should ignore tty stops.
 			 */
 			if (prop & SIGPROP_STOP) {
 				if (p->p_flag &
 				    (P_TRACED | P_WEXIT | P_SINGLE_EXIT) ||
 				    (p->p_pgrp->pg_jobc == 0 &&
 				     prop & SIGPROP_TTYSTOP))
 					break;	/* == ignore */
 				if (TD_SBDRY_INTR(td)) {
 					KASSERT((td->td_flags & TDF_SBDRY) != 0,
 					    ("lost TDF_SBDRY"));
 					return (-1);
 				}
 				/*
 				 * Take the stop: dequeue the signal, mark
 				 * the process stopped and suspend this
 				 * thread.  ps_mtx is released for the
 				 * duration and retaken once the thread
 				 * resumes.
 				 */
 				mtx_unlock(&ps->ps_mtx);
 				WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
 				    &p->p_mtx.lock_object, "Catching SIGSTOP");
 				sigqueue_delete(&td->td_sigqueue, sig);
 				sigqueue_delete(&p->p_sigqueue, sig);
 				p->p_flag |= P_STOPPED_SIG;
 				p->p_xsig = sig;
 				PROC_SLOCK(p);
 				sig_suspend_threads(td, p, 0);
 				thread_suspend_switch(td, p);
 				PROC_SUNLOCK(p);
 				mtx_lock(&ps->ps_mtx);
 				goto next;
 			} else if (prop & SIGPROP_IGNORE) {
 				/*
 				 * Except for SIGCONT, shouldn't get here.
 				 * Default action is to ignore; drop it.
 				 */
 				break;		/* == ignore */
 			} else
 				return (sig);
 			/*NOTREACHED*/
 
 		case (intptr_t)SIG_IGN:
 			/*
 			 * Masking above should prevent us ever trying
 			 * to take action on an ignored signal other
 			 * than SIGCONT, unless process is traced.
 			 */
 			if ((prop & SIGPROP_CONT) == 0 &&
 			    (p->p_flag & P_TRACED) == 0)
 				printf("issignal\n");
 			break;		/* == ignore */
 
 		default:
 			/*
 			 * This signal has an action, let
 			 * postsig() process it.
 			 */
 			return (sig);
 		}
 		/* Reached by every "== ignore" break above. */
 		sigqueue_delete(&td->td_sigqueue, sig);	/* take the signal! */
 		sigqueue_delete(&p->p_sigqueue, sig);
 next:;
 	}
 	/* NOTREACHED */
 }
 
 /*
  * If process p is stopped by a signal and all of its threads are
  * accounted for as suspended (counting the calling thread when p is the
  * current process), clear P_WAITED and report the stop to the parent.
  *
  * Entered and left with both the process lock and the process spin lock
  * held; the spin lock is dropped while the parent is notified.
  */
 void
 thread_stopped(struct proc *p)
 {
 	int accounted, reason;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 
 	accounted = p->p_suspcount + (p == curproc ? 1 : 0);
 	if ((p->p_flag & P_STOPPED_SIG) == 0 || accounted != p->p_numthreads)
 		return;
 
 	PROC_SUNLOCK(p);
 	p->p_flag &= ~P_WAITED;
 	PROC_LOCK(p->p_pptr);
 	reason = (p->p_flag & P_TRACED) != 0 ? CLD_TRAPPED : CLD_STOPPED;
 	childproc_stopped(p, reason);
 	PROC_UNLOCK(p->p_pptr);
 	PROC_SLOCK(p);
 }
 
 /*
  * Take the action for the specified signal
  * from the current set of pending signals.
  *
  * Operates on curthread.  Called with the process lock and ps_mtx held;
  * ps_mtx is dropped around stopevent() and before the default action.
  *
  * Returns 0 when sig is found on neither the thread nor the process
  * queue, and 1 after the handler has been set up via sv_sendsig.  With
  * a default disposition the process is terminated through sigexit()
  * and this function does not return.
  */
 int
 postsig(int sig)
 {
 	struct thread *td;
 	struct proc *p;
 	struct sigacts *ps;
 	sig_t action;
 	ksiginfo_t ksi;
 	sigset_t returnmask;
 
 	KASSERT(sig != 0, ("postsig"));
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	ps = p->p_sigacts;
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	ksiginfo_init(&ksi);
 	/* Look on the thread's own queue first, then the process queue. */
 	if (sigqueue_get(&td->td_sigqueue, sig, &ksi) == 0 &&
 	    sigqueue_get(&p->p_sigqueue, sig, &ksi) == 0)
 		return (0);
 	ksi.ksi_signo = sig;
 	if (ksi.ksi_code == SI_TIMER)
 		itimer_accept(p, ksi.ksi_timerid, &ksi);
 	action = ps->ps_sigact[_SIG_IDX(sig)];
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_PSIG))
 		ktrpsig(sig, action, td->td_pflags & TDP_OLDMASK ?
 		    &td->td_oldsigmask : &td->td_sigmask, ksi.ksi_code);
 #endif
 	if ((p->p_stops & S_SIG) != 0) {
 		mtx_unlock(&ps->ps_mtx);
 		stopevent(p, S_SIG, sig);
 		mtx_lock(&ps->ps_mtx);
 	}
 
 	if (action == SIG_DFL) {
 		/*
 		 * Default action, where the default is to kill
 		 * the process.  (Other cases were ignored above.)
 		 */
 		mtx_unlock(&ps->ps_mtx);
 		proc_td_siginfo_capture(td, &ksi.ksi_info);
 		sigexit(td, sig);
 		/* NOTREACHED */
 	} else {
 		/*
 		 * If we get here, the signal must be caught.
 		 */
 		KASSERT(action != SIG_IGN, ("postsig action %p", action));
 		KASSERT(!SIGISMEMBER(td->td_sigmask, sig),
 		    ("postsig action: blocked sig %d", sig));
 
 		/*
 		 * Set the new mask value and also defer further
 		 * occurrences of this signal.
 		 *
 		 * Special case: user has done a sigsuspend.  Here the
 		 * current mask is not of interest, but rather the
 		 * mask from before the sigsuspend is what we want
 		 * restored after the signal processing is completed.
 		 */
 		if (td->td_pflags & TDP_OLDMASK) {
 			returnmask = td->td_oldsigmask;
 			td->td_pflags &= ~TDP_OLDMASK;
 		} else
 			returnmask = td->td_sigmask;
 
 		if (p->p_sig == sig) {
 			p->p_sig = 0;
 		}
 		/* Hand off to the ABI-specific signal frame builder. */
 		(*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask);
 		postsig_done(sig, td, ps);
 	}
 	return (1);
 }
 
 /*
  * Mark process p as killed (P_WKILLED), once.  The process lock must be
  * held.
  */
 void
 proc_wkilled(struct proc *p)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	if ((p->p_flag & P_WKILLED) != 0)
 		return;
 	p->p_flag |= P_WKILLED;
 
 	/*
 	 * A process that is neither in memory nor being swapped in needs
 	 * the swapper's attention.  This notification is racy; in the
 	 * worst case the swapper takes 10 seconds to notice.
 	 */
 	if ((p->p_flag & (P_INMEM | P_SWAPPINGIN)) != 0)
 		return;
 	wakeup(&proc0);
 }
 
 /*
  * Kill the current process for stated reason.
  */
 void
 killproc(struct proc *p, char *why)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	CTR3(KTR_PROC, "killproc: proc %p (pid %d, %s)", p, p->p_pid,
 	    p->p_comm);
 	log(LOG_ERR, "pid %d (%s), jid %d, uid %d, was killed: %s\n",
 	    p->p_pid, p->p_comm, p->p_ucred->cr_prison->pr_id,
 	    p->p_ucred->cr_uid, why);
 	proc_wkilled(p);
 	kern_psignal(p, SIGKILL);
 }
 
 /*
  * Force the current process to exit with the specified signal, dumping core
  * if appropriate.  We bypass the normal tests for masked and caught signals,
  * allowing unrecoverable failures to terminate the process without changing
  * signal state.  Mark the accounting record with the signal termination.
  * If dumping core, save the signal number for the debugger.  Calls exit and
  * does not return.
  *
  * Entered with the process lock held.
  */
 void
 sigexit(struct thread *td, int sig)
 {
 	struct proc *p = td->td_proc;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	p->p_acflag |= AXSIG;
 	/*
 	 * We must be single-threading to generate a core dump.  This
 	 * ensures that the registers in the core file are up-to-date.
 	 * Also, the ELF dump handler assumes that the thread list doesn't
 	 * change out from under it.
 	 *
 	 * XXX If another thread attempts to single-thread before us
 	 *     (e.g. via fork()), we won't get a dump at all.
 	 */
 	if ((sigprop(sig) & SIGPROP_CORE) &&
 	    thread_single(p, SINGLE_NO_EXIT) == 0) {
 		p->p_sig = sig;
 		/*
 		 * Log signals which would cause core dumps
 		 * (Log as LOG_INFO to appease those who don't want
 		 * these messages.)
 		 * XXX : Todo, as well as euid, write out ruid too
 		 * Note that coredump() drops proc lock.
 		 */
 		/* WCOREFLAG in the exit status records a successful dump. */
 		if (coredump(td) == 0)
 			sig |= WCOREFLAG;
 		if (kern_logsigexit)
 			log(LOG_INFO,
 			    "pid %d (%s), jid %d, uid %d: exited on "
 			    "signal %d%s\n", p->p_pid, p->p_comm,
 			    p->p_ucred->cr_prison->pr_id,
 			    td->td_ucred->cr_uid,
 			    sig &~ WCOREFLAG,
 			    sig & WCOREFLAG ? " (core dumped)" : "");
 	} else
 		/* No dump attempted: release the proc lock ourselves. */
 		PROC_UNLOCK(p);
 	exit1(td, 0, sig);
 	/* NOTREACHED */
 }
 
 /*
  * Send queued SIGCHLD to parent when child process's state
  * is changed.
  */
 static void
 sigparent(struct proc *p, int reason, int status)
 {
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED);
 
 	if (p->p_ksi != NULL) {
 		p->p_ksi->ksi_signo  = SIGCHLD;
 		p->p_ksi->ksi_code   = reason;
 		p->p_ksi->ksi_status = status;
 		p->p_ksi->ksi_pid    = p->p_pid;
 		p->p_ksi->ksi_uid    = p->p_ucred->cr_ruid;
 		if (KSI_ONQ(p->p_ksi))
 			return;
 	}
 	pksignal(p->p_pptr, SIGCHLD, p->p_ksi);
 }
 
 /*
  * Tell the parent of p about a job-control state change of the child.
  * The locks of both p and its parent must be held.
  */
 static void
 childproc_jobstate(struct proc *p, int reason, int sig)
 {
 	struct proc *parent;
 	struct sigacts *ps;
 	int notify;
 
 	parent = p->p_pptr;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_LOCK_ASSERT(parent, MA_OWNED);
 
 	/*
 	 * A parent sleeping in kern_wait() is woken directly.  SIGCHLD
 	 * is sent as well, but on its own it would not guarantee a
 	 * wakeup because the parent may have the signal masked.
 	 */
 	parent->p_flag |= P_STATCHILD;
 	wakeup(parent);
 
 	/* SIGCHLD is suppressed when the parent set PS_NOCLDSTOP. */
 	ps = parent->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	notify = (ps->ps_flag & PS_NOCLDSTOP) == 0;
 	mtx_unlock(&ps->ps_mtx);
 	if (notify)
 		sigparent(p, reason, sig);
 }
 
 /*
  * Report to the parent that child p has stopped.  reason is passed
  * through to childproc_jobstate() together with the signal recorded in
  * p->p_xsig.
  */
 void
 childproc_stopped(struct proc *p, int reason)
 {
 
 	childproc_jobstate(p, reason, p->p_xsig);
 }
 
 /*
  * Report to the parent that child p has been continued, using
  * CLD_CONTINUED as the reason and SIGCONT as the signal.
  */
 void
 childproc_continued(struct proc *p)
 {
 	childproc_jobstate(p, CLD_CONTINUED, SIGCONT);
 }
 
 /*
  * Send the parent the SIGCHLD describing how child p terminated:
  * dumped core, killed by a signal, or exited normally.
  */
 void
 childproc_exited(struct proc *p)
 {
 	int reason, status, xsig;
 
 	xsig = p->p_xsig;
 	if (!WCOREDUMP(xsig) && !WIFSIGNALED(xsig)) {
 		reason = CLD_EXITED;
 		status = p->p_xexit;
 	} else {
 		/* Terminated by a signal, with or without a core dump. */
 		reason = WCOREDUMP(xsig) ? CLD_DUMPED : CLD_KILLED;
 		status = WTERMSIG(xsig);
 	}
 
 	/*
 	 * XXX wakeup(p->p_pptr) is deliberately not called here; that
 	 * work is done in exit1().
 	 */
 	sigparent(p, reason, status);
 }
 
 /*
  * Number of indexed core files kept per name.  NUM_CORE_FILES is the
  * compile-time default (overridable by defining it before this point)
  * and must lie in [0, MAX_NUM_CORE_FILES]; num_cores is the run-time
  * value, adjustable through the debug.ncores sysctl.
  */
 #define	MAX_NUM_CORE_FILES 100000
 #ifndef NUM_CORE_FILES
 #define	NUM_CORE_FILES 5
 #endif
 CTASSERT(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES);
 static int num_cores = NUM_CORE_FILES;
 
 static int
 sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int new_val;
 
 	new_val = num_cores;
 	error = sysctl_handle_int(oidp, &new_val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (new_val > MAX_NUM_CORE_FILES)
 		new_val = MAX_NUM_CORE_FILES;
 	if (new_val < 0)
 		new_val = 0;
 	num_cores = new_val;
 	return (0);
 }
 SYSCTL_PROC(_debug, OID_AUTO, ncores, CTLTYPE_INT|CTLFLAG_RW,
 	    0, sizeof(int), sysctl_debug_num_cores_check, "I",
 	    "Maximum number of generated process corefiles while using index format");
 
 #define	GZIP_SUFFIX	".gz"
 #define	ZSTD_SUFFIX	".zst"
 
 int compress_user_cores = 0;
 
 static int
 sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS)
 {
 	int error, val;
 
 	val = compress_user_cores;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (val != 0 && !compressor_avail(val))
 		return (EINVAL);
 	compress_user_cores = val;
 	return (error);
 }
 SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, CTLTYPE_INT | CTLFLAG_RWTUN,
     0, sizeof(int), sysctl_compress_user_cores, "I",
     "Enable compression of user corefiles ("
     __XSTRING(COMPRESS_GZIP) " = gzip, "
     __XSTRING(COMPRESS_ZSTD) " = zstd)");
 
 /*
  * Compression level used for user corefiles, exported as the
  * kern.compress_user_cores_level sysctl/tunable.  Defaults to 6.
  */
 int compress_user_cores_level = 6;
 SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN,
     &compress_user_cores_level, 0,
     "Corefile compression level");
 
 /*
  * Protect the access to corefilename[] by allproc_lock.
  */
 #define	corefilename_lock	allproc_lock
 
 /*
  * Format string used to build corefile names.  Defaults to "%N.core"
  * and can be preset with the kern.corefile tunable.
  */
 static char corefilename[MAXPATHLEN] = {"%N.core"};
 TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename));
 
 /*
  * Handler for the kern.corefile sysctl: reads or replaces
  * corefilename[] while holding corefilename_lock exclusively.
  */
 static int
 sysctl_kern_corefile(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	sx_xlock(&corefilename_lock);
 	error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename),
 	    req);
 	sx_xunlock(&corefilename_lock);
 
 	return (error);
 }
 SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW |
     CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A",
     "Process corefile name format string");
 
 /*
  * Unlock vnode vp and then close it as a file that was opened for
  * writing, using td's credentials.
  */
 static void
 vnode_close_locked(struct thread *td, struct vnode *vp)
 {
 
 	VOP_UNLOCK(vp);
 	vn_close(vp, FWRITE, td->td_ucred, td);
 }
 
 /*
  * If the core format has a %I in it, then we need to check
  * for existing corefiles before defining a name.
  * To do this we iterate over 0..ncores to find a
  * non-existing core file name to use. If all core files are
  * already used we choose the oldest one.
  */
 static int
 corefile_open_last(struct thread *td, char *name, int indexpos,
     int indexlen, int ncores, struct vnode **vpp)
 {
 	struct vnode *oldvp, *nextvp, *vp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error, i, flags, oflags, cmode;
 	char ch;
 	struct timespec lasttime;
 
 	nextvp = oldvp = NULL;
 	cmode = S_IRUSR | S_IWUSR;
 	oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
 	    (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
 
 	for (i = 0; i < ncores; i++) {
 		flags = O_CREAT | FWRITE | O_NOFOLLOW;
 
 		ch = name[indexpos + indexlen];
 		(void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen,
 		    i);
 		name[indexpos + indexlen] = ch;
 
 		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td);
 		error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
 		    NULL);
 		if (error != 0)
 			break;
 
 		vp = nd.ni_vp;
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if ((flags & O_CREAT) == O_CREAT) {
 			nextvp = vp;
 			break;
 		}
 
 		error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 		if (error != 0) {
 			vnode_close_locked(td, vp);
 			break;
 		}
 
 		if (oldvp == NULL ||
 		    lasttime.tv_sec > vattr.va_mtime.tv_sec ||
 		    (lasttime.tv_sec == vattr.va_mtime.tv_sec &&
 		    lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) {
 			if (oldvp != NULL)
 				vnode_close_locked(td, oldvp);
 			oldvp = vp;
 			lasttime = vattr.va_mtime;
 		} else {
 			vnode_close_locked(td, vp);
 		}
 	}
 
 	if (oldvp != NULL) {
 		if (nextvp == NULL) {
 			if ((td->td_proc->p_flag & P_SUGID) != 0) {
 				error = EFAULT;
 				vnode_close_locked(td, oldvp);
 			} else {
 				nextvp = oldvp;
 			}
 		} else {
 			vnode_close_locked(td, oldvp);
 		}
 	}
 	if (error != 0) {
 		if (nextvp != NULL)
 			vnode_close_locked(td, oldvp);
 	} else {
 		*vpp = nextvp;
 	}
 
 	return (error);
 }
 
 /*
  * corefile_open(comm, uid, pid, td, compress, signum, vpp, namep)
  * Expand the name described in corefilename, using name, uid, and pid
  * and open/create core file.
  * corefilename is a printf-like string, with the following format
  * specifiers:
  *	%%	a literal '%'
  *	%H	hostname, as reported by getcredhostname() for td's credentials
  *	%I	autoincrementing index chosen by corefile_open_last(); only the
  *		first %I is expanded, later ones are emitted as literal "%I"
  *	%N	name of process ("name")
  *	%P	process id (pid)
  *	%S	signal number (signum)
  *	%U	user id (uid)
  * For example, "%N.core" is the default; they can be disabled completely
  * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
  * This is controlled by the sysctl variable kern.corefile (see above).
  *
  * GZIP_SUFFIX or ZSTD_SUFFIX is appended when compress selects that
  * compressor.  On success 0 is returned, *vpp holds the opened vnode and
  * *namep the expanded name, allocated from M_TEMP; the caller frees it.
  * On failure the name is freed here and an errno value is returned
  * (ENOMEM when the expanded name does not fit in MAXPATHLEN).
  */
 static int
 corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td,
     int compress, int signum, struct vnode **vpp, char **namep)
 {
 	struct sbuf sb;
 	struct nameidata nd;
 	const char *format;
 	char *hostname, *name;
 	int cmode, error, flags, i, indexpos, indexlen, oflags, ncores;
 
 	hostname = NULL;
 	format = corefilename;
 	name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO);
 	indexlen = 0;
 	indexpos = -1;
 	ncores = num_cores;
 	(void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN);
 	/* corefilename[] may be rewritten by the sysctl handler; hold the lock. */
 	sx_slock(&corefilename_lock);
 	for (i = 0; format[i] != '\0'; i++) {
 		switch (format[i]) {
 		case '%':	/* Format character */
 			i++;
 			switch (format[i]) {
 			case '\0':	/* lone '%' at the end of the format */
 				log(LOG_ERR,
 				    "Trailing %% in corename `%s'\n", format);
 				/*
 				 * Step back so that the loop increment lands
 				 * on the terminator instead of walking past
 				 * the end of the string.
 				 */
 				i--;
 				break;
 			case '%':
 				sbuf_putc(&sb, '%');
 				break;
 			case 'H':	/* hostname */
 				if (hostname == NULL) {
 					hostname = malloc(MAXHOSTNAMELEN,
 					    M_TEMP, M_WAITOK);
 				}
 				getcredhostname(td->td_ucred, hostname,
 				    MAXHOSTNAMELEN);
 				sbuf_printf(&sb, "%s", hostname);
 				break;
 			case 'I':	/* autoincrementing index */
 				if (indexpos != -1) {
 					sbuf_printf(&sb, "%%I");
 					break;
 				}
 
 				/*
 				 * Reserve room for the widest index; the
 				 * actual value is filled in later by
 				 * corefile_open_last().
 				 */
 				indexpos = sbuf_len(&sb);
 				sbuf_printf(&sb, "%u", ncores - 1);
 				indexlen = sbuf_len(&sb) - indexpos;
 				break;
 			case 'N':	/* process name */
 				sbuf_printf(&sb, "%s", comm);
 				break;
 			case 'P':	/* process id */
 				sbuf_printf(&sb, "%u", pid);
 				break;
 			case 'S':	/* signal number */
 				sbuf_printf(&sb, "%i", signum);
 				break;
 			case 'U':	/* user id */
 				sbuf_printf(&sb, "%u", uid);
 				break;
 			default:
 				log(LOG_ERR,
 				    "Unknown format character %c in "
 				    "corename `%s'\n", format[i], format);
 				break;
 			}
 			break;
 		default:
 			sbuf_putc(&sb, format[i]);
 			break;
 		}
 	}
 	sx_sunlock(&corefilename_lock);
 	free(hostname, M_TEMP);
 	if (compress == COMPRESS_GZIP)
 		sbuf_printf(&sb, GZIP_SUFFIX);
 	else if (compress == COMPRESS_ZSTD)
 		sbuf_printf(&sb, ZSTD_SUFFIX);
 	/* Any overflow of the fixed-length sbuf shows up here. */
 	if (sbuf_error(&sb) != 0) {
 		log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too "
 		    "long\n", (long)pid, comm, (u_long)uid);
 		sbuf_delete(&sb);
 		free(name, M_TEMP);
 		return (ENOMEM);
 	}
 	sbuf_finish(&sb);
 	sbuf_delete(&sb);
 
 	if (indexpos != -1) {
 		error = corefile_open_last(td, name, indexpos, indexlen, ncores,
 		    vpp);
 		if (error != 0) {
 			log(LOG_ERR,
 			    "pid %d (%s), uid (%u):  Path `%s' failed "
 			    "on initial open test, error = %d\n",
 			    pid, comm, uid, name, error);
 		}
 	} else {
 		cmode = S_IRUSR | S_IWUSR;
 		oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
 		    (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
 		flags = O_CREAT | FWRITE | O_NOFOLLOW;
 		/* A set-id process must not write into a pre-existing file. */
 		if ((td->td_proc->p_flag & P_SUGID) != 0)
 			flags |= O_EXCL;
 
 		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td);
 		error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
 		    NULL);
 		if (error == 0) {
 			*vpp = nd.ni_vp;
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 		}
 	}
 
 	if (error != 0) {
 #ifdef AUDIT
 		audit_proc_coredump(td, name, error);
 #endif
 		free(name, M_TEMP);
 		return (error);
 	}
 	*namep = name;
 	return (0);
 }
 
 /*
  * Dump a process' core.  The main routine does some
  * policy checking, and creates the name of the coredump;
  * then it passes on a vnode and a size limit to the process-specific
  * coredump routine if there is one; if there _is not_ one, it returns
  * ENOSYS; otherwise it returns the error from the process-specific routine.
  */
 
 /*
  * Entered with the process lock held (asserted below); the lock is dropped
  * on every path before returning.  Returns EFAULT when core dumps are not
  * permitted for this process or the opened file fails the safety checks,
  * EFBIG when the core size limit or the racct allowance is zero, ENOSYS
  * when the ABI provides no sv_coredump routine, otherwise the result of
  * that routine or, if it succeeded, of closing the file.
  */
 static int
 coredump(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *cred = td->td_ucred;
 	struct vnode *vp;
 	struct flock lf;
 	struct vattr vattr;
+	size_t fullpathsize;
 	int error, error1, locked;
 	char *name;			/* name of corefile */
 	void *rl_cookie;
 	off_t limit;
 	char *fullpath, *freepath = NULL;
 	struct sbuf *sb;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td);
 	_STOPEVENT(p, S_CORE, 0);
 
 	/* Policy: dumps disabled globally, set-id process, or tracing denied. */
 	if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) ||
 	    (p->p_flag2 & P2_NOTRACE) != 0) {
 		PROC_UNLOCK(p);
 		return (EFAULT);
 	}
 
 	/*
 	 * Note that the bulk of limit checking is done after
 	 * the corefile is created.  The exception is if the limit
 	 * for corefiles is 0, in which case we don't bother
 	 * creating the corefile at all.  This layout means that
 	 * a corefile is truncated instead of not being created,
 	 * if it is larger than the limit.
 	 */
 	limit = (off_t)lim_cur(td, RLIMIT_CORE);
 	if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) {
 		PROC_UNLOCK(p);
 		return (EFBIG);
 	}
 	PROC_UNLOCK(p);
 
 	/* On success name is allocated from M_TEMP and freed at the end. */
 	error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td,
 	    compress_user_cores, p->p_sig, &vp, &name);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Don't dump to non-regular files or files with links.
 	 * Do not dump into system files. Effective user must own the corefile.
 	 */
 	if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 ||
 	    vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 ||
 	    vattr.va_uid != cred->cr_uid) {
 		VOP_UNLOCK(vp);
 		error = EFAULT;
 		goto out;
 	}
 
 	VOP_UNLOCK(vp);
 
 	/* Postpone other writers, including core dumps of other processes. */
 	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 
 	/* Best-effort advisory lock; remembered so it is only undone if taken. */
 	lf.l_whence = SEEK_SET;
 	lf.l_start = 0;
 	lf.l_len = 0;
 	lf.l_type = F_WRLCK;
 	locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0);
 
 	/* Truncate the file and optionally mark it UF_NODUMP. */
 	VATTR_NULL(&vattr);
 	vattr.va_size = 0;
 	if (set_core_nodump_flag)
 		vattr.va_flags = UF_NODUMP;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	VOP_SETATTR(vp, &vattr, cred);
 	VOP_UNLOCK(vp);
 	/* Record in the accounting flags that this process dumped core. */
 	PROC_LOCK(p);
 	p->p_acflag |= ACORE;
 	PROC_UNLOCK(p);
 
 	/* Hand off to the ABI-specific dump routine, if any. */
 	if (p->p_sysent->sv_coredump != NULL) {
 		error = p->p_sysent->sv_coredump(td, vp, limit, 0);
 	} else {
 		error = ENOSYS;
 	}
 
 	if (locked) {
 		lf.l_type = F_UNLCK;
 		VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
 	}
 	vn_rangelock_unlock(vp, rl_cookie);
 
 	/*
 	 * Notify the userland helper that a process triggered a core dump.
 	 * This allows the helper to run an automated debugging session.
 	 */
 	if (error != 0 || coredump_devctl == 0)
 		goto out;
 	sb = sbuf_new_auto();
 	/* comm= carries the full path of the executable's text vnode. */
 	if (vn_fullpath_global(td, p->p_textvp, &fullpath, &freepath) != 0)
 		goto out2;
 	sbuf_printf(sb, "comm=\"");
 	devctl_safe_quote_sb(sb, fullpath);
 	free(freepath, M_TEMP);
 	sbuf_printf(sb, "\" core=\"");
 
 	/*
 	 * We can't lookup core file vp directly. When we're replacing a core, and
 	 * other random times, we flush the name cache, so it will fail. Instead,
 	 * if the path of the core is relative, add the current dir in front of it.
 	 */
 	if (name[0] != '/') {
-		fullpath = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
-		if (kern___getcwd(td, fullpath, UIO_SYSSPACE, MAXPATHLEN, MAXPATHLEN) != 0) {
-			free(fullpath, M_TEMP);
+		fullpathsize = MAXPATHLEN;
+		freepath = malloc(fullpathsize, M_TEMP, M_WAITOK);
+		if (vn_getcwd(td, freepath, &fullpath, &fullpathsize) != 0) {
+			free(freepath, M_TEMP);
 			goto out2;
 		}
 		devctl_safe_quote_sb(sb, fullpath);
-		free(fullpath, M_TEMP);
+		free(freepath, M_TEMP);
 		sbuf_putc(sb, '/');
 	}
 	devctl_safe_quote_sb(sb, name);
 	sbuf_printf(sb, "\"");
 	if (sbuf_finish(sb) == 0)
 		devctl_notify("kernel", "signal", "coredump", sbuf_data(sb));
 out2:
 	sbuf_delete(sb);
 out:
 	/* A close failure is reported only if nothing failed earlier. */
 	error1 = vn_close(vp, FWRITE, cred, td);
 	if (error == 0)
 		error = error1;
 #ifdef AUDIT
 	audit_proc_coredump(td, name, error);
 #endif
 	free(name, M_TEMP);
 	return (error);
 }
 
 /*
  * Nonexistent system call-- signal process (may want to handle it).  Flag
  * error in case process won't see signal immediately (blocked or ignored).
  */
 #ifndef _SYS_SYSPROTO_H_
 struct nosys_args {
 	int	dummy;
 };
 #endif
 /* ARGSUSED */
 int
 nosys(struct thread *td, struct nosys_args *args)
 {
 	struct proc *p = td->td_proc;
 
 	/* Post SIGSYS to the calling thread under the process lock. */
 	PROC_LOCK(p);
 	tdsignal(td, SIGSYS);
 	PROC_UNLOCK(p);
 
 	/* kern_lognosys: 1 or 3 reports via uprintf(), 2 or 3 via printf(). */
 	if (kern_lognosys == 1 || kern_lognosys == 3)
 		uprintf("pid %d comm %s: nosys %d\n", p->p_pid, p->p_comm,
 		    td->td_sa.code);
 	if (kern_lognosys == 2 || kern_lognosys == 3)
 		printf("pid %d comm %s: nosys %d\n", p->p_pid, p->p_comm,
 		    td->td_sa.code);
 
 	return (ENOSYS);
 }
 
 /*
  * Send a SIGIO or SIGURG signal to a process or process group using stored
  * credentials rather than those of the current process.
  */
 void
 pgsigio(struct sigio **sigiop, int sig, int checkctty)
 {
 	ksiginfo_t ksi;
 	struct sigio *sio;
 	struct proc *member;
 
 	ksiginfo_init(&ksi);
 	ksi.ksi_signo = sig;
 	ksi.ksi_code = SI_KERNEL;
 
 	SIGIO_LOCK();
 	sio = *sigiop;
 	if (sio != NULL) {
 		if (sio->sio_pgid > 0) {
 			/* Positive id: a single target process. */
 			PROC_LOCK(sio->sio_proc);
 			if (CANSIGIO(sio->sio_ucred, sio->sio_proc->p_ucred))
 				kern_psignal(sio->sio_proc, sig);
 			PROC_UNLOCK(sio->sio_proc);
 		} else if (sio->sio_pgid < 0) {
 			/* Negative id: every eligible member of the group. */
 			PGRP_LOCK(sio->sio_pgrp);
 			LIST_FOREACH(member, &sio->sio_pgrp->pg_members,
 			    p_pglist) {
 				PROC_LOCK(member);
 				if (member->p_state == PRS_NORMAL &&
 				    CANSIGIO(sio->sio_ucred, member->p_ucred) &&
 				    (checkctty == 0 ||
 				    (member->p_flag & P_CONTROLT)))
 					kern_psignal(member, sig);
 				PROC_UNLOCK(member);
 			}
 			PGRP_UNLOCK(sio->sio_pgrp);
 		}
 	}
 	SIGIO_UNLOCK();
 }
 
 /*
  * Attach a signal knote to the current process; always returns 0.
  */
 static int
 filt_sigattach(struct knote *kn)
 {
 	struct proc *self;
 
 	self = curproc;
 	kn->kn_flags |= EV_CLEAR;		/* forced on for signal knotes */
 	kn->kn_ptr.p_proc = self;
 	knlist_add(self->p_klist, kn, 0);
 	return (0);
 }
 
 /*
  * Remove a signal knote from the klist of the process it was attached to.
  */
 static void
 filt_sigdetach(struct knote *kn)
 {
 
 	knlist_remove(kn->kn_ptr.p_proc->p_klist, kn, 0);
 }
 
 /*
  * signal knotes are shared with proc knotes, so we apply a mask to
  * the hint in order to differentiate them from process hints.  This
  * could be avoided by using a signal-specific knote list, but probably
  * isn't worth the trouble.
  */
 static int
 filt_signal(struct knote *kn, long hint)
 {
 	long signo;
 
 	/* Only hints tagged with NOTE_SIGNAL are signal deliveries. */
 	if ((hint & NOTE_SIGNAL) != 0) {
 		signo = hint & ~NOTE_SIGNAL;
 		if (kn->kn_id == signo)
 			kn->kn_data++;
 	}
 	/* Active once at least one matching signal has been counted. */
 	return (kn->kn_data != 0);
 }
 
 /*
  * Allocate a zeroed sigacts with one reference and an initialized mutex.
  */
 struct sigacts *
 sigacts_alloc(void)
 {
 	struct sigacts *newps;
 
 	newps = malloc(sizeof(*newps), M_SUBPROC, M_WAITOK | M_ZERO);
 	refcount_init(&newps->ps_refcnt, 1);
 	mtx_init(&newps->ps_mtx, "sigacts", NULL, MTX_DEF);
 	return (newps);
 }
 
 /*
  * Drop one reference on ps; the last release destroys the mutex and frees
  * the structure.
  */
 void
 sigacts_free(struct sigacts *ps)
 {
 
 	if (refcount_release(&ps->ps_refcnt) != 0) {
 		mtx_destroy(&ps->ps_mtx);
 		free(ps, M_SUBPROC);
 	}
 }
 
 /*
  * Take an additional reference on ps and hand the same pointer back.
  */
 struct sigacts *
 sigacts_hold(struct sigacts *ps)
 {
 
 	refcount_acquire(&ps->ps_refcnt);
 	return (ps);
 }
 
 void
 sigacts_copy(struct sigacts *dest, struct sigacts *src)
 {
 
 	KASSERT(dest->ps_refcnt == 1, ("sigacts_copy to shared dest"));
 	mtx_lock(&src->ps_mtx);
 	bcopy(src, dest, offsetof(struct sigacts, ps_refcnt));
 	mtx_unlock(&src->ps_mtx);
 }
 
 /*
  * Report whether ps currently has more than one reference.
  */
 int
 sigacts_shared(struct sigacts *ps)
 {
 
 	return (ps->ps_refcnt >= 2);
 }
 
 /*
  * Reset every signal in the process' catch set to its default action,
  * discarding queued instances of those whose property is SIGPROP_IGNORE.
  * Both the process lock and the sigacts mutex must be held.
  */
 void
 sig_drop_caught(struct proc *p)
 {
 	struct sigacts *acts;
 	int signo;
 
 	acts = p->p_sigacts;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	mtx_assert(&acts->ps_mtx, MA_OWNED);
 	for (;;) {
 		if (!SIGNOTEMPTY(acts->ps_sigcatch))
 			break;
 		signo = sig_ffs(&acts->ps_sigcatch);
 		sigdflt(acts, signo);
 		if ((sigprop(signo) & SIGPROP_IGNORE) != 0)
 			sigqueue_delete_proc(p, signo);
 	}
 }
Index: projects/clang1000-import/sys/kern/vfs_cache.c
===================================================================
--- projects/clang1000-import/sys/kern/vfs_cache.c	(revision 357389)
+++ projects/clang1000-import/sys/kern/vfs_cache.c	(revision 357390)
@@ -1,2630 +1,2608 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Poul-Henning Kamp of the FreeBSD Project.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/filedesc.h>
 #include <sys/fnv_hash.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/fcntl.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/uma.h>
 
 SDT_PROVIDER_DECLARE(vfs);
 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *",
     "char *", "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int",
     "struct vnode *", "char *");
 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative,
     "struct vnode *", "char *");
 SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *");
 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *",
     "char *");
 
 /*
  * This structure describes the elements in the cache of recent
  * names looked up by namei.
  */
 
 struct	namecache {
 	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
 	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
 	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
 	struct	vnode *nc_dvp;		/* vnode of parent of name */
 	union {
 		struct	vnode *nu_vp;	/* vnode the name refers to */
 	} n_un;
 	u_char	nc_flag;		/* flag bits */
 	u_char	nc_nlen;		/* length of name */
 	char	nc_name[0];		/* segment name + nul */
 };
 
 /*
  * struct namecache_ts embeds a struct namecache as its last member (nc_nc),
  * preceded by the timestamp fields.
  * struct namecache_ts is used in place of struct namecache when time(s) need
  * to be stored.  The nc_dotdottime field is used when a cache entry is mapping
  * both a non-dotdot directory name plus dotdot for the directory's
  * parent.
  */
 struct	namecache_ts {
 	struct	timespec nc_time;	/* timespec provided by fs */
 	struct	timespec nc_dotdottime;	/* dotdot timespec provided by fs */
 	int	nc_ticks;		/* ticks value when entry was added */
 	struct namecache nc_nc;
 };
 
 #define	nc_vp		n_un.nu_vp
 
 /*
  * Flags in namecache.nc_flag
  */
 #define NCF_WHITE	0x01
 #define NCF_ISDOTDOT	0x02
 #define	NCF_TS		0x04
 #define	NCF_DTS		0x08
 #define	NCF_DVDROP	0x10
 #define	NCF_NEGATIVE	0x20
 #define	NCF_HOTNEGATIVE	0x40
 
 /*
  * Name caching works as follows:
  *
  * Names found by directory scans are retained in a cache
  * for future reference.  It is managed LRU, so frequently
  * used names will hang around.  Cache is indexed by hash value
  * obtained from (dvp, name) where dvp refers to the directory
  * containing name.
  *
  * If it is a "negative" entry, (i.e. for a name that is known NOT to
  * exist) the vnode pointer will be NULL.
  *
  * Upon reaching the last segment of a path, if the reference
  * is for DELETE, or NOCACHE is set (rewrite), and the
  * name is located in the cache, it will be dropped.
  *
  * These locks are used (in the order in which they can be taken):
  * NAME		TYPE	ROLE
  * vnodelock	mtx	vnode lists and v_cache_dd field protection
  * bucketlock	rwlock	for access to given set of hash buckets
  * neglist	mtx	negative entry LRU management
  *
  * Additionally, ncneg_shrink_lock mtx is used to have at most one thread
  * shrinking the LRU list.
  *
  * It is legal to take multiple vnodelock and bucketlock locks. The locking
  * order is lower address first. Both are recursive.
  *
  * "." lookups are lockless.
  *
  * ".." and vnode -> name lookups require vnodelock.
  *
  * name -> vnode lookup requires the relevant bucketlock to be held for reading.
  *
  * Insertions and removals of entries require involved vnodes and bucketlocks
  * to be write-locked to prevent other threads from seeing the entry.
  *
  * Some lookups result in removal of the found entry (e.g. getting rid of a
  * negative entry with the intent to create a positive one), which poses a
  * problem when multiple threads reach the state. Similarly, two different
  * threads can purge two different vnodes and try to remove the same name.
  *
  * If the already held vnode lock is lower than the second required lock, we
  * can just take the other lock. However, in the opposite case, this could
  * deadlock. As such, this is resolved by trylocking and if that fails unlocking
  * the first node, locking everything in order and revalidating the state.
  */
 
 /*
  * Structures associated with name caching.
  */
 #define NCHHASH(hash) \
 	(&nchashtbl[(hash) & nchash])
 static __read_mostly LIST_HEAD(nchashhead, namecache) *nchashtbl;/* Hash Table */
 static u_long __read_mostly	nchash;			/* size of hash table */
 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
     "Size of namecache hash table");
 static u_long __read_mostly	ncnegfactor = 5; /* ratio of negative entries */
 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
     "Ratio of negative namecache entries");
 static u_long __exclusive_cache_line	numneg;	/* number of negative entries allocated */
 static u_long __exclusive_cache_line	numcache;/* number of cache entries allocated */
 u_int ncsizefactor = 2;
 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
     "Size factor for namecache");
 static u_int __read_mostly	ncpurgeminvnodes;
 SYSCTL_UINT(_vfs, OID_AUTO, ncpurgeminvnodes, CTLFLAG_RW, &ncpurgeminvnodes, 0,
     "Number of vnodes below which purgevfs ignores the request");
 static u_int __read_mostly	ncsize; /* the size as computed on creation or resizing */
 
 struct nchstats	nchstats;		/* cache effectiveness statistics */
 
 static struct mtx __exclusive_cache_line	ncneg_shrink_lock;
 static int	shrink_list_turn;
 
 struct neglist {
 	struct mtx		nl_lock;
 	TAILQ_HEAD(, namecache) nl_list;
 } __aligned(CACHE_LINE_SIZE);
 
 static struct neglist __read_mostly	*neglists;
 static struct neglist ncneg_hot;
 static u_long numhotneg;
 
 #define	numneglists (ncneghash + 1)
 static u_int __read_mostly	ncneghash;
 static inline struct neglist *
 NCP2NEGLIST(struct namecache *ncp)
 {
 
 	return (&neglists[(((uintptr_t)(ncp) >> 8) & ncneghash)]);
 }
 
 #define	numbucketlocks (ncbuckethash + 1)
 static u_int __read_mostly  ncbuckethash;
 static struct rwlock_padalign __read_mostly  *bucketlocks;
 #define	HASH2BUCKETLOCK(hash) \
 	((struct rwlock *)(&bucketlocks[((hash) & ncbuckethash)]))
 
 #define	numvnodelocks (ncvnodehash + 1)
 static u_int __read_mostly  ncvnodehash;
 static struct mtx __read_mostly *vnodelocks;
 static inline struct mtx *
 VP2VNODELOCK(struct vnode *vp)
 {
 
 	return (&vnodelocks[(((uintptr_t)(vp) >> 8) & ncvnodehash)]);
 }
 
 /*
  * UMA zones for the VFS cache.
  *
  * The small cache is used for entries with short names, which are the
  * most common.  The large cache is used for entries which are too big to
  * fit in the small cache.
  */
 static uma_zone_t __read_mostly cache_zone_small;
 static uma_zone_t __read_mostly cache_zone_small_ts;
 static uma_zone_t __read_mostly cache_zone_large;
 static uma_zone_t __read_mostly cache_zone_large_ts;
 
 #define	CACHE_PATH_CUTOFF	35
 
 static struct namecache *
 cache_alloc(int len, int ts)
 {
 	struct namecache_ts *ncp_ts;
 	struct namecache *ncp;
 
 	if (__predict_false(ts)) {
 		if (len <= CACHE_PATH_CUTOFF)
 			ncp_ts = uma_zalloc(cache_zone_small_ts, M_WAITOK);
 		else
 			ncp_ts = uma_zalloc(cache_zone_large_ts, M_WAITOK);
 		ncp = &ncp_ts->nc_nc;
 	} else {
 		if (len <= CACHE_PATH_CUTOFF)
 			ncp = uma_zalloc(cache_zone_small, M_WAITOK);
 		else
 			ncp = uma_zalloc(cache_zone_large, M_WAITOK);
 	}
 	return (ncp);
 }
 
 static void
 cache_free(struct namecache *ncp)
 {
 	struct namecache_ts *ncp_ts;
 
 	if (ncp == NULL)
 		return;
 	if ((ncp->nc_flag & NCF_DVDROP) != 0)
 		vdrop(ncp->nc_dvp);
 	if (__predict_false(ncp->nc_flag & NCF_TS)) {
 		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
 			uma_zfree(cache_zone_small_ts, ncp_ts);
 		else
 			uma_zfree(cache_zone_large_ts, ncp_ts);
 	} else {
 		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
 			uma_zfree(cache_zone_small, ncp);
 		else
 			uma_zfree(cache_zone_large, ncp);
 	}
 }
 
 /*
  * Copy the stored timestamp and/or ticks value of a timestamped entry out
  * to the caller.  Either output pointer may be NULL; asking for either on
  * an entry without NCF_TS trips the assertion.
  */
 static void
 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
 {
 	struct namecache_ts *tsent;
 
 	KASSERT((ncp->nc_flag & NCF_TS) != 0 ||
 	    (tsp == NULL && ticksp == NULL),
 	    ("No NCF_TS"));
 
 	if (tsp != NULL || ticksp != NULL) {
 		tsent = __containerof(ncp, struct namecache_ts, nc_nc);
 		if (tsp != NULL)
 			*tsp = tsent->nc_time;
 		if (ticksp != NULL)
 			*ticksp = tsent->nc_ticks;
 	}
 }
 
 #ifdef DEBUG_CACHE
 static int __read_mostly	doingcache = 1;	/* 1 => enable the cache */
 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0,
     "VFS namecache enabled");
 #endif
 
 /* Export size information to userland */
 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
     sizeof(struct namecache), "sizeof(struct namecache)");
 
 /*
  * The new name cache statistics
  */
 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0,
     "Name cache statistics");
 #define STATNODE_ULONG(name, descr)	\
 	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr);
 #define STATNODE_COUNTER(name, descr)	\
 	static counter_u64_t __read_mostly name; \
 	SYSCTL_COUNTER_U64(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, descr);
 STATNODE_ULONG(numneg, "Number of negative cache entries");
 STATNODE_ULONG(numcache, "Number of cache entries");
 STATNODE_COUNTER(numcachehv, "Number of namecache entries with vnodes held");
 STATNODE_COUNTER(numcalls, "Number of cache lookups");
 STATNODE_COUNTER(dothits, "Number of '.' hits");
 STATNODE_COUNTER(dotdothits, "Number of '..' hits");
 STATNODE_COUNTER(numchecks, "Number of checks in lookup");
 STATNODE_COUNTER(nummiss, "Number of cache misses");
 STATNODE_COUNTER(nummisszap, "Number of cache misses we do not want to cache");
 STATNODE_COUNTER(numposzaps,
     "Number of cache hits (positive) we do not want to cache");
 STATNODE_COUNTER(numposhits, "Number of cache hits (positive)");
 STATNODE_COUNTER(numnegzaps,
     "Number of cache hits (negative) we do not want to cache");
 STATNODE_COUNTER(numneghits, "Number of cache hits (negative)");
-/* These count for kern___getcwd(), too. */
+/* These count for vn_getcwd(), too. */
 STATNODE_COUNTER(numfullpathcalls, "Number of fullpath search calls");
 STATNODE_COUNTER(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)");
 STATNODE_COUNTER(numfullpathfail2,
     "Number of fullpath search errors (VOP_VPTOCNP failures)");
 STATNODE_COUNTER(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
 STATNODE_COUNTER(numfullpathfound, "Number of successful fullpath calls");
 STATNODE_COUNTER(zap_and_exit_bucket_relock_success,
     "Number of successful removals after relocking");
 static long zap_and_exit_bucket_fail; STATNODE_ULONG(zap_and_exit_bucket_fail,
     "Number of times zap_and_exit failed to lock");
 static long zap_and_exit_bucket_fail2; STATNODE_ULONG(zap_and_exit_bucket_fail2,
     "Number of times zap_and_exit failed to lock");
 static long cache_lock_vnodes_cel_3_failures;
 STATNODE_ULONG(cache_lock_vnodes_cel_3_failures,
     "Number of times 3-way vnode locking failed");
 STATNODE_ULONG(numhotneg, "Number of hot negative entries");
 STATNODE_COUNTER(numneg_evicted,
     "Number of negative entries evicted when adding a new entry");
 STATNODE_COUNTER(shrinking_skipped,
     "Number of times shrinking was already in progress");
 
 static void cache_zap_locked(struct namecache *ncp, bool neg_locked);
 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
-    char *buf, char **retbuf, u_int buflen);
+    char *buf, char **retbuf, size_t *buflen);
 
 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
 
 static int cache_yield;
 SYSCTL_INT(_vfs_cache, OID_AUTO, yield, CTLFLAG_RD, &cache_yield, 0,
     "Number of times cache called yield");
 
 /*
  * Yield the CPU at user priority when should_yield() asks for it, counting
  * each yield in cache_yield.
  */
 static void __noinline
 cache_maybe_yield(void)
 {
 
 	if (!should_yield())
 		return;
 	cache_yield++;
 	kern_yield(PRI_USER);
 }
 
 /*
  * Assert that the vnodelock vlp is owned; a NULL vlp is accepted.
  */
 static inline void
 cache_assert_vlp_locked(struct mtx *vlp)
 {
 
 	if (vlp == NULL)
 		return;
 	mtx_assert(vlp, MA_OWNED);
 }
 
 /*
  * Assert that the vnodelock covering vp is owned.
  */
 static inline void
 cache_assert_vnode_locked(struct vnode *vp)
 {
 
 	cache_assert_vlp_locked(VP2VNODELOCK(vp));
 }
 
 /*
  * Hash a (directory vnode, name) pair: FNV over the name bytes, then
  * continued over the bytes of the dvp pointer value.
  */
 static uint32_t
 cache_get_hash(char *name, u_char len, struct vnode *dvp)
 {
 
 	return (fnv_32_buf(&dvp, sizeof(dvp),
 	    fnv_32_buf(name, len, FNV1_32_INIT)));
 }
 
 /*
  * Find the bucket lock covering ncp by rehashing its (dvp, name) pair.
  */
 static inline struct rwlock *
 NCP2BUCKETLOCK(struct namecache *ncp)
 {
 
 	return (HASH2BUCKETLOCK(cache_get_hash(ncp->nc_name, ncp->nc_nlen,
 	    ncp->nc_dvp)));
 }
 
 #ifdef INVARIANTS
 /*
  * Assert that the bucket lock covering ncp is held in the given rw_assert()
  * mode.  Compiled to a no-op without INVARIANTS.
  */
 static void
 cache_assert_bucket_locked(struct namecache *ncp, int mode)
 {
 	struct rwlock *blp;
 
 	blp = NCP2BUCKETLOCK(ncp);
 	rw_assert(blp, mode);
 }
 #else
 #define cache_assert_bucket_locked(x, y) do { } while (0)
 #endif
 
 #define cache_sort_vnodes(x, y)	_cache_sort_vnodes((void **)(x), (void **)(y))
 /*
  * Order two pointers in place so that *p1 <= *p2 on return.  At least one
  * of them must be non-NULL.
  */
 static void
 _cache_sort_vnodes(void **p1, void **p2)
 {
 	void *first, *second;
 
 	MPASS(*p1 != NULL || *p2 != NULL);
 
 	first = *p1;
 	second = *p2;
 	if (first > second) {
 		*p1 = second;
 		*p2 = first;
 	}
 }
 
 /*
  * Write-lock every bucket lock, in ascending index order.
  */
 static void
 cache_lock_all_buckets(void)
 {
 	u_int idx;
 
 	idx = 0;
 	while (idx < numbucketlocks) {
 		rw_wlock(&bucketlocks[idx]);
 		idx++;
 	}
 }
 
 /*
  * Release the write lock on every bucket lock, in ascending index order.
  */
 static void
 cache_unlock_all_buckets(void)
 {
 	u_int idx;
 
 	idx = 0;
 	while (idx < numbucketlocks) {
 		rw_wunlock(&bucketlocks[idx]);
 		idx++;
 	}
 }
 
 /*
  * Acquire every vnodelock, in ascending index order.
  */
 static void
 cache_lock_all_vnodes(void)
 {
 	u_int idx;
 
 	idx = 0;
 	while (idx < numvnodelocks) {
 		mtx_lock(&vnodelocks[idx]);
 		idx++;
 	}
 }
 
 /*
  * Release every vnodelock, in ascending index order.
  */
 static void
 cache_unlock_all_vnodes(void)
 {
 	u_int idx;
 
 	idx = 0;
 	while (idx < numvnodelocks) {
 		mtx_unlock(&vnodelocks[idx]);
 		idx++;
 	}
 }
 
 /*
  * Try to take two vnodelocks without blocking, lower address first.
  * Returns 0 with both held, or EAGAIN with neither held.  After sorting,
  * vlp1 may be NULL, in which case only vlp2 is taken.
  */
 static int
 cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 
 	cache_sort_vnodes(&vlp1, &vlp2);
 
 	if (vlp1 != NULL && !mtx_trylock(vlp1))
 		return (EAGAIN);
 	if (mtx_trylock(vlp2))
 		return (0);
 
 	/* Second lock unavailable: back out of the first one. */
 	if (vlp1 != NULL)
 		mtx_unlock(vlp1);
 	return (EAGAIN);
 }
 
 /*
  * Acquire up to two vnodelocks, vlp1 first.  The caller passes them already
  * sorted (vlp1 <= vlp2); a NULL entry is skipped.
  */
 static void
 cache_lock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 	struct mtx *order[2];
 	int k;
 
 	MPASS(vlp1 != NULL || vlp2 != NULL);
 	MPASS(vlp1 <= vlp2);
 
 	order[0] = vlp1;
 	order[1] = vlp2;
 	for (k = 0; k < 2; k++) {
 		if (order[k] != NULL)
 			mtx_lock(order[k]);
 	}
 }
 
 /*
  * Release up to two vnodelocks, vlp1 first; a NULL entry is skipped.
  */
 static void
 cache_unlock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 	struct mtx *order[2];
 	int k;
 
 	MPASS(vlp1 != NULL || vlp2 != NULL);
 
 	order[0] = vlp1;
 	order[1] = vlp2;
 	for (k = 0; k < 2; k++) {
 		if (order[k] != NULL)
 			mtx_unlock(order[k]);
 	}
 }
 
 static int
 sysctl_nchstats(SYSCTL_HANDLER_ARGS)
 {
 	struct nchstats snap;
 
 	if (req->oldptr == NULL)
 		return (SYSCTL_OUT(req, 0, sizeof(snap)));
 
 	snap = nchstats;
 	snap.ncs_goodhits = counter_u64_fetch(numposhits);
 	snap.ncs_neghits = counter_u64_fetch(numneghits);
 	snap.ncs_badhits = counter_u64_fetch(numposzaps) +
 	    counter_u64_fetch(numnegzaps);
 	snap.ncs_miss = counter_u64_fetch(nummisszap) +
 	    counter_u64_fetch(nummiss);
 
 	return (SYSCTL_OUT(req, &snap, sizeof(snap)));
 }
 SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE | CTLFLAG_RD |
     CTLFLAG_MPSAFE, 0, 0, sysctl_nchstats, "LU",
     "VFS cache effectiveness statistics");
 
 #ifdef DIAGNOSTIC
 /*
  * Grab an atomic snapshot of the name cache hash chain lengths
  */
 static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL,
     "hash table stats");
 
 /*
  * Sysctl handler reporting the length of every name cache hash chain as an
  * array of int, one element per bucket.
  *
  * The counts are gathered into a temporary buffer while all bucket locks
  * are held and are copied out only after the locks have been dropped.
  * Returns 0 or the first error reported by SYSCTL_OUT.
  */
 static int
 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
 {
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
 	int i, error, n_nchash, *cntbuf;
 
 retry:
 	n_nchash = nchash + 1;	/* nchash is max index, not count */
 	/* Size probe: only report how much space the caller needs. */
 	if (req->oldptr == NULL)
 		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
 	/* Allocate before taking the locks; M_ZERO starts every count at 0. */
 	cntbuf = malloc(n_nchash * sizeof(int), M_TEMP, M_ZERO | M_WAITOK);
 	cache_lock_all_buckets();
 	/*
 	 * The bucket count was sampled without the locks held.  If it no
 	 * longer matches, the buffer has the wrong size: start over.
 	 */
 	if (n_nchash != nchash + 1) {
 		cache_unlock_all_buckets();
 		free(cntbuf, M_TEMP);
 		goto retry;
 	}
 	/* Scan hash tables counting entries */
 	for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++)
 		LIST_FOREACH(ncp, ncpp, nc_hash)
 			cntbuf[i]++;
 	cache_unlock_all_buckets();
 	/* Copy the counts out one element at a time, stopping on error. */
 	for (error = 0, i = 0; i < n_nchash; i++)
 		if ((error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int))) != 0)
 			break;
 	free(cntbuf, M_TEMP);
 	return (error);
 }
 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
     CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
     "nchash chain lengths");
 
 /*
  * Sysctl handler reporting summary statistics for the name cache hash table.
  *
  * Four ints are copied out, in this order: the number of buckets, the number
  * of non-empty buckets, the longest chain length, and the share of non-empty
  * buckets scaled by 10000 (i.e. in hundredths of a percent).
  *
  * The table is scanned with all bucket locks held.  Returns 0 or the first
  * error reported by SYSCTL_OUT.
  */
 static int
 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
 	int n_nchash;
 	int count, maxlength, used, pct;
 
 	/* Size probe: only report how much space the caller needs. */
 	if (!req->oldptr)
 		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
 
 	cache_lock_all_buckets();
 	n_nchash = nchash + 1;	/* nchash is max index, not count */
 	used = 0;
 	maxlength = 0;
 
 	/* Scan hash tables for applicable entries */
 	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
 		count = 0;
 		LIST_FOREACH(ncp, ncpp, nc_hash) {
 			count++;
 		}
 		if (count)
 			used++;
 		if (maxlength < count)
 			maxlength = count;
 	}
 	/* The loop counted n_nchash down to zero; reload it for reporting. */
 	n_nchash = nchash + 1;
 	cache_unlock_all_buckets();
 	/*
 	 * With fewer than 100 buckets "n_nchash / 100" is zero, so the usual
 	 * formula would divide by zero.  Small tables use the direct form
 	 * instead, which cannot overflow there because used <= n_nchash < 100.
 	 */
 	if (n_nchash >= 100)
 		pct = (used * 100) / (n_nchash / 100);
 	else if (n_nchash > 0)
 		pct = (used * 10000) / n_nchash;
 	else
 		pct = 0;
 	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &used, sizeof(used));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &pct, sizeof(pct));
 	return (error);
 }
 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
     CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
     "nchash statistics (number of total/used buckets, maximum chain length, usage percentage)");
 #endif
 
 /*
  * Negative entries management
  *
  * A variation of LRU scheme is used. New entries are hashed into one of
  * numneglists cold lists. Entries get promoted to the hot list on first hit.
  *
  * The shrinker will demote hot list head and evict from the cold list in a
  * round-robin manner.
  */
 /*
  * Record a hit on a negative entry by promoting it from its cold list to
  * the tail of the hot list.  Entries that are already hot are left alone.
  */
 static void
 cache_negative_hit(struct namecache *ncp)
 {
 	struct neglist *cold;
 
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
 	/* Cheap unlocked check to avoid the locks in the common case. */
 	if ((ncp->nc_flag & NCF_HOTNEGATIVE) != 0)
 		return;
 
 	cold = NCP2NEGLIST(ncp);
 	mtx_lock(&ncneg_hot.nl_lock);
 	mtx_lock(&cold->nl_lock);
 	/* Re-check under the locks: the entry may have been promoted. */
 	if ((ncp->nc_flag & NCF_HOTNEGATIVE) == 0) {
 		numhotneg++;
 		TAILQ_REMOVE(&cold->nl_list, ncp, nc_dst);
 		TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst);
 		ncp->nc_flag |= NCF_HOTNEGATIVE;
 	}
 	mtx_unlock(&cold->nl_lock);
 	mtx_unlock(&ncneg_hot.nl_lock);
 }
 
 /*
  * Append a negative entry to the tail of its cold list and bump numneg.
  *
  * The entry's bucket must be write-locked.  If neg_locked is true the
  * caller already owns the list lock; otherwise it is taken and released
  * here.
  */
 static void
 cache_negative_insert(struct namecache *ncp, bool neg_locked)
 {
 	struct neglist *cold;
 
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
 	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 
 	cold = NCP2NEGLIST(ncp);
 	if (neg_locked) {
 		mtx_assert(&cold->nl_lock, MA_OWNED);
 		TAILQ_INSERT_TAIL(&cold->nl_list, ncp, nc_dst);
 	} else {
 		mtx_lock(&cold->nl_lock);
 		TAILQ_INSERT_TAIL(&cold->nl_list, ncp, nc_dst);
 		mtx_unlock(&cold->nl_lock);
 	}
 	atomic_add_rel_long(&numneg, 1);
 }
 
 /*
  * Unlink a negative entry from whichever list it currently sits on (the
  * hot list or its cold list) and decrement numneg.
  *
  * The entry's bucket must be write-locked.  If neg_locked is true the
  * caller is expected to already hold the lock protecting that list (this is
  * asserted below); otherwise the required lock is taken and released here.
  */
 static void
 cache_negative_remove(struct namecache *ncp, bool neg_locked)
 {
 	struct neglist *neglist;
 	bool hot_locked = false;
 	bool list_locked = false;
 
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
 	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 	neglist = NCP2NEGLIST(ncp);
 	if (!neg_locked) {
 		if (ncp->nc_flag & NCF_HOTNEGATIVE) {
 			hot_locked = true;
 			mtx_lock(&ncneg_hot.nl_lock);
 			/*
 			 * The flag was sampled without the lock.  If the entry
 			 * is no longer hot, the cold list lock is needed too.
 			 */
 			if (!(ncp->nc_flag & NCF_HOTNEGATIVE)) {
 				list_locked = true;
 				mtx_lock(&neglist->nl_lock);
 			}
 		} else {
 			list_locked = true;
 			mtx_lock(&neglist->nl_lock);
 		}
 	}
 	/* Remove from the list named by the current state of the flag. */
 	if (ncp->nc_flag & NCF_HOTNEGATIVE) {
 		mtx_assert(&ncneg_hot.nl_lock, MA_OWNED);
 		TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
 		numhotneg--;
 	} else {
 		mtx_assert(&neglist->nl_lock, MA_OWNED);
 		TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst);
 	}
 	/* Drop only the locks taken above, cold list first. */
 	if (list_locked)
 		mtx_unlock(&neglist->nl_lock);
 	if (hot_locked)
 		mtx_unlock(&ncneg_hot.nl_lock);
 	atomic_subtract_rel_long(&numneg, 1);
 }
 
 /*
  * Find the first non-empty cold list at index start or higher.
  *
  * On success *ncpp is the head entry of that list, *neglistpp is the list
  * and the list's nl_lock is returned held.  If no entry was found *ncpp is
  * NULL and no lock is held; *neglistpp is then the last list examined, or
  * NULL if the loop body never ran.
  */
 static void
 cache_negative_shrink_select(int start, struct namecache **ncpp,
     struct neglist **neglistpp)
 {
 	struct neglist *neglist;
 	struct namecache *ncp;
 	int i;
 
 	*ncpp = ncp = NULL;
 	neglist = NULL;
 
 	for (i = start; i < numneglists; i++) {
 		neglist = &neglists[i];
 		/* Unlocked peek to skip empty lists without taking the lock. */
 		if (TAILQ_FIRST(&neglist->nl_list) == NULL)
 			continue;
 		mtx_lock(&neglist->nl_lock);
 		/* Re-read the head now that the list cannot change. */
 		ncp = TAILQ_FIRST(&neglist->nl_list);
 		if (ncp != NULL)
 			break;
 		mtx_unlock(&neglist->nl_lock);
 	}
 
 	*neglistpp = neglist;
 	*ncpp = ncp;
 }
 
 /*
  * Evict one negative entry.
  *
  * First the head of the hot list, if any, is demoted to the tail of its
  * cold list.  Then a victim is picked from the cold lists, starting at
  * shrink_list_turn, and zapped if it is still valid once all the required
  * locks have been taken.  If another thread holds ncneg_shrink_lock the
  * call is counted in shrinking_skipped and nothing else happens.
  */
 static void
 cache_negative_zap_one(void)
 {
 	struct namecache *ncp, *ncp2;
 	struct neglist *neglist;
 	struct mtx *dvlp;
 	struct rwlock *blp;
 
 	/* Only one shrinker at a time; do not wait for the other one. */
 	if (mtx_owner(&ncneg_shrink_lock) != NULL ||
 	    !mtx_trylock(&ncneg_shrink_lock)) {
 		counter_u64_add(shrinking_skipped, 1);
 		return;
 	}
 
 	/* Demote the hot list head to the tail of its cold list. */
 	mtx_lock(&ncneg_hot.nl_lock);
 	ncp = TAILQ_FIRST(&ncneg_hot.nl_list);
 	if (ncp != NULL) {
 		neglist = NCP2NEGLIST(ncp);
 		mtx_lock(&neglist->nl_lock);
 		TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
 		TAILQ_INSERT_TAIL(&neglist->nl_list, ncp, nc_dst);
 		ncp->nc_flag &= ~NCF_HOTNEGATIVE;
 		numhotneg--;
 		mtx_unlock(&neglist->nl_lock);
 	}
 	mtx_unlock(&ncneg_hot.nl_lock);
 
 	/*
 	 * Pick a victim starting from this turn's list, then advance the
 	 * turn.  If nothing was found and the turn wrapped around to 0,
 	 * scan once more from the beginning.
 	 */
 	cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist);
 	shrink_list_turn++;
 	if (shrink_list_turn == numneglists)
 		shrink_list_turn = 0;
 	if (ncp == NULL && shrink_list_turn == 0)
 		cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist);
 	mtx_unlock(&ncneg_shrink_lock);
 	if (ncp == NULL)
 		return;
 
 	/*
 	 * A victim was found, so the list lock is held here.  Note which
 	 * vnode and bucket locks the entry needs, then drop the list lock
 	 * and retake everything in the order: vnode lock, bucket lock,
 	 * list lock.
 	 */
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
 	dvlp = VP2VNODELOCK(ncp->nc_dvp);
 	blp = NCP2BUCKETLOCK(ncp);
 	mtx_unlock(&neglist->nl_lock);
 	mtx_lock(dvlp);
 	rw_wlock(blp);
 	mtx_lock(&neglist->nl_lock);
 	/*
 	 * The list was unlocked for a moment.  Only proceed if the same
 	 * entry is still at the head, still maps to the locks taken above
 	 * and is still negative.
 	 */
 	ncp2 = TAILQ_FIRST(&neglist->nl_list);
 	if (ncp != ncp2 || dvlp != VP2VNODELOCK(ncp2->nc_dvp) ||
 	    blp != NCP2BUCKETLOCK(ncp2) || !(ncp2->nc_flag & NCF_NEGATIVE)) {
 		ncp = NULL;
 	} else {
 		SDT_PROBE2(vfs, namecache, shrink_negative, done, ncp->nc_dvp,
 		    ncp->nc_name);
 
 		/* true: the list lock is already held by this thread. */
 		cache_zap_locked(ncp, true);
 		counter_u64_add(numneg_evicted, 1);
 	}
 	mtx_unlock(&neglist->nl_lock);
 	rw_wunlock(blp);
 	mtx_unlock(dvlp);
 	/* ncp is NULL here when validation failed. */
 	cache_free(ncp);
 }
 
 /*
  * cache_zap_locked():
  *
  *   Removes a namecache entry from cache, whether it contains an actual
  *   pointer to a vnode or if it is just a negative cache entry.
  */
 /*
  * The caller must hold the vnode lock of the directory, the vnode lock of
  * the target for positive entries, and the entry's bucket lock for writing
  * (all asserted below).  neg_locked is passed through to
  * cache_negative_remove() for negative entries.
  *
  * The entry is only unlinked here, not freed.  NCF_DVDROP is set on it when
  * the removal left the directory's v_cache_src list empty.
  */
 static void
 cache_zap_locked(struct namecache *ncp, bool neg_locked)
 {
 
 	if (!(ncp->nc_flag & NCF_NEGATIVE))
 		cache_assert_vnode_locked(ncp->nc_vp);
 	cache_assert_vnode_locked(ncp->nc_dvp);
 	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 
 	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp,
 	    (ncp->nc_flag & NCF_NEGATIVE) ? NULL : ncp->nc_vp);
 	/* Unhash first so that lookups can no longer find the entry. */
 	LIST_REMOVE(ncp, nc_hash);
 	if (!(ncp->nc_flag & NCF_NEGATIVE)) {
 		SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
 		    ncp->nc_name, ncp->nc_vp);
 		/* Positive entry: detach from the target vnode. */
 		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
 		if (ncp == ncp->nc_vp->v_cache_dd)
 			ncp->nc_vp->v_cache_dd = NULL;
 	} else {
 		SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
 		    ncp->nc_name);
 		cache_negative_remove(ncp, neg_locked);
 	}
 	if (ncp->nc_flag & NCF_ISDOTDOT) {
 		/* ".." entries are not on v_cache_src, only in v_cache_dd. */
 		if (ncp == ncp->nc_dvp->v_cache_dd)
 			ncp->nc_dvp->v_cache_dd = NULL;
 	} else {
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
 			ncp->nc_flag |= NCF_DVDROP;
 			counter_u64_add(numcachehv, -1);
 		}
 	}
 	atomic_subtract_rel_long(&numcache, 1);
 }
 
 /*
  * Zap a negative entry of directory vp.  The vnode lock of vp is held by
  * the caller and stays held; only the bucket lock is taken and released
  * here.
  */
 static void
 cache_zap_negative_locked_vnode_kl(struct namecache *ncp, struct vnode *vp)
 {
 	struct rwlock *bucket;
 
 	MPASS(ncp->nc_dvp == vp);
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
 	cache_assert_vnode_locked(vp);
 
 	bucket = NCP2BUCKETLOCK(ncp);
 
 	rw_wlock(bucket);
 	cache_zap_locked(ncp, false);
 	rw_wunlock(bucket);
 }
 
 /*
  * Zap an entry while keeping the vnode lock of vp held.
  *
  * vp is either the directory or the target of ncp and its vnode lock is
  * held by the caller.  *vlpp optionally names one more vnode lock that the
  * caller already holds (NULL if none).
  *
  * Returns true once the entry has been zapped; *vlpp is NULL and any lock
  * it named has been released.  Returns false if the second vnode lock could
  * not be taken without blocking.  In that case both vnode locks have been
  * (re)acquired in sorted order, *vlpp names the lower one, and the entry
  * has NOT been zapped, so the caller must re-validate and retry.
  */
 static bool
 cache_zap_locked_vnode_kl2(struct namecache *ncp, struct vnode *vp,
     struct mtx **vlpp)
 {
 	struct mtx *pvlp, *vlp1, *vlp2, *to_unlock;
 	struct rwlock *blp;
 
 	MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp);
 	cache_assert_vnode_locked(vp);
 
 	/* Negative entries need no second vnode lock. */
 	if (ncp->nc_flag & NCF_NEGATIVE) {
 		if (*vlpp != NULL) {
 			mtx_unlock(*vlpp);
 			*vlpp = NULL;
 		}
 		cache_zap_negative_locked_vnode_kl(ncp, vp);
 		return (true);
 	}
 
 	pvlp = VP2VNODELOCK(vp);
 	blp = NCP2BUCKETLOCK(ncp);
 	vlp1 = VP2VNODELOCK(ncp->nc_dvp);
 	vlp2 = VP2VNODELOCK(ncp->nc_vp);
 
 	if (*vlpp == vlp1 || *vlpp == vlp2) {
 		/* The extra lock passed in is one this entry needs. */
 		to_unlock = *vlpp;
 		*vlpp = NULL;
 	} else {
 		/* The extra lock is of no use for this entry; drop it. */
 		if (*vlpp != NULL) {
 			mtx_unlock(*vlpp);
 			*vlpp = NULL;
 		}
 		cache_sort_vnodes(&vlp1, &vlp2);
 		if (vlp1 == pvlp) {
 			/* The held lock sorts first: blocking is fine. */
 			mtx_lock(vlp2);
 			to_unlock = vlp2;
 		} else {
 			/*
 			 * The missing lock sorts before the held one, so
 			 * it may only be tried.
 			 */
 			if (!mtx_trylock(vlp1))
 				goto out_relock;
 			to_unlock = vlp1;
 		}
 	}
 	rw_wlock(blp);
 	cache_zap_locked(ncp, false);
 	rw_wunlock(blp);
 	if (to_unlock != NULL)
 		mtx_unlock(to_unlock);
 	return (true);
 
 out_relock:
 	/* Drop the held lock and take both in sorted order. */
 	mtx_unlock(vlp2);
 	mtx_lock(vlp1);
 	mtx_lock(vlp2);
 	MPASS(*vlpp == NULL);
 	*vlpp = vlp1;
 	return (false);
 }
 
 /*
  * Zap an entry given that the vnode lock of vp is held.
  *
  * vp is either the directory or the target of ncp.  The vnode lock of vp
  * is always released before returning, whatever the outcome.
  *
  * Returns 0 if the entry was zapped, or EAGAIN if the other vnode lock
  * could not be taken without blocking (the entry is then left in place).
  */
 static int __noinline
 cache_zap_locked_vnode(struct namecache *ncp, struct vnode *vp)
 {
 	struct mtx *pvlp, *vlp1, *vlp2, *to_unlock;
 	struct rwlock *blp;
 	int error = 0;
 
 	MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp);
 	cache_assert_vnode_locked(vp);
 
 	pvlp = VP2VNODELOCK(vp);
 	/* Negative entries need no second vnode lock. */
 	if (ncp->nc_flag & NCF_NEGATIVE) {
 		cache_zap_negative_locked_vnode_kl(ncp, vp);
 		goto out;
 	}
 
 	blp = NCP2BUCKETLOCK(ncp);
 	vlp1 = VP2VNODELOCK(ncp->nc_dvp);
 	vlp2 = VP2VNODELOCK(ncp->nc_vp);
 	cache_sort_vnodes(&vlp1, &vlp2);
 	if (vlp1 == pvlp) {
 		/* The held lock sorts first: blocking on the other is fine. */
 		mtx_lock(vlp2);
 		to_unlock = vlp2;
 	} else {
 		/* The missing lock sorts first, so it may only be tried. */
 		if (!mtx_trylock(vlp1)) {
 			error = EAGAIN;
 			goto out;
 		}
 		to_unlock = vlp1;
 	}
 	rw_wlock(blp);
 	cache_zap_locked(ncp, false);
 	rw_wunlock(blp);
 	mtx_unlock(to_unlock);
 out:
 	mtx_unlock(pvlp);
 	return (error);
 }
 
 /*
  * If trylocking failed we can get here. We know enough to take all needed locks
  * in the right order and re-lookup the entry.
  */
 /*
  * Slow path of entry removal, entered with no cache locks held.
  *
  * Takes the vnode locks in sorted order and then the bucket lock, and
  * searches the hash chain for the very same entry (same pointer, directory
  * and name).  Returns 0 if it was found and zapped, EAGAIN if it is gone.
  * All locks taken here are released before returning.
  */
 static int
 cache_zap_unlocked_bucket(struct namecache *ncp, struct componentname *cnp,
     struct vnode *dvp, struct mtx *dvlp, struct mtx *vlp, uint32_t hash,
     struct rwlock *blp)
 {
 	struct namecache *found;
 
 	cache_assert_bucket_locked(ncp, RA_UNLOCKED);
 
 	cache_sort_vnodes(&dvlp, &vlp);
 	cache_lock_vnodes(dvlp, vlp);
 	rw_wlock(blp);
 
 	/* ncp is only compared by address until it is found on the chain. */
 	LIST_FOREACH(found, (NCHHASH(hash)), nc_hash) {
 		if (found == ncp && found->nc_dvp == dvp &&
 		    found->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(found->nc_name, cnp->cn_nameptr, found->nc_nlen))
 			break;
 	}
 
 	if (found == NULL) {
 		rw_wunlock(blp);
 		cache_unlock_vnodes(dvlp, vlp);
 		return (EAGAIN);
 	}
 
 	cache_zap_locked(found, false);
 	rw_wunlock(blp);
 	cache_unlock_vnodes(dvlp, vlp);
 	counter_u64_add(zap_and_exit_bucket_relock_success, 1);
 	return (0);
 }
 
 /*
  * Zap an entry whose bucket is write-locked by the caller.
  *
  * The vnode locks are only tried, since the bucket lock is already held.
  * If that fails the bucket lock is dropped and the work is handed to
  * cache_zap_unlocked_bucket().  The bucket lock is released in either case.
  *
  * Returns 0 if the entry was zapped, otherwise the result of the slow path.
  */
 static int __noinline
 cache_zap_wlocked_bucket(struct namecache *ncp, struct componentname *cnp,
     uint32_t hash, struct rwlock *blp)
 {
 	struct mtx *dvlp, *vlp;
 	struct vnode *dvp;
 
 	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 
 	dvlp = VP2VNODELOCK(ncp->nc_dvp);
 	vlp = NULL;
 	if (!(ncp->nc_flag & NCF_NEGATIVE))
 		vlp = VP2VNODELOCK(ncp->nc_vp);
 
 	if (cache_trylock_vnodes(dvlp, vlp) != 0) {
 		/* Read the directory while the bucket is still locked. */
 		dvp = ncp->nc_dvp;
 		rw_wunlock(blp);
 		return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp,
 		    hash, blp));
 	}
 
 	cache_zap_locked(ncp, false);
 	rw_wunlock(blp);
 	cache_unlock_vnodes(dvlp, vlp);
 	return (0);
 }
 
 /*
  * Zap an entry whose bucket is read-locked by the caller.
  *
  * The vnode locks are only tried, since the bucket lock is already held.
  * On success the read lock is swapped for a write lock before zapping.
  * Otherwise the bucket lock is dropped and the work is handed to
  * cache_zap_unlocked_bucket().  The bucket lock is released in either case.
  *
  * Returns 0 if the entry was zapped, otherwise the result of the slow path.
  */
 static int __noinline
 cache_zap_rlocked_bucket(struct namecache *ncp, struct componentname *cnp,
     uint32_t hash, struct rwlock *blp)
 {
 	struct mtx *dvlp, *vlp;
 	struct vnode *dvp;
 
 	cache_assert_bucket_locked(ncp, RA_RLOCKED);
 
 	dvlp = VP2VNODELOCK(ncp->nc_dvp);
 	vlp = NULL;
 	if (!(ncp->nc_flag & NCF_NEGATIVE))
 		vlp = VP2VNODELOCK(ncp->nc_vp);
 
 	if (cache_trylock_vnodes(dvlp, vlp) != 0) {
 		/* Read the directory while the bucket is still locked. */
 		dvp = ncp->nc_dvp;
 		rw_runlock(blp);
 		return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp,
 		    hash, blp));
 	}
 
 	rw_runlock(blp);
 	rw_wlock(blp);
 	cache_zap_locked(ncp, false);
 	rw_wunlock(blp);
 	cache_unlock_vnodes(dvlp, vlp);
 	return (0);
 }
 
 /*
  * Zap an entry whose bucket is write-locked, keeping the bucket lock held.
  *
  * *vlpp1 and *vlpp2 name vnode locks the caller already holds (either may
  * be NULL).
  *
  * Returns 0 if the entry was zapped; the vnode locks used for it have been
  * released and both *vlpp1 and *vlpp2 are NULL.  Returns EAGAIN if the
  * vnode locks could not be taken without blocking; the bucket lock was then
  * dropped and retaken, *vlpp1 and *vlpp2 name the (sorted) vnode locks of
  * the entry, which are now held, and the entry has NOT been zapped, so the
  * caller must look it up again.
  */
 static int
 cache_zap_wlocked_bucket_kl(struct namecache *ncp, struct rwlock *blp,
     struct mtx **vlpp1, struct mtx **vlpp2)
 {
 	struct mtx *dvlp, *vlp;
 
 	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 
 	dvlp = VP2VNODELOCK(ncp->nc_dvp);
 	vlp = NULL;
 	if (!(ncp->nc_flag & NCF_NEGATIVE))
 		vlp = VP2VNODELOCK(ncp->nc_vp);
 	cache_sort_vnodes(&dvlp, &vlp);
 
 	/* The locks carried over from a previous attempt are the right ones. */
 	if (*vlpp1 == dvlp && *vlpp2 == vlp) {
 		cache_zap_locked(ncp, false);
 		cache_unlock_vnodes(dvlp, vlp);
 		*vlpp1 = NULL;
 		*vlpp2 = NULL;
 		return (0);
 	}
 
 	/* Wrong locks for this entry: release whatever was carried over. */
 	if (*vlpp1 != NULL)
 		mtx_unlock(*vlpp1);
 	if (*vlpp2 != NULL)
 		mtx_unlock(*vlpp2);
 	*vlpp1 = NULL;
 	*vlpp2 = NULL;
 
 	if (cache_trylock_vnodes(dvlp, vlp) == 0) {
 		cache_zap_locked(ncp, false);
 		cache_unlock_vnodes(dvlp, vlp);
 		return (0);
 	}
 
 	/*
 	 * Trylock failed.  Drop the bucket lock so that the vnode locks can
 	 * be taken with blocking, then retake the bucket lock and hand the
 	 * vnode locks back to the caller.
 	 */
 	rw_wunlock(blp);
 	*vlpp1 = dvlp;
 	*vlpp2 = vlp;
 	if (*vlpp1 != NULL)
 		mtx_lock(*vlpp1);
 	mtx_lock(*vlpp2);
 	rw_wlock(blp);
 	return (EAGAIN);
 }
 
 /*
  * Release the lock a lookup was performed under: the read-locked bucket
  * lock if blp is non-NULL, the vnode lock vlp otherwise.
  */
 static void
 cache_lookup_unlock(struct rwlock *blp, struct mtx *vlp)
 {
 
 	if (blp == NULL) {
 		mtx_unlock(vlp);
 		return;
 	}
 	rw_runlock(blp);
 }
 
 /*
  * Handle a lookup of ".": the result is dvp itself.
  *
  * An extra reference is taken on the vnode, tsp (if any) is cleared and
  * ticksp (if any) receives the current ticks value.  If the lock type
  * requested in cn_lkflags differs from how the vnode is currently locked,
  * the lock is upgraded or downgraded accordingly.
  *
  * Returns -1 on success, or ENOENT (with the reference dropped and *vpp
  * reset to NULL) if the vnode turned out to be doomed after an upgrade.
  */
 static int __noinline
 cache_lookup_dot(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct timespec *tsp, int *ticksp)
 {
 	int wanted;
 
 	*vpp = dvp;
 	CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
 			dvp, cnp->cn_nameptr);
 	counter_u64_add(dothits, 1);
 	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ".", *vpp);
 	if (tsp != NULL)
 		timespecclear(tsp);
 	if (ticksp != NULL)
 		*ticksp = ticks;
 	vrefact(*vpp);
 
 	/* The caller may want "." locked differently than it is now. */
 	wanted = cnp->cn_lkflags & LK_TYPE_MASK;
 	if (wanted == VOP_ISLOCKED(*vpp))
 		return (-1);
 
 	if (wanted != LK_EXCLUSIVE) {
 		vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
 		return (-1);
 	}
 
 	vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
 	if (VN_IS_DOOMED((*vpp))) {
 		/* forced unmount */
 		vrele(*vpp);
 		*vpp = NULL;
 		return (ENOENT);
 	}
 	return (-1);
 }
 
 /*
  * Lookup variant used when MAKEENTRY is clear: instead of returning the
  * entry, purge it from the cache.
  *
  * Always returns 0 (a miss), whether or not an entry was found.  vpp, tsp
  * and ticksp are not used.
  */
 static __noinline int
 cache_lookup_nomakeentry(struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp, struct timespec *tsp, int *ticksp)
 {
 	struct namecache *ncp;
 	struct rwlock *blp;
 	struct mtx *dvlp, *dvlp2;
 	uint32_t hash;
 	int error;
 
 	/* ".." is reached through dvp->v_cache_dd, not the hash table. */
 	if (cnp->cn_namelen == 2 &&
 	    cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') {
 		counter_u64_add(dotdothits, 1);
 		dvlp = VP2VNODELOCK(dvp);
 		dvlp2 = NULL;
 		mtx_lock(dvlp);
 retry_dotdot:
 		ncp = dvp->v_cache_dd;
 		if (ncp == NULL) {
 			SDT_PROBE3(vfs, namecache, lookup, miss, dvp,
 			    "..", NULL);
 			mtx_unlock(dvlp);
 			if (dvlp2 != NULL)
 				mtx_unlock(dvlp2);
 			return (0);
 		}
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) {
 			if (ncp->nc_dvp != dvp)
 				panic("dvp %p v_cache_dd %p\n", dvp, ncp);
 			/*
 			 * A false return means locks were reshuffled
 			 * (dvlp2 may now be held) and nothing was zapped:
 			 * re-read v_cache_dd and try again.
 			 */
 			if (!cache_zap_locked_vnode_kl2(ncp,
 			    dvp, &dvlp2))
 				goto retry_dotdot;
 			MPASS(dvp->v_cache_dd == NULL);
 			mtx_unlock(dvlp);
 			if (dvlp2 != NULL)
 				mtx_unlock(dvlp2);
 			cache_free(ncp);
 		} else {
 			/*
 			 * v_cache_dd refers to a regular entry; only the
 			 * shortcut pointer is cleared, the entry stays.
 			 */
 			dvp->v_cache_dd = NULL;
 			mtx_unlock(dvlp);
 			if (dvlp2 != NULL)
 				mtx_unlock(dvlp2);
 		}
 		return (0);
 	}
 
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
 	blp = HASH2BUCKETLOCK(hash);
 retry:
 	/* Unlocked check: an empty chain cannot hold the entry. */
 	if (LIST_EMPTY(NCHHASH(hash)))
 		goto out_no_entry;
 
 	rw_wlock(blp);
 
 	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		counter_u64_add(numchecks, 1);
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
 	/* We failed to find an entry */
 	if (ncp == NULL) {
 		rw_wunlock(blp);
 		goto out_no_entry;
 	}
 
 	/* The bucket lock is released by the callee on every path. */
 	error = cache_zap_wlocked_bucket(ncp, cnp, hash, blp);
 	if (__predict_false(error != 0)) {
 		zap_and_exit_bucket_fail++;
 		cache_maybe_yield();
 		goto retry;
 	}
 	counter_u64_add(numposzaps, 1);
 	cache_free(ncp);
 	return (0);
 out_no_entry:
 	SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr, NULL);
 	counter_u64_add(nummisszap, 1);
 	return (0);
 }
 
 /**
  * Lookup a name in the name cache
  *
  * # Arguments
  *
  * - dvp:	Parent directory in which to search.
  * - vpp:	Return argument.  Will contain desired vnode on cache hit.
  * - cnp:	Parameters of the name search.  The most interesting bits of
  *   		the cn_flags field have the following meanings:
  *   	- MAKEENTRY:	If clear, free an entry from the cache rather than look
  *   			it up.
  *   	- ISDOTDOT:	Must be set if and only if cn_nameptr == ".."
  * - tsp:	Return storage for cache timestamp.  On a successful (positive
  *   		or negative) lookup, tsp will be filled with any timespec that
  *   		was stored when this cache entry was created.  However, it will
  *   		be clear for "." entries.
  * - ticksp:	Return storage for alternate cache timestamp.  On a successful
  *   		(positive or negative) lookup, it will contain the ticks value
  *   		that was current when the cache entry was created, unless cnp
  *   		was ".".
  *
  * # Returns
  *
  * - -1:	A positive cache hit.  vpp will contain the desired vnode.
  * - ENOENT:	A negative cache hit, or dvp was recycled out from under us due
  *		to a forced unmount.  vpp will not be modified.  If the entry
  *		is a whiteout, then the ISWHITEOUT flag will be set in
  *		cnp->cn_flags.
  * - 0:		A cache miss.  vpp will not be modified.
  *
  * # Locking
  *
  * On a cache hit, vpp will be returned locked and ref'd.  If we're looking up
  * .., dvp is unlocked.  If we're looking up . an extra ref is taken, but the
  * lock is not recursively acquired.
  */
 int
 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct timespec *tsp, int *ticksp)
 {
 	struct namecache_ts *ncp_ts;
 	struct namecache *ncp;
 	struct rwlock *blp;
 	struct mtx *dvlp;
 	uint32_t hash;
 	enum vgetstate vs;
 	int error, ltype;
 
 #ifdef DEBUG_CACHE
 	if (__predict_false(!doingcache)) {
 		cnp->cn_flags &= ~MAKEENTRY;
 		return (0);
 	}
 #endif
 
 	counter_u64_add(numcalls, 1);
 
 	/* "." and the purge-only mode are handled by dedicated helpers. */
 	if (__predict_false(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.'))
 		return (cache_lookup_dot(dvp, vpp, cnp, tsp, ticksp));
 
 	if ((cnp->cn_flags & MAKEENTRY) == 0)
 		return (cache_lookup_nomakeentry(dvp, vpp, cnp, tsp, ticksp));
 
 retry:
 	/*
 	 * Exactly one of blp and dvlp is set below, depending on whether
 	 * the lookup goes through the hash table or through v_cache_dd.
 	 * cache_lookup_unlock() and zap_and_exit key off blp != NULL.
 	 */
 	blp = NULL;
 	dvlp = NULL;
 	error = 0;
 	if (cnp->cn_namelen == 2 &&
 	    cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') {
 		/* ".." is found via dvp->v_cache_dd under the vnode lock. */
 		counter_u64_add(dotdothits, 1);
 		dvlp = VP2VNODELOCK(dvp);
 		mtx_lock(dvlp);
 		ncp = dvp->v_cache_dd;
 		if (ncp == NULL) {
 			SDT_PROBE3(vfs, namecache, lookup, miss, dvp,
 			    "..", NULL);
 			mtx_unlock(dvlp);
 			return (0);
 		}
 		/*
 		 * v_cache_dd is either a dedicated ".." entry, whose target
 		 * is the parent, or the regular entry naming dvp, whose
 		 * directory is the parent.
 		 */
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) {
 			if (ncp->nc_flag & NCF_NEGATIVE)
 				*vpp = NULL;
 			else
 				*vpp = ncp->nc_vp;
 		} else
 			*vpp = ncp->nc_dvp;
 		/* Return failure if negative entry was found. */
 		if (*vpp == NULL)
 			goto negative_success;
 		CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
 		    dvp, cnp->cn_nameptr, *vpp);
 		SDT_PROBE3(vfs, namecache, lookup, hit, dvp, "..",
 		    *vpp);
 		cache_out_ts(ncp, tsp, ticksp);
 		/*
 		 * For a regular entry carrying a separate ".." timestamp,
 		 * report that one instead.
 		 */
 		if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
 		    NCF_DTS && tsp != NULL) {
 			ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 			*tsp = ncp_ts->nc_dotdottime;
 		}
 		goto success;
 	}
 
 	/* Regular name: search the hash chain under the bucket read lock. */
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
 	blp = HASH2BUCKETLOCK(hash);
 	rw_rlock(blp);
 
 	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		counter_u64_add(numchecks, 1);
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
 	/* We failed to find an entry */
 	if (__predict_false(ncp == NULL)) {
 		rw_runlock(blp);
 		SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
 		    NULL);
 		counter_u64_add(nummiss, 1);
 		return (0);
 	}
 
 	if (ncp->nc_flag & NCF_NEGATIVE)
 		goto negative_success;
 
 	/* We found a "positive" match, return the vnode */
 	counter_u64_add(numposhits, 1);
 	*vpp = ncp->nc_vp;
 	CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
 	    dvp, cnp->cn_nameptr, *vpp, ncp);
 	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
 	    *vpp);
 	cache_out_ts(ncp, tsp, ticksp);
 success:
 	/*
 	 * On success we return a locked and ref'd vnode as per the lookup
 	 * protocol.
 	 */
 	MPASS(dvp != *vpp);
 	ltype = 0;	/* silence gcc warning */
 	/* For "..", dvp is unlocked while the parent is being locked. */
 	if (cnp->cn_flags & ISDOTDOT) {
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp);
 	}
 	/*
 	 * vget_prep() runs while the cache lock is still held;
 	 * vget_finish() runs only after it has been dropped.
 	 */
 	vs = vget_prep(*vpp);
 	cache_lookup_unlock(blp, dvlp);
 	error = vget_finish(*vpp, cnp->cn_lkflags, vs);
 	if (cnp->cn_flags & ISDOTDOT) {
 		/* Restore the lock on dvp with the type it had before. */
 		vn_lock(dvp, ltype | LK_RETRY);
 		if (VN_IS_DOOMED(dvp)) {
 			if (error == 0)
 				vput(*vpp);
 			*vpp = NULL;
 			return (ENOENT);
 		}
 	}
 	/* The target could not be obtained: start the lookup over. */
 	if (error) {
 		*vpp = NULL;
 		goto retry;
 	}
 	if ((cnp->cn_flags & ISLASTCN) &&
 	    (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
 		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
 	}
 	return (-1);
 
 negative_success:
 	/* We found a negative match, and want to create it, so purge */
 	if (cnp->cn_nameiop == CREATE) {
 		counter_u64_add(numnegzaps, 1);
 		goto zap_and_exit;
 	}
 
 	counter_u64_add(numneghits, 1);
 	cache_negative_hit(ncp);
 	if (ncp->nc_flag & NCF_WHITE)
 		cnp->cn_flags |= ISWHITEOUT;
 	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
 	    ncp->nc_name);
 	cache_out_ts(ncp, tsp, ticksp);
 	cache_lookup_unlock(blp, dvlp);
 	return (ENOENT);
 
 zap_and_exit:
 	/*
 	 * Both helpers release the lock the lookup was done under, on
 	 * success and on failure alike.
 	 */
 	if (blp != NULL)
 		error = cache_zap_rlocked_bucket(ncp, cnp, hash, blp);
 	else
 		error = cache_zap_locked_vnode(ncp, dvp);
 	if (__predict_false(error != 0)) {
 		zap_and_exit_bucket_fail2++;
 		cache_maybe_yield();
 		goto retry;
 	}
 	cache_free(ncp);
 	return (0);
 }
 
 /*
  * Set of locks held while an entry is being added to the cache: up to
  * three vnode locks and up to two bucket locks.  A NULL slot means that no
  * lock is recorded there.
  */
 struct celockstate {
 	struct mtx *vlp[3];
 	struct rwlock *blp[2];
 };
 /* The lock/unlock helpers address the slots by fixed index. */
 CTASSERT((nitems(((struct celockstate *)0)->vlp) == 3));
 CTASSERT((nitems(((struct celockstate *)0)->blp) == 2));
 
 /*
  * Reset a lock state so that every vnode and bucket lock slot is NULL.
  */
 static inline void
 cache_celockstate_init(struct celockstate *cel)
 {
 
 	bzero(cel, sizeof(struct celockstate));
 }
 
 /*
  * Lock the vnode locks of vp and dvp in sorted order and record them in
  * slots 0 and 1 of cel.  All three vnode slots must be empty on entry and
  * at least one of the vnodes must be non-NULL.  After sorting the first
  * lock may be NULL, in which case slot 0 is left empty.
  */
 static void
 cache_lock_vnodes_cel(struct celockstate *cel, struct vnode *vp,
     struct vnode *dvp)
 {
 	struct mtx *lo, *hi;
 
 	MPASS(cel->vlp[0] == NULL);
 	MPASS(cel->vlp[1] == NULL);
 	MPASS(cel->vlp[2] == NULL);
 
 	MPASS(vp != NULL || dvp != NULL);
 
 	lo = VP2VNODELOCK(vp);
 	hi = VP2VNODELOCK(dvp);
 	cache_sort_vnodes(&lo, &hi);
 
 	if (lo != NULL) {
 		mtx_lock(lo);
 		cel->vlp[0] = lo;
 	}
 	mtx_lock(hi);
 	cel->vlp[1] = hi;
 }
 
 static void
 cache_unlock_vnodes_cel(struct celockstate *cel)
 {
 
 	MPASS(cel->vlp[0] != NULL || cel->vlp[1] != NULL);
 
 	if (cel->vlp[0] != NULL)
 		mtx_unlock(cel->vlp[0]);
 	if (cel->vlp[1] != NULL)
 		mtx_unlock(cel->vlp[1]);
 	if (cel->vlp[2] != NULL)
 		mtx_unlock(cel->vlp[2]);
 }
 
 /*
  * Add the vnode lock of vp as the third vnode lock of cel, whose first two
  * slots are already locked.
  *
  * Returns true if the lock was obtained without letting go of the other
  * two.  Returns false if all the locks had to be dropped and retaken in
  * address order; the caller must then re-validate whatever it read under
  * the old locks.  In both cases the lock is held and recorded in slot 2
  * on return.
  */
 static bool
 cache_lock_vnodes_cel_3(struct celockstate *cel, struct vnode *vp)
 {
 	struct mtx *vlp;
 	bool ret;
 
 	cache_assert_vlp_locked(cel->vlp[0]);
 	cache_assert_vlp_locked(cel->vlp[1]);
 	MPASS(cel->vlp[2] == NULL);
 
 	MPASS(vp != NULL);
 	vlp = VP2VNODELOCK(vp);
 
 	ret = true;
 	if (vlp >= cel->vlp[1]) {
 		/* Sorts after everything held: blocking keeps the order. */
 		mtx_lock(vlp);
 	} else {
 		/* Out of order: only a trylock is allowed at first. */
 		if (mtx_trylock(vlp))
 			goto out;
 		cache_lock_vnodes_cel_3_failures++;
 		/* Start over and take all three in address order. */
 		cache_unlock_vnodes_cel(cel);
 		if (vlp < cel->vlp[0]) {
 			mtx_lock(vlp);
 			mtx_lock(cel->vlp[0]);
 			mtx_lock(cel->vlp[1]);
 		} else {
 			if (cel->vlp[0] != NULL)
 				mtx_lock(cel->vlp[0]);
 			mtx_lock(vlp);
 			mtx_lock(cel->vlp[1]);
 		}
 		ret = false;
 	}
 out:
 	cel->vlp[2] = vlp;
 	return (ret);
 }
 
 /*
  * Write-lock up to two bucket locks in sorted order and record them in
  * cel.  Both bucket slots must be empty on entry.  After sorting the first
  * lock may be NULL, in which case slot 0 is left empty.
  */
 static void
 cache_lock_buckets_cel(struct celockstate *cel, struct rwlock *blp1,
     struct rwlock *blp2)
 {
 
 	MPASS(cel->blp[0] == NULL);
 	MPASS(cel->blp[1] == NULL);
 
 	/* The vnode lock sorter is reused for the bucket lock pair. */
 	cache_sort_vnodes(&blp1, &blp2);
 
 	if (blp1 != NULL) {
 		cel->blp[0] = blp1;
 		rw_wlock(cel->blp[0]);
 	}
 	cel->blp[1] = blp2;
 	rw_wlock(cel->blp[1]);
 }
 
 /*
  * Release the bucket write locks recorded in cel.  Slot 0 is optional,
  * slot 1 is always released.
  */
 static void
 cache_unlock_buckets_cel(struct celockstate *cel)
 {
 	struct rwlock *optional;
 
 	optional = cel->blp[0];
 	if (optional != NULL) {
 		rw_wunlock(optional);
 	}
 	rw_wunlock(cel->blp[1]);
 }
 
 /*
  * Lock part of the cache affected by the insertion.
  *
  * This means vnodelocks for dvp, vp and the relevant bucketlock.
  * However, insertion can result in removal of an old entry. In this
  * case we have an additional vnode and bucketlock pair to lock. If the
  * entry is negative, ncelock is locked instead of the vnode.
  *
  * That is, in the worst case we have to lock 3 vnodes and 2 bucketlocks, while
  * preserving the locking order (smaller address first).
  */
 static void
 cache_enter_lock(struct celockstate *cel, struct vnode *dvp, struct vnode *vp,
     uint32_t hash)
 {
 	struct namecache *ncp;
 	struct rwlock *blps[2];
 
 	/* blps[0]: bucket of the new entry; blps[1]: bucket of a victim. */
 	blps[0] = HASH2BUCKETLOCK(hash);
 	for (;;) {
 		blps[1] = NULL;
 		cache_lock_vnodes_cel(cel, dvp, vp);
 		/* Only a directory target can carry a ".." entry to replace. */
 		if (vp == NULL || vp->v_type != VDIR)
 			break;
 		ncp = vp->v_cache_dd;
 		if (ncp == NULL)
 			break;
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 		MPASS(ncp->nc_dvp == vp);
 		blps[1] = NCP2BUCKETLOCK(ncp);
 		/* A negative ".." entry has no target vnode to lock. */
 		if (ncp->nc_flag & NCF_NEGATIVE)
 			break;
 		if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp))
 			break;
 		/*
 		 * All vnodes got re-locked. Re-validate the state and if
 		 * nothing changed we are done. Otherwise restart.
 		 */
 		if (ncp == vp->v_cache_dd &&
 		    (ncp->nc_flag & NCF_ISDOTDOT) != 0 &&
 		    blps[1] == NCP2BUCKETLOCK(ncp) &&
 		    VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2])
 			break;
 		cache_unlock_vnodes_cel(cel);
 		cel->vlp[0] = NULL;
 		cel->vlp[1] = NULL;
 		cel->vlp[2] = NULL;
 	}
 	/* Bucket locks are taken last, after all the vnode locks. */
 	cache_lock_buckets_cel(cel, blps[0], blps[1]);
 }
 
 /*
  * Variant of cache_enter_lock() used when a ".." entry of dvp is about to
  * be replaced: the entry that may need to be zapped is taken from
  * dvp->v_cache_dd rather than from vp->v_cache_dd.
  */
 static void
 cache_enter_lock_dd(struct celockstate *cel, struct vnode *dvp, struct vnode *vp,
     uint32_t hash)
 {
 	struct namecache *ncp;
 	struct rwlock *blps[2];
 
 	/* blps[0]: bucket of the new entry; blps[1]: bucket of a victim. */
 	blps[0] = HASH2BUCKETLOCK(hash);
 	for (;;) {
 		blps[1] = NULL;
 		cache_lock_vnodes_cel(cel, dvp, vp);
 		ncp = dvp->v_cache_dd;
 		if (ncp == NULL)
 			break;
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 		MPASS(ncp->nc_dvp == dvp);
 		blps[1] = NCP2BUCKETLOCK(ncp);
 		/* A negative ".." entry has no target vnode to lock. */
 		if (ncp->nc_flag & NCF_NEGATIVE)
 			break;
 		if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp))
 			break;
 		/*
 		 * The vnode locks were dropped and retaken.  If the state
 		 * seen earlier still holds we are done, otherwise unlock
 		 * everything and start over.
 		 */
 		if (ncp == dvp->v_cache_dd &&
 		    (ncp->nc_flag & NCF_ISDOTDOT) != 0 &&
 		    blps[1] == NCP2BUCKETLOCK(ncp) &&
 		    VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2])
 			break;
 		cache_unlock_vnodes_cel(cel);
 		cel->vlp[0] = NULL;
 		cel->vlp[1] = NULL;
 		cel->vlp[2] = NULL;
 	}
 	/* Bucket locks are taken last, after all the vnode locks. */
 	cache_lock_buckets_cel(cel, blps[0], blps[1]);
 }
 
 /*
  * Release every lock recorded in cel: the bucket locks first, then the
  * vnode locks.
  */
 static void
 cache_enter_unlock(struct celockstate *cel)
 {
 
 	cache_unlock_buckets_cel(cel);
 	cache_unlock_vnodes_cel(cel);
 }
 
 /*
  * Prepare dvp for the insertion of a new ".." entry: zap and free a ".."
  * entry that dvp->v_cache_dd may refer to, and clear v_cache_dd in any
  * case.  Nothing is done if v_cache_dd is already NULL.
  */
 static void __noinline
 cache_enter_dotdot_prep(struct vnode *dvp, struct vnode *vp,
     struct componentname *cnp)
 {
 	struct celockstate cel;
 	struct namecache *ncp;
 	uint32_t hash;
 	int len;
 
 	/* Unlocked check; the pointer is read again under the locks below. */
 	if (dvp->v_cache_dd == NULL)
 		return;
 	len = cnp->cn_namelen;
 	cache_celockstate_init(&cel);
 	hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
 	cache_enter_lock_dd(&cel, dvp, vp, hash);
 	ncp = dvp->v_cache_dd;
 	if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT)) {
 		KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent"));
 		cache_zap_locked(ncp, false);
 	} else {
 		/* Not a ".." entry: it stays in the cache, nothing to free. */
 		ncp = NULL;
 	}
 	dvp->v_cache_dd = NULL;
 	cache_enter_unlock(&cel);
 	cache_free(ncp);
 }
 
 /*
  * Add an entry to the cache.
  */
 /*
  * Arguments:
  * - dvp:	Directory the name lives in.
  * - vp:	Vnode the name resolves to, or NULL to add a negative entry.
  * - cnp:	Name being entered.  "." is never cached.  For ".." the entry
  *		is flagged NCF_ISDOTDOT and any previous ".." entry of dvp is
  *		purged first.
  * - tsp:	Optional timestamp stored with the entry.
  * - dtsp:	Optional ".." timestamp; only looked at when tsp is given.
  *
  * The entry is silently not added if the cache is full, if an entry for
  * the same directory and name already exists (its timestamps are refreshed
  * instead), or if a ".." entry is being added and dvp->v_cache_dd is
  * already populated.
  */
 void
 cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
     struct timespec *tsp, struct timespec *dtsp)
 {
 	struct celockstate cel;
 	struct namecache *ncp, *n2, *ndd;
 	struct namecache_ts *ncp_ts, *n2_ts;
 	struct nchashhead *ncpp;
 	uint32_t hash;
 	int flag;
 	int len;
 	u_long lnumcache;
 
 	CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
 	VNASSERT(vp == NULL || !VN_IS_DOOMED(vp), vp,
 	    ("cache_enter: Adding a doomed vnode"));
 	VNASSERT(dvp == NULL || !VN_IS_DOOMED(dvp), dvp,
 	    ("cache_enter: Doomed vnode used as src"));
 
 #ifdef DEBUG_CACHE
 	if (__predict_false(!doingcache))
 		return;
 #endif
 
 	flag = 0;
 	if (__predict_false(cnp->cn_nameptr[0] == '.')) {
 		if (cnp->cn_namelen == 1)
 			return;
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
 			cache_enter_dotdot_prep(dvp, vp, cnp);
 			flag = NCF_ISDOTDOT;
 		}
 	}
 
 	/*
 	 * Avoid blowout in namecache entries.
 	 *
 	 * The slot is reserved in numcache up front.  Every path below that
 	 * does not end up inserting the entry has to give it back.
 	 */
 	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
 	if (__predict_false(lnumcache >= ncsize)) {
 		atomic_add_long(&numcache, -1);
 		return;
 	}
 
 	cache_celockstate_init(&cel);
 	ndd = NULL;
 	ncp_ts = NULL;
 
 	/*
 	 * Calculate the hash key and setup as much of the new
 	 * namecache entry as possible before acquiring the lock.
 	 */
 	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
 	ncp->nc_flag = flag;
 	ncp->nc_vp = vp;
 	if (vp == NULL)
 		ncp->nc_flag |= NCF_NEGATIVE;
 	ncp->nc_dvp = dvp;
 	if (tsp != NULL) {
 		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 		ncp_ts->nc_time = *tsp;
 		ncp_ts->nc_ticks = ticks;
 		ncp_ts->nc_nc.nc_flag |= NCF_TS;
 		if (dtsp != NULL) {
 			ncp_ts->nc_dotdottime = *dtsp;
 			ncp_ts->nc_nc.nc_flag |= NCF_DTS;
 		}
 	}
 	len = ncp->nc_nlen = cnp->cn_namelen;
 	hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
 	strlcpy(ncp->nc_name, cnp->cn_nameptr, len + 1);
 	cache_enter_lock(&cel, dvp, vp, hash);
 
 	/*
 	 * See if this vnode or negative entry is already in the cache
 	 * with this name.  This can happen with concurrent lookups of
 	 * the same path name.
 	 */
 	ncpp = NCHHASH(hash);
 	LIST_FOREACH(n2, ncpp, nc_hash) {
 		if (n2->nc_dvp == dvp &&
 		    n2->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
 			/* Refresh the timestamps of the existing entry. */
 			if (tsp != NULL) {
 				KASSERT((n2->nc_flag & NCF_TS) != 0,
 				    ("no NCF_TS"));
 				n2_ts = __containerof(n2, struct namecache_ts, nc_nc);
 				n2_ts->nc_time = ncp_ts->nc_time;
 				n2_ts->nc_ticks = ncp_ts->nc_ticks;
 				if (dtsp != NULL) {
 					n2_ts->nc_dotdottime = ncp_ts->nc_dotdottime;
 					if (ncp->nc_flag & NCF_NEGATIVE)
 						mtx_lock(&ncneg_hot.nl_lock);
 					n2_ts->nc_nc.nc_flag |= NCF_DTS;
 					if (ncp->nc_flag & NCF_NEGATIVE)
 						mtx_unlock(&ncneg_hot.nl_lock);
 				}
 			}
 			goto out_unlock_free;
 		}
 	}
 
 	if (flag == NCF_ISDOTDOT) {
 		/*
 		 * See if we are trying to add .. entry, but some other lookup
 		 * has populated v_cache_dd pointer already.
 		 */
 		if (dvp->v_cache_dd != NULL)
 			goto out_unlock_free;
 		KASSERT(vp == NULL || vp->v_type == VDIR,
 		    ("wrong vnode type %p", vp));
 		dvp->v_cache_dd = ncp;
 	}
 
 	if (vp != NULL) {
 		if (vp->v_type == VDIR) {
 			if (flag != NCF_ISDOTDOT) {
 				/*
 				 * For this case, the cache entry maps both the
 				 * directory name in it and the name ".." for the
 				 * directory's parent.
 				 */
 				if ((ndd = vp->v_cache_dd) != NULL) {
 					if ((ndd->nc_flag & NCF_ISDOTDOT) != 0)
 						cache_zap_locked(ndd, false);
 					else
 						ndd = NULL;
 				}
 				vp->v_cache_dd = ncp;
 			}
 		} else {
 			vp->v_cache_dd = NULL;
 		}
 	}
 
 	if (flag != NCF_ISDOTDOT) {
 		/* The first entry sourced at dvp puts a hold on it. */
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
 			vhold(dvp);
 			counter_u64_add(numcachehv, 1);
 		}
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
 
 	/*
 	 * Insert the new namecache entry into the appropriate chain
 	 * within the cache entries table.
 	 */
 	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
 
 	/*
 	 * If the entry is "negative", we place it into the
 	 * "negative" cache queue, otherwise, we place it into the
 	 * destination vnode's cache entries queue.
 	 */
 	if (vp != NULL) {
 		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
 		SDT_PROBE3(vfs, namecache, enter, done, dvp, ncp->nc_name,
 		    vp);
 	} else {
 		if (cnp->cn_flags & ISWHITEOUT)
 			ncp->nc_flag |= NCF_WHITE;
 		cache_negative_insert(ncp, false);
 		SDT_PROBE2(vfs, namecache, enter_negative, done, dvp,
 		    ncp->nc_name);
 	}
 	cache_enter_unlock(&cel);
 	/* Too many negative entries relative to the cache size: evict one. */
 	if (numneg * ncnegfactor > lnumcache)
 		cache_negative_zap_one();
 	/* ndd is non-NULL only if an old ".." entry was zapped above. */
 	cache_free(ndd);
 	return;
 out_unlock_free:
 	cache_enter_unlock(&cel);
 	/*
 	 * The new entry was never inserted, so cache_zap_locked() will never
 	 * run for it.  Give back the slot reserved in numcache above.
 	 */
 	atomic_add_long(&numcache, -1);
 	cache_free(ncp);
 	return;
 }
 
 /*
  * Return the smallest power of two that is strictly greater than val
  * (0 -> 1, 1 -> 2, 4 -> 8).
  *
  * The candidate is doubled until it exceeds val.  Once the top bit is
  * shifted out the candidate becomes zero, so a val with the most
  * significant bit of u_int set never satisfies the exit condition.
  */
 static u_int
 cache_roundup_2(u_int val)
 {
 	u_int pow2;
 
 	pow2 = 1;
 	while (pow2 <= val)
 		pow2 <<= 1;
 	return (pow2);
 }
 
 /*
  * Name cache initialization, from vfs_init() when we are booting
  *
  * Creates the UMA zones for cache entries, allocates the hash table and
  * the bucket/vnode/negative-list locks, and allocates the statistics
  * counters.  The dummy argument is unused.
  */
 static void
 nchinit(void *dummy __unused)
 {
 	u_int i;
 
 	/*
 	 * Four zones: "small" entries leave room for CACHE_PATH_CUTOFF name
 	 * bytes and "large" ones for NAME_MAX, each plus one extra byte.
 	 * The *_ts variants are sized for struct namecache_ts instead of
 	 * struct namecache.
 	 */
 	cache_zone_small = uma_zcreate("S VFS Cache",
 	    sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1,
 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache),
 	    UMA_ZONE_ZINIT);
 	cache_zone_small_ts = uma_zcreate("STS VFS Cache",
 	    sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1,
 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts),
 	    UMA_ZONE_ZINIT);
 	cache_zone_large = uma_zcreate("L VFS Cache",
 	    sizeof(struct namecache) + NAME_MAX + 1,
 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache),
 	    UMA_ZONE_ZINIT);
 	cache_zone_large_ts = uma_zcreate("LTS VFS Cache",
 	    sizeof(struct namecache_ts) + NAME_MAX + 1,
 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts),
 	    UMA_ZONE_ZINIT);
 
 	/* Entry limit and hash table are both scaled from desiredvnodes. */
 	ncsize = desiredvnodes * ncsizefactor;
 	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
 	/*
 	 * Bucket-lock mask: the power of two above mp_ncpus squared, minus
 	 * one, kept within [7, nchash].
 	 *
 	 * NOTE(review): numbucketlocks, numvnodelocks and numneglists are
 	 * not assigned in this function; presumably they are derived from
 	 * ncbuckethash, ncvnodehash and ncneghash -- confirm.
 	 */
 	ncbuckethash = cache_roundup_2(mp_ncpus * mp_ncpus) - 1;
 	if (ncbuckethash < 7) /* arbitrarily chosen to avoid having one lock */
 		ncbuckethash = 7;
 	if (ncbuckethash > nchash)
 		ncbuckethash = nchash;
 	bucketlocks = malloc(sizeof(*bucketlocks) * numbucketlocks, M_VFSCACHE,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < numbucketlocks; i++)
 		rw_init_flags(&bucketlocks[i], "ncbuc", RW_DUPOK | RW_RECURSE);
 	/* The vnode-lock mask reuses the bucket-lock mask. */
 	ncvnodehash = ncbuckethash;
 	vnodelocks = malloc(sizeof(*vnodelocks) * numvnodelocks, M_VFSCACHE,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < numvnodelocks; i++)
 		mtx_init(&vnodelocks[i], "ncvn", NULL, MTX_DUPOK | MTX_RECURSE);
 	ncpurgeminvnodes = numbucketlocks * 2;
 
 	/* Negative-entry lists, each with its own lock, plus the hot list. */
 	ncneghash = 3;
 	neglists = malloc(sizeof(*neglists) * numneglists, M_VFSCACHE,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < numneglists; i++) {
 		mtx_init(&neglists[i].nl_lock, "ncnegl", NULL, MTX_DEF);
 		TAILQ_INIT(&neglists[i].nl_list);
 	}
 	mtx_init(&ncneg_hot.nl_lock, "ncneglh", NULL, MTX_DEF);
 	TAILQ_INIT(&ncneg_hot.nl_list);
 
 	mtx_init(&ncneg_shrink_lock, "ncnegs", NULL, MTX_DEF);
 
 	/* Statistics counters. */
 	numcachehv = counter_u64_alloc(M_WAITOK);
 	numcalls = counter_u64_alloc(M_WAITOK);
 	dothits = counter_u64_alloc(M_WAITOK);
 	dotdothits = counter_u64_alloc(M_WAITOK);
 	numchecks = counter_u64_alloc(M_WAITOK);
 	nummiss = counter_u64_alloc(M_WAITOK);
 	nummisszap = counter_u64_alloc(M_WAITOK);
 	numposzaps = counter_u64_alloc(M_WAITOK);
 	numposhits = counter_u64_alloc(M_WAITOK);
 	numnegzaps = counter_u64_alloc(M_WAITOK);
 	numneghits = counter_u64_alloc(M_WAITOK);
 	numfullpathcalls = counter_u64_alloc(M_WAITOK);
 	numfullpathfail1 = counter_u64_alloc(M_WAITOK);
 	numfullpathfail2 = counter_u64_alloc(M_WAITOK);
 	numfullpathfail4 = counter_u64_alloc(M_WAITOK);
 	numfullpathfound = counter_u64_alloc(M_WAITOK);
 	zap_and_exit_bucket_relock_success = counter_u64_alloc(M_WAITOK);
 	numneg_evicted = counter_u64_alloc(M_WAITOK);
 	shrinking_skipped = counter_u64_alloc(M_WAITOK);
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
 
 /*
  * Resize the name cache hash table for a new vnode limit.
  *
  * A table sized for twice newmaxvnodes (rounded by cache_roundup_2() and
  * never smaller than numbucketlocks) is allocated up front.  If its mask
  * equals the current one the new table is simply released.  Otherwise
  * every entry is rehashed into the new table while all vnode and bucket
  * locks are held, ncsize is updated, and the old table is freed once the
  * locks have been dropped.
  */
 void
 cache_changesize(u_long newmaxvnodes)
 {
 	struct nchashhead *new_nchashtbl, *old_nchashtbl;
 	u_long new_nchash, old_nchash;
 	struct namecache *ncp;
 	uint32_t hash;
 	u_long newncsize;
 	u_long i;	/* same type as the hash mask it is compared against */
 
 	newncsize = newmaxvnodes * ncsizefactor;
 	newmaxvnodes = cache_roundup_2(newmaxvnodes * 2);
 	if (newmaxvnodes < numbucketlocks)
 		newmaxvnodes = numbucketlocks;
 
 	new_nchashtbl = hashinit(newmaxvnodes, M_VFSCACHE, &new_nchash);
 	/* If same hash table size, nothing to do */
 	if (nchash == new_nchash) {
 		free(new_nchashtbl, M_VFSCACHE);
 		return;
 	}
 	/*
 	 * Move everything from the old hash table to the new table.
 	 * None of the namecache entries in the table can be removed
 	 * because to do so, they have to be removed from the hash table.
 	 */
 	cache_lock_all_vnodes();
 	cache_lock_all_buckets();
 	old_nchashtbl = nchashtbl;
 	old_nchash = nchash;
 	/* Publish the new table first so NCHHASH() below resolves into it. */
 	nchashtbl = new_nchashtbl;
 	nchash = new_nchash;
 	for (i = 0; i <= old_nchash; i++) {
 		while ((ncp = LIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen,
 			    ncp->nc_dvp);
 			LIST_REMOVE(ncp, nc_hash);
 			LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
 		}
 	}
 	ncsize = newncsize;
 	cache_unlock_all_buckets();
 	cache_unlock_all_vnodes();
 	free(old_nchashtbl, M_VFSCACHE);
 }
 
 /*
  * Invalidate all entries from and to a particular vnode.
  *
  * Entries are unlinked while the vnode's namecache lock is held and are
  * parked on a local list; they are only handed to cache_free() after the
  * locks have been released.
  */
 void
 cache_purge(struct vnode *vp)
 {
 	TAILQ_HEAD(, namecache) ncps;
 	struct namecache *ncp, *nnp;
 	struct mtx *vlp, *vlp2;
 
 	CTR1(KTR_VFS, "cache_purge(%p)", vp);
 	SDT_PROBE1(vfs, namecache, purge, done, vp);
 	/* Unlocked fast path: nothing refers to or from this vnode. */
 	if (LIST_EMPTY(&vp->v_cache_src) && TAILQ_EMPTY(&vp->v_cache_dst) &&
 	    vp->v_cache_dd == NULL)
 		return;
 	TAILQ_INIT(&ncps);
 	vlp = VP2VNODELOCK(vp);
 	vlp2 = NULL;
 	mtx_lock(vlp);
 	/*
 	 * Whenever cache_zap_locked_vnode_kl2() reports failure the scan is
 	 * restarted from the top (presumably because locks were dropped and
 	 * the lists may have changed -- confirm against that helper).
 	 */
 retry:
 	/* Entries for which vp is the directory. */
 	while (!LIST_EMPTY(&vp->v_cache_src)) {
 		ncp = LIST_FIRST(&vp->v_cache_src);
 		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
 			goto retry;
 		TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst);
 	}
 	/* Entries that resolve to vp. */
 	while (!TAILQ_EMPTY(&vp->v_cache_dst)) {
 		ncp = TAILQ_FIRST(&vp->v_cache_dst);
 		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
 			goto retry;
 		TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst);
 	}
 	/* Whatever is still hanging off v_cache_dd must be a ".." entry. */
 	ncp = vp->v_cache_dd;
 	if (ncp != NULL) {
 		KASSERT(ncp->nc_flag & NCF_ISDOTDOT,
 		   ("lost dotdot link"));
 		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
 			goto retry;
 		TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst);
 	}
 	KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
 	mtx_unlock(vlp);
 	/* vlp2 is only set if the zap helper left a second lock held. */
 	if (vlp2 != NULL)
 		mtx_unlock(vlp2);
 	TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) {
 		cache_free(ncp);
 	}
 }
 
 /*
  * Invalidate all negative entries for a particular directory vnode.
  */
 void
 cache_purge_negative(struct vnode *vp)
 {
 	TAILQ_HEAD(, namecache) ncps;
 	struct namecache *ncp, *nnp;
 	struct mtx *vlp;
 
 	CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
 	SDT_PROBE1(vfs, namecache, purge_negative, done, vp);
 	if (LIST_EMPTY(&vp->v_cache_src))
 		return;
 	TAILQ_INIT(&ncps);
 	vlp = VP2VNODELOCK(vp);
 	mtx_lock(vlp);
 	LIST_FOREACH_SAFE(ncp, &vp->v_cache_src, nc_src, nnp) {
 		if (!(ncp->nc_flag & NCF_NEGATIVE))
 			continue;
 		cache_zap_negative_locked_vnode_kl(ncp, vp);
 		TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst);
 	}
 	mtx_unlock(vlp);
 	TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) {
 		cache_free(ncp);
 	}
 }
 
 /*
  * Flush all entries referencing a particular filesystem.
  *
  * Unless force is set, mounts with at most ncpurgeminvnodes vnodes are
  * left alone.  Otherwise the whole hash table is walked one bucket lock
  * at a time and every entry whose directory vnode belongs to mp is
  * zapped; the zapped entries are freed after all locks are released.
  */
 void
 cache_purgevfs(struct mount *mp, bool force)
 {
 	TAILQ_HEAD(, namecache) ncps;
 	struct mtx *vlp1, *vlp2;
 	struct rwlock *blp;
 	struct nchashhead *bucket;
 	struct namecache *ncp, *nnp;
 	u_long i, j, n_nchash;
 	int error;
 
 	/* Scan hash tables for applicable entries */
 	SDT_PROBE1(vfs, namecache, purgevfs, done, mp);
 	if (!force && mp->mnt_nvnodelistsize <= ncpurgeminvnodes)
 		return;
 	TAILQ_INIT(&ncps);
 	n_nchash = nchash + 1;
 	vlp1 = vlp2 = NULL;
 	for (i = 0; i < numbucketlocks; i++) {
 		blp = (struct rwlock *)&bucketlocks[i];
 		rw_wlock(blp);
 		/* Visit every hash chain covered by bucket lock i. */
 		for (j = i; j < n_nchash; j += numbucketlocks) {
 retry:
 			bucket = &nchashtbl[j];
 			LIST_FOREACH_SAFE(ncp, bucket, nc_hash, nnp) {
 				cache_assert_bucket_locked(ncp, RA_WLOCKED);
 				if (ncp->nc_dvp->v_mount != mp)
 					continue;
 				error = cache_zap_wlocked_bucket_kl(ncp, blp,
 				    &vlp1, &vlp2);
 				/* On failure rescan the same chain. */
 				if (error != 0)
 					goto retry;
 				TAILQ_INSERT_HEAD(&ncps, ncp, nc_dst);
 			}
 		}
 		rw_wunlock(blp);
 		/* Only yield while no vnode lock is carried over. */
 		if (vlp1 == NULL && vlp2 == NULL)
 			cache_maybe_yield();
 	}
 	if (vlp1 != NULL)
 		mtx_unlock(vlp1);
 	if (vlp2 != NULL)
 		mtx_unlock(vlp2);
 
 	TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) {
 		cache_free(ncp);
 	}
 }
 
 /*
  * Perform canonical checks and cache lookup and pass on to filesystem
  * through the vop_cachedlookup only if needed.
  */
 
 int
 vfs_cache_lookup(struct vop_lookup_args *ap)
 {
 	struct vnode *dvp;
 	int error;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct ucred *cred = cnp->cn_cred;
 	int flags = cnp->cn_flags;
 	struct thread *td = cnp->cn_thread;
 
 	*vpp = NULL;
 	dvp = ap->a_dvp;
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 
 	error = VOP_ACCESS(dvp, VEXEC, cred, td);
 	if (error)
 		return (error);
 
 	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
 	if (error == 0)
 		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
 	if (error == -1)
 		return (0);
 	return (error);
 }
 
-/*
- * XXX All of these sysctls would probably be more productive dead.
- */
-static int __read_mostly disablecwd;
-SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
-   "Disable the getcwd syscall");
-
 /* Implementation of the getcwd syscall. */
 int
 sys___getcwd(struct thread *td, struct __getcwd_args *uap)
 {
+	char *buf, *retbuf;
+	size_t buflen;
+	int error;
 
-	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen,
-	    MAXPATHLEN));
+	buflen = uap->buflen;
+	if (__predict_false(buflen < 2))
+		return (EINVAL);
+	if (buflen > MAXPATHLEN)
+		buflen = MAXPATHLEN;
+
+	buf = malloc(buflen, M_TEMP, M_WAITOK);
+	error = vn_getcwd(td, buf, &retbuf, &buflen);
+	if (error == 0)
+		error = copyout(retbuf, uap->buf, buflen);
+	free(buf, M_TEMP);
+	return (error);
 }
 
 int
-kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, size_t buflen,
-    size_t path_max)
+vn_getcwd(struct thread *td, char *buf, char **retbuf, size_t *buflen)
 {
-	char *bp, *tmpbuf;
 	struct filedesc *fdp;
 	struct vnode *cdir, *rdir;
 	int error;
 
-	if (__predict_false(disablecwd))
-		return (ENODEV);
-	if (__predict_false(buflen < 2))
-		return (EINVAL);
-	if (buflen > path_max)
-		buflen = path_max;
-
-	tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
 	fdp = td->td_proc->p_fd;
 	FILEDESC_SLOCK(fdp);
 	cdir = fdp->fd_cdir;
 	vrefact(cdir);
 	rdir = fdp->fd_rdir;
 	vrefact(rdir);
 	FILEDESC_SUNLOCK(fdp);
-	error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen);
+	error = vn_fullpath1(td, cdir, rdir, buf, retbuf, buflen);
 	vrele(rdir);
 	vrele(cdir);
 
-	if (!error) {
-		if (bufseg == UIO_SYSSPACE)
-			bcopy(bp, buf, strlen(bp) + 1);
-		else
-			error = copyout(bp, buf, strlen(bp) + 1);
 #ifdef KTRACE
-	if (KTRPOINT(curthread, KTR_NAMEI))
-		ktrnamei(bp);
+	if (KTRPOINT(curthread, KTR_NAMEI) && error == 0)
+		ktrnamei(*retbuf);
 #endif
-	}
-	free(tmpbuf, M_TEMP);
 	return (error);
 }
 
 /*
- * Thus begins the fullpath magic.
- */
-
-static int __read_mostly disablefullpath;
-SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
-    "Disable the vn_fullpath function");
-
-/*
  * Retrieve the full filesystem path that corresponds to a vnode from the
  * name cache (if available)
  */
 int
 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
 {
 	char *buf;
 	struct filedesc *fdp;
 	struct vnode *rdir;
+	size_t buflen;
 	int error;
 
-	if (__predict_false(disablefullpath))
-		return (ENODEV);
 	if (__predict_false(vn == NULL))
 		return (EINVAL);
 
-	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
+	buflen = MAXPATHLEN;
+	buf = malloc(buflen, M_TEMP, M_WAITOK);
 	fdp = td->td_proc->p_fd;
 	FILEDESC_SLOCK(fdp);
 	rdir = fdp->fd_rdir;
 	vrefact(rdir);
 	FILEDESC_SUNLOCK(fdp);
-	error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
+	error = vn_fullpath1(td, vn, rdir, buf, retbuf, &buflen);
 	vrele(rdir);
 
 	if (!error)
 		*freebuf = buf;
 	else
 		free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * This function is similar to vn_fullpath, but it attempts to lookup the
  * pathname relative to the global root mount point.  This is required for the
  * auditing sub-system, as audited pathnames must be absolute, relative to the
  * global root mount point.
  */
 int
 vn_fullpath_global(struct thread *td, struct vnode *vn,
     char **retbuf, char **freebuf)
 {
 	char *buf;
+	size_t buflen;
 	int error;
 
-	if (__predict_false(disablefullpath))
-		return (ENODEV);
 	if (__predict_false(vn == NULL))
 		return (EINVAL);
-	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
-	error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
+	buflen = MAXPATHLEN;
+	buf = malloc(buflen, M_TEMP, M_WAITOK);
+	error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, &buflen);
 	if (!error)
 		*freebuf = buf;
 	else
 		free(buf, M_TEMP);
 	return (error);
 }
 
 int
-vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
+vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, size_t *buflen)
 {
 	struct vnode *dvp;
 	struct namecache *ncp;
 	struct mtx *vlp;
 	int error;
 
 	vlp = VP2VNODELOCK(*vp);
 	mtx_lock(vlp);
 	TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	}
 	if (ncp != NULL) {
 		if (*buflen < ncp->nc_nlen) {
 			mtx_unlock(vlp);
 			vrele(*vp);
 			counter_u64_add(numfullpathfail4, 1);
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
 			    vp, NULL);
 			return (error);
 		}
 		*buflen -= ncp->nc_nlen;
 		memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen);
 		SDT_PROBE3(vfs, namecache, fullpath, hit, ncp->nc_dvp,
 		    ncp->nc_name, vp);
 		dvp = *vp;
 		*vp = ncp->nc_dvp;
 		vref(*vp);
 		mtx_unlock(vlp);
 		vrele(dvp);
 		return (0);
 	}
 	SDT_PROBE1(vfs, namecache, fullpath, miss, vp);
 
 	mtx_unlock(vlp);
 	vn_lock(*vp, LK_SHARED | LK_RETRY);
 	error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
 	vput(*vp);
 	if (error) {
 		counter_u64_add(numfullpathfail2, 1);
 		SDT_PROBE3(vfs, namecache, fullpath, return,  error, vp, NULL);
 		return (error);
 	}
 
 	*vp = dvp;
 	if (VN_IS_DOOMED(dvp)) {
 		/* forced unmount */
 		vrele(dvp);
 		error = ENOENT;
 		SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL);
 		return (error);
 	}
 	/*
 	 * *vp has its use count incremented still.
 	 */
 
 	return (0);
 }
 
 /*
- * The magic behind kern___getcwd() and vn_fullpath().
+ * The magic behind vn_getcwd() and vn_fullpath().
  */
 static int
 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
-    char *buf, char **retbuf, u_int buflen)
+    char *buf, char **retbuf, size_t *len)
 {
 	int error, slash_prefixed;
 #ifdef KDTRACE_HOOKS
 	struct vnode *startvp = vp;
 #endif
 	struct vnode *vp1;
+	size_t buflen;
 
+	buflen = *len;
+
 	buflen--;
 	buf[buflen] = '\0';
 	error = 0;
 	slash_prefixed = 0;
 
 	SDT_PROBE1(vfs, namecache, fullpath, entry, vp);
 	counter_u64_add(numfullpathcalls, 1);
 	vref(vp);
 	if (vp->v_type != VDIR) {
 		error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen);
 		if (error)
 			return (error);
 		if (buflen == 0) {
 			vrele(vp);
 			return (ENOMEM);
 		}
 		buf[--buflen] = '/';
 		slash_prefixed = 1;
 	}
 	while (vp != rdir && vp != rootvnode) {
 		/*
 		 * The vp vnode must be already fully constructed,
 		 * since it is either found in namecache or obtained
 		 * from VOP_VPTOCNP().  We may test for VV_ROOT safely
 		 * without obtaining the vnode lock.
 		 */
 		if ((vp->v_vflag & VV_ROOT) != 0) {
 			vn_lock(vp, LK_RETRY | LK_SHARED);
 
 			/*
 			 * With the vnode locked, check for races with
 			 * unmount, forced or not.  Note that we
 			 * already verified that vp is not equal to
 			 * the root vnode, which means that
 			 * mnt_vnodecovered can be NULL only for the
 			 * case of unmount.
 			 */
 			if (VN_IS_DOOMED(vp) ||
 			    (vp1 = vp->v_mount->mnt_vnodecovered) == NULL ||
 			    vp1->v_mountedhere != vp->v_mount) {
 				vput(vp);
 				error = ENOENT;
 				SDT_PROBE3(vfs, namecache, fullpath, return,
 				    error, vp, NULL);
 				break;
 			}
 
 			vref(vp1);
 			vput(vp);
 			vp = vp1;
 			continue;
 		}
 		if (vp->v_type != VDIR) {
 			vrele(vp);
 			counter_u64_add(numfullpathfail1, 1);
 			error = ENOTDIR;
 			SDT_PROBE3(vfs, namecache, fullpath, return,
 			    error, vp, NULL);
 			break;
 		}
 		error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen);
 		if (error)
 			break;
 		if (buflen == 0) {
 			vrele(vp);
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
 			    startvp, NULL);
 			break;
 		}
 		buf[--buflen] = '/';
 		slash_prefixed = 1;
 	}
 	if (error)
 		return (error);
 	if (!slash_prefixed) {
 		if (buflen == 0) {
 			vrele(vp);
 			counter_u64_add(numfullpathfail4, 1);
 			SDT_PROBE3(vfs, namecache, fullpath, return, ENOMEM,
 			    startvp, NULL);
 			return (ENOMEM);
 		}
 		buf[--buflen] = '/';
 	}
 	counter_u64_add(numfullpathfound, 1);
 	vrele(vp);
 
 	SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen);
 	*retbuf = buf + buflen;
+	*len -= buflen;
 	return (0);
 }
 
 struct vnode *
 vn_dir_dd_ino(struct vnode *vp)
 {
 	struct namecache *ncp;
 	struct vnode *ddvp;
 	struct mtx *vlp;
 	enum vgetstate vs;
 
 	ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
 	vlp = VP2VNODELOCK(vp);
 	mtx_lock(vlp);
 	TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
 		vs = vget_prep(ddvp);
 		mtx_unlock(vlp);
 		if (vget_finish(ddvp, LK_SHARED | LK_NOWAIT, vs))
 			return (NULL);
 		return (ddvp);
 	}
 	mtx_unlock(vlp);
 	return (NULL);
 }
 
 /*
  * Copy the name from the first non-".." name cache entry that resolves
  * to vp into buf, truncated to buflen - 1 bytes and NUL-terminated.
  *
  * Returns 0 on success, ENOENT if no such entry is cached, and EINVAL
  * if buflen is 0 (there is no room even for the terminator).
  */
 int
 vn_commname(struct vnode *vp, char *buf, u_int buflen)
 {
 	struct namecache *ncp;
 	struct mtx *vlp;
 	int l;
 
 	/*
 	 * buflen is unsigned: with a zero-sized buffer "buflen - 1" below
 	 * would wrap around and the copy would run past the end of buf.
 	 */
 	if (buflen == 0)
 		return (EINVAL);
 
 	vlp = VP2VNODELOCK(vp);
 	mtx_lock(vlp);
 	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	if (ncp == NULL) {
 		mtx_unlock(vlp);
 		return (ENOENT);
 	}
 	l = min(ncp->nc_nlen, buflen - 1);
 	/* Copy while the entry is still protected by the vnode lock. */
 	memcpy(buf, ncp->nc_name, l);
 	mtx_unlock(vlp);
 	buf[l] = '\0';
 	return (0);
 }
 
 /*
  * This function updates path string to vnode's full global path
  * and checks the size of the new path string against the pathlen argument.
  *
  * Requires a locked, referenced vnode.
  * Vnode is re-locked on success, otherwise unlocked.
  *
- * If sysctl debug.disablefullpath is set, ENODEV is returned,
- * vnode is left locked and path remain untouched.
- *
  * If vp is a directory, the call to vn_fullpath_global() always succeeds
  * because it falls back to the ".." lookup if the namecache lookup fails.
  */
 int
 vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path,
     u_int pathlen)
 {
 	struct nameidata nd;
 	struct vnode *vp1;
 	char *rpath, *fbuf;
 	int error;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
-
-	/* Return ENODEV if sysctl debug.disablefullpath==1 */
-	if (__predict_false(disablefullpath))
-		return (ENODEV);
 
 	/* Construct global filesystem path from vp. */
 	VOP_UNLOCK(vp);
 	error = vn_fullpath_global(td, vp, &rpath, &fbuf);
 
 	if (error != 0) {
 		vrele(vp);
 		return (error);
 	}
 
 	if (strlen(rpath) >= pathlen) {
 		vrele(vp);
 		error = ENAMETOOLONG;
 		goto out;
 	}
 
 	/*
 	 * Re-lookup the vnode by path to detect a possible rename.
 	 * As a side effect, the vnode is relocked.
 	 * If vnode was renamed, return ENOENT.
 	 */
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 	    UIO_SYSSPACE, path, td);
 	error = namei(&nd);
 	if (error != 0) {
 		vrele(vp);
 		goto out;
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp1 = nd.ni_vp;
 	vrele(vp);
 	if (vp1 == vp)
 		strcpy(path, rpath);
 	else {
 		vput(vp1);
 		error = ENOENT;
 	}
 
 out:
 	free(fbuf, M_TEMP);
 	return (error);
 }
 
 #ifdef DDB
 /*
  * DDB helper: walk from vp towards the root, printing one line per step.
  * Each line shows the vnode pointer followed by "/" for the root vnode,
  * "<mount point>" for a VV_ROOT vnode (the walk continues at the covered
  * vnode), or the name of the first cached entry resolving to the vnode.
  * The walk stops at the root or when no cached entry is found.
  */
 static void
 db_print_vpath(struct vnode *vp)
 {
 	struct namecache *ncp;
 	int i;
 
 	while (vp != NULL) {
 		db_printf("%p: ", vp);
 		if (vp == rootvnode) {
 			db_printf("/");
 			vp = NULL;
 		} else if (vp->v_vflag & VV_ROOT) {
 			db_printf("<mount point>");
 			vp = vp->v_mount->mnt_vnodecovered;
 		} else if ((ncp = TAILQ_FIRST(&vp->v_cache_dst)) != NULL) {
 			/* nc_name is printed by length, one byte at a time. */
 			for (i = 0; i < ncp->nc_nlen; i++)
 				db_printf("%c", ncp->nc_name[i]);
 			vp = ncp->nc_dvp;
 		} else {
 			vp = NULL;
 		}
 		db_printf("\n");
 	}
 }
 
 /*
  * DDB "show vpath <addr>": treat addr as a struct vnode pointer and print
  * its path with db_print_vpath(); print a usage line if no address is
  * given.
  */
 DB_SHOW_COMMAND(vpath, db_show_vpath)
 {
 
 	if (have_addr) {
 		db_print_vpath((struct vnode *)addr);
 		return;
 	}
 	db_printf("usage: show vpath <struct vnode *>\n");
 }
 
 #endif
Index: projects/clang1000-import/sys/kern/vfs_default.c
===================================================================
--- projects/clang1000-import/sys/kern/vfs_default.c	(revision 357389)
+++ projects/clang1000-import/sys/kern/vfs_default.c	(revision 357390)
@@ -1,1460 +1,1460 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed
  * to Berkeley by John Heidemann of the UCLA Ficus project.
  *
  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/event.h>
 #include <sys/filio.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/rwlock.h>
 #include <sys/fcntl.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/poll.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 
 static int	vop_nolookup(struct vop_lookup_args *);
 static int	vop_norename(struct vop_rename_args *);
 static int	vop_nostrategy(struct vop_strategy_args *);
 static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
 				char *dirbuf, int dirbuflen, off_t *off,
 				char **cpos, int *len, int *eofflag,
 				struct thread *td);
 static int	dirent_exists(struct vnode *vp, const char *dirname,
 			      struct thread *td);
 
 #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
 
 static int vop_stdis_text(struct vop_is_text_args *ap);
 static int vop_stdunset_text(struct vop_unset_text_args *ap);
 static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
 static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap);
 static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
 static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);
 
 /*
  * This vnode table stores what we want to do if the filesystem doesn't
  * implement a particular VOP.
  *
  * If there is no specific entry here, we will return EOPNOTSUPP.
  *
  * Note that every filesystem has to implement either vop_access
  * or vop_accessx; failing to do so will result in immediate crash
  * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
  * which calls vop_stdaccess() etc.
  */
 
 struct vop_vector default_vnodeops = {
 	/*
 	 * No further vector is chained behind this one; operations without
 	 * an entry below end up in vop_bypass.
 	 * NOTE(review): inferred from the field names and the comment above
 	 * this table -- confirm against the vop_vector dispatch code.
 	 */
 	.vop_default =		NULL,
 	.vop_bypass =		VOP_EOPNOTSUPP,
 
 	.vop_access =		vop_stdaccess,
 	.vop_accessx =		vop_stdaccessx,
 	.vop_advise =		vop_stdadvise,
 	.vop_advlock =		vop_stdadvlock,
 	.vop_advlockasync =	vop_stdadvlockasync,
 	.vop_advlockpurge =	vop_stdadvlockpurge,
 	.vop_allocate =		vop_stdallocate,
 	.vop_bmap =		vop_stdbmap,
 	.vop_close =		VOP_NULL,
 	.vop_fsync =		VOP_NULL,
 	.vop_fdatasync =	vop_stdfdatasync,
 	.vop_getpages =		vop_stdgetpages,
 	.vop_getpages_async =	vop_stdgetpages_async,
 	.vop_getwritemount = 	vop_stdgetwritemount,
 	.vop_inactive =		VOP_NULL,
 	.vop_need_inactive =	vop_stdneed_inactive,
 	.vop_ioctl =		vop_stdioctl,
 	.vop_kqfilter =		vop_stdkqfilter,
 	.vop_islocked =		vop_stdislocked,
 	.vop_lock1 =		vop_stdlock,
 	.vop_lookup =		vop_nolookup,
 	.vop_open =		VOP_NULL,
 	.vop_pathconf =		VOP_EINVAL,
 	.vop_poll =		vop_nopoll,
 	.vop_putpages =		vop_stdputpages,
 	.vop_readlink =		VOP_EINVAL,
 	.vop_rename =		vop_norename,
 	/* Presumably resolves to vop_panic(): a revoke reaching here is fatal. */
 	.vop_revoke =		VOP_PANIC,
 	.vop_strategy =		vop_nostrategy,
 	.vop_unlock =		vop_stdunlock,
 	.vop_vptocnp =		vop_stdvptocnp,
 	.vop_vptofh =		vop_stdvptofh,
 	.vop_unp_bind =		vop_stdunp_bind,
 	.vop_unp_connect =	vop_stdunp_connect,
 	.vop_unp_detach =	vop_stdunp_detach,
 	.vop_is_text =		vop_stdis_text,
 	.vop_set_text =		vop_stdset_text,
 	.vop_unset_text =	vop_stdunset_text,
 	.vop_add_writecount =	vop_stdadd_writecount,
 	.vop_copy_file_range =	vop_stdcopy_file_range,
 };
 VFS_VOP_VECTOR_REGISTER(default_vnodeops);
 
 /*
  * Series of placeholder functions for various error returns for
  * VOPs.
  */
 
 /*
  * Placeholder VOP: ignores its arguments and fails with EOPNOTSUPP.
  * (A disabled debugging printf of ap->a_desc->vdesc_name used to sit
  * here.)
  */
 int
 vop_eopnotsupp(struct vop_generic_args *ap)
 {
 
 	(void)ap;
 	return (EOPNOTSUPP);
 }
 
 /* Placeholder VOP: ignores its arguments and fails with EBADF. */
 int
 vop_ebadf(struct vop_generic_args *ap)
 {
 
 	(void)ap;
 	return (EBADF);
 }
 
 /* Placeholder VOP: ignores its arguments and fails with ENOTTY. */
 int
 vop_enotty(struct vop_generic_args *ap)
 {
 
 	(void)ap;
 	return (ENOTTY);
 }
 
 /* Placeholder VOP: ignores its arguments and fails with EINVAL. */
 int
 vop_einval(struct vop_generic_args *ap)
 {
 
 	(void)ap;
 	return (EINVAL);
 }
 
 /* Placeholder VOP: ignores its arguments and fails with ENOENT. */
 int
 vop_enoent(struct vop_generic_args *ap)
 {
 
 	(void)ap;
 	return (ENOENT);
 }
 
 /* Placeholder VOP: ignores its arguments and reports success (0). */
 int
 vop_null(struct vop_generic_args *ap)
 {
 
 	(void)ap;
 	return (0);
 }
 
 /*
  * Placeholder VOP for operations a filesystem must never receive:
  * panics, naming the offending operation from its descriptor.
  */
 int
 vop_panic(struct vop_generic_args *ap)
 {
 	const char *opname;
 
 	opname = ap->a_desc->vdesc_name;
 	panic("filesystem goof: vop_panic[%s]", opname);
 }
 
 /*
  * vop_std<something> and vop_no<something> are default functions for use by
  * filesystems that need the "default reasonable" implementation for a
  * particular operation.
  *
  * The documentation for the operations they implement exists (if it exists)
  * in the VOP_<SOMETHING>(9) manpage (all uppercase).
  */
 
 /*
  * Default vop for filesystems that do not support name lookup.
  *
  * Clears the result vnode pointer (*ap->a_vpp) and fails with ENOTDIR.
  * Defined with an ANSI prototype, matching its forward declaration,
  * instead of the obsolete K&R parameter list.
  */
 static int
 vop_nolookup(struct vop_lookup_args *ap)
 {
 
 	*ap->a_vpp = NULL;
 	return (ENOTDIR);
 }
 
 /*
  * vop_norename:
  *
  * Rename entry point for filesystems without rename support.  The
  * arguments are handed to vop_rename_fail() for unlocking and reference
  * release, then the operation fails with EOPNOTSUPP.
  */
 static int
 vop_norename(struct vop_rename_args *ap)
 {
 	int error;
 
 	error = EOPNOTSUPP;
 	vop_rename_fail(ap);
 	return (error);
 }
 
 /*
  *	vop_nostrategy:
  *
  *	Strategy routine for VFS devices that have none.
  *
  *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
  *	routine.  Typically this is done for a BIO_READ strategy call.
  *	Typically B_INVAL is assumed to already be clear prior to a write
  *	and should not be cleared manually unless you just made the buffer
  *	invalid.  BIO_ERROR should be cleared either way.
  */
 
 static int
 vop_nostrategy (struct vop_strategy_args *ap)
 {
 	printf("No strategy for buffer at %p\n", ap->a_bp);
 	vn_printf(ap->a_vp, "vnode ");
 	ap->a_bp->b_ioflags |= BIO_ERROR;
 	ap->a_bp->b_error = EOPNOTSUPP;
 	bufdone(ap->a_bp);
 	return (EOPNOTSUPP);
 }
 
 /*
  * Return the next directory entry of vp through *dpp.
  *
  * dirbuf/dirbuflen is a caller-supplied scratch buffer; *cpos and *len
  * track the read position and the bytes still unconsumed in it, and *off
  * is the directory offset for the next VOP_READDIR().  When *len is 0
  * the buffer is refilled first.  Returns 0 on success, ENOENT when a
  * refill yields no data, EINVAL for a record shorter than
  * DIRENT_MINSIZE, or the error from the MAC check / VOP_READDIR().
  */
 static int
 get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
 		int dirbuflen, off_t *off, char **cpos, int *len,
 		int *eofflag, struct thread *td)
 {
 	int error, reclen;
 	struct uio uio;
 	struct iovec iov;
 	struct dirent *dp;
 
 	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
 	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
 
 	/* Buffer exhausted: read the next chunk of the directory. */
 	if (*len == 0) {
 		iov.iov_base = dirbuf;
 		iov.iov_len = dirbuflen;
 
 		uio.uio_iov = &iov;
 		uio.uio_iovcnt = 1;
 		uio.uio_offset = *off;
 		uio.uio_resid = dirbuflen;
 		uio.uio_segflg = UIO_SYSSPACE;
 		uio.uio_rw = UIO_READ;
 		uio.uio_td = td;
 
 		*eofflag = 0;
 
 		/*
 		 * With MAC compiled in, the "if" below guards the
 		 * VOP_READDIR() call; without it the call is unconditional.
 		 */
 #ifdef MAC
 		error = mac_vnode_check_readdir(td->td_ucred, vp);
 		if (error == 0)
 #endif
 			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
 		    		NULL, NULL);
 		if (error)
 			return (error);
 
 		*off = uio.uio_offset;
 
 		*cpos = dirbuf;
 		*len = (dirbuflen - uio.uio_resid);
 
 		if (*len == 0)
 			return (ENOENT);
 	}
 
 	dp = (struct dirent *)(*cpos);
 	reclen = dp->d_reclen;
 	/* *dpp is set even if the record is rejected just below. */
 	*dpp = dp;
 
 	/*
 	 * NOTE(review): only a lower bound is enforced; nothing here checks
 	 * that reclen fits in the remaining *len -- confirm VOP_READDIR()
 	 * always returns whole records.
 	 */
 	/* check for malformed directory.. */
 	if (reclen < DIRENT_MINSIZE)
 		return (EINVAL);
 
 	*cpos += reclen;
 	*len -= reclen;
 
 	return (0);
 }
 
 /*
  * Report whether the directory vp contains an entry called dirname.
  *
  * Returns 1 if a non-whiteout entry with a non-zero file number and that
  * exact name is found, otherwise 0.  Any error (from VOP_GETATTR() or
  * while reading the directory) also yields 0.  The scratch buffer is
  * the larger of DEV_BSIZE and the vnode's reported block size.
  */
 static int
 dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
 {
 	struct dirent *dp;
 	struct vattr va;
 	char *dirbuf, *cpos;
 	off_t off;
 	int dirbuflen, eofflag, error, found, len;
 
 	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
 	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
 
 	found = 0;
 
 	error = VOP_GETATTR(vp, &va, td->td_ucred);
 	if (error)
 		return (found);
 
 	dirbuflen = DEV_BSIZE;
 	if (dirbuflen < va.va_blocksize)
 		dirbuflen = va.va_blocksize;
 	dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
 
 	off = 0;
 	len = 0;
 	do {
 		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
 		    &cpos, &len, &eofflag, td);
 		if (error)
 			break;
 
 		if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
 		    strcmp(dp->d_name, dirname) == 0) {
 			found = 1;
 			break;
 		}
 	} while (len > 0 || !eofflag);
 
 	free(dirbuf, M_TEMP);
 	return (found);
 }
 
 /*
  * Default VOP_ACCESS: forward the request to VOP_ACCESSX.
  *
  * Asserts that a_accmode contains no bits other than VEXEC, VWRITE,
  * VREAD, VADMIN and VAPPEND, then returns whatever VOP_ACCESSX returns
  * for the same vnode, mode, credential and thread.
  */
 int
 vop_stdaccess(struct vop_access_args *ap)
 {
 
 	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
 	    VAPPEND)) == 0, ("invalid bit in accmode"));
 
 	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
 }
 
 int
 vop_stdaccessx(struct vop_accessx_args *ap)
 {
 	int error;
 	accmode_t accmode = ap->a_accmode;
 
 	error = vfs_unixify_accmode(&accmode);
 	if (error != 0)
 		return (error);
 
 	if (accmode == 0)
 		return (0);
 
 	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
 }
 
 /*
  * Advisory record locking support.
  *
  * Default VOP_ADVLOCK: pass the request to lf_advlock() together with
  * the file size.  The size is only looked up for SEEK_END requests;
  * every other l_whence passes 0.  Returns the VOP_GETATTR error if the
  * size lookup fails, otherwise the result of lf_advlock().
  */
 int
 vop_stdadvlock(struct vop_advlock_args *ap)
 {
 	struct vattr vattr;
 	struct vnode *vp;
 	int error;
 
 	vp = ap->a_vp;
 	if (ap->a_fl->l_whence != SEEK_END) {
 		vattr.va_size = 0;
 	} else {
 		/*
 		 * The NFSv4 server must not end up in the vn_lock() below:
 		 * it can deadlock the nfsd threads because of a LOR.  That
 		 * server always uses SEEK_SET, and the lock is only taken
 		 * for SEEK_END, so it never gets here.
 		 */
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
 		VOP_UNLOCK(vp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (lf_advlock(ap, &vp->v_lockf, vattr.va_size));
 }
 
 /*
  * Default VOP_ADVLOCKASYNC: pass the request to lf_advlockasync()
  * together with the file size, which is only looked up (under a shared
  * vnode lock) for SEEK_END requests and is 0 otherwise.
  */
 int
 vop_stdadvlockasync(struct vop_advlockasync_args *ap)
 {
 	struct vattr vattr;
 	struct vnode *vp;
 	int error;
 
 	vp = ap->a_vp;
 	if (ap->a_fl->l_whence != SEEK_END) {
 		/* No other l_whence value needs the size argument. */
 		vattr.va_size = 0;
 	} else {
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		error = VOP_GETATTR(vp, &vattr, curthread->td_ucred);
 		VOP_UNLOCK(vp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (lf_advlockasync(ap, &vp->v_lockf, vattr.va_size));
 }
 
 /*
  * Default VOP_ADVLOCKPURGE: call lf_purgelocks() on the vnode's v_lockf
  * list.  Always returns 0.
  */
 int
 vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
 {
 
 	lf_purgelocks(ap->a_vp, &ap->a_vp->v_lockf);
 	return (0);
 }
 
 /*
  * vop_stdpathconf:
  *
  * Standard implementation of POSIX pathconf, to get information about limits
  * for a filesystem.
  * Override per filesystem for the case where the filesystem has smaller
  * limits.
  *
  * Arguments (struct vop_pathconf_args):
  *	a_vp		vnode being queried (not examined here)
  *	a_name		_PC_* selector
  *	a_retval	where the answer is stored; declared as "long *" in
  *			vnode_if.src
  *
  * Returns 0 with *a_retval filled in for the selectors handled below and
  * EINVAL for any other selector.
  */
 int
 vop_stdpathconf(struct vop_pathconf_args *ap)
 {
 
 	switch (ap->a_name) {
 	case _PC_ASYNC_IO:
 		*ap->a_retval = _POSIX_ASYNCHRONOUS_IO;
 		return (0);
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;
 		return (0);
 	case _PC_ACL_EXTENDED:
 	case _PC_ACL_NFS4:
 	case _PC_CAP_PRESENT:
 	case _PC_INF_PRESENT:
 	case _PC_MAC_PRESENT:
 		/* These features are reported as absent by default. */
 		*ap->a_retval = 0;
 		return (0);
 	default:
 		return (EINVAL);
 	}
 	/* NOTREACHED */
 }
 
 /*
  * Standard lock, unlock and islocked functions.
  *
  * vop_stdlock: lock the vnode through the lock that v_vnlock points at.
  *
  * Arguments (struct vop_lock1_args):
  *	a_vp		vnode to lock
  *	a_flags		lockmgr flags for the request
  *	a_file, a_line	caller location, passed through to lockmgr
  *
  * The vnode interlock (VI_MTX) is handed to lockmgr_lock_fast_path() as
  * the interlock object.  Returns that function's result.
  */
 int
 vop_stdlock(struct vop_lock1_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct mtx *ilk;
 
 	ilk = VI_MTX(vp);
 	return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags,
 	    &ilk->lock_object, ap->a_file, ap->a_line));
 }
 
 /*
  * Standard unlock: release the lock that a_vp->v_vnlock points at via
  * lockmgr_unlock() and return its result.
  */
 int
 vop_stdunlock(struct vop_unlock_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	return (lockmgr_unlock(vp->v_vnlock));
 }
 
 /*
  * Standard islocked: report lockstatus() of the lock that
  * a_vp->v_vnlock points at.
  */
 int
 vop_stdislocked(struct vop_islocked_args *ap)
 {
 
 	return (lockstatus(ap->a_vp->v_vnlock));
 }
 
 /*
  * Variants of the standard lock, unlock and islocked functions
  * (vop_stdlock, vop_stdunlock, vop_stdislocked).
  *
  * Differences are:
  * - shared locking disablement is not supported
  * - v_vnlock pointer is not honored
  *
  * vop_lock: lock the vnode's own v_lock.
  *
  * Arguments (struct vop_lock1_args):
  *	a_vp		vnode to lock; v_vnlock must point at v_lock
  *	a_flags		lockmgr flags for the request
  *	a_file, a_line	caller location, passed through to lockmgr
  *
  * Requests that carry only a lock type plus LK_NODDLKTREAT/LK_RETRY and
  * ask for LK_SHARED or LK_EXCLUSIVE go straight to lockmgr_slock() or
  * lockmgr_xlock().  Everything else goes through
  * lockmgr_lock_fast_path() with the vnode interlock.
  */
 int
 vop_lock(struct vop_lock1_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int flags = ap->a_flags;
 	struct mtx *ilk;
 
 	MPASS(vp->v_vnlock == &vp->v_lock);
 
 	if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT |
 	    LK_RETRY)) != 0))
 		goto other;
 
 	switch (flags & LK_TYPE_MASK) {
 	case LK_SHARED:
 		return (lockmgr_slock(&vp->v_lock, flags, ap->a_file,
 		    ap->a_line));
 	case LK_EXCLUSIVE:
 		return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file,
 		    ap->a_line));
 	}
 	/* Any other lock type falls through to the generic path. */
 other:
 	ilk = VI_MTX(vp);
 	return (lockmgr_lock_fast_path(&vp->v_lock, flags,
 	    &ilk->lock_object, ap->a_file, ap->a_line));
 }
 
 /*
  * Unlock the vnode's own v_lock.  Asserts that v_vnlock points at
  * v_lock and returns the result of lockmgr_unlock().
  */
 int
 vop_unlock(struct vop_unlock_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	MPASS(vp->v_vnlock == &vp->v_lock);
 
 	return (lockmgr_unlock(&vp->v_lock));
 }
 
 /*
  * Report lockstatus() of the vnode's own v_lock.  Asserts that v_vnlock
  * points at v_lock.
  */
 int
 vop_islocked(struct vop_islocked_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	MPASS(vp->v_vnlock == &vp->v_lock);
 
 	return (lockstatus(&vp->v_lock));
 }
 
 /*
  * Return true for select/poll.
  *
  * Arguments (struct vop_poll_args):
  *	a_vp		vnode being polled (unused)
  *	a_events	requested poll events
  *	a_cred, a_td	caller credential and thread (unused)
  *
  * The result is computed by poll_no_poll() from a_events alone.
  */
 int
 vop_nopoll(struct vop_poll_args *ap)
 {
 
 	return (poll_no_poll(ap->a_events));
 }
 
 /*
  * Implement poll for local filesystems that support it.
  *
  * Arguments (struct vop_poll_args):
  *	a_vp		vnode being polled
  *	a_events	requested poll events
  *	a_cred		caller credential (unused)
  *	a_td		calling thread
  *
  * If any event outside POLLSTANDARD is requested, the request is handed
  * to vn_pollrecord().  Otherwise the requested subset of POLLIN,
  * POLLOUT, POLLRDNORM and POLLWRNORM is returned immediately.
  */
 int
 vop_stdpoll(struct vop_poll_args *ap)
 {
 
 	if (ap->a_events & ~POLLSTANDARD)
 		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
 }
 
 /*
  * Return our mount point, as we will take charge of the writes.
  *
  * Arguments (struct vop_getwritemount_args):
  *	a_vp	vnode whose mount point is wanted
  *	a_mpp	receives the mount point with a reference taken, or NULL
  *
  * *a_mpp is set to NULL when the vnode has no mount point, or when
  * v_mount no longer matches the sampled value once we are inside the
  * vfs_op_thread section or hold the mount interlock.  Always returns 0.
  */
 int
 vop_stdgetwritemount(struct vop_getwritemount_args *ap)
 {
 	struct mount *mp;
 	struct vnode *vp;
 
 	/*
 	 * Note that having a reference does not prevent forced unmount from
 	 * setting ->v_mount to NULL after the lock gets released. This is of
 	 * no consequence for typical consumers (most notably vn_start_write)
 	 * since in this case the vnode is VIRF_DOOMED. Unmount might have
 	 * progressed far enough that its completion is only delayed by the
 	 * reference obtained here. The consumer only needs to concern itself
 	 * with releasing it.
 	 */
 	vp = ap->a_vp;
 	mp = vp->v_mount;
 	if (mp == NULL) {
 		*(ap->a_mpp) = NULL;
 		return (0);
 	}
 	if (vfs_op_thread_enter(mp)) {
 		/* Per-CPU reference path; recheck v_mount before counting. */
 		if (mp == vp->v_mount) {
 			vfs_mp_count_add_pcpu(mp, ref, 1);
 			vfs_op_thread_exit(mp);
 		} else {
 			vfs_op_thread_exit(mp);
 			mp = NULL;
 		}
 	} else {
 		/* Fall back to taking the reference under the interlock. */
 		MNT_ILOCK(mp);
 		if (mp == vp->v_mount) {
 			MNT_REF(mp);
 			MNT_IUNLOCK(mp);
 		} else {
 			MNT_IUNLOCK(mp);
 			mp = NULL;
 		}
 	}
 	*(ap->a_mpp) = mp;
 	return (0);
 }
 
 /*
  * If the file system doesn't implement VOP_BMAP, then return sensible defaults:
  * - Return the vnode's bufobj instead of any underlying device's bufobj
  * - Calculate the physical block number as if there were equal size
  *   consecutive blocks, but
  * - Report no contiguous runs of blocks.
  *
  * Arguments (struct vop_bmap_args):
  *	a_vp	vnode being mapped
  *	a_bn	logical block number
  *	a_bop	if not NULL, receives &a_vp->v_bufobj
  *	a_bnp	if not NULL, receives a_bn scaled by the mount's f_iosize
  *		expressed in DEV_BSIZE blocks (btodb)
  *	a_runp	if not NULL, receives 0
  *	a_runb	if not NULL, receives 0
  *
  * Always returns 0.
  */
 int
 vop_stdbmap(struct vop_bmap_args *ap)
 {
 
 	if (ap->a_bop != NULL)
 		*ap->a_bop = &ap->a_vp->v_bufobj;
 	if (ap->a_bnp != NULL)
 		*ap->a_bnp = ap->a_bn *
 		    btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
 	if (ap->a_runp != NULL)
 		*ap->a_runp = 0;
 	if (ap->a_runb != NULL)
 		*ap->a_runb = 0;
 	return (0);
 }
 
 /*
  * Default VOP_FSYNC: flush the vnode's buffers with vn_fsync_buf().
  *
  * Arguments (struct vop_fsync_args):
  *	a_vp		vnode to flush
  *	a_waitfor	wait mode passed through to vn_fsync_buf()
  *	a_td		calling thread (unused)
  */
 int
 vop_stdfsync(struct vop_fsync_args *ap)
 {
 
 	return (vn_fsync_buf(ap->a_vp, ap->a_waitfor));
 }
 
 /*
  * Default VOP_FDATASYNC: issue a VOP_FSYNC with MNT_WAIT on the same
  * vnode and thread, and return its result.
  */
 static int
 vop_stdfdatasync(struct vop_fdatasync_args *ap)
 {
 
 	return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td));
 }
 
 /*
  * VOP_FDATASYNC variant that calls vn_fsync_buf() with MNT_WAIT directly
  * instead of going through VOP_FSYNC.
  */
 int
 vop_stdfdatasync_buf(struct vop_fdatasync_args *ap)
 {
 
 	return (vn_fsync_buf(ap->a_vp, MNT_WAIT));
 }
 
 /*
  * XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)).
  *
  * Default VOP_GETPAGES: forward to vnode_pager_generic_getpages() with
  * no completion callback (both trailing arguments are NULL).
  *
  * Arguments (struct vop_getpages_args):
  *	a_vp			vnode backing the pages
  *	a_m, a_count		page array and its length
  *	a_rbehind, a_rahead	read-behind/read-ahead pointers, passed
  *				through unchanged
  */
 int
 vop_stdgetpages(struct vop_getpages_args *ap)
 {
 
 	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
 	    ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL));
 }
 
 /*
  * Default VOP_GETPAGES_ASYNC: perform a regular VOP_GETPAGES and then
  * call the caller's a_iodone callback with the resulting error before
  * returning that same error.
  */
 static int
 vop_stdgetpages_async(struct vop_getpages_async_args *ap)
 {
 	int error;
 
 	error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
 	    ap->a_rahead);
 	/* The callback is invoked on failure as well as on success. */
 	ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
 	return (error);
 }
 
 /*
  * Default VOP_KQFILTER: hand the argument structure to vfs_kqfilter()
  * and return its result.
  */
 int
 vop_stdkqfilter(struct vop_kqfilter_args *ap)
 {
 	return vfs_kqfilter(ap);
 }
 
 /*
  * XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)).
  *
  * Default VOP_PUTPAGES: forward to vnode_pager_generic_putpages().
  *
  * Arguments (struct vop_putpages_args):
  *	a_vp		vnode backing the pages
  *	a_m, a_count	page array and the count passed with it
  *	a_sync		sync flags, passed through unchanged
  *	a_rtvals	per-page result array, passed through unchanged
  */
 int
 vop_stdputpages(struct vop_putpages_args *ap)
 {
 
 	return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
 	    ap->a_sync, ap->a_rtvals));
 }
 
 /*
  * Default VOP_VPTOFH: not supported; always returns EOPNOTSUPP.
  */
 int
 vop_stdvptofh(struct vop_vptofh_args *ap)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default VOP_VPTOCNP: find the name of directory vp inside its parent.
  *
  * The parent is obtained by opening ".." relative to vp.  Its entries
  * are then read until one with a d_fileno equal to vp's va_fileid is
  * found.  On success the name (without a terminating NUL) is copied into
  * a_buf so that it ends at the incoming *a_buflen offset, *a_buflen is
  * lowered to the offset where the name starts, and the parent is
  * returned in *a_vpp with a reference taken by vref().
  *
  * vp's lock is dropped while the parent is examined and is reacquired,
  * in the state observed on entry, before returning.
  *
  * Returns:
  *	ENOENT	vp or the parent is not a directory, no matching entry
  *		exists, the matching entry is ".", or (union mount case)
  *		the name also exists in the covering root directory
  *	ENOMEM	the name does not fit in the space left in a_buf
  *	other	errors from VOP_GETATTR, vn_open_cred or get_next_dirent
  */
 int
 vop_stdvptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode **dvp = ap->a_vpp;
 	struct ucred *cred = ap->a_cred;
 	char *buf = ap->a_buf;
-	int *buflen = ap->a_buflen;
+	size_t *buflen = ap->a_buflen;
 	char *dirbuf, *cpos;
 	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
 	off_t off;
 	ino_t fileno;
 	struct vattr va;
 	struct nameidata nd;
 	struct thread *td;
 	struct dirent *dp;
 	struct vnode *mvp;
 
 	/* i tracks the offset in buf where the name will start. */
 	i = *buflen;
 	error = 0;
 	covered = 0;
 	td = curthread;
 
 	if (vp->v_type != VDIR)
 		return (ENOENT);
 
 	error = VOP_GETATTR(vp, &va, cred);
 	if (error)
 		return (error);
 
 	/* Remember the lock state so it can be restored on every exit. */
 	VREF(vp);
 	locked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
 	    "..", vp, td);
 	flags = FREAD;
 	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
 	if (error) {
 		vn_lock(vp, locked | LK_RETRY);
 		return (error);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 
 	mvp = *dvp = nd.ni_vp;
 
 	/*
 	 * If ".." is the root of a union mount on a different mount than
 	 * vp, search the covered vnode instead; mvp keeps the mounted root
 	 * (closed here, but still referenced) for the duplicate check below.
 	 */
 	if (vp->v_mount != (*dvp)->v_mount &&
 	    ((*dvp)->v_vflag & VV_ROOT) &&
 	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
 		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
 		VREF(mvp);
 		VOP_UNLOCK(mvp);
 		vn_close(mvp, FREAD, cred, td);
 		VREF(*dvp);
 		vn_lock(*dvp, LK_SHARED | LK_RETRY);
 		covered = 1;
 	}
 
 	fileno = va.va_fileid;
 
 	/* Read buffer: the larger of DEV_BSIZE and vp's block size. */
 	dirbuflen = DEV_BSIZE;
 	if (dirbuflen < va.va_blocksize)
 		dirbuflen = va.va_blocksize;
 	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
 
 	if ((*dvp)->v_type != VDIR) {
 		error = ENOENT;
 		goto out;
 	}
 
 	off = 0;
 	len = 0;
 	do {
 		/* call VOP_READDIR of parent */
 		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
 					&cpos, &len, &eofflag, td);
 		if (error)
 			goto out;
 
 		if ((dp->d_type != DT_WHT) &&
 		    (dp->d_fileno == fileno)) {
 			if (covered) {
 				/*
 				 * Reject the name if the mounted root also
 				 * has an entry with it.  Only one of the two
 				 * vnodes is kept locked at a time.
 				 */
 				VOP_UNLOCK(*dvp);
 				vn_lock(mvp, LK_SHARED | LK_RETRY);
 				if (dirent_exists(mvp, dp->d_name, td)) {
 					error = ENOENT;
 					VOP_UNLOCK(mvp);
 					vn_lock(*dvp, LK_SHARED | LK_RETRY);
 					goto out;
 				}
 				VOP_UNLOCK(mvp);
 				vn_lock(*dvp, LK_SHARED | LK_RETRY);
 			}
 			i -= dp->d_namlen;
 
 			if (i < 0) {
 				error = ENOMEM;
 				goto out;
 			}
 			if (dp->d_namlen == 1 && dp->d_name[0] == '.') {
 				error = ENOENT;
 			} else {
 				bcopy(dp->d_name, buf + i, dp->d_namlen);
 				error = 0;
 			}
 			goto out;
 		}
 	} while (len > 0 || !eofflag);
 	error = ENOENT;
 
 out:
 	free(dirbuf, M_TEMP);
 	if (!error) {
 		/* Publish the new offset and reference the returned parent. */
 		*buflen = i;
 		vref(*dvp);
 	}
 	if (covered) {
 		vput(*dvp);
 		vrele(mvp);
 	} else {
 		VOP_UNLOCK(mvp);
 		vn_close(mvp, FREAD, cred, td);
 	}
 	vn_lock(vp, locked | LK_RETRY);
 	return (error);
 }
 
 /*
  * Default VOP_ALLOCATE: back the byte range [*a_offset, *a_offset +
  * *a_len) by rewriting it through VOP_READ/VOP_WRITE.
  *
  * Data below the current file size is read and written back unchanged
  * (a short read is zero-padded); data at or beyond the file size is
  * written as zeros.  If the range extends past the current size, the
  * target size is first probed with a pair of VOP_SETATTR calls (grow,
  * then restore the original size).
  *
  * The loop stops early when should_yield() says so, so a single call
  * may process only part of the range.  On return *a_len holds the
  * length still to be processed and *a_offset the offset to continue
  * from; both are written back on error paths too.
  */
 int
 vop_stdallocate(struct vop_allocate_args *ap)
 {
 #ifdef __notyet__
 	struct statfs *sfs;
 	off_t maxfilesize = 0;
 #endif
 	struct iovec aiov;
 	struct vattr vattr, *vap;
 	struct uio auio;
 	off_t fsize, len, cur, offset;
 	uint8_t *buf;
 	struct thread *td;
 	struct vnode *vp;
 	size_t iosize;
 	int error;
 
 	buf = NULL;
 	error = 0;
 	td = curthread;
 	vap = &vattr;
 	vp = ap->a_vp;
 	len = *ap->a_len;
 	offset = *ap->a_offset;
 
 	error = VOP_GETATTR(vp, vap, td->td_ucred);
 	if (error != 0)
 		goto out;
 	fsize = vap->va_size;
 	/* I/O chunk size: the reported block size, clamped to MAXPHYS. */
 	iosize = vap->va_blocksize;
 	if (iosize == 0)
 		iosize = BLKDEV_IOSIZE;
 	if (iosize > MAXPHYS)
 		iosize = MAXPHYS;
 	buf = malloc(iosize, M_TEMP, M_WAITOK);
 
 #ifdef __notyet__
 	/*
 	 * Check if the filesystem sets f_maxfilesize; if not use
 	 * VOP_SETATTR to perform the check.
 	 */
 	sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = VFS_STATFS(vp->v_mount, sfs, td);
 	if (error == 0)
 		maxfilesize = sfs->f_maxfilesize;
 	free(sfs, M_STATFS);
 	if (error != 0)
 		goto out;
 	if (maxfilesize) {
 		if (offset > maxfilesize || len > maxfilesize ||
 		    offset + len > maxfilesize) {
 			error = EFBIG;
 			goto out;
 		}
 	} else
 #endif
 	if (offset + len > vap->va_size) {
 		/*
 		 * Test offset + len against the filesystem's maxfilesize.
 		 */
 		VATTR_NULL(vap);
 		vap->va_size = offset + len;
 		error = VOP_SETATTR(vp, vap, td->td_ucred);
 		if (error != 0)
 			goto out;
 		/* The probe succeeded; put the original size back. */
 		VATTR_NULL(vap);
 		vap->va_size = fsize;
 		error = VOP_SETATTR(vp, vap, td->td_ucred);
 		if (error != 0)
 			goto out;
 	}
 
 	for (;;) {
 		/*
 		 * Read and write back anything below the nominal file
 		 * size.  There's currently no way outside the filesystem
 		 * to know whether this area is sparse or not.
 		 */
 		/* Each chunk ends on an iosize boundary or at the range end. */
 		cur = iosize;
 		if ((offset % iosize) != 0)
 			cur -= (offset % iosize);
 		if (cur > len)
 			cur = len;
 		if (offset < fsize) {
 			aiov.iov_base = buf;
 			aiov.iov_len = cur;
 			auio.uio_iov = &aiov;
 			auio.uio_iovcnt = 1;
 			auio.uio_offset = offset;
 			auio.uio_resid = cur;
 			auio.uio_segflg = UIO_SYSSPACE;
 			auio.uio_rw = UIO_READ;
 			auio.uio_td = td;
 			error = VOP_READ(vp, &auio, 0, td->td_ucred);
 			if (error != 0)
 				break;
 			/* Zero whatever part of the chunk was not read. */
 			if (auio.uio_resid > 0) {
 				bzero(buf + cur - auio.uio_resid,
 				    auio.uio_resid);
 			}
 		} else {
 			bzero(buf, cur);
 		}
 
 		aiov.iov_base = buf;
 		aiov.iov_len = cur;
 		auio.uio_iov = &aiov;
 		auio.uio_iovcnt = 1;
 		auio.uio_offset = offset;
 		auio.uio_resid = cur;
 		auio.uio_segflg = UIO_SYSSPACE;
 		auio.uio_rw = UIO_WRITE;
 		auio.uio_td = td;
 
 		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
 		if (error != 0)
 			break;
 
 		len -= cur;
 		offset += cur;
 		if (len == 0)
 			break;
 		/* Return with partial progress; *a_len stays non-zero. */
 		if (should_yield())
 			break;
 	}
 
  out:
 	*ap->a_len = len;
 	*ap->a_offset = offset;
 	free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * Default VOP_ADVISE.
  *
  * POSIX_FADV_WILLNEED is accepted and ignored.  POSIX_FADV_DONTNEED
  * marks the pages and buffers covering [a_start, a_end], rounded out to
  * block boundaries, as not worth reusing; it is silently skipped if the
  * vnode is doomed once the exclusive lock is held.  Any other advice
  * value yields EINVAL.
  */
 int
 vop_stdadvise(struct vop_advise_args *ap)
 {
 	struct vnode *vp;
 	struct bufobj *bo;
 	daddr_t startn, endn;
 	off_t bstart, bend, start, end;
 	int bsize, error;
 
 	vp = ap->a_vp;
 	switch (ap->a_advice) {
 	case POSIX_FADV_WILLNEED:
 		/*
 		 * Do nothing for now.  Filesystems should provide a
 		 * custom method which starts an asynchronous read of
 		 * the requested region.
 		 */
 		error = 0;
 		break;
 	case POSIX_FADV_DONTNEED:
 		error = 0;
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (VN_IS_DOOMED(vp)) {
 			VOP_UNLOCK(vp);
 			break;
 		}
 
 		/*
 		 * Round to block boundaries (and later possibly further to
 		 * page boundaries).  Applications cannot reasonably be aware
 		 * of the boundaries, and the rounding must be to expand at
 		 * both extremities to cover enough.  It still doesn't cover
 		 * read-ahead.  For partial blocks, this gives unnecessary
 		 * discarding of buffers but is efficient enough since the
 		 * pages usually remain in VMIO for some time.
 		 */
 		bsize = vp->v_bufobj.bo_bsize;
 		bstart = rounddown(ap->a_start, bsize);
 		bend = roundup(ap->a_end, bsize);
 
 		/*
 		 * Deactivate pages in the specified range from the backing VM
 		 * object.  Pages that are resident in the buffer cache will
 		 * remain wired until their corresponding buffers are released
 		 * below.
 		 */
 		if (vp->v_object != NULL) {
 			start = trunc_page(bstart);
 			end = round_page(bend);
 			VM_OBJECT_RLOCK(vp->v_object);
 			vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
 			    OFF_TO_IDX(end));
 			VM_OBJECT_RUNLOCK(vp->v_object);
 		}
 
 		/*
 		 * Apply the same treatment to the clean list and then, if
 		 * that succeeded, the dirty list, using block numbers.
 		 */
 		bo = &vp->v_bufobj;
 		BO_RLOCK(bo);
 		startn = bstart / bsize;
 		endn = bend / bsize;
 		error = bnoreuselist(&bo->bo_clean, bo, startn, endn);
 		if (error == 0)
 			error = bnoreuselist(&bo->bo_dirty, bo, startn, endn);
 		BO_RUNLOCK(bo);
 		VOP_UNLOCK(vp);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Default VOP_UNP_BIND: store a_unpcb in the vnode's v_unpcb field.
  * Always returns 0.
  */
 int
 vop_stdunp_bind(struct vop_unp_bind_args *ap)
 {
 
 	ap->a_vp->v_unpcb = ap->a_unpcb;
 	return (0);
 }
 
 /*
  * Default VOP_UNP_CONNECT: copy the vnode's v_unpcb field out through
  * a_unpcb.  Always returns 0.
  */
 int
 vop_stdunp_connect(struct vop_unp_connect_args *ap)
 {
 
 	*ap->a_unpcb = ap->a_vp->v_unpcb;
 	return (0);
 }
 
 /*
  * Default VOP_UNP_DETACH: clear the vnode's v_unpcb field.
  * Always returns 0.
  */
 int
 vop_stdunp_detach(struct vop_unp_detach_args *ap)
 {
 
 	ap->a_vp->v_unpcb = NULL;
 	return (0);
 }
 
 /*
  * Default VOP_IS_TEXT: non-zero when the vnode's v_writecount is
  * negative.
  */
 static int
 vop_stdis_text(struct vop_is_text_args *ap)
 {
 
 	return (ap->a_vp->v_writecount < 0);
 }
 
 /*
  * Default VOP_SET_TEXT: record one more text reference by decrementing
  * v_writecount, under the vnode interlock.
  *
  * Returns ETXTBSY when v_writecount is positive, otherwise 0.
  */
 int
 vop_stdset_text(struct vop_set_text_args *ap)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	int error;
 
 	vp = ap->a_vp;
 	VI_LOCK(vp);
 	if (vp->v_writecount > 0) {
 		error = ETXTBSY;
 	} else {
 		/*
 		 * If requested by fs, keep a use reference to the
 		 * vnode until the last text reference is released.
 		 */
 		mp = vp->v_mount;
 		/* Only the transition from 0 takes the extra reference. */
 		if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 &&
 		    vp->v_writecount == 0) {
 			vp->v_iflag |= VI_TEXT_REF;
 			vrefl(vp);
 		}
 
 		vp->v_writecount--;
 		error = 0;
 	}
 	VI_UNLOCK(vp);
 	return (error);
 }
 
 /*
  * Default VOP_UNSET_TEXT: drop one text reference by incrementing a
  * negative v_writecount, under the vnode interlock.
  *
  * When the count is -1 and VI_TEXT_REF is set, the flag is cleared and
  * the vnode is released with vunref() after the interlock is dropped.
  * Returns EINVAL when v_writecount is not negative, otherwise 0.
  */
 static int
 vop_stdunset_text(struct vop_unset_text_args *ap)
 {
 	struct vnode *vp;
 	int error;
 	bool last;
 
 	vp = ap->a_vp;
 	last = false;
 	VI_LOCK(vp);
 	if (vp->v_writecount < 0) {
 		if ((vp->v_iflag & VI_TEXT_REF) != 0 &&
 		    vp->v_writecount == -1) {
 			last = true;
 			vp->v_iflag &= ~VI_TEXT_REF;
 		}
 		vp->v_writecount++;
 		error = 0;
 	} else {
 		error = EINVAL;
 	}
 	VI_UNLOCK(vp);
 	/* The vunref() call is made only after VI_UNLOCK. */
 	if (last)
 		vunref(vp);
 	return (error);
 }
 
 /*
  * Default VOP_ADD_WRITECOUNT: add a_inc to v_writecount under the vnode
  * interlock (taken with MTX_DUPOK).
  *
  * Returns ETXTBSY when v_writecount is negative, otherwise 0.  The
  * resulting count is asserted to be non-negative.  When the count is
  * currently 0 and the mount does not have MNTK_NOMSYNC set, vlazy() is
  * called on the vnode before the count is updated.
  */
 static int
 vop_stdadd_writecount(struct vop_add_writecount_args *ap)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	int error;
 
 	vp = ap->a_vp;
 	VI_LOCK_FLAGS(vp, MTX_DUPOK);
 	if (vp->v_writecount < 0) {
 		error = ETXTBSY;
 	} else {
 		VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp,
 		    ("neg writecount increment %d", ap->a_inc));
 		if (vp->v_writecount == 0) {
 			mp = vp->v_mount;
 			if (mp != NULL && (mp->mnt_kern_flag & MNTK_NOMSYNC) == 0)
 				vlazy(vp);
 		}
 		vp->v_writecount += ap->a_inc;
 		error = 0;
 	}
 	VI_UNLOCK(vp);
 	return (error);
 }
 
 /*
  * Default VOP_NEED_INACTIVE: always returns 1.
  */
 int
 vop_stdneed_inactive(struct vop_need_inactive_args *ap)
 {
 
 	return (1);
 }
 
 /*
  * Default VOP_IOCTL: handles only FIOSEEKDATA and FIOSEEKHOLE.
  *
  * For a regular file the offset pointed to by a_data is checked against
  * the file size: an offset that is negative or at/after the size gives
  * ENXIO; otherwise FIOSEEKHOLE rewrites the offset to the file size and
  * FIOSEEKDATA leaves it alone.
  *
  * Returns ENOTTY for any other command or a non-regular file, EBADF if
  * the vnode cannot be locked, or the VOP_GETATTR error.
  */
 int
 vop_stdioctl(struct vop_ioctl_args *ap)
 {
 	struct vattr va;
 	struct vnode *vp;
 	off_t *offp;
 	int error;
 
 	if (ap->a_command != FIOSEEKDATA && ap->a_command != FIOSEEKHOLE)
 		return (ENOTTY);
 
 	vp = ap->a_vp;
 	if (vn_lock(vp, LK_SHARED) != 0)
 		return (EBADF);
 
 	if (vp->v_type != VREG)
 		error = ENOTTY;
 	else
 		error = VOP_GETATTR(vp, &va, ap->a_cred);
 
 	if (error == 0) {
 		offp = ap->a_data;
 		if (*offp < 0 || *offp >= va.va_size)
 			error = ENXIO;
 		else if (ap->a_command == FIOSEEKHOLE)
 			*offp = va.va_size;
 	}
 
 	VOP_UNLOCK(vp);
 	return (error);
 }
 
 /*
  * vfs default ops
  * used to fill the vfs function table to get reasonable default return values.
  */
 
 /*
  * Default VFS_ROOT: not supported; always returns EOPNOTSUPP.
  */
 int
 vfs_stdroot(struct mount *mp, int flags, struct vnode **vpp)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default VFS_STATFS: not supported; always returns EOPNOTSUPP.
  */
 int
 vfs_stdstatfs(struct mount *mp, struct statfs *sbp)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default VFS_QUOTACTL: not supported; always returns EOPNOTSUPP.
  */
 int
 vfs_stdquotactl(struct mount *mp, int cmds, uid_t uid, void *arg)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default VFS_SYNC: call VOP_FSYNC on every vnode of the mount that has
  * dirty buffers.
  *
  * mp		mount point to sync
  * waitfor	wait mode; anything other than MNT_WAIT makes the vnode
  *		lock attempt non-blocking (LK_NOWAIT)
  *
  * Vnodes that cannot be acquired are skipped, except that ENOENT from
  * vget() restarts the whole scan.  Returns 0, or the error of the last
  * VOP_FSYNC that failed.
  */
 int
 vfs_stdsync(struct mount *mp, int waitfor)
 {
 	struct vnode *vp, *mvp;
 	struct thread *td;
 	int error, lockreq, allerror = 0;
 
 	td = curthread;
 	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
 	if (waitfor != MNT_WAIT)
 		lockreq |= LK_NOWAIT;
 	/*
 	 * Force stale buffer cache information to be flushed.
 	 */
 loop:
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		if ((error = vget(vp, lockreq, td)) != 0) {
 			if (error == ENOENT) {
 				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				goto loop;
 			}
 			continue;
 		}
 		error = VOP_FSYNC(vp, waitfor, td);
 		if (error)
 			allerror = error;
 		vput(vp);
 	}
 	return (allerror);
 }
 
 /*
  * VFS_SYNC stub that does nothing and always returns 0.
  */
 int
 vfs_stdnosync(struct mount *mp, int waitfor)
 {
 
 	return (0);
 }
 
 /*
  * Default VOP_COPY_FILE_RANGE: pass every argument straight through to
  * vn_generic_copy_file_range() and return its result.
  */
 static int
 vop_stdcopy_file_range(struct vop_copy_file_range_args *ap)
 {
 
 	return (vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
 	    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred,
 	    ap->a_outcred, ap->a_fsizetd));
 }
 
 /*
  * Default VFS_VGET: not supported; always returns EOPNOTSUPP.
  */
 int
 vfs_stdvget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default VFS_FHTOVP: not supported; always returns EOPNOTSUPP.
  */
 int
 vfs_stdfhtovp(struct mount *mp, struct fid *fhp, int flags,
     struct vnode **vpp)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default VFS_INIT: nothing to set up; always returns 0.
  */
 int
 vfs_stdinit(struct vfsconf *vfsp)
 {
 
 	return (0);
 }
 
 /*
  * Default VFS_UNINIT: nothing to tear down; always returns 0.
  */
 int
 vfs_stduninit(struct vfsconf *vfsp)
 {
 
 	return (0);
 }
 
 /*
  * Default VFS_EXTATTRCTL: not supported; always returns EOPNOTSUPP.
  *
  * If a vnode is passed in filename_vp it is unlocked before returning,
  * so the caller gets it back unlocked even though the request failed.
  */
 int
 vfs_stdextattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
     int attrnamespace, const char *attrname)
 {
 
 	if (filename_vp != NULL)
 		VOP_UNLOCK(filename_vp);
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default VFS_SYSCTL: not supported; always returns EOPNOTSUPP.
  */
 int
 vfs_stdsysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
 {
 
 	return (EOPNOTSUPP);
 }
 
 static vop_bypass_t *
 bp_by_off(struct vop_vector *vop, struct vop_generic_args *a)
 {
 
 	return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset));
 }
 
 /*
  * Run the vop named by a's descriptor from the given vector, bracketed
  * by sigdeferstop(SIGDEFERSTOP_SILENT) and sigallowstop().
  *
  * The function pointer is looked up with bp_by_off() and asserted to be
  * non-NULL.  Returns whatever the called operation returns.
  */
 int
 vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a)
 {
 	vop_bypass_t *bp;
 	int prev_stops, rc;
 
 	bp = bp_by_off(vop, a);
 	MPASS(bp != NULL);
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = bp(a);
 	/* Restore the state that sigdeferstop() handed back. */
 	sigallowstop(prev_stops);
 	return (rc);
 }
Index: projects/clang1000-import/sys/kern/vnode_if.src
===================================================================
--- projects/clang1000-import/sys/kern/vnode_if.src	(revision 357389)
+++ projects/clang1000-import/sys/kern/vnode_if.src	(revision 357390)
@@ -1,765 +1,765 @@
 #-
 # Copyright (c) 1992, 1993
 #	The Regents of the University of California.  All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 # 3. Neither the name of the University nor the names of its contributors
 #    may be used to endorse or promote products derived from this software
 #    without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 #	@(#)vnode_if.src	8.12 (Berkeley) 5/14/95
 # $FreeBSD$
 #
 
 #
 # Above each of the vop descriptors in lines starting with %%
 # is a specification of the locking protocol used by each vop call.
 # The first column is the name of the variable, the remaining three
 # columns are in, out and error respectively.  The "in" column defines
 # the lock state on input, the "out" column defines the state on successful
 # return, and the "error" column defines the locking state on error exit.
 #
 # The locking value can take the following values:
 # L: locked; not converted to type of lock.
 # E: locked with exclusive lock for this process.
 # U: unlocked.
 # -: not applicable.  vnode does not yet (or no longer) exist.
 # =: the same on input and output, may be either L or U.
 #
 # The parameter named "vpp" is assumed to be always used with double
 # indirection (**vpp) and that name is hard-coded in vnode_if.awk !
 #
 # Lines starting with %! specify a pre or post-condition function
 # to call before/after the vop call.
 #
 # If other such parameters are introduced, they have to be added to
 # the AWK script at the head of the definition of "add_debug_code()".
 #
 
 vop_islocked {
 	IN struct vnode *vp;
 };
 
 
 %% lookup	dvp	L L L
 %% lookup	vpp	- L -
 
 # XXX - the lookup locking protocol defies simple description and depends
 #	on the flags and operation fields in the (cnp) structure.  Note
 #	especially that *vpp may equal dvp and both may be locked.
 
 vop_lookup {
 	IN struct vnode *dvp;
 	INOUT struct vnode **vpp;
 	IN struct componentname *cnp;
 };
 
 
 %% cachedlookup	dvp	L L L
 %% cachedlookup	vpp	- L -
 
 # This must be an exact copy of lookup.  See kern/vfs_cache.c for details.
 
 vop_cachedlookup {
 	IN struct vnode *dvp;
 	INOUT struct vnode **vpp;
 	IN struct componentname *cnp;
 };
 
 
 %% create	dvp	E E E
 %% create	vpp	- L -
 %! create	post	vop_create_post
 
 vop_create {
 	IN struct vnode *dvp;
 	OUT struct vnode **vpp;
 	IN struct componentname *cnp;
 	IN struct vattr *vap;
 };
 
 
 %% whiteout	dvp	E E E
 
 vop_whiteout {
 	IN struct vnode *dvp;
 	IN struct componentname *cnp;
 	IN int flags;
 };
 
 
 %% mknod	dvp	E E E
 %% mknod	vpp	- L -
 %! mknod	post	vop_mknod_post
 
 vop_mknod {
 	IN struct vnode *dvp;
 	OUT struct vnode **vpp;
 	IN struct componentname *cnp;
 	IN struct vattr *vap;
 };
 
 
 %% open		vp	L L L
 %! open		post	vop_open_post
 
 vop_open {
 	IN struct vnode *vp;
 	IN int mode;
 	IN struct ucred *cred;
 	IN struct thread *td;
 	IN struct file *fp;
 };
 
 
 %% close	vp	L L L
 %! close	post	vop_close_post
 
 vop_close {
 	IN struct vnode *vp;
 	IN int fflag;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% access	vp	L L L
 
 vop_access {
 	IN struct vnode *vp;
 	IN accmode_t accmode;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% accessx	vp	L L L
 
 vop_accessx {
 	IN struct vnode *vp;
 	IN accmode_t accmode;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% getattr	vp	L L L
 
 vop_getattr {
 	IN struct vnode *vp;
 	OUT struct vattr *vap;
 	IN struct ucred *cred;
 };
 
 
 %% setattr	vp	E E E
 %! setattr	post	vop_setattr_post
 
 vop_setattr {
 	IN struct vnode *vp;
 	IN struct vattr *vap;
 	IN struct ucred *cred;
 };
 
 
 %% mmapped	vp	L L L
 
 vop_mmapped {
 	IN struct vnode *vp;
 };
 
 
 %% read		vp	L L L
 %! read		post	vop_read_post
 
 vop_read {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN int ioflag;
 	IN struct ucred *cred;
 };
 
 
 %% write	vp	L L L
 %! write	pre	VOP_WRITE_PRE
 %! write	post	VOP_WRITE_POST
 
 vop_write {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN int ioflag;
 	IN struct ucred *cred;
 };
 
 
 %% ioctl	vp	U U U
 
 vop_ioctl {
 	IN struct vnode *vp;
 	IN u_long command;
 	IN void *data;
 	IN int fflag;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% poll		vp	U U U
 
 vop_poll {
 	IN struct vnode *vp;
 	IN int events;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% kqfilter	vp	U U U
 
 vop_kqfilter {
 	IN struct vnode *vp;
 	IN struct knote *kn;
 };
 
 
 %% revoke	vp	L L L
 
 vop_revoke {
 	IN struct vnode *vp;
 	IN int flags;
 };
 
 
 %% fsync	vp	L L L
 
 vop_fsync {
 	IN struct vnode *vp;
 	IN int waitfor;
 	IN struct thread *td;
 };
 
 
 %% remove	dvp	E E E
 %% remove	vp	E E E
 %! remove	post	vop_remove_post
 
 vop_remove {
 	IN struct vnode *dvp;
 	IN struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
 
 %% link		tdvp	E E E
 %% link		vp	E E E
 %! link		post	vop_link_post
 
 vop_link {
 	IN struct vnode *tdvp;
 	IN struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
 
 %! rename	pre	vop_rename_pre
 %! rename	post	vop_rename_post
 
 vop_rename {
 	IN WILLRELE struct vnode *fdvp;
 	IN WILLRELE struct vnode *fvp;
 	IN struct componentname *fcnp;
 	IN WILLRELE struct vnode *tdvp;
 	IN WILLRELE struct vnode *tvp;
 	IN struct componentname *tcnp;
 };
 
 
 %% mkdir	dvp	E E E
 %% mkdir	vpp	- E -
 %! mkdir	post	vop_mkdir_post
 
 vop_mkdir {
 	IN struct vnode *dvp;
 	OUT struct vnode **vpp;
 	IN struct componentname *cnp;
 	IN struct vattr *vap;
 };
 
 
 %% rmdir	dvp	E E E
 %% rmdir	vp	E E E
 %! rmdir	post	vop_rmdir_post
 
 vop_rmdir {
 	IN struct vnode *dvp;
 	IN struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
 
 %% symlink	dvp	E E E
 %% symlink	vpp	- E -
 %! symlink	post	vop_symlink_post
 
 vop_symlink {
 	IN struct vnode *dvp;
 	OUT struct vnode **vpp;
 	IN struct componentname *cnp;
 	IN struct vattr *vap;
 	IN const char *target;
 };
 
 
 %% readdir	vp	L L L
 %! readdir	post	vop_readdir_post
 
 vop_readdir {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
 	INOUT int *eofflag;
 	OUT int *ncookies;
 	INOUT u_long **cookies;
 };
 
 
 %% readlink	vp	L L L
 
 vop_readlink {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
 };
 
 
 %% inactive	vp	E E E
 
 vop_inactive {
 	IN struct vnode *vp;
 	IN struct thread *td;
 };
 
 %! need_inactive	pre	vop_need_inactive_pre
 %! need_inactive	post	vop_need_inactive_post
 
 vop_need_inactive {
         IN struct vnode *vp;
 };
 
 %% reclaim	vp	E E E
 %! reclaim	post	vop_reclaim_post
 
 vop_reclaim {
 	IN struct vnode *vp;
 	IN struct thread *td;
 };
 
 
 %! lock1	pre	vop_lock_pre
 %! lock1	post	vop_lock_post
 
 vop_lock1 {
 	IN struct vnode *vp;
 	IN int flags;
 	IN char *file;
 	IN int line;
 };
 
 
 %! unlock	pre	vop_unlock_pre
 %! unlock	post	vop_unlock_post
 
 vop_unlock {
 	IN struct vnode *vp;
 };
 
 
 %% bmap		vp	L L L
 
 vop_bmap {
 	IN struct vnode *vp;
 	IN daddr_t bn;
 	OUT struct bufobj **bop;
 	IN daddr_t *bnp;
 	OUT int *runp;
 	OUT int *runb;
 };
 
 
 %% strategy	vp	L L L
 %! strategy	pre	vop_strategy_pre
 
 vop_strategy {
 	IN struct vnode *vp;
 	IN struct buf *bp;
 };
 
 
 %% getwritemount vp	= = =
 
 vop_getwritemount {
 	IN struct vnode *vp;
 	OUT struct mount **mpp;
 };
 
 
 %% print	vp	- - -
 
 vop_print {
 	IN struct vnode *vp;
 };
 
 
 %% pathconf	vp	L L L
 
 vop_pathconf {
 	IN struct vnode *vp;
 	IN int name;
 	OUT long *retval;
 };
 
 
 %% advlock	vp	U U U
 
 vop_advlock {
 	IN struct vnode *vp;
 	IN void *id;
 	IN int op;
 	IN struct flock *fl;
 	IN int flags;
 };
 
 
 %% advlockasync	vp	U U U
 
 vop_advlockasync {
 	IN struct vnode *vp;
 	IN void *id;
 	IN int op;
 	IN struct flock *fl;
 	IN int flags;
 	IN struct task *task;	
 	INOUT void **cookiep;
 };
 
 
 %% advlockpurge	vp	E E E
 
 vop_advlockpurge {
 	IN struct vnode *vp;
 };
 
 
 %% reallocblks	vp	E E E
 
 vop_reallocblks {
 	IN struct vnode *vp;
 	IN struct cluster_save *buflist;
 };
 
 
 %% getpages	vp	L L L
 
 vop_getpages {
 	IN struct vnode *vp;
 	IN vm_page_t *m;
 	IN int count;
 	IN int *rbehind;
 	IN int *rahead;
 };
 
 
 %% getpages_async	vp	L L L
 
 vop_getpages_async {
 	IN struct vnode *vp;
 	IN vm_page_t *m;
 	IN int count;
 	IN int *rbehind;
 	IN int *rahead;
 	IN vop_getpages_iodone_t *iodone;
 	IN void *arg;
 };
 
 
 %% putpages	vp	L L L
 
 vop_putpages {
 	IN struct vnode *vp;
 	IN vm_page_t *m;
 	IN int count;
 	IN int sync;
 	IN int *rtvals;
 };
 
 
 %% getacl	vp	L L L
 
 vop_getacl {
 	IN struct vnode *vp;
 	IN acl_type_t type;
 	OUT struct acl *aclp;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% setacl	vp	E E E
 
 vop_setacl {
 	IN struct vnode *vp;
 	IN acl_type_t type;
 	IN struct acl *aclp;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% aclcheck	vp	= = =
 
 vop_aclcheck {
 	IN struct vnode *vp;
 	IN acl_type_t type;
 	IN struct acl *aclp;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% closeextattr	vp	L L L
 
 vop_closeextattr {
 	IN struct vnode *vp;
 	IN int commit;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% getextattr	vp	L L L
 
 vop_getextattr {
 	IN struct vnode *vp;
 	IN int attrnamespace;
 	IN const char *name;
 	INOUT struct uio *uio;
 	OUT size_t *size;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% listextattr	vp	L L L
 
 vop_listextattr {
 	IN struct vnode *vp;
 	IN int attrnamespace;
 	INOUT struct uio *uio;
 	OUT size_t *size;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% openextattr	vp	L L L
 
 vop_openextattr {
 	IN struct vnode *vp;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% deleteextattr	vp	E E E
 %! deleteextattr	post	vop_deleteextattr_post
 
 vop_deleteextattr {
 	IN struct vnode *vp;
 	IN int attrnamespace;
 	IN const char *name;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% setextattr	vp	E E E
 %! setextattr	post	vop_setextattr_post
 
 vop_setextattr {
 	IN struct vnode *vp;
 	IN int attrnamespace;
 	IN const char *name;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% setlabel	vp	E E E
 
 vop_setlabel {
 	IN struct vnode *vp;
 	IN struct label *label;
 	IN struct ucred *cred;
 	IN struct thread *td;
 };
 
 
 %% vptofh	vp	= = =
 
 vop_vptofh {
 	IN struct vnode *vp;
 	IN struct fid *fhp;
 };
 
 
 %% vptocnp		vp	L L L
 %% vptocnp		vpp	- U -
 
 vop_vptocnp {
 	IN struct vnode *vp;
 	OUT struct vnode **vpp;
 	IN struct ucred *cred;
 	INOUT char *buf;
-	INOUT int *buflen;
+	INOUT size_t *buflen;
 };
 
 
 %% allocate	vp	E E E
 
 vop_allocate {
 	IN struct vnode *vp;
 	INOUT off_t *offset;
 	INOUT off_t *len;
 };
 
 
 %% advise	vp	U U U
 
 vop_advise {
 	IN struct vnode *vp;
 	IN off_t start;
 	IN off_t end;
 	IN int advice;
 };
 
 
 %% unp_bind	vp	E E E
 
 vop_unp_bind {
 	IN struct vnode *vp;
 	IN struct unpcb *unpcb;
 };
 
 
 %% unp_connect	vp	L L L
 
 vop_unp_connect {
 	IN struct vnode *vp;
 	OUT struct unpcb **unpcb;
 };
 
 
 %% unp_detach	vp	= = =
 
 vop_unp_detach {
 	IN struct vnode *vp;
 };
 
 
 %% is_text	vp	L L L
 
 vop_is_text {
 	IN struct vnode *vp;
 };
 
 
 %% set_text	vp	= = =
 
 vop_set_text {
 	IN struct vnode *vp;
 };
 
 
 %% vop_unset_text	vp	L L L
 
 vop_unset_text {
 	IN struct vnode *vp;
 };
 
 
 %% add_writecount	vp	L L L
 
 vop_add_writecount {
 	IN struct vnode *vp;
 	IN int inc;
 };
 
 
 %% fdatasync	vp	L L L
 
 vop_fdatasync {
 	IN struct vnode *vp;
 	IN struct thread *td;
 };
 
 
 %% copy_file_range	invp	U U U
 %% copy_file_range	outvp	U U U
 
 vop_copy_file_range {
 	IN struct vnode *invp;
 	INOUT off_t *inoffp;
 	IN struct vnode *outvp;
 	INOUT off_t *outoffp;
 	INOUT size_t *lenp;
 	IN unsigned int flags;
 	IN struct ucred *incred;
 	IN struct ucred *outcred;
 	IN struct thread *fsizetd;
 };
 
 
 # The VOPs below are spares at the end of the table to allow new VOPs to be
 # added in stable branches without breaking the KBI.  New VOPs in HEAD should
 # be added above these spares.  When merging a new VOP to a stable branch,
 # the new VOP should replace one of the spares.
 
 vop_spare1 {
 	IN struct vnode *vp;
 };
 
 vop_spare2 {
 	IN struct vnode *vp;
 };
 
 vop_spare3 {
 	IN struct vnode *vp;
 };
 
 vop_spare4 {
 	IN struct vnode *vp;
 };
 
 vop_spare5 {
 	IN struct vnode *vp;
 };
Index: projects/clang1000-import/sys/riscv/sifive/fu540_prci.c
===================================================================
--- projects/clang1000-import/sys/riscv/sifive/fu540_prci.c	(revision 357389)
+++ projects/clang1000-import/sys/riscv/sifive/fu540_prci.c	(revision 357390)
@@ -1,268 +1,327 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 Axiado Corporation
  * All rights reserved.
  *
  * This software was developed in part by Kristof Provost under contract for
  * Axiado Corporation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/rman.h>
 
 #include <machine/bus.h>
 #include <machine/cpu.h>
 
 #include <dev/extres/clk/clk.h>
+#include <dev/extres/clk/clk_fixed.h>
 
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 #include <dev/ofw/openfirm.h>
 
+#include <gnu/dts/include/dt-bindings/clock/sifive-fu540-prci.h>
+
+static struct ofw_compat_data compat_data[] = {	/* probe accepts ocd_data != 0 */
+	{ "sifive,aloeprci0",		1 },
+	{ "sifive,ux00prci0",		1 },
+	{ "sifive,fu540-c000-prci",	1 },
+	{ NULL,				0 },
+};
+
 static struct resource_spec prci_spec[] = {
 	{ SYS_RES_MEMORY, 0, RF_ACTIVE },
 	RESOURCE_SPEC_END
 };
 
 struct prci_softc {
 	device_t		dev;
 
 	struct mtx		mtx;
 
 	struct clkdom		*clkdom;
 	struct resource		*res;
 	bus_space_tag_t		bst;
 	bus_space_handle_t	bsh;
 };
 
 struct prci_clk_pll_sc {
 	struct prci_softc	*parent_sc;
+	uint32_t		reg;	/* offset read in prci_clk_pll_recalc */
 };
 
 #define	PRCI_LOCK(sc)			mtx_lock(&(sc)->mtx)
 #define	PRCI_UNLOCK(sc)			mtx_unlock(&(sc)->mtx)
 #define	PRCI_ASSERT_LOCKED(sc)		mtx_assert(&(sc)->mtx, MA_OWNED);
 #define	PRCI_ASSERT_UNLOCKED(sc)	mtx_assert(&(sc)->mtx, MA_NOTOWNED);
 
-#define	PRCI_COREPLL			0x4
-#define		PRCI_COREPLL_DIVR_MASK	0x3f
-#define		PRCI_COREPLL_DIVR_SHIFT	0
-#define		PRCI_COREPLL_DIVF_MASK	0x7fc0
-#define		PRCI_COREPLL_DIVF_SHIFT	6
-#define		PRCI_COREPLL_DIVQ_MASK	0x38000
-#define		PRCI_COREPLL_DIVQ_SHIFT	15
+#define	PRCI_COREPLL_CFG0		0x4
+#define	PRCI_DDRPLL_CFG0		0xC
+#define	PRCI_GEMGXLPLL_CFG0		0x1C
 
+#define	PRCI_PLL_DIVR_MASK		0x3f
+#define	PRCI_PLL_DIVR_SHIFT		0
+#define	PRCI_PLL_DIVF_MASK		0x7fc0
+#define	PRCI_PLL_DIVF_SHIFT		6
+#define	PRCI_PLL_DIVQ_MASK		0x38000
+#define	PRCI_PLL_DIVQ_SHIFT		15
+
 #define	PRCI_READ(_sc, _reg)		\
     bus_space_read_4((_sc)->bst, (_sc)->bsh, (_reg))
 
+struct prci_pll_def {
+	uint32_t	id;	/* becomes clkdef.id in prci_attach */
+	const char	*name;	/* becomes clkdef.name in prci_attach */
+	uint32_t	reg;	/* passed to prci_pll_register as reg */
+};
+
+#define PLL(_id, _name, _base)					\
+{								\
+	.id = (_id),						\
+	.name = (_name),					\
+	.reg = (_base),						\
+}
+
+/* PLL clocks (file-private, read-only): clock ID, name, CFG0 register. */
+static const struct prci_pll_def pll_clks[] = {
+	PLL(PRCI_CLK_COREPLL, "coreclk",  PRCI_COREPLL_CFG0),
+	PLL(PRCI_CLK_DDRPLL, "ddrclk",   PRCI_DDRPLL_CFG0),
+	PLL(PRCI_CLK_GEMGXLPLL, "gemgxclk", PRCI_GEMGXLPLL_CFG0),
+};
+
+/* Fixed divisor clock TLCLK: parent "coreclk", mult 1, div 2; file-private. */
+static struct clk_fixed_def tlclk_def = {
+	.clkdef.id = PRCI_CLK_TLCLK,
+	.clkdef.name = "prci_tlclk",
+	.clkdef.parent_names = (const char *[]){"coreclk"},
+	.clkdef.parent_cnt = 1,
+	.clkdef.flags = CLK_NODE_STATIC_STRINGS,
+	.mult = 1,
+	.div = 2,
+};
+
 static int
 prci_clk_pll_init(struct clknode *clk, device_t dev)
 {
 
 	clknode_init_parent_idx(clk, 0);
 
 	return (0);
 }
 
 static int
 prci_clk_pll_recalc(struct clknode *clk, uint64_t *freq)
 {
 	struct prci_clk_pll_sc *sc;
 	struct clknode *parent_clk;
 	uint32_t val;
 	uint64_t refclk, divf, divq, divr;
 	int err;
 
 	KASSERT(freq != NULL, ("freq cannot be NULL"));
 
 	sc = clknode_get_softc(clk);
 
 	PRCI_LOCK(sc->parent_sc);
 
 	/* Get refclock frequency. */
 	parent_clk = clknode_get_parent(clk);
 	err = clknode_get_freq(parent_clk, &refclk);
 	if (err) {
 		device_printf(sc->parent_sc->dev,
 		    "Failed to get refclk frequency\n");
 		PRCI_UNLOCK(sc->parent_sc);
 		return (err);
 	}
 
 	/* Calculate the PLL output */
-	val = PRCI_READ(sc->parent_sc, PRCI_COREPLL);
+	val = PRCI_READ(sc->parent_sc, sc->reg);
 
-	divf = (val & PRCI_COREPLL_DIVF_MASK) >> PRCI_COREPLL_DIVF_SHIFT;
-	divq = (val & PRCI_COREPLL_DIVQ_MASK) >> PRCI_COREPLL_DIVQ_SHIFT;
-	divr = (val & PRCI_COREPLL_DIVR_MASK) >> PRCI_COREPLL_DIVR_SHIFT;
+	divf = (val & PRCI_PLL_DIVF_MASK) >> PRCI_PLL_DIVF_SHIFT;
+	divq = (val & PRCI_PLL_DIVQ_MASK) >> PRCI_PLL_DIVQ_SHIFT;
+	divr = (val & PRCI_PLL_DIVR_MASK) >> PRCI_PLL_DIVR_SHIFT;
 
 	*freq = refclk / (divr + 1) * (2 * (divf + 1)) / (1 << divq);
 
 	PRCI_UNLOCK(sc->parent_sc);
 
 	return (0);
 }
 
 static clknode_method_t prci_clk_pll_clknode_methods[] = {
 	CLKNODEMETHOD(clknode_init,		prci_clk_pll_init),
 	CLKNODEMETHOD(clknode_recalc_freq,	prci_clk_pll_recalc),
 	CLKNODEMETHOD_END
 };
 
 DEFINE_CLASS_1(prci_clk_pll_clknode, prci_clk_pll_clknode_class,
     prci_clk_pll_clknode_methods, sizeof(struct prci_clk_pll_sc),
     clknode_class);
 
 static int
 prci_probe(device_t dev)
 {
 
 	if (!ofw_bus_status_okay(dev))
 		return (ENXIO);
 
-	if (!ofw_bus_is_compatible(dev, "sifive,aloeprci0"))
+	if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0)
 		return (ENXIO);
 
 	device_set_desc(dev, "SiFive FU540 Power Reset Clocking Interrupt");
 
 	return (BUS_PROBE_DEFAULT);
 }
 
 static void
-prci_pll_register(struct prci_softc *parent_sc, struct clknode_init_def *clkdef)
+prci_pll_register(struct prci_softc *parent_sc, struct clknode_init_def *clkdef,
+	uint32_t reg)
 {
 	struct clknode *clk;
 	struct prci_clk_pll_sc *sc;
 
 	clk = clknode_create(parent_sc->clkdom, &prci_clk_pll_clknode_class,
 	    clkdef);
 	if (clk == NULL)
 		panic("Failed to create clknode");
 
 	sc = clknode_get_softc(clk);
 	sc->parent_sc = parent_sc;
+	sc->reg = reg;
 
 	clknode_register(parent_sc->clkdom, clk);
 }
 
 static int
 prci_attach(device_t dev)
 {
 	struct clknode_init_def clkdef;
 	struct prci_softc *sc;
 	clk_t clk_parent;
 	phandle_t node;
 	int i, ncells, error;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 
 	mtx_init(&sc->mtx, device_get_nameunit(sc->dev), NULL, MTX_DEF);
 
 	error = bus_alloc_resources(dev, prci_spec, &sc->res);
 	if (error) {
 		device_printf(dev, "Couldn't allocate resources\n");
 		goto fail;
 	}
 	sc->bst = rman_get_bustag(sc->res);
 	sc->bsh = rman_get_bushandle(sc->res);
 
 	node = ofw_bus_get_node(dev);
 	error = ofw_bus_parse_xref_list_get_length(node, "clocks",
 	    "#clock-cells", &ncells);
-	if (error != 0 || ncells != 1) {
+	if (error != 0 || ncells < 1) {
 		device_printf(dev, "couldn't find parent clock\n");
 		goto fail;
 	}
 
 	bzero(&clkdef, sizeof(clkdef));
-	clkdef.id = 0;
-	clkdef.name = "coreclk";
 	clkdef.parent_names = mallocarray(ncells, sizeof(char *), M_OFWPROP,
 	    M_WAITOK);
 	for (i = 0; i < ncells; i++) {
 		error = clk_get_by_ofw_index(dev, 0, i, &clk_parent);
 		if (error != 0) {
 			device_printf(dev, "cannot get clock %d\n", error);
 			goto fail1;
 		}
 		clkdef.parent_names[i] = clk_get_name(clk_parent);
 		if (bootverbose)
 			device_printf(dev, "clk parent: %s\n",
 			    clkdef.parent_names[i]);
 		clk_release(clk_parent);
 	}
 	clkdef.parent_cnt = ncells;
 
 	sc->clkdom = clkdom_create(dev);
 	if (sc->clkdom == NULL) {
 		device_printf(dev, "Couldn't create clock domain\n");
-		goto fail;
+		goto fail1;	/* parent_names is allocated; free it */
 	}
 
 	/* We can't free a clkdom, so from now on we cannot fail. */
-	prci_pll_register(sc, &clkdef);
+	for (i = 0; i < nitems(pll_clks); i++) {
+		clkdef.id = pll_clks[i].id;
+		clkdef.name = pll_clks[i].name;
+		prci_pll_register(sc, &clkdef, pll_clks[i].reg);
+	}
+
+	/*
+	 * Register the fixed clock "tlclk".
+	 *
+	 * If an older device tree is being used, tlclk may appear as its own
+	 * entity in the device tree, under soc/tlclk. If this is the case it
+	 * will be registered automatically by the fixed_clk driver, and the
+	 * version we register here will be an unreferenced duplicate.
+	 */
+	clknode_fixed_register(sc->clkdom, &tlclk_def);
 
 	error = clkdom_finit(sc->clkdom);
 	if (error)
 		panic("Couldn't finalise clock domain");
 
 	return (0);
 
 fail1:
 	free(clkdef.parent_names, M_OFWPROP);
 
 fail:
 	bus_release_resources(dev, prci_spec, &sc->res);
 	mtx_destroy(&sc->mtx);
-	return (error);
+	return (error != 0 ? error : ENXIO);	/* never 0 after teardown */
 }
 
 static device_method_t prci_methods[] = {
 	DEVMETHOD(device_probe,		prci_probe),
 	DEVMETHOD(device_attach,	prci_attach),
 
 	DEVMETHOD_END
 };
 
 static driver_t prci_driver = {
 	"fu540prci",
 	prci_methods,
 	sizeof(struct prci_softc)
 };
 
 static devclass_t prci_devclass;
 
 EARLY_DRIVER_MODULE(fu540prci, simplebus, prci_driver, prci_devclass, 0, 0,
     BUS_PASS_BUS);
Index: projects/clang1000-import/sys/sys/syscallsubr.h
===================================================================
--- projects/clang1000-import/sys/sys/syscallsubr.h	(revision 357389)
+++ projects/clang1000-import/sys/sys/syscallsubr.h	(revision 357390)
@@ -1,329 +1,327 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002 Ian Dowse.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SYSCALLSUBR_H_
 #define _SYS_SYSCALLSUBR_H_
 
 #include <sys/signal.h>
 #include <sys/socket.h>
 #include <sys/mac.h>
 #include <sys/mount.h>
 #include <sys/_cpuset.h>
 #include <sys/_domainset.h>
 #include <sys/_uio.h>
 
 struct __wrusage;
 struct file;
 struct filecaps;
 enum idtype;
 struct itimerval;
 struct image_args;
 struct jail;
 struct kevent;
 struct kevent_copyops;
 struct kld_file_stat;
 struct ksiginfo;
 struct mbuf;
 struct msghdr;
 struct msqid_ds;
 struct pollfd;
 struct ogetdirentries_args;
 struct rlimit;
 struct rusage;
 struct sched_param;
 union semun;
 struct sockaddr;
 struct stat;
 struct thr_param;
 struct uio;
 
 typedef int (*mmap_check_fp_fn)(struct file *, int, int, int);
 
-int	kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg,
-	    size_t buflen, size_t path_max);
 int	kern_accept(struct thread *td, int s, struct sockaddr **name,
 	    socklen_t *namelen, struct file **fp);
 int	kern_accept4(struct thread *td, int s, struct sockaddr **name,
 	    socklen_t *namelen, int flags, struct file **fp);
 int	kern_accessat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int flags, int mode);
 int	kern_adjtime(struct thread *td, struct timeval *delta,
 	    struct timeval *olddelta);
 int	kern_alternate_path(struct thread *td, const char *prefix, const char *path,
 	    enum uio_seg pathseg, char **pathbuf, int create, int dirfd);
 int	kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa);
 int	kern_break(struct thread *td, uintptr_t *addr);
 int	kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds,
 	    size_t ncmds);
 int	kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights);
 int	kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg);
 int	kern_clock_getcpuclockid2(struct thread *td, id_t id, int which,
 	    clockid_t *clk_id);
 int	kern_clock_getres(struct thread *td, clockid_t clock_id,
 	    struct timespec *ts);
 int	kern_clock_gettime(struct thread *td, clockid_t clock_id,
 	    struct timespec *ats);
 int	kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
 	    const struct timespec *rqtp, struct timespec *rmtp);
 int	kern_clock_settime(struct thread *td, clockid_t clock_id,
 	    struct timespec *ats);
 int	kern_close(struct thread *td, int fd);
 int	kern_connectat(struct thread *td, int dirfd, int fd,
 	    struct sockaddr *sa);
 int	kern_copy_file_range(struct thread *td, int infd, off_t *inoffp,
 	    int outfd, off_t *outoffp, size_t len, unsigned int flags);
 int	kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
 int	kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t cpusetsize,
 	    const cpuset_t *maskp);
 int	kern_cpuset_getdomain(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t domainsetsize,
 	    domainset_t *maskp, int *policyp);
 int	kern_cpuset_setdomain(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t domainsetsize,
 	    const domainset_t *maskp, int policy);
 int	kern_cpuset_getid(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, cpusetid_t *setid);
 int	kern_cpuset_setid(struct thread *td, cpuwhich_t which,
 	    id_t id, cpusetid_t setid);
 int	kern_dup(struct thread *td, u_int mode, int flags, int old, int new);
 int	kern_execve(struct thread *td, struct image_args *args,
 	    struct mac *mac_p);
 int	kern_fchmodat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, mode_t mode, int flag);
 int	kern_fchownat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int uid, int gid, int flag);
 int	kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg);
 int	kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg);
 int	kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf);
 int	kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf);
 int	kern_fpathconf(struct thread *td, int fd, int name, long *valuep);
 int	kern_fstat(struct thread *td, int fd, struct stat *sbp);
 int	kern_fstatfs(struct thread *td, int fd, struct statfs *buf);
 int	kern_fsync(struct thread *td, int fd, bool fullsync);
 int	kern_ftruncate(struct thread *td, int fd, off_t length);
 int	kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 	    enum uio_seg tptrseg);
 int	kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 	    enum uio_seg tptrseg);
 int	kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
 	    off_t *basep, ssize_t *residp, enum uio_seg bufseg);
 int	kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
 	    size_t *countp, enum uio_seg bufseg, int mode);
 int	kern_getitimer(struct thread *, u_int, struct itimerval *);
 int	kern_getppid(struct thread *);
 int	kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 	    socklen_t *alen);
 int	kern_getpriority(struct thread *td, int which, int who);
 int	kern_getrusage(struct thread *td, int who, struct rusage *rup);
 int	kern_getsid(struct thread *td, pid_t pid);
 int	kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 	    socklen_t *alen);
 int	kern_getsockopt(struct thread *td, int s, int level, int name,
 	    void *optval, enum uio_seg valseg, socklen_t *valsize);
 int	kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data);
 int	kern_jail(struct thread *td, struct jail *j);
 int	kern_jail_get(struct thread *td, struct uio *options, int flags);
 int	kern_jail_set(struct thread *td, struct uio *options, int flags);
 int	kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
 	    struct kevent_copyops *k_ops, const struct timespec *timeout);
 int	kern_kevent_anonymous(struct thread *td, int nevents,
 	    struct kevent_copyops *k_ops);
 int	kern_kevent_fp(struct thread *td, struct file *fp, int nchanges,
 	    int nevents, struct kevent_copyops *k_ops,
 	    const struct timespec *timeout);
 int	kern_kill(struct thread *td, pid_t pid, int signum);
 int	kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps);
 int	kern_kldload(struct thread *td, const char *file, int *fileid);
 int	kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat);
 int	kern_kldunload(struct thread *td, int fileid, int flags);
 int	kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
 	    const char *path2, enum uio_seg segflg, int follow);
 int	kern_listen(struct thread *td, int s, int backlog);
 int	kern_lseek(struct thread *td, int fd, off_t offset, int whence);
 int	kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg,
 	    struct timeval *tptr, enum uio_seg tptrseg);
 int	kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav);
 int	kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec);
 int	kern_mkdirat(struct thread *td, int fd, const char *path,
 	    enum uio_seg segflg, int mode);
 int	kern_mkfifoat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int mode);
 int	kern_mknodat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int mode, dev_t dev);
 int	kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr,
 	    size_t len);
 int	kern_mmap(struct thread *td, uintptr_t addr, size_t len, int prot,
 	    int flags, int fd, off_t pos);
 int	kern_mmap_fpcheck(struct thread *td, uintptr_t addr, size_t len,
 	    int prot, int flags, int fd, off_t pos,
 	    mmap_check_fp_fn check_fp_fn);
 int	kern_mmap_maxprot(struct proc *p, int prot);
 int	kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot);
 int	kern_msgctl(struct thread *, int, int, struct msqid_ds *);
 int	kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *);
 int	kern_msgsnd(struct thread *, int, const void *, size_t, int, long);
 int	kern_msync(struct thread *td, uintptr_t addr, size_t size, int flags);
 int	kern_munlock(struct thread *td, uintptr_t addr, size_t size);
 int	kern_munmap(struct thread *td, uintptr_t addr, size_t size);
 int     kern_nanosleep(struct thread *td, struct timespec *rqt,
 	    struct timespec *rmt);
 int	kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 	    long *ploff);
 int	kern_openat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int flags, int mode);
 int	kern_pathconf(struct thread *td, const char *path,
 	    enum uio_seg pathseg, int name, u_long flags, long *valuep);
 int	kern_pipe(struct thread *td, int fildes[2], int flags,
 	    struct filecaps *fcaps1, struct filecaps *fcaps2);
 int	kern_poll(struct thread *td, struct pollfd *fds, u_int nfds,
 	    struct timespec *tsp, sigset_t *uset);
 int	kern_posix_error(struct thread *td, int error);
 int	kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 	    int advice);
 int	kern_posix_fallocate(struct thread *td, int fd, off_t offset,
 	    off_t len);
 int	kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com,
 	    void *data);
 int	kern_pread(struct thread *td, int fd, void *buf, size_t nbyte,
 	    off_t offset);
 int	kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset);
 int	kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou,
 	    fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits);
 int	kern_ptrace(struct thread *td, int req, pid_t pid, void *addr,
 	    int data);
 int	kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte,
 	    off_t offset);
 int	kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset);
 int	kern_readlinkat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count);
 int	kern_readv(struct thread *td, int fd, struct uio *auio);
 int	kern_recvit(struct thread *td, int s, struct msghdr *mp,
 	    enum uio_seg fromseg, struct mbuf **controlp);
 int	kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
 	    const char *new, enum uio_seg pathseg);
 int	kern_frmdirat(struct thread *td, int dfd, const char *path, int fd,
 	    enum uio_seg pathseg, int flag);
 int	kern_sched_getparam(struct thread *td, struct thread *targettd,
 	    struct sched_param *param);
 int	kern_sched_getscheduler(struct thread *td, struct thread *targettd,
 	    int *policy);
 int	kern_sched_setparam(struct thread *td, struct thread *targettd,
 	    struct sched_param *param);
 int	kern_sched_setscheduler(struct thread *td, struct thread *targettd,
 	    int policy, struct sched_param *param);
 int	kern_sched_rr_get_interval(struct thread *td, pid_t pid,
 	    struct timespec *ts);
 int	kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd,
 	    struct timespec *ts);
 int	kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	    union semun *arg, register_t *rval);
 int	kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
 	    fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits);
 int	kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
 	    struct mbuf *control, enum uio_seg segflg);
 int	kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups);
 int	kern_setitimer(struct thread *, u_int, struct itimerval *,
 	    struct itimerval *);
 int	kern_setpriority(struct thread *td, int which, int who, int prio);
 int	kern_setrlimit(struct thread *, u_int, struct rlimit *);
 int	kern_setsockopt(struct thread *td, int s, int level, int name,
 	    const void *optval, enum uio_seg valseg, socklen_t valsize);
 int	kern_settimeofday(struct thread *td, struct timeval *tv,
 	    struct timezone *tzp);
 int	kern_shm_open(struct thread *td, const char *userpath, int flags,
 	    mode_t mode, struct filecaps *fcaps);
 int	kern_shm_open2(struct thread *td, const char *path, int flags,
 	    mode_t mode, int shmflags, struct filecaps *fcaps,
 	    const char *name);
 int	kern_shmat(struct thread *td, int shmid, const void *shmaddr,
 	    int shmflg);
 int	kern_shmctl(struct thread *td, int shmid, int cmd, void *buf,
 	    size_t *bufsz);
 int	kern_shutdown(struct thread *td, int s, int how);
 int	kern_sigaction(struct thread *td, int sig, const struct sigaction *act,
 	    struct sigaction *oact, int flags);
 int	kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss);
 int	kern_sigprocmask(struct thread *td, int how,
 	    sigset_t *set, sigset_t *oset, int flags);
 int	kern_sigsuspend(struct thread *td, sigset_t mask);
 int	kern_sigtimedwait(struct thread *td, sigset_t waitset,
 	    struct ksiginfo *ksi, struct timespec *timeout);
 int	kern_sigqueue(struct thread *td, pid_t pid, int signum,
 	    union sigval *value);
 int	kern_socket(struct thread *td, int domain, int type, int protocol);
 int	kern_statat(struct thread *td, int flag, int fd, const char *path,
 	    enum uio_seg pathseg, struct stat *sbp,
 	    void (*hook)(struct vnode *vp, struct stat *sbp));
 int	kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
 	    struct statfs *buf);
 int	kern_symlinkat(struct thread *td, const char *path1, int fd,
 	    const char *path2, enum uio_seg segflg);
 int	kern_sync(struct thread *td);
 int	kern_ktimer_create(struct thread *td, clockid_t clock_id,
 	    struct sigevent *evp, int *timerid, int preset_id);
 int	kern_ktimer_delete(struct thread *, int);
 int	kern_ktimer_settime(struct thread *td, int timer_id, int flags,
 	    struct itimerspec *val, struct itimerspec *oval);
 int	kern_ktimer_gettime(struct thread *td, int timer_id,
 	    struct itimerspec *val);
 int	kern_ktimer_getoverrun(struct thread *td, int timer_id);
 int	kern_thr_alloc(struct proc *, int pages, struct thread **);
 int	kern_thr_exit(struct thread *td);
 int	kern_thr_new(struct thread *td, struct thr_param *param);
 int	kern_thr_suspend(struct thread *td, struct timespec *tsp);
 int	kern_truncate(struct thread *td, const char *path,
 	    enum uio_seg pathseg, off_t length);
 int	kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
 	    enum uio_seg pathseg, int flag, ino_t oldinum);
 int	kern_utimesat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg);
 int	kern_utimensat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg,
 	    int follow);
 int	kern_wait(struct thread *td, pid_t pid, int *status, int options,
 	    struct rusage *rup);
 int	kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status,
 	    int options, struct __wrusage *wrup, siginfo_t *sip);
 int	kern_writev(struct thread *td, int fd, struct uio *auio);
 int	kern_socketpair(struct thread *td, int domain, int type, int protocol,
 	    int *rsv);
 int	kern_unmount(struct thread *td, const char *path, int flags);
 
 /* flags for kern_sigaction */
 #define	KSA_OSIGSET	0x0001	/* uses osigact_t */
 #define	KSA_FREEBSD4	0x0002	/* uses ucontext4 */
 
 struct freebsd11_dirent;
 
 int	freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int
 	    count, long *basep, void (*func)(struct freebsd11_dirent *));
 
 #endif /* !_SYS_SYSCALLSUBR_H_ */
Index: projects/clang1000-import/sys/sys/vnode.h
===================================================================
--- projects/clang1000-import/sys/sys/vnode.h	(revision 357389)
+++ projects/clang1000-import/sys/sys/vnode.h	(revision 357390)
@@ -1,979 +1,980 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vnode.h	8.7 (Berkeley) 2/4/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_VNODE_H_
 #define	_SYS_VNODE_H_
 
 #include <sys/bufobj.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/lockmgr.h>
 #include <sys/mutex.h>
 #include <sys/rangelock.h>
 #include <sys/selinfo.h>
 #include <sys/uio.h>
 #include <sys/acl.h>
 #include <sys/ktr.h>
 
 /*
  * The vnode is the focus of all file activity in UNIX.  There is a
  * unique vnode allocated for each active file, each current directory,
  * each mounted-on file, text file, and the root.
  */
 
 /*
  * Vnode types.  VNON means no type.
  */
 enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD,
 		  VMARKER };
 
 enum vgetstate	{ VGET_HOLDCNT, VGET_USECOUNT };
 /*
  * Each underlying filesystem allocates its own private area and hangs
  * it from v_data.  If non-null, this area is freed in getnewvnode().
  */
 
 struct namecache;
 
 struct vpollinfo {
 	struct	mtx vpi_lock;		/* lock to protect below */
 	struct	selinfo vpi_selinfo;	/* identity of poller(s) */
 	short	vpi_events;		/* what they are looking for */
 	short	vpi_revents;		/* what has happened */
 };
 
 /*
  * Reading or writing any of these items requires holding the appropriate lock.
  *
  * Lock reference:
  *	c - namecache mutex
  *	i - interlock
  *	l - mp mnt_listmtx or freelist mutex
  *	I - updated with atomics, 0->1 and 1->0 transitions with interlock held
  *	m - mount point interlock
  *	p - pollinfo lock
  *	u - Only a reference to the vnode is needed to read.
  *	v - vnode lock
  *
  * Vnodes may be found on many lists.  The general way to deal with operating
  * on a vnode that is on a list is:
  *	1) Lock the list and find the vnode.
  *	2) Lock interlock so that the vnode does not go away.
  *	3) Unlock the list to avoid lock order reversals.
  *	4) vget with LK_INTERLOCK and check for ENOENT, or
  *	5) Check for DOOMED if the vnode lock is not required.
  *	6) Perform your operation, then vput().
  */
 
 #if defined(_KERNEL) || defined(_KVM_VNODE)
 
 struct vnode {
 	/*
 	 * Fields which define the identity of the vnode.  These fields are
 	 * owned by the filesystem (XXX: and vgone() ?)
 	 */
 	enum	vtype v_type:8;			/* u vnode type */
 	short	v_irflag;			/* i frequently read flags */
 	struct	vop_vector *v_op;		/* u vnode operations vector */
 	void	*v_data;			/* u private data for fs */
 
 	/*
 	 * Filesystem instance stuff
 	 */
 	struct	mount *v_mount;			/* u ptr to vfs we are in */
 	TAILQ_ENTRY(vnode) v_nmntvnodes;	/* m vnodes for mount point */
 
 	/*
 	 * Type specific fields, only one applies to any given vnode.
 	 */
 	union {
 		struct mount	*v_mountedhere;	/* v ptr to mountpoint (VDIR) */
 		struct unpcb	*v_unpcb;	/* v unix domain net (VSOCK) */
 		struct cdev	*v_rdev; 	/* v device (VCHR, VBLK) */
 		struct fifoinfo	*v_fifoinfo;	/* v fifo (VFIFO) */
 	};
 
 	/*
 	 * vfs_hash: (mount + inode) -> vnode hash.  The hash value
 	 * itself is grouped with other int fields, to avoid padding.
 	 */
 	LIST_ENTRY(vnode)	v_hashlist;
 
 	/*
 	 * VFS_namecache stuff
 	 */
 	LIST_HEAD(, namecache) v_cache_src;	/* c Cache entries from us */
 	TAILQ_HEAD(, namecache) v_cache_dst;	/* c Cache entries to us */
 	struct namecache *v_cache_dd;		/* c Cache entry for .. vnode */
 
 	/*
 	 * Locking
 	 */
 	struct	lock v_lock;			/* u (if fs doesn't have one) */
 	struct	mtx v_interlock;		/* lock for "i" things */
 	struct	lock *v_vnlock;			/* u pointer to vnode lock */
 
 	/*
 	 * The machinery of being a vnode
 	 */
 	TAILQ_ENTRY(vnode) v_vnodelist;		/* l vnode lists */
 	TAILQ_ENTRY(vnode) v_lazylist;		/* l vnode lazy list */
 	struct bufobj	v_bufobj;		/* * Buffer cache object */
 
 	/*
 	 * Hooks for various subsystems and features.
 	 */
 	struct vpollinfo *v_pollinfo;		/* i Poll events, p for *v_pi */
 	struct label *v_label;			/* MAC label for vnode */
 	struct lockf *v_lockf;		/* Byte-level advisory lock list */
 	struct rangelock v_rl;			/* Byte-range lock */
 
 	/*
 	 * clustering stuff
 	 */
 	daddr_t	v_cstart;			/* v start block of cluster */
 	daddr_t	v_lasta;			/* v last allocation */
 	daddr_t	v_lastw;			/* v last write */
 	int	v_clen;				/* v length of cur. cluster */
 
 	u_int	v_holdcnt;			/* I prevents recycling. */
 	u_int	v_usecount;			/* I ref count of users */
 	u_int	v_iflag;			/* i vnode flags (see below) */
 	u_int	v_vflag;			/* v vnode flags */
 	u_short	v_mflag;			/* l mnt-specific vnode flags */
 	short	v_dbatchcpu;			/* i LRU requeue deferral batch */
 	int	v_writecount;			/* I ref count of writers or
 						   (negative) text users */
 	u_int	v_hash;				/* vfs_hash value, see v_hashlist */
 };
 
 #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
 
 #define	bo2vnode(bo)	__containerof((bo), struct vnode, v_bufobj)
 
 /* XXX: These are temporary to avoid a source sweep at this time */
 #define v_object	v_bufobj.bo_object
 
 /*
  * Userland version of struct vnode, for sysctl.
  */
 struct xvnode {
 	size_t	xv_size;			/* sizeof(struct xvnode) */
 	void	*xv_vnode;			/* address of real vnode */
 	u_long	xv_flag;			/* vnode vflags */
 	int	xv_usecount;			/* reference count of users */
 	int	xv_writecount;			/* reference count of writers */
 	int	xv_holdcnt;			/* page & buffer references */
 	u_long	xv_id;				/* capability identifier */
 	void	*xv_mount;			/* address of parent mount */
 	long	xv_numoutput;			/* num of writes in progress */
 	enum	vtype xv_type;			/* vnode type */
 	union {
 		void	*xvu_socket;		/* unpcb, if VSOCK */
 		void	*xvu_fifo;		/* fifo, if VFIFO */
 		dev_t	xvu_rdev;		/* maj/min, if VBLK/VCHR */
 		struct {
 			dev_t	xvu_dev;	/* device, if VDIR/VREG/VLNK */
 			ino_t	xvu_ino;	/* id, if VDIR/VREG/VLNK */
 		} xv_uns;
 	} xv_un;
 };
 #define xv_socket	xv_un.xvu_socket
 #define xv_fifo		xv_un.xvu_fifo
 #define xv_rdev		xv_un.xvu_rdev
 #define xv_dev		xv_un.xv_uns.xvu_dev
 #define xv_ino		xv_un.xv_uns.xvu_ino
 
 /* We don't need to lock the knlist */
 #define	VN_KNLIST_EMPTY(vp) ((vp)->v_pollinfo == NULL ||	\
 	    KNLIST_EMPTY(&(vp)->v_pollinfo->vpi_selinfo.si_note))
 /* Post (b) to vp's knote list if non-empty; vp is evaluated repeatedly. */
 #define VN_KNOTE(vp, b, a)					\
 	do {							\
 		if (!VN_KNLIST_EMPTY(vp))			\
 			KNOTE(&(vp)->v_pollinfo->vpi_selinfo.si_note, (b), \
 			    (a) | KNF_NOKQLOCK);		\
 	} while (0)
 #define	VN_KNOTE_LOCKED(vp, b)		VN_KNOTE(vp, b, KNF_LISTLOCKED)
 #define	VN_KNOTE_UNLOCKED(vp, b)	VN_KNOTE(vp, b, 0)
 
 /*
  * Vnode flags.
  *	VI flags are protected by interlock and live in v_iflag
  *	VV flags are protected by the vnode lock and live in v_vflag
  *
  *	VIRF_DOOMED is doubly protected by the interlock and vnode lock.  Both
  *	are required for writing but the status may be checked with either.
  */
 #define	VIRF_DOOMED	0x0001	/* This vnode is being recycled */
 
 #define	VI_TEXT_REF	0x0001	/* Text ref grabbed use ref */
 #define	VI_MOUNT	0x0020	/* Mount in progress */
 #define	VI_DOINGINACT	0x0800	/* VOP_INACTIVE is in progress */
 #define	VI_OWEINACT	0x1000	/* Need to call inactive */
 #define	VI_DEFINACT	0x2000	/* deferred inactive */
 
 #define	VV_ROOT		0x0001	/* root of its filesystem */
 #define	VV_ISTTY	0x0002	/* vnode represents a tty */
 #define	VV_NOSYNC	0x0004	/* unlinked, stop syncing */
 #define	VV_ETERNALDEV	0x0008	/* device that is never destroyed */
 #define	VV_CACHEDLABEL	0x0010	/* Vnode has valid cached MAC label */
 #define	VV_VMSIZEVNLOCK	0x0020	/* object size check requires vnode lock */
 #define	VV_COPYONWRITE	0x0040	/* vnode is doing copy-on-write */
 #define	VV_SYSTEM	0x0080	/* vnode being used by kernel */
 #define	VV_PROCDEP	0x0100	/* vnode is process dependent */
 #define	VV_NOKNOTE	0x0200	/* don't activate knotes on this vnode */
 #define	VV_DELETED	0x0400	/* should be removed */
 #define	VV_MD		0x0800	/* vnode backs the md device */
 #define	VV_FORCEINSMQ	0x1000	/* force the insmntque to succeed */
 #define	VV_READLINK	0x2000	/* fdescfs linux vnode */
 
 #define	VMP_LAZYLIST	0x0001	/* Vnode is on mnt's lazy list */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value
  * is unavailable (getattr) or which is not to be changed (setattr).
  */
 struct vattr {
 	enum vtype	va_type;	/* vnode type (for create) */
 	u_short		va_mode;	/* file's access mode and type */
 	u_short		va_padding0;
 	uid_t		va_uid;		/* owner user id */
 	gid_t		va_gid;		/* owner group id */
 	nlink_t		va_nlink;	/* number of references to file */
 	dev_t		va_fsid;	/* filesystem id */
 	ino_t		va_fileid;	/* file id */
 	u_quad_t	va_size;	/* file size in bytes */
 	long		va_blocksize;	/* blocksize preferred for i/o */
 	struct timespec	va_atime;	/* time of last access */
 	struct timespec	va_mtime;	/* time of last modification */
 	struct timespec	va_ctime;	/* time file changed */
 	struct timespec	va_birthtime;	/* time file created */
 	u_long		va_gen;		/* generation number of file */
 	u_long		va_flags;	/* flags defined for file */
 	dev_t		va_rdev;	/* device the special file represents */
 	u_quad_t	va_bytes;	/* bytes of disk space held by file */
 	u_quad_t	va_filerev;	/* file modification number */
 	u_int		va_vaflags;	/* operations flags, see below */
 	long		va_spare;	/* remain quad aligned */
 };
 
 /*
  * Flags for va_vaflags.
  */
 #define	VA_UTIMES_NULL	0x01		/* utimes argument was NULL */
 #define	VA_EXCLUSIVE	0x02		/* exclusive create request */
 #define	VA_SYNC		0x04		/* O_SYNC truncation */
 
 /*
  * Flags for ioflag. (high 16 bits used to ask for read-ahead and
  * help with write clustering)
  * NB: IO_NDELAY and IO_DIRECT are linked to fcntl.h
  */
 #define	IO_UNIT		0x0001		/* do I/O as atomic unit */
 #define	IO_APPEND	0x0002		/* append write to end */
 #define	IO_NDELAY	0x0004		/* FNDELAY flag set in file table */
 #define	IO_NODELOCKED	0x0008		/* underlying node already locked */
 #define	IO_ASYNC	0x0010		/* bawrite rather than bdwrite */
 #define	IO_VMIO		0x0020		/* data already in VMIO space */
 #define	IO_INVAL	0x0040		/* invalidate after I/O */
 #define	IO_SYNC		0x0080		/* do I/O synchronously */
 #define	IO_DIRECT	0x0100		/* attempt to bypass buffer cache */
 #define	IO_NOREUSE	0x0200		/* VMIO data won't be reused */
 #define	IO_EXT		0x0400		/* operate on external attributes */
 #define	IO_NORMAL	0x0800		/* operate on regular data */
 #define	IO_NOMACCHECK	0x1000		/* MAC checks unnecessary */
 #define	IO_BUFLOCKED	0x2000		/* ffs flag; indir buf is locked */
 #define	IO_RANGELOCKED	0x4000		/* range locked */
 
 #define IO_SEQMAX	0x7F		/* seq heuristic max value */
 #define IO_SEQSHIFT	16		/* seq heuristic in upper 16 bits */
 
 /*
  * Flags for accmode_t.
  */
 #define	VEXEC			000000000100 /* execute/search permission */
 #define	VWRITE			000000000200 /* write permission */
 #define	VREAD			000000000400 /* read permission */
 #define	VADMIN			000000010000 /* being the file owner */
 #define	VAPPEND			000000040000 /* permission to write/append */
 /*
  * VEXPLICIT_DENY makes VOP_ACCESSX(9) return EPERM or EACCES only
  * if permission was denied explicitly, by a "deny" rule in NFSv4 ACL,
  * and 0 otherwise.  This never happens with ordinary unix access rights
  * or POSIX.1e ACLs.  Obviously, VEXPLICIT_DENY must be OR-ed with
  * some other V* constant.
  */
 #define	VEXPLICIT_DENY		000000100000
 #define	VREAD_NAMED_ATTRS 	000000200000 /* not used */
 #define	VWRITE_NAMED_ATTRS 	000000400000 /* not used */
 #define	VDELETE_CHILD	 	000001000000
 #define	VREAD_ATTRIBUTES 	000002000000 /* permission to stat(2) */
 #define	VWRITE_ATTRIBUTES 	000004000000 /* change {m,c,a}time */
 #define	VDELETE		 	000010000000
 #define	VREAD_ACL	 	000020000000 /* read ACL and file mode */
 #define	VWRITE_ACL	 	000040000000 /* change ACL and/or file mode */
 #define	VWRITE_OWNER	 	000100000000 /* change file owner */
 #define	VSYNCHRONIZE	 	000200000000 /* not used */
 #define	VCREAT			000400000000 /* creating new file */
 #define	VVERIFY			001000000000 /* verification required */
 
 /*
  * Permissions that were traditionally granted only to the file owner.
  */
 #define VADMIN_PERMS	(VADMIN | VWRITE_ATTRIBUTES | VWRITE_ACL | \
     VWRITE_OWNER)
 
 /*
  * Permissions that were traditionally granted to everyone.
  */
 #define VSTAT_PERMS	(VREAD_ATTRIBUTES | VREAD_ACL)
 
 /*
  * Permissions that allow changing the state of the file in any way.
  */
 #define VMODIFY_PERMS	(VWRITE | VAPPEND | VADMIN_PERMS | VDELETE_CHILD | \
     VDELETE)
 
 /*
  * Token indicating no attribute value yet assigned.
  */
 #define	VNOVAL	(-1)
 
 /*
  * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon)
  */
 #define VLKTIMEOUT	(hz / 20 + 1)
 
 #ifdef _KERNEL
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_VNODE);
 #endif
 
 extern u_int ncsizefactor;
 
 /*
  * Convert between vnode types and inode formats (since POSIX.1
  * defines mode word of stat structure in terms of inode formats).
  */
 extern enum vtype	iftovt_tab[];
 extern int		vttoif_tab[];
 #define	IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
 #define	VTTOIF(indx)	(vttoif_tab[(int)(indx)])
 #define	MAKEIMODE(indx, mode)	(int)(VTTOIF(indx) | (mode))
 
 /*
  * Flags to various vnode functions.
  */
 #define	SKIPSYSTEM	0x0001	/* vflush: skip vnodes marked VV_SYSTEM */
 #define	FORCECLOSE	0x0002	/* vflush: force file closure */
 #define	WRITECLOSE	0x0004	/* vflush: only close writable files */
 #define	EARLYFLUSH	0x0008	/* vflush: early call for ffs_flushfiles */
 #define	V_SAVE		0x0001	/* vinvalbuf: sync file first */
 #define	V_ALT		0x0002	/* vinvalbuf: invalidate only alternate bufs */
 #define	V_NORMAL	0x0004	/* vinvalbuf: invalidate only regular bufs */
 #define	V_CLEANONLY	0x0008	/* vinvalbuf: invalidate only clean bufs */
 #define	V_VMIO		0x0010	/* vinvalbuf: called during pageout */
 #define	V_ALLOWCLEAN	0x0020	/* vinvalbuf: allow clean buffers after flush */
 #define	REVOKEALL	0x0001	/* vop_revoke: revoke all aliases */
 #define	V_WAIT		0x0001	/* vn_start_write: sleep for suspend */
 #define	V_NOWAIT	0x0002	/* vn_start_write: don't sleep for suspend */
 #define	V_XSLEEP	0x0004	/* vn_start_write: just return after sleep */
 #define	V_MNTREF	0x0010	/* vn_start_write: mp is already ref-ed */
 
 #define	VR_START_WRITE	0x0001	/* vfs_write_resume: start write atomically */
 #define	VR_NO_SUSPCLR	0x0002	/* vfs_write_resume: do not clear suspension */
 
 #define	VS_SKIP_UNMOUNT	0x0001	/* vfs_write_suspend: fail if the
 				   filesystem is being unmounted */
 
 #define	VREF(vp)	vref(vp)
 
 #ifdef DIAGNOSTIC
 #define	VATTR_NULL(vap)	vattr_null(vap)
 #else
 #define	VATTR_NULL(vap)	(*(vap) = va_null)	/* initialize a vattr */
 #endif /* DIAGNOSTIC */
 
 #define	NULLVP	((struct vnode *)NULL)
 
 /*
  * Global vnode data.
  */
 extern	struct vnode *rootvnode;	/* root (i.e. "/") vnode */
 extern	struct mount *rootdevmp;	/* "/dev" mount */
 extern	u_long desiredvnodes;		/* number of vnodes desired */
 extern	struct uma_zone *namei_zone;
 extern	struct vattr va_null;		/* predefined null vattr structure */
 
 #define	VI_LOCK(vp)	mtx_lock(&(vp)->v_interlock)
 #define	VI_LOCK_FLAGS(vp, flags) mtx_lock_flags(&(vp)->v_interlock, (flags))
 #define	VI_TRYLOCK(vp)	mtx_trylock(&(vp)->v_interlock)
 #define	VI_UNLOCK(vp)	mtx_unlock(&(vp)->v_interlock)
 #define	VI_MTX(vp)	(&(vp)->v_interlock)
 
 #define	VN_LOCK_AREC(vp)	lockallowrecurse((vp)->v_vnlock)
 #define	VN_LOCK_ASHARE(vp)	lockallowshare((vp)->v_vnlock)
 #define	VN_LOCK_DSHARE(vp)	lockdisableshare((vp)->v_vnlock)
 
 #endif /* _KERNEL */
 
 /*
  * Mods for extensibility.
  */
 
 /*
  * Flags for vdesc_flags:
  */
 #define	VDESC_MAX_VPS		16
 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */
 #define	VDESC_VP0_WILLRELE	0x0001
 #define	VDESC_VP1_WILLRELE	0x0002
 #define	VDESC_VP2_WILLRELE	0x0004
 #define	VDESC_VP3_WILLRELE	0x0008
 #define	VDESC_VPP_WILLRELE	0x0200
 
 /*
  * A generic structure.
  * This can be used by bypass routines to identify generic arguments.
  */
 struct vop_generic_args {
 	struct vnodeop_desc *a_desc;
 	/* other random data follows, presumably */
 };
 
 typedef int vop_bypass_t(struct vop_generic_args *);
 
 /*
  * VDESC_NO_OFFSET terminates the offset list and marks places where no
  * such field exists.  Parenthesized so it expands as a single operand.
  */
 #define VDESC_NO_OFFSET (-1)
 
 /*
  * This structure describes the vnode operation taking place.
  */
 struct vnodeop_desc {
 	char	*vdesc_name;		/* a readable name for debugging */
 	int	 vdesc_flags;		/* VDESC_* flags */
 	int	vdesc_vop_offset;
 	vop_bypass_t	*vdesc_call;	/* Function to call */
 
 	/*
 	 * These ops are used by bypass routines to map and locate arguments.
 	 * Creds and procs are not needed in bypass routines, but sometimes
 	 * they are useful to (for example) transport layers.
 	 * Nameidata is useful because it has a cred in it.
 	 */
 	int	*vdesc_vp_offsets;	/* list ended by VDESC_NO_OFFSET */
 	int	vdesc_vpp_offset;	/* return vpp location */
 	int	vdesc_cred_offset;	/* cred location, if any */
 	int	vdesc_thread_offset;	/* thread location, if any */
 	int	vdesc_componentname_offset; /* if any */
 };
 
 #ifdef _KERNEL
 /*
  * A list of all the operation descs.
  */
 extern struct vnodeop_desc *vnodeop_descs[];
 
 #define	VOPARG_OFFSETOF(s_type, field)	__offsetof(s_type, field)
 #define	VOPARG_OFFSETTO(s_type, s_offset, struct_p) \
     ((s_type)(((char*)(struct_p)) + (s_offset)))
 
 
 #ifdef DEBUG_VFS_LOCKS
 /*
  * Support code to aid in debugging VFS locking problems.  Not totally
  * reliable since if the thread sleeps between changing the lock
  * state and checking it with the assert, some other thread could
  * change the state.  They are good enough for debugging a single
  * filesystem using a single-threaded test.  Note that the unreliability is
  * limited to false negatives; efforts were made to ensure that false
  * positives cannot occur.
  */
 void	assert_vi_locked(struct vnode *vp, const char *str);
 void	assert_vi_unlocked(struct vnode *vp, const char *str);
 void	assert_vop_elocked(struct vnode *vp, const char *str);
 void	assert_vop_locked(struct vnode *vp, const char *str);
 void	assert_vop_unlocked(struct vnode *vp, const char *str);
 
 #define	ASSERT_VI_LOCKED(vp, str)	assert_vi_locked((vp), (str))
 #define	ASSERT_VI_UNLOCKED(vp, str)	assert_vi_unlocked((vp), (str))
 #define	ASSERT_VOP_ELOCKED(vp, str)	assert_vop_elocked((vp), (str))
 #define	ASSERT_VOP_LOCKED(vp, str)	assert_vop_locked((vp), (str))
 #define	ASSERT_VOP_UNLOCKED(vp, str)	assert_vop_unlocked((vp), (str))
 
 #else /* !DEBUG_VFS_LOCKS */
 
 #define	ASSERT_VI_LOCKED(vp, str)	((void)0)
 #define	ASSERT_VI_UNLOCKED(vp, str)	((void)0)
 #define	ASSERT_VOP_ELOCKED(vp, str)	((void)0)
 #define	ASSERT_VOP_LOCKED(vp, str)	((void)0)
 #define	ASSERT_VOP_UNLOCKED(vp, str)	((void)0)
 #endif /* DEBUG_VFS_LOCKS */
 
 
 /*
  * This call works for vnodes in the kernel.
  */
 #define VCALL(c) ((c)->a_desc->vdesc_call(c))
 
 #define DOINGASYNC(vp)	   					\
 	(((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC) != 0 &&	\
 	 ((curthread->td_pflags & TDP_SYNCIO) == 0))
 
 /*
  * VMIO support inline
  */
 
 extern int vmiodirenable;
 
 static __inline int
 vn_canvmio(struct vnode *vp)
 {
 	/* Regular files always qualify; directories only if vmiodirenable. */
 	return ((vp != NULL && (vp->v_type == VREG ||
 	    (vmiodirenable && vp->v_type == VDIR))) ? TRUE : FALSE);
 }
 
 /*
  * Finally, include the default set of vnode operations.
  */
 typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int);
 #include "vnode_if.h"
 
 /* vn_open_flags */
 #define	VN_OPEN_NOAUDIT		0x00000001
 #define	VN_OPEN_NOCAPCHECK	0x00000002
 #define	VN_OPEN_NAMECACHE	0x00000004
 #define	VN_OPEN_INVFS		0x00000008
 
 /*
  * Public vnode manipulation functions.
  */
 struct componentname;
 struct file;
 struct mount;
 struct nameidata;
 struct ostat;
 struct freebsd11_stat;
 struct thread;
 struct proc;
 struct stat;
 struct nstat;
 struct ucred;
 struct uio;
 struct vattr;
 struct vfsops;
 struct vnode;
 
 typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **);
 
 int	bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn,
 	    daddr_t endn);
 /* cache_* may belong in namei.h. */
 void	cache_changesize(u_long newhashsize);
 #define	cache_enter(dvp, vp, cnp)					\
 	cache_enter_time(dvp, vp, cnp, NULL, NULL)
 void	cache_enter_time(struct vnode *dvp, struct vnode *vp,
 	    struct componentname *cnp, struct timespec *tsp,
 	    struct timespec *dtsp);
 int	cache_lookup(struct vnode *dvp, struct vnode **vpp,
 	    struct componentname *cnp, struct timespec *tsp, int *ticksp);
 void	cache_purge(struct vnode *vp);
 void	cache_purge_negative(struct vnode *vp);
 void	cache_purgevfs(struct mount *mp, bool force);
 int	change_dir(struct vnode *vp, struct thread *td);
 void	cvtstat(struct stat *st, struct ostat *ost);
 void	freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb);
 int	freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost);
 int	getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
 	    struct vnode **vpp);
 void	getnewvnode_reserve(void);
 void	getnewvnode_drop_reserve(void);
 int	insmntque1(struct vnode *vp, struct mount *mp,
 	    void (*dtr)(struct vnode *, void *), void *dtr_arg);
 int	insmntque(struct vnode *vp, struct mount *mp);
 u_quad_t init_va_filerev(void);
 int	speedup_syncer(void);
 int	vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf,
-	    u_int *buflen);
+	    size_t *buflen);
+int	vn_getcwd(struct thread *td, char *buf, char **retbuf, size_t *buflen);
 int	vn_fullpath(struct thread *td, struct vnode *vn,
 	    char **retbuf, char **freebuf);
 int	vn_fullpath_global(struct thread *td, struct vnode *vn,
 	    char **retbuf, char **freebuf);
 struct vnode *
 	vn_dir_dd_ino(struct vnode *vp);
 int	vn_commname(struct vnode *vn, char *buf, u_int buflen);
 int	vn_path_to_global_path(struct thread *td, struct vnode *vp,
 	    char *path, u_int pathlen);
 int	vaccess(enum vtype type, mode_t file_mode, uid_t file_uid,
 	    gid_t file_gid, accmode_t accmode, struct ucred *cred,
 	    int *privused);
 int	vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid,
 	    struct acl *aclp, accmode_t accmode, struct ucred *cred,
 	    int *privused);
 int	vaccess_acl_posix1e(enum vtype type, uid_t file_uid,
 	    gid_t file_gid, struct acl *acl, accmode_t accmode,
 	    struct ucred *cred, int *privused);
 void	vattr_null(struct vattr *vap);
 int	vcount(struct vnode *vp);
 void	vlazy(struct vnode *);
 void	vdrop(struct vnode *);
 void	vdropl(struct vnode *);
 int	vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
 int	vget(struct vnode *vp, int flags, struct thread *td);
 enum vgetstate	vget_prep(struct vnode *vp);
 int	vget_finish(struct vnode *vp, int flags, enum vgetstate vs);
 void	vgone(struct vnode *vp);
 void	vhold(struct vnode *);
 void	vholdl(struct vnode *);
 void	vholdnz(struct vnode *);
 void	vinactive(struct vnode *vp);
 int	vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
 int	vtruncbuf(struct vnode *vp, off_t length, int blksize);
 void	v_inval_buf_range(struct vnode *vp, daddr_t startlbn, daddr_t endlbn,
 	    int blksize);
 void	vunref(struct vnode *);
 void	vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
 int	vrecycle(struct vnode *vp);
 int	vrecyclel(struct vnode *vp);
 int	vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off,
 	    struct ucred *cred);
 int	vn_close(struct vnode *vp,
 	    int flags, struct ucred *file_cred, struct thread *td);
 int	vn_copy_file_range(struct vnode *invp, off_t *inoffp,
 	    struct vnode *outvp, off_t *outoffp, size_t *lenp,
 	    unsigned int flags, struct ucred *incred, struct ucred *outcred,
 	    struct thread *fsize_td);
 void	vn_finished_write(struct mount *mp);
 void	vn_finished_secondary_write(struct mount *mp);
 int	vn_fsync_buf(struct vnode *vp, int waitfor);
 int	vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp,
 	    struct vnode *outvp, off_t *outoffp, size_t *lenp,
 	    unsigned int flags, struct ucred *incred, struct ucred *outcred,
 	    struct thread *fsize_td);
 int	vn_need_pageq_flush(struct vnode *vp);
 int	vn_isdisk(struct vnode *vp, int *errp);
 int	_vn_lock(struct vnode *vp, int flags, char *file, int line);
 #define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__)
 int	vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp);
 int	vn_open_cred(struct nameidata *ndp, int *flagp, int cmode,
 	    u_int vn_open_flags, struct ucred *cred, struct file *fp);
 int	vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
 	    struct thread *td, struct file *fp);
 void	vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end);
 int	vn_pollrecord(struct vnode *vp, struct thread *p, int events);
 int	vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base,
 	    int len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid,
 	    struct thread *td);
 int	vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base,
 	    size_t len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *active_cred, struct ucred *file_cred, size_t *aresid,
 	    struct thread *td);
 int	vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio,
 	    struct thread *td);
 int	vn_stat(struct vnode *vp, struct stat *sb, struct ucred *active_cred,
 	    struct ucred *file_cred, struct thread *td);
 int	vn_start_write(struct vnode *vp, struct mount **mpp, int flags);
 int	vn_start_secondary_write(struct vnode *vp, struct mount **mpp,
 	    int flags);
 int	vn_truncate_locked(struct vnode *vp, off_t length, bool sync,
 	    struct ucred *cred);
 int	vn_writechk(struct vnode *vp);
 int	vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, int *buflen, char *buf, struct thread *td);
 int	vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, int buflen, char *buf, struct thread *td);
 int	vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, struct thread *td);
 int	vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags,
 	    struct vnode **rvp);
 int	vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc,
 	    void *alloc_arg, int lkflags, struct vnode **rvp);
 int	vn_utimes_perm(struct vnode *vp, struct vattr *vap,
 	    struct ucred *cred, struct thread *td);
 
 int	vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio);
 int	vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
 	    struct uio *uio);
 
 #define	vn_rangelock_unlock(vp, cookie)					\
 	rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp))
 #define	vn_rangelock_unlock_range(vp, cookie, start, end)		\
 	rangelock_unlock_range(&(vp)->v_rl, (cookie), (start), (end), 	\
 	    VI_MTX(vp))
 #define	vn_rangelock_rlock(vp, start, end)				\
 	rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 #define	vn_rangelock_tryrlock(vp, start, end)				\
 	rangelock_tryrlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 #define	vn_rangelock_wlock(vp, start, end)				\
 	rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 #define	vn_rangelock_trywlock(vp, start, end)				\
 	rangelock_trywlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
 
 int	vfs_cache_lookup(struct vop_lookup_args *ap);
 int	vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp);
 void	vfs_timestamp(struct timespec *);
 void	vfs_write_resume(struct mount *mp, int flags);
 int	vfs_write_suspend(struct mount *mp, int flags);
 int	vfs_write_suspend_umnt(struct mount *mp);
 void	vnlru_free(int, struct vfsops *);
 int	vop_stdbmap(struct vop_bmap_args *);
 int	vop_stdfdatasync_buf(struct vop_fdatasync_args *);
 int	vop_stdfsync(struct vop_fsync_args *);
 int	vop_stdgetwritemount(struct vop_getwritemount_args *);
 int	vop_stdgetpages(struct vop_getpages_args *);
 int	vop_stdinactive(struct vop_inactive_args *);
 int	vop_stdioctl(struct vop_ioctl_args *);
 int	vop_stdneed_inactive(struct vop_need_inactive_args *);
 int	vop_stdkqfilter(struct vop_kqfilter_args *);
 int	vop_stdlock(struct vop_lock1_args *);
 int	vop_stdunlock(struct vop_unlock_args *);
 int	vop_stdislocked(struct vop_islocked_args *);
 int	vop_lock(struct vop_lock1_args *);
 int	vop_unlock(struct vop_unlock_args *);
 int	vop_islocked(struct vop_islocked_args *);
 int	vop_stdputpages(struct vop_putpages_args *);
 int	vop_nopoll(struct vop_poll_args *);
 int	vop_stdaccess(struct vop_access_args *ap);
 int	vop_stdaccessx(struct vop_accessx_args *ap);
 int	vop_stdadvise(struct vop_advise_args *ap);
 int	vop_stdadvlock(struct vop_advlock_args *ap);
 int	vop_stdadvlockasync(struct vop_advlockasync_args *ap);
 int	vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
 int	vop_stdallocate(struct vop_allocate_args *ap);
 int	vop_stdset_text(struct vop_set_text_args *ap);
 int	vop_stdpathconf(struct vop_pathconf_args *);
 int	vop_stdpoll(struct vop_poll_args *);
 int	vop_stdvptocnp(struct vop_vptocnp_args *ap);
 int	vop_stdvptofh(struct vop_vptofh_args *ap);
 int	vop_stdunp_bind(struct vop_unp_bind_args *ap);
 int	vop_stdunp_connect(struct vop_unp_connect_args *ap);
 int	vop_stdunp_detach(struct vop_unp_detach_args *ap);
 int	vop_eopnotsupp(struct vop_generic_args *ap);
 int	vop_ebadf(struct vop_generic_args *ap);
 int	vop_einval(struct vop_generic_args *ap);
 int	vop_enoent(struct vop_generic_args *ap);
 int	vop_enotty(struct vop_generic_args *ap);
 int	vop_null(struct vop_generic_args *ap);
 int	vop_panic(struct vop_generic_args *ap);
 int	dead_poll(struct vop_poll_args *ap);
 int	dead_read(struct vop_read_args *ap);
 int	dead_write(struct vop_write_args *ap);
 
 /* These are called from within the actual VOPS. */
 void	vop_close_post(void *a, int rc);
 void	vop_create_post(void *a, int rc);
 void	vop_deleteextattr_post(void *a, int rc);
 void	vop_link_post(void *a, int rc);
 void	vop_lookup_post(void *a, int rc);
 void	vop_lookup_pre(void *a);
 void	vop_mkdir_post(void *a, int rc);
 void	vop_mknod_post(void *a, int rc);
 void	vop_open_post(void *a, int rc);
 void	vop_read_post(void *a, int rc);
 void	vop_readdir_post(void *a, int rc);
 void	vop_reclaim_post(void *a, int rc);
 void	vop_remove_post(void *a, int rc);
 void	vop_rename_post(void *a, int rc);
 void	vop_rename_pre(void *a);
 void	vop_rmdir_post(void *a, int rc);
 void	vop_setattr_post(void *a, int rc);
 void	vop_setextattr_post(void *a, int rc);
 void	vop_symlink_post(void *a, int rc);
 int	vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
 
 /*
  * Lock-debugging pre/post hooks.  With DEBUG_VFS_LOCKS defined they are
  * declared as real functions; otherwise each one is a macro that expands
  * to an empty do/while statement, so call sites compile to nothing.
  */
 #ifdef DEBUG_VFS_LOCKS
 void	vop_strategy_pre(void *a);
 void	vop_lock_pre(void *a);
 void	vop_lock_post(void *a, int rc);
 void	vop_unlock_pre(void *a);
 void	vop_unlock_post(void *a, int rc);
 void	vop_need_inactive_pre(void *a);
 void	vop_need_inactive_post(void *a, int rc);
 #else
 #define	vop_strategy_pre(x)	do { } while (0)
 #define	vop_lock_pre(x)		do { } while (0)
 #define	vop_lock_post(x, y)	do { } while (0)
 #define	vop_unlock_pre(x)	do { } while (0)
 #define	vop_unlock_post(x, y)	do { } while (0)
 #define	vop_need_inactive_pre(x)	do { } while (0)
 #define	vop_need_inactive_post(x, y)	do { } while (0)
 #endif
 
 void	vop_rename_fail(struct vop_rename_args *ap);
 
 /*
  * VOP_WRITE_PRE declares the locals va, error, osize, ooffset and noffset
  * in the enclosing scope and zeroes the three offsets.  When the vnode's
  * knote list is not empty it fetches the attributes with VOP_GETATTR() and
  * records the current uio offset and va_size.  If VOP_GETATTR() fails the
  * macro returns that error from the enclosing function.
  */
 #define	VOP_WRITE_PRE(ap)						\
 	struct vattr va;						\
 	int error;							\
 	off_t osize, ooffset, noffset;					\
 									\
 	osize = ooffset = noffset = 0;					\
 	if (!VN_KNLIST_EMPTY((ap)->a_vp)) {				\
 		error = VOP_GETATTR((ap)->a_vp, &va, (ap)->a_cred);	\
 		if (error)						\
 			return (error);					\
 		ooffset = (ap)->a_uio->uio_offset;			\
 		osize = (off_t)va.va_size;				\
 	}
 
 /*
  * VOP_WRITE_POST relies on the noffset, ooffset and osize locals declared
  * by VOP_WRITE_PRE.  If the uio offset advanced and the vnode's knote list
  * is not empty, it posts NOTE_WRITE, adding NOTE_EXTEND when the new offset
  * is beyond the size recorded before the write.  The "ret" argument is not
  * referenced by the expansion.
  */
 #define VOP_WRITE_POST(ap, ret)						\
 	noffset = (ap)->a_uio->uio_offset;				\
 	if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) {	\
 		VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE			\
 		    | (noffset > osize ? NOTE_EXTEND : 0));		\
 	}
 
 /* VOP_LOCK1() with the call site's __FILE__ and __LINE__ filled in. */
 #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__)
 
 /*
  * "_CHECKED" wrappers around VOP_ADD_WRITECOUNT(), VOP_SET_TEXT() and
  * VOP_UNSET_TEXT().  With INVARIANTS each wrapper is a statement that
  * asserts via VNASSERT() that the VOP returned 0.  Without INVARIANTS each
  * wrapper expands to the plain VOP call and its result is left to the
  * caller.
  */
 #ifdef INVARIANTS
 #define	VOP_ADD_WRITECOUNT_CHECKED(vp, cnt)				\
 do {									\
 	int error_;							\
 									\
 	error_ = VOP_ADD_WRITECOUNT((vp), (cnt));			\
 	VNASSERT(error_ == 0, (vp), ("VOP_ADD_WRITECOUNT returned %d",	\
 	    error_));							\
 } while (0)
 #define	VOP_SET_TEXT_CHECKED(vp)					\
 do {									\
 	int error_;							\
 									\
 	error_ = VOP_SET_TEXT((vp));					\
 	VNASSERT(error_ == 0, (vp), ("VOP_SET_TEXT returned %d",	\
 	    error_));							\
 } while (0)
 #define	VOP_UNSET_TEXT_CHECKED(vp)					\
 do {									\
 	int error_;							\
 									\
 	error_ = VOP_UNSET_TEXT((vp));					\
 	VNASSERT(error_ == 0, (vp), ("VOP_UNSET_TEXT returned %d",	\
 	    error_));							\
 } while (0)
 #else
 #define	VOP_ADD_WRITECOUNT_CHECKED(vp, cnt)	VOP_ADD_WRITECOUNT((vp), (cnt))
 #define	VOP_SET_TEXT_CHECKED(vp)		VOP_SET_TEXT((vp))
 #define	VOP_UNSET_TEXT_CHECKED(vp)		VOP_UNSET_TEXT((vp))
 #endif
 
 /*
  * Non-zero when VIRF_DOOMED is set in the vnode's v_irflag.  The test is
  * wrapped in __predict_false(), i.e. hinted as the unlikely case.
  */
 #define	VN_IS_DOOMED(vp)	__predict_false((vp)->v_irflag & VIRF_DOOMED)
 
 void	vput(struct vnode *vp);
 void	vrele(struct vnode *vp);
 void	vref(struct vnode *vp);
 void	vrefl(struct vnode *vp);
 void	vrefact(struct vnode *vp);
 void	vrefactn(struct vnode *vp, u_int n);
 int	vrefcnt(struct vnode *vp);
 void 	v_addpollinfo(struct vnode *vp);
 
 int vnode_create_vobject(struct vnode *vp, off_t size, struct thread *td);
 void vnode_destroy_vobject(struct vnode *vp);
 
 extern struct vop_vector fifo_specops;
 extern struct vop_vector dead_vnodeops;
 extern struct vop_vector default_vnodeops;
 
 /*
  * The generic vop_*() handlers declared above, converted to "void *" by
  * way of uintptr_t.  NOTE(review): presumably these are meant to be stored
  * in struct vop_vector slots whose function types differ -- confirm
  * against the vop_vector definition.
  */
 #define VOP_PANIC	((void*)(uintptr_t)vop_panic)
 #define VOP_NULL	((void*)(uintptr_t)vop_null)
 #define VOP_EBADF	((void*)(uintptr_t)vop_ebadf)
 #define VOP_ENOTTY	((void*)(uintptr_t)vop_enotty)
 #define VOP_EINVAL	((void*)(uintptr_t)vop_einval)
 #define VOP_ENOENT	((void*)(uintptr_t)vop_enoent)
 #define VOP_EOPNOTSUPP	((void*)(uintptr_t)vop_eopnotsupp)
 
 /* fifo_vnops.c */
 int	fifo_printinfo(struct vnode *);
 
 /* vfs_hash.c */
 typedef int vfs_hash_cmp_t(struct vnode *vp, void *arg);
 
 void vfs_hash_changesize(u_long newhashsize);
 int vfs_hash_get(const struct mount *mp, u_int hash, int flags,
     struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
 u_int vfs_hash_index(struct vnode *vp);
 int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td,
     struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
 void vfs_hash_ref(const struct mount *mp, u_int hash, struct thread *td,
     struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg);
 void vfs_hash_rehash(struct vnode *vp, u_int hash);
 void vfs_hash_remove(struct vnode *vp);
 
 int vfs_kqfilter(struct vop_kqfilter_args *);
 struct dirent;
 int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off);
 int vfs_emptydir(struct vnode *vp);
 
 int vfs_unixify_accmode(accmode_t *accmode);
 
 void vfs_unp_reclaim(struct vnode *vp);
 
 int setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode);
 int setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
     gid_t gid);
 int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
     struct thread *td);
 int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
     struct thread *td);
 
 void vn_fsid(struct vnode *vp, struct vattr *va);
 
 /*
  * VOP_UNLOCK_FLAGS(vp, flags):
  *
  *	Unlock "vp" with VOP_UNLOCK() and, if LK_INTERLOCK is set in "flags",
  *	also drop the vnode interlock with VI_UNLOCK().  Only LK_INTERLOCK
  *	and LK_RELEASE are accepted; any other bit causes a panic.  The
  *	value of the statement expression is the VOP_UNLOCK() result.
  *
  *	Both arguments are evaluated exactly once: they are copied into
  *	locals up front and only the copies are used afterwards, including
  *	in the panic message.
  */
 #define VOP_UNLOCK_FLAGS(vp, flags)	({				\
 	struct vnode *_vp = (vp);					\
 	int _flags = (flags);						\
 	int _error;							\
 									\
 	if ((_flags & ~(LK_INTERLOCK | LK_RELEASE)) != 0)		\
 		panic("%s: unsupported flags %x\n", __func__, _flags);	\
 	_error = VOP_UNLOCK(_vp);					\
 	if (_flags & LK_INTERLOCK)					\
 		VI_UNLOCK(_vp);						\
 	_error;								\
 })
 
 #include <sys/kernel.h>
 
 /*
  * Emit a SYSINIT (subsystem SI_SUB_VFS, order SI_ORDER_ANY) that calls
  * vfs_vector_op_register() with the address of "vnodeops".  The vector's
  * name is pasted into the SYSINIT identifier, so "vnodeops" must be a
  * plain identifier.
  */
 #define VFS_VOP_VECTOR_REGISTER(vnodeops) \
 	SYSINIT(vfs_vector_##vnodeops##_f, SI_SUB_VFS, SI_ORDER_ANY, \
 	    vfs_vector_op_register, &vnodeops)
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_VNODE_H_ */
Index: projects/clang1000-import/sys/vm/vm_page.c
===================================================================
--- projects/clang1000-import/sys/vm/vm_page.c	(revision 357389)
+++ projects/clang1000-import/sys/vm/vm_page.c	(revision 357390)
@@ -1,5198 +1,5151 @@
 /*-
  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
  */
 
 /*-
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *	Resident memory management module.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/domainset.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sleepqueue.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 #include <vm/vm_domainset.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_pagequeue.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 
 #include <machine/md_var.h>
 
 struct vm_domain vm_dom[MAXMEMDOM];
 
 DPCPU_DEFINE_STATIC(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]);
 
 struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT];
 
 struct mtx_padalign __exclusive_cache_line vm_domainset_lock;
 /* The following fields are protected by the domainset lock. */
 domainset_t __exclusive_cache_line vm_min_domains;
 domainset_t __exclusive_cache_line vm_severe_domains;
 static int vm_min_waiters;
 static int vm_severe_waiters;
 static int vm_pageproc_waiters;
 
 static SYSCTL_NODE(_vm_stats, OID_AUTO, page, CTLFLAG_RD, 0,
     "VM page statistics");
 
 static counter_u64_t pqstate_commit_retries = EARLY_COUNTER;
 SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, pqstate_commit_retries,
     CTLFLAG_RD, &pqstate_commit_retries,
     "Number of failed per-page atomic queue state updates");
 
 static counter_u64_t queue_ops = EARLY_COUNTER;
 SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_ops,
     CTLFLAG_RD, &queue_ops,
     "Number of batched queue operations");
 
 static counter_u64_t queue_nops = EARLY_COUNTER;
 SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_nops,
     CTLFLAG_RD, &queue_nops,
     "Number of batched queue operations with no effects");
 
 static void
 counter_startup(void)
 {
 
 	pqstate_commit_retries = counter_u64_alloc(M_WAITOK);
 	queue_ops = counter_u64_alloc(M_WAITOK);
 	queue_nops = counter_u64_alloc(M_WAITOK);
 }
 SYSINIT(page_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL);
 
 /*
  * bogus page -- for I/O to/from partially complete buffers,
  * or for paging into sparsely invalid regions.
  */
 vm_page_t bogus_page;
 
 vm_page_t vm_page_array;
 long vm_page_array_size;
 long first_page;
 
 static TAILQ_HEAD(, vm_page) blacklist_head;
 static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages");
 
 static uma_zone_t fakepg_zone;
 
 static void vm_page_alloc_check(vm_page_t m);
 static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m,
     const char *wmesg, bool nonshared, bool locked);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(vm_page_t m, uint8_t queue);
 static bool vm_page_free_prep(vm_page_t m);
 static void vm_page_free_toq(vm_page_t m);
 static void vm_page_init(void *dummy);
 static int vm_page_insert_after(vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred);
 static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
     vm_page_t mpred);
 static void vm_page_mvqueue(vm_page_t m, const uint8_t queue,
     const uint16_t nflag);
 static int vm_page_reclaim_run(int req_class, int domain, u_long npages,
     vm_page_t m_run, vm_paddr_t high);
 static void vm_page_release_toq(vm_page_t m, uint8_t nqueue, bool noreuse);
 static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object,
     int req);
 static int vm_page_zone_import(void *arg, void **store, int cnt, int domain,
     int flags);
 static void vm_page_zone_release(void *arg, void **store, int cnt);
 
 SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL);
 
 /*
  * SYSINIT hook: create the "fakepg" UMA zone (one struct vm_page per item)
  * and allocate the wired, object-less bogus_page.
  */
 static void
 vm_page_init(void *dummy)
 {
 	const int bogus_req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
 	    VM_ALLOC_WIRED;
 
 	fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 	    UMA_ZONE_NOFREE | UMA_ZONE_VM);
 	bogus_page = vm_page_alloc(NULL, 0, bogus_req);
 }
 
 /*
  * Create one page cache zone for every (domain, free pool) pair.  The
  * cache zones are initialized later than the rest of the page module since
  * we need to be able to allocate pages before UMA is fully initialized.
  */
 static void
 vm_page_init_cache_zones(void *dummy __unused)
 {
 	struct vm_domain *dom;
 	struct vm_pgcache *pc;
 	int dom_idx, limit, pool_idx, tunable;
 
 	/* The tunable, if set, is multiplied by the number of CPUs. */
 	tunable = 0;
 	TUNABLE_INT_FETCH("vm.pgcache_zone_max_pcpu", &tunable);
 	tunable *= mp_ncpus;
 
 	for (dom_idx = 0; dom_idx < vm_ndomains; dom_idx++) {
 		dom = VM_DOMAIN(dom_idx);
 		for (pool_idx = 0; pool_idx < VM_NFREEPOOL; pool_idx++) {
 			pc = &dom->vmd_pgcache[pool_idx];
 			pc->domain = dom_idx;
 			pc->pool = pool_idx;
 			pc->zone = uma_zcache_create("vm pgcache", PAGE_SIZE,
 			    NULL, NULL, NULL, NULL, vm_page_zone_import,
 			    vm_page_zone_release, pc, UMA_ZONE_VM);
 
 			/*
 			 * Unless the tunable overrides it, limit each
 			 * pool's zone to 0.1% of the pages in the domain.
 			 */
 			if (tunable != 0)
 				limit = tunable;
 			else
 				limit = dom->vmd_page_count / 1000;
 			uma_zone_set_maxcache(pc->zone, limit);
 		}
 	}
 }
 SYSINIT(vm_page2, SI_SUB_VM_CONF, SI_ORDER_ANY, vm_page_init_cache_zones, NULL);
 
 /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */
 #if PAGE_SIZE == 32768
 #ifdef CTASSERT
 CTASSERT(sizeof(u_long) >= 8);
 #endif
 #endif
 
 /*
  *	vm_set_page_size:
  *
  *	Default vm_cnt.v_page_size to PAGE_SIZE when it has not been set,
  *	and panic unless the resulting page size is a power of two.  Must
  *	be called before any use of page-size dependent functions.
  */
 void
 vm_set_page_size(void)
 {
 	if (vm_cnt.v_page_size == 0)
 		vm_cnt.v_page_size = PAGE_SIZE;
 
 	/* A power of two has no bits in common with its predecessor. */
 	if ((vm_cnt.v_page_size & (vm_cnt.v_page_size - 1)) != 0)
 		panic("vm_set_page_size: page size not a power of two");
 }
 
 /*
  *	vm_page_blacklist_next:
  *
  *	Find the next entry in the provided string of blacklist
  *	addresses.  Entries are separated by space, comma, or newline.
  *	If an invalid integer is encountered then the rest of the
  *	string is skipped.  Updates the list pointer to the next
  *	character, or NULL if the string is exhausted or invalid.
  *
  *	"list" is an in/out cursor into the string.  "end" may be NULL, in
  *	which case the string is assumed to be NUL-terminated; otherwise the
  *	byte at "end" is inspected and, if it is a separator, overwritten
  *	with a NUL.  Returns the entry truncated to a page boundary, or 0
  *	when no entry was produced.
  */
 static vm_paddr_t
 vm_page_blacklist_next(char **list, char *end)
 {
 	vm_paddr_t bad;
 	char *cp, *pos;
 
 	if (list == NULL || *list == NULL)
 		return (0);
 	if (**list =='\0') {
 		*list = NULL;
 		return (0);
 	}
 
 	/*
 	 * If there's no end pointer then the buffer is coming from
 	 * the kenv and we know it's null-terminated.
 	 */
 	if (end == NULL)
 		end = *list + strlen(*list);
 
 	/* Ensure that strtoq() won't walk off the end */
 	if (*end != '\0') {
 		if (*end == '\n' || *end == ' ' || *end  == ',')
 			*end = '\0';
 		else {
 			printf("Blacklist not terminated, skipping\n");
 			*list = NULL;
 			return (0);
 		}
 	}
 
 	/* strtoq() leaves cp just past the characters it consumed. */
 	for (pos = *list; *pos != '\0'; pos = cp) {
 		bad = strtoq(pos, &cp, 0);
 		if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') {
 			/*
 			 * A zero value is never returned as an entry: step
 			 * over the separator and keep scanning while still
 			 * inside the buffer.
 			 */
 			if (bad == 0) {
 				if (++cp < end)
 					continue;
 				else
 					break;
 			}
 		} else
 			break;	/* Not followed by a separator: garbage. */
 		/*
 		 * Non-zero entry found.  Leave the cursor just past the
 		 * separator, or clear it when the input is used up.
 		 */
 		if (*cp == '\0' || ++cp >= end)
 			*list = NULL;
 		else
 			*list = cp;
 		return (trunc_page(bad));
 	}
 	printf("Garbage in RAM blacklist, skipping\n");
 	*list = NULL;
 	return (0);
 }
 
 /*
  *	vm_page_blacklist_add:
  *
  *	Look up the page for physical address "pa" and ask
  *	vm_phys_unfree_page() to take it off the free lists.  On success the
  *	domain's free count is decremented and the page is appended to the
  *	blacklist; with "verbose" set a message is printed as well.
  *
  *	Returns true if the page was blacklisted or if no vm_page exists
  *	for "pa", and false if vm_phys_unfree_page() returned zero.
  */
 bool
 vm_page_blacklist_add(vm_paddr_t pa, bool verbose)
 {
 	struct vm_domain *dom;
 	vm_page_t pg;
 	int unfreed;
 
 	pg = vm_phys_paddr_to_vm_page(pa);
 	if (pg == NULL) {
 		/* page does not exist, no failure */
 		return (true);
 	}
 
 	dom = vm_pagequeue_domain(pg);
 	vm_domain_free_lock(dom);
 	unfreed = vm_phys_unfree_page(pg);
 	vm_domain_free_unlock(dom);
 	if (unfreed == 0)
 		return (false);
 
 	vm_domain_freecnt_inc(dom, -1);
 	TAILQ_INSERT_TAIL(&blacklist_head, pg, listq);
 	if (verbose)
 		printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa);
 	return (true);
 }
 
 /*
  *	vm_page_blacklist_check:
  *
  *	Walk the provided string of blacklist addresses and hand every
  *	non-zero entry to vm_page_blacklist_add(), which pulls the page out
  *	of the physical allocator free list and puts it onto the list
  *	reported via the vm.page_blacklist sysctl.
  */
 static void
 vm_page_blacklist_check(char *list, char *end)
 {
 	vm_paddr_t pa;
 	char *cursor;
 
 	/* vm_page_blacklist_next() sets the cursor to NULL when done. */
 	for (cursor = list; cursor != NULL;) {
 		pa = vm_page_blacklist_next(&cursor, end);
 		if (pa != 0)
 			vm_page_blacklist_add(pa, bootverbose);
 	}
 }
 
 /*
  *	vm_page_blacklist_load:
  *
  *	Search for a special module named "ram_blacklist".  It'll be a
  *	plain text file provided by the user via the loader directive
  *	of the same name.
  *
  *	On return *list points at the module's data and *end at the byte
  *	just past it; both are NULL when no data address was found.
  */
 static void
 vm_page_blacklist_load(char **list, char **end)
 {
 	void *mod;
 	u_char *ptr;
 	u_int len;
 
 	ptr = NULL;
 	len = 0;
 	mod = preload_search_by_type("ram_blacklist");
 	if (mod != NULL) {
 		ptr = preload_fetch_addr(mod);
 		len = preload_fetch_size(mod);
 	}
 
 	*list = ptr;
 	*end = (ptr != NULL) ? ptr + len : NULL;
 }
 
 static int
 sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS)
 {
 	vm_page_t m;
 	struct sbuf sbuf;
 	int error, first;
 
 	first = 1;
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
 	TAILQ_FOREACH(m, &blacklist_head, listq) {
 		sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",",
 		    (uintmax_t)m->phys_addr);
 		first = 0;
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 	return (error);
 }
 
 /*
  * Set up a dummy page that serves as a marker in scans of the paging queue
  * "queue".  The page is zeroed, flagged PG_MARKER and given the atomic
  * flags "aflags".  Strictly speaking only PG_MARKER is required; the page
  * is additionally write busied as a safety precaution.
  */
 static void
 vm_page_init_marker(vm_page_t marker, int queue, uint16_t aflags)
 {
 	bzero(marker, sizeof(*marker));
 	marker->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
 	marker->flags = PG_MARKER;
 	marker->a.queue = queue;
 	marker->a.flags = aflags;
 }
 
 /*
  * Reset the vm_domain structure for "domain" and initialize its page
  * queues, their locks and scan markers, the free and pageout mutexes, and
  * the domain name.
  */
 static void
 vm_page_domain_init(int domain)
 {
 	struct vm_domain *dom;
 	struct vm_pagequeue *queues, *pq;
 	int q;
 
 	dom = VM_DOMAIN(domain);
 	bzero(dom, sizeof(*dom));
 	queues = dom->vmd_pagequeues;
 
 	/* Name each queue; the names are also passed to mtx_init() below. */
 	*__DECONST(char **, &queues[PQ_INACTIVE].pq_name) =
 	    "vm inactive pagequeue";
 	*__DECONST(char **, &queues[PQ_ACTIVE].pq_name) =
 	    "vm active pagequeue";
 	*__DECONST(char **, &queues[PQ_LAUNDRY].pq_name) =
 	    "vm laundry pagequeue";
 	*__DECONST(char **, &queues[PQ_UNSWAPPABLE].pq_name) =
 	    "vm unswappable pagequeue";
 
 	dom->vmd_domain = domain;
 	dom->vmd_page_count = 0;
 	dom->vmd_free_count = 0;
 	dom->vmd_segs = 0;
 	dom->vmd_oom = FALSE;
 
 	for (q = 0; q < PQ_COUNT; q++) {
 		pq = &queues[q];
 		TAILQ_INIT(&pq->pq_pl);
 		mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue",
 		    MTX_DEF | MTX_DUPOK);
 		pq->pq_pdpages = 0;
 		vm_page_init_marker(&dom->vmd_markers[q], q, 0);
 	}
 	mtx_init(&dom->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF);
 	mtx_init(&dom->vmd_pageout_mtx, "vm pageout lock", NULL, MTX_DEF);
 	snprintf(dom->vmd_name, sizeof(dom->vmd_name), "%d", domain);
 
 	/*
 	 * inacthead is used to provide FIFO ordering for LRU-bypassing
 	 * insertions.  It starts out at the head of the inactive queue.
 	 */
 	vm_page_init_marker(&dom->vmd_inacthead, PQ_INACTIVE, PGA_ENQUEUED);
 	TAILQ_INSERT_HEAD(&queues[PQ_INACTIVE].pq_pl, &dom->vmd_inacthead,
 	    plinks.q);
 
 	/*
 	 * The clock pages are used to implement active queue scanning without
 	 * requeues.  Scans start at clock[0], which is advanced after the scan
 	 * ends.  When the two clock hands meet, they are reset and scanning
 	 * resumes from the head of the queue.  Initially clock[0] sits at the
 	 * head of the active queue and clock[1] at its tail.
 	 */
 	vm_page_init_marker(&dom->vmd_clock[0], PQ_ACTIVE, PGA_ENQUEUED);
 	vm_page_init_marker(&dom->vmd_clock[1], PQ_ACTIVE, PGA_ENQUEUED);
 	TAILQ_INSERT_HEAD(&queues[PQ_ACTIVE].pq_pl, &dom->vmd_clock[0],
 	    plinks.q);
 	TAILQ_INSERT_TAIL(&queues[PQ_ACTIVE].pq_pl, &dom->vmd_clock[1],
 	    plinks.q);
 }
 
 /*
  * Bring a physical page structure into its initial state before it is
  * added to the free lists: no object, no references, unbusied, on no page
  * queue, with physical address "pa" and segment index "segind".
  */
 static void
 vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind)
 {
 	/* Identity of the page. */
 	m->phys_addr = pa;
 	m->segind = segind;
 
 	/* Ownership and state. */
 	m->object = NULL;
 	m->ref_count = 0;
 	m->busy_lock = VPB_UNBUSIED;
 	m->a.flags = 0;
 	m->flags = 0;
 	m->a.queue = PQ_NONE;
 	m->dirty = 0;
 	m->valid = 0;
 
 	/* Physical allocator bookkeeping. */
 	m->psind = 0;
 	m->order = VM_NFREEORDER;
 	m->pool = VM_FREEPOOL_DEFAULT;
 
 	pmap_page_init(m);
 }
 
 #ifndef PMAP_HAS_PAGE_ARRAY
 /*
  * Carve vm_page_array out of the physical memory just below "end" and map
  * it at *vaddr, with room for "page_range" page structures.  Sets
  * vm_page_array and vm_page_array_size and returns the new, lower end of
  * available physical memory.
  */
 static vm_paddr_t
 vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range)
 {
 	vm_paddr_t array_start;
 
 	/*
 	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
 	 * However, because this page is allocated from KVM, out-of-bounds
 	 * accesses using the direct map will not be trapped.
 	 */
 	*vaddr += PAGE_SIZE;
 
 	/*
 	 * Allocate physical memory for the page structures, and map it.
 	 */
 	array_start = trunc_page(end - page_range * sizeof(struct vm_page));
 	vm_page_array_size = page_range;
 	vm_page_array = (vm_page_t)pmap_map(vaddr, array_start, end,
 	    VM_PROT_READ | VM_PROT_WRITE);
 
 	return (array_start);
 }
 #endif
 
 /*
  *	vm_page_startup:
  *
  *	Initializes the resident memory module.  Allocates physical memory for
  *	bootstrapping UMA and some data structures that are used to manage
  *	physical pages.  Initializes these structures, and populates the free
  *	page queues.
  *
  *	"vaddr" is rounded up to a page boundary and passed by reference to
  *	the mapping routines called below; its final value is returned.
  */
 vm_offset_t
 vm_page_startup(vm_offset_t vaddr)
 {
 	struct vm_phys_seg *seg;
 	vm_page_t m;
 	char *list, *listend;
 	vm_paddr_t end, high_avail, low_avail, new_end, size;
 	vm_paddr_t page_range __unused;
 	vm_paddr_t last_pa, pa;
 	u_long pagecount;
 	int biggestone, i, segind;
 #ifdef WITNESS
 	vm_offset_t mapped;
 	int witness_size;
 #endif
 #if defined(__i386__) && defined(VM_PHYSSEG_DENSE)
 	long ii;
 #endif
 
 	vaddr = round_page(vaddr);
 
 	vm_phys_early_startup();
 	biggestone = vm_phys_avail_largest();
 	end = phys_avail[biggestone+1];
 
 	/*
 	 * Initialize the page and queue locks.
 	 */
 	mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF);
 	for (i = 0; i < PA_LOCK_COUNT; i++)
 		mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF);
 	for (i = 0; i < vm_ndomains; i++)
 		vm_page_domain_init(i);
 
 	/*
 	 * The boot-time allocations below are taken from the top of the
 	 * chunk selected above by lowering new_end.
 	 */
 	new_end = end;
 #ifdef WITNESS
 	witness_size = round_page(witness_startup_count());
 	new_end -= witness_size;
 	mapped = pmap_map(&vaddr, new_end, new_end + witness_size,
 	    VM_PROT_READ | VM_PROT_WRITE);
 	bzero((void *)mapped, witness_size);
 	witness_startup((void *)mapped);
 #endif
 
 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
     defined(__i386__) || defined(__mips__) || defined(__riscv) || \
     defined(__powerpc64__)
 	/*
 	 * Allocate a bitmap to indicate that a random physical page
 	 * needs to be included in a minidump.
 	 *
 	 * The amd64 port needs this to indicate which direct map pages
 	 * need to be dumped, via calls to dump_add_page()/dump_drop_page().
 	 *
 	 * However, i386 still needs this workspace internally within the
 	 * minidump code.  In theory, they are not needed on i386, but are
 	 * included should the sf_buf code decide to use them.
 	 */
 	last_pa = 0;
 	for (i = 0; dump_avail[i + 1] != 0; i += 2)
 		if (dump_avail[i + 1] > last_pa)
 			last_pa = dump_avail[i + 1];
 	/* One bit per page up to the highest dump_avail[] address. */
 	page_range = last_pa / PAGE_SIZE;
 	vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
 	new_end -= vm_page_dump_size;
 	vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
 	    new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
 	bzero((void *)vm_page_dump, vm_page_dump_size);
 #else
 	(void)last_pa;
 #endif
 #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
     defined(__riscv) || defined(__powerpc64__)
 	/*
 	 * Include the UMA bootstrap pages, witness pages and vm_page_dump
 	 * in a crash dump.  When pmap_map() uses the direct map, they are
 	 * not automatically included.
 	 */
 	for (pa = new_end; pa < end; pa += PAGE_SIZE)
 		dump_add_page(pa);
 #endif
 	phys_avail[biggestone + 1] = new_end;
 #ifdef __amd64__
 	/*
 	 * Request that the physical pages underlying the message buffer be
 	 * included in a crash dump.  Since the message buffer is accessed
 	 * through the direct map, they are not automatically included.
 	 */
 	pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr);
 	last_pa = pa + round_page(msgbufsize);
 	while (pa < last_pa) {
 		dump_add_page(pa);
 		pa += PAGE_SIZE;
 	}
 #endif
 	/*
 	 * Compute the number of pages of memory that will be available for
 	 * use, taking into account the overhead of a page structure per page.
 	 * In other words, solve
 	 *	"available physical memory" - round_page(page_range *
 	 *	    sizeof(struct vm_page)) = page_range * PAGE_SIZE
 	 * for page_range.
 	 */
 	low_avail = phys_avail[0];
 	high_avail = phys_avail[1];
 	for (i = 0; i < vm_phys_nsegs; i++) {
 		if (vm_phys_segs[i].start < low_avail)
 			low_avail = vm_phys_segs[i].start;
 		if (vm_phys_segs[i].end > high_avail)
 			high_avail = vm_phys_segs[i].end;
 	}
 	/* Skip the first chunk.  It is already accounted for. */
 	for (i = 2; phys_avail[i + 1] != 0; i += 2) {
 		if (phys_avail[i] < low_avail)
 			low_avail = phys_avail[i];
 		if (phys_avail[i + 1] > high_avail)
 			high_avail = phys_avail[i + 1];
 	}
 	first_page = low_avail / PAGE_SIZE;
 #ifdef VM_PHYSSEG_SPARSE
 	/* Sparse layout: add up the sizes of all segments and chunks. */
 	size = 0;
 	for (i = 0; i < vm_phys_nsegs; i++)
 		size += vm_phys_segs[i].end - vm_phys_segs[i].start;
 	for (i = 0; phys_avail[i + 1] != 0; i += 2)
 		size += phys_avail[i + 1] - phys_avail[i];
 #elif defined(VM_PHYSSEG_DENSE)
 	/* Dense layout: cover the whole span from lowest to highest. */
 	size = high_avail - low_avail;
 #else
 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
 #endif
 
 #ifdef PMAP_HAS_PAGE_ARRAY
 	pmap_page_array_startup(size / PAGE_SIZE);
 	biggestone = vm_phys_avail_largest();
 	end = new_end = phys_avail[biggestone + 1];
 #else
 #ifdef VM_PHYSSEG_DENSE
 	/*
 	 * In the VM_PHYSSEG_DENSE case, the number of pages can account for
 	 * the overhead of a page structure per page only if vm_page_array is
 	 * allocated from the last physical memory chunk.  Otherwise, we must
 	 * allocate page structures representing the physical memory
 	 * underlying vm_page_array, even though they will not be used.
 	 */
 	if (new_end != high_avail)
 		page_range = size / PAGE_SIZE;
 	else
 #endif
 	{
 		page_range = size / (PAGE_SIZE + sizeof(struct vm_page));
 
 		/*
 		 * If the partial bytes remaining are large enough for
 		 * a page (PAGE_SIZE) without a corresponding
 		 * 'struct vm_page', then new_end will contain an
 		 * extra page after subtracting the length of the VM
 		 * page array.  Compensate by subtracting an extra
 		 * page from new_end.
 		 */
 		if (size % (PAGE_SIZE + sizeof(struct vm_page)) >= PAGE_SIZE) {
 			if (new_end == high_avail)
 				high_avail -= PAGE_SIZE;
 			new_end -= PAGE_SIZE;
 		}
 	}
 	end = new_end;
 	new_end = vm_page_array_alloc(&vaddr, end, page_range);
 #endif
 
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * Allocate physical memory for the reservation management system's
 	 * data structures, and map it.
 	 */
 	new_end = vm_reserv_startup(&vaddr, new_end);
 #endif
 #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
     defined(__riscv) || defined(__powerpc64__)
 	/*
 	 * Include vm_page_array and vm_reserv_array in a crash dump.
 	 */
 	for (pa = new_end; pa < end; pa += PAGE_SIZE)
 		dump_add_page(pa);
 #endif
 	phys_avail[biggestone + 1] = new_end;
 
 	/*
 	 * Add physical memory segments corresponding to the available
 	 * physical pages.
 	 */
 	for (i = 0; phys_avail[i + 1] != 0; i += 2)
 		if (vm_phys_avail_size(i) != 0)
 			vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]);
 
 	/*
 	 * Initialize the physical memory allocator.
 	 */
 	vm_phys_init();
 
 	/*
 	 * Initialize the page structures and add every available page to the
 	 * physical memory allocator's free lists.
 	 */
 #if defined(__i386__) && defined(VM_PHYSSEG_DENSE)
 	/*
 	 * Start with every array entry marked PG_FICTITIOUS; the per-segment
 	 * loop below re-initializes the entries that belong to a segment.
 	 */
 	for (ii = 0; ii < vm_page_array_size; ii++) {
 		m = &vm_page_array[ii];
 		vm_page_init_page(m, (first_page + ii) << PAGE_SHIFT, 0);
 		m->flags = PG_FICTITIOUS;
 	}
 #endif
 	vm_cnt.v_page_count = 0;
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
 		for (m = seg->first_page, pa = seg->start; pa < seg->end;
 		    m++, pa += PAGE_SIZE)
 			vm_page_init_page(m, pa, segind);
 
 		/*
 		 * Add the segment to the free lists only if it is covered by
 		 * one of the ranges in phys_avail.  Because we've added the
 		 * ranges to the vm_phys_segs array, we can assume that each
 		 * segment is either entirely contained in one of the ranges,
 		 * or doesn't overlap any of them.
 		 */
 		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
 			struct vm_domain *vmd;
 
 			if (seg->start < phys_avail[i] ||
 			    seg->end > phys_avail[i + 1])
 				continue;
 
 			m = seg->first_page;
 			pagecount = (u_long)atop(seg->end - seg->start);
 
 			vmd = VM_DOMAIN(seg->domain);
 			vm_domain_free_lock(vmd);
 			vm_phys_enqueue_contig(m, pagecount);
 			vm_domain_free_unlock(vmd);
 			vm_domain_freecnt_inc(vmd, pagecount);
 			vm_cnt.v_page_count += (u_int)pagecount;
 
 			/* Account for the segment in its domain. */
 			vmd = VM_DOMAIN(seg->domain);
 			vmd->vmd_page_count += (u_int)pagecount;
 			vmd->vmd_segs |= 1UL << m->segind;
 			break;
 		}
 	}
 
 	/*
 	 * Remove blacklisted pages from the physical memory allocator.
 	 */
 	TAILQ_INIT(&blacklist_head);
 	vm_page_blacklist_load(&list, &listend);
 	vm_page_blacklist_check(list, listend);
 
 	/*
 	 * A second blacklist may come from the "vm.blacklist" kernel
 	 * environment variable; it is passed without an end pointer.
 	 */
 	list = kern_getenv("vm.blacklist");
 	vm_page_blacklist_check(list, NULL);
 
 	freeenv(list);
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * Initialize the reservation management system.
 	 */
 	vm_reserv_init();
 #endif
 
 	return (vaddr);
 }
 
 /*
  *	vm_page_reference:
  *
  *	Set the PGA_REFERENCED atomic flag on the page.
  */
 void
 vm_page_reference(vm_page_t m)
 {
 	vm_page_aflag_set(m, PGA_REFERENCED);
 }
 
 /*
  * Make a single, non-sleeping attempt to busy the page as requested by
  * "allocflags": shared busy if VM_ALLOC_SBUSY or VM_ALLOC_IGN_SBUSY is
  * set, exclusive busy otherwise.  On success the page is also wired when
  * VM_ALLOC_WIRED is set.  Returns whether the busy lock was obtained.
  */
 static bool
 vm_page_acquire_flags(vm_page_t m, int allocflags)
 {
 	const bool shared =
 	    (allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0;
 
 	if (!(shared ? vm_page_trysbusy(m) : vm_page_tryxbusy(m)))
 		return (false);
 	if ((allocflags & VM_ALLOC_WIRED) != 0)
 		vm_page_wire(m);
 	return (true);
 }
 
 /*
  *	vm_page_busy_sleep_flags
  *
  *	Sleep for busy according to VM_ALLOC_ parameters.
  *
  *	Returns false without sleeping when VM_ALLOC_NOWAIT is set, and
  *	false after sleeping when VM_ALLOC_WAITFAIL is set; otherwise
  *	returns true.  The object write lock is re-taken whenever
  *	_vm_page_busy_sleep() returns true.
  */
 static bool
 vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wmesg,
     int allocflags)
 {
 	bool ign_sbusy;
 
 	if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 		return (false);
 
 	/*
 	 * Reference the page before unlocking and sleeping so that the
 	 * page daemon is less likely to reclaim it.
 	 */
 	if ((allocflags & VM_ALLOC_NOCREAT) == 0)
 		vm_page_aflag_set(m, PGA_REFERENCED);
 
 	ign_sbusy = (allocflags & VM_ALLOC_IGN_SBUSY) != 0;
 	if (_vm_page_busy_sleep(object, m, wmesg, ign_sbusy, true))
 		VM_OBJECT_WLOCK(object);
 
 	return ((allocflags & VM_ALLOC_WAITFAIL) == 0);
 }
 
 /*
  *	vm_page_busy_acquire:
  *
  *	Acquire the busy lock as described by VM_ALLOC_* flags.  Will loop
  *	and drop the object lock if necessary.
  *
  *	Returns true once the busy lock is held.  Returns false if the
  *	lock is unavailable and VM_ALLOC_NOWAIT is set, or after the first
  *	sleep if VM_ALLOC_WAITFAIL is set.
  */
 bool
 vm_page_busy_acquire(vm_page_t m, int allocflags)
 {
 	vm_object_t obj;
 	bool locked;
 
 	/*
 	 * The page-specific object must be cached because page
 	 * identity can change during the sleep, causing the
 	 * re-lock of a different object.
 	 * It is assumed that a reference to the object is already
 	 * held by the callers.
 	 */
 	obj = m->object;
 	while (!vm_page_acquire_flags(m, allocflags)) {
 		if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 			return (false);
 
 		/* Without an object there is no object lock to own. */
 		locked = obj != NULL && VM_OBJECT_WOWNED(obj);
 		MPASS(locked || vm_page_wired(m));
 		if (_vm_page_busy_sleep(obj, m, "vmpba",
 		    (allocflags & VM_ALLOC_SBUSY) != 0, locked))
 			VM_OBJECT_WLOCK(obj);
 		if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
 			return (false);
 		KASSERT(m->object == obj || m->object == NULL,
 		    ("vm_page_busy_acquire: page %p does not belong to %p",
 		    m, obj));
 	}
 	return (true);
 }
 
 /*
  *	vm_page_busy_downgrade:
  *
  *	Convert an exclusive busy held on the page into a shared busy with
  *	exactly one sharer.  Sleepers are woken if the replaced lock word had
  *	VPB_BIT_WAITERS set.
  */
 void
 vm_page_busy_downgrade(vm_page_t m)
 {
 	u_int old;
 
 	vm_page_assert_xbusied(m);
 
 	/* Retry until the lock word has been swapped for a single sharer. */
 	old = m->busy_lock;
 	while (!atomic_fcmpset_rel_int(&m->busy_lock, &old,
 	    VPB_SHARERS_WORD(1)))
 		continue;
 
 	if ((old & VPB_BIT_WAITERS) != 0)
 		wakeup(m);
 }
 
 /*
  *	vm_page_busy_tryupgrade:
  *
  *	Attempt to turn a shared busy with a single sharer into an exclusive
  *	busy owned by the current thread, carrying over VPB_BIT_WAITERS.
  *
  *	Returns 1 on success and 0 if more than one sharer is present.
  */
 int
 vm_page_busy_tryupgrade(vm_page_t m)
 {
 	u_int excl, cur;
 
 	vm_page_assert_sbusied(m);
 
 	cur = m->busy_lock;
 	excl = VPB_CURTHREAD_EXCLUSIVE;
 	do {
 		if (VPB_SHARERS(cur) > 1)
 			return (0);
 		KASSERT((cur & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1),
 		    ("vm_page_busy_tryupgrade: invalid lock state"));
 	} while (!atomic_fcmpset_acq_int(&m->busy_lock, &cur,
 	    excl | (cur & VPB_BIT_WAITERS)));
 
 	return (1);
 }
 
 /*
  *	vm_page_sbusied:
  *
  *	Report whether the page is shared busied.  Returns 1 when the lock
  *	word has VPB_BIT_SHARED set and is not VPB_UNBUSIED, 0 otherwise.
  */
 int
 vm_page_sbusied(vm_page_t m)
 {
 	u_int word;
 
 	/* Sample the lock word once so both tests see the same value. */
 	word = m->busy_lock;
 	if (word == VPB_UNBUSIED)
 		return (0);
 	return ((word & VPB_BIT_SHARED) != 0);
 }
 
 /*
  *	vm_page_sunbusy:
  *
  *	Shared unbusy a page.
  *
  *	Drops one sharer from the busy lock word.  When the caller is the
  *	only sharer the word becomes VPB_UNBUSIED and, if VPB_BIT_WAITERS was
  *	set in the replaced value, wakeup() is called on the page.
  */
 void
 vm_page_sunbusy(vm_page_t m)
 {
 	u_int x;
 
 	vm_page_assert_sbusied(m);
 
 	x = m->busy_lock;
 	for (;;) {
 		/* Other sharers remain: just subtract our share. */
 		if (VPB_SHARERS(x) > 1) {
 			if (atomic_fcmpset_int(&m->busy_lock, &x,
 			    x - VPB_ONE_SHARER))
 				break;
 			continue;
 		}
 		KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1),
 		    ("vm_page_sunbusy: invalid lock state"));
 		/* Last sharer: release the lock word entirely. */
 		if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED))
 			continue;
 		/* "x" holds the replaced value; wake only if waiters exist. */
 		if ((x & VPB_BIT_WAITERS) == 0)
 			break;
 		wakeup(m);
 		break;
 	}
 }
 
 /*
  *	vm_page_busy_sleep:
  *
  *	Sleep if the page is busy, using the page pointer as wchan.
  *	This is used to implement the hard-path of busying mechanism.
  *
  *	If nonshared is true, sleep only if the page is xbusy.
  *
  *	The object lock must be held on entry and will be released on exit.
  */
 void
 vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared)
 {
 	vm_object_t obj;
 
 	obj = m->object;
 	VM_OBJECT_ASSERT_LOCKED(obj);
 	vm_page_lock_assert(m, MA_NOTOWNED);
 
 	/*
 	 * The helper returns false when it did not drop the object lock
 	 * itself; drop it here so the lock is released on every path.
 	 */
 	if (!_vm_page_busy_sleep(obj, m, wmesg, nonshared, true))
 		VM_OBJECT_DROP(obj);
 }
 
 /*
  *	_vm_page_busy_sleep:
  *
  *	Internal busy sleep function.
  *
  *	"locked" tells whether the caller holds the lock of "obj".  If the
  *	object is busied, or the page is busied in a way that requires
  *	waiting, the object lock is dropped (when "locked" is true) and the
  *	thread sleeps; "locked" is returned in that case.  If no sleep is
  *	needed the object lock is left untouched and false is returned.
  *	A true return value therefore means the object lock was dropped.
  */
 static bool
 _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, const char *wmesg,
     bool nonshared, bool locked)
 {
 	u_int x;
 
 	/*
 	 * If the object is busy we must wait for that to drain to zero
 	 * before trying the page again.
 	 */
 	if (obj != NULL && vm_object_busied(obj)) {
 		if (locked)
 			VM_OBJECT_DROP(obj);
 		vm_object_busy_wait(obj, wmesg);
 		return (locked);
 	}
 	sleepq_lock(m);
 	x = m->busy_lock;
 	/*
 	 * Do not sleep if the page is no longer busied, if only exclusive
 	 * busy matters and the page is shared busied, or if the waiters bit
 	 * was clear and could not be set because the lock word changed.
 	 */
 	if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) ||
 	    ((x & VPB_BIT_WAITERS) == 0 &&
 	    !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) {
 		sleepq_release(m);
 		return (false);
 	}
 	if (locked)
 		VM_OBJECT_DROP(obj);
 	DROP_GIANT();
 	sleepq_add(m, NULL, wmesg, 0, 0);
 	sleepq_wait(m, PVM);
 	PICKUP_GIANT();
 	return (locked);
 }
 
 /*
  *	vm_page_trysbusy:
  *
  *	Try to shared busy a page.
  *	If the operation succeeds 1 is returned otherwise 0.
  *	The operation never sleeps.
  *
  *	The attempt fails if VPB_BIT_SHARED is clear in the lock word or if
  *	the page's object is busied, checked both before and after the
  *	sharer count is incremented.
  */
 int
 vm_page_trysbusy(vm_page_t m)
 {
 	vm_object_t obj;
 	u_int x;
 
 	obj = m->object;
 	x = m->busy_lock;
 	for (;;) {
 		if ((x & VPB_BIT_SHARED) == 0)
 			return (0);
 		/*
 		 * Reduce the window for transient busies that will trigger
 		 * false negatives in vm_page_ps_test().
 		 */
 		if (obj != NULL && vm_object_busied(obj))
 			return (0);
 		/* On failure "x" is refreshed and the checks are repeated. */
 		if (atomic_fcmpset_acq_int(&m->busy_lock, &x,
 		    x + VPB_ONE_SHARER))
 			break;
 	}
 
 	/* Refetch the object now that we're guaranteed that it is stable. */
 	obj = m->object;
 	if (obj != NULL && vm_object_busied(obj)) {
 		/* Back out the share that was just taken. */
 		vm_page_sunbusy(m);
 		return (0);
 	}
 	return (1);
 }
 
 /*
  *	vm_page_tryxbusy:
  *
  *	Try to exclusive busy a page.
  *	Returns 1 on success and 0 on failure; the operation never sleeps.
  *
  *	The attempt fails if the lock word is not VPB_UNBUSIED, or if the
  *	page's object is found to be busied after the busy was taken, in
  *	which case the exclusive busy is released again.
  */
 int
 vm_page_tryxbusy(vm_page_t m)
 {
 	vm_object_t object;
 
 	if (!atomic_cmpset_acq_int(&m->busy_lock, VPB_UNBUSIED,
 	    VPB_CURTHREAD_EXCLUSIVE))
 		return (0);
 
 	object = m->object;
 	if (object == NULL || !vm_object_busied(object))
 		return (1);
 
 	vm_page_xunbusy(m);
 	return (0);
 }
 
 /*
  * Common tail of the exclusive-unbusy slow paths: store VPB_UNBUSIED into
  * the lock word with release semantics and wake threads sleeping on the
  * page.
  */
 static void
 vm_page_xunbusy_hard_tail(vm_page_t m)
 {
 	atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
 	/* Wake the waiter. */
 	wakeup(m);
 }
 
 /*
  *	vm_page_xunbusy_hard:
  *
  *	Called when unbusy has failed because there is a waiter.
  *	Asserts that the page is exclusively busied, then unbusies it and
  *	wakes the sleepers.
  */
 void
 vm_page_xunbusy_hard(vm_page_t m)
 {
 	vm_page_assert_xbusied(m);
 	vm_page_xunbusy_hard_tail(m);
 }
 
 /*
  * Variant of vm_page_xunbusy_hard() that uses the "unchecked" exclusive
  * busy assertion before unbusying the page and waking the sleepers.
  */
 void
 vm_page_xunbusy_hard_unchecked(vm_page_t m)
 {
 	vm_page_assert_xbusied_unchecked(m);
 	vm_page_xunbusy_hard_tail(m);
 }
 
 /*
- * Avoid releasing and reacquiring the same page lock.
- */
-void
-vm_page_change_lock(vm_page_t m, struct mtx **mtx)
-{
-	struct mtx *mtx1;
-
-	mtx1 = vm_page_lockptr(m);
-	if (*mtx == mtx1)
-		return;
-	if (*mtx != NULL)
-		mtx_unlock(*mtx);
-	*mtx = mtx1;
-	mtx_lock(mtx1);
-}
-
-/*
  *	vm_page_unhold_pages:
  *
  *	Unhold each of the pages that is referenced by the given array.
  */
 void
 vm_page_unhold_pages(vm_page_t *ma, int count)
 {
 
 	/* Unwire each array entry in order, passing PQ_ACTIVE as the queue. */
 	while (count != 0) {
 		vm_page_unwire(*ma, PQ_ACTIVE);
 		ma++;
 		count--;
 	}
 }
 
 /*
  *	PHYS_TO_VM_PAGE:
  *
  *	Translate a physical address into its vm_page.
  *
  *	With VM_PHYSSEG_SPARSE the page is looked up with
  *	vm_phys_paddr_to_vm_page().  With VM_PHYSSEG_DENSE the page index is
  *	used to index vm_page_array when it lies within
  *	[first_page, first_page + vm_page_array_size).  In both cases the
  *	lookup falls back to vm_phys_fictitious_to_vm_page(), whose result is
  *	returned as is.
  */
 vm_page_t
 PHYS_TO_VM_PAGE(vm_paddr_t pa)
 {
 	vm_page_t m;
 
 #ifdef VM_PHYSSEG_SPARSE
 	m = vm_phys_paddr_to_vm_page(pa);
 	if (m == NULL)
 		m = vm_phys_fictitious_to_vm_page(pa);
 	return (m);
 #elif defined(VM_PHYSSEG_DENSE)
 	long pi;
 
 	/* "pi" is the page index corresponding to the physical address. */
 	pi = atop(pa);
 	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
 		m = &vm_page_array[pi - first_page];
 		return (m);
 	}
 	return (vm_phys_fictitious_to_vm_page(pa));
 #else
 #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
 #endif
 }
 
 /*
  *	vm_page_getfake:
  *
  *	Create a fictitious page with the specified physical address and
  *	memory attribute.  The memory attribute is the only machine-
  *	dependent aspect of a fictitious page that must be initialized.
  *
  *	The page is allocated zeroed from fakepg_zone with M_WAITOK and is
  *	initialized by vm_page_initfake().
  */
 vm_page_t
 vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr)
 {
 	vm_page_t m;
 
 	m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO);
 	vm_page_initfake(m, paddr, memattr);
 	return (m);
 }
 
 /*
  *	vm_page_initfake:
  *
  *	Initialize "m" as a fictitious page at physical address "paddr" and
  *	set its memory attribute.  If the page is already marked
  *	PG_FICTITIOUS only the memory attribute is updated.
  */
 void
 vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
 {
 
 	if ((m->flags & PG_FICTITIOUS) == 0) {
 		/* First initialization of this page structure. */
 		m->phys_addr = paddr;
 		m->a.queue = PQ_NONE;
 		/* Fictitious pages don't use "segind". */
 		m->flags = PG_FICTITIOUS;
 		/* Fictitious pages don't use "order" or "pool". */
 		m->oflags = VPO_UNMANAGED;
 		m->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
 		/* Fictitious pages are unevictable. */
 		m->ref_count = 1;
 		pmap_page_init(m);
 	}
 
 	/*
 	 * For a previously initialized page the memattr might have changed
 	 * since then, so the pmap is always updated to the new memattr.
 	 */
 	pmap_page_set_memattr(m, memattr);
 }
 
 /*
  *	vm_page_putfake:
  *
  *	Release a fictitious page: the page is exclusive-unbusied and
  *	returned to fakepg_zone.  The page must be unmanaged and marked
  *	PG_FICTITIOUS.
  */
 void
 vm_page_putfake(vm_page_t m)
 {
 
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m));
 	KASSERT((m->flags & PG_FICTITIOUS) != 0,
 	    ("vm_page_putfake: bad page %p", m));
 	vm_page_xunbusy(m);
 	uma_zfree(fakepg_zone, m);
 }
 
 /*
  *	vm_page_updatefake:
  *
  *	Update the given fictitious page to the specified physical address and
  *	memory attribute.  The page must already be marked PG_FICTITIOUS.
  */
 void
 vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
 {
 
 	KASSERT((m->flags & PG_FICTITIOUS) != 0,
 	    ("vm_page_updatefake: bad page %p", m));
 	m->phys_addr = paddr;
 	pmap_page_set_memattr(m, memattr);
 }
 
 /*
  *	vm_page_free:
  *
  *	Free a page.  PG_ZERO is cleared before the page is handed to
  *	vm_page_free_toq().
  */
 void
 vm_page_free(vm_page_t m)
 {
 
 	m->flags &= ~PG_ZERO;
 	vm_page_free_toq(m);
 }
 
 /*
  *	vm_page_free_zero:
  *
  *	Free a page that is known to be zeroed.  PG_ZERO is set before the
  *	page is handed to vm_page_free_toq().
  */
 void
 vm_page_free_zero(vm_page_t m)
 {
 
 	m->flags |= PG_ZERO;
 	vm_page_free_toq(m);
 }
 
 /*
  * Finish a page from a getpages request that was optionally read ahead or
  * behind: place it on a page queue and release its exclusive busy.
  */
 void
 vm_page_readahead_finish(vm_page_t m)
 {
 	bool wanted;
 
 	/* We shouldn't put invalid pages on queues. */
 	KASSERT(!vm_page_none_valid(m), ("%s: %p is invalid", __func__, m));
 
 	/*
 	 * This is not the page that was actually requested, so it is not
 	 * obvious whether to activate or deactivate it.  Empirically,
 	 * deactivation is usually the better choice unless another thread
 	 * is waiting for the page.
 	 */
 	wanted = (m->busy_lock & VPB_BIT_WAITERS) != 0;
 	if (!wanted)
 		vm_page_deactivate(m);
 	else
 		vm_page_activate(m);
 	vm_page_xunbusy_unchecked(m);
 }
 
 /*
  *	vm_page_sleep_if_busy:
  *
  *	If the page is busied, or its object is busy, go through
  *	vm_page_busy_sleep() and then write-lock the object again.
  *	Returns TRUE if that path was taken and FALSE otherwise.
  *
  *	The given page must be unlocked and the object containing it must
  *	be write-locked.
  */
 int
 vm_page_sleep_if_busy(vm_page_t m, const char *msg)
 {
 	vm_object_t object;
 
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	/*
 	 * Cache the page's object: the page's identity can change while
 	 * sleeping, and the lock taken afterwards must be the one that was
 	 * held on entry.  Callers are assumed to hold a reference to the
 	 * object.
 	 */
 	object = m->object;
 	if (!vm_page_busied(m) && (object == NULL || !object->busy))
 		return (FALSE);
 
 	vm_page_busy_sleep(m, msg, false);
 	VM_OBJECT_WLOCK(object);
 	return (TRUE);
 }
 
 /*
  *	vm_page_sleep_if_xbusy:
  *
  *	If the page is exclusively busied, or its object is busy, go through
  *	vm_page_busy_sleep() in nonshared mode and then write-lock the object
  *	again.  Returns TRUE if that path was taken and FALSE otherwise.
  *
  *	The given page must be unlocked and the object containing it must
  *	be write-locked.
  */
 int
 vm_page_sleep_if_xbusy(vm_page_t m, const char *msg)
 {
 	vm_object_t object;
 
 	vm_page_lock_assert(m, MA_NOTOWNED);
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	/*
 	 * Cache the page's object: the page's identity can change while
 	 * sleeping, and the lock taken afterwards must be the one that was
 	 * held on entry.  Callers are assumed to hold a reference to the
 	 * object.
 	 */
 	object = m->object;
 	if (!vm_page_xbusied(m) && (object == NULL || !object->busy))
 		return (FALSE);
 
 	vm_page_busy_sleep(m, msg, true);
 	VM_OBJECT_WLOCK(object);
 	return (TRUE);
 }
 
 /*
  *	vm_page_dirty_KBI:		[ internal use only ]
  *
  *	Set all bits in the page's dirty field.
  *
  *	The object containing the specified page must be locked if the
  *	call is made from the machine-independent layer.
  *
  *	See vm_page_clear_dirty_mask().
  *
  *	This function should only be called by vm_page_dirty().
  *
  *	The page is asserted to be fully valid.
  */
 void
 vm_page_dirty_KBI(vm_page_t m)
 {
 
 	/* Refer to this operation by its public name. */
 	KASSERT(vm_page_all_valid(m), ("vm_page_dirty: page is invalid!"));
 	m->dirty = VM_PAGE_BITS_ALL;
 }
 
 /*
  *	vm_page_insert:		[ internal use only ]
  *
  *	Insert the given page into the object at "pindex".  The predecessor
  *	is found with vm_radix_lookup_le() and the work is delegated to
  *	vm_page_insert_after(), whose return value is passed through.
  *
  *	The object must be write-locked.
  */
 int
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	return (vm_page_insert_after(m, object, pindex,
 	    vm_radix_lookup_le(&object->rtree, pindex)));
 }
 
 /*
  *	vm_page_insert_after:
  *
  *	Inserts the page "m" into the specified object at offset "pindex".
  *
  *	The page "mpred" must immediately precede the offset "pindex" within
  *	the specified object.
  *
  *	The object must be locked.
  *
  *	Returns 0 on success.  Returns 1 if the radix trie insertion fails,
  *	in which case the page's object, pindex and VPRC_OBJREF are reset.
  */
 static int
 vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
     vm_page_t mpred)
 {
 	vm_page_t msucc;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(m->object == NULL,
 	    ("vm_page_insert_after: page already inserted"));
 	/* "msucc" is only used to sanity check the insertion position. */
 	if (mpred != NULL) {
 		KASSERT(mpred->object == object,
 		    ("vm_page_insert_after: object doesn't contain mpred"));
 		KASSERT(mpred->pindex < pindex,
 		    ("vm_page_insert_after: mpred doesn't precede pindex"));
 		msucc = TAILQ_NEXT(mpred, listq);
 	} else
 		msucc = TAILQ_FIRST(&object->memq);
 	if (msucc != NULL)
 		KASSERT(msucc->pindex > pindex,
 		    ("vm_page_insert_after: msucc doesn't succeed pindex"));
 
 	/*
 	 * Record the object/offset pair in this page.
 	 */
 	m->object = object;
 	m->pindex = pindex;
 	m->ref_count |= VPRC_OBJREF;
 
 	/*
 	 * Now link into the object's ordered list of backed pages.
 	 */
 	if (vm_radix_insert(&object->rtree, m)) {
 		/* Undo the bookkeeping done above. */
 		m->object = NULL;
 		m->pindex = 0;
 		m->ref_count &= ~VPRC_OBJREF;
 		return (1);
 	}
 	vm_page_insert_radixdone(m, object, mpred);
 	return (0);
 }
 
 /*
  *	vm_page_insert_radixdone:
  *
  *	Finish inserting page "m" into the specified object once the radix
  *	trie has been updated: link the page into the object's page list,
  *	account for it, and update vnode hold and dirty state as needed.
  *
  *	The page "mpred" must precede the offset "m->pindex" within the
  *	specified object.
  *
  *	The object must be locked.
  */
 static void
 vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object != NULL && m->object == object,
 	    ("vm_page_insert_radixdone: page %p has inconsistent object", m));
 	KASSERT((m->ref_count & VPRC_OBJREF) != 0,
 	    ("vm_page_insert_radixdone: page %p is missing object ref", m));
 
 	/* Link the page after its predecessor, or at the head if none. */
 	if (mpred != NULL) {
 		KASSERT(mpred->object == object,
 		    ("vm_page_insert_radixdone: object doesn't contain mpred"));
 		KASSERT(mpred->pindex < m->pindex,
 		    ("vm_page_insert_radixdone: mpred doesn't precede pindex"));
 		TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq);
 	} else {
 		TAILQ_INSERT_HEAD(&object->memq, m, listq);
 	}
 
 	/* The object now has one more resident page. */
 	object->resident_page_count++;
 
 	/* Keep the vnode held until the last page is released. */
 	if (object->resident_page_count == 1 && object->type == OBJT_VNODE)
 		vhold(object->handle);
 
 	/*
 	 * The new page may be dirty; if it is write mapped, update the
 	 * object's generation count.
 	 */
 	if (pmap_page_is_write_mapped(m))
 		vm_object_set_writeable_dirty(object);
 }
 
 /*
  * Do the work to remove a page from its object.  The caller is responsible for
  * updating the page's fields to reflect this removal.
  *
  * The page must be exclusively busied, must carry VPRC_OBJREF, and its
  * object must be write-locked.
  */
 static void
 vm_page_object_remove(vm_page_t m)
 {
 	vm_object_t object;
 	vm_page_t mrem;
 
 	vm_page_assert_xbusied(m);
 	object = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT((m->ref_count & VPRC_OBJREF) != 0,
 	    ("page %p is missing its object ref", m));
 
 	/* Deferred free of swap space. */
 	if ((m->a.flags & PGA_SWAP_FREE) != 0)
 		vm_pager_page_unswapped(m);
 
 	/* Unhook the page from the object's radix trie. */
 	mrem = vm_radix_remove(&object->rtree, m->pindex);
 	KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
 
 	/*
 	 * Now remove from the object's list of backed pages.
 	 */
 	TAILQ_REMOVE(&object->memq, m, listq);
 
 	/*
 	 * And show that the object has one fewer resident page.
 	 */
 	object->resident_page_count--;
 
 	/*
 	 * The vnode may now be recycled.
 	 */
 	if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
 		vdrop(object->handle);
 }
 
 /*
  *	vm_page_remove:
  *
  *	Removes the specified page from its containing object, but does not
  *	invalidate any backing storage.  Returns true if the object's reference
  *	was the last reference to the page, and false otherwise.
  *
  *	The object must be locked and the page must be exclusively busied.
  *	The exclusive busy will be released on return.  If this is not the
  *	final ref and the caller does not hold a wire reference it may not
  *	continue to access the page.
  */
 bool
 vm_page_remove(vm_page_t m)
 {
 	bool dropped;
 
 	/* Remove first, then release the busy that the removal preserved. */
 	dropped = vm_page_remove_xbusy(m);
 	vm_page_xunbusy(m);
 
 	return (dropped);
 }
 
 /*
  *	vm_page_remove_xbusy
  *
  *	Removes the page but leaves the xbusy held.  Returns true if this
  *	removed the final ref and false otherwise.
  */
 bool
 vm_page_remove_xbusy(vm_page_t m)
 {
 
 	vm_page_object_remove(m);
 	m->object = NULL;
 	/*
 	 * Drop the object's reference; the result equals VPRC_OBJREF only
 	 * when that was the sole remaining reference.
 	 */
 	return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF);
 }
 
 /*
  *	vm_page_lookup:
  *
  *	Returns the page associated with the object/offset
  *	pair specified; if none is found, NULL is returned.
  *	The lookup is done in the object's radix trie.
  *
  *	The object must be locked.
  */
 vm_page_t
 vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
 {
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 	return (vm_radix_lookup(&object->rtree, pindex));
 }
 
 /*
  *	vm_page_find_least:
  *
  *	Return the resident page of the object with the smallest pindex
  *	that is greater than or equal to the given pindex, or NULL if there
  *	is none.
  *
  *	The object must be locked.
  */
 vm_page_t
 vm_page_find_least(vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t first;
 
 	VM_OBJECT_ASSERT_LOCKED(object);
 
 	/*
 	 * The first page on the list answers the query when the list is
 	 * empty or that page is already at or beyond pindex; only
 	 * otherwise is the radix trie consulted.
 	 */
 	first = TAILQ_FIRST(&object->memq);
 	if (first == NULL || first->pindex >= pindex)
 		return (first);
 	return (vm_radix_lookup_ge(&object->rtree, pindex));
 }
 
 /*
  * Return the page that follows "m" in its object's page list if that page
  * sits at index m->pindex + 1; otherwise return NULL.
  *
  * The object must be locked.
  */
 vm_page_t
 vm_page_next(vm_page_t m)
 {
 	vm_page_t succ;
 
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 	succ = TAILQ_NEXT(m, listq);
 	if (succ == NULL)
 		return (NULL);
 	MPASS(succ->object == m->object);
 	return (succ->pindex == m->pindex + 1 ? succ : NULL);
 }
 
 /*
  * Return the page that precedes "m" in its object's page list if that page
  * sits at index m->pindex - 1; otherwise return NULL.
  *
  * The object must be locked.
  */
 vm_page_t
 vm_page_prev(vm_page_t m)
 {
 	vm_page_t pred;
 
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 	pred = TAILQ_PREV(m, pglist, listq);
 	if (pred == NULL)
 		return (NULL);
 	MPASS(pred->object == m->object);
 	return (pred->pindex == m->pindex - 1 ? pred : NULL);
 }
 
 /*
  * Uses the page mnew as a replacement for an existing page at index
  * pindex which must be already present in the object.
  *
  * Both pages must be exclusively busied on enter.  The old page is
  * unbusied on exit.
  *
  * A return value of true means mold is now free.  If this is not the
  * final ref and the caller does not hold a wire reference it may not
  * continue to access the page.
  */
 static bool
 vm_page_replace_hold(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex,
     vm_page_t mold)
 {
 	vm_page_t mret;
 	bool dropped;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	vm_page_assert_xbusied(mold);
 	KASSERT(mnew->object == NULL && (mnew->ref_count & VPRC_OBJREF) == 0,
 	    ("vm_page_replace: page %p already in object", mnew));
 
 	/*
 	 * This function mostly follows vm_page_insert() and
 	 * vm_page_remove() without the radix, object count and vnode
 	 * dance.  Double check such functions for more comments.
 	 */
 
 	mnew->object = object;
 	mnew->pindex = pindex;
 	atomic_set_int(&mnew->ref_count, VPRC_OBJREF);
 	/* Swap the trie entry; the page returned must be the old page. */
 	mret = vm_radix_replace(&object->rtree, mnew);
 	KASSERT(mret == mold,
 	    ("invalid page replacement, mold=%p, mret=%p", mold, mret));
 	KASSERT((mold->oflags & VPO_UNMANAGED) ==
 	    (mnew->oflags & VPO_UNMANAGED),
 	    ("vm_page_replace: mismatched VPO_UNMANAGED"));
 
 	/* Keep the resident page list in sorted order. */
 	TAILQ_INSERT_AFTER(&object->memq, mold, mnew, listq);
 	TAILQ_REMOVE(&object->memq, mold, listq);
 	mold->object = NULL;
 
 	/*
 	 * The object's resident_page_count does not change because we have
 	 * swapped one page for another, but the generation count should
 	 * change if the page is dirty.
 	 */
 	if (pmap_page_is_write_mapped(mnew))
 		vm_object_set_writeable_dirty(object);
 	/* True only if the object reference was the old page's last ref. */
 	dropped = vm_page_drop(mold, VPRC_OBJREF) == VPRC_OBJREF;
 	vm_page_xunbusy(mold);
 
 	return (dropped);
 }
 
 /*
  *	vm_page_replace:
  *
  *	Replace "mold" with "mnew" at "pindex" in the object using
  *	vm_page_replace_hold(), and free "mold" if that call reports that
  *	its last reference was dropped.  "mnew" must be exclusively busied.
  */
 void
 vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex,
     vm_page_t mold)
 {
 
 	vm_page_assert_xbusied(mnew);
 
 	if (vm_page_replace_hold(mnew, object, pindex, mold))
 		vm_page_free(mold);
 }
 
 /*
  *	vm_page_rename:
  *
  *	Move the given memory entry from its
  *	current object to the specified target object/offset.
  *
  *	Note: swap associated with the page must be invalidated by the move.  We
  *	      have to do this for several reasons:  (1) we aren't freeing the
  *	      page, (2) we are dirtying the page, (3) the VM system is probably
  *	      moving the page from object A to B, and will then later move
  *	      the backing store from A to B and we can't have a conflict.
  *
  *	Note: we *always* dirty the page.  It is necessary both for the
  *	      fact that we moved it, and because we may be invalidating
  *	      swap.
  *
  *	The objects must be locked.
  *
  *	Returns 0 on success.  Returns 1 if the insertion into the new
  *	object's radix trie fails, in which case the page keeps its original
  *	pindex and is left in its current object.
  */
 int
 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
 {
 	vm_page_t mpred;
 	vm_pindex_t opidx;
 
 	VM_OBJECT_ASSERT_WLOCKED(new_object);
 
 	KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m));
 	mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex);
 	KASSERT(mpred == NULL || mpred->pindex != new_pindex,
 	    ("vm_page_rename: pindex already renamed"));
 
 	/*
 	 * Create a custom version of vm_page_insert() which does not depend
 	 * on a predecessor lookup in the old object and can cheat on the
 	 * implementation aspects of the function.
 	 */
 	opidx = m->pindex;
 	m->pindex = new_pindex;
 	if (vm_radix_insert(&new_object->rtree, m)) {
 		m->pindex = opidx;
 		return (1);
 	}
 
 	/*
 	 * The operation cannot fail anymore.  The removal must happen before
 	 * the listq iterator is tainted.
 	 */
 	/* Restore the old pindex so the removal finds the old trie entry. */
 	m->pindex = opidx;
 	vm_page_object_remove(m);
 
 	/* Return back to the new pindex to complete vm_page_insert(). */
 	m->pindex = new_pindex;
 	m->object = new_object;
 
 	vm_page_insert_radixdone(m, new_object, mpred);
 	vm_page_dirty(m);
 	return (0);
 }
 
 /*
  *	vm_page_alloc:
  *
  *	Allocate and return a page that is associated with the specified
  *	object and offset pair.  By default, this page is exclusive busied.
  *
  *	The caller must always specify an allocation class.
  *
  *	allocation classes:
  *	VM_ALLOC_NORMAL		normal process request
  *	VM_ALLOC_SYSTEM		system *really* needs a page
  *	VM_ALLOC_INTERRUPT	interrupt time request
  *
  *	optional allocation flags:
  *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
  *				intends to allocate
  *	VM_ALLOC_NOBUSY		do not exclusive busy the page
  *	VM_ALLOC_NODUMP		do not include the page in a kernel core dump
  *	VM_ALLOC_NOOBJ		page is not associated with an object and
  *				should not be exclusive busy
  *	VM_ALLOC_SBUSY		shared busy the allocated page
  *	VM_ALLOC_WIRED		wire the allocated page
  *	VM_ALLOC_ZERO		prefer a zeroed page
  *
  *	This is a wrapper around vm_page_alloc_after() that looks up the
  *	predecessor page in the object's radix trie, or passes NULL when no
  *	object is given.
  */
 vm_page_t
 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 {
 
 	return (vm_page_alloc_after(object, pindex, req, object != NULL ?
 	    vm_radix_lookup_le(&object->rtree, pindex) : NULL));
 }
 
 /*
  * Variant of vm_page_alloc() restricted to the given domain.  Wraps
  * vm_page_alloc_domain_after(), looking up the predecessor page in the
  * object's radix trie, or passing NULL when no object is given.
  */
 vm_page_t
 vm_page_alloc_domain(vm_object_t object, vm_pindex_t pindex, int domain,
     int req)
 {
 
 	return (vm_page_alloc_domain_after(object, pindex, domain, req,
 	    object != NULL ? vm_radix_lookup_le(&object->rtree, pindex) :
 	    NULL));
 }
 
 /*
  * Allocate a page in the specified object with the given page index.  To
  * optimize insertion of the page into the object, the caller must also specify
  * the resident page in the object with largest index smaller than the given
  * page index, or NULL if no such page exists.
  *
  * Domains are tried in the order produced by the domainset iterator until
  * an allocation succeeds or the iterator reports that it is done.
  */
 vm_page_t
 vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
     int req, vm_page_t mpred)
 {
 	struct vm_domainset_iter iter;
 	vm_page_t page;
 	int domain;
 
 	vm_domainset_iter_page_init(&iter, object, pindex, &domain, &req);
 	for (;;) {
 		page = vm_page_alloc_domain_after(object, pindex, domain, req,
 		    mpred);
 		if (page != NULL)
 			break;
 		if (vm_domainset_iter_page(&iter, object, &domain) != 0)
 			break;
 	}
 
 	return (page);
 }
 
 /*
  * Returns true if the number of free pages exceeds the minimum
  * for the request class and false otherwise.
  *
  * On success "npages" has been subtracted from the domain's free count.
  * The minimum left behind is 0 for VM_ALLOC_INTERRUPT,
  * vmd_interrupt_free_min for VM_ALLOC_SYSTEM and vmd_free_reserved for
  * any other class.
  */
 static int
 _vm_domain_allocate(struct vm_domain *vmd, int req_class, int npages)
 {
 	u_int limit, old, new;
 
 	if (req_class == VM_ALLOC_INTERRUPT)
 		limit = 0;
 	else if (req_class == VM_ALLOC_SYSTEM)
 		limit = vmd->vmd_interrupt_free_min;
 	else
 		limit = vmd->vmd_free_reserved;
 
 	/*
 	 * Attempt to reserve the pages.  Fail if we're below the limit.
 	 */
 	limit += npages;
 	old = vmd->vmd_free_count;
 	/* A failed fcmpset refreshes "old", so the limit is rechecked. */
 	do {
 		if (old < limit)
 			return (0);
 		new = old - npages;
 	} while (atomic_fcmpset_int(&vmd->vmd_free_count, &old, new) == 0);
 
 	/* Wake the page daemon if we've crossed the threshold. */
 	if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old))
 		pagedaemon_wakeup(vmd->vmd_domain);
 
 	/* Only update bitsets on transitions. */
 	if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) ||
 	    (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe))
 		vm_domain_set(vmd);
 
 	return (1);
 }
 
 /*
  * Reserve "npages" free pages in the domain for a request with the given
  * VM_ALLOC_* flags.  The allocation class is extracted from "req" and the
  * result of _vm_domain_allocate() is returned.
  */
 int
 vm_domain_allocate(struct vm_domain *vmd, int req, int npages)
 {
 	int req_class;
 
 	/*
 	 * The page daemon is allowed to dig deeper into the free page list.
 	 */
 	req_class = req & VM_ALLOC_CLASS_MASK;
 	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
 		req_class = VM_ALLOC_SYSTEM;
 	return (_vm_domain_allocate(vmd, req_class, npages));
 }
 
 /*
  * Allocate a page from the given domain and, when "object" is not NULL,
  * insert it into the object at "pindex" after "mpred".
  *
  * Sources are tried in this order: a reservation (only when
  * VM_NRESERVLEVEL > 0), the domain's page cache zone for the chosen pool,
  * and finally the free page queues.  Returns the initialized page, or NULL
  * if no page could be allocated or the insertion into the object failed.
  */
 vm_page_t
 vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
     int req, vm_page_t mpred)
 {
 	struct vm_domain *vmd;
 	vm_page_t m;
 	int flags, pool;
 
 	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
 	    (object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
 	    ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
 	    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
 	    ("inconsistent object(%p)/req(%x)", object, req));
 	KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0,
 	    ("Can't sleep and retry object insertion."));
 	KASSERT(mpred == NULL || mpred->pindex < pindex,
 	    ("mpred %p doesn't precede pindex 0x%jx", mpred,
 	    (uintmax_t)pindex));
 	if (object != NULL)
 		VM_OBJECT_ASSERT_WLOCKED(object);
 
 	flags = 0;
 	m = NULL;
 	/* Object-less allocations are served from the direct pool. */
 	pool = object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT;
 again:
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * Can we allocate the page from a reservation?
 	 */
 	if (vm_object_reserv(object) &&
 	    (m = vm_reserv_alloc_page(object, pindex, domain, req, mpred)) !=
 	    NULL) {
 		/* Use the domain the returned page actually belongs to. */
 		domain = vm_phys_domain(m);
 		vmd = VM_DOMAIN(domain);
 		goto found;
 	}
 #endif
 	vmd = VM_DOMAIN(domain);
 	/* Try the domain's page cache zone for this pool, if one exists. */
 	if (vmd->vmd_pgcache[pool].zone != NULL) {
 		m = uma_zalloc(vmd->vmd_pgcache[pool].zone, M_NOWAIT);
 		if (m != NULL) {
 			flags |= PG_PCPU_CACHE;
 			goto found;
 		}
 	}
 	if (vm_domain_allocate(vmd, req, 1)) {
 		/*
 		 * If not, allocate it from the free page queues.
 		 */
 		vm_domain_free_lock(vmd);
 		m = vm_phys_alloc_pages(domain, pool, 0);
 		vm_domain_free_unlock(vmd);
 		if (m == NULL) {
 			/* Return the page reserved by vm_domain_allocate(). */
 			vm_domain_freecnt_inc(vmd, 1);
 #if VM_NRESERVLEVEL > 0
 			if (vm_reserv_reclaim_inactive(domain))
 				goto again;
 #endif
 		}
 	}
 	if (m == NULL) {
 		/*
 		 * Not allocatable, give up.
 		 */
 		if (vm_domain_alloc_fail(vmd, object, req))
 			goto again;
 		return (NULL);
 	}
 
 	/*
 	 * At this point we had better have found a good page.
 	 */
 found:
 	vm_page_dequeue(m);
 	vm_page_alloc_check(m);
 
 	/*
 	 * Initialize the page.  Only the PG_ZERO flag is inherited.
 	 */
 	if ((req & VM_ALLOC_ZERO) != 0)
 		flags |= (m->flags & PG_ZERO);
 	if ((req & VM_ALLOC_NODUMP) != 0)
 		flags |= PG_NODUMP;
 	m->flags = flags;
 	m->a.flags = 0;
 	m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
 	    VPO_UNMANAGED : 0;
 	/*
 	 * Set the initial busy state: exclusive by default, unbusied for
 	 * NOBUSY or NOOBJ requests, one sharer for SBUSY requests.
 	 */
 	m->busy_lock = VPB_UNBUSIED;
 	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
 		m->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
 	if ((req & VM_ALLOC_SBUSY) != 0)
 		m->busy_lock = VPB_SHARERS_WORD(1);
 	if (req & VM_ALLOC_WIRED) {
 		vm_wire_add(1);
 		m->ref_count = 1;
 	}
 	m->a.act_count = 0;
 
 	if (object != NULL) {
 		if (vm_page_insert_after(m, object, pindex, mpred)) {
 			/* Insertion failed: undo the setup and free the page. */
 			if (req & VM_ALLOC_WIRED) {
 				vm_wire_sub(1);
 				m->ref_count = 0;
 			}
 			KASSERT(m->object == NULL, ("page %p has object", m));
 			m->oflags = VPO_UNMANAGED;
 			m->busy_lock = VPB_UNBUSIED;
 			/* Don't change PG_ZERO. */
 			vm_page_free_toq(m);
 			if (req & VM_ALLOC_WAITFAIL) {
 				VM_OBJECT_WUNLOCK(object);
 				vm_radix_wait();
 				VM_OBJECT_WLOCK(object);
 			}
 			return (NULL);
 		}
 
 		/* Ignore device objects; the pager sets "memattr" for them. */
 		if (object->memattr != VM_MEMATTR_DEFAULT &&
 		    (object->flags & OBJ_FICTITIOUS) == 0)
 			pmap_page_set_memattr(m, object->memattr);
 	} else
 		m->pindex = pindex;
 
 	return (m);
 }
 
 /*
  *	vm_page_alloc_contig:
  *
  *	Allocate a contiguous set of physical pages of the given size "npages"
  *	from the free lists.  All of the physical pages must be at or above
  *	the given physical address "low" and below the given physical address
  *	"high".  The given value "alignment" determines the alignment of the
  *	first physical page in the set.  If the given value "boundary" is
  *	non-zero, then the set of physical pages cannot cross any physical
  *	address boundary that is a multiple of that value.  Both "alignment"
  *	and "boundary" must be a power of two.
  *
  *	If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT,
  *	then the memory attribute setting for the physical pages is configured
  *	to the object's memory attribute setting.  Otherwise, the memory
  *	attribute setting for the physical pages is configured to "memattr",
  *	overriding the object's memory attribute setting.  However, if the
  *	object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the
  *	memory attribute setting for the physical pages cannot be configured
  *	to VM_MEMATTR_DEFAULT.
  *
  *	The specified object may not contain fictitious pages.
  *
  *	The caller must always specify an allocation class.
  *
  *	allocation classes:
  *	VM_ALLOC_NORMAL		normal process request
  *	VM_ALLOC_SYSTEM		system *really* needs a page
  *	VM_ALLOC_INTERRUPT	interrupt time request
  *
  *	optional allocation flags:
  *	VM_ALLOC_NOBUSY		do not exclusive busy the page
  *	VM_ALLOC_NODUMP		do not include the page in a kernel core dump
  *	VM_ALLOC_NOOBJ		page is not associated with an object and
  *				should not be exclusive busy
  *	VM_ALLOC_SBUSY		shared busy the allocated page
  *	VM_ALLOC_WIRED		wire the allocated page
  *	VM_ALLOC_ZERO		prefer a zeroed page
  */
 vm_page_t
 vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr)
 {
 	struct vm_domainset_iter iter;
 	vm_page_t first;
 	int domain;
 
 	/*
 	 * Try each domain offered by the domainset iterator until the
 	 * contiguous run is obtained or the iterator reports it is done.
 	 */
 	vm_domainset_iter_page_init(&iter, object, pindex, &domain, &req);
 	for (;;) {
 		first = vm_page_alloc_contig_domain(object, pindex, domain,
 		    req, npages, low, high, alignment, boundary, memattr);
 		if (first != NULL)
 			break;
 		if (vm_domainset_iter_page(&iter, object, &domain) != 0)
 			break;
 	}
 
 	return (first);
 }
 
 /*
  *	vm_page_alloc_contig_domain:
  *
  *	Allocate "npages" physically contiguous pages from the given
  *	"domain".  When "object" is non-NULL the pages are inserted into it
  *	at consecutive indices starting at "pindex".  Returns the first page
  *	of the run, or NULL on failure.
  *
  *	When VM_NRESERVLEVEL > 0 and vm_object_reserv(object) holds, a
  *	reservation is tried first; otherwise the pages are taken from the
  *	domain's free page queues.  If inserting a page into the object
  *	fails, every page of the run is freed again and NULL is returned.
  */
 vm_page_t
 vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
     int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr)
 {
 	struct vm_domain *vmd;
 	vm_page_t m, m_ret, mpred;
 	u_int busy_lock, flags, oflags;
 
 	mpred = NULL;	/* XXX: pacify gcc */
 	KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
 	    (object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
 	    ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
 	    (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
 	    ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", object,
 	    req));
 	KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0,
 	    ("Can't sleep and retry object insertion."));
 	if (object != NULL) {
 		VM_OBJECT_ASSERT_WLOCKED(object);
 		KASSERT((object->flags & OBJ_FICTITIOUS) == 0,
 		    ("vm_page_alloc_contig: object %p has fictitious pages",
 		    object));
 	}
 	KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
 
 	/*
 	 * Look up the page at or below "pindex"; it is passed on as the
 	 * predecessor for reservation allocation and object insertion.
 	 */
 	if (object != NULL) {
 		mpred = vm_radix_lookup_le(&object->rtree, pindex);
 		KASSERT(mpred == NULL || mpred->pindex != pindex,
 		    ("vm_page_alloc_contig: pindex already allocated"));
 	}
 
 	/*
 	 * Can we allocate the pages without the number of free pages falling
 	 * below the lower bound for the allocation class?
 	 */
 	m_ret = NULL;
again:
#if VM_NRESERVLEVEL > 0
 	/*
 	 * Can we allocate the pages from a reservation?
 	 */
 	if (vm_object_reserv(object) &&
 	    (m_ret = vm_reserv_alloc_contig(object, pindex, domain, req,
 	    mpred, npages, low, high, alignment, boundary)) != NULL) {
 		domain = vm_phys_domain(m_ret);
 		vmd = VM_DOMAIN(domain);
 		goto found;
 	}
#endif
 	vmd = VM_DOMAIN(domain);
 	if (vm_domain_allocate(vmd, req, npages)) {
 		/*
 		 * allocate them from the free page queues.
 		 */
 		vm_domain_free_lock(vmd);
 		m_ret = vm_phys_alloc_contig(domain, npages, low, high,
 		    alignment, boundary);
 		vm_domain_free_unlock(vmd);
 		if (m_ret == NULL) {
 			/* Give back the pages claimed by vm_domain_allocate(). */
 			vm_domain_freecnt_inc(vmd, npages);
#if VM_NRESERVLEVEL > 0
 			if (vm_reserv_reclaim_contig(domain, npages, low,
 			    high, alignment, boundary))
 				goto again;
#endif
 		}
 	}
 	if (m_ret == NULL) {
 		/* A non-zero return asks for the allocation to be retried. */
 		if (vm_domain_alloc_fail(vmd, object, req))
 			goto again;
 		return (NULL);
 	}
#if VM_NRESERVLEVEL > 0
found:
#endif
 	/* Dequeue every page of the run and sanity-check its state. */
 	for (m = m_ret; m < &m_ret[npages]; m++) {
 		vm_page_dequeue(m);
 		vm_page_alloc_check(m);
 	}
 
 	/*
 	 * Initialize the pages.  Only the PG_ZERO flag is inherited.
 	 */
 	flags = 0;
 	if ((req & VM_ALLOC_ZERO) != 0)
 		flags = PG_ZERO;
 	if ((req & VM_ALLOC_NODUMP) != 0)
 		flags |= PG_NODUMP;
 	oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
 	    VPO_UNMANAGED : 0;
 	/*
 	 * Initial busy state: exclusive unless VM_ALLOC_NOBUSY,
 	 * VM_ALLOC_NOOBJ or VM_ALLOC_SBUSY is given; shared for
 	 * VM_ALLOC_SBUSY; otherwise unbusied.
 	 */
 	busy_lock = VPB_UNBUSIED;
 	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
 		busy_lock = VPB_CURTHREAD_EXCLUSIVE;
 	if ((req & VM_ALLOC_SBUSY) != 0)
 		busy_lock = VPB_SHARERS_WORD(1);
 	if ((req & VM_ALLOC_WIRED) != 0)
 		vm_wire_add(npages);
 	/* A default "memattr" request inherits a non-default object setting. */
 	if (object != NULL) {
 		if (object->memattr != VM_MEMATTR_DEFAULT &&
 		    memattr == VM_MEMATTR_DEFAULT)
 			memattr = object->memattr;
 	}
 	for (m = m_ret; m < &m_ret[npages]; m++) {
 		m->a.flags = 0;
 		/*
 		 * PG_ZERO survives only if the page had it and it was
 		 * requested; PG_NODUMP is set only if it was requested.
 		 */
 		m->flags = (m->flags | PG_NODUMP) & flags;
 		m->busy_lock = busy_lock;
 		if ((req & VM_ALLOC_WIRED) != 0)
 			m->ref_count = 1;
 		m->a.act_count = 0;
 		m->oflags = oflags;
 		if (object != NULL) {
 			if (vm_page_insert_after(m, object, pindex, mpred)) {
 				/*
 				 * Insertion failed.  Undo the wire count,
 				 * then free the whole run.  Pages up to and
 				 * including the failing one already had
 				 * their ref_count set above, so clear it
 				 * before freeing.
 				 */
 				if ((req & VM_ALLOC_WIRED) != 0)
 					vm_wire_sub(npages);
 				KASSERT(m->object == NULL,
 				    ("page %p has object", m));
 				mpred = m;
 				for (m = m_ret; m < &m_ret[npages]; m++) {
 					if (m <= mpred &&
 					    (req & VM_ALLOC_WIRED) != 0)
 						m->ref_count = 0;
 					m->oflags = VPO_UNMANAGED;
 					m->busy_lock = VPB_UNBUSIED;
 					/* Don't change PG_ZERO. */
 					vm_page_free_toq(m);
 				}
 				if (req & VM_ALLOC_WAITFAIL) {
 					VM_OBJECT_WUNLOCK(object);
 					vm_radix_wait();
 					VM_OBJECT_WLOCK(object);
 				}
 				return (NULL);
 			}
 			mpred = m;
 		} else
 			m->pindex = pindex;
 		if (memattr != VM_MEMATTR_DEFAULT)
 			pmap_page_set_memattr(m, memattr);
 		pindex++;
 	}
 	return (m_ret);
 }
 
 /*
  * Check a page that has been freshly dequeued from a freelist.
  *
  * Asserts (through KASSERT) that the page has no object, is on no page
  * queue and carries no queue-state flags, has no references, is not busy,
  * is not dirty, has the default memory attribute, and has no valid bits.
  */
 static void
 vm_page_alloc_check(vm_page_t m)
 {
 
 	KASSERT(m->object == NULL, ("page %p has object", m));
 	KASSERT(m->a.queue == PQ_NONE &&
 	    (m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
 	    ("page %p has unexpected queue %d, flags %#x",
 	    m, m->a.queue, (m->a.flags & PGA_QUEUE_STATE_MASK)));
 	KASSERT(m->ref_count == 0, ("page %p has references", m));
 	KASSERT(!vm_page_busied(m), ("page %p is busy", m));
 	KASSERT(m->dirty == 0, ("page %p is dirty", m));
 	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
 	    ("page %p has unexpected memattr %d",
 	    m, pmap_page_get_memattr(m)));
 	KASSERT(m->valid == 0, ("free page %p is valid", m));
 }
 
 /*
  * 	vm_page_alloc_freelist:
  *
  *	Allocate a physical page from the specified free page list.
  *
  *	The caller must always specify an allocation class.
  *
  *	allocation classes:
  *	VM_ALLOC_NORMAL		normal process request
  *	VM_ALLOC_SYSTEM		system *really* needs a page
  *	VM_ALLOC_INTERRUPT	interrupt time request
  *
  *	optional allocation flags:
  *	VM_ALLOC_COUNT(number)	the number of additional pages that the caller
  *				intends to allocate
  *	VM_ALLOC_WIRED		wire the allocated page
  *	VM_ALLOC_ZERO		prefer a zeroed page
  */
 vm_page_t
 vm_page_alloc_freelist(int freelist, int req)
 {
 	struct vm_domainset_iter di;
 	vm_page_t m;
 	int domain;
 
 	vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req);
 	do {
 		m = vm_page_alloc_freelist_domain(domain, freelist, req);
 		if (m != NULL)
 			break;
 	} while (vm_domainset_iter_page(&di, NULL, &domain) == 0);
 
 	return (m);
 }
 
 vm_page_t
 vm_page_alloc_freelist_domain(int domain, int freelist, int req)
 {
 	struct vm_domain *vmd;
 	vm_page_t m;
 	u_int keep;
 
 	vmd = VM_DOMAIN(domain);
 
 	/*
 	 * Keep trying until a page is obtained or vm_domain_alloc_fail()
 	 * returns zero, i.e. does not ask for a retry.
 	 */
 	for (;;) {
 		m = NULL;
 		if (vm_domain_allocate(vmd, req, 1)) {
 			vm_domain_free_lock(vmd);
 			m = vm_phys_alloc_freelist_pages(domain, freelist,
 			    VM_FREEPOOL_DIRECT, 0);
 			vm_domain_free_unlock(vmd);
 			/* Return the page claimed by vm_domain_allocate(). */
 			if (m == NULL)
 				vm_domain_freecnt_inc(vmd, 1);
 		}
 		if (m != NULL)
 			break;
 		if (!vm_domain_alloc_fail(vmd, NULL, req))
 			return (NULL);
 	}
 	vm_page_dequeue(m);
 	vm_page_alloc_check(m);
 
 	/*
 	 * Set up the page.  PG_ZERO is the only flag that may carry over,
 	 * and only when a zeroed page was asked for.
 	 */
 	keep = (req & VM_ALLOC_ZERO) != 0 ? PG_ZERO : 0;
 	m->a.flags = 0;
 	m->flags &= keep;
 	if ((req & VM_ALLOC_WIRED) != 0) {
 		vm_wire_add(1);
 		m->ref_count = 1;
 	}
 	/* The page is unmanaged, so "act_count" is left alone. */
 	m->oflags = VPO_UNMANAGED;
 	return (m);
 }
 
 /*
  * Fill "store" with up to "cnt" pages taken from the page cache's domain and
  * pool.  Returns the number of pages actually stored, which may be zero.
  * The "domain" argument is overwritten with the cache's own domain before
  * use, and "flags" is not consulted.
  */
 static int
 vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags)
 {
 	struct vm_pgcache *pgcache;
 	struct vm_domain *vmd;
 	int got;
 
 	pgcache = arg;
 	vmd = VM_DOMAIN(pgcache->domain);
 
 	/*
 	 * Refuse to import while the domain is flagged as severely short of
 	 * pages, and never on behalf of the page daemon: its job is to
 	 * refill the free page store, not to add memory pressure.
 	 */
 	if (vmd->vmd_severeset || curproc == pageproc)
 		return (0);
 	if (!_vm_domain_allocate(vmd, VM_ALLOC_NORMAL, cnt))
 		return (0);
 
 	domain = vmd->vmd_domain;
 	vm_domain_free_lock(vmd);
 	got = vm_phys_alloc_npages(domain, pgcache->pool, cnt,
 	    (vm_page_t *)store);
 	vm_domain_free_unlock(vmd);
 
 	/* Hand back whatever part of the claim could not be satisfied. */
 	if (got != cnt)
 		vm_domain_freecnt_inc(vmd, cnt - got);
 
 	return (got);
 }
 
 static void
 vm_page_zone_release(void *arg, void **store, int cnt)
 {
 	struct vm_domain *vmd;
 	struct vm_pgcache *pgcache;
 	vm_page_t m;
 	int i;
 
 	pgcache = arg;
 	vmd = VM_DOMAIN(pgcache->domain);
 	vm_domain_free_lock(vmd);
 	for (i = 0; i < cnt; i++) {
 		m = (vm_page_t)store[i];
 		vm_phys_free_pages(m, 0);
 	}
 	vm_domain_free_unlock(vmd);
 	vm_domain_freecnt_inc(vmd, cnt);
 }
 
 /*
  * Scan restrictions passed through the "options" argument of
  * vm_page_scan_contig().
  */
 #define	VPSC_ANY	0	/* No restrictions. */
 #define	VPSC_NORESERV	1	/* Skip reservations; implies VPSC_NOSUPER. */
 #define	VPSC_NOSUPER	2	/* Skip superpages. */
 
 /*
  *	vm_page_scan_contig:
  *
  *	Scan vm_page_array[] between the specified entries "m_start" and
  *	"m_end" for a run of contiguous physical pages that satisfy the
  *	specified conditions, and return the lowest page in the run.  The
  *	specified "alignment" determines the alignment of the lowest physical
  *	page in the run.  If the specified "boundary" is non-zero, then the
  *	run of physical pages cannot span a physical address that is a
  *	multiple of "boundary".
  *
  *	"m_end" is never dereferenced, so it need not point to a vm_page
  *	structure within vm_page_array[].
  *
  *	"npages" must be greater than zero.  "m_start" and "m_end" must not
  *	span a hole (or discontiguity) in the physical address space.  Both
  *	"alignment" and "boundary" must be a power of two.
  *
  *	"options" carries the VPSC_* restrictions on reserved pages and
  *	superpages.  NULL is returned when no run of at least "npages" pages
  *	is found.
  */
 vm_page_t
 vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end,
     u_long alignment, vm_paddr_t boundary, int options)
 {
-	struct mtx *m_mtx;
 	vm_object_t object;
 	vm_paddr_t pa;
 	vm_page_t m, m_run;
 #if VM_NRESERVLEVEL > 0
 	int level;
 #endif
 	int m_inc, order, run_ext, run_len;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 	m_run = NULL;
 	run_len = 0;
-	m_mtx = NULL;
 	for (m = m_start; m < m_end && run_len < npages; m += m_inc) {
 		KASSERT((m->flags & PG_MARKER) == 0,
 		    ("page %p is PG_MARKER", m));
 		KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1,
 		    ("fictitious page %p has invalid ref count", m));
 
 		/*
 		 * If the current page would be the start of a run, check its
 		 * physical address against the end, alignment, and boundary
 		 * conditions.  If it doesn't satisfy these conditions, either
 		 * terminate the scan or advance to the next page that
 		 * satisfies the failed condition.
 		 */
 		if (run_len == 0) {
 			KASSERT(m_run == NULL, ("m_run != NULL"));
 			if (m + npages > m_end)
 				break;
 			pa = VM_PAGE_TO_PHYS(m);
 			if ((pa & (alignment - 1)) != 0) {
 				m_inc = atop(roundup2(pa, alignment) - pa);
 				continue;
 			}
 			if (rounddown2(pa ^ (pa + ptoa(npages) - 1),
 			    boundary) != 0) {
 				m_inc = atop(roundup2(pa, boundary) - pa);
 				continue;
 			}
 		} else
 			KASSERT(m_run != NULL, ("m_run == NULL"));
 
-		vm_page_change_lock(m, &m_mtx);
-		m_inc = 1;
 retry:
+		m_inc = 1;
 		if (vm_page_wired(m))
 			run_ext = 0;
 #if VM_NRESERVLEVEL > 0
 		else if ((level = vm_reserv_level(m)) >= 0 &&
 		    (options & VPSC_NORESERV) != 0) {
 			run_ext = 0;
 			/* Advance to the end of the reservation. */
 			pa = VM_PAGE_TO_PHYS(m);
 			m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) -
 			    pa);
 		}
 #endif
-		else if ((object = m->object) != NULL) {
+		else if ((object =
+		    (vm_object_t)atomic_load_ptr(&m->object)) != NULL) {
 			/*
 			 * The page is considered eligible for relocation if
 			 * and only if it could be laundered or reclaimed by
 			 * the page daemon.
 			 */
-			if (!VM_OBJECT_TRYRLOCK(object)) {
-				mtx_unlock(m_mtx);
-				VM_OBJECT_RLOCK(object);
-				mtx_lock(m_mtx);
-				if (m->object != object) {
-					/*
-					 * The page may have been freed.
-					 */
-					VM_OBJECT_RUNLOCK(object);
-					goto retry;
-				}
+			VM_OBJECT_RLOCK(object);
+			if (object != m->object) {
+				VM_OBJECT_RUNLOCK(object);
+				goto retry;
 			}
 			/* Don't care: PG_NODUMP, PG_ZERO. */
 			if (object->type != OBJT_DEFAULT &&
 			    object->type != OBJT_SWAP &&
 			    object->type != OBJT_VNODE) {
 				run_ext = 0;
#if VM_NRESERVLEVEL > 0
 			} else if ((options & VPSC_NOSUPER) != 0 &&
 			    (level = vm_reserv_level_iffullpop(m)) >= 0) {
 				run_ext = 0;
 				/* Advance to the end of the superpage. */
 				pa = VM_PAGE_TO_PHYS(m);
 				m_inc = atop(roundup2(pa + 1,
 				    vm_reserv_size(level)) - pa);
#endif
 			} else if (object->memattr == VM_MEMATTR_DEFAULT &&
-			    vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) &&
-			    !vm_page_wired(m)) {
+			    vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) {
 				/*
 				 * The page is allocated but eligible for
 				 * relocation.  Extend the current run by one
 				 * page.
 				 */
 				KASSERT(pmap_page_get_memattr(m) ==
 				    VM_MEMATTR_DEFAULT,
 				    ("page %p has an unexpected memattr", m));
 				KASSERT((m->oflags & (VPO_SWAPINPROG |
 				    VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0,
 				    ("page %p has unexpected oflags", m));
 				/* Don't care: PGA_NOSYNC. */
 				run_ext = 1;
 			} else
 				run_ext = 0;
 			VM_OBJECT_RUNLOCK(object);
#if VM_NRESERVLEVEL > 0
 		} else if (level >= 0) {
 			/*
 			 * The page is reserved but not yet allocated.  In
 			 * other words, it is still free.  Extend the current
 			 * run by one page.
 			 */
 			run_ext = 1;
#endif
 		} else if ((order = m->order) < VM_NFREEORDER) {
 			/*
 			 * The page is enqueued in the physical memory
 			 * allocator's free page queues.  Moreover, it is the
 			 * first page in a power-of-two-sized run of
 			 * contiguous free pages.  Add these pages to the end
 			 * of the current run, and jump ahead.
 			 */
 			run_ext = 1 << order;
 			m_inc = 1 << order;
 		} else {
 			/*
 			 * Skip the page for one of the following reasons: (1)
 			 * It is enqueued in the physical memory allocator's
 			 * free page queues.  However, it is not the first
 			 * page in a run of contiguous free pages.  (This case
 			 * rarely occurs because the scan is performed in
 			 * ascending order.) (2) It is not reserved, and it is
 			 * transitioning from free to allocated.  (Conversely,
 			 * the transition from allocated to free for managed
 			 * pages is blocked by the page lock.) (3) It is
 			 * allocated but not contained by an object and not
 			 * wired, e.g., allocated by Xen's balloon driver.
 			 *
 			 * NOTE(review): the page lock is no longer taken in
 			 * this function once the '+' lines are applied, so
 			 * the "blocked by the page lock" remark in (2) looks
 			 * stale -- confirm what now blocks that transition.
 			 */
 			run_ext = 0;
 		}
 
 		/*
 		 * Extend or reset the current run of pages.
 		 */
 		if (run_ext > 0) {
 			if (run_len == 0)
 				m_run = m;
 			run_len += run_ext;
 		} else {
 			if (run_len > 0) {
 				m_run = NULL;
 				run_len = 0;
 			}
 		}
 	}
-	if (m_mtx != NULL)
-		mtx_unlock(m_mtx);
 	if (run_len >= npages)
 		return (m_run);
 	return (NULL);
 }
 
 /*
  *	vm_page_reclaim_run:
  *
  *	Try to relocate each of the allocated virtual pages within the
  *	specified run of physical pages to a new physical address.  Free the
  *	physical pages underlying the relocated virtual pages.  A virtual page
  *	is relocatable if and only if it could be laundered or reclaimed by
  *	the page daemon.  Whenever possible, a virtual page is relocated to a
  *	physical address above "high".
  *
  *	Returns 0 if every physical page within the run was already free or
  *	just freed by a successful relocation.  Otherwise, returns a non-zero
  *	value indicating why the last attempt to relocate a virtual page was
  *	unsuccessful.  The values assigned below are EBUSY, EINVAL and ENOMEM.
  *
  *	"req_class" must be an allocation class.
  *
  *	Pages that are released are collected on a local list and handed back
  *	to the physical memory allocator in one batch before returning.
  */
 static int
 vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run,
     vm_paddr_t high)
 {
 	struct vm_domain *vmd;
-	struct mtx *m_mtx;
 	struct spglist free;
 	vm_object_t object;
 	vm_paddr_t pa;
 	vm_page_t m, m_end, m_new;
 	int error, order, req;
 
 	KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class,
 	    ("req_class is not an allocation class"));
 	SLIST_INIT(&free);
 	error = 0;
 	m = m_run;
 	m_end = m_run + npages;
-	m_mtx = NULL;
 	for (; error == 0 && m < m_end; m++) {
 		KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0,
 		    ("page %p is PG_FICTITIOUS or PG_MARKER", m));
 
 		/*
-		 * Avoid releasing and reacquiring the same page lock.
+		 * Racily check for wirings.  Races are handled once the object
+		 * lock is held and the page is unmapped.
 		 */
-		vm_page_change_lock(m, &m_mtx);
-retry:
-		/*
-		 * Racily check for wirings.  Races are handled below.
-		 */
 		if (vm_page_wired(m))
 			error = EBUSY;
-		else if ((object = m->object) != NULL) {
+		else if ((object =
+		    (vm_object_t)atomic_load_ptr(&m->object)) != NULL) {
 			/*
 			 * The page is relocated if and only if it could be
 			 * laundered or reclaimed by the page daemon.
 			 */
-			if (!VM_OBJECT_TRYWLOCK(object)) {
-				mtx_unlock(m_mtx);
-				VM_OBJECT_WLOCK(object);
-				mtx_lock(m_mtx);
-				if (m->object != object) {
-					/*
-					 * The page may have been freed.
-					 */
-					VM_OBJECT_WUNLOCK(object);
-					goto retry;
-				}
-			}
+			VM_OBJECT_WLOCK(object);
 			/* Don't care: PG_NODUMP, PG_ZERO. */
-			if (object->type != OBJT_DEFAULT &&
+			if (m->object != object ||
+			    (object->type != OBJT_DEFAULT &&
 			    object->type != OBJT_SWAP &&
-			    object->type != OBJT_VNODE)
+			    object->type != OBJT_VNODE))
 				error = EINVAL;
 			else if (object->memattr != VM_MEMATTR_DEFAULT)
 				error = EINVAL;
 			else if (vm_page_queue(m) != PQ_NONE &&
 			    vm_page_tryxbusy(m) != 0) {
 				if (vm_page_wired(m)) {
 					vm_page_xunbusy(m);
 					error = EBUSY;
 					goto unlock;
 				}
 				KASSERT(pmap_page_get_memattr(m) ==
 				    VM_MEMATTR_DEFAULT,
 				    ("page %p has an unexpected memattr", m));
 				KASSERT(m->oflags == 0,
 				    ("page %p has unexpected oflags", m));
 				/* Don't care: PGA_NOSYNC. */
 				if (!vm_page_none_valid(m)) {
 					/*
 					 * First, try to allocate a new page
 					 * that is above "high".  Failing
 					 * that, try to allocate a new page
 					 * that is below "m_run".  Allocate
 					 * the new page between the end of
 					 * "m_run" and "high" only as a last
 					 * resort.
 					 */
 					req = req_class | VM_ALLOC_NOOBJ;
 					if ((m->flags & PG_NODUMP) != 0)
 						req |= VM_ALLOC_NODUMP;
 					if (trunc_page(high) !=
 					    ~(vm_paddr_t)PAGE_MASK) {
 						m_new = vm_page_alloc_contig(
 						    NULL, 0, req, 1,
 						    round_page(high),
 						    ~(vm_paddr_t)0,
 						    PAGE_SIZE, 0,
 						    VM_MEMATTR_DEFAULT);
 					} else
 						m_new = NULL;
 					if (m_new == NULL) {
 						pa = VM_PAGE_TO_PHYS(m_run);
 						m_new = vm_page_alloc_contig(
 						    NULL, 0, req, 1,
 						    0, pa - 1, PAGE_SIZE, 0,
 						    VM_MEMATTR_DEFAULT);
 					}
 					if (m_new == NULL) {
 						/*
 						 * "pa" was set by the second
 						 * attempt just above, which
 						 * always runs before this one.
 						 */
 						pa += ptoa(npages);
 						m_new = vm_page_alloc_contig(
 						    NULL, 0, req, 1,
 						    pa, high, PAGE_SIZE, 0,
 						    VM_MEMATTR_DEFAULT);
 					}
 					if (m_new == NULL) {
 						vm_page_xunbusy(m);
 						error = ENOMEM;
 						goto unlock;
 					}
 
 					/*
 					 * Unmap the page and check for new
 					 * wirings that may have been acquired
 					 * through a pmap lookup.
 					 */
 					if (object->ref_count != 0 &&
 					    !vm_page_try_remove_all(m)) {
 						vm_page_xunbusy(m);
 						vm_page_free(m_new);
 						error = EBUSY;
 						goto unlock;
 					}
 
 					/*
 					 * Replace "m" with the new page.  For
 					 * vm_page_replace(), "m" must be busy
 					 * and dequeued.  Finally, change "m"
 					 * as if vm_page_free() was called.
 					 */
 					m_new->a.flags = m->a.flags &
 					    ~PGA_QUEUE_STATE_MASK;
 					KASSERT(m_new->oflags == VPO_UNMANAGED,
 					    ("page %p is managed", m_new));
 					m_new->oflags = 0;
 					pmap_copy_page(m, m_new);
 					m_new->valid = m->valid;
 					m_new->dirty = m->dirty;
 					m->flags &= ~PG_ZERO;
 					vm_page_dequeue(m);
 					if (vm_page_replace_hold(m_new, object,
 					    m->pindex, m) &&
 					    vm_page_free_prep(m))
 						SLIST_INSERT_HEAD(&free, m,
 						    plinks.s.ss);
 
 					/*
 					 * The new page must be deactivated
 					 * before the object is unlocked.
 					 */
-					vm_page_change_lock(m_new, &m_mtx);
 					vm_page_deactivate(m_new);
 				} else {
 					/*
 					 * No valid contents: nothing to copy,
 					 * so the page is simply released.
 					 */
 					m->flags &= ~PG_ZERO;
 					vm_page_dequeue(m);
 					if (vm_page_free_prep(m))
 						SLIST_INSERT_HEAD(&free, m,
 						    plinks.s.ss);
 					KASSERT(m->dirty == 0,
 					    ("page %p is dirty", m));
 				}
 			} else
 				error = EBUSY;
unlock:
 			VM_OBJECT_WUNLOCK(object);
 		} else {
 			/*
 			 * The page has no object.  Under the free lock,
 			 * decide whether it is free; if neither check below
 			 * finds it free, "order" stays VM_NFREEORDER and the
 			 * run is rejected with EINVAL.
 			 */
 			MPASS(vm_phys_domain(m) == domain);
 			vmd = VM_DOMAIN(domain);
 			vm_domain_free_lock(vmd);
 			order = m->order;
 			if (order < VM_NFREEORDER) {
 				/*
 				 * The page is enqueued in the physical memory
 				 * allocator's free page queues.  Moreover, it
 				 * is the first page in a power-of-two-sized
 				 * run of contiguous free pages.  Jump ahead
 				 * to the last page within that run, and
 				 * continue from there.
 				 */
 				m += (1 << order) - 1;
 			}
#if VM_NRESERVLEVEL > 0
 			else if (vm_reserv_is_page_free(m))
 				order = 0;
#endif
 			vm_domain_free_unlock(vmd);
 			if (order == VM_NFREEORDER)
 				error = EINVAL;
 		}
 	}
-	if (m_mtx != NULL)
-		mtx_unlock(m_mtx);
 	/* Hand the collected pages back to the allocator in one batch. */
 	if ((m = SLIST_FIRST(&free)) != NULL) {
 		int cnt;
 
 		vmd = VM_DOMAIN(domain);
 		cnt = 0;
 		vm_domain_free_lock(vmd);
 		do {
 			MPASS(vm_phys_domain(m) == domain);
 			SLIST_REMOVE_HEAD(&free, plinks.s.ss);
 			vm_phys_free_pages(m, 0);
 			cnt++;
 		} while ((m = SLIST_FIRST(&free)) != NULL);
 		vm_domain_free_unlock(vmd);
 		vm_domain_freecnt_inc(vmd, cnt);
 	}
 	return (error);
 }
 
 /* Number of candidate runs remembered per scan (size of "m_runs"). */
 #define	NRUNS	16
 
 /* RUN_INDEX() masks with NRUNS - 1, so NRUNS must be a power of two. */
 CTASSERT(powerof2(NRUNS));
 
 /* Map a running count onto a slot of the NRUNS-entry ring. */
 #define	RUN_INDEX(count)	((count) & (NRUNS - 1))
 
 /* Reclamation reports success once at least this many pages are reclaimed. */
 #define	MIN_RECLAIM	8
 
 /*
  *	vm_page_reclaim_contig_domain:
  *
  *	Within "domain", reclaim allocated, contiguous physical memory that
  *	meets the given constraints by relocating the virtual pages backed
  *	by it.  Returns true when reclamation succeeded and false otherwise.
  *	Relocation itself needs free pages, so reclamation can fail when
  *	free pages are short; callers are then expected to vm_wait() before
  *	retrying the failed allocation, e.g., vm_page_alloc_contig().
  *
  *	"req" must name an allocation class:
  *	VM_ALLOC_NORMAL		normal process request
  *	VM_ALLOC_SYSTEM		system *really* needs a page
  *	VM_ALLOC_INTERRUPT	interrupt time request
  *
  *	Any optional allocation flags in "req" are ignored.
  *
  *	"npages" must be greater than zero, and "alignment" and "boundary"
  *	must each be a power of two.
  */
 bool
 vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
 	struct vm_domain *vmd;
 	vm_paddr_t scan_low;
 	vm_page_t run, runs[NRUNS];
 	u_long nfree, nruns, reclaimed;
 	int attempt, options, req_class;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 
 	/*
 	 * Only the allocation class matters here.  The page daemon may dig
 	 * deeper into the free page list than a normal request.
 	 */
 	req_class = req & VM_ALLOC_CLASS_MASK;
 	if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
 		req_class = VM_ALLOC_SYSTEM;
 
 	/*
 	 * Give up right away when the free page count cannot cover the
 	 * request.  NOTE(review): the first comparison is applied to every
 	 * allocation class, not just VM_ALLOC_NORMAL.
 	 */
 	vmd = VM_DOMAIN(domain);
 	nfree = vmd->vmd_free_count;
 	if (nfree < npages + vmd->vmd_free_reserved)
 		return (false);
 	if (nfree < npages + vmd->vmd_interrupt_free_min &&
 	    req_class == VM_ALLOC_SYSTEM)
 		return (false);
 	if (nfree < npages && req_class == VM_ALLOC_INTERRUPT)
 		return (false);
 
 	/*
 	 * Make up to three passes, loosening the restrictions on
 	 * reservations and superpages ("options") after each one.
 	 */
 	options = VPSC_NORESERV;
 	for (;;) {
 		/*
 		 * Collect the runs that satisfy the constraints.  "runs" is
 		 * a ring of NRUNS slots, so only the most recently found
 		 * (highest) runs are retained.
 		 */
 		scan_low = low;
 		nruns = 0;
 		while ((run = vm_phys_scan_contig(domain, npages, scan_low,
 		    high, alignment, boundary, options)) != NULL) {
 			scan_low = VM_PAGE_TO_PHYS(run) + ptoa(npages);
 			runs[RUN_INDEX(nruns)] = run;
 			nruns++;
 		}
 
 		/*
 		 * Work through the retained runs newest first until at
 		 * least MIN_RECLAIM pages have been reclaimed.  The tally
 		 * restarts on every pass because reclamation is idempotent
 		 * and the same runs will (likely) show up again once the
 		 * restrictions are relaxed.
 		 */
 		reclaimed = 0;
 		for (attempt = 0; nruns > 0 && attempt < NRUNS; attempt++) {
 			nruns--;
 			run = runs[RUN_INDEX(nruns)];
 			if (vm_page_reclaim_run(req_class, domain, npages,
 			    run, high) != 0)
 				continue;
 			reclaimed += npages;
 			if (reclaimed >= MIN_RECLAIM)
 				return (true);
 		}
 
 		/*
 		 * Loosen the restrictions for the next pass, or finish if
 		 * this pass already ran without any.
 		 */
 		switch (options) {
 		case VPSC_NORESERV:
 			options = VPSC_NOSUPER;
 			break;
 		case VPSC_NOSUPER:
 			options = VPSC_ANY;
 			break;
 		case VPSC_ANY:
 			return (reclaimed != 0);
 		default:
 			break;
 		}
 	}
 }
 
 /*
  * Run vm_page_reclaim_contig_domain() on each domain handed out by the
  * domainset iterator until one of them reports success or the iterator
  * returns non-zero.  Returns the result of the last attempt.
  */
 bool
 vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary)
 {
 	struct vm_domainset_iter iter;
 	int dom;
 	bool done;
 
 	vm_domainset_iter_page_init(&iter, NULL, 0, &dom, &req);
 	for (;;) {
 		done = vm_page_reclaim_contig_domain(dom, req, npages, low,
 		    high, alignment, boundary);
 		if (done || vm_domainset_iter_page(&iter, NULL, &dom) != 0)
 			break;
 	}
 
 	return (done);
 }
 
 /*
  * Set the domain in the appropriate page level domainset.
  *
  * Under vm_domainset_lock, mark the domain in vm_min_domains and/or
  * vm_severe_domains when the corresponding vm_paging_*() test holds and
  * the domain is not already marked.
  */
 void
 vm_domain_set(struct vm_domain *vmd)
 {
 
 	mtx_lock(&vm_domainset_lock);
 	/* The per-domain flag is tested first, then the paging predicate. */
 	if (!vmd->vmd_minset && vm_paging_min(vmd)) {
 		vmd->vmd_minset = 1;
 		DOMAINSET_SET(vmd->vmd_domain, &vm_min_domains);
 	}
 	if (!vmd->vmd_severeset && vm_paging_severe(vmd)) {
 		vmd->vmd_severeset = 1;
 		DOMAINSET_SET(vmd->vmd_domain, &vm_severe_domains);
 	}
 	mtx_unlock(&vm_domainset_lock);
 }
 
 /*
  * Clear the domain from the appropriate page level domainset.
  *
  * Under vm_domainset_lock, unmark the domain in vm_min_domains and/or
  * vm_severe_domains once the corresponding vm_paging_*() test no longer
  * holds, and wake any threads recorded as waiting on that set.  Also
  * wakes a page daemon sleeper on this domain when enough pages are free,
  * and any sleepers counted in vm_pageproc_waiters.
  */
 void
 vm_domain_clear(struct vm_domain *vmd)
 {
 
 	mtx_lock(&vm_domainset_lock);
 	if (vmd->vmd_minset && !vm_paging_min(vmd)) {
 		vmd->vmd_minset = 0;
 		DOMAINSET_CLR(vmd->vmd_domain, &vm_min_domains);
 		/* Reset the waiter count and wake the sleepers on this set. */
 		if (vm_min_waiters != 0) {
 			vm_min_waiters = 0;
 			wakeup(&vm_min_domains);
 		}
 	}
 	if (vmd->vmd_severeset && !vm_paging_severe(vmd)) {
 		vmd->vmd_severeset = 0;
 		DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains);
 		if (vm_severe_waiters != 0) {
 			vm_severe_waiters = 0;
 			wakeup(&vm_severe_domains);
 		}
 	}
 
 	/*
 	 * If pageout daemon needs pages, then tell it that there are
 	 * some free.
 	 */
 	if (vmd->vmd_pageout_pages_needed &&
 	    vmd->vmd_free_count >= vmd->vmd_pageout_free_min) {
 		wakeup(&vmd->vmd_pageout_pages_needed);
 		vmd->vmd_pageout_pages_needed = 0;
 	}
 
 	/* See comments in vm_wait_doms(). */
 	if (vm_pageproc_waiters) {
 		vm_pageproc_waiters = 0;
 		wakeup(&vm_pageproc_waiters);
 	}
 	mtx_unlock(&vm_domainset_lock);
 }
 
 /*
  * Block the caller for as long as vm_page_count_min() reports that free
  * pages are at or below the global min threshold.
  */
 void
 vm_wait_min(void)
 {
 
 	mtx_lock(&vm_domainset_lock);
 	for (;;) {
 		if (!vm_page_count_min())
 			break;
 		/* Register as a waiter, then sleep on the min domain set. */
 		vm_min_waiters++;
 		msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0);
 	}
 	mtx_unlock(&vm_domainset_lock);
 }
 
 /*
  * Block the caller for as long as vm_page_count_severe() reports that free
  * pages are at or below the global severe threshold.
  */
 void
 vm_wait_severe(void)
 {
 
 	mtx_lock(&vm_domainset_lock);
 	for (;;) {
 		if (!vm_page_count_severe())
 			break;
 		/* Register as a waiter, then sleep on the severe domain set. */
 		vm_severe_waiters++;
 		msleep(&vm_severe_domains, &vm_domainset_lock, PVM,
 		    "vmwait", 0);
 	}
 	mtx_unlock(&vm_domainset_lock);
 }
 
 /*
  * Return the combined number of recorded severe, min and pageproc waiters.
  * The counters are read here without taking vm_domainset_lock.
  */
 u_int
 vm_wait_count(void)
 {
 	u_int waiters;
 
 	waiters = vm_severe_waiters + vm_min_waiters + vm_pageproc_waiters;
 	return (waiters);
 }
 
 /*
  * Sleep until free pages may be available in the domains named by "wdoms".
  * Every msleep() below is passed PDROP and is not followed by an unlock;
  * the one path that does not sleep unlocks explicitly.
  */
 void
 vm_wait_doms(const domainset_t *wdoms)
 {
 
 	mtx_lock(&vm_domainset_lock);
 	if (curproc == pageproc) {
 		/*
 		 * Wakeups for a non-specific vm_wait by the pageproc are
 		 * synchronized racily so that expensive global locking can
 		 * be avoided.  To cope with a missed wakeup the sleep is
 		 * limited to a single tick.  Most pageproc allocations are
 		 * expected to come from kmem or vm_page_grab*, which use
 		 * the more specific and race-free vm_wait_domain().
 		 */
 		vm_pageproc_waiters++;
 		msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM | PDROP,
 		    "pageprocwait", 1);
 		return;
 	}
 
 	/*
 	 * XXX Ideally the wait would last only until the allocation could
 	 * be satisfied.  As written, new allocators can consume all freed
 	 * pages while old allocators are still waiting.
 	 */
 	if (!vm_page_count_min_set(wdoms)) {
 		mtx_unlock(&vm_domainset_lock);
 		return;
 	}
 	vm_min_waiters++;
 	msleep(&vm_min_domains, &vm_domainset_lock, PVM | PDROP, "vmwait", 0);
 }
 
 /*
  *	vm_wait_domain:
  *
  *	Sleep until free pages are available for allocation in "domain".
  *	- Used in various places after a memory allocation has failed.
  *	- The domain's free lock must not be held by the caller.
  */
 void
 vm_wait_domain(int domain)
 {
 	struct vm_domain *vmd;
 	domainset_t wdom;
 
 	vmd = VM_DOMAIN(domain);
 	vm_domain_free_assert_unlocked(vmd);
 
 	if (curproc != pageproc) {
 		/* Ordinary threads wait on a set holding just this domain. */
 		if (pageproc == NULL)
 			panic("vm_wait in early boot");
 		DOMAINSET_ZERO(&wdom);
 		DOMAINSET_SET(vmd->vmd_domain, &wdom);
 		vm_wait_doms(&wdom);
 		return;
 	}
 
 	/*
 	 * The page daemon sleeps only while the domain's free count is
 	 * below vmd_pageout_free_min.
 	 */
 	mtx_lock(&vm_domainset_lock);
 	if (vmd->vmd_free_count >= vmd->vmd_pageout_free_min) {
 		mtx_unlock(&vm_domainset_lock);
 		return;
 	}
 	vmd->vmd_pageout_pages_needed = 1;
 	msleep(&vmd->vmd_pageout_pages_needed, &vm_domainset_lock,
 	    PDROP | PSWP, "VMWait", 0);
 }
 
 /*
  *	vm_wait:
  *
  *	Sleep until free pages are available for allocation in the
  *	affinity domains of "obj".  When "obj" is NULL, or it has no
  *	domain policy, the calling thread's domain set is used instead.
  *	Used in various places after a memory allocation has failed.
  */
 void
 vm_wait(vm_object_t obj)
 {
 	struct domainset *policy;
 
 	/*
 	 * Each policy pointer is loaded exactly once: the struct domainset
 	 * it refers to is immutable, but the pointer itself may change.
 	 */
 	policy = obj != NULL ? obj->domain.dr_policy : NULL;
 	if (policy == NULL)
 		policy = curthread->td_domain.dr_policy;
 
 	vm_wait_doms(&policy->ds_mask);
 }
 
 /*
  *	vm_domain_alloc_fail:
  *
  *	Called when a page allocation function fails.  Adds to the
  *	domain's pageout deficit and, if "req" contains VM_ALLOC_WAITOK or
  *	VM_ALLOC_WAITFAIL, waits for free pages in the domain.
  *
  *	The domain free lock must not be held on entry.  If "object" is
  *	non-NULL its write lock is released around the wait and reacquired
  *	before returning.
  *
  *	Returns EAGAIN when the caller should retry (VM_ALLOC_WAITOK), and
  *	0 otherwise.
  */
 static int
 vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
 {
 
 	vm_domain_free_assert_unlocked(vmd);
 
 	/* Record a deficit of at least one page for this failure. */
 	atomic_add_int(&vmd->vmd_pageout_deficit,
 	    max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
 
 	/* Callers that asked for neither kind of wait get no retry. */
 	if ((req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
 		return (0);
 
 	if (object != NULL)
 		VM_OBJECT_WUNLOCK(object);
 	vm_wait_domain(vmd->vmd_domain);
 	if (object != NULL)
 		VM_OBJECT_WLOCK(object);
 
 	return ((req & VM_ALLOC_WAITOK) != 0 ? EAGAIN : 0);
 }
 
 /*
  *	vm_waitpfault:
  *
  *	Sleep, for at most "timo", until free pages are available for
  *	allocation in the domains of "dset".
  *	- Meant to be called only from vm_fault, which makes page
  *	  faulting processes easy to track.
  *	- The sleep priority is lower than that of vm_wait(), so that
  *	  vm_wait()ing processes get to grab memory first.  This balance
  *	  should not be changed without careful testing.
  */
 void
 vm_waitpfault(struct domainset *dset, int timo)
 {
 
 	/*
 	 * XXX Ideally the wait would last only until the allocation could
 	 * be satisfied.  As written, new allocators can consume all freed
 	 * pages while old allocators are still waiting.
 	 */
 	mtx_lock(&vm_domainset_lock);
 	if (!vm_page_count_min_set(&dset->ds_mask)) {
 		mtx_unlock(&vm_domainset_lock);
 		return;
 	}
 	vm_min_waiters++;
 	msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
 	    "pfault", timo);
 }
 
 /*
  * Return the page queue with index "queue" in the domain that
  * vm_pagequeue_domain() reports for page "m".
  */
 static struct vm_pagequeue *
 _vm_page_pagequeue(vm_page_t m, uint8_t queue)
 {
 
 	return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue]);
 }
 
 #ifdef INVARIANTS
 /*
  * Return the page queue named by the queue index in the page's current
  * atomic state.  Only compiled when INVARIANTS is defined.
  */
 static struct vm_pagequeue *
 vm_page_pagequeue(vm_page_t m)
 {
 
 	return (_vm_page_pagequeue(m, vm_page_astate_load(m).queue));
 }
 #endif
 
 /*
  * Attempt to replace the page's atomic state, expected to equal "*old", with
  * "new".  Each failed attempt is counted in pqstate_commit_retries.  The
  * attempt is repeated only while "*old" still holds the bits it had on entry
  * (presumably to absorb spurious compare-and-set failures); once a failure
  * leaves different bits in "*old", false is returned so that the caller can
  * recompute "new".  Returns true when the update succeeds.
  */
 static __always_inline bool
 vm_page_pqstate_fcmpset(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new)
 {
 	vm_page_astate_t tmp;
 
 	/* Remember the caller's expected state to detect real changes. */
 	tmp = *old;
 	do {
 		if (__predict_true(vm_page_astate_fcmpset(m, old, new)))
 			return (true);
 		counter_u64_add(pqstate_commit_retries, 1);
 	} while (old->_bits == tmp._bits);
 
 	return (false);
 }
 
 /*
  * Commit a queue state update that moves the page out of the queue "pq",
  * whose lock must be held.  Returns true if "new" was installed.  On false
  * the page's queue linkage and the queue's page count are restored to what
  * they were on entry.
  */
 static bool
 _vm_page_pqstate_commit_dequeue(struct vm_pagequeue *pq, vm_page_t m,
     vm_page_astate_t *old, vm_page_astate_t new)
 {
 	vm_page_t succ;
 
 	vm_pagequeue_assert_locked(pq);
 	KASSERT(vm_page_pagequeue(m) == pq,
 	    ("%s: queue %p does not match page %p", __func__, pq, m));
 	KASSERT(old->queue != PQ_NONE && new.queue != old->queue,
 	    ("%s: invalid queue indices %d %d",
 	    __func__, old->queue, new.queue));
 
 	/*
 	 * A page that is not physically enqueued only needs its queue index
 	 * updated.
 	 */
 	if ((old->flags & PGA_ENQUEUED) == 0)
 		return (vm_page_pqstate_fcmpset(m, old, new));
 
 	/*
 	 * Once the queue index of the page changes there is nothing
 	 * synchronizing with further updates to the page's physical
 	 * queue state.  So unlink the page speculatively, remembering its
 	 * successor, and be ready to put it back if the state update fails.
 	 */
 	new.flags &= ~PGA_ENQUEUED;
 	succ = TAILQ_NEXT(m, plinks.q);
 	TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_dec(pq);
 	if (vm_page_pqstate_fcmpset(m, old, new))
 		return (true);
 
 	/* Roll back: reinsert the page at its previous position. */
 	if (succ != NULL)
 		TAILQ_INSERT_BEFORE(succ, m, plinks.q);
 	else
 		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
 	vm_pagequeue_cnt_inc(pq);
 	return (false);
 }
 
 /*
  * Lock the page queue named by "old->queue" and, provided the page's state
  * still equals "*old", commit an update that removes the page from that
  * queue.  If the state changed in the meantime, "*old" is refreshed and
  * false is returned.
  */
 static bool
 vm_page_pqstate_commit_dequeue(vm_page_t m, vm_page_astate_t *old,
     vm_page_astate_t new)
 {
 	struct vm_pagequeue *pq;
 	vm_page_astate_t cur;
 	bool committed;
 
 	pq = _vm_page_pagequeue(m, old->queue);
 
 	/*
 	 * The queue field and PGA_ENQUEUED flag are stable only so long as the
 	 * corresponding page queue lock is held, so reload the state under
 	 * the lock before acting on it.
 	 */
 	vm_pagequeue_lock(pq);
 	cur = vm_page_astate_load(m);
 	if (__predict_true(cur._bits == old->_bits)) {
 		committed = _vm_page_pqstate_commit_dequeue(pq, m, old, new);
 	} else {
 		*old = cur;
 		committed = false;
 	}
 	vm_pagequeue_unlock(pq);
 	return (committed);
 }
 
 /*
  * Commit a queue state update that enqueues or requeues a page.
  *
  * The lock of "pq" must be held, and "new" must name the same queue as
  * "*old", which may not be PQ_NONE.  Returns false, leaving the queue
  * untouched, if the state update cannot be committed.  Otherwise the page is
  * linked into the queue and true is returned.
  */
 static bool
 _vm_page_pqstate_commit_requeue(struct vm_pagequeue *pq, vm_page_t m,
     vm_page_astate_t *old, vm_page_astate_t new)
 {
 	struct vm_domain *vmd;
 
 	vm_pagequeue_assert_locked(pq);
 	KASSERT(old->queue != PQ_NONE && new.queue == old->queue,
 	    ("%s: invalid queue indices %d %d",
 	    __func__, old->queue, new.queue));
 
 	/* Publish the new state first; the list is only changed on success. */
 	new.flags |= PGA_ENQUEUED;
 	if (!vm_page_pqstate_fcmpset(m, old, new))
 		return (false);
 
 	/*
 	 * A page that was already enqueued is unlinked so that it can be
 	 * reinserted below; otherwise the queue gains a page.
 	 */
 	if ((old->flags & PGA_ENQUEUED) != 0)
 		TAILQ_REMOVE(&pq->pq_pl, m, plinks.q);
 	else
 		vm_pagequeue_cnt_inc(pq);
 
 	/*
 	 * Give PGA_REQUEUE_HEAD precedence over PGA_REQUEUE.  In particular, if
 	 * both flags are set in close succession, only PGA_REQUEUE_HEAD will be
 	 * applied, even if it was set first.
 	 */
 	if ((old->flags & PGA_REQUEUE_HEAD) != 0) {
 		/* Insert just before the domain's inactive-queue marker. */
 		vmd = vm_pagequeue_domain(m);
 		KASSERT(pq == &vmd->vmd_pagequeues[PQ_INACTIVE],
 		    ("%s: invalid page queue for page %p", __func__, m));
 		TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q);
 	} else {
 		TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
 	}
 	return (true);
 }
 
 /*
  * Commit a queue state update that encodes a request for a deferred queue
  * operation, then hand the page to the batch queue of "new.queue".  When
  * "*old" and "new" are already identical no atomic update is attempted, but
  * the page is still submitted.  Returns false if the atomic update fails.
  */
 static bool
 vm_page_pqstate_commit_request(vm_page_t m, vm_page_astate_t *old,
     vm_page_astate_t new)
 {
 
 	KASSERT(old->queue == new.queue || new.queue != PQ_NONE,
 	    ("%s: invalid state, queue %d flags %x",
 	    __func__, new.queue, new.flags));
 
 	if (old->_bits != new._bits) {
 		if (!vm_page_pqstate_fcmpset(m, old, new))
 			return (false);
 	}
 	vm_page_pqbatch_submit(m, new.queue);
 	return (true);
 }
 
 /*
  * A generic queue state update function.  This handles more cases than the
  * specialized functions above.  Returns true if the page's state now equals
  * "new", and false if the update could not be committed.
  */
 bool
 vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new)
 {
 
 	/* Nothing to do when the state is not changing. */
 	if (old->_bits == new._bits)
 		return (true);
 
 	if (old->queue == PQ_NONE || new.queue == old->queue) {
 		/*
 		 * The page is not leaving a queue.  Update the state in
 		 * place, and schedule batch processing only if the update
 		 * newly set a queue operation flag.
 		 */
 		if (!vm_page_pqstate_fcmpset(m, old, new))
 			return (false);
 		if (new.queue == PQ_NONE)
 			return (true);
 		if ((new.flags & ~old->flags & PGA_QUEUE_OP_MASK) != 0)
 			vm_page_pqbatch_submit(m, new.queue);
 		return (true);
 	}
 
 	/*
 	 * The page is leaving its current queue.  Dequeue it first and then,
 	 * if it has a destination queue, schedule its insertion there.
 	 */
 	if (!vm_page_pqstate_commit_dequeue(m, old, new))
 		return (false);
 	if (new.queue != PQ_NONE)
 		vm_page_pqbatch_submit(m, new.queue);
 	return (true);
 }
 
 /*
  * Apply deferred queue state updates to a page.
  *
  * Must be called in a critical section with the lock of "pq" held, where
  * "pq" is the queue with index "queue" in the page's domain.  If the page no
  * longer names "queue" or has no queue operation flag set, nothing is done
  * and queue_nops is incremented.  Otherwise the requested dequeue or requeue
  * is carried out, retrying with the refreshed state until it commits, and
  * queue_ops is incremented.
  */
 static inline void
 vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m, uint8_t queue)
 {
 	vm_page_astate_t new, old;
 
 	CRITICAL_ASSERT(curthread);
 	vm_pagequeue_assert_locked(pq);
 	KASSERT(queue < PQ_COUNT,
 	    ("%s: invalid queue index %d", __func__, queue));
 	KASSERT(pq == _vm_page_pagequeue(m, queue),
 	    ("%s: page %p does not belong to queue %p", __func__, m, pq));
 
 	/*
 	 * "old" is loaded once; a failed commit below refreshes it, so each
 	 * iteration re-evaluates the page's latest state.
 	 */
 	for (old = vm_page_astate_load(m);;) {
 		if (__predict_false(old.queue != queue ||
 		    (old.flags & PGA_QUEUE_OP_MASK) == 0)) {
 			counter_u64_add(queue_nops, 1);
 			break;
 		}
 		KASSERT(old.queue != PQ_NONE || (old.flags & PGA_QUEUE_STATE_MASK) == 0,
 		    ("%s: page %p has unexpected queue state", __func__, m));
 
 		new = old;
 		if ((old.flags & PGA_DEQUEUE) != 0) {
 			/* A dequeue request overrides any requeue request. */
 			new.flags &= ~PGA_QUEUE_OP_MASK;
 			new.queue = PQ_NONE;
 			if (__predict_true(_vm_page_pqstate_commit_dequeue(pq,
 			    m, &old, new))) {
 				counter_u64_add(queue_ops, 1);
 				break;
 			}
 		} else {
 			/* Consume the requeue request flags. */
 			new.flags &= ~(PGA_REQUEUE | PGA_REQUEUE_HEAD);
 			if (__predict_true(_vm_page_pqstate_commit_requeue(pq,
 			    m, &old, new))) {
 				counter_u64_add(queue_ops, 1);
 				break;
 			}
 		}
 	}
 }
 
 /*
  * Apply the deferred queue operations of every page recorded in the batch
  * queue "bq", in order, and then reinitialize the batch queue.
  */
 static void
 vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq,
     uint8_t queue)
 {
 	int idx;
 
 	idx = 0;
 	while (idx < bq->bq_cnt) {
 		vm_pqbatch_process_page(pq, bq->bq_pa[idx], queue);
 		idx++;
 	}
 	vm_batchqueue_init(bq);
 }
 
 /*
  *	vm_page_pqbatch_submit:		[ internal use only ]
  *
  *	Enqueue a page in the specified page queue's batched work queue.
  *	The caller must have encoded the requested operation in the page
  *	structure's a.flags field.
  *
  *	The page must be managed and "queue" must be a valid queue index.
  *	If the page cannot be added to the current CPU's batch queue, the
  *	page queue lock is taken, the batch queue is processed, and the
  *	page itself is processed directly.
  */
 void
 vm_page_pqbatch_submit(vm_page_t m, uint8_t queue)
 {
 	struct vm_batchqueue *bq;
 	struct vm_pagequeue *pq;
 	int domain;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("page %p is unmanaged", m));
 	KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue));
 
 	domain = vm_phys_domain(m);
 	pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue];
 
 	/* Fast path: record the page in the per-CPU batch queue. */
 	critical_enter();
 	bq = DPCPU_PTR(pqbatch[domain][queue]);
 	if (vm_batchqueue_insert(bq, m)) {
 		critical_exit();
 		return;
 	}
 	critical_exit();
 
 	/*
 	 * The insertion failed (presumably the batch queue is full).  The
 	 * critical section is exited before the page queue lock is acquired,
 	 * so the per-CPU batch queue pointer is looked up again afterwards.
 	 */
 	vm_pagequeue_lock(pq);
 	critical_enter();
 	bq = DPCPU_PTR(pqbatch[domain][queue]);
 	vm_pqbatch_process(pq, bq, queue);
 	vm_pqbatch_process_page(pq, m, queue);
 	vm_pagequeue_unlock(pq);
 	critical_exit();
 }
 
 /*
  *	vm_page_pqbatch_drain:		[ internal use only ]
  *
  *	Force all per-CPU page queue batch queues to be drained.  This is
  *	intended for use in severe memory shortages, to ensure that pages
  *	do not remain stuck in the batch queues.
  *
  *	The calling thread is bound to each CPU in turn and is unbound
  *	again before returning.
  */
 void
 vm_page_pqbatch_drain(void)
 {
 	struct thread *td;
 	struct vm_domain *vmd;
 	struct vm_pagequeue *pq;
 	int cpu, domain, queue;
 
 	td = curthread;
 	CPU_FOREACH(cpu) {
 		/*
 		 * Bind to the CPU, presumably so that DPCPU_PTR() below
 		 * resolves to that CPU's batch queues.
 		 */
 		thread_lock(td);
 		sched_bind(td, cpu);
 		thread_unlock(td);
 
 		/* Drain the batch queue of every (domain, queue) pair. */
 		for (domain = 0; domain < vm_ndomains; domain++) {
 			vmd = VM_DOMAIN(domain);
 			for (queue = 0; queue < PQ_COUNT; queue++) {
 				pq = &vmd->vmd_pagequeues[queue];
 				vm_pagequeue_lock(pq);
 				critical_enter();
 				vm_pqbatch_process(pq,
 				    DPCPU_PTR(pqbatch[domain][queue]), queue);
 				critical_exit();
 				vm_pagequeue_unlock(pq);
 			}
 		}
 	}
 	thread_lock(td);
 	sched_unbind(td);
 	thread_unlock(td);
 }
 
 /*
  *	vm_page_dequeue_deferred:	[ internal use only ]
  *
  *	Request removal of the given page from its current page
  *	queue.  Physical removal from the queue may be deferred
  *	indefinitely.
  *
  *	The page must be locked.
  */
 void
 vm_page_dequeue_deferred(vm_page_t m)
 {
 	vm_page_astate_t new, old;
 
 	old = vm_page_astate_load(m);
 	for (;;) {
 		/* A page that is in no queue needs no dequeue request. */
 		if (old.queue == PQ_NONE) {
 			KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
 			    ("%s: page %p has unexpected queue state",
 			    __func__, m));
 			return;
 		}
 
 		/*
 		 * Flag the page for removal and submit the request.  A failed
 		 * commit refreshes "old", so simply try again.
 		 */
 		new = old;
 		new.flags |= PGA_DEQUEUE;
 		if (vm_page_pqstate_commit_request(m, &old, new))
 			return;
 	}
 }
 
 /*
  *	vm_page_dequeue:
  *
  *	Remove the page from whichever page queue it's in, if any, before
  *	returning.
  */
 void
 vm_page_dequeue(vm_page_t m)
 {
 	vm_page_astate_t new, old;
 
 	old = vm_page_astate_load(m);
 	for (;;) {
 		/* The page is in no queue: nothing to remove. */
 		if (old.queue == PQ_NONE) {
 			KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0,
 			    ("%s: page %p has unexpected queue state",
 			    __func__, m));
 			return;
 		}
 
 		/*
 		 * Drop any pending queue operations and leave the queue.  A
 		 * failed commit refreshes "old", so simply try again.
 		 */
 		new = old;
 		new.flags &= ~PGA_QUEUE_OP_MASK;
 		new.queue = PQ_NONE;
 		if (vm_page_pqstate_commit_dequeue(m, &old, new))
 			return;
 	}
 }
 
 /*
  * Schedule the given page for insertion into the specified page queue.
  * Physical insertion of the page may be deferred indefinitely.
  *
  * The page must not currently be in a queue or have queue state flags set,
  * and it must hold at least one reference.
  */
 static void
 vm_page_enqueue(vm_page_t m, uint8_t queue)
 {
 
 	KASSERT(m->a.queue == PQ_NONE &&
 	    (m->a.flags & PGA_QUEUE_STATE_MASK) == 0,
 	    ("%s: page %p is already enqueued", __func__, m));
 	KASSERT(m->ref_count > 0,
 	    ("%s: page %p does not carry any references", __func__, m));
 
 	/*
 	 * Record the target queue and request a requeue; the batch processing
 	 * code performs the actual insertion.
 	 */
 	m->a.queue = queue;
 	if ((m->a.flags & PGA_REQUEUE) == 0)
 		vm_page_aflag_set(m, PGA_REQUEUE);
 	vm_page_pqbatch_submit(m, queue);
 }
 
 /*
  *	vm_page_free_prep:
  *
  *	Prepares the given page to be put on the free list,
  *	disassociating it from any VM object. The caller may return
  *	the page to the free list only if this function returns true.
  *
  *	The object must be locked.  The page must be locked if it is
  *	managed.
  */
 static bool
 vm_page_free_prep(vm_page_t m)
 {
 
 	/*
 	 * Synchronize with threads that have dropped a reference to this
 	 * page.
 	 */
 	atomic_thread_fence_acq();
 
 	if (vm_page_sbusied(m))
 		panic("vm_page_free_prep: freeing shared busy page %p", m);
 
 #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
 	/* Diagnostic check: a page flagged PG_ZERO must really be zeroed. */
 	if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) {
 		uint64_t *p;
 		int i;
 		p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 		for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++)
 			KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx",
 			    m, i, (uintmax_t)*p));
 	}
 #endif
 	/*
 	 * A managed page must be unmapped with no mapping flags left set; an
 	 * unmanaged page must not be in a page queue.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		KASSERT(!pmap_page_is_mapped(m),
 		    ("vm_page_free_prep: freeing mapped page %p", m));
 		KASSERT((m->a.flags & (PGA_EXECUTABLE | PGA_WRITEABLE)) == 0,
 		    ("vm_page_free_prep: mapping flags set in page %p", m));
 	} else {
 		KASSERT(m->a.queue == PQ_NONE,
 		    ("vm_page_free_prep: unmanaged page %p is queued", m));
 	}
 	VM_CNT_INC(v_tfree);
 
 	/* Detach the page from its object, if it has one. */
 	if (m->object != NULL) {
 		KASSERT(((m->oflags & VPO_UNMANAGED) != 0) ==
 		    ((m->object->flags & OBJ_UNMANAGED) != 0),
 		    ("vm_page_free_prep: managed flag mismatch for page %p",
 		    m));
 		vm_page_object_remove(m);
 
 		/*
 		 * The object reference can be released without an atomic
 		 * operation.
 		 */
 		KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 		    m->ref_count == VPRC_OBJREF,
 		    ("vm_page_free_prep: page %p has unexpected ref_count %u",
 		    m, m->ref_count));
 		m->object = NULL;
 		m->ref_count -= VPRC_OBJREF;
 		vm_page_xunbusy(m);
 	}
 
 	if (vm_page_xbusied(m))
 		panic("vm_page_free_prep: freeing exclusive busy page %p", m);
 
 	/*
 	 * If fictitious remove object association and
 	 * return.
 	 */
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		KASSERT(m->ref_count == 1,
 		    ("fictitious page %p is referenced", m));
 		KASSERT(m->a.queue == PQ_NONE,
 		    ("fictitious page %p is queued", m));
 		return (false);
 	}
 
 	/*
 	 * Pages need not be dequeued before they are returned to the physical
 	 * memory allocator, but they must at least be marked for a deferred
 	 * dequeue.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		vm_page_dequeue_deferred(m);
 
 	/* Discard the page's contents state. */
 	m->valid = 0;
 	vm_page_undirty(m);
 
 	if (m->ref_count != 0)
 		panic("vm_page_free_prep: page %p has references", m);
 
 	/*
 	 * Restore the default memory attribute to the page.
 	 */
 	if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
 		pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
 
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * Determine whether the page belongs to a reservation.  If the page was
 	 * allocated from a per-CPU cache, it cannot belong to a reservation, so
 	 * as an optimization, we avoid the check in that case.
 	 */
 	if ((m->flags & PG_PCPU_CACHE) == 0 && vm_reserv_free_page(m))
 		return (false);
 #endif
 
 	/* The caller may now return the page to the free list. */
 	return (true);
 }
 
 /*
  *	vm_page_free_toq:
  *
  *	Returns the given page to the free list, disassociating it
  *	from any VM object.
  *
  *	The object must be locked.  The page must be locked if it is
  *	managed.
  */
 static void
 vm_page_free_toq(vm_page_t m)
 {
 	struct vm_domain *vmd;
 	uma_zone_t zone;
 
 	/* Stop here if the page must not go back to the free list. */
 	if (!vm_page_free_prep(m))
 		return;
 
 	vmd = vm_pagequeue_domain(m);
 	zone = vmd->vmd_pgcache[m->pool].zone;
 	if ((m->flags & PG_PCPU_CACHE) != 0 && zone != NULL) {
 		/* Return the page to the cache zone of its pool. */
 		uma_zfree(zone, m);
 	} else {
 		/* Return the page to the physical allocator. */
 		vm_domain_free_lock(vmd);
 		vm_phys_free_pages(m, 0);
 		vm_domain_free_unlock(vmd);
 		vm_domain_freecnt_inc(vmd, 1);
 	}
 }
 
 /*
  *	vm_page_free_pages_toq:
  *
  *	Returns a list of pages to the free list, disassociating each
  *	from any VM object.  In other words, this is equivalent to
  *	calling vm_page_free_toq() for each page on the list.
  *
  *	The objects must be locked.  Each page must be locked if it is
  *	managed.
  */
 void
 vm_page_free_pages_toq(struct spglist *free, bool update_wire_count)
 {
 	vm_page_t m;
 	int count;
 
 	/* An empty list leaves the wire count untouched as well. */
 	if (SLIST_EMPTY(free))
 		return;
 
 	/* Pop each page off the list and free it, counting as we go. */
 	for (count = 0; (m = SLIST_FIRST(free)) != NULL; count++) {
 		SLIST_REMOVE_HEAD(free, plinks.s.ss);
 		vm_page_free_toq(m);
 	}
 
 	if (update_wire_count)
 		vm_wire_sub(count);
 }
 
 /*
  * Mark this page as wired down, preventing reclamation by the page daemon
  * or when the containing object is destroyed.
  */
 void
 vm_page_wire(vm_page_t m)
 {
 	u_int old;
 
 	KASSERT(m->object != NULL,
 	    ("vm_page_wire: page %p does not belong to an object", m));
 	/* Unless the page or its object is busied, the object must be locked. */
 	if (!vm_page_busied(m) && !vm_object_busied(m->object))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 	KASSERT((m->flags & PG_FICTITIOUS) == 0 ||
 	    VPRC_WIRE_COUNT(m->ref_count) >= 1,
 	    ("vm_page_wire: fictitious page %p has zero wirings", m));
 
 	old = atomic_fetchadd_int(&m->ref_count, 1);
 	KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX,
 	    ("vm_page_wire: counter overflow for page %p", m));
 	/*
 	 * On the first wiring, request removal of a managed page from the
 	 * page queues and update the wired page count.
 	 */
 	if (VPRC_WIRE_COUNT(old) == 0) {
 		if ((m->oflags & VPO_UNMANAGED) == 0)
 			vm_page_aflag_set(m, PGA_DEQUEUE);
 		vm_wire_add(1);
 	}
 }
 
 /*
  * Attempt to wire a mapped page following a pmap lookup of that page.
  * This may fail if a thread is concurrently tearing down mappings of the page.
  * The transient failure is acceptable because it translates to the
  * failure of the caller pmap_extract_and_hold(), which should be then
  * followed by the vm_fault() fallback, see e.g. vm_fault_quick_hold_pages().
  */
 bool
 vm_page_wire_mapped(vm_page_t m)
 {
 	u_int old;
 
 	/*
 	 * Add a wiring with a compare-and-set loop so that the attempt can be
 	 * abandoned as soon as VPRC_BLOCKED is observed in the count.
 	 */
 	old = m->ref_count;
 	do {
 		KASSERT(old > 0,
 		    ("vm_page_wire_mapped: wiring unreferenced page %p", m));
 		if ((old & VPRC_BLOCKED) != 0)
 			return (false);
 	} while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1));
 
 	/*
 	 * On the first wiring, request removal of a managed page from the
 	 * page queues and update the wired page count.
 	 */
 	if (VPRC_WIRE_COUNT(old) == 0) {
 		if ((m->oflags & VPO_UNMANAGED) == 0)
 			vm_page_aflag_set(m, PGA_DEQUEUE);
 		vm_wire_add(1);
 	}
 	return (true);
 }
 
 /*
  * Release a wiring reference to a managed page.  If the page still belongs to
  * an object, update its position in the page queues to reflect the reference.
  * If the wiring was the last reference to the page, free the page.
  *
  * "nqueue" is the queue requested for the page when its last wiring is
  * released, and "noreuse" asks that the page instead be placed where it
  * will be reclaimed sooner; both are passed on to vm_page_release_toq().
  */
 static void
 vm_page_unwire_managed(vm_page_t m, uint8_t nqueue, bool noreuse)
 {
 	u_int old;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is unmanaged", __func__, m));
 
 	/*
 	 * Update LRU state before releasing the wiring reference.
 	 * Use a release store when updating the reference count to
 	 * synchronize with vm_page_free_prep().
 	 */
 	old = m->ref_count;
 	do {
 		KASSERT(VPRC_WIRE_COUNT(old) > 0,
 		    ("vm_page_unwire: wire count underflow for page %p", m));
 
 		if (old > VPRC_OBJREF + 1) {
 			/*
 			 * The page has at least one other wiring reference.  An
 			 * earlier iteration of this loop may have called
 			 * vm_page_release_toq() and cleared PGA_DEQUEUE, so
 			 * re-set it if necessary.
 			 */
 			if ((vm_page_astate_load(m).flags & PGA_DEQUEUE) == 0)
 				vm_page_aflag_set(m, PGA_DEQUEUE);
 		} else if (old == VPRC_OBJREF + 1) {
 			/*
 			 * This is the last wiring.  Clear PGA_DEQUEUE and
 			 * update the page's queue state to reflect the
 			 * reference.  If the page does not belong to an object
 			 * (i.e., the VPRC_OBJREF bit is clear), we only need to
 			 * clear leftover queue state.
 			 *
 			 * Honor the caller's noreuse request rather than
 			 * unconditionally passing false.
 			 */
 			vm_page_release_toq(m, nqueue, noreuse);
 		} else if (old == 1) {
 			vm_page_aflag_clear(m, PGA_DEQUEUE);
 		}
 	} while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1));
 
 	/* The last wiring is gone; free the page if nothing else holds it. */
 	if (VPRC_WIRE_COUNT(old) == 1) {
 		vm_wire_sub(1);
 		if (old == 1)
 			vm_page_free(m);
 	}
 }
 
 /*
  * Release one wiring of the specified page, potentially allowing it to be
  * paged out.
  *
  * Only managed pages belonging to an object can be paged out.  If the number
  * of wirings transitions to zero and the page is eligible for page out, then
  * the page is added to the specified paging queue.  If the released wiring
  * represented the last reference to the page, the page is freed.
  *
  * A managed page must be locked.
  */
 void
 vm_page_unwire(vm_page_t m, uint8_t nqueue)
 {
 
 	KASSERT(nqueue < PQ_COUNT,
 	    ("vm_page_unwire: invalid queue %u request for page %p",
 	    nqueue, m));
 
 	/* Managed pages get their queue state updated as well. */
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		vm_page_unwire_managed(m, nqueue, false);
 		return;
 	}
 
 	/*
 	 * Unmanaged pages are never queued: drop the wiring and free the
 	 * page if that was its last reference.
 	 */
 	if (vm_page_unwire_noq(m) && m->ref_count == 0)
 		vm_page_free(m);
 }
 
 /*
  * Unwire a page without (re-)inserting it into a page queue.  It is up
  * to the caller to enqueue, requeue, or free the page as appropriate.
  * In most cases involving managed pages, vm_page_unwire() should be used
  * instead.
  */
 bool
 vm_page_unwire_noq(vm_page_t m)
 {
 	u_int old;
 
 	/* Drop one reference; "old" is the count before the drop. */
 	old = vm_page_drop(m, 1);
 	KASSERT(VPRC_WIRE_COUNT(old) != 0,
 	    ("vm_page_unref: counter underflow for page %p", m));
 	KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1,
 	    ("vm_page_unref: missing ref on fictitious page %p", m));
 
 	/* Other wirings remain: report that the page is still wired. */
 	if (VPRC_WIRE_COUNT(old) > 1)
 		return (false);
 	/*
 	 * That was the last wiring.  Clear the dequeue request of a managed
 	 * page and update the wired page count.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		vm_page_aflag_clear(m, PGA_DEQUEUE);
 	vm_wire_sub(1);
 	return (true);
 }
 
 /*
  * Ensure that the page ends up in the specified page queue.  If the page is
  * active or being moved to the active queue, ensure that its act_count is
  * at least ACT_INIT but do not otherwise mess with it.
  *
  * A managed page must be locked.
  */
 static __always_inline void
 vm_page_mvqueue(vm_page_t m, const uint8_t nqueue, const uint16_t nflag)
 {
 	vm_page_astate_t old, new;
 
 	KASSERT(m->ref_count > 0,
 	    ("%s: page %p does not carry any references", __func__, m));
 	KASSERT(nflag == PGA_REQUEUE || nflag == PGA_REQUEUE_HEAD,
 	    ("%s: invalid flags %x", __func__, nflag));
 
 	/* Unmanaged and wired pages are left alone. */
 	if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m))
 		return;
 
 	old = vm_page_astate_load(m);
 	do {
 		/* A pending dequeue request takes priority; do nothing. */
 		if ((old.flags & PGA_DEQUEUE) != 0)
 			break;
 		new = old;
 		new.flags &= ~PGA_QUEUE_OP_MASK;
 		if (nqueue == PQ_ACTIVE)
 			new.act_count = max(old.act_count, ACT_INIT);
 		if (old.queue == nqueue) {
 			/*
 			 * The page is already in the target queue.  It is
 			 * requeued there unless that queue is the active
 			 * queue.
 			 */
 			if (nqueue != PQ_ACTIVE)
 				new.flags |= nflag;
 		} else {
 			new.flags |= nflag;
 			new.queue = nqueue;
 		}
 	} while (!vm_page_pqstate_commit(m, &old, new));
 }
 
 /*
  * Put the specified page on the active list (if appropriate).
  *
  * This is vm_page_mvqueue() with PQ_ACTIVE and PGA_REQUEUE; that function
  * does nothing for unmanaged or wired pages and does not requeue a page
  * that is already in the active queue.
  */
 void
 vm_page_activate(vm_page_t m)
 {
 
 	vm_page_mvqueue(m, PQ_ACTIVE, PGA_REQUEUE);
 }
 
 /*
  * Move the specified page to the tail of the inactive queue, or requeue
  * the page if it is already in the inactive queue.
  *
  * This is vm_page_mvqueue() with PQ_INACTIVE and PGA_REQUEUE; that function
  * does nothing for unmanaged or wired pages.
  */
 void
 vm_page_deactivate(vm_page_t m)
 {
 
 	vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE);
 }
 
 /*
  * Move the specified page to the inactive queue with PGA_REQUEUE_HEAD
  * rather than PGA_REQUEUE, so that it is inserted ahead of the queue's tail.
  *
  * This is vm_page_mvqueue() with PQ_INACTIVE and PGA_REQUEUE_HEAD; that
  * function does nothing for unmanaged or wired pages.
  */
 void
 vm_page_deactivate_noreuse(vm_page_t m)
 {
 
 	vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE_HEAD);
 }
 
 /*
  * Put a page in the laundry, or requeue it if it is already there.
  *
  * This is vm_page_mvqueue() with PQ_LAUNDRY and PGA_REQUEUE; that function
  * does nothing for unmanaged or wired pages.
  */
 void
 vm_page_launder(vm_page_t m)
 {
 
 	vm_page_mvqueue(m, PQ_LAUNDRY, PGA_REQUEUE);
 }
 
 /*
  * Put a page in the PQ_UNSWAPPABLE holding queue.
  *
  * The page must be managed and unwired.  It is first removed from whichever
  * queue it is in and then scheduled for insertion into PQ_UNSWAPPABLE.
  */
 void
 vm_page_unswappable(vm_page_t m)
 {
 
 	KASSERT(!vm_page_wired(m) && (m->oflags & VPO_UNMANAGED) == 0,
 	    ("page %p already unswappable", m));
 
 	vm_page_dequeue(m);
 	vm_page_enqueue(m, PQ_UNSWAPPABLE);
 }
 
 /*
  * Release a page back to the page queues in preparation for unwiring.
  */
 static void
 vm_page_release_toq(vm_page_t m, uint8_t nqueue, const bool noreuse)
 {
 	vm_page_astate_t old, new;
 	uint16_t nflag;
 
 	/*
 	 * Use a check of the valid bits to determine whether we should
 	 * accelerate reclamation of the page.  The object lock might not be
 	 * held here, in which case the check is racy.  At worst we will either
 	 * accelerate reclamation of a valid page and violate LRU, or
 	 * unnecessarily defer reclamation of an invalid page.
 	 *
 	 * If we were asked to not cache the page, place it near the head of the
 	 * inactive queue so that is reclaimed sooner.
 	 */
 	if (noreuse || m->valid == 0) {
 		/* The requested queue is overridden in this case. */
 		nqueue = PQ_INACTIVE;
 		nflag = PGA_REQUEUE_HEAD;
 	} else {
 		nflag = PGA_REQUEUE;
 	}
 
 	old = vm_page_astate_load(m);
 	do {
 		new = old;
 
 		/*
 		 * If the page is already in the active queue and we are not
 		 * trying to accelerate reclamation, simply mark it as
 		 * referenced and avoid any queue operations.
 		 */
 		new.flags &= ~PGA_QUEUE_OP_MASK;
 		if (nflag != PGA_REQUEUE_HEAD && old.queue == PQ_ACTIVE)
 			new.flags |= PGA_REFERENCED;
 		else {
 			new.flags |= nflag;
 			new.queue = nqueue;
 		}
 		/* A failed commit refreshes "old"; recompute and retry. */
 	} while (!vm_page_pqstate_commit(m, &old, new));
 }
 
 /*
  * Unwire a page and either attempt to free it or re-add it to the page queues.
  */
 void
 vm_page_release(vm_page_t m, int flags)
 {
 	vm_object_t object;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("vm_page_release: page %p is unmanaged", m));
 
 	/*
 	 * With VPR_TRYFREE, try to write-lock the page's object without
 	 * blocking so that vm_page_release_locked() can attempt to free the
 	 * page.  Give up on this path if the page has no object, is busied, or
 	 * the lock cannot be obtained.
 	 */
 	if ((flags & VPR_TRYFREE) != 0) {
 		for (;;) {
 			object = (vm_object_t)atomic_load_ptr(&m->object);
 			if (object == NULL)
 				break;
 			/* Depends on type-stability. */
 			if (vm_page_busied(m) || !VM_OBJECT_TRYWLOCK(object))
 				break;
 			/*
 			 * Recheck the page's object now that the lock is held;
 			 * if it changed, unlock and look it up again.
 			 */
 			if (object == m->object) {
 				vm_page_release_locked(m, flags);
 				VM_OBJECT_WUNLOCK(object);
 				return;
 			}
 			VM_OBJECT_WUNLOCK(object);
 		}
 	}
 	/* Any non-zero flags value is passed on as the noreuse argument. */
 	vm_page_unwire_managed(m, PQ_INACTIVE, flags != 0);
 }
 
 /*
  * Variant of vm_page_release() for callers that already hold the write lock
  * of the page's object.
  */
 void
 vm_page_release_locked(vm_page_t m, int flags)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("vm_page_release_locked: page %p is unmanaged", m));
 
 	/* Nothing more to do while other wirings remain. */
 	if (!vm_page_unwire_noq(m))
 		return;
 
 	/*
 	 * Free the page when asked to, provided it is clean, can be
 	 * exclusively busied, and is either unmapped or belongs to an
 	 * unreferenced object.  Otherwise hand it back to the page queues.
 	 */
 	if ((flags & VPR_TRYFREE) != 0 &&
 	    (m->object->ref_count == 0 || !pmap_page_is_mapped(m)) &&
 	    m->dirty == 0 && vm_page_tryxbusy(m))
 		vm_page_free(m);
 	else
 		vm_page_release_toq(m, PQ_INACTIVE, flags != 0);
 }
 
 /*
  * Run "op" on a busied, managed, object-owned page with VPRC_BLOCKED set in
  * its reference count, so that vm_page_wire_mapped() fails for the duration
  * of the operation.  Returns false, without calling "op", if the page has
  * any wirings; otherwise VPRC_BLOCKED is dropped after "op" returns and true
  * is returned.  The page's object must be locked.
  */
 static bool
 vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t))
 {
 	u_int old;
 
 	KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0,
 	    ("vm_page_try_blocked_op: page %p has no object", m));
 	KASSERT(vm_page_busied(m),
 	    ("vm_page_try_blocked_op: page %p is not busy", m));
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 
 	/* Atomically check for wirings and set VPRC_BLOCKED. */
 	old = m->ref_count;
 	do {
 		KASSERT(old != 0,
 		    ("vm_page_try_blocked_op: page %p has no references", m));
 		if (VPRC_WIRE_COUNT(old) != 0)
 			return (false);
 	} while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED));
 
 	(op)(m);
 
 	/*
 	 * If the object is read-locked, new wirings may be created via an
 	 * object lookup.
 	 */
 	old = vm_page_drop(m, VPRC_BLOCKED);
 	KASSERT(!VM_OBJECT_WOWNED(m->object) ||
 	    old == (VPRC_BLOCKED | VPRC_OBJREF),
 	    ("vm_page_try_blocked_op: unexpected refcount value %u for %p",
 	    old, m));
 	return (true);
 }
 
 /*
  * Atomically check for wirings and remove all mappings of the page.
  *
  * Returns false, leaving the mappings alone, if the page is wired;
  * otherwise pmap_remove_all() is applied to the page and true is returned.
  */
 bool
 vm_page_try_remove_all(vm_page_t m)
 {
 
 	return (vm_page_try_blocked_op(m, pmap_remove_all));
 }
 
 /*
  * Atomically check for wirings and remove all writeable mappings of the page.
  *
  * Returns false, leaving the mappings alone, if the page is wired;
  * otherwise pmap_remove_write() is applied to the page and true is returned.
  */
 bool
 vm_page_try_remove_write(vm_page_t m)
 {
 
 	return (vm_page_try_blocked_op(m, pmap_remove_write));
 }
 
 /*
  * vm_page_advise
  *
  * 	Apply the specified advice to the given page.
  *
  *	The object and page must be locked.
  */
 void
 vm_page_advise(vm_page_t m, int advice)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	switch (advice) {
 	case MADV_FREE:
 		/*
 		 * Mark the page clean.  This will allow the page to be freed
 		 * without first paging it out.  MADV_FREE pages are often
 		 * quickly reused by malloc(3), so we do not do anything that
 		 * would result in a page fault on a later access.
 		 */
 		vm_page_undirty(m);
 		break;
 	case MADV_DONTNEED:
 		/* Carry a modification recorded by the pmap over to the page. */
 		if (m->dirty == 0 && pmap_is_modified(m))
 			vm_page_dirty(m);
 		break;
 	case MADV_WILLNEED:
 		vm_page_activate(m);
 		return;
 	default:
 		/* Any other advice is ignored. */
 		return;
 	}
 
 	/*
 	 * Clear any references to the page.  Otherwise, the page daemon will
 	 * immediately reactivate the page.
 	 */
 	vm_page_aflag_clear(m, PGA_REFERENCED);
 
 	/*
 	 * Place clean pages near the head of the inactive queue rather than
 	 * the tail, thus defeating the queue's LRU operation and ensuring that
 	 * the page will be reused quickly.  Dirty pages not already in the
 	 * laundry are moved there.
 	 */
 	if (m->dirty == 0)
 		vm_page_deactivate_noreuse(m);
 	else if (!vm_page_in_laundry(m))
 		vm_page_launder(m);
 }
 
 /*
  * Translate the allocation flags given to a page grab routine into the
  * flags that routine passes to the page allocator.
  */
 static inline int
 vm_page_grab_pflags(int allocflags)
 {
 	int flags;
 
 	KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 ||
 	    (allocflags & VM_ALLOC_WIRED) != 0,
 	    ("vm_page_grab_pflags: the pages must be busied or wired"));
 	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
 	    (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
 	    ("vm_page_grab_pflags: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY "
 	    "mismatch"));
 
 	/* Strip the caller's wait policy and VM_ALLOC_NOBUSY. */
 	flags = allocflags;
 	flags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL |
 	    VM_ALLOC_NOBUSY);
 
 	/* Unless the caller refuses to wait, wait on failure and report it. */
 	if ((allocflags & VM_ALLOC_NOWAIT) == 0)
 		flags |= VM_ALLOC_WAITFAIL;
 
 	/* A caller that ignores shared-busy state gets shared-busy pages. */
 	if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
 		flags |= VM_ALLOC_SBUSY;
 
 	return (flags);
 }
 
 /*
  * Grab a page, waiting until we are woken up due to the page
  * changing state.  We keep on waiting, if the page continues
  * to be in the object.  If the page doesn't exist, first allocate it
  * and then conditionally zero it.
  *
  * Returns NULL if the page does not exist and VM_ALLOC_NOCREAT was given,
  * if the allocation fails and VM_ALLOC_NOWAIT was given, or if
  * vm_page_busy_sleep_flags() reports that the lookup should not be retried.
  *
  * This routine may sleep.
  *
  * The object must be locked on entry.  The lock will, however, be released
  * and reacquired if the routine sleeps.
  */
 vm_page_t
 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 {
 	vm_page_t m;
 	int pflags;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	pflags = vm_page_grab_pflags(allocflags);
 retrylookup:
 	if ((m = vm_page_lookup(object, pindex)) != NULL) {
 		/* The page exists: acquire it, sleeping and retrying if needed. */
 		if (!vm_page_acquire_flags(m, allocflags)) {
 			if (vm_page_busy_sleep_flags(object, m, "pgrbwt",
 			    allocflags))
 				goto retrylookup;
 			return (NULL);
 		}
 		goto out;
 	}
 	if ((allocflags & VM_ALLOC_NOCREAT) != 0)
 		return (NULL);
 	m = vm_page_alloc(object, pindex, pflags);
 	if (m == NULL) {
 		if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 			return (NULL);
 		/* The page may have been created meanwhile; look again. */
 		goto retrylookup;
 	}
 	/* Zero a newly allocated page on request unless it is already zero. */
 	if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 
 out:
 	/* With VM_ALLOC_NOBUSY, drop the busy state acquired above. */
 	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
 		if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
 			vm_page_sunbusy(m);
 		else
 			vm_page_xunbusy(m);
 	}
 	return (m);
 }
 
 /*
  * Grab a page and make it valid, paging in if necessary.  Pages missing from
  * their pager are zero filled and validated.  If a VM_ALLOC_COUNT is supplied
  * and the page is not valid as many as VM_INITIAL_PAGEIN pages can be brought
  * in simultaneously.  Additional pages will be left on a paging queue but
  * will neither be wired nor busy regardless of allocflags.
  */
 int
 vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int allocflags)
 {
 	vm_page_t m;
 	vm_page_t ma[VM_INITIAL_PAGEIN];
 	bool sleep, xbusy;
 	int after, i, pflags, rv;
 
 	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
 	    (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
 	    ("vm_page_grab_valid: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
 	KASSERT((allocflags &
 	    (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0,
 	    ("vm_page_grab_valid: Invalid flags 0x%X", allocflags));
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	/* Allocations are always exclusive-busied and wait on failure. */
 	pflags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY);
 	pflags |= VM_ALLOC_WAITFAIL;
 
 retrylookup:
 	/* "xbusy" records which kind of busy lock is held on the page. */
 	xbusy = false;
 	if ((m = vm_page_lookup(object, pindex)) != NULL) {
 		/*
 		 * If the page is fully valid it can only become invalid
 		 * with the object lock held.  If it is not valid it can
 		 * become valid with the busy lock held.  Therefore, we
 		 * may unnecessarily lock the exclusive busy here if we
 		 * race with I/O completion not using the object lock.
 		 * However, we will not end up with an invalid page and a
 		 * shared lock.
 		 */
 		if (!vm_page_all_valid(m) ||
 		    (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) {
 			sleep = !vm_page_tryxbusy(m);
 			xbusy = true;
 		} else
 			sleep = !vm_page_trysbusy(m);
 		if (sleep) {
 			(void)vm_page_busy_sleep_flags(object, m, "pgrbwt",
 			    allocflags);
 			goto retrylookup;
 		}
 		/* With VM_ALLOC_NOCREAT an invalid page is not paged in. */
 		if ((allocflags & VM_ALLOC_NOCREAT) != 0 &&
 		   !vm_page_all_valid(m)) {
 			if (xbusy)
 				vm_page_xunbusy(m);
 			else
 				vm_page_sunbusy(m);
 			*mp = NULL;
 			return (VM_PAGER_FAIL);
 		}
 		if ((allocflags & VM_ALLOC_WIRED) != 0)
 			vm_page_wire(m);
 		if (vm_page_all_valid(m))
 			goto out;
 	} else if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
 		*mp = NULL;
 		return (VM_PAGER_FAIL);
 	} else if ((m = vm_page_alloc(object, pindex, pflags)) != NULL) {
 		xbusy = true;
 	} else {
 		goto retrylookup;
 	}
 
 	/* From here on the page is exclusively busied and not fully valid. */
 	vm_page_assert_xbusied(m);
 	MPASS(xbusy);
 	if (vm_pager_has_page(object, pindex, NULL, &after)) {
 		/*
 		 * Bound the number of pages to read by VM_INITIAL_PAGEIN and
 		 * by the count encoded in allocflags, but read at least one.
 		 */
 		after = MIN(after, VM_INITIAL_PAGEIN);
 		after = MIN(after, allocflags >> VM_ALLOC_COUNT_SHIFT);
 		after = MAX(after, 1);
 		ma[0] = m;
 		/*
 		 * Collect the following pages, stopping at the first one that
 		 * has valid contents, cannot be busied, or cannot be allocated.
 		 */
 		for (i = 1; i < after; i++) {
 			if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
 				if (ma[i]->valid || !vm_page_tryxbusy(ma[i]))
 					break;
 			} else {
 				ma[i] = vm_page_alloc(object, m->pindex + i,
 				    VM_ALLOC_NORMAL);
 				if (ma[i] == NULL)
 					break;
 			}
 		}
 		after = i;
 		/* The object lock is dropped around the pager call. */
 		vm_object_pip_add(object, after);
 		VM_OBJECT_WUNLOCK(object);
 		rv = vm_pager_get_pages(object, ma, after, NULL, NULL);
 		VM_OBJECT_WLOCK(object);
 		vm_object_pip_wakeupn(object, after);
 		/* Pager may have replaced a page. */
 		m = ma[0];
 		if (rv != VM_PAGER_OK) {
 			/*
 			 * Undo the wiring taken for the caller, then free
 			 * every unwired page and unbusy the rest.
 			 */
 			if ((allocflags & VM_ALLOC_WIRED) != 0)
 				vm_page_unwire_noq(m);
 			for (i = 0; i < after; i++) {
 				if (!vm_page_wired(ma[i]))
 					vm_page_free(ma[i]);
 				else
 					vm_page_xunbusy(ma[i]);
 			}
 			*mp = NULL;
 			return (rv);
 		}
 		for (i = 1; i < after; i++)
 			vm_page_readahead_finish(ma[i]);
 		MPASS(vm_page_all_valid(m));
 	} else {
 		/* The pager has no copy of the page: zero-fill and validate. */
 		vm_page_zero_invalid(m, TRUE);
 	}
 out:
 	/* Leave the page in the busy state that the caller asked for. */
 	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
 		if (xbusy)
 			vm_page_xunbusy(m);
 		else
 			vm_page_sunbusy(m);
 	}
 	if ((allocflags & VM_ALLOC_SBUSY) != 0 && xbusy)
 		vm_page_busy_downgrade(m);
 	*mp = m;
 	return (VM_PAGER_OK);
 }
 
 /*
  * Return the specified range of pages from the given object.  For each
  * page offset within the range, if a page already exists within the object
  * at that offset and it is busy, then wait for it to change state.  If,
  * instead, the page doesn't exist, then allocate it.
  *
  * The caller must always specify an allocation class.
  *
  * allocation classes:
  *	VM_ALLOC_NORMAL		normal process request
  *	VM_ALLOC_SYSTEM		system *really* needs the pages
  *
  * The caller must always specify that the pages are to be busied and/or
  * wired.
  *
  * optional allocation flags:
  *	VM_ALLOC_IGN_SBUSY	do not sleep on soft busy pages
  *	VM_ALLOC_NOBUSY		do not exclusive busy the page
  *	VM_ALLOC_NOCREAT	do not allocate; stop at the first missing page
  *	VM_ALLOC_NOWAIT		do not sleep
  *	VM_ALLOC_SBUSY		set page to sbusy state
  *	VM_ALLOC_WIRED		wire the pages
  *	VM_ALLOC_ZERO		zero and validate any invalid pages
  *
  * If VM_ALLOC_NOWAIT is not specified, this routine may sleep.  Otherwise, it
  * may return a partial prefix of the requested range.  A partial prefix is
  * also returned when VM_ALLOC_NOCREAT is set and a page is missing.
  *
  * The object must be write locked.  The grabbed pages are stored in
  * ma[0] .. ma[n - 1] and n, the number of pages grabbed, is returned.
  */
 int
 vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count)
 {
 	vm_page_t m, mpred;
 	int pflags;
 	int i;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	/*
 	 * The VM_ALLOC_COUNT() hint is supplied per allocation below, so
 	 * the caller must leave those bits clear.
 	 */
 	KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0,
 	    ("vm_page_grab_pages: VM_ALLOC_COUNT() is not allowed"));
 
 	pflags = vm_page_grab_pflags(allocflags);
 	if (count == 0)
 		return (0);
 
 	i = 0;
 retrylookup:
 	/*
 	 * (Re)establish "m", the resident page at pindex + i if there is
 	 * one, and "mpred", the page handed to vm_page_alloc_after() as the
 	 * predecessor of a new page at that index.
 	 */
 	m = vm_radix_lookup_le(&object->rtree, pindex + i);
 	if (m == NULL || m->pindex != pindex + i) {
 		mpred = m;
 		m = NULL;
 	} else
 		mpred = TAILQ_PREV(m, pglist, listq);
 	for (; i < count; i++) {
 		if (m != NULL) {
 			if (!vm_page_acquire_flags(m, allocflags)) {
 				/*
 				 * The page is busy.  Redo the lookup if the
 				 * sleep helper says to retry; otherwise
 				 * return the prefix grabbed so far.
 				 */
 				if (vm_page_busy_sleep_flags(object, m,
 				    "grbmaw", allocflags))
 					goto retrylookup;
 				break;
 			}
 		} else {
 			if ((allocflags & VM_ALLOC_NOCREAT) != 0)
 				break;
 			m = vm_page_alloc_after(object, pindex + i,
 			    pflags | VM_ALLOC_COUNT(count - i), mpred);
 			if (m == NULL) {
 				if ((allocflags & VM_ALLOC_NOWAIT) != 0)
 					break;
 				goto retrylookup;
 			}
 		}
 		if (vm_page_none_valid(m) &&
 		    (allocflags & VM_ALLOC_ZERO) != 0) {
 			/* Skip the zeroing if the page is already zeroed. */
 			if ((m->flags & PG_ZERO) == 0)
 				pmap_zero_page(m);
 			vm_page_valid(m);
 		}
 		if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
 			/*
 			 * The caller does not want the page left busy; which
 			 * kind of busy to drop follows VM_ALLOC_IGN_SBUSY.
 			 */
 			if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
 				vm_page_sunbusy(m);
 			else
 				vm_page_xunbusy(m);
 		}
 		ma[i] = mpred = m;
 		m = vm_page_next(m);
 	}
 	return (i);
 }
 
 /*
  * Build the valid/dirty bit mask covering the byte range
  * [base, base + size) of a page, one bit per DEV_BSIZE block.  Any block
  * that the range touches, even partially, is included.
  *
  * The range must lie within a single page.  An empty range yields 0.
  */
 vm_page_bits_t
 vm_page_bits(int base, int size)
 {
 	int lo_blk, hi_blk;
 
 	KASSERT(base + size <= PAGE_SIZE,
 	    ("vm_page_bits: illegal base/size %d/%d", base, size));
 
 	if (size != 0) {
 		lo_blk = base >> DEV_BSHIFT;
 		hi_blk = (base + size - 1) >> DEV_BSHIFT;
 
 		/* Bits lo_blk..hi_blk: (1 << (hi_blk + 1)) - (1 << lo_blk). */
 		return (((vm_page_bits_t)2 << hi_blk) -
 		    ((vm_page_bits_t)1 << lo_blk));
 	}
 
 	/* Degenerate case: nothing is covered. */
 	return (0);
 }
 
 /*
  * Atomically OR the bits in "set" into *bits.
  *
  * When PAGE_SIZE selects a field width for which a matching
  * atomic_set_{64,32,16,8}() is used below, that primitive is applied to
  * *bits directly.  Otherwise the update is done with atomic_set_32() on the
  * aligned 32-bit word containing *bits, with "set" shifted to the field's
  * position within that word.
  *
  * "m" is not referenced by this function.
  */
 void
 vm_page_bits_set(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t set)
 {
 
 #if PAGE_SIZE == 32768
 	atomic_set_64((uint64_t *)bits, set);
 #elif PAGE_SIZE == 16384
 	atomic_set_32((uint32_t *)bits, set);
 #elif (PAGE_SIZE == 8192) && defined(atomic_set_16)
 	atomic_set_16((uint16_t *)bits, set);
 #elif (PAGE_SIZE == 4096) && defined(atomic_set_8)
 	atomic_set_8((uint8_t *)bits, set);
 #else		/* PAGE_SIZE <= 8192 */
 	uintptr_t addr;
 	int shift;
 
 	addr = (uintptr_t)bits;
 	/*
 	 * Use a trick to perform a 32-bit atomic on the
 	 * containing aligned word, to not depend on the existence
 	 * of atomic_{set, clear}_{8, 16}.
 	 */
 	/* Byte offset of the field within its aligned 32-bit word. */
 	shift = addr & (sizeof(uint32_t) - 1);
 #if BYTE_ORDER == BIG_ENDIAN
 	/* On big-endian the lowest address holds the most significant byte. */
 	shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY;
 #else
 	shift *= NBBY;
 #endif
 	/* Round the address down to the containing word. */
 	addr &= ~(sizeof(uint32_t) - 1);
 	atomic_set_32((uint32_t *)addr, set << shift);
 #endif		/* PAGE_SIZE */
 }
 
 /*
  * Atomically clear the bits in "clear" from *bits.
  *
  * This mirrors vm_page_bits_set(): a width-matched atomic_clear_*() is used
  * when the preprocessor selects one, and otherwise atomic_clear_32() is
  * applied to the aligned 32-bit word containing *bits with the mask shifted
  * to the field's position.
  *
  * "m" is not referenced by this function.
  */
 static inline void
 vm_page_bits_clear(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t clear)
 {
 
 #if PAGE_SIZE == 32768
 	atomic_clear_64((uint64_t *)bits, clear);
 #elif PAGE_SIZE == 16384
 	atomic_clear_32((uint32_t *)bits, clear);
 #elif (PAGE_SIZE == 8192) && defined(atomic_clear_16)
 	atomic_clear_16((uint16_t *)bits, clear);
 #elif (PAGE_SIZE == 4096) && defined(atomic_clear_8)
 	atomic_clear_8((uint8_t *)bits, clear);
 #else		/* PAGE_SIZE <= 8192 */
 	uintptr_t addr;
 	int shift;
 
 	addr = (uintptr_t)bits;
 	/*
 	 * Use a trick to perform a 32-bit atomic on the
 	 * containing aligned word, to not depend on the existence
 	 * of atomic_{set, clear}_{8, 16}.
 	 */
 	/* Byte offset of the field within its aligned 32-bit word. */
 	shift = addr & (sizeof(uint32_t) - 1);
 #if BYTE_ORDER == BIG_ENDIAN
 	/* On big-endian the lowest address holds the most significant byte. */
 	shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY;
 #else
 	shift *= NBBY;
 #endif
 	/* Round the address down to the containing word. */
 	addr &= ~(sizeof(uint32_t) - 1);
 	atomic_clear_32((uint32_t *)addr, clear << shift);
 #endif		/* PAGE_SIZE */
 }
 
 /*
  * Atomically replace *bits with "newbits" and return the value that was
  * there before.
  *
  * Each variant loops on atomic_fcmpset_*() until the exchange succeeds.
  * When no width-matched primitive is selected, the exchange is done on the
  * aligned 32-bit word containing *bits, rewriting only the field's bits and
  * preserving the rest of the word.
  *
  * "m" is not referenced by this function.
  */
 static inline vm_page_bits_t
 vm_page_bits_swap(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t newbits)
 {
 #if PAGE_SIZE == 32768
 	uint64_t old;
 
 	old = *bits;
 	while (atomic_fcmpset_64(bits, &old, newbits) == 0);
 	return (old);
 #elif PAGE_SIZE == 16384
 	uint32_t old;
 
 	old = *bits;
 	while (atomic_fcmpset_32(bits, &old, newbits) == 0);
 	return (old);
 #elif (PAGE_SIZE == 8192) && defined(atomic_fcmpset_16)
 	uint16_t old;
 
 	old = *bits;
 	while (atomic_fcmpset_16(bits, &old, newbits) == 0);
 	return (old);
 #elif (PAGE_SIZE == 4096) && defined(atomic_fcmpset_8)
 	uint8_t old;
 
 	old = *bits;
 	while (atomic_fcmpset_8(bits, &old, newbits) == 0);
 	return (old);
 #else		/* PAGE_SIZE <= 8192 */
 	uintptr_t addr;
 	uint32_t old, new, mask;
 	int shift;
 
 	addr = (uintptr_t)bits;
 	/*
 	 * Use a trick to perform a 32-bit atomic on the
 	 * containing aligned word, to not depend on the existence
 	 * of atomic_{set, swap, clear}_{8, 16}.
 	 */
 	/* Byte offset of the field within its aligned 32-bit word. */
 	shift = addr & (sizeof(uint32_t) - 1);
 #if BYTE_ORDER == BIG_ENDIAN
 	/* On big-endian the lowest address holds the most significant byte. */
 	shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY;
 #else
 	shift *= NBBY;
 #endif
 	/* Round the address down to the containing word. */
 	addr &= ~(sizeof(uint32_t) - 1);
 	/* Mask selecting only this field's bits within the word. */
 	mask = VM_PAGE_BITS_ALL << shift;
 
 	/*
 	 * NOTE(review): "old" is seeded from the narrow field rather than
 	 * from the containing word.  This presumably relies on a failed
 	 * atomic_fcmpset_32() storing the word's current value into "old"
 	 * before the next iteration -- confirm against atomic(9).
 	 */
 	old = *bits;
 	do {
 		new = old & ~mask;
 		new |= newbits << shift;
 	} while (atomic_fcmpset_32((uint32_t *)addr, &old, new) == 0);
 	/* The return type narrows this back to the field's width. */
 	return (old >> shift);
 #endif		/* PAGE_SIZE */
 }
 
 /*
  *	vm_page_set_valid_range:
  *
  *	Sets portions of a page valid.  The arguments are expected
  *	to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive
  *	of any partial chunks touched by the range.  The invalid portion of
  *	such chunks will be zeroed.
  *
  *	(base + size) must be less than or equal to PAGE_SIZE.
  *
  *	The page must be busied.  When it is exclusively busied the valid
  *	field is updated with a plain store; otherwise the update is atomic.
  */
 void
 vm_page_set_valid_range(vm_page_t m, int base, int size)
 {
 	int endoff, frag;
 	vm_page_bits_t pagebits;
 
 	vm_page_assert_busied(m);
 	if (size == 0)	/* handle degenerate case */
 		return;
 
 	/*
 	 * If the base is not DEV_BSIZE aligned and the valid
 	 * bit is clear, we have to zero out a portion of the
 	 * first block.
 	 *
 	 * The bit is built from a vm_page_bits_t one, as in
 	 * vm_page_set_validclean(), so that the shift is performed at the
 	 * width of the valid field rather than at the width of int.
 	 */
 	if ((frag = rounddown2(base, DEV_BSIZE)) != base &&
 	    (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0)
 		pmap_zero_page_area(m, frag, base - frag);
 
 	/*
 	 * If the ending offset is not DEV_BSIZE aligned and the
 	 * valid bit is clear, we have to zero out a portion of
 	 * the last block.
 	 */
 	endoff = base + size;
 	if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff &&
 	    (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0)
 		pmap_zero_page_area(m, endoff,
 		    DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
 
 	/*
 	 * Assert that no previously invalid block that is now being validated
 	 * is already dirty.
 	 */
 	pagebits = vm_page_bits(base, size);
 	KASSERT((~m->valid & pagebits & m->dirty) == 0,
 	    ("vm_page_set_valid_range: page %p is dirty", m));
 
 	/*
 	 * Set valid bits inclusive of any overlap.
 	 */
 	if (vm_page_xbusied(m))
 		m->valid |= pagebits;
 	else
 		vm_page_bits_set(m, &m->valid, pagebits);
 }
 
 /*
  * Mark the whole page dirty and report the dirty bits as they were before
  * the call.  If the page was entirely clean and has PGA_SWAP_SPACE set,
  * the pager is told that the page is no longer swapped so that the now
  * invalid swap space can be freed.
  */
 vm_page_bits_t
 vm_page_set_dirty(vm_page_t m)
 {
 	vm_page_bits_t prev;
 
 	VM_PAGE_OBJECT_BUSY_ASSERT(m);
 
 	if (!vm_page_xbusied(m) || pmap_page_is_write_mapped(m)) {
 		/* Not the sole updater of the dirty field: swap atomically. */
 		prev = vm_page_bits_swap(m, &m->dirty, VM_PAGE_BITS_ALL);
 	} else {
 		/* Exclusively busied and not write mapped: plain update. */
 		prev = m->dirty;
 		m->dirty = VM_PAGE_BITS_ALL;
 	}
 
 	if (prev == 0 && (m->a.flags & PGA_SWAP_SPACE) != 0)
 		vm_pager_page_unswapped(m);
 
 	return (prev);
 }
 
 /*
  * Remove the bits in "pagebits" from the page's dirty field.  The page
  * must be busied.
  */
 static __inline void
 vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits)
 {
 
 	vm_page_assert_busied(m);
 
 	if (!vm_page_xbusied(m) || pmap_page_is_write_mapped(m)) {
 		/*
 		 * The pmap layer can call vm_page_dirty() without holding a
 		 * distinguished lock, so other threads may be modifying the
 		 * dirty bits.  The page busy state together with an atomic
 		 * update keeps the dirty field consistent.
 		 */
 		vm_page_bits_clear(m, &m->dirty, pagebits);
 		return;
 	}
 
 	/*
 	 * Exclusively busied and not write mapped: this thread is the only
 	 * one that can modify the dirty bits, so a plain update suffices.
 	 */
 	m->dirty &= ~pagebits;
 }
 
 /*
  *	vm_page_set_validclean:
  *
  *	Sets portions of a page valid and clean.  The arguments are expected
  *	to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive
  *	of any partial chunks touched by the range.  The invalid portion of
  *	such chunks will be zeroed.
  *
  *	(base + size) must be less than or equal to PAGE_SIZE.
  *
  *	The page must be busied.
  */
 void
 vm_page_set_validclean(vm_page_t m, int base, int size)
 {
 	vm_page_bits_t oldvalid, pagebits;
 	int endoff, frag;
 
 	vm_page_assert_busied(m);
 	if (size == 0)	/* handle degenerate case */
 		return;
 
 	/*
 	 * If the base is not DEV_BSIZE aligned and the valid
 	 * bit is clear, we have to zero out a portion of the
 	 * first block.
 	 */
 	if ((frag = rounddown2(base, DEV_BSIZE)) != base &&
 	    (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0)
 		pmap_zero_page_area(m, frag, base - frag);
 
 	/*
 	 * If the ending offset is not DEV_BSIZE aligned and the
 	 * valid bit is clear, we have to zero out a portion of
 	 * the last block.
 	 */
 	endoff = base + size;
 	if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff &&
 	    (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0)
 		pmap_zero_page_area(m, endoff,
 		    DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));
 
 	/*
 	 * Set valid, clear dirty bits.  If validating the entire
 	 * page we can safely clear the pmap modify bit.  We also
 	 * use this opportunity to clear the PGA_NOSYNC flag.  If a process
 	 * takes a write fault on a MAP_NOSYNC memory area the flag will
 	 * be set again.
 	 *
 	 * We set valid bits inclusive of any overlap, but we can only
 	 * clear dirty bits for DEV_BSIZE chunks that are fully within
 	 * the range.
 	 */
 	/* Remember the prior valid state; it decides the dirty handling. */
 	oldvalid = m->valid;
 	pagebits = vm_page_bits(base, size);
 	if (vm_page_xbusied(m))
 		m->valid |= pagebits;
 	else
 		vm_page_bits_set(m, &m->valid, pagebits);
 	/*
 	 * The disabled block below would shrink the range to whole
 	 * DEV_BSIZE chunks before clearing dirty bits.  While it stays
 	 * compiled out, the same inclusive mask is used for both the valid
 	 * and the dirty update.
 	 */
 #if 0	/* NOT YET */
 	if ((frag = base & (DEV_BSIZE - 1)) != 0) {
 		frag = DEV_BSIZE - frag;
 		base += frag;
 		size -= frag;
 		if (size < 0)
 			size = 0;
 	}
 	pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1));
 #endif
 	if (base == 0 && size == PAGE_SIZE) {
 		/*
 		 * The page can only be modified within the pmap if it is
 		 * mapped, and it can only be mapped if it was previously
 		 * fully valid.
 		 */
 		if (oldvalid == VM_PAGE_BITS_ALL)
 			/*
 			 * Perform the pmap_clear_modify() first.  Otherwise,
 			 * a concurrent pmap operation, such as
 			 * pmap_protect(), could clear a modification in the
 			 * pmap and set the dirty field on the page before
 			 * pmap_clear_modify() had begun and after the dirty
 			 * field was cleared here.
 			 */
 			pmap_clear_modify(m);
 		m->dirty = 0;
 		vm_page_aflag_clear(m, PGA_NOSYNC);
 	} else if (oldvalid != VM_PAGE_BITS_ALL && vm_page_xbusied(m))
 		/*
 		 * Partial range on a page that was not fully valid (and so,
 		 * per the reasoning above, not mapped) and is exclusively
 		 * busied: a plain update of the dirty field is used.
 		 */
 		m->dirty &= ~pagebits;
 	else
 		/* Otherwise let the helper pick a plain or atomic clear. */
 		vm_page_clear_dirty_mask(m, pagebits);
 }
 
 /*
  * Clear the dirty bits covering the byte range [base, base + size) of the
  * page.  The range is converted to a block mask by vm_page_bits().
  */
 void
 vm_page_clear_dirty(vm_page_t m, int base, int size)
 {
 	vm_page_bits_t mask;
 
 	mask = vm_page_bits(base, size);
 	vm_page_clear_dirty_mask(m, mask);
 }
 
 /*
  *	vm_page_set_invalid:
  *
  *	Invalidates the DEV_BSIZE chunks of a page that are touched by the
  *	byte range [base, base + size).  Both the valid and the dirty bits
  *	of the affected chunks are cleared.  The page must be busied and
  *	its object write locked.
  */
 void
 vm_page_set_invalid(vm_page_t m, int base, int size)
 {
 	vm_object_t obj;
 	vm_page_bits_t mask;
 	bool to_eof;
 
 	/*
 	 * The object lock is required so that pages can't be mapped
 	 * read-only while we're in the process of invalidating them.
 	 */
 	obj = m->object;
 	VM_OBJECT_ASSERT_WLOCKED(obj);
 	vm_page_assert_busied(m);
 
 	/*
 	 * For a vnode object, a range that starts at the beginning of the
 	 * page and reaches the recorded vnode size invalidates the whole
 	 * page.
 	 */
 	to_eof = obj->type == OBJT_VNODE && base == 0 &&
 	    IDX_TO_OFF(m->pindex) + size >= obj->un_pager.vnp.vnp_size;
 	mask = to_eof ? VM_PAGE_BITS_ALL : vm_page_bits(base, size);
 
 	/* A fully valid page may be mapped; remove the mappings first. */
 	if (obj->ref_count != 0 && vm_page_all_valid(m) && mask != 0)
 		pmap_remove_all(m);
 	KASSERT((mask == 0 && vm_page_all_valid(m)) ||
 	    !pmap_page_is_mapped(m),
 	    ("vm_page_set_invalid: page %p is mapped", m));
 
 	if (!vm_page_xbusied(m)) {
 		vm_page_bits_clear(m, &m->valid, mask);
 		vm_page_bits_clear(m, &m->dirty, mask);
 	} else {
 		m->valid &= ~mask;
 		m->dirty &= ~mask;
 	}
 }
 
 /*
  *	vm_page_invalid:
  *
  *	Invalidates the entire page.  The page must be busy and unmapped,
  *	and the enclosing object must be locked.  The object lock protects
  *	against a concurrent read-only pmap enter, which is done without
  *	busying the page.
  */
 void
 vm_page_invalid(vm_page_t m)
 {
 
 	vm_page_assert_busied(m);
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 	MPASS(!pmap_page_is_mapped(m));
 
 	if (!vm_page_xbusied(m)) {
 		/* Only shared busy is held: clear the field atomically. */
 		vm_page_bits_clear(m, &m->valid, VM_PAGE_BITS_ALL);
 		return;
 	}
 	m->valid = 0;
 }
 
 /*
  * vm_page_zero_invalid()
  *
  *	The kernel treats the invalid portions of a page as garbage, yet
  *	such pages can be mapped into memory by user code.  When that
  *	happens the non-valid portions must be zeroed so that user code
  *	sees what it expects.
  *
  *	Pages are most often partially valid when the end of a file is
  *	mapped into memory and the file's size is not page aligned.
  *
  *	If "setvalid" is true the page is additionally marked fully valid.
  */
 void
 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid)
 {
 	int blk, run_start;
 
 	/*
 	 * Walk the valid bits and zero each maximal run of invalid
 	 * DEV_BSIZE blocks.  A run is flushed when a valid block, or the
 	 * end of the page, is reached.  Invalid areas smaller than
 	 * DEV_BSIZE (where the valid bit may be set) have already been
 	 * zeroed by vm_page_set_validclean().
 	 */
 	run_start = 0;
 	for (blk = 0; blk <= PAGE_SIZE / DEV_BSIZE; blk++) {
 		if (blk != (PAGE_SIZE / DEV_BSIZE) &&
 		    (m->valid & ((vm_page_bits_t)1 << blk)) == 0)
 			continue;	/* still inside an invalid run */
 		if (blk > run_start) {
 			pmap_zero_page_area(m, run_start << DEV_BSHIFT,
 			    (blk - run_start) << DEV_BSHIFT);
 		}
 		run_start = blk + 1;
 	}
 
 	/*
 	 * The caller passes setvalid as TRUE when the zeroed areas may
 	 * safely be treated as valid, i.e. when there are no cache
 	 * consistency issues.  E.g. this is fine for UFS but not for NFS.
 	 */
 	if (setvalid)
 		vm_page_valid(m);
 }
 
 /*
  *	vm_page_is_valid:
  *
  *	Report whether every block touched by [base, base + size) is valid.
  *	For size == 0 the answer is FALSE when the page is entirely invalid
  *	and TRUE otherwise.
  *
  *	Some callers invoke this routine without the busy lock held and
  *	handle races via higher level locks.  Typical callers should
  *	hold a busy lock to prevent invalidation.
  */
 int
 vm_page_is_valid(vm_page_t m, int base, int size)
 {
 	vm_page_bits_t wanted;
 
 	wanted = vm_page_bits(base, size);
 
 	/* A completely invalid page never qualifies, even for size == 0. */
 	if (m->valid == 0)
 		return (0);
 	return ((m->valid & wanted) == wanted);
 }
 
 /*
  * Check the predicates selected by "flags" (PS_NONE_BUSY, PS_ALL_DIRTY,
  * PS_ALL_VALID) against every page of the (super)page starting at "m".
  * Returns true only if all of them hold for all pages.  The page "skip_m",
  * if not NULL, is exempt from the predicates but must still belong to the
  * same object.
  */
 bool
 vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m)
 {
 	vm_object_t obj;
 	vm_page_t p, end;
 	int npages;
 
 	obj = m->object;
 	if (skip_m != NULL && skip_m->object != obj)
 		return (false);
 	VM_OBJECT_ASSERT_LOCKED(obj);
 	npages = atop(pagesizes[m->psind]);
 
 	/*
 	 * The physically contiguous pages that make up a superpage, i.e., a
 	 * page with a page size index ("psind") greater than zero, occupy
 	 * adjacent entries in vm_page_array[], so they can be walked with
 	 * plain pointer arithmetic starting from "m".
 	 */
 	end = m + npages;
 	for (p = m; p < end; p++) {
 		/* Object consistency is always tested, "skip_m" included. */
 		if (p->object != obj)
 			return (false);
 		if (p == skip_m)
 			continue;
 		if ((flags & PS_NONE_BUSY) != 0 && vm_page_busied(p))
 			return (false);
 		/*
 		 * Calling vm_page_test_dirty() or pmap_is_modified() might
 		 * stop the dirty test from spuriously returning "false".
 		 * However, that would require a write lock on the object
 		 * containing the page.
 		 */
 		if ((flags & PS_ALL_DIRTY) != 0 &&
 		    p->dirty != VM_PAGE_BITS_ALL)
 			return (false);
 		if ((flags & PS_ALL_VALID) != 0 &&
 		    p->valid != VM_PAGE_BITS_ALL)
 			return (false);
 	}
 	return (true);
 }
 
 /*
  * Mark the page dirty if the pmap layer reports it as modified.  The page
  * must be busied.
  */
 void
 vm_page_test_dirty(vm_page_t m)
 {
 
 	vm_page_assert_busied(m);
 
 	/* Already fully dirty: no need to consult the pmap. */
 	if (m->dirty == VM_PAGE_BITS_ALL)
 		return;
 	if (pmap_is_modified(m))
 		vm_page_dirty(m);
 }
 
 /*
  * Mark the entire page valid.  The page must be busied.  With an exclusive
  * busy the valid field is stored directly; otherwise it is set atomically.
  */
 void
 vm_page_valid(vm_page_t m)
 {
 
 	vm_page_assert_busied(m);
 	if (!vm_page_xbusied(m)) {
 		vm_page_bits_set(m, &m->valid, VM_PAGE_BITS_ALL);
 		return;
 	}
 	m->valid = VM_PAGE_BITS_ALL;
 }
 
 /*
  * Function form of taking the page lock: acquires the mutex returned by
  * vm_page_lockptr(m), forwarding the caller's file and line.
  *
  * NOTE(review): the _KBI suffix presumably marks this as the out-of-line
  * entry point kept for kernel binary interface stability -- confirm
  * against vm_page.h.
  */
 void
 vm_page_lock_KBI(vm_page_t m, const char *file, int line)
 {
 
 	mtx_lock_flags_(vm_page_lockptr(m), 0, file, line);
 }
 
 /*
  * Function form of dropping the page lock: releases the mutex returned by
  * vm_page_lockptr(m), forwarding the caller's file and line.
  */
 void
 vm_page_unlock_KBI(vm_page_t m, const char *file, int line)
 {
 
 	mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line);
 }
 
 /*
  * Function form of trying to take the page lock: attempts to acquire the
  * mutex returned by vm_page_lockptr(m) and returns the result of
  * mtx_trylock_flags_() unchanged.
  */
 int
 vm_page_trylock_KBI(vm_page_t m, const char *file, int line)
 {
 
 	return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line));
 }
 
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 /*
  * Assert that the page lock for "m" is owned (MA_OWNED), reporting the
  * caller's file and line.
  */
 void
 vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line)
 {
 
 	vm_page_lock_assert_KBI(m, MA_OWNED, file, line);
 }
 
 /*
  * Run mtx_assert_() with assertion "a" on the mutex returned by
  * vm_page_lockptr(m), reporting the caller's file and line.
  */
 void
 vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line)
 {
 
 	mtx_assert_(vm_page_lockptr(m), a, file, line);
 }
 #endif
 
 #ifdef INVARIANTS
 /*
  * Certain of the page's fields may only be modified by the holder of a
  * page busy or an object busy.  For a page that belongs to an object and
  * is not itself busied, assert that the object is busy.
  */
 void
 vm_page_object_busy_assert(vm_page_t m)
 {
 
 	/* Nothing to check for an unowned page or an already busied one. */
 	if (m->object == NULL || vm_page_busied(m))
 		return;
 	VM_OBJECT_ASSERT_BUSY(m->object);
 }
 
 /*
  * Check the preconditions for setting PGA_WRITEABLE.  Does nothing unless
  * "bits" includes PGA_WRITEABLE.
  */
 void
 vm_page_assert_pga_writeable(vm_page_t m, uint16_t bits)
 {
 
 	if ((bits & PGA_WRITEABLE) != 0) {
 		/*
 		 * The PGA_WRITEABLE flag can only be set if the page is
 		 * managed, and is either exclusively busied or its object
 		 * is busy.  Currently, this flag is only set by
 		 * pmap_enter().
 		 */
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("PGA_WRITEABLE on unmanaged page"));
 		if (!vm_page_xbusied(m))
 			VM_OBJECT_ASSERT_BUSY(m->object);
 	}
 }
 #endif
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <sys/kernel.h>
 
 #include <ddb/ddb.h>
 
 /*
  * DDB "show page" command: print the system-wide page counts (free,
  * inactive, active, laundry, wired) and the free/inactive thresholds held
  * in vm_cnt.
  */
 DB_SHOW_COMMAND(page, vm_page_print_page_info)
 {
 
 	db_printf("vm_cnt.v_free_count: %d\n", vm_free_count());
 	db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count());
 	db_printf("vm_cnt.v_active_count: %d\n", vm_active_count());
 	db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count());
 	db_printf("vm_cnt.v_wire_count: %d\n", vm_wire_count());
 	db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
 	db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
 	db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target);
 	db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target);
 }
 
 /*
  * DDB "show pageq" command: print the global free page count, then one
  * line per VM domain with its page count, free count and the length of
  * each of its page queues (active, inactive, laundry, unswappable).
  */
 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
 {
 	int dom;
 
 	db_printf("pq_free %d\n", vm_free_count());
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		db_printf(
     "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n",
 		    dom,
 		    vm_dom[dom].vmd_page_count,
 		    vm_dom[dom].vmd_free_count,
 		    vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
 		    vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
 		    vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt,
 		    vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt);
 	}
 }
 
 /*
  * DDB "show pginfo" command: dump the fields of a single vm_page.
  *
  * An address is required.  How it is interpreted depends on the command
  * modifiers:
  *	v	kernel virtual address, translated with pmap_kextract()
  *	p	physical address
  *	(none)	address of the struct vm_page itself
  * If both are given, 'v' takes precedence.
  */
 DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo)
 {
 	vm_page_t m;
 	boolean_t phys, virt;
 
 	if (!have_addr) {
 		db_printf("show pginfo addr\n");
 		return;
 	}
 
 	phys = strchr(modif, 'p') != NULL;
 	virt = strchr(modif, 'v') != NULL;
 	if (virt)
 		m = PHYS_TO_VM_PAGE(pmap_kextract(addr));
 	else if (phys)
 		m = PHYS_TO_VM_PAGE(addr);
 	else
 		m = (vm_page_t)addr;
 	/*
 	 * The width of the valid and dirty fields (vm_page_bits_t) depends
 	 * on PAGE_SIZE and can be as large as 64 bits, so they are widened
 	 * to uintmax_t and printed with %jx, like pindex and phys_addr.
 	 */
 	db_printf(
     "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n"
     "  af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%jx dirty 0x%jx\n",
 	    m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
 	    m->a.queue, m->ref_count, m->a.flags, m->oflags,
 	    m->flags, m->a.act_count, m->busy_lock, (uintmax_t)m->valid,
 	    (uintmax_t)m->dirty);
 }
 #endif /* DDB */
Index: projects/clang1000-import/sys/vm/vm_page.h
===================================================================
--- projects/clang1000-import/sys/vm/vm_page.h	(revision 357389)
+++ projects/clang1000-import/sys/vm/vm_page.h	(revision 357390)
@@ -1,966 +1,965 @@
 /*-
  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_page.h	8.2 (Berkeley) 12/13/93
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  * $FreeBSD$
  */
 
 /*
  *	Resident memory system definitions.
  */
 
 #ifndef	_VM_PAGE_
 #define	_VM_PAGE_
 
 #include <vm/pmap.h>
 
 /*
  *	Management of resident (logical) pages.
  *
  *	A small structure is kept for each resident
  *	page, indexed by page number.  Each structure
  *	is an element of several collections:
  *
  *		A radix tree used to quickly
  *		perform object/offset lookups
  *
  *		A list of all pages for a given object,
  *		so they can be quickly deactivated at
  *		time of deallocation.
  *
  *		An ordered list of pages due for pageout.
  *
  *	In addition, the structure contains the object
  *	and offset to which this page belongs (for pageout),
  *	and sundry status bits.
  *
  *	In general, operations on this structure's mutable fields are
  *	synchronized using either one of or a combination of locks.  If a
  *	field is annotated with two of these locks then holding either is
  *	sufficient for read access but both are required for write access.
  *	The physical address of a page is used to select its page lock from
  *	a pool.  The queue lock for a page depends on the value of its queue
  *	field and is described in detail below.
  *
  *	The following annotations are possible:
  *	(A) the field is atomic and may require additional synchronization.
  *	(B) the page busy lock.
  *	(C) the field is immutable.
  *	(F) the per-domain lock for the free queues
  *	(M) Machine dependent, defined by pmap layer.
  *	(O) the object that the page belongs to.
  *	(P) the page lock.
  *	(Q) the page's queue lock.
  *
  *	The busy lock is an embedded reader-writer lock that protects the
  *	page's contents and identity (i.e., its <object, pindex> tuple) as
  *	well as certain valid/dirty modifications.  To avoid bloating the
  *	page structure, the busy lock lacks some of the features available in
  *	the kernel's general-purpose synchronization primitives.  As a result,
  *	busy lock ordering rules are not verified, lock recursion is not
  *	detected, and an attempt to xbusy a busy page or sbusy an xbusy page
  *	will trigger a panic rather than causing the thread to block.
  *	vm_page_sleep_if_busy() can be used to sleep until the page's busy
  *	state changes, after which the caller must re-lookup the page and
  *	re-evaluate its state.  vm_page_busy_acquire() will block until
  *	the lock is acquired.
  *
  *	The valid field is protected by the page busy lock (B) and object
  *	lock (O).  Transitions from invalid to valid are generally done
  *	via I/O or zero filling and do not require the object lock.
  *	These must be protected with the busy lock to prevent page-in or
  *	creation races.  Page invalidation generally happens as a result
  *	of truncate or msync.  When invalidated, pages must not be present
  *	in pmap and must hold the object lock to prevent concurrent
  *	speculative read-only mappings that do not require busy.  I/O
  *	routines may check for validity without a lock if they are prepared
  *	to handle invalidation races with higher level locks (vnode) or are
  *	unconcerned with races so long as they hold a reference to prevent
  *	recycling.  When a valid bit is set while holding a shared busy
  *	lock (A) atomic operations are used to protect against concurrent
  *	modification.
  *
  *	In contrast, the synchronization of accesses to the page's
  *	dirty field is a mix of machine dependent (M) and busy (B).  In
  *	the machine-independent layer, the page busy must be held to
  *	operate on the field.  However, the pmap layer is permitted to
  *	set all bits within the field without holding that lock.  If the
  *	underlying architecture does not support atomic read-modify-write
  *	operations on the field's type, then the machine-independent
  *	layer uses a 32-bit atomic on the aligned 32-bit word that
  *	contains the dirty field.  In the machine-independent layer,
  *	the implementation of read-modify-write operations on the
  *	field is encapsulated in vm_page_clear_dirty_mask().  An
  *	exclusive busy lock combined with pmap_remove_{write/all}() is the
  *	only way to ensure a page can not become dirty.  I/O generally
  *	removes the page from pmap to ensure exclusive access and atomic
  *	writes.
  *
  *	The ref_count field tracks references to the page.  References that
  *	prevent the page from being reclaimable are called wirings and are
  *	counted in the low bits of ref_count.  The containing object's
  *	reference, if one exists, is counted using the VPRC_OBJREF bit in the
  *	ref_count field.  Additionally, the VPRC_BLOCKED bit is used to
  *	atomically check for wirings and prevent new wirings via
  *	pmap_extract_and_hold().  When a page belongs to an object, it may be
  *	wired only when the object is locked, or the page is busy, or by
  *	pmap_extract_and_hold().  As a result, if the object is locked and the
  *	page is not busy (or is exclusively busied by the current thread), and
  *	the page is unmapped, its wire count will not increase.  The ref_count
  *	field is updated using atomic operations in most cases, except when it
  *	is known that no other references to the page exist, such as in the page
  *	allocator.  A page may be present in the page queues, or even actively
  *	scanned by the page daemon, without an explicitly counted reference.
  *	The page daemon must therefore handle the possibility of a concurrent
  *	free of the page.
  *
  *	The queue state of a page consists of the queue and act_count fields of
  *	its atomically updated state, and the subset of atomic flags specified
  *	by PGA_QUEUE_STATE_MASK.  The queue field contains the page's page queue
  *	index, or PQ_NONE if it does not belong to a page queue.  To modify the
  *	queue field, the page queue lock corresponding to the old value must be
  *	held, unless that value is PQ_NONE, in which case the queue index must
  *	be updated using an atomic RMW operation.  There is one exception to
  *	this rule: the page daemon may transition the queue field from
  *	PQ_INACTIVE to PQ_NONE immediately prior to freeing the page during an
  *	inactive queue scan.  At that point the page is already dequeued and no
  *	other references to that vm_page structure can exist.  The PGA_ENQUEUED
  *	flag, when set, indicates that the page structure is physically inserted
  *	into the queue corresponding to the page's queue index, and may only be
  *	set or cleared with the corresponding page queue lock held.
  *
  *	To avoid contention on page queue locks, page queue operations (enqueue,
  *	dequeue, requeue) are batched using fixed-size per-CPU queues.  A
  *	deferred operation is requested by setting one of the flags in
  *	PGA_QUEUE_OP_MASK and inserting an entry into a batch queue.  When a
  *	queue is full, an attempt to insert a new entry will lock the page
  *	queues and trigger processing of the pending entries.  The
  *	type-stability of vm_page structures is crucial to this scheme since the
  *	processing of entries in a given batch queue may be deferred
  *	indefinitely.  In particular, a page may be freed with pending batch
  *	queue entries.  The page queue operation flags must be set using atomic
  *	RMW operations.
  */
 
 /*
  * vm_page_bits_t is the type of the "valid" and "dirty" bitmaps in
  * struct vm_page, which need one bit per DEV_BSIZE chunk of a page.
  * VM_PAGE_BITS_ALL is the value of that type with every bit set.  The
  * width is chosen from PAGE_SIZE; no definition is supplied for page
  * sizes other than the ones listed here.
  */
 #if PAGE_SIZE == 4096
 #define VM_PAGE_BITS_ALL 0xffu
 typedef uint8_t vm_page_bits_t;
 #elif PAGE_SIZE == 8192
 #define VM_PAGE_BITS_ALL 0xffffu
 typedef uint16_t vm_page_bits_t;
 #elif PAGE_SIZE == 16384
 #define VM_PAGE_BITS_ALL 0xffffffffu
 typedef uint32_t vm_page_bits_t;
 #elif PAGE_SIZE == 32768
 #define VM_PAGE_BITS_ALL 0xfffffffffffffffflu
 typedef uint64_t vm_page_bits_t;
 #endif
 
 /*
  * The part of a page's state that is read and updated atomically as a
  * unit.  The three named fields overlay the single 32-bit word _bits,
  * which is the form handed to the 32-bit atomic operations.
  */
 typedef union vm_page_astate {
 	struct {
 		uint16_t flags;		/* PGA_* flags */
 		uint8_t	queue;		/* page queue index, PQ_* */
 		uint8_t act_count;	/* part of the queue state */
 	};
 	uint32_t _bits;			/* all of the above as one word */
 } vm_page_astate_t;
 
 /*
  * Descriptor kept for each resident page.  The letters in parentheses
  * after each field name the lock(s) or rule that synchronize access to
  * it; they refer to the annotation legend in the large comment above
  * ((A) atomic, (B) busy lock, (C) immutable, (F) free queue lock,
  * (M) machine dependent, (O) object lock, (P) page lock, (Q) queue lock).
  */
 struct vm_page {
 	union {
 		TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */
 		struct {
 			SLIST_ENTRY(vm_page) ss; /* private slists */
 		} s;
 		struct {
 			u_long p;
 			u_long v;
 		} memguard;
 		struct {
 			void *slab;
 			void *zone;
 		} uma;
 	} plinks;
 	TAILQ_ENTRY(vm_page) listq;	/* pages in same object (O) */
 	vm_object_t object;		/* which object am I in (O) */
 	vm_pindex_t pindex;		/* offset into object (O,P) */
 	vm_paddr_t phys_addr;		/* physical address of page (C) */
 	struct md_page md;		/* machine dependent stuff */
 	u_int ref_count;		/* page references (A) */
 	volatile u_int busy_lock;	/* busy owners lock */
 	union vm_page_astate a;		/* state accessed atomically */
 	uint8_t order;			/* index of the buddy queue (F) */
 	uint8_t pool;			/* vm_phys freepool index (F) */
 	uint8_t flags;			/* page PG_* flags (P) */
 	uint8_t oflags;			/* page VPO_* flags (O) */
 	int8_t psind;			/* pagesizes[] index (O) */
 	int8_t segind;			/* vm_phys segment index (C) */
 	/* NOTE that these must support one bit per DEV_BSIZE in a page */
 	/* so, on normal X86 kernels, they must be at least 8 bits wide */
 	vm_page_bits_t valid;		/* valid DEV_BSIZE chunk map (O,B) */
 	vm_page_bits_t dirty;		/* dirty DEV_BSIZE chunk map (M,B) */
 };
 
 /*
  * Special bits used in the ref_count field.
  *
  * ref_count is normally used to count wirings that prevent the page from being
  * reclaimed, but also supports several special types of references that do not
  * prevent reclamation.  Accesses to the ref_count field must be atomic unless
  * the page is unallocated.
  *
  * VPRC_OBJREF is the reference held by the containing object.  It can be set
  * or cleared only when the corresponding object's write lock is held.
  *
  * VPRC_BLOCKED is used to atomically block wirings via pmap lookups while
  * attempting to tear down all mappings of a given page.  The page lock and
  * object write lock must both be held in order to set or clear this bit.
  */
 /* The two high bits of ref_count are flags; the rest is the wire count. */
 #define	VPRC_OBJREF	0x80000000u	/* object reference, cleared with (O) */
 #define	VPRC_BLOCKED	0x40000000u	/* mappings are being removed */
 #define	VPRC_WIRE_COUNT_MAX	(~(VPRC_OBJREF | VPRC_BLOCKED))
 #define	VPRC_WIRE_COUNT(c)	((c) & VPRC_WIRE_COUNT_MAX)
 
 /*
  * Page flags stored in oflags:
  *
  * Access to these page flags is synchronized by the lock on the object
  * containing the page (O).
  *
  * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG)
  * 	 indicates that the page is not under PV management but
  * 	 otherwise should be treated as a normal page.  Pages not
  * 	 under PV management cannot be paged out via the
  * 	 object/vm_page_t because there is no knowledge of their pte
  * 	 mappings, and such pages are also not on any PQ queue.
  *
  */
 #define	VPO_KMEM_EXEC	0x01		/* kmem mapping allows execution */
 #define	VPO_SWAPSLEEP	0x02		/* waiting for swap to finish */
 #define	VPO_UNMANAGED	0x04		/* no PV management for page */
 #define	VPO_SWAPINPROG	0x08		/* swap I/O in progress on page */
 
 /*
  * Busy page implementation details.
  * The algorithm is taken mostly from the rwlock(9) and sx(9) lock
  * implementations, although support for owner identity is removed because of
  * size constraints.  Checks on lock recursion are therefore not possible, and
  * the effectiveness of lock assertions is somewhat reduced.
  */
 /* Flag bits kept in the low-order bits of busy_lock. */
 #define	VPB_BIT_SHARED		0x01
 #define	VPB_BIT_EXCLUSIVE	0x02
 #define	VPB_BIT_WAITERS		0x04
 #define	VPB_BIT_FLAGMASK						\
 	(VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS)
 
 /*
  * The count of shared busy holders is stored above the flag bits.
  * VPB_SHARERS() extracts that count from a lock word, VPB_SHARERS_WORD()
  * builds a shared-busy lock word from a count, and VPB_ONE_SHARER is the
  * increment that corresponds to a single holder.
  */
 #define	VPB_SHARERS_SHIFT	3
 #define	VPB_SHARERS(x)							\
 	(((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT)
 #define	VPB_SHARERS_WORD(x)	((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED)
 #define	VPB_ONE_SHARER		(1 << VPB_SHARERS_SHIFT)
 
 /*
  * Lock word values for an exclusively busied page.  With INVARIANTS the
  * value stored by the current thread also carries the non-flag bits of
  * curthread's address, so that ownership can be asserted; otherwise it is
  * just the exclusive bit.
  */
 #define	VPB_SINGLE_EXCLUSIVE	VPB_BIT_EXCLUSIVE
 #ifdef INVARIANTS
 #define	VPB_CURTHREAD_EXCLUSIVE						\
 	(VPB_BIT_EXCLUSIVE | ((u_int)(uintptr_t)curthread & ~VPB_BIT_FLAGMASK))
 #else
 #define	VPB_CURTHREAD_EXCLUSIVE	VPB_SINGLE_EXCLUSIVE
 #endif
 
 /* An unbusied page is encoded as shared busy with zero sharers. */
 #define	VPB_UNBUSIED		VPB_SHARERS_WORD(0)
 
 /*
  * Page queue indices, as stored in the "queue" field of a page's atomic
  * state.  PQ_NONE means that the page does not belong to a page queue;
  * PQ_COUNT is the number of real queues (indices 0 through PQ_COUNT - 1).
  */
 #define	PQ_NONE		255
 #define	PQ_INACTIVE	0
 #define	PQ_ACTIVE	1
 #define	PQ_LAUNDRY	2
 #define	PQ_UNSWAPPABLE	3
 #define	PQ_COUNT	4
 
 #ifndef VM_PAGE_HAVE_PGLIST
 TAILQ_HEAD(pglist, vm_page);
 #define VM_PAGE_HAVE_PGLIST
 #endif
 SLIST_HEAD(spglist, vm_page);
 
 #ifdef _KERNEL
 extern vm_page_t bogus_page;
 #endif	/* _KERNEL */
 
 extern struct mtx_padalign pa_lock[];
 
 /*
  * Page locks are taken from the pa_lock[] pool and are selected by
  * physical address: the address is shifted right by PDRSHIFT and reduced
  * modulo PA_LOCK_COUNT.  All addresses within the same aligned
  * 2^PDRSHIFT-byte region therefore map to the same lock.  PDRSHIFT comes
  * from PDR_SHIFT on arm and defaults to 21 when the platform has not
  * already defined it.
  */
 #if defined(__arm__)
 #define	PDRSHIFT	PDR_SHIFT
 #elif !defined(PDRSHIFT)
 #define PDRSHIFT	21
 #endif
 
 #define	pa_index(pa)	((pa) >> PDRSHIFT)
 #define	PA_LOCKPTR(pa)	((struct mtx *)(&pa_lock[pa_index(pa) % PA_LOCK_COUNT]))
 #define	PA_LOCKOBJPTR(pa)	((struct lock_object *)PA_LOCKPTR((pa)))
 #define	PA_LOCK(pa)	mtx_lock(PA_LOCKPTR(pa))
 #define	PA_TRYLOCK(pa)	mtx_trylock(PA_LOCKPTR(pa))
 #define	PA_UNLOCK(pa)	mtx_unlock(PA_LOCKPTR(pa))
 /*
  * Release the lock for "pa" if "pa" is nonzero, then reset "pa" to zero.
  * The argument is evaluated more than once and is assigned to, so it must
  * be a side-effect-free modifiable lvalue.
  */
 #define	PA_UNLOCK_COND(pa) 			\
 	do {		   			\
 		if ((pa) != 0) {		\
 			PA_UNLOCK((pa));	\
 			(pa) = 0;		\
 		}				\
 	} while (0)
 
 #define	PA_LOCK_ASSERT(pa, a)	mtx_assert(PA_LOCKPTR(pa), (a))
 
 /*
  * Page lock operations.  Kernel modules that are not tied to the kernel
  * (KLD_MODULE without KLD_TIED) call the out-of-line *_KBI functions;
  * all other code expands directly to mutex operations on the lock
  * selected by the page's physical address.  Note that vm_page_lockptr()
  * is defined only in the latter case.
  */
 #if defined(KLD_MODULE) && !defined(KLD_TIED)
 #define	vm_page_lock(m)		vm_page_lock_KBI((m), LOCK_FILE, LOCK_LINE)
 #define	vm_page_unlock(m)	vm_page_unlock_KBI((m), LOCK_FILE, LOCK_LINE)
 #define	vm_page_trylock(m)	vm_page_trylock_KBI((m), LOCK_FILE, LOCK_LINE)
 #else	/* !KLD_MODULE */
 #define	vm_page_lockptr(m)	(PA_LOCKPTR(VM_PAGE_TO_PHYS((m))))
 #define	vm_page_lock(m)		mtx_lock(vm_page_lockptr((m)))
 #define	vm_page_unlock(m)	mtx_unlock(vm_page_lockptr((m)))
 #define	vm_page_trylock(m)	mtx_trylock(vm_page_lockptr((m)))
 #endif
 /* Page lock assertions; these expand to nothing without INVARIANTS. */
 #if defined(INVARIANTS)
 #define	vm_page_assert_locked(m)		\
     vm_page_assert_locked_KBI((m), __FILE__, __LINE__)
 #define	vm_page_lock_assert(m, a)		\
     vm_page_lock_assert_KBI((m), (a), __FILE__, __LINE__)
 #else
 #define	vm_page_assert_locked(m)
 #define	vm_page_lock_assert(m, a)
 #endif
 
 /*
  * The vm_page's aflags are updated using atomic operations.  To set or clear
  * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear()
  * must be used.  Neither these flags nor these functions are part of the KBI.
  *
  * PGA_REFERENCED may be cleared only if the page is locked.  It is set by
  * both the MI and MD VM layers.  However, kernel loadable modules should not
  * directly set this flag.  They should call vm_page_reference() instead.
  *
  * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter().
  * When it does so, the object must be locked, or the page must be
  * exclusive busied.  The MI VM layer must never access this flag
  * directly.  Instead, it should call pmap_page_is_write_mapped().
  *
  * PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has
  * at least one executable mapping.  It is not consumed by the MI VM layer.
  *
  * PGA_NOSYNC must be set and cleared with the page busy lock held.
  *
  * PGA_ENQUEUED is set and cleared when a page is inserted into or removed
  * from a page queue, respectively.  It determines whether the plinks.q field
  * of the page is valid.  To set or clear this flag, the queue lock for the
  * page must be held: the page queue lock corresponding to the page's "queue"
  * field if its value is not PQ_NONE, and the page lock otherwise.
  *
  * PGA_DEQUEUE is set when the page is scheduled to be dequeued from a page
  * queue, and cleared when the dequeue request is processed.  A page may
  * have PGA_DEQUEUE set and PGA_ENQUEUED cleared, for instance if a dequeue
  * is requested after the page is scheduled to be enqueued but before it is
  * actually inserted into the page queue.  For allocated pages, the page lock
  * must be held to set this flag, but it may be set by vm_page_free_prep()
  * without the page lock held.  The page queue lock must be held to clear the
  * PGA_DEQUEUE flag.
  *
  * PGA_REQUEUE is set when the page is scheduled to be enqueued or requeued
  * in its page queue.  The page lock must be held to set this flag, and the
  * queue lock for the page must be held to clear it.
  *
  * PGA_REQUEUE_HEAD is a special flag for enqueuing pages near the head of
  * the inactive queue, thus bypassing LRU.  The page lock must be held to
  * set this flag, and the queue lock for the page must be held to clear it.
  *
  * PGA_SWAP_FREE is used to defer freeing swap space to the pageout daemon
  * when the context that dirties the page does not have the object write lock
  * held.
  */
 #define	PGA_WRITEABLE	0x0001		/* page may be mapped writeable */
 #define	PGA_REFERENCED	0x0002		/* page has been referenced */
 #define	PGA_EXECUTABLE	0x0004		/* page may be mapped executable */
 #define	PGA_ENQUEUED	0x0008		/* page is enqueued in a page queue */
 #define	PGA_DEQUEUE	0x0010		/* page is due to be dequeued */
 #define	PGA_REQUEUE	0x0020		/* page is due to be requeued */
 #define	PGA_REQUEUE_HEAD 0x0040		/* page requeue should bypass LRU */
 #define	PGA_NOSYNC	0x0080		/* do not collect for syncer */
 #define	PGA_SWAP_FREE	0x0100		/* page with swap space was dirtied */
 #define	PGA_SWAP_SPACE	0x0200		/* page has allocated swap space */
 
 /*
  * PGA_QUEUE_OP_MASK covers the flags that request a deferred page queue
  * operation.  PGA_QUEUE_STATE_MASK adds PGA_ENQUEUED and so covers every
  * atomic flag that is part of a page's queue state.
  */
 #define	PGA_QUEUE_OP_MASK	(PGA_DEQUEUE | PGA_REQUEUE | PGA_REQUEUE_HEAD)
 #define	PGA_QUEUE_STATE_MASK	(PGA_ENQUEUED | PGA_QUEUE_OP_MASK)
 
 /*
  * Page flags.  If changed at any other time than page allocation or
  * freeing, the modification must be protected by the vm_page lock.
  *
  * The PG_PCPU_CACHE flag is set at allocation time if the page was
  * allocated from a per-CPU cache.  It is cleared the next time that the
  * page is allocated from the physical memory allocator.
  */
 #define	PG_PCPU_CACHE	0x01		/* was allocated from per-CPU caches */
 #define	PG_FICTITIOUS	0x02		/* physical page doesn't exist */
 #define	PG_ZERO		0x04		/* page is zeroed */
 #define	PG_MARKER	0x08		/* special queue marker page */
 #define	PG_NODUMP	0x10		/* don't include this page in a dump */
 
 /*
  * Misc constants.
  */
 #define ACT_DECLINE		1
 #define ACT_ADVANCE		3
 #define ACT_INIT		5
 #define ACT_MAX			64
 
 #ifdef _KERNEL
 
 #include <sys/systm.h>
 
 #include <machine/atomic.h>
 
 /*
  * Each pageable resident page falls into one of five lists:
  *
  *	free
  *		Available for allocation now.
  *
  *	inactive
  *		Low activity, candidates for reclamation.
  *		This list is approximately LRU ordered.
  *
  *	laundry
  *		This is the list of pages that should be
  *		paged out next.
  *
  *	unswappable
  *		Dirty anonymous pages that cannot be paged
  *		out because no swap device is configured.
  *
  *	active
  *		Pages that are "active", i.e., they have been
  *		recently referenced.
  *
  */
 
 extern vm_page_t vm_page_array;		/* First resident page in table */
 extern long vm_page_array_size;		/* number of vm_page_t's */
 extern long first_page;			/* first physical page number */
 
 #define VM_PAGE_TO_PHYS(entry)	((entry)->phys_addr)
 
 /*
  * PHYS_TO_VM_PAGE() returns the vm_page_t object that represents a memory
  * page to which the given physical address belongs. The correct vm_page_t
  * object is returned for addresses that are not page-aligned.
  */
 vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
 
 /*
  * Page allocation parameters for vm_page for the functions
  * vm_page_alloc(), vm_page_grab(), vm_page_alloc_contig() and
  * vm_page_alloc_freelist().  Some functions support only a subset
  * of the flags, and ignore others, see the flags legend.
  *
  * The meaning of VM_ALLOC_ZERO differs slightly between the vm_page_alloc*()
  * and the vm_page_grab*() functions.  See these functions for details.
  *
  * Bits 0 - 1 define class.
  * Bits 2 - 15 dedicated for flags.
  * Legend:
  * (a) - vm_page_alloc() supports the flag.
  * (c) - vm_page_alloc_contig() supports the flag.
  * (f) - vm_page_alloc_freelist() supports the flag.
  * (g) - vm_page_grab() supports the flag.
  * (p) - vm_page_grab_pages() supports the flag.
  * Bits above 15 define the count of additional pages that the caller
  * intends to allocate.
  */
 #define VM_ALLOC_NORMAL		0
 #define VM_ALLOC_INTERRUPT	1
 #define VM_ALLOC_SYSTEM		2
 #define	VM_ALLOC_CLASS_MASK	3
 #define	VM_ALLOC_WAITOK		0x0008	/* (acf) Sleep and retry */
 #define	VM_ALLOC_WAITFAIL	0x0010	/* (acf) Sleep and return error */
 #define	VM_ALLOC_WIRED		0x0020	/* (acfgp) Allocate a wired page */
 #define	VM_ALLOC_ZERO		0x0040	/* (acfgp) Allocate a prezeroed page */
 #define	VM_ALLOC_NOOBJ		0x0100	/* (acg) No associated object */
 #define	VM_ALLOC_NOBUSY		0x0200	/* (acgp) Do not excl busy the page */
 #define	VM_ALLOC_NOCREAT	0x0400	/* (gp) Don't create a page */
 #define	VM_ALLOC_IGN_SBUSY	0x1000	/* (gp) Ignore shared busy flag */
 #define	VM_ALLOC_NODUMP		0x2000	/* (ag) don't include in dump */
 #define	VM_ALLOC_SBUSY		0x4000	/* (acgp) Shared busy the page */
 #define	VM_ALLOC_NOWAIT		0x8000	/* (acfgp) Do not sleep */
 #define	VM_ALLOC_COUNT_SHIFT	16
 #define	VM_ALLOC_COUNT(count)	((count) << VM_ALLOC_COUNT_SHIFT)
 
 #ifdef M_NOWAIT
 /*
  * Convert malloc(9)-style M_* flags into a VM_ALLOC_* page allocation
  * request.  M_USE_RESERVE selects the VM_ALLOC_INTERRUPT class and
  * anything else selects VM_ALLOC_SYSTEM; M_ZERO, M_NODUMP, M_NOWAIT and
  * M_WAITOK are each carried over to the like-named VM_ALLOC_* flag.
  * Only available when the malloc flags are already defined.
  */
 static inline int
 malloc2vm_flags(int malloc_flags)
 {
 	int req;
 
 	KASSERT((malloc_flags & M_USE_RESERVE) == 0 ||
 	    (malloc_flags & M_NOWAIT) != 0,
 	    ("M_USE_RESERVE requires M_NOWAIT"));
 
 	/* Pick the allocation class first ... */
 	if ((malloc_flags & M_USE_RESERVE) != 0)
 		req = VM_ALLOC_INTERRUPT;
 	else
 		req = VM_ALLOC_SYSTEM;
 
 	/* ... then OR in one modifier per malloc flag that is set. */
 	req |= (malloc_flags & M_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
 	req |= (malloc_flags & M_NODUMP) != 0 ? VM_ALLOC_NODUMP : 0;
 	req |= (malloc_flags & M_NOWAIT) != 0 ? VM_ALLOC_NOWAIT : 0;
 	req |= (malloc_flags & M_WAITOK) != 0 ? VM_ALLOC_WAITOK : 0;
 	return (req);
 }
 #endif
 
 /*
  * Predicates supported by vm_page_ps_test():
  *
  *	PS_ALL_DIRTY is true only if the entire (super)page is dirty.
  *	However, it can be spuriously false when the (super)page has become
  *	dirty in the pmap but that information has not been propagated to the
  *	machine-independent layer.
  */
 #define	PS_ALL_DIRTY	0x1
 #define	PS_ALL_VALID	0x2
 #define	PS_NONE_BUSY	0x4
 
 bool vm_page_busy_acquire(vm_page_t m, int allocflags);
 void vm_page_busy_downgrade(vm_page_t m);
 int vm_page_busy_tryupgrade(vm_page_t m);
 void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared);
 void vm_page_free(vm_page_t m);
 void vm_page_free_zero(vm_page_t m);
 
 void vm_page_activate (vm_page_t);
 void vm_page_advise(vm_page_t m, int advice);
 vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int);
 vm_page_t vm_page_alloc_domain(vm_object_t, vm_pindex_t, int, int);
 vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t);
 vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int,
     vm_page_t);
 vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr);
 vm_page_t vm_page_alloc_contig_domain(vm_object_t object,
     vm_pindex_t pindex, int domain, int req, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
 vm_page_t vm_page_alloc_freelist(int, int);
 vm_page_t vm_page_alloc_freelist_domain(int, int, int);
 void vm_page_bits_set(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t set);
 bool vm_page_blacklist_add(vm_paddr_t pa, bool verbose);
-void vm_page_change_lock(vm_page_t m, struct mtx **mtx);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count);
 int vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
     int allocflags);
 void vm_page_deactivate(vm_page_t);
 void vm_page_deactivate_noreuse(vm_page_t);
 void vm_page_dequeue(vm_page_t m);
 void vm_page_dequeue_deferred(vm_page_t m);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
 int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
 void vm_page_invalid(vm_page_t m);
 void vm_page_launder(vm_page_t m);
 vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
 vm_page_t vm_page_next(vm_page_t m);
 void vm_page_pqbatch_drain(void);
 void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue);
 bool vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old,
     vm_page_astate_t new);
 vm_page_t vm_page_prev(vm_page_t m);
 bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m);
 void vm_page_putfake(vm_page_t m);
 void vm_page_readahead_finish(vm_page_t m);
 bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 void vm_page_reference(vm_page_t m);
 #define	VPR_TRYFREE	0x01
 #define	VPR_NOREUSE	0x02
 void vm_page_release(vm_page_t m, int flags);
 void vm_page_release_locked(vm_page_t m, int flags);
 bool vm_page_remove(vm_page_t);
 bool vm_page_remove_xbusy(vm_page_t);
 int vm_page_rename(vm_page_t, vm_object_t, vm_pindex_t);
 void vm_page_replace(vm_page_t mnew, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mold);
 int vm_page_sbusied(vm_page_t m);
 vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start,
     vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options);
 vm_page_bits_t vm_page_set_dirty(vm_page_t m);
 void vm_page_set_valid_range(vm_page_t m, int base, int size);
 int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
 int vm_page_sleep_if_xbusy(vm_page_t m, const char *msg);
 vm_offset_t vm_page_startup(vm_offset_t vaddr);
 void vm_page_sunbusy(vm_page_t m);
 bool vm_page_try_remove_all(vm_page_t m);
 bool vm_page_try_remove_write(vm_page_t m);
 int vm_page_trysbusy(vm_page_t m);
 int vm_page_tryxbusy(vm_page_t m);
 void vm_page_unhold_pages(vm_page_t *ma, int count);
 void vm_page_unswappable(vm_page_t m);
 void vm_page_unwire(vm_page_t m, uint8_t queue);
 bool vm_page_unwire_noq(vm_page_t m);
 void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
 void vm_page_wire(vm_page_t);
 bool vm_page_wire_mapped(vm_page_t m);
 void vm_page_xunbusy_hard(vm_page_t m);
 void vm_page_xunbusy_hard_unchecked(vm_page_t m);
 void vm_page_set_validclean (vm_page_t, int, int);
 void vm_page_clear_dirty(vm_page_t, int, int);
 void vm_page_set_invalid(vm_page_t, int, int);
 void vm_page_valid(vm_page_t m);
 int vm_page_is_valid(vm_page_t, int, int);
 void vm_page_test_dirty(vm_page_t);
 vm_page_bits_t vm_page_bits(int base, int size);
 void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
 void vm_page_free_pages_toq(struct spglist *free, bool update_wire_count);
 
 void vm_page_dirty_KBI(vm_page_t m);
 void vm_page_lock_KBI(vm_page_t m, const char *file, int line);
 void vm_page_unlock_KBI(vm_page_t m, const char *file, int line);
 int vm_page_trylock_KBI(vm_page_t m, const char *file, int line);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line);
 void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line);
 #endif
 
 /*
  * Busy state assertions.  Each expands to a KASSERT() on the named busy
  * predicate whose message reports the page and the file and line of the
  * assertion.
  */
 #define	vm_page_assert_busied(m)					\
 	KASSERT(vm_page_busied(m),					\
 	    ("vm_page_assert_busied: page %p not busy @ %s:%d", \
 	    (m), __FILE__, __LINE__))
 
 #define	vm_page_assert_sbusied(m)					\
 	KASSERT(vm_page_sbusied(m),					\
 	    ("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \
 	    (m), __FILE__, __LINE__))
 
 #define	vm_page_assert_unbusied(m)					\
 	KASSERT(!vm_page_busied(m),					\
 	    ("vm_page_assert_unbusied: page %p busy @ %s:%d",		\
 	    (m), __FILE__, __LINE__))
 
 /*
  * Assert that the page is exclusively busied, without checking which
  * thread holds it.
  */
 #define	vm_page_assert_xbusied_unchecked(m) do {			\
 	KASSERT(vm_page_xbusied(m),					\
 	    ("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \
 	    (m), __FILE__, __LINE__));					\
 } while (0)
 
 /*
  * Assert that the page is exclusively busied and that, ignoring the
  * waiters bit, its lock word equals VPB_CURTHREAD_EXCLUSIVE.  The macro
  * argument is parenthesized at every use so that an expression such as
  * "p + 1" or "*mp" can be passed safely.
  */
 #define	vm_page_assert_xbusied(m) do {					\
 	vm_page_assert_xbusied_unchecked(m);				\
 	KASSERT(((m)->busy_lock & ~VPB_BIT_WAITERS) ==			\
 	    VPB_CURTHREAD_EXCLUSIVE,					\
 	    ("vm_page_assert_xbusied: page %p busy_lock %#x not owned"	\
 	    " by me @ %s:%d",						\
 	    (m), (m)->busy_lock, __FILE__, __LINE__));			\
 } while (0)
 
 /* True if the page is busied in any mode (lock word is not VPB_UNBUSIED). */
 #define	vm_page_busied(m)						\
 	((m)->busy_lock != VPB_UNBUSIED)
 
 /* Shared busy the page; panic if vm_page_trysbusy() does not succeed. */
 #define	vm_page_sbusy(m) do {						\
 	if (!vm_page_trysbusy(m))					\
 		panic("%s: page %p failed shared busying", __func__,	\
 		    (m));						\
 } while (0)
 
 /* True if the exclusive bit is set in the lock word, whoever owns it. */
 #define	vm_page_xbusied(m)						\
 	(((m)->busy_lock & VPB_SINGLE_EXCLUSIVE) != 0)
 
 /* Exclusive busy the page; panic if vm_page_tryxbusy() does not succeed. */
 #define	vm_page_xbusy(m) do {						\
 	if (!vm_page_tryxbusy(m))					\
 		panic("%s: page %p failed exclusive busying", __func__,	\
 		    (m));						\
 } while (0)
 
 /*
  * Release an exclusive busy.  The fast path is one
  * atomic_cmpset_rel_int() from VPB_CURTHREAD_EXCLUSIVE to VPB_UNBUSIED;
  * if the lock word holds any other value the out-of-line
  * vm_page_xunbusy_hard() (or its _unchecked variant) is called instead.
  */
 /* Note: page m's lock must not be owned by the caller. */
 #define	vm_page_xunbusy(m) do {						\
 	if (!atomic_cmpset_rel_int(&(m)->busy_lock,			\
 	    VPB_CURTHREAD_EXCLUSIVE, VPB_UNBUSIED))			\
 		vm_page_xunbusy_hard(m);				\
 } while (0)
 #define	vm_page_xunbusy_unchecked(m) do {				\
 	if (!atomic_cmpset_rel_int(&(m)->busy_lock,			\
 	    VPB_CURTHREAD_EXCLUSIVE, VPB_UNBUSIED))			\
 		vm_page_xunbusy_hard_unchecked(m);			\
 } while (0)
 
 #ifdef INVARIANTS
 void vm_page_object_busy_assert(vm_page_t m);
 #define	VM_PAGE_OBJECT_BUSY_ASSERT(m)	vm_page_object_busy_assert(m)
 void vm_page_assert_pga_writeable(vm_page_t m, uint16_t bits);
 #define	VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits)				\
 	vm_page_assert_pga_writeable(m, bits)
 #else
 #define	VM_PAGE_OBJECT_BUSY_ASSERT(m)	(void)0
 #define	VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits)	(void)0
 #endif
 
 #if BYTE_ORDER == BIG_ENDIAN
 #define	VM_PAGE_AFLAG_SHIFT	16
 #else
 #define	VM_PAGE_AFLAG_SHIFT	0
 #endif
 
 /*
  *	Load a snapshot of a page's 32-bit atomic state.
  */
 static inline vm_page_astate_t
 vm_page_astate_load(vm_page_t m)
 {
 	vm_page_astate_t a;
 
 	a._bits = atomic_load_32(&m->a._bits);
 	return (a);
 }
 
 /*
  *	Atomically compare and set a page's atomic state.
  */
 static inline bool
 vm_page_astate_fcmpset(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new)
 {
 
 	KASSERT(new.queue == PQ_INACTIVE || (new.flags & PGA_REQUEUE_HEAD) == 0,
 	    ("%s: invalid head requeue request for page %p", __func__, m));
 	KASSERT((new.flags & PGA_ENQUEUED) == 0 || new.queue != PQ_NONE,
 	    ("%s: setting PGA_ENQUEUED with PQ_NONE in page %p", __func__, m));
 	KASSERT(new._bits != old->_bits,
 	    ("%s: bits are unchanged", __func__));
 
 	return (atomic_fcmpset_32(&m->a._bits, &old->_bits, new._bits) != 0);
 }
 
 /*
  *	Clear the given bits in the specified page.
  */
 static inline void
 vm_page_aflag_clear(vm_page_t m, uint16_t bits)
 {
 	uint32_t *addr, val;
 
 	/*
 	 * Access the whole 32-bit word containing the aflags field with an
 	 * atomic update.  Parallel non-atomic updates to the other fields
 	 * within this word are handled properly by the atomic update.
 	 */
 	addr = (void *)&m->a;
 	val = bits << VM_PAGE_AFLAG_SHIFT;
 	atomic_clear_32(addr, val);
 }
 
 /*
  *	Set the given bits in the specified page.
  */
 static inline void
 vm_page_aflag_set(vm_page_t m, uint16_t bits)
 {
 	uint32_t *addr, val;
 
 	VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits);
 
 	/*
 	 * Access the whole 32-bit word containing the aflags field with an
 	 * atomic update.  Parallel non-atomic updates to the other fields
 	 * within this word are handled properly by the atomic update.
 	 */
 	addr = (void *)&m->a;
 	val = bits << VM_PAGE_AFLAG_SHIFT;
 	atomic_set_32(addr, val);
 }
 
 /*
  *	vm_page_dirty:
  *
  *	Set all bits in the page's dirty field.
  *
  *	The object containing the specified page must be locked if the
  *	call is made from the machine-independent layer.
  *
  *	See vm_page_clear_dirty_mask().
  */
 static __inline void
 vm_page_dirty(vm_page_t m)
 {
 
 	/* Use vm_page_dirty_KBI() under INVARIANTS to save memory. */
 #if (defined(KLD_MODULE) && !defined(KLD_TIED)) || defined(INVARIANTS)
 	vm_page_dirty_KBI(m);
 #else
 	m->dirty = VM_PAGE_BITS_ALL;
 #endif
 }
 
 /*
  *	vm_page_undirty:
  *
  *	Set page to not be dirty.  Note: does not clear pmap modify bits
  */
 static __inline void
 vm_page_undirty(vm_page_t m)
 {
 
 	VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	m->dirty = 0;
 }
 
 static inline uint8_t
 _vm_page_queue(vm_page_astate_t as)
 {
 
 	if ((as.flags & PGA_DEQUEUE) != 0)
 		return (PQ_NONE);
 	return (as.queue);
 }
 
 /*
  *	vm_page_queue:
  *
  *	Return the index of the queue containing m.
  */
 static inline uint8_t
 vm_page_queue(vm_page_t m)
 {
 
 	return (_vm_page_queue(vm_page_astate_load(m)));
 }
 
 static inline bool
 vm_page_active(vm_page_t m)
 {
 
 	return (vm_page_queue(m) == PQ_ACTIVE);
 }
 
 static inline bool
 vm_page_inactive(vm_page_t m)
 {
 
 	return (vm_page_queue(m) == PQ_INACTIVE);
 }
 
 static inline bool
 vm_page_in_laundry(vm_page_t m)
 {
 	uint8_t queue;
 
 	queue = vm_page_queue(m);
 	return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE);
 }
 
 /*
  *	vm_page_drop:
  *
  *	Release a reference to a page and return the old reference count.
  */
 static inline u_int
 vm_page_drop(vm_page_t m, u_int val)
 {
 	u_int old;
 
 	/*
 	 * Synchronize with vm_page_free_prep(): ensure that all updates to the
 	 * page structure are visible before it is freed.
 	 */
 	atomic_thread_fence_rel();
 	old = atomic_fetchadd_int(&m->ref_count, -val);
 	KASSERT(old != VPRC_BLOCKED,
 	    ("vm_page_drop: page %p has an invalid refcount value", m));
 	return (old);
 }
 
 /*
  *	vm_page_wired:
  *
  *	Perform a racy check to determine whether a reference prevents the page
  *	from being reclaimable.  If the page's object is locked, and the page is
  *	unmapped and unbusied or exclusively busied by the current thread, no
  *	new wirings may be created.
  */
 static inline bool
 vm_page_wired(vm_page_t m)
 {
 
 	return (VPRC_WIRE_COUNT(m->ref_count) > 0);
 }
 
 static inline bool
 vm_page_all_valid(vm_page_t m)
 {
 
 	return (m->valid == VM_PAGE_BITS_ALL);
 }
 
 static inline bool
 vm_page_none_valid(vm_page_t m)
 {
 
 	return (m->valid == 0);
 }
 
 #endif				/* _KERNEL */
 #endif				/* !_VM_PAGE_ */
Index: projects/clang1000-import/sys/x86/cpufreq/hwpstate_intel.c
===================================================================
--- projects/clang1000-import/sys/x86/cpufreq/hwpstate_intel.c	(revision 357389)
+++ projects/clang1000-import/sys/x86/cpufreq/hwpstate_intel.c	(revision 357390)
@@ -1,519 +1,637 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2018 Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted providing that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/sbuf.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/smp.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/cputypes.h>
 #include <machine/specialreg.h>
 
 #include <contrib/dev/acpica/include/acpi.h>
 
 #include <dev/acpica/acpivar.h>
 
 #include <x86/cpufreq/hwpstate_intel_internal.h>
 
 #include "acpi_if.h"
 #include "cpufreq_if.h"
 
 extern uint64_t	tsc_freq;
 
 static int	intel_hwpstate_probe(device_t dev);
 static int	intel_hwpstate_attach(device_t dev);
 static int	intel_hwpstate_detach(device_t dev);
 static int	intel_hwpstate_suspend(device_t dev);
 static int	intel_hwpstate_resume(device_t dev);
 
 static int      intel_hwpstate_get(device_t dev, struct cf_setting *cf);
 static int      intel_hwpstate_type(device_t dev, int *type);
 
 static device_method_t intel_hwpstate_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	intel_hwpstate_identify),
 	DEVMETHOD(device_probe,		intel_hwpstate_probe),
 	DEVMETHOD(device_attach,	intel_hwpstate_attach),
 	DEVMETHOD(device_detach,	intel_hwpstate_detach),
 	DEVMETHOD(device_suspend,	intel_hwpstate_suspend),
 	DEVMETHOD(device_resume,	intel_hwpstate_resume),
 
 	/* cpufreq interface */
 	DEVMETHOD(cpufreq_drv_get,      intel_hwpstate_get),
 	DEVMETHOD(cpufreq_drv_type,     intel_hwpstate_type),
 
 	DEVMETHOD_END
 };
 
 struct hwp_softc {
 	device_t		dev;
 	bool 			hwp_notifications;
 	bool			hwp_activity_window;
 	bool			hwp_pref_ctrl;
 	bool			hwp_pkg_ctrl;
+	bool			hwp_pkg_ctrl_en; /* hwp_pkg_ctrl and tunable */
+	bool			hwp_perf_bias; /* CPUID_PERF_BIAS set */
+	bool			hwp_perf_bias_cached; /* cache below valid */
 
-	uint64_t		req; /* Cached copy of last request */
+	uint64_t		req; /* Cached copy of HWP_REQUEST */
+	uint64_t		hwp_energy_perf_bias;	/* Cache PERF_BIAS */
 
 	uint8_t			high;
 	uint8_t			guaranteed;
 	uint8_t			efficient;
 	uint8_t			low;
 };
 
 static devclass_t hwpstate_intel_devclass;
 static driver_t hwpstate_intel_driver = {
 	"hwpstate_intel",
 	intel_hwpstate_methods,
 	sizeof(struct hwp_softc),
 };
 
 DRIVER_MODULE(hwpstate_intel, cpu, hwpstate_intel_driver,
     hwpstate_intel_devclass, NULL, NULL);
 MODULE_VERSION(hwpstate_intel, 1);
 
+static bool hwpstate_pkg_ctrl_enable = true;
+SYSCTL_BOOL(_machdep, OID_AUTO, hwpstate_pkg_ctrl, CTLFLAG_RDTUN,
+    &hwpstate_pkg_ctrl_enable, 0,
+    "Set 1 (default) to enable package-level control, 0 to disable");
+
 static int
 intel_hwp_dump_sysctl_handler(SYSCTL_HANDLER_ARGS)
 {
 	device_t dev;
 	struct pcpu *pc;
 	struct sbuf *sb;
 	struct hwp_softc *sc;
 	uint64_t data, data2;
 	int ret;
 
 	sc = (struct hwp_softc *)arg1;
 	dev = sc->dev;
 
 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);
 
 	sb = sbuf_new(NULL, NULL, 1024, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
 	sbuf_putc(sb, '\n');
 	thread_lock(curthread);
 	sched_bind(curthread, pc->pc_cpuid);
 	thread_unlock(curthread);
 
 	rdmsr_safe(MSR_IA32_PM_ENABLE, &data);
 	sbuf_printf(sb, "CPU%d: HWP %sabled\n", pc->pc_cpuid,
 	    ((data & 1) ? "En" : "Dis"));
 
 	if (data == 0) {
 		ret = 0;
 		goto out;
 	}
 
 	rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &data);
 	sbuf_printf(sb, "\tHighest Performance: %03ju\n", data & 0xff);
 	sbuf_printf(sb, "\tGuaranteed Performance: %03ju\n", (data >> 8) & 0xff);
 	sbuf_printf(sb, "\tEfficient Performance: %03ju\n", (data >> 16) & 0xff);
 	sbuf_printf(sb, "\tLowest Performance: %03ju\n", (data >> 24) & 0xff);
 
 	rdmsr_safe(MSR_IA32_HWP_REQUEST, &data);
 	data2 = 0;
 	if (sc->hwp_pkg_ctrl && (data & IA32_HWP_REQUEST_PACKAGE_CONTROL))
 		rdmsr_safe(MSR_IA32_HWP_REQUEST_PKG, &data2);
 
 	sbuf_putc(sb, '\n');
 
 #define pkg_print(x, name, offset) do {					\
 	if (!sc->hwp_pkg_ctrl || (data & x) != 0) 			\
 		sbuf_printf(sb, "\t%s: %03u\n", name,			\
 		    (unsigned)(data >> offset) & 0xff);			\
 	else								\
 		sbuf_printf(sb, "\t%s: %03u\n", name,			\
 		    (unsigned)(data2 >> offset) & 0xff);		\
 } while (0)
 
 	pkg_print(IA32_HWP_REQUEST_EPP_VALID,
 	    "Requested Efficiency Performance Preference", 24);
 	pkg_print(IA32_HWP_REQUEST_DESIRED_VALID,
 	    "Requested Desired Performance", 16);
 	pkg_print(IA32_HWP_REQUEST_MAXIMUM_VALID,
 	    "Requested Maximum Performance", 8);
 	pkg_print(IA32_HWP_REQUEST_MINIMUM_VALID,
 	    "Requested Minimum Performance", 0);
 #undef pkg_print
 
 	sbuf_putc(sb, '\n');
 
 out:
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
 
 	ret = sbuf_finish(sb);
 	if (ret == 0)
 		ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb));
 	sbuf_delete(sb);
 
 	return (ret);
 }
 
 static inline int
 percent_to_raw(int x)
 {
 
 	MPASS(x <= 100 && x >= 0);
 	return (0xff * x / 100);
 }
 
 /*
  * Given x * 10 in [0, 1000], round to the integer nearest x.
  *
  * This allows round-tripping nice human readable numbers through this
  * interface.  Otherwise, user-provided percentages such as 25, 50, 75 get
  * rounded down to 24, 49, and 74, which is a bit ugly.
  */
 static inline int
 round10(int xtimes10)
 {
 	return ((xtimes10 + 5) / 10);
 }
 
 static inline int
 raw_to_percent(int x)
 {
 	MPASS(x <= 0xff && x >= 0);
 	return (round10(x * 1000 / 0xff));
 }
 
+/* Scale a percentage in [0, 100] to the 0-0xf MSR_IA32_ENERGY_PERF_BIAS range. */
+static inline int
+percent_to_raw_perf_bias(int x)
+{
+	/*
+	 * Adding 50 before dividing rounds to nearest, not down, so a value
+	 * from raw_to_percent_perf_bias() maps back to the same raw value.
+	 */
+	MPASS(x <= 100 && x >= 0);
+	return (((0xf * x) + 50) / 100);
+}
+
+static inline int
+raw_to_percent_perf_bias(int x)
+{
+	/* 0-0xf -> 0-100, truncated to a multiple of 5 (raw step is 6.67%). */
+	MPASS(x <= 0xf && x >= 0);
+	return (((x * 20) / 0xf) * 5);
+}
+
 static int
 sysctl_epp_select(SYSCTL_HANDLER_ARGS)
 {
+	struct hwp_softc *sc;
 	device_t dev;
 	struct pcpu *pc;
-	uint64_t requested;
+	uint64_t epb;
 	uint32_t val;
 	int ret;
 
 	dev = oidp->oid_arg1;
+	sc = device_get_softc(dev);
+	if (!sc->hwp_pref_ctrl && !sc->hwp_perf_bias)
+		return (ENODEV);
+
 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);
 
 	thread_lock(curthread);
 	sched_bind(curthread, pc->pc_cpuid);
 	thread_unlock(curthread);
 
-	rdmsr_safe(MSR_IA32_HWP_REQUEST, &requested);
-	val = (requested & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24;
-	val = raw_to_percent(val);
+	if (sc->hwp_pref_ctrl) {
+		val = (sc->req & IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE) >> 24;
+		val = raw_to_percent(val);
+	} else {
+		/*
+		 * If cpuid indicates EPP is not supported, the HWP controller
+		 * uses MSR_IA32_ENERGY_PERF_BIAS instead (Intel SDM §14.4.4).
+		 * This register is per-core (but not HT).
+		 */
+		if (!sc->hwp_perf_bias_cached) {
+			ret = rdmsr_safe(MSR_IA32_ENERGY_PERF_BIAS, &epb);
+			if (ret)
+				goto out;
+			sc->hwp_energy_perf_bias = epb;
+			sc->hwp_perf_bias_cached = true;
+		}
+		val = sc->hwp_energy_perf_bias &
+		    IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK;
+		val = raw_to_percent_perf_bias(val);
+	}
 
 	MPASS(val >= 0 && val <= 100);
 
 	ret = sysctl_handle_int(oidp, &val, 0, req);
 	if (ret || req->newptr == NULL)
 		goto out;
 
 	if (val > 100) {
 		ret = EINVAL;
 		goto out;
 	}
 
-	val = percent_to_raw(val);
+	if (sc->hwp_pref_ctrl) {
+		val = percent_to_raw(val);
 
-	requested &= ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE;
-	requested |= val << 24;
+		sc->req =
+		    ((sc->req & ~IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE)
+		    | (val << 24u));
 
-	wrmsr_safe(MSR_IA32_HWP_REQUEST, requested);
+		if (sc->hwp_pkg_ctrl_en)
+			ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
+		else
+			ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
+	} else {
+		val = percent_to_raw_perf_bias(val);
+		MPASS((val & ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) == 0);
 
+		sc->hwp_energy_perf_bias =
+		    ((sc->hwp_energy_perf_bias &
+		    ~IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK) | val);
+		ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS,
+		    sc->hwp_energy_perf_bias);
+	}
+
 out:
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
 
 	return (ret);
 }
 
 void
 intel_hwpstate_identify(driver_t *driver, device_t parent)
 {
-	uint32_t regs[4];
-
 	if (device_find_child(parent, "hwpstate_intel", -1) != NULL)
 		return;
 
 	if (cpu_vendor_id != CPU_VENDOR_INTEL)
 		return;
 
 	if (resource_disabled("hwpstate_intel", 0))
 		return;
 
 	/*
 	 * Intel SDM 14.4.1 (HWP Programming Interfaces):
-	 *   The CPUID instruction allows software to discover the presence of
-	 *   HWP support in an Intel processor. Specifically, execute CPUID
-	 *   instruction with EAX=06H as input will return 5 bit flags covering
-	 *   the following aspects in bits 7 through 11 of CPUID.06H:EAX.
-	 */
-
-	if (cpu_high < 6)
-		return;
-
-	/*
-	 * Intel SDM 14.4.1 (HWP Programming Interfaces):
 	 *   Availability of HWP baseline resource and capability,
 	 *   CPUID.06H:EAX[bit 7]: If this bit is set, HWP provides several new
 	 *   architectural MSRs: IA32_PM_ENABLE, IA32_HWP_CAPABILITIES,
 	 *   IA32_HWP_REQUEST, IA32_HWP_STATUS.
 	 */
-
-	do_cpuid(6, regs);
-	if ((regs[0] & CPUTPM1_HWP) == 0)
+	if ((cpu_power_eax & CPUTPM1_HWP) == 0)
 		return;
 
 	if (BUS_ADD_CHILD(parent, 10, "hwpstate_intel", -1) == NULL)
 		return;
 
 	if (bootverbose)
 		device_printf(parent, "hwpstate registered\n");
 }
 
 static int
 intel_hwpstate_probe(device_t dev)
 {
 
 	device_set_desc(dev, "Intel Speed Shift");
 	return (BUS_PROBE_NOWILDCARD);
 }
 
-/* FIXME: Need to support PKG variant */
 static int
 set_autonomous_hwp(struct hwp_softc *sc)
 {
 	struct pcpu *pc;
 	device_t dev;
 	uint64_t caps;
 	int ret;
 
 	dev = sc->dev;
 
 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);
 
 	thread_lock(curthread);
 	sched_bind(curthread, pc->pc_cpuid);
 	thread_unlock(curthread);
 
 	/* XXX: Many MSRs aren't readable until feature is enabled */
 	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
 	if (ret) {
+		/*
+		 * This is actually a package-level MSR, and only the first
+		 * write is not ignored.  So it is harmless to enable it across
+		 * all devices, and this allows us not to care especially in
+		 * which order cores (and packages) are probed.  This error
+		 * condition should not happen given we gate on the HWP CPUID
+		 * feature flag, if the Intel SDM is correct.
+		 */
 		device_printf(dev, "Failed to enable HWP for cpu%d (%d)\n",
 		    pc->pc_cpuid, ret);
 		goto out;
 	}
 
 	ret = rdmsr_safe(MSR_IA32_HWP_REQUEST, &sc->req);
-	if (ret)
-		return (ret);
+	if (ret) {
+		device_printf(dev,
+		    "Failed to read HWP request MSR for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
 
 	ret = rdmsr_safe(MSR_IA32_HWP_CAPABILITIES, &caps);
-	if (ret)
-		return (ret);
+	if (ret) {
+		device_printf(dev,
+		    "Failed to read HWP capabilities MSR for cpu%d (%d)\n",
+		    pc->pc_cpuid, ret);
+		goto out;
+	}
 
+	/*
+	 * High and low are static; "guaranteed" is dynamic; and efficient is
+	 * also dynamic.
+	 */
 	sc->high = IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(caps);
 	sc->guaranteed = IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(caps);
 	sc->efficient = IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(caps);
 	sc->low = IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(caps);
 
 	/* hardware autonomous selection determines the performance target */
 	sc->req &= ~IA32_HWP_DESIRED_PERFORMANCE;
 
 	/* enable HW dynamic selection of window size */
 	sc->req &= ~IA32_HWP_ACTIVITY_WINDOW;
 
 	/* IA32_HWP_REQUEST.Minimum_Performance = IA32_HWP_CAPABILITIES.Lowest_Performance */
 	sc->req &= ~IA32_HWP_MINIMUM_PERFORMANCE;
 	sc->req |= sc->low;
 
 	/* IA32_HWP_REQUEST.Maximum_Performance = IA32_HWP_CAPABILITIES.Highest_Performance. */
 	sc->req &= ~IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE;
 	sc->req |= sc->high << 8;
 
-	ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
+	/* If supported, request package-level control for this CPU. */
+	if (sc->hwp_pkg_ctrl_en)
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
+		    IA32_HWP_REQUEST_PACKAGE_CONTROL);
+	else
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
 	if (ret) {
 		device_printf(dev,
-		    "Failed to setup autonomous HWP for cpu%d (file a bug)\n",
-		    pc->pc_cpuid);
+		    "Failed to setup%s autonomous HWP for cpu%d\n",
+		    sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
+		goto out;
 	}
 
+	/* If supported, write the PKG-wide control MSR; report failures. */
+	if (sc->hwp_pkg_ctrl_en) {
+		/*
+		 * "The structure of the IA32_HWP_REQUEST_PKG MSR
+		 * (package-level) is identical to the IA32_HWP_REQUEST MSR
+		 * with the exception of the Package Control field, which does
+		 * not exist." (Intel SDM §14.4.4)
+		 */
+		if ((ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req)) != 0)
+			device_printf(dev,
+			    "Failed to set autonomous HWP for package\n");
+	}
+
 out:
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
 
 	return (ret);
 }
 
 static int
 intel_hwpstate_attach(device_t dev)
 {
 	struct hwp_softc *sc;
-	uint32_t regs[4];
 	int ret;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 
-	do_cpuid(6, regs);
-	if (regs[0] & CPUTPM1_HWP_NOTIFICATION)
+	/* eax */
+	if (cpu_power_eax & CPUTPM1_HWP_NOTIFICATION)
 		sc->hwp_notifications = true;
-	if (regs[0] & CPUTPM1_HWP_ACTIVITY_WINDOW)
+	if (cpu_power_eax & CPUTPM1_HWP_ACTIVITY_WINDOW)
 		sc->hwp_activity_window = true;
-	if (regs[0] & CPUTPM1_HWP_PERF_PREF)
+	if (cpu_power_eax & CPUTPM1_HWP_PERF_PREF)
 		sc->hwp_pref_ctrl = true;
-	if (regs[0] & CPUTPM1_HWP_PKG)
+	if (cpu_power_eax & CPUTPM1_HWP_PKG)
 		sc->hwp_pkg_ctrl = true;
 
+	/* Allow administrators to disable pkg-level control. */
+	sc->hwp_pkg_ctrl_en = (sc->hwp_pkg_ctrl && hwpstate_pkg_ctrl_enable);
+
+	/* ecx */
+	if (cpu_power_ecx & CPUID_PERF_BIAS)
+		sc->hwp_perf_bias = true;
+
 	ret = set_autonomous_hwp(sc);
 	if (ret)
 		return (ret);
 
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_STATIC_CHILDREN(_debug), OID_AUTO, device_get_nameunit(dev),
 	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP,
 	    sc, 0, intel_hwp_dump_sysctl_handler, "A", "");
 
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
 	    "epp", CTLTYPE_INT | CTLFLAG_RWTUN, dev, 0,
 	    sysctl_epp_select, "I",
 	    "Efficiency/Performance Preference "
 	    "(range from 0, most performant, through 100, most efficient)");
 
 	return (cpufreq_register(dev));
 }
 
 static int
 intel_hwpstate_detach(device_t dev)
 {
 
 	return (cpufreq_unregister(dev));
 }
 
 static int
 intel_hwpstate_get(device_t dev, struct cf_setting *set)
 {
 	struct pcpu *pc;
 	uint64_t rate;
 	int ret;
 
 	if (set == NULL)
 		return (EINVAL);
 
 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);
 
 	memset(set, CPUFREQ_VAL_UNKNOWN, sizeof(*set));
 	set->dev = dev;
 
 	ret = cpu_est_clockrate(pc->pc_cpuid, &rate);
 	if (ret == 0)
 		set->freq = rate / 1000000;
 
 	set->volts = CPUFREQ_VAL_UNKNOWN;
 	set->power = CPUFREQ_VAL_UNKNOWN;
 	set->lat = CPUFREQ_VAL_UNKNOWN;
 
 	return (0);
 }
 
 static int
 intel_hwpstate_type(device_t dev, int *type)
 {
 	if (type == NULL)
 		return (EINVAL);
 	*type = CPUFREQ_TYPE_ABSOLUTE | CPUFREQ_FLAG_INFO_ONLY | CPUFREQ_FLAG_UNCACHED;
 
 	return (0);
 }
 
 static int
 intel_hwpstate_suspend(device_t dev)
 {
 	return (0);
 }
 
 /*
  * Redo a subset of set_autonomous_hwp on resume; untested.  Without this,
  * testers observed that on resume MSR_IA32_HWP_REQUEST was bogus.
  */
 static int
 intel_hwpstate_resume(device_t dev)
 {
 	struct hwp_softc *sc;
 	struct pcpu *pc;
 	int ret;
 
 	sc = device_get_softc(dev);
 
 	pc = cpu_get_pcpu(dev);
 	if (pc == NULL)
 		return (ENXIO);
 
 	thread_lock(curthread);
 	sched_bind(curthread, pc->pc_cpuid);
 	thread_unlock(curthread);
 
 	ret = wrmsr_safe(MSR_IA32_PM_ENABLE, 1);
 	if (ret) {
 		device_printf(dev,
 		    "Failed to enable HWP for cpu%d after suspend (%d)\n",
 		    pc->pc_cpuid, ret);
 		goto out;
 	}
 
-	ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
+	if (sc->hwp_pkg_ctrl_en)
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req |
+		    IA32_HWP_REQUEST_PACKAGE_CONTROL);
+	else
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST, sc->req);
 	if (ret) {
 		device_printf(dev,
-		    "Failed to setup autonomous HWP for cpu%d after suspend\n",
-		    pc->pc_cpuid);
+		    "Failed to set%s autonomous HWP for cpu%d after suspend\n",
+		    sc->hwp_pkg_ctrl_en ? " PKG" : "", pc->pc_cpuid);
+		goto out;
+	}
+	if (sc->hwp_pkg_ctrl_en) {
+		ret = wrmsr_safe(MSR_IA32_HWP_REQUEST_PKG, sc->req);
+		if (ret) {
+			device_printf(dev,
+			    "Failed to set autonomous HWP for package after "
+			    "suspend\n");
+			goto out;
+		}
+	}
+	if (!sc->hwp_pref_ctrl && sc->hwp_perf_bias_cached) {
+		ret = wrmsr_safe(MSR_IA32_ENERGY_PERF_BIAS,
+		    sc->hwp_energy_perf_bias);
+		if (ret) {
+			device_printf(dev,
+			    "Failed to set energy perf bias for cpu%d after "
+			    "suspend\n", pc->pc_cpuid);
+		}
 	}
 
 out:
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
 
 	return (ret);
 }
Index: projects/clang1000-import/sys/x86/include/specialreg.h
===================================================================
--- projects/clang1000-import/sys/x86/include/specialreg.h	(revision 357389)
+++ projects/clang1000-import/sys/x86/include/specialreg.h	(revision 357390)
@@ -1,1166 +1,1170 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)specialreg.h	7.1 (Berkeley) 5/9/91
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_SPECIALREG_H_
 #define	_MACHINE_SPECIALREG_H_
 
 /*
  * Bits in 386 special registers:
  */
 #define	CR0_PE	0x00000001	/* Protected mode Enable */
 #define	CR0_MP	0x00000002	/* "Math" (fpu) Present */
 #define	CR0_EM	0x00000004	/* EMulate FPU instructions. (trap ESC only) */
 #define	CR0_TS	0x00000008	/* Task Switched (if MP, trap ESC and WAIT) */
 #define	CR0_PG	0x80000000	/* PaGing enable */
 
 /*
  * Bits in 486 special registers:
  */
 #define	CR0_NE	0x00000020	/* Numeric Error enable (EX16 vs IRQ13) */
 #define	CR0_WP	0x00010000	/* Write Protect (honor page protect in
 							   all modes) */
 #define	CR0_AM	0x00040000	/* Alignment Mask (set to enable AC flag) */
 #define	CR0_NW  0x20000000	/* Not Write-through */
 #define	CR0_CD  0x40000000	/* Cache Disable */
 
 #define	CR3_PCID_SAVE 0x8000000000000000
 #define	CR3_PCID_MASK 0xfff
 
 /*
  * Bits in PPro special registers
  */
 #define	CR4_VME	0x00000001	/* Virtual 8086 mode extensions */
 #define	CR4_PVI	0x00000002	/* Protected-mode virtual interrupts */
 #define	CR4_TSD	0x00000004	/* Time stamp disable */
 #define	CR4_DE	0x00000008	/* Debugging extensions */
 #define	CR4_PSE	0x00000010	/* Page size extensions */
 #define	CR4_PAE	0x00000020	/* Physical address extension */
 #define	CR4_MCE	0x00000040	/* Machine check enable */
 #define	CR4_PGE	0x00000080	/* Page global enable */
 #define	CR4_PCE	0x00000100	/* Performance monitoring counter enable */
 #define	CR4_FXSR 0x00000200	/* Fast FPU save/restore used by OS */
 #define	CR4_XMM	0x00000400	/* enable SIMD/MMX2 to use except 16 */
 #define	CR4_UMIP 0x00000800	/* User Mode Instruction Prevention */
 #define	CR4_VMXE 0x00002000	/* enable VMX operation (Intel-specific) */
 #define	CR4_FSGSBASE 0x00010000	/* Enable FS/GS BASE accessing instructions */
 #define	CR4_PCIDE 0x00020000	/* Enable Context ID */
 #define	CR4_XSAVE 0x00040000	/* XSETBV/XGETBV */
 #define	CR4_SMEP 0x00100000	/* Supervisor-Mode Execution Prevention */
 #define	CR4_SMAP 0x00200000	/* Supervisor-Mode Access Prevention */
 #define	CR4_PKE	0x00400000	/* Protection Keys Enable */
 
 /*
  * Bits in AMD64 special registers.  EFER is 64 bits wide.
  */
 #define	EFER_SCE 0x000000001	/* System Call Extensions (R/W) */
 #define	EFER_LME 0x000000100	/* Long mode enable (R/W) */
 #define	EFER_LMA 0x000000400	/* Long mode active (R) */
 #define	EFER_NXE 0x000000800	/* PTE No-Execute bit enable (R/W) */
 #define	EFER_SVM 0x000001000	/* SVM enable bit for AMD, reserved for Intel */
 #define	EFER_LMSLE 0x000002000	/* Long Mode Segment Limit Enable */
 #define	EFER_FFXSR 0x000004000	/* Fast FXSAVE/FSRSTOR */
 #define	EFER_TCE   0x000008000	/* Translation Cache Extension */
 #define	EFER_MCOMMIT	0x00020000	/* Enable MCOMMIT (AMD) */
 
 /*
  * Intel Extended Features registers
  */
 #define	XCR0	0		/* XFEATURE_ENABLED_MASK register */
 
 #define	XFEATURE_ENABLED_X87		0x00000001
 #define	XFEATURE_ENABLED_SSE		0x00000002
 #define	XFEATURE_ENABLED_YMM_HI128	0x00000004
 #define	XFEATURE_ENABLED_AVX		XFEATURE_ENABLED_YMM_HI128
 #define	XFEATURE_ENABLED_BNDREGS	0x00000008
 #define	XFEATURE_ENABLED_BNDCSR		0x00000010
 #define	XFEATURE_ENABLED_OPMASK		0x00000020
 #define	XFEATURE_ENABLED_ZMM_HI256	0x00000040
 #define	XFEATURE_ENABLED_HI16_ZMM	0x00000080
 
 #define	XFEATURE_AVX					\
     (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)
 #define	XFEATURE_AVX512						\
     (XFEATURE_ENABLED_OPMASK | XFEATURE_ENABLED_ZMM_HI256 |	\
     XFEATURE_ENABLED_HI16_ZMM)
 #define	XFEATURE_MPX					\
     (XFEATURE_ENABLED_BNDREGS | XFEATURE_ENABLED_BNDCSR)
 
 /*
  * CPUID instruction features register
  */
 #define	CPUID_FPU	0x00000001
 #define	CPUID_VME	0x00000002
 #define	CPUID_DE	0x00000004
 #define	CPUID_PSE	0x00000008
 #define	CPUID_TSC	0x00000010
 #define	CPUID_MSR	0x00000020
 #define	CPUID_PAE	0x00000040
 #define	CPUID_MCE	0x00000080
 #define	CPUID_CX8	0x00000100
 #define	CPUID_APIC	0x00000200
 #define	CPUID_B10	0x00000400
 #define	CPUID_SEP	0x00000800
 #define	CPUID_MTRR	0x00001000
 #define	CPUID_PGE	0x00002000
 #define	CPUID_MCA	0x00004000
 #define	CPUID_CMOV	0x00008000
 #define	CPUID_PAT	0x00010000
 #define	CPUID_PSE36	0x00020000
 #define	CPUID_PSN	0x00040000
 #define	CPUID_CLFSH	0x00080000
 #define	CPUID_B20	0x00100000
 #define	CPUID_DS	0x00200000
 #define	CPUID_ACPI	0x00400000
 #define	CPUID_MMX	0x00800000
 #define	CPUID_FXSR	0x01000000
 #define	CPUID_SSE	0x02000000
 #define	CPUID_XMM	0x02000000
 #define	CPUID_SSE2	0x04000000
 #define	CPUID_SS	0x08000000
 #define	CPUID_HTT	0x10000000
 #define	CPUID_TM	0x20000000
 #define	CPUID_IA64	0x40000000
 #define	CPUID_PBE	0x80000000
 
 #define	CPUID2_SSE3	0x00000001
 #define	CPUID2_PCLMULQDQ 0x00000002
 #define	CPUID2_DTES64	0x00000004
 #define	CPUID2_MON	0x00000008
 #define	CPUID2_DS_CPL	0x00000010
 #define	CPUID2_VMX	0x00000020
 #define	CPUID2_SMX	0x00000040
 #define	CPUID2_EST	0x00000080
 #define	CPUID2_TM2	0x00000100
 #define	CPUID2_SSSE3	0x00000200
 #define	CPUID2_CNXTID	0x00000400
 #define	CPUID2_SDBG	0x00000800
 #define	CPUID2_FMA	0x00001000
 #define	CPUID2_CX16	0x00002000
 #define	CPUID2_XTPR	0x00004000
 #define	CPUID2_PDCM	0x00008000
 #define	CPUID2_PCID	0x00020000
 #define	CPUID2_DCA	0x00040000
 #define	CPUID2_SSE41	0x00080000
 #define	CPUID2_SSE42	0x00100000
 #define	CPUID2_X2APIC	0x00200000
 #define	CPUID2_MOVBE	0x00400000
 #define	CPUID2_POPCNT	0x00800000
 #define	CPUID2_TSCDLT	0x01000000
 #define	CPUID2_AESNI	0x02000000
 #define	CPUID2_XSAVE	0x04000000
 #define	CPUID2_OSXSAVE	0x08000000
 #define	CPUID2_AVX	0x10000000
 #define	CPUID2_F16C	0x20000000
 #define	CPUID2_RDRAND	0x40000000
 #define	CPUID2_HV	0x80000000
 
 /* Intel Processor Trace CPUID. */
 
 /* Leaf 0 ebx. */
 #define	CPUPT_CR3		(1 << 0)	/* CR3 Filtering Support */
 #define	CPUPT_PSB		(1 << 1)	/* Configurable PSB and Cycle-Accurate Mode Supported */
 #define	CPUPT_IPF		(1 << 2)	/* IP Filtering and TraceStop supported */
 #define	CPUPT_MTC		(1 << 3)	/* MTC Supported */
 #define	CPUPT_PRW		(1 << 4)	/* PTWRITE Supported */
 #define	CPUPT_PWR		(1 << 5)	/* Power Event Trace Supported */
 
 /* Leaf 0 ecx. */
 #define	CPUPT_TOPA		(1 << 0)	/* ToPA Output Supported */
 #define	CPUPT_TOPA_MULTI	(1 << 1)	/* ToPA Tables Allow Multiple Output Entries */
 #define	CPUPT_SINGLE		(1 << 2)	/* Single-Range Output Supported */
 #define	CPUPT_TT_OUT		(1 << 3)	/* Output to Trace Transport Subsystem Supported */
 #define	CPUPT_LINEAR_IP		(1 << 31)	/* IP Payloads are Linear IP, otherwise IP is effective */
 
 /* Leaf 1 eax. */
 #define	CPUPT_NADDR_S		0	/* Number of Address Ranges */
 #define	CPUPT_NADDR_M		(0x7 << CPUPT_NADDR_S)
 #define	CPUPT_MTC_BITMAP_S	16	/* Bitmap of supported MTC Period Encodings */
 #define	CPUPT_MTC_BITMAP_M	(0xffff << CPUPT_MTC_BITMAP_S)
 
 /* Leaf 1 ebx. */
 #define	CPUPT_CT_BITMAP_S	0	/* Bitmap of supported Cycle Threshold values */
 #define	CPUPT_CT_BITMAP_M	(0xffff << CPUPT_CT_BITMAP_S)
 #define	CPUPT_PFE_BITMAP_S	16	/* Bitmap of supported Configurable PSB Frequency encoding */
 #define	CPUPT_PFE_BITMAP_M	(0xffff << CPUPT_PFE_BITMAP_S)
 
 /*
  * Important bits in the AMD extended cpuid flags
  */
 #define	AMDID_SYSCALL	0x00000800
 #define	AMDID_MP	0x00080000
 #define	AMDID_NX	0x00100000
 #define	AMDID_EXT_MMX	0x00400000
 #define	AMDID_FFXSR	0x02000000
 #define	AMDID_PAGE1GB	0x04000000
 #define	AMDID_RDTSCP	0x08000000
 #define	AMDID_LM	0x20000000
 #define	AMDID_EXT_3DNOW	0x40000000
 #define	AMDID_3DNOW	0x80000000
 
 #define	AMDID2_LAHF	0x00000001
 #define	AMDID2_CMP	0x00000002
 #define	AMDID2_SVM	0x00000004
 #define	AMDID2_EXT_APIC	0x00000008
 #define	AMDID2_CR8	0x00000010
 #define	AMDID2_ABM	0x00000020
 #define	AMDID2_SSE4A	0x00000040
 #define	AMDID2_MAS	0x00000080
 #define	AMDID2_PREFETCH	0x00000100
 #define	AMDID2_OSVW	0x00000200
 #define	AMDID2_IBS	0x00000400
 #define	AMDID2_XOP	0x00000800
 #define	AMDID2_SKINIT	0x00001000
 #define	AMDID2_WDT	0x00002000
 #define	AMDID2_LWP	0x00008000
 #define	AMDID2_FMA4	0x00010000
 #define	AMDID2_TCE	0x00020000
 #define	AMDID2_NODE_ID	0x00080000
 #define	AMDID2_TBM	0x00200000
 #define	AMDID2_TOPOLOGY	0x00400000
 #define	AMDID2_PCXC	0x00800000
 #define	AMDID2_PNXC	0x01000000
 #define	AMDID2_DBE	0x04000000
 #define	AMDID2_PTSC	0x08000000
 #define	AMDID2_PTSCEL2I	0x10000000
 #define	AMDID2_MWAITX	0x20000000
 
 /*
  * CPUID instruction 1 eax info
  */
 #define	CPUID_STEPPING		0x0000000f
 #define	CPUID_MODEL		0x000000f0
 #define	CPUID_FAMILY		0x00000f00
 #define	CPUID_EXT_MODEL		0x000f0000
 #define	CPUID_EXT_FAMILY	0x0ff00000
 #ifdef __i386__
 #define	CPUID_TO_MODEL(id) \
     ((((id) & CPUID_MODEL) >> 4) | \
     ((((id) & CPUID_FAMILY) >= 0x600) ? \
     (((id) & CPUID_EXT_MODEL) >> 12) : 0))
 #define	CPUID_TO_FAMILY(id) \
     ((((id) & CPUID_FAMILY) >> 8) + \
     ((((id) & CPUID_FAMILY) == 0xf00) ? \
     (((id) & CPUID_EXT_FAMILY) >> 20) : 0))
 #else
 #define	CPUID_TO_MODEL(id) \
     ((((id) & CPUID_MODEL) >> 4) | \
     (((id) & CPUID_EXT_MODEL) >> 12))
 #define	CPUID_TO_FAMILY(id) \
     ((((id) & CPUID_FAMILY) >> 8) + \
     (((id) & CPUID_EXT_FAMILY) >> 20))
 #endif
 
 /*
  * CPUID instruction 1 ebx info
  */
 #define	CPUID_BRAND_INDEX	0x000000ff
 #define	CPUID_CLFUSH_SIZE	0x0000ff00
 #define	CPUID_HTT_CORES		0x00ff0000
 #define	CPUID_LOCAL_APIC_ID	0xff000000
 
 /*
  * CPUID instruction 5 info
  */
 #define	CPUID5_MON_MIN_SIZE	0x0000ffff	/* eax */
 #define	CPUID5_MON_MAX_SIZE	0x0000ffff	/* ebx */
 #define	CPUID5_MON_MWAIT_EXT	0x00000001	/* ecx */
 #define	CPUID5_MWAIT_INTRBREAK	0x00000002	/* ecx */
 
 /*
  * MWAIT cpu power states.  Lower 4 bits are sub-states.
  */
 #define	MWAIT_C0	0xf0
 #define	MWAIT_C1	0x00
 #define	MWAIT_C2	0x10
 #define	MWAIT_C3	0x20
 #define	MWAIT_C4	0x30
 
 /*
  * MWAIT extensions.
  */
 /* Interrupt breaks MWAIT even when masked. */
 #define	MWAIT_INTRBREAK		0x00000001
 
 /*
  * CPUID leaf 6: Thermal and Power management.
  */
 /* Eax. */
 #define	CPUTPM1_SENSOR			0x00000001
 #define	CPUTPM1_TURBO			0x00000002
 #define	CPUTPM1_ARAT			0x00000004
 #define	CPUTPM1_PLN			0x00000010
 #define	CPUTPM1_ECMD			0x00000020
 #define	CPUTPM1_PTM			0x00000040
 #define	CPUTPM1_HWP			0x00000080
 #define	CPUTPM1_HWP_NOTIFICATION	0x00000100
 #define	CPUTPM1_HWP_ACTIVITY_WINDOW	0x00000200
 #define	CPUTPM1_HWP_PERF_PREF		0x00000400
 #define	CPUTPM1_HWP_PKG			0x00000800
 #define	CPUTPM1_HDC			0x00002000
 #define	CPUTPM1_TURBO30			0x00004000
 #define	CPUTPM1_HWP_CAPABILITIES	0x00008000
 #define	CPUTPM1_HWP_PECI_OVR		0x00010000
 #define	CPUTPM1_HWP_FLEXIBLE		0x00020000
 #define	CPUTPM1_HWP_FAST_MSR		0x00040000
 #define	CPUTPM1_HWP_IGN_IDLE		0x00100000
 
 /* Ebx. */
 #define	CPUTPM_B_NSENSINTTHRESH		0x0000000f
 
 /* Ecx. */
 #define	CPUID_PERF_STAT			0x00000001
 #define	CPUID_PERF_BIAS			0x00000008
 
 /* 
  * CPUID instruction 0xb ebx info.
  */
 #define	CPUID_TYPE_INVAL	0
 #define	CPUID_TYPE_SMT		1
 #define	CPUID_TYPE_CORE		2
 
 /*
  * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1
  */
 #define	CPUID_EXTSTATE_XSAVEOPT	0x00000001
 #define	CPUID_EXTSTATE_XSAVEC	0x00000002
 #define	CPUID_EXTSTATE_XINUSE	0x00000004
 #define	CPUID_EXTSTATE_XSAVES	0x00000008
 
 /*
  * AMD extended function 8000_0007h ebx info
  */
 #define	AMDRAS_MCA_OF_RECOV	0x00000001
 #define	AMDRAS_SUCCOR		0x00000002
 #define	AMDRAS_HW_ASSERT	0x00000004
 #define	AMDRAS_SCALABLE_MCA	0x00000008
 #define	AMDRAS_PFEH_SUPPORT	0x00000010
 
 /*
  * AMD extended function 8000_0007h edx info
  */
 #define	AMDPM_TS		0x00000001
 #define	AMDPM_FID		0x00000002
 #define	AMDPM_VID		0x00000004
 #define	AMDPM_TTP		0x00000008
 #define	AMDPM_TM		0x00000010
 #define	AMDPM_STC		0x00000020
 #define	AMDPM_100MHZ_STEPS	0x00000040
 #define	AMDPM_HW_PSTATE		0x00000080
 #define	AMDPM_TSC_INVARIANT	0x00000100
 #define	AMDPM_CPB		0x00000200
 
 /*
  * AMD extended function 8000_0008h ebx info (amd_extended_feature_extensions)
  */
 #define	AMDFEID_CLZERO		0x00000001
 #define	AMDFEID_IRPERF		0x00000002
 #define	AMDFEID_XSAVEERPTR	0x00000004
 #define	AMDFEID_RDPRU		0x00000010
 #define	AMDFEID_MCOMMIT		0x00000100
 #define	AMDFEID_WBNOINVD	0x00000200
 #define	AMDFEID_IBPB		0x00001000
 #define	AMDFEID_IBRS		0x00004000
 #define	AMDFEID_STIBP		0x00008000
 /* The below are only defined if the corresponding base feature above exists. */
 #define	AMDFEID_IBRS_ALWAYSON	0x00010000
 #define	AMDFEID_STIBP_ALWAYSON	0x00020000
 #define	AMDFEID_PREFER_IBRS	0x00040000
 #define	AMDFEID_SSBD		0x01000000
 /* SSBD via MSRC001_011F instead of MSR 0x48: */
 #define	AMDFEID_VIRT_SSBD	0x02000000
 #define	AMDFEID_SSB_NO		0x04000000
 
 /*
  * AMD extended function 8000_0008h ecx info
  */
 #define	AMDID_CMP_CORES		0x000000ff
 #define	AMDID_COREID_SIZE	0x0000f000
 #define	AMDID_COREID_SIZE_SHIFT	12
 
 /*
  * CPUID instruction 7 Structured Extended Features, leaf 0 ebx info
  */
 #define	CPUID_STDEXT_FSGSBASE	0x00000001
 #define	CPUID_STDEXT_TSC_ADJUST	0x00000002
 #define	CPUID_STDEXT_SGX	0x00000004
 #define	CPUID_STDEXT_BMI1	0x00000008
 #define	CPUID_STDEXT_HLE	0x00000010
 #define	CPUID_STDEXT_AVX2	0x00000020
 #define	CPUID_STDEXT_FDP_EXC	0x00000040
 #define	CPUID_STDEXT_SMEP	0x00000080
 #define	CPUID_STDEXT_BMI2	0x00000100
 #define	CPUID_STDEXT_ERMS	0x00000200
 #define	CPUID_STDEXT_INVPCID	0x00000400
 #define	CPUID_STDEXT_RTM	0x00000800
 #define	CPUID_STDEXT_PQM	0x00001000
 #define	CPUID_STDEXT_NFPUSG	0x00002000
 #define	CPUID_STDEXT_MPX	0x00004000
 #define	CPUID_STDEXT_PQE	0x00008000
 #define	CPUID_STDEXT_AVX512F	0x00010000
 #define	CPUID_STDEXT_AVX512DQ	0x00020000
 #define	CPUID_STDEXT_RDSEED	0x00040000
 #define	CPUID_STDEXT_ADX	0x00080000
 #define	CPUID_STDEXT_SMAP	0x00100000
 #define	CPUID_STDEXT_AVX512IFMA	0x00200000
 /* Formerly PCOMMIT */
 #define	CPUID_STDEXT_CLFLUSHOPT	0x00800000
 #define	CPUID_STDEXT_CLWB	0x01000000
 #define	CPUID_STDEXT_PROCTRACE	0x02000000
 #define	CPUID_STDEXT_AVX512PF	0x04000000
 #define	CPUID_STDEXT_AVX512ER	0x08000000
 #define	CPUID_STDEXT_AVX512CD	0x10000000
 #define	CPUID_STDEXT_SHA	0x20000000
 #define	CPUID_STDEXT_AVX512BW	0x40000000
 #define	CPUID_STDEXT_AVX512VL	0x80000000
 
 /*
  * CPUID instruction 7 Structured Extended Features, leaf 0 ecx info
  */
 #define	CPUID_STDEXT2_PREFETCHWT1 	0x00000001
 #define	CPUID_STDEXT2_AVX512VBMI	0x00000002
 #define	CPUID_STDEXT2_UMIP		0x00000004
 #define	CPUID_STDEXT2_PKU		0x00000008
 #define	CPUID_STDEXT2_OSPKE		0x00000010
 #define	CPUID_STDEXT2_WAITPKG		0x00000020
 #define	CPUID_STDEXT2_AVX512VBMI2	0x00000040
 #define	CPUID_STDEXT2_GFNI		0x00000100
 #define	CPUID_STDEXT2_VAES		0x00000200
 #define	CPUID_STDEXT2_VPCLMULQDQ	0x00000400
 #define	CPUID_STDEXT2_AVX512VNNI	0x00000800
 #define	CPUID_STDEXT2_AVX512BITALG	0x00001000
 #define	CPUID_STDEXT2_AVX512VPOPCNTDQ	0x00004000
 #define	CPUID_STDEXT2_RDPID		0x00400000
 #define	CPUID_STDEXT2_CLDEMOTE		0x02000000
 #define	CPUID_STDEXT2_MOVDIRI		0x08000000
 #define	CPUID_STDEXT2_MOVDIR64B		0x10000000
 #define	CPUID_STDEXT2_ENQCMD		0x20000000
 #define	CPUID_STDEXT2_SGXLC		0x40000000
 
 /*
  * CPUID instruction 7 Structured Extended Features, leaf 0 edx info
  */
 #define	CPUID_STDEXT3_AVX5124VNNIW	0x00000004
 #define	CPUID_STDEXT3_AVX5124FMAPS	0x00000008
 #define	CPUID_STDEXT3_AVX512VP2INTERSECT	0x00000100
 #define	CPUID_STDEXT3_MD_CLEAR		0x00000400
 #define	CPUID_STDEXT3_TSXFA		0x00002000
 #define	CPUID_STDEXT3_PCONFIG		0x00040000
 #define	CPUID_STDEXT3_IBPB		0x04000000
 #define	CPUID_STDEXT3_STIBP		0x08000000
 #define	CPUID_STDEXT3_L1D_FLUSH		0x10000000
 #define	CPUID_STDEXT3_ARCH_CAP		0x20000000
 #define	CPUID_STDEXT3_CORE_CAP		0x40000000
 #define	CPUID_STDEXT3_SSBD		0x80000000
 
 /* MSR IA32_ARCH_CAP(ABILITIES) bits */
 #define	IA32_ARCH_CAP_RDCL_NO	0x00000001
 #define	IA32_ARCH_CAP_IBRS_ALL	0x00000002
 #define	IA32_ARCH_CAP_RSBA	0x00000004
 #define	IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY	0x00000008
 #define	IA32_ARCH_CAP_SSB_NO	0x00000010
 #define	IA32_ARCH_CAP_MDS_NO	0x00000020
 #define	IA32_ARCH_CAP_IF_PSCHANGE_MC_NO	0x00000040
 #define	IA32_ARCH_CAP_TSX_CTRL	0x00000080
 #define	IA32_ARCH_CAP_TAA_NO	0x00000100
 
 /* MSR IA32_TSX_CTRL bits */
 #define	IA32_TSX_CTRL_RTM_DISABLE	0x00000001
 #define	IA32_TSX_CTRL_TSX_CPUID_CLEAR	0x00000002
 
 /*
  * CPUID manufacturers identifiers
  */
 #define	AMD_VENDOR_ID		"AuthenticAMD"
 #define	CENTAUR_VENDOR_ID	"CentaurHauls"
 #define	CYRIX_VENDOR_ID		"CyrixInstead"
 #define	INTEL_VENDOR_ID		"GenuineIntel"
 #define	NEXGEN_VENDOR_ID	"NexGenDriven"
 #define	NSC_VENDOR_ID		"Geode by NSC"
 #define	RISE_VENDOR_ID		"RiseRiseRise"
 #define	SIS_VENDOR_ID		"SiS SiS SiS "
 #define	TRANSMETA_VENDOR_ID	"GenuineTMx86"
 #define	UMC_VENDOR_ID		"UMC UMC UMC "
 #define	HYGON_VENDOR_ID		"HygonGenuine"
 
 /*
  * Model-specific registers for the i386 family
  */
 #define	MSR_P5_MC_ADDR		0x000
 #define	MSR_P5_MC_TYPE		0x001
 #define	MSR_TSC			0x010
 #define	MSR_P5_CESR		0x011
 #define	MSR_P5_CTR0		0x012
 #define	MSR_P5_CTR1		0x013
 #define	MSR_IA32_PLATFORM_ID	0x017
 #define	MSR_APICBASE		0x01b
 #define	MSR_EBL_CR_POWERON	0x02a
 #define	MSR_TEST_CTL		0x033
 #define	MSR_IA32_FEATURE_CONTROL 0x03a
 #define	MSR_IA32_SPEC_CTRL	0x048
 #define	MSR_IA32_PRED_CMD	0x049
 #define	MSR_BIOS_UPDT_TRIG	0x079
 #define	MSR_BBL_CR_D0		0x088
 #define	MSR_BBL_CR_D1		0x089
 #define	MSR_BBL_CR_D2		0x08a
 #define	MSR_BIOS_SIGN		0x08b
 #define	MSR_PERFCTR0		0x0c1
 #define	MSR_PERFCTR1		0x0c2
 #define	MSR_PLATFORM_INFO	0x0ce
 #define	MSR_MPERF		0x0e7
 #define	MSR_APERF		0x0e8
 #define	MSR_IA32_EXT_CONFIG	0x0ee	/* Undocumented. Core Solo/Duo only */
 #define	MSR_MTRRcap		0x0fe
 #define	MSR_IA32_ARCH_CAP	0x10a
 #define	MSR_IA32_FLUSH_CMD	0x10b
 #define	MSR_TSX_FORCE_ABORT	0x10f
 #define	MSR_BBL_CR_ADDR		0x116
 #define	MSR_BBL_CR_DECC		0x118
 #define	MSR_BBL_CR_CTL		0x119
 #define	MSR_BBL_CR_TRIG		0x11a
 #define	MSR_BBL_CR_BUSY		0x11b
 #define	MSR_BBL_CR_CTL3		0x11e
 #define	MSR_IA32_TSX_CTRL	0x122
 #define	MSR_SYSENTER_CS_MSR	0x174
 #define	MSR_SYSENTER_ESP_MSR	0x175
 #define	MSR_SYSENTER_EIP_MSR	0x176
 #define	MSR_MCG_CAP		0x179
 #define	MSR_MCG_STATUS		0x17a
 #define	MSR_MCG_CTL		0x17b
 #define	MSR_EVNTSEL0		0x186
 #define	MSR_EVNTSEL1		0x187
 #define	MSR_THERM_CONTROL	0x19a
 #define	MSR_THERM_INTERRUPT	0x19b
 #define	MSR_THERM_STATUS	0x19c
 #define	MSR_IA32_MISC_ENABLE	0x1a0
 #define	MSR_IA32_TEMPERATURE_TARGET	0x1a2
 #define	MSR_TURBO_RATIO_LIMIT	0x1ad
 #define	MSR_TURBO_RATIO_LIMIT1	0x1ae
+#define	MSR_IA32_ENERGY_PERF_BIAS	0x1b0
 #define	MSR_DEBUGCTLMSR		0x1d9
 #define	MSR_LASTBRANCHFROMIP	0x1db
 #define	MSR_LASTBRANCHTOIP	0x1dc
 #define	MSR_LASTINTFROMIP	0x1dd
 #define	MSR_LASTINTTOIP		0x1de
 #define	MSR_ROB_CR_BKUPTMPDR6	0x1e0
 #define	MSR_MTRRVarBase		0x200
 #define	MSR_MTRR64kBase		0x250
 #define	MSR_MTRR16kBase		0x258
 #define	MSR_MTRR4kBase		0x268
 #define	MSR_PAT			0x277
 #define	MSR_MC0_CTL2		0x280
 #define	MSR_MTRRdefType		0x2ff
 #define	MSR_MC0_CTL		0x400
 #define	MSR_MC0_STATUS		0x401
 #define	MSR_MC0_ADDR		0x402
 #define	MSR_MC0_MISC		0x403
 #define	MSR_MC1_CTL		0x404
 #define	MSR_MC1_STATUS		0x405
 #define	MSR_MC1_ADDR		0x406
 #define	MSR_MC1_MISC		0x407
 #define	MSR_MC2_CTL		0x408
 #define	MSR_MC2_STATUS		0x409
 #define	MSR_MC2_ADDR		0x40a
 #define	MSR_MC2_MISC		0x40b
 #define	MSR_MC3_CTL		0x40c
 #define	MSR_MC3_STATUS		0x40d
 #define	MSR_MC3_ADDR		0x40e
 #define	MSR_MC3_MISC		0x40f
 #define	MSR_MC4_CTL		0x410
 #define	MSR_MC4_STATUS		0x411
 #define	MSR_MC4_ADDR		0x412
 #define	MSR_MC4_MISC		0x413
 #define	MSR_RAPL_POWER_UNIT	0x606
 #define	MSR_PKG_ENERGY_STATUS	0x611
 #define	MSR_DRAM_ENERGY_STATUS	0x619
 #define	MSR_PP0_ENERGY_STATUS	0x639
 #define	MSR_PP1_ENERGY_STATUS	0x641
 #define	MSR_PPERF		0x64e
 #define	MSR_TSC_DEADLINE	0x6e0	/* Writes are not serializing */
 #define	MSR_IA32_PM_ENABLE	0x770
 #define	MSR_IA32_HWP_CAPABILITIES	0x771
 #define	MSR_IA32_HWP_REQUEST_PKG	0x772
 #define	MSR_IA32_HWP_INTERRUPT		0x773
 #define	MSR_IA32_HWP_REQUEST	0x774
 #define	MSR_IA32_HWP_STATUS	0x777
 
 /*
  * VMX MSRs
  */
 #define	MSR_VMX_BASIC		0x480
 #define	MSR_VMX_PINBASED_CTLS	0x481
 #define	MSR_VMX_PROCBASED_CTLS	0x482
 #define	MSR_VMX_EXIT_CTLS	0x483
 #define	MSR_VMX_ENTRY_CTLS	0x484
 #define	MSR_VMX_CR0_FIXED0	0x486
 #define	MSR_VMX_CR0_FIXED1	0x487
 #define	MSR_VMX_CR4_FIXED0	0x488
 #define	MSR_VMX_CR4_FIXED1	0x489
 #define	MSR_VMX_PROCBASED_CTLS2	0x48b
 #define	MSR_VMX_EPT_VPID_CAP	0x48c
 #define	MSR_VMX_TRUE_PINBASED_CTLS	0x48d
 #define	MSR_VMX_TRUE_PROCBASED_CTLS	0x48e
 #define	MSR_VMX_TRUE_EXIT_CTLS	0x48f
 #define	MSR_VMX_TRUE_ENTRY_CTLS	0x490
 
 /*
  * X2APIC MSRs.
  * Writes are not serializing.
  */
 #define	MSR_APIC_000		0x800
 #define	MSR_APIC_ID		0x802
 #define	MSR_APIC_VERSION	0x803
 #define	MSR_APIC_TPR		0x808
 #define	MSR_APIC_EOI		0x80b
 #define	MSR_APIC_LDR		0x80d
 #define	MSR_APIC_SVR		0x80f
 #define	MSR_APIC_ISR0		0x810
 #define	MSR_APIC_ISR1		0x811
 #define	MSR_APIC_ISR2		0x812
 #define	MSR_APIC_ISR3		0x813
 #define	MSR_APIC_ISR4		0x814
 #define	MSR_APIC_ISR5		0x815
 #define	MSR_APIC_ISR6		0x816
 #define	MSR_APIC_ISR7		0x817
 #define	MSR_APIC_TMR0		0x818
 #define	MSR_APIC_IRR0		0x820
 #define	MSR_APIC_ESR		0x828
 #define	MSR_APIC_LVT_CMCI	0x82F
 #define	MSR_APIC_ICR		0x830
 #define	MSR_APIC_LVT_TIMER	0x832
 #define	MSR_APIC_LVT_THERMAL	0x833
 #define	MSR_APIC_LVT_PCINT	0x834
 #define	MSR_APIC_LVT_LINT0	0x835
 #define	MSR_APIC_LVT_LINT1	0x836
 #define	MSR_APIC_LVT_ERROR	0x837
 #define	MSR_APIC_ICR_TIMER	0x838
 #define	MSR_APIC_CCR_TIMER	0x839
 #define	MSR_APIC_DCR_TIMER	0x83e
 #define	MSR_APIC_SELF_IPI	0x83f
 
 #define	MSR_IA32_XSS		0xda0
 
 /*
  * Intel Processor Trace (PT) MSRs.
  */
 #define	MSR_IA32_RTIT_OUTPUT_BASE	0x560	/* Trace Output Base Register (R/W) */
 #define	MSR_IA32_RTIT_OUTPUT_MASK_PTRS	0x561	/* Trace Output Mask Pointers Register (R/W) */
 #define	MSR_IA32_RTIT_CTL		0x570	/* Trace Control Register (R/W) */
 #define	 RTIT_CTL_TRACEEN	(1 << 0)
 #define	 RTIT_CTL_CYCEN		(1 << 1)
 #define	 RTIT_CTL_OS		(1 << 2)
 #define	 RTIT_CTL_USER		(1 << 3)
 #define	 RTIT_CTL_PWREVTEN	(1 << 4)
 #define	 RTIT_CTL_FUPONPTW	(1 << 5)
 #define	 RTIT_CTL_FABRICEN	(1 << 6)
 #define	 RTIT_CTL_CR3FILTER	(1 << 7)
 #define	 RTIT_CTL_TOPA		(1 << 8)
 #define	 RTIT_CTL_MTCEN		(1 << 9)
 #define	 RTIT_CTL_TSCEN		(1 << 10)
 #define	 RTIT_CTL_DISRETC	(1 << 11)
 #define	 RTIT_CTL_PTWEN		(1 << 12)
 #define	 RTIT_CTL_BRANCHEN	(1 << 13)
 #define	 RTIT_CTL_MTC_FREQ_S	14
 #define	 RTIT_CTL_MTC_FREQ(n)	((n) << RTIT_CTL_MTC_FREQ_S)
 #define	 RTIT_CTL_MTC_FREQ_M	(0xf << RTIT_CTL_MTC_FREQ_S)
 #define	 RTIT_CTL_CYC_THRESH_S	19
 #define	 RTIT_CTL_CYC_THRESH_M	(0xf << RTIT_CTL_CYC_THRESH_S)
 #define	 RTIT_CTL_PSB_FREQ_S	24
 #define	 RTIT_CTL_PSB_FREQ_M	(0xf << RTIT_CTL_PSB_FREQ_S)
 #define	 RTIT_CTL_ADDR_CFG_S(n) (32 + (n) * 4)
 #define	 RTIT_CTL_ADDR0_CFG_S	32
 #define	 RTIT_CTL_ADDR0_CFG_M	(0xfULL << RTIT_CTL_ADDR0_CFG_S)
 #define	 RTIT_CTL_ADDR1_CFG_S	36
 #define	 RTIT_CTL_ADDR1_CFG_M	(0xfULL << RTIT_CTL_ADDR1_CFG_S)
 #define	 RTIT_CTL_ADDR2_CFG_S	40
 #define	 RTIT_CTL_ADDR2_CFG_M	(0xfULL << RTIT_CTL_ADDR2_CFG_S)
 #define	 RTIT_CTL_ADDR3_CFG_S	44
 #define	 RTIT_CTL_ADDR3_CFG_M	(0xfULL << RTIT_CTL_ADDR3_CFG_S)
 #define	MSR_IA32_RTIT_STATUS		0x571	/* Tracing Status Register (R/W) */
 #define	 RTIT_STATUS_FILTEREN	(1 << 0)
 #define	 RTIT_STATUS_CONTEXTEN	(1 << 1)
 #define	 RTIT_STATUS_TRIGGEREN	(1 << 2)
 #define	 RTIT_STATUS_ERROR	(1 << 4)
 #define	 RTIT_STATUS_STOPPED	(1 << 5)
 #define	 RTIT_STATUS_PACKETBYTECNT_S	32
 #define	 RTIT_STATUS_PACKETBYTECNT_M	(0x1ffffULL << RTIT_STATUS_PACKETBYTECNT_S)
 #define	MSR_IA32_RTIT_CR3_MATCH		0x572	/* Trace Filter CR3 Match Register (R/W) */
 #define	MSR_IA32_RTIT_ADDR_A(n)		(0x580 + (n) * 2)
 #define	MSR_IA32_RTIT_ADDR_B(n)		(0x581 + (n) * 2)
 #define	MSR_IA32_RTIT_ADDR0_A		0x580	/* Region 0 Start Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR0_B		0x581	/* Region 0 End Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR1_A		0x582	/* Region 1 Start Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR1_B		0x583	/* Region 1 End Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR2_A		0x584	/* Region 2 Start Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR2_B		0x585	/* Region 2 End Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR3_A		0x586	/* Region 3 Start Address (R/W) */
 #define	MSR_IA32_RTIT_ADDR3_B		0x587	/* Region 3 End Address (R/W) */
 
 /* Intel Processor Trace Table of Physical Addresses (ToPA). */
 #define	TOPA_SIZE_S	6
 #define	TOPA_SIZE_M	(0xf << TOPA_SIZE_S)
 #define	TOPA_SIZE_4K	(0 << TOPA_SIZE_S)
 #define	TOPA_SIZE_8K	(1 << TOPA_SIZE_S)
 #define	TOPA_SIZE_16K	(2 << TOPA_SIZE_S)
 #define	TOPA_SIZE_32K	(3 << TOPA_SIZE_S)
 #define	TOPA_SIZE_64K	(4 << TOPA_SIZE_S)
 #define	TOPA_SIZE_128K	(5 << TOPA_SIZE_S)
 #define	TOPA_SIZE_256K	(6 << TOPA_SIZE_S)
 #define	TOPA_SIZE_512K	(7 << TOPA_SIZE_S)
 #define	TOPA_SIZE_1M	(8 << TOPA_SIZE_S)
 #define	TOPA_SIZE_2M	(9 << TOPA_SIZE_S)
 #define	TOPA_SIZE_4M	(10 << TOPA_SIZE_S)
 #define	TOPA_SIZE_8M	(11 << TOPA_SIZE_S)
 #define	TOPA_SIZE_16M	(12 << TOPA_SIZE_S)
 #define	TOPA_SIZE_32M	(13 << TOPA_SIZE_S)
 #define	TOPA_SIZE_64M	(14 << TOPA_SIZE_S)
 #define	TOPA_SIZE_128M	(15 << TOPA_SIZE_S)
 #define	TOPA_STOP	(1 << 4)
 #define	TOPA_INT	(1 << 2)
 #define	TOPA_END	(1 << 0)
 
 /*
  * Constants related to MSR's.
  */
 #define	APICBASE_RESERVED	0x000002ff
 #define	APICBASE_BSP		0x00000100
 #define	APICBASE_X2APIC		0x00000400
 #define	APICBASE_ENABLED	0x00000800
 #define	APICBASE_ADDRESS	0xfffff000
 
 /* MSR_IA32_FEATURE_CONTROL related */
 #define	IA32_FEATURE_CONTROL_LOCK	0x01	/* lock bit */
 #define	IA32_FEATURE_CONTROL_SMX_EN	0x02	/* enable VMX inside SMX */
 #define	IA32_FEATURE_CONTROL_VMX_EN	0x04	/* enable VMX outside SMX */
 
 /* MSR IA32_MISC_ENABLE */
 #define	IA32_MISC_EN_FASTSTR	0x0000000000000001ULL
 #define	IA32_MISC_EN_ATCCE	0x0000000000000008ULL
 #define	IA32_MISC_EN_PERFMON	0x0000000000000080ULL
 #define	IA32_MISC_EN_PEBSU	0x0000000000001000ULL
 #define	IA32_MISC_EN_ESSTE	0x0000000000010000ULL
 #define	IA32_MISC_EN_MONE	0x0000000000040000ULL
 #define	IA32_MISC_EN_LIMCPUID	0x0000000000400000ULL
 #define	IA32_MISC_EN_xTPRD	0x0000000000800000ULL
 #define	IA32_MISC_EN_XDD	0x0000000400000000ULL
 
 /*
  * IA32_SPEC_CTRL and IA32_PRED_CMD MSRs are described in the Intel'
  * document 336996-001 Speculative Execution Side Channel Mitigations.
  *
  * AMD uses the same MSRs and bit definitions, as described in 111006-B
  * "Indirect Branch Control Extension" and 124441 "Speculative Store Bypass
  * Disable."
  */
 /* MSR IA32_SPEC_CTRL */
 #define	IA32_SPEC_CTRL_IBRS	0x00000001
 #define	IA32_SPEC_CTRL_STIBP	0x00000002
 #define	IA32_SPEC_CTRL_SSBD	0x00000004
 
 /* MSR IA32_PRED_CMD */
 #define	IA32_PRED_CMD_IBPB_BARRIER	0x0000000000000001ULL
 
 /* MSR IA32_FLUSH_CMD */
 #define	IA32_FLUSH_CMD_L1D	0x00000001
 
 /* MSR IA32_HWP_CAPABILITIES */
 #define	IA32_HWP_CAPABILITIES_HIGHEST_PERFORMANCE(x)	(((x) >> 0) & 0xff)
 #define	IA32_HWP_CAPABILITIES_GUARANTEED_PERFORMANCE(x)	(((x) >> 8) & 0xff)
 #define	IA32_HWP_CAPABILITIES_EFFICIENT_PERFORMANCE(x)	(((x) >> 16) & 0xff)
 #define	IA32_HWP_CAPABILITIES_LOWEST_PERFORMANCE(x)	(((x) >> 24) & 0xff)
 
 /* MSR IA32_HWP_REQUEST */
 #define	IA32_HWP_REQUEST_MINIMUM_VALID			(1ULL << 63)
 #define	IA32_HWP_REQUEST_MAXIMUM_VALID			(1ULL << 62)
 #define	IA32_HWP_REQUEST_DESIRED_VALID			(1ULL << 61)
 #define	IA32_HWP_REQUEST_EPP_VALID 			(1ULL << 60)
 #define	IA32_HWP_REQUEST_ACTIVITY_WINDOW_VALID		(1ULL << 59)
 #define	IA32_HWP_REQUEST_PACKAGE_CONTROL		(1ULL << 42)
 #define	IA32_HWP_ACTIVITY_WINDOW			(0x3ffULL << 32)
 #define	IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE	(0xffULL << 24)
 #define	IA32_HWP_DESIRED_PERFORMANCE			(0xffULL << 16)
 #define	IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE		(0xffULL << 8)
 #define	IA32_HWP_MINIMUM_PERFORMANCE			(0xffULL << 0)
+
+/* MSR IA32_ENERGY_PERF_BIAS: policy hint field, bits 3:0 */
+#define	IA32_ENERGY_PERF_BIAS_POLICY_HINT_MASK		(0xfULL << 0)
 
 /*
  * PAT modes.
  */
 #define	PAT_UNCACHEABLE		0x00
 #define	PAT_WRITE_COMBINING	0x01
 #define	PAT_WRITE_THROUGH	0x04
 #define	PAT_WRITE_PROTECTED	0x05
 #define	PAT_WRITE_BACK		0x06
 #define	PAT_UNCACHED		0x07
 #define	PAT_VALUE(i, m)		((long long)(m) << (8 * (i)))
 #define	PAT_MASK(i)		PAT_VALUE(i, 0xff)
 
 /*
  * Constants related to MTRRs
  */
 #define	MTRR_UNCACHEABLE	0x00
 #define	MTRR_WRITE_COMBINING	0x01
 #define	MTRR_WRITE_THROUGH	0x04
 #define	MTRR_WRITE_PROTECTED	0x05
 #define	MTRR_WRITE_BACK		0x06
 #define	MTRR_N64K		8	/* numbers of fixed-size entries */
 #define	MTRR_N16K		16
 #define	MTRR_N4K		64
 #define	MTRR_CAP_WC		0x0000000000000400
 #define	MTRR_CAP_FIXED		0x0000000000000100
 #define	MTRR_CAP_VCNT		0x00000000000000ff
 #define	MTRR_DEF_ENABLE		0x0000000000000800
 #define	MTRR_DEF_FIXED_ENABLE	0x0000000000000400
 #define	MTRR_DEF_TYPE		0x00000000000000ff
 #define	MTRR_PHYSBASE_PHYSBASE	0x000ffffffffff000
 #define	MTRR_PHYSBASE_TYPE	0x00000000000000ff
 #define	MTRR_PHYSMASK_PHYSMASK	0x000ffffffffff000
 #define	MTRR_PHYSMASK_VALID	0x0000000000000800
 
 /*
  * Cyrix configuration registers, accessible as IO ports.
  */
 #define	CCR0			0xc0	/* Configuration control register 0 */
 #define	CCR0_NC0		0x01	/* First 64K of each 1M memory region is
 								   non-cacheable */
 #define	CCR0_NC1		0x02	/* 640K-1M region is non-cacheable */
 #define	CCR0_A20M		0x04	/* Enables A20M# input pin */
 #define	CCR0_KEN		0x08	/* Enables KEN# input pin */
 #define	CCR0_FLUSH		0x10	/* Enables FLUSH# input pin */
 #define	CCR0_BARB		0x20	/* Flushes internal cache when entering hold
 								   state */
 #define	CCR0_CO			0x40	/* Cache org: 1=direct mapped, 0=2x set
 								   assoc */
 #define	CCR0_SUSPEND	0x80	/* Enables SUSP# and SUSPA# pins */
 
 #define	CCR1			0xc1	/* Configuration control register 1 */
 #define	CCR1_RPL		0x01	/* Enables RPLSET and RPLVAL# pins */
 #define	CCR1_SMI		0x02	/* Enables SMM pins */
 #define	CCR1_SMAC		0x04	/* System management memory access */
 #define	CCR1_MMAC		0x08	/* Main memory access */
 #define	CCR1_NO_LOCK	0x10	/* Negate LOCK# */
 #define	CCR1_SM3		0x80	/* SMM address space address region 3 */
 
 #define	CCR2			0xc2
 #define	CCR2_WB			0x02	/* Enables WB cache interface pins */
 #define	CCR2_SADS		0x02	/* Slow ADS */
 #define	CCR2_LOCK_NW	0x04	/* LOCK NW Bit */
 #define	CCR2_SUSP_HLT	0x08	/* Suspend on HALT */
 #define	CCR2_WT1		0x10	/* WT region 1 */
 #define	CCR2_WPR1		0x10	/* Write-protect region 1 */
 #define	CCR2_BARB		0x20	/* Flushes write-back cache when entering
 								   hold state. */
 #define	CCR2_BWRT		0x40	/* Enables burst write cycles */
 #define	CCR2_USE_SUSP	0x80	/* Enables suspend pins */
 
 #define	CCR3			0xc3
 #define	CCR3_SMILOCK	0x01	/* SMM register lock */
 #define	CCR3_NMI		0x02	/* Enables NMI during SMM */
 #define	CCR3_LINBRST	0x04	/* Linear address burst cycles */
 #define	CCR3_SMMMODE	0x08	/* SMM Mode */
 #define	CCR3_MAPEN0		0x10	/* Enables Map0 */
 #define	CCR3_MAPEN1		0x20	/* Enables Map1 */
 #define	CCR3_MAPEN2		0x40	/* Enables Map2 */
 #define	CCR3_MAPEN3		0x80	/* Enables Map3 */
 
 #define	CCR4			0xe8	/* Configuration control register 4 */
 #define	CCR4_IOMASK		0x07
 #define	CCR4_MEM		0x08	/* Enables memory bypassing */
 #define	CCR4_DTE		0x10	/* Enables directory table entry cache */
 #define	CCR4_FASTFPE	0x20	/* Fast FPU exception */
 #define	CCR4_CPUID		0x80	/* Enables CPUID instruction */
 
 #define	CCR5			0xe9
 #define	CCR5_WT_ALLOC	0x01	/* Write-through allocate */
 #define	CCR5_SLOP		0x02	/* LOOP instruction slowed down */
 #define	CCR5_LBR1		0x10	/* Local bus region 1 */
 #define	CCR5_ARREN		0x20	/* Enables ARR region */
 
 #define	CCR6			0xea
 
 #define	CCR7			0xeb
 
 /* Performance Control Register (5x86 only). */
 #define	PCR0			0x20
 #define	PCR0_RSTK		0x01	/* Enables return stack */
 #define	PCR0_BTB		0x02	/* Enables branch target buffer */
 #define	PCR0_LOOP		0x04	/* Enables loop */
 #define	PCR0_AIS		0x08	/* Enables all instructions stalled to
 								   serialize pipe. */
 #define	PCR0_MLR		0x10	/* Enables reordering of misaligned loads */
 #define	PCR0_BTBRT		0x40	/* Enables BTB test register. */
 #define	PCR0_LSSER		0x80	/* Disable reorder */
 
 /* Device Identification Registers */
 #define	DIR0			0xfe
 #define	DIR1			0xff
 
 /*
  * Machine Check register constants.
  */
 #define	MCG_CAP_COUNT		0x000000ff
 #define	MCG_CAP_CTL_P		0x00000100
 #define	MCG_CAP_EXT_P		0x00000200
 #define	MCG_CAP_CMCI_P		0x00000400
 #define	MCG_CAP_TES_P		0x00000800
 #define	MCG_CAP_EXT_CNT		0x00ff0000
 #define	MCG_CAP_SER_P		0x01000000
 #define	MCG_STATUS_RIPV		0x00000001
 #define	MCG_STATUS_EIPV		0x00000002
 #define	MCG_STATUS_MCIP		0x00000004
 #define	MCG_CTL_ENABLE		0xffffffffffffffff
 #define	MCG_CTL_DISABLE		0x0000000000000000
 #define	MSR_MC_CTL(x)		(MSR_MC0_CTL + (x) * 4)
 #define	MSR_MC_STATUS(x)	(MSR_MC0_STATUS + (x) * 4)
 #define	MSR_MC_ADDR(x)		(MSR_MC0_ADDR + (x) * 4)
 #define	MSR_MC_MISC(x)		(MSR_MC0_MISC + (x) * 4)
 #define	MSR_MC_CTL2(x)		(MSR_MC0_CTL2 + (x))	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_MCA_ERROR	0x000000000000ffff
 #define	MC_STATUS_MODEL_ERROR	0x00000000ffff0000
 #define	MC_STATUS_OTHER_INFO	0x01ffffff00000000
 #define	MC_STATUS_COR_COUNT	0x001fffc000000000	/* If MCG_CAP_CMCI_P */
 #define	MC_STATUS_TES_STATUS	0x0060000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_AR		0x0080000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_S		0x0100000000000000	/* If MCG_CAP_TES_P */
 #define	MC_STATUS_PCC		0x0200000000000000
 #define	MC_STATUS_ADDRV		0x0400000000000000
 #define	MC_STATUS_MISCV		0x0800000000000000
 #define	MC_STATUS_EN		0x1000000000000000
 #define	MC_STATUS_UC		0x2000000000000000
 #define	MC_STATUS_OVER		0x4000000000000000
 #define	MC_STATUS_VAL		0x8000000000000000
 #define	MC_MISC_RA_LSB		0x000000000000003f	/* If MCG_CAP_SER_P */
 #define	MC_MISC_ADDRESS_MODE	0x00000000000001c0	/* If MCG_CAP_SER_P */
 #define	MC_CTL2_THRESHOLD	0x0000000000007fff
 #define	MC_CTL2_CMCI_EN		0x0000000040000000
 #define	MC_AMDNB_BANK		4
 #define	MC_MISC_AMD_VAL		0x8000000000000000	/* Counter presence valid */
 #define	MC_MISC_AMD_CNTP	0x4000000000000000	/* Counter present */
 #define	MC_MISC_AMD_LOCK	0x2000000000000000	/* Register locked */
 #define	MC_MISC_AMD_INTP	0x1000000000000000	/* Int. type can generate interrupts */
 #define	MC_MISC_AMD_LVT_MASK	0x00f0000000000000	/* Extended LVT offset */
 #define	MC_MISC_AMD_LVT_SHIFT	52
 #define	MC_MISC_AMD_CNTEN	0x0008000000000000	/* Counter enabled */
 #define	MC_MISC_AMD_INT_MASK	0x0006000000000000	/* Interrupt type */
 #define	MC_MISC_AMD_INT_LVT	0x0002000000000000	/* Interrupt via Extended LVT */
 #define	MC_MISC_AMD_INT_SMI	0x0004000000000000	/* SMI */
 #define	MC_MISC_AMD_OVERFLOW	0x0001000000000000	/* Counter overflow */
 #define	MC_MISC_AMD_CNT_MASK	0x00000fff00000000	/* Counter value */
 #define	MC_MISC_AMD_CNT_SHIFT	32
 #define	MC_MISC_AMD_CNT_MAX	0xfff
 #define	MC_MISC_AMD_PTR_MASK	0x00000000ff000000	/* Pointer to additional registers */
 #define	MC_MISC_AMD_PTR_SHIFT	24
 
 /* AMD Scalable MCA */
 #define MSR_SMCA_MC0_CTL          0xc0002000
 #define MSR_SMCA_MC0_STATUS       0xc0002001
 #define MSR_SMCA_MC0_ADDR         0xc0002002
 #define MSR_SMCA_MC0_MISC0        0xc0002003
 #define MSR_SMCA_MC_CTL(x)       (MSR_SMCA_MC0_CTL + 0x10 * (x))
 #define MSR_SMCA_MC_STATUS(x)    (MSR_SMCA_MC0_STATUS + 0x10 * (x))
 #define MSR_SMCA_MC_ADDR(x)      (MSR_SMCA_MC0_ADDR + 0x10 * (x))
 #define MSR_SMCA_MC_MISC(x)      (MSR_SMCA_MC0_MISC0 + 0x10 * (x))
 
 /*
  * The following four 3-byte registers control the non-cacheable regions.
  * These registers must be written as three separate bytes.
  *
  * NCRx+0: A31-A24 of starting address
  * NCRx+1: A23-A16 of starting address
  * NCRx+2: A15-A12 of starting address | NCR_SIZE_xx.
  *
  * The non-cacheable region's starting address must be aligned to the
  * size indicated by the NCR_SIZE_xx field.
  */
 #define	NCR1	0xc4
 #define	NCR2	0xc7
 #define	NCR3	0xca
 #define	NCR4	0xcd
 
 #define	NCR_SIZE_0K	0
 #define	NCR_SIZE_4K	1
 #define	NCR_SIZE_8K	2
 #define	NCR_SIZE_16K	3
 #define	NCR_SIZE_32K	4
 #define	NCR_SIZE_64K	5
 #define	NCR_SIZE_128K	6
 #define	NCR_SIZE_256K	7
 #define	NCR_SIZE_512K	8
 #define	NCR_SIZE_1M	9
 #define	NCR_SIZE_2M	10
 #define	NCR_SIZE_4M	11
 #define	NCR_SIZE_8M	12
 #define	NCR_SIZE_16M	13
 #define	NCR_SIZE_32M	14
 #define	NCR_SIZE_4G	15
 
 /*
  * The address region registers are used to specify the location and
  * size for the eight address regions.
  *
  * ARRx + 0: A31-A24 of start address
  * ARRx + 1: A23-A16 of start address
  * ARRx + 2: A15-A12 of start address | ARR_SIZE_xx
  */
 #define	ARR0	0xc4
 #define	ARR1	0xc7
 #define	ARR2	0xca
 #define	ARR3	0xcd
 #define	ARR4	0xd0
 #define	ARR5	0xd3
 #define	ARR6	0xd6
 #define	ARR7	0xd9
 
 #define	ARR_SIZE_0K		0
 #define	ARR_SIZE_4K		1
 #define	ARR_SIZE_8K		2
 #define	ARR_SIZE_16K	3
 #define	ARR_SIZE_32K	4
 #define	ARR_SIZE_64K	5
 #define	ARR_SIZE_128K	6
 #define	ARR_SIZE_256K	7
 #define	ARR_SIZE_512K	8
 #define	ARR_SIZE_1M		9
 #define	ARR_SIZE_2M		10
 #define	ARR_SIZE_4M		11
 #define	ARR_SIZE_8M		12
 #define	ARR_SIZE_16M	13
 #define	ARR_SIZE_32M	14
 #define	ARR_SIZE_4G		15
 
 /*
  * The region control registers specify the attributes associated with
  * the ARRx address regions.
  */
 #define	RCR0	0xdc
 #define	RCR1	0xdd
 #define	RCR2	0xde
 #define	RCR3	0xdf
 #define	RCR4	0xe0
 #define	RCR5	0xe1
 #define	RCR6	0xe2
 #define	RCR7	0xe3
 
 #define	RCR_RCD	0x01	/* Disables caching for ARRx (x = 0-6). */
 #define	RCR_RCE	0x01	/* Enables caching for ARR7. */
 #define	RCR_WWO	0x02	/* Weak write ordering. */
 #define	RCR_WL	0x04	/* Weak locking. */
 #define	RCR_WG	0x08	/* Write gathering. */
 #define	RCR_WT	0x10	/* Write-through. */
 #define	RCR_NLB	0x20	/* LBA# pin is not asserted. */
 
 /* AMD Write Allocate Top-Of-Memory and Control Register */
 #define	AMD_WT_ALLOC_TME	0x40000	/* top-of-memory enable */
 #define	AMD_WT_ALLOC_PRE	0x20000	/* programmable range enable */
 #define	AMD_WT_ALLOC_FRE	0x10000	/* fixed (A0000-FFFFF) range enable */
 
 /* AMD64 MSR's */
 #define	MSR_EFER	0xc0000080	/* extended features */
 #define	MSR_STAR	0xc0000081	/* legacy mode SYSCALL target/cs/ss */
 #define	MSR_LSTAR	0xc0000082	/* long mode SYSCALL target rip */
 #define	MSR_CSTAR	0xc0000083	/* compat mode SYSCALL target rip */
 #define	MSR_SF_MASK	0xc0000084	/* syscall flags mask */
 #define	MSR_FSBASE	0xc0000100	/* base address of the %fs "segment" */
 #define	MSR_GSBASE	0xc0000101	/* base address of the %gs "segment" */
 #define	MSR_KGSBASE	0xc0000102	/* base address of the kernel %gs */
 #define	MSR_TSC_AUX	0xc0000103
 #define	MSR_PERFEVSEL0	0xc0010000
 #define	MSR_PERFEVSEL1	0xc0010001
 #define	MSR_PERFEVSEL2	0xc0010002
 #define	MSR_PERFEVSEL3	0xc0010003
 #define	MSR_K7_PERFCTR0	0xc0010004
 #define	MSR_K7_PERFCTR1	0xc0010005
 #define	MSR_K7_PERFCTR2	0xc0010006
 #define	MSR_K7_PERFCTR3	0xc0010007
 #define	MSR_SYSCFG	0xc0010010
 #define	MSR_HWCR	0xc0010015
 #define	MSR_IORRBASE0	0xc0010016
 #define	MSR_IORRMASK0	0xc0010017
 #define	MSR_IORRBASE1	0xc0010018
 #define	MSR_IORRMASK1	0xc0010019
 #define	MSR_TOP_MEM	0xc001001a	/* boundary for ram below 4G */
 #define	MSR_TOP_MEM2	0xc001001d	/* boundary for ram above 4G */
 #define	MSR_NB_CFG1	0xc001001f	/* NB configuration 1 */
 #define	MSR_K8_UCODE_UPDATE 0xc0010020	/* update microcode */
 #define	MSR_MC0_CTL_MASK 0xc0010044
 #define	MSR_P_STATE_LIMIT 0xc0010061	/* P-state Current Limit Register */
 #define	MSR_P_STATE_CONTROL 0xc0010062	/* P-state Control Register */
 #define	MSR_P_STATE_STATUS 0xc0010063	/* P-state Status Register */
 #define	MSR_P_STATE_CONFIG(n) (0xc0010064 + (n)) /* P-state Config */
 #define	MSR_SMM_ADDR	0xc0010112	/* SMM TSEG base address */
 #define	MSR_SMM_MASK	0xc0010113	/* SMM TSEG address mask */
 #define	MSR_VM_CR	0xc0010114	/* SVM: feature control */
 #define	MSR_VM_HSAVE_PA 0xc0010117	/* SVM: host save area address */
 #define	MSR_AMD_CPUID07	0xc0011002	/* CPUID 07 %ebx override */
 #define	MSR_EXTFEATURES	0xc0011005	/* Extended CPUID Features override */
 #define	MSR_LS_CFG	0xc0011020
 #define	MSR_IC_CFG	0xc0011021	/* Instruction Cache Configuration */
 
 /* MSR_VM_CR related */
 #define	VM_CR_SVMDIS		0x10	/* SVM: disabled by BIOS */
 
 /* VIA ACE crypto featureset: for via_feature_rng */
 #define	VIA_HAS_RNG		1	/* cpu has RNG */
 
 /* VIA ACE crypto featureset: for via_feature_xcrypt */
 #define	VIA_HAS_AES		1	/* cpu has AES */
 #define	VIA_HAS_SHA		2	/* cpu has SHA1 & SHA256 */
 #define	VIA_HAS_MM		4	/* cpu has RSA instructions */
 #define	VIA_HAS_AESCTR		8	/* cpu has AES-CTR instructions */
 
 /* Centaur Extended Feature flags */
 #define	VIA_CPUID_HAS_RNG	0x000004
 #define	VIA_CPUID_DO_RNG	0x000008
 #define	VIA_CPUID_HAS_ACE	0x000040
 #define	VIA_CPUID_DO_ACE	0x000080
 #define	VIA_CPUID_HAS_ACE2	0x000100
 #define	VIA_CPUID_DO_ACE2	0x000200
 #define	VIA_CPUID_HAS_PHE	0x000400
 #define	VIA_CPUID_DO_PHE	0x000800
 #define	VIA_CPUID_HAS_PMM	0x001000
 #define	VIA_CPUID_DO_PMM	0x002000
 
 /* VIA ACE xcrypt-* instruction context control options */
 #define	VIA_CRYPT_CWLO_ROUND_M		0x0000000f	/* round count mask */
 #define	VIA_CRYPT_CWLO_ALG_M		0x00000070
 #define	VIA_CRYPT_CWLO_ALG_AES		0x00000000
 #define	VIA_CRYPT_CWLO_KEYGEN_M		0x00000080
 #define	VIA_CRYPT_CWLO_KEYGEN_HW	0x00000000
 #define	VIA_CRYPT_CWLO_KEYGEN_SW	0x00000080
 #define	VIA_CRYPT_CWLO_NORMAL		0x00000000
 #define	VIA_CRYPT_CWLO_INTERMEDIATE	0x00000100
 #define	VIA_CRYPT_CWLO_ENCRYPT		0x00000000
 #define	VIA_CRYPT_CWLO_DECRYPT		0x00000200
 #define	VIA_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
 #define	VIA_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
 #define	VIA_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
 
 #endif /* !_MACHINE_SPECIALREG_H_ */
Index: projects/clang1000-import/sys/x86/x86/identcpu.c
===================================================================
--- projects/clang1000-import/sys/x86/x86/identcpu.c	(revision 357389)
+++ projects/clang1000-import/sys/x86/x86/identcpu.c	(revision 357390)
@@ -1,2672 +1,2672 @@
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * Copyright (c) 1997 KATO Takenori.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/eventhandler.h>
 #include <sys/limits.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/power.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <machine/asmacros.h>
 #include <machine/clock.h>
 #include <machine/cputypes.h>
 #include <machine/frame.h>
 #include <machine/intr_machdep.h>
 #include <machine/md_var.h>
 #include <machine/segments.h>
 #include <machine/specialreg.h>
 
 #include <amd64/vmm/intel/vmx_controls.h>
 #include <x86/isa/icu.h>
 #include <x86/vmware.h>
 
 #ifdef __i386__
 #define	IDENTBLUE_CYRIX486	0
 #define	IDENTBLUE_IBMCPU	1
 #define	IDENTBLUE_CYRIXM2	2
 
 static void identifycyrix(void);
 static void print_transmeta_info(void);
 #endif
 static u_int find_cpu_vendor_id(void);
 static void print_AMD_info(void);
 static void print_INTEL_info(void);
 static void print_INTEL_TLB(u_int data);
 static void print_hypervisor_info(void);
 static void print_svm_info(void);
 static void print_via_padlock_info(void);
 static void print_vmx_info(void);
 
 #ifdef __i386__
 int	cpu;			/* Are we 386, 386sx, 486, etc? */
 int	cpu_class;
 #endif
 u_int	cpu_feature;		/* Feature flags */
 u_int	cpu_feature2;		/* Feature flags */
 u_int	amd_feature;		/* AMD feature flags */
 u_int	amd_feature2;		/* AMD feature flags */
 u_int	amd_rascap;		/* AMD RAS capabilities */
 u_int	amd_pminfo;		/* AMD advanced power management info */
 u_int	amd_extended_feature_extensions;
 u_int	via_feature_rng;	/* VIA RNG features */
 u_int	via_feature_xcrypt;	/* VIA ACE features */
 u_int	cpu_high;		/* Highest arg to CPUID */
 u_int	cpu_exthigh;		/* Highest arg to extended CPUID */
 u_int	cpu_id;			/* Stepping ID */
 u_int	cpu_procinfo;		/* HyperThreading Info / Brand Index / CLFLUSH */
 u_int	cpu_procinfo2;		/* Multicore info */
 char	cpu_vendor[20];		/* CPU Origin code */
 u_int	cpu_vendor_id;		/* CPU vendor ID */
 u_int	cpu_fxsr;		/* SSE enabled */
 u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
 u_int	cpu_clflush_line_size = 32;
 u_int	cpu_stdext_feature;	/* %ebx */
 u_int	cpu_stdext_feature2;	/* %ecx */
 u_int	cpu_stdext_feature3;	/* %edx */
 uint64_t cpu_ia32_arch_caps;
 u_int	cpu_max_ext_state_size;
 u_int	cpu_mon_mwait_flags;	/* MONITOR/MWAIT flags (CPUID.05H.ECX) */
 u_int	cpu_mon_min_size;	/* MONITOR minimum range size, bytes */
 u_int	cpu_mon_max_size;	/* MONITOR maximum range size, bytes */
 u_int	cpu_maxphyaddr;		/* Max phys addr width in bits */
 u_int	cpu_power_eax;		/* 06H: Power management leaf, %eax */
-u_int	cpu_power_ebx;		/* 06H: Power management leaf, %eax */
-u_int	cpu_power_ecx;		/* 06H: Power management leaf, %eax */
-u_int	cpu_power_edx;		/* 06H: Power management leaf, %eax */
+u_int	cpu_power_ebx;		/* 06H: Power management leaf, %ebx */
+u_int	cpu_power_ecx;		/* 06H: Power management leaf, %ecx */
+u_int	cpu_power_edx;		/* 06H: Power management leaf, %edx */
 char machine[] = MACHINE;
 
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD,
     &via_feature_rng, 0,
     "VIA RNG feature available in CPU");
 SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD,
     &via_feature_xcrypt, 0,
     "VIA xcrypt feature available in CPU");
 
 #ifdef __amd64__
 #ifdef SCTL_MASK32
 extern int adaptive_machine_arch;
 #endif
 
 /*
  * Handler for the hw.machine sysctl: copy out the machine class string.
  * When SCTL_MASK32 is defined, requests flagged with it are given "i386"
  * instead, provided adaptive_machine_arch is non-zero.
  */
 static int
 sysctl_hw_machine(SYSCTL_HANDLER_ARGS)
 {
 #ifdef SCTL_MASK32
 	static const char machine32[] = "i386";
 
 	if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch)
 		return (SYSCTL_OUT(req, machine32, sizeof(machine32)));
 #endif
 	/* Default: report the native machine string. */
 	return (SYSCTL_OUT(req, machine, sizeof(machine)));
 }
 SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class");
 #else
 SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD,
     machine, 0, "Machine class");
 #endif
 
 static char cpu_model[128];
 SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_MPSAFE,
     cpu_model, 0, "Machine model");
 
 static int hw_clockrate;
 SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD,
     &hw_clockrate, 0, "CPU instruction clock rate");
 
 u_int hv_base;
 u_int hv_high;
 char hv_vendor[16];
 SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD | CTLFLAG_MPSAFE, hv_vendor,
     0, "Hypervisor vendor");
 
 static eventhandler_tag tsc_post_tag;
 
 static char cpu_brand[48];
 
 #ifdef __i386__
 #define	MAX_BRAND_INDEX	8	/* highest valid index into cpu_brandtable */
 
 static const char *cpu_brandtable[MAX_BRAND_INDEX + 1] = {
 	NULL,			/* No brand */
 	"Intel Celeron",
 	"Intel Pentium III",
 	"Intel Pentium III Xeon",
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	"Intel Pentium 4"
 };
 
 static struct {
 	char	*cpu_name;
 	int	cpu_class;
 } cpus[] = {		/* indexed by the global `cpu' value (CPU_xxx) */
 	{ "Intel 80286",	CPUCLASS_286 },		/* CPU_286   */
 	{ "i386SX",		CPUCLASS_386 },		/* CPU_386SX */
 	{ "i386DX",		CPUCLASS_386 },		/* CPU_386   */
 	{ "i486SX",		CPUCLASS_486 },		/* CPU_486SX */
 	{ "i486DX",		CPUCLASS_486 },		/* CPU_486   */
 	{ "Pentium",		CPUCLASS_586 },		/* CPU_586   */
 	{ "Cyrix 486",		CPUCLASS_486 },		/* CPU_486DLC */
 	{ "Pentium Pro",	CPUCLASS_686 },		/* CPU_686 */
 	{ "Cyrix 5x86",		CPUCLASS_486 },		/* CPU_M1SC */
 	{ "Cyrix 6x86",		CPUCLASS_486 },		/* CPU_M1 */
 	{ "Blue Lightning",	CPUCLASS_486 },		/* CPU_BLUE */
 	{ "Cyrix 6x86MX",	CPUCLASS_686 },		/* CPU_M2 */
 	{ "NexGen 586",		CPUCLASS_386 },		/* CPU_NX586 (XXX) */
 	{ "Cyrix 486S/DX",	CPUCLASS_486 },		/* CPU_CY486DX */
 	{ "Pentium II",		CPUCLASS_686 },		/* CPU_PII */
 	{ "Pentium III",	CPUCLASS_686 },		/* CPU_PIII */
 	{ "Pentium 4",		CPUCLASS_686 },		/* CPU_P4 */
 };
 #endif
 
 static struct {
 	char	*vendor;
 	u_int	vendor_id;
 } cpu_vendors[] = {
 	{ INTEL_VENDOR_ID,	CPU_VENDOR_INTEL },	/* GenuineIntel */
 	{ AMD_VENDOR_ID,	CPU_VENDOR_AMD },	/* AuthenticAMD */
 	{ HYGON_VENDOR_ID,	CPU_VENDOR_HYGON },	/* HygonGenuine */
 	{ CENTAUR_VENDOR_ID,	CPU_VENDOR_CENTAUR },	/* CentaurHauls */
 #ifdef __i386__
 	{ NSC_VENDOR_ID,	CPU_VENDOR_NSC },	/* Geode by NSC */
 	{ CYRIX_VENDOR_ID,	CPU_VENDOR_CYRIX },	/* CyrixInstead */
 	{ TRANSMETA_VENDOR_ID,	CPU_VENDOR_TRANSMETA },	/* GenuineTMx86 */
 	{ SIS_VENDOR_ID,	CPU_VENDOR_SIS },	/* SiS SiS SiS  */
 	{ UMC_VENDOR_ID,	CPU_VENDOR_UMC },	/* UMC UMC UMC  */
 	{ NEXGEN_VENDOR_ID,	CPU_VENDOR_NEXGEN },	/* NexGenDriven */
 	{ RISE_VENDOR_ID,	CPU_VENDOR_RISE },	/* RiseRiseRise */
 #if 0
 	/* XXX CPUID 8000_0000h and 8086_0000h, not 0000_0000h */
 	{ "TransmetaCPU",	CPU_VENDOR_TRANSMETA },
 #endif
 #endif
 };
 
 void
 printcpuinfo(void)
 {
 	u_int regs[4], i;
 	char *brand;
 
 	printf("CPU: ");
 #ifdef __i386__
 	cpu_class = cpus[cpu].cpu_class;
 	strncpy(cpu_model, cpus[cpu].cpu_name, sizeof (cpu_model));
 #else
 	strncpy(cpu_model, "Hammer", sizeof (cpu_model));
 #endif
 
 	/* Check for extended CPUID information and a processor name. */
 	if (cpu_exthigh >= 0x80000004) {
 		brand = cpu_brand;
 		for (i = 0x80000002; i < 0x80000005; i++) {
 			do_cpuid(i, regs);
 			memcpy(brand, regs, sizeof(regs));
 			brand += sizeof(regs);
 		}
 	}
 
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_INTEL:
 #ifdef __i386__
 		if ((cpu_id & 0xf00) > 0x300) {
 			u_int brand_index;
 
 			cpu_model[0] = '\0';
 
 			switch (cpu_id & 0x3000) {
 			case 0x1000:
 				strcpy(cpu_model, "Overdrive ");
 				break;
 			case 0x2000:
 				strcpy(cpu_model, "Dual ");
 				break;
 			}
 
 			switch (cpu_id & 0xf00) {
 			case 0x400:
 				strcat(cpu_model, "i486 ");
 			        /* Check the particular flavor of 486 */
 				switch (cpu_id & 0xf0) {
 				case 0x00:
 				case 0x10:
 					strcat(cpu_model, "DX");
 					break;
 				case 0x20:
 					strcat(cpu_model, "SX");
 					break;
 				case 0x30:
 					strcat(cpu_model, "DX2");
 					break;
 				case 0x40:
 					strcat(cpu_model, "SL");
 					break;
 				case 0x50:
 					strcat(cpu_model, "SX2");
 					break;
 				case 0x70:
 					strcat(cpu_model,
 					    "DX2 Write-Back Enhanced");
 					break;
 				case 0x80:
 					strcat(cpu_model, "DX4");
 					break;
 				}
 				break;
 			case 0x500:
 			        /* Check the particular flavor of 586 */
 			        strcat(cpu_model, "Pentium");
 			        switch (cpu_id & 0xf0) {
 				case 0x00:
 				        strcat(cpu_model, " A-step");
 					break;
 				case 0x10:
 				        strcat(cpu_model, "/P5");
 					break;
 				case 0x20:
 				        strcat(cpu_model, "/P54C");
 					break;
 				case 0x30:
 				        strcat(cpu_model, "/P24T");
 					break;
 				case 0x40:
 				        strcat(cpu_model, "/P55C");
 					break;
 				case 0x70:
 				        strcat(cpu_model, "/P54C");
 					break;
 				case 0x80:
 				        strcat(cpu_model, "/P55C (quarter-micron)");
 					break;
 				default:
 				        /* nothing */
 					break;
 				}
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 				/*
 				 * XXX - If/when Intel fixes the bug, this
 				 * should also check the version of the
 				 * CPU, not just that it's a Pentium.
 				 */
 				has_f00f_bug = 1;
 #endif
 				break;
 			case 0x600:
 			        /* Check the particular flavor of 686 */
   			        switch (cpu_id & 0xf0) {
 				case 0x00:
 				        strcat(cpu_model, "Pentium Pro A-step");
 					break;
 				case 0x10:
 				        strcat(cpu_model, "Pentium Pro");
 					break;
 				case 0x30:
 				case 0x50:
 				case 0x60:
 				        strcat(cpu_model,
 				"Pentium II/Pentium II Xeon/Celeron");
 					cpu = CPU_PII;
 					break;
 				case 0x70:
 				case 0x80:
 				case 0xa0:
 				case 0xb0:
 				        strcat(cpu_model,
 					"Pentium III/Pentium III Xeon/Celeron");
 					cpu = CPU_PIII;
 					break;
 				default:
 				        strcat(cpu_model, "Unknown 80686");
 					break;
 				}
 				break;
 			case 0xf00:
 				strcat(cpu_model, "Pentium 4");
 				cpu = CPU_P4;
 				break;
 			default:
 				strcat(cpu_model, "unknown");
 				break;
 			}
 
 			/*
 			 * If we didn't get a brand name from the extended
 			 * CPUID, try to look it up in the brand table.
 			 */
 			if (cpu_high > 0 && *cpu_brand == '\0') {
 				brand_index = cpu_procinfo & CPUID_BRAND_INDEX;
 				if (brand_index <= MAX_BRAND_INDEX &&
 				    cpu_brandtable[brand_index] != NULL)
 					strcpy(cpu_brand,
 					    cpu_brandtable[brand_index]);
 			}
 		}
 #else
 		/* Please make up your mind folks! */
 		strcat(cpu_model, "EM64T");
 #endif
 		break;
 	case CPU_VENDOR_AMD:
 		/*
 		 * Values taken from AMD Processor Recognition
 		 * http://www.amd.com/K6/k6docs/pdf/20734g.pdf
 		 * (also describes ``Features'' encodings.
 		 */
 		strcpy(cpu_model, "AMD ");
 #ifdef __i386__
 		switch (cpu_id & 0xFF0) {
 		case 0x410:
 			strcat(cpu_model, "Standard Am486DX");
 			break;
 		case 0x430:
 			strcat(cpu_model, "Enhanced Am486DX2 Write-Through");
 			break;
 		case 0x470:
 			strcat(cpu_model, "Enhanced Am486DX2 Write-Back");
 			break;
 		case 0x480:
 			strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Through");
 			break;
 		case 0x490:
 			strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Back");
 			break;
 		case 0x4E0:
 			strcat(cpu_model, "Am5x86 Write-Through");
 			break;
 		case 0x4F0:
 			strcat(cpu_model, "Am5x86 Write-Back");
 			break;
 		case 0x500:
 			strcat(cpu_model, "K5 model 0");
 			break;
 		case 0x510:
 			strcat(cpu_model, "K5 model 1");
 			break;
 		case 0x520:
 			strcat(cpu_model, "K5 PR166 (model 2)");
 			break;
 		case 0x530:
 			strcat(cpu_model, "K5 PR200 (model 3)");
 			break;
 		case 0x560:
 			strcat(cpu_model, "K6");
 			break;
 		case 0x570:
 			strcat(cpu_model, "K6 266 (model 1)");
 			break;
 		case 0x580:
 			strcat(cpu_model, "K6-2");
 			break;
 		case 0x590:
 			strcat(cpu_model, "K6-III");
 			break;
 		case 0x5a0:
 			strcat(cpu_model, "Geode LX");
 			break;
 		default:
 			strcat(cpu_model, "Unknown");
 			break;
 		}
 #else
 		if ((cpu_id & 0xf00) == 0xf00)
 			strcat(cpu_model, "AMD64 Processor");
 		else
 			strcat(cpu_model, "Unknown");
 #endif
 		break;
 #ifdef __i386__
 	case CPU_VENDOR_CYRIX:
 		strcpy(cpu_model, "Cyrix ");
 		switch (cpu_id & 0xff0) {
 		case 0x440:
 			strcat(cpu_model, "MediaGX");
 			break;
 		case 0x520:
 			strcat(cpu_model, "6x86");
 			break;
 		case 0x540:
 			cpu_class = CPUCLASS_586;
 			strcat(cpu_model, "GXm");
 			break;
 		case 0x600:
 			strcat(cpu_model, "6x86MX");
 			break;
 		default:
 			/*
 			 * Even though CPU supports the cpuid
 			 * instruction, it can be disabled.
 			 * Therefore, this routine supports all Cyrix
 			 * CPUs.
 			 */
 			switch (cyrix_did & 0xf0) {
 			case 0x00:
 				switch (cyrix_did & 0x0f) {
 				case 0x00:
 					strcat(cpu_model, "486SLC");
 					break;
 				case 0x01:
 					strcat(cpu_model, "486DLC");
 					break;
 				case 0x02:
 					strcat(cpu_model, "486SLC2");
 					break;
 				case 0x03:
 					strcat(cpu_model, "486DLC2");
 					break;
 				case 0x04:
 					strcat(cpu_model, "486SRx");
 					break;
 				case 0x05:
 					strcat(cpu_model, "486DRx");
 					break;
 				case 0x06:
 					strcat(cpu_model, "486SRx2");
 					break;
 				case 0x07:
 					strcat(cpu_model, "486DRx2");
 					break;
 				case 0x08:
 					strcat(cpu_model, "486SRu");
 					break;
 				case 0x09:
 					strcat(cpu_model, "486DRu");
 					break;
 				case 0x0a:
 					strcat(cpu_model, "486SRu2");
 					break;
 				case 0x0b:
 					strcat(cpu_model, "486DRu2");
 					break;
 				default:
 					strcat(cpu_model, "Unknown");
 					break;
 				}
 				break;
 			case 0x10:
 				switch (cyrix_did & 0x0f) {
 				case 0x00:
 					strcat(cpu_model, "486S");
 					break;
 				case 0x01:
 					strcat(cpu_model, "486S2");
 					break;
 				case 0x02:
 					strcat(cpu_model, "486Se");
 					break;
 				case 0x03:
 					strcat(cpu_model, "486S2e");
 					break;
 				case 0x0a:
 					strcat(cpu_model, "486DX");
 					break;
 				case 0x0b:
 					strcat(cpu_model, "486DX2");
 					break;
 				case 0x0f:
 					strcat(cpu_model, "486DX4");
 					break;
 				default:
 					strcat(cpu_model, "Unknown");
 					break;
 				}
 				break;
 			case 0x20:
 				if ((cyrix_did & 0x0f) < 8)
 					strcat(cpu_model, "6x86");	/* Where did you get it? */
 				else
 					strcat(cpu_model, "5x86");
 				break;
 			case 0x30:
 				strcat(cpu_model, "6x86");
 				break;
 			case 0x40:
 				if ((cyrix_did & 0xf000) == 0x3000) {
 					cpu_class = CPUCLASS_586;
 					strcat(cpu_model, "GXm");
 				} else
 					strcat(cpu_model, "MediaGX");
 				break;
 			case 0x50:
 				strcat(cpu_model, "6x86MX");
 				break;
 			case 0xf0:
 				switch (cyrix_did & 0x0f) {
 				case 0x0d:
 					strcat(cpu_model, "Overdrive CPU");
 					break;
 				case 0x0e:
 					strcpy(cpu_model, "Texas Instruments 486SXL");
 					break;
 				case 0x0f:
 					strcat(cpu_model, "486SLC/DLC");
 					break;
 				default:
 					strcat(cpu_model, "Unknown");
 					break;
 				}
 				break;
 			default:
 				strcat(cpu_model, "Unknown");
 				break;
 			}
 			break;
 		}
 		break;
 	case CPU_VENDOR_RISE:
 		strcpy(cpu_model, "Rise ");
 		switch (cpu_id & 0xff0) {
 		case 0x500:	/* 6401 and 6441 (Kirin) */
 		case 0x520:	/* 6510 (Lynx) */
 			strcat(cpu_model, "mP6");
 			break;
 		default:
 			strcat(cpu_model, "Unknown");
 		}
 		break;
 #endif
 	case CPU_VENDOR_CENTAUR:
 #ifdef __i386__
 		switch (cpu_id & 0xff0) {
 		case 0x540:
 			strcpy(cpu_model, "IDT WinChip C6");
 			break;
 		case 0x580:
 			strcpy(cpu_model, "IDT WinChip 2");
 			break;
 		case 0x590:
 			strcpy(cpu_model, "IDT WinChip 3");
 			break;
 		case 0x660:
 			strcpy(cpu_model, "VIA C3 Samuel");
 			break;
 		case 0x670:
 			if (cpu_id & 0x8)
 				strcpy(cpu_model, "VIA C3 Ezra");
 			else
 				strcpy(cpu_model, "VIA C3 Samuel 2");
 			break;
 		case 0x680:
 			strcpy(cpu_model, "VIA C3 Ezra-T");
 			break;
 		case 0x690:
 			strcpy(cpu_model, "VIA C3 Nehemiah");
 			break;
 		case 0x6a0:
 		case 0x6d0:
 			strcpy(cpu_model, "VIA C7 Esther");
 			break;
 		case 0x6f0:
 			strcpy(cpu_model, "VIA Nano");
 			break;
 		default:
 			strcpy(cpu_model, "VIA/IDT Unknown");
 		}
 #else
 		strcpy(cpu_model, "VIA ");
 		if ((cpu_id & 0xff0) == 0x6f0)
 			strcat(cpu_model, "Nano Processor");
 		else
 			strcat(cpu_model, "Unknown");
 #endif
 		break;
 #ifdef __i386__
 	case CPU_VENDOR_IBM:
 		strcpy(cpu_model, "Blue Lightning CPU");
 		break;
 	case CPU_VENDOR_NSC:
 		switch (cpu_id & 0xff0) {
 		case 0x540:
 			strcpy(cpu_model, "Geode SC1100");
 			cpu = CPU_GEODE1100;
 			break;
 		default:
 			strcpy(cpu_model, "Geode/NSC unknown");
 			break;
 		}
 		break;
 #endif
 	case CPU_VENDOR_HYGON:
 		strcpy(cpu_model, "Hygon ");
 #ifdef __i386__
 		strcat(cpu_model, "Unknown");
 #else
 		if ((cpu_id & 0xf00) == 0xf00)
 			strcat(cpu_model, "AMD64 Processor");
 		else
 			strcat(cpu_model, "Unknown");
 #endif
 		break;
 
 	default:
 		strcat(cpu_model, "Unknown");
 		break;
 	}
 
 	/*
 	 * Replace cpu_model with cpu_brand minus leading spaces if
 	 * we have one.
 	 */
 	brand = cpu_brand;
 	while (*brand == ' ')
 		++brand;
 	if (*brand != '\0')
 		strcpy(cpu_model, brand);
 
 	printf("%s (", cpu_model);
 	if (tsc_freq != 0) {
 		hw_clockrate = (tsc_freq + 5000) / 1000000;
 		printf("%jd.%02d-MHz ",
 		    (intmax_t)(tsc_freq + 4999) / 1000000,
 		    (u_int)((tsc_freq + 4999) / 10000) % 100);
 	}
 #ifdef __i386__
 	switch(cpu_class) {
 	case CPUCLASS_286:
 		printf("286");
 		break;
 	case CPUCLASS_386:
 		printf("386");
 		break;
 #if defined(I486_CPU)
 	case CPUCLASS_486:
 		printf("486");
 		break;
 #endif
 #if defined(I586_CPU)
 	case CPUCLASS_586:
 		printf("586");
 		break;
 #endif
 #if defined(I686_CPU)
 	case CPUCLASS_686:
 		printf("686");
 		break;
 #endif
 	default:
 		printf("Unknown");	/* will panic below... */
 	}
 #else
 	printf("K8");
 #endif
 	printf("-class CPU)\n");
 	if (*cpu_vendor)
 		printf("  Origin=\"%s\"", cpu_vendor);
 	if (cpu_id)
 		printf("  Id=0x%x", cpu_id);
 
 	if (cpu_vendor_id == CPU_VENDOR_INTEL ||
 	    cpu_vendor_id == CPU_VENDOR_AMD ||
 	    cpu_vendor_id == CPU_VENDOR_HYGON ||
 	    cpu_vendor_id == CPU_VENDOR_CENTAUR ||
 #ifdef __i386__
 	    cpu_vendor_id == CPU_VENDOR_TRANSMETA ||
 	    cpu_vendor_id == CPU_VENDOR_RISE ||
 	    cpu_vendor_id == CPU_VENDOR_NSC ||
 	    (cpu_vendor_id == CPU_VENDOR_CYRIX && ((cpu_id & 0xf00) > 0x500)) ||
 #endif
 	    0) {
 		printf("  Family=0x%x", CPUID_TO_FAMILY(cpu_id));
 		printf("  Model=0x%x", CPUID_TO_MODEL(cpu_id));
 		printf("  Stepping=%u", cpu_id & CPUID_STEPPING);
 #ifdef __i386__
 		if (cpu_vendor_id == CPU_VENDOR_CYRIX)
 			printf("\n  DIR=0x%04x", cyrix_did);
 #endif
 
 		/*
 		 * AMD CPUID Specification
 		 * http://support.amd.com/us/Embedded_TechDocs/25481.pdf
 		 *
 		 * Intel Processor Identification and CPUID Instruction
 		 * http://www.intel.com/assets/pdf/appnote/241618.pdf
 		 */
 		if (cpu_high > 0) {
 
 			/*
 			 * Here we should probably set up flags indicating
 			 * whether or not various features are available.
 			 * The interesting ones are probably VME, PSE, PAE,
 			 * and PGE.  The code already assumes without bothering
 			 * to check that all CPUs >= Pentium have a TSC and
 			 * MSRs.
 			 */
 			printf("\n  Features=0x%b", cpu_feature,
 			"\020"
 			"\001FPU"	/* Integral FPU */
 			"\002VME"	/* Extended VM86 mode support */
 			"\003DE"	/* Debugging Extensions (CR4.DE) */
 			"\004PSE"	/* 4MByte page tables */
 			"\005TSC"	/* Timestamp counter */
 			"\006MSR"	/* Machine specific registers */
 			"\007PAE"	/* Physical address extension */
 			"\010MCE"	/* Machine Check support */
 			"\011CX8"	/* CMPEXCH8 instruction */
 			"\012APIC"	/* SMP local APIC */
 			"\013oldMTRR"	/* Previous implementation of MTRR */
 			"\014SEP"	/* Fast System Call */
 			"\015MTRR"	/* Memory Type Range Registers */
 			"\016PGE"	/* PG_G (global bit) support */
 			"\017MCA"	/* Machine Check Architecture */
 			"\020CMOV"	/* CMOV instruction */
 			"\021PAT"	/* Page attributes table */
 			"\022PSE36"	/* 36 bit address space support */
 			"\023PN"	/* Processor Serial number */
 			"\024CLFLUSH"	/* Has the CLFLUSH instruction */
 			"\025<b20>"
 			"\026DTS"	/* Debug Trace Store */
 			"\027ACPI"	/* ACPI support */
 			"\030MMX"	/* MMX instructions */
 			"\031FXSR"	/* FXSAVE/FXRSTOR */
 			"\032SSE"	/* Streaming SIMD Extensions */
 			"\033SSE2"	/* Streaming SIMD Extensions #2 */
 			"\034SS"	/* Self snoop */
 			"\035HTT"	/* Hyperthreading (see EBX bit 16-23) */
 			"\036TM"	/* Thermal Monitor clock slowdown */
 			"\037IA64"	/* CPU can execute IA64 instructions */
 			"\040PBE"	/* Pending Break Enable */
 			);
 
 			if (cpu_feature2 != 0) {
 				printf("\n  Features2=0x%b", cpu_feature2,
 				"\020"
 				"\001SSE3"	/* SSE3 */
 				"\002PCLMULQDQ"	/* Carry-Less Mul Quadword */
 				"\003DTES64"	/* 64-bit Debug Trace */
 				"\004MON"	/* MONITOR/MWAIT Instructions */
 				"\005DS_CPL"	/* CPL Qualified Debug Store */
 				"\006VMX"	/* Virtual Machine Extensions */
 				"\007SMX"	/* Safer Mode Extensions */
 				"\010EST"	/* Enhanced SpeedStep */
 				"\011TM2"	/* Thermal Monitor 2 */
 				"\012SSSE3"	/* SSSE3 */
 				"\013CNXT-ID"	/* L1 context ID available */
 				"\014SDBG"	/* IA32 silicon debug */
 				"\015FMA"	/* Fused Multiply Add */
 				"\016CX16"	/* CMPXCHG16B Instruction */
 				"\017xTPR"	/* Send Task Priority Messages*/
 				"\020PDCM"	/* Perf/Debug Capability MSR */
 				"\021<b16>"
 				"\022PCID"	/* Process-context Identifiers*/
 				"\023DCA"	/* Direct Cache Access */
 				"\024SSE4.1"	/* SSE 4.1 */
 				"\025SSE4.2"	/* SSE 4.2 */
 				"\026x2APIC"	/* xAPIC Extensions */
 				"\027MOVBE"	/* MOVBE Instruction */
 				"\030POPCNT"	/* POPCNT Instruction */
 				"\031TSCDLT"	/* TSC-Deadline Timer */
 				"\032AESNI"	/* AES Crypto */
 				"\033XSAVE"	/* XSAVE/XRSTOR States */
 				"\034OSXSAVE"	/* OS-Enabled State Management*/
 				"\035AVX"	/* Advanced Vector Extensions */
 				"\036F16C"	/* Half-precision conversions */
 				"\037RDRAND"	/* RDRAND Instruction */
 				"\040HV"	/* Hypervisor */
 				);
 			}
 
 			if (amd_feature != 0) {
 				printf("\n  AMD Features=0x%b", amd_feature,
 				"\020"		/* in hex */
 				"\001<s0>"	/* Same */
 				"\002<s1>"	/* Same */
 				"\003<s2>"	/* Same */
 				"\004<s3>"	/* Same */
 				"\005<s4>"	/* Same */
 				"\006<s5>"	/* Same */
 				"\007<s6>"	/* Same */
 				"\010<s7>"	/* Same */
 				"\011<s8>"	/* Same */
 				"\012<s9>"	/* Same */
 				"\013<b10>"	/* Undefined */
 				"\014SYSCALL"	/* Have SYSCALL/SYSRET */
 				"\015<s12>"	/* Same */
 				"\016<s13>"	/* Same */
 				"\017<s14>"	/* Same */
 				"\020<s15>"	/* Same */
 				"\021<s16>"	/* Same */
 				"\022<s17>"	/* Same */
 				"\023<b18>"	/* Reserved, unknown */
 				"\024MP"	/* Multiprocessor Capable */
 				"\025NX"	/* Has EFER.NXE, NX */
 				"\026<b21>"	/* Undefined */
 				"\027MMX+"	/* AMD MMX Extensions */
 				"\030<s23>"	/* Same */
 				"\031<s24>"	/* Same */
 				"\032FFXSR"	/* Fast FXSAVE/FXRSTOR */
 				"\033Page1GB"	/* 1-GB large page support */
 				"\034RDTSCP"	/* RDTSCP */
 				"\035<b28>"	/* Undefined */
 				"\036LM"	/* 64 bit long mode */
 				"\0373DNow!+"	/* AMD 3DNow! Extensions */
 				"\0403DNow!"	/* AMD 3DNow! */
 				);
 			}
 
 			if (amd_feature2 != 0) {
 				printf("\n  AMD Features2=0x%b", amd_feature2,
 				"\020"
 				"\001LAHF"	/* LAHF/SAHF in long mode */
 				"\002CMP"	/* CMP legacy */
 				"\003SVM"	/* Secure Virtual Mode */
 				"\004ExtAPIC"	/* Extended APIC register */
 				"\005CR8"	/* CR8 in legacy mode */
 				"\006ABM"	/* LZCNT instruction */
 				"\007SSE4A"	/* SSE4A */
 				"\010MAS"	/* Misaligned SSE mode */
 				"\011Prefetch"	/* 3DNow! Prefetch/PrefetchW */
 				"\012OSVW"	/* OS visible workaround */
 				"\013IBS"	/* Instruction based sampling */
 				"\014XOP"	/* XOP extended instructions */
 				"\015SKINIT"	/* SKINIT/STGI */
 				"\016WDT"	/* Watchdog timer */
 				"\017<b14>"
 				"\020LWP"	/* Lightweight Profiling */
 				"\021FMA4"	/* 4-operand FMA instructions */
 				"\022TCE"	/* Translation Cache Extension */
 				"\023<b18>"
 				"\024NodeId"	/* NodeId MSR support */
 				"\025<b20>"
 				"\026TBM"	/* Trailing Bit Manipulation */
 				"\027Topology"	/* Topology Extensions */
 				"\030PCXC"	/* Core perf count */
 				"\031PNXC"	/* NB perf count */
 				"\032<b25>"
 				"\033DBE"	/* Data Breakpoint extension */
 				"\034PTSC"	/* Performance TSC */
 				"\035PL2I"	/* L2I perf count */
 				"\036MWAITX"	/* MONITORX/MWAITX instructions */
 				"\037ADMSKX"	/* Address mask extension */
 				"\040<b31>"
 				);
 			}
 
 			if (cpu_stdext_feature != 0) {
 				printf("\n  Structured Extended Features=0x%b",
 				    cpu_stdext_feature,
 				       "\020"
 				       /* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
 				       "\001FSGSBASE"
 				       "\002TSCADJ"
 				       "\003SGX"
 				       /* Bit Manipulation Instructions */
 				       "\004BMI1"
 				       /* Hardware Lock Elision */
 				       "\005HLE"
 				       /* Advanced Vector Instructions 2 */
 				       "\006AVX2"
 				       /* FDP_EXCPTN_ONLY */
 				       "\007FDPEXC"
 				       /* Supervisor Mode Execution Prot. */
 				       "\010SMEP"
 				       /* Bit Manipulation Instructions */
 				       "\011BMI2"
 				       "\012ERMS"
 				       /* Invalidate Processor Context ID */
 				       "\013INVPCID"
 				       /* Restricted Transactional Memory */
 				       "\014RTM"
 				       "\015PQM"
 				       "\016NFPUSG"
 				       /* Intel Memory Protection Extensions */
 				       "\017MPX"
 				       "\020PQE"
 				       /* AVX512 Foundation */
 				       "\021AVX512F"
 				       "\022AVX512DQ"
 				       /* Enhanced NRBG */
 				       "\023RDSEED"
 				       /* ADCX + ADOX */
 				       "\024ADX"
 				       /* Supervisor Mode Access Prevention */
 				       "\025SMAP"
 				       "\026AVX512IFMA"
 				       /* Formerly PCOMMIT */
 				       "\027<b22>"
 				       "\030CLFLUSHOPT"
 				       "\031CLWB"
 				       "\032PROCTRACE"
 				       "\033AVX512PF"
 				       "\034AVX512ER"
 				       "\035AVX512CD"
 				       "\036SHA"
 				       "\037AVX512BW"
 				       "\040AVX512VL"
 				       );
 			}
 
 			if (cpu_stdext_feature2 != 0) {
 				printf("\n  Structured Extended Features2=0x%b",
 				    cpu_stdext_feature2,
 				       "\020"
 				       "\001PREFETCHWT1"
 				       "\002AVX512VBMI"
 				       "\003UMIP"
 				       "\004PKU"
 				       "\005OSPKE"
 				       "\006WAITPKG"
 				       "\007AVX512VBMI2"
 				       "\011GFNI"
 				       "\012VAES"
 				       "\013VPCLMULQDQ"
 				       "\014AVX512VNNI"
 				       "\015AVX512BITALG"
 				       "\016AVX512VPOPCNTDQ"
 				       "\027RDPID"
 				       "\032CLDEMOTE"
 				       "\034MOVDIRI"
 				       "\035MOVDIR64B"
 				       "\036ENQCMD"
 				       "\037SGXLC"
 				       );
 			}
 
 			if (cpu_stdext_feature3 != 0) {
 				printf("\n  Structured Extended Features3=0x%b",
 				    cpu_stdext_feature3,
 				       "\020"
 				       "\003AVX512_4VNNIW"
 				       "\004AVX512_4FMAPS"
 				       "\005FSRM"
 				       "\011AVX512VP2INTERSECT"
 				       "\013MD_CLEAR"
 				       "\016TSXFA"
 				       "\023PCONFIG"
 				       "\025IBT"
 				       "\033IBPB"
 				       "\034STIBP"
 				       "\035L1DFL"
 				       "\036ARCH_CAP"
 				       "\037CORE_CAP"
 				       "\040SSBD"
 				       );
 			}
 
 			if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
 				cpuid_count(0xd, 0x1, regs);
 				if (regs[0] != 0) {
 					printf("\n  XSAVE Features=0x%b",
 					    regs[0],
 					    "\020"
 					    "\001XSAVEOPT"
 					    "\002XSAVEC"
 					    "\003XINUSE"
 					    "\004XSAVES");
 				}
 			}
 
 			if (cpu_ia32_arch_caps != 0) {
 				printf("\n  IA32_ARCH_CAPS=0x%b",
 				    (u_int)cpu_ia32_arch_caps,
 				       "\020"
 				       "\001RDCL_NO"
 				       "\002IBRS_ALL"
 				       "\003RSBA"
 				       "\004SKIP_L1DFL_VME"
 				       "\005SSB_NO"
 				       "\006MDS_NO"
 				       "\010TSX_CTRL"
 				       "\011TAA_NO"
 				       );
 			}
 
 			if (amd_extended_feature_extensions != 0) {
 				u_int amd_fe_masked;
 
 				amd_fe_masked = amd_extended_feature_extensions;
 				if ((amd_fe_masked & AMDFEID_IBRS) == 0)
 					amd_fe_masked &=
 					    ~(AMDFEID_IBRS_ALWAYSON |
 						AMDFEID_PREFER_IBRS);
 				if ((amd_fe_masked & AMDFEID_STIBP) == 0)
 					amd_fe_masked &=
 					    ~AMDFEID_STIBP_ALWAYSON;
 
 				printf("\n  "
 				    "AMD Extended Feature Extensions ID EBX="
 				    "0x%b", amd_fe_masked,
 				    "\020"
 				    "\001CLZERO"
 				    "\002IRPerf"
 				    "\003XSaveErPtr"
 				    "\005RDPRU"
 				    "\011MCOMMIT"
 				    "\012WBNOINVD"
 				    "\015IBPB"
 				    "\017IBRS"
 				    "\020STIBP"
 				    "\021IBRS_ALWAYSON"
 				    "\022STIBP_ALWAYSON"
 				    "\023PREFER_IBRS"
 				    "\031SSBD"
 				    "\032VIRT_SSBD"
 				    "\033SSB_NO"
 				    );
 			}
 
 			if (via_feature_rng != 0 || via_feature_xcrypt != 0)
 				print_via_padlock_info();
 
 			if (cpu_feature2 & CPUID2_VMX)
 				print_vmx_info();
 
 			if (amd_feature2 & AMDID2_SVM)
 				print_svm_info();
 
 			if ((cpu_feature & CPUID_HTT) &&
 			    (cpu_vendor_id == CPU_VENDOR_AMD ||
 			     cpu_vendor_id == CPU_VENDOR_HYGON))
 				cpu_feature &= ~CPUID_HTT;
 
 			/*
 			 * If this CPU supports P-state invariant TSC then
 			 * mention the capability.
 			 */
 			if (tsc_is_invariant) {
 				printf("\n  TSC: P-state invariant");
 				if (tsc_perf_stat)
 					printf(", performance statistics");
 			}
 		}
 #ifdef __i386__
 	} else if (cpu_vendor_id == CPU_VENDOR_CYRIX) {
 		printf("  DIR=0x%04x", cyrix_did);
 		printf("  Stepping=%u", (cyrix_did & 0xf000) >> 12);
 		printf("  Revision=%u", (cyrix_did & 0x0f00) >> 8);
 #ifndef CYRIX_CACHE_REALLY_WORKS
 		if (cpu == CPU_M1 && (cyrix_did & 0xff00) < 0x1700)
 			printf("\n  CPU cache: write-through mode");
 #endif
 #endif
 	}
 
 	/* Avoid ugly blank lines: only print newline when we have to. */
 	if (*cpu_vendor || cpu_id)
 		printf("\n");
 
 	if (bootverbose) {
 		if (cpu_vendor_id == CPU_VENDOR_AMD ||
 		    cpu_vendor_id == CPU_VENDOR_HYGON)
 			print_AMD_info();
 		else if (cpu_vendor_id == CPU_VENDOR_INTEL)
 			print_INTEL_info();
 #ifdef __i386__
 		else if (cpu_vendor_id == CPU_VENDOR_TRANSMETA)
 			print_transmeta_info();
 #endif
 	}
 
 	print_hypervisor_info();
 }
 
 #ifdef __i386__
 /*
  * Panic when the detected CPU class is one this kernel was not built to
  * support.
  *
  * Outside of lint builds, compilation fails unless at least one of
  * I486_CPU, I586_CPU or I686_CPU is defined.  At run time the 286 and 386
  * classes always panic; the 486, 586 and 686 classes panic only when the
  * matching option was left out of the kernel configuration, because their
  * case labels are compiled in only in that situation.
  */
 void
 panicifcpuunsupported(void)
 {
 
 #if !defined(lint)
 #if !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU)
 #error This kernel is not configured for one of the supported CPUs
 #endif
 #else /* lint */
 #endif /* lint */
 	/*
 	 * Now that we have told the user what they have,
 	 * let them know if that machine type isn't configured.
 	 */
 	switch (cpu_class) {
 	case CPUCLASS_286:	/* a 286 should not make it this far, anyway */
 	case CPUCLASS_386:
 #if !defined(I486_CPU)
 	case CPUCLASS_486:
 #endif
 #if !defined(I586_CPU)
 	case CPUCLASS_586:
 #endif
 #if !defined(I686_CPU)
 	case CPUCLASS_686:
 #endif
 		/* Every label above falls through to this single panic. */
 		panic("CPU class not configured");
 	default:
 		break;
 	}
 }
 
 /*
  * Written by the bluetrap6 and bluetrap13 handlers with a magic value that
  * tells identblue() which exception its probing rdmsr raised.  identblue()
  * zeroes it before issuing the probe.
  */
 static	volatile u_int trap_by_rdmsr;
 
 /*
  * Special exception 6 (invalid opcode) handler.
  * The rdmsr instruction generates an invalid opcode fault on 486-class
  * Cyrix CPUs.  When this handler is called, the stacked %eip points at the
  * rdmsr instruction in identblue().  The handler stores the magic value
  * 0xa8c1d into trap_by_rdmsr and advances the stacked %eip by 2 bytes so
  * that iret resumes execution just past the rdmsr.
  */
 inthand_t	bluetrap6;
 #ifdef __GNUCLIKE_ASM
 __asm
 ("									\n\
 	.text								\n\
 	.p2align 2,0x90							\n\
 	.type	" __XSTRING(CNAME(bluetrap6)) ",@function		\n\
 " __XSTRING(CNAME(bluetrap6)) ":					\n\
 	ss								\n\
 	movl	$0xa8c1d," __XSTRING(CNAME(trap_by_rdmsr)) "		\n\
 	addl	$2, (%esp)	/* rdmsr is a 2-byte instruction */	\n\
 	iret								\n\
 ");
 #endif
 
 /*
  * Special exception 13 (general protection) handler.
  * Accessing a non-existent MSR generates a general protection fault.
  * The handler stores the magic value 0xa89c4 into trap_by_rdmsr, pops the
  * error code off the stack into %eax (overwriting %eax), advances the
  * stacked %eip by 2 bytes to step over the rdmsr, and returns with iret.
  */
 inthand_t	bluetrap13;
 #ifdef __GNUCLIKE_ASM
 __asm
 ("									\n\
 	.text								\n\
 	.p2align 2,0x90							\n\
 	.type	" __XSTRING(CNAME(bluetrap13)) ",@function		\n\
 " __XSTRING(CNAME(bluetrap13)) ":					\n\
 	ss								\n\
 	movl	$0xa89c4," __XSTRING(CNAME(trap_by_rdmsr)) "		\n\
 	popl	%eax		/* discard error code */		\n\
 	addl	$2, (%esp)	/* rdmsr is a 2-byte instruction */	\n\
 	iret								\n\
 ");
 #endif
 
 /*
  * Distinguish an IBM Blue Lightning CPU from Cyrix CPUs that do not
  * support the cpuid instruction.  This function should be called after
  * loading the interrupt descriptor table register.
  *
  * I don't like this method that handles fault, but I couldn't get
  * information for any other methods.  Does blue giant know?
  *
  * Returns:
  *	IDENTBLUE_CYRIX486	the probing rdmsr raised exception 6
  *				(trap_by_rdmsr holds bluetrap6's magic).
  *	IDENTBLUE_CYRIXM2	the probing rdmsr raised exception 13
  *				(trap_by_rdmsr holds bluetrap13's magic).
  *	IDENTBLUE_IBMCPU	neither handler ran.
  *
  * The IDT_UD and IDT_GP entries installed here are not restored by this
  * function.
  */
 static int
 identblue(void)
 {
 
 	/* Clear the marker so a stale value cannot be mistaken for a trap. */
 	trap_by_rdmsr = 0;
 
 	/*
 	 * Cyrix 486-class CPU does not support rdmsr instruction.
 	 * The rdmsr instruction generates invalid opcode fault, and exception
 	 * will be trapped by bluetrap6() on Cyrix 486-class CPU.  The
 	 * bluetrap6() set the magic number to trap_by_rdmsr.
 	 */
 	setidt(IDT_UD, bluetrap6, SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	/*
 	 * Certain BIOS disables cpuid instruction of Cyrix 6x86MX CPU.
 	 * In this case, rdmsr generates general protection fault, and
 	 * exception will be trapped by bluetrap13().
 	 */
 	setidt(IDT_GP, bluetrap13, SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	/* The value read is discarded; only the possible fault matters. */
 	rdmsr(0x1002);		/* Cyrix CPU generates fault. */
 
 	/* Map the magic left behind by the handlers to a result code. */
 	if (trap_by_rdmsr == 0xa8c1d)
 		return IDENTBLUE_CYRIX486;
 	else if (trap_by_rdmsr == 0xa89c4)
 		return IDENTBLUE_CYRIXM2;
 	return IDENTBLUE_IBMCPU;
 }
 
 
 /*
  * identifycyrix() set lower 16 bits of cyrix_did as follows:
  *
  *  F E D C B A 9 8 7 6 5 4 3 2 1 0
  * +-------+-------+---------------+
  * |  SID  |  RID  |   Device ID   |
  * |    (DIR 1)    |    (DIR 0)    |
  * +-------+-------+---------------+
  *
  * The function probes whether CCR2 and CCR3 are writable by flipping one
  * bit in each and reading it back, restoring the original value afterwards.
  * If the CCR3 flip sticks, DIR1/DIR0 are read into cyrix_did; otherwise a
  * fixed ID is chosen depending on whether the CCR2 flip stuck.  The whole
  * sequence runs between intr_disable() and intr_restore().
  */
 static void
 identifycyrix(void)
 {
 	register_t saveintr;
 	int	ccr2_test = 0, dir_test = 0;
 	u_char	ccr2, ccr3;
 
 	saveintr = intr_disable();
 
 	/* Flip CCR2_LOCK_NW and check whether the change is visible. */
 	ccr2 = read_cyrix_reg(CCR2);
 	write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW);
 	/* NOTE(review): result discarded; presumably a dummy read - confirm. */
 	read_cyrix_reg(CCR2);
 	if (read_cyrix_reg(CCR2) != ccr2)
 		ccr2_test = 1;
 	write_cyrix_reg(CCR2, ccr2);		/* put the original value back */
 
 	/* Same test on CCR3 using the CCR3_MAPEN3 bit. */
 	ccr3 = read_cyrix_reg(CCR3);
 	write_cyrix_reg(CCR3, ccr3 ^ CCR3_MAPEN3);
 	/* NOTE(review): result discarded; presumably a dummy read - confirm. */
 	read_cyrix_reg(CCR3);
 	if (read_cyrix_reg(CCR3) != ccr3)
 		dir_test = 1;					/* CPU supports DIRs. */
 	write_cyrix_reg(CCR3, ccr3);		/* put the original value back */
 
 	if (dir_test) {
 		/* Device ID registers are available. */
 		cyrix_did = read_cyrix_reg(DIR1) << 8;	/* SID/RID in bits 15..8 */
 		cyrix_did += read_cyrix_reg(DIR0);	/* device ID in bits 7..0 */
 	} else if (ccr2_test)
 		cyrix_did = 0x0010;		/* 486S A-step */
 	else
 		cyrix_did = 0x00ff;		/* Old 486SLC/DLC and TI486SXLC/SXL */
 
 	intr_restore(saveintr);
 }
 #endif
 
 /*
  * Frequency-change notification: record the clock rate of the level the
  * caller switched to.  Registered for cpufreq_post_change events by
  * hook_tsc_freq().
  */
 static void
 tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status)
 {
 
 	/*
 	 * A non-zero status means the transition failed, in which case the
 	 * recorded rate is left alone.  On success the level's total setting
 	 * carries the new frequency in MHz.
 	 */
 	if (status == 0)
 		hw_clockrate = level->total_set.freq;
 }
 
 /*
  * Subscribe tsc_freq_changed() to cpufreq_post_change events, unless the
  * TSC is invariant, in which case no handler is registered.
  */
 static void
 hook_tsc_freq(void *arg __unused)
 {
 
 	if (!tsc_is_invariant) {
 		tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
 		    tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY);
 	}
 }
 
 SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL);
 
 /*
  * BIOS vendor strings (compared against the "smbios.bios.vendor"
  * environment variable) and the guest type each one implies.
  */
 static const struct {
 	const char	*vm_bname;
 	int		 vm_guest;
 } vm_bnames[] = {
 	{ .vm_bname = "QEMU",	 .vm_guest = VM_GUEST_VM },	/* QEMU */
 	{ .vm_bname = "Plex86",	 .vm_guest = VM_GUEST_VM },	/* Plex86 */
 	{ .vm_bname = "Bochs",	 .vm_guest = VM_GUEST_VM },	/* Bochs */
 	{ .vm_bname = "Xen",	 .vm_guest = VM_GUEST_XEN },	/* Xen */
 	{ .vm_bname = "BHYVE",	 .vm_guest = VM_GUEST_BHYVE },	/* bhyve */
 	{ .vm_bname = "Seabios", .vm_guest = VM_GUEST_KVM },	/* KVM */
 };
 
 /*
  * System product strings (compared against the "smbios.system.product"
  * environment variable) and the guest type each one implies.
  */
 static const struct {
 	const char	*vm_pname;
 	int		 vm_guest;
 } vm_pnames[] = {
 	{ .vm_pname = "VMware Virtual Platform",
 	  .vm_guest = VM_GUEST_VMWARE },
 	{ .vm_pname = "Virtual Machine",	/* Microsoft VirtualPC */
 	  .vm_guest = VM_GUEST_VM },
 	{ .vm_pname = "VirtualBox",
 	  .vm_guest = VM_GUEST_VBOX },
 	{ .vm_pname = "Parallels Virtual Platform",
 	  .vm_guest = VM_GUEST_PARALLELS },
 	{ .vm_pname = "KVM",
 	  .vm_guest = VM_GUEST_KVM },
 };
 
 /*
  * Hypervisor vendor signatures and the guest type each one implies.
  * identify_hypervisor_cpuid_base() compares the first 12 bytes of each
  * string against the three registers following %eax of a hypervisor CPUID
  * leaf.  The table is only ever read, so it is declared const like the
  * vm_bnames and vm_pnames tables.
  */
 static const struct {
 	const char	*vm_cpuid;
 	int		vm_guest;
 } vm_cpuids[] = {
 	{ "XENXENXEN",		VM_GUEST_XEN },		/* XEN */
 	{ "Microsoft Hv",	VM_GUEST_HV },		/* Microsoft Hyper-V */
 	{ "VMwareVMware",	VM_GUEST_VMWARE },	/* VMware VM */
 	{ "KVMKVMKVM",		VM_GUEST_KVM },		/* KVM */
 	{ "bhyve bhyve ",	VM_GUEST_BHYVE },	/* bhyve */
 	{ "VBoxVBoxVBox",	VM_GUEST_VBOX },	/* VirtualBox */
 };
 
 static void
 identify_hypervisor_cpuid_base(void)
 {
 	u_int leaf, regs[4];
 	int i;
 
 	/*
 	 * [RFC] CPUID usage for interaction between Hypervisors and Linux.
 	 * http://lkml.org/lkml/2008/10/1/246
 	 *
 	 * KB1009458: Mechanisms to determine if software is running in
 	 * a VMware virtual machine
 	 * http://kb.vmware.com/kb/1009458
 	 *
 	 * Search for a hypervisor that we recognize. If we cannot find
 	 * a specific hypervisor, return the first information about the
 	 * hypervisor that we found, as others may be able to use.
 	 */
 	for (leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
 		do_cpuid(leaf, regs);
 
 		/*
 		 * KVM from Linux kernels prior to commit
 		 * 57c22e5f35aa4b9b2fe11f73f3e62bbf9ef36190 set %eax
 		 * to 0 rather than a valid hv_high value.  Check for
 		 * the KVM signature bytes and fixup %eax to the
 		 * highest supported leaf in that case.
 		 */
 		if (regs[0] == 0 && regs[1] == 0x4b4d564b &&
 		    regs[2] == 0x564b4d56 && regs[3] == 0x0000004d)
 			regs[0] = leaf + 1;
 			
 		if (regs[0] >= leaf) {
 			for (i = 0; i < nitems(vm_cpuids); i++)
 				if (strncmp((const char *)&regs[1],
 				    vm_cpuids[i].vm_cpuid, 12) == 0) {
 					vm_guest = vm_cpuids[i].vm_guest;
 					break;
 				}
 
 			/*
 			 * If this is the first entry or we found a
 			 * specific hypervisor, record the base, high value,
 			 * and vendor identifier.
 			 */
 			if (vm_guest != VM_GUEST_VM || leaf == 0x40000000) {
 				hv_base = leaf;
 				hv_high = regs[0];
 				((u_int *)&hv_vendor)[0] = regs[1];
 				((u_int *)&hv_vendor)[1] = regs[2];
 				((u_int *)&hv_vendor)[2] = regs[3];
 				hv_vendor[12] = '\0';
 
 				/*
 				 * If we found a specific hypervisor, then
 				 * we are finished.
 				 */
 				if (vm_guest != VM_GUEST_VM)
 					return;
 			}
 		}
 	}
 }
 
 /*
  * Determine whether the kernel is running as a guest and, if so, under
  * which hypervisor; the answer is left in vm_guest.
  *
  * The CPUID hypervisor leaves are consulted first.  If they do not yield a
  * vendor string, a series of SMBIOS environment strings is examined for
  * older hypervisors.
  */
 void
 identify_hypervisor(void)
 {
 	u_int regs[4];
 	char *env;
 	int idx, hit;
 
 	/* CPUID2_HV set means we are running in a hypervisor environment. */
 	if ((cpu_feature2 & CPUID2_HV) != 0) {
 		vm_guest = VM_GUEST_VM;
 		identify_hypervisor_cpuid_base();
 
 		/* A vendor string is definitive; nothing more to do. */
 		if (hv_vendor[0] != '\0')
 			return;
 	}
 
 	/*
 	 * VMware: the serial number prefix only nominates the candidate,
 	 * the hypervisor call has to confirm it.
 	 */
 	env = kern_getenv("smbios.system.serial");
 	if (env != NULL) {
 		hit = 0;
 		if (strncmp(env, "VMware-", 7) == 0 ||
 		    strncmp(env, "VMW", 3) == 0) {
 			vmware_hvcall(VMW_HVCMD_GETVERSION, regs);
 			hit = (regs[1] == VMW_HVMAGIC);
 		}
 		freeenv(env);
 		if (hit) {
 			vm_guest = VM_GUEST_VMWARE;
 			return;
 		}
 	}
 
 	/*
 	 * XXX: Some of these entries may not be needed since they were
 	 * added to FreeBSD before the checks above.
 	 */
 	env = kern_getenv("smbios.bios.vendor");
 	if (env != NULL) {
 		hit = 0;
 		for (idx = 0; idx < nitems(vm_bnames) && !hit; idx++) {
 			if (strcmp(env, vm_bnames[idx].vm_bname) == 0) {
 				vm_guest = vm_bnames[idx].vm_guest;
 				hit = 1;
 			}
 		}
 		freeenv(env);
 
 		/*
 		 * A specific match ends the search.  A generic match leaves
 		 * room for a more specific product name below.
 		 */
 		if (hit && vm_guest != VM_GUEST_VM)
 			return;
 	}
 
 	env = kern_getenv("smbios.system.product");
 	if (env == NULL)
 		return;
 	for (idx = 0; idx < nitems(vm_pnames); idx++) {
 		if (strcmp(env, vm_pnames[idx].vm_pname) == 0) {
 			vm_guest = vm_pnames[idx].vm_guest;
 			break;
 		}
 	}
 	freeenv(env);
 }
 
 bool
 fix_cpuid(void)
 {
 	uint64_t msr;
 
 	/*
 	 * Clear "Limit CPUID Maxval" bit and return true if the caller should
 	 * get the largest standard CPUID function number again if it is set
 	 * from BIOS.  It is necessary for probing correct CPU topology later
 	 * and for the correct operation of the AVX-aware userspace.
 	 */
 	if (cpu_vendor_id == CPU_VENDOR_INTEL &&
 	    ((CPUID_TO_FAMILY(cpu_id) == 0xf &&
 	    CPUID_TO_MODEL(cpu_id) >= 0x3) ||
 	    (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
 	    CPUID_TO_MODEL(cpu_id) >= 0xe))) {
 		msr = rdmsr(MSR_IA32_MISC_ENABLE);
 		if ((msr & IA32_MISC_EN_LIMCPUID) != 0) {
 			msr &= ~IA32_MISC_EN_LIMCPUID;
 			wrmsr(MSR_IA32_MISC_ENABLE, msr);
 			return (true);
 		}
 	}
 
 	/*
 	 * Re-enable AMD Topology Extension that could be disabled by BIOS
 	 * on some notebook processors.  Without the extension it's really
 	 * hard to determine the correct CPU cache topology.
 	 * See BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 15h
 	 * Models 60h-6Fh Processors, Publication # 50742.
 	 */
 	if (vm_guest == VM_GUEST_NO && cpu_vendor_id == CPU_VENDOR_AMD &&
 	    CPUID_TO_FAMILY(cpu_id) == 0x15) {
 		msr = rdmsr(MSR_EXTFEATURES);
 		if ((msr & ((uint64_t)1 << 54)) == 0) {
 			msr |= (uint64_t)1 << 54;
 			wrmsr(MSR_EXTFEATURES, msr);
 			return (true);
 		}
 	}
 	return (false);
 }
 
 /*
  * First identification pass: read CPUID leaves 0 and 1 and store the
  * highest basic leaf, the vendor string, the processor signature and the
  * basic feature words in their global variables.
  */
 void
 identify_cpu1(void)
 {
 	u_int regs[4];
 	u_int *vendor_words;
 
 	/*
 	 * Leaf 0: regs[0] is the highest basic leaf.  The 12-byte vendor
 	 * string is assembled from regs[1], regs[3], regs[2], in that order.
 	 */
 	do_cpuid(0, regs);
 	cpu_high = regs[0];
 	vendor_words = (u_int *)&cpu_vendor;
 	vendor_words[0] = regs[1];
 	vendor_words[1] = regs[3];
 	vendor_words[2] = regs[2];
 	cpu_vendor[12] = '\0';
 
 	/* Leaf 1: signature, processor info and the two feature words. */
 	do_cpuid(1, regs);
 	cpu_id = regs[0];
 	cpu_procinfo = regs[1];
 	cpu_feature2 = regs[2];
 	cpu_feature = regs[3];
 }
 
 /*
  * Second identification pass: collect CPUID leaf 6 and leaf 7 (subleaf 0)
  * data when those leaves exist, and read IA32_ARCH_CAP when leaf 7
  * advertises it.
  */
 void
 identify_cpu2(void)
 {
 	u_int regs[4], cpu_stdext_disable;
 
 	if (cpu_high >= 6) {
 		cpuid_count(6, 0, regs);
 		cpu_power_eax = regs[0];
 		cpu_power_ebx = regs[1];
 		cpu_power_ecx = regs[2];
 		cpu_power_edx = regs[3];
 	}
 
 	if (cpu_high < 7)
 		return;
 
 	cpuid_count(7, 0, regs);
 	cpu_stdext_feature2 = regs[2];
 	cpu_stdext_feature3 = regs[3];
 
 	/*
 	 * Some hypervisors fail to filter out extended features they do not
 	 * support.  The hw.cpu_stdext_disable tunable lets the administrator
 	 * mask off such extensions, namely those whose activation requires
 	 * setting a bit in CR4 that the VM monitor does not handle.
 	 */
 	cpu_stdext_disable = 0;
 	TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable);
 	cpu_stdext_feature = regs[1] & ~cpu_stdext_disable;
 
 	if ((cpu_stdext_feature3 & CPUID_STDEXT3_ARCH_CAP) != 0)
 		cpu_ia32_arch_caps = rdmsr(MSR_IA32_ARCH_CAP);
 }
 
 /*
  * Resolve the numeric vendor id from the vendor string and apply the
  * CPUID fixups.  If a fixup changed anything, the highest basic CPUID leaf
  * is read again.
  */
 void
 identify_cpu_fixup_bsp(void)
 {
 	u_int regs[4];
 
 	cpu_vendor_id = find_cpu_vendor_id();
 
 	if (!fix_cpuid())
 		return;
 
 	/* The limit may have been lifted; refresh cpu_high. */
 	do_cpuid(0, regs);
 	cpu_high = regs[0];
 }
 
 /*
  * Final stage of CPU identification.
  *
  * Runs the BSP fixups, reads the MONITOR/MWAIT parameters (leaf 5) when
  * available, gathers the leaf 6/7 data through identify_cpu2(), and then
  * queries the extended leaves (0x80000000 and up) for the vendors that are
  * listed below.  On i386 it additionally sorts out Cyrix parts and IBM
  * Blue Lightning CPUs, which may return early once the CPU has been
  * re-labelled as IBM.
  */
 void
 finishidentcpu(void)
 {
 	u_int regs[4];
 #ifdef __i386__
 	u_char ccr3;
 #endif
 
 	identify_cpu_fixup_bsp();
 
 	/* Leaf 5 is only consulted when MONITOR/MWAIT is advertised. */
 	if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) {
 		do_cpuid(5, regs);
 		cpu_mon_mwait_flags = regs[2];
 		cpu_mon_min_size = regs[0] &  CPUID5_MON_MIN_SIZE;
 		cpu_mon_max_size = regs[1] &  CPUID5_MON_MAX_SIZE;
 	}
 
 	identify_cpu2();
 
 #ifdef __i386__
 	/*
 	 * On i386 the extended range is probed only if cpuid reported basic
 	 * leaves, and the result is accepted only if it looks like a valid
 	 * extended leaf number.
 	 */
 	if (cpu_high > 0 &&
 	    (cpu_vendor_id == CPU_VENDOR_INTEL ||
 	     cpu_vendor_id == CPU_VENDOR_AMD ||
 	     cpu_vendor_id == CPU_VENDOR_HYGON ||
 	     cpu_vendor_id == CPU_VENDOR_TRANSMETA ||
 	     cpu_vendor_id == CPU_VENDOR_CENTAUR ||
 	     cpu_vendor_id == CPU_VENDOR_NSC)) {
 		do_cpuid(0x80000000, regs);
 		if (regs[0] >= 0x80000000)
 			cpu_exthigh = regs[0];
 	}
 #else
 	/* Elsewhere the reported value is stored without that sanity check. */
 	if (cpu_vendor_id == CPU_VENDOR_INTEL ||
 	    cpu_vendor_id == CPU_VENDOR_AMD ||
 	    cpu_vendor_id == CPU_VENDOR_HYGON ||
 	    cpu_vendor_id == CPU_VENDOR_CENTAUR) {
 		do_cpuid(0x80000000, regs);
 		cpu_exthigh = regs[0];
 	}
 #endif
 	if (cpu_exthigh >= 0x80000001) {
 		do_cpuid(0x80000001, regs);
 		/*
 		 * Drop from amd_feature those bits selected by mask
 		 * 0x0183f3ff that are already set in cpu_feature.
 		 */
 		amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff);
 		amd_feature2 = regs[2];
 	}
 	if (cpu_exthigh >= 0x80000007) {
 		do_cpuid(0x80000007, regs);
 		amd_rascap = regs[1];
 		amd_pminfo = regs[3];
 	}
 	if (cpu_exthigh >= 0x80000008) {
 		do_cpuid(0x80000008, regs);
 		cpu_maxphyaddr = regs[0] & 0xff;
 		amd_extended_feature_extensions = regs[1];
 		cpu_procinfo2 = regs[2];
 	} else {
 		/* Leaf unavailable: assume 36 bits with PAE, else 32 bits. */
 		cpu_maxphyaddr = (cpu_feature & CPUID_PAE) != 0 ? 36 : 32;
 	}
 
 #ifdef __i386__
 	if (cpu_vendor_id == CPU_VENDOR_CYRIX) {
 		if (cpu == CPU_486) {
 			/*
 			 * These conditions are equivalent to:
 			 *     - CPU does not support cpuid instruction.
 			 *     - Cyrix/IBM CPU is detected.
 			 */
 			if (identblue() == IDENTBLUE_IBMCPU) {
 				strcpy(cpu_vendor, "IBM");
 				cpu_vendor_id = CPU_VENDOR_IBM;
 				cpu = CPU_BLUE;
 				return;
 			}
 		}
 		switch (cpu_id & 0xf00) {
 		case 0x600:
 			/*
 			 * Cyrix's datasheet does not describe DIRs.
 			 * Therefor, I assume it does not have them
 			 * and use the result of the cpuid instruction.
 			 * XXX they seem to have it for now at least. -Peter
 			 */
 			identifycyrix();
 			cpu = CPU_M2;
 			break;
 		default:
 			identifycyrix();
 			/*
 			 * This routine contains a trick.
 			 * Don't check (cpu_id & 0x00f0) == 0x50 to detect M2, now.
 			 */
 			/* Classify by the high nibble of the device ID (DIR0). */
 			switch (cyrix_did & 0x00f0) {
 			case 0x00:
 			case 0xf0:
 				cpu = CPU_486DLC;
 				break;
 			case 0x10:
 				cpu = CPU_CY486DX;
 				break;
 			case 0x20:
 				if ((cyrix_did & 0x000f) < 8)
 					cpu = CPU_M1;
 				else
 					cpu = CPU_M1SC;
 				break;
 			case 0x30:
 				cpu = CPU_M1;
 				break;
 			case 0x40:
 				/* MediaGX CPU */
 				cpu = CPU_M1SC;
 				break;
 			default:
 				/* M2 and later CPUs are treated as M2. */
 				cpu = CPU_M2;
 
 				/*
 				 * enable cpuid instruction.
 				 */
 				/* CCR3 is saved, changed for the CCR4 write, then restored. */
 				ccr3 = read_cyrix_reg(CCR3);
 				write_cyrix_reg(CCR3, CCR3_MAPEN0);
 				write_cyrix_reg(CCR4, read_cyrix_reg(CCR4) | CCR4_CPUID);
 				write_cyrix_reg(CCR3, ccr3);
 
 				/* cpuid is usable now; reload the basic data. */
 				do_cpuid(0, regs);
 				cpu_high = regs[0];	/* eax */
 				do_cpuid(1, regs);
 				cpu_id = regs[0];	/* eax */
 				cpu_feature = regs[3];	/* edx */
 				break;
 			}
 		}
 	} else if (cpu == CPU_486 && *cpu_vendor == '\0') {
 		/*
 		 * There are BlueLightning CPUs that do not change
 		 * undefined flags by dividing 5 by 2.  In this case,
 		 * the CPU identification routine in locore.s leaves
 		 * cpu_vendor null string and puts CPU_486 into the
 		 * cpu.
 		 */
 		if (identblue() == IDENTBLUE_IBMCPU) {
 			strcpy(cpu_vendor, "IBM");
 			cpu_vendor_id = CPU_VENDOR_IBM;
 			cpu = CPU_BLUE;
 			return;
 		}
 	}
 #endif
 }
 
 /*
  * Compute the default for page table isolation.
  *
  * Returns 0 for CPUs whose vendor string is AMD's or Hygon's, and for CPUs
  * whose IA32_ARCH_CAP value has RDCL_NO set; returns 1 for everything else.
  */
 int
 pti_get_default(void)
 {
 	int amd_like;
 
 	amd_like = strcmp(cpu_vendor, AMD_VENDOR_ID) == 0 ||
 	    strcmp(cpu_vendor, HYGON_VENDOR_ID) == 0;
 	if (amd_like)
 		return (0);
 
 	return ((cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) != 0 ? 0 : 1);
 }
 
 static u_int
 find_cpu_vendor_id(void)
 {
 	int	i;
 
 	for (i = 0; i < nitems(cpu_vendors); i++)
 		if (strcmp(cpu_vendor, cpu_vendors[i].vendor) == 0)
 			return (cpu_vendors[i].vendor_id);
 	return (0);
 }
 
 /*
  * Finish a cache/TLB description line with its associativity.  The value
  * 255 is printed as "fully associative"; any other value is printed as
  * the number of ways.
  */
 static void
 print_AMD_assoc(int i)
 {
 	switch (i) {
 	case 255:
 		printf(", fully associative\n");
 		break;
 	default:
 		printf(", %d-way associative\n", i);
 		break;
 	}
 }
 
 /*
  * Finish an L2 cache/TLB description line with its associativity.
  *
  * 'i' is the 4-bit associativity code taken from CPUID Fn8000_0006; only
  * its low four bits are examined.  Besides the original set of codes this
  * also decodes 3h, 5h and Ah-Eh, which AMD's encoding table defines as
  * 3, 6, 32, 48, 64, 96 and 128 ways.  Codes without an entry (7h, 9h)
  * are reported as a reserved configuration.
  */
 static void
 print_AMD_l2_assoc(int i)
 {
 	static const char * const assoc_str[16] = {
 		[0x0] = ", disabled/not present\n",
 		[0x1] = ", direct mapped\n",
 		[0x2] = ", 2-way associative\n",
 		[0x3] = ", 3-way associative\n",
 		[0x4] = ", 4-way associative\n",
 		[0x5] = ", 6-way associative\n",
 		[0x6] = ", 8-way associative\n",
 		[0x8] = ", 16-way associative\n",
 		[0xa] = ", 32-way associative\n",
 		[0xb] = ", 48-way associative\n",
 		[0xc] = ", 64-way associative\n",
 		[0xd] = ", 96-way associative\n",
 		[0xe] = ", 128-way associative\n",
 		[0xf] = ", fully associative\n",
 	};
 	const char *text;
 
 	/* Unlisted slots are NULL and fall back to the reserved message. */
 	text = assoc_str[i & 0x0f];
 	printf("%s", text != NULL ? text : ", reserved configuration\n");
 }
 
 /*
  * Print AMD cache and TLB details (verbose boot output).
  *
  * Leaf 0x80000005 describes the L1 TLBs and caches, leaf 0x80000006 the L2
  * TLBs and the L2 cache.  On i386, K6-family parts additionally get their
  * write-allocate settings printed from the WHCR MSR (0xc0000082).  Finally
  * a warning is printed for family 0xf models 0x20-0x3f.
  *
  * Layout of the L2 TLB registers of leaf 0x80000006 (%eax for 2MB pages,
  * %ebx for 4KB pages):
  *	bits 31:28  data TLB associativity
  *	bits 27:16  data TLB entries
  *	bits 15:12  instruction (or unified) TLB associativity
  *	bits 11:0   instruction (or unified) TLB entries
  * When the upper 16 bits are zero the TLB is unified and is described by
  * the lower 16 bits alone.
  */
 static void
 print_AMD_info(void)
 {
 #ifdef __i386__
 	uint64_t amd_whcr;
 #endif
 	u_int regs[4];
 
 	if (cpu_exthigh >= 0x80000005) {
 		do_cpuid(0x80000005, regs);
 		printf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff);
 		print_AMD_assoc(regs[0] >> 24);
 
 		printf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff);
 		print_AMD_assoc((regs[0] >> 8) & 0xff);
 
 		printf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff);
 		print_AMD_assoc(regs[1] >> 24);
 
 		printf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff);
 		print_AMD_assoc((regs[1] >> 8) & 0xff);
 
 		printf("L1 data cache: %d kbytes", regs[2] >> 24);
 		printf(", %d bytes/line", regs[2] & 0xff);
 		printf(", %d lines/tag", (regs[2] >> 8) & 0xff);
 		print_AMD_assoc((regs[2] >> 16) & 0xff);
 
 		printf("L1 instruction cache: %d kbytes", regs[3] >> 24);
 		printf(", %d bytes/line", regs[3] & 0xff);
 		printf(", %d lines/tag", (regs[3] >> 8) & 0xff);
 		print_AMD_assoc((regs[3] >> 16) & 0xff);
 	}
 
 	if (cpu_exthigh >= 0x80000006) {
 		do_cpuid(0x80000006, regs);
 		if ((regs[0] >> 16) != 0) {
 			printf("L2 2MB data TLB: %d entries",
 			    (regs[0] >> 16) & 0xfff);
 			print_AMD_l2_assoc(regs[0] >> 28);
 			/* The instruction TLB lives in the low half. */
 			printf("L2 2MB instruction TLB: %d entries",
 			    regs[0] & 0xfff);
 			print_AMD_l2_assoc((regs[0] >> 12) & 0xf);
 		} else {
 			/* Unified TLB: only the low half is meaningful. */
 			printf("L2 2MB unified TLB: %d entries",
 			    regs[0] & 0xfff);
 			print_AMD_l2_assoc((regs[0] >> 12) & 0xf);
 		}
 		if ((regs[1] >> 16) != 0) {
 			printf("L2 4KB data TLB: %d entries",
 			    (regs[1] >> 16) & 0xfff);
 			print_AMD_l2_assoc(regs[1] >> 28);
 
 			/* The instruction TLB lives in the low half. */
 			printf("L2 4KB instruction TLB: %d entries",
 			    regs[1] & 0xfff);
 			print_AMD_l2_assoc((regs[1] >> 12) & 0xf);
 		} else {
 			/* Unified TLB: only the low half is meaningful. */
 			printf("L2 4KB unified TLB: %d entries",
 			    regs[1] & 0xfff);
 			print_AMD_l2_assoc((regs[1] >> 12) & 0xf);
 		}
 		printf("L2 unified cache: %d kbytes", regs[2] >> 16);
 		printf(", %d bytes/line", regs[2] & 0xff);
 		printf(", %d lines/tag", (regs[2] >> 8) & 0x0f);
 		print_AMD_l2_assoc((regs[2] >> 12) & 0x0f);
 	}
 
 #ifdef __i386__
 	if (((cpu_id & 0xf00) == 0x500)
 	    && (((cpu_id & 0x0f0) > 0x80)
 		|| (((cpu_id & 0x0f0) == 0x80)
 		    && (cpu_id & 0x00f) > 0x07))) {
 		/* K6-2(new core [Stepping 8-F]), K6-III or later */
 		amd_whcr = rdmsr(0xc0000082);
 		/*
 		 * The limit field mask is built in 64 bits: 0x3ff << 22
 		 * does not fit in a signed int.
 		 */
 		if (!(amd_whcr & ((uint64_t)0x3ff << 22))) {
 			printf("Write Allocate Disable\n");
 		} else {
 			printf("Write Allocate Enable Limit: %dM bytes\n",
 			    (u_int32_t)((amd_whcr &
 			    ((uint64_t)0x3ff << 22)) >> 22) * 4);
 			printf("Write Allocate 15-16M bytes: %s\n",
 			    (amd_whcr & (1 << 16)) ? "Enable" : "Disable");
 		}
 	} else if (((cpu_id & 0xf00) == 0x500)
 		   && ((cpu_id & 0x0f0) > 0x50)) {
 		/* K6, K6-2(old core) */
 		amd_whcr = rdmsr(0xc0000082);
 		if (!(amd_whcr & (0x7f << 1))) {
 			printf("Write Allocate Disable\n");
 		} else {
 			printf("Write Allocate Enable Limit: %dM bytes\n",
 			    (u_int32_t)((amd_whcr & (0x7f << 1)) >> 1) * 4);
 			printf("Write Allocate 15-16M bytes: %s\n",
 			    (amd_whcr & 0x0001) ? "Enable" : "Disable");
 			printf("Hardware Write Allocate Control: %s\n",
 			    (amd_whcr & 0x0100) ? "Enable" : "Disable");
 		}
 	}
 #endif
 	/*
 	 * Opteron Rev E shows a bug as in very rare occasions a read memory
 	 * barrier is not performed as expected if it is followed by a
 	 * non-atomic read-modify-write instruction.
 	 * As long as that bug pops up very rarely (intensive machine usage
 	 * on other operating systems generally generates one unexplainable
 	 * crash any 2 months) and as long as a model specific fix would be
 	 * impractical at this stage, print out a warning string if the broken
 	 * model and family are identified.
 	 */
 	if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x20 &&
 	    CPUID_TO_MODEL(cpu_id) <= 0x3f)
 		printf("WARNING: This architecture revision has known SMP "
 		    "hardware bugs which may cause random instability\n");
 }
 
 /*
  * Print Intel cache and TLB information.
  *
  * When the basic CPUID level (cpu_high) is at least 2, CPUID leaf 0x2 is
  * queried and each descriptor byte it returns is handed to
  * print_INTEL_TLB().  The low byte of the first EAX value is used as the
  * number of times the leaf has to be queried; a count of zero is treated
  * as a buggy CPU and ends the loop.  When the extended CPUID level
  * (cpu_exthigh) reaches 0x80000006, an L2 cache summary taken from that
  * leaf is printed as well.
  */
 static void
 print_INTEL_info(void)
 {
 	u_int regs[4];
 	u_int rounds, regnum;
 	u_int nwaycode, nway;
 
 	if (cpu_high >= 2) {
 		rounds = 0;
 		do {
 			do_cpuid(0x2, regs);
 			/* The first pass latches the iteration count. */
 			if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0)
 				break;	/* we have a buggy CPU */
 
 			for (regnum = 0; regnum <= 3; ++regnum) {
 				/*
 				 * Registers with bit 31 set are skipped.  Use
 				 * an unsigned constant: shifting a signed 1
 				 * into the sign bit is undefined behaviour.
 				 */
 				if (regs[regnum] & (1U << 31))
 					continue;
 				/*
 				 * The low byte of regs[0] is the iteration
 				 * count read above, not a descriptor.
 				 */
 				if (regnum != 0)
 					print_INTEL_TLB(regs[regnum] & 0xff);
 				print_INTEL_TLB((regs[regnum] >> 8) & 0xff);
 				print_INTEL_TLB((regs[regnum] >> 16) & 0xff);
 				print_INTEL_TLB((regs[regnum] >> 24) & 0xff);
 			}
 		} while (--rounds > 0);
 	}
 
 	if (cpu_exthigh >= 0x80000006) {
 		do_cpuid(0x80000006, regs);
 		/*
 		 * Associativity codes 0x02..0x08 are reported as
 		 * 1 << (code / 2) ways; any other code is reported as 0.
 		 */
 		nwaycode = (regs[2] >> 12) & 0x0f;
 		if (nwaycode >= 0x02 && nwaycode <= 0x08)
 			nway = 1 << (nwaycode / 2);
 		else
 			nway = 0;
 		printf("L2 cache: %u kbytes, %u-way associative, %u bytes/line\n",
 		    (regs[2] >> 16) & 0xffff, nway, regs[2] & 0xff);
 	}
 }
 
 /*
  * Print the human-readable description of one Intel cache/TLB descriptor
  * byte.
  *
  * data is a single descriptor value (callers pass it masked to 8 bits).
  * Descriptors 0x00 and 0x40 and every value without an entry in the table
  * below print nothing.  Every message ends with a newline so that each
  * descriptor occupies its own console line.
  */
 static void
 print_INTEL_TLB(u_int data)
 {
 	switch (data) {
 	case 0x0:
 	case 0x40:
 	default:
 		break;
 	case 0x1:
 		printf("Instruction TLB: 4 KB pages, 4-way set associative, 32 entries\n");
 		break;
 	case 0x2:
 		printf("Instruction TLB: 4 MB pages, fully associative, 2 entries\n");
 		break;
 	case 0x3:
 		printf("Data TLB: 4 KB pages, 4-way set associative, 64 entries\n");
 		break;
 	case 0x4:
 		printf("Data TLB: 4 MB Pages, 4-way set associative, 8 entries\n");
 		break;
 	case 0x6:
 		printf("1st-level instruction cache: 8 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x8:
 		printf("1st-level instruction cache: 16 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x9:
 		printf("1st-level instruction cache: 32 KB, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xa:
 		printf("1st-level data cache: 8 KB, 2-way set associative, 32 byte line size\n");
 		break;
 	case 0xb:
 		printf("Instruction TLB: 4 MByte pages, 4-way set associative, 4 entries\n");
 		break;
 	case 0xc:
 		printf("1st-level data cache: 16 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0xd:
 		/* The trailing newline was missing here. */
 		printf("1st-level data cache: 16 KBytes, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xe:
 		printf("1st-level data cache: 24 KBytes, 6-way set associative, 64 byte line size\n");
 		break;
 	case 0x1d:
 		printf("2nd-level cache: 128 KBytes, 2-way set associative, 64 byte line size\n");
 		break;
 	case 0x21:
 		printf("2nd-level cache: 256 KBytes, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0x22:
 		printf("3rd-level cache: 512 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x23:
 		printf("3rd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x24:
 		printf("2nd-level cache: 1 MBytes, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0x25:
 		printf("3rd-level cache: 2 MB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x29:
 		printf("3rd-level cache: 4 MB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x2c:
 		printf("1st-level data cache: 32 KB, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0x30:
 		printf("1st-level instruction cache: 32 KB, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0x39: /* De-listed in SDM rev. 54 */
 		printf("2nd-level cache: 128 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x3b: /* De-listed in SDM rev. 54 */
 		printf("2nd-level cache: 128 KB, 2-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x3c: /* De-listed in SDM rev. 54 */
 		printf("2nd-level cache: 256 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x41:
 		printf("2nd-level cache: 128 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x42:
 		printf("2nd-level cache: 256 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x43:
 		printf("2nd-level cache: 512 KB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x44:
 		printf("2nd-level cache: 1 MB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x45:
 		printf("2nd-level cache: 2 MB, 4-way set associative, 32 byte line size\n");
 		break;
 	case 0x46:
 		printf("3rd-level cache: 4 MB, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0x47:
 		printf("3rd-level cache: 8 MB, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0x48:
 		printf("2nd-level cache: 3MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0x49:
 		/*
 		 * The same descriptor is reported as a 3rd-level cache on
 		 * family 0xf model 0x6 and as a 2nd-level cache elsewhere.
 		 */
 		if (CPUID_TO_FAMILY(cpu_id) == 0xf &&
 		    CPUID_TO_MODEL(cpu_id) == 0x6)
 			printf("3rd-level cache: 4MB, 16-way set associative, 64-byte line size\n");
 		else
 			/* The trailing newline was missing here. */
 			printf("2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0x4a:
 		printf("3rd-level cache: 6MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0x4b:
 		printf("3rd-level cache: 8MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0x4c:
 		printf("3rd-level cache: 12MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0x4d:
 		printf("3rd-level cache: 16MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0x4e:
 		printf("2nd-level cache: 6MByte, 24-way set associative, 64 byte line size\n");
 		break;
 	case 0x4f:
 		printf("Instruction TLB: 4 KByte pages, 32 entries\n");
 		break;
 	case 0x50:
 		printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 64 entries\n");
 		break;
 	case 0x51:
 		printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 128 entries\n");
 		break;
 	case 0x52:
 		printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 256 entries\n");
 		break;
 	case 0x55:
 		printf("Instruction TLB: 2-MByte or 4-MByte pages, fully associative, 7 entries\n");
 		break;
 	case 0x56:
 		printf("Data TLB0: 4 MByte pages, 4-way set associative, 16 entries\n");
 		break;
 	case 0x57:
 		printf("Data TLB0: 4 KByte pages, 4-way associative, 16 entries\n");
 		break;
 	case 0x59:
 		printf("Data TLB0: 4 KByte pages, fully associative, 16 entries\n");
 		break;
 	case 0x5a:
 		printf("Data TLB0: 2-MByte or 4 MByte pages, 4-way set associative, 32 entries\n");
 		break;
 	case 0x5b:
 		printf("Data TLB: 4 KB or 4 MB pages, fully associative, 64 entries\n");
 		break;
 	case 0x5c:
 		printf("Data TLB: 4 KB or 4 MB pages, fully associative, 128 entries\n");
 		break;
 	case 0x5d:
 		printf("Data TLB: 4 KB or 4 MB pages, fully associative, 256 entries\n");
 		break;
 	case 0x60:
 		printf("1st-level data cache: 16 KB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x61:
 		printf("Instruction TLB: 4 KByte pages, fully associative, 48 entries\n");
 		break;
 	case 0x63:
 		printf("Data TLB: 2 MByte or 4 MByte pages, 4-way set associative, 32 entries and a separate array with 1 GByte pages, 4-way set associative, 4 entries\n");
 		break;
 	case 0x64:
 		printf("Data TLB: 4 KBytes pages, 4-way set associative, 512 entries\n");
 		break;
 	case 0x66:
 		printf("1st-level data cache: 8 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x67:
 		printf("1st-level data cache: 16 KB, 4-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x68:
 		printf("1st-level data cache: 32 KB, 4 way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x6a:
 		printf("uTLB: 4KByte pages, 8-way set associative, 64 entries\n");
 		break;
 	case 0x6b:
 		printf("DTLB: 4KByte pages, 8-way set associative, 256 entries\n");
 		break;
 	case 0x6c:
 		printf("DTLB: 2M/4M pages, 8-way set associative, 128 entries\n");
 		break;
 	case 0x6d:
 		printf("DTLB: 1 GByte pages, fully associative, 16 entries\n");
 		break;
 	case 0x70:
 		printf("Trace cache: 12K-uops, 8-way set associative\n");
 		break;
 	case 0x71:
 		printf("Trace cache: 16K-uops, 8-way set associative\n");
 		break;
 	case 0x72:
 		printf("Trace cache: 32K-uops, 8-way set associative\n");
 		break;
 	case 0x76:
 		printf("Instruction TLB: 2M/4M pages, fully associative, 8 entries\n");
 		break;
 	case 0x78:
 		printf("2nd-level cache: 1 MB, 4-way set associative, 64-byte line size\n");
 		break;
 	case 0x79:
 		printf("2nd-level cache: 128 KB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x7a:
 		printf("2nd-level cache: 256 KB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x7b:
 		printf("2nd-level cache: 512 KB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x7c:
 		printf("2nd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n");
 		break;
 	case 0x7d:
 		printf("2nd-level cache: 2-MB, 8-way set associative, 64-byte line size\n");
 		break;
 	case 0x7f:
 		printf("2nd-level cache: 512-KB, 2-way set associative, 64-byte line size\n");
 		break;
 	case 0x80:
 		printf("2nd-level cache: 512 KByte, 8-way set associative, 64-byte line size\n");
 		break;
 	case 0x82:
 		printf("2nd-level cache: 256 KB, 8-way set associative, 32 byte line size\n");
 		break;
 	case 0x83:
 		printf("2nd-level cache: 512 KB, 8-way set associative, 32 byte line size\n");
 		break;
 	case 0x84:
 		printf("2nd-level cache: 1 MB, 8-way set associative, 32 byte line size\n");
 		break;
 	case 0x85:
 		printf("2nd-level cache: 2 MB, 8-way set associative, 32 byte line size\n");
 		break;
 	case 0x86:
 		printf("2nd-level cache: 512 KB, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0x87:
 		printf("2nd-level cache: 1 MB, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0xa0:
 		printf("DTLB: 4k pages, fully associative, 32 entries\n");
 		break;
 	case 0xb0:
 		printf("Instruction TLB: 4 KB Pages, 4-way set associative, 128 entries\n");
 		break;
 	case 0xb1:
 		printf("Instruction TLB: 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries\n");
 		break;
 	case 0xb2:
 		printf("Instruction TLB: 4KByte pages, 4-way set associative, 64 entries\n");
 		break;
 	case 0xb3:
 		printf("Data TLB: 4 KB Pages, 4-way set associative, 128 entries\n");
 		break;
 	case 0xb4:
 		printf("Data TLB1: 4 KByte pages, 4-way associative, 256 entries\n");
 		break;
 	case 0xb5:
 		printf("Instruction TLB: 4KByte pages, 8-way set associative, 64 entries\n");
 		break;
 	case 0xb6:
 		printf("Instruction TLB: 4KByte pages, 8-way set associative, 128 entries\n");
 		break;
 	case 0xba:
 		printf("Data TLB1: 4 KByte pages, 4-way associative, 64 entries\n");
 		break;
 	case 0xc0:
 		printf("Data TLB: 4 KByte and 4 MByte pages, 4-way associative, 8 entries\n");
 		break;
 	case 0xc1:
 		printf("Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries\n");
 		break;
 	case 0xc2:
 		printf("DTLB: 4 KByte/2 MByte pages, 4-way associative, 16 entries\n");
 		break;
 	case 0xc3:
 		printf("Shared 2nd-Level TLB: 4 KByte /2 MByte pages, 6-way associative, 1536 entries. Also 1GBbyte pages, 4-way, 16 entries\n");
 		break;
 	case 0xc4:
 		printf("DTLB: 2M/4M Byte pages, 4-way associative, 32 entries\n");
 		break;
 	case 0xca:
 		printf("Shared 2nd-Level TLB: 4 KByte pages, 4-way associative, 512 entries\n");
 		break;
 	case 0xd0:
 		printf("3rd-level cache: 512 KByte, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xd1:
 		printf("3rd-level cache: 1 MByte, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xd2:
 		printf("3rd-level cache: 2 MByte, 4-way set associative, 64 byte line size\n");
 		break;
 	case 0xd6:
 		printf("3rd-level cache: 1 MByte, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0xd7:
 		printf("3rd-level cache: 2 MByte, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0xd8:
 		printf("3rd-level cache: 4 MByte, 8-way set associative, 64 byte line size\n");
 		break;
 	case 0xdc:
 		printf("3rd-level cache: 1.5 MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0xdd:
 		printf("3rd-level cache: 3 MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0xde:
 		printf("3rd-level cache: 6 MByte, 12-way set associative, 64 byte line size\n");
 		break;
 	case 0xe2:
 		printf("3rd-level cache: 2 MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0xe3:
 		printf("3rd-level cache: 4 MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0xe4:
 		printf("3rd-level cache: 8 MByte, 16-way set associative, 64 byte line size\n");
 		break;
 	case 0xea:
 		printf("3rd-level cache: 12MByte, 24-way set associative, 64 byte line size\n");
 		break;
 	case 0xeb:
 		printf("3rd-level cache: 18MByte, 24-way set associative, 64 byte line size\n");
 		break;
 	case 0xec:
 		printf("3rd-level cache: 24MByte, 24-way set associative, 64 byte line size\n");
 		break;
 	case 0xf0:
 		printf("64-Byte prefetching\n");
 		break;
 	case 0xf1:
 		printf("128-Byte prefetching\n");
 		break;
 	}
 }
 
 /*
  * Print the AMD SVM capabilities read from CPUID leaf 0x8000000A.
  *
  * A "(disabled in BIOS)" note is emitted when the SVMDIS bit is set in
  * MSR_VM_CR.  With bootverbose set, the full feature word is decoded with
  * %b followed by the revision and ASID count; otherwise only a short
  * comma-separated list of selected features and the ASID count is printed.
  */
 static void
 print_svm_info(void)
 {
 	u_int regs[4];
 	u_int svm_features;
 	uint64_t vm_cr;
 	const char *sep;
 
 	printf("\n  SVM: ");
 	do_cpuid(0x8000000A, regs);
 	svm_features = regs[3];
 
 	vm_cr = rdmsr(MSR_VM_CR);
 	if ((vm_cr & VM_CR_SVMDIS) == VM_CR_SVMDIS)
 		printf("(disabled in BIOS) ");
 
 	if (bootverbose) {
 		/* Verbose boot: decode every bit of the feature word. */
 		printf("Features=0x%b", svm_features,
 		    "\020"
 		    "\001NP"			/* Nested paging */
 		    "\002LbrVirt"		/* LBR virtualization */
 		    "\003SVML"			/* SVM lock */
 		    "\004NRIPS"			/* NRIP save */
 		    "\005TscRateMsr"		/* MSR based TSC rate control */
 		    "\006VmcbClean"		/* VMCB clean bits */
 		    "\007FlushByAsid"		/* Flush by ASID */
 		    "\010DecodeAssist"		/* Decode assist */
 		    "\011<b8>"
 		    "\012<b9>"
 		    "\013PauseFilter"		/* PAUSE intercept filter */
 		    "\014EncryptedMcodePatch"
 		    "\015PauseFilterThreshold"	/* PAUSE filter threshold */
 		    "\016AVIC"			/* virtual interrupt controller */
 		    "\017<b14>"
 		    "\020V_VMSAVE_VMLOAD"
 		    "\021vGIF"
 		    "\022GMET"			/* Guest Mode Execute Trap */
 		    "\023<b18>"
 		    "\024<b19>"
 		    "\025<b20>"
 		    "\026<b21>"
 		    "\027<b22>"
 		    "\030<b23>"
 		    "\031<b24>"
 		    "\032<b25>"
 		    "\033<b26>"
 		    "\034<b27>"
 		    "\035<b28>"
 		    "\036<b29>"
 		    "\037<b30>"
 		    "\040<b31>");
 		printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]);
 		return;
 	}
 
 	/*
 	 * Terse boot: list a few features.  The separator starts out empty
 	 * and becomes "," once the first item has been printed.
 	 */
 	sep = "";
 	if (svm_features & (1 << 0)) {
 		printf("%sNP", sep);
 		sep = ",";
 	}
 	if (svm_features & (1 << 3)) {
 		printf("%sNRIP", sep);
 		sep = ",";
 	}
 	if (svm_features & (1 << 5)) {
 		printf("%sVClean", sep);
 		sep = ",";
 	}
 	if (svm_features & (1 << 6)) {
 		printf("%sAFlush", sep);
 		sep = ",";
 	}
 	if (svm_features & (1 << 7)) {
 		printf("%sDAssist", sep);
 		sep = ",";
 	}
 	printf("%sNAsids=%d", sep, regs[1]);
 }
 
 #ifdef __i386__
 /*
  * Print Transmeta identification read from the 0x8086xxxx CPUID leaves.
  *
  * Leaf 0x80860000 returns the highest supported leaf in its first
  * register.  Depending on that value the processor revision (leaf
  * 0x80860001), the Code Morphing Software revision (leaf 0x80860002) and
  * the information string held in leaves 0x80860003..0x80860006 are
  * printed.
  */
 static void
 print_transmeta_info(void)
 {
 	u_int regs[4], nreg;
 
 	do_cpuid(0x80860000, regs);
 	nreg = regs[0];
 	if (nreg >= 0x80860001) {
 		do_cpuid(0x80860001, regs);
 		printf("  Processor revision %u.%u.%u.%u\n",
 		       (regs[1] >> 24) & 0xff,
 		       (regs[1] >> 16) & 0xff,
 		       (regs[1] >> 8) & 0xff,
 		       regs[1] & 0xff);
 	}
 	if (nreg >= 0x80860002) {
 		do_cpuid(0x80860002, regs);
 		printf("  Code Morphing Software revision %u.%u.%u-%u-%u\n",
 		       (regs[1] >> 24) & 0xff,
 		       (regs[1] >> 16) & 0xff,
 		       (regs[1] >> 8) & 0xff,
 		       regs[1] & 0xff,
 		       regs[2]);
 	}
 	if (nreg >= 0x80860006) {
 		/*
 		 * Four leaves of four registers each form a 64-byte string.
 		 * The buffer is declared as u_int so that do_cpuid() stores
 		 * through a correctly aligned pointer of the right type
 		 * instead of a cast char array; the extra zeroed word
 		 * supplies the NUL terminator at byte offset 64.
 		 */
 		u_int info[4 * 4 + 1];
 
 		do_cpuid(0x80860003, &info[0]);
 		do_cpuid(0x80860004, &info[4]);
 		do_cpuid(0x80860005, &info[8]);
 		do_cpuid(0x80860006, &info[12]);
 		info[16] = 0;
 		printf("  %s\n", (const char *)info);
 	}
 }
 #endif
 
 /*
  * Print the VIA Padlock feature word: the fourth register returned by
  * CPUID leaf 0xc0000001, decoded with %b.
  */
 static void
 print_via_padlock_info(void)
 {
 	u_int cpuid_out[4];
 	u_int padlock;
 
 	do_cpuid(0xc0000001, cpuid_out);
 	padlock = cpuid_out[3];
 
 	printf("\n  VIA Padlock Features=0x%b", padlock,
 	    "\020"
 	    "\003RNG"		/* RNG */
 	    "\007AES"		/* ACE */
 	    "\011AES-CTR"	/* ACE2 */
 	    "\013SHA1,SHA256"	/* PHE */
 	    "\015RSA");		/* PMM */
 }
 
 /*
  * Return the upper 32 bits of a VMX capability MSR, i.e. the controls that
  * can be set to 1.
  *
  * basic is the MSR_VMX_BASIC value; when its bit 55 is set true_msr is
  * read, otherwise msr is read.
  */
 static uint32_t
 vmx_settable(uint64_t basic, int msr, int true_msr)
 {
 	uint64_t caps;
 	int which;
 
 	which = (basic & (1ULL << 55)) != 0 ? true_msr : msr;
 	caps = rdmsr(which);
 
 	return (caps >> 32);
 }
 
 /*
  * Print the Intel VT-x (VMX) capabilities.
  *
  * A "(disabled in BIOS)" note is emitted when the VMX enable bit is clear
  * in MSR_IA32_FEATURE_CONTROL.  The settable pin-based, processor-based,
  * exit and entry controls are gathered through vmx_settable().  Without
  * bootverbose only a short comma-separated feature list is printed; with
  * it every control word is decoded with %b, followed by the EPT and VPID
  * capabilities when either feature can be enabled.
  */
 static void
 print_vmx_info(void)
 {
 	uint64_t basic, msr;
 	uint32_t entry, exit, mask, pin, proc, proc2;
 	int comma;
 
 	printf("\n  VT-x: ");
 	msr = rdmsr(MSR_IA32_FEATURE_CONTROL);
 	if (!(msr & IA32_FEATURE_CONTROL_VMX_EN))
 		printf("(disabled in BIOS) ");
 	basic = rdmsr(MSR_VMX_BASIC);
 	pin = vmx_settable(basic, MSR_VMX_PINBASED_CTLS,
 	    MSR_VMX_TRUE_PINBASED_CTLS);
 	proc = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS,
 	    MSR_VMX_TRUE_PROCBASED_CTLS);
 	/* The same MSR is passed twice for the secondary controls. */
 	if (proc & PROCBASED_SECONDARY_CONTROLS)
 		proc2 = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS2,
 		    MSR_VMX_PROCBASED_CTLS2);
 	else
 		proc2 = 0;
 	exit = vmx_settable(basic, MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS);
 	entry = vmx_settable(basic, MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS);
 
 	if (!bootverbose) {
 		comma = 0;
 		/* PAT is listed only if it can be saved and loaded. */
 		if (exit & VM_EXIT_SAVE_PAT && exit & VM_EXIT_LOAD_PAT &&
 		    entry & VM_ENTRY_LOAD_PAT) {
 			printf("%sPAT", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc & PROCBASED_HLT_EXITING) {
 			printf("%sHLT", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc & PROCBASED_MTF) {
 			printf("%sMTF", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc & PROCBASED_PAUSE_EXITING) {
 			printf("%sPAUSE", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc2 & PROCBASED2_ENABLE_EPT) {
 			printf("%sEPT", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc2 & PROCBASED2_UNRESTRICTED_GUEST) {
 			printf("%sUG", comma ? "," : "");
 			comma = 1;
 		}
 		if (proc2 & PROCBASED2_ENABLE_VPID) {
 			printf("%sVPID", comma ? "," : "");
 			comma = 1;
 		}
 		/* VID is listed only when all five controls are settable. */
 		if (proc & PROCBASED_USE_TPR_SHADOW &&
 		    proc2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES &&
 		    proc2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE &&
 		    proc2 & PROCBASED2_APIC_REGISTER_VIRTUALIZATION &&
 		    proc2 & PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY) {
 			printf("%sVID", comma ? "," : "");
 			comma = 1;
 			if (pin & PINBASED_POSTED_INTERRUPT)
 				printf(",PostIntr");
 		}
 		return;
 	}
 
 	mask = basic >> 32;
 	printf("Basic Features=0x%b", mask,
 	"\020"
 	"\02132PA"		/* 32-bit physical addresses */
 	"\022SMM"		/* SMM dual-monitor */
 	"\027INS/OUTS"		/* VM-exit info for INS and OUTS */
 	"\030TRUE"		/* TRUE_CTLS MSRs */
 	);
 	printf("\n        Pin-Based Controls=0x%b", pin,
 	"\020"
 	"\001ExtINT"		/* External-interrupt exiting */
 	"\004NMI"		/* NMI exiting */
 	"\006VNMI"		/* Virtual NMIs */
 	"\007PreTmr"		/* Activate VMX-preemption timer */
 	"\010PostIntr"		/* Process posted interrupts */
 	);
 	printf("\n        Primary Processor Controls=0x%b", proc,
 	"\020"
 	"\003INTWIN"		/* Interrupt-window exiting */
 	"\004TSCOff"		/* Use TSC offsetting */
 	"\010HLT"		/* HLT exiting */
 	"\012INVLPG"		/* INVLPG exiting */
 	"\013MWAIT"		/* MWAIT exiting */
 	"\014RDPMC"		/* RDPMC exiting */
 	"\015RDTSC"		/* RDTSC exiting */
 	"\020CR3-LD"		/* CR3-load exiting */
 	"\021CR3-ST"		/* CR3-store exiting */
 	"\024CR8-LD"		/* CR8-load exiting */
 	"\025CR8-ST"		/* CR8-store exiting */
 	"\026TPR"		/* Use TPR shadow */
 	"\027NMIWIN"		/* NMI-window exiting */
 	"\030MOV-DR"		/* MOV-DR exiting */
 	"\031IO"		/* Unconditional I/O exiting */
 	"\032IOmap"		/* Use I/O bitmaps */
 	"\034MTF"		/* Monitor trap flag */
 	"\035MSRmap"		/* Use MSR bitmaps */
 	"\036MONITOR"		/* MONITOR exiting */
 	"\037PAUSE"		/* PAUSE exiting */
 	);
 	if (proc & PROCBASED_SECONDARY_CONTROLS)
 		printf("\n        Secondary Processor Controls=0x%b", proc2,
 		"\020"
 		"\001APIC"		/* Virtualize APIC accesses */
 		"\002EPT"		/* Enable EPT */
 		"\003DT"		/* Descriptor-table exiting */
 		"\004RDTSCP"		/* Enable RDTSCP */
 		"\005x2APIC"		/* Virtualize x2APIC mode */
 		"\006VPID"		/* Enable VPID */
 		"\007WBINVD"		/* WBINVD exiting */
 		"\010UG"		/* Unrestricted guest */
 		"\011APIC-reg"		/* APIC-register virtualization */
 		"\012VID"		/* Virtual-interrupt delivery */
 		"\013PAUSE-loop"	/* PAUSE-loop exiting */
 		"\014RDRAND"		/* RDRAND exiting */
 		"\015INVPCID"		/* Enable INVPCID */
 		"\016VMFUNC"		/* Enable VM functions */
 		"\017VMCS"		/* VMCS shadowing */
 		"\020EPT#VE"		/* EPT-violation #VE */
 		"\021XSAVES"		/* Enable XSAVES/XRSTORS */
 		);
 	/*
 	 * Decode the settable exit controls.  This previously printed
 	 * "mask", which still held the Basic Features word at this point.
 	 */
 	printf("\n        Exit Controls=0x%b", exit,
 	"\020"
 	"\003DR"		/* Save debug controls */
 				/* Ignore Host address-space size */
 	"\015PERF"		/* Load MSR_PERF_GLOBAL_CTRL */
 	"\020AckInt"		/* Acknowledge interrupt on exit */
 	"\023PAT-SV"		/* Save MSR_PAT */
 	"\024PAT-LD"		/* Load MSR_PAT */
 	"\025EFER-SV"		/* Save MSR_EFER */
 	"\026EFER-LD"		/* Load MSR_EFER */
 	"\027PTMR-SV"		/* Save VMX-preemption timer value */
 	);
 	/* Likewise decode the settable entry controls, not "mask". */
 	printf("\n        Entry Controls=0x%b", entry,
 	"\020"
 	"\003DR"		/* Load debug controls */
 				/* Ignore IA-32e mode guest */
 				/* Ignore Entry to SMM */
 				/* Ignore Deactivate dual-monitor treatment */
 	"\016PERF"		/* Load MSR_PERF_GLOBAL_CTRL */
 	"\017PAT"		/* Load MSR_PAT */
 	"\020EFER"		/* Load MSR_EFER */
 	);
 	if (proc & PROCBASED_SECONDARY_CONTROLS &&
 	    (proc2 & (PROCBASED2_ENABLE_EPT | PROCBASED2_ENABLE_VPID)) != 0) {
 		msr = rdmsr(MSR_VMX_EPT_VPID_CAP);
 		/* The low half is decoded as EPT, the high half as VPID. */
 		mask = msr;
 		printf("\n        EPT Features=0x%b", mask,
 		"\020"
 		"\001XO"		/* Execute-only translations */
 		"\007PW4"		/* Page-walk length of 4 */
 		"\011UC"		/* EPT paging-structure mem can be UC */
 		"\017WB"		/* EPT paging-structure mem can be WB */
 		"\0212M"		/* EPT PDE can map a 2-Mbyte page */
 		"\0221G"		/* EPT PDPTE can map a 1-Gbyte page */
 		"\025INVEPT"		/* INVEPT is supported */
 		"\026AD"		/* Accessed and dirty flags for EPT */
 		"\032single"		/* INVEPT single-context type */
 		"\033all"		/* INVEPT all-context type */
 		);
 		mask = msr >> 32;
 		printf("\n        VPID Features=0x%b", mask,
 		"\020"
 		"\001INVVPID"		/* INVVPID is supported */
 		"\011individual"	/* INVVPID individual-address type */
 		"\012single"		/* INVVPID single-context type */
 		"\013all"		/* INVVPID all-context type */
 		 /* INVVPID single-context-retaining-globals type */
 		"\014single-globals"
 		);
 	}
 }
 
 /*
  * Print the hypervisor vendor string, if hv_vendor is not empty.
  */
 static void
 print_hypervisor_info(void)
 {
 
 	if (hv_vendor[0] == '\0')
 		return;
 	printf("Hypervisor: Origin = \"%s\"\n", hv_vendor);
 }
 
 /*
  * Report the highest physical address usable on the running system.
  *
  * The result is a mask of cpu_maxphyaddr one-bits.  On i386 builds with
  * pae_mode clear the result is 0xffffffff instead.
  */
 vm_paddr_t
 cpu_getmaxphyaddr(void)
 {
 	vm_paddr_t max_pa;
 
 #ifdef __i386__
 	if (pae_mode == 0) {
 		max_pa = 0xffffffff;
 		return (max_pa);
 	}
 #endif
 	max_pa = (1ULL << cpu_maxphyaddr) - 1;
 	return (max_pa);
 }
Index: projects/clang1000-import/tests/sys/net/if_epair.c
===================================================================
--- projects/clang1000-import/tests/sys/net/if_epair.c	(revision 357389)
+++ projects/clang1000-import/tests/sys/net/if_epair.c	(revision 357390)
@@ -1,75 +1,75 @@
 /*-
  * Copyright (c) 2020   Kristof Provost <kp@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/ioctl.h>
 #include <sys/linker.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/types.h>
 
 #include <net/if.h>
 
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <strings.h>
 
 #include <atf-c.h>
 
 ATF_TC(params);
 ATF_TC_HEAD(params, tc)
 {
         atf_tc_set_md_var(tc, "require.user", "root");
 }
 
 /*
  * Body of the "params" test case: loads the if_epair module, opens a
  * datagram socket and issues SIOCIFCREATE2 for "epair" with ifr_data set
  * to an all-ones pointer value.  The ioctl result is not examined.
  * NOTE(review): presumably the test only checks that the system survives
  * the bogus ifr_data pointer -- confirm against the original bug report.
  */
 ATF_TC_BODY(params, tc)
 {
 	struct ifreq ifr;
 	int s;
 
 	/* An already-loaded module (EEXIST) is not treated as a failure. */
 	s = kldload("if_epair");
-	if (s != 0 && errno != EEXIST)
+	if (s == -1 && errno != EEXIST)
 		atf_tc_fail("Failed to load if_epair");
 
 	/* "s" is reused from here on as the socket descriptor. */
 	s = socket(AF_INET, SOCK_DGRAM, 0);
 	if (s < 0)
 		atf_tc_fail("Failed to create socket");
 
         bzero(&ifr, sizeof(ifr));
 	/* Deliberately not a valid pointer: (caddr_t)-1. */
 	ifr.ifr_data = (caddr_t)-1;
         (void) strlcpy(ifr.ifr_name, "epair", sizeof(ifr.ifr_name));
 
 	/* The return value is intentionally ignored here. */
 	ioctl(s, SIOCIFCREATE2, &ifr);
 }
 
 /* Test-program entry: registers the "params" test case with tp. */
 ATF_TP_ADD_TCS(tp)
 {
         ATF_TP_ADD_TC(tp, params);
 
 	return (atf_no_error());
 }
Index: projects/clang1000-import
===================================================================
--- projects/clang1000-import	(revision 357389)
+++ projects/clang1000-import	(revision 357390)

Property changes on: projects/clang1000-import
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r357368-357388