Index: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c
===================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c	(revision 262163)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c	(revision 262164)
@@ -1,1003 +1,1025 @@
 /*
  * LZ4 - Fast LZ compression algorithm
  * Header File
  * Copyright (C) 2011-2013, Yann Collet.
  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  *     * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * You can contact the author at :
  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
  * - LZ4 source repository : http://code.google.com/p/lz4/
  */
 
 #include <sys/zfs_context.h>
 
 static int real_LZ4_compress(const char *source, char *dest, int isize,
     int osize);
 static int LZ4_compressBound(int isize);
 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
     int isize, int maxOutputSize);
 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
     int isize, int osize);
 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
     int isize, int osize);
 
+static kmem_cache_t *lz4_ctx_cache;	/* created in lz4_init() */
+
 /*ARGSUSED*/
 size_t
 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
 {
 	uint32_t bufsiz;
 	char *dest = d_start;
 
 	ASSERT(d_len >= sizeof (bufsiz));
 
 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
 	    d_len - sizeof (bufsiz));
 
 	/* Signal an error if the compression routine returned zero. */
 	if (bufsiz == 0)
 		return (s_len);
 
 	/*
 	 * Encode the compresed buffer size at the start. We'll need this in
 	 * decompression to counter the effects of padding which might be
 	 * added to the compressed buffer and which, if unhandled, would
 	 * confuse the hell out of our decompression function.
 	 */
 	*(uint32_t *)dest = BE_32(bufsiz);
 
 	return (bufsiz + sizeof (bufsiz));
 }
 
 /*ARGSUSED*/
 int
 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
 {
 	const char *src = s_start;
 	uint32_t bufsiz = BE_IN32(src);
 
 	/* invalid compressed buffer size encoded at start */
 	if (bufsiz + sizeof (bufsiz) > s_len)
 		return (1);
 
 	/*
 	 * Returns 0 on success (decompression function returned non-negative)
 	 * and non-zero on failure (decompression function returned negative.
 	 */
 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
 	    d_start, bufsiz, d_len) < 0);
 }
 
 /*
  * LZ4 API Description:
  *
  * Simple Functions:
  * real_LZ4_compress() :
  * 	isize  : is the input size. Max supported value is ~1.9GB
  * 	return : the number of bytes written in buffer dest
  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
  * 	note : destination buffer must be already allocated.
  * 		destination buffer must be sized to handle worst cases
  * 		situations (input data not compressible) worst case size
  * 		evaluation is provided by function LZ4_compressBound().
  *
  * Advanced Functions
  *
  * LZ4_compressBound() :
  * 	Provides the maximum size that LZ4 may output in a "worst case"
  * 	scenario (input data not compressible) primarily useful for memory
  * 	allocation of output buffer.
  *
  * 	isize  : is the input size. Max supported value is ~1.9GB
  * 	return : maximum output size in a "worst case" scenario
  * 	note : this function is limited by "int" range (2^31-1)
  *
  * LZ4_uncompress_unknownOutputSize() :
  * 	isize  : is the input size, therefore the compressed size
  * 	maxOutputSize : is the size of the destination buffer (which must be
  * 		already allocated)
  * 	return : the number of bytes decoded in the destination buffer
  * 		(necessarily <= maxOutputSize). If the source stream is
  * 		malformed, the function will stop decoding and return a
  * 		negative result, indicating the byte position of the faulty
  * 		instruction. This function never writes beyond dest +
  * 		maxOutputSize, and is therefore protected against malicious
  * 		data packets.
  * 	note   : Destination buffer must be already allocated.
  *
  * LZ4_compressCtx() :
  * 	This function explicitly handles the CTX memory structure.
  *
  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
  * 	isn't valid.
  *
  * LZ4_compress64kCtx() :
  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
  *
  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
  * 	isn't valid.
  */
 
 /*
  * Tuning parameters
  */
 
 /*
  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
  *	 Lowering this value reduces memory usage. Reduced memory usage
  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
  *	(examples : 12 -> 16KB ; 17 -> 512KB)
  */
 #define	COMPRESSIONLEVEL 12
 
 /*
  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
  *	algorithm skip faster data segments considered "incompressible".
  *	This may decrease compression ratio dramatically, but will be
  *	faster on incompressible data. Increasing this value will make
  *	the algorithm search more before declaring a segment "incompressible".
  *	This could improve compression a bit, but will be slower on
  *	incompressible data. The default value (6) is recommended.
  */
 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
 
 /*
  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
  * performance for big endian cpu, but the resulting compressed stream
  * will be incompatible with little-endian CPU. You can set this option
  * to 1 in situations where data will stay within closed environment.
  * This option is useless on Little_Endian CPU (such as x86).
  */
 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
 
 /*
  * CPU Feature Detection
  */
 
 /* 32 or 64 bits ? */
 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
     defined(__LP64__) || defined(_LP64))
 #define	LZ4_ARCH64 1
 /*
  * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
  * can spend 16k on the algorithm
  */
 /* FreeBSD: Use heap for all platforms for now */
 #define	STACKLIMIT 0
 #else
 #define	LZ4_ARCH64 0
 /*
  * Illumos: On i386 we only have 12k of stack, so in order to maintain the
  * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
  * suck, but alas, it's ZFS on 32-bit we're talking about, so...
  */
 #define	STACKLIMIT 0
 #endif
 
 /*
  * Little Endian or Big Endian?
  * Note: overwrite the below #define if you know your architecture endianess.
  */
 #if BYTE_ORDER == BIG_ENDIAN
 #define	LZ4_BIG_ENDIAN 1
 #else
 /*
  * Little Endian assumed. PDP Endian and other very rare endian format
  * are unsupported.
  */
 #endif
 
 /*
  * Unaligned memory access is automatically enabled for "common" CPU,
  * such as x86. For others CPU, the compiler will be more cautious, and
  * insert extra code to ensure aligned access is respected. If you know
  * your target CPU supports unaligned memory access, you may want to
  * force this option manually to improve performance
  */
 #if defined(__ARM_FEATURE_UNALIGNED)
 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
 #endif
 
 /*
  * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because
  * gcc currently rely on libcompiler_rt.
  *
  * TODO: revisit this when situation changes.
  */
 #if defined(__sparc64__)
 #define	LZ4_FORCE_SW_BITCOUNT
 #endif
 
 /*
  * Compiler Options
  */
 #if __STDC_VERSION__ >= 199901L	/* C99 */
 /* "restrict" is a known keyword */
 #else
 /* Disable restrict */
 #define	restrict
 #endif
 
 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
 	(((x) & 0xffu) << 8)))
 
 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
 
 #if defined(likely)
 #undef likely
 #endif
 #if defined(unlikely)
 #undef unlikely
 #endif
 
 #define	likely(expr)	expect((expr) != 0, 1)
 #define	unlikely(expr)	expect((expr) != 0, 0)
 
 /* Basic types */
 #define	BYTE	uint8_t
 #define	U16	uint16_t
 #define	U32	uint32_t
 #define	S32	int32_t
 #define	U64	uint64_t
 
 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
 #pragma pack(1)
 #endif
 
 typedef struct _U16_S {
 	U16 v;
 } U16_S;
 typedef struct _U32_S {
 	U32 v;
 } U32_S;
 typedef struct _U64_S {
 	U64 v;
 } U64_S;
 
 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
 #pragma pack()
 #endif
 
 #define	A64(x) (((U64_S *)(x))->v)
 #define	A32(x) (((U32_S *)(x))->v)
 #define	A16(x) (((U16_S *)(x))->v)
 
 /*
  * Constants
  */
 #define	MINMATCH 4
 
 #define	HASH_LOG COMPRESSIONLEVEL
 #define	HASHTABLESIZE (1 << HASH_LOG)
 #define	HASH_MASK (HASHTABLESIZE - 1)
 
 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
 
 /*
  * Defines if memory is allocated into the stack (local variable),
  * or into the heap (kmem_alloc()).
  */
 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
 #define	COPYLENGTH 8
 #define	LASTLITERALS 5
 #define	MFLIMIT (COPYLENGTH + MINMATCH)
 #define	MINLENGTH (MFLIMIT + 1)
 
 #define	MAXD_LOG 16
 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
 
 #define	ML_BITS 4
 #define	ML_MASK ((1U<<ML_BITS)-1)
 #define	RUN_BITS (8-ML_BITS)
 #define	RUN_MASK ((1U<<RUN_BITS)-1)
 
 
 /*
  * Architecture-specific macros
  */
 #if LZ4_ARCH64
 #define	STEPSIZE 8
 #define	UARCH U64
 #define	AARCH A64
 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
 #define	HTYPE U32
 #define	INITBASE(base)		const BYTE* const base = ip
 #else /* !LZ4_ARCH64 */
 #define	STEPSIZE 4
 #define	UARCH U32
 #define	AARCH A32
 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
 #define	HTYPE const BYTE *
 #define	INITBASE(base)		const int base = 0
 #endif /* !LZ4_ARCH64 */
 
 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
 #else
 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
 #endif
 
 
 /* Local structures */
 struct refTables {
 	HTYPE hashTable[HASHTABLESIZE];
 };
 
 
 /* Macros */
 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
 	HASH_LOG))
 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
 	d = e; }
 
 
 /* Private functions */
 #if LZ4_ARCH64
 
 static inline int
 LZ4_NbCommonBytes(register U64 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
 #if !defined(LZ4_FORCE_SW_BITCOUNT)
 	return (__builtin_clzll(val) >> 3);
 #else
 	int r;
 	if (!(val >> 32)) {
 		r = 4;
 	} else {
 		r = 0;
 		val >>= 32;
 	}
 	if (!(val >> 16)) {
 		r += 2;
 		val >>= 8;
 	} else {
 		val >>= 24;
 	}
 	r += (!val);
 	return (r);
 #endif
 #else
 #if !defined(LZ4_FORCE_SW_BITCOUNT)
 	return (__builtin_ctzll(val) >> 3);
 #else
 	static const int DeBruijnBytePos[64] =
 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
 	};
 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
 	    58];
 #endif
 #endif
 }
 
 #else
 
 static inline int
 LZ4_NbCommonBytes(register U32 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
 #if !defined(LZ4_FORCE_SW_BITCOUNT)
 	return (__builtin_clz(val) >> 3);
 #else
 	int r;
 	if (!(val >> 16)) {
 		r = 2;
 		val >>= 8;
 	} else {
 		r = 0;
 		val >>= 24;
 	}
 	r += (!val);
 	return (r);
 #endif
 #else
 #if !defined(LZ4_FORCE_SW_BITCOUNT)
 	return (__builtin_ctz(val) >> 3);
 #else
 	static const int DeBruijnBytePos[32] = {
 		0, 0, 3, 0, 3, 1, 3, 0,
 		3, 2, 2, 1, 3, 2, 0, 1,
 		3, 3, 1, 2, 2, 2, 2, 0,
 		3, 1, 2, 0, 1, 0, 1, 1
 	};
 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
 	    27];
 #endif
 #endif
 }
 
 #endif
 
 /* Public functions */
 
 static int
 LZ4_compressBound(int isize)
 {
 	return (isize + (isize / 255) + 16);
 }
 
 /* Compression functions */
 
 /*ARGSUSED*/
 static int
 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
     int osize)
 {
 #if HEAPMODE
 	struct refTables *srt = (struct refTables *)ctx;
 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
 #else
 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
 #endif
 
 	const BYTE *ip = (BYTE *) source;
 	INITBASE(base);
 	const BYTE *anchor = ip;
 	const BYTE *const iend = ip + isize;
 	const BYTE *const oend = (BYTE *) dest + osize;
 	const BYTE *const mflimit = iend - MFLIMIT;
 #define	matchlimit (iend - LASTLITERALS)
 
 	BYTE *op = (BYTE *) dest;
 
 	int len, length;
 	const int skipStrength = SKIPSTRENGTH;
 	U32 forwardH;
 
 
 	/* Init */
 	if (isize < MINLENGTH)
 		goto _last_literals;
 
 	/* First Byte */
 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
 	ip++;
 	forwardH = LZ4_HASH_VALUE(ip);
 
 	/* Main Loop */
 	for (;;) {
 		int findMatchAttempts = (1U << skipStrength) + 3;
 		const BYTE *forwardIp = ip;
 		const BYTE *ref;
 		BYTE *token;
 
 		/* Find a match */
 		do {
 			U32 h = forwardH;
 			int step = findMatchAttempts++ >> skipStrength;
 			ip = forwardIp;
 			forwardIp = ip + step;
 
 			if unlikely(forwardIp > mflimit) {
 				goto _last_literals;
 			}
 
 			forwardH = LZ4_HASH_VALUE(forwardIp);
 			ref = base + HashTable[h];
 			HashTable[h] = ip - base;
 
 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
 
 		/* Catch up */
 		while ((ip > anchor) && (ref > (BYTE *) source) &&
 		    unlikely(ip[-1] == ref[-1])) {
 			ip--;
 			ref--;
 		}
 
 		/* Encode Literal length */
 		length = ip - anchor;
 		token = op++;
 
 		/* Check output limit */
 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
 		    (length >> 8) > oend)
 			return (0);
 
 		if (length >= (int)RUN_MASK) {
 			*token = (RUN_MASK << ML_BITS);
 			len = length - RUN_MASK;
 			for (; len > 254; len -= 255)
 				*op++ = 255;
 			*op++ = (BYTE)len;
 		} else
 			*token = (length << ML_BITS);
 
 		/* Copy Literals */
 		LZ4_BLINDCOPY(anchor, op, length);
 
 		_next_match:
 		/* Encode Offset */
 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
 
 		/* Start Counting */
 		ip += MINMATCH;
 		ref += MINMATCH;	/* MinMatch verified */
 		anchor = ip;
 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
 			UARCH diff = AARCH(ref) ^ AARCH(ip);
 			if (!diff) {
 				ip += STEPSIZE;
 				ref += STEPSIZE;
 				continue;
 			}
 			ip += LZ4_NbCommonBytes(diff);
 			goto _endCount;
 		}
 #if LZ4_ARCH64
 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
 			ip += 4;
 			ref += 4;
 		}
 #endif
 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
 			ip += 2;
 			ref += 2;
 		}
 		if ((ip < matchlimit) && (*ref == *ip))
 			ip++;
 		_endCount:
 
 		/* Encode MatchLength */
 		len = (ip - anchor);
 		/* Check output limit */
 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
 			return (0);
 		if (len >= (int)ML_MASK) {
 			*token += ML_MASK;
 			len -= ML_MASK;
 			for (; len > 509; len -= 510) {
 				*op++ = 255;
 				*op++ = 255;
 			}
 			if (len > 254) {
 				len -= 255;
 				*op++ = 255;
 			}
 			*op++ = (BYTE)len;
 		} else
 			*token += len;
 
 		/* Test end of chunk */
 		if (ip > mflimit) {
 			anchor = ip;
 			break;
 		}
 		/* Fill table */
 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
 
 		/* Test next position */
 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 		/* Prepare next loop */
 		anchor = ip++;
 		forwardH = LZ4_HASH_VALUE(ip);
 	}
 
 	_last_literals:
 	/* Encode Last Literals */
 	{
 		int lastRun = iend - anchor;
 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
 		    oend)
 			return (0);
 		if (lastRun >= (int)RUN_MASK) {
 			*op++ = (RUN_MASK << ML_BITS);
 			lastRun -= RUN_MASK;
 			for (; lastRun > 254; lastRun -= 255) {
 				*op++ = 255;
 			}
 			*op++ = (BYTE)lastRun;
 		} else
 			*op++ = (lastRun << ML_BITS);
 		(void) memcpy(op, anchor, iend - anchor);
 		op += iend - anchor;
 	}
 
 	/* End */
 	return (int)(((char *)op) - dest);
 }
 
 
 
 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
 #define	HASHLOG64K (HASH_LOG + 1)
 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
 	HASHLOG64K))
 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
 
 /*ARGSUSED*/
 static int
 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
     int osize)
 {
 #if HEAPMODE
 	struct refTables *srt = (struct refTables *)ctx;
 	U16 *HashTable = (U16 *) (srt->hashTable);
 #else
 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
 #endif
 
 	const BYTE *ip = (BYTE *) source;
 	const BYTE *anchor = ip;
 	const BYTE *const base = ip;
 	const BYTE *const iend = ip + isize;
 	const BYTE *const oend = (BYTE *) dest + osize;
 	const BYTE *const mflimit = iend - MFLIMIT;
 #define	matchlimit (iend - LASTLITERALS)
 
 	BYTE *op = (BYTE *) dest;
 
 	int len, length;
 	const int skipStrength = SKIPSTRENGTH;
 	U32 forwardH;
 
 	/* Init */
 	if (isize < MINLENGTH)
 		goto _last_literals;
 
 	/* First Byte */
 	ip++;
 	forwardH = LZ4_HASH64K_VALUE(ip);
 
 	/* Main Loop */
 	for (;;) {
 		int findMatchAttempts = (1U << skipStrength) + 3;
 		const BYTE *forwardIp = ip;
 		const BYTE *ref;
 		BYTE *token;
 
 		/* Find a match */
 		do {
 			U32 h = forwardH;
 			int step = findMatchAttempts++ >> skipStrength;
 			ip = forwardIp;
 			forwardIp = ip + step;
 
 			if (forwardIp > mflimit) {
 				goto _last_literals;
 			}
 
 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
 			ref = base + HashTable[h];
 			HashTable[h] = ip - base;
 
 		} while (A32(ref) != A32(ip));
 
 		/* Catch up */
 		while ((ip > anchor) && (ref > (BYTE *) source) &&
 		    (ip[-1] == ref[-1])) {
 			ip--;
 			ref--;
 		}
 
 		/* Encode Literal length */
 		length = ip - anchor;
 		token = op++;
 
 		/* Check output limit */
 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
 		    (length >> 8) > oend)
 			return (0);
 
 		if (length >= (int)RUN_MASK) {
 			*token = (RUN_MASK << ML_BITS);
 			len = length - RUN_MASK;
 			for (; len > 254; len -= 255)
 				*op++ = 255;
 			*op++ = (BYTE)len;
 		} else
 			*token = (length << ML_BITS);
 
 		/* Copy Literals */
 		LZ4_BLINDCOPY(anchor, op, length);
 
 		_next_match:
 		/* Encode Offset */
 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
 
 		/* Start Counting */
 		ip += MINMATCH;
 		ref += MINMATCH;	/* MinMatch verified */
 		anchor = ip;
 		while (ip < matchlimit - (STEPSIZE - 1)) {
 			UARCH diff = AARCH(ref) ^ AARCH(ip);
 			if (!diff) {
 				ip += STEPSIZE;
 				ref += STEPSIZE;
 				continue;
 			}
 			ip += LZ4_NbCommonBytes(diff);
 			goto _endCount;
 		}
 #if LZ4_ARCH64
 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
 			ip += 4;
 			ref += 4;
 		}
 #endif
 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
 			ip += 2;
 			ref += 2;
 		}
 		if ((ip < matchlimit) && (*ref == *ip))
 			ip++;
 		_endCount:
 
 		/* Encode MatchLength */
 		len = (ip - anchor);
 		/* Check output limit */
 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
 			return (0);
 		if (len >= (int)ML_MASK) {
 			*token += ML_MASK;
 			len -= ML_MASK;
 			for (; len > 509; len -= 510) {
 				*op++ = 255;
 				*op++ = 255;
 			}
 			if (len > 254) {
 				len -= 255;
 				*op++ = 255;
 			}
 			*op++ = (BYTE)len;
 		} else
 			*token += len;
 
 		/* Test end of chunk */
 		if (ip > mflimit) {
 			anchor = ip;
 			break;
 		}
 		/* Fill table */
 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
 
 		/* Test next position */
 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
 		if (A32(ref) == A32(ip)) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 		/* Prepare next loop */
 		anchor = ip++;
 		forwardH = LZ4_HASH64K_VALUE(ip);
 	}
 
 	_last_literals:
 	/* Encode Last Literals */
 	{
 		int lastRun = iend - anchor;
 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
 		    oend)
 			return (0);
 		if (lastRun >= (int)RUN_MASK) {
 			*op++ = (RUN_MASK << ML_BITS);
 			lastRun -= RUN_MASK;
 			for (; lastRun > 254; lastRun -= 255)
 				*op++ = 255;
 			*op++ = (BYTE)lastRun;
 		} else
 			*op++ = (lastRun << ML_BITS);
 		(void) memcpy(op, anchor, iend - anchor);
 		op += iend - anchor;
 	}
 
 	/* End */
 	return (int)(((char *)op) - dest);
 }
 
 static int
 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
 {
 #if HEAPMODE
-	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
+	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
 	int result;
 
 	/*
 	 * out of kernel memory, gently fall through - this will disable
 	 * compression in zio_compress_data
 	 */
 	if (ctx == NULL)
 		return (0);
 
+	bzero(ctx, sizeof(struct refTables));	/* cache alloc does not zero */
 	if (isize < LZ4_64KLIMIT)
 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
 	else
 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
 
-	kmem_free(ctx, sizeof (struct refTables));
+	kmem_cache_free(lz4_ctx_cache, ctx);
 	return (result);
 #else
 	if (isize < (int)LZ4_64KLIMIT)
 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
 #endif
 }
 
 /* Decompression functions */
 
 /*
  * Note: The decoding functionLZ4_uncompress_unknownOutputSize() is safe
  *	against "buffer overflow" attack type. They will never write nor
  *	read outside of the provided output buffers.
  *	LZ4_uncompress_unknownOutputSize() also insures that it will never
  *	read outside of the input buffer.  A corrupted input will produce
  *	an error result, a negative int, indicating the position of the
  *	error within input stream.
  */
 
 static int
 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
     int maxOutputSize)
 {
 	/* Local Variables */
 	const BYTE *restrict ip = (const BYTE *) source;
 	const BYTE *const iend = ip + isize;
 	const BYTE *ref;
 
 	BYTE *op = (BYTE *) dest;
 	BYTE *const oend = op + maxOutputSize;
 	BYTE *cpy;
 
 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
 #if LZ4_ARCH64
 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
 #endif
 
 	/* Main Loop */
 	while (ip < iend) {
 		unsigned token;
 		size_t length;
 
 		/* get runlength */
 		token = *ip++;
 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
 			int s = 255;
 			while ((ip < iend) && (s == 255)) {
 				s = *ip++;
 				length += s;
 			}
 		}
 		/* copy literals */
 		cpy = op + length;
 		if ((cpy > oend - COPYLENGTH) ||
 		    (ip + length > iend - COPYLENGTH)) {
 			if (cpy > oend)
 				/* Error: writes beyond output buffer */
 				goto _output_error;
 			if (ip + length != iend)
 				/*
 				 * Error: LZ4 format requires to consume all
 				 * input at this stage
 				 */
 				goto _output_error;
 			(void) memcpy(op, ip, length);
 			op += length;
 			/* Necessarily EOF, due to parsing restrictions */
 			break;
 		}
 		LZ4_WILDCOPY(ip, op, cpy);
 		ip -= (op - cpy);
 		op = cpy;
 
 		/* get offset */
 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
 		ip += 2;
 		if (ref < (BYTE * const) dest)
 			/*
 			 * Error: offset creates reference outside of
 			 * destination buffer
 			 */
 			goto _output_error;
 
 		/* get matchlength */
 		if ((length = (token & ML_MASK)) == ML_MASK) {
 			while (ip < iend) {
 				int s = *ip++;
 				length += s;
 				if (s == 255)
 					continue;
 				break;
 			}
 		}
 		/* copy repeated sequence */
 		if unlikely(op - ref < STEPSIZE) {
 #if LZ4_ARCH64
 			size_t dec64 = dec64table[op-ref];
 #else
 			const int dec64 = 0;
 #endif
 			op[0] = ref[0];
 			op[1] = ref[1];
 			op[2] = ref[2];
 			op[3] = ref[3];
 			op += 4;
 			ref += 4;
 			ref -= dec32table[op-ref];
 			A32(op) = A32(ref);
 			op += STEPSIZE - 4;
 			ref -= dec64;
 		} else {
 			LZ4_COPYSTEP(ref, op);
 		}
 		cpy = op + length - (STEPSIZE - 4);
 		if (cpy > oend - COPYLENGTH) {
 			if (cpy > oend)
 				/*
 				 * Error: request to write outside of
 				 * destination buffer
 				 */
 				goto _output_error;
 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
 			while (op < cpy)
 				*op++ = *ref++;
 			op = cpy;
 			if (op == oend)
 				/*
 				 * Check EOF (should never happen, since
 				 * last 5 bytes are supposed to be literals)
 				 */
 				goto _output_error;
 			continue;
 		}
 		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy;	/* correction */
 	}
 
 	/* end of decoding */
 	return (int)(((char *)op) - dest);
 
 	/* write overflow error detected */
 	_output_error:
 	return (int)(-(((char *)ip) - source));
+}
+
+extern void
+lz4_init(void)
+{
+	/* Cache of hash-table contexts handed out by real_LZ4_compress(). */
+#if HEAPMODE
+	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
+	    0, NULL, NULL, NULL, NULL, NULL, 0);
+#endif
+}
+
+extern void
+lz4_fini(void)
+{
+	/* Tears down the cache built by lz4_init(); no-op without HEAPMODE. */
+#if HEAPMODE
+	kmem_cache_destroy(lz4_ctx_cache);
+#endif
 }
Index: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
===================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	(revision 262163)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c	(revision 262164)
@@ -1,1920 +1,1922 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/spa_impl.h>
 #include <sys/spa_boot.h>
 #include <sys/zio.h>
 #include <sys/zio_checksum.h>
 #include <sys/zio_compress.h>
 #include <sys/dmu.h>
 #include <sys/dmu_tx.h>
 #include <sys/zap.h>
 #include <sys/zil.h>
 #include <sys/vdev_impl.h>
 #include <sys/metaslab.h>
 #include <sys/uberblock_impl.h>
 #include <sys/txg.h>
 #include <sys/avl.h>
 #include <sys/unique.h>
 #include <sys/dsl_pool.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_scan.h>
 #include <sys/fs/zfs.h>
 #include <sys/metaslab_impl.h>
 #include <sys/arc.h>
 #include <sys/ddt.h>
 #include "zfs_prop.h"
 #include "zfeature_common.h"
 
 /*
  * SPA locking
  *
  * There are four basic locks for managing spa_t structures:
  *
  * spa_namespace_lock (global mutex)
  *
  *	This lock must be acquired to do any of the following:
  *
  *		- Lookup a spa_t by name
  *		- Add or remove a spa_t from the namespace
  *		- Increase spa_refcount from non-zero
  *		- Check if spa_refcount is zero
  *		- Rename a spa_t
  *		- add/remove/attach/detach devices
  *		- Held for the duration of create/destroy/import/export
  *
  *	It does not need to handle recursion.  A create or destroy may
  *	reference objects (files or zvols) in other pools, but by
  *	definition they must have an existing reference, and will never need
  *	to lookup a spa_t by name.
  *
  * spa_refcount (per-spa refcount_t protected by mutex)
  *
  *	This reference count keeps track of any active users of the spa_t.  The
  *	spa_t cannot be destroyed or freed while this is non-zero.  Internally,
  *	the refcount is never really 'zero' - opening a pool implicitly keeps
  *	some references in the DMU.  Internally we check against spa_minref, but
  *	present the image of a zero/non-zero value to consumers.
  *
  * spa_config_lock[] (per-spa array of rwlocks)
  *
  *	This protects the spa_t from config changes, and must be held in
  *	the following circumstances:
  *
  *		- RW_READER to perform I/O to the spa
  *		- RW_WRITER to change the vdev config
  *
  * The locking order is fairly straightforward:
  *
  *		spa_namespace_lock	->	spa_refcount
  *
  *	The namespace lock must be acquired to increase the refcount from 0
  *	or to check if it is zero.
  *
  *		spa_refcount		->	spa_config_lock[]
  *
  *	There must be at least one valid reference on the spa_t to acquire
  *	the config lock.
  *
  *		spa_namespace_lock	->	spa_config_lock[]
  *
  *	The namespace lock must always be taken before the config lock.
  *
  *
  * The spa_namespace_lock can be acquired directly and is globally visible.
  *
  * The namespace is manipulated using the following functions, all of which
  * require the spa_namespace_lock to be held.
  *
  *	spa_lookup()		Lookup a spa_t by name.
  *
  *	spa_add()		Create a new spa_t in the namespace.
  *
  *	spa_remove()		Remove a spa_t from the namespace.  This also
  *				frees up any memory associated with the spa_t.
  *
  *	spa_next()		Returns the next spa_t in the system, or the
  *				first if NULL is passed.
  *
  *	spa_evict_all()		Shutdown and remove all spa_t structures in
  *				the system.
  *
  *	spa_guid_exists()	Determine whether a pool/device guid exists.
  *
  * The spa_refcount is manipulated using the following functions:
  *
  *	spa_open_ref()		Adds a reference to the given spa_t.  Must be
  *				called with spa_namespace_lock held if the
  *				refcount is currently zero.
  *
  *	spa_close()		Remove a reference from the spa_t.  This will
  *				not free the spa_t or remove it from the
  *				namespace.  No locking is required.
  *
  *	spa_refcount_zero()	Returns true if the refcount is currently
  *				zero.  Must be called with spa_namespace_lock
  *				held.
  *
  * The spa_config_lock[] is an array of rwlocks, ordered as follows:
  * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV.
  * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}().
  *
  * To read the configuration, it suffices to hold one of these locks as reader.
  * To modify the configuration, you must hold all locks as writer.  To modify
  * vdev state without altering the vdev tree's topology (e.g. online/offline),
  * you must hold SCL_STATE and SCL_ZIO as writer.
  *
  * We use these distinct config locks to avoid recursive lock entry.
  * For example, spa_sync() (which holds SCL_CONFIG as reader) induces
  * block allocations (SCL_ALLOC), which may require reading space maps
  * from disk (dmu_read() -> zio_read() -> SCL_ZIO).
  *
  * The spa config locks cannot be normal rwlocks because we need the
  * ability to hand off ownership.  For example, SCL_ZIO is acquired
  * by the issuing thread and later released by an interrupt thread.
  * They do, however, obey the usual write-wanted semantics to prevent
  * writer (i.e. system administrator) starvation.
  *
  * The lock acquisition rules are as follows:
  *
  * SCL_CONFIG
  *	Protects changes to the vdev tree topology, such as vdev
  *	add/remove/attach/detach.  Protects the dirty config list
  *	(spa_config_dirty_list) and the set of spares and l2arc devices.
  *
  * SCL_STATE
  *	Protects changes to pool state and vdev state, such as vdev
  *	online/offline/fault/degrade/clear.  Protects the dirty state list
  *	(spa_state_dirty_list) and global pool state (spa_state).
  *
  * SCL_ALLOC
  *	Protects changes to metaslab groups and classes.
  *	Held as reader by metaslab_alloc() and metaslab_claim().
  *
  * SCL_ZIO
  *	Held by bp-level zios (those which have no io_vd upon entry)
  *	to prevent changes to the vdev tree.  The bp-level zio implicitly
  *	protects all of its vdev child zios, which do not hold SCL_ZIO.
  *
  * SCL_FREE
  *	Protects changes to metaslab groups and classes.
  *	Held as reader by metaslab_free().  SCL_FREE is distinct from
  *	SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free
  *	blocks in zio_done() while another i/o that holds either
  *	SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete.
  *
  * SCL_VDEV
  *	Held as reader to prevent changes to the vdev tree during trivial
  *	inquiries such as bp_get_dsize().  SCL_VDEV is distinct from the
  *	other locks, and lower than all of them, to ensure that it's safe
  *	to acquire regardless of caller context.
  *
  * In addition, the following rules apply:
  *
  * (a)	spa_props_lock protects pool properties, spa_config and spa_config_list.
  *	The lock ordering is SCL_CONFIG > spa_props_lock.
  *
  * (b)	I/O operations on leaf vdevs.  For any zio operation that takes
  *	an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(),
  *	or zio_write_phys() -- the caller must ensure that the config cannot
  *	change in the interim, and that the vdev cannot be reopened.
  *	SCL_STATE as reader suffices for both.
  *
  * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit().
  *
  *	spa_vdev_enter()	Acquire the namespace lock and the config lock
  *				for writing.
  *
  *	spa_vdev_exit()		Release the config lock, wait for all I/O
  *				to complete, sync the updated configs to the
  *				cache, and release the namespace lock.
  *
  * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit().
  * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
  * locking is, always, based on spa_namespace_lock and spa_config_lock[].
  *
  * spa_rename() is also implemented within this file since it requires
  * manipulation of the namespace.
  */
 
 /* AVL tree of the spa_t's in the namespace; see spa_lookup()/spa_add(). */
 static avl_tree_t spa_namespace_avl;
 /* Global mutex that the namespace functions in this file assert is held. */
 kmutex_t spa_namespace_lock;
 /* Broadcast by spa_remove() once a spa_t has left the namespace tree. */
 static kcondvar_t spa_namespace_cv;
 /* Bumped by spa_add() for pools with an altroot; dropped in spa_remove(). */
 static int spa_active_count;
 /* NOTE(review): consumers are outside this part of the file -- confirm use. */
 int spa_max_replication_override = SPA_DVAS_PER_BP;
 
 /* Global hot spare tracking: the AVL tree and the mutex that protects it. */
 static kmutex_t spa_spare_lock;
 static avl_tree_t spa_spare_avl;
 /* Global l2cache device tracking: the AVL tree and its mutex. */
 static kmutex_t spa_l2cache_lock;
 static avl_tree_t spa_l2cache_avl;
 
 kmem_cache_t *spa_buffer_pool;
 int spa_mode_global;
 
 #ifdef ZFS_DEBUG
 /* Everything except dprintf and spa is on by default in debug builds */
 int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
 #else
 int zfs_flags = 0;
 #endif
 SYSCTL_DECL(_debug);
 TUNABLE_INT("debug.zfs_flags", &zfs_flags);
 SYSCTL_INT(_debug, OID_AUTO, zfs_flags, CTLFLAG_RWTUN, &zfs_flags, 0,
     "ZFS debug flags.");
 
 /*
  * zfs_recover can be set to nonzero to attempt to recover from
  * otherwise-fatal errors, typically caused by on-disk corruption.  When
  * set, calls to zfs_panic_recover() will turn into warning messages.
  * This should only be used as a last resort, as it typically results
  * in leaked space, or worse.
  */
 int zfs_recover = 0;
 SYSCTL_DECL(_vfs_zfs);
 TUNABLE_INT("vfs.zfs.recover", &zfs_recover);
 SYSCTL_INT(_vfs_zfs, OID_AUTO, recover, CTLFLAG_RDTUN, &zfs_recover, 0,
     "Try to recover from otherwise-fatal errors.");
 
 /*
  * Expiration time in milliseconds. This value has two meanings. First it is
  * used to determine when the spa_deadman() logic should fire. By default the
  * spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
  * Secondly, the value determines if an I/O is considered "hung". Any I/O that
  * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
  * in a system panic.
  */
 uint64_t zfs_deadman_synctime_ms = 1000000ULL;
 TUNABLE_QUAD("vfs.zfs.deadman_synctime_ms", &zfs_deadman_synctime_ms);
 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_synctime_ms, CTLFLAG_RDTUN,
     &zfs_deadman_synctime_ms, 0,
     "Stalled ZFS I/O expiration time in milliseconds");
 
 /*
  * Check time in milliseconds. This defines the frequency at which we check
  * for hung I/O.
  */
 uint64_t zfs_deadman_checktime_ms = 5000ULL;
 TUNABLE_QUAD("vfs.zfs.deadman_checktime_ms", &zfs_deadman_checktime_ms);
 SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_checktime_ms, CTLFLAG_RDTUN,
     &zfs_deadman_checktime_ms, 0,
     "Period of checks for stalled ZFS I/O in milliseconds");
 
 /*
  * Default value of -1 for zfs_deadman_enabled is resolved in
  * zfs_deadman_init()
  */
 int zfs_deadman_enabled = -1;
 TUNABLE_INT("vfs.zfs.deadman_enabled", &zfs_deadman_enabled);
 SYSCTL_INT(_vfs_zfs, OID_AUTO, deadman_enabled, CTLFLAG_RDTUN,
     &zfs_deadman_enabled, 0, "Kernel panic on stalled ZFS I/O");
 
 /*
  * The worst case is single-sector max-parity RAID-Z blocks, in which
  * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
  * times the size; so just assume that.  Add to this the fact that
  * we can have up to 3 DVAs per bp, and one more factor of 2 because
  * the block may be dittoed with up to 3 DVAs by ddt_sync().  All together,
  * the worst case is:
  *     (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24
  */
 int spa_asize_inflation = 24;
 
 #ifndef illumos
 #ifdef _KERNEL
 /*
  * Resolve the default value of zfs_deadman_enabled.
  *
  * The variable starts out as -1, meaning that no explicit value was
  * supplied.  In that case the deadman is enabled only on amd64/i386 when
  * vm_guest reports VM_GUEST_NO, and disabled in every other configuration.
  * Any value other than -1 is left untouched.
  */
 static void
 zfs_deadman_init(void)
 {
 	/*
 	 * If we are not i386 or amd64 or in a virtual machine,
 	 * disable ZFS deadman thread by default
 	 */
 	if (zfs_deadman_enabled == -1) {
 #if defined(__amd64__) || defined(__i386__)
 		zfs_deadman_enabled = (vm_guest == VM_GUEST_NO) ? 1 : 0;
 #else
 		zfs_deadman_enabled = 0;
 #endif
 	}
 }
 #endif	/* _KERNEL */
 #endif	/* !illumos */
 
 /*
  * ==========================================================================
  * SPA config locking
  * ==========================================================================
  */
 static void
 spa_config_lock_init(spa_t *spa)
 {
 	spa_config_lock_t *scl;
 	int i;
 
 	/*
 	 * Put each of the spa's SCL_LOCKS config locks into its idle state:
 	 * no writer, no waiting writers, a fresh mutex and condition
 	 * variable, and an untracked reference count.
 	 */
 	for (i = 0; i < SCL_LOCKS; i++) {
 		scl = &spa->spa_config_lock[i];
 
 		scl->scl_writer = NULL;
 		scl->scl_write_wanted = 0;
 		mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
 		cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
 		refcount_create_untracked(&scl->scl_count);
 	}
 }
 
 static void
 spa_config_lock_destroy(spa_t *spa)
 {
 	spa_config_lock_t *scl;
 	int i = 0;
 
 	/*
 	 * Tear down each config lock in array order.  The asserts check
 	 * that no writer owns the lock and that no writer is waiting.
 	 */
 	while (i < SCL_LOCKS) {
 		scl = &spa->spa_config_lock[i];
 		mutex_destroy(&scl->scl_lock);
 		cv_destroy(&scl->scl_cv);
 		refcount_destroy(&scl->scl_count);
 		ASSERT(scl->scl_writer == NULL);
 		ASSERT(scl->scl_write_wanted == 0);
 		i++;
 	}
 }
 
 /*
  * Non-blocking counterpart of spa_config_enter().
  *
  * Tries to take every config lock named in the 'locks' bitmask, lowest
  * index first, as reader or writer according to 'rw', recording 'tag' as
  * the holder.  Returns 1 if all requested locks were obtained.  If one of
  * them is unavailable, the locks this call has already obtained are
  * released again and 0 is returned.
  */
 int
 spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
 {
 	for (int i = 0; i < SCL_LOCKS; i++) {
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		/*
 		 * Requested locks with an index below i, i.e. exactly the
 		 * ones this call has acquired so far.  Only these may be
 		 * released on failure; higher-numbered locks have not been
 		 * taken yet.
 		 */
 		int taken = locks & ((1 << i) - 1);
 
 		if (!(locks & (1 << i)))
 			continue;
 		mutex_enter(&scl->scl_lock);
 		if (rw == RW_READER) {
 			/* Readers yield to a current or waiting writer. */
 			if (scl->scl_writer || scl->scl_write_wanted) {
 				mutex_exit(&scl->scl_lock);
 				spa_config_exit(spa, taken, tag);
 				return (0);
 			}
 		} else {
 			ASSERT(scl->scl_writer != curthread);
 			/* Writers need the lock to have no holders at all. */
 			if (!refcount_is_zero(&scl->scl_count)) {
 				mutex_exit(&scl->scl_lock);
 				spa_config_exit(spa, taken, tag);
 				return (0);
 			}
 			scl->scl_writer = curthread;
 		}
 		(void) refcount_add(&scl->scl_count, tag);
 		mutex_exit(&scl->scl_lock);
 	}
 	return (1);
 }
 
 /*
  * Acquire, blocking as needed, every config lock named in the 'locks'
  * bitmask, in increasing index order, as reader or writer according to
  * 'rw'.  'tag' is recorded as the holder in each lock's reference count.
  */
 void
 spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
 {
 	/* Bitmask of locks the calling thread already owns as writer. */
 	int wlocks_held = 0;
 
 	/* One bit per lock must fit in wlocks_held. */
 	ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
 
 	for (int i = 0; i < SCL_LOCKS; i++) {
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		if (scl->scl_writer == curthread)
 			wlocks_held |= (1 << i);
 		if (!(locks & (1 << i)))
 			continue;
 		mutex_enter(&scl->scl_lock);
 		if (rw == RW_READER) {
 			/* Readers wait out both active and waiting writers. */
 			while (scl->scl_writer || scl->scl_write_wanted) {
 				cv_wait(&scl->scl_cv, &scl->scl_lock);
 			}
 		} else {
 			ASSERT(scl->scl_writer != curthread);
 			/*
 			 * Wait until there are no holders.  While waiting,
 			 * scl_write_wanted is raised, which makes new
 			 * readers block in the loop above.
 			 */
 			while (!refcount_is_zero(&scl->scl_count)) {
 				scl->scl_write_wanted++;
 				cv_wait(&scl->scl_cv, &scl->scl_lock);
 				scl->scl_write_wanted--;
 			}
 			scl->scl_writer = curthread;
 		}
 		(void) refcount_add(&scl->scl_count, tag);
 		mutex_exit(&scl->scl_lock);
 	}
 	/*
 	 * The mask of writer locks held on entry must not be numerically
 	 * greater than the requested mask.
 	 */
 	ASSERT(wlocks_held <= locks);
 }
 
 /*
  * Release the config locks named in the 'locks' bitmask that were taken
  * with 'tag'.  Locks are visited from the highest index down to 0, the
  * reverse of the order used by spa_config_enter().
  */
 void
 spa_config_exit(spa_t *spa, int locks, void *tag)
 {
 	for (int i = SCL_LOCKS - 1; i >= 0; i--) {
 		spa_config_lock_t *scl = &spa->spa_config_lock[i];
 		if (!(locks & (1 << i)))
 			continue;
 		mutex_enter(&scl->scl_lock);
 		ASSERT(!refcount_is_zero(&scl->scl_count));
 		/*
 		 * When the last hold goes away, clear any writer ownership
 		 * and wake every thread waiting on this lock.
 		 */
 		if (refcount_remove(&scl->scl_count, tag) == 0) {
 			ASSERT(scl->scl_writer == NULL ||
 			    scl->scl_writer == curthread);
 			scl->scl_writer = NULL;	/* OK in either case */
 			cv_broadcast(&scl->scl_cv);
 		}
 		mutex_exit(&scl->scl_lock);
 	}
 }
 
 /*
  * Report which of the config locks named in 'locks' are currently held.
  *
  * For RW_READER a lock counts as held when its reference count is
  * non-zero; for RW_WRITER it counts as held only when the calling thread
  * is the recorded writer.  Returns the bitmask of locks that qualify.
  */
 int
 spa_config_held(spa_t *spa, int locks, krw_t rw)
 {
 	int held_mask = 0;
 	int bit;
 
 	for (bit = 0; bit < SCL_LOCKS; bit++) {
 		const int mask = 1 << bit;
 		spa_config_lock_t *lock;
 		int held;
 
 		if ((locks & mask) == 0)
 			continue;
 
 		lock = &spa->spa_config_lock[bit];
 		if (rw == RW_READER)
 			held = !refcount_is_zero(&lock->scl_count);
 		else if (rw == RW_WRITER)
 			held = (lock->scl_writer == curthread);
 		else
 			held = 0;
 
 		if (held)
 			held_mask |= mask;
 	}
 
 	return (held_mask);
 }
 
 /*
  * ==========================================================================
  * SPA namespace functions
  * ==========================================================================
  */
 
 /*
  * Find the spa_t called 'name' in the namespace AVL tree.  'name' may also
  * be a dataset or snapshot name, in which case only the part before the
  * first '/' or '@' is used.  The caller must hold spa_namespace_lock.
  * Returns NULL when there is no such pool.
  */
 spa_t *
 spa_lookup(const char *name)
 {
 	/* Search key; static because spa_t is too large for the stack. */
 	static spa_t search;
 	char *sep;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
 
 	/* Cut the key off at the first dataset or snapshot separator. */
 	if ((sep = strpbrk(search.spa_name, "/@")) != NULL)
 		*sep = '\0';
 
 	return (avl_find(&spa_namespace_avl, &search, NULL));
 }
 
 /*
  * Fires when spa_sync has not completed within zfs_deadman_synctime_ms.
  * If the zfs_deadman_enabled flag is set then it inspects all vdev queues
  * looking for potentially hung I/Os.
  */
 void
 spa_deadman(void *arg)
 {
 	/* The callback argument is the spa_t being watched. */
 	spa_t *spa = arg;
 
 	/*
 	 * Disable the deadman timer if the pool is suspended.
 	 */
 	if (spa_suspended(spa)) {
 #ifdef illumos
 		VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY));
 #else
 		/* Nothing.  just don't schedule any future callouts. */
 #endif
 		return;
 	}
 
 	/*
 	 * Log how long ago spa_sync started (in seconds) together with the
 	 * number of times this handler has fired; the counter is bumped as
 	 * part of building the message arguments.
 	 */
 	zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
 	    (gethrtime() - spa->spa_sync_starttime) / NANOSEC,
 	    ++spa->spa_deadman_calls);
 	/* Only inspect the vdev tree when the deadman is enabled. */
 	if (zfs_deadman_enabled)
 		vdev_deadman(spa->spa_root_vdev);
 }
 
 /*
  * Create an uninitialized spa_t with the given name.  Requires
  * spa_namespace_lock.  The caller must ensure that the spa_t doesn't already
  * exist by calling spa_lookup() first.
  */
 spa_t *
 spa_add(const char *name, nvlist_t *config, const char *altroot)
 {
 	spa_t *spa;
 	spa_config_dirent_t *dp;
 #ifdef illumos
 	cyc_handler_t hdlr;
 	cyc_time_t when;
 #endif
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	/* Zeroed allocation: fields not set below start out as 0/NULL. */
 	spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
 
 	/* Per-pool mutexes; spa_remove() destroys the same set. */
 	mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	/* Per-pool condition variables. */
 	cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
 	cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
 
 	/* One free bplist per txg slot. */
 	for (int t = 0; t < TXG_SIZE; t++)
 		bplist_create(&spa->spa_free_bplist[t]);
 
 	(void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name));
 	spa->spa_state = POOL_STATE_UNINITIALIZED;
 	spa->spa_freeze_txg = UINT64_MAX;
 	spa->spa_final_txg = UINT64_MAX;
 	spa->spa_load_max_txg = UINT64_MAX;
 	spa->spa_proc = &p0;
 	spa->spa_proc_state = SPA_PROC_NONE;
 
 #ifdef illumos
 	hdlr.cyh_func = spa_deadman;
 	hdlr.cyh_arg = spa;
 	hdlr.cyh_level = CY_LOW_LEVEL;
 #endif
 
 	/* The tunable is in milliseconds; the spa_t field is in nanoseconds. */
 	spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);
 
 #ifdef illumos
 	/*
 	 * This determines how often we need to check for hung I/Os after
 	 * the cyclic has already fired. Since checking for hung I/Os is
 	 * an expensive operation we don't want to check too frequently.
 	 * Instead wait for 5 seconds before checking again.
 	 */
 	when.cyt_interval = MSEC2NSEC(zfs_deadman_checktime_ms);
 	when.cyt_when = CY_INFINITY;
 	mutex_enter(&cpu_lock);
 	spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
 	mutex_exit(&cpu_lock);
 #else	/* !illumos */
 #ifdef _KERNEL
 	/* Non-illumos kernels keep a callout in spa_deadman_cycid instead. */
 	callout_init(&spa->spa_deadman_cycid, CALLOUT_MPSAFE);
 #endif
 #endif
 	refcount_create(&spa->spa_refcount);
 	spa_config_lock_init(spa);
 
 	/* From here on the pool can be found in the namespace tree. */
 	avl_add(&spa_namespace_avl, spa);
 
 	/*
 	 * Set the alternate root, if there is one.
 	 */
 	if (altroot) {
 		spa->spa_root = spa_strdup(altroot);
 		spa_active_count++;
 	}
 
 	/*
 	 * Every pool starts with the default cachefile
 	 */
 	list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t),
 	    offsetof(spa_config_dirent_t, scd_link));
 
 	/* With an altroot the entry is created with a NULL path instead. */
 	dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP);
 	dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path);
 	list_insert_head(&spa->spa_config_list, dp);
 
 	VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
 	    KM_SLEEP) == 0);
 
 	if (config != NULL) {
 		nvlist_t *features;
 
 		/* Keep a private copy of the features-for-read list, if any. */
 		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
 		    &features) == 0) {
 			VERIFY(nvlist_dup(features, &spa->spa_label_features,
 			    0) == 0);
 		}
 
 		/* The spa_t owns its own duplicate of the caller's config. */
 		VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
 	}
 
 	/* Make sure spa_label_features is never NULL on return. */
 	if (spa->spa_label_features == NULL) {
 		VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
 		    KM_SLEEP) == 0);
 	}
 
 	spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
 
 	return (spa);
 }
 
 /*
  * Removes a spa_t from the namespace, freeing up any memory used.  Requires
  * spa_namespace_lock.  This is called only after the spa_t has been closed and
  * deactivated.
  */
 void
 spa_remove(spa_t *spa)
 {
 	spa_config_dirent_t *dp;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
 
 	nvlist_free(spa->spa_config_splitting);
 
 	/* Unlink from the namespace and wake anyone waiting on it. */
 	avl_remove(&spa_namespace_avl, spa);
 	cv_broadcast(&spa_namespace_cv);
 
 	/* Undo the altroot bookkeeping done by spa_add(). */
 	if (spa->spa_root) {
 		spa_strfree(spa->spa_root);
 		spa_active_count--;
 	}
 
 	/* Free every cachefile entry, including its path string if set. */
 	while ((dp = list_head(&spa->spa_config_list)) != NULL) {
 		list_remove(&spa->spa_config_list, dp);
 		if (dp->scd_path != NULL)
 			spa_strfree(dp->scd_path);
 		kmem_free(dp, sizeof (spa_config_dirent_t));
 	}
 
 	list_destroy(&spa->spa_config_list);
 
 	nvlist_free(spa->spa_label_features);
 	nvlist_free(spa->spa_load_info);
 	spa_config_set(spa, NULL);
 
 #ifdef illumos
 	mutex_enter(&cpu_lock);
 	if (spa->spa_deadman_cycid != CYCLIC_NONE)
 		cyclic_remove(spa->spa_deadman_cycid);
 	mutex_exit(&cpu_lock);
 	spa->spa_deadman_cycid = CYCLIC_NONE;
 #else	/* !illumos */
 #ifdef _KERNEL
 	/* Drain the deadman callout before the spa_t is freed below. */
 	callout_drain(&spa->spa_deadman_cycid);
 #endif
 #endif
 
 	refcount_destroy(&spa->spa_refcount);
 
 	spa_config_lock_destroy(spa);
 
 	for (int t = 0; t < TXG_SIZE; t++)
 		bplist_destroy(&spa->spa_free_bplist[t]);
 
 	/* Destroy the same condition variables and mutexes spa_add() made. */
 	cv_destroy(&spa->spa_async_cv);
 	cv_destroy(&spa->spa_proc_cv);
 	cv_destroy(&spa->spa_scrub_io_cv);
 	cv_destroy(&spa->spa_suspend_cv);
 
 	mutex_destroy(&spa->spa_async_lock);
 	mutex_destroy(&spa->spa_errlist_lock);
 	mutex_destroy(&spa->spa_errlog_lock);
 	mutex_destroy(&spa->spa_history_lock);
 	mutex_destroy(&spa->spa_proc_lock);
 	mutex_destroy(&spa->spa_props_lock);
 	mutex_destroy(&spa->spa_scrub_lock);
 	mutex_destroy(&spa->spa_suspend_lock);
 	mutex_destroy(&spa->spa_vdev_top_lock);
 
 	kmem_free(spa, sizeof (spa_t));
 }
 
 /*
  * Step through the pools in the namespace.  Passing NULL yields the first
  * pool; passing a pool yields the one that follows it, or NULL once the
  * end has been reached.  The caller must hold spa_namespace_lock.
  */
 spa_t *
 spa_next(spa_t *prev)
 {
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	/* No predecessor given: start the walk at the first pool. */
 	if (prev == NULL)
 		return (avl_first(&spa_namespace_avl));
 
 	return (AVL_NEXT(&spa_namespace_avl, prev));
 }
 
 /*
  * ==========================================================================
  * SPA refcount functions
  * ==========================================================================
  */
 
 /*
  * Add a reference to the given spa_t.  Must have at least one reference, or
  * have the namespace lock held.
  */
 void
 spa_open_ref(spa_t *spa, void *tag)
 {
 	/*
 	 * Either the pool already has at least spa_minref references, or
 	 * the caller must hold spa_namespace_lock.
 	 */
 	ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref ||
 	    MUTEX_HELD(&spa_namespace_lock));
 	/* Record 'tag' as an additional holder of the spa_t. */
 	(void) refcount_add(&spa->spa_refcount, tag);
 }
 
 /*
  * Remove a reference to the given spa_t.  Must have at least one reference, or
  * have the namespace lock held.
  */
 void
 spa_close(spa_t *spa, void *tag)
 {
 	/*
 	 * Either there is a reference beyond the spa_minref baseline to
 	 * give up, or the caller must hold spa_namespace_lock.
 	 */
 	ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref ||
 	    MUTEX_HELD(&spa_namespace_lock));
 	/* Drop the hold that was registered under 'tag'. */
 	(void) refcount_remove(&spa->spa_refcount, tag);
 }
 
 /*
  * Tell whether the pool has no users.  The comparison is made against
  * spa_minref, the number of references acquired when opening a pool, not
  * against a literal zero.  The caller must hold spa_namespace_lock.
  */
 boolean_t
 spa_refcount_zero(spa_t *spa)
 {
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	if (refcount_count(&spa->spa_refcount) == spa->spa_minref)
 		return (B_TRUE);
 
 	return (B_FALSE);
 }
 
 /*
  * ==========================================================================
  * SPA spare and l2cache tracking
  * ==========================================================================
  */
 
 /*
  * Hot spares and cache devices are tracked using the same code below,
  * for 'auxiliary' devices.
  */
 
 /* One tracked auxiliary device, keyed by its vdev guid. */
 typedef struct spa_aux {
 	uint64_t	aux_guid;	/* vdev guid; key in spa_aux_compare */
 	uint64_t	aux_pool;	/* guid of activating pool, else 0 */
 	avl_node_t	aux_avl;	/* presumably AVL linkage -- confirm */
 	int		aux_count;	/* references held on this guid */
 } spa_aux_t;
 
 /*
  * Ordering function for spa_aux_t nodes: compares aux_guid only and
  * returns -1, 0 or 1.
  */
 static int
 spa_aux_compare(const void *a, const void *b)
 {
 	const spa_aux_t *lhs = a;
 	const spa_aux_t *rhs = b;
 
 	if (lhs->aux_guid == rhs->aux_guid)
 		return (0);
 
 	return (lhs->aux_guid < rhs->aux_guid ? -1 : 1);
 }
 
 /*
  * Take a reference on vd's guid in the given aux tree, creating the
  * entry (with a count of 1) if the guid is not tracked yet.
  */
 void
 spa_aux_add(vdev_t *vd, avl_tree_t *avl)
 {
 	spa_aux_t key;
 	spa_aux_t *entry;
 	avl_index_t slot;
 
 	key.aux_guid = vd->vdev_guid;
 	entry = avl_find(avl, &key, &slot);
 
 	if (entry != NULL) {
 		/* Already tracked: just take another reference. */
 		entry->aux_count++;
 		return;
 	}
 
 	/* First reference: allocate a zeroed node and link it in. */
 	entry = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP);
 	entry->aux_guid = vd->vdev_guid;
 	entry->aux_count = 1;
 	avl_insert(avl, entry, slot);
 }
 
 /*
  * Drop one reference on vd's guid in the given aux tree.  The entry is
  * freed when the last reference goes away; otherwise, if vd's pool is the
  * one recorded as having activated the device, that record is cleared.
  */
 void
 spa_aux_remove(vdev_t *vd, avl_tree_t *avl)
 {
 	spa_aux_t key;
 	spa_aux_t *aux;
 
 	key.aux_guid = vd->vdev_guid;
 	aux = avl_find(avl, &key, NULL);
 
 	ASSERT(aux != NULL);
 
 	aux->aux_count--;
 	if (aux->aux_count != 0) {
 		/* Still referenced elsewhere; only forget our activation. */
 		if (aux->aux_pool == spa_guid(vd->vdev_spa))
 			aux->aux_pool = 0ULL;
 		return;
 	}
 
 	avl_remove(avl, aux);
 	kmem_free(aux, sizeof (spa_aux_t));
 }
 
 /*
  * Look up 'guid' in the given aux tree and return whether it is present.
  * The optional out-parameters receive the entry's activating pool guid
  * ('pool') and reference count ('refcnt'), or zero when no entry exists.
  */
 boolean_t
 spa_aux_exists(uint64_t guid, uint64_t *pool, int *refcnt, avl_tree_t *avl)
 {
 	spa_aux_t key;
 	spa_aux_t *entry;
 
 	key.aux_guid = guid;
 	entry = avl_find(avl, &key, NULL);
 
 	if (pool != NULL)
 		*pool = (entry != NULL) ? entry->aux_pool : 0ULL;
 
 	if (refcnt != NULL)
 		*refcnt = (entry != NULL) ? entry->aux_count : 0;
 
 	return (entry != NULL);
 }
 
 /*
  * Record vd's pool as the one that has activated the aux device.  The
  * device must already be tracked in the tree and must not currently be
  * recorded as active in any pool.
  */
 void
 spa_aux_activate(vdev_t *vd, avl_tree_t *avl)
 {
 	spa_aux_t key;
 	spa_aux_t *found;
 
 	key.aux_guid = vd->vdev_guid;
 	found = avl_find(avl, &key, NULL);
 
 	ASSERT(found != NULL);
 	ASSERT(found->aux_pool == 0ULL);
 
 	found->aux_pool = spa_guid(vd->vdev_spa);
 }
 
 /*
  * Spares are tracked globally due to the following constraints:
  *
  * 	- A spare may be part of multiple pools.
  * 	- A spare may be added to a pool even if it's actively in use within
  *	  another pool.
  * 	- A spare in use in any pool can only be the source of a replacement if
  *	  the target is a spare in the same pool.
  *
  * We keep track of all spares on the system through the use of a reference
  * counted AVL tree.  When a vdev is added as a spare, or used as a replacement
  * spare, then we bump the reference count in the AVL tree.  In addition, we set
  * the 'vdev_isspare' member to indicate that the device is a spare (active or
  * inactive).  When a spare is made active (used to replace a device in the
  * pool), we also keep track of which pool it's been made a part of.
  *
  * The 'spa_spare_lock' protects the AVL tree.  These functions are normally
  * called under the spa_namespace lock as part of vdev reconfiguration.  The
  * separate spare lock exists for the status query path, which does not need to
  * be completely consistent with respect to other vdev configuration changes.
  */
 
 /* Comparison function for spares; simply forwards to spa_aux_compare(). */
 static int
 spa_spare_compare(const void *a, const void *b)
 {
 	return (spa_aux_compare(a, b));
 }
 
 /*
  * Register vd in the global spare tree and flag it as a spare.  The vdev
  * must not already be flagged.  Done under spa_spare_lock.
  */
 void
 spa_spare_add(vdev_t *vd)
 {
 	mutex_enter(&spa_spare_lock);
 	ASSERT(!vd->vdev_isspare);
 	spa_aux_add(vd, &spa_spare_avl);
 	vd->vdev_isspare = B_TRUE;
 	mutex_exit(&spa_spare_lock);
 }
 
 /*
  * Drop vd's reference in the global spare tree and clear its spare flag.
  * The vdev must currently be flagged.  Done under spa_spare_lock.
  */
 void
 spa_spare_remove(vdev_t *vd)
 {
 	mutex_enter(&spa_spare_lock);
 	ASSERT(vd->vdev_isspare);
 	spa_aux_remove(vd, &spa_spare_avl);
 	vd->vdev_isspare = B_FALSE;
 	mutex_exit(&spa_spare_lock);
 }
 
 /*
  * Check whether 'guid' is tracked as a spare.  'pool' and 'refcnt' are
  * passed through to spa_aux_exists(), which fills them in when they are
  * non-NULL.  The lookup runs under spa_spare_lock.
  */
 boolean_t
 spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt)
 {
 	boolean_t is_spare;
 
 	mutex_enter(&spa_spare_lock);
 	is_spare = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl);
 	mutex_exit(&spa_spare_lock);
 
 	return (is_spare);
 }
 
 /*
  * Record vd's pool as the one actively using this spare.  The vdev must
  * already be flagged as a spare.  Done under spa_spare_lock.
  */
 void
 spa_spare_activate(vdev_t *vd)
 {
 	mutex_enter(&spa_spare_lock);
 	ASSERT(vd->vdev_isspare);
 	spa_aux_activate(vd, &spa_spare_avl);
 	mutex_exit(&spa_spare_lock);
 }
 
 /*
  * Level 2 ARC devices are tracked globally for the same reasons as spares.
  * Cache devices currently only support one pool per cache device, and so
  * for these devices the aux reference count is currently unused beyond 1.
  */
 
 /* Comparison function for l2cache devices; forwards to spa_aux_compare(). */
 static int
 spa_l2cache_compare(const void *a, const void *b)
 {
 	return (spa_aux_compare(a, b));
 }
 
 /*
  * Register vd in the global l2cache tree and flag it as an l2cache
  * device.  The vdev must not already be flagged.  Done under
  * spa_l2cache_lock.
  */
 void
 spa_l2cache_add(vdev_t *vd)
 {
 	mutex_enter(&spa_l2cache_lock);
 	ASSERT(!vd->vdev_isl2cache);
 	spa_aux_add(vd, &spa_l2cache_avl);
 	vd->vdev_isl2cache = B_TRUE;
 	mutex_exit(&spa_l2cache_lock);
 }
 
 /*
  * Drop vd's reference in the global l2cache tree and clear its l2cache
  * flag.  The vdev must currently be flagged.  Done under spa_l2cache_lock.
  */
 void
 spa_l2cache_remove(vdev_t *vd)
 {
 	mutex_enter(&spa_l2cache_lock);
 	ASSERT(vd->vdev_isl2cache);
 	spa_aux_remove(vd, &spa_l2cache_avl);
 	vd->vdev_isl2cache = B_FALSE;
 	mutex_exit(&spa_l2cache_lock);
 }
 
 /*
  * Check whether 'guid' is tracked as an l2cache device.  'pool' is passed
  * through to spa_aux_exists(), which fills it in when it is non-NULL; no
  * reference count is requested.  The lookup runs under spa_l2cache_lock.
  */
 boolean_t
 spa_l2cache_exists(uint64_t guid, uint64_t *pool)
 {
 	boolean_t is_l2cache;
 
 	mutex_enter(&spa_l2cache_lock);
 	is_l2cache = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl);
 	mutex_exit(&spa_l2cache_lock);
 
 	return (is_l2cache);
 }
 
 /*
  * Record vd's pool as the one actively using this l2cache device.  The
  * vdev must already be flagged as l2cache.  Done under spa_l2cache_lock.
  */
 void
 spa_l2cache_activate(vdev_t *vd)
 {
 	mutex_enter(&spa_l2cache_lock);
 	ASSERT(vd->vdev_isl2cache);
 	spa_aux_activate(vd, &spa_l2cache_avl);
 	mutex_exit(&spa_l2cache_lock);
 }
 
 /*
  * ==========================================================================
  * SPA vdev locking
  * ==========================================================================
  */
 
 /*
  * Begin a vdev add/remove operation on the given spa_t.
  *
  * Takes the pool's spa_vdev_top_lock, then the global spa_namespace_lock,
  * and finally the spa config lock for writing by way of
  * spa_vdev_config_enter().  Returns the next transaction group for the
  * spa_t, as computed by that function.
  */
 uint64_t
 spa_vdev_enter(spa_t *spa)
 {
 	uint64_t txg;
 
 	mutex_enter(&spa->spa_vdev_top_lock);
 	mutex_enter(&spa_namespace_lock);
 	txg = spa_vdev_config_enter(spa);
 
 	return (txg);
 }
 
 /*
  * Config-lock half of spa_vdev_enter(), for callers that already hold
  * spa_namespace_lock.  It is used when a vdev operation needs multiple
  * syncs (i.e. removing a device) and the namespace lock has to stay held
  * across them.  Returns the last synced txg plus one.
  */
 uint64_t
 spa_vdev_config_enter(spa_t *spa)
 {
 	uint64_t next_txg;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	/* Take the SCL_ALL set as writer, tagged with the spa itself. */
 	spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
 
 	next_txg = spa_last_synced_txg(spa) + 1;
 	return (next_txg);
 }
 
 /*
  * Used in combination with spa_vdev_config_enter() to allow the syncing
  * of multiple transactions without releasing the spa_namespace_lock.
  *
  *	vd	if non-NULL, a vdev to free once the sync wait is over
  *	txg	the txg to wait for; must be beyond the last synced txg
  *	error	result of the operation; a non-zero value skips both the
  *		config generation bump and the sync wait
  *	tag	passed to zio_handle_panic_injection() when zio injection
  *		is enabled
  */
 void
 spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
 {
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	int config_changed = B_FALSE;
 
 	ASSERT(txg > spa_last_synced_txg(spa));
 
 	spa->spa_pending_vdev = NULL;
 
 	/*
 	 * Reassess the DTLs.
 	 */
 	vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
 
 	/* A successful operation that left dirty configs changed the pool. */
 	if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) {
 		config_changed = B_TRUE;
 		spa->spa_config_generation++;
 	}
 
 	/*
 	 * Verify the metaslab classes.
 	 */
 	ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
 	ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
 
 	/* Drop the writer hold taken by spa_vdev_config_enter(). */
 	spa_config_exit(spa, SCL_ALL, spa);
 
 	/*
 	 * Panic the system if the specified tag requires it.  This
 	 * is useful for ensuring that configurations are updated
 	 * transactionally.
 	 */
 	if (zio_injection_enabled)
 		zio_handle_panic_injection(spa, tag, 0);
 
 	/*
 	 * Note: this txg_wait_synced() is important because it ensures
 	 * that there won't be more than one config change per txg.
 	 * This allows us to use the txg as the generation number.
 	 */
 	if (error == 0)
 		txg_wait_synced(spa->spa_dsl_pool, txg);
 
 	/* The vdev is freed with the config lock re-taken as writer. */
 	if (vd != NULL) {
 		ASSERT(!vd->vdev_detached || vd->vdev_dtl_sm == NULL);
 		spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
 		vdev_free(vd);
 		spa_config_exit(spa, SCL_ALL, spa);
 	}
 
 	/*
 	 * If the config changed, update the config cache.
 	 */
 	if (config_changed)
 		spa_config_sync(spa, B_FALSE, B_TRUE);
 }
 
 /*
  * Counterpart of spa_vdev_enter(), called after adding or removing a vdev.
  * spa_vdev_config_exit() does the work of waiting for the sync and updating
  * the configuration cache; afterwards spa_namespace_lock and the pool's
  * spa_vdev_top_lock are released.  The caller's 'error' is returned
  * unchanged.
  */
 int
 spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error)
 {
 	int rc = error;
 
 	spa_vdev_config_exit(spa, vd, txg, rc, FTAG);
 
 	/* Release in the reverse order of spa_vdev_enter(). */
 	mutex_exit(&spa_namespace_lock);
 	mutex_exit(&spa->spa_vdev_top_lock);
 
 	return (rc);
 }
 
 /*
  * Lock the given spa_t for the purpose of changing vdev state.
  *
  * Takes SCL_STATE_ALL plus any extra config locks in 'oplocks' as writer
  * and records the combined mask in spa->spa_vdev_locks, which
  * spa_vdev_state_exit() uses to release them.
  */
 void
 spa_vdev_state_enter(spa_t *spa, int oplocks)
 {
 	int locks = SCL_STATE_ALL | oplocks;
 
 	/*
 	 * Root pools may need to read from the underlying devfs filesystem
 	 * when opening up a vdev.  Unfortunately if we're holding the
 	 * SCL_ZIO lock it will result in a deadlock when we try to issue
 	 * the read from the root filesystem.  Instead we "prefetch"
 	 * the associated vnodes that we need prior to opening the
 	 * underlying devices and cache them so that we can prevent
 	 * any I/O when we are doing the actual open.
 	 */
 	if (spa_is_root(spa)) {
 		/*
 		 * 'low' keeps the requested lock bits at or above SCL_ZIO;
 		 * 'high' keeps the requested bits below SCL_ZIO.  The 'high'
 		 * set is taken before vdev_hold(), the 'low' set after it.
 		 */
 		int low = locks & ~(SCL_ZIO - 1);
 		int high = locks & ~low;
 
 		spa_config_enter(spa, high, spa, RW_WRITER);
 		vdev_hold(spa->spa_root_vdev);
 		spa_config_enter(spa, low, spa, RW_WRITER);
 	} else {
 		spa_config_enter(spa, locks, spa, RW_WRITER);
 	}
 	spa->spa_vdev_locks = locks;
 }
 
 /*
  * Counterpart of spa_vdev_state_enter().  Releases the config locks
  * recorded in spa->spa_vdev_locks and returns 'error' unchanged.
  *
  * A non-NULL 'vd' marks its top-level vdev's state dirty, bumps the config
  * generation, waits for a sync and then rewrites the config cache under
  * spa_namespace_lock.  DTLs are reassessed when 'vd' is non-NULL or
  * 'error' is zero.
  */
 int
 spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
 {
 	boolean_t config_changed = B_FALSE;
 
 	/* Reassess from vd's top-level vdev if given, else from the root. */
 	if (vd != NULL || error == 0)
 		vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev,
 		    0, 0, B_FALSE);
 
 	if (vd != NULL) {
 		vdev_state_dirty(vd->vdev_top);
 		config_changed = B_TRUE;
 		spa->spa_config_generation++;
 	}
 
 	/* Pairs with the vdev_hold() done for root pools on entry. */
 	if (spa_is_root(spa))
 		vdev_rele(spa->spa_root_vdev);
 
 	ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL);
 	spa_config_exit(spa, spa->spa_vdev_locks, spa);
 
 	/*
 	 * If anything changed, wait for it to sync.  This ensures that,
 	 * from the system administrator's perspective, zpool(1M) commands
 	 * are synchronous.  This is important for things like zpool offline:
 	 * when the command completes, you expect no further I/O from ZFS.
 	 */
 	if (vd != NULL)
 		txg_wait_synced(spa->spa_dsl_pool, 0);
 
 	/*
 	 * If the config changed, update the config cache.
 	 */
 	if (config_changed) {
 		mutex_enter(&spa_namespace_lock);
 		spa_config_sync(spa, B_FALSE, B_TRUE);
 		mutex_exit(&spa_namespace_lock);
 	}
 
 	return (error);
 }
 
 /*
  * ==========================================================================
  * Miscellaneous functions
  * ==========================================================================
  */
 
 /*
  * Add 'feature' as a boolean entry to the pool's label-features nvlist and
  * mark the root vdev's config dirty.  The nvlist result is ignored.
  */
 void
 spa_activate_mos_feature(spa_t *spa, const char *feature)
 {
 	vdev_t *rvd;
 
 	(void) nvlist_add_boolean(spa->spa_label_features, feature);
 
 	rvd = spa->spa_root_vdev;
 	vdev_config_dirty(rvd);
 }
 
 /*
  * Remove every entry named 'feature' from the pool's label-features nvlist
  * and mark the root vdev's config dirty.  The nvlist result is ignored.
  */
 void
 spa_deactivate_mos_feature(spa_t *spa, const char *feature)
 {
 	vdev_t *rvd;
 
 	(void) nvlist_remove_all(spa->spa_label_features, feature);
 
 	rvd = spa->spa_root_vdev;
 	vdev_config_dirty(rvd);
 }
 
 /*
  * Rename a spa_t.
  *
  * Opens the pool called 'name', re-keys it in the namespace AVL tree under
  * 'newname', dirties the root vdev so the labels get rewritten, waits for
  * a sync and then syncs the config cache.  Returns the spa_open() error if
  * the pool cannot be opened, otherwise 0.
  *
  * NOTE(review): nothing visible here checks that 'newname' is not already
  * in use or that strlcpy() did not truncate it; presumably callers
  * validate the name first -- confirm.
  */
 int
 spa_rename(const char *name, const char *newname)
 {
 	spa_t *spa;
 	int err;
 
 	/*
 	 * Lookup the spa_t and grab the config lock for writing.  We need to
 	 * actually open the pool so that we can sync out the necessary labels.
 	 * It's OK to call spa_open() with the namespace lock held because we
 	 * allow recursive calls for other reasons.
 	 */
 	mutex_enter(&spa_namespace_lock);
 	if ((err = spa_open(name, &spa, FTAG)) != 0) {
 		mutex_exit(&spa_namespace_lock);
 		return (err);
 	}
 
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 
 	/*
 	 * The tree is ordered by spa_name (see spa_name_compare()), so the
 	 * node is removed before the name changes and re-added afterwards.
 	 */
 	avl_remove(&spa_namespace_avl, spa);
 	(void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name));
 	avl_add(&spa_namespace_avl, spa);
 
 	/*
 	 * Sync all labels to disk with the new names by marking the root vdev
 	 * dirty and waiting for it to sync.  It will pick up the new pool name
 	 * during the sync.
 	 */
 	vdev_config_dirty(spa->spa_root_vdev);
 
 	spa_config_exit(spa, SCL_ALL, FTAG);
 
 	txg_wait_synced(spa->spa_dsl_pool, 0);
 
 	/*
 	 * Sync the updated config cache.
 	 */
 	spa_config_sync(spa, B_FALSE, B_TRUE);
 
 	spa_close(spa, FTAG);
 
 	mutex_exit(&spa_namespace_lock);
 
 	return (0);
 }
 
 /*
  * Look up a pool by guid.  With device_guid == 0 the first initialized pool
  * whose guid equals pool_guid is returned.  With a non-zero device_guid the
  * pool must also contain a vdev with that guid, either in its root vdev
  * tree or in the vdev tree currently pending addition.  Returns NULL when
  * nothing matches.  The caller must hold spa_namespace_lock.
  */
 spa_t *
 spa_by_guid(uint64_t pool_guid, uint64_t device_guid)
 {
 	avl_tree_t *tree = &spa_namespace_avl;
 	spa_t *cand;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	for (cand = avl_first(tree); cand != NULL;
 	    cand = AVL_NEXT(tree, cand)) {
 		/* Skip pools that are uninitialized or have no root vdev. */
 		if (cand->spa_state == POOL_STATE_UNINITIALIZED ||
 		    cand->spa_root_vdev == NULL)
 			continue;
 
 		if (spa_guid(cand) != pool_guid)
 			continue;
 
 		/* A zero device_guid means "match on the pool alone". */
 		if (device_guid == 0)
 			return (cand);
 
 		if (vdev_lookup_by_guid(cand->spa_root_vdev,
 		    device_guid) != NULL)
 			return (cand);
 
 		/*
 		 * Check any devices we may be in the process of adding.
 		 */
 		if (cand->spa_pending_vdev &&
 		    vdev_lookup_by_guid(cand->spa_pending_vdev,
 		    device_guid) != NULL)
 			return (cand);
 	}
 
 	return (NULL);
 }
 
 /*
  * Report whether spa_by_guid() finds a pool for (pool_guid, device_guid).
  */
 boolean_t
 spa_guid_exists(uint64_t pool_guid, uint64_t device_guid)
 {
 	spa_t *match = spa_by_guid(pool_guid, device_guid);
 
 	return (match != NULL);
 }
 
 /*
  * Return a copy of the NUL-terminated string 's' in a buffer of
  * strlen(s) + 1 bytes obtained from kmem_alloc(KM_SLEEP).  spa_strfree()
  * releases a buffer of exactly that size.
  */
 char *
 spa_strdup(const char *s)
 {
 	size_t nbytes = strlen(s) + 1;
 	char *copy = kmem_alloc(nbytes, KM_SLEEP);
 
 	/* Copy the characters, then terminate explicitly. */
 	bcopy(s, copy, nbytes - 1);
 	copy[nbytes - 1] = '\0';
 
 	return (copy);
 }
 
 /*
  * Free the string 's' with kmem_free(), sizing the buffer as
  * strlen(s) + 1 bytes.
  */
 void
 spa_strfree(char *s)
 {
 	size_t nbytes = strlen(s) + 1;
 
 	kmem_free(s, nbytes);
 }
 
 /*
  * Return 64 pseudo-random bits reduced modulo 'range', i.e. a value in
  * [0, range).  'range' is asserted to be non-zero.  The status returned by
  * random_get_pseudo_bytes() is ignored.
  */
 uint64_t
 spa_get_random(uint64_t range)
 {
 	uint64_t raw;
 
 	ASSERT(range != 0);
 
 	(void) random_get_pseudo_bytes((void *)&raw, sizeof (raw));
 	raw %= range;
 
 	return (raw);
 }
 
 /*
  * Draw random guids until one is non-zero and unused.  With a pool given,
  * "unused" means no device with that guid exists in that pool; with
  * spa == NULL it means no pool with that guid exists.
  */
 uint64_t
 spa_generate_guid(spa_t *spa)
 {
 	uint64_t candidate;
 	boolean_t taken;
 
 	do {
 		candidate = spa_get_random(-1ULL);
 
 		if (candidate == 0)
 			taken = B_TRUE;		/* zero is never a valid guid */
 		else if (spa != NULL)
 			taken = spa_guid_exists(spa_guid(spa), candidate);
 		else
 			taken = spa_guid_exists(candidate, 0);
 	} while (taken);
 
 	return (candidate);
 }
 
 /*
  * Format a human-readable description of block pointer 'bp' into 'buf' via
  * the SPRINTF_BLKPTR() macro.
  *
  * For a non-NULL 'bp' the object type name, checksum name and compression
  * name are looked up first.  For a NULL 'bp', 'type' is left uninitialized
  * and 'checksum'/'compress' stay NULL.
  * NOTE(review): presumably SPRINTF_BLKPTR() handles a NULL bp without
  * reading them -- confirm against the macro.
  * NOTE(review): no buffer length is passed here; the size of 'buf' is
  * presumably fixed by convention between caller and macro -- confirm.
  */
 void
 sprintf_blkptr(char *buf, const blkptr_t *bp)
 {
 	char type[256];
 	char *checksum = NULL;
 	char *compress = NULL;
 
 	if (bp != NULL) {
 		if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
 			/* New-style type: describe it by byteswap class. */
 			dmu_object_byteswap_t bswap =
 			    DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
 			(void) snprintf(type, sizeof (type), "bswap %s %s",
 			    DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ?
 			    "metadata" : "data",
 			    dmu_ot_byteswap[bswap].ob_name);
 		} else {
 			/* Legacy type: use the name from the dmu_ot[] table. */
 			(void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
 			    sizeof (type));
 		}
 		checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
 		compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
 	}
 
 	SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress);
 }
 
 /*
  * If the pool's freeze txg is still UINT64_MAX, set it to TXG_SIZE txgs
  * past the last synced txg (under SCL_ALL as writer) and then wait for that
  * txg to sync.  If a freeze txg was already set, nothing is changed and
  * nothing is waited for.
  */
 void
 spa_freeze(spa_t *spa)
 {
 	uint64_t target_txg = 0;
 
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 	if (spa->spa_freeze_txg == UINT64_MAX) {
 		target_txg = spa_last_synced_txg(spa) + TXG_SIZE;
 		spa->spa_freeze_txg = target_txg;
 	}
 	spa_config_exit(spa, SCL_ALL, FTAG);
 
 	/* Nothing to wait for unless we set the freeze txg ourselves. */
 	if (target_txg == 0)
 		return;
 
 	txg_wait_synced(spa_get_dsl(spa), target_txg);
 }
 
 /*
  * Report a printf-style message through vcmn_err().  The severity is
  * CE_WARN when zfs_recover is set and CE_PANIC otherwise.
  */
 void
 zfs_panic_recover(const char *fmt, ...)
 {
 	va_list args;
 	int level;
 
 	if (zfs_recover)
 		level = CE_WARN;
 	else
 		level = CE_PANIC;
 
 	va_start(args, fmt);
 	vcmn_err(level, fmt, args);
 	va_end(args);
 }
 
 /*
  * Minimal strtoull() replacement: parses a run of lowercase hexadecimal
  * digits ([0-9a-f]) from the start of 'str' and returns its value.  Parsing
  * stops at the first other character (uppercase hex digits included).
  * Overflow is not detected.  If 'nptr' is non-NULL it receives a pointer
  * to the first unparsed character.
  */
 uint64_t
 zfs_strtonum(const char *str, char **nptr)
 {
 	uint64_t result = 0;
 	const char *p;
 
 	for (p = str; *p != '\0'; p++) {
 		char ch = *p;
 		int nibble;
 
 		if (ch >= '0' && ch <= '9') {
 			nibble = ch - '0';
 		} else if (ch >= 'a' && ch <= 'f') {
 			nibble = ch - 'a' + 10;
 		} else {
 			break;
 		}
 
 		/* Shift in one more hex digit. */
 		result = (result << 4) + nibble;
 	}
 
 	if (nptr != NULL)
 		*nptr = (char *)p;
 
 	return (result);
 }
 
 /*
  * ==========================================================================
  * Accessor functions
  * ==========================================================================
  */
 
 /*
  * Return the pool's spa_async_suspended value as a boolean.
  */
 boolean_t
 spa_shutting_down(spa_t *spa)
 {
 	boolean_t suspended = spa->spa_async_suspended;
 
 	return (suspended);
 }
 
 /*
  * Return the pool's dsl_pool_t pointer (spa_dsl_pool).
  */
 dsl_pool_t *
 spa_get_dsl(spa_t *spa)
 {
 	dsl_pool_t *dp = spa->spa_dsl_pool;
 
 	return (dp);
 }
 
 /*
  * Return the pool's spa_is_initializing flag.
  */
 boolean_t
 spa_is_initializing(spa_t *spa)
 {
 	boolean_t initializing = spa->spa_is_initializing;
 
 	return (initializing);
 }
 
 /*
  * Return a pointer to the root block pointer stored in spa_ubsync.
  */
 blkptr_t *
 spa_get_rootblkptr(spa_t *spa)
 {
 	blkptr_t *rootbp = &spa->spa_ubsync.ub_rootbp;
 
 	return (rootbp);
 }
 
 /*
  * Copy '*bp' into the root block pointer of spa_uberblock.  Note that
  * spa_get_rootblkptr() reads spa_ubsync instead.
  */
 void
 spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp)
 {
 	blkptr_t *rootbp = &spa->spa_uberblock.ub_rootbp;
 
 	*rootbp = *bp;
 }
 
 /*
  * Copy the pool's alternate root path into 'buf', whose capacity is
  * 'buflen' bytes.  If the pool has no alternate root (spa_root == NULL),
  * 'buf' is set to the empty string.
  *
  * strlcpy() is used rather than strncpy() so that the result is always
  * NUL-terminated, truncating the path if it does not fit.  strncpy()
  * leaves 'buf' unterminated when the source is 'buflen' bytes or longer.
  */
 void
 spa_altroot(spa_t *spa, char *buf, size_t buflen)
 {
 	if (spa->spa_root == NULL)
 		buf[0] = '\0';
 	else
 		(void) strlcpy(buf, spa->spa_root, buflen);
 }
 
 /*
  * Return the pool's spa_sync_pass value.
  */
 int
 spa_sync_pass(spa_t *spa)
 {
 	int pass = spa->spa_sync_pass;
 
 	return (pass);
 }
 
 /*
  * Return a pointer to the pool's name (the spa_name member itself, not a
  * copy).
  */
 char *
 spa_name(spa_t *spa)
 {
 	char *name = spa->spa_name;
 
 	return (name);
 }
 
 /*
  * Return the pool guid.
  *
  * With no root vdev, the stashed spa_config_guid is returned.  This covers
  * a failed config parse in spa_load(), whose error path posts an ereport
  * and can land here.  In syncing context the root vdev's current guid is
  * returned.  Otherwise the last synced guid is returned if it is non-zero,
  * else the root vdev's guid.
  */
 uint64_t
 spa_guid(spa_t *spa)
 {
 	dsl_pool_t *pool = spa_get_dsl(spa);
 	vdev_t *rvd = spa->spa_root_vdev;
 	uint64_t synced;
 
 	if (rvd == NULL)
 		return (spa->spa_config_guid);
 
 	/* Prefer the guid that has made it to disk, if one is recorded. */
 	synced = spa->spa_last_synced_guid;
 	if (synced == 0)
 		synced = rvd->vdev_guid;
 
 	/* Syncing context sees the in-core (possibly newer) guid. */
 	if (pool != NULL && dsl_pool_sync_context(pool))
 		return (rvd->vdev_guid);
 
 	return (synced);
 }
 
 /*
  * Return the pool's load guid.  This GUID exists solely as a reference for
  * the arc: it is generated at load time and is never written to persistent
  * storage.
  */
 uint64_t
 spa_load_guid(spa_t *spa)
 {
 	uint64_t load_guid = spa->spa_load_guid;
 
 	return (load_guid);
 }
 
 /*
  * Return the txg recorded in spa_ubsync (ub_txg).
  */
 uint64_t
 spa_last_synced_txg(spa_t *spa)
 {
 	uint64_t txg = spa->spa_ubsync.ub_txg;
 
 	return (txg);
 }
 
 /*
  * Return the pool's spa_first_txg value.
  */
 uint64_t
 spa_first_txg(spa_t *spa)
 {
 	uint64_t txg = spa->spa_first_txg;
 
 	return (txg);
 }
 
 /*
  * Return the pool's spa_syncing_txg value.
  */
 uint64_t
 spa_syncing_txg(spa_t *spa)
 {
 	uint64_t txg = spa->spa_syncing_txg;
 
 	return (txg);
 }
 
 /*
  * Return the pool's current pool_state_t (spa_state).
  */
 pool_state_t
 spa_state(spa_t *spa)
 {
 	pool_state_t state = spa->spa_state;
 
 	return (state);
 }
 
 /*
  * Return the pool's spa_load_state value.
  */
 spa_load_state_t
 spa_load_state(spa_t *spa)
 {
 	spa_load_state_t state = spa->spa_load_state;
 
 	return (state);
 }
 
 /*
  * Return the pool's freeze txg.  spa_freeze() treats UINT64_MAX as "not
  * set".
  */
 uint64_t
 spa_freeze_txg(spa_t *spa)
 {
 	uint64_t txg = spa->spa_freeze_txg;
 
 	return (txg);
 }
 
 /*
  * Return 'lsize' multiplied by the global spa_asize_inflation factor.
  * The 'spa' argument is not used.
  */
 /* ARGSUSED */
 uint64_t
 spa_get_asize(spa_t *spa, uint64_t lsize)
 {
 	uint64_t inflated = lsize * spa_asize_inflation;
 
 	return (inflated);
 }
 
 /*
  * Return the cached spa_dspace value, as last computed by
  * spa_update_dspace().
  */
 uint64_t
 spa_get_dspace(spa_t *spa)
 {
 	uint64_t dspace = spa->spa_dspace;
 
 	return (dspace);
 }
 
 /*
  * Recompute spa_dspace as the normal class's dspace plus the dedup dspace
  * reported by the DDT.
  */
 void
 spa_update_dspace(spa_t *spa)
 {
 	uint64_t normal_dspace =
 	    metaslab_class_get_dspace(spa_normal_class(spa));
 	uint64_t dedup_dspace = ddt_get_dedup_dspace(spa);
 
 	spa->spa_dspace = normal_dspace + dedup_dspace;
 }
 
 /*
  * Return the failure mode configured for this pool.  The default
  * behavior is to block all I/Os when a complete failure occurs.
  */
 uint8_t
 spa_get_failmode(spa_t *spa)
 {
 	uint8_t failmode = spa->spa_failmode;
 
 	return (failmode);
 }
 
 /*
  * Return the pool's spa_suspended flag.
  */
 boolean_t
 spa_suspended(spa_t *spa)
 {
 	boolean_t suspended = spa->spa_suspended;
 
 	return (suspended);
 }
 
 /*
  * Return the version recorded in spa_ubsync (ub_version).
  */
 uint64_t
 spa_version(spa_t *spa)
 {
 	uint64_t version = spa->spa_ubsync.ub_version;
 
 	return (version);
 }
 
 /*
  * Return the pool's spa_deflate flag.
  */
 boolean_t
 spa_deflate(spa_t *spa)
 {
 	boolean_t deflate = spa->spa_deflate;
 
 	return (deflate);
 }
 
 /*
  * Return the pool's normal metaslab class.
  */
 metaslab_class_t *
 spa_normal_class(spa_t *spa)
 {
 	metaslab_class_t *mc = spa->spa_normal_class;
 
 	return (mc);
 }
 
 /*
  * Return the pool's log metaslab class.
  */
 metaslab_class_t *
 spa_log_class(spa_t *spa)
 {
 	metaslab_class_t *mc = spa->spa_log_class;
 
 	return (mc);
 }
 
 /*
  * Return the maximum number of DVAs a block pointer may carry in this
  * pool.  Pools older than SPA_VERSION_DITTO_BLOCKS cannot handle BPs with
  * more than one DVA allocated, so they get 1.  Newer pools get
  * SPA_DVAS_PER_BP, capped by the spa_max_replication_override tunable.
  */
 int
 spa_max_replication(spa_t *spa)
 {
 	int limit;
 
 	if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS)
 		return (1);
 
 	limit = MIN(SPA_DVAS_PER_BP, spa_max_replication_override);
 	return (limit);
 }
 
 /*
  * Return the pool's spa_prev_software_version value.
  */
 int
 spa_prev_software_version(spa_t *spa)
 {
 	int version = spa->spa_prev_software_version;
 
 	return (version);
 }
 
 /*
  * Return the pool's spa_deadman_synctime value.
  */
 uint64_t
 spa_deadman_synctime(spa_t *spa)
 {
 	uint64_t synctime = spa->spa_deadman_synctime;
 
 	return (synctime);
 }
 
 /*
  * Return the dsize of a single DVA.  This is the DVA's asize unless the
  * asize is non-zero and the pool has spa_deflate set.  In that case it is
  * (asize >> SPA_MINBLOCKSHIFT) scaled by the deflate ratio of the DVA's
  * top-level vdev.  The caller must hold a config lock (asserted).
  */
 uint64_t
 dva_get_dsize_sync(spa_t *spa, const dva_t *dva)
 {
 	uint64_t asize = DVA_GET_ASIZE(dva);
 	vdev_t *top;
 
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
 
 	/* No deflation to apply: dsize is simply asize. */
 	if (asize == 0 || !spa->spa_deflate)
 		return (asize);
 
 	top = vdev_lookup_top(spa, DVA_GET_VDEV(dva));
 	return ((asize >> SPA_MINBLOCKSHIFT) * top->vdev_deflate_ratio);
 }
 
 /*
  * Sum dva_get_dsize_sync() over all SPA_DVAS_PER_BP DVAs of 'bp'.  No
  * config lock is taken here; dva_get_dsize_sync() asserts that one is
  * held.
  */
 uint64_t
 bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp)
 {
 	uint64_t total = 0;
 	int i = 0;
 
 	while (i < SPA_DVAS_PER_BP) {
 		total += dva_get_dsize_sync(spa, &bp->blk_dva[i]);
 		i++;
 	}
 
 	return (total);
 }
 
 /*
  * Like bp_get_dsize_sync(), but takes SCL_VDEV as reader around the
  * computation so it can be called without a config lock held.
  */
 uint64_t
 bp_get_dsize(spa_t *spa, const blkptr_t *bp)
 {
 	uint64_t total;
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 	total = bp_get_dsize_sync(spa, bp);
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
 	return (total);
 }
 
 /*
  * ==========================================================================
  * Initialization and Termination
  * ==========================================================================
  */
 
 /*
  * AVL comparator for spa_namespace_avl: orders spa_t nodes by pool name.
  * strcmp()'s result is normalized to exactly -1, 0 or 1.
  */
 static int
 spa_name_compare(const void *a1, const void *a2)
 {
 	const spa_t *lhs = a1;
 	const spa_t *rhs = a2;
 	int order = strcmp(lhs->spa_name, rhs->spa_name);
 
 	return ((order > 0) - (order < 0));
 }
 
 /*
  * Return the global spa_active_count, which is non-zero while the SPA
  * layer is busy.
  */
 int
 spa_busy(void)
 {
 	int active = spa_active_count;
 
 	return (active);
 }
 
 /*
  * Boot-time hook: loads the pool configuration via spa_config_load().
  * In kernel builds it is attached to the "mountroot" event with
  * EVENTHANDLER_DEFINE() below.
  * NOTE(review): presumably this runs when the root filesystem is mounted;
  * confirm against the eventhandler documentation.
  */
 void
 spa_boot_init()
 {
 	spa_config_load();
 }
 
 #ifdef _KERNEL
 EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0);
 #endif
 
 /*
  * Global SPA initialization.  Creates the namespace/spare/l2cache locks,
  * the namespace condition variable and the three AVL trees.  It records
  * 'mode' in spa_mode_global, then initializes the subsystems in the order
  * listed below.  spa_fini() tears them down in roughly the reverse order.
  */
 void
 spa_init(int mode)
 {
 	mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL);
 
 	/* Pools are keyed by name; spares and l2cache devices are aux. */
 	avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
 	    offsetof(spa_t, spa_avl));
 
 	avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t),
 	    offsetof(spa_aux_t, aux_avl));
 
 	avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t),
 	    offsetof(spa_aux_t, aux_avl));
 
 	spa_mode_global = mode;
 
 	/* illumos-only: arch init in the kernel, arc watchpoints in userland. */
 #ifdef illumos
 #ifdef _KERNEL
 	spa_arch_init();
 #else
 	if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
 		arc_procfd = open("/proc/self/ctl", O_WRONLY);
 		if (arc_procfd == -1) {
 			perror("could not enable watchpoints: "
 			    "opening /proc/self/ctl failed: ");
 		} else {
 			arc_watch = B_TRUE;
 		}
 	}
 #endif
 #endif /* illumos */
 	refcount_sysinit();
 	unique_init();
 	range_tree_init();
 	zio_init();
 	/* Paired with lz4_fini() in spa_fini(). */
+	lz4_init();
 	dmu_init();
 	zil_init();
 	vdev_cache_stat_init();
 	zfs_prop_init();
 	zpool_prop_init();
 	zpool_feature_init();
 	spa_config_load();
 	l2arc_start();
 #ifndef illumos
 #ifdef _KERNEL
 	zfs_deadman_init();
 #endif
 #endif	/* !illumos */
 }
 
 /*
  * Global SPA teardown; the counterpart of spa_init().  Stops l2arc and
  * evicts all pools.  It then finalizes the subsystems in the reverse of
  * their init order, and finally destroys the AVL trees, the condition
  * variable and the locks created by spa_init().
  */
 void
 spa_fini(void)
 {
 	l2arc_stop();
 
 	spa_evict_all();
 
 	vdev_cache_stat_fini();
 	zil_fini();
 	dmu_fini();
 	/* Mirrors lz4_init(), which spa_init() calls right after zio_init(). */
+	lz4_fini();
 	zio_fini();
 	range_tree_fini();
 	unique_fini();
 	refcount_fini();
 
 	avl_destroy(&spa_namespace_avl);
 	avl_destroy(&spa_spare_avl);
 	avl_destroy(&spa_l2cache_avl);
 
 	cv_destroy(&spa_namespace_cv);
 	mutex_destroy(&spa_namespace_lock);
 	mutex_destroy(&spa_spare_lock);
 	mutex_destroy(&spa_l2cache_lock);
 }
 
 /*
  * Return whether this pool has slogs, judged by the log class having a
  * non-NULL rotor.  No locking is done.  A stale answer is acceptable
  * because the result is used only for performance, not correctness.
  */
 boolean_t
 spa_has_slogs(spa_t *spa)
 {
 	metaslab_class_t *log_class = spa->spa_log_class;
 
 	return (log_class->mc_rotor != NULL);
 }
 
 /*
  * Return the pool's spa_log_state value.
  */
 spa_log_state_t
 spa_get_log_state(spa_t *spa)
 {
 	spa_log_state_t state = spa->spa_log_state;
 
 	return (state);
 }
 
 /*
  * Store 'state' in the pool's spa_log_state; the counterpart of
  * spa_get_log_state().  No locking is done here.
  */
 void
 spa_set_log_state(spa_t *spa, spa_log_state_t state)
 {
 	spa->spa_log_state = state;
 }
 
 /*
  * Return the pool's spa_is_root flag.
  */
 boolean_t
 spa_is_root(spa_t *spa)
 {
 	boolean_t is_root = spa->spa_is_root;
 
 	return (is_root);
 }
 
 /*
  * Return whether the pool was opened with FWRITE set in its mode.
  */
 boolean_t
 spa_writeable(spa_t *spa)
 {
 	return ((spa->spa_mode & FWRITE) != 0);
 }
 
 /*
  * Return the pool's open mode flags (spa_mode).
  */
 int
 spa_mode(spa_t *spa)
 {
 	int mode = spa->spa_mode;
 
 	return (mode);
 }
 
 /*
  * Return the pool's spa_bootfs value.
  */
 uint64_t
 spa_bootfs(spa_t *spa)
 {
 	uint64_t bootfs = spa->spa_bootfs;
 
 	return (bootfs);
 }
 
 /*
  * Return the pool's spa_delegation value.
  */
 uint64_t
 spa_delegation(spa_t *spa)
 {
 	uint64_t delegation = spa->spa_delegation;
 
 	return (delegation);
 }
 
 /*
  * Return the pool's meta objset pointer (spa_meta_objset).
  */
 objset_t *
 spa_meta_objset(spa_t *spa)
 {
 	objset_t *mos = spa->spa_meta_objset;
 
 	return (mos);
 }
 
 /*
  * Return the pool's spa_dedup_checksum value.
  */
 enum zio_checksum
 spa_dedup_checksum(spa_t *spa)
 {
 	enum zio_checksum checksum = spa->spa_dedup_checksum;
 
 	return (checksum);
 }
 
 /*
  * Reset the per-pass scan statistics (done per scan pass, or at reboot).
  * The pass start time becomes "now", the pass-examined counter goes back
  * to zero, and the vdev scan stats are reset from the root vdev.  None of
  * this data is stored on disk.
  */
 void
 spa_scan_stat_init(spa_t *spa)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
 
 	/* data not stored on disk */
 	spa->spa_scan_pass_exam = 0;
 	spa->spa_scan_pass_start = gethrestime_sec();
 
 	vdev_scan_stat_init(rvd);
 }
 
 /*
  * Fill '*ps' with scan statistics for zpool status reports.
  *
  * Returns ENOENT (via SET_ERROR) without touching '*ps' when the pool has
  * no dsl pool or scan state, or when no scan function is recorded.
  * Otherwise '*ps' is zeroed, populated from the on-disk scan state plus
  * the in-core per-pass counters, and 0 is returned.
  */
 int
 spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
 {
 	dsl_pool_t *dp = spa->spa_dsl_pool;
 	dsl_scan_t *scn = NULL;
 
 	if (dp)
 		scn = dp->dp_scan;
 
 	if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE)
 		return (SET_ERROR(ENOENT));
 
 	bzero(ps, sizeof (*ps));
 
 	/* data stored on disk: identity and timing of the scan */
 	ps->pss_func = scn->scn_phys.scn_func;
 	ps->pss_state = scn->scn_phys.scn_state;
 	ps->pss_start_time = scn->scn_phys.scn_start_time;
 	ps->pss_end_time = scn->scn_phys.scn_end_time;
 
 	/* data stored on disk: progress counters */
 	ps->pss_to_examine = scn->scn_phys.scn_to_examine;
 	ps->pss_examined = scn->scn_phys.scn_examined;
 	ps->pss_to_process = scn->scn_phys.scn_to_process;
 	ps->pss_processed = scn->scn_phys.scn_processed;
 	ps->pss_errors = scn->scn_phys.scn_errors;
 
 	/* data not stored on disk */
 	ps->pss_pass_start = spa->spa_scan_pass_start;
 	ps->pss_pass_exam = spa->spa_scan_pass_exam;
 
 	return (0);
 }
 
 /*
  * Return the pool's spa_debug flag.
  */
 boolean_t
 spa_debug_enabled(spa_t *spa)
 {
 	boolean_t debug = spa->spa_debug;
 
 	return (debug);
 }
Index: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h
===================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h	(revision 262163)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_compress.h	(revision 262164)
@@ -1,90 +1,92 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 /*
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  */
 
 #ifndef _SYS_ZIO_COMPRESS_H
 #define	_SYS_ZIO_COMPRESS_H
 
 #include <sys/zio.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 /*
  * Common signature for all zio compress functions.  The arguments are the
  * source and destination buffers and their lengths.  The trailing int is
  * the argument named 'level' in the routine prototypes declared in this
  * header.
  */
 typedef size_t zio_compress_func_t(void *src, void *dst,
     size_t s_len, size_t d_len, int);
 /*
  * Common signature for all zio decompress functions.  It takes the same
  * arguments as the compress signature but returns an int.
  * NOTE(review): presumably 0 means success -- confirm against the
  * implementations.
  */
 typedef int zio_decompress_func_t(void *src, void *dst,
     size_t s_len, size_t d_len, int);
 
 /*
  * Information about each compression function.  One entry per algorithm
  * lives in zio_compress_table[], which is sized ZIO_COMPRESS_FUNCTIONS.
  */
 typedef struct zio_compress_info {
 	zio_compress_func_t	*ci_compress;	/* compression function */
 	zio_decompress_func_t	*ci_decompress;	/* decompression function */
 	int			ci_level;	/* level parameter */
 	char			*ci_name;	/* algorithm name */
 } zio_compress_info_t;
 
 extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
 
 /*
  * Compression routines.
  *
  * Each algorithm provides a compress/decompress pair matching
  * zio_compress_func_t / zio_decompress_func_t.  lz4 additionally has
  * global setup and teardown hooks, lz4_init() and lz4_fini().
  */
 extern size_t lzjb_compress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
 extern int lzjb_decompress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
 extern size_t gzip_compress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
 extern int gzip_decompress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
 extern size_t zle_compress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
 extern int zle_decompress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
+extern void lz4_init(void);
+extern void lz4_fini(void);
 extern size_t lz4_compress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
 extern int lz4_decompress(void *src, void *dst, size_t s_len, size_t d_len,
     int level);
 
 /*
  * Compress and decompress data if necessary.  The algorithm is selected by
  * the 'enum zio_compress' argument.
  * NOTE(review): presumably 'c' indexes zio_compress_table[] -- confirm in
  * the implementation.
  */
 extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst,
     size_t s_len, size_t minblocksize);
 extern int zio_decompress_data(enum zio_compress c, void *src, void *dst,
     size_t s_len, size_t d_len);
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_ZIO_COMPRESS_H */
Index: stable/9/sys/cddl/contrib/opensolaris
===================================================================
--- stable/9/sys/cddl/contrib/opensolaris	(revision 262163)
+++ stable/9/sys/cddl/contrib/opensolaris	(revision 262164)

Property changes on: stable/9/sys/cddl/contrib/opensolaris
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/cddl/contrib/opensolaris:r258137
Index: stable/9/sys
===================================================================
--- stable/9/sys	(revision 262163)
+++ stable/9/sys	(revision 262164)

Property changes on: stable/9/sys
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys:r258137
Index: stable/9
===================================================================
--- stable/9	(revision 262163)
+++ stable/9	(revision 262164)

Property changes on: stable/9
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r258137