Index: head/cddl/lib/libzpool/Makefile
===================================================================
--- head/cddl/lib/libzpool/Makefile	(revision 354252)
+++ head/cddl/lib/libzpool/Makefile	(revision 354253)
@@ -1,78 +1,82 @@
# $FreeBSD$

.include "${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/Makefile.files"

# ZFS_COMMON_SRCS
.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
# LUA_SRCS
.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lua
# ZFS_SHARED_SRCS
.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/zfs
+# LZ4_COMMON_SRCS
+.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4
# KERNEL_SRCS
.PATH: ${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common
# LIST_SRCS
.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/os
# ATOMIC_SRCS
.if exists(${SRCTOP}/sys/cddl/contrib/opensolaris/common/atomic/${MACHINE_ARCH}/opensolaris_atomic.S)
.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/atomic/${MACHINE_ARCH}
ATOMIC_SRCS=	opensolaris_atomic.S
.if ${MACHINE_ARCH} != "sparc64"
ACFLAGS+=	-Wa,--noexecstack
.endif
.else
.PATH: ${SRCTOP}/sys/cddl/compat/opensolaris/kern
ATOMIC_SRCS=	opensolaris_atomic.c
.endif
# UNICODE_SRCS
.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/unicode
# LIBCMDUTILS_SRCS
.PATH: ${SRCTOP}/cddl/contrib/opensolaris/lib/libcmdutils/common

LIB=	zpool

ZFS_COMMON_SRCS= ${ZFS_COMMON_OBJS:C/.o$/.c/} trim_map.c
ZFS_SHARED_SRCS= ${ZFS_SHARED_OBJS:C/.o$/.c/}
+LZ4_COMMON_SRCS= lz4.c
LUA_SRCS=	${LUA_OBJS:C/.o$/.c/}
KERNEL_SRCS=	kernel.c taskq.c util.c
LIST_SRCS=	list.c
UNICODE_SRCS=	u8_textprep.c
LIBCMDUTILS_SRCS=nicenum.c

SRCS=	${ZFS_COMMON_SRCS} ${ZFS_SHARED_SRCS} ${LUA_SRCS} \
-	${KERNEL_SRCS} ${LIST_SRCS} ${ATOMIC_SRCS} \
+	${LZ4_COMMON_SRCS} ${KERNEL_SRCS} ${LIST_SRCS} ${ATOMIC_SRCS} \
	${UNICODE_SRCS} ${LIBCMDUTILS_SRCS}

WARNS?=	0
CFLAGS+=	-I${SRCTOP}/sys/cddl/compat/opensolaris
CFLAGS+=	-I${SRCTOP}/cddl/compat/opensolaris/include
CFLAGS+=	-I${SRCTOP}/cddl/compat/opensolaris/lib/libumem
CFLAGS+=	-I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common
CFLAGS+=	-I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+=	-I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lua
CFLAGS+=	-I${SRCTOP}/sys/cddl/contrib/opensolaris/common/zfs
+CFLAGS+=	-I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4
CFLAGS+=	-I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common
CFLAGS+=	-I${SRCTOP}/cddl/contrib/opensolaris/head
CFLAGS+=	-I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair
CFLAGS+=	-I${SRCTOP}/cddl/contrib/opensolaris/lib/libcmdutils

# XXX: pthread doesn't have mutex_owned() equivalent, so we need to look
# into libthr private structures. That's sooo evil, but it's only for
# ZFS debugging tools needs.
CFLAGS+=	-DWANTS_MUTEX_OWNED
CFLAGS+=	-I${SRCTOP}/lib/libpthread/thread
CFLAGS+=	-I${SRCTOP}/lib/libpthread/sys
CFLAGS+=	-I${SRCTOP}/lib/libthr/arch/${MACHINE_CPUARCH}/include
CFLAGS.gcc+=	-fms-extensions

LIBADD=	md pthread z nvpair avl umem

# atomic.S doesn't like profiling.
MK_PROFILE=	no

CSTD=	c99

# Since there are many asserts in this library, it makes no sense to compile
# it without debugging.
CFLAGS+=	-g -DDEBUG=1

.include <bsd.lib.mk>
Index: head/stand/efi/boot1/Makefile
===================================================================
--- head/stand/efi/boot1/Makefile	(revision 354252)
+++ head/stand/efi/boot1/Makefile	(revision 354253)
@@ -1,100 +1,101 @@
# $FreeBSD$

.include <bsd.init.mk>

BOOT1?=		boot1
PROG=		${BOOT1}.sym
INTERNALPROG=
WARNS=		6

CFLAGS+=	-DEFI_BOOT1
# We implement a slightly non-standard %S in that it always takes a
# CHAR16 that's common in UEFI-land instead of a wchar_t. This only
# seems to matter on arm64 where wchar_t defaults to an int instead
# of a short. There's no good cast to use here so just ignore the
# warnings for now.
CWARNFLAGS.proto.c+=	-Wno-format
CWARNFLAGS.boot1.c+=	-Wno-format

# Disable bogus alignment issues
CWARNFLAGS.ufs_module.c += -Wno-format
CWARNFLAGS.ufs_module.c += -Wno-cast-align

# Disable warnings that are currently incompatible with the zfs boot code
CWARNFLAGS.zfs_module.c += -Wno-array-bounds
CWARNFLAGS.zfs_module.c += -Wno-cast-align
CWARNFLAGS.zfs_module.c += -Wno-cast-qual
CWARNFLAGS.zfs_module.c += -Wno-missing-prototypes
CWARNFLAGS.zfs_module.c += -Wno-sign-compare
CWARNFLAGS.zfs_module.c += -Wno-unused-parameter
CWARNFLAGS.zfs_module.c += -Wno-unused-function

# architecture-specific loader code
SRCS+=	boot1.c proto.c self_reloc.c start.S ufs_module.c devpath.c
.if ${MK_LOADER_ZFS} != "no"
SRCS+=		zfs_module.c
CFLAGS.zfs_module.c+=	-I${ZFSSRC}
CFLAGS.zfs_module.c+=	-I${SYSDIR}/cddl/boot/zfs
CFLAGS.zfs_module.c+=	-I${SYSDIR}/crypto/skein
CFLAGS.zfs_module.c+=	-I${SYSDIR}/cddl/contrib/opensolaris/uts/common
+CFLAGS.zfs_module.c+=	-I${SYSDIR}/cddl/contrib/opensolaris/common/lz4
CFLAGS+=	-DEFI_ZFS_BOOT
.endif

.if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} > 40201
CWARNFLAGS.self_reloc.c+=	-Wno-error=maybe-uninitialized
.endif

CFLAGS+=	-I${EFIINC}
CFLAGS+=	-I${EFIINCMD}
CFLAGS+=	-I${SYSDIR}/contrib/dev/acpica/include
CFLAGS+=	-DEFI_UFS_BOOT
.ifdef(EFI_DEBUG)
CFLAGS+=	-DEFI_DEBUG
.endif

# Always add MI sources and REGULAR efi loader bits
.PATH:		${EFISRC}/loader/arch/${MACHINE}
.PATH:		${EFISRC}/loader
.PATH:		${LDRSRC}
.PATH:		${EFISRC}/libefi
CFLAGS+=	-I${LDRSRC}

FILES=	${BOOT1}.efi
FILESMODE_${BOOT1}.efi=	${BINMODE}

LDSCRIPT=	${EFISRC}/loader/arch/${MACHINE}/ldscript.${MACHINE}
LDFLAGS+=	-Wl,-T${LDSCRIPT},-Bsymbolic,-znotext -shared

.if ${MACHINE_CPUARCH} == "aarch64"
CFLAGS+=	-mgeneral-regs-only
.endif
.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
CFLAGS+=	-fPIC
LDFLAGS+=	-Wl,-znocombreloc
.endif

LIBEFI=		${BOOTOBJ}/efi/libefi/libefi.a

#
# Add libstand for the runtime functions used by the compiler - for example
# __aeabi_* (arm) or __divdi3 (i386).
# as well as required string and memory functions for all platforms.
#
DPADD+=		${LIBEFI} ${LIBSA}
LDADD+=		${LIBEFI} ${LIBSA}

DPADD+=		${LDSCRIPT}

${BOOT1}.efi: ${PROG}
	if ${NM} ${.ALLSRC} | grep ' U '; then \
		echo "Undefined symbols in ${.ALLSRC}"; \
		exit 1; \
	fi
	SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH} \
	${OBJCOPY} -j .peheader -j .text -j .sdata -j .data \
		-j .dynamic -j .dynsym -j .rel.dyn \
		-j .rela.dyn -j .reloc -j .eh_frame \
		--output-target=${EFI_TARGET} ${.ALLSRC} ${.TARGET}

.include <bsd.prog.mk>
Index: head/stand/i386/gptzfsboot/Makefile
===================================================================
--- head/stand/i386/gptzfsboot/Makefile	(revision 354252)
+++ head/stand/i386/gptzfsboot/Makefile	(revision 354253)
@@ -1,73 +1,74 @@
# $FreeBSD$

.include <bsd.init.mk>

.PATH:		${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/gptboot \
		${BOOTSRC}/i386/zfsboot ${BOOTSRC}/i386/common \
		${SASRC}

FILES=		gptzfsboot
MAN=		gptzfsboot.8

BOOT_COMCONSOLE_PORT?= 0x3f8
BOOT_COMCONSOLE_SPEED?= 9600
B2SIOFMT?=	0x3

REL1=	0x700
ORG1=	0x7c00
ORG2=	0x0

CFLAGS+=-DBOOTPROG=\"gptzfsboot\" \
	-O1 \
	-DGPT -DZFS -DBOOT2 \
	-DSIOPRT=${BOOT_COMCONSOLE_PORT} \
	-DSIOFMT=${B2SIOFMT} \
	-DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
	-I${LDRSRC} \
	-I${BOOTSRC}/i386/common \
	-I${ZFSSRC} \
	-I${SYSDIR}/crypto/skein \
	-I${SYSDIR}/cddl/boot/zfs \
	-I${SYSDIR}/cddl/contrib/opensolaris/uts/common \
+	-I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \
	-I${BOOTSRC}/i386/btx/lib \
	-I${BOOTSRC}/i386/boot2 \
	-Wall -Waggregate-return -Wbad-function-cast \
	-Wmissing-declarations -Wmissing-prototypes -Wnested-externs \
	-Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings \
	-Wno-pointer-sign
CFLAGS.clang+=	-Wno-tentative-definition-incomplete-type
NO_WCAST_ALIGN=

CFLAGS.gcc+=	--param max-inline-insns-single=100

LD_FLAGS+=${LD_FLAGS_BIN}

CLEANFILES+=	gptzfsboot

gptzfsboot: gptldr.bin gptzfsboot.bin ${BTXKERN}
	btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l gptldr.bin \
	    -o ${.TARGET} gptzfsboot.bin

CLEANFILES+=	gptldr.bin gptldr.out gptldr.o

gptldr.bin: gptldr.out
	${OBJCOPY} -S -O binary gptldr.out ${.TARGET}

gptldr.out: gptldr.o
	${LD} ${LD_FLAGS} -e start -Ttext ${ORG1} -o ${.TARGET} gptldr.o

CLEANFILES+=	gptzfsboot.bin gptzfsboot.out zfsboot.o sio.o cons.o \
	drv.o gpt.o ${OPENCRYPTO_XTS}

gptzfsboot.bin: gptzfsboot.out
	${OBJCOPY} -S -O binary gptzfsboot.out ${.TARGET}

gptzfsboot.out: ${BTXCRT} zfsboot.o sio.o gpt.o drv.o cons.o \
	${OPENCRYPTO_XTS}
	${LD} ${LD_FLAGS} -Ttext ${ORG2} -o ${.TARGET} ${.ALLSRC} ${LIBSA32}

zfsboot.o: ${ZFSSRC}/zfsimpl.c

.include <bsd.prog.mk>
Index: head/stand/i386/zfsboot/Makefile
===================================================================
--- head/stand/i386/zfsboot/Makefile	(revision 354252)
+++ head/stand/i386/zfsboot/Makefile	(revision 354253)
@@ -1,81 +1,82 @@
# $FreeBSD$

.include <bsd.init.mk>

.PATH:		${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${SASRC}

FILES=		zfsboot
MAN=		zfsboot.8

BOOT_COMCONSOLE_PORT?= 0x3f8
BOOT_COMCONSOLE_SPEED?= 9600
B2SIOFMT?=	0x3

REL1=	0x700
ORG1=	0x7c00
ORG2=	0x2000

CFLAGS+=-DBOOTPROG=\"zfsboot\" \
	-O1 \
	-DZFS -DBOOT2 \
	-DSIOPRT=${BOOT_COMCONSOLE_PORT} \
	-DSIOFMT=${B2SIOFMT} \
	-DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
	-I${LDRSRC} \
	-I${BOOTSRC}/i386/common \
	-I${BOOTSRC}/i386 \
	-I${ZFSSRC} \
	-I${SYSDIR}/crypto/skein \
	-I${SYSDIR}/cddl/boot/zfs \
	-I${SYSDIR}/cddl/contrib/opensolaris/uts/common \
+	-I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \
	-I${BOOTSRC}/i386/boot2 \
	-Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \
	-Wmissing-declarations -Wmissing-prototypes -Wnested-externs \
	-Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings

CFLAGS.gcc+=	--param max-inline-insns-single=100

LD_FLAGS+=${LD_FLAGS_BIN}

CLEANFILES+=	zfsboot

zfsboot: zfsboot1 zfsboot2
	cat zfsboot1 zfsboot2 > zfsboot

CLEANFILES+=	zfsboot1 zfsldr.out zfsldr.o

zfsboot1: zfsldr.out
	${OBJCOPY} -S -O binary zfsldr.out ${.TARGET}

zfsldr.out: zfsldr.o
	${LD} ${LD_FLAGS} -e start -Ttext ${ORG1} -o ${.TARGET} zfsldr.o

CLEANFILES+=	zfsboot2 zfsboot.ld zfsboot.ldr zfsboot.bin zfsboot.out \
	zfsboot.o zfsboot.s zfsboot.s.tmp sio.o cons.o drv.o

# We currently allow 256k bytes for zfsboot - in practice it could be
# any size up to 3.5Mb but keeping it fixed size simplifies zfsldr.
#
BOOT2SIZE=	262144

zfsboot2: zfsboot.ld
	@set -- `ls -l ${.ALLSRC}`; x=$$((${BOOT2SIZE}-$$5)); \
	    echo "$$x bytes available"; test $$x -ge 0
	${DD} if=${.ALLSRC} of=${.TARGET} obs=${BOOT2SIZE} conv=osync

zfsboot.ld: zfsboot.ldr zfsboot.bin ${BTXKERN}
	btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l zfsboot.ldr \
	    -o ${.TARGET} -P 1 zfsboot.bin

zfsboot.ldr:
	cp /dev/null ${.TARGET}

zfsboot.bin: zfsboot.out
	${OBJCOPY} -S -O binary zfsboot.out ${.TARGET}

zfsboot.out: ${BTXCRT} zfsboot.o sio.o drv.o cons.o
	${LD} ${LD_FLAGS} -Ttext ${ORG2} -o ${.TARGET} ${.ALLSRC} ${LIBSA32}

SRCS=	zfsboot.c

.include <bsd.prog.mk>
Index: head/stand/libsa/Makefile
===================================================================
--- head/stand/libsa/Makefile	(revision 354252)
+++ head/stand/libsa/Makefile	(revision 354253)
@@ -1,172 +1,177 @@
# $FreeBSD$
# Originally from	$NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $
#
# Notes:
# - We don't use the libc strerror/sys_errlist because the string table is
#   quite large.
#

.include <bsd.init.mk>

LIBSA_CPUARCH?=${MACHINE_CPUARCH}

LIB?=	sa

# standalone components and stuff we have modified locally
SRCS+=	gzguts.h zutil.h __main.c abort.c assert.c bcd.c environment.c getopt.c gets.c \
	globals.c pager.c panic.c printf.c strdup.c strerror.c \
	random.c sbrk.c twiddle.c zalloc.c zalloc_malloc.c

# private (pruned) versions of libc string functions
SRCS+=	strcasecmp.c

.PATH: ${LIBCSRC}/net

SRCS+=	ntoh.c

# string functions from libc
.PATH: ${LIBCSRC}/string
SRCS+=	bcmp.c bcopy.c bzero.c ffs.c fls.c \
	memccpy.c memchr.c memcmp.c memcpy.c memmove.c memset.c \
	strcat.c strchr.c strcmp.c strcpy.c stpcpy.c stpncpy.c \
	strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \
	strnlen.c strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c

# stdlib functions from libc
.PATH: ${LIBCSRC}/stdlib
SRCS+=	abs.c strtol.c strtoll.c strtoul.c strtoull.c

# common boot code
.PATH: ${SYSDIR}/kern
SRCS+=	subr_boot.c

.if ${MACHINE_CPUARCH} == "arm"
.PATH: ${LIBCSRC}/arm/gen

# Do not generate movt/movw, because the relocation fixup for them does not
# translate to the -Bsymbolic -pie format required by self_reloc() in loader(8).
# Also, the fpu is not available in a standalone environment.
.if ${COMPILER_VERSION} < 30800
CFLAGS.clang+=	-mllvm -arm-use-movt=0
.else
CFLAGS.clang+=	-mno-movt
.endif
CFLAGS.clang+=	-mfpu=none
.PATH: ${SRCTOP}/contrib/compiler-rt/lib/builtins/arm/
SRCS+=	aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S
SRCS+=	aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S
.endif

.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv"
.PATH: ${LIBCSRC}/${MACHINE_CPUARCH}/gen
.endif

# Compiler support functions
.PATH: ${SRCTOP}/contrib/compiler-rt/lib/builtins/
# __clzsi2 and ctzsi2 for various builtin functions
SRCS+=	clzsi2.c ctzsi2.c
# Divide and modulus functions called by the compiler
SRCS+=	divmoddi4.c divmodsi4.c divdi3.c divsi3.c moddi3.c modsi3.c
SRCS+=	udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c
SRCS+=	ashldi3.c ashrdi3.c lshrdi3.c

.if ${MACHINE_CPUARCH:Namd64:Ni386} == ""
.PATH: ${SASRC}/x86
SRCS+=	hypervisor.c
.endif

.if ${MACHINE_CPUARCH} == "powerpc"
SRCS+=	syncicache.c
.endif

# uuid functions from libc
.PATH: ${LIBCSRC}/uuid
SRCS+=	uuid_create_nil.c uuid_equal.c uuid_from_string.c uuid_is_nil.c uuid_to_string.c

# _setjmp/_longjmp
.PATH: ${SASRC}/${LIBSA_CPUARCH}
SRCS+=	_setjmp.S

# decompression functionality from libbz2
# NOTE: to actually test this functionality after libbz2 upgrade compile
# loader(8) with LOADER_BZIP2_SUPPORT defined
.PATH: ${SRCTOP}/contrib/bzip2
CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS
SRCS+=bzlib.c crctable.c decompress.c huffman.c randtable.c

# decompression functionality from zlib
.PATH: ${SRCTOP}/sys/contrib/zlib
CFLAGS+=-DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib
SRCS+=	adler32.c crc32.c
SRCS+=	infback.c inffast.c inflate.c inftrees.c zutil.c

+# lz4 decompression functionality
+.PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4
+SRCS+=	lz4.c
+CFLAGS.lz4.c+=	-I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4
+
# Create a subset of includes that are safe, as well as adjusting those that aren't
# The lists may drive people nuts, but they are explicitly opt-in
FAKE_DIRS=xlocale arpa
SAFE_INCS=a.out.h assert.h elf.h limits.h nlist.h setjmp.h stddef.h stdbool.h string.h strings.h time.h unistd.h uuid.h
STAND_H_INC=ctype.h fcntl.h signal.h stdio.h stdlib.h
OTHER_INC=stdarg.h errno.h stdint.h

beforedepend:
	mkdir -p ${FAKE_DIRS}; \
	for i in ${SAFE_INCS}; do \
		ln -sf ${SRCTOP}/include/$$i $$i; \
	done; \
	ln -sf ${SYSDIR}/${MACHINE}/include/stdarg.h stdarg.h; \
	ln -sf ${SYSDIR}/sys/errno.h errno.h; \
	ln -sf ${SYSDIR}/sys/stdint.h stdint.h; \
	ln -sf ${SRCTOP}/include/arpa/inet.h arpa/inet.h; \
	ln -sf ${SRCTOP}/include/arpa/tftp.h arpa/tftp.h; \
	for i in _time.h _strings.h _string.h; do \
		[ -f xlocale/$$i ] || cp /dev/null xlocale/$$i; \
	done; \
	for i in ${STAND_H_INC}; do \
		ln -sf ${SASRC}/stand.h $$i; \
	done
CLEANDIRS+=${FAKE_DIRS}
CLEANFILES+= ${SAFE_INCS} ${STAND_H_INC} ${OTHER_INC}

# io routines
SRCS+=	closeall.c dev.c ioctl.c nullfs.c stat.c \
	fstat.c close.c lseek.c open.c read.c write.c readdir.c

# network routines
SRCS+=	arp.c ether.c ip.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c

# network info services:
SRCS+=	bootp.c rarp.c bootparam.c

# boot filesystems
SRCS+=	ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c
SRCS+=	dosfs.c ext2fs.c
SRCS+=	splitfs.c
SRCS+=	pkgfs.c
# kernel ufs support
.PATH: ${SRCTOP}/sys/ufs/ffs
SRCS+=ffs_subr.c ffs_tables.c

CFLAGS.bzipfs.c+= -I${SRCTOP}/contrib/bzip2

# explicit_bzero and calculate_crc32c
.PATH: ${SYSDIR}/libkern
SRCS+=	explicit_bzero.c crc32_libkern.c

# Maybe GELI
.if ${MK_LOADER_GELI} == "yes"
.include "${SASRC}/geli/Makefile.inc"
.endif

.if ${MK_LOADER_VERIEXEC} == "yes" && ${MK_BEARSSL} == "yes"
.include "${SRCTOP}/lib/libbearssl/Makefile.libsa.inc"
.include "${SRCTOP}/lib/libsecureboot/Makefile.libsa.inc"
.endif

# Maybe ZFS
.if ${MK_LOADER_ZFS} == "yes"
.include "${SASRC}/zfs/Makefile.inc"
.endif

.include <bsd.lib.mk>
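[Editor's note.] With lz4.c now compiled once into libsa, its consumers pick up prototypes from the new common directory instead of textually including the source. A minimal sketch of the interface such a consumer relies on, with the entry-point shapes inferred from the zio compress/decompress signatures used later in this patch (not quoted from the new common/lz4/lz4.h itself):

	#include <sys/types.h>

	/* zio-style contracts assumed for the relocated lz4 code */
	extern size_t lz4_compress(void *src, void *dst, size_t s_len,
	    size_t d_len, int level);
	/* returns 0 on success, nonzero on malformed input */
	extern int lz4_decompress(void *src, void *dst, size_t s_len,
	    size_t d_len, int level);
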
"${SASRC}/geli/Makefile.inc" .endif .if ${MK_LOADER_VERIEXEC} == "yes" && ${MK_BEARSSL} == "yes" .include "${SRCTOP}/lib/libbearssl/Makefile.libsa.inc" .include "${SRCTOP}/lib/libsecureboot/Makefile.libsa.inc" .endif # Maybe ZFS .if ${MK_LOADER_ZFS} == "yes" .include "${SASRC}/zfs/Makefile.inc" .endif .include Index: head/stand/libsa/zfs/Makefile.inc =================================================================== --- head/stand/libsa/zfs/Makefile.inc (revision 354252) +++ head/stand/libsa/zfs/Makefile.inc (revision 354253) @@ -1,15 +1,17 @@ # $FreeBSD$ .PATH: ${ZFSSRC} SRCS+= zfs.c skein.c skein_block.c list.c # Do not unroll skein loops, reduce code size CFLAGS+= -DSKEIN_LOOP=111 .PATH: ${SYSDIR}/crypto/skein .PATH: ${SYSDIR}/cddl/contrib/opensolaris/uts/common/os CFLAGS+= -I${LDRSRC} CFLAGS+= -I${SYSDIR}/cddl/boot/zfs CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common CFLAGS+= -I${SYSDIR}/crypto/skein +CFLAGS.zfs.c+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 + CFLAGS+= -Wformat -Wall Index: head/sys/cddl/boot/zfs/zfssubr.c =================================================================== --- head/sys/cddl/boot/zfs/zfssubr.c (revision 354252) +++ head/sys/cddl/boot/zfs/zfssubr.c (revision 354253) @@ -1,1788 +1,1789 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include __FBSDID("$FreeBSD$"); +#include + static uint64_t zfs_crc64_table[256]; #define ECKSUM 666 #define ASSERT3S(x, y, z) ((void)0) #define ASSERT3U(x, y, z) ((void)0) #define ASSERT3P(x, y, z) ((void)0) #define ASSERT0(x) ((void)0) #define ASSERT(x) ((void)0) #define panic(...) do { \ printf(__VA_ARGS__); \ for (;;) ; \ } while (0) #define kmem_alloc(size, flag) zfs_alloc((size)) #define kmem_free(ptr, size) zfs_free((ptr), (size)) static void zfs_init_crc(void) { int i, j; uint64_t *ct; /* * Calculate the crc64 table (used for the zap hash * function). */ if (zfs_crc64_table[128] != ZFS_CRC64_POLY) { memset(zfs_crc64_table, 0, sizeof(zfs_crc64_table)); for (i = 0; i < 256; i++) for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--) *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY); } } static void zio_checksum_off(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); } /* * Signature for checksum functions. */ typedef void zio_checksum_t(const void *data, uint64_t size, const void *ctx_template, zio_cksum_t *zcp); typedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt); typedef void zio_checksum_tmpl_free_t(void *ctx_template); typedef enum zio_checksum_flags { /* Strong enough for metadata? 
static void
zio_checksum_off(const void *buf, uint64_t size,
    const void *ctx_template, zio_cksum_t *zcp)
{
	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
}

/*
 * Signature for checksum functions.
 */
typedef void zio_checksum_t(const void *data, uint64_t size,
    const void *ctx_template, zio_cksum_t *zcp);
typedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt);
typedef void zio_checksum_tmpl_free_t(void *ctx_template);

typedef enum zio_checksum_flags {
	/* Strong enough for metadata? */
	ZCHECKSUM_FLAG_METADATA = (1 << 1),
	/* ZIO embedded checksum */
	ZCHECKSUM_FLAG_EMBEDDED = (1 << 2),
	/* Strong enough for dedup (without verification)? */
	ZCHECKSUM_FLAG_DEDUP = (1 << 3),
	/* Uses salt value */
	ZCHECKSUM_FLAG_SALTED = (1 << 4),
	/* Strong enough for nopwrite? */
	ZCHECKSUM_FLAG_NOPWRITE = (1 << 5)
} zio_checksum_flags_t;

/*
 * Information about each checksum function.
 */
typedef struct zio_checksum_info {
	/* checksum function for each byteorder */
	zio_checksum_t			*ci_func[2];
	zio_checksum_tmpl_init_t	*ci_tmpl_init;
	zio_checksum_tmpl_free_t	*ci_tmpl_free;
	zio_checksum_flags_t		ci_flags;
	const char			*ci_name;	/* descriptive name */
} zio_checksum_info_t;

#include "blkptr.c"

#include "fletcher.c"
#include "sha256.c"
#include "skein_zfs.c"

static zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
	{{NULL, NULL}, NULL, NULL, 0, "inherit"},
	{{NULL, NULL}, NULL, NULL, 0, "on"},
	{{zio_checksum_off, zio_checksum_off}, NULL, NULL, 0, "off"},
	{{zio_checksum_SHA256, zio_checksum_SHA256}, NULL, NULL,
	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, "label"},
	{{zio_checksum_SHA256, zio_checksum_SHA256}, NULL, NULL,
	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, "gang_header"},
	{{fletcher_2_native, fletcher_2_byteswap}, NULL, NULL,
	    ZCHECKSUM_FLAG_EMBEDDED, "zilog"},
	{{fletcher_2_native, fletcher_2_byteswap}, NULL, NULL,
	    0, "fletcher2"},
	{{fletcher_4_native, fletcher_4_byteswap}, NULL, NULL,
	    ZCHECKSUM_FLAG_METADATA, "fletcher4"},
	{{zio_checksum_SHA256, zio_checksum_SHA256}, NULL, NULL,
	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
	    ZCHECKSUM_FLAG_NOPWRITE, "SHA256"},
	{{fletcher_4_native, fletcher_4_byteswap}, NULL, NULL,
	    ZCHECKSUM_FLAG_EMBEDDED, "zillog2"},
	{{zio_checksum_off, zio_checksum_off}, NULL, NULL,
	    0, "noparity"},
	{{zio_checksum_SHA512_native, zio_checksum_SHA512_byteswap},
	    NULL, NULL,
	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
	    ZCHECKSUM_FLAG_NOPWRITE, "SHA512"},
	{{zio_checksum_skein_native, zio_checksum_skein_byteswap},
	    zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free,
	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
	    ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"},
	/* no edonr for now */
	{{NULL, NULL}, NULL, NULL, ZCHECKSUM_FLAG_METADATA |
	    ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "edonr"}
};

/*
 * Common signature for all zio compress/decompress functions.
 */
typedef size_t zio_compress_func_t(void *src, void *dst,
    size_t s_len, size_t d_len, int);
typedef int zio_decompress_func_t(void *src, void *dst,
    size_t s_len, size_t d_len, int);

/*
 * Information about each compression function.
 */
typedef struct zio_compress_info {
	zio_compress_func_t	*ci_compress;	/* compression function */
	zio_decompress_func_t	*ci_decompress;	/* decompression function */
	int			ci_level;	/* level parameter */
	const char		*ci_name;	/* algorithm name */
} zio_compress_info_t;

#include "lzjb.c"
#include "zle.c"
-#include "lz4.c"

/*
 * Compression vectors.
 */
static zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
	{NULL, NULL, 0, "inherit"},
	{NULL, NULL, 0, "on"},
	{NULL, NULL, 0, "uncompressed"},
	{NULL, lzjb_decompress, 0, "lzjb"},
	{NULL, NULL, 0, "empty"},
	{NULL, NULL, 1, "gzip-1"},
	{NULL, NULL, 2, "gzip-2"},
	{NULL, NULL, 3, "gzip-3"},
	{NULL, NULL, 4, "gzip-4"},
	{NULL, NULL, 5, "gzip-5"},
	{NULL, NULL, 6, "gzip-6"},
	{NULL, NULL, 7, "gzip-7"},
	{NULL, NULL, 8, "gzip-8"},
	{NULL, NULL, 9, "gzip-9"},
	{NULL, zle_decompress, 64, "zle"},
	{NULL, lz4_decompress, 0, "lz4"},
};
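/*
 * [Editor's note -- not part of this commit.]  With lz4.c no longer
 * textually #included, the lz4_decompress slot above now resolves against
 * the copy built from common/lz4.  On disk, a ZFS lz4 buffer begins with a
 * 4-byte big-endian length of the compressed stream, which the wrapper
 * validates before inflating.  A hypothetical table-driven caller, assuming
 * the usual ZIO_COMPRESS_LZ4 index from the boot ZFS headers:
 */
static int
example_lz4_decompress(void *src, void *dst, size_t s_len, size_t d_len)
{
	zio_compress_info_t *ci = &zio_compress_table[ZIO_COMPRESS_LZ4];

	/* zio contract: 0 on success, nonzero on malformed input */
	return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level));
}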
static void
byteswap_uint64_array(void *vbuf, size_t size)
{
	uint64_t *buf = vbuf;
	size_t count = size >> 3;
	int i;

	ASSERT((size & 7) == 0);

	for (i = 0; i < count; i++)
		buf[i] = BSWAP_64(buf[i]);
}

/*
 * Set the external verifier for a gang block based on <vdev, offset, txg>,
 * a tuple which is guaranteed to be unique for the life of the pool.
 */
static void
zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
{
	const dva_t *dva = BP_IDENTITY(bp);
	uint64_t txg = BP_PHYSICAL_BIRTH(bp);

	ASSERT(BP_IS_GANG(bp));

	ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0);
}

/*
 * Set the external verifier for a label block based on its offset.
 * The vdev is implicit, and the txg is unknowable at pool open time --
 * hence the logic in vdev_uberblock_load() to find the most recent copy.
 */
static void
zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
{
	ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);
}

/*
 * Calls the template init function of a checksum which supports context
 * templates and installs the template into the spa_t.
 */
static void
zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
{
	zio_checksum_info_t *ci = &zio_checksum_table[checksum];

	if (ci->ci_tmpl_init == NULL)
		return;

	if (spa->spa_cksum_tmpls[checksum] != NULL)
		return;

	if (spa->spa_cksum_tmpls[checksum] == NULL) {
		spa->spa_cksum_tmpls[checksum] =
		    ci->ci_tmpl_init(&spa->spa_cksum_salt);
	}
}

/*
 * Called by a spa_t that's about to be deallocated. This steps through
 * all of the checksum context templates and deallocates any that were
 * initialized using the algorithm-specific template init function.
 */
static void __unused
zio_checksum_templates_free(spa_t *spa)
{
	for (enum zio_checksum checksum = 0;
	    checksum < ZIO_CHECKSUM_FUNCTIONS; checksum++) {
		if (spa->spa_cksum_tmpls[checksum] != NULL) {
			zio_checksum_info_t *ci = &zio_checksum_table[checksum];

			ci->ci_tmpl_free(spa->spa_cksum_tmpls[checksum]);
			spa->spa_cksum_tmpls[checksum] = NULL;
		}
	}
}

static int
zio_checksum_verify(const spa_t *spa, const blkptr_t *bp, void *data)
{
	uint64_t size;
	unsigned int checksum;
	zio_checksum_info_t *ci;
	void *ctx = NULL;
	zio_cksum_t actual_cksum, expected_cksum, verifier;
	int byteswap;

	checksum = BP_GET_CHECKSUM(bp);
	size = BP_GET_PSIZE(bp);

	if (checksum >= ZIO_CHECKSUM_FUNCTIONS)
		return (EINVAL);
	ci = &zio_checksum_table[checksum];
	if (ci->ci_func[0] == NULL || ci->ci_func[1] == NULL)
		return (EINVAL);

	if (spa != NULL) {
		zio_checksum_template_init(checksum, __DECONST(spa_t *,spa));
		ctx = spa->spa_cksum_tmpls[checksum];
	}

	if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
		zio_eck_t *eck;

		ASSERT(checksum == ZIO_CHECKSUM_GANG_HEADER ||
		    checksum == ZIO_CHECKSUM_LABEL);

		eck = (zio_eck_t *)((char *)data + size) - 1;

		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
			zio_checksum_gang_verifier(&verifier, bp);
		else if (checksum == ZIO_CHECKSUM_LABEL)
			zio_checksum_label_verifier(&verifier,
			    DVA_GET_OFFSET(BP_IDENTITY(bp)));
		else
			verifier = bp->blk_cksum;

		byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));

		if (byteswap)
			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));

		expected_cksum = eck->zec_cksum;
		eck->zec_cksum = verifier;
		ci->ci_func[byteswap](data, size, ctx, &actual_cksum);
		eck->zec_cksum = expected_cksum;

		if (byteswap)
			byteswap_uint64_array(&expected_cksum,
			    sizeof (zio_cksum_t));
	} else {
		byteswap = BP_SHOULD_BYTESWAP(bp);
		expected_cksum = bp->blk_cksum;
		ci->ci_func[byteswap](data, size, ctx, &actual_cksum);
	}

	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) {
		/*printf("ZFS: read checksum %s failed\n", ci->ci_name);*/
		return (EIO);
	}

	return (0);
}

static int
zio_decompress_data(int cpfunc, void *src, uint64_t srcsize,
	void *dest, uint64_t destsize)
{
	zio_compress_info_t *ci;

	if (cpfunc >= ZIO_COMPRESS_FUNCTIONS) {
		printf("ZFS: unsupported compression algorithm %u\n", cpfunc);
		return (EIO);
	}

	ci = &zio_compress_table[cpfunc];
	if (!ci->ci_decompress) {
		printf("ZFS: unsupported compression algorithm %s\n",
		    ci->ci_name);
		return (EIO);
	}

	return (ci->ci_decompress(src, dest, srcsize, destsize, ci->ci_level));
}

static uint64_t
zap_hash(uint64_t salt, const char *name)
{
	const uint8_t *cp;
	uint8_t c;
	uint64_t crc = salt;

	ASSERT(crc != 0);
	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
	for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++)
		crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ c) & 0xFF];

	/*
	 * Only use 28 bits, since we need 4 bits in the cookie for the
	 * collision differentiator. We MUST use the high bits, since
	 * those are the ones that we first pay attention to when
	 * choosing the bucket.
	 */
	crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);

	return (crc);
}

static void *zfs_alloc(size_t size);
static void zfs_free(void *ptr, size_t size);

typedef struct raidz_col {
	uint64_t rc_devidx;		/* child device index for I/O */
	uint64_t rc_offset;		/* device offset */
	uint64_t rc_size;		/* I/O size */
	void *rc_data;			/* I/O data */
	int rc_error;			/* I/O error for this device */
	uint8_t rc_tried;		/* Did we attempt this I/O column? */
	uint8_t rc_skipped;		/* Did we skip this I/O column?
*/ } raidz_col_t; typedef struct raidz_map { uint64_t rm_cols; /* Regular column count */ uint64_t rm_scols; /* Count including skipped columns */ uint64_t rm_bigcols; /* Number of oversized columns */ uint64_t rm_asize; /* Actual total I/O size */ uint64_t rm_missingdata; /* Count of missing data devices */ uint64_t rm_missingparity; /* Count of missing parity devices */ uint64_t rm_firstdatacol; /* First data column/parity count */ uint64_t rm_nskip; /* Skipped sectors for padding */ uint64_t rm_skipstart; /* Column index of padding start */ uintptr_t rm_reports; /* # of referencing checksum reports */ uint8_t rm_freed; /* map no longer has referencing ZIO */ uint8_t rm_ecksuminjected; /* checksum error was injected */ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */ } raidz_map_t; #define VDEV_RAIDZ_P 0 #define VDEV_RAIDZ_Q 1 #define VDEV_RAIDZ_R 2 #define VDEV_RAIDZ_MUL_2(x) (((x) << 1) ^ (((x) & 0x80) ? 0x1d : 0)) #define VDEV_RAIDZ_MUL_4(x) (VDEV_RAIDZ_MUL_2(VDEV_RAIDZ_MUL_2(x))) /* * We provide a mechanism to perform the field multiplication operation on a * 64-bit value all at once rather than a byte at a time. This works by * creating a mask from the top bit in each byte and using that to * conditionally apply the XOR of 0x1d. */ #define VDEV_RAIDZ_64MUL_2(x, mask) \ { \ (mask) = (x) & 0x8080808080808080ULL; \ (mask) = ((mask) << 1) - ((mask) >> 7); \ (x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \ ((mask) & 0x1d1d1d1d1d1d1d1dULL); \ } #define VDEV_RAIDZ_64MUL_4(x, mask) \ { \ VDEV_RAIDZ_64MUL_2((x), mask); \ VDEV_RAIDZ_64MUL_2((x), mask); \ } /* * These two tables represent powers and logs of 2 in the Galois field defined * above. These values were computed by repeatedly multiplying by 2 as above. */ static const uint8_t vdev_raidz_pow2[256] = { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01 }; static const uint8_t vdev_raidz_log2[256] = { 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6, 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b, 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81, 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 
0x08, 0x4c, 0x71, 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21, 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45, 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9, 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6, 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd, 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88, 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd, 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40, 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e, 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d, 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b, 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57, 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d, 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18, 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c, 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e, 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd, 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61, 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e, 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2, 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76, 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6, 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa, 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a, 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51, 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7, 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8, 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf, }; /* * Multiply a given number by 2 raised to the given power. */ static uint8_t vdev_raidz_exp2(uint8_t a, int exp) { if (a == 0) return (0); ASSERT(exp >= 0); ASSERT(vdev_raidz_log2[a] > 0 || a == 1); exp += vdev_raidz_log2[a]; if (exp > 255) exp -= 255; return (vdev_raidz_pow2[exp]); } static void vdev_raidz_generate_parity_p(raidz_map_t *rm) { uint64_t *p, *src, pcount, ccount, i; int c; pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { src = rm->rm_col[c].rc_data; p = rm->rm_col[VDEV_RAIDZ_P].rc_data; ccount = rm->rm_col[c].rc_size / sizeof (src[0]); if (c == rm->rm_firstdatacol) { ASSERT(ccount == pcount); for (i = 0; i < ccount; i++, src++, p++) { *p = *src; } } else { ASSERT(ccount <= pcount); for (i = 0; i < ccount; i++, src++, p++) { *p ^= *src; } } } } static void vdev_raidz_generate_parity_pq(raidz_map_t *rm) { uint64_t *p, *q, *src, pcnt, ccnt, mask, i; int c; pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == rm->rm_col[VDEV_RAIDZ_Q].rc_size); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { src = rm->rm_col[c].rc_data; p = rm->rm_col[VDEV_RAIDZ_P].rc_data; q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); if (c == rm->rm_firstdatacol) { ASSERT(ccnt == pcnt || ccnt == 0); for (i = 0; i < ccnt; i++, src++, p++, q++) { *p = *src; *q = *src; } for (; i < pcnt; i++, src++, p++, q++) { *p = 0; *q = 0; } } else { ASSERT(ccnt <= pcnt); /* * Apply the algorithm described above by multiplying * the previous result and adding in the new value. */ for (i = 0; i < ccnt; i++, src++, p++, q++) { *p ^= *src; VDEV_RAIDZ_64MUL_2(*q, mask); *q ^= *src; } /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. 
*/ for (; i < pcnt; i++, q++) { VDEV_RAIDZ_64MUL_2(*q, mask); } } } } static void vdev_raidz_generate_parity_pqr(raidz_map_t *rm) { uint64_t *p, *q, *r, *src, pcnt, ccnt, mask, i; int c; pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == rm->rm_col[VDEV_RAIDZ_Q].rc_size); ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == rm->rm_col[VDEV_RAIDZ_R].rc_size); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { src = rm->rm_col[c].rc_data; p = rm->rm_col[VDEV_RAIDZ_P].rc_data; q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; r = rm->rm_col[VDEV_RAIDZ_R].rc_data; ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); if (c == rm->rm_firstdatacol) { ASSERT(ccnt == pcnt || ccnt == 0); for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { *p = *src; *q = *src; *r = *src; } for (; i < pcnt; i++, src++, p++, q++, r++) { *p = 0; *q = 0; *r = 0; } } else { ASSERT(ccnt <= pcnt); /* * Apply the algorithm described above by multiplying * the previous result and adding in the new value. */ for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { *p ^= *src; VDEV_RAIDZ_64MUL_2(*q, mask); *q ^= *src; VDEV_RAIDZ_64MUL_4(*r, mask); *r ^= *src; } /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. */ for (; i < pcnt; i++, q++, r++) { VDEV_RAIDZ_64MUL_2(*q, mask); VDEV_RAIDZ_64MUL_4(*r, mask); } } } } /* * Generate RAID parity in the first virtual columns according to the number of * parity columns available. */ static void vdev_raidz_generate_parity(raidz_map_t *rm) { switch (rm->rm_firstdatacol) { case 1: vdev_raidz_generate_parity_p(rm); break; case 2: vdev_raidz_generate_parity_pq(rm); break; case 3: vdev_raidz_generate_parity_pqr(rm); break; default: panic("invalid RAID-Z configuration"); } } /* BEGIN CSTYLED */ /* * In the general case of reconstruction, we must solve the system of linear * equations defined by the coeffecients used to generate parity as well as * the contents of the data and parity disks. This can be expressed with * vectors for the original data (D) and the actual data (d) and parity (p) * and a matrix composed of the identity matrix (I) and a dispersal matrix (V): * * __ __ __ __ * | | __ __ | p_0 | * | V | | D_0 | | p_m-1 | * | | x | : | = | d_0 | * | I | | D_n-1 | | : | * | | ~~ ~~ | d_n-1 | * ~~ ~~ ~~ ~~ * * I is simply a square identity matrix of size n, and V is a vandermonde * matrix defined by the coeffecients we chose for the various parity columns * (1, 2, 4). Note that these values were chosen both for simplicity, speedy * computation as well as linear separability. * * __ __ __ __ * | 1 .. 1 1 1 | | p_0 | * | 2^n-1 .. 4 2 1 | __ __ | : | * | 4^n-1 .. 16 4 1 | | D_0 | | p_m-1 | * | 1 .. 0 0 0 | | D_1 | | d_0 | * | 0 .. 0 0 0 | x | D_2 | = | d_1 | * | : : : : | | : | | d_2 | * | 0 .. 1 0 0 | | D_n-1 | | : | * | 0 .. 0 1 0 | ~~ ~~ | : | * | 0 .. 0 0 1 | | d_n-1 | * ~~ ~~ ~~ ~~ * * Note that I, V, d, and p are known. To compute D, we must invert the * matrix and use the known data and parity values to reconstruct the unknown * data values. We begin by removing the rows in V|I and d|p that correspond * to failed or missing columns; we then make V|I square (n x n) and d|p * sized n by removing rows corresponding to unused parity from the bottom up * to generate (V|I)' and (d|p)'. We can then generate the inverse of (V|I)' * using Gauss-Jordan elimination. 
In the example below we use m=3 parity * columns, n=8 data columns, with errors in d_1, d_2, and p_1: * __ __ * | 1 1 1 1 1 1 1 1 | * | 128 64 32 16 8 4 2 1 | <-----+-+-- missing disks * | 19 205 116 29 64 16 4 1 | / / * | 1 0 0 0 0 0 0 0 | / / * | 0 1 0 0 0 0 0 0 | <--' / * (V|I) = | 0 0 1 0 0 0 0 0 | <---' * | 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 | * ~~ ~~ * __ __ * | 1 1 1 1 1 1 1 1 | * | 128 64 32 16 8 4 2 1 | * | 19 205 116 29 64 16 4 1 | * | 1 0 0 0 0 0 0 0 | * | 0 1 0 0 0 0 0 0 | * (V|I)' = | 0 0 1 0 0 0 0 0 | * | 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 | * ~~ ~~ * * Here we employ Gauss-Jordan elimination to find the inverse of (V|I)'. We * have carefully chosen the seed values 1, 2, and 4 to ensure that this * matrix is not singular. * __ __ * | 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 | * | 19 205 116 29 64 16 4 1 0 1 0 0 0 0 0 0 | * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | * ~~ ~~ * __ __ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | * | 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 | * | 19 205 116 29 64 16 4 1 0 1 0 0 0 0 0 0 | * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | * ~~ ~~ * __ __ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | * | 0 205 116 0 0 0 0 0 0 1 19 29 64 16 4 1 | * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | * ~~ ~~ * __ __ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | * | 0 0 185 0 0 0 0 0 205 1 222 208 141 221 201 204 | * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | * ~~ ~~ * __ __ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | * | 0 1 1 0 0 0 0 0 1 0 1 1 1 1 1 1 | * | 0 0 1 0 0 0 0 0 166 100 4 40 158 168 216 209 | * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | * ~~ ~~ * __ __ * | 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 | * | 0 1 0 0 0 0 0 0 167 100 5 41 159 169 217 208 | * | 0 0 1 0 0 0 0 0 166 100 4 40 158 168 216 209 | * | 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 | * ~~ ~~ * __ __ * | 0 0 1 0 0 0 0 0 | * | 167 100 5 41 159 169 217 208 | * | 166 100 4 40 158 168 216 209 | * (V|I)'^-1 = | 0 0 0 1 0 0 0 0 | * | 0 0 0 0 1 0 0 0 | * | 0 0 0 0 0 1 0 0 | * | 0 0 0 0 0 0 1 0 | * | 0 0 0 0 0 0 0 1 | * ~~ ~~ * * We can then simply compute D = (V|I)'^-1 x (d|p)' to discover the values * of the missing data. * * As is apparent from the example above, the only non-trivial rows in the * inverse matrix correspond to the data disks that we're trying to * reconstruct. Indeed, those are the only rows we need as the others would * only be useful for reconstructing data known or assumed to be valid. 
For * that reason, we only build the coefficients in the rows that correspond to * targeted columns. */ /* END CSTYLED */ static void vdev_raidz_matrix_init(raidz_map_t *rm, int n, int nmap, int *map, uint8_t **rows) { int i, j; int pow; ASSERT(n == rm->rm_cols - rm->rm_firstdatacol); /* * Fill in the missing rows of interest. */ for (i = 0; i < nmap; i++) { ASSERT3S(0, <=, map[i]); ASSERT3S(map[i], <=, 2); pow = map[i] * n; if (pow > 255) pow -= 255; ASSERT(pow <= 255); for (j = 0; j < n; j++) { pow -= map[i]; if (pow < 0) pow += 255; rows[i][j] = vdev_raidz_pow2[pow]; } } } static void vdev_raidz_matrix_invert(raidz_map_t *rm, int n, int nmissing, int *missing, uint8_t **rows, uint8_t **invrows, const uint8_t *used) { int i, j, ii, jj; uint8_t log; /* * Assert that the first nmissing entries from the array of used * columns correspond to parity columns and that subsequent entries * correspond to data columns. */ for (i = 0; i < nmissing; i++) { ASSERT3S(used[i], <, rm->rm_firstdatacol); } for (; i < n; i++) { ASSERT3S(used[i], >=, rm->rm_firstdatacol); } /* * First initialize the storage where we'll compute the inverse rows. */ for (i = 0; i < nmissing; i++) { for (j = 0; j < n; j++) { invrows[i][j] = (i == j) ? 1 : 0; } } /* * Subtract all trivial rows from the rows of consequence. */ for (i = 0; i < nmissing; i++) { for (j = nmissing; j < n; j++) { ASSERT3U(used[j], >=, rm->rm_firstdatacol); jj = used[j] - rm->rm_firstdatacol; ASSERT3S(jj, <, n); invrows[i][j] = rows[i][jj]; rows[i][jj] = 0; } } /* * For each of the rows of interest, we must normalize it and subtract * a multiple of it from the other rows. */ for (i = 0; i < nmissing; i++) { for (j = 0; j < missing[i]; j++) { ASSERT3U(rows[i][j], ==, 0); } ASSERT3U(rows[i][missing[i]], !=, 0); /* * Compute the inverse of the first element and multiply each * element in the row by that value. */ log = 255 - vdev_raidz_log2[rows[i][missing[i]]]; for (j = 0; j < n; j++) { rows[i][j] = vdev_raidz_exp2(rows[i][j], log); invrows[i][j] = vdev_raidz_exp2(invrows[i][j], log); } for (ii = 0; ii < nmissing; ii++) { if (i == ii) continue; ASSERT3U(rows[ii][missing[i]], !=, 0); log = vdev_raidz_log2[rows[ii][missing[i]]]; for (j = 0; j < n; j++) { rows[ii][j] ^= vdev_raidz_exp2(rows[i][j], log); invrows[ii][j] ^= vdev_raidz_exp2(invrows[i][j], log); } } } /* * Verify that the data that is left in the rows are properly part of * an identity matrix. 
*/ for (i = 0; i < nmissing; i++) { for (j = 0; j < n; j++) { if (j == missing[i]) { ASSERT3U(rows[i][j], ==, 1); } else { ASSERT3U(rows[i][j], ==, 0); } } } } static void vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, int *missing, uint8_t **invrows, const uint8_t *used) { int i, j, x, cc, c; uint8_t *src; uint64_t ccount; uint8_t *dst[VDEV_RAIDZ_MAXPARITY]; uint64_t dcount[VDEV_RAIDZ_MAXPARITY]; uint8_t log, val; int ll; uint8_t *invlog[VDEV_RAIDZ_MAXPARITY]; uint8_t *p, *pp; size_t psize; log = 0; /* gcc */ psize = sizeof (invlog[0][0]) * n * nmissing; p = zfs_alloc(psize); for (pp = p, i = 0; i < nmissing; i++) { invlog[i] = pp; pp += n; } for (i = 0; i < nmissing; i++) { for (j = 0; j < n; j++) { ASSERT3U(invrows[i][j], !=, 0); invlog[i][j] = vdev_raidz_log2[invrows[i][j]]; } } for (i = 0; i < n; i++) { c = used[i]; ASSERT3U(c, <, rm->rm_cols); src = rm->rm_col[c].rc_data; ccount = rm->rm_col[c].rc_size; for (j = 0; j < nmissing; j++) { cc = missing[j] + rm->rm_firstdatacol; ASSERT3U(cc, >=, rm->rm_firstdatacol); ASSERT3U(cc, <, rm->rm_cols); ASSERT3U(cc, !=, c); dst[j] = rm->rm_col[cc].rc_data; dcount[j] = rm->rm_col[cc].rc_size; } ASSERT(ccount >= rm->rm_col[missing[0]].rc_size || i > 0); for (x = 0; x < ccount; x++, src++) { if (*src != 0) log = vdev_raidz_log2[*src]; for (cc = 0; cc < nmissing; cc++) { if (x >= dcount[cc]) continue; if (*src == 0) { val = 0; } else { if ((ll = log + invlog[cc][i]) >= 255) ll -= 255; val = vdev_raidz_pow2[ll]; } if (i == 0) dst[cc][x] = val; else dst[cc][x] ^= val; } } } zfs_free(p, psize); } static int vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) { int n, i, c, t, tt; int nmissing_rows; int missing_rows[VDEV_RAIDZ_MAXPARITY]; int parity_map[VDEV_RAIDZ_MAXPARITY]; uint8_t *p, *pp; size_t psize; uint8_t *rows[VDEV_RAIDZ_MAXPARITY]; uint8_t *invrows[VDEV_RAIDZ_MAXPARITY]; uint8_t *used; int code = 0; n = rm->rm_cols - rm->rm_firstdatacol; /* * Figure out which data columns are missing. */ nmissing_rows = 0; for (t = 0; t < ntgts; t++) { if (tgts[t] >= rm->rm_firstdatacol) { missing_rows[nmissing_rows++] = tgts[t] - rm->rm_firstdatacol; } } /* * Figure out which parity columns to use to help generate the missing * data columns. */ for (tt = 0, c = 0, i = 0; i < nmissing_rows; c++) { ASSERT(tt < ntgts); ASSERT(c < rm->rm_firstdatacol); /* * Skip any targeted parity columns. */ if (c == tgts[tt]) { tt++; continue; } code |= 1 << c; parity_map[i] = c; i++; } ASSERT(code != 0); ASSERT3U(code, <, 1 << VDEV_RAIDZ_MAXPARITY); psize = (sizeof (rows[0][0]) + sizeof (invrows[0][0])) * nmissing_rows * n + sizeof (used[0]) * n; p = kmem_alloc(psize, KM_SLEEP); for (pp = p, i = 0; i < nmissing_rows; i++) { rows[i] = pp; pp += n; invrows[i] = pp; pp += n; } used = pp; for (i = 0; i < nmissing_rows; i++) { used[i] = parity_map[i]; } for (tt = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { if (tt < nmissing_rows && c == missing_rows[tt] + rm->rm_firstdatacol) { tt++; continue; } ASSERT3S(i, <, n); used[i] = c; i++; } /* * Initialize the interesting rows of the matrix. */ vdev_raidz_matrix_init(rm, n, nmissing_rows, parity_map, rows); /* * Invert the matrix. */ vdev_raidz_matrix_invert(rm, n, nmissing_rows, missing_rows, rows, invrows, used); /* * Reconstruct the missing data using the generated matrix. 
*/ vdev_raidz_matrix_reconstruct(rm, n, nmissing_rows, missing_rows, invrows, used); kmem_free(p, psize); return (code); } static int vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt) { int tgts[VDEV_RAIDZ_MAXPARITY]; int ntgts; int i, c; int code; int nbadparity, nbaddata; /* * The tgts list must already be sorted. */ for (i = 1; i < nt; i++) { ASSERT(t[i] > t[i - 1]); } nbadparity = rm->rm_firstdatacol; nbaddata = rm->rm_cols - nbadparity; ntgts = 0; for (i = 0, c = 0; c < rm->rm_cols; c++) { if (i < nt && c == t[i]) { tgts[ntgts++] = c; i++; } else if (rm->rm_col[c].rc_error != 0) { tgts[ntgts++] = c; } else if (c >= rm->rm_firstdatacol) { nbaddata--; } else { nbadparity--; } } ASSERT(ntgts >= nt); ASSERT(nbaddata >= 0); ASSERT(nbaddata + nbadparity == ntgts); code = vdev_raidz_reconstruct_general(rm, tgts, ntgts); ASSERT(code < (1 << VDEV_RAIDZ_MAXPARITY)); ASSERT(code > 0); return (code); } static raidz_map_t * vdev_raidz_map_alloc(void *data, off_t offset, size_t size, uint64_t unit_shift, uint64_t dcols, uint64_t nparity) { raidz_map_t *rm; uint64_t b = offset >> unit_shift; uint64_t s = size >> unit_shift; uint64_t f = b % dcols; uint64_t o = (b / dcols) << unit_shift; uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; q = s / (dcols - nparity); r = s - q * (dcols - nparity); bc = (r == 0 ? 0 : r + nparity); tot = s + nparity * (q + (r == 0 ? 0 : 1)); if (q == 0) { acols = bc; scols = MIN(dcols, roundup(bc, nparity + 1)); } else { acols = dcols; scols = dcols; } ASSERT3U(acols, <=, scols); rm = zfs_alloc(offsetof(raidz_map_t, rm_col[scols])); rm->rm_cols = acols; rm->rm_scols = scols; rm->rm_bigcols = bc; rm->rm_skipstart = bc; rm->rm_missingdata = 0; rm->rm_missingparity = 0; rm->rm_firstdatacol = nparity; rm->rm_reports = 0; rm->rm_freed = 0; rm->rm_ecksuminjected = 0; asize = 0; for (c = 0; c < scols; c++) { col = f + c; coff = o; if (col >= dcols) { col -= dcols; coff += 1ULL << unit_shift; } rm->rm_col[c].rc_devidx = col; rm->rm_col[c].rc_offset = coff; rm->rm_col[c].rc_data = NULL; rm->rm_col[c].rc_error = 0; rm->rm_col[c].rc_tried = 0; rm->rm_col[c].rc_skipped = 0; if (c >= acols) rm->rm_col[c].rc_size = 0; else if (c < bc) rm->rm_col[c].rc_size = (q + 1) << unit_shift; else rm->rm_col[c].rc_size = q << unit_shift; asize += rm->rm_col[c].rc_size; } ASSERT3U(asize, ==, tot << unit_shift); rm->rm_asize = roundup(asize, (nparity + 1) << unit_shift); rm->rm_nskip = roundup(tot, nparity + 1) - tot; ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << unit_shift); ASSERT3U(rm->rm_nskip, <=, nparity); for (c = 0; c < rm->rm_firstdatacol; c++) rm->rm_col[c].rc_data = zfs_alloc(rm->rm_col[c].rc_size); rm->rm_col[c].rc_data = data; for (c = c + 1; c < acols; c++) rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data + rm->rm_col[c - 1].rc_size; /* * If all data stored spans all columns, there's a danger that parity * will always be on the same device and, since parity isn't read * during normal operation, that that device's I/O bandwidth won't be * used effectively. We therefore switch the parity every 1MB. * * ... at least that was, ostensibly, the theory. As a practical * matter unless we juggle the parity between all devices evenly, we * won't see any benefit. Further, occasional writes that aren't a * multiple of the LCM of the number of children and the minimum * stripe width are sufficient to avoid pessimal behavior. 
* Unfortunately, this decision created an implicit on-disk format * requirement that we need to support for all eternity, but only * for single-parity RAID-Z. * * If we intend to skip a sector in the zeroth column for padding * we must make sure to note this swap. We will never intend to * skip the first column since at least one data and one parity * column must appear in each row. */ ASSERT(rm->rm_cols >= 2); ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size); if (rm->rm_firstdatacol == 1 && (offset & (1ULL << 20))) { devidx = rm->rm_col[0].rc_devidx; o = rm->rm_col[0].rc_offset; rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx; rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset; rm->rm_col[1].rc_devidx = devidx; rm->rm_col[1].rc_offset = o; if (rm->rm_skipstart == 0) rm->rm_skipstart = 1; } return (rm); } static void vdev_raidz_map_free(raidz_map_t *rm) { int c; for (c = rm->rm_firstdatacol - 1; c >= 0; c--) zfs_free(rm->rm_col[c].rc_data, rm->rm_col[c].rc_size); zfs_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_scols])); } static vdev_t * vdev_child(vdev_t *pvd, uint64_t devidx) { vdev_t *cvd; STAILQ_FOREACH(cvd, &pvd->v_children, v_childlink) { if (cvd->v_id == devidx) break; } return (cvd); } /* * We keep track of whether or not there were any injected errors, so that * any ereports we generate can note it. */ static int raidz_checksum_verify(const spa_t *spa, const blkptr_t *bp, void *data, uint64_t size) { return (zio_checksum_verify(spa, bp, data)); } /* * Generate the parity from the data columns. If we tried and were able to * read the parity without error, verify that the generated parity matches the * data we read. If it doesn't, we fire off a checksum error. Return the * number such failures. */ static int raidz_parity_verify(raidz_map_t *rm) { void *orig[VDEV_RAIDZ_MAXPARITY]; int c, ret = 0; raidz_col_t *rc; for (c = 0; c < rm->rm_firstdatacol; c++) { rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; orig[c] = zfs_alloc(rc->rc_size); bcopy(rc->rc_data, orig[c], rc->rc_size); } vdev_raidz_generate_parity(rm); for (c = rm->rm_firstdatacol - 1; c >= 0; c--) { rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) { rc->rc_error = ECKSUM; ret++; } zfs_free(orig[c], rc->rc_size); } return (ret); } /* * Iterate over all combinations of bad data and attempt a reconstruction. * Note that the algorithm below is non-optimal because it doesn't take into * account how reconstruction is actually performed. For example, with * triple-parity RAID-Z the reconstruction procedure is the same if column 4 * is targeted as invalid as if columns 1 and 4 are targeted since in both * cases we'd only use parity information in column 0. */ static int vdev_raidz_combrec(const spa_t *spa, raidz_map_t *rm, const blkptr_t *bp, void *data, off_t offset, uint64_t bytes, int total_errors, int data_errors) { raidz_col_t *rc; void *orig[VDEV_RAIDZ_MAXPARITY]; int tstore[VDEV_RAIDZ_MAXPARITY + 2]; int *tgts = &tstore[1]; int current, next, i, c, n; int code, ret = 0; ASSERT(total_errors < rm->rm_firstdatacol); /* * This simplifies one edge condition. */ tgts[-1] = -1; for (n = 1; n <= rm->rm_firstdatacol - total_errors; n++) { /* * Initialize the targets array by finding the first n columns * that contain no error. * * If there were no data errors, we need to ensure that we're * always explicitly attempting to reconstruct at least one * data column. 
To do this, we simply push the highest target * up into the data columns. */ for (c = 0, i = 0; i < n; i++) { if (i == n - 1 && data_errors == 0 && c < rm->rm_firstdatacol) { c = rm->rm_firstdatacol; } while (rm->rm_col[c].rc_error != 0) { c++; ASSERT3S(c, <, rm->rm_cols); } tgts[i] = c++; } /* * Setting tgts[n] simplifies the other edge condition. */ tgts[n] = rm->rm_cols; /* * These buffers were allocated in previous iterations. */ for (i = 0; i < n - 1; i++) { ASSERT(orig[i] != NULL); } orig[n - 1] = zfs_alloc(rm->rm_col[0].rc_size); current = 0; next = tgts[current]; while (current != n) { tgts[current] = next; current = 0; /* * Save off the original data that we're going to * attempt to reconstruct. */ for (i = 0; i < n; i++) { ASSERT(orig[i] != NULL); c = tgts[i]; ASSERT3S(c, >=, 0); ASSERT3S(c, <, rm->rm_cols); rc = &rm->rm_col[c]; bcopy(rc->rc_data, orig[i], rc->rc_size); } /* * Attempt a reconstruction and exit the outer loop on * success. */ code = vdev_raidz_reconstruct(rm, tgts, n); if (raidz_checksum_verify(spa, bp, data, bytes) == 0) { for (i = 0; i < n; i++) { c = tgts[i]; rc = &rm->rm_col[c]; ASSERT(rc->rc_error == 0); rc->rc_error = ECKSUM; } ret = code; goto done; } /* * Restore the original data. */ for (i = 0; i < n; i++) { c = tgts[i]; rc = &rm->rm_col[c]; bcopy(orig[i], rc->rc_data, rc->rc_size); } do { /* * Find the next valid column after the current * position.. */ for (next = tgts[current] + 1; next < rm->rm_cols && rm->rm_col[next].rc_error != 0; next++) continue; ASSERT(next <= tgts[current + 1]); /* * If that spot is available, we're done here. */ if (next != tgts[current + 1]) break; /* * Otherwise, find the next valid column after * the previous position. */ for (c = tgts[current - 1] + 1; rm->rm_col[c].rc_error != 0; c++) continue; tgts[current] = c; current++; } while (current != n); } } n--; done: for (i = n - 1; i >= 0; i--) { zfs_free(orig[i], rm->rm_col[0].rc_size); } return (ret); } static int vdev_raidz_read(vdev_t *vd, const blkptr_t *bp, void *data, off_t offset, size_t bytes) { vdev_t *tvd = vd->v_top; vdev_t *cvd; raidz_map_t *rm; raidz_col_t *rc; int c, error; int unexpected_errors; int parity_errors; int parity_untried; int data_errors; int total_errors; int n; int tgts[VDEV_RAIDZ_MAXPARITY]; int code; rc = NULL; /* gcc */ error = 0; rm = vdev_raidz_map_alloc(data, offset, bytes, tvd->v_ashift, vd->v_nchildren, vd->v_nparity); /* * Iterate over the columns in reverse order so that we hit the parity * last -- any errors along the way will force us to read the parity. */ for (c = rm->rm_cols - 1; c >= 0; c--) { rc = &rm->rm_col[c]; cvd = vdev_child(vd, rc->rc_devidx); if (cvd == NULL || cvd->v_state != VDEV_STATE_HEALTHY) { if (c >= rm->rm_firstdatacol) rm->rm_missingdata++; else rm->rm_missingparity++; rc->rc_error = ENXIO; rc->rc_tried = 1; /* don't even try */ rc->rc_skipped = 1; continue; } #if 0 /* XXX: Too hard for the boot code. 
*/ if (vdev_dtl_contains(cvd, DTL_MISSING, zio->io_txg, 1)) { if (c >= rm->rm_firstdatacol) rm->rm_missingdata++; else rm->rm_missingparity++; rc->rc_error = ESTALE; rc->rc_skipped = 1; continue; } #endif if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0) { rc->rc_error = cvd->v_read(cvd, NULL, rc->rc_data, rc->rc_offset, rc->rc_size); rc->rc_tried = 1; rc->rc_skipped = 0; } } reconstruct: unexpected_errors = 0; parity_errors = 0; parity_untried = 0; data_errors = 0; total_errors = 0; ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol); ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol); for (c = 0; c < rm->rm_cols; c++) { rc = &rm->rm_col[c]; if (rc->rc_error) { ASSERT(rc->rc_error != ECKSUM); /* child has no bp */ if (c < rm->rm_firstdatacol) parity_errors++; else data_errors++; if (!rc->rc_skipped) unexpected_errors++; total_errors++; } else if (c < rm->rm_firstdatacol && !rc->rc_tried) { parity_untried++; } } /* * There are three potential phases for a read: * 1. produce valid data from the columns read * 2. read all disks and try again * 3. perform combinatorial reconstruction * * Each phase is progressively both more expensive and less likely to * occur. If we encounter more errors than we can repair or all phases * fail, we have no choice but to return an error. */ /* * If the number of errors we saw was correctable -- less than or equal * to the number of parity disks read -- attempt to produce data that * has a valid checksum. Naturally, this case applies in the absence of * any errors. */ if (total_errors <= rm->rm_firstdatacol - parity_untried) { if (data_errors == 0) { if (raidz_checksum_verify(vd->spa, bp, data, bytes) == 0) { /* * If we read parity information (unnecessarily * as it happens since no reconstruction was * needed) regenerate and verify the parity. * We also regenerate parity when resilvering * so we can write it out to the failed device * later. */ if (parity_errors + parity_untried < rm->rm_firstdatacol) { n = raidz_parity_verify(rm); unexpected_errors += n; ASSERT(parity_errors + n <= rm->rm_firstdatacol); } goto done; } } else { /* * We either attempt to read all the parity columns or * none of them. If we didn't try to read parity, we * wouldn't be here in the correctable case. There must * also have been fewer parity errors than parity * columns or, again, we wouldn't be in this code path. */ ASSERT(parity_untried == 0); ASSERT(parity_errors < rm->rm_firstdatacol); /* * Identify the data columns that reported an error. */ n = 0; for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { rc = &rm->rm_col[c]; if (rc->rc_error != 0) { ASSERT(n < VDEV_RAIDZ_MAXPARITY); tgts[n++] = c; } } ASSERT(rm->rm_firstdatacol >= n); code = vdev_raidz_reconstruct(rm, tgts, n); if (raidz_checksum_verify(vd->spa, bp, data, bytes) == 0) { /* * If we read more parity disks than were used * for reconstruction, confirm that the other * parity disks produced correct data. This * routine is suboptimal in that it regenerates * the parity that we already used in addition * to the parity that we're attempting to * verify, but this should be a relatively * uncommon case, and can be optimized if it * becomes a problem. Note that we regenerate * parity when resilvering so we can write it * out to failed devices later. 
*/ if (parity_errors < rm->rm_firstdatacol - n) { n = raidz_parity_verify(rm); unexpected_errors += n; ASSERT(parity_errors + n <= rm->rm_firstdatacol); } goto done; } } } /* * This isn't a typical situation -- either we got a read * error or a child silently returned bad data. Read every * block so we can try again with as much data and parity as * we can track down. If we've already been through once * before, all children will be marked as tried so we'll * proceed to combinatorial reconstruction. */ unexpected_errors = 1; rm->rm_missingdata = 0; rm->rm_missingparity = 0; n = 0; for (c = 0; c < rm->rm_cols; c++) { rc = &rm->rm_col[c]; if (rc->rc_tried) continue; cvd = vdev_child(vd, rc->rc_devidx); ASSERT(cvd != NULL); rc->rc_error = cvd->v_read(cvd, NULL, rc->rc_data, rc->rc_offset, rc->rc_size); if (rc->rc_error == 0) n++; rc->rc_tried = 1; rc->rc_skipped = 0; } /* * If we managed to read anything more, retry the * reconstruction. */ if (n > 0) goto reconstruct; /* * At this point we've attempted to reconstruct the data given the * errors we detected, and we've attempted to read all columns. There * must, therefore, be one or more additional problems -- silent errors * resulting in invalid data rather than explicit I/O errors resulting * in absent data. We check if there is enough additional data to * possibly reconstruct the data and then perform combinatorial * reconstruction over all possible combinations. If that fails, * we're cooked. */ if (total_errors > rm->rm_firstdatacol) { error = EIO; } else if (total_errors < rm->rm_firstdatacol && (code = vdev_raidz_combrec(vd->spa, rm, bp, data, offset, bytes, total_errors, data_errors)) != 0) { /* * If we didn't use all the available parity for the * combinatorial reconstruction, verify that the remaining * parity is correct. */ if (code != (1 << rm->rm_firstdatacol) - 1) (void) raidz_parity_verify(rm); } else { /* * We're here because either: * * total_errors == rm_first_datacol, or * vdev_raidz_combrec() failed * * In either case, there is enough bad data to prevent * reconstruction. * * Start checksum ereports for all children which haven't * failed, and the IO wasn't speculative. */ error = ECKSUM; } done: vdev_raidz_map_free(rm); return (error); } Index: head/sys/cddl/contrib/opensolaris/common/lz4/lz4.c =================================================================== --- head/sys/cddl/contrib/opensolaris/common/lz4/lz4.c (nonexistent) +++ head/sys/cddl/contrib/opensolaris/common/lz4/lz4.c (revision 354253) @@ -0,0 +1,1053 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-2013, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ +/* + * Copyright (c) 2016 by Delphix. All rights reserved. + */ + +#if defined(_KERNEL) +#include +#elif defined(_STANDALONE) +#include +#include +#include +#include +#include + +#define ASSERT assert +#else +#include +#include +#include +#include +#include + +#define ASSERT assert +#endif +#include + +static int real_LZ4_compress(const char *source, char *dest, int isize, + int osize); +static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, + int isize, int maxOutputSize); +static int LZ4_compressCtx(void *ctx, const char *source, char *dest, + int isize, int osize); +static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, + int isize, int osize); + +#if defined(_KERNEL) +static kmem_cache_t *lz4_ctx_cache; +#endif + +size_t +lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, + int n __unused) +{ + uint32_t bufsiz; + char *dest = d_start; + + ASSERT(d_len >= sizeof (bufsiz)); + + bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, + d_len - sizeof (bufsiz)); + + /* Signal an error if the compression routine returned zero. */ + if (bufsiz == 0) + return (s_len); + + /* + * Encode the compressed buffer size at the start. We'll need this in + * decompression to counter the effects of padding which might be + * added to the compressed buffer and which, if unhandled, would + * confuse the hell out of our decompression function. + */ +#if defined(_KERNEL) + *(uint32_t *)(void *)dest = BE_32(bufsiz); +#else + *(uint32_t *)(void *)dest = htonl(bufsiz); +#endif + + return (bufsiz + sizeof (bufsiz)); +} + +int +lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, + int n __unused) +{ + const char *src = s_start; +#if defined(_KERNEL) + uint32_t bufsiz = BE_IN32(s_start); +#else + uint32_t bufsiz = htonl(*(uint32_t *)s_start); +#endif + + /* invalid compressed buffer size encoded at start */ + if (bufsiz + sizeof (bufsiz) > s_len) + return (1); + + /* + * Returns 0 on success (decompression function returned non-negative) + * and non-zero on failure (decompression function returned negative). + */ + return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], + d_start, bufsiz, d_len) < 0); +} + +/* + * LZ4 API Description: + * + * Simple Functions: + * real_LZ4_compress() : + * isize : is the input size. Max supported value is ~1.9GB + * return : the number of bytes written in buffer dest + * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). + * note : destination buffer must be already allocated. + * destination buffer must be sized to handle worst-case + * situations (input data not compressible).
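A short usage sketch of the two wrappers above (a hypothetical caller; in ZFS the real consumer is zio_compress_data(), as the comment in real_LZ4_compress() notes). The 4-byte size prefix is handled entirely inside the wrappers, and a return of s_len from lz4_compress() means the data was left uncompressed:

    /* Sketch only: round-trip a buffer through the wrappers above. */
    static int
    lz4_roundtrip(void *src, size_t srclen, void *work, size_t worklen,
        void *out, size_t outlen)
    {
    	size_t clen;

    	clen = lz4_compress(src, work, srclen, worklen, 0);
    	if (clen == srclen)
    		return (-1);	/* incompressible: caller stores src as-is */

    	/* lz4_decompress() returns 0 on success, nonzero on failure. */
    	return (lz4_decompress(work, out, clen, outlen, 0));
    }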
+ * + * Advanced Functions + * + * LZ4_uncompress_unknownOutputSize() : + * isize : is the input size, therefore the compressed size + * maxOutputSize : is the size of the destination buffer (which must be + * already allocated) + * return : the number of bytes decoded in the destination buffer + * (necessarily <= maxOutputSize). If the source stream is + * malformed, the function will stop decoding and return a + * negative result, indicating the byte position of the faulty + * instruction. This function never writes beyond dest + + * maxOutputSize, and is therefore protected against malicious + * data packets. + * note : Destination buffer must be already allocated. + * + * LZ4_compressCtx() : + * This function explicitly handles the CTX memory structure. + * + * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated + * by the caller (either on the stack or using kmem_zalloc). Passing NULL + * isn't valid. + * + * LZ4_compress64kCtx() : + * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). + * isize *Must* be <64KB, otherwise the output will be corrupted. + * + * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated + * by the caller (either on the stack or using kmem_zalloc). Passing NULL + * isn't valid. + */ + +/* + * Tuning parameters + */ + +/* + * COMPRESSIONLEVEL: Increasing this value improves compression ratio + * Lowering this value reduces memory usage. Reduced memory usage + * typically improves speed, due to cache effect (ex: L1 32KB for Intel, + * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes + * (examples : 12 -> 16KB ; 17 -> 512KB) + */ +#define COMPRESSIONLEVEL 12 + +/* + * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the + * algorithm skip faster data segments considered "incompressible". + * This may decrease compression ratio dramatically, but will be + * faster on incompressible data. Increasing this value will make + * the algorithm search more before declaring a segment "incompressible". + * This could improve compression a bit, but will be slower on + * incompressible data. The default value (6) is recommended. + */ +#define NOTCOMPRESSIBLE_CONFIRMATION 6 + +/* + * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to + * performance for big endian cpu, but the resulting compressed stream + * will be incompatible with little-endian CPU. You can set this option + * to 1 in situations where data will stay within closed environment. + * This option is useless on Little_Endian CPU (such as x86). + */ +/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ + +/* + * CPU Feature Detection + */ + +/* 32 or 64 bits ? */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \ + defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \ + defined(__LP64__) || defined(_LP64)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Limits the amount of stack space that the algorithm may consume to hold + * the compression lookup table. The value `9' here means we'll never use + * more than 2k of stack (see above for a description of COMPRESSIONLEVEL). + * If more memory is needed, it is allocated from the heap. + */ +/* FreeBSD: Use heap for all platforms for now */ +#define STACKLIMIT 0 + +/* + * Little Endian or Big Endian? + * Note: overwrite the below #define if you know your architecture endianess. + */ +#if BYTE_ORDER == BIG_ENDIAN +#define LZ4_BIG_ENDIAN 1 +#else +/* + * Little Endian assumed. 
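Returning to the COMPRESSIONLEVEL tuning note above: its memory formula N -> 2^(N+2) bytes is just HASHTABLESIZE (2^N) entries of 4 bytes each (the table entry type is 4 bytes wide in the upstream layout). A self-contained check of the comment's two examples, for illustration only:

    #include <assert.h>
    #include <stddef.h>

    /* Hash-table bytes for a given hash log N: 2^N entries * 4 bytes each. */
    static size_t
    lz4_table_bytes(int hash_log)
    {
    	return ((size_t)1 << (hash_log + 2));
    }

    int
    main(void)
    {
    	assert(lz4_table_bytes(12) == 16 * 1024);	/* 12 -> 16KB */
    	assert(lz4_table_bytes(17) == 512 * 1024);	/* 17 -> 512KB */
    	return (0);
    }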
PDP Endian and other very rare endian format + * are unsupported. + */ +#endif + +/* + * Unaligned memory access is automatically enabled for "common" CPU, + * such as x86. For others CPU, the compiler will be more cautious, and + * insert extra code to ensure aligned access is respected. If you know + * your target CPU supports unaligned memory access, you may want to + * force this option manually to improve performance + */ +#if defined(__ARM_FEATURE_UNALIGNED) +#define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +/* + * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because + * gcc currently rely on libcompiler_rt. + * + * TODO: revisit this when situation changes. + */ +#if defined(__sparc64__) +#define LZ4_FORCE_SW_BITCOUNT +#endif + +/* + * Compiler Options + */ +#if __STDC_VERSION__ >= 199901L /* C99 */ +/* "restrict" is a known keyword */ +#else +/* Disable restrict */ +#define restrict +#endif + +#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ + (((x) & 0xffu) << 8))) + +#define expect(expr, value) (__builtin_expect((expr), (value))) + +#if defined(likely) +#undef likely +#endif +#if defined(unlikely) +#undef unlikely +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif + +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +/* Basic types */ +#define BYTE uint8_t +#define U16 uint16_t +#define U32 uint32_t +#define S32 int32_t +#define U64 uint64_t + +#ifndef LZ4_FORCE_UNALIGNED_ACCESS +#pragma pack(1) +#endif + +typedef struct _U16_S { + U16 v; +} U16_S; +typedef struct _U32_S { + U32 v; +} U32_S; +typedef struct _U64_S { + U64 v; +} U64_S; + +#ifndef LZ4_FORCE_UNALIGNED_ACCESS +#pragma pack() +#endif + +#define A64(x) (((U64_S *)(__DECONST(void *, x)))->v) +#define A32(x) (((U32_S *)(__DECONST(void *, x)))->v) +#define A16(x) (((U16_S *)(__DECONST(void *, x)))->v) + +/* + * Constants + */ +#define MINMATCH 4 + +#define HASH_LOG COMPRESSIONLEVEL +#define HASHTABLESIZE (1 << HASH_LOG) +#define HASH_MASK (HASHTABLESIZE - 1) + +#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ + NOTCOMPRESSIBLE_CONFIRMATION : 2) + +/* + * Defines if memory is allocated into the stack (local variable), + * or into the heap (kmem_alloc()). 
+ */ +#define HEAPMODE (HASH_LOG > STACKLIMIT) +#define COPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) + +#define ML_BITS 4 +#define ML_MASK ((1U<> ((MINMATCH * 8) - \ + HASH_LOG)) +#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) +#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); +#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ + d = e; } + + +/* Private functions */ +#if LZ4_ARCH64 + +static inline int +LZ4_NbCommonBytes(register U64 val) +{ +#if defined(LZ4_BIG_ENDIAN) +#if !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); +#else + int r; + if (!(val >> 32)) { + r = 4; + } else { + r = 0; + val >>= 32; + } + if (!(val >> 16)) { + r += 2; + val >>= 8; + } else { + val >>= 24; + } + r += (!val); + return (r); +#endif +#else +#if !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll(val) >> 3); +#else + static const int DeBruijnBytePos[64] = + { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, + 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, + 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, + 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 + }; + return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >> + 58]; +#endif +#endif +} + +#else + +static inline int +LZ4_NbCommonBytes(register U32 val) +{ +#if defined(LZ4_BIG_ENDIAN) +#if !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); +#else + int r; + if (!(val >> 16)) { + r = 2; + val >>= 8; + } else { + r = 0; + val >>= 24; + } + r += (!val); + return (r); +#endif +#else +#if !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz(val) >> 3); +#else + static const int DeBruijnBytePos[32] = { + 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 + }; + return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> + 27]; +#endif +#endif +} + +#endif + +/* Compression functions */ + +/*ARGSUSED*/ +static int +LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, + int osize) +{ +#if HEAPMODE + struct refTables *srt = (struct refTables *)ctx; + HTYPE *HashTable = (HTYPE *) (srt->hashTable); +#else + HTYPE HashTable[HASHTABLESIZE] = { 0 }; +#endif + + const BYTE *ip = (const BYTE *) source; + INITBASE(base); + const BYTE *anchor = ip; + const BYTE *const iend = ip + isize; + const BYTE *const oend = (BYTE *) dest + osize; + const BYTE *const mflimit = iend - MFLIMIT; +#define matchlimit (iend - LASTLITERALS) + + BYTE *op = (BYTE *) dest; + + int len, length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + /* First Byte */ + HashTable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; + forwardH = LZ4_HASH_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findMatchAttempts = (1U << skipStrength) + 3; + const BYTE *forwardIp = ip; + const BYTE *ref; + BYTE *token; + + /* Find a match */ + do { + U32 h = forwardH; + int step = findMatchAttempts++ >> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if unlikely(forwardIp > mflimit) { + goto _last_literals; + } + + forwardH = LZ4_HASH_VALUE(forwardIp); + ref = base + HashTable[h]; + HashTable[h] = ip - base; + + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (const BYTE *) source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode 
Literal length */ + length = ip - anchor; + token = op++; + + /* Check output limit */ + if unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend) + return (0); + + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + + _next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); + + /* Start Counting */ + ip += MINMATCH; + ref += MINMATCH; /* MinMatch verified */ + anchor = ip; + while likely(ip < matchlimit - (STEPSIZE - 1)) { + UARCH diff = AARCH(ref) ^ AARCH(ip); + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NbCommonBytes(diff); + goto _endCount; + } +#if LZ4_ARCH64 + if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } +#endif + if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < matchlimit) && (*ref == *ip)) + ip++; + _endCount: + + /* Encode MatchLength */ + len = (ip - anchor); + /* Check output limit */ + if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend) + return (0); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (BYTE)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + /* Fill table */ + HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; + + /* Test next position */ + ref = base + HashTable[LZ4_HASH_VALUE(ip)]; + HashTable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + /* Prepare next loop */ + anchor = ip++; + forwardH = LZ4_HASH_VALUE(ip); + } + + _last_literals: + /* Encode Last Literals */ + { + int lastRun = iend - anchor; + if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > + oend) + return (0); + if (lastRun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastRun -= RUN_MASK; + for (; lastRun > 254; lastRun -= 255) { + *op++ = 255; + } + *op++ = (BYTE)lastRun; + } else + *op++ = (lastRun << ML_BITS); + (void) memcpy(op, anchor, iend - anchor); + op += iend - anchor; + } + + /* End */ + return (int)(((char *)op) - dest); +} + + + +/* Note : this function is valid only if isize < LZ4_64KLIMIT */ +#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) +#define HASHLOG64K (HASH_LOG + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ + HASHLOG64K)) +#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) + +/*ARGSUSED*/ +static int +LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, + int osize) +{ +#if HEAPMODE + struct refTables *srt = (struct refTables *)ctx; + U16 *HashTable = (U16 *) (srt->hashTable); +#else + U16 HashTable[HASH64KTABLESIZE] = { 0 }; +#endif + + const BYTE *ip = (const BYTE *) source; + const BYTE *anchor = ip; + const BYTE *const base = ip; + const BYTE *const iend = ip + isize; + const BYTE *const oend = (BYTE *) dest + osize; + const BYTE *const mflimit = iend - MFLIMIT; +#define matchlimit (iend - LASTLITERALS) + + BYTE *op = (BYTE *) dest; + + int len, length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + 
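Both compression paths emit the standard LZ4 token: one byte whose high four bits carry the literal-run length and whose low four bits carry the match length, with the maximum field value (RUN_MASK or ML_MASK, both 15 here) signalling 255-valued extension bytes, exactly as LZ4_uncompress_unknownOutputSize() later reads them back. A stand-alone sketch of that length decoding (a hypothetical helper, not code from this patch):

    /* Sketch: decode one 4-bit LZ4 length field plus 255-valued extensions. */
    static size_t
    lz4_decode_len(unsigned nibble, const unsigned char **pp,
        const unsigned char *end)
    {
    	size_t len = nibble;
    	unsigned s = 255;

    	if (nibble == 15) {		/* RUN_MASK / ML_MASK */
    		while (*pp < end && s == 255) {
    			s = *(*pp)++;
    			len += s;
    		}
    	}
    	return (len);
    }

Encoding is the mirror image: write 15 in the field, then emit (len - 15) as a run of 255 bytes plus a final remainder byte, which is what the RUN_MASK and ML_MASK branches in both compressors do.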
/* First Byte */ + ip++; + forwardH = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findMatchAttempts = (1U << skipStrength) + 3; + const BYTE *forwardIp = ip; + const BYTE *ref; + BYTE *token; + + /* Find a match */ + do { + U32 h = forwardH; + int step = findMatchAttempts++ >> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if (forwardIp > mflimit) { + goto _last_literals; + } + + forwardH = LZ4_HASH64K_VALUE(forwardIp); + ref = base + HashTable[h]; + HashTable[h] = ip - base; + + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (const BYTE *) source) && + (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = ip - anchor; + token = op++; + + /* Check output limit */ + if unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend) + return (0); + + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + + _next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); + + /* Start Counting */ + ip += MINMATCH; + ref += MINMATCH; /* MinMatch verified */ + anchor = ip; + while (ip < matchlimit - (STEPSIZE - 1)) { + UARCH diff = AARCH(ref) ^ AARCH(ip); + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NbCommonBytes(diff); + goto _endCount; + } +#if LZ4_ARCH64 + if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } +#endif + if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < matchlimit) && (*ref == *ip)) + ip++; + _endCount: + + /* Encode MatchLength */ + len = (ip - anchor); + /* Check output limit */ + if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend) + return (0); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (BYTE)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + /* Fill table */ + HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; + + /* Test next position */ + ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; + HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + /* Prepare next loop */ + anchor = ip++; + forwardH = LZ4_HASH64K_VALUE(ip); + } + + _last_literals: + /* Encode Last Literals */ + { + int lastRun = iend - anchor; + if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > + oend) + return (0); + if (lastRun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastRun -= RUN_MASK; + for (; lastRun > 254; lastRun -= 255) + *op++ = 255; + *op++ = (BYTE)lastRun; + } else + *op++ = (lastRun << ML_BITS); + (void) memcpy(op, anchor, iend - anchor); + op += iend - anchor; + } + + /* End */ + return (int)(((char *)op) - dest); +} + +static int +real_LZ4_compress(const char *source, char *dest, int isize, int osize) +{ +#if HEAPMODE +#if defined(_KERNEL) + void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP); +#else + void *ctx = malloc(sizeof(struct refTables)); +#endif + int result; + + /* + * out of kernel memory, gently fall through - this will disable + * compression in zio_compress_data + */ + if (ctx == NULL) + return (0); + + bzero(ctx, sizeof(struct 
refTables)); + if (isize < LZ4_64KLIMIT) + result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); + else + result = LZ4_compressCtx(ctx, source, dest, isize, osize); + +#if defined(_KERNEL) + kmem_cache_free(lz4_ctx_cache, ctx); +#else + free(ctx); +#endif + return (result); +#else + if (isize < (int)LZ4_64KLIMIT) + return (LZ4_compress64kCtx(NULL, source, dest, isize, osize)); + return (LZ4_compressCtx(NULL, source, dest, isize, osize)); +#endif +} + +/* Decompression functions */ + +/* + * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe + * against "buffer overflow" attack type. It will never write nor + * read outside of the provided output buffers. + * LZ4_uncompress_unknownOutputSize() also insures that it will never + * read outside of the input buffer. A corrupted input will produce + * an error result, a negative int, indicating the position of the + * error within input stream. + */ + +static int +LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize, + int maxOutputSize) +{ + /* Local Variables */ + const BYTE *restrict ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *ref; + + BYTE *op = (BYTE *) dest; + BYTE *const oend = op + maxOutputSize; + BYTE *cpy; + + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; +#endif + + /* Main Loop */ + while (ip < iend) { + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + if ((length = (token >> ML_BITS)) == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + /* CORNER-CASE: cpy might overflow. */ + if (cpy < op) + goto _output_error; /* cpy was overflowed, bail! 
*/ + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + if (cpy > oend) + /* Error: writes beyond output buffer */ + goto _output_error; + if (ip + length != iend) + /* + * Error: LZ4 format requires to consume all + * input at this stage + */ + goto _output_error; + (void) memcpy(op, ip, length); + op += length; + /* Necessarily EOF, due to parsing restrictions */ + break; + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const) dest) + /* + * Error: offset creates reference outside of + * destination buffer + */ + goto _output_error; + + /* get matchlength */ + if ((length = (token & ML_MASK)) == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + /* copy repeated sequence */ + if unlikely(op - ref < STEPSIZE) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op-ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op-ref]; + A32(op) = A32(ref); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + /* + * Error: request to write outside of + * destination buffer + */ + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + if (op == oend) + /* + * Check EOF (should never happen, since + * last 5 bytes are supposed to be literals) + */ + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + + /* end of decoding */ + return (int)(((char *)op) - dest); + + /* write overflow error detected */ + _output_error: + return (int)(-(((const char *)ip) - source)); +} + +#if defined(_KERNEL) +extern void +lz4_init(void) +{ + +#if HEAPMODE + lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables), + 0, NULL, NULL, NULL, NULL, NULL, 0); +#endif +} + +extern void +lz4_fini(void) +{ + +#if HEAPMODE + kmem_cache_destroy(lz4_ctx_cache); +#endif +} +#endif /* _KERNEL */ Property changes on: head/sys/cddl/contrib/opensolaris/common/lz4/lz4.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/cddl/contrib/opensolaris/common/lz4/lz4.h =================================================================== --- head/sys/cddl/contrib/opensolaris/common/lz4/lz4.h (nonexistent) +++ head/sys/cddl/contrib/opensolaris/common/lz4/lz4.h (revision 354253) @@ -0,0 +1,50 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#ifndef _LZ4_H +#define _LZ4_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern size_t lz4_compress(void *, void *, size_t, size_t, int); +extern int lz4_decompress(void *, void *, size_t, size_t, int); + +#ifdef __cplusplus +} +#endif + +#endif /* _LZ4_H */ Property changes on: head/sys/cddl/contrib/opensolaris/common/lz4/lz4.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files (revision 354252) +++ head/sys/cddl/contrib/opensolaris/uts/common/Makefile.files (revision 354253) @@ -1,183 +1,182 @@ # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved. # Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved. # Copyright (c) 2012 Joyent, Inc. All rights reserved. # Copyright (c) 2011, 2014 by Delphix. All rights reserved. # Copyright (c) 2013 by Saso Kiselkov. All rights reserved. # # # This Makefile defines all file modules for the directory uts/common # and its children. These are the source files which may be considered # common to all SunOS systems. 
LUA_OBJS += \ ldo.o \ lvm.o \ lbitlib.o \ lopcodes.o \ lstring.o \ ltable.o \ ltm.o \ lcorolib.o \ lauxlib.o \ ldebug.o \ lstate.o \ lgc.o \ lmem.o \ lctype.o \ lfunc.o \ ldump.o \ lundump.o \ lstrlib.o \ ltablib.o \ lapi.o \ lobject.o \ lbaselib.o \ lcompat.o \ lzio.o \ lcode.o \ llex.o \ lparser.o ZFS_COMMON_OBJS += \ abd.o \ aggsum.o \ arc.o \ bplist.o \ blkptr.o \ bpobj.o \ bptree.o \ bqueue.o \ cityhash.o \ dbuf.o \ dbuf_stats.o \ ddt.o \ ddt_zap.o \ dmu.o \ dmu_diff.o \ dmu_send.o \ dmu_object.o \ dmu_objset.o \ dmu_traverse.o \ dmu_tx.o \ dnode.o \ dnode_sync.o \ dsl_bookmark.o \ dsl_dir.o \ dsl_dataset.o \ dsl_deadlist.o \ dsl_destroy.o \ dsl_pool.o \ dsl_synctask.o \ dsl_userhold.o \ dmu_zfetch.o \ dsl_deleg.o \ dsl_prop.o \ dsl_scan.o \ zfeature.o \ gzip.o \ - lz4.o \ lzjb.o \ metaslab.o \ multilist.o \ range_tree.o \ refcount.o \ rrwlock.o \ sa.o \ sha256.o \ skein_zfs.o \ spa.o \ spa_checkpoint.o \ spa_config.o \ spa_errlog.o \ spa_history.o \ spa_misc.o \ space_map.o \ space_reftree.o \ txg.o \ uberblock.o \ unique.o \ vdev.o \ vdev_cache.o \ vdev_file.o \ vdev_indirect.o \ vdev_indirect_births.o \ vdev_indirect_mapping.o \ vdev_initialize.o \ vdev_label.o \ vdev_mirror.o \ vdev_missing.o \ vdev_queue.o \ vdev_raidz.o \ vdev_removal.o \ vdev_root.o \ zap.o \ zap_leaf.o \ zap_micro.o \ zcp.o \ zcp_get.o \ zcp_global.o \ zcp_iter.o \ zcp_synctask.o \ zfs_byteswap.o \ zfs_debug.o \ zfs_fm.o \ zfs_fuid.o \ zfs_sa.o \ zfs_znode.o \ zil.o \ zio.o \ zio_checksum.o \ zio_compress.o \ zio_inject.o \ zle.o \ zrlock.o \ zthr.o ZFS_SHARED_OBJS += \ zfeature_common.o \ zfs_comutil.o \ zfs_deleg.o \ zfs_fletcher.o \ zfs_namecheck.o \ zfs_prop.o \ zpool_prop.o \ zprop_common.o ZFS_OBJS += \ $(ZFS_COMMON_OBJS) \ $(ZFS_SHARED_OBJS) \ zfs_acl.o \ zfs_ctldir.o \ zfs_dir.o \ zfs_ioctl.o \ zfs_ioctl_compat.o \ zfs_log.o \ zfs_onexit.o \ zfs_replay.o \ zfs_rlock.o \ zfs_vfsops.o \ zfs_vnops.o \ zvol.o Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c (revision 354252) +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c (nonexistent) @@ -1,1033 +0,0 @@ -/* - * LZ4 - Fast LZ compression algorithm - * Header File - * Copyright (C) 2011-2013, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You can contact the author at : - * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - * - LZ4 source repository : http://code.google.com/p/lz4/ - */ -/* - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -#include - -static int real_LZ4_compress(const char *source, char *dest, int isize, - int osize); -static int LZ4_compressBound(int isize); -static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, - int isize, int maxOutputSize); -static int LZ4_compressCtx(void *ctx, const char *source, char *dest, - int isize, int osize); -static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, - int isize, int osize); - -static kmem_cache_t *lz4_ctx_cache; - -/*ARGSUSED*/ -size_t -lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) -{ - uint32_t bufsiz; - char *dest = d_start; - - ASSERT(d_len >= sizeof (bufsiz)); - - bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, - d_len - sizeof (bufsiz)); - - /* Signal an error if the compression routine returned zero. */ - if (bufsiz == 0) - return (s_len); - - /* - * Encode the compresed buffer size at the start. We'll need this in - * decompression to counter the effects of padding which might be - * added to the compressed buffer and which, if unhandled, would - * confuse the hell out of our decompression function. - */ - *(uint32_t *)dest = BE_32(bufsiz); - - return (bufsiz + sizeof (bufsiz)); -} - -/*ARGSUSED*/ -int -lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) -{ - const char *src = s_start; - uint32_t bufsiz = BE_IN32(src); - - /* invalid compressed buffer size encoded at start */ - if (bufsiz + sizeof (bufsiz) > s_len) - return (1); - - /* - * Returns 0 on success (decompression function returned non-negative) - * and non-zero on failure (decompression function returned negative). - */ - return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], - d_start, bufsiz, d_len) < 0); -} - -/* - * LZ4 API Description: - * - * Simple Functions: - * real_LZ4_compress() : - * isize : is the input size. Max supported value is ~1.9GB - * return : the number of bytes written in buffer dest - * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). - * note : destination buffer must be already allocated. - * destination buffer must be sized to handle worst cases - * situations (input data not compressible) worst case size - * evaluation is provided by function LZ4_compressBound(). - * - * Advanced Functions - * - * LZ4_compressBound() : - * Provides the maximum size that LZ4 may output in a "worst case" - * scenario (input data not compressible) primarily useful for memory - * allocation of output buffer. - * - * isize : is the input size. 
Max supported value is ~1.9GB - * return : maximum output size in a "worst case" scenario - * note : this function is limited by "int" range (2^31-1) - * - * LZ4_uncompress_unknownOutputSize() : - * isize : is the input size, therefore the compressed size - * maxOutputSize : is the size of the destination buffer (which must be - * already allocated) - * return : the number of bytes decoded in the destination buffer - * (necessarily <= maxOutputSize). If the source stream is - * malformed, the function will stop decoding and return a - * negative result, indicating the byte position of the faulty - * instruction. This function never writes beyond dest + - * maxOutputSize, and is therefore protected against malicious - * data packets. - * note : Destination buffer must be already allocated. - * - * LZ4_compressCtx() : - * This function explicitly handles the CTX memory structure. - * - * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated - * by the caller (either on the stack or using kmem_zalloc). Passing NULL - * isn't valid. - * - * LZ4_compress64kCtx() : - * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). - * isize *Must* be <64KB, otherwise the output will be corrupted. - * - * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated - * by the caller (either on the stack or using kmem_zalloc). Passing NULL - * isn't valid. - */ - -/* - * Tuning parameters - */ - -/* - * COMPRESSIONLEVEL: Increasing this value improves compression ratio - * Lowering this value reduces memory usage. Reduced memory usage - * typically improves speed, due to cache effect (ex: L1 32KB for Intel, - * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes - * (examples : 12 -> 16KB ; 17 -> 512KB) - */ -#define COMPRESSIONLEVEL 12 - -/* - * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the - * algorithm skip faster data segments considered "incompressible". - * This may decrease compression ratio dramatically, but will be - * faster on incompressible data. Increasing this value will make - * the algorithm search more before declaring a segment "incompressible". - * This could improve compression a bit, but will be slower on - * incompressible data. The default value (6) is recommended. - */ -#define NOTCOMPRESSIBLE_CONFIRMATION 6 - -/* - * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to - * performance for big endian cpu, but the resulting compressed stream - * will be incompatible with little-endian CPU. You can set this option - * to 1 in situations where data will stay within closed environment. - * This option is useless on Little_Endian CPU (such as x86). - */ -/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ - -/* - * CPU Feature Detection - */ - -/* 32 or 64 bits ? */ -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \ - defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \ - defined(__LP64__) || defined(_LP64)) -#define LZ4_ARCH64 1 -#else -#define LZ4_ARCH64 0 -#endif - -/* - * Limits the amount of stack space that the algorithm may consume to hold - * the compression lookup table. The value `9' here means we'll never use - * more than 2k of stack (see above for a description of COMPRESSIONLEVEL). - * If more memory is needed, it is allocated from the heap. - */ -/* FreeBSD: Use heap for all platforms for now */ -#define STACKLIMIT 0 - -/* - * Little Endian or Big Endian? - * Note: overwrite the below #define if you know your architecture endianess. 
- */ -#if BYTE_ORDER == BIG_ENDIAN -#define LZ4_BIG_ENDIAN 1 -#else -/* - * Little Endian assumed. PDP Endian and other very rare endian format - * are unsupported. - */ -#endif - -/* - * Unaligned memory access is automatically enabled for "common" CPU, - * such as x86. For others CPU, the compiler will be more cautious, and - * insert extra code to ensure aligned access is respected. If you know - * your target CPU supports unaligned memory access, you may want to - * force this option manually to improve performance - */ -#if defined(__ARM_FEATURE_UNALIGNED) -#define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -/* - * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because - * gcc currently rely on libcompiler_rt. - * - * TODO: revisit this when situation changes. - */ -#if defined(__sparc64__) -#define LZ4_FORCE_SW_BITCOUNT -#endif - -/* - * Compiler Options - */ -#if __STDC_VERSION__ >= 199901L /* C99 */ -/* "restrict" is a known keyword */ -#else -/* Disable restrict */ -#define restrict -#endif - -#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ - (((x) & 0xffu) << 8))) - -#define expect(expr, value) (__builtin_expect((expr), (value))) - -#if defined(likely) -#undef likely -#endif -#if defined(unlikely) -#undef unlikely -#endif - -#ifndef likely -#define likely(expr) expect((expr) != 0, 1) -#endif - -#ifndef unlikely -#define unlikely(expr) expect((expr) != 0, 0) -#endif - -/* Basic types */ -#define BYTE uint8_t -#define U16 uint16_t -#define U32 uint32_t -#define S32 int32_t -#define U64 uint64_t - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack(1) -#endif - -typedef struct _U16_S { - U16 v; -} U16_S; -typedef struct _U32_S { - U32 v; -} U32_S; -typedef struct _U64_S { - U64 v; -} U64_S; - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack() -#endif - -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) - -/* - * Constants - */ -#define MINMATCH 4 - -#define HASH_LOG COMPRESSIONLEVEL -#define HASHTABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASHTABLESIZE - 1) - -#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ - NOTCOMPRESSIBLE_CONFIRMATION : 2) - -/* - * Defines if memory is allocated into the stack (local variable), - * or into the heap (kmem_alloc()). 
- */ -#define HEAPMODE (HASH_LOG > STACKLIMIT) -#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH + MINMATCH) -#define MINLENGTH (MFLIMIT + 1) - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) - -#define ML_BITS 4 -#define ML_MASK ((1U<> ((MINMATCH * 8) - \ - HASH_LOG)) -#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) -#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); -#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ - d = e; } - - -/* Private functions */ -#if LZ4_ARCH64 - -static inline int -LZ4_NbCommonBytes(register U64 val) -{ -#if defined(LZ4_BIG_ENDIAN) -#if !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); -#else - int r; - if (!(val >> 32)) { - r = 4; - } else { - r = 0; - val >>= 32; - } - if (!(val >> 16)) { - r += 2; - val >>= 8; - } else { - val >>= 24; - } - r += (!val); - return (r); -#endif -#else -#if !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); -#else - static const int DeBruijnBytePos[64] = - { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, - 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, - 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, - 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 - }; - return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >> - 58]; -#endif -#endif -} - -#else - -static inline int -LZ4_NbCommonBytes(register U32 val) -{ -#if defined(LZ4_BIG_ENDIAN) -#if !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); -#else - int r; - if (!(val >> 16)) { - r = 2; - val >>= 8; - } else { - r = 0; - val >>= 24; - } - r += (!val); - return (r); -#endif -#else -#if !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); -#else - static const int DeBruijnBytePos[32] = { - 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 - }; - return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> - 27]; -#endif -#endif -} - -#endif - -/* Public functions */ - -static int -LZ4_compressBound(int isize) -{ - return (isize + (isize / 255) + 16); -} - -/* Compression functions */ - -/*ARGSUSED*/ -static int -LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, - int osize) -{ -#if HEAPMODE - struct refTables *srt = (struct refTables *)ctx; - HTYPE *HashTable = (HTYPE *) (srt->hashTable); -#else - HTYPE HashTable[HASHTABLESIZE] = { 0 }; -#endif - - const BYTE *ip = (BYTE *) source; - INITBASE(base); - const BYTE *anchor = ip; - const BYTE *const iend = ip + isize; - const BYTE *const oend = (BYTE *) dest + osize; - const BYTE *const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) - - BYTE *op = (BYTE *) dest; - - int len, length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - - /* Init */ - if (isize < MINLENGTH) - goto _last_literals; - - /* First Byte */ - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - ip++; - forwardH = LZ4_HASH_VALUE(ip); - - /* Main Loop */ - for (;;) { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE *forwardIp = ip; - const BYTE *ref; - BYTE *token; - - /* Find a match */ - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if unlikely(forwardIp > mflimit) { - goto _last_literals; - } - - forwardH = LZ4_HASH_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = ip - base; - - } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); - - /* Catch up */ - while ((ip > 
anchor) && (ref > (BYTE *) source) && - unlikely(ip[-1] == ref[-1])) { - ip--; - ref--; - } - - /* Encode Literal length */ - length = ip - anchor; - token = op++; - - /* Check output limit */ - if unlikely(op + length + (2 + 1 + LASTLITERALS) + - (length >> 8) > oend) - return (0); - - if (length >= (int)RUN_MASK) { - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254; len -= 255) - *op++ = 255; - *op++ = (BYTE)len; - } else - *token = (length << ML_BITS); - - /* Copy Literals */ - LZ4_BLINDCOPY(anchor, op, length); - - _next_match: - /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); - - /* Start Counting */ - ip += MINMATCH; - ref += MINMATCH; /* MinMatch verified */ - anchor = ip; - while likely(ip < matchlimit - (STEPSIZE - 1)) { - UARCH diff = AARCH(ref) ^ AARCH(ip); - if (!diff) { - ip += STEPSIZE; - ref += STEPSIZE; - continue; - } - ip += LZ4_NbCommonBytes(diff); - goto _endCount; - } -#if LZ4_ARCH64 - if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { - ip += 4; - ref += 4; - } -#endif - if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { - ip += 2; - ref += 2; - } - if ((ip < matchlimit) && (*ref == *ip)) - ip++; - _endCount: - - /* Encode MatchLength */ - len = (ip - anchor); - /* Check output limit */ - if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend) - return (0); - if (len >= (int)ML_MASK) { - *token += ML_MASK; - len -= ML_MASK; - for (; len > 509; len -= 510) { - *op++ = 255; - *op++ = 255; - } - if (len > 254) { - len -= 255; - *op++ = 255; - } - *op++ = (BYTE)len; - } else - *token += len; - - /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; - break; - } - /* Fill table */ - HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; - - /* Test next position */ - ref = base + HashTable[LZ4_HASH_VALUE(ip)]; - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { - token = op++; - *token = 0; - goto _next_match; - } - /* Prepare next loop */ - anchor = ip++; - forwardH = LZ4_HASH_VALUE(ip); - } - - _last_literals: - /* Encode Last Literals */ - { - int lastRun = iend - anchor; - if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > - oend) - return (0); - if (lastRun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastRun -= RUN_MASK; - for (; lastRun > 254; lastRun -= 255) { - *op++ = 255; - } - *op++ = (BYTE)lastRun; - } else - *op++ = (lastRun << ML_BITS); - (void) memcpy(op, anchor, iend - anchor); - op += iend - anchor; - } - - /* End */ - return (int)(((char *)op) - dest); -} - - - -/* Note : this function is valid only if isize < LZ4_64KLIMIT */ -#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) -#define HASHLOG64K (HASH_LOG + 1) -#define HASH64KTABLESIZE (1U << HASHLOG64K) -#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ - HASHLOG64K)) -#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) - -/*ARGSUSED*/ -static int -LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, - int osize) -{ -#if HEAPMODE - struct refTables *srt = (struct refTables *)ctx; - U16 *HashTable = (U16 *) (srt->hashTable); -#else - U16 HashTable[HASH64KTABLESIZE] = { 0 }; -#endif - - const BYTE *ip = (BYTE *) source; - const BYTE *anchor = ip; - const BYTE *const base = ip; - const BYTE *const iend = ip + isize; - const BYTE *const oend = (BYTE *) dest + osize; - const BYTE *const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) - - BYTE *op = (BYTE *) dest; - - int len, length; - const int 
skipStrength = SKIPSTRENGTH; - U32 forwardH; - - /* Init */ - if (isize < MINLENGTH) - goto _last_literals; - - /* First Byte */ - ip++; - forwardH = LZ4_HASH64K_VALUE(ip); - - /* Main Loop */ - for (;;) { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE *forwardIp = ip; - const BYTE *ref; - BYTE *token; - - /* Find a match */ - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if (forwardIp > mflimit) { - goto _last_literals; - } - - forwardH = LZ4_HASH64K_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = ip - base; - - } while (A32(ref) != A32(ip)); - - /* Catch up */ - while ((ip > anchor) && (ref > (BYTE *) source) && - (ip[-1] == ref[-1])) { - ip--; - ref--; - } - - /* Encode Literal length */ - length = ip - anchor; - token = op++; - - /* Check output limit */ - if unlikely(op + length + (2 + 1 + LASTLITERALS) + - (length >> 8) > oend) - return (0); - - if (length >= (int)RUN_MASK) { - *token = (RUN_MASK << ML_BITS); - len = length - RUN_MASK; - for (; len > 254; len -= 255) - *op++ = 255; - *op++ = (BYTE)len; - } else - *token = (length << ML_BITS); - - /* Copy Literals */ - LZ4_BLINDCOPY(anchor, op, length); - - _next_match: - /* Encode Offset */ - LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); - - /* Start Counting */ - ip += MINMATCH; - ref += MINMATCH; /* MinMatch verified */ - anchor = ip; - while (ip < matchlimit - (STEPSIZE - 1)) { - UARCH diff = AARCH(ref) ^ AARCH(ip); - if (!diff) { - ip += STEPSIZE; - ref += STEPSIZE; - continue; - } - ip += LZ4_NbCommonBytes(diff); - goto _endCount; - } -#if LZ4_ARCH64 - if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { - ip += 4; - ref += 4; - } -#endif - if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { - ip += 2; - ref += 2; - } - if ((ip < matchlimit) && (*ref == *ip)) - ip++; - _endCount: - - /* Encode MatchLength */ - len = (ip - anchor); - /* Check output limit */ - if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend) - return (0); - if (len >= (int)ML_MASK) { - *token += ML_MASK; - len -= ML_MASK; - for (; len > 509; len -= 510) { - *op++ = 255; - *op++ = 255; - } - if (len > 254) { - len -= 255; - *op++ = 255; - } - *op++ = (BYTE)len; - } else - *token += len; - - /* Test end of chunk */ - if (ip > mflimit) { - anchor = ip; - break; - } - /* Fill table */ - HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; - - /* Test next position */ - ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; - HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; - if (A32(ref) == A32(ip)) { - token = op++; - *token = 0; - goto _next_match; - } - /* Prepare next loop */ - anchor = ip++; - forwardH = LZ4_HASH64K_VALUE(ip); - } - - _last_literals: - /* Encode Last Literals */ - { - int lastRun = iend - anchor; - if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > - oend) - return (0); - if (lastRun >= (int)RUN_MASK) { - *op++ = (RUN_MASK << ML_BITS); - lastRun -= RUN_MASK; - for (; lastRun > 254; lastRun -= 255) - *op++ = 255; - *op++ = (BYTE)lastRun; - } else - *op++ = (lastRun << ML_BITS); - (void) memcpy(op, anchor, iend - anchor); - op += iend - anchor; - } - - /* End */ - return (int)(((char *)op) - dest); -} - -static int -real_LZ4_compress(const char *source, char *dest, int isize, int osize) -{ -#if HEAPMODE - void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP); - int result; - - /* - * out of kernel memory, gently fall through - this will disable - * compression in zio_compress_data - */ - if (ctx == NULL) - return (0); - - 
-
-static int
-real_LZ4_compress(const char *source, char *dest, int isize, int osize)
-{
-#if HEAPMODE
-    void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
-    int result;
-
-    /*
-     * out of kernel memory, gently fall through - this will disable
-     * compression in zio_compress_data
-     */
-    if (ctx == NULL)
-        return (0);
-
-    bzero(ctx, sizeof(struct refTables));
-    if (isize < LZ4_64KLIMIT)
-        result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
-    else
-        result = LZ4_compressCtx(ctx, source, dest, isize, osize);
-
-    kmem_cache_free(lz4_ctx_cache, ctx);
-    return (result);
-#else
-    if (isize < (int)LZ4_64KLIMIT)
-        return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
-    return (LZ4_compressCtx(NULL, source, dest, isize, osize));
-#endif
-}
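real_LZ4_compress() deliberately returns 0 both when the destination budget is exceeded and when the context cache is exhausted, so a caller that only wants compression when it saves space can pass the input size as the output budget and store raw data on a zero return. A hypothetical caller sketch; the function name is invented and assumes the same translation unit as the code above:

    /* Compress into dst only if it shrinks the data; else signal "store raw". */
    static int
    compress_if_smaller(const char *src, char *dst, int len)
    {
        int clen = real_LZ4_compress(src, dst, len, len);

        return (clen == 0 ? len : clen);    /* len back means "keep src as-is" */
    }

This mirrors the fallback described by the comment in real_LZ4_compress(): a zero result simply disables compression in zio_compress_data.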
-
-/* Decompression functions */
-
-/*
- * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
- * against "buffer overflow" attack type. They will never write nor
- * read outside of the provided output buffers.
- * LZ4_uncompress_unknownOutputSize() also insures that it will never
- * read outside of the input buffer. A corrupted input will produce
- * an error result, a negative int, indicating the position of the
- * error within input stream.
- */
-
-static int
-LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
-    int maxOutputSize)
-{
-    /* Local Variables */
-    const BYTE *restrict ip = (const BYTE *) source;
-    const BYTE *const iend = ip + isize;
-    const BYTE *ref;
-
-    BYTE *op = (BYTE *) dest;
-    BYTE *const oend = op + maxOutputSize;
-    BYTE *cpy;
-
-    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
-#endif
-
-    /* Main Loop */
-    while (ip < iend) {
-        unsigned token;
-        size_t length;
-
-        /* get runlength */
-        token = *ip++;
-        if ((length = (token >> ML_BITS)) == RUN_MASK) {
-            int s = 255;
-            while ((ip < iend) && (s == 255)) {
-                s = *ip++;
-                length += s;
-            }
-        }
-        /* copy literals */
-        cpy = op + length;
-        /* CORNER-CASE: cpy might overflow. */
-        if (cpy < op)
-            goto _output_error;    /* cpy was overflowed, bail! */
-        if ((cpy > oend - COPYLENGTH) ||
-            (ip + length > iend - COPYLENGTH)) {
-            if (cpy > oend)
-                /* Error: writes beyond output buffer */
-                goto _output_error;
-            if (ip + length != iend)
-                /*
-                 * Error: LZ4 format requires to consume all
-                 * input at this stage
-                 */
-                goto _output_error;
-            (void) memcpy(op, ip, length);
-            op += length;
-            /* Necessarily EOF, due to parsing restrictions */
-            break;
-        }
-        LZ4_WILDCOPY(ip, op, cpy);
-        ip -= (op - cpy);
-        op = cpy;
-
-        /* get offset */
-        LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-        ip += 2;
-        if (ref < (BYTE * const) dest)
-            /*
-             * Error: offset creates reference outside of
-             * destination buffer
-             */
-            goto _output_error;
-
-        /* get matchlength */
-        if ((length = (token & ML_MASK)) == ML_MASK) {
-            while (ip < iend) {
-                int s = *ip++;
-                length += s;
-                if (s == 255)
-                    continue;
-                break;
-            }
-        }
-        /* copy repeated sequence */
-        if unlikely(op - ref < STEPSIZE) {
-#if LZ4_ARCH64
-            size_t dec64 = dec64table[op-ref];
-#else
-            const int dec64 = 0;
-#endif
-            op[0] = ref[0];
-            op[1] = ref[1];
-            op[2] = ref[2];
-            op[3] = ref[3];
-            op += 4;
-            ref += 4;
-            ref -= dec32table[op-ref];
-            A32(op) = A32(ref);
-            op += STEPSIZE - 4;
-            ref -= dec64;
-        } else {
-            LZ4_COPYSTEP(ref, op);
-        }
-        cpy = op + length - (STEPSIZE - 4);
-        if (cpy > oend - COPYLENGTH) {
-            if (cpy > oend)
-                /*
-                 * Error: request to write outside of
-                 * destination buffer
-                 */
-                goto _output_error;
-            LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-            while (op < cpy)
-                *op++ = *ref++;
-            op = cpy;
-            if (op == oend)
-                /*
-                 * Check EOF (should never happen, since
-                 * last 5 bytes are supposed to be literals)
-                 */
-                goto _output_error;
-            continue;
-        }
-        LZ4_SECURECOPY(ref, op, cpy);
-        op = cpy;    /* correction */
-    }
-
-    /* end of decoding */
-    return (int)(((char *)op) - dest);
-
-    /* write overflow error detected */
-    _output_error:
-    return (int)(-(((char *)ip) - source));
-}
-
-extern void
-lz4_init(void)
-{
-
-#if HEAPMODE
-    lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
-        0, NULL, NULL, NULL, NULL, NULL, 0);
-#endif
-}
-
-extern void
-lz4_fini(void)
-{
-
-#if HEAPMODE
-    kmem_cache_destroy(lz4_ctx_cache);
-#endif
-}

Property changes on: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
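The safety note in the deleted file is load-bearing: LZ4_uncompress_unknownOutputSize() reports corruption as a negative return whose magnitude is the offset in the input stream where decoding failed. A hypothetical caller sketch; the name is invented and assumes the same translation unit as the decoder above:

    /* Decode src into dst, separating "corrupt" from "how many bytes". */
    static int
    decode_or_report(const char *src, int srclen, char *dst, int dstcap)
    {
        int n = LZ4_uncompress_unknownOutputSize(src, dst, srclen, dstcap);

        if (n < 0)
            return (-1);    /* corrupt stream; -n is the input offset */
        return (n);         /* bytes produced in dst */
    }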
Index: head/sys/modules/zfs/Makefile
===================================================================
--- head/sys/modules/zfs/Makefile	(revision 354252)
+++ head/sys/modules/zfs/Makefile	(revision 354253)
@@ -1,115 +1,118 @@
 # $FreeBSD$
 SYSDIR?=${SRCTOP}/sys
 KMOD=	zfs
 SRCS=	bus_if.h device_if.h vnode_if.h opt_kstack_pages.h
 SUNW=	${SYSDIR}/cddl/contrib/opensolaris
 .PATH: ${SUNW}/common/acl
 SRCS+=	acl_common.c
 .PATH: ${SUNW}/common/avl
 SRCS+=	avl.c
 .PATH: ${SUNW}/common/nvpair
 SRCS+=	opensolaris_nvpair.c
 SRCS+=	opensolaris_nvpair_alloc_fixed.c
 SRCS+=	opensolaris_fnvpair.c
 .PATH: ${SYSDIR}/cddl/contrib/opensolaris/common/unicode
 SRCS+=	u8_textprep.c
+.PATH: ${SUNW}/common/lz4
+SRCS+=	lz4.c
 .PATH: ${SYSDIR}/cddl/compat/opensolaris/kern
 SRCS+=	opensolaris_acl.c
 SRCS+=	opensolaris_dtrace.c
 SRCS+=	opensolaris_kobj.c
 SRCS+=	opensolaris_kstat.c
 SRCS+=	opensolaris_lookup.c
 SRCS+=	opensolaris_policy.c
 SRCS+=	opensolaris_string.c
 SRCS+=	opensolaris_sysevent.c
 SRCS+=	opensolaris_taskq.c
 SRCS+=	opensolaris_uio.c
 SRCS+=	opensolaris_vfs.c
 SRCS+=	opensolaris_vm.c
 SRCS+=	opensolaris_zone.c
 _A=${SYSDIR}/cddl/contrib/opensolaris/common/atomic
 .if exists(${_A}/${MACHINE_CPUARCH}/opensolaris_atomic.S)
 .PATH: ${_A}/${MACHINE_CPUARCH}
 SRCS+=	opensolaris_atomic.S
 .elif exists(${_A}/${MACHINE_ARCH}/opensolaris_atomic.S)
 .PATH: ${_A}/${MACHINE_ARCH}
 SRCS+=	opensolaris_atomic.S
 .else
 SRCS+=	opensolaris_atomic.c
 .endif
 .PATH: ${SUNW}/uts/common/fs
 SRCS+=	vnode.c
 .PATH: ${SUNW}/uts/common/os
 SRCS+=	callb.c
 SRCS+=	fm.c
 SRCS+=	list.c
 SRCS+=	nvpair_alloc_system.c
 .PATH: ${SUNW}/uts/common/zmod
 SRCS+=	zmod.c
 .PATH: ${SYSDIR}/crypto/sha2
 SRCS+=	sha256c.c sha512c.c
 .PATH: ${SYSDIR}/crypto/skein
 SRCS+=	skein.c skein_block.c
 .PATH: ${SUNW}/common/zfs
 .include "${SUNW}/uts/common/Makefile.files"
 .PATH: ${SUNW}/uts/common/fs/zfs
 ZFS_SRCS=	${ZFS_OBJS:C/.o$/.c/}
 SRCS+=	${ZFS_SRCS}
 SRCS+=	vdev_geom.c
 SRCS+=	trim_map.c
 .PATH: ${SUNW}/uts/common/fs/zfs/lua
 LUA_SRCS=	${LUA_OBJS:C/.o$/.c/}
 SRCS+=	${LUA_SRCS}
 # Use FreeBSD's namecache.
 CFLAGS+=-DFREEBSD_NAMECACHE
 CFLAGS+=-I${SYSDIR}/cddl/compat/opensolaris
 CFLAGS+=-I${SUNW}/uts/common/fs/zfs
 CFLAGS+=-I${SUNW}/uts/common/fs/zfs/lua
 CFLAGS+=-I${SUNW}/uts/common/zmod
 CFLAGS+=-I${SUNW}/uts/common
 CFLAGS+=-I${SYSDIR}
 CFLAGS+=-I${SUNW}/common/zfs
+CFLAGS+=-I${SUNW}/common/lz4
 CFLAGS+=-I${SUNW}/common
 CFLAGS+=-DBUILDING_ZFS
 CFLAGS.gcc+=-fms-extensions
 .if ${MACHINE_ARCH} == "powerpc64"
 CFLAGS.gcc+=-mminimal-toc
 .endif
 .ifdef ZFS_DEBUG
 CFLAGS+=-DDEBUG=1
 DEBUG_FLAGS=-g
 .endif
 .include <bsd.kmod.mk>
 CFLAGS+=	-include ${SYSDIR}/cddl/compat/opensolaris/sys/debug_compat.h
 CWARNFLAGS+=-Wno-missing-prototypes
 CWARNFLAGS+=-Wno-undef
 CWARNFLAGS+=-Wno-strict-prototypes
 CWARNFLAGS+=-Wno-cast-qual
 CWARNFLAGS+=-Wno-parentheses
 CWARNFLAGS+=-Wno-redundant-decls
 CWARNFLAGS+=-Wno-missing-braces
 CWARNFLAGS+=-Wno-uninitialized
 CWARNFLAGS+=-Wno-unused
 CWARNFLAGS+=-Wno-inline
 CWARNFLAGS+=-Wno-switch
 CWARNFLAGS+=-Wno-pointer-arith
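For context on how the relocated file is consumed by the module built above: lz4_init() and lz4_fini() bracket the module's use of the HEAPMODE kmem cache, so the load and unload paths must call them around any compression work. A hypothetical lifecycle sketch; the wrapper function is invented for illustration, while the three calls inside it name real functions from the file above:

    /* Module lifecycle around the "lz4_ctx" kmem cache created above. */
    static void
    zfs_lz4_lifecycle_example(void)
    {
        lz4_init();    /* create the lz4_ctx cache (HEAPMODE builds) */
        /* ... real_LZ4_compress() / LZ4_uncompress_unknownOutputSize() ... */
        lz4_fini();    /* destroy the cache before module unload */
    }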