Index: stable/11/lib/libstand/Makefile
===================================================================
--- stable/11/lib/libstand/Makefile	(revision 329098)
+++ stable/11/lib/libstand/Makefile	(revision 329099)
@@ -1,159 +1,163 @@
 # $FreeBSD$
 # Originally from	$NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $
 #
 # Notes:
 # - We don't use the libc strerror/sys_errlist because the string table is
 #   quite large.
 #
 
 PACKAGE=lib${LIB}
 MK_PROFILE=	no
 MK_SSP=		no
 
 .include <src.opts.mk>
 
 LIBSTAND_SRC?=	${.CURDIR}
 LIBSTAND_CPUARCH?=${MACHINE_CPUARCH}
 LIBC_SRC=	${LIBSTAND_SRC}/../libc
 
 LIB=		stand
 NO_PIC=
 INCS?=		stand.h
 MAN?=		libstand.3
 
 WARNS?=		0
 
 CFLAGS+= -I${LIBSTAND_SRC}
 
 # standalone components and stuff we have modified locally
 SRCS+=	gzguts.h zutil.h __main.c assert.c bcd.c environment.c getopt.c gets.c \
 	globals.c pager.c printf.c strdup.c strerror.c strtol.c strtoul.c random.c \
 	sbrk.c twiddle.c zalloc.c zalloc_malloc.c
 
 # private (pruned) versions of libc string functions
 SRCS+=	strcasecmp.c
 
 .PATH: ${LIBC_SRC}/net
 
 SRCS+= ntoh.c
 
 # string functions from libc
 .PATH: ${LIBC_SRC}/string
 SRCS+=	bcmp.c bcopy.c bzero.c ffs.c fls.c \
 	memccpy.c memchr.c memcmp.c memcpy.c memmove.c memset.c \
 	qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \
 	strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \
 	strnlen.c strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c
 .if ${MACHINE_CPUARCH} == "arm"
 .PATH: ${LIBC_SRC}/arm/gen
 
 # Do not generate movt/movw, because the relocation fixup for them does not
 # translate to the -Bsymbolic -pie format required by self_reloc() in loader(8).
 # Also, the fpu is not available in a standalone environment.
 .if ${COMPILER_VERSION} < 30800
 CFLAGS.clang+=	-mllvm -arm-use-movt=0
 .else
 CFLAGS.clang+=	-mno-movt
 .endif
 CFLAGS.clang+=	-mfpu=none
 
 # Compiler support functions
 .PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/
 # __clzsi2 and ctzsi2 for various builtin functions
 SRCS+=	clzsi2.c ctzsi2.c
 # Divide and modulus functions called by the compiler
 SRCS+=	 divmoddi4.c  divmodsi4.c  divdi3.c  divsi3.c  moddi3.c  modsi3.c
 SRCS+=	udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c
 
 .PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/arm/
 SRCS+=	aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S
 SRCS+=	aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S
 .endif
 
 .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv"
 .PATH: ${LIBC_SRC}/${MACHINE_CPUARCH}/gen
 .endif
 
 .if ${MACHINE_CPUARCH} == "powerpc"
 .PATH: ${LIBC_SRC}/quad
 SRCS+=	ashldi3.c ashrdi3.c
 SRCS+=	syncicache.c
 .endif
 
 # uuid functions from libc
 .PATH: ${LIBC_SRC}/uuid
 SRCS+= uuid_create_nil.c uuid_equal.c uuid_from_string.c uuid_is_nil.c uuid_to_string.c
 
 # _setjmp/_longjmp
 .PATH: ${LIBSTAND_SRC}/${LIBSTAND_CPUARCH}
 SRCS+=	_setjmp.S
 
 # decompression functionality from libbz2
 # NOTE: to actually test this functionality after libbz2 upgrade compile
 # loader(8) with LOADER_BZIP2_SUPPORT defined
 .PATH: ${LIBSTAND_SRC}/../../contrib/bzip2
 CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS
 SRCS+=	libstand_bzlib_private.h
 
 .for file in bzlib.c crctable.c decompress.c huffman.c randtable.c
 SRCS+=	_${file}
 CLEANFILES+=	_${file}
 
 _${file}: ${file}
 	sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \
 	    ${.ALLSRC} > ${.TARGET}
 .endfor
 
 CLEANFILES+= libstand_bzlib_private.h
 libstand_bzlib_private.h: bzlib_private.h
 	sed -e 's|<stdlib.h>|"stand.h"|' \
 		${.ALLSRC} > ${.TARGET}
 
 # decompression functionality from zlib
 .PATH: ${LIBSTAND_SRC}/../../contrib/zlib
 CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../../contrib/zlib
 SRCS+=	adler32.c crc32.c libstand_zutil.h libstand_gzguts.h
 
 .for file in infback.c inffast.c inflate.c inftrees.c zutil.c
 SRCS+=	_${file}
 CLEANFILES+=	_${file}
 
 _${file}: ${file}
 	sed -e "s|zutil\.h|libstand_zutil.h|" \
 	    -e "s|gzguts\.h|libstand_gzguts.h|" \
 	    ${.ALLSRC} > ${.TARGET}
 .endfor
 
 # depend on stand.h being able to be included multiple times
 .for file in zutil.h gzguts.h
 CLEANFILES+= libstand_${file}
 libstand_${file}: ${file}
 	sed -e 's|<fcntl.h>|"stand.h"|' \
 	    -e 's|<stddef.h>|"stand.h"|' \
 	    -e 's|<string.h>|"stand.h"|' \
 	    -e 's|<stdio.h>|"stand.h"|' \
 	    -e 's|<stdlib.h>|"stand.h"|' \
 	    ${.ALLSRC} > ${.TARGET}
 .endfor
 
 # io routines
 SRCS+=	closeall.c dev.c ioctl.c nullfs.c stat.c \
 	fstat.c close.c lseek.c open.c read.c write.c readdir.c
 
 # network routines
 SRCS+=	arp.c ether.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c
 
 # network info services:
 SRCS+=	bootp.c rarp.c bootparam.c
 
 # boot filesystems
 SRCS+=	ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c
 SRCS+=	dosfs.c ext2fs.c
 SRCS+=	splitfs.c
 SRCS+=	pkgfs.c
 .if ${MK_NAND} != "no"
 SRCS+=	nandfs.c
 .endif
 
+# explicit_bzero, built from the sys/libkern sources
+.PATH: ${SRCTOP}/sys/libkern
+SRCS+=  explicit_bzero.c
+
 .include <bsd.stand.mk>
 .include <bsd.lib.mk>
Index: stable/11/lib/libstand/bootp.c
===================================================================
--- stable/11/lib/libstand/bootp.c	(revision 329098)
+++ stable/11/lib/libstand/bootp.c	(revision 329099)
@@ -1,758 +1,788 @@
 /*	$NetBSD: bootp.c,v 1.14 1998/02/16 11:10:54 drochner Exp $	*/
 
 /*
  * Copyright (c) 1992 Regents of the University of California.
  * All rights reserved.
  *
  * This software was developed by the Computer Systems Engineering group
  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
  * contributed to Berkeley.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * @(#) Header: bootp.c,v 1.4 93/09/11 03:13:51 leres Exp  (LBL)
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
+#include <sys/limits.h>
 #include <sys/endian.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 
 #include <string.h>
 
 #define BOOTP_DEBUGxx
 #define SUPPORT_DHCP
 
 #define	DHCP_ENV_NOVENDOR	1	/* do not parse vendor options */
 #define	DHCP_ENV_PXE		10	/* assume pxe vendor options */
 #define	DHCP_ENV_FREEBSD	11	/* assume freebsd vendor options */
 /* set DHCP_ENV to one of the values above to export dhcp options to kenv */
 #define DHCP_ENV		DHCP_ENV_NO_VENDOR
 
 #include "stand.h"
 #include "net.h"
 #include "netif.h"
 #include "bootp.h"
 
 
 struct in_addr servip;
 
 static time_t	bot;
 
 static	char vm_rfc1048[4] = VM_RFC1048;
 #ifdef BOOTP_VEND_CMU
 static	char vm_cmu[4] = VM_CMU;
 #endif
 
 /* Local forwards */
 static	ssize_t bootpsend(struct iodesc *, void *, size_t);
 static	ssize_t bootprecv(struct iodesc *, void *, size_t, time_t);
 static	int vend_rfc1048(u_char *, u_int);
 #ifdef BOOTP_VEND_CMU
 static	void vend_cmu(u_char *);
 #endif
 
 #ifdef DHCP_ENV		/* export the dhcp response to kenv */
 struct dhcp_opt;
 static void setenv_(u_char *cp,  u_char *ep, struct dhcp_opt *opts);
 #else
 #define setenv_(a, b, c)
 #endif
 
 #ifdef SUPPORT_DHCP
 static char expected_dhcpmsgtype = -1, dhcp_ok;
 struct in_addr dhcp_serverip;
 #endif
 
 /* Fetch required bootp infomation */
 void
 bootp(sock, flag)
 	int sock;
 	int flag;
 {
 	struct iodesc *d;
 	struct bootp *bp;
 	struct {
 		u_char header[HEADER_SIZE];
 		struct bootp wbootp;
 	} wbuf;
 	struct {
 		u_char header[HEADER_SIZE];
 		struct bootp rbootp;
 	} rbuf;
 
 #ifdef BOOTP_DEBUG
  	if (debug)
 		printf("bootp: socket=%d\n", sock);
 #endif
 	if (!bot)
 		bot = getsecs();
 	
 	if (!(d = socktodesc(sock))) {
 		printf("bootp: bad socket. %d\n", sock);
 		return;
 	}
 #ifdef BOOTP_DEBUG
  	if (debug)
 		printf("bootp: d=%lx\n", (long)d);
 #endif
 
 	bp = &wbuf.wbootp;
 	bzero(bp, sizeof(*bp));
 
 	bp->bp_op = BOOTREQUEST;
 	bp->bp_htype = 1;		/* 10Mb Ethernet (48 bits) */
 	bp->bp_hlen = 6;
 	bp->bp_xid = htonl(d->xid);
 	MACPY(d->myea, bp->bp_chaddr);
 	strncpy(bp->bp_file, bootfile, sizeof(bp->bp_file));
 	bcopy(vm_rfc1048, bp->bp_vend, sizeof(vm_rfc1048));
 #ifdef SUPPORT_DHCP
 	bp->bp_vend[4] = TAG_DHCP_MSGTYPE;
 	bp->bp_vend[5] = 1;
 	bp->bp_vend[6] = DHCPDISCOVER;
 
 	/*
 	 * If we are booting from PXE, we want to send the string
 	 * 'PXEClient' to the DHCP server so you have the option of
 	 * only responding to PXE aware dhcp requests.
 	 */
 	if (flag & BOOTP_PXE) {
 		bp->bp_vend[7] = TAG_CLASSID;
 		bp->bp_vend[8] = 9;
 		bcopy("PXEClient", &bp->bp_vend[9], 9);
 		bp->bp_vend[18] = TAG_PARAM_REQ;
 		bp->bp_vend[19] = 8;
 		bp->bp_vend[20] = TAG_ROOTPATH;
 		bp->bp_vend[21] = TAG_TFTP_SERVER;
 		bp->bp_vend[22] = TAG_HOSTNAME;
 		bp->bp_vend[23] = TAG_SWAPSERVER;
 		bp->bp_vend[24] = TAG_GATEWAY;
 		bp->bp_vend[25] = TAG_SUBNET_MASK;
 		bp->bp_vend[26] = TAG_INTF_MTU;
 		bp->bp_vend[27] = TAG_SERVERID;
 		bp->bp_vend[28] = TAG_END;
 	} else
 		bp->bp_vend[7] = TAG_END;
 #else
 	bp->bp_vend[4] = TAG_END;
 #endif
 
 	d->myip.s_addr = INADDR_ANY;
 	d->myport = htons(IPPORT_BOOTPC);
 	d->destip.s_addr = INADDR_BROADCAST;
 	d->destport = htons(IPPORT_BOOTPS);
 
 #ifdef SUPPORT_DHCP
 	expected_dhcpmsgtype = DHCPOFFER;
 	dhcp_ok = 0;
 #endif
 
 	if(sendrecv(d,
 		    bootpsend, bp, sizeof(*bp),
 		    bootprecv, &rbuf.rbootp, sizeof(rbuf.rbootp))
 	   == -1) {
 	    printf("bootp: no reply\n");
 	    return;
 	}
 
 #ifdef SUPPORT_DHCP
 	if(dhcp_ok) {
 		u_int32_t leasetime;
 		bp->bp_vend[6] = DHCPREQUEST;
 		bp->bp_vend[7] = TAG_REQ_ADDR;
 		bp->bp_vend[8] = 4;
 		bcopy(&rbuf.rbootp.bp_yiaddr, &bp->bp_vend[9], 4);
 		bp->bp_vend[13] = TAG_SERVERID;
 		bp->bp_vend[14] = 4;
 		bcopy(&dhcp_serverip.s_addr, &bp->bp_vend[15], 4);
 		bp->bp_vend[19] = TAG_LEASETIME;
 		bp->bp_vend[20] = 4;
 		leasetime = htonl(300);
 		bcopy(&leasetime, &bp->bp_vend[21], 4);
 		if (flag & BOOTP_PXE) {
 			bp->bp_vend[25] = TAG_CLASSID;
 			bp->bp_vend[26] = 9;
 			bcopy("PXEClient", &bp->bp_vend[27], 9);
 			bp->bp_vend[36] = TAG_END;
 		} else
 			bp->bp_vend[25] = TAG_END;
 
 		expected_dhcpmsgtype = DHCPACK;
 
 		if(sendrecv(d,
 			    bootpsend, bp, sizeof(*bp),
 			    bootprecv, &rbuf.rbootp, sizeof(rbuf.rbootp))
 		   == -1) {
 			printf("DHCPREQUEST failed\n");
 			return;
 		}
 	}
 #endif
 
 	myip = d->myip = rbuf.rbootp.bp_yiaddr;
 	servip = rbuf.rbootp.bp_siaddr;
 	if(rootip.s_addr == INADDR_ANY) rootip = servip;
 	bcopy(rbuf.rbootp.bp_file, bootfile, sizeof(bootfile));
 	bootfile[sizeof(bootfile) - 1] = '\0';
 
 	if (!netmask) {
 		if (IN_CLASSA(ntohl(myip.s_addr)))
 			netmask = htonl(IN_CLASSA_NET);
 		else if (IN_CLASSB(ntohl(myip.s_addr)))
 			netmask = htonl(IN_CLASSB_NET);
 		else
 			netmask = htonl(IN_CLASSC_NET);
 #ifdef BOOTP_DEBUG
 		if (debug)
 			printf("'native netmask' is %s\n", intoa(netmask));
 #endif
 	}
 
 #ifdef BOOTP_DEBUG
 	if (debug)
 		printf("mask: %s\n", intoa(netmask));
 #endif
 
 	/* We need a gateway if root is on a different net */
 	if (!SAMENET(myip, rootip, netmask)) {
 #ifdef BOOTP_DEBUG
 		if (debug)
 			printf("need gateway for root ip\n");
 #endif
 	}
 
 	/* Toss gateway if on a different net */
 	if (!SAMENET(myip, gateip, netmask)) {
 #ifdef BOOTP_DEBUG
 		if (debug)
 			printf("gateway ip (%s) bad\n", inet_ntoa(gateip));
 #endif
 		gateip.s_addr = 0;
 	}
 
 	/* Bump xid so next request will be unique. */
 	++d->xid;
 }
 
 /* Transmit a bootp request */
 static ssize_t
 bootpsend(d, pkt, len)
 	struct iodesc *d;
 	void *pkt;
 	size_t len;
 {
 	struct bootp *bp;
 
 #ifdef BOOTP_DEBUG
 	if (debug)
 		printf("bootpsend: d=%lx called.\n", (long)d);
 #endif
 
 	bp = pkt;
 	bp->bp_secs = htons((u_short)(getsecs() - bot));
 
 #ifdef BOOTP_DEBUG
 	if (debug)
 		printf("bootpsend: calling sendudp\n");
 #endif
 
 	return (sendudp(d, pkt, len));
 }
 
 static ssize_t
 bootprecv(d, pkt, len, tleft)
 struct iodesc *d;
 void *pkt;
 size_t len;
 time_t tleft;
 {
 	ssize_t n;
 	struct bootp *bp;
 
 #ifdef BOOTP_DEBUGx
 	if (debug)
 		printf("bootp_recvoffer: called\n");
 #endif
 
 	n = readudp(d, pkt, len, tleft);
 	if (n == -1 || n < sizeof(struct bootp) - BOOTP_VENDSIZE)
 		goto bad;
 
 	bp = (struct bootp *)pkt;
 	
 #ifdef BOOTP_DEBUG
 	if (debug)
 		printf("bootprecv: checked.  bp = 0x%lx, n = %d\n",
 		    (long)bp, (int)n);
 #endif
 	if (bp->bp_xid != htonl(d->xid)) {
 #ifdef BOOTP_DEBUG
 		if (debug) {
 			printf("bootprecv: expected xid 0x%lx, got 0x%x\n",
 			    d->xid, ntohl(bp->bp_xid));
 		}
 #endif
 		goto bad;
 	}
 
 #ifdef BOOTP_DEBUG
 	if (debug)
 		printf("bootprecv: got one!\n");
 #endif
 
 	/* Suck out vendor info */
 	if (bcmp(vm_rfc1048, bp->bp_vend, sizeof(vm_rfc1048)) == 0) {
 		if(vend_rfc1048(bp->bp_vend, sizeof(bp->bp_vend)) != 0)
 		    goto bad;
 	}
 #ifdef BOOTP_VEND_CMU
 	else if (bcmp(vm_cmu, bp->bp_vend, sizeof(vm_cmu)) == 0)
 		vend_cmu(bp->bp_vend);
 #endif
 	else
 		printf("bootprecv: unknown vendor 0x%lx\n", (long)bp->bp_vend);
 
 	return(n);
 bad:
 	errno = 0;
 	return (-1);
 }
 
+int
+dhcp_try_rfc1048(u_char *cp, u_int len)
+{
+	/* Expect a DHCPACK; fail (-1) on a short or non-RFC1048 buffer. */
+	expected_dhcpmsgtype = DHCPACK;
+	if (len < sizeof(vm_rfc1048) ||
+	    bcmp(vm_rfc1048, cp, sizeof(vm_rfc1048)) != 0)
+		return (-1);
+	return (vend_rfc1048(cp, len));
+}
+
 static int
 vend_rfc1048(cp, len)
 	u_char *cp;
 	u_int len;
 {
 	u_char *ep;
 	int size;
 	u_char tag;
 	const char *val;
 
 #ifdef BOOTP_DEBUG
 	if (debug)
 		printf("vend_rfc1048 bootp info. len=%d\n", len);
 #endif
 	ep = cp + len;
 
 	/* Step over magic cookie */
 	cp += sizeof(int);
 
 	setenv_(cp, ep, NULL);
 
 	while (cp < ep) {
 		tag = *cp++;
 		size = *cp++;
 		if (tag == TAG_END)
 			break;
 
 		if (tag == TAG_SUBNET_MASK) {
 			bcopy(cp, &netmask, sizeof(netmask));
 		}
 		if (tag == TAG_GATEWAY) {
 			bcopy(cp, &gateip.s_addr, sizeof(gateip.s_addr));
 		}
 		if (tag == TAG_SWAPSERVER) {
 			/* let it override bp_siaddr */
 			bcopy(cp, &rootip.s_addr, sizeof(rootip.s_addr));
 		}
 		if (tag == TAG_ROOTPATH) {
 			if ((val = getenv("dhcp.root-path")) == NULL)
 				val = (const char *)cp;
 			strlcpy(rootpath, val, sizeof(rootpath));
 		}
 		if (tag == TAG_HOSTNAME) {
 			if ((val = getenv("dhcp.host-name")) == NULL)
 				val = (const char *)cp;
 			strlcpy(hostname, val, sizeof(hostname));
 		}
 		if (tag == TAG_INTF_MTU) {
+			intf_mtu = 0;
 			if ((val = getenv("dhcp.interface-mtu")) != NULL) {
-				intf_mtu = (u_int)strtoul(val, NULL, 0);
-			} else {
-				intf_mtu = be16dec(cp);
+				unsigned long tmp;
+				char *end;
+
+				errno = 0;
+				/*
+				 * Do not allow MTU to exceed max IPv4 packet
+				 * size, max value of 16-bit word.
+				 */
+				tmp = strtoul(val, &end, 0);
+				if (errno != 0 || *val == '\0' ||
+				    *end != '\0' || tmp > USHRT_MAX) {
+					printf("%s: bad value: \"%s\", "
+					    "ignoring\n",
+					    "dhcp.interface-mtu", val);
+				} else {
+					intf_mtu = (u_int)tmp;
+				}
 			}
+			/* No valid override: use the 16-bit option payload. */
+			if (intf_mtu == 0 && size >= 2)
+				intf_mtu = be16dec(cp);
 		}
 #ifdef SUPPORT_DHCP
 		if (tag == TAG_DHCP_MSGTYPE) {
 			if(*cp != expected_dhcpmsgtype)
 			    return(-1);
 			dhcp_ok = 1;
 		}
 		if (tag == TAG_SERVERID) {
 			bcopy(cp, &dhcp_serverip.s_addr,
 			      sizeof(dhcp_serverip.s_addr));
 		}
 		if (tag == TAG_TFTP_SERVER) {
 			bcopy(cp, &tftpip.s_addr,
 			      sizeof(tftpip.s_addr));
 		}
 #endif
 		cp += size;
 	}
 	return(0);
 }
 
 #ifdef BOOTP_VEND_CMU
 static void
 vend_cmu(cp)
 	u_char *cp;
 {
 	struct cmu_vend *vp;
 
 #ifdef BOOTP_DEBUG
 	if (debug)
 		printf("vend_cmu bootp info.\n");
 #endif
 	vp = (struct cmu_vend *)cp;
 
 	if (vp->v_smask.s_addr != 0) {
 		netmask = vp->v_smask.s_addr;
 	}
 	if (vp->v_dgate.s_addr != 0) {
 		gateip = vp->v_dgate;
 	}
 }
 #endif
 
 #ifdef DHCP_ENV
 /*
  * Parse DHCP options and store them into kenv variables.
  * Original code from Danny Braniss, modifications by Luigi Rizzo.
  *
  * The parser is driven by tables which specify the type and name of
  * each dhcp option and how it appears in kenv.
  * The first entry in the list contains the prefix used to set the kenv
  * name (including the . if needed), the last entry must have a 0 tag.
  * Entries do not need to be sorted though it helps for readability.
  *
  * Certain vendor-specific tables can be enabled according to DHCP_ENV.
  * Set it to 0 if you don't want any.
  */
 enum opt_fmt { __NONE = 0,
 	__8 = 1, __16 = 2, __32 = 4,	/* Unsigned fields, value=size	*/
 	__IP,				/* IPv4 address			*/
 	__TXT,				/* C string			*/
 	__BYTES,			/* byte sequence, printed %02x	*/
 	__INDIR,			/* name=value			*/
 	__ILIST,			/* name=value;name=value ... */
 	__VE,				/* vendor specific, recurse	*/
 };
 
 struct dhcp_opt {
 	uint8_t	tag;
 	uint8_t	fmt;
 	const char	*desc;
 };
 
 static struct dhcp_opt vndr_opt[] = { /* Vendor Specific Options */
 #if DHCP_ENV == DHCP_ENV_FREEBSD /* FreeBSD table in the original code */
 	{0,	0,	"FreeBSD"},		/* prefix */
 	{1,	__TXT,	"kernel"},
 	{2,	__TXT,	"kernelname"},
 	{3,	__TXT,	"kernel_options"},
 	{4,	__IP,	"usr-ip"},
 	{5,	__TXT,	"conf-path"},
 	{6,	__TXT,	"rc.conf0"},
 	{7,	__TXT,	"rc.conf1"},
 	{8,	__TXT,	"rc.conf2"},
 	{9,	__TXT,	"rc.conf3"},
 	{10,	__TXT,	"rc.conf4"},
 	{11,	__TXT,	"rc.conf5"},
 	{12,	__TXT,	"rc.conf6"},
 	{13,	__TXT,	"rc.conf7"},
 	{14,	__TXT,	"rc.conf8"},
 	{15,	__TXT,	"rc.conf9"},
 
 	{20,	__TXT,  "boot.nfsroot.options"},
 
 	{245,	__INDIR, ""},
 	{246,	__INDIR, ""},
 	{247,	__INDIR, ""},
 	{248,	__INDIR, ""},
 	{249,	__INDIR, ""},
 	{250,	__INDIR, ""},
 	{251,	__INDIR, ""},
 	{252,	__INDIR, ""},
 	{253,	__INDIR, ""},
 	{254,	__INDIR, ""},
 
 #elif DHCP_ENV == DHCP_ENV_PXE		/* some pxe options, RFC4578 */
 	{0,	0,	"pxe"},		/* prefix */
 	{93,	__16,	"system-architecture"},
 	{94,	__BYTES,	"network-interface"},
 	{97,	__BYTES,	"machine-identifier"},
 #else					/* default (empty) table */
 	{0,	0,	"dhcp.vendor."},		/* prefix */
 #endif
 	{0,	__TXT,	"%soption-%d"}
 };
 
 static struct dhcp_opt dhcp_opt[] = {
 	/* DHCP Option names, formats and codes, from RFC2132. */
 	{0,	0,	"dhcp."},	// prefix
 	{1,	__IP,	"subnet-mask"},
 	{2,	__32,	"time-offset"}, /* this is signed */
 	{3,	__IP,	"routers"},
 	{4,	__IP,	"time-servers"},
 	{5,	__IP,	"ien116-name-servers"},
 	{6,	__IP,	"domain-name-servers"},
 	{7,	__IP,	"log-servers"},
 	{8,	__IP,	"cookie-servers"},
 	{9,	__IP,	"lpr-servers"},
 	{10,	__IP,	"impress-servers"},
 	{11,	__IP,	"resource-location-servers"},
 	{12,	__TXT,	"host-name"},
 	{13,	__16,	"boot-size"},
 	{14,	__TXT,	"merit-dump"},
 	{15,	__TXT,	"domain-name"},
 	{16,	__IP,	"swap-server"},
 	{17,	__TXT,	"root-path"},
 	{18,	__TXT,	"extensions-path"},
 	{19,	__8,	"ip-forwarding"},
 	{20,	__8,	"non-local-source-routing"},
 	{21,	__IP,	"policy-filter"},
 	{22,	__16,	"max-dgram-reassembly"},
 	{23,	__8,	"default-ip-ttl"},
 	{24,	__32,	"path-mtu-aging-timeout"},
 	{25,	__16,	"path-mtu-plateau-table"},
 	{26,	__16,	"interface-mtu"},
 	{27,	__8,	"all-subnets-local"},
 	{28,	__IP,	"broadcast-address"},
 	{29,	__8,	"perform-mask-discovery"},
 	{30,	__8,	"mask-supplier"},
 	{31,	__8,	"perform-router-discovery"},
 	{32,	__IP,	"router-solicitation-address"},
 	{33,	__IP,	"static-routes"},
 	{34,	__8,	"trailer-encapsulation"},
 	{35,	__32,	"arp-cache-timeout"},
 	{36,	__8,	"ieee802-3-encapsulation"},
 	{37,	__8,	"default-tcp-ttl"},
 	{38,	__32,	"tcp-keepalive-interval"},
 	{39,	__8,	"tcp-keepalive-garbage"},
 	{40,	__TXT,	"nis-domain"},
 	{41,	__IP,	"nis-servers"},
 	{42,	__IP,	"ntp-servers"},
 	{43,	__VE,	"vendor-encapsulated-options"},
 	{44,	__IP,	"netbios-name-servers"},
 	{45,	__IP,	"netbios-dd-server"},
 	{46,	__8,	"netbios-node-type"},
 	{47,	__TXT,	"netbios-scope"},
 	{48,	__IP,	"x-font-servers"},
 	{49,	__IP,	"x-display-managers"},
 	{50,	__IP,	"dhcp-requested-address"},
 	{51,	__32,	"dhcp-lease-time"},
 	{52,	__8,	"dhcp-option-overload"},
 	{53,	__8,	"dhcp-message-type"},
 	{54,	__IP,	"dhcp-server-identifier"},
 	{55,	__8,	"dhcp-parameter-request-list"},
 	{56,	__TXT,	"dhcp-message"},
 	{57,	__16,	"dhcp-max-message-size"},
 	{58,	__32,	"dhcp-renewal-time"},
 	{59,	__32,	"dhcp-rebinding-time"},
 	{60,	__TXT,	"vendor-class-identifier"},
 	{61,	__TXT,	"dhcp-client-identifier"},
 	{64,	__TXT,	"nisplus-domain"},
 	{65,	__IP,	"nisplus-servers"},
 	{66,	__TXT,	"tftp-server-name"},
 	{67,	__TXT,	"bootfile-name"},
 	{68,	__IP,	"mobile-ip-home-agent"},
 	{69,	__IP,	"smtp-server"},
 	{70,	__IP,	"pop-server"},
 	{71,	__IP,	"nntp-server"},
 	{72,	__IP,	"www-server"},
 	{73,	__IP,	"finger-server"},
 	{74,	__IP,	"irc-server"},
 	{75,	__IP,	"streettalk-server"},
 	{76,	__IP,	"streettalk-directory-assistance-server"},
 	{77,	__TXT,	"user-class"},
 	{85,	__IP,	"nds-servers"},
 	{86,	__TXT,	"nds-tree-name"},
 	{87,	__TXT,	"nds-context"},
 	{210,	__TXT,	"authenticate"},
 
 	/* use the following entries for arbitrary variables */
 	{246,	__ILIST, ""},
 	{247,	__ILIST, ""},
 	{248,	__ILIST, ""},
 	{249,	__ILIST, ""},
 	{250,	__INDIR, ""},
 	{251,	__INDIR, ""},
 	{252,	__INDIR, ""},
 	{253,	__INDIR, ""},
 	{254,	__INDIR, ""},
 	{0,	__TXT,	"%soption-%d"}
 };
 
 /*
  * parse a dhcp response, set environment variables translating options
  * names and values according to the tables above. Also set dhcp.tags
  * to the list of selected tags.
  */
 static void
 setenv_(u_char *cp,  u_char *ep, struct dhcp_opt *opts)
 {
     u_char	*ncp;
     u_char	tag;
     char	tags[512], *tp;	/* the list of tags */
 
 #define FLD_SEP	','	/* separator in list of elements */
     ncp = cp;
     tp = tags;
     if (opts == NULL)
 	opts = dhcp_opt;
 
     while (ncp < ep) {
 	unsigned int	size;		/* option size */
 	char *vp, *endv, buf[256];	/* the value buffer */
 	struct dhcp_opt *op;
 
 	tag = *ncp++;			/* extract tag and size */
 	size = *ncp++;
 	cp = ncp;			/* current payload */
 	ncp += size;			/* point to the next option */
 
 	if (tag == TAG_END)
 	    break;
 	if (tag == 0)
 	    continue;
 
 	for (op = opts+1; op->tag && op->tag != tag; op++)
 		;
 	/* if not found we end up on the default entry */
 
 	/*
 	 * Copy data into the buffer. libstand does not have snprintf so we
 	 * need to be careful with sprintf(). With strings, the source is
 	 * always <256 char so shorter than the buffer so we are safe; with
 	 * other arguments, the longest string is inet_ntoa which is 16 bytes
 	 * so we make sure to have always enough room in the string before
 	 * trying an sprint.
 	 */
 	vp = buf;
 	*vp = '\0';
 	endv = buf + sizeof(buf) - 1 - 16;	/* last valid write position */
 
 	switch(op->fmt) {
 	case __NONE:
 	    break;	/* should not happen */
 
 	case __VE: /* recurse, vendor specific */
 	    setenv_(cp, cp+size, vndr_opt);
 	    break;
 
 	case __IP:	/* ip address */
 	    for (; size > 0 && vp < endv; size -= 4, cp += 4) {
 		struct	in_addr in_ip;		/* ip addresses */
 		if (vp != buf)
 		    *vp++ = FLD_SEP;
 		bcopy(cp, &in_ip.s_addr, sizeof(in_ip.s_addr));
 		sprintf(vp, "%s", inet_ntoa(in_ip));
 		vp += strlen(vp);
 	    }
 	    break;
 
 	case __BYTES:	/* opaque byte string */
 	    for (; size > 0 && vp < endv; size -= 1, cp += 1) {
 		sprintf(vp, "%02x", *cp);
 		vp += strlen(vp);
 	    }
 	    break;
 
 	case __TXT:
 	    bcopy(cp, buf, size);	/* cannot overflow */
 	    buf[size] = 0;
 	    break;
 
 	case __32:
 	case __16:
 	case __8:	/* op->fmt is also the length of each field */
 	    for (; size > 0 && vp < endv; size -= op->fmt, cp += op->fmt) {
 		uint32_t v;
 		if (op->fmt == __32)
 			v = (cp[0]<<24) + (cp[1]<<16) + (cp[2]<<8) + cp[3];
 		else if (op->fmt == __16)
 			v = (cp[0]<<8) + cp[1];
 		else
 			v = cp[0];
 		if (vp != buf)
 		    *vp++ = FLD_SEP;
 		sprintf(vp, "%u", v);
 		vp += strlen(vp);
 	    }
 	    break;
 
 	case __INDIR:	/* name=value */
 	case __ILIST:	/* name=value;name=value... */
 	    bcopy(cp, buf, size);	/* cannot overflow */
 	    buf[size] = '\0';
 	    for (endv = buf; endv; endv = vp) {
 		u_char *s = NULL;	/* semicolon ? */
 
 		/* skip leading whitespace */
 		while (*endv && strchr(" \t\n\r", *endv))
 		    endv++;
 		vp = strchr(endv, '=');	/* find name=value separator */
 		if (!vp)
 		    break;
 		*vp++ = 0;
 		if (op->fmt == __ILIST && (s = strchr(vp, ';')))
 		    *s++ = '\0';
 		setenv(endv, vp, 1);
 		vp = s;	/* prepare for next round */
 	    }
 	    buf[0] = '\0';	/* option already done */
 	}
 
 	if (tp - tags < sizeof(tags) - 5) {	/* add tag to the list */
 	    if (tp != tags)
 		*tp++ = FLD_SEP;
 	    sprintf(tp, "%d", tag);
 	    tp += strlen(tp);
 	}
 	if (buf[0]) {
 	    char	env[128];	/* the string name */
 
 	    if (op->tag == 0)
 		sprintf(env, op->desc, opts[0].desc, tag);
 	    else
 		sprintf(env, "%s%s", opts[0].desc, op->desc);
 	    /*
 	     * Do not replace existing values in the environment, so that
 	     * locally-obtained values can override server-provided values.
 	     */
 	    setenv(env, buf, 0);
 	}
     }
     if (tp != tags) {
 	char	env[128];	/* the string name */
 	sprintf(env, "%stags", opts[0].desc);
 	setenv(env, tags, 1);
     }
 }
 #endif /* additional dhcp */
Index: stable/11/lib/libstand/bootp.h
===================================================================
--- stable/11/lib/libstand/bootp.h	(revision 329098)
+++ stable/11/lib/libstand/bootp.h	(revision 329099)
@@ -1,147 +1,153 @@
 /*	$NetBSD: bootp.h,v 1.4 1997/09/06 13:55:57 drochner Exp $	*/
 
 /*
  * Bootstrap Protocol (BOOTP).  RFC951 and RFC1048.
  *
  * This file specifies the "implementation-independent" BOOTP protocol
  * information which is common to both client and server.
  *
  * Copyright 1988 by Carnegie Mellon.
  *
  * Permission to use, copy, modify, and distribute this program for any
  * purpose and without fee is hereby granted, provided that this copyright
  * and permission notice appear on all copies and supporting documentation,
  * the name of Carnegie Mellon not be used in advertising or publicity
  * pertaining to distribution of the program without specific prior
  * permission, and notice be given in supporting documentation that copying
  * and distribution is by permission of Carnegie Mellon and Stanford
  * University.  Carnegie Mellon makes no representations about the
  * suitability of this software for any purpose.  It is provided "as is"
  * without express or implied warranty.
  *
  * $FreeBSD$
  */
 
+#ifndef _BOOTP_H_
+#define _BOOTP_H_
 
 struct bootp {
 	unsigned char	bp_op;		/* packet opcode type */
 	unsigned char	bp_htype;	/* hardware addr type */
 	unsigned char	bp_hlen;	/* hardware addr length */
 	unsigned char	bp_hops;	/* gateway hops */
 	unsigned int	bp_xid;		/* transaction ID */
 	unsigned short	bp_secs;	/* seconds since boot began */
 	unsigned short	bp_flags;
 	struct in_addr	bp_ciaddr;	/* client IP address */
 	struct in_addr	bp_yiaddr;	/* 'your' IP address */
 	struct in_addr	bp_siaddr;	/* server IP address */
 	struct in_addr	bp_giaddr;	/* gateway IP address */
 	unsigned char	bp_chaddr[16];	/* client hardware address */
 	unsigned char	bp_sname[64];	/* server host name */
 	unsigned char	bp_file[128];	/* boot file name */
 #ifdef SUPPORT_DHCP
 #define BOOTP_VENDSIZE 312
 #else
 #define BOOTP_VENDSIZE 64
 #endif
 	unsigned char	bp_vend[BOOTP_VENDSIZE];	/* vendor-specific area */
 };
 
 /*
  * UDP port numbers, server and client.
  */
 #define	IPPORT_BOOTPS		67
 #define	IPPORT_BOOTPC		68
 
 #define BOOTREPLY		2
 #define BOOTREQUEST		1
 
 
 /*
  * Vendor magic cookie (v_magic) for CMU
  */
 #define VM_CMU		"CMU"
 
 /*
  * Vendor magic cookie (v_magic) for RFC1048
  */
 #define VM_RFC1048	{ 99, 130, 83, 99 }
 
 
 
 /*
  * RFC1048 tag values used to specify what information is being supplied in
  * the vendor field of the packet.
  */
 
 #define TAG_PAD			((unsigned char)   0)
 #define TAG_SUBNET_MASK		((unsigned char)   1)
 #define TAG_TIME_OFFSET		((unsigned char)   2)
 #define TAG_GATEWAY		((unsigned char)   3)
 #define TAG_TIME_SERVER		((unsigned char)   4)
 #define TAG_NAME_SERVER		((unsigned char)   5)
 #define TAG_DOMAIN_SERVER	((unsigned char)   6)
 #define TAG_LOG_SERVER		((unsigned char)   7)
 #define TAG_COOKIE_SERVER	((unsigned char)   8)
 #define TAG_LPR_SERVER		((unsigned char)   9)
 #define TAG_IMPRESS_SERVER	((unsigned char)  10)
 #define TAG_RLP_SERVER		((unsigned char)  11)
 #define TAG_HOSTNAME		((unsigned char)  12)
 #define TAG_BOOTSIZE		((unsigned char)  13)
 #define TAG_DUMPFILE		((unsigned char)  14)
 #define TAG_DOMAINNAME		((unsigned char)  15)
 #define TAG_SWAPSERVER		((unsigned char)  16)
 #define TAG_ROOTPATH		((unsigned char)  17)
 #define TAG_INTF_MTU		((unsigned char)  26)
 
 #ifdef SUPPORT_DHCP
 #define TAG_REQ_ADDR		((unsigned char)  50)
 #define TAG_LEASETIME		((unsigned char)  51)
 #define TAG_OVERLOAD		((unsigned char)  52)
 #define TAG_DHCP_MSGTYPE	((unsigned char)  53)
 #define TAG_SERVERID		((unsigned char)  54)
 #define TAG_PARAM_REQ		((unsigned char)  55)
 #define TAG_MSG			((unsigned char)  56)
 #define TAG_MAXSIZE		((unsigned char)  57)
 #define TAG_T1			((unsigned char)  58)
 #define TAG_T2			((unsigned char)  59)
 #define TAG_CLASSID		((unsigned char)  60)
 #define TAG_CLIENTID		((unsigned char)  61)
 #define TAG_TFTP_SERVER		((unsigned char) 150)
 #endif
 
 #define TAG_END			((unsigned char) 255)
 
 #ifdef SUPPORT_DHCP
 #define DHCPDISCOVER 1
 #define DHCPOFFER 2
 #define DHCPREQUEST 3
 #define DHCPDECLINE 4
 #define DHCPACK 5
 #define DHCPNAK 6
 #define DHCPRELEASE 7
 #endif
 
 /*
  * bootp flags
  */
 #define	BOOTP_NONE		0x0000		/* No flags */
 #define	BOOTP_PXE		0x0001		/* Booting from PXE. */
 
 /*
  * "vendor" data permitted for CMU bootp clients.
  */
 
 struct cmu_vend {
 	unsigned char	v_magic[4];	/* magic number */
 	unsigned int	v_flags;	/* flags/opcodes, etc. */
 	struct in_addr	v_smask;	/* Subnet mask */
 	struct in_addr	v_dgate;	/* Default gateway */
 	struct in_addr	v_dns1, v_dns2; /* Domain name servers */
 	struct in_addr	v_ins1, v_ins2; /* IEN-116 name servers */
 	struct in_addr	v_ts1, v_ts2;	/* Time servers */
 	unsigned char	v_unused[25];	/* currently unused */
 };
 
 
 /* v_flags values */
 #define VF_SMASK	1	/* Subnet mask field contains valid data */
+
+int	dhcp_try_rfc1048(u_char *cp, u_int len);
+
+#endif /* _BOOTP_H_ */
Index: stable/11/lib/libstand/nfs.c
===================================================================
--- stable/11/lib/libstand/nfs.c	(revision 329098)
+++ stable/11/lib/libstand/nfs.c	(revision 329099)
@@ -1,1495 +1,845 @@
 /*	$NetBSD: nfs.c,v 1.2 1998/01/24 12:43:09 drochner Exp $	*/
 
 /*-
  *  Copyright (c) 1993 John Brezak
  *  All rights reserved.
  *
  *  Redistribution and use in source and binary forms, with or without
  *  modification, are permitted provided that the following conditions
  *  are met:
  *  1. Redistributions of source code must retain the above copyright
  *     notice, this list of conditions and the following disclaimer.
  *  2. Redistributions in binary form must reproduce the above copyright
  *     notice, this list of conditions and the following disclaimer in the
  *     documentation and/or other materials provided with the distribution.
  *  3. The name of the author may not be used to endorse or promote products
  *     derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <string.h>
 #include <stddef.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 
 #include "rpcv2.h"
 #include "nfsv2.h"
 
 #include "stand.h"
 #include "net.h"
 #include "netif.h"
 #include "rpc.h"
 
 #define NFS_DEBUGxx
 
 #define NFSREAD_MIN_SIZE 1024
 #define NFSREAD_MAX_SIZE 4096
 
-/* Define our own NFS attributes without NQNFS stuff. */
-#ifdef OLD_NFSV2
-struct nfsv2_fattrs {
-	n_long	fa_type;
-	n_long	fa_mode;
-	n_long	fa_nlink;
-	n_long	fa_uid;
-	n_long	fa_gid;
-	n_long	fa_size;
-	n_long	fa_blocksize;
-	n_long	fa_rdev;
-	n_long	fa_blocks;
-	n_long	fa_fsid;
-	n_long	fa_fileid;
-	struct nfsv2_time fa_atime;
-	struct nfsv2_time fa_mtime;
-	struct nfsv2_time fa_ctime;
-};
-
-struct nfs_read_args {
-	u_char	fh[NFS_FHSIZE];
-	n_long	off;
-	n_long	len;
-	n_long	xxx;			/* XXX what's this for? */
-};
-
-/* Data part of nfs rpc reply (also the largest thing we receive) */
-struct nfs_read_repl {
-	n_long	errno;
-	struct	nfsv2_fattrs fa;
-	n_long	count;
-	u_char	data[NFSREAD_MAX_SIZE];
-};
-
-#ifndef NFS_NOSYMLINK
-struct nfs_readlnk_repl {
-	n_long	errno;
-	n_long	len;
-	char	path[NFS_MAXPATHLEN];
-};
-#endif
-
-struct nfs_readdir_args {
-	u_char	fh[NFS_FHSIZE];
-	n_long	cookie;
-	n_long	count;
-};
-
-struct nfs_readdir_data {
-	n_long	fileid;
-	n_long	len;
-	char	name[0];
-};
-
-struct nfs_readdir_off {
-	n_long	cookie;
-	n_long	follows;
-};
-
-struct nfs_iodesc {
-	struct	iodesc	*iodesc;
-	off_t	off;
-	u_char	fh[NFS_FHSIZE];
-	struct nfsv2_fattrs fa;	/* all in network order */
-};
-#else	/* !OLD_NFSV2 */
-
 /* NFSv3 definitions */
 #define	NFS_V3MAXFHSIZE		64
 #define	NFS_VER3		3
 #define	RPCMNT_VER3		3
 #define	NFSPROCV3_LOOKUP	3
 #define	NFSPROCV3_READLINK	5
 #define	NFSPROCV3_READ		6
 #define	NFSPROCV3_READDIR	16
 
 typedef struct {
 	uint32_t val[2];
 } n_quad;
 
 struct nfsv3_time {
 	uint32_t nfs_sec;
 	uint32_t nfs_nsec;
 };
 
 struct nfsv3_fattrs {
 	uint32_t fa_type;
 	uint32_t fa_mode;
 	uint32_t fa_nlink;
 	uint32_t fa_uid;
 	uint32_t fa_gid;
 	n_quad fa_size;
 	n_quad fa_used;
 	n_quad fa_rdev;
 	n_quad fa_fsid;
 	n_quad fa_fileid;
 	struct nfsv3_time fa_atime;
 	struct nfsv3_time fa_mtime;
 	struct nfsv3_time fa_ctime;
 };
 
 /*
  * For NFSv3, the file handle is variable in size, so most fixed sized
  * structures for arguments won't work. For most cases, a structure
  * that starts with any fixed size section is followed by an array
  * that covers the maximum size required.
  */
 struct nfsv3_readdir_repl {
 	uint32_t errno;
 	uint32_t ok;
 	struct nfsv3_fattrs fa;
 	uint32_t cookiev0;
 	uint32_t cookiev1;
 };
 
 struct nfsv3_readdir_entry {
 	uint32_t follows;
 	uint32_t fid0;
 	uint32_t fid1;
 	uint32_t len;
 	uint32_t nameplus[0];
 };
 
 struct nfs_iodesc {
 	struct iodesc *iodesc;
 	off_t off;
 	uint32_t fhsize;
 	u_char fh[NFS_V3MAXFHSIZE];
 	struct nfsv3_fattrs fa;	/* all in network order */
 	uint64_t cookie;
 };
-#endif	/* OLD_NFSV2 */
 
 /*
  * XXX interactions with tftp? See nfswrapper.c for a confusing
  *     issue.
  */
 int		nfs_open(const char *path, struct open_file *f);
 static int	nfs_close(struct open_file *f);
 static int	nfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
 static int	nfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
 static off_t	nfs_seek(struct open_file *f, off_t offset, int where);
 static int	nfs_stat(struct open_file *f, struct stat *sb);
 static int	nfs_readdir(struct open_file *f, struct dirent *d);
 
 struct	nfs_iodesc nfs_root_node;
 
 struct fs_ops nfs_fsops = {
 	"nfs",
 	nfs_open,
 	nfs_close,
 	nfs_read,
 	nfs_write,
 	nfs_seek,
 	nfs_stat,
 	nfs_readdir
 };
 
 static int nfs_read_size = NFSREAD_MIN_SIZE;
 
-#ifdef	OLD_NFSV2
 /*
- * Fetch the root file handle (call mount daemon)
- * Return zero or error number.
+ * Tune the NFS read size from nfs.read_size to improve boot performance.
  */
-int
-nfs_getrootfh(struct iodesc *d, char *path, u_char *fhp)
+static void
+set_nfs_read_size(void)
 {
-	int len;
-	struct args {
-		n_long	len;
-		char	path[FNAME_SIZE];
-	} *args;
-	struct repl {
-		n_long	errno;
-		u_char	fh[NFS_FHSIZE];
-	} *repl;
-	struct {
-		n_long	h[RPC_HEADER_WORDS];
-		struct args d;
-	} sdata;
-	struct {
-		n_long	h[RPC_HEADER_WORDS];
-		struct repl d;
-	} rdata;
-	size_t cc;
+	char *env, *end;
+	char buf[10];
 
-#ifdef NFS_DEBUG
-	if (debug)
-		printf("nfs_getrootfh: %s\n", path);
-#endif
-
-	args = &sdata.d;
-	repl = &rdata.d;
-
-	bzero(args, sizeof(*args));
-	len = strlen(path);
-	if (len > sizeof(args->path))
-		len = sizeof(args->path);
-	args->len = htonl(len);
-	bcopy(path, args->path, len);
-	len = 4 + roundup(len, 4);
-
-	cc = rpc_call(d, RPCPROG_MNT, RPCMNT_VER1, RPCMNT_MOUNT,
-	    args, len, repl, sizeof(*repl));
-	if (cc == -1) {
-		/* errno was set by rpc_call */
-		return (errno);
+	if ((env = getenv("nfs.read_size")) != NULL) {
+		errno = 0;
+		nfs_read_size = (int)strtol(env, &end, 0);
+		if (errno != 0 || *env == '\0' || *end != '\0') {
+			printf("%s: bad value: \"%s\", defaulting to %d\n",
+			    "nfs.read_size", env, NFSREAD_MIN_SIZE);
+			nfs_read_size = NFSREAD_MIN_SIZE;
+		}
 	}
-	if (cc < 4)
-		return (EBADRPC);
-	if (repl->errno)
-		return (ntohl(repl->errno));
-	bcopy(repl->fh, fhp, sizeof(repl->fh));
-
-	/*
-	 * Improve boot performance over NFS
-	 */
-	if (getenv("nfs.read_size") != NULL)
-		nfs_read_size = strtol(getenv("nfs.read_size"), NULL, 0);
-	if (nfs_read_size < NFSREAD_MIN_SIZE)
+	if (nfs_read_size < NFSREAD_MIN_SIZE) {
+		printf("%s: bad value: \"%d\", defaulting to %d\n",
+		    "nfs.read_size", nfs_read_size, NFSREAD_MIN_SIZE);
 		nfs_read_size = NFSREAD_MIN_SIZE;
-	if (nfs_read_size > NFSREAD_MAX_SIZE)
+	}
+	if (nfs_read_size > NFSREAD_MAX_SIZE) {
+		printf("%s: bad value: \"%d\", defaulting to %d\n",
+		    "nfs.read_size", nfs_read_size, NFSREAD_MAX_SIZE);
 		nfs_read_size = NFSREAD_MAX_SIZE;
-
-	return (0);
-}
-
-/*
- * Lookup a file.  Store handle and attributes.
- * Return zero or error number.
- */
-int
-nfs_lookupfh(struct nfs_iodesc *d, const char *name, struct nfs_iodesc *newfd)
-{
-	int len, rlen;
-	struct args {
-		u_char	fh[NFS_FHSIZE];
-		n_long	len;
-		char	name[FNAME_SIZE];
-	} *args;
-	struct repl {
-		n_long	errno;
-		u_char	fh[NFS_FHSIZE];
-		struct	nfsv2_fattrs fa;
-	} *repl;
-	struct {
-		n_long	h[RPC_HEADER_WORDS];
-		struct args d;
-	} sdata;
-	struct {
-		n_long	h[RPC_HEADER_WORDS];
-		struct repl d;
-	} rdata;
-	ssize_t cc;
-
-#ifdef NFS_DEBUG
-	if (debug)
-		printf("lookupfh: called\n");
-#endif
-
-	args = &sdata.d;
-	repl = &rdata.d;
-
-	bzero(args, sizeof(*args));
-	bcopy(d->fh, args->fh, sizeof(args->fh));
-	len = strlen(name);
-	if (len > sizeof(args->name))
-		len = sizeof(args->name);
-	bcopy(name, args->name, len);
-	args->len = htonl(len);
-	len = 4 + roundup(len, 4);
-	len += NFS_FHSIZE;
-
-	rlen = sizeof(*repl);
-
-	cc = rpc_call(d->iodesc, NFS_PROG, NFS_VER2, NFSPROC_LOOKUP,
-	    args, len, repl, rlen);
-	if (cc == -1)
-		return (errno);		/* XXX - from rpc_call */
-	if (cc < 4)
-		return (EIO);
-	if (repl->errno) {
-		/* saerrno.h now matches NFS error numbers. */
-		return (ntohl(repl->errno));
 	}
-	bcopy( repl->fh, &newfd->fh, sizeof(newfd->fh));
-	bcopy(&repl->fa, &newfd->fa, sizeof(newfd->fa));
-	return (0);
+	snprintf(buf, sizeof (buf), "%d", nfs_read_size);
+	setenv("nfs.read_size", buf, 1);
 }
 
-#ifndef NFS_NOSYMLINK
 /*
- * Get the destination of a symbolic link.
- */
-int
-nfs_readlink(struct nfs_iodesc *d, char *buf)
-{
-	struct {
-		n_long	h[RPC_HEADER_WORDS];
-		u_char fh[NFS_FHSIZE];
-	} sdata;
-	struct {
-		n_long	h[RPC_HEADER_WORDS];
-		struct nfs_readlnk_repl d;
-	} rdata;
-	ssize_t cc;
-
-#ifdef NFS_DEBUG
-	if (debug)
-		printf("readlink: called\n");
-#endif
-
-	bcopy(d->fh, sdata.fh, NFS_FHSIZE);
-	cc = rpc_call(d->iodesc, NFS_PROG, NFS_VER2, NFSPROC_READLINK,
-		      sdata.fh, NFS_FHSIZE,
-		      &rdata.d, sizeof(rdata.d));
-	if (cc == -1)
-		return (errno);
-
-	if (cc < 4)
-		return (EIO);
-
-	if (rdata.d.errno)
-		return (ntohl(rdata.d.errno));
-
-	rdata.d.len = ntohl(rdata.d.len);
-	if (rdata.d.len > NFS_MAXPATHLEN)
-		return (ENAMETOOLONG);
-
-	bcopy(rdata.d.path, buf, rdata.d.len);
-	buf[rdata.d.len] = 0;
-	return (0);
-}
-#endif
-
-/*
- * Read data from a file.
- * Return transfer count or -1 (and set errno)
- */
-ssize_t
-nfs_readdata(struct nfs_iodesc *d, off_t off, void *addr, size_t len)
-{
-	struct nfs_read_args *args;
-	struct nfs_read_repl *repl;
-	struct {
-		n_long	h[RPC_HEADER_WORDS];
-		struct nfs_read_args d;
-	} sdata;
-	struct {
-		n_long	h[RPC_HEADER_WORDS];
-		struct nfs_read_repl d;
-	} rdata;
-	size_t cc;
-	long x;
-	int hlen, rlen;
-
-	args = &sdata.d;
-	repl = &rdata.d;
-
-	bcopy(d->fh, args->fh, NFS_FHSIZE);
-	args->off = htonl((n_long)off);
-	if (len > nfs_read_size)
-		len = nfs_read_size;
-	args->len = htonl((n_long)len);
-	args->xxx = htonl((n_long)0);
-	hlen = offsetof(struct nfs_read_rpl, data[0]);
-
-	cc = rpc_call(d->iodesc, NFS_PROG, NFS_VER2, NFSPROC_READ,
-	    args, sizeof(*args),
-	    repl, sizeof(*repl));
-	if (cc == -1) {
-		/* errno was already set by rpc_call */
-		return (-1);
-	}
-	if (cc < hlen) {
-		errno = EBADRPC;
-		return (-1);
-	}
-	if (repl->errno) {
-		errno = ntohl(repl->errno);
-		return (-1);
-	}
-	rlen = cc - hlen;
-	x = ntohl(repl->count);
-	if (rlen < x) {
-		printf("nfsread: short packet, %d < %ld\n", rlen, x);
-		errno = EBADRPC;
-		return(-1);
-	}
-	bcopy(repl->data, addr, x);
-	return (x);
-}
-
-/*
- * Open a file.
- * return zero or error number
- */
-int
-nfs_open(const char *upath, struct open_file *f)
-{
-	struct iodesc *desc;
-	struct nfs_iodesc *currfd;
-	char buf[2 * NFS_FHSIZE + 3];
-	u_char *fh;
-	char *cp;
-	int i;
-#ifndef NFS_NOSYMLINK
-	struct nfs_iodesc *newfd;
-	struct nfsv2_fattrs *fa;
-	char *ncp;
-	int c;
-	char namebuf[NFS_MAXPATHLEN + 1];
-	char linkbuf[NFS_MAXPATHLEN + 1];
-	int nlinks = 0;
-#endif
-	int error;
-	char *path;
-
-	if (netproto != NET_NFS)
-		return (EINVAL);
-
-#ifdef NFS_DEBUG
- 	if (debug)
- 	    printf("nfs_open: %s (rootpath=%s)\n", upath, rootpath);
-#endif
-	if (!rootpath[0]) {
-		printf("no rootpath, no nfs\n");
-		return (ENXIO);
-	}
-
-	/*
-	 * This is silly - we should look at dv_type but that value is
-	 * arch dependant and we can't use it here.
-	 */
-#ifndef __i386__
-	if (strcmp(f->f_dev->dv_name, "net") != 0)
-		return(EINVAL);
-#else
-	if (strcmp(f->f_dev->dv_name, "pxe") != 0)
-		return(EINVAL);
-#endif
-
-	if (!(desc = socktodesc(*(int *)(f->f_devdata))))
-		return(EINVAL);
-
-	/* Bind to a reserved port. */
-	desc->myport = htons(--rpc_port);
-	desc->destip = rootip;
-	if ((error = nfs_getrootfh(desc, rootpath, nfs_root_node.fh)))
-		return (error);
-	nfs_root_node.fa.fa_type  = htonl(NFDIR);
-	nfs_root_node.fa.fa_mode  = htonl(0755);
-	nfs_root_node.fa.fa_nlink = htonl(2);
-	nfs_root_node.iodesc = desc;
-
-	fh = &nfs_root_node.fh[0];
-	buf[0] = 'X';
-	cp = &buf[1];
-	for (i = 0; i < NFS_FHSIZE; i++, cp += 2)
-		sprintf(cp, "%02x", fh[i]);
-	sprintf(cp, "X");
-	setenv("boot.nfsroot.server", inet_ntoa(rootip), 1);
-	setenv("boot.nfsroot.path", rootpath, 1);
-	setenv("boot.nfsroot.nfshandle", buf, 1);
-
-	/* Allocate file system specific data structure */
-	currfd = malloc(sizeof(*newfd));
-	if (currfd == NULL) {
-		error = ENOMEM;
-		goto out;
-	}
-
-#ifndef NFS_NOSYMLINK
-	bcopy(&nfs_root_node, currfd, sizeof(*currfd));
-	newfd = NULL;
-
-	cp = path = strdup(upath);
-	if (path == NULL) {
-	    error = ENOMEM;
-	    goto out;
-	}
-	while (*cp) {
-		/*
-		 * Remove extra separators
-		 */
-		while (*cp == '/')
-			cp++;
-
-		if (*cp == '\0')
-			break;
-		/*
-		 * Check that current node is a directory.
-		 */
-		if (currfd->fa.fa_type != htonl(NFDIR)) {
-			error = ENOTDIR;
-			goto out;
-		}
-
-		/* allocate file system specific data structure */
-		newfd = malloc(sizeof(*newfd));
-		newfd->iodesc = currfd->iodesc;
-
-		/*
-		 * Get next component of path name.
-		 */
-		{
-			int len = 0;
-
-			ncp = cp;
-			while ((c = *cp) != '\0' && c != '/') {
-				if (++len > NFS_MAXNAMLEN) {
-					error = ENOENT;
-					goto out;
-				}
-				cp++;
-			}
-			*cp = '\0';
-		}
-
-		/* lookup a file handle */
-		error = nfs_lookupfh(currfd, ncp, newfd);
-		*cp = c;
-		if (error)
-			goto out;
-
-		/*
-		 * Check for symbolic link
-		 */
-		if (newfd->fa.fa_type == htonl(NFLNK)) {
-			int link_len, len;
-
-			error = nfs_readlink(newfd, linkbuf);
-			if (error)
-				goto out;
-
-			link_len = strlen(linkbuf);
-			len = strlen(cp);
-
-			if (link_len + len > MAXPATHLEN
-			    || ++nlinks > MAXSYMLINKS) {
-				error = ENOENT;
-				goto out;
-			}
-
-			bcopy(cp, &namebuf[link_len], len + 1);
-			bcopy(linkbuf, namebuf, link_len);
-
-			/*
-			 * If absolute pathname, restart at root.
-			 * If relative pathname, restart at parent directory.
-			 */
-			cp = namebuf;
-			if (*cp == '/')
-				bcopy(&nfs_root_node, currfd, sizeof(*currfd));
-
-			free(newfd);
-			newfd = NULL;
-
-			continue;
-		}
-
-		free(currfd);
-		currfd = newfd;
-		newfd = NULL;
-	}
-
-	error = 0;
-
-out:
-	free(newfd);
-	free(path);
-#else
-        currfd->iodesc = desc;
-
-        error = nfs_lookupfh(&nfs_root_node, upath, currfd);
-#endif
-	if (!error) {
-		currfd->off = 0;
-		f->f_fsdata = (void *)currfd;
-		return (0);
-	}
-
-#ifdef NFS_DEBUG
-	if (debug)
-		printf("nfs_open: %s lookupfh failed: %s\n",
-		    path, strerror(error));
-#endif
-	free(currfd);
-
-	return (error);
-}
-
-int
-nfs_close(struct open_file *f)
-{
-	struct nfs_iodesc *fp = (struct nfs_iodesc *)f->f_fsdata;
-
-#ifdef NFS_DEBUG
-	if (debug)
-		printf("nfs_close: fp=0x%lx\n", (u_long)fp);
-#endif
-
-	if (fp)
-		free(fp);
-	f->f_fsdata = (void *)0;
-
-	return (0);
-}
-
-/*
- * read a portion of a file
- */
-int
-nfs_read(struct open_file *f, void *buf, size_t size, size_t *resid)
-{
-	struct nfs_iodesc *fp = (struct nfs_iodesc *)f->f_fsdata;
-	ssize_t cc;
-	char *addr = buf;
-
-#ifdef NFS_DEBUG
-	if (debug)
-		printf("nfs_read: size=%lu off=%d\n", (u_long)size,
-		       (int)fp->off);
-#endif
-	while ((int)size > 0) {
-		twiddle(16);
-		cc = nfs_readdata(fp, fp->off, (void *)addr, size);
-		/* XXX maybe should retry on certain errors */
-		if (cc == -1) {
-#ifdef NFS_DEBUG
-			if (debug)
-				printf("nfs_read: read: %s", strerror(errno));
-#endif
-			return (errno);	/* XXX - from nfs_readdata */
-		}
-		if (cc == 0) {
-#ifdef NFS_DEBUG
-			if (debug)
-				printf("nfs_read: hit EOF unexpectantly");
-#endif
-			goto ret;
-		}
-		fp->off += cc;
-		addr += cc;
-		size -= cc;
-	}
-ret:
-	if (resid)
-		*resid = size;
-
-	return (0);
-}
-
-/*
- * Not implemented.
- */
-int
-nfs_write(struct open_file *f, void *buf, size_t size, size_t *resid)
-{
-	return (EROFS);
-}
-
-off_t
-nfs_seek(struct open_file *f, off_t offset, int where)
-{
-	struct nfs_iodesc *d = (struct nfs_iodesc *)f->f_fsdata;
-	n_long size = ntohl(d->fa.fa_size);
-
-	switch (where) {
-	case SEEK_SET:
-		d->off = offset;
-		break;
-	case SEEK_CUR:
-		d->off += offset;
-		break;
-	case SEEK_END:
-		d->off = size - offset;
-		break;
-	default:
-		errno = EINVAL;
-		return (-1);
-	}
-
-	return (d->off);
-}
-
-/* NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5 */
-int nfs_stat_types[8] = {
-	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 0 };
-
-int
-nfs_stat(struct open_file *f, struct stat *sb)
-{
-	struct nfs_iodesc *fp = (struct nfs_iodesc *)f->f_fsdata;
-	n_long ftype, mode;
-
-	ftype = ntohl(fp->fa.fa_type);
-	mode  = ntohl(fp->fa.fa_mode);
-	mode |= nfs_stat_types[ftype & 7];
-
-	sb->st_mode  = mode;
-	sb->st_nlink = ntohl(fp->fa.fa_nlink);
-	sb->st_uid   = ntohl(fp->fa.fa_uid);
-	sb->st_gid   = ntohl(fp->fa.fa_gid);
-	sb->st_size  = ntohl(fp->fa.fa_size);
-
-	return (0);
-}
-
-static int
-nfs_readdir(struct open_file *f, struct dirent *d)
-{
-	struct nfs_iodesc *fp = (struct nfs_iodesc *)f->f_fsdata;
-	struct nfs_readdir_args *args;
-	struct nfs_readdir_data *rd;
-	struct nfs_readdir_off  *roff = NULL;
-	static char *buf;
-	static struct nfs_iodesc *pfp = NULL;
-	static n_long cookie = 0;
-	size_t cc;
-	n_long eof;
-
-	struct {
-		n_long h[RPC_HEADER_WORDS];
-		struct nfs_readdir_args d;
-	} sdata;
-	static struct {
-		n_long h[RPC_HEADER_WORDS];
-		u_char d[NFS_READDIRSIZE];
-	} rdata;
-
-	if (fp != pfp || fp->off != cookie) {
-		pfp = NULL;
-	refill:
-		args = &sdata.d;
-		bzero(args, sizeof(*args));
-
-		bcopy(fp->fh, args->fh, NFS_FHSIZE);
-		args->cookie = htonl(fp->off);
-		args->count  = htonl(NFS_READDIRSIZE);
-
-		cc = rpc_call(fp->iodesc, NFS_PROG, NFS_VER2, NFSPROC_READDIR,
-			      args, sizeof(*args),
-			      rdata.d, sizeof(rdata.d));
-		buf  = rdata.d;
-		roff = (struct nfs_readdir_off *)buf;
-		if (ntohl(roff->cookie) != 0)
-			return EIO;
-		pfp = fp;
-		cookie = fp->off;
-	}
-	roff = (struct nfs_readdir_off *)buf;
-
-	if (ntohl(roff->follows) == 0) {
-		eof = ntohl((roff+1)->cookie);
-		if (eof) {
-			cookie = 0;
-			return ENOENT;
-		}
-		goto refill;
-	}
-
-	buf += sizeof(struct nfs_readdir_off);
-	rd = (struct nfs_readdir_data *)buf;
-	d->d_namlen = ntohl(rd->len);
-	bcopy(rd->name, d->d_name, d->d_namlen);
-	d->d_name[d->d_namlen] = '\0';
-
-	buf += (sizeof(struct nfs_readdir_data) + roundup(htonl(rd->len),4));
-	roff = (struct nfs_readdir_off *)buf;
-	fp->off = cookie = ntohl(roff->cookie);
-	return 0;
-}
-#else	/* !OLD_NFSV2 */
-/*
  * Fetch the root file handle (call mount daemon)
  * Return zero or error number.
  */
 int
 nfs_getrootfh(struct iodesc *d, char *path, uint32_t *fhlenp, u_char *fhp)
 {
 	int len;
 	struct args {
 		uint32_t len;
 		char path[FNAME_SIZE];
 	} *args;
 	struct repl {
 		uint32_t errno;
 		uint32_t fhsize;
 		u_char fh[NFS_V3MAXFHSIZE];
 		uint32_t authcnt;
 		uint32_t auth[7];
 	} *repl;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct args d;
 	} sdata;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct repl d;
 	} rdata;
 	size_t cc;
 
 #ifdef NFS_DEBUG
 	if (debug)
 		printf("nfs_getrootfh: %s\n", path);
 #endif
 
 	args = &sdata.d;
 	repl = &rdata.d;
 
 	bzero(args, sizeof(*args));
 	len = strlen(path);
 	if (len > sizeof(args->path))
 		len = sizeof(args->path);
 	args->len = htonl(len);
 	bcopy(path, args->path, len);
 	len = sizeof(uint32_t) + roundup(len, sizeof(uint32_t));
 
 	cc = rpc_call(d, RPCPROG_MNT, RPCMNT_VER3, RPCMNT_MOUNT,
 	    args, len, repl, sizeof(*repl));
 	if (cc == -1)
 		/* errno was set by rpc_call */
 		return (errno);
 	if (cc < 2 * sizeof (uint32_t))
 		return (EBADRPC);
 	if (repl->errno != 0)
 		return (ntohl(repl->errno));
 	*fhlenp = ntohl(repl->fhsize);
 	bcopy(repl->fh, fhp, *fhlenp);
+
+	set_nfs_read_size();
 	return (0);
 }
 
 /*
  * Lookup a file.  Store handle and attributes.
  * Return zero or error number.
  */
 int
 nfs_lookupfh(struct nfs_iodesc *d, const char *name, struct nfs_iodesc *newfd)
 {
 	int len, rlen, pos;
 	struct args {
 		uint32_t fhsize;
 		uint32_t fhplusname[1 +
 		    (NFS_V3MAXFHSIZE + FNAME_SIZE) / sizeof(uint32_t)];
 	} *args;
 	struct repl {
 		uint32_t errno;
 		uint32_t fhsize;
 		uint32_t fhplusattr[(NFS_V3MAXFHSIZE +
 		    2 * (sizeof(uint32_t) +
 		    sizeof(struct nfsv3_fattrs))) / sizeof(uint32_t)];
 	} *repl;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct args d;
 	} sdata;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct repl d;
 	} rdata;
 	ssize_t cc;
 
 #ifdef NFS_DEBUG
 	if (debug)
 		printf("lookupfh: called\n");
 #endif
 
 	args = &sdata.d;
 	repl = &rdata.d;
 
 	bzero(args, sizeof(*args));
 	args->fhsize = htonl(d->fhsize);
 	bcopy(d->fh, args->fhplusname, d->fhsize);
 	len = strlen(name);
 	if (len > FNAME_SIZE)
 		len = FNAME_SIZE;
 	pos = roundup(d->fhsize, sizeof(uint32_t)) / sizeof(uint32_t);
 	args->fhplusname[pos++] = htonl(len);
 	bcopy(name, &args->fhplusname[pos], len);
 	len = sizeof(uint32_t) + pos * sizeof(uint32_t) +
 	    roundup(len, sizeof(uint32_t));
 
 	rlen = sizeof(*repl);
 
 	cc = rpc_call(d->iodesc, NFS_PROG, NFS_VER3, NFSPROCV3_LOOKUP,
 	    args, len, repl, rlen);
 	if (cc == -1)
 		return (errno);		/* XXX - from rpc_call */
 	if (cc < 2 * sizeof(uint32_t))
 		return (EIO);
 	if (repl->errno != 0)
 		/* saerrno.h now matches NFS error numbers. */
 		return (ntohl(repl->errno));
 	newfd->fhsize = ntohl(repl->fhsize);
 	bcopy(repl->fhplusattr, &newfd->fh, newfd->fhsize);
 	pos = roundup(newfd->fhsize, sizeof(uint32_t)) / sizeof(uint32_t);
 	if (repl->fhplusattr[pos++] == 0)
 		return (EIO);
 	bcopy(&repl->fhplusattr[pos], &newfd->fa, sizeof(newfd->fa));
 	return (0);
 }
 
 #ifndef NFS_NOSYMLINK
 /*
  * Get the destination of a symbolic link.
  */
 int
 nfs_readlink(struct nfs_iodesc *d, char *buf)
 {
 	struct args {
 		uint32_t fhsize;
 		u_char fh[NFS_V3MAXFHSIZE];
 	} *args;
 	struct repl {
 		uint32_t errno;
 		uint32_t ok;
 		struct nfsv3_fattrs fa;
 		uint32_t len;
 		u_char path[NFS_MAXPATHLEN];
 	} *repl;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct args d;
 	} sdata;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct repl d;
 	} rdata;
 	ssize_t cc;
 
 #ifdef NFS_DEBUG
 	if (debug)
 		printf("readlink: called\n");
 #endif
 
 	args = &sdata.d;
 	repl = &rdata.d;
 
 	bzero(args, sizeof(*args));
 	args->fhsize = htonl(d->fhsize);
 	bcopy(d->fh, args->fh, d->fhsize);
 	cc = rpc_call(d->iodesc, NFS_PROG, NFS_VER3, NFSPROCV3_READLINK,
 	    args, sizeof(uint32_t) + roundup(d->fhsize, sizeof(uint32_t)),
 	    repl, sizeof(*repl));
 	if (cc == -1)
 		return (errno);
 
 	if (cc < 2 * sizeof(uint32_t))
 		return (EIO);
 
 	if (repl->errno != 0)
 		return (ntohl(repl->errno));
 
 	if (repl->ok == 0)
 		return (EIO);
 
 	repl->len = ntohl(repl->len);
 	if (repl->len > NFS_MAXPATHLEN)
 		return (ENAMETOOLONG);
 
 	bcopy(repl->path, buf, repl->len);
 	buf[repl->len] = 0;
 	return (0);
 }
 #endif
 
 /*
  * Read data from a file.
  * Return transfer count or -1 (and set errno)
  */
 ssize_t
 nfs_readdata(struct nfs_iodesc *d, off_t off, void *addr, size_t len)
 {
 	struct args {
 		uint32_t fhsize;
 		uint32_t fhoffcnt[NFS_V3MAXFHSIZE / sizeof(uint32_t) + 3];
 	} *args;
 	struct repl {
 		uint32_t errno;
 		uint32_t ok;
 		struct nfsv3_fattrs fa;
 		uint32_t count;
 		uint32_t eof;
 		uint32_t len;
 		u_char data[NFSREAD_MAX_SIZE];
 	} *repl;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct args d;
 	} sdata;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct repl d;
 	} rdata;
 	size_t cc;
 	long x;
 	int hlen, rlen, pos;
 
 	args = &sdata.d;
 	repl = &rdata.d;
 
 	bzero(args, sizeof(*args));
 	args->fhsize = htonl(d->fhsize);
 	bcopy(d->fh, args->fhoffcnt, d->fhsize);
 	pos = roundup(d->fhsize, sizeof(uint32_t)) / sizeof(uint32_t);
 	args->fhoffcnt[pos++] = 0;
 	args->fhoffcnt[pos++] = htonl((uint32_t)off);
 	if (len > nfs_read_size)
 		len = nfs_read_size;
 	args->fhoffcnt[pos] = htonl((uint32_t)len);
 	hlen = offsetof(struct repl, data[0]);
 
 	cc = rpc_call(d->iodesc, NFS_PROG, NFS_VER3, NFSPROCV3_READ,
 	    args, 4 * sizeof(uint32_t) + roundup(d->fhsize, sizeof(uint32_t)),
 	    repl, sizeof(*repl));
 	if (cc == -1)
 		/* errno was already set by rpc_call */
 		return (-1);
 	if (cc < hlen) {
 		errno = EBADRPC;
 		return (-1);
 	}
 	if (repl->errno != 0) {
 		errno = ntohl(repl->errno);
 		return (-1);
 	}
 	rlen = cc - hlen;
 	x = ntohl(repl->count);
 	if (rlen < x) {
 		printf("nfsread: short packet, %d < %ld\n", rlen, x);
 		errno = EBADRPC;
 		return (-1);
 	}
 	bcopy(repl->data, addr, x);
 	return (x);
 }
 
 /*
  * Open a file.
  * return zero or error number
  */
 int
 nfs_open(const char *upath, struct open_file *f)
 {
 	struct iodesc *desc;
 	struct nfs_iodesc *currfd;
 	char buf[2 * NFS_V3MAXFHSIZE + 3];
 	u_char *fh;
 	char *cp;
 	int i;
 #ifndef NFS_NOSYMLINK
 	struct nfs_iodesc *newfd;
 	struct nfsv3_fattrs *fa;
 	char *ncp;
 	int c;
 	char namebuf[NFS_MAXPATHLEN + 1];
 	char linkbuf[NFS_MAXPATHLEN + 1];
 	int nlinks = 0;
 #endif
 	int error;
 	char *path;
 
 	if (netproto != NET_NFS)
 		return (EINVAL);
 
 #ifdef NFS_DEBUG
  	if (debug)
  	    printf("nfs_open: %s (rootpath=%s)\n", upath, rootpath);
 #endif
 	if (!rootpath[0]) {
 		printf("no rootpath, no nfs\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * This is silly - we should look at dv_type but that value is
 	 * arch dependant and we can't use it here.
 	 */
 #ifndef __i386__
 	if (strcmp(f->f_dev->dv_name, "net") != 0)
 		return (EINVAL);
 #else
 	if (strcmp(f->f_dev->dv_name, "pxe") != 0)
 		return (EINVAL);
 #endif
 
 	if (!(desc = socktodesc(*(int *)(f->f_devdata))))
 		return (EINVAL);
 
 	/* Bind to a reserved port. */
 	desc->myport = htons(--rpc_port);
 	desc->destip = rootip;
 	if ((error = nfs_getrootfh(desc, rootpath, &nfs_root_node.fhsize,
 	    nfs_root_node.fh)))
 		return (error);
 	nfs_root_node.fa.fa_type  = htonl(NFDIR);
 	nfs_root_node.fa.fa_mode  = htonl(0755);
 	nfs_root_node.fa.fa_nlink = htonl(2);
 	nfs_root_node.iodesc = desc;
 
 	fh = &nfs_root_node.fh[0];
 	buf[0] = 'X';
 	cp = &buf[1];
 	for (i = 0; i < nfs_root_node.fhsize; i++, cp += 2)
 		sprintf(cp, "%02x", fh[i]);
 	sprintf(cp, "X");
 	setenv("boot.nfsroot.server", inet_ntoa(rootip), 1);
 	setenv("boot.nfsroot.path", rootpath, 1);
 	setenv("boot.nfsroot.nfshandle", buf, 1);
 	sprintf(buf, "%d", nfs_root_node.fhsize);
 	setenv("boot.nfsroot.nfshandlelen", buf, 1);
 
 	/* Allocate file system specific data structure */
 	currfd = malloc(sizeof(*newfd));
 	if (currfd == NULL) {
 		error = ENOMEM;
 		goto out;
 	}
 #ifndef NFS_NOSYMLINK
 	bcopy(&nfs_root_node, currfd, sizeof(*currfd));
 	newfd = NULL;
 
 	cp = path = strdup(upath);
 	if (path == NULL) {
 		error = ENOMEM;
 		goto out;
 	}
 	while (*cp) {
 		/*
 		 * Remove extra separators
 		 */
 		while (*cp == '/')
 			cp++;
 
 		if (*cp == '\0')
 			break;
 		/*
 		 * Check that current node is a directory.
 		 */
 		if (currfd->fa.fa_type != htonl(NFDIR)) {
 			error = ENOTDIR;
 			goto out;
 		}
 
 		/* allocate file system specific data structure */
 		newfd = malloc(sizeof(*newfd));
 		if (newfd == NULL) {
 			error = ENOMEM;
 			goto out;
 		}
 		newfd->iodesc = currfd->iodesc;
 
 		/*
 		 * Get next component of path name.
 		 */
 		{
 			int len = 0;
 
 			ncp = cp;
 			while ((c = *cp) != '\0' && c != '/') {
 				if (++len > NFS_MAXNAMLEN) {
 					error = ENOENT;
 					goto out;
 				}
 				cp++;
 			}
 			*cp = '\0';
 		}
 
 		/* lookup a file handle */
 		error = nfs_lookupfh(currfd, ncp, newfd);
 		*cp = c;
 		if (error)
 			goto out;
 
 		/*
 		 * Check for symbolic link
 		 */
 		if (newfd->fa.fa_type == htonl(NFLNK)) {
 			int link_len, len;
 
 			error = nfs_readlink(newfd, linkbuf);
 			if (error)
 				goto out;
 
 			link_len = strlen(linkbuf);
 			len = strlen(cp);
 
 			if (link_len + len > MAXPATHLEN
 			    || ++nlinks > MAXSYMLINKS) {
 				error = ENOENT;
 				goto out;
 			}
 
 			bcopy(cp, &namebuf[link_len], len + 1);
 			bcopy(linkbuf, namebuf, link_len);
 
 			/*
 			 * If absolute pathname, restart at root.
 			 * If relative pathname, restart at parent directory.
 			 */
 			cp = namebuf;
 			if (*cp == '/')
 				bcopy(&nfs_root_node, currfd, sizeof(*currfd));
 
 			free(newfd);
 			newfd = NULL;
 
 			continue;
 		}
 
 		free(currfd);
 		currfd = newfd;
 		newfd = NULL;
 	}
 
 	error = 0;
 
 out:
 	free(newfd);
 	free(path);
 #else
 	currfd->iodesc = desc;
 
 	error = nfs_lookupfh(&nfs_root_node, upath, currfd);
 #endif
 	if (!error) {
 		currfd->off = 0;
 		currfd->cookie = 0;
 		f->f_fsdata = (void *)currfd;
 		return (0);
 	}
 
 #ifdef NFS_DEBUG
 	if (debug)
 		printf("nfs_open: %s lookupfh failed: %s\n",
 		    path, strerror(error));
 #endif
 	free(currfd);
 
 	return (error);
 }
 
 int
 nfs_close(struct open_file *f)
 {
 	struct nfs_iodesc *fp = (struct nfs_iodesc *)f->f_fsdata;
 
 #ifdef NFS_DEBUG
 	if (debug)
 		printf("nfs_close: fp=0x%lx\n", (u_long)fp);
 #endif
 
 	if (fp)
 		free(fp);
 	f->f_fsdata = (void *)0;
 
 	return (0);
 }
 
 /*
  * read a portion of a file
  */
 int
 nfs_read(struct open_file *f, void *buf, size_t size, size_t *resid)
 {
 	struct nfs_iodesc *fp = (struct nfs_iodesc *)f->f_fsdata;
 	ssize_t cc;
 	char *addr = buf;
 
 #ifdef NFS_DEBUG
 	if (debug)
 		printf("nfs_read: size=%lu off=%d\n", (u_long)size,
 		       (int)fp->off);
 #endif
 	while ((int)size > 0) {
 		twiddle(16);
 		cc = nfs_readdata(fp, fp->off, (void *)addr, size);
 		/* XXX maybe should retry on certain errors */
 		if (cc == -1) {
 #ifdef NFS_DEBUG
 			if (debug)
 				printf("nfs_read: read: %s", strerror(errno));
 #endif
 			return (errno);	/* XXX - from nfs_readdata */
 		}
 		if (cc == 0) {
 #ifdef NFS_DEBUG
 			if (debug)
 				printf("nfs_read: hit EOF unexpectantly");
 #endif
 			goto ret;
 		}
 		fp->off += cc;
 		addr += cc;
 		size -= cc;
 	}
 ret:
 	if (resid)
 		*resid = size;
 
 	return (0);
 }
 
 /*
  * Not implemented.
  */
 int
 nfs_write(struct open_file *f, void *buf, size_t size, size_t *resid)
 {
 	return (EROFS);
 }
 
 off_t
 nfs_seek(struct open_file *f, off_t offset, int where)
 {
 	struct nfs_iodesc *d = (struct nfs_iodesc *)f->f_fsdata;
 	uint32_t size = ntohl(d->fa.fa_size.val[1]);
 
 	switch (where) {
 	case SEEK_SET:
 		d->off = offset;
 		break;
 	case SEEK_CUR:
 		d->off += offset;
 		break;
 	case SEEK_END:
 		d->off = size - offset;
 		break;
 	default:
 		errno = EINVAL;
 		return (-1);
 	}
 
 	return (d->off);
 }
 
 /* NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, NFSOCK=6, NFFIFO=7 */
 int nfs_stat_types[9] = {
 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, 0 };
 
 int
 nfs_stat(struct open_file *f, struct stat *sb)
 {
 	struct nfs_iodesc *fp = (struct nfs_iodesc *)f->f_fsdata;
 	uint32_t ftype, mode;
 
 	ftype = ntohl(fp->fa.fa_type);
 	mode  = ntohl(fp->fa.fa_mode);
 	mode |= nfs_stat_types[ftype & 7];
 
 	sb->st_mode  = mode;
 	sb->st_nlink = ntohl(fp->fa.fa_nlink);
 	sb->st_uid   = ntohl(fp->fa.fa_uid);
 	sb->st_gid   = ntohl(fp->fa.fa_gid);
 	sb->st_size  = ntohl(fp->fa.fa_size.val[1]);
 
 	return (0);
 }
 
 static int
 nfs_readdir(struct open_file *f, struct dirent *d)
 {
 	struct nfs_iodesc *fp = (struct nfs_iodesc *)f->f_fsdata;
 	struct nfsv3_readdir_repl *repl;
 	struct nfsv3_readdir_entry *rent;
 	static char *buf;
 	static struct nfs_iodesc *pfp = NULL;
 	static uint64_t cookie = 0;
 	size_t cc;
 	int pos;
 
 	struct args {
 		uint32_t fhsize;
 		uint32_t fhpluscookie[5 + NFS_V3MAXFHSIZE];
 	} *args;
 	struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		struct args d;
 	} sdata;
 	static struct {
 		uint32_t h[RPC_HEADER_WORDS];
 		u_char d[NFS_READDIRSIZE];
 	} rdata;
 
 	if (fp != pfp || fp->off != cookie) {
 		pfp = NULL;
 	refill:
 		args = &sdata.d;
 		bzero(args, sizeof(*args));
 
 		args->fhsize = htonl(fp->fhsize);
 		bcopy(fp->fh, args->fhpluscookie, fp->fhsize);
 		pos = roundup(fp->fhsize, sizeof(uint32_t)) / sizeof(uint32_t);
 		args->fhpluscookie[pos++] = htonl(fp->off >> 32);
 		args->fhpluscookie[pos++] = htonl(fp->off);
 		args->fhpluscookie[pos++] = htonl(fp->cookie >> 32);
 		args->fhpluscookie[pos++] = htonl(fp->cookie);
 		args->fhpluscookie[pos] = htonl(NFS_READDIRSIZE);
 
 		cc = rpc_call(fp->iodesc, NFS_PROG, NFS_VER3, NFSPROCV3_READDIR,
 		    args, 6 * sizeof(uint32_t) +
 		    roundup(fp->fhsize, sizeof(uint32_t)),
 		    rdata.d, sizeof(rdata.d));
 		buf  = rdata.d;
 		repl = (struct nfsv3_readdir_repl *)buf;
 		if (repl->errno != 0)
 			return (ntohl(repl->errno));
 		pfp = fp;
 		cookie = fp->off;
 		fp->cookie = ((uint64_t)ntohl(repl->cookiev0) << 32) |
 		    ntohl(repl->cookiev1);
 		buf += sizeof (struct nfsv3_readdir_repl);
 	}
 	rent = (struct nfsv3_readdir_entry *)buf;
 
 	if (rent->follows == 0) {
 		/* fid0 is actually eof */
 		if (rent->fid0 != 0) {
 			cookie = 0;
 			return (ENOENT);
 		}
 		goto refill;
 	}
 
 	d->d_namlen = ntohl(rent->len);
 	bcopy(rent->nameplus, d->d_name, d->d_namlen);
 	d->d_name[d->d_namlen] = '\0';
 
 	pos = roundup(d->d_namlen, sizeof(uint32_t)) / sizeof(uint32_t);
 	fp->off = cookie = ((uint64_t)ntohl(rent->nameplus[pos]) << 32) |
 	    ntohl(rent->nameplus[pos + 1]);
 	pos += 2;
 	buf = (u_char *)&rent->nameplus[pos];
 	return (0);
 }
-#endif	/* OLD_NFSV2 */
Index: stable/11/lib/libstand/nfsv2.h
===================================================================
--- stable/11/lib/libstand/nfsv2.h	(revision 329098)
+++ stable/11/lib/libstand/nfsv2.h	(revision 329099)
@@ -1,164 +1,121 @@
 /* $FreeBSD$ */
 /*	$NetBSD: nfsv2.h,v 1.2 1996/02/26 23:05:23 gwr Exp $	*/
 
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)nfsv2.h	8.1 (Berkeley) 6/10/93
  */
 
 /*
  * nfs definitions as per the version 2 specs
  */
 
 /*
  * Constants as defined in the Sun NFS Version 2 spec.
  * "NFS: Network File System Protocol Specification" RFC1094
  */
 
 #define NFS_PORT	2049
 #define	NFS_PROG	100003
 #define NFS_VER2	2
 #define	NFS_MAXDGRAMDATA 8192
 #define	NFS_MAXDATA	32768
 #define	NFS_MAXPATHLEN	1024
 #define	NFS_MAXNAMLEN	255
 #define	NFS_FHSIZE	32
 #define	NFS_MAXPKTHDR	404
 #define NFS_MAXPACKET	(NFS_MAXPKTHDR+NFS_MAXDATA)
 #define	NFS_MINPACKET	20
 #define	NFS_FABLKSIZE	512	/* Size in bytes of a block wrt fa_blocks */
 #define	NFS_READDIRSIZE	1024
 
 /* Stat numbers for rpc returns */
 #define	NFS_OK		0
 #define	NFSERR_PERM	1
 #define	NFSERR_NOENT	2
 #define	NFSERR_IO	5
 #define	NFSERR_NXIO	6
 #define	NFSERR_ACCES	13
 #define	NFSERR_EXIST	17
 #define	NFSERR_NODEV	19
 #define	NFSERR_NOTDIR	20
 #define	NFSERR_ISDIR	21
 #define	NFSERR_FBIG	27
 #define	NFSERR_NOSPC	28
 #define	NFSERR_ROFS	30
 #define	NFSERR_NAMETOL	63
 #define	NFSERR_NOTEMPTY	66
 #define	NFSERR_DQUOT	69
 #define	NFSERR_STALE	70
 #define	NFSERR_WFLUSH	99
 
 /* Sizes in bytes of various nfs rpc components */
 #define	NFSX_FH		32
 #define	NFSX_UNSIGNED	4
 #define	NFSX_FATTR	68
 #define	NFSX_SATTR	32
 #define NFSX_STATFS	20
 #define	NFSX_COOKIE	4
 
 /* nfs rpc procedure numbers */
 #define	NFSPROC_NULL		0
 #define	NFSPROC_GETATTR		1
 #define	NFSPROC_SETATTR		2
 #define	NFSPROC_NOOP		3
 #define	NFSPROC_ROOT		NFSPROC_NOOP	/* Obsolete */
 #define	NFSPROC_LOOKUP		4
 #define	NFSPROC_READLINK	5
 #define	NFSPROC_READ		6
 #define	NFSPROC_WRITECACHE	NFSPROC_NOOP	/* Obsolete */
 #define	NFSPROC_WRITE		8
 #define	NFSPROC_CREATE		9
 #define	NFSPROC_REMOVE		10
 #define	NFSPROC_RENAME		11
 #define	NFSPROC_LINK		12
 #define	NFSPROC_SYMLINK		13
 #define	NFSPROC_MKDIR		14
 #define	NFSPROC_RMDIR		15
 #define	NFSPROC_READDIR		16
 #define	NFSPROC_STATFS		17
 
 #define	NFS_NPROCS		18
 
 
 /* File types */
 typedef enum {
 	NFNON=0,
 	NFREG=1,
 	NFDIR=2,
 	NFBLK=3,
 	NFCHR=4,
 	NFLNK=5
 } nfstype;
-
-/* Structs for common parts of the rpc's */
-struct nfsv2_time {
-	n_long	nfs_sec;
-	n_long	nfs_usec;
-};
-
-/*
- * File attributes and setable attributes.
- */
-struct nfsv2_fattr {
-	n_long	fa_type;
-	n_long	fa_mode;
-	n_long	fa_nlink;
-	n_long	fa_uid;
-	n_long	fa_gid;
-	n_long	fa_size;
-	n_long	fa_blocksize;
-	n_long	fa_rdev;
-	n_long	fa_blocks;
-	n_long	fa_fsid;
-	n_long	fa_fileid;
-	struct nfsv2_time fa_atime;
-	struct nfsv2_time fa_mtime;
-	struct nfsv2_time fa_ctime;
-};
-
-struct nfsv2_sattr {
-	n_long	sa_mode;
-	n_long	sa_uid;
-	n_long	sa_gid;
-	n_long	sa_size;
-	struct nfsv2_time sa_atime;
-	struct nfsv2_time sa_mtime;
-};
-
-struct nfsv2_statfs {
-	n_long	sf_tsize;
-	n_long	sf_bsize;
-	n_long	sf_blocks;
-	n_long	sf_bfree;
-	n_long	sf_bavail;
-};
Index: stable/11/lib/libstand/stand.h
===================================================================
--- stable/11/lib/libstand/stand.h	(revision 329098)
+++ stable/11/lib/libstand/stand.h	(revision 329099)
@@ -1,421 +1,422 @@
 /*
  * Copyright (c) 1998 Michael Smith.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  * From	$NetBSD: stand.h,v 1.22 1997/06/26 19:17:40 drochner Exp $	
  */
 
 /*-
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)stand.h	8.1 (Berkeley) 6/11/93
  */
 
 #ifndef	STAND_H
 #define	STAND_H
 
 #include <sys/types.h>
 #include <sys/cdefs.h>
 #include <sys/stat.h>
 #include <sys/dirent.h>
 
 /* this header intentionally exports NULL from <string.h> */
 #include <string.h>
 
 #define CHK(fmt, args...)	printf("%s(%d): " fmt "\n", __func__, __LINE__ , ##args)
 #define PCHK(fmt, args...)	{printf("%s(%d): " fmt "\n", __func__, __LINE__ , ##args); getchar();}
 
 /* Avoid unwanted userlandish components */
 #define _KERNEL
 #include <sys/errno.h>
 #undef _KERNEL
 
 /* special stand error codes */
 #define	EADAPT	(ELAST+1)	/* bad adaptor */
 #define	ECTLR	(ELAST+2)	/* bad controller */
 #define	EUNIT	(ELAST+3)	/* bad unit */
 #define ESLICE	(ELAST+4)	/* bad slice */
 #define	EPART	(ELAST+5)	/* bad partition */
 #define	ERDLAB	(ELAST+6)	/* can't read disk label */
 #define	EUNLAB	(ELAST+7)	/* unlabeled disk */
 #define	EOFFSET	(ELAST+8)	/* relative seek not supported */
 #define	ESALAST	(ELAST+8)	/* */
 
 struct open_file;
 
 /*
  * This structure is used to define file system operations in a file system
  * independent way.
  *
  * XXX note that filesystem providers should export a pointer to their fs_ops
  *     struct, so that consumers can reference this and thus include the
  *     filesystems that they require.
  */
 struct fs_ops {
     const char	*fs_name;
     int		(*fo_open)(const char *path, struct open_file *f);
     int		(*fo_close)(struct open_file *f);
     int		(*fo_read)(struct open_file *f, void *buf,
 			   size_t size, size_t *resid);
     int		(*fo_write)(struct open_file *f, void *buf,
 			    size_t size, size_t *resid);
     off_t	(*fo_seek)(struct open_file *f, off_t offset, int where);
     int		(*fo_stat)(struct open_file *f, struct stat *sb);
     int		(*fo_readdir)(struct open_file *f, struct dirent *d);
 };
 
 /*
  * libstand-supplied filesystems
  */
 extern struct fs_ops ufs_fsops;
 extern struct fs_ops tftp_fsops;
 extern struct fs_ops nfs_fsops;
 extern struct fs_ops cd9660_fsops;
 extern struct fs_ops nandfs_fsops;
 extern struct fs_ops gzipfs_fsops;
 extern struct fs_ops bzipfs_fsops;
 extern struct fs_ops dosfs_fsops;
 extern struct fs_ops ext2fs_fsops;
 extern struct fs_ops splitfs_fsops;
 extern struct fs_ops pkgfs_fsops;
 
 /* where values for lseek(2) */
 #define	SEEK_SET	0	/* set file offset to offset */
 #define	SEEK_CUR	1	/* set file offset to current plus offset */
 #define	SEEK_END	2	/* set file offset to EOF plus offset */
 
 /* 
  * Device switch
  */
 struct devsw {
     const char	dv_name[8];
     int		dv_type;		/* opaque type constant, arch-dependant */
     int		(*dv_init)(void);	/* early probe call */
     int		(*dv_strategy)(void *devdata, int rw, daddr_t blk,
 			size_t size, char *buf, size_t *rsize);
     int		(*dv_open)(struct open_file *f, ...);
     int		(*dv_close)(struct open_file *f);
     int		(*dv_ioctl)(struct open_file *f, u_long cmd, void *data);
     int		(*dv_print)(int verbose);	/* print device information */
     void	(*dv_cleanup)(void);
 };
 
 /*
  * libstand-supplied device switch
  */
 extern struct devsw netdev;
 
 extern int errno;
 
 /*
  * Generic device specifier; architecture-dependent
  * versions may be larger, but should be allowed to
  * overlap.
  */
 struct devdesc
 {
     struct devsw	*d_dev;
     int			d_type;
 #define DEVT_NONE	0
 #define DEVT_DISK	1
 #define DEVT_NET	2
 #define DEVT_CD		3
 #define DEVT_ZFS	4
+#define DEVT_FD		5
     int			d_unit;
     void		*d_opendata;
 };
 
 struct open_file {
     int			f_flags;	/* see F_* below */
     struct devsw	*f_dev;		/* pointer to device operations */
     void		*f_devdata;	/* device specific data */
     struct fs_ops	*f_ops;		/* pointer to file system operations */
     void		*f_fsdata;	/* file system specific data */
     off_t		f_offset;	/* current file offset */
     char		*f_rabuf;	/* readahead buffer pointer */
     size_t		f_ralen;	/* valid data in readahead buffer */
     off_t		f_raoffset;	/* consumer offset in readahead buffer */
 #define SOPEN_RASIZE	512
 };
 
 #define	SOPEN_MAX	64
 extern struct open_file files[];
 
 /* f_flags values */
 #define	F_READ		0x0001	/* file opened for reading */
 #define	F_WRITE		0x0002	/* file opened for writing */
 #define	F_RAW		0x0004	/* raw device open - no file system */
 #define F_NODEV		0x0008	/* network open - no device */
 
 #define isascii(c)	(((c) & ~0x7F) == 0)
 
 static __inline int isupper(int c)
 {
     return c >= 'A' && c <= 'Z';
 }
 
 static __inline int islower(int c)
 {
     return c >= 'a' && c <= 'z';
 }
 
 static __inline int isspace(int c)
 {
     return c == ' ' || (c >= 0x9 && c <= 0xd);
 }
 
 static __inline int isdigit(int c)
 {
     return c >= '0' && c <= '9';
 }
 
 static __inline int isxdigit(int c)
 {
     return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
 }
 
 static __inline int isalpha(int c)
 {
     return isupper(c) || islower(c);
 }
 
 static __inline int isalnum(int c)
 {
     return isalpha(c) || isdigit(c);
 }
 
 static __inline int toupper(int c)
 {
     return islower(c) ? c - 'a' + 'A' : c;
 }
 
 static __inline int tolower(int c)
 {
     return isupper(c) ? c - 'A' + 'a' : c;
 }
 
 /* sbrk emulation */
 extern void	setheap(void *base, void *top);
 extern char	*sbrk(int incr);
 
 /* Matt Dillon's zalloc/zmalloc */
 extern void	*malloc(size_t bytes);
 extern void	free(void *ptr);
 /*#define free(p)	{CHK("free %p", p); free(p);} */ /* use for catching guard violations */
 extern void	*calloc(size_t n1, size_t n2);
 extern void	*realloc(void *ptr, size_t size);
 extern void	*reallocf(void *ptr, size_t size);
 extern void	mallocstats(void);
 
 extern int	printf(const char *fmt, ...) __printflike(1, 2);
 extern void	vprintf(const char *fmt, __va_list);
 extern int	sprintf(char *buf, const char *cfmt, ...) __printflike(2, 3);
 extern int	snprintf(char *buf, size_t size, const char *cfmt, ...) __printflike(3, 4);
 extern void	vsprintf(char *buf, const char *cfmt, __va_list);
 
 extern void	twiddle(u_int callerdiv);
 extern void	twiddle_divisor(u_int globaldiv);
 
 extern void	ngets(char *, int);
 #define gets(x)	ngets((x), 0)
 extern int	fgetstr(char *buf, int size, int fd);
 
 extern int	open(const char *, int);
 #define	O_RDONLY	0x0
 #define O_WRONLY	0x1
 #define O_RDWR		0x2
 extern int	close(int);
 extern void	closeall(void);
 extern ssize_t	read(int, void *, size_t);
 extern ssize_t	write(int, void *, size_t);
 extern struct	dirent *readdirfd(int);
 
 extern void	srandom(u_long seed);
 extern u_long	random(void);
     
 /* imports from stdlib, locally modified */
 extern long	strtol(const char *, char **, int);
 extern unsigned long	strtoul(const char *, char **, int);
 extern char	*optarg;			/* getopt(3) external variables */
 extern int	optind, opterr, optopt, optreset;
 extern int	getopt(int, char * const [], const char *);
 
 /* pager.c */
 extern void	pager_open(void);
 extern void	pager_close(void);
 extern int	pager_output(const char *lines);
 extern int	pager_file(const char *fname);
 
 /* No signal state to preserve */
 #define setjmp	_setjmp
 #define longjmp	_longjmp
 
 /* environment.c */
 #define EV_DYNAMIC	(1<<0)		/* value was dynamically allocated, free if changed/unset */
 #define EV_VOLATILE	(1<<1)		/* value is volatile, make a copy of it */
 #define EV_NOHOOK	(1<<2)		/* don't call hook when setting */
 
 struct env_var;
 typedef char	*(ev_format_t)(struct env_var *ev);
 typedef int	(ev_sethook_t)(struct env_var *ev, int flags,
 		    const void *value);
 typedef int	(ev_unsethook_t)(struct env_var *ev);
 
 struct env_var
 {
     char		*ev_name;
     int			ev_flags;
     void		*ev_value;
     ev_sethook_t	*ev_sethook;
     ev_unsethook_t	*ev_unsethook;
     struct env_var	*ev_next, *ev_prev;
 };
 extern struct env_var	*environ;
 
 extern struct env_var	*env_getenv(const char *name);
 extern int		env_setenv(const char *name, int flags,
 				   const void *value, ev_sethook_t sethook,
 				   ev_unsethook_t unsethook);
 extern char		*getenv(const char *name);
 extern int		setenv(const char *name, const char *value,
 			       int overwrite);
 extern int		putenv(const char *string);
 extern int		unsetenv(const char *name);
 
 extern ev_sethook_t	env_noset;		/* refuse set operation */
 extern ev_unsethook_t	env_nounset;		/* refuse unset operation */
 
 /* BCD conversions (undocumented) */
 extern u_char const	bcd2bin_data[];
 extern u_char const	bin2bcd_data[];
 extern char const	hex2ascii_data[];
 
 #define	bcd2bin(bcd)	(bcd2bin_data[bcd])
 #define	bin2bcd(bin)	(bin2bcd_data[bin])
 #define	hex2ascii(hex)	(hex2ascii_data[hex])
 
 /* min/max (undocumented) */
 static __inline int imax(int a, int b) { return (a > b ? a : b); }
 static __inline int imin(int a, int b) { return (a < b ? a : b); }
 static __inline long lmax(long a, long b) { return (a > b ? a : b); }
 static __inline long lmin(long a, long b) { return (a < b ? a : b); }
 static __inline u_int max(u_int a, u_int b) { return (a > b ? a : b); }
 static __inline u_int min(u_int a, u_int b) { return (a < b ? a : b); }
 static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b ? a : b); }
 static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b ? a : b); }
 static __inline u_long ulmax(u_long a, u_long b) { return (a > b ? a : b); }
 static __inline u_long ulmin(u_long a, u_long b) { return (a < b ? a : b); }
 
 
 /* null functions for device/filesystem switches (undocumented) */
 extern int	nodev(void);
 extern int	noioctl(struct open_file *, u_long, void *);
 extern void	nullsys(void);
 
 extern int	null_open(const char *path, struct open_file *f);
 extern int	null_close(struct open_file *f);
 extern int	null_read(struct open_file *f, void *buf, size_t size, size_t *resid);
 extern int	null_write(struct open_file *f, void *buf, size_t size, size_t *resid);
 extern off_t	null_seek(struct open_file *f, off_t offset, int where);
 extern int	null_stat(struct open_file *f, struct stat *sb);
 extern int	null_readdir(struct open_file *f, struct dirent *d);
 
 
 /* 
  * Machine dependent functions and data, must be provided or stubbed by 
  * the consumer 
  */
 extern int		getchar(void);
 extern int		ischar(void);
 extern void		putchar(int);
 extern int		devopen(struct open_file *, const char *, const char **);
 extern int		devclose(struct open_file *f);
 extern void		panic(const char *, ...) __dead2 __printflike(1, 2);
 extern struct fs_ops	*file_system[];
 extern struct fs_ops	*exclusive_file_system;
 extern struct devsw	*devsw[];
 
 /*
  * Expose byteorder(3) functions.
  */
 #ifndef _BYTEORDER_PROTOTYPED
 #define	_BYTEORDER_PROTOTYPED
 extern uint32_t		htonl(uint32_t);
 extern uint16_t		htons(uint16_t);
 extern uint32_t		ntohl(uint32_t);
 extern uint16_t		ntohs(uint16_t);
 #endif
 
 #ifndef _BYTEORDER_FUNC_DEFINED
 #define	_BYTEORDER_FUNC_DEFINED
 #define	htonl(x)	__htonl(x)
 #define	htons(x)	__htons(x)
 #define	ntohl(x)	__ntohl(x)
 #define	ntohs(x)	__ntohs(x)
 #endif
 
 void *Malloc(size_t, const char *, int);
 void *Calloc(size_t, size_t, const char *, int);
 void *Realloc(void *, size_t, const char *, int);
 void Free(void *, const char *, int);
 
 #if 1
 #define malloc(x)	Malloc(x, __FILE__, __LINE__)
 #define calloc(x, y)	Calloc(x, y, __FILE__, __LINE__)
 #define free(x)		Free(x, __FILE__, __LINE__)
 #define realloc(x, y)	Realloc(x, y, __FILE__, __LINE__)
 #else
 #define malloc(x)	Malloc(x, NULL, 0)
 #define calloc(x, y)	Calloc(x, y, NULL, 0)
 #define free(x)		Free(x, NULL, 0)
 #define realloc(x, y)	Realloc(x, y, NULL, 0)
 #endif
 
 #endif	/* STAND_H */
Index: stable/11/sys/boot/common/bcache.c
===================================================================
--- stable/11/sys/boot/common/bcache.c	(revision 329098)
+++ stable/11/sys/boot/common/bcache.c	(revision 329099)
@@ -1,456 +1,497 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright 2015 Toomas Soome <tsoome@me.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #include <sys/param.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Simple hashed block cache
  */
 
 #include <sys/stdint.h>
 
 #include <stand.h>
 #include <string.h>
 #include <strings.h>
 
 #include "bootstrap.h"
 
 /* #define BCACHE_DEBUG */
 
 #ifdef BCACHE_DEBUG
 # define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
 #else
 # define DEBUG(fmt, args...)
 #endif
 
 struct bcachectl
 {
     daddr_t	bc_blkno;
     int		bc_count;
 };
 
 /*
  * bcache per device node. cache is allocated on device first open and freed
  * on last close, to save memory. The issue there is the size; biosdisk
  * supports up to 31 (0x1f) devices. Classic setup would use single disk
  * to boot from, but this has changed with zfs.
  */
 struct bcache {
     struct bcachectl	*bcache_ctl;
     caddr_t		bcache_data;
-    u_int		bcache_nblks;
+    size_t		bcache_nblks;	/* cache size in blocks; BHASH mask */
     size_t		ra;
 };
 
 static u_int bcache_total_nblks;	/* set by bcache_init */
 static u_int bcache_blksize;		/* set by bcache_init */
 static u_int bcache_numdev;		/* set by bcache_add_dev */
 /* statistics */
 static u_int bcache_units;	/* number of devices with cache */
 static u_int bcache_unit_nblks;	/* nblocks per unit */
 static u_int bcache_hits;
 static u_int bcache_misses;
 static u_int bcache_ops;
 static u_int bcache_bypasses;
 static u_int bcache_bcount;
 static u_int bcache_rablks;
 
 #define	BHASH(bc, blkno)	((blkno) & ((bc)->bcache_nblks - 1))
 #define	BCACHE_LOOKUP(bc, blkno)	\
 	((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
 #define	BCACHE_READAHEAD	256
 #define	BCACHE_MINREADAHEAD	32
+#define	BCACHE_MARKER		0xdeadbeef	/* end-of-cache sentinel */
 
 static void	bcache_invalidate(struct bcache *bc, daddr_t blkno);
 static void	bcache_insert(struct bcache *bc, daddr_t blkno);
 static void	bcache_free_instance(struct bcache *bc);
 
 /*
  * Initialise the cache for (nblks) of (bsize).
  */
 void
-bcache_init(u_int nblks, size_t bsize)
+bcache_init(size_t nblks, size_t bsize)
 {
     /* set up control data */
     bcache_total_nblks = nblks;
     bcache_blksize = bsize;
 }
 
 /*
  * add number of devices to bcache. we have to divide cache space
  * between the devices, so bcache_add_dev() can be used to set up the
  * number. The issue is, we need to get the number before actual allocations.
  * bcache_add_dev() is supposed to be called from device init() call, so the
  * assumption is, devsw dv_init is called for plain devices first, and
  * for zfs, last.
  */
 void
 bcache_add_dev(int devices)
 {
     bcache_numdev += devices;
 }
 
 void *
 bcache_allocate(void)
 {
     u_int i;
     struct bcache *bc = malloc(sizeof (struct bcache));
     int disks = bcache_numdev;
+    uint32_t *marker;
 
     if (disks == 0)
 	disks = 1;	/* safe guard */
 
     if (bc == NULL) {
 	errno = ENOMEM;
 	return (bc);
     }
 
     /*
      * the bcache block count must be power of 2 for hash function
      */
     i = fls(disks) - 1;		/* highbit - 1 */
     if (disks > (1 << i))	/* next power of 2 */
 	i++;
 
     bc->bcache_nblks = bcache_total_nblks >> i;
     bcache_unit_nblks = bc->bcache_nblks;
-    bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
+    bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
+	sizeof(uint32_t));
     if (bc->bcache_data == NULL) {
 	/* dont error out yet. fall back to 32 blocks and try again */
 	bc->bcache_nblks = 32;
-	bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
+	bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
+	sizeof(uint32_t));
     }
 
     bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
 
     if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
 	bcache_free_instance(bc);
 	errno = ENOMEM;
-	return(NULL);
+	return (NULL);
     }
+    /* Insert cache end marker. */
+    marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
+    *marker = BCACHE_MARKER;
 
     /* Flush the cache */
     for (i = 0; i < bc->bcache_nblks; i++) {
 	bc->bcache_ctl[i].bc_count = -1;
 	bc->bcache_ctl[i].bc_blkno = -1;
     }
     bcache_units++;
     bc->ra = BCACHE_READAHEAD;	/* optimistic read ahead */
     return (bc);
 }
 
 void
 bcache_free(void *cache)
 {
     struct bcache *bc = cache;
 
     if (bc == NULL)
 	return;
 
     bcache_free_instance(bc);
     bcache_units--;
 }
 
 /*
  * Handle a write request; write directly to the disk, and populate the
  * cache with the new values.
  */
 static int
 write_strategy(void *devdata, int rw, daddr_t blk, size_t size,
     char *buf, size_t *rsize)
 {
     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
     struct bcache		*bc = dd->dv_cache;
     daddr_t			i, nblk;
 
     nblk = size / bcache_blksize;
 
     /* Invalidate the blocks being written */
     for (i = 0; i < nblk; i++) {
 	bcache_invalidate(bc, blk + i);
     }
 
     /* Write the blocks */
     return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
 }
 
 /*
  * Handle a read request; fill in parts of the request that can
  * be satisfied by the cache, use the supplied strategy routine to do
  * device I/O and then use the I/O results to populate the cache. 
  */
 static int
 read_strategy(void *devdata, int rw, daddr_t blk, size_t size,
     char *buf, size_t *rsize)
 {
     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
     struct bcache		*bc = dd->dv_cache;
     size_t			i, nblk, p_size, r_size, complete, ra;
     int				result;
     daddr_t			p_blk;
     caddr_t			p_buf;
+    uint32_t			*marker;
 
     if (bc == NULL) {
 	errno = ENODEV;
 	return (-1);
     }
 
+    marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
+
     if (rsize != NULL)
 	*rsize = 0;
 
     nblk = size / bcache_blksize;
     if (nblk == 0 && size != 0)
 	nblk++;
     result = 0;
     complete = 1;
 
     /* Satisfy any cache hits up front, break on first miss */
     for (i = 0; i < nblk; i++) {
 	if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) {
 	    bcache_misses += (nblk - i);
 	    complete = 0;
 	    if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD)
 		bc->ra >>= 1;	/* reduce read ahead */
 	    break;
 	} else {
 	    bcache_hits++;
 	}
     }
 
    if (complete) {	/* whole set was in cache, return it */
 	if (bc->ra < BCACHE_READAHEAD)
 		bc->ra <<= 1;	/* increase read ahead */
 	bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)), buf, size);
 	goto done;
    }
 
     /*
      * Fill in any misses. From check we have i pointing to first missing
      * block, read in all remaining blocks + readahead.
      * We have space at least for nblk - i before bcache wraps.
      */
     p_blk = blk + i;
     p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk));
     r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */
 
     p_size = MIN(r_size, nblk - i);	/* read at least those blocks */
 
+    /*
+     * The read ahead size setup.
+     * While read ahead can save us I/O, it also complicates things:
+     * 1. We do not want to read ahead by wrapping around the
+     * bcache end - this would complicate the cache management.
+     * 2. We are using bc->ra as a dynamic hint for the read ahead size:
+     * detected cache hits will increase the read-ahead block count, and
+     * misses will decrease it, see the code above.
+     * 3. The bcache is sized in 512B blocks; however, the underlying device
+     * may have a larger sector size, and we should perform the I/O by
+     * taking these larger sector sizes into account. We could solve this by
+     * passing the sector size to bcache_allocate(), or by using ioctl(), but
+     * in this version we are using a constant, 16 blocks, and are rounding
+     * the read ahead block count down to a multiple of 16.
+     * There are two reasons for using a constant: first, we are not entirely
+     * sure the BIOS disk interface provides the correct sector size value;
+     * second, this way we get the most conservative setup for the ra.
+     *
+     * The selection of a multiple of 16 blocks (8KB) is quite arbitrary;
+     * however, we want to cover CDs (2K) and 4K disks.
+     * bcache_allocate() will always fall back to a minimum of 32 blocks.
+     * Our choice of 16 read ahead blocks will always fit inside the bcache.
+     */
+
     ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
-    if (ra != bc->bcache_nblks) { /* do we have RA space? */
-	ra = MIN(bc->ra, ra);
+    if (ra != 0 && ra != bc->bcache_nblks) { /* do we have RA space? */
+	ra = MIN(bc->ra, ra - 1);
+	ra = rounddown(ra, 16);		/* multiple of 16 blocks */
 	p_size += ra;
     }
 
     /* invalidate bcache */
     for (i = 0; i < p_size; i++) {
 	bcache_invalidate(bc, p_blk + i);
     }
 
     r_size = 0;
     /*
      * with read-ahead, it may happen we are attempting to read past
      * disk end, as bcache has no information about disk size.
      * in such case we should get partial read if some blocks can be
      * read or error, if no blocks can be read.
      * in either case we should return the data in bcache and only
      * return error if there is no data.
      */
     result = dd->dv_strategy(dd->dv_devdata, rw, p_blk,
 	p_size * bcache_blksize, p_buf, &r_size);
 
     r_size /= bcache_blksize;
     for (i = 0; i < r_size; i++)
 	bcache_insert(bc, p_blk + i);
 
     /* update ra statistics */
     if (r_size != 0) {
 	if (r_size < p_size)
 	    bcache_rablks += (p_size - r_size);
 	else
 	    bcache_rablks += ra;
     }
 
     /* check how much data can we copy */
     for (i = 0; i < nblk; i++) {
 	if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i)))
 	    break;
     }
 
     if (size > i * bcache_blksize)
 	size = i * bcache_blksize;
 
     if (size != 0) {
 	bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)), buf, size);
 	result = 0;
     }
 
+    if (*marker != BCACHE_MARKER) {	/* end-of-cache sentinel clobbered */
+	printf("BUG: bcache corruption detected: nblks: %zu p_blk: %lu, "
+	    "p_size: %zu, ra: %zu\n", bc->bcache_nblks,
+	    (long unsigned)BHASH(bc, p_blk), p_size, ra);
+    }
+
  done:
     if ((result == 0) && (rsize != NULL))
 	*rsize = size;
     return(result);
 }
 
 /* 
  * Requests larger than 1/2 cache size will be bypassed and go
  * directly to the disk.  XXX tune this.
  */
 int
 bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size,
     char *buf, size_t *rsize)
 {
     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
     struct bcache		*bc = dd->dv_cache;
     u_int bcache_nblks = 0;
     int nblk, cblk, ret;
     size_t csize, isize, total;
 
     bcache_ops++;
 
     if (bc != NULL)
 	bcache_nblks = bc->bcache_nblks;
 
     /* bypass large requests, or when the cache is inactive */
     if (bc == NULL ||
 	((size * 2 / bcache_blksize) > bcache_nblks)) {
-	DEBUG("bypass %d from %d", size / bcache_blksize, blk);
+	DEBUG("bypass %zu from %qu", size / bcache_blksize, blk);
 	bcache_bypasses++;
 	return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
     }
 
     switch (rw) {
     case F_READ:
 	nblk = size / bcache_blksize;
 	if (size != 0 && nblk == 0)
 	    nblk++;	/* read at least one block */
 
 	ret = 0;
 	total = 0;
 	while(size) {
 	    cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */
 	    cblk = MIN(cblk, nblk);
 
 	    if (size <= bcache_blksize)
 		csize = size;
 	    else
 		csize = cblk * bcache_blksize;
 
 	    ret = read_strategy(devdata, rw, blk, csize, buf+total, &isize);
 
 	    /*
 	     * we may have error from read ahead, if we have read some data
 	     * return partial read.
 	     */
 	    if (ret != 0 || isize == 0) {
 		if (total != 0)
 		    ret = 0;
 		break;
 	    }
 	    blk += isize / bcache_blksize;
 	    total += isize;
 	    size -= isize;
 	    nblk = size / bcache_blksize;
 	}
 
 	if (rsize)
 	    *rsize = total;
 
 	return (ret);
     case F_WRITE:
 	return write_strategy(devdata, rw, blk, size, buf, rsize);
     }
     return -1;
 }
 
 /*
  * Free allocated bcache instance
  */
 static void
 bcache_free_instance(struct bcache *bc)
 {
     if (bc != NULL) {
 	if (bc->bcache_ctl)
 	    free(bc->bcache_ctl);
 	if (bc->bcache_data)
 	    free(bc->bcache_data);
 	free(bc);
     }
 }
 
 /*
  * Insert a block into the cache.
  */
 static void
 bcache_insert(struct bcache *bc, daddr_t blkno)
 {
     u_int	cand;
     
     cand = BHASH(bc, blkno);
 
     DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount);
     bc->bcache_ctl[cand].bc_blkno = blkno;
     bc->bcache_ctl[cand].bc_count = bcache_bcount++;
 }
 
 /*
  * Invalidate a block from the cache.
  */
 static void
 bcache_invalidate(struct bcache *bc, daddr_t blkno)
 {
     u_int	i;
     
     i = BHASH(bc, blkno);
     if (bc->bcache_ctl[i].bc_blkno == blkno) {
 	bc->bcache_ctl[i].bc_count = -1;
 	bc->bcache_ctl[i].bc_blkno = -1;
 	DEBUG("invalidate blk %llu", blkno);
     }
 }
 
 #ifndef BOOT2
 COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache);
 
 static int
 command_bcache(int argc, char *argv[])
 {
     if (argc != 1) {
 	command_errmsg = "wrong number of arguments";
 	return(CMD_ERROR);
     }
 
     printf("\ncache blocks: %d\n", bcache_total_nblks);
     printf("cache blocksz: %d\n", bcache_blksize);
     printf("cache readahead: %d\n", bcache_rablks);
     printf("unit cache blocks: %d\n", bcache_unit_nblks);
     printf("cached units: %d\n", bcache_units);
     printf("%d ops  %d bypasses  %d hits  %d misses\n", bcache_ops,
 	bcache_bypasses, bcache_hits, bcache_misses);
     return(CMD_OK);
 }
 #endif
Index: stable/11/sys/boot/common/bootstrap.h
===================================================================
--- stable/11/sys/boot/common/bootstrap.h	(revision 329098)
+++ stable/11/sys/boot/common/bootstrap.h	(revision 329099)
@@ -1,334 +1,334 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _BOOTSTRAP_H_
 #define	_BOOTSTRAP_H_
 
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/linker_set.h>
 
 /* Commands and return values; nonzero return sets command_errmsg != NULL */
 typedef int	(bootblk_cmd_t)(int argc, char *argv[]);
 #define	COMMAND_ERRBUFSZ	(256)
 extern char	*command_errmsg;	
 extern char	command_errbuf[COMMAND_ERRBUFSZ];
 #define CMD_OK		0
 #define CMD_WARN	1
 #define CMD_ERROR	2
 #define CMD_CRIT	3
 #define CMD_FATAL	4
 
 /* interp.c */
 void	interact(const char *rc);
 int	include(const char *filename);
 
 /* interp_backslash.c */
 char	*backslash(char *str);
 
 /* interp_parse.c */
 int	parse(int *argc, char ***argv, char *str);
 
 /* interp_forth.c */
 void	bf_init(const char *rc);
 int	bf_run(char *line);
 
 /* boot.c */
 int	autoboot(int timeout, char *prompt);
 void	autoboot_maybe(void);
 int	getrootmount(char *rootdev);
 
 /* misc.c */
 char	*unargv(int argc, char *argv[]);
 void	hexdump(caddr_t region, size_t len);
 size_t	strlenout(vm_offset_t str);
 char	*strdupout(vm_offset_t str);
 void	kern_bzero(vm_offset_t dest, size_t len);
 int	kern_pread(int fd, vm_offset_t dest, size_t len, off_t off);
 void	*alloc_pread(int fd, off_t off, size_t len);
 
 /* bcache.c */
-void	bcache_init(u_int nblks, size_t bsize);
+void	bcache_init(size_t nblks, size_t bsize);
 void	bcache_add_dev(int);
 void	*bcache_allocate(void);
 void	bcache_free(void *);
 int	bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size,
 			char *buf, size_t *rsize);
 
 /*
  * Disk block cache
  */
 struct bcache_devdata
 {
     int         (*dv_strategy)(void *devdata, int rw, daddr_t blk,
 			size_t size, char *buf, size_t *rsize);
     void	*dv_devdata;
     void	*dv_cache;
 };
 
 /*
  * Modular console support.
  */
 struct console 
 {
     const char	*c_name;
     const char	*c_desc;
     int		c_flags;
 #define C_PRESENTIN	(1<<0)	    /* console can provide input */
 #define C_PRESENTOUT	(1<<1)	    /* console can provide output */
 #define C_ACTIVEIN	(1<<2)	    /* user wants input from console */
 #define C_ACTIVEOUT	(1<<3)	    /* user wants output to console */
 #define	C_WIDEOUT	(1<<4)	    /* c_out routine groks wide chars */
     void	(* c_probe)(struct console *cp);	/* set c_flags to match hardware */
     int		(* c_init)(int arg);			/* reinit XXX may need more args */
     void	(* c_out)(int c);			/* emit c */
     int		(* c_in)(void);				/* wait for and return input */
     int		(* c_ready)(void);			/* return nonzero if input waiting */
 };
 extern struct console	*consoles[];
 void		cons_probe(void);
 
 /*
  * Plug-and-play enumerator/configurator interface.
  */
 struct pnphandler 
 {
     const char	*pp_name;		/* handler/bus name */
     void	(* pp_enumerate)(void);	/* enumerate PnP devices, add to chain */
 };
 
 struct pnpident
 {
     char			*id_ident;	/* ASCII identifier, actual format varies with bus/handler */
     STAILQ_ENTRY(pnpident)	id_link;
 };
 
 struct pnpinfo
 {
     char			*pi_desc;	/* ASCII description, optional */
     int				pi_revision;	/* optional revision (or -1) if not supported */
     char			*pi_module;	/* module/args nominated to handle device */
     int				pi_argc;	/* module arguments */
     char			**pi_argv;
     struct pnphandler		*pi_handler;	/* handler which detected this device */
     STAILQ_HEAD(,pnpident)	pi_ident;	/* list of identifiers */
     STAILQ_ENTRY(pnpinfo)	pi_link;
 };
 
 STAILQ_HEAD(pnpinfo_stql, pnpinfo);
 
 extern struct pnphandler	*pnphandlers[];		/* provided by MD code */
 
 void			pnp_addident(struct pnpinfo *pi, char *ident);
 struct pnpinfo		*pnp_allocinfo(void);
 void			pnp_freeinfo(struct pnpinfo *pi);
 void			pnp_addinfo(struct pnpinfo *pi);
 char			*pnp_eisaformat(u_int8_t *data);
 
 /*
  *  < 0	- No ISA in system
  * == 0	- Maybe ISA, search for read data port
  *  > 0	- ISA in system, value is read data port address
  */
 extern int			isapnp_readport;
 
 /*
  * Preloaded file metadata header.
  *
  * Metadata are allocated on our heap, and copied into kernel space
  * before executing the kernel.
  */
 struct file_metadata 
 {
     size_t			md_size;
     u_int16_t			md_type;
     struct file_metadata	*md_next;
     char			md_data[1];	/* data are immediately appended */
 };
 
 struct preloaded_file;
 struct mod_depend;
 
 struct kernel_module
 {
     char			*m_name;	/* module name */
     int				m_version;	/* module version */
 /*    char			*m_args;*/	/* arguments for the module */
     struct preloaded_file	*m_fp;
     struct kernel_module	*m_next;
 };
 
 /*
  * Preloaded file information. Depending on type, file can contain
  * additional units called 'modules'.
  *
  * At least one file (the kernel) must be loaded in order to boot.
  * The kernel is always loaded first.
  *
  * String fields (f_name, f_type) should be dynamically allocated.
  */
 struct preloaded_file
 {
     char			*f_name;	/* file name */
     char			*f_type;	/* verbose file type, eg 'ELF kernel', 'pnptable', etc. */
     char			*f_args;	/* arguments for the file */
     struct file_metadata	*f_metadata;	/* metadata that will be placed in the module directory */
     int				f_loader;	/* index of the loader that read the file */
     vm_offset_t			f_addr;		/* load address */
     size_t			f_size;		/* file size */
     struct kernel_module	*f_modules;	/* list of modules if any */
     struct preloaded_file	*f_next;	/* next file */
 };
 
 struct file_format
 {
     /* Load function must return EFTYPE if it can't handle the module supplied */
     int		(* l_load)(char *filename, u_int64_t dest, struct preloaded_file **result);
     /* Only a loader that will load a kernel (first module) should have an exec handler */
     int		(* l_exec)(struct preloaded_file *mp);
 };
 
 extern struct file_format	*file_formats[];	/* supplied by consumer */
 extern struct preloaded_file	*preloaded_files;
 
 int			mod_load(char *name, struct mod_depend *verinfo, int argc, char *argv[]);
 int			mod_loadkld(const char *name, int argc, char *argv[]);
 void			unload(void);
 
 struct preloaded_file *file_alloc(void);
 struct preloaded_file *file_findfile(const char *name, const char *type);
 struct file_metadata *file_findmetadata(struct preloaded_file *fp, int type);
 struct preloaded_file *file_loadraw(const char *name, char *type, int insert);
 void file_discard(struct preloaded_file *fp);
 void file_addmetadata(struct preloaded_file *fp, int type, size_t size, void *p);
 int  file_addmodule(struct preloaded_file *fp, char *modname, int version,
 	struct kernel_module **newmp);
 void file_removemetadata(struct preloaded_file *fp);
 
 /* MI module loaders */
 #ifdef __elfN
 /* Relocation types. */
 #define ELF_RELOC_REL	1
 #define ELF_RELOC_RELA	2
 
 /* Relocation offset for some architectures */
 extern u_int64_t __elfN(relocation_offset);
 
 struct elf_file;
 typedef Elf_Addr (symaddr_fn)(struct elf_file *ef, Elf_Size symidx);
 
 int	__elfN(loadfile)(char *filename, u_int64_t dest, struct preloaded_file **result);
 int	__elfN(obj_loadfile)(char *filename, u_int64_t dest,
 	    struct preloaded_file **result);
 int	__elfN(reloc)(struct elf_file *ef, symaddr_fn *symaddr,
 	    const void *reldata, int reltype, Elf_Addr relbase,
 	    Elf_Addr dataaddr, void *data, size_t len);
 int __elfN(loadfile_raw)(char *filename, u_int64_t dest,
 	    struct preloaded_file **result, int multiboot);
 int __elfN(load_modmetadata)(struct preloaded_file *fp, u_int64_t dest);
 #endif
 
 /*
  * Support for commands 
  */
 struct bootblk_command 
 {
     const char		*c_name;
     const char		*c_desc;
     bootblk_cmd_t	*c_fn;
 };
 
 #define COMMAND_SET(tag, key, desc, func)				\
     static bootblk_cmd_t func;						\
     static struct bootblk_command _cmd_ ## tag = { key, desc, func };	\
     DATA_SET(Xcommand_set, _cmd_ ## tag)
 
 SET_DECLARE(Xcommand_set, struct bootblk_command);
 
 /* 
  * The intention of the architecture switch is to provide a convenient
  * encapsulation of the interface between the bootstrap MI and MD code.
  * MD code may selectively populate the switch at runtime based on the
  * actual configuration of the target system.
  */
 struct arch_switch
 {
     /* Automatically load modules as required by detected hardware */
     int		(*arch_autoload)(void);
     /* Locate the device for (name), return pointer to tail in (*path) */
     int		(*arch_getdev)(void **dev, const char *name, const char **path);
     /* Copy from local address space to module address space, similar to bcopy() */
     ssize_t	(*arch_copyin)(const void *src, vm_offset_t dest,
 			       const size_t len);
     /* Copy to local address space from module address space, similar to bcopy() */
     ssize_t	(*arch_copyout)(const vm_offset_t src, void *dest,
 				const size_t len);
     /* Read from file to module address space, same semantics as read() */
     ssize_t	(*arch_readin)(const int fd, vm_offset_t dest,
 			       const size_t len);
     /* Perform ISA byte port I/O (only for systems with ISA) */
     int		(*arch_isainb)(int port);
     void	(*arch_isaoutb)(int port, int value);
 
     /*
      * Interface to adjust the load address according to the "object"
      * being loaded.
      */
     uint64_t	(*arch_loadaddr)(u_int type, void *data, uint64_t addr);
 #define	LOAD_ELF	1	/* data points to the ELF header. */
 #define	LOAD_RAW	2	/* data points to the file name. */
 
     /*
      * Interface to inform MD code about a loaded (ELF) segment. This
      * can be used to flush caches and/or set up translations.
      */
 #ifdef __elfN
     void	(*arch_loadseg)(Elf_Ehdr *eh, Elf_Phdr *ph, uint64_t delta);
 #else
     void	(*arch_loadseg)(void *eh, void *ph, uint64_t delta);
 #endif
 
     /* Probe ZFS pool(s), if needed. */
     void	(*arch_zfs_probe)(void);
 };
 extern struct arch_switch archsw;
 
 /* This must be provided by the MD code, but should it be in the archsw? */
 void	delay(int delay);
 
 void	dev_cleanup(void);
 
 time_t	time(time_t *tloc);
 
 #ifndef CTASSERT                /* Allow lint to override */
 #define CTASSERT(x)             _CTASSERT(x, __LINE__)
 #define _CTASSERT(x, y)         __CTASSERT(x, y)
 #define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
 #endif
 
 #endif /* !_BOOTSTRAP_H_ */
Index: stable/11/sys/boot/common/dev_net.c
===================================================================
--- stable/11/sys/boot/common/dev_net.c	(revision 329098)
+++ stable/11/sys/boot/common/dev_net.c	(revision 329099)
@@ -1,385 +1,385 @@
 /*	$NetBSD: dev_net.c,v 1.23 2008/04/28 20:24:06 martin Exp $	*/
 
 /*-
  * Copyright (c) 1997 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Gordon W. Ross.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*-
  * This module implements a "raw device" interface suitable for
  * use by the stand-alone I/O library NFS code.  This interface
  * does not support any "block" access, and exists only for the
  * purpose of initializing the network interface, getting boot
  * parameters, and performing the NFS mount.
  *
  * At open time, this does:
  *
  * find interface      - netif_open()
  * RARP for IP address - rarp_getipaddress()
  * RPC/bootparams      - callrpc(d, RPC_BOOTPARAMS, ...)
  * RPC/mountd          - nfs_mount(sock, ip, path)
  *
  * the root file handle from mountd is saved in a global
  * for use by the NFS open code (NFS/lookup).
  */
 
 #include <machine/stdarg.h>
 #include <sys/param.h>
 #include <sys/socket.h>
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 
 #include <stand.h>
 #include <string.h>
 #include <net.h>
 #include <netif.h>
 #include <bootp.h>
 #include <bootparam.h>
 
 #include "dev_net.h"
 #include "bootstrap.h"
 
 #ifdef	NETIF_DEBUG
 int debug = 0;
 #endif
 
 static char *netdev_name;
 static int netdev_sock = -1;
 static int netdev_opens;
 
 static int	net_init(void);
 static int	net_open(struct open_file *, ...);
 static int	net_close(struct open_file *);
 static void	net_cleanup(void);
 static int	net_strategy();
 static int	net_print(int);
 
 static int net_getparams(int sock);
 
 struct devsw netdev = {
 	"net",
 	DEVT_NET,
 	net_init,
 	net_strategy,
 	net_open,
 	net_close,
 	noioctl,
 	net_print,
 	net_cleanup
 };
 
 /*
  * devsw init hook for the "net" device.  There is nothing to set up here,
  * so it always reports success (0).
  */
 static int
 net_init(void)
 {
 
 	return (0);
 }
 
 /*
  * Called by devopen after it sets f->f_dev to our devsw entry.
  * This opens the low-level device and sets f->f_devdata.
  * This is declared with variable arguments...
  */
 static int
 net_open(struct open_file *f, ...)
 {
 	struct iodesc *d;
 	va_list args;
 	char *devname;		/* Device part of file name (or NULL). */
 	int error = 0;
 
 	va_start(args, f);
 	devname = va_arg(args, char*);
 	va_end(args);
 
 	/* Before opening another interface, close the previous one first. */
 	if (netdev_sock >= 0 && strcmp(devname, netdev_name) != 0)
 		net_cleanup();
 
 	/* On first open, do netif open, mount, etc. */
 	if (netdev_opens == 0) {
 		/* Find network interface. */
 		if (netdev_sock < 0) {
 			netdev_sock = netif_open(devname);
 			if (netdev_sock < 0) {
 				printf("net_open: netif_open() failed\n");
 				return (ENXIO);
 			}
 			netdev_name = strdup(devname);
 #ifdef	NETIF_DEBUG
 			if (debug)
 				printf("net_open: netif_open() succeeded\n");
 #endif
 		}
 		/*
 		 * If network params were not set by netif_open(), try to get
 		 * them via bootp, rarp, etc.
 		 */
 		if (rootip.s_addr == 0) {
 			/* Get root IP address, and path, etc. */
 			error = net_getparams(netdev_sock);
 			if (error) {
 				/* getparams makes its own noise */
 				free(netdev_name);
 				netif_close(netdev_sock);
 				netdev_sock = -1;
 				return (error);
 			}
 		}
 		/*
 		 * Set the variables required by the kernel's nfs_diskless
 		 * mechanism.  This is the minimum set of variables required to
 		 * mount a root filesystem without needing to obtain additional
 		 * info from bootp or other sources.
 		 */
 		d = socktodesc(netdev_sock);
 		setenv("boot.netif.hwaddr", ether_sprintf(d->myea), 1);
 		setenv("boot.netif.ip", inet_ntoa(myip), 1);
 		setenv("boot.netif.netmask", intoa(netmask), 1);
 		setenv("boot.netif.gateway", inet_ntoa(gateip), 1);
 		setenv("boot.netif.server", inet_ntoa(rootip), 1);
 		if (netproto == NET_TFTP) {
 			setenv("boot.tftproot.server", inet_ntoa(rootip), 1);
 			setenv("boot.tftproot.path", rootpath, 1);
 		} else if (netproto == NET_NFS) {
 			setenv("boot.nfsroot.server", inet_ntoa(rootip), 1);
 			setenv("boot.nfsroot.path", rootpath, 1);
 		}
 		if (intf_mtu != 0) {
 			char mtu[16];
-			sprintf(mtu, "%u", intf_mtu);
+			snprintf(mtu, sizeof(mtu), "%u", intf_mtu);
 			setenv("boot.netif.mtu", mtu, 1);
 		}
 
 	}
 	netdev_opens++;
 	f->f_devdata = &netdev_sock;
 	return (error);
 }
 
 /*
  * devsw close hook: detach the open file from the device by clearing
  * f->f_devdata.  Always returns 0.
  *
  * Note that this neither decrements netdev_opens nor closes the socket;
  * the only netif_close() on netdev_sock in this function's vicinity is in
  * net_cleanup().
  */
 static int
 net_close(struct open_file *f)
 {
 
 #ifdef	NETIF_DEBUG
 	if (debug)
 		printf("net_close: opens=%d\n", netdev_opens);
 #endif
 
 	f->f_devdata = NULL;
 
 	return (0);
 }
 
 /*
  * Shut down the network interface if one is open: forget the root server
  * address, release the saved device name, close the interface and mark
  * netdev_sock as unused.  Does nothing when no socket is open.
  */
 static void
 net_cleanup(void)
 {
 
 	if (netdev_sock < 0)
 		return;

 #ifdef	NETIF_DEBUG
 	if (debug)
 		printf("net_cleanup: calling netif_close()\n");
 #endif
 	rootip.s_addr = 0;
 	free(netdev_name);
 	netif_close(netdev_sock);
 	netdev_sock = -1;
 }
 
 /*
  * devsw strategy hook.  This device offers no block access, so every
  * request is rejected with EIO.
  */
 static int
 net_strategy()
 {
 
 	return (EIO);
 }
 
 #define SUPPORT_BOOTP
 
 /*
  * Get info for NFS boot: our IP address, our hostname,
  * server IP address, and our root path on the server.
  * There are two ways to do this:  The old, Sun way,
  * and the more modern, BOOTP way. (RFC951, RFC1048)
  *
  * The default is to use the Sun bootparams RPC
  * (because that is what the kernel will do).
  * MD code can make try_bootp initialized data,
  * which will override this common definition.
  */
 #ifdef	SUPPORT_BOOTP
 int try_bootp = 1;
 #endif
 
 extern n_long ip_convertaddr(char *p);
 
 /*
  * Obtain the network boot parameters for the interface open on sock.
  *
  * With SUPPORT_BOOTP defined, BOOTP is attempted first when try_bootp is
  * set; if myip is non-zero at that point the RARP/bootparams steps are
  * skipped.  Otherwise our address comes from RARP and the hostname,
  * gateway/netmask and root server/path come from the bootparams RPCs.
  * In either case, a server address found by net_parse_rootpath() replaces
  * rootip.
  *
  * Returns 0 on success, or EIO if RARP or a required bootparams RPC fails.
  * A failed "gateway" lookup is not an error.
  */
 static int
 net_getparams(int sock)
 {
 	/* Receives the netmask string from the "gateway" getfile reply. */
 	char buf[MAXHOSTNAMELEN];
 	n_long rootaddr, smask;
 
 #ifdef	SUPPORT_BOOTP
 	/*
 	 * Try to get boot info using BOOTP.  If we succeed, then
 	 * the server IP address, gateway, and root path will all
 	 * be initialized.  If any remain uninitialized, we will
 	 * use RARP and RPC/bootparam (the Sun way) to get them.
 	 */
 	if (try_bootp)
 		bootp(sock, BOOTP_NONE);
 	/* An address is already known: skip RARP and bootparams. */
 	if (myip.s_addr != 0)
 		goto exit;
 #ifdef	NETIF_DEBUG
 	if (debug)
 		printf("net_open: BOOTP failed, trying RARP/RPC...\n");
 #endif
 #endif
 
 	/*
 	 * Use RARP to get our IP address.  This also sets our
 	 * netmask to the "natural" default for our address.
 	 */
 	if (rarp_getipaddress(sock)) {
 		printf("net_open: RARP failed\n");
 		return (EIO);
 	}
 	printf("net_open: client addr: %s\n", inet_ntoa(myip));
 
 	/* Get our hostname, server IP address, gateway. */
 	if (bp_whoami(sock)) {
 		printf("net_open: bootparam/whoami RPC failed\n");
 		return (EIO);
 	}
 #ifdef	NETIF_DEBUG
 	if (debug)
 		printf("net_open: client name: %s\n", hostname);
 #endif
 
 	/*
 	 * Ignore the gateway from whoami (unreliable).
 	 * Use the "gateway" parameter instead.
 	 */
 	smask = 0;
 	gateip.s_addr = 0;
 	if (bp_getfile(sock, "gateway", &gateip, buf) == 0) {
 		/* Got it!  Parse the netmask. */
 		smask = ip_convertaddr(buf);
 	}
 	/* Only override the RARP-derived netmask with a non-zero value. */
 	if (smask) {
 		netmask = smask;
 #ifdef	NETIF_DEBUG
 		if (debug)
 			printf("net_open: subnet mask: %s\n", intoa(netmask));
 #endif
 	}
 #ifdef	NETIF_DEBUG
 	if (gateip.s_addr && debug)
 		printf("net_open: net gateway: %s\n", inet_ntoa(gateip));
 #endif
 
 	/* Get the root server and pathname. */
 	if (bp_getfile(sock, "root", &rootip, rootpath)) {
 		printf("net_open: bootparam/getfile RPC failed\n");
 		return (EIO);
 	}
 exit:
 	/* An address parsed out of rootpath (or tftpip) takes precedence. */
 	if ((rootaddr = net_parse_rootpath()) != INADDR_NONE)
 		rootip.s_addr = rootaddr;
 
 #ifdef	NETIF_DEBUG
 	if (debug) {
 		printf("net_open: server addr: %s\n", inet_ntoa(rootip));
 		printf("net_open: server path: %s\n", rootpath);
 	}
 #endif
 
 	return (0);
 }
 
 /*
  * devsw print hook: list every interface of every registered network
  * driver as "<dv_name><n>", numbering them consecutively across drivers.
  * In verbose mode the driver's own name and unit number are appended.
  *
  * Prints nothing when no drivers are registered.  Returns 0, or the first
  * non-zero value handed back by pager_output().
  */
 static int
 net_print(int verbose)
 {
 	struct netif_driver *drv;
 	int devno, idx, rc, unit;
 
 	rc = 0;
 	if (netif_drivers[0] == NULL)
 		return (rc);
 
 	printf("%s devices:", netdev.dv_name);
 	rc = pager_output("\n");
 	if (rc != 0)
 		return (rc);
 
 	devno = 0;
 	for (idx = 0; (drv = netif_drivers[idx]) != NULL; idx++) {
 		for (unit = 0; unit < drv->netif_nifs; unit++) {
 			printf("\t%s%d:", netdev.dv_name, devno++);
 			if (verbose) {
 				printf(" (%s%d)", drv->netif_bname,
 				    drv->netif_ifs[unit].dif_unit);
 			}
 			rc = pager_output("\n");
 			if (rc != 0)
 				return (rc);
 		}
 	}
 	return (rc);
 }
 
 /*
  * Strip the server's address off of the rootpath if present and return it in
  * network byte order, leaving just the pathname part in the global rootpath.
  *
  * Also selects the boot protocol: netproto is set to NET_TFTP when tftpip
  * is non-zero (tftpip then becomes the default return value) and to
  * NET_NFS otherwise.  A "server:" prefix in rootpath overrides the default;
  * a ':' in the very first position is not treated as a prefix.
  *
  * Returns INADDR_NONE when no address was found.
  */
 uint32_t
 net_parse_rootpath()
 {
 	int i;
 	n_long addr = INADDR_NONE;
 
 	netproto = NET_NFS;
 
 	if (tftpip.s_addr != 0) {
 		netproto = NET_TFTP;
 		addr = tftpip.s_addr;
 	}
 
 	/*
 	 * Look for the first ':'.  The bound is tested before the element is
 	 * read so that an unterminated rootpath is never accessed at index
 	 * FNAME_SIZE (presumably one past the end of the array -- the
 	 * declaration is not in this file).
 	 */
 	for (i = 0; i < FNAME_SIZE && rootpath[i] != '\0'; i++)
 		if (rootpath[i] == ':')
 			break;
 	if (i && i != FNAME_SIZE && rootpath[i] == ':') {
 		/* Terminate the address part, then parse it. */
 		rootpath[i++] = '\0';
 		addr = inet_addr(&rootpath[0]);
 		/* Slide the path (including its NUL) to the front. */
 		bcopy(&rootpath[i], rootpath, strlen(&rootpath[i])+1);
 	}
 
 	return (addr);
 }
Index: stable/11/sys/boot/common/disk.c
===================================================================
--- stable/11/sys/boot/common/disk.c	(revision 329098)
+++ stable/11/sys/boot/common/disk.c	(revision 329099)
@@ -1,534 +1,421 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/disk.h>
 #include <sys/queue.h>
 #include <stand.h>
 #include <stdarg.h>
 #include <bootstrap.h>
 #include <part.h>
 
 #include "disk.h"
 
 #ifdef DISK_DEBUG
 # define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
 #else
 # define DEBUG(fmt, args...)
 #endif
 
 struct open_disk {
 	struct ptable		*table;
-	off_t			mediasize;
+	uint64_t		mediasize;
+	uint64_t		entrysize;
 	u_int			sectorsize;
-	u_int			flags;
-	int			rcnt;
 };
 
 struct print_args {
 	struct disk_devdesc	*dev;
 	const char		*prefix;
 	int			verbose;
 };
 
-struct dentry {
-	const struct devsw	*d_dev;
-	int			d_unit;
-	int			d_slice;
-	int			d_partition;
-
-	struct open_disk	*od;
-	off_t			d_offset;
-	STAILQ_ENTRY(dentry)	entry;
-#ifdef DISK_DEBUG
-	uint32_t		count;
-#endif
-};
-
-static STAILQ_HEAD(, dentry) opened_disks =
-    STAILQ_HEAD_INITIALIZER(opened_disks);
-
-static int
-disk_lookup(struct disk_devdesc *dev)
-{
-	struct dentry *entry;
-	int rc;
-
-	rc = ENOENT;
-	STAILQ_FOREACH(entry, &opened_disks, entry) {
-		if (entry->d_dev != dev->d_dev ||
-		    entry->d_unit != dev->d_unit)
-			continue;
-		dev->d_opendata = entry->od;
-		if (entry->d_slice == dev->d_slice &&
-		    entry->d_partition == dev->d_partition) {
-			dev->d_offset = entry->d_offset;
-			DEBUG("%s offset %lld", disk_fmtdev(dev),
-			    (long long)dev->d_offset);
-#ifdef DISK_DEBUG
-			entry->count++;
-#endif
-			return (0);
-		}
-		rc = EAGAIN;
-	}
-	return (rc);
-}
-
-static void
-disk_insert(struct disk_devdesc *dev)
-{
-	struct dentry *entry;
-
-	entry = (struct dentry *)malloc(sizeof(struct dentry));
-	if (entry == NULL) {
-		DEBUG("no memory");
-		return;
-	}
-	entry->d_dev = dev->d_dev;
-	entry->d_unit = dev->d_unit;
-	entry->d_slice = dev->d_slice;
-	entry->d_partition = dev->d_partition;
-	entry->od = (struct open_disk *)dev->d_opendata;
-	entry->od->rcnt++;
-	entry->d_offset = dev->d_offset;
-#ifdef DISK_DEBUG
-	entry->count = 1;
-#endif
-	STAILQ_INSERT_TAIL(&opened_disks, entry, entry);
-	DEBUG("%s cached", disk_fmtdev(dev));
-}
-
-#ifdef DISK_DEBUG
-COMMAND_SET(dcachestat, "dcachestat", "get disk cache stats",
-    command_dcachestat);
-
-static int
-command_dcachestat(int argc, char *argv[])
-{
-	struct disk_devdesc dev;
-	struct dentry *entry;
-
-	STAILQ_FOREACH(entry, &opened_disks, entry) {
-		dev.d_dev = (struct devsw *)entry->d_dev;
-		dev.d_unit = entry->d_unit;
-		dev.d_slice = entry->d_slice;
-		dev.d_partition = entry->d_partition;
-		printf("%s %d => %p [%d]\n", disk_fmtdev(&dev), entry->count,
-		    entry->od, entry->od->rcnt);
-	}
-	return (CMD_OK);
-}
-#endif /* DISK_DEBUG */
-
 /*
  * Convert a size given in sectors to a short human-readable string such
  * as "19MB".  The value is truncated, not rounded.
  *
  * The result lives in a static buffer and is overwritten by the next call.
  */
 static char *
 display_size(uint64_t size, u_int sectorsize)
 {
 	static char buf[80];
 	uint64_t divisor, kib;
 	char suffix;
 
 	/* Work in units of 1024 bytes from here on. */
 	kib = size * sectorsize / 1024;
 	if (kib < 10000) {
 		divisor = 1;
 		suffix = 'K';
 	} else if (kib < 10240000) {
 		divisor = 1024;
 		suffix = 'M';
 	} else if (kib < 10485760000LL) {
 		divisor = 1048576;
 		suffix = 'G';
 	} else {
 		divisor = 1073741824;
 		suffix = 'T';
 	}
 	sprintf(buf, "%ld%cB", (long)(kib / divisor), suffix);
 	return (buf);
 }
 
 int
-ptblread(void *d, void *buf, size_t blocks, off_t offset)
+ptblread(void *d, void *buf, size_t blocks, uint64_t offset)
 {
 	struct disk_devdesc *dev;
 	struct open_disk *od;
 
 	dev = (struct disk_devdesc *)d;
 	od = (struct open_disk *)dev->d_opendata;
 	return (dev->d_dev->dv_strategy(dev, F_READ, offset,
 	    blocks * od->sectorsize, (char *)buf, NULL));
 }
 
 #define	PWIDTH	35
 static int
 ptable_print(void *arg, const char *pname, const struct ptable_entry *part)
 {
+	struct disk_devdesc dev;
 	struct print_args *pa, bsd;
 	struct open_disk *od;
 	struct ptable *table;
 	char line[80];
 	int res;
 
 	pa = (struct print_args *)arg;
 	od = (struct open_disk *)pa->dev->d_opendata;
 	sprintf(line, "  %s%s: %s", pa->prefix, pname,
 	    parttype2str(part->type));
 	if (pa->verbose)
 		sprintf(line, "%-*s%s", PWIDTH, line,
 		    display_size(part->end - part->start + 1,
 		    od->sectorsize));
 	strcat(line, "\n");
 	if (pager_output(line))
 		return 1;
 	res = 0;
 	if (part->type == PART_FREEBSD) {
 		/* Open slice with BSD label */
-		pa->dev->d_offset = part->start;
-		table = ptable_open(pa->dev, part->end - part->start + 1,
-		    od->sectorsize, ptblread);
-		if (table == NULL)
-			return 0;
-		sprintf(line, "  %s%s", pa->prefix, pname);
-		bsd.dev = pa->dev;
-		bsd.prefix = line;
-		bsd.verbose = pa->verbose;
-		res = ptable_iterate(table, &bsd, ptable_print);
-		ptable_close(table);
+		dev.d_dev = pa->dev->d_dev;
+		dev.d_unit = pa->dev->d_unit;
+		dev.d_slice = part->index;
+		dev.d_partition = -1;
+		if (disk_open(&dev, part->end - part->start + 1,
+		    od->sectorsize) == 0) {
+			table = ptable_open(&dev, part->end - part->start + 1,
+			    od->sectorsize, ptblread);
+			if (table != NULL) {
+				sprintf(line, "  %s%s", pa->prefix, pname);
+				bsd.dev = pa->dev;
+				bsd.prefix = line;
+				bsd.verbose = pa->verbose;
+				res = ptable_iterate(table, &bsd, ptable_print);
+				ptable_close(table);
+			}
+			disk_close(&dev);
+		}
 	}
 
 	return (res);
 }
 #undef PWIDTH
 
 /*
  * Print the partition table of an opened disk by running ptable_print()
  * over every entry.  Returns whatever ptable_iterate() returns.
  *
  * dev->d_opendata is dereferenced without a check, so the disk must
  * already be open.
  */
 int
 disk_print(struct disk_devdesc *dev, char *prefix, int verbose)
 {
 	struct print_args pa;
 	struct open_disk *od;
 
 	pa.verbose = verbose;
 	pa.prefix = prefix;
 	pa.dev = dev;
 
 	/* Disk should be opened */
 	od = (struct open_disk *)dev->d_opendata;
 	return (ptable_iterate(od->table, &pa, ptable_print));
 }
 
 int
-disk_read(struct disk_devdesc *dev, void *buf, off_t offset, u_int blocks)
+disk_read(struct disk_devdesc *dev, void *buf, uint64_t offset, u_int blocks)
 {
 	struct open_disk *od;
 	int ret;
 
 	od = (struct open_disk *)dev->d_opendata;
 	ret = dev->d_dev->dv_strategy(dev, F_READ, dev->d_offset + offset,
 	    blocks * od->sectorsize, buf, NULL);
 
 	return (ret);
 }
 
 int
-disk_write(struct disk_devdesc *dev, void *buf, off_t offset, u_int blocks)
+disk_write(struct disk_devdesc *dev, void *buf, uint64_t offset, u_int blocks)
 {
 	struct open_disk *od;
 	int ret;
 
 	od = (struct open_disk *)dev->d_opendata;
 	ret = dev->d_dev->dv_strategy(dev, F_WRITE, dev->d_offset + offset,
 	    blocks * od->sectorsize, buf, NULL);
 
 	return (ret);
 }
 
 int
-disk_ioctl(struct disk_devdesc *dev, u_long cmd, void *buf)
+disk_ioctl(struct disk_devdesc *dev, u_long cmd, void *data)
 {
+	struct open_disk *od = dev->d_opendata;
 
-	if (dev->d_dev->dv_ioctl)
-		return ((*dev->d_dev->dv_ioctl)(dev->d_opendata, cmd, buf));
+	if (od == NULL)
+		return (ENOTTY);
 
-	return (ENXIO);
+	switch (cmd) {
+	case DIOCGSECTORSIZE:
+		*(u_int *)data = od->sectorsize;
+		break;
+	case DIOCGMEDIASIZE:
+		if (dev->d_offset == 0)
+			*(uint64_t *)data = od->mediasize;
+		else
+			*(uint64_t *)data = od->entrysize * od->sectorsize;
+		break;
+	default:
+		return (ENOTTY);
+	}
+
+	return (0);
 }
 
 int
-disk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize,
-    u_int flags)
+disk_open(struct disk_devdesc *dev, uint64_t mediasize, u_int sectorsize)
 {
 	struct open_disk *od;
 	struct ptable *table;
 	struct ptable_entry part;
 	int rc, slice, partition;
 
 	rc = 0;
-	if ((flags & DISK_F_NOCACHE) == 0) {
-		rc = disk_lookup(dev);
-		if (rc == 0)
-			return (0);
-	}
 	/*
 	 * While we are reading disk metadata, make sure we do it relative
 	 * to the start of the disk
 	 */
 	dev->d_offset = 0;
 	table = NULL;
 	slice = dev->d_slice;
 	partition = dev->d_partition;
-	if (rc == EAGAIN) {
-		/*
-		 * This entire disk was already opened and there is no
-		 * need to allocate new open_disk structure and open the
-		 * main partition table.
-		 */
-		od = (struct open_disk *)dev->d_opendata;
-		DEBUG("%s unit %d, slice %d, partition %d => %p (cached)",
-		    disk_fmtdev(dev), dev->d_unit, dev->d_slice,
-		    dev->d_partition, od);
-		goto opened;
-	} else {
-		od = (struct open_disk *)malloc(sizeof(struct open_disk));
-		if (od == NULL) {
-			DEBUG("no memory");
-			return (ENOMEM);
-		}
-		dev->d_opendata = od;
-		od->rcnt = 0;
+	od = (struct open_disk *)malloc(sizeof(struct open_disk));
+	if (od == NULL) {
+		DEBUG("no memory");
+		return (ENOMEM);
 	}
+	dev->d_opendata = od;
+	od->entrysize = 0;
 	od->mediasize = mediasize;
 	od->sectorsize = sectorsize;
-	od->flags = flags;
 	DEBUG("%s unit %d, slice %d, partition %d => %p",
 	    disk_fmtdev(dev), dev->d_unit, dev->d_slice, dev->d_partition, od);
 
 	/* Determine disk layout. */
 	od->table = ptable_open(dev, mediasize / sectorsize, sectorsize,
 	    ptblread);
 	if (od->table == NULL) {
 		DEBUG("Can't read partition table");
 		rc = ENXIO;
 		goto out;
 	}
-opened:
-	rc = 0;
+
+	if (ptable_getsize(od->table, &mediasize) != 0) {
+		rc = ENXIO;
+		goto out;
+	}
+	if (mediasize > od->mediasize) {
+		od->mediasize = mediasize;
+	}
+
 	if (ptable_gettype(od->table) == PTABLE_BSD &&
 	    partition >= 0) {
 		/* It doesn't matter what value has d_slice */
 		rc = ptable_getpart(od->table, &part, partition);
-		if (rc == 0)
+		if (rc == 0) {
 			dev->d_offset = part.start;
+			od->entrysize = part.end - part.start + 1;
+		}
 	} else if (slice >= 0) {
 		/* Try to get information about partition */
 		if (slice == 0)
 			rc = ptable_getbestpart(od->table, &part);
 		else
 			rc = ptable_getpart(od->table, &part, slice);
 		if (rc != 0) /* Partition doesn't exist */
 			goto out;
 		dev->d_offset = part.start;
+		od->entrysize = part.end - part.start + 1;
 		slice = part.index;
 		if (ptable_gettype(od->table) == PTABLE_GPT) {
 			partition = 255;
 			goto out; /* Nothing more to do */
 		} else if (partition == 255) {
 			/*
 			 * When we try to open GPT partition, but partition
 			 * table isn't GPT, reset d_partition value to -1
 			 * and try to autodetect appropriate value.
 			 */
 			partition = -1;
 		}
 		/*
 		 * If d_partition < 0 and we are looking at a BSD slice,
 		 * then try to read BSD label, otherwise return the
 		 * whole MBR slice.
 		 */
 		if (partition == -1 &&
 		    part.type != PART_FREEBSD)
 			goto out;
 		/* Try to read BSD label */
 		table = ptable_open(dev, part.end - part.start + 1,
 		    od->sectorsize, ptblread);
 		if (table == NULL) {
 			DEBUG("Can't read BSD label");
 			rc = ENXIO;
 			goto out;
 		}
 		/*
 		 * If slice contains BSD label and d_partition < 0, then
 		 * assume the 'a' partition. Otherwise just return the
 		 * whole MBR slice, because it can contain ZFS.
 		 */
 		if (partition < 0) {
 			if (ptable_gettype(table) != PTABLE_BSD)
 				goto out;
 			partition = 0;
 		}
 		rc = ptable_getpart(table, &part, partition);
 		if (rc != 0)
 			goto out;
 		dev->d_offset += part.start;
+		od->entrysize = part.end - part.start + 1;
 	}
 out:
 	if (table != NULL)
 		ptable_close(table);
 
 	if (rc != 0) {
-		if (od->rcnt < 1) {
-			if (od->table != NULL)
-				ptable_close(od->table);
-			free(od);
-		}
+		if (od->table != NULL)
+			ptable_close(od->table);
+		free(od);
 		DEBUG("%s could not open", disk_fmtdev(dev));
 	} else {
-		if ((flags & DISK_F_NOCACHE) == 0)
-			disk_insert(dev);
 		/* Save the slice and partition number to the dev */
 		dev->d_slice = slice;
 		dev->d_partition = partition;
 		DEBUG("%s offset %lld => %p", disk_fmtdev(dev),
 		    (long long)dev->d_offset, od);
 	}
 	return (rc);
 }
 
 int
 disk_close(struct disk_devdesc *dev)
 {
 	struct open_disk *od;
 
 	od = (struct open_disk *)dev->d_opendata;
-	DEBUG("%s closed => %p [%d]", disk_fmtdev(dev), od, od->rcnt);
-	if (od->flags & DISK_F_NOCACHE) {
-		ptable_close(od->table);
-		free(od);
-	}
+	DEBUG("%s closed => %p", disk_fmtdev(dev), od);
+	ptable_close(od->table);
+	free(od);
 	return (0);
-}
-
-void
-disk_cleanup(const struct devsw *d_dev)
-{
-#ifdef DISK_DEBUG
-	struct disk_devdesc dev;
-#endif
-	struct dentry *entry, *tmp;
-
-	STAILQ_FOREACH_SAFE(entry, &opened_disks, entry, tmp) {
-		if (entry->d_dev != d_dev)
-			continue;
-		entry->od->rcnt--;
-#ifdef DISK_DEBUG
-		dev.d_dev = (struct devsw *)entry->d_dev;
-		dev.d_unit = entry->d_unit;
-		dev.d_slice = entry->d_slice;
-		dev.d_partition = entry->d_partition;
-		DEBUG("%s was freed => %p [%d]", disk_fmtdev(&dev),
-		    entry->od, entry->od->rcnt);
-#endif
-		STAILQ_REMOVE(&opened_disks, entry, dentry, entry);
-		if (entry->od->rcnt < 1) {
-			if (entry->od->table != NULL)
-				ptable_close(entry->od->table);
-			free(entry->od);
-		}
-		free(entry);
-	}
 }
 
 char*
 disk_fmtdev(struct disk_devdesc *dev)
 {
 	static char buf[128];
 	char *cp;
 
 	cp = buf + sprintf(buf, "%s%d", dev->d_dev->dv_name, dev->d_unit);
 	if (dev->d_slice >= 0) {
 #ifdef LOADER_GPT_SUPPORT
 		if (dev->d_partition == 255) {
 			sprintf(cp, "p%d:", dev->d_slice);
 			return (buf);
 		} else
 #endif
 #ifdef LOADER_MBR_SUPPORT
 			cp += sprintf(cp, "s%d", dev->d_slice);
 #endif
 	}
 	if (dev->d_partition >= 0)
 		cp += sprintf(cp, "%c", dev->d_partition + 'a');
 	strcat(cp, ":");
 	return (buf);
 }
 
 int
 disk_parsedev(struct disk_devdesc *dev, const char *devspec, const char **path)
 {
 	int unit, slice, partition;
 	const char *np;
 	char *cp;
 
 	np = devspec;
 	unit = slice = partition = -1;
 	if (*np != '\0' && *np != ':') {
 		unit = strtol(np, &cp, 10);
 		if (cp == np)
 			return (EUNIT);
 #ifdef LOADER_GPT_SUPPORT
 		if (*cp == 'p') {
 			np = cp + 1;
 			slice = strtol(np, &cp, 10);
 			if (np == cp)
 				return (ESLICE);
 			/* we don't support nested partitions on GPT */
 			if (*cp != '\0' && *cp != ':')
 				return (EINVAL);
 			partition = 255;
 		} else
 #endif
 #ifdef LOADER_MBR_SUPPORT
 		if (*cp == 's') {
 			np = cp + 1;
 			slice = strtol(np, &cp, 10);
 			if (np == cp)
 				return (ESLICE);
 		}
 #endif
 		if (*cp != '\0' && *cp != ':') {
 			partition = *cp - 'a';
 			if (partition < 0)
 				return (EPART);
 			cp++;
 		}
 	} else
 		return (EINVAL);
 
 	if (*cp != '\0' && *cp != ':')
 		return (EINVAL);
 	dev->d_unit = unit;
 	dev->d_slice = slice;
 	dev->d_partition = partition;
 	if (path != NULL)
 		*path = (*cp == '\0') ? cp: cp + 1;
 	return (0);
 }
Index: stable/11/sys/boot/common/disk.h
===================================================================
--- stable/11/sys/boot/common/disk.h	(revision 329098)
+++ stable/11/sys/boot/common/disk.h	(revision 329099)
@@ -1,119 +1,117 @@
 /*-
  * Copyright (c) 2011 Google, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Device descriptor for partitioned disks. To use, set the
  * d_slice and d_partition variables as follows:
  *
  * Whole disk access:
  *
  * 	d_slice = -1
  * 	d_partition = -1
  *
  * Whole MBR slice:
  *
  * 	d_slice = MBR slice number (typically 1..4)
  * 	d_partition = -1
  *
  * BSD disklabel partition within an MBR slice:
  *
  * 	d_slice = MBR slice number (typically 1..4)
  * 	d_partition = disklabel partition (typically 0..19)
  *
  * BSD disklabel partition on the true dedicated disk:
  *
  * 	d_slice = -1
  * 	d_partition = disklabel partition (typically 0..19)
  *
  * GPT partition:
  *
  * 	d_slice = GPT partition number (typically 1..N)
  * 	d_partition = 255
  *
  * For both MBR and GPT, to automatically find the 'best' slice or partition,
  * set d_slice to zero. This uses the partition type to decide which partition
  * to use according to the following list of preferences:
  *
  * 	FreeBSD (active)
  * 	FreeBSD (inactive)
  * 	Linux (active)
  * 	Linux (inactive)
  * 	DOS/Windows (active)
  * 	DOS/Windows (inactive)
  *
  * Active MBR slices (marked as bootable) are preferred over inactive. GPT
  * doesn't have the concept of active/inactive partitions. In both MBR and GPT,
  * if there are multiple slices/partitions of a given type, the first one
  * is chosen.
  *
  * The low-level disk device will typically call disk_open() from its open
  * method to interpret the disk partition tables according to the rules above.
  * This will initialize d_offset to the block offset of the start of the
  * selected partition - this offset should be added to the offset passed to
  * the device's strategy method.
  */
 
+#ifndef	_DISK_H
+#define	_DISK_H
+
 struct disk_devdesc
 {
 	struct devsw	*d_dev;
 	int		d_type;
 	int		d_unit;
 	void		*d_opendata;
 	int		d_slice;
 	int		d_partition;
-	off_t		d_offset;
+	uint64_t	d_offset;
 };
 
 enum disk_ioctl {
 	IOCTL_GET_BLOCKS,
 	IOCTL_GET_BLOCK_SIZE
 };
 
 /*
  * Parse disk metadata and initialise dev->d_offset.
  */
-extern int disk_open(struct disk_devdesc *dev, off_t mediasize,
-    u_int sectorsize, u_int flags);
-#define	DISK_F_NOCACHE	0x0001		/* Do not use metadata caching */
-extern int disk_close(struct disk_devdesc *dev);
-extern void disk_cleanup(const struct devsw *d_dev);
-extern int disk_ioctl(struct disk_devdesc *dev, u_long cmd, void *buf);
-extern int disk_read(struct disk_devdesc *dev, void *buf, off_t offset,
-    u_int blocks);
-extern int disk_write(struct disk_devdesc *dev, void *buf, off_t offset,
-    u_int blocks);
-extern int ptblread(void *d, void *buf, size_t blocks, off_t offset);
+extern int disk_open(struct disk_devdesc *, uint64_t, u_int);
+extern int disk_close(struct disk_devdesc *);
+extern int disk_ioctl(struct disk_devdesc *, u_long, void *);
+extern int disk_read(struct disk_devdesc *, void *, uint64_t, u_int);
+extern int disk_write(struct disk_devdesc *, void *, uint64_t, u_int);
+extern int ptblread(void *, void *, size_t, uint64_t);
 
 /*
  * Print information about slices on a disk.
  */
-extern int disk_print(struct disk_devdesc *dev, char *prefix, int verbose);
-extern char* disk_fmtdev(struct disk_devdesc *dev);
-extern int disk_parsedev(struct disk_devdesc *dev, const char *devspec,
-    const char **path);
+extern int disk_print(struct disk_devdesc *, char *, int);
+extern char* disk_fmtdev(struct disk_devdesc *);
+extern int disk_parsedev(struct disk_devdesc *, const char *, const char **);
 
+#endif	/* _DISK_H */
Index: stable/11/sys/boot/common/ls.c
===================================================================
--- stable/11/sys/boot/common/ls.c	(revision 329098)
+++ stable/11/sys/boot/common/ls.c	(revision 329099)
@@ -1,187 +1,212 @@
 /*
  * $NetBSD: ls.c,v 1.3 1997/06/13 13:48:47 drochner Exp $
  */
 
 /*-
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 1996
  *	Matthias Drochner.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <ufs/ufs/dinode.h>
 #include <ufs/ufs/dir.h>
 
 #include <stand.h>
 #include <string.h>
 
 #include "bootstrap.h"
 
 static char typestr[] = "?fc?d?b? ?l?s?w";
 
 static int	ls_getdir(char **pathp);
 
 COMMAND_SET(ls, "ls", "list files", command_ls);
 
 static int
 command_ls(int argc, char *argv[])
 {
     int		fd;
     struct stat	sb;
-    struct 	dirent *d;
+    struct	dirent *d;
     char	*buf, *path;
     char	lbuf[128];		/* one line */
     int		result, ch;
     int		verbose;
-	
+
     result = CMD_OK;
     fd = -1;
     verbose = 0;
     optind = 1;
     optreset = 1;
     while ((ch = getopt(argc, argv, "l")) != -1) {
-	switch(ch) {
+	switch (ch) {
 	case 'l':
 	    verbose = 1;
 	    break;
 	case '?':
 	default:
 	    /* getopt has already reported an error */
-	    return(CMD_OK);
+	    return (CMD_OK);
 	}
     }
     argv += (optind - 1);
     argc -= (optind - 1);
 
     if (argc < 2) {
 	path = "";
     } else {
 	path = argv[1];
     }
 
+    if (stat(path, &sb) == 0 && !S_ISDIR(sb.st_mode)) {
+	if (verbose) {
+	    printf(" %c %8d %s\n",
+		typestr[sb.st_mode >> 12],
+		(int)sb.st_size, path);
+	} else {
+	    printf(" %c  %s\n",
+		typestr[sb.st_mode >> 12], path);
+	}
+	return (CMD_OK);
+    }
+
     fd = ls_getdir(&path);
     if (fd == -1) {
 	result = CMD_ERROR;
 	goto out;
     }
     pager_open();
     pager_output(path);
     pager_output("\n");
 
     while ((d = readdirfd(fd)) != NULL) {
 	if (strcmp(d->d_name, ".") && strcmp(d->d_name, "..")) {
-	    if (verbose) {
+	    if (d->d_type == 0 || verbose) {
 		/* stat the file, if possible */
 		sb.st_size = 0;
+		sb.st_mode = 0;
 		buf = malloc(strlen(path) + strlen(d->d_name) + 2);
-		sprintf(buf, "%s/%s", path, d->d_name);
-		/* ignore return, could be symlink, etc. */
-		if (stat(buf, &sb))
-		    sb.st_size = 0;
-		free(buf);
-		sprintf(lbuf, " %c %8d %s\n", typestr[d->d_type],
+		if (buf != NULL) {
+		    sprintf(buf, "%s/%s", path, d->d_name);
+		    /* ignore return, could be symlink, etc. */
+		    if (stat(buf, &sb)) {
+			sb.st_size = 0;
+			sb.st_mode = 0;
+		    }
+		    free(buf);
+		}
+	    }
+	    if (verbose) {
+		snprintf(lbuf, sizeof(lbuf), " %c %8d %s\n",
+		    typestr[d->d_type? d->d_type:sb.st_mode >> 12],
 		    (int)sb.st_size, d->d_name);
 	    } else {
-		sprintf(lbuf, " %c  %s\n", typestr[d->d_type], d->d_name);
+		snprintf(lbuf, sizeof(lbuf), " %c  %s\n",
+		    typestr[d->d_type? d->d_type:sb.st_mode >> 12], d->d_name);
 	    }
 	    if (pager_output(lbuf))
 		goto out;
 	}
     }
  out:
     pager_close();
     if (fd != -1)
 	close(fd);
-    if (path != NULL)
-	free(path);
-    return(result);
+    free(path);		/* allocated by ls_getdir(); NULL if it failed */
+    return (result);
 }
 
 /*
  * Given (path) containing a vaguely reasonable path specification, return an fd
  * on the directory, and an allocated copy of the path to the directory.
  */
 static int
 ls_getdir(char **pathp)
 {
     struct stat	sb;
     int		fd;
     const char	*cp;
     char	*path;
     
     fd = -1;
 
     /* one extra byte for a possible trailing slash required */
     path = malloc(strlen(*pathp) + 2);
+    if (path == NULL) {
+	snprintf(command_errbuf, sizeof (command_errbuf),
+	    "out of memory");
+	goto out;
+    }
     strcpy(path, *pathp);
 
     /* Make sure the path is respectable to begin with */
     if (archsw.arch_getdev(NULL, path, &cp)) {
 	snprintf(command_errbuf, sizeof(command_errbuf),
 	    "bad path '%s'", path);
 	goto out;
     }
-    
+
     /* If there's no path on the device, assume '/' */
     if (*cp == 0)
 	strcat(path, "/");
 
     fd = open(path, O_RDONLY);
     if (fd < 0) {
 	snprintf(command_errbuf, sizeof(command_errbuf),
 	    "open '%s' failed: %s", path, strerror(errno));
 	goto out;
     }
     if (fstat(fd, &sb) < 0) {
 	snprintf(command_errbuf, sizeof(command_errbuf),
 	    "stat failed: %s", strerror(errno));
 	goto out;
     }
     if (!S_ISDIR(sb.st_mode)) {
 	snprintf(command_errbuf, sizeof(command_errbuf),
 	    "%s: %s", path, strerror(ENOTDIR));
 	goto out;
     }
 
     *pathp = path;
-    return(fd);
+    return (fd);
 
  out:
     free(path);
     *pathp = NULL;
     if (fd != -1)
 	close(fd);
-    return(-1);
+    return (-1);
 }
Index: stable/11/sys/boot/common/part.c
===================================================================
--- stable/11/sys/boot/common/part.c	(revision 329098)
+++ stable/11/sys/boot/common/part.c	(revision 329099)
@@ -1,865 +1,898 @@
 /*-
  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <sys/param.h>
 #include <sys/diskmbr.h>
 #include <sys/disklabel.h>
 #include <sys/endian.h>
 #include <sys/gpt.h>
 #include <sys/stddef.h>
 #include <sys/queue.h>
 #include <sys/vtoc.h>
 
 #include <crc32.h>
 #include <part.h>
 #include <uuid.h>
 
 #ifdef PART_DEBUG
 #define	DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args)
 #else
 #define	DEBUG(fmt, args...)
 #endif
 
 #ifdef LOADER_GPT_SUPPORT
 #define	MAXTBLSZ	64
 static const uuid_t gpt_uuid_unused = GPT_ENT_TYPE_UNUSED;
 static const uuid_t gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA;
 static const uuid_t gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS;
 static const uuid_t gpt_uuid_efi = GPT_ENT_TYPE_EFI;
 static const uuid_t gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD;
 static const uuid_t gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT;
 static const uuid_t gpt_uuid_freebsd_nandfs = GPT_ENT_TYPE_FREEBSD_NANDFS;
 static const uuid_t gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP;
 static const uuid_t gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS;
 static const uuid_t gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM;
 #endif
 
 struct pentry {
 	struct ptable_entry	part;
 	uint64_t		flags;
 	union {
 		uint8_t bsd;
 		uint8_t	mbr;
 		uuid_t	gpt;
 		uint16_t vtoc8;
 	} type;
 	STAILQ_ENTRY(pentry)	entry;
 };
 
 struct ptable {
 	enum ptable_type	type;
 	uint16_t		sectorsize;
 	uint64_t		sectors;
 
 	STAILQ_HEAD(, pentry)	entries;
 };
 
 static struct parttypes {
 	enum partition_type	type;
 	const char		*desc;
 } ptypes[] = {
 	{ PART_UNKNOWN,		"Unknown" },
 	{ PART_EFI,		"EFI" },
 	{ PART_FREEBSD,		"FreeBSD" },
 	{ PART_FREEBSD_BOOT,	"FreeBSD boot" },
 	{ PART_FREEBSD_NANDFS,	"FreeBSD nandfs" },
 	{ PART_FREEBSD_UFS,	"FreeBSD UFS" },
 	{ PART_FREEBSD_ZFS,	"FreeBSD ZFS" },
 	{ PART_FREEBSD_SWAP,	"FreeBSD swap" },
 	{ PART_FREEBSD_VINUM,	"FreeBSD vinum" },
 	{ PART_LINUX,		"Linux" },
 	{ PART_LINUX_SWAP,	"Linux swap" },
 	{ PART_DOS,		"DOS/Windows" },
 };
 
 const char *
 parttype2str(enum partition_type type)
 {
 	size_t i;
 
 	for (i = 0; i < nitems(ptypes); i++)
 		if (ptypes[i].type == type)
 			return (ptypes[i].desc);
 	return (ptypes[0].desc);
 }
 
 #ifdef LOADER_GPT_SUPPORT
 static void
 uuid_letoh(uuid_t *uuid)
 {
 
 	uuid->time_low = le32toh(uuid->time_low);
 	uuid->time_mid = le16toh(uuid->time_mid);
 	uuid->time_hi_and_version = le16toh(uuid->time_hi_and_version);
 }
 
 static enum partition_type
 gpt_parttype(uuid_t type)
 {
 
 	if (uuid_equal(&type, &gpt_uuid_efi, NULL))
 		return (PART_EFI);
 	else if (uuid_equal(&type, &gpt_uuid_ms_basic_data, NULL))
 		return (PART_DOS);
 	else if (uuid_equal(&type, &gpt_uuid_freebsd_boot, NULL))
 		return (PART_FREEBSD_BOOT);
 	else if (uuid_equal(&type, &gpt_uuid_freebsd_ufs, NULL))
 		return (PART_FREEBSD_UFS);
 	else if (uuid_equal(&type, &gpt_uuid_freebsd_zfs, NULL))
 		return (PART_FREEBSD_ZFS);
 	else if (uuid_equal(&type, &gpt_uuid_freebsd_swap, NULL))
 		return (PART_FREEBSD_SWAP);
 	else if (uuid_equal(&type, &gpt_uuid_freebsd_vinum, NULL))
 		return (PART_FREEBSD_VINUM);
 	else if (uuid_equal(&type, &gpt_uuid_freebsd_nandfs, NULL))
 		return (PART_FREEBSD_NANDFS);
 	else if (uuid_equal(&type, &gpt_uuid_freebsd, NULL))
 		return (PART_FREEBSD);
 	return (PART_UNKNOWN);
 }
 
 static struct gpt_hdr*
 gpt_checkhdr(struct gpt_hdr *hdr, uint64_t lba_self, uint64_t lba_last,
     uint16_t sectorsize)
 {
 	uint32_t sz, crc;
 
 	if (memcmp(hdr->hdr_sig, GPT_HDR_SIG, sizeof(hdr->hdr_sig)) != 0) {
 		DEBUG("no GPT signature");
 		return (NULL);
 	}
 	sz = le32toh(hdr->hdr_size);
 	if (sz < 92 || sz > sectorsize) {
 		DEBUG("invalid GPT header size: %d", sz);
 		return (NULL);
 	}
 	crc = le32toh(hdr->hdr_crc_self);
 	hdr->hdr_crc_self = 0;
 	if (crc32(hdr, sz) != crc) {
 		DEBUG("GPT header's CRC doesn't match");
 		return (NULL);
 	}
 	hdr->hdr_crc_self = crc;
 	hdr->hdr_revision = le32toh(hdr->hdr_revision);
 	if (hdr->hdr_revision < GPT_HDR_REVISION) {
 		DEBUG("unsupported GPT revision %d", hdr->hdr_revision);
 		return (NULL);
 	}
 	hdr->hdr_lba_self = le64toh(hdr->hdr_lba_self);
 	if (hdr->hdr_lba_self != lba_self) {
 		DEBUG("self LBA doesn't match");
 		return (NULL);
 	}
 	hdr->hdr_lba_alt = le64toh(hdr->hdr_lba_alt);
 	if (hdr->hdr_lba_alt == hdr->hdr_lba_self) {
 		DEBUG("invalid alternate LBA");
 		return (NULL);
 	}
 	hdr->hdr_entries = le32toh(hdr->hdr_entries);
 	hdr->hdr_entsz = le32toh(hdr->hdr_entsz);
 	if (hdr->hdr_entries == 0 ||
 	    hdr->hdr_entsz < sizeof(struct gpt_ent) ||
 	    sectorsize % hdr->hdr_entsz != 0) {
 		DEBUG("invalid entry size or number of entries");
 		return (NULL);
 	}
 	hdr->hdr_lba_start = le64toh(hdr->hdr_lba_start);
 	hdr->hdr_lba_end = le64toh(hdr->hdr_lba_end);
 	hdr->hdr_lba_table = le64toh(hdr->hdr_lba_table);
 	hdr->hdr_crc_table = le32toh(hdr->hdr_crc_table);
 	uuid_letoh(&hdr->hdr_uuid);
 	return (hdr);
 }
 
 static int
 gpt_checktbl(const struct gpt_hdr *hdr, u_char *tbl, size_t size,
     uint64_t lba_last)
 {
 	struct gpt_ent *ent;
 	uint32_t i, cnt;
 
 	cnt = size / hdr->hdr_entsz;
 	if (hdr->hdr_entries <= cnt) {
 		cnt = hdr->hdr_entries;
 		/* Check CRC only when buffer size is enough for table. */
 		if (hdr->hdr_crc_table !=
 		    crc32(tbl, hdr->hdr_entries * hdr->hdr_entsz)) {
 			DEBUG("GPT table's CRC doesn't match");
 			return (-1);
 		}
 	}
 	for (i = 0; i < cnt; i++) {
 		ent = (struct gpt_ent *)(tbl + i * hdr->hdr_entsz);
 		uuid_letoh(&ent->ent_type);
 		if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
 			continue;
 		ent->ent_lba_start = le64toh(ent->ent_lba_start);
 		ent->ent_lba_end = le64toh(ent->ent_lba_end);
 	}
 	return (0);
 }
 
 static struct ptable*
 ptable_gptread(struct ptable *table, void *dev, diskread_t dread)
 {
 	struct pentry *entry;
 	struct gpt_hdr *phdr, hdr;
 	struct gpt_ent *ent;
 	u_char *buf, *tbl;
 	uint64_t offset;
 	int pri, sec;
 	size_t size, i;
 
 	buf = malloc(table->sectorsize);
 	if (buf == NULL)
 		return (NULL);
 	tbl = malloc(table->sectorsize * MAXTBLSZ);
 	if (tbl == NULL) {
 		free(buf);
 		return (NULL);
 	}
 	/* Read the primary GPT header. */
 	if (dread(dev, buf, 1, 1) != 0) {
 		ptable_close(table);
 		table = NULL;
 		goto out;
 	}
 	pri = sec = 0;
 	/* Check the primary GPT header. */
 	phdr = gpt_checkhdr((struct gpt_hdr *)buf, 1, table->sectors - 1,
 	    table->sectorsize);
 	if (phdr != NULL) {
 		/* Read the primary GPT table. */
 		size = MIN(MAXTBLSZ,
 		    howmany(phdr->hdr_entries * phdr->hdr_entsz,
 		        table->sectorsize));
 		if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
 		    gpt_checktbl(phdr, tbl, size * table->sectorsize,
 		    table->sectors - 1) == 0) {
 			memcpy(&hdr, phdr, sizeof(hdr));
 			pri = 1;
 		}
 	}
 	offset = pri ? hdr.hdr_lba_alt: table->sectors - 1;
 	/* Read the backup GPT header. */
 	if (dread(dev, buf, 1, offset) != 0)
 		phdr = NULL;
 	else
 		phdr = gpt_checkhdr((struct gpt_hdr *)buf, offset,
 		    table->sectors - 1, table->sectorsize);
 	if (phdr != NULL) {
 		/*
 		 * Compare primary and backup headers.
 		 * If they are equal, then we do not need to read backup
 		 * table. If they are different, then prefer backup header
 		 * and try to read backup table.
 		 */
 		if (pri == 0 ||
 		    uuid_equal(&hdr.hdr_uuid, &phdr->hdr_uuid, NULL) == 0 ||
 		    hdr.hdr_revision != phdr->hdr_revision ||
 		    hdr.hdr_size != phdr->hdr_size ||
 		    hdr.hdr_lba_start != phdr->hdr_lba_start ||
 		    hdr.hdr_lba_end != phdr->hdr_lba_end ||
 		    hdr.hdr_entries != phdr->hdr_entries ||
 		    hdr.hdr_entsz != phdr->hdr_entsz ||
 		    hdr.hdr_crc_table != phdr->hdr_crc_table) {
 			/* Read the backup GPT table. */
 			size = MIN(MAXTBLSZ,
 				   howmany(phdr->hdr_entries * phdr->hdr_entsz,
 				       table->sectorsize));
 			if (dread(dev, tbl, size, phdr->hdr_lba_table) == 0 &&
 			    gpt_checktbl(phdr, tbl, size * table->sectorsize,
 			    table->sectors - 1) == 0) {
 				memcpy(&hdr, phdr, sizeof(hdr));
 				sec = 1;
 			}
 		}
 	}
 	if (pri == 0 && sec == 0) {
 		/* Both primary and backup tables are invalid. */
 		table->type = PTABLE_NONE;
 		goto out;
 	}
 	DEBUG("GPT detected");
 	size = MIN(hdr.hdr_entries * hdr.hdr_entsz,
 	    MAXTBLSZ * table->sectorsize);
+
+	/*
+	 * If the disk's sector count is smaller than the sector count implied
+	 * by the GPT header (hdr_lba_alt + 1), set table->sectors to the value
+	 * derived from the GPT header. This is done to work around buggy
+	 * firmware that returns truncated disk sizes.
+	 *
+	 * Note, this is still not a foolproof way to get the disk's size. For
+	 * example, an image file can be truncated when copied to smaller media.
+	 */
+	if (hdr.hdr_lba_alt + 1 > table->sectors)
+		table->sectors = hdr.hdr_lba_alt + 1;
+
 	for (i = 0; i < size / hdr.hdr_entsz; i++) {
 		ent = (struct gpt_ent *)(tbl + i * hdr.hdr_entsz);
 		if (uuid_equal(&ent->ent_type, &gpt_uuid_unused, NULL))
 			continue;
+
+		/* Simple sanity checks. */
+		if (ent->ent_lba_start < hdr.hdr_lba_start ||
+		    ent->ent_lba_end > hdr.hdr_lba_end ||
+		    ent->ent_lba_start > ent->ent_lba_end)
+			continue;
+
 		entry = malloc(sizeof(*entry));
 		if (entry == NULL)
 			break;
 		entry->part.start = ent->ent_lba_start;
 		entry->part.end = ent->ent_lba_end;
 		entry->part.index = i + 1;
 		entry->part.type = gpt_parttype(ent->ent_type);
 		entry->flags = le64toh(ent->ent_attr);
 		memcpy(&entry->type.gpt, &ent->ent_type, sizeof(uuid_t));
 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
 		DEBUG("new GPT partition added");
 	}
 out:
 	free(buf);
 	free(tbl);
 	return (table);
 }
 #endif /* LOADER_GPT_SUPPORT */
 
 #ifdef LOADER_MBR_SUPPORT
 /* We do not need to support too many EBR partitions in the loader */
 #define	MAXEBRENTRIES		8
 static enum partition_type
 mbr_parttype(uint8_t type)
 {
 
 	switch (type) {
 	case DOSPTYP_386BSD:
 		return (PART_FREEBSD);
 	case DOSPTYP_LINSWP:
 		return (PART_LINUX_SWAP);
 	case DOSPTYP_LINUX:
 		return (PART_LINUX);
 	case 0x01:
 	case 0x04:
 	case 0x06:
 	case 0x07:
 	case 0x0b:
 	case 0x0c:
 	case 0x0e:
 		return (PART_DOS);
 	}
 	return (PART_UNKNOWN);
 }
 
 static struct ptable*
 ptable_ebrread(struct ptable *table, void *dev, diskread_t dread)
 {
 	struct dos_partition *dp;
 	struct pentry *e1, *entry;
 	uint32_t start, end, offset;
 	u_char *buf;
 	int i, index;
 
 	STAILQ_FOREACH(e1, &table->entries, entry) {
 		if (e1->type.mbr == DOSPTYP_EXT ||
 		    e1->type.mbr == DOSPTYP_EXTLBA)
 			break;
 	}
 	if (e1 == NULL)
 		return (table);
 	index = 5;
 	offset = e1->part.start;
 	buf = malloc(table->sectorsize);
 	if (buf == NULL)
 		return (table);
 	DEBUG("EBR detected");
 	for (i = 0; i < MAXEBRENTRIES; i++) {
 #if 0	/* Some BIOSes return an incorrect number of sectors */
 		if (offset >= table->sectors)
 			break;
 #endif
 		if (dread(dev, buf, 1, offset) != 0)
 			break;
 		dp = (struct dos_partition *)(buf + DOSPARTOFF);
 		if (dp[0].dp_typ == 0)
 			break;
 		start = le32toh(dp[0].dp_start);
 		if (dp[0].dp_typ == DOSPTYP_EXT &&
 		    dp[1].dp_typ == 0) {
 			offset = e1->part.start + start;
 			continue;
 		}
 		end = le32toh(dp[0].dp_size);
 		entry = malloc(sizeof(*entry));
 		if (entry == NULL)
 			break;
 		entry->part.start = offset + start;
 		entry->part.end = entry->part.start + end - 1;
 		entry->part.index = index++;
 		entry->part.type = mbr_parttype(dp[0].dp_typ);
 		entry->flags = dp[0].dp_flag;
 		entry->type.mbr = dp[0].dp_typ;
 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
 		DEBUG("new EBR partition added");
 		if (dp[1].dp_typ == 0)
 			break;
 		offset = e1->part.start + le32toh(dp[1].dp_start);
 	}
 	free(buf);
 	return (table);
 }
 #endif /* LOADER_MBR_SUPPORT */
 
 static enum partition_type
 bsd_parttype(uint8_t type)
 {
 
 	switch (type) {
 	case FS_NANDFS:
 		return (PART_FREEBSD_NANDFS);
 	case FS_SWAP:
 		return (PART_FREEBSD_SWAP);
 	case FS_BSDFFS:
 		return (PART_FREEBSD_UFS);
 	case FS_VINUM:
 		return (PART_FREEBSD_VINUM);
 	case FS_ZFS:
 		return (PART_FREEBSD_ZFS);
 	}
 	return (PART_UNKNOWN);
 }
 
 /*
  * Probe for a BSD disklabel and, when one is accepted, append its
  * partitions to the table and mark the table as PTABLE_BSD.
  *
  * One sector is read at sector offset 1 through dread.  The table is
  * returned unchanged when the sector size is too small to hold a
  * struct disklabel, when the buffer cannot be allocated, or when the label
  * fails validation.  On a read error the table is destroyed with
  * ptable_close() and NULL is returned, so the caller must not touch it.
  */
 static struct ptable*
 ptable_bsdread(struct ptable *table, void *dev, diskread_t dread)
 {
 	struct disklabel *dl;
 	struct partition *part;
 	struct pentry *entry;
 	u_char *buf;
 	uint32_t raw_offset;
 	int i;
 
 	if (table->sectorsize < sizeof(struct disklabel)) {
 		DEBUG("Too small sectorsize");
 		return (table);
 	}
 	buf = malloc(table->sectorsize);
 	if (buf == NULL)
 		return (table);
 	if (dread(dev, buf, 1, 1) != 0) {
 		DEBUG("read failed");
 		ptable_close(table);
 		table = NULL;
 		goto out;
 	}
 	dl = (struct disklabel *)buf;
 	/*
 	 * NOTE(review): the label is rejected only when BOTH magic fields are
 	 * wrong; a label with a single valid magic is still accepted.  Confirm
 	 * that this leniency is intended.
 	 */
 	if (le32toh(dl->d_magic) != DISKMAGIC &&
 	    le32toh(dl->d_magic2) != DISKMAGIC)
 		goto out;
 	if (le32toh(dl->d_secsize) != table->sectorsize) {
 		DEBUG("unsupported sector size");
 		goto out;
 	}
 	/* Converted in place: the buffer copy now holds host byte order. */
 	dl->d_npartitions = le16toh(dl->d_npartitions);
 	if (dl->d_npartitions > 20 || dl->d_npartitions < 8) {
 		DEBUG("invalid number of partitions");
 		goto out;
 	}
 	DEBUG("BSD detected");
 	part = &dl->d_partitions[0];
 	/* Partition offsets are rebased against the RAW_PART entry's offset. */
 	raw_offset = le32toh(part[RAW_PART].p_offset);
 	for (i = 0; i < dl->d_npartitions; i++, part++) {
 		if (i == RAW_PART)
 			continue;
 		/* Zero is zero in any byte order, so no conversion is needed. */
 		if (part->p_size == 0)
 			continue;
 		entry = malloc(sizeof(*entry));
 		if (entry == NULL)
 			break;	/* keep the entries gathered so far */
 		entry->part.start = le32toh(part->p_offset) - raw_offset;
 		/* end is the last sector of the partition (inclusive). */
 		entry->part.end = entry->part.start +
 		    le32toh(part->p_size) - 1;
 		entry->part.type = bsd_parttype(part->p_fstype);
 		entry->part.index = i; /* starts from zero */
 		entry->type.bsd = part->p_fstype;
 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
 		DEBUG("new BSD partition added");
 	}
 	table->type = PTABLE_BSD;
 out:
 	free(buf);
 	return (table);
 }
 
 #ifdef LOADER_VTOC8_SUPPORT
 /*
  * Convert a VTOC8 partition tag into the loader's generic partition_type.
  * Tags that are not listed here are reported as PART_UNKNOWN.
  */
 static enum partition_type
 vtoc8_parttype(uint16_t type)
 {
 
 	if (type == VTOC_TAG_FREEBSD_NANDFS)
 		return (PART_FREEBSD_NANDFS);
 	if (type == VTOC_TAG_FREEBSD_SWAP)
 		return (PART_FREEBSD_SWAP);
 	if (type == VTOC_TAG_FREEBSD_UFS)
 		return (PART_FREEBSD_UFS);
 	if (type == VTOC_TAG_FREEBSD_VINUM)
 		return (PART_FREEBSD_VINUM);
 	if (type == VTOC_TAG_FREEBSD_ZFS)
 		return (PART_FREEBSD_ZFS);
 	return (PART_UNKNOWN);
 }
 
 /*
  * Parse a VTOC8 label and, when it is accepted, append its partitions to
  * the table and mark the table as PTABLE_VTOC8.
  *
  * One sector is read at sector offset 0 through dread.  The table is
  * returned unchanged when the sector size differs from
  * sizeof(struct vtoc8), when the buffer cannot be allocated, or when the
  * label fails validation (checksum, partition count, geometry).  On a read
  * error the table is destroyed with ptable_close() and NULL is returned.
  */
 static struct ptable*
 ptable_vtoc8read(struct ptable *table, void *dev, diskread_t dread)
 {
 	struct pentry *entry;
 	struct vtoc8 *dl;
 	u_char *buf;
 	uint16_t sum, heads, sectors;
 	int i;
 
 	if (table->sectorsize != sizeof(struct vtoc8))
 		return (table);
 	buf = malloc(table->sectorsize);
 	if (buf == NULL)
 		return (table);
 	if (dread(dev, buf, 1, 0) != 0) {
 		DEBUG("read failed");
 		ptable_close(table);
 		table = NULL;
 		goto out;
 	}
 	dl = (struct vtoc8 *)buf;
 	/* Check the sum: XOR of all big-endian 16-bit words must be zero. */
 	for (i = sum = 0; i < sizeof(struct vtoc8); i += sizeof(sum))
 		sum ^= be16dec(buf + i);
 	if (sum != 0) {
 		DEBUG("incorrect checksum");
 		goto out;
 	}
 	if (be16toh(dl->nparts) != VTOC8_NPARTS) {
 		DEBUG("invalid number of entries");
 		goto out;
 	}
 	sectors = be16toh(dl->nsecs);
 	heads = be16toh(dl->nheads);
 	if (sectors * heads == 0) {
 		DEBUG("invalid geometry");
 		goto out;
 	}
 	DEBUG("VTOC8 detected");
 	for (i = 0; i < VTOC8_NPARTS; i++) {
 		/* Converted in place: the buffer copy now holds host order. */
 		dl->part[i].tag = be16toh(dl->part[i].tag);
 		if (i == VTOC_RAW_PART ||
 		    dl->part[i].tag == VTOC_TAG_UNASSIGNED)
 			continue;
 		entry = malloc(sizeof(*entry));
 		if (entry == NULL)
 			break;	/* keep the entries gathered so far */
 		/*
 		 * Widen before multiplying: cylinder * heads * sectors would
 		 * otherwise be evaluated in 32 bits and could wrap before it
 		 * is stored in the 64-bit start field.
 		 */
 		entry->part.start = (uint64_t)be32toh(dl->map[i].cyl) *
 		    heads * sectors;
 		/* end is the last sector of the partition (inclusive). */
 		entry->part.end = be32toh(dl->map[i].nblks) +
 		    entry->part.start - 1;
 		entry->part.type = vtoc8_parttype(dl->part[i].tag);
 		entry->part.index = i; /* starts from zero */
 		entry->type.vtoc8 = dl->part[i].tag;
 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
 		DEBUG("new VTOC8 partition added");
 	}
 	table->type = PTABLE_VTOC8;
 out:
 	free(buf);
 	return (table);
 
 }
 #endif /* LOADER_VTOC8_SUPPORT */
 
 /*
  * Detect the partitioning scheme of a device and build its partition table.
  *
  * dev is passed through untouched to dread; sectors and sectorsize are
  * recorded in the returned table.  Sector DOSBBSECTOR is read first, then
  * the schemes are probed in this order: VTOC8 (when compiled in and the
  * magic matches), BSD label, and finally the MBR signature, which leads to
  * either GPT or MBR/EBR parsing depending on build options and contents.
  *
  * Returns NULL on allocation failure or read error.  Otherwise returns a
  * table to be released with ptable_close(); its type stays PTABLE_NONE
  * when nothing was recognised.
  */
 struct ptable*
-ptable_open(void *dev, off_t sectors, uint16_t sectorsize,
+ptable_open(void *dev, uint64_t sectors, uint16_t sectorsize,
     diskread_t *dread)
 {
 	struct dos_partition *dp;
 	struct ptable *table;
 	u_char *buf;
 	int i, count;
 #ifdef LOADER_MBR_SUPPORT
 	struct pentry *entry;
 	uint32_t start, end;
 	int has_ext;
 #endif
 	table = NULL;
 	buf = malloc(sectorsize);
 	if (buf == NULL)
 		return (NULL);
 	/* First, read the MBR. */
 	if (dread(dev, buf, 1, DOSBBSECTOR) != 0) {
 		DEBUG("read failed");
 		goto out;
 	}
 
 	table = malloc(sizeof(*table));
 	if (table == NULL)
 		goto out;
 	table->sectors = sectors;
 	table->sectorsize = sectorsize;
 	table->type = PTABLE_NONE;
 	STAILQ_INIT(&table->entries);
 
 #ifdef LOADER_VTOC8_SUPPORT
 	if (be16dec(buf + offsetof(struct vtoc8, magic)) == VTOC_MAGIC) {
 		if (ptable_vtoc8read(table, dev, dread) == NULL) {
 			/* Read error. */
 			/* The reader already freed the table. */
 			table = NULL;
 			goto out;
 		} else if (table->type == PTABLE_VTOC8)
 			goto out;
 	}
 #endif
 	/* Check the BSD label. */
 	if (ptable_bsdread(table, dev, dread) == NULL) { /* Read error. */
 		/* The reader already freed the table. */
 		table = NULL;
 		goto out;
 	} else if (table->type == PTABLE_BSD)
 		goto out;
 
 #if defined(LOADER_GPT_SUPPORT) || defined(LOADER_MBR_SUPPORT)
 	/* Check the MBR magic. */
 	if (buf[DOSMAGICOFFSET] != 0x55 ||
 	    buf[DOSMAGICOFFSET + 1] != 0xaa) {
 		DEBUG("magic sequence not found");
 #if defined(LOADER_GPT_SUPPORT)
 		/* There is no PMBR, check that we have backup GPT */
 		table->type = PTABLE_GPT;
 		table = ptable_gptread(table, dev, dread);
 #endif
 		goto out;
 	}
 	/* Check that we have PMBR. Also do some validation. */
 	dp = (struct dos_partition *)(buf + DOSPARTOFF);
 	/* count ends up as the number of slots with a non-zero type. */
 	for (i = 0, count = 0; i < NDOSPART; i++) {
 		/* Any flag other than 0 or 0x80 rejects the whole MBR. */
 		if (dp[i].dp_flag != 0 && dp[i].dp_flag != 0x80) {
 			DEBUG("invalid partition flag %x", dp[i].dp_flag);
 			goto out;
 		}
 #ifdef LOADER_GPT_SUPPORT
 		if (dp[i].dp_typ == DOSPTYP_PMBR) {
 			table->type = PTABLE_GPT;
 			DEBUG("PMBR detected");
 		}
 #endif
 		if (dp[i].dp_typ != 0)
 			count++;
 	}
 	/* Do we have some invalid values? */
 	/*
 	 * A PMBR sharing the MBR with other entries is only kept when the
 	 * second slot is HFS; otherwise the table falls back to PTABLE_NONE.
 	 */
 	if (table->type == PTABLE_GPT && count > 1) {
 		if (dp[1].dp_typ != DOSPTYP_HFS) {
 			table->type = PTABLE_NONE;
 			DEBUG("Incorrect PMBR, ignore it");
 		} else {
 			DEBUG("Bootcamp detected");
 		}
 	}
 #ifdef LOADER_GPT_SUPPORT
 	if (table->type == PTABLE_GPT) {
 		table = ptable_gptread(table, dev, dread);
 		goto out;
 	}
 #endif
 #ifdef LOADER_MBR_SUPPORT
 	/* Read MBR. */
 	DEBUG("MBR detected");
 	table->type = PTABLE_MBR;
 	for (i = has_ext = 0; i < NDOSPART; i++) {
 		if (dp[i].dp_typ == 0)
 			continue;
 		/* Despite its name, "end" holds the slice size in sectors. */
 		start = le32dec(&(dp[i].dp_start));
 		end = le32dec(&(dp[i].dp_size));
 		if (start == 0 || end == 0)
 			continue;
 #if 0	/* Some BIOSes return an incorrect number of sectors */
 		if (start + end - 1 >= sectors)
 			continue;	/* XXX: ignore */
 #endif
 		if (dp[i].dp_typ == DOSPTYP_EXT ||
 		    dp[i].dp_typ == DOSPTYP_EXTLBA)
 			has_ext = 1;
 		entry = malloc(sizeof(*entry));
 		if (entry == NULL)
 			break;	/* keep the entries gathered so far */
 		entry->part.start = start;
 		entry->part.end = start + end - 1;
 		/* MBR slices are numbered from 1. */
 		entry->part.index = i + 1;
 		entry->part.type = mbr_parttype(dp[i].dp_typ);
 		entry->flags = dp[i].dp_flag;
 		entry->type.mbr = dp[i].dp_typ;
 		STAILQ_INSERT_TAIL(&table->entries, entry, entry);
 		DEBUG("new MBR partition added");
 	}
 	if (has_ext) {
 		table = ptable_ebrread(table, dev, dread);
 		/* FALLTHROUGH */
 	}
 #endif /* LOADER_MBR_SUPPORT */
 #endif /* LOADER_MBR_SUPPORT || LOADER_GPT_SUPPORT */
 out:
 	free(buf);
 	return (table);
 }
 
 /*
  * Release a partition table together with every entry linked on it.
  *
  * A NULL table is accepted and ignored, mirroring free(3), so a caller can
  * pass along whatever ptable_open() returned without checking it first.
  */
 void
 ptable_close(struct ptable *table)
 {
 	struct pentry *entry;
 
 	if (table == NULL)
 		return;
 	while (!STAILQ_EMPTY(&table->entries)) {
 		entry = STAILQ_FIRST(&table->entries);
 		STAILQ_REMOVE_HEAD(&table->entries, entry);
 		free(entry);
 	}
 	free(table);
 }
 
 /*
  * Return the partitioning scheme recorded in the table.  The table pointer
  * is dereferenced unconditionally, so it must not be NULL.
  */
 enum ptable_type
 ptable_gettype(const struct ptable *table)
 {
 
 	return (table->type);
+}
+
+/*
+ * Compute the size in bytes of the media described by the table, i.e.
+ * sectors * sectorsize.
+ *
+ * The result is stored through sizep when sizep is not NULL.  Returns 0 on
+ * success, EINVAL when table is NULL and EOVERFLOW when the byte count
+ * does not fit in 64 bits.
+ */
+int
+ptable_getsize(const struct ptable *table, uint64_t *sizep)
+{
+	uint64_t tmp;
+
+	if (table == NULL)
+		return (EINVAL);
+
+	tmp = (uint64_t)table->sectors * table->sectorsize;
+	/*
+	 * Unsigned multiplication wraps silently and a wrapped product is
+	 * not necessarily smaller than either factor, so divide back to
+	 * detect overflow reliably.
+	 */
+	if (table->sectorsize != 0 &&
+	    tmp / table->sectorsize != table->sectors)
+		return (EOVERFLOW);
+
+	if (sizep != NULL)
+		*sizep = tmp;
+	return (0);
 }
 
 int
 ptable_getpart(const struct ptable *table, struct ptable_entry *part, int index)
 {
 	struct pentry *entry;
 
 	if (part == NULL || table == NULL)
 		return (EINVAL);
 
 	STAILQ_FOREACH(entry, &table->entries, entry) {
 		if (entry->part.index != index)
 			continue;
 		memcpy(part, &entry->part, sizeof(*part));
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Search for a slice with the following preferences:
  *
  * 1: Active FreeBSD slice
  * 2: Non-active FreeBSD slice
  * 3: Active Linux slice
  * 4: non-active Linux slice
  * 5: Active FAT/FAT32 slice
  * 6: non-active FAT/FAT32 slice
  */
 #define PREF_RAWDISK	0
 #define PREF_FBSD_ACT	1
 #define PREF_FBSD	2
 #define PREF_LINUX_ACT	3
 #define PREF_LINUX	4
 #define PREF_DOS_ACT	5
 #define PREF_DOS	6
 #define PREF_NONE	7
 int
 ptable_getbestpart(const struct ptable *table, struct ptable_entry *part)
 {
 	struct pentry *entry, *best;
 	int pref, preflevel;
 
 	if (part == NULL || table == NULL)
 		return (EINVAL);
 
 	best = NULL;
 	preflevel = pref = PREF_NONE;
 	STAILQ_FOREACH(entry, &table->entries, entry) {
 #ifdef LOADER_MBR_SUPPORT
 		if (table->type == PTABLE_MBR) {
 			switch (entry->type.mbr) {
 			case DOSPTYP_386BSD:
 				pref = entry->flags & 0x80 ? PREF_FBSD_ACT:
 				    PREF_FBSD;
 				break;
 			case DOSPTYP_LINUX:
 				pref = entry->flags & 0x80 ? PREF_LINUX_ACT:
 				    PREF_LINUX;
 				break;
 			case 0x01:		/* DOS/Windows */
 			case 0x04:
 			case 0x06:
 			case 0x0c:
 			case 0x0e:
 			case DOSPTYP_FAT32:
 				pref = entry->flags & 0x80 ? PREF_DOS_ACT:
 				    PREF_DOS;
 				break;
 			default:
 				pref = PREF_NONE;
 			}
 		}
 #endif /* LOADER_MBR_SUPPORT */
 #ifdef LOADER_GPT_SUPPORT
 		if (table->type == PTABLE_GPT) {
 			if (entry->part.type == PART_DOS)
 				pref = PREF_DOS;
 			else if (entry->part.type == PART_FREEBSD_UFS ||
 			    entry->part.type == PART_FREEBSD_ZFS)
 				pref = PREF_FBSD;
 			else
 				pref = PREF_NONE;
 		}
 #endif /* LOADER_GPT_SUPPORT */
 		if (pref < preflevel) {
 			preflevel = pref;
 			best = entry;
 		}
 	}
 	if (best != NULL) {
 		memcpy(part, &best->part, sizeof(*part));
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Invoke iter once for every entry of the table, in list order.
  *
  * Each call receives arg, a generated partition name and the entry's
  * public description.  The name is "s<index>" for MBR, "p<index>" for GPT
  * and a single letter ('a' + index) for VTOC8 and BSD tables.  Iteration
  * stops at the first non-zero value returned by iter, and that value is
  * returned; otherwise the result is 0.
  */
 int
 ptable_iterate(const struct ptable *table, void *arg, ptable_iterate_t *iter)
 {
 	struct pentry *entry;
 	char name[32];
 	int ret = 0;
 
 	/* For any other table type the name is passed as an empty string. */
 	name[0] = '\0';
 	STAILQ_FOREACH(entry, &table->entries, entry) {
 		/*
 		 * The conditionally compiled branches form one if/else chain:
 		 * each trailing "else" binds to the next "if" that survives
 		 * preprocessing.
 		 */
 #ifdef LOADER_MBR_SUPPORT
 		if (table->type == PTABLE_MBR)
 			sprintf(name, "s%d", entry->part.index);
 		else
 #endif
 #ifdef LOADER_GPT_SUPPORT
 		if (table->type == PTABLE_GPT)
 			sprintf(name, "p%d", entry->part.index);
 		else
 #endif
 #ifdef LOADER_VTOC8_SUPPORT
 		if (table->type == PTABLE_VTOC8)
 			sprintf(name, "%c", (u_char) 'a' +
 			    entry->part.index);
 		else
 #endif
 		if (table->type == PTABLE_BSD)
 			sprintf(name, "%c", (u_char) 'a' +
 			    entry->part.index);
 		if ((ret = iter(arg, name, &entry->part)) != 0)
 			return (ret);
 	}
 	return (ret);
 }
Index: stable/11/sys/boot/common/part.h
===================================================================
--- stable/11/sys/boot/common/part.h	(revision 329098)
+++ stable/11/sys/boot/common/part.h	(revision 329099)
@@ -1,82 +1,83 @@
 /*-
  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _PART_H_
 #define	_PART_H_
 
 struct ptable;
 
 /* Partitioning scheme detected on a device, as reported by ptable_gettype(). */
 enum ptable_type {
 	PTABLE_NONE,	/* nothing recognised; initial value set by ptable_open() */
 	PTABLE_BSD,	/* BSD disklabel */
 	PTABLE_MBR,	/* MBR, including any EBR entries */
 	PTABLE_GPT,	/* GPT */
 	PTABLE_VTOC8	/* VTOC8 label */
 };
 
 /*
  * Scheme-independent partition type.  The per-scheme readers translate
  * their native type codes into one of these values; codes without a
  * mapping become PART_UNKNOWN.
  */
 enum partition_type {
 	PART_UNKNOWN,
 	PART_EFI,
 	PART_FREEBSD,
 	PART_FREEBSD_BOOT,
 	PART_FREEBSD_NANDFS,
 	PART_FREEBSD_UFS,
 	PART_FREEBSD_ZFS,
 	PART_FREEBSD_SWAP,
 	PART_FREEBSD_VINUM,
 	PART_LINUX,
 	PART_LINUX_SWAP,
 	PART_DOS,
 };
 
 /*
  * Public description of one partition, as copied out by ptable_getpart()
  * and ptable_getbestpart() and handed to ptable_iterate() callbacks.
  */
 struct ptable_entry {
 	uint64_t		start;	/* first sector of the partition */
 	uint64_t		end;	/* last sector, inclusive (start + size - 1) */
 	int			index;	/* number within the table; MBR slices start at 1, BSD/VTOC8 at 0 */
 	enum partition_type	type;	/* generic type translated from the scheme's own code */
 };
 
 /*
  * Media read callback: read "blocks" sectors starting at sector "offset"
  * into buf; arg is the opaque device cookie given to ptable_open().
  * The offset and size are in sectors.  Callers treat a non-zero return as
  * a read failure.
  */
-typedef int (diskread_t)(void *arg, void *buf, size_t blocks, off_t offset);
+typedef int (diskread_t)(void *arg, void *buf, size_t blocks, uint64_t offset);
 /*
  * Per-partition callback for ptable_iterate(); a non-zero return value
  * stops the iteration and is passed back to the caller.
  */
 typedef int (ptable_iterate_t)(void *arg, const char *partname,
     const struct ptable_entry *part);
 
 /* Table lifetime: open detects the scheme, close frees table and entries. */
-struct ptable *ptable_open(void *dev, off_t sectors, uint16_t sectorsize,
+struct ptable *ptable_open(void *dev, uint64_t sectors, uint16_t sectorsize,
     diskread_t *dread);
 void ptable_close(struct ptable *table);
 enum ptable_type ptable_gettype(const struct ptable *table);
+int ptable_getsize(const struct ptable *table, uint64_t *sizep);
 
 /* Lookups return 0 on success, ENOENT when nothing matches, EINVAL on NULL. */
 int ptable_getpart(const struct ptable *table, struct ptable_entry *part,
     int index);
 int ptable_getbestpart(const struct ptable *table, struct ptable_entry *part);
 
 int ptable_iterate(const struct ptable *table, void *arg,
     ptable_iterate_t *iter);
 const char *parttype2str(enum partition_type type);
 
 #endif	/* !_PART_H_ */
Index: stable/11/sys/boot/efi/include/efidevp.h
===================================================================
--- stable/11/sys/boot/efi/include/efidevp.h	(revision 329098)
+++ stable/11/sys/boot/efi/include/efidevp.h	(revision 329099)
@@ -1,428 +1,454 @@
 /* $FreeBSD$ */
 #ifndef _DEVPATH_H
 #define _DEVPATH_H
 
 /*++
 
 Copyright (c)  1999 - 2002 Intel Corporation. All rights reserved
 This software and associated documentation (if any) is furnished
 under a license and may only be used or copied in accordance
 with the terms of the license. Except as permitted by such
 license, no part of this software or documentation may be
 reproduced, stored in a retrieval system, or transmitted in any
 form or by any means without the express written consent of
 Intel Corporation.
 
 Module Name:
 
     devpath.h
 
 Abstract:
 
     Defines for parsing the EFI Device Path structures
 
 
 
 Revision History
 
 --*/
 
 //
 // Device Path structures - Section C
 //
 
 /* Generic header found at the start of every device path node in this file. */
 typedef struct _EFI_DEVICE_PATH {
         UINT8                           Type;      /* node type; bit 0x80 is the "unpacked" flag */
         UINT8                           SubType;   /* meaning depends on Type */
         UINT8                           Length[2]; /* node length in bytes, low byte first */
 } EFI_DEVICE_PATH;
 
 /* Layout of EFI_DEVICE_PATH.Type: low 7 bits are the type, bit 7 a flag. */
 #define EFI_DP_TYPE_MASK                    0x7F
 #define EFI_DP_TYPE_UNPACKED                0x80
 
 /* Type and SubType values of the nodes that terminate a device path. */
 #define END_DEVICE_PATH_TYPE                0x7f
 
 #define END_ENTIRE_DEVICE_PATH_SUBTYPE      0xff
 #define END_INSTANCE_DEVICE_PATH_SUBTYPE    0x01
 #define END_DEVICE_PATH_LENGTH              (sizeof(EFI_DEVICE_PATH))
 
 
 /*
  * NOTE(review): DP_IS_END_TYPE expands to nothing, so it cannot be used in
  * an expression; IsDevicePathEndType() below is the working equivalent.
  * Left untouched on purpose -- confirm whether it should be defined.
  */
 #define DP_IS_END_TYPE(a)
 /* True when node "a" carries the end-of-entire-path sub-type. */
 #define DP_IS_END_SUBTYPE(a)        ( (a)->SubType == END_ENTIRE_DEVICE_PATH_SUBTYPE )
 
 /*
  * Accessors for a node pointer "a".  Every macro evaluates its argument
  * more than once or textually, so pass expressions without side effects.
  */
 #define DevicePathType(a)           ( ((a)->Type) & EFI_DP_TYPE_MASK )
 #define DevicePathSubType(a)        ( (a)->SubType )
 #define DevicePathNodeLength(a)     ( ((a)->Length[0]) | ((a)->Length[1] << 8) )
 /* Advance by the node's own length, measured in bytes. */
 #define NextDevicePathNode(a)       ( (EFI_DEVICE_PATH *) ( ((UINT8 *) (a)) + DevicePathNodeLength(a)))
 #define IsDevicePathType(a, t)      ( DevicePathType(a) == (t) )
 #define IsDevicePathEndType(a)      IsDevicePathType(a, END_DEVICE_PATH_TYPE)
 #define IsDevicePathEndSubType(a)   ( (a)->SubType == END_ENTIRE_DEVICE_PATH_SUBTYPE )
 #define IsDevicePathEnd(a)          ( IsDevicePathEndType(a) && IsDevicePathEndSubType(a) )
 #define IsDevicePathUnpacked(a)     ( (a)->Type & EFI_DP_TYPE_UNPACKED )
 
 
 /*
  * Store a 16-bit length, low byte first.  Expands to a bare brace block,
  * so take care when using it as the body of an unbraced if/else.
  */
 #define SetDevicePathNodeLength(a,l) {                  \
             (a)->Length[0] = (UINT8) (l);               \
             (a)->Length[1] = (UINT8) ((l) >> 8);        \
             }
 
 /* Turn node "a" into an end-of-entire-path terminator. */
 #define SetDevicePathEndNode(a)  {                      \
             (a)->Type = END_DEVICE_PATH_TYPE;           \
             (a)->SubType = END_ENTIRE_DEVICE_PATH_SUBTYPE;     \
             (a)->Length[0] = sizeof(EFI_DEVICE_PATH);   \
             (a)->Length[1] = 0;                         \
             }
 
-
-
 /*
  * Hardware device path nodes.  HARDWARE_DEVICE_PATH is the node type; the
  * HW_*_DP constants are presumably the matching SubType codes (confirm
  * against the UEFI specification).  Each constant is followed by the
  * layout of its node, which always begins with the generic header.
  */
 #define HARDWARE_DEVICE_PATH            0x01
 
 #define HW_PCI_DP                       0x01
 typedef struct _PCI_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT8                           Function;
         UINT8                           Device;
 } PCI_DEVICE_PATH;
 
 #define HW_PCCARD_DP                    0x02
 typedef struct _PCCARD_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT8                           FunctionNumber;
 } PCCARD_DEVICE_PATH;
 
 #define HW_MEMMAP_DP                    0x03
 typedef struct _MEMMAP_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT32                          MemoryType;
         EFI_PHYSICAL_ADDRESS            StartingAddress;
         EFI_PHYSICAL_ADDRESS            EndingAddress;
 } MEMMAP_DEVICE_PATH;
 
 #define HW_VENDOR_DP                    0x04
 /* Also reused by MSG_VENDOR_DP and MEDIA_VENDOR_DP further down. */
 typedef struct _VENDOR_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         EFI_GUID                        Guid;
 } VENDOR_DEVICE_PATH;
 
 #define UNKNOWN_DEVICE_GUID \
     { 0xcf31fac5, 0xc24e, 0x11d2, {0x85, 0xf3, 0x0, 0xa0, 0xc9, 0x3e, 0xc9, 0x3b} }
 
 /* Vendor node extended with a drive letter (struct tag is spelled "UKNOWN"). */
 typedef struct _UKNOWN_DEVICE_VENDOR_DP {
     VENDOR_DEVICE_PATH      DevicePath;
     UINT8                   LegacyDriveLetter;
 } UNKNOWN_DEVICE_VENDOR_DEVICE_PATH;
 
 #define HW_CONTROLLER_DP            0x05
 typedef struct _CONTROLLER_DEVICE_PATH {
         EFI_DEVICE_PATH     Header;
         UINT32              Controller;
 } CONTROLLER_DEVICE_PATH;
 
 /*
  * ACPI device path nodes.  ACPI_DEVICE_PATH is the node type; ACPI_DP and
  * ACPI_EXTENDED_DP are presumably the SubType codes (confirm against the
  * UEFI specification).
  */
 #define ACPI_DEVICE_PATH                 0x02
 
 #define ACPI_DP                         0x01
 typedef struct _ACPI_HID_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT32                          HID;
         UINT32                          UID;
 } ACPI_HID_DEVICE_PATH;
 
 #define ACPI_EXTENDED_DP          0x02
 /* Same leading fields as ACPI_HID_DEVICE_PATH plus a CID. */
 typedef struct _ACPI_EXTENDED_HID_DEVICE_PATH {
   EFI_DEVICE_PATH                 Header;
   UINT32                          HID;
   UINT32                          UID;
   UINT32                          CID;
 } ACPI_EXTENDED_HID_DEVICE_PATH;
 
 //
 // EISA ID Macro
 // EISA ID Definition 32-bits
 //  bits[15:0] - three character compressed ASCII EISA ID.
 //  bits[31:16] - binary number
 //   Compressed ASCII is 5 bits per character 0b00001 = 'A' 0b11010 = 'Z'
 //
 // EISA_ID() places _Name in the low 16 bits and _Num in the high 16 bits;
 // EISA_PNP_ID() and EFI_PNP_ID() are identical and use PNP_EISA_ID_CONST
 // as the name part.  EISA_ID_TO_NUM() extracts the high 16 bits again.
 //
 #define PNP_EISA_ID_CONST       0x41d0    
 #define EISA_ID(_Name, _Num)    ((UINT32) ((_Name) | (_Num) << 16))   
 #define EISA_PNP_ID(_PNPId)     (EISA_ID(PNP_EISA_ID_CONST, (_PNPId)))
 #define EFI_PNP_ID(_PNPId)      (EISA_ID(PNP_EISA_ID_CONST, (_PNPId)))
 
 #define PNP_EISA_ID_MASK        0xffff
 #define EISA_ID_TO_NUM(_Id)     ((_Id) >> 16)
 /*
  * Messaging device path nodes.  MESSAGING_DEVICE_PATH is the node type;
  * the MSG_*_DP constants are presumably the SubType codes (confirm against
  * the UEFI specification).  Note that they are not listed in numeric
  * order.
  */
 #define MESSAGING_DEVICE_PATH           0x03 
 
 #define MSG_ATAPI_DP                    0x01
 typedef struct _ATAPI_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT8                           PrimarySecondary;
         UINT8                           SlaveMaster;
         UINT16                          Lun;
 } ATAPI_DEVICE_PATH;
 
 #define MSG_SCSI_DP                     0x02
 typedef struct _SCSI_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT16                          Pun;
         UINT16                          Lun; 
 } SCSI_DEVICE_PATH;
 
 #define MSG_FIBRECHANNEL_DP             0x03
 typedef struct _FIBRECHANNEL_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT32                          Reserved;
         UINT64                          WWN;
         UINT64                          Lun;
 } FIBRECHANNEL_DEVICE_PATH;
 
 #define MSG_1394_DP                     0x04
 typedef struct _F1394_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT32                          Reserved;
         UINT64                          Guid;
 } F1394_DEVICE_PATH;
 
 #define MSG_USB_DP                      0x05
 typedef struct _USB_DEVICE_PATH {
     EFI_DEVICE_PATH                     Header;
     UINT8					                      ParentPortNumber;
     UINT8					                      InterfaceNumber;
 } USB_DEVICE_PATH;
 
 #define MSG_USB_CLASS_DP                0x0F
 typedef struct _USB_CLASS_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT16                          VendorId;
         UINT16                          ProductId;
         UINT8                           DeviceClass;
         UINT8                           DeviceSubClass;
         UINT8                           DeviceProtocol;
 } USB_CLASS_DEVICE_PATH;
 
 #define MSG_I2O_DP                      0x06
 typedef struct _I2O_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT32                          Tid;
 } I2O_DEVICE_PATH;
 
 #define MSG_MAC_ADDR_DP                 0x0b
 typedef struct _MAC_ADDR_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         EFI_MAC_ADDRESS                 MacAddress;
         UINT8                           IfType;
 } MAC_ADDR_DEVICE_PATH;
 
 #define MSG_IPv4_DP                     0x0c
 typedef struct _IPv4_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         EFI_IPv4_ADDRESS                LocalIpAddress;
         EFI_IPv4_ADDRESS                RemoteIpAddress;
         UINT16                          LocalPort;
         UINT16                          RemotePort;
         UINT16                          Protocol;
         BOOLEAN                         StaticIpAddress;
 } IPv4_DEVICE_PATH;
 
 #define MSG_IPv6_DP                     0x0d
 /* Same field set as IPv4_DEVICE_PATH, with IPv6 address types. */
 typedef struct _IPv6_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         EFI_IPv6_ADDRESS                LocalIpAddress;
         EFI_IPv6_ADDRESS                RemoteIpAddress;
         UINT16                          LocalPort;
         UINT16                          RemotePort;
         UINT16                          Protocol;
         BOOLEAN                         StaticIpAddress;
 } IPv6_DEVICE_PATH;
 
 #define MSG_INFINIBAND_DP               0x09
 typedef struct _INFINIBAND_DEVICE_PATH {
   EFI_DEVICE_PATH                       Header;
   UINT32                                ResourceFlags;
   UINT8                                 PortGid[16];
   UINT64                                ServiceId;
   UINT64                                TargetPortId;
   UINT64                                DeviceId;
 } INFINIBAND_DEVICE_PATH;
 
 /* Single-bit values; presumably for INFINIBAND_DEVICE_PATH.ResourceFlags. */
 #define INFINIBAND_RESOURCE_FLAG_IOC_SERVICE                0x01
 #define INFINIBAND_RESOURCE_FLAG_EXTENDED_BOOT_ENVIRONMENT  0x02
 #define INFINIBAND_RESOURCE_FLAG_CONSOLE_PROTOCOL           0x04
 #define INFINIBAND_RESOURCE_FLAG_STORAGE_PROTOCOL           0x08
 #define INFINIBAND_RESOURCE_FLAG_NETWORK_PROTOCOL           0x10
 
 #define MSG_UART_DP                     0x0e
 typedef struct _UART_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT32                          Reserved;
         UINT64                          BaudRate;
         UINT8                           DataBits;
         UINT8                           Parity;
         UINT8                           StopBits;
 } UART_DEVICE_PATH;
 
 #define MSG_VENDOR_DP                   0x0A
 /* Use VENDOR_DEVICE_PATH struct */
 
 /* GUID initializers for vendor messaging nodes (one per terminal flavour). */
 #define DEVICE_PATH_MESSAGING_PC_ANSI \
     { 0xe0c14753, 0xf9be, 0x11d2, {0x9a, 0x0c, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
 
 #define DEVICE_PATH_MESSAGING_VT_100 \
     { 0xdfa66065, 0xb419, 0x11d3, {0x9a, 0x2d, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d} }
 
 #define DEVICE_PATH_MESSAGING_VT_100_PLUS \
     { 0x7baec70b, 0x57e0, 0x4c76, {0x8e, 0x87, 0x2f, 0x9e, 0x28, 0x08, 0x83, 0x43} }
     
 #define DEVICE_PATH_MESSAGING_VT_UTF8 \
     { 0xad15a0d6, 0x8bec, 0x4acf, {0xa0, 0x73, 0xd0, 0x1d, 0xe7, 0x7e, 0x2d, 0x88} }
 
 #define MSG_SATA_DP			0x12
 typedef struct _SATA_DEVICE_PATH {
 	EFI_DEVICE_PATH			Header;
 	UINT16				HBAPortNumber;
 	UINT16				PortMultiplierPortNumber;
 	UINT16				Lun;
 } SATA_DEVICE_PATH;
 
 /*
  * Media device path nodes.  MEDIA_DEVICE_PATH is the node type; the
  * MEDIA_*_DP constants are presumably the SubType codes (confirm against
  * the UEFI specification).
  */
 #define MEDIA_DEVICE_PATH               0x04
 
 #define MEDIA_HARDDRIVE_DP              0x01
 typedef struct _HARDDRIVE_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT32                          PartitionNumber;
         UINT64                          PartitionStart;
         UINT64                          PartitionSize;
         UINT8                           Signature[16];
         UINT8                           MBRType;        /* presumably one of MBR_TYPE_* below */
         UINT8                           SignatureType;  /* presumably one of SIGNATURE_TYPE_* below */
 } HARDDRIVE_DEVICE_PATH;
 
 #define MBR_TYPE_PCAT                       0x01
 #define MBR_TYPE_EFI_PARTITION_TABLE_HEADER 0x02
 
 #define SIGNATURE_TYPE_MBR                  0x01
 #define SIGNATURE_TYPE_GUID                 0x02
 
 #define MEDIA_CDROM_DP                  0x02
 typedef struct _CDROM_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT32                          BootEntry;
         UINT64                          PartitionStart;
         UINT64                          PartitionSize;
 } CDROM_DEVICE_PATH;
 
 #define MEDIA_VENDOR_DP                 0x03
 /* Use VENDOR_DEVICE_PATH struct */
 
 #define MEDIA_FILEPATH_DP               0x04
 /* PathName is declared with one element; presumably variable-length in practice. */
 typedef struct _FILEPATH_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         CHAR16                          PathName[1];
 } FILEPATH_DEVICE_PATH;
 
 /* Size of the fixed part of the node, i.e. the offset of PathName. */
 #define SIZE_OF_FILEPATH_DEVICE_PATH EFI_FIELD_OFFSET(FILEPATH_DEVICE_PATH,PathName)
 
 #define MEDIA_PROTOCOL_DP               0x05
 typedef struct _MEDIA_PROTOCOL_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         EFI_GUID                        Protocol;
 } MEDIA_PROTOCOL_DEVICE_PATH;
 
 
 /*
  * BBS device path node.  BBS_DEVICE_PATH is the node type and BBS_BBS_DP
  * presumably its only SubType code (confirm against the UEFI
  * specification).
  */
 #define BBS_DEVICE_PATH                 0x05
 #define BBS_BBS_DP                      0x01
 typedef struct _BBS_BBS_DEVICE_PATH {
         EFI_DEVICE_PATH                 Header;
         UINT16                          DeviceType;   /* one of the BBS_TYPE_* values below */
         UINT16                          StatusFlag;
         CHAR8                           String[1];    /* declared with one element; presumably variable-length */
 } BBS_BBS_DEVICE_PATH;
 
 /* DeviceType definitions - from BBS specification */
 #define BBS_TYPE_FLOPPY                 0x01
 #define BBS_TYPE_HARDDRIVE              0x02
 #define BBS_TYPE_CDROM                  0x03
 #define BBS_TYPE_PCMCIA                 0x04
 #define BBS_TYPE_USB                    0x05
 #define BBS_TYPE_EMBEDDED_NETWORK       0x06
 #define BBS_TYPE_DEV                    0x80
 #define BBS_TYPE_UNKNOWN                0xFF
 
 /*
  * Union of the node layouts declared above, held by value; DevPath gives
  * access to the generic header shared by all of them.
  * NOTE(review): unlike EFI_DEV_PATH_PTR below, this union has no member
  * for ACPI_EXTENDED_HID_DEVICE_PATH, and neither union has one for
  * SATA_DEVICE_PATH.
  */
 typedef union {
     EFI_DEVICE_PATH                      DevPath;
     PCI_DEVICE_PATH                      Pci;
     PCCARD_DEVICE_PATH                   PcCard;
     MEMMAP_DEVICE_PATH                   MemMap;
     VENDOR_DEVICE_PATH                   Vendor;
     UNKNOWN_DEVICE_VENDOR_DEVICE_PATH    UnknownVendor;   
     CONTROLLER_DEVICE_PATH               Controller;
     ACPI_HID_DEVICE_PATH                 Acpi;
 
     ATAPI_DEVICE_PATH                    Atapi;
     SCSI_DEVICE_PATH                     Scsi;
     FIBRECHANNEL_DEVICE_PATH             FibreChannel;
 
     F1394_DEVICE_PATH                    F1394;
     USB_DEVICE_PATH                      Usb;
     USB_CLASS_DEVICE_PATH                UsbClass;
     I2O_DEVICE_PATH                      I2O;
     MAC_ADDR_DEVICE_PATH                 MacAddr;
     IPv4_DEVICE_PATH                     Ipv4;
     IPv6_DEVICE_PATH                     Ipv6;
     INFINIBAND_DEVICE_PATH               InfiniBand;
     UART_DEVICE_PATH                     Uart;
 
     HARDDRIVE_DEVICE_PATH                HardDrive;
     CDROM_DEVICE_PATH                    CD;
 
     FILEPATH_DEVICE_PATH                 FilePath;
     MEDIA_PROTOCOL_DEVICE_PATH           MediaProtocol;
 
     BBS_BBS_DEVICE_PATH                  Bbs;
 
 } EFI_DEV_PATH;
 
 /*
  * Union of typed pointers to the node layouts: one pointer value can be
  * read through whichever member matches the node it points at.
  */
 typedef union {
     EFI_DEVICE_PATH                      *DevPath;
     PCI_DEVICE_PATH                      *Pci;
     PCCARD_DEVICE_PATH                   *PcCard;
     MEMMAP_DEVICE_PATH                   *MemMap;
     VENDOR_DEVICE_PATH                   *Vendor;
     UNKNOWN_DEVICE_VENDOR_DEVICE_PATH    *UnknownVendor;   
     CONTROLLER_DEVICE_PATH               *Controller;
     ACPI_HID_DEVICE_PATH                 *Acpi;
     ACPI_EXTENDED_HID_DEVICE_PATH        *ExtendedAcpi;
 
     ATAPI_DEVICE_PATH                    *Atapi;
     SCSI_DEVICE_PATH                     *Scsi;
     FIBRECHANNEL_DEVICE_PATH             *FibreChannel;
 
     F1394_DEVICE_PATH                    *F1394;
     USB_DEVICE_PATH                      *Usb;
     USB_CLASS_DEVICE_PATH                *UsbClass;
     I2O_DEVICE_PATH                      *I2O;
     MAC_ADDR_DEVICE_PATH                 *MacAddr;
     IPv4_DEVICE_PATH                     *Ipv4;
     IPv6_DEVICE_PATH                     *Ipv6;
     INFINIBAND_DEVICE_PATH               *InfiniBand;
     UART_DEVICE_PATH                     *Uart;
 
     HARDDRIVE_DEVICE_PATH                *HardDrive;
 
     FILEPATH_DEVICE_PATH                 *FilePath;
     MEDIA_PROTOCOL_DEVICE_PATH           *MediaProtocol;
 
     CDROM_DEVICE_PATH                    *CD;
     BBS_BBS_DEVICE_PATH                  *Bbs;
 
 } EFI_DEV_PATH_PTR;
 
+#define	EFI_LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID			\
+    { 0xbc62157e, 0x3e33, 0x4fec, { 0x99, 0x20, 0x2d, 0x3b, 0x36, 0xd7, 0x50, 0xdf } }
+
+#define	EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID				\
+    { 0x8b843e20, 0x8132, 0x4852, { 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c } }
+
+INTERFACE_DECL(_EFI_DEVICE_PATH_PROTOCOL);	/* declared only; the typedefs below take struct _EFI_DEVICE_PATH */
+
+typedef		/* signature of the ConvertDeviceNodeToText member below */
+CHAR16*		/* returns a CHAR16 string; presumably allocated by the firmware -- confirm who frees it */
+(EFIAPI *EFI_DEVICE_PATH_TO_TEXT_NODE) (
+    IN struct _EFI_DEVICE_PATH *This,
+    IN BOOLEAN                 DisplayOnly,
+    IN BOOLEAN                 AllowShortCuts
+    );
+
+typedef		/* signature of the ConvertDevicePathToText member below; identical to the node variant */
+CHAR16*
+(EFIAPI *EFI_DEVICE_PATH_TO_TEXT_PATH) (
+    IN struct _EFI_DEVICE_PATH *This,
+    IN BOOLEAN                 DisplayOnly,
+    IN BOOLEAN                 AllowShortCuts
+    );
+
+typedef struct _EFI_DEVICE_PATH_TO_TEXT_PROTOCOL {	/* function table; presumably located via EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID */
+	EFI_DEVICE_PATH_TO_TEXT_NODE ConvertDeviceNodeToText;
+	EFI_DEVICE_PATH_TO_TEXT_PATH ConvertDevicePathToText;
+} EFI_DEVICE_PATH_TO_TEXT_PROTOCOL;
 
 #endif
Index: stable/11/sys/boot/efi/include/efilib.h
===================================================================
--- stable/11/sys/boot/efi/include/efilib.h	(revision 329098)
+++ stable/11/sys/boot/efi/include/efilib.h	(revision 329099)
@@ -1,68 +1,98 @@
 /*-
  * Copyright (c) 2000 Doug Rabson
  * Copyright (c) 2006 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _LOADER_EFILIB_H
 #define	_LOADER_EFILIB_H
 
 #include <stand.h>
+#include <sys/queue.h>
 
 extern EFI_HANDLE		IH;
 extern EFI_SYSTEM_TABLE		*ST;
 extern EFI_BOOT_SERVICES	*BS;
 extern EFI_RUNTIME_SERVICES	*RS;
 
-extern struct devsw efipart_dev;
+extern struct devsw efipart_fddev;
+extern struct devsw efipart_cddev;
+extern struct devsw efipart_hddev;
 extern struct devsw efinet_dev;
 extern struct netif_driver efinetif;
 
+/* EFI block device data, included here to help efi_zfs_probe() */
+typedef STAILQ_HEAD(pdinfo_list, pdinfo) pdinfo_list_t;
+
+typedef struct pdinfo
+{
+	STAILQ_ENTRY(pdinfo)	pd_link;	/* link in device list */
+	pdinfo_list_t		pd_part;	/* head of partition list */
+	EFI_HANDLE		pd_handle;	/* handle of the device */
+	EFI_HANDLE		pd_alias;	/* alternate handle or NULL */
+	EFI_DEVICE_PATH		*pd_devpath;	/* path used to match entries */
+	EFI_BLOCK_IO		*pd_blkio;
+	int			pd_unit;	/* unit number */
+	int			pd_open;	/* reference counter */
+	void			*pd_bcache;	/* buffer cache data */
+} pdinfo_t;
+
+pdinfo_list_t *efiblk_get_pdinfo_list(struct devsw *dev);
+
 void *efi_get_table(EFI_GUID *tbl);
 
 int efi_register_handles(struct devsw *, EFI_HANDLE *, EFI_HANDLE *, int);
 EFI_HANDLE efi_find_handle(struct devsw *, int);
 int efi_handle_lookup(EFI_HANDLE, struct devsw **, int *,  uint64_t *);
 int efi_handle_update_dev(EFI_HANDLE, struct devsw *, int, uint64_t);
 
 EFI_DEVICE_PATH *efi_lookup_image_devpath(EFI_HANDLE);
 EFI_DEVICE_PATH *efi_lookup_devpath(EFI_HANDLE);
 EFI_HANDLE efi_devpath_handle(EFI_DEVICE_PATH *);
 EFI_DEVICE_PATH *efi_devpath_last_node(EFI_DEVICE_PATH *);
 EFI_DEVICE_PATH *efi_devpath_trim(EFI_DEVICE_PATH *);
+int efi_devpath_match(EFI_DEVICE_PATH *, EFI_DEVICE_PATH *);
 CHAR16 *efi_devpath_name(EFI_DEVICE_PATH *);
 void efi_free_devpath_name(CHAR16 *);
 
 int efi_status_to_errno(EFI_STATUS);
 
 void efi_time_init(void);
 void efi_time_fini(void);
 
 EFI_STATUS main(int argc, CHAR16 *argv[]);
 void exit(EFI_STATUS status);
 void delay(int usecs);
+
+/* EFI environment initialization. */
+void efi_init_environment(void);
+
+/* CHAR16 utility functions. */
+int wcscmp(CHAR16 *, CHAR16 *);
+void cpy8to16(const char *, CHAR16 *, size_t);
+void cpy16to8(const CHAR16 *, char *, size_t);
 
 #endif	/* _LOADER_EFILIB_H */
Index: stable/11/sys/boot/efi/libefi/Makefile
===================================================================
--- stable/11/sys/boot/efi/libefi/Makefile	(revision 329098)
+++ stable/11/sys/boot/efi/libefi/Makefile	(revision 329099)
@@ -1,53 +1,53 @@
 # $FreeBSD$
 
 .include <src.opts.mk>
 
 .if ${MK_FORTH} != "no"
+CFLAGS+= -DBOOT_FORTH
 .include "${.CURDIR}/../../Makefile.ficl"
 .endif
 
 LIB=	efi
 INTERNALLIB=
 WARNS?=	2
 
-SRCS=	delay.c devpath.c efi_console.c efinet.c efipart.c errno.c \
-	handles.c libefi.c
+SRCS=	delay.c devpath.c efi_console.c efinet.c efipart.c env.c errno.c \
+	handles.c wchar.c libefi.c
 
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
 SRCS+=	time.c
 .elif ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm"
 SRCS+=	time_event.c
 .endif
-.if ${MK_FORTH} != "no"
-SRCS+=	env.c
-.endif
 
 # We implement a slightly non-standard %S in that it always takes a
 # CHAR16 that's common in UEFI-land instead of a wchar_t. This only
 # seems to matter on arm64 where wchar_t defaults to an int instead
 # of a short. There's no good cast to use here so just ignore the
 # warnings for now.
 CWARNFLAGS.efinet.c+=	-Wno-format
+CWARNFLAGS.efipart.c+=	-Wno-format
+CWARNFLAGS.env.c+=	-Wno-format
 
 .if ${MACHINE_CPUARCH} == "aarch64"
 CFLAGS+=	-msoft-float -mgeneral-regs-only
 .endif
 .if ${MACHINE_ARCH} == "amd64"
 CFLAGS+= -fPIC -mno-red-zone
 .endif
 CFLAGS+= -I${.CURDIR}/../include
 CFLAGS+= -I${.CURDIR}/../include/${MACHINE}
 CFLAGS+= -I${.CURDIR}/../../../../lib/libstand
 
 # Pick up the bootstrap header for some interface items
 CFLAGS+= -I${.CURDIR}/../../common
 
 # Handle FreeBSD specific %b and %D printf format specifiers
 CFLAGS+= ${FORMAT_EXTENSIONS}
 
 # Do not use TERM_EMU on arm and arm64 as it doesn't behave well with serial console
 .if ${MACHINE_CPUARCH} != "arm" && ${MACHINE_CPUARCH} != "aarch64"
 CFLAGS+= -DTERM_EMU
 .endif
 
 .include <bsd.lib.mk>
Index: stable/11/sys/boot/efi/libefi/devpath.c
===================================================================
--- stable/11/sys/boot/efi/libefi/devpath.c	(revision 329098)
+++ stable/11/sys/boot/efi/libefi/devpath.c	(revision 329099)
@@ -1,167 +1,168 @@
 /*-
  * Copyright (c) 2016 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <efi.h>
 #include <efilib.h>
 
-/* XXX: This belongs in an efifoo.h header. */
-#define	EFI_LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID			\
-    { 0xbc62157e, 0x3e33, 0x4fec, { 0x99, 0x20, 0x2d, 0x3b, 0x36, 0xd7, 0x50, 0xdf } }
-
-#define	EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID				\
-    { 0x8b843e20, 0x8132, 0x4852, { 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c } }
-
-INTERFACE_DECL(_EFI_DEVICE_PATH_PROTOCOL);
-
-typedef
-CHAR16*
-(EFIAPI *EFI_DEVICE_PATH_TO_TEXT_NODE) (
-    IN struct _EFI_DEVICE_PATH *This,
-    IN BOOLEAN                 DisplayOnly,
-    IN BOOLEAN                 AllowShortCuts
-    );
-
-typedef
-CHAR16*
-(EFIAPI *EFI_DEVICE_PATH_TO_TEXT_PATH) (
-    IN struct _EFI_DEVICE_PATH *This,
-    IN BOOLEAN                 DisplayOnly,
-    IN BOOLEAN                 AllowShortCuts
-    );
-
-typedef struct _EFI_DEVICE_PATH_TO_TEXT_PROTOCOL {
-	EFI_DEVICE_PATH_TO_TEXT_NODE ConvertDeviceNodeToText;
-	EFI_DEVICE_PATH_TO_TEXT_PATH ConvertDevicePathToText;
-} EFI_DEVICE_PATH_TO_TEXT_PROTOCOL;
-
 static EFI_GUID ImageDevicePathGUID =
     EFI_LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID;
 static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL;
 static EFI_GUID DevicePathToTextGUID = EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID;
 static EFI_DEVICE_PATH_TO_TEXT_PROTOCOL *textProtocol;
 
 EFI_DEVICE_PATH *
 efi_lookup_image_devpath(EFI_HANDLE handle)
 {
 	EFI_DEVICE_PATH *devpath;
 	EFI_STATUS status;
 
 	status = BS->HandleProtocol(handle, &ImageDevicePathGUID,
 	    (VOID **)&devpath);
 	if (EFI_ERROR(status))
 		devpath = NULL;
 	return (devpath);
 }
 
 EFI_DEVICE_PATH *
 efi_lookup_devpath(EFI_HANDLE handle)
 {
 	EFI_DEVICE_PATH *devpath;
 	EFI_STATUS status;
 
 	status = BS->HandleProtocol(handle, &DevicePathGUID, (VOID **)&devpath);
 	if (EFI_ERROR(status))
 		devpath = NULL;
 	return (devpath);
 }
 
 CHAR16 *
 efi_devpath_name(EFI_DEVICE_PATH *devpath)
 {
 	static int once = 1;
 	EFI_STATUS status;
 
 	if (devpath == NULL)
 		return (NULL);
 	if (once) {
 		status = BS->LocateProtocol(&DevicePathToTextGUID, NULL,
 		    (VOID **)&textProtocol);
 		if (EFI_ERROR(status))
 			textProtocol = NULL;
 		once = 0;
 	}
 	if (textProtocol == NULL)
 		return (NULL);
 
 	return (textProtocol->ConvertDevicePathToText(devpath, TRUE, TRUE));
 }
 
 void
 efi_free_devpath_name(CHAR16 *text)
 {
 
 	BS->FreePool(text);
 }
 
 EFI_DEVICE_PATH *
 efi_devpath_last_node(EFI_DEVICE_PATH *devpath)
 {
 
 	if (IsDevicePathEnd(devpath))
 		return (NULL);
 	while (!IsDevicePathEnd(NextDevicePathNode(devpath)))
 		devpath = NextDevicePathNode(devpath);
 	return (devpath);
 }
 
 EFI_DEVICE_PATH *
 efi_devpath_trim(EFI_DEVICE_PATH *devpath)
 {
 	EFI_DEVICE_PATH *node, *copy;
 	size_t prefix, len;
 
-	node = efi_devpath_last_node(devpath);
+	if ((node = efi_devpath_last_node(devpath)) == NULL)
+		return (NULL);
 	prefix = (UINT8 *)node - (UINT8 *)devpath;
 	if (prefix == 0)
 		return (NULL);
 	len = prefix + DevicePathNodeLength(NextDevicePathNode(node));
 	copy = malloc(len);
-	memcpy(copy, devpath, prefix);
-	node = (EFI_DEVICE_PATH *)((UINT8 *)copy + prefix);
-	SetDevicePathEndNode(node);
+	if (copy != NULL) {
+		memcpy(copy, devpath, prefix);
+		node = (EFI_DEVICE_PATH *)((UINT8 *)copy + prefix);
+		SetDevicePathEndNode(node);
+	}
 	return (copy);
 }
 
 EFI_HANDLE
 efi_devpath_handle(EFI_DEVICE_PATH *devpath)
 {
 	EFI_STATUS status;
 	EFI_HANDLE h;
 
 	/*
 	 * There isn't a standard way to locate a handle for a given
 	 * device path.  However, querying the EFI_DEVICE_PATH protocol
 	 * for a given device path should give us a handle for the
 	 * closest node in the path to the end that is valid.
 	 */
 	status = BS->LocateDevicePath(&DevicePathGUID, &devpath, &h);
 	if (EFI_ERROR(status))
 		return (NULL);
 	return (h);
+}
+
+/* Return 1 if both paths are non-NULL and equal node for node, else 0. */
+int
+efi_devpath_match(EFI_DEVICE_PATH *devpath1, EFI_DEVICE_PATH *devpath2)
+{
+	int nodelen;
+
+	if (devpath1 == NULL || devpath2 == NULL)
+		return (0);
+
+	for (;;) {
+		/* Compare the node headers first, then the whole node. */
+		if (DevicePathType(devpath1) != DevicePathType(devpath2) ||
+		    DevicePathSubType(devpath1) != DevicePathSubType(devpath2))
+			return (0);
+		nodelen = DevicePathNodeLength(devpath1);
+		if (nodelen != DevicePathNodeLength(devpath2))
+			return (0);
+		if (memcmp(devpath1, devpath2, (size_t)nodelen) != 0)
+			return (0);
+
+		/* Identical end nodes: every node before them matched too. */
+		if (IsDevicePathEnd(devpath1))
+			return (1);
+		devpath1 = NextDevicePathNode(devpath1);
+		devpath2 = NextDevicePathNode(devpath2);
+	}
 }
Index: stable/11/sys/boot/efi/libefi/efinet.c
===================================================================
--- stable/11/sys/boot/efi/libefi/efinet.c	(revision 329098)
+++ stable/11/sys/boot/efi/libefi/efinet.c	(revision 329099)
@@ -1,374 +1,384 @@
 /*-
  * Copyright (c) 2001 Doug Rabson
  * Copyright (c) 2002, 2006 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 
 #include <stand.h>
 #include <net.h>
 #include <netif.h>
 
 #include <dev_net.c>
 
 #include <efi.h>
 #include <efilib.h>
 
 static EFI_GUID sn_guid = EFI_SIMPLE_NETWORK_PROTOCOL;
 
 static void efinet_end(struct netif *);
 static int efinet_get(struct iodesc *, void *, size_t, time_t);
 static void efinet_init(struct iodesc *, void *);
 static int efinet_match(struct netif *, void *);
 static int efinet_probe(struct netif *, void *);
 static int efinet_put(struct iodesc *, void *, size_t);
 
 struct netif_driver efinetif = {   
 	.netif_bname = "efinet",
 	.netif_match = efinet_match,
 	.netif_probe = efinet_probe,
 	.netif_init = efinet_init,
 	.netif_get = efinet_get,
 	.netif_put = efinet_put,
 	.netif_end = efinet_end,
 	.netif_ifs = NULL,
 	.netif_nifs = 0
 };
 
 #ifdef EFINET_DEBUG
 static void
 dump_mode(EFI_SIMPLE_NETWORK_MODE *mode)
 {
 	int i;
 
 	printf("State                 = %x\n", mode->State);
 	printf("HwAddressSize         = %u\n", mode->HwAddressSize);
 	printf("MediaHeaderSize       = %u\n", mode->MediaHeaderSize);
 	printf("MaxPacketSize         = %u\n", mode->MaxPacketSize);
 	printf("NvRamSize             = %u\n", mode->NvRamSize);
 	printf("NvRamAccessSize       = %u\n", mode->NvRamAccessSize);
 	printf("ReceiveFilterMask     = %x\n", mode->ReceiveFilterMask);
 	printf("ReceiveFilterSetting  = %u\n", mode->ReceiveFilterSetting);
 	printf("MaxMCastFilterCount   = %u\n", mode->MaxMCastFilterCount);
 	printf("MCastFilterCount      = %u\n", mode->MCastFilterCount);
 	printf("MCastFilter           = {");
 	for (i = 0; i < mode->MCastFilterCount; i++)
 		printf(" %s", ether_sprintf(mode->MCastFilter[i].Addr));
 	printf(" }\n");
 	printf("CurrentAddress        = %s\n",
 	    ether_sprintf(mode->CurrentAddress.Addr));
 	printf("BroadcastAddress      = %s\n",
 	    ether_sprintf(mode->BroadcastAddress.Addr));
 	printf("PermanentAddress      = %s\n",
 	    ether_sprintf(mode->PermanentAddress.Addr));
 	printf("IfType                = %u\n", mode->IfType);
 	printf("MacAddressChangeable  = %d\n", mode->MacAddressChangeable);
 	printf("MultipleTxSupported   = %d\n", mode->MultipleTxSupported);
 	printf("MediaPresentSupported = %d\n", mode->MediaPresentSupported);
 	printf("MediaPresent          = %d\n", mode->MediaPresent);
 }
 #endif
 
 static int
 efinet_match(struct netif *nif, void *machdep_hint)
 {
 	struct devdesc *dev = machdep_hint;
 
 	if (dev->d_unit == nif->nif_unit)
 		return (1);
 	return(0);
 }
 
 static int
 efinet_probe(struct netif *nif, void *machdep_hint)
 {
 
 	return (0);
 }
 
 static int
 efinet_put(struct iodesc *desc, void *pkt, size_t len)
 {
 	struct netif *nif = desc->io_netif;
 	EFI_SIMPLE_NETWORK *net;
 	EFI_STATUS status;
 	void *buf;
 
 	net = nif->nif_devdata;
 	if (net == NULL)
 		return (-1);
 
 	status = net->Transmit(net, 0, len, pkt, 0, 0, 0);
 	if (status != EFI_SUCCESS)
 		return (-1);
 
 	/* Wait for the buffer to be transmitted */
 	do {
 		buf = NULL;	/* XXX Is this needed? */
 		status = net->GetStatus(net, 0, &buf);
 		/*
 		 * XXX EFI1.1 and the E1000 card returns a different 
 		 * address than we gave.  Sigh.
 		 */
 	} while (status == EFI_SUCCESS && buf == NULL);
 
 	/* XXX How do we deal with status != EFI_SUCCESS now? */
 	return ((status == EFI_SUCCESS) ? len : -1);
 }
 
 static int
 efinet_get(struct iodesc *desc, void *pkt, size_t len, time_t timeout)
 {
 	struct netif *nif = desc->io_netif;
 	EFI_SIMPLE_NETWORK *net;
 	EFI_STATUS status;
 	UINTN bufsz;
 	time_t t;
 	char buf[2048];
 
 	net = nif->nif_devdata;
 	if (net == NULL)
 		return (0);
 
 	t = time(0);
 	while ((time(0) - t) < timeout) {
 		bufsz = sizeof(buf);
 		status = net->Receive(net, 0, &bufsz, buf, 0, 0, 0);
 		if (status == EFI_SUCCESS) {
 			/*
 			 * XXX EFI1.1 and the E1000 card trash our
 			 * workspace if we do not do this silly copy.
 			 * Either they are not respecting the len
 			 * value or do not like the alignment.
 			 */
 			if (bufsz > len)
 				bufsz = len;
 			bcopy(buf, pkt, bufsz);
 			return (bufsz);
 		}
 		if (status != EFI_NOT_READY)
 			return (0);
 	}
 
 	return (0);
 }
 
 static void
 efinet_init(struct iodesc *desc, void *machdep_hint)
 {
 	struct netif *nif = desc->io_netif;
 	EFI_SIMPLE_NETWORK *net;
 	EFI_HANDLE h;
 	EFI_STATUS status;
 
 	if (nif->nif_driver->netif_ifs[nif->nif_unit].dif_unit < 0) {
 		printf("Invalid network interface %d\n", nif->nif_unit);
 		return;
 	}
 
 	h = nif->nif_driver->netif_ifs[nif->nif_unit].dif_private;
 	status = BS->HandleProtocol(h, &sn_guid, (VOID **)&nif->nif_devdata);
 	if (status != EFI_SUCCESS) {
 		printf("net%d: cannot fetch interface data (status=%lu)\n",
 		    nif->nif_unit, EFI_ERROR_CODE(status));
 		return;
 	}
 
 	net = nif->nif_devdata;
 	if (net->Mode->State == EfiSimpleNetworkStopped) {
 		status = net->Start(net);
 		if (status != EFI_SUCCESS) {
 			printf("net%d: cannot start interface (status=%ld)\n",
 			    nif->nif_unit, (long)status);
 			return;
 		}
 	}
 
 	if (net->Mode->State != EfiSimpleNetworkInitialized) {
 		status = net->Initialize(net, 0, 0);
 		if (status != EFI_SUCCESS) {
 			printf("net%d: cannot init. interface (status=%ld)\n",
 			    nif->nif_unit, (long)status);
 			return;
 		}
 	}
 
 	if (net->Mode->ReceiveFilterSetting == 0) {
 		UINT32 mask = EFI_SIMPLE_NETWORK_RECEIVE_UNICAST |
 		    EFI_SIMPLE_NETWORK_RECEIVE_BROADCAST;
 
 		status = net->ReceiveFilters(net, mask, 0, FALSE, 0, 0);
 		if (status != EFI_SUCCESS) {
 			printf("net%d: cannot set rx. filters (status=%ld)\n",
 			    nif->nif_unit, (long)status);
 			return;
 		}
 	}
 
 #ifdef EFINET_DEBUG
 	dump_mode(net->Mode);
 #endif
 
 	bcopy(net->Mode->CurrentAddress.Addr, desc->myea, 6);
 	desc->xid = 1;
 }
 
 static void
 efinet_end(struct netif *nif)
 {
 	EFI_SIMPLE_NETWORK *net = nif->nif_devdata; 
 
 	if (net == NULL)
 		return;
 
 	net->Shutdown(net);
 }
 
 static int efinet_dev_init(void);
 static int efinet_dev_print(int);
 
 struct devsw efinet_dev = {
 	.dv_name = "net",
 	.dv_type = DEVT_NET,
 	.dv_init = efinet_dev_init,
 	.dv_strategy = net_strategy,
 	.dv_open = net_open,
 	.dv_close = net_close,
 	.dv_ioctl = noioctl,
 	.dv_print = efinet_dev_print,
 	.dv_cleanup = NULL
 };
 
 static int
 efinet_dev_init()
 {
 	struct netif_dif *dif;
 	struct netif_stats *stats;
 	EFI_DEVICE_PATH *devpath, *node;
 	EFI_SIMPLE_NETWORK *net;
 	EFI_HANDLE *handles, *handles2;
 	EFI_STATUS status;
 	UINTN sz;
 	int err, i, nifs;
 
 	sz = 0;
 	handles = NULL;
 	status = BS->LocateHandle(ByProtocol, &sn_guid, 0, &sz, 0);
 	if (status == EFI_BUFFER_TOO_SMALL) {
 		handles = (EFI_HANDLE *)malloc(sz);
 		status = BS->LocateHandle(ByProtocol, &sn_guid, 0, &sz,
 		    handles);
 		if (EFI_ERROR(status))
 			free(handles);
 	}
 	if (EFI_ERROR(status))
 		return (efi_status_to_errno(status));
 	handles2 = (EFI_HANDLE *)malloc(sz);
+	if (handles2 == NULL) {
+		free(handles);
+		return (ENOMEM);
+	}
 	nifs = 0;
 	for (i = 0; i < sz / sizeof(EFI_HANDLE); i++) {
 		devpath = efi_lookup_devpath(handles[i]);
 		if (devpath == NULL)
 			continue;
-		node = efi_devpath_last_node(devpath);
+		if ((node = efi_devpath_last_node(devpath)) == NULL)
+			continue;
+
 		if (DevicePathType(node) != MESSAGING_DEVICE_PATH ||
 		    DevicePathSubType(node) != MSG_MAC_ADDR_DP)
 			continue;
 
 		/*
 		 * Open the network device in exclusive mode. Without this
 		 * we will be racing with the UEFI network stack. It will
 		 * pull packets off the network leading to lost packets.
 		 */
 		status = BS->OpenProtocol(handles[i], &sn_guid, (void **)&net,
 		    IH, 0, EFI_OPEN_PROTOCOL_EXCLUSIVE);
 		if (status != EFI_SUCCESS) {
 			printf("Unable to open network interface %d for "
 			    "exclusive access: %d\n", i, EFI_ERROR(status));
 		}
 
 		handles2[nifs] = handles[i];
 		nifs++;
 	}
 	free(handles);
 	if (nifs == 0) {
-		free(handles2);
-		return (ENOENT);
+		err = ENOENT;
+		goto done;
 	}
 
 	err = efi_register_handles(&efinet_dev, handles2, NULL, nifs);
-	if (err != 0) {
-		free(handles2);
-		return (err);
-	}
+	if (err != 0)
+		goto done;
 
-	efinetif.netif_nifs = nifs;
 	efinetif.netif_ifs = calloc(nifs, sizeof(struct netif_dif));
-
 	stats = calloc(nifs, sizeof(struct netif_stats));
+	if (efinetif.netif_ifs == NULL || stats == NULL) {
+		free(efinetif.netif_ifs);
+		free(stats);
+		efinetif.netif_ifs = NULL;
+		err = ENOMEM;
+		goto done;
+	}
+	efinetif.netif_nifs = nifs;
 
 	for (i = 0; i < nifs; i++) {
 
 		dif = &efinetif.netif_ifs[i];
 		dif->dif_unit = i;
 		dif->dif_nsel = 1;
 		dif->dif_stats = &stats[i];
 		dif->dif_private = handles2[i];
 	}
+done:
 	free(handles2);
-
-	return (0);
+	return (err);
 }
 
 static int
 efinet_dev_print(int verbose)
 {
 	CHAR16 *text;
 	EFI_HANDLE h;
 	int unit, ret = 0;
 
 	printf("%s devices:", efinet_dev.dv_name);
 	if ((ret = pager_output("\n")) != 0)
 		return (ret);
 
 	for (unit = 0, h = efi_find_handle(&efinet_dev, 0);
 	    h != NULL; h = efi_find_handle(&efinet_dev, ++unit)) {
 		printf("    %s%d:", efinet_dev.dv_name, unit);
 		if (verbose) {
 			text = efi_devpath_name(efi_lookup_devpath(h));
 			if (text != NULL) {
 				printf("    %S", text);
 				efi_free_devpath_name(text);
 			}
 		}
 		if ((ret = pager_output("\n")) != 0)
 			break;
 	}
 	return (ret);
 }
Index: stable/11/sys/boot/efi/libefi/efipart.c
===================================================================
--- stable/11/sys/boot/efi/libefi/efipart.c	(revision 329098)
+++ stable/11/sys/boot/efi/libefi/efipart.c	(revision 329099)
@@ -1,369 +1,968 @@
 /*-
  * Copyright (c) 2010 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <sys/disk.h>
 #include <sys/param.h>
 #include <sys/time.h>
+#include <sys/queue.h>
 #include <stddef.h>
 #include <stdarg.h>
 
 #include <bootstrap.h>
 
 #include <efi.h>
 #include <efilib.h>
 #include <efiprot.h>
+#include <disk.h>
 
 static EFI_GUID blkio_guid = BLOCK_IO_PROTOCOL;
 
-static int efipart_init(void);
+static int efipart_initfd(void);
+static int efipart_initcd(void);
+static int efipart_inithd(void);
+
 static int efipart_strategy(void *, int, daddr_t, size_t, char *, size_t *);
 static int efipart_realstrategy(void *, int, daddr_t, size_t, char *, size_t *);
+
 static int efipart_open(struct open_file *, ...);
 static int efipart_close(struct open_file *);
-static int efipart_print(int);
+static int efipart_ioctl(struct open_file *, u_long, void *);
 
-struct devsw efipart_dev = {
-	.dv_name = "part",
-	.dv_type = DEVT_DISK,
-	.dv_init = efipart_init,
+static int efipart_printfd(int);
+static int efipart_printcd(int);
+static int efipart_printhd(int);
+
+struct devsw efipart_fddev = {
+	.dv_name = "fd",
+	.dv_type = DEVT_FD,
+	.dv_init = efipart_initfd,
 	.dv_strategy = efipart_strategy,
 	.dv_open = efipart_open,
 	.dv_close = efipart_close,
-	.dv_ioctl = noioctl,
-	.dv_print = efipart_print,
+	.dv_ioctl = efipart_ioctl,
+	.dv_print = efipart_printfd,
 	.dv_cleanup = NULL
 };
 
-/*
- * info structure to support bcache
- */
-struct pdinfo {
-	int	pd_unit;	/* unit number */
-	int	pd_open;	/* reference counter */
-	void	*pd_bcache;	/* buffer cache data */
+struct devsw efipart_cddev = {
+	.dv_name = "cd",
+	.dv_type = DEVT_CD,
+	.dv_init = efipart_initcd,
+	.dv_strategy = efipart_strategy,
+	.dv_open = efipart_open,
+	.dv_close = efipart_close,
+	.dv_ioctl = efipart_ioctl,
+	.dv_print = efipart_printcd,
+	.dv_cleanup = NULL
 };
-static struct pdinfo *pdinfo;
-static int npdinfo = 0;
 
-#define PD(dev)         (pdinfo[(dev)->d_unit])
+struct devsw efipart_hddev = {
+	.dv_name = "disk",
+	.dv_type = DEVT_DISK,
+	.dv_init = efipart_inithd,
+	.dv_strategy = efipart_strategy,
+	.dv_open = efipart_open,
+	.dv_close = efipart_close,
+	.dv_ioctl = efipart_ioctl,
+	.dv_print = efipart_printhd,
+	.dv_cleanup = NULL
+};
 
+static pdinfo_list_t fdinfo;
+static pdinfo_list_t cdinfo;
+static pdinfo_list_t hdinfo;
+
+static EFI_HANDLE *efipart_handles = NULL;
+static UINTN efipart_nhandles = 0;
+
+/* Find the entry in list pdi whose pd_unit equals unit; NULL if none. */
+static pdinfo_t *
+efiblk_get_pdinfo(pdinfo_list_t *pdi, int unit)
+{
+	pdinfo_t *entry = STAILQ_FIRST(pdi);
+
+	/* Stop on the first match, or with entry == NULL at the list end. */
+	while (entry != NULL && entry->pd_unit != unit)
+		entry = STAILQ_NEXT(entry, pd_link);
+	return (entry);
+}
+
 static int
-efipart_init(void) 
+efiblk_pdinfo_count(pdinfo_list_t *pdi)
 {
-	EFI_BLOCK_IO *blkio;
-	EFI_DEVICE_PATH *devpath, *devpathcpy, *tmpdevpath, *node;
-	EFI_HANDLE *hin, *hout, *aliases, handle;
-	EFI_STATUS status;
+	pdinfo_t *pd;
+	int i = 0;
+
+	STAILQ_FOREACH(pd, pdi, pd_link) {
+		i++;
+	}
+	return (i);
+}
+
+static int
+efipart_inithandles(void)
+{
 	UINTN sz;
-	u_int n, nin, nout, nrdisk;
-	int err;
+	EFI_HANDLE *hin;
+	EFI_STATUS status;
 
+	free(efipart_handles);		/* free(NULL) is a no-op */
+	efipart_handles = NULL;
+	efipart_nhandles = 0;
+
 	sz = 0;
 	hin = NULL;
-	status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, 0);
+	status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz, hin);
 	if (status == EFI_BUFFER_TOO_SMALL) {
-		hin = (EFI_HANDLE *)malloc(sz * 3);
+		hin = malloc(sz);
+		if (hin == NULL)	/* never hand firmware a NULL buffer */
+			return (ENOMEM);
 		status = BS->LocateHandle(ByProtocol, &blkio_guid, 0, &sz,
 		    hin);
 		if (EFI_ERROR(status))
 			free(hin);
 	}
 	if (EFI_ERROR(status))
 		return (efi_status_to_errno(status));
 
-	/* Filter handles to only include FreeBSD partitions. */
-	nin = sz / sizeof(EFI_HANDLE);
-	hout = hin + nin;
-	aliases = hout + nin;
-	nout = 0;
-	nrdisk = 0;
+	efipart_handles = hin;
+	efipart_nhandles = sz;		/* buffer size in bytes, not a count */
+	return (0);
+}
 
-	bzero(aliases, nin * sizeof(EFI_HANDLE));
-	pdinfo = malloc(nin * sizeof(*pdinfo));
-	if (pdinfo == NULL)
+/* Return node as an ACPI HID node if it has a floppy HID, else NULL. */
+static ACPI_HID_DEVICE_PATH *
+efipart_floppy(EFI_DEVICE_PATH *node)
+{
+	ACPI_HID_DEVICE_PATH *acpi;
+
+	if (DevicePathType(node) != ACPI_DEVICE_PATH ||
+	    DevicePathSubType(node) != ACPI_DP)
+		return (NULL);
+	acpi = (ACPI_HID_DEVICE_PATH *)node;
+	if (acpi->HID == EISA_PNP_ID(0x604) ||
+	    acpi->HID == EISA_PNP_ID(0x700) ||
+	    acpi->HID == EISA_ID(0x41d1, 0x701))
+		return (acpi);
+	return (NULL);	/* ACPI node, but not one of the floppy HIDs */
+}
+
+/*
+ * Allocate a new floppy entry for the handle and append it to fdinfo.
+ */
+static int
+efipart_fdinfo_add(EFI_HANDLE handle, uint32_t uid, EFI_DEVICE_PATH *devpath)
+{
+	pdinfo_t *fd;
+
+	fd = malloc(sizeof(pdinfo_t));
+	if (fd == NULL) {
+		printf("Failed to register floppy %d, out of memory\n", uid);
 		return (ENOMEM);
+	}
+	memset(fd, 0, sizeof(pdinfo_t));
+	STAILQ_INIT(&fd->pd_part);
 
-	for (n = 0; n < nin; n++) {
-		devpath = efi_lookup_devpath(hin[n]);
-		if (devpath == NULL) {
+	fd->pd_unit = uid;
+	fd->pd_handle = handle;
+	fd->pd_devpath = devpath;
+	STAILQ_INSERT_TAIL(&fdinfo, fd, pd_link);
+	return (0);
+}
+
+static void
+efipart_updatefd(void)
+{
+	EFI_DEVICE_PATH *devpath, *node;
+	ACPI_HID_DEVICE_PATH *acpi;
+	int i, nin;
+
+	nin = efipart_nhandles / sizeof (*efipart_handles);
+	for (i = 0; i < nin; i++) {
+		devpath = efi_lookup_devpath(efipart_handles[i]);
+		if (devpath == NULL)
 			continue;
+
+		if ((node = efi_devpath_last_node(devpath)) == NULL)
+			continue;
+		if ((acpi = efipart_floppy(node)) != NULL) {
+			efipart_fdinfo_add(efipart_handles[i], acpi->UID,
+			    devpath);
 		}
+	}
+}
 
-		status = BS->HandleProtocol(hin[n], &blkio_guid,
-		    (void**)&blkio);
-		if (EFI_ERROR(status))
+static int
+efipart_initfd(void)
+{
+	int rv;
+
+	rv = efipart_inithandles();
+	if (rv != 0)
+		return (rv);
+	STAILQ_INIT(&fdinfo);
+
+	efipart_updatefd();
+
+	bcache_add_dev(efiblk_pdinfo_count(&fdinfo));
+	return (0);
+}
+
+/*
+ * Record a CD device: refresh the handles of an entry whose device path
+ * already matches, otherwise append a new entry numbered after the last.
+ */
+static int
+efipart_cdinfo_add(EFI_HANDLE handle, EFI_HANDLE alias,
+    EFI_DEVICE_PATH *devpath)
+{
+	pdinfo_t *known, *fresh;
+	int nextunit = 0;
+
+	/* Count existing entries while looking for the same device path. */
+	STAILQ_FOREACH(known, &cdinfo, pd_link) {
+		if (efi_devpath_match(known->pd_devpath, devpath)) {
+			known->pd_handle = handle;
+			known->pd_alias = alias;
+			return (0);
+		}
+		nextunit++;
+	}
+
+	if ((fresh = malloc(sizeof(*fresh))) == NULL) {
+		printf("Failed to add cd %d, out of memory\n", nextunit);
+		return (ENOMEM);
+	}
+
+	/* Zero every field that is not set explicitly below. */
+	memset(fresh, 0, sizeof(*fresh));
+	STAILQ_INIT(&fresh->pd_part);
+	fresh->pd_unit = nextunit;
+	fresh->pd_handle = handle;
+	fresh->pd_alias = alias;
+	fresh->pd_devpath = devpath;
+	STAILQ_INSERT_TAIL(&cdinfo, fresh, pd_link);
+	return (0);
+}
+
+static void
+efipart_updatecd(void)
+{
+	int i, nin;
+	EFI_DEVICE_PATH *devpath, *devpathcpy, *tmpdevpath, *node;
+	EFI_HANDLE handle;
+	EFI_BLOCK_IO *blkio;
+	EFI_STATUS status;
+
+	nin = efipart_nhandles / sizeof (*efipart_handles);
+	for (i = 0; i < nin; i++) {
+		devpath = efi_lookup_devpath(efipart_handles[i]);
+		if (devpath == NULL)
 			continue;
-		if (!blkio->Media->LogicalPartition) {
-			nrdisk++;
+
+		if ((node = efi_devpath_last_node(devpath)) == NULL)
 			continue;
-		}
+		if (efipart_floppy(node) != NULL)
+			continue;
 
+		status = BS->HandleProtocol(efipart_handles[i],
+		    &blkio_guid, (void **)&blkio);
+		if (EFI_ERROR(status))
+			continue;
 		/*
 		 * If we come across a logical partition of subtype CDROM
 		 * it doesn't refer to the CD filesystem itself, but rather
 		 * to any usable El Torito boot image on it. In this case
 		 * we try to find the parent device and add that instead as
 		 * that will be the CD filesystem.
 		 */
-		node = efi_devpath_last_node(devpath);
 		if (DevicePathType(node) == MEDIA_DEVICE_PATH &&
 		    DevicePathSubType(node) == MEDIA_CDROM_DP) {
 			devpathcpy = efi_devpath_trim(devpath);
+			if (devpathcpy == NULL)
+				continue;
 			tmpdevpath = devpathcpy;
 			status = BS->LocateDevicePath(&blkio_guid, &tmpdevpath,
 			    &handle);
 			free(devpathcpy);
 			if (EFI_ERROR(status))
 				continue;
-			hout[nout] = handle;
-			aliases[nout] = hin[n];
-		} else
-			hout[nout] = hin[n];
-		nout++;
-		pdinfo[npdinfo].pd_open = 0;
-		pdinfo[npdinfo].pd_bcache = NULL;
-		pdinfo[npdinfo].pd_unit = npdinfo;
-		npdinfo++;
+			devpath = efi_lookup_devpath(handle);
+			efipart_cdinfo_add(handle, efipart_handles[i],
+			    devpath);
+			continue;
+		}
+
+		if (DevicePathType(node) == MESSAGING_DEVICE_PATH &&
+		    DevicePathSubType(node) == MSG_ATAPI_DP) {
+			efipart_cdinfo_add(efipart_handles[i], NULL,
+			    devpath);
+			continue;
+		}
+
+		/* USB or SATA cd without the media. */
+		if (blkio->Media->RemovableMedia &&
+		    !blkio->Media->MediaPresent) {
+			efipart_cdinfo_add(efipart_handles[i], NULL,
+			    devpath);
+		}
 	}
+}
 
-	bcache_add_dev(npdinfo);
-	err = efi_register_handles(&efipart_dev, hout, aliases, nout);
-	free(hin);
+static int
+efipart_initcd(void)
+{
+	int rv;
 
-	if (nout == 0 && nrdisk > 0)
-		printf("Found %d disk(s) but no logical partition\n", nrdisk);
-	return (err);
+	rv = efipart_inithandles();
+	if (rv != 0)
+		return (rv);
+	STAILQ_INIT(&cdinfo);
+
+	efipart_updatecd();
+
+	bcache_add_dev(efiblk_pdinfo_count(&cdinfo));
+	return (0);
 }
 
 static int
-efipart_print(int verbose)
+efipart_hdinfo_add(EFI_HANDLE disk_handle, EFI_HANDLE part_handle)
 {
-	char line[80];
+	EFI_DEVICE_PATH *disk_devpath, *part_devpath;
+	HARDDRIVE_DEVICE_PATH *node;
+	int unit;
+	pdinfo_t *hd, *pd, *last;
+
+	disk_devpath = efi_lookup_devpath(disk_handle);
+	part_devpath = efi_lookup_devpath(part_handle);
+	if (disk_devpath == NULL || part_devpath == NULL) {
+		return (ENOENT);
+	}
+	node = (HARDDRIVE_DEVICE_PATH *)efi_devpath_last_node(part_devpath);
+	if (node == NULL)
+		return (ENOENT);	/* This should not happen. */
+
+	pd = malloc(sizeof(pdinfo_t));
+	if (pd == NULL) {
+		printf("Failed to add disk, out of memory\n");
+		return (ENOMEM);
+	}
+	memset(pd, 0, sizeof(pdinfo_t));
+	STAILQ_INIT(&pd->pd_part);
+
+	STAILQ_FOREACH(hd, &hdinfo, pd_link) {
+		if (efi_devpath_match(hd->pd_devpath, disk_devpath) != 0) {
+			/* Add the partition. */
+			pd->pd_handle = part_handle;
+			pd->pd_unit = node->PartitionNumber;
+			pd->pd_devpath = part_devpath;
+			STAILQ_INSERT_TAIL(&hd->pd_part, pd, pd_link);
+			return (0);
+		}
+	}
+
+	last = STAILQ_LAST(&hdinfo, pdinfo, pd_link);
+	if (last != NULL)
+		unit = last->pd_unit + 1;
+	else
+		unit = 0;
+
+	/* Add the disk. */
+	hd = pd;
+	hd->pd_handle = disk_handle;
+	hd->pd_unit = unit;
+	hd->pd_devpath = disk_devpath;
+	STAILQ_INSERT_TAIL(&hdinfo, hd, pd_link);
+
+	pd = malloc(sizeof(pdinfo_t));
+	if (pd == NULL) {
+		printf("Failed to add partition, out of memory\n");
+		return (ENOMEM);
+	}
+	memset(pd, 0, sizeof(pdinfo_t));
+	STAILQ_INIT(&pd->pd_part);
+
+	/* Add the partition. */
+	pd->pd_handle = part_handle;
+	pd->pd_unit = node->PartitionNumber;
+	pd->pd_devpath = part_devpath;
+	STAILQ_INSERT_TAIL(&hd->pd_part, pd, pd_link);
+
+	return (0);
+}
+
+/*
+ * The MEDIA_FILEPATH_DP has device name.
+ * From U-Boot sources it looks like names are in the form
+ * of typeN:M, where type is interface type, N is disk id
+ * and M is partition id.
+ */
+static int
+efipart_hdinfo_add_filepath(EFI_HANDLE disk_handle)
+{
+	EFI_DEVICE_PATH *devpath;
+	FILEPATH_DEVICE_PATH *node;
+	char *pathname, *p;
+	int unit, len;
+	pdinfo_t *pd, *last;
+
+	/* First collect and verify all the data */
+	if ((devpath = efi_lookup_devpath(disk_handle)) == NULL)
+		return (ENOENT);
+	node = (FILEPATH_DEVICE_PATH *)efi_devpath_last_node(devpath);
+	if (node == NULL)
+		return (ENOENT);	/* This should not happen. */
+
+	pd = malloc(sizeof(pdinfo_t));
+	if (pd == NULL) {
+		printf("Failed to add disk, out of memory\n");
+		return (ENOMEM);
+	}
+	memset(pd, 0, sizeof(pdinfo_t));
+	STAILQ_INIT(&pd->pd_part);
+	last = STAILQ_LAST(&hdinfo, pdinfo, pd_link);
+	if (last != NULL)
+		unit = last->pd_unit + 1;
+	else
+		unit = 0;
+
+	/* FILEPATH_DEVICE_PATH has 0 terminated string */
+	for (len = 0; node->PathName[len] != 0; len++)
+		;
+	if ((pathname = malloc(len + 1)) == NULL) {
+		printf("Failed to add disk, out of memory\n");
+		free(pd);
+		return (ENOMEM);
+	}
+	cpy16to8(node->PathName, pathname, len + 1);
+	p = strchr(pathname, ':');
+
+	/*
+	 * Assume we are receiving handles in order, first disk handle,
+	 * then partitions for this disk. If this assumption proves
+	 * false, this code would need update.
+	 */
+	if (p == NULL) {	/* no colon, add the disk */
+		pd->pd_handle = disk_handle;
+		pd->pd_unit = unit;
+		pd->pd_devpath = devpath;
+		STAILQ_INSERT_TAIL(&hdinfo, pd, pd_link);
+		free(pathname);
+		return (0);
+	}
+	p++;	/* skip the colon */
+	unit = (int)strtol(p, NULL, 0);
+
+	/*
+	 * We should have disk registered, if not, we are receiving
+	 * handles out of order, and this code should be reworked
+	 * to create "blank" disk for partition, and to find the
+	 * disk based on PathName compares.
+	 */
+	if (last == NULL) {
+		printf("BUG: No disk for partition \"%s\"\n", pathname);
+		free(pathname);
+		free(pd);
+		return (EINVAL);
+	}
+	/* Add the partition. */
+	pd->pd_handle = disk_handle;
+	pd->pd_unit = unit;
+	pd->pd_devpath = devpath;
+	STAILQ_INSERT_TAIL(&last->pd_part, pd, pd_link);
+	free(pathname);
+	return (0);
+}
+
+static void
+efipart_updatehd(void)
+{
+	int i, nin;
+	EFI_DEVICE_PATH *devpath, *devpathcpy, *tmpdevpath, *node;
+	EFI_HANDLE handle;
 	EFI_BLOCK_IO *blkio;
-	EFI_HANDLE h;
 	EFI_STATUS status;
-	u_int unit;
+
+	nin = efipart_nhandles / sizeof (*efipart_handles);
+	for (i = 0; i < nin; i++) {
+		devpath = efi_lookup_devpath(efipart_handles[i]);
+		if (devpath == NULL)
+			continue;
+
+		if ((node = efi_devpath_last_node(devpath)) == NULL)
+			continue;
+		if (efipart_floppy(node) != NULL)
+			continue;
+
+		status = BS->HandleProtocol(efipart_handles[i],
+		    &blkio_guid, (void **)&blkio);
+		if (EFI_ERROR(status))
+			continue;
+
+		if (DevicePathType(node) == MEDIA_DEVICE_PATH &&
+		    DevicePathSubType(node) == MEDIA_HARDDRIVE_DP) {
+			devpathcpy = efi_devpath_trim(devpath);
+			if (devpathcpy == NULL)
+				continue;
+			tmpdevpath = devpathcpy;
+			status = BS->LocateDevicePath(&blkio_guid, &tmpdevpath,
+			    &handle);
+			free(devpathcpy);
+			if (EFI_ERROR(status))
+				continue;
+			/*
+			 * We do not support nested partitions.
+			 */
+			devpathcpy = efi_lookup_devpath(handle);
+			if (devpathcpy == NULL)
+				continue;
+			if ((node = efi_devpath_last_node(devpathcpy)) == NULL)
+				continue;
+			if (DevicePathType(node) == MEDIA_DEVICE_PATH &&
+			    DevicePathSubType(node) == MEDIA_HARDDRIVE_DP)
+				continue;
+			efipart_hdinfo_add(handle, efipart_handles[i]);
+			continue;
+		}
+
+		if (DevicePathType(node) == MEDIA_DEVICE_PATH &&
+		    DevicePathSubType(node) == MEDIA_FILEPATH_DP) {
+			efipart_hdinfo_add_filepath(efipart_handles[i]);
+			continue;
+		}
+	}
+}
+
+static int
+efipart_inithd(void)
+{
+	int rv;
+
+	rv = efipart_inithandles();
+	if (rv != 0)
+		return (rv);
+	STAILQ_INIT(&hdinfo);
+
+	efipart_updatehd();
+
+	bcache_add_dev(efiblk_pdinfo_count(&hdinfo));
+	return (0);
+}
+
+static int
+efipart_print_common(struct devsw *dev, pdinfo_list_t *pdlist, int verbose)
+{
 	int ret = 0;
+	EFI_BLOCK_IO *blkio;
+	EFI_STATUS status;
+	EFI_HANDLE h;
+	pdinfo_t *pd;
+	CHAR16 *text;
+	struct disk_devdesc pd_dev;
+	char line[80];
 
-	printf("%s devices:", efipart_dev.dv_name);
+	if (STAILQ_EMPTY(pdlist))
+		return (0);
+
+	printf("%s devices:", dev->dv_name);
 	if ((ret = pager_output("\n")) != 0)
 		return (ret);
 
-	for (unit = 0, h = efi_find_handle(&efipart_dev, 0);
-	    h != NULL; h = efi_find_handle(&efipart_dev, ++unit)) {
-		snprintf(line, sizeof(line), "    %s%d:",
-		    efipart_dev.dv_name, unit);
-		if ((ret = pager_output(line)) != 0)
-			break;
-
+	STAILQ_FOREACH(pd, pdlist, pd_link) {
+		h = pd->pd_handle;
+		if (verbose) {	/* Output the device path. */
+			text = efi_devpath_name(efi_lookup_devpath(h));
+			if (text != NULL) {
+				printf("  %S", text);
+				efi_free_devpath_name(text);
+				if ((ret = pager_output("\n")) != 0)
+					break;
+			}
+		}
+		snprintf(line, sizeof(line),
+		    "    %s%d", dev->dv_name, pd->pd_unit);
+		printf("%s:", line);
 		status = BS->HandleProtocol(h, &blkio_guid, (void **)&blkio);
 		if (!EFI_ERROR(status)) {
-			snprintf(line, sizeof(line), "    %llu blocks",
-			    (unsigned long long)(blkio->Media->LastBlock + 1));
-			if ((ret = pager_output(line)) != 0)
+			printf("    %llu",
+			    blkio->Media->LastBlock == 0? 0:
+			    (unsigned long long) (blkio->Media->LastBlock + 1));
+			if (blkio->Media->LastBlock != 0) {
+				printf(" X %u", blkio->Media->BlockSize);
+			}
+			printf(" blocks");
+			if (blkio->Media->MediaPresent) {
+				if (blkio->Media->RemovableMedia)
+					printf(" (removable)");
+			} else
+				printf(" (no media)");
+			if ((ret = pager_output("\n")) != 0)
 				break;
-			if (blkio->Media->RemovableMedia)
-				if ((ret = pager_output(" (removable)")) != 0)
-					break;
+			if (!blkio->Media->MediaPresent)
+				continue;
+
+			pd->pd_blkio = blkio;
+			pd_dev.d_dev = dev;
+			pd_dev.d_unit = pd->pd_unit;
+			pd_dev.d_slice = -1;
+			pd_dev.d_partition = -1;
+			pd_dev.d_opendata = blkio;
+			ret = disk_open(&pd_dev, blkio->Media->BlockSize *
+			    (blkio->Media->LastBlock + 1),
+			    blkio->Media->BlockSize);
+			if (ret == 0) {
+				ret = disk_print(&pd_dev, line, verbose);
+				disk_close(&pd_dev);
+				if (ret != 0)
+					return (ret);
+			} else {
+				/* Do not fail from disk_open() */
+				ret = 0;
+			}
+		} else {
+			if ((ret = pager_output("\n")) != 0)
+				break;
 		}
-		if ((ret = pager_output("\n")) != 0)
-			break;
 	}
 	return (ret);
 }
 
 static int
+efipart_printfd(int verbose)
+{
+	return (efipart_print_common(&efipart_fddev, &fdinfo, verbose));
+}
+
+static int
+efipart_printcd(int verbose)
+{
+	return (efipart_print_common(&efipart_cddev, &cdinfo, verbose));
+}
+
+static int
+efipart_printhd(int verbose)
+{
+	return (efipart_print_common(&efipart_hddev, &hdinfo, verbose));
+}
+
+pdinfo_list_t *
+efiblk_get_pdinfo_list(struct devsw *dev)
+{
+	if (dev->dv_type == DEVT_DISK)	/* select the list by device type */
+		return (&hdinfo);
+	if (dev->dv_type == DEVT_CD)
+		return (&cdinfo);
+	if (dev->dv_type == DEVT_FD)
+		return (&fdinfo);
+	return (NULL);			/* no list for any other type */
+}
+
+static int
 efipart_open(struct open_file *f, ...)
 {
 	va_list args;
-	struct devdesc *dev;
+	struct disk_devdesc *dev;
+	pdinfo_list_t *pdi;
+	pdinfo_t *pd;
 	EFI_BLOCK_IO *blkio;
-	EFI_HANDLE h;
 	EFI_STATUS status;
 
 	va_start(args, f);
-	dev = va_arg(args, struct devdesc*);
+	dev = va_arg(args, struct disk_devdesc*);
 	va_end(args);
+	if (dev == NULL)
+		return (EINVAL);
 
-	h = efi_find_handle(&efipart_dev, dev->d_unit);
-	if (h == NULL)
+	pdi = efiblk_get_pdinfo_list(dev->d_dev);
+	if (pdi == NULL)
 		return (EINVAL);
 
-	status = BS->HandleProtocol(h, &blkio_guid, (void **)&blkio);
-	if (EFI_ERROR(status))
-		return (efi_status_to_errno(status));
+	pd = efiblk_get_pdinfo(pdi, dev->d_unit);
+	if (pd == NULL)
+		return (EIO);
 
+	if (pd->pd_blkio == NULL) {
+		status = BS->HandleProtocol(pd->pd_handle, &blkio_guid,
+		    (void **)&pd->pd_blkio);
+		if (EFI_ERROR(status))
+			return (efi_status_to_errno(status));
+	}
+
+	blkio = pd->pd_blkio;
 	if (!blkio->Media->MediaPresent)
 		return (EAGAIN);
 
-	dev->d_opendata = blkio;
-	PD(dev).pd_open++;
-	if (PD(dev).pd_bcache == NULL)
-		PD(dev).pd_bcache = bcache_allocate();
+	pd->pd_open++;
+	if (pd->pd_bcache == NULL)
+		pd->pd_bcache = bcache_allocate();
+
+	if (dev->d_dev->dv_type == DEVT_DISK) {
+		return (disk_open(dev,
+		    blkio->Media->BlockSize * (blkio->Media->LastBlock + 1),
+		    blkio->Media->BlockSize));
+	}
 	return (0);
 }
 
 static int
 efipart_close(struct open_file *f)
 {
-	struct devdesc *dev;
+	struct disk_devdesc *dev;
+	pdinfo_list_t *pdi;
+	pdinfo_t *pd;
 
-	dev = (struct devdesc *)(f->f_devdata);
-	if (dev->d_opendata == NULL)
+	dev = (struct disk_devdesc *)(f->f_devdata);
+	if (dev == NULL)
 		return (EINVAL);
+	pdi = efiblk_get_pdinfo_list(dev->d_dev);
+	if (pdi == NULL)
+		return (EINVAL);
 
-	dev->d_opendata = NULL;
-	PD(dev).pd_open--;
-	if (PD(dev).pd_open == 0) {
-		bcache_free(PD(dev).pd_bcache);
-		PD(dev).pd_bcache = NULL;
+	pd = efiblk_get_pdinfo(pdi, dev->d_unit);
+	if (pd == NULL)
+		return (EINVAL);
+
+	pd->pd_open--;
+	if (pd->pd_open == 0) {
+		pd->pd_blkio = NULL;
+		bcache_free(pd->pd_bcache);
+		pd->pd_bcache = NULL;
 	}
+	if (dev->d_dev->dv_type == DEVT_DISK)
+		return (disk_close(dev));
 	return (0);
 }
 
+static int
+efipart_ioctl(struct open_file *f, u_long cmd, void *data)
+{
+	struct disk_devdesc *dev;
+	pdinfo_list_t *pdi;
+	pdinfo_t *pd;
+	int rc;
+
+	dev = (struct disk_devdesc *)(f->f_devdata);
+	if (dev == NULL)
+		return (EINVAL);
+	pdi = efiblk_get_pdinfo_list(dev->d_dev);
+	if (pdi == NULL)
+		return (EINVAL);
+
+	pd = efiblk_get_pdinfo(pdi, dev->d_unit);
+	if (pd == NULL)
+		return (EINVAL);
+
+	if (dev->d_dev->dv_type == DEVT_DISK) {
+		rc = disk_ioctl(dev, cmd, data);
+		if (rc != ENOTTY)	/* ENOTTY falls through to the switch */
+			return (rc);
+	}
+
+	switch (cmd) {	/* NOTE(review): pd_blkio is not NULL-checked here */
+	case DIOCGSECTORSIZE:
+		*(u_int *)data = pd->pd_blkio->Media->BlockSize;
+		break;
+	case DIOCGMEDIASIZE:	/* size in bytes of the whole block device */
+		*(uint64_t *)data = pd->pd_blkio->Media->BlockSize *
+		    (pd->pd_blkio->Media->LastBlock + 1);
+		break;
+	default:
+		return (ENOTTY);
+	}
+
+	return (0);
+}
+
 /*
  * efipart_readwrite()
  * Internal equivalent of efipart_strategy(), which operates on the
  * media-native block size. This function expects all I/O requests
  * to be within the media size and returns an error if such is not
  * the case.
  */
 static int
 efipart_readwrite(EFI_BLOCK_IO *blkio, int rw, daddr_t blk, daddr_t nblks,
     char *buf)
 {
 	EFI_STATUS status;
 
 	if (blkio == NULL)
 		return (ENXIO);
 	if (blk < 0 || blk > blkio->Media->LastBlock)
 		return (EIO);
 	if ((blk + nblks - 1) > blkio->Media->LastBlock)
 		return (EIO);
 
 	switch (rw) {
 	case F_READ:
 		status = blkio->ReadBlocks(blkio, blkio->Media->MediaId, blk,
 		    nblks * blkio->Media->BlockSize, buf);
 		break;
 	case F_WRITE:
 		if (blkio->Media->ReadOnly)
 			return (EROFS);
 		status = blkio->WriteBlocks(blkio, blkio->Media->MediaId, blk,
 		    nblks * blkio->Media->BlockSize, buf);
 		break;
 	default:
 		return (ENOSYS);
 	}
 
-	if (EFI_ERROR(status))
-		printf("%s: rw=%d, status=%lu\n", __func__, rw, (u_long)status);
+	if (EFI_ERROR(status)) {
+		printf("%s: rw=%d, blk=%ju size=%ju status=%lu\n", __func__, rw,
+		    blk, nblks, EFI_ERROR_CODE(status));
+	}
 	return (efi_status_to_errno(status));
 }
 
 static int
 efipart_strategy(void *devdata, int rw, daddr_t blk, size_t size,
     char *buf, size_t *rsize)
 {
 	struct bcache_devdata bcd;
-	struct devdesc *dev;
+	struct disk_devdesc *dev;
+	pdinfo_list_t *pdi;
+	pdinfo_t *pd;
 
-	dev = (struct devdesc *)devdata;
+	dev = (struct disk_devdesc *)devdata;
+	if (dev == NULL)
+		return (EINVAL);
+	pdi = efiblk_get_pdinfo_list(dev->d_dev);
+	if (pdi == NULL)
+		return (EINVAL);
+
+	pd = efiblk_get_pdinfo(pdi, dev->d_unit);
+	if (pd == NULL)
+		return (EINVAL);
+
+	if (pd->pd_blkio->Media->RemovableMedia &&
+	    !pd->pd_blkio->Media->MediaPresent)
+		return (EIO);
+
 	bcd.dv_strategy = efipart_realstrategy;
 	bcd.dv_devdata = devdata;
-	bcd.dv_cache = PD(dev).pd_bcache;
+	bcd.dv_cache = pd->pd_bcache;
+
+	if (dev->d_dev->dv_type == DEVT_DISK) {
+		return (bcache_strategy(&bcd, rw, blk + dev->d_offset,
+		    size, buf, rsize));
+	}
 	return (bcache_strategy(&bcd, rw, blk, size, buf, rsize));
 }
 
 static int
 efipart_realstrategy(void *devdata, int rw, daddr_t blk, size_t size,
     char *buf, size_t *rsize)
 {
-	struct devdesc *dev = (struct devdesc *)devdata;
+	struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
+	pdinfo_list_t *pdi;
+	pdinfo_t *pd;
 	EFI_BLOCK_IO *blkio;
-	off_t off;
+	uint64_t off, disk_blocks, d_offset = 0;
 	char *blkbuf;
 	size_t blkoff, blksz;
 	int error;
 
 	if (dev == NULL || blk < 0)
 		return (EINVAL);
 
-	blkio = dev->d_opendata;
+	pdi = efiblk_get_pdinfo_list(dev->d_dev);
+	if (pdi == NULL)
+		return (EINVAL);
+
+	pd = efiblk_get_pdinfo(pdi, dev->d_unit);
+	if (pd == NULL)
+		return (EINVAL);
+
+	blkio = pd->pd_blkio;
 	if (blkio == NULL)
 		return (ENXIO);
 
 	if (size == 0 || (size % 512) != 0)
 		return (EIO);
 
 	off = blk * 512;
+	/*
+	 * Get disk blocks, this value is either for whole disk or for
+	 * partition.
+	 */
+	disk_blocks = 0;
+	if (dev->d_dev->dv_type == DEVT_DISK) {
+		if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) {
+			/* DIOCGMEDIASIZE does return bytes. */
+			disk_blocks /= blkio->Media->BlockSize;
+		}
+		d_offset = dev->d_offset;
+	}
+	if (disk_blocks == 0)
+		disk_blocks = blkio->Media->LastBlock + 1 - d_offset;
+
 	/* make sure we don't read past disk end */
-	if ((off + size) / blkio->Media->BlockSize - 1 >
-	    blkio->Media->LastBlock) {
-		size = blkio->Media->LastBlock + 1 -
-		    off / blkio->Media->BlockSize;
+	if ((off + size) / blkio->Media->BlockSize > d_offset + disk_blocks) {
+		size = d_offset + disk_blocks - off / blkio->Media->BlockSize;
 		size = size * blkio->Media->BlockSize;
 	}
 
 	if (rsize != NULL)
 		*rsize = size;
 
-        if ((size % blkio->Media->BlockSize == 0) &&
-	    ((blk * 512) % blkio->Media->BlockSize == 0))
-                return (efipart_readwrite(blkio, rw,
-		    blk * 512 / blkio->Media->BlockSize,
+	if ((size % blkio->Media->BlockSize == 0) &&
+	    (off % blkio->Media->BlockSize == 0))
+		return (efipart_readwrite(blkio, rw,
+		    off / blkio->Media->BlockSize,
 		    size / blkio->Media->BlockSize, buf));
 
 	/*
 	 * The block size of the media is not a multiple of I/O.
 	 */
 	blkbuf = malloc(blkio->Media->BlockSize);
 	if (blkbuf == NULL)
 		return (ENOMEM);
 
 	error = 0;
 	blk = off / blkio->Media->BlockSize;
 	blkoff = off % blkio->Media->BlockSize;
 	blksz = blkio->Media->BlockSize - blkoff;
 	while (size > 0) {
 		error = efipart_readwrite(blkio, rw, blk, 1, blkbuf);
 		if (error)
 			break;
 		if (size < blksz)
 			blksz = size;
 		bcopy(blkbuf + blkoff, buf, blksz);
 		buf += blksz;
 		size -= blksz;
 		blk++;
 		blkoff = 0;
 		blksz = blkio->Media->BlockSize;
 	}
 
 	free(blkbuf);
 	return (error);
 }
Index: stable/11/sys/boot/efi/libefi/env.c
===================================================================
--- stable/11/sys/boot/efi/libefi/env.c	(revision 329098)
+++ stable/11/sys/boot/efi/libefi/env.c	(revision 329099)
@@ -1,234 +1,534 @@
 /*
  * Copyright (c) 2015 Netflix, Inc. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <sys/param.h>
 #include <stand.h>
 #include <string.h>
 #include <efi.h>
 #include <efilib.h>
 #include <uuid.h>
+#include <stdbool.h>
 #include "bootstrap.h"
+#ifdef BOOT_FORTH
 #include "ficl.h"
+#endif
 
-int efi_variable_support = 1;
-
 /*
  * Simple wrappers to the underlying UEFI functions.
  * See http://wiki.phoenix.com/wiki/index.php/EFI_RUNTIME_SERVICES
  * for details.
  */
 EFI_STATUS
-efi_get_next_variable_name(UINTN *variable_name_size, CHAR16 *variable_name, EFI_GUID *vendor_guid)
+efi_get_next_variable_name(UINTN *variable_name_size, CHAR16 *variable_name,
+    EFI_GUID *vendor_guid)
 {
-	return RS->GetNextVariableName(variable_name_size, variable_name, vendor_guid);
+	return (RS->GetNextVariableName(variable_name_size, variable_name,
+	    vendor_guid));
 }
 
 EFI_STATUS
-efi_get_variable(CHAR16 *variable_name, EFI_GUID *vendor_guid, UINT32 *attributes, UINTN *data_size,
-    void *data)
+efi_get_variable(CHAR16 *variable_name, EFI_GUID *vendor_guid,
+    UINT32 *attributes, UINTN *data_size, void *data)
 {
-	return RS->GetVariable(variable_name, vendor_guid, attributes, data_size, data);
+	return (RS->GetVariable(variable_name, vendor_guid, attributes,
+	    data_size, data));
 }
 
 EFI_STATUS
-efi_set_variable(CHAR16 *variable_name, EFI_GUID *vendor_guid, UINT32 attributes, UINTN data_size,
-    void *data)
+efi_set_variable(CHAR16 *variable_name, EFI_GUID *vendor_guid,
+    UINT32 attributes, UINTN data_size, void *data)
 {
-	return RS->SetVariable(variable_name, vendor_guid, attributes, data_size, data);
+	return (RS->SetVariable(variable_name, vendor_guid, attributes,
+	    data_size, data));
 }
 
+void
+efi_init_environment(void)
+{
+	char var[128];	/* "<high 16 bits>.<low 16 bits>" of Hdr.Revision */
+
+	snprintf(var, sizeof(var), "%d.%02d", ST->Hdr.Revision >> 16,
+	    ST->Hdr.Revision & 0xffff);
+	env_setenv("efi-version", EV_VOLATILE, var, env_noset, env_nounset);
+}
+
+COMMAND_SET(efishow, "efi-show", "print some or all EFI variables", command_efi_show);
+
+/* Print one EFI variable; with lflag only its GUID, attributes and name. */
+static int
+efi_print_var(CHAR16 *varnamearg, EFI_GUID *matchguid, int lflag)
+{
+	UINTN		datasz, i;
+	EFI_STATUS	status;
+	UINT32		attr;
+	CHAR16		*data;
+	char		*str;
+	uint32_t	uuid_status;
+	int		is_ascii;
+
+	/* Probe with an empty buffer to learn the size of the data. */
+	datasz = 0;
+	status = RS->GetVariable(varnamearg, matchguid, &attr, &datasz, NULL);
+	if (status != EFI_BUFFER_TOO_SMALL) {
+		printf("Can't get the variable: error %#lx\n",
+		    EFI_ERROR_CODE(status));
+		return (CMD_ERROR);
+	}
+	data = malloc(datasz);
+	if (data == NULL) {
+		printf("Can't allocate memory to get the variable\n");
+		return (CMD_ERROR);
+	}
+	status = RS->GetVariable(varnamearg, matchguid, &attr, &datasz, data);
+	if (status != EFI_SUCCESS) {
+		printf("Can't get the variable: error %#lx\n",
+		    EFI_ERROR_CODE(status));
+		free(data);
+		return (CMD_ERROR);
+	}
+	uuid_to_string((uuid_t *)matchguid, &str, &uuid_status);
+	printf("%s 0x%x %S%s", str, attr, varnamearg, lflag ? "" : "=");
+	free(str);		/* both output formats are done with it */
+	if (!lflag) {
+		/* Text means NUL terminated printable chars, tab, cr, lf. */
+		str = (char *)data;
+		is_ascii = (str[datasz - 1] == '\0');
+		for (i = 0; is_ascii && i < datasz - 1; i++) {
+			if ((str[i] < 32 || str[i] > 126) && str[i] != 9 &&
+			    str[i] != 10 && str[i] != 13)
+				is_ascii = 0;
+		}
+		if (is_ascii)
+			printf("%s", str);
+		else {
+			for (i = 0; i < datasz / 2; i++) {
+				if (isalnum(data[i]) || isspace(data[i]))
+					printf("%c", data[i]);
+				else
+					printf("\\x%02x", data[i]);
+			}
+		}
+	}
+	free(data);
+	if (pager_output("\n"))
+		return (CMD_WARN);
+	return (CMD_OK);
+}
+
+static int
+command_efi_show(int argc, char *argv[])
+{
+	/*
+	 * efi-show [-a]
+	 *	print all the env
+	 * efi-show -g UUID
+	 *	print all the env vars tagged with UUID
+	 * efi-show -v var
+	 *	search all the env vars and print the ones matching var
+	 * efi-show -g UUID -v var
+	 * efi-show var UUID
+	 *	print all the env vars that match UUID and var
+	 */
+	/* NB: We assume EFI_GUID is the same as uuid_t */
+	int		aflag = 0, gflag = 0, lflag = 0, vflag = 0;
+	int		ch, rv;
+	unsigned	i;
+	EFI_STATUS	status;
+	EFI_GUID	varguid = { 0,0,0,{0,0,0,0,0,0,0,0} };
+	EFI_GUID	matchguid = { 0,0,0,{0,0,0,0,0,0,0,0} };
+	uint32_t	uuid_status;
+	CHAR16		*varname, *newnm;
+	CHAR16		varnamearg[128];
+	UINTN		varalloc, varsz;
+
+	optind = 1;		/* getopt() state persists between commands */
+	optreset = 1;
+	while ((ch = getopt(argc, argv, "ag:lv:")) != -1) {
+		switch (ch) {
+		case 'a':
+			aflag = 1;
+			break;
+		case 'g':
+			gflag = 1;
+			uuid_from_string(optarg, (uuid_t *)&matchguid,
+			    &uuid_status);
+			if (uuid_status != uuid_s_ok) {
+				printf("uid %s could not be parsed\n", optarg);
+				return (CMD_ERROR);
+			}
+			break;
+		case 'l':
+			lflag = 1;	/* print names only, without values */
+			break;
+		case 'v':
+			vflag = 1;
+			if (strlen(optarg) >= nitems(varnamearg)) {
+				printf("Variable %s is longer than %zd characters\n",
+				    optarg, nitems(varnamearg));
+				return (CMD_ERROR);
+			}
+			for (i = 0; i < strlen(optarg); i++)
+				varnamearg[i] = optarg[i];
+			varnamearg[i] = 0;
+			break;
+		default:
+			printf("Invalid argument %c\n", ch);
+			return (CMD_ERROR);
+		}
+	}
+
+	if (aflag && (gflag || vflag)) {
+		printf("-a isn't compatible with -v or -g\n");
+		return (CMD_ERROR);
+	}
+
+	if (aflag && optind < argc) {
+		printf("-a doesn't take any args\n");
+		return (CMD_ERROR);
+	}
+
+	if (optind == argc && !gflag && !vflag)
+		aflag = 1;	/* no filter was given: show everything */
+
+	argc -= optind;
+	argv += optind;
+
+	pager_open();
+	if (vflag && gflag) {
+		rv = efi_print_var(varnamearg, &matchguid, lflag);
+		pager_close();
+		return (rv);
+	}
+
+	if (argc == 2) {
+		optarg = argv[0];
+		if (strlen(optarg) >= nitems(varnamearg)) {
+			printf("Variable %s is longer than %zd characters\n",
+			    optarg, nitems(varnamearg));
+			pager_close();
+			return (CMD_ERROR);
+		}
+		for (i = 0; i < strlen(optarg); i++)
+			varnamearg[i] = optarg[i];
+		varnamearg[i] = 0;
+		optarg = argv[1];
+		uuid_from_string(optarg, (uuid_t *)&matchguid,
+		    &uuid_status);
+		if (uuid_status != uuid_s_ok) {
+			printf("uid %s could not be parsed\n", optarg);
+			pager_close();
+			return (CMD_ERROR);
+		}
+		rv = efi_print_var(varnamearg, &matchguid, lflag);
+		pager_close();
+		return (rv);
+	}
+
+	if (argc > 0) {
+		printf("Too many args %d\n", argc);
+		pager_close();
+		return (CMD_ERROR);
+	}
+
+	/*
+	 * Initiate the search -- note the standard takes pain
+	 * to specify the initial call must be a pointer to a NULL
+	 * character.
+	 */
+	varalloc = 1024;
+	varname = malloc(varalloc);
+	if (varname == NULL) {
+		printf("Can't allocate memory to get variables\n");
+		pager_close();
+		return (CMD_ERROR);
+	}
+	varname[0] = 0;
+	while (1) {
+		varsz = varalloc;
+		status = RS->GetNextVariableName(&varsz, varname, &varguid);
+		if (status == EFI_BUFFER_TOO_SMALL) {
+			varalloc = varsz;
+			newnm = realloc(varname, varalloc);
+			if (newnm == NULL) {
+				printf("Can't allocate memory to get variables\n");
+				free(varname);
+				pager_close();
+				return (CMD_ERROR);
+			}
+			varname = newnm;
+			continue; /* Try again with bigger buffer */
+		}
+		if (status != EFI_SUCCESS)
+			break;
+		if (aflag) {
+			if (efi_print_var(varname, &varguid, lflag) != CMD_OK)
+				break;
+			continue;
+		}
+		if (vflag) {
+			if (wcscmp(varnamearg, varname) == 0) {
+				if (efi_print_var(varname, &varguid, lflag) != CMD_OK)
+					break;
+				continue;
+			}
+		}
+		if (gflag) {
+			if (memcmp(&varguid, &matchguid, sizeof(varguid)) == 0) {
+				if (efi_print_var(varname, &varguid, lflag) != CMD_OK)
+					break;
+				continue;
+			}
+		}
+	}
+	free(varname);
+	pager_close();
+
+	return (CMD_OK);
+}
+
+COMMAND_SET(efiset, "efi-set", "set EFI variables", command_efi_set);
+
+static int
+command_efi_set(int argc, char *argv[])
+{
+	char *uuid, *var, *val;
+	CHAR16 wvar[128];
+	EFI_GUID guid;
+	uint32_t status;
+	EFI_STATUS err;
+
+	if (argc != 4) {
+		printf("efi-set uuid var new-value\n");
+		return (CMD_ERROR);
+	}
+	uuid = argv[1];
+	var = argv[2];
+	val = argv[3];
+	uuid_from_string(uuid, (uuid_t *)&guid, &status);
+	if (status != uuid_s_ok) {
+		printf("Invalid uuid %s %d\n", uuid, status);
+		return (CMD_ERROR);
+	}
+	cpy8to16(var, wvar, sizeof(wvar));
+	err = RS->SetVariable(wvar, &guid,
+	    EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_RUNTIME_ACCESS | EFI_VARIABLE_BOOTSERVICE_ACCESS,
+	    strlen(val) + 1, val);
+	if (EFI_ERROR(err)) {
+		printf("Failed to set variable: error %lu\n", EFI_ERROR_CODE(err));
+		return (CMD_ERROR);
+	}
+	return (CMD_OK);
+}
+
+COMMAND_SET(efiunset, "efi-unset", "delete / unset EFI variables", command_efi_unset);
+
+static int
+command_efi_unset(int argc, char *argv[])
+{
+	char *uuid, *var;
+	CHAR16 wvar[128];
+	EFI_GUID guid;
+	uint32_t status;
+	EFI_STATUS err;
+
+	if (argc != 3) {
+		printf("efi-unset uuid var\n");
+		return (CMD_ERROR);
+	}
+	uuid = argv[1];
+	var = argv[2];
+	uuid_from_string(uuid, (uuid_t *)&guid, &status);
+	if (status != uuid_s_ok) {
+		printf("Invalid uuid %s\n", uuid);
+		return (CMD_ERROR);
+	}
+	cpy8to16(var, wvar, sizeof(wvar));
+	err = RS->SetVariable(wvar, &guid, 0, 0, NULL);
+	if (EFI_ERROR(err)) {
+		printf("Failed to unset variable: error %lu\n", EFI_ERROR_CODE(err));
+		return (CMD_ERROR);
+	}
+	return (CMD_OK);
+}
+
+#ifdef BOOT_FORTH
 /*
- *		FreeBSD's loader interaction words and extras
+ * FreeBSD's loader interaction words and extras
  *
- * 		efi-setenv  ( value n name n guid n attr -- 0 | -1)
- * 		efi-getenv  ( guid n addr n -- addr' n' | -1 )
- * 		efi-unsetenv ( name n guid n'' -- )
+ *	efi-setenv  ( value n name n guid n attr -- 0 | -1)
+ * 	efi-getenv  ( guid n addr n -- addr' n' | -1 )
+ * 	efi-unsetenv ( name n guid n'' -- )
  */
 
 /*
  * efi-setenv
- * 		efi-setenv  ( value n name n guid n attr -- 0 | -1)
+ * 	efi-setenv  ( value n name n guid n attr -- 0 | -1)
  *
  * Set environment variables using the SetVariable EFI runtime service.
  *
  * Value and guid are passed through in binary form (so guid needs to be
  * converted to binary form from its string form). Name is converted from
  * ASCII to CHAR16. Since ficl doesn't have support for internationalization,
  * there's no native CHAR16 interface provided.
  *
  * attr is an int in the bitmask of the following attributes for this variable.
  *
  *	1	Non volatile
  *	2	Boot service access
  *	4	Run time access
  * (corresponding to the same bits in the UEFI spec).
  */
-void
+static void
 ficlEfiSetenv(FICL_VM *pVM)
 {
-#ifndef TESTMAIN
 	char	*value = NULL, *guid = NULL;
 	CHAR16	*name = NULL;
 	int	i;
-#endif
 	char	*namep, *valuep, *guidp;
 	int	names, values, guids, attr;
-	int	status;
+	EFI_STATUS status;
 	uuid_t	u;
 	uint32_t ustatus;
+	bool	error = true;
 
 #if FICL_ROBUST > 1
 	vmCheckStack(pVM, 6, 0);
 #endif
 	attr = stackPopINT(pVM->pStack);
 	guids = stackPopINT(pVM->pStack);
 	guidp = (char*)stackPopPtr(pVM->pStack);
 	names = stackPopINT(pVM->pStack);
 	namep = (char*)stackPopPtr(pVM->pStack);
 	values = stackPopINT(pVM->pStack);
 	valuep = (char*)stackPopPtr(pVM->pStack);
 
-#ifndef TESTMAIN
 	guid = (char*)ficlMalloc(guids);
 	if (guid == NULL)
-		vmThrowErr(pVM, "Error: out of memory");
+		goto out;
 	memcpy(guid, guidp, guids);
 	uuid_from_string(guid, &u, &ustatus);
 	if (ustatus != uuid_s_ok) {
 		stackPushINT(pVM->pStack, -1);
-		goto out;
+		goto done;	/* a bad GUID is not an out of memory error */
 	}
 
-	name = (CHAR16 *)ficlMalloc((names + 1) * sizeof(CHAR16));
+	name = ficlMalloc((names + 1) * sizeof(CHAR16));
 	if (name == NULL)
-		vmThrowErr(pVM, "Error: out of memory");
+		goto out;
 	for (i = 0; i < names; i++)
 		name[i] = namep[i];
-	name[names] = (CHAR16)0;
+	name[names] = 0;
 
-	value = (char*)ficlMalloc(values + 1);
+	value = ficlMalloc(values + 1);
 	if (value == NULL)
-		vmThrowErr(pVM, "Error: out of memory");
+		goto out;
 	memcpy(value, valuep, values);
 
 	status = efi_set_variable(name, (EFI_GUID *)&u, attr, values, value);
 	if (status == EFI_SUCCESS)
 		stackPushINT(pVM->pStack, 0);
 	else
 		stackPushINT(pVM->pStack, -1);
+done:
+	error = false;	/* the outcome is already on the stack */
 out:
 	ficlFree(name);
 	ficlFree(value);
 	ficlFree(guid);
-#endif
-
-	return;
+	if (error == true)
+		vmThrowErr(pVM, "Error: out of memory");
 }
 
-void
+static void
 ficlEfiGetenv(FICL_VM *pVM)
 {
-#ifndef TESTMAIN
 	char	*name, *value;
-#endif
 	char	*namep;
 	int	names;
 
 #if FICL_ROBUST > 1
 	vmCheckStack(pVM, 2, 2);
 #endif
 	names = stackPopINT(pVM->pStack);
 	namep = (char*) stackPopPtr(pVM->pStack);
 
-#ifndef TESTMAIN
 	name = (char*) ficlMalloc(names+1);
 	if (name == NULL)
 		vmThrowErr(pVM, "Error: out of memory");
 	strncpy(name, namep, names);
 	name[names] = '\0';
 
 	value = getenv(name);
 	ficlFree(name);
 
 	if(value != NULL) {
 		stackPushPtr(pVM->pStack, value);
 		stackPushINT(pVM->pStack, strlen(value));
 	} else
-#endif
 		stackPushINT(pVM->pStack, -1);
-
-	return;
 }
 
-void
+static void
 ficlEfiUnsetenv(FICL_VM *pVM)
 {
-#ifndef TESTMAIN
 	char	*name;
-#endif
 	char	*namep;
 	int	names;
 
 #if FICL_ROBUST > 1
 	vmCheckStack(pVM, 2, 0);
 #endif
 	names = stackPopINT(pVM->pStack);
 	namep = (char*) stackPopPtr(pVM->pStack);
 
-#ifndef TESTMAIN
 	name = (char*) ficlMalloc(names+1);
 	if (name == NULL)
 		vmThrowErr(pVM, "Error: out of memory");
 	strncpy(name, namep, names);
 	name[names] = '\0';
 
 	unsetenv(name);
 	ficlFree(name);
-#endif
-
-	return;
 }
 
 /**************************************************************************
 ** Add FreeBSD UEFI platform extensions into the system dictionary
 **************************************************************************/
 void ficlEfiCompilePlatform(FICL_SYSTEM *pSys)
 {
-    FICL_DICT *dp = pSys->dp;
-    assert (dp);
+	FICL_DICT *dp = pSys->dp;
+	assert (dp);
 
-    dictAppendWord(dp, "efi-setenv",    ficlEfiSetenv,	    FW_DEFAULT);
-    dictAppendWord(dp, "efi-getenv",    ficlEfiGetenv,	    FW_DEFAULT);
-    dictAppendWord(dp, "efi-unsetenv",  ficlEfiUnsetenv,    FW_DEFAULT);
-
-    /* Would like to export the EFI version, but this will do for now */
-    ficlSetEnv(pSys, "efi-boot", 1);
-
-    return;
+	dictAppendWord(dp, "efi-setenv",    ficlEfiSetenv,	FW_DEFAULT);
+	dictAppendWord(dp, "efi-getenv",    ficlEfiGetenv,	FW_DEFAULT);
+	dictAppendWord(dp, "efi-unsetenv",  ficlEfiUnsetenv,    FW_DEFAULT);
 }
 
 FICL_COMPILE_SET(ficlEfiCompilePlatform);
+
+#endif	/* BOOT_FORTH */
Index: stable/11/sys/boot/efi/libefi/wchar.c
===================================================================
--- stable/11/sys/boot/efi/libefi/wchar.c	(nonexistent)
+++ stable/11/sys/boot/efi/libefi/wchar.c	(revision 329099)
@@ -0,0 +1,73 @@
+/*-
+ * Copyright 2016 Netflix, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <efi.h>
+#include <efilib.h>
+
+/*
+ * CHAR16 related functions moved from loader.
+ * Perhaps we should move those to libstand after all, but they are
+ * needed only by UEFI.
+ */
+
+int
+wcscmp(CHAR16 *a, CHAR16 *b)
+{
+	/* Walk both strings while they agree and a has not ended. */
+	for (; *a != 0 && *a == *b; a++, b++)
+		continue;
+
+	/* Difference of the first differing units; 0 for equal strings. */
+	return (*a - *b);
+}
+
+/*
+ * cpy8to16 copies a traditional C string into a CHAR16 string and 0
+ * terminates it, truncating to fit. len is the size of *dst in bytes.
+ */
+void
+cpy8to16(const char *src, CHAR16 *dst, size_t len)
+{
+	len /= sizeof(CHAR16);		/* bytes -> CHAR16 slots in *dst */
+	if (len == 0)
+		return;			/* no room even for the terminator */
+	while (--len > 0 && *src)	/* keep one slot for the terminator */
+		*dst++ = (unsigned char)*src++;	/* no sign extension */
+	*dst = (CHAR16)0;
+}
+
+void
+cpy16to8(const CHAR16 *src, char *dst, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i + 1 < len && src[i]; i++)	/* keep room for the NUL */
+		dst[i] = (char)src[i];
+	if (len > 0)		/* always terminate, truncating if needed */
+		dst[i] = '\0';
+}

Property changes on: stable/11/sys/boot/efi/libefi/wchar.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: stable/11/sys/boot/efi/loader/conf.c
===================================================================
--- stable/11/sys/boot/efi/loader/conf.c	(revision 329098)
+++ stable/11/sys/boot/efi/loader/conf.c	(revision 329099)
@@ -1,79 +1,81 @@
 /*-
  * Copyright (c) 2006 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <bootstrap.h>
 #include <efi.h>
 #include <efilib.h>
 #ifdef EFI_ZFS_BOOT
 #include <libzfs.h>
 #endif
 
 struct devsw *devsw[] = {
-	&efipart_dev,
+	&efipart_fddev,
+	&efipart_cddev,
+	&efipart_hddev,
 	&efinet_dev,
 #ifdef EFI_ZFS_BOOT
 	&zfs_dev,
 #endif
 	NULL
 };
 
 struct fs_ops *file_system[] = {
 #ifdef EFI_ZFS_BOOT
 	&zfs_fsops,
 #endif
 	&dosfs_fsops,
 	&ufs_fsops,
 	&cd9660_fsops,
 	&tftp_fsops,
 	&nfs_fsops,
 	&gzipfs_fsops,
 	&bzipfs_fsops,
 	NULL
 };
 
 struct netif_driver *netif_drivers[] = {
 	&efinetif,
 	NULL
 };
 
 extern struct console efi_console;
 #if defined(__amd64__) || defined(__i386__)
 extern struct console comconsole;
 extern struct console nullconsole;
 #endif
 
 struct console *consoles[] = {
 	&efi_console,
 #if defined(__amd64__) || defined(__i386__)
 	&comconsole,
 	&nullconsole,
 #endif
 	NULL
 };
Index: stable/11/sys/boot/efi/loader/devicename.c
===================================================================
--- stable/11/sys/boot/efi/loader/devicename.c	(revision 329098)
+++ stable/11/sys/boot/efi/loader/devicename.c	(revision 329099)
@@ -1,197 +1,217 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2006 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <string.h>
 #include <sys/disklabel.h>
 #include <sys/param.h>
 #include <bootstrap.h>
+#include <disk.h>
 #ifdef EFI_ZFS_BOOT
 #include <libzfs.h>
 #endif
 
 #include <efi.h>
 #include <efilib.h>
 
 #include "loader_efi.h"
 
 static int efi_parsedev(struct devdesc **, const char *, const char **);
 
 /*
  * Point (dev) at an allocated device specifier for the device matching the
  * path in (devspec). If it contains an explicit device specification,
  * use that.  If not, use the default device.
  */
 int
 efi_getdev(void **vdev, const char *devspec, const char **path)
 {
 	struct devdesc **dev = (struct devdesc **)vdev;
 	int rv;
 
 	/*
 	 * If it looks like this is just a path and no device, then
 	 * use the current device instead.
 	 */
 	if (devspec == NULL || *devspec == '/' || !strchr(devspec, ':')) {
 		rv = efi_parsedev(dev, getenv("currdev"), NULL);
 		if (rv == 0 && path != NULL)
 			*path = devspec;
 		return (rv);
 	}
 
 	/* Parse the device name off the beginning of the devspec. */
 	return (efi_parsedev(dev, devspec, path));
 }
 
 /*
  * Point (dev) at an allocated device specifier matching the string version
  * at the beginning of (devspec).  Return a pointer to the remaining
  * text in (path).
  *
  * In all cases, the beginning of (devspec) is compared to the names
  * of known devices in the device switch, and then any following text
  * is parsed according to the rules applied to the device type.
  *
  * For disk-type devices, the syntax is:
  *
  * fs<unit>:
  */
 static int
 efi_parsedev(struct devdesc **dev, const char *devspec, const char **path)
 {
 	struct devdesc *idev;
 	struct devsw *dv;
 	char *cp;
 	const char *np;
-	int i;
+	int i, err;
 
 	/* minimum length check */
 	if (strlen(devspec) < 2)
 		return (EINVAL);
 
 	/* look for a device that matches */
 	for (i = 0; devsw[i] != NULL; i++) {
 		dv = devsw[i];
 		if (!strncmp(devspec, dv->dv_name, strlen(dv->dv_name)))
 			break;
 	}
 	if (devsw[i] == NULL)
 		return (ENOENT);
 
 	np = devspec + strlen(dv->dv_name);
+	err = 0;
 
-#ifdef EFI_ZFS_BOOT
-	if (dv->dv_type == DEVT_ZFS) {
-		int err;
+	switch (dv->dv_type) {
 
+	case DEVT_DISK:
+		idev = malloc(sizeof(struct disk_devdesc));
+		if (idev == NULL)
+			return (ENOMEM);
+
+		err = disk_parsedev((struct disk_devdesc *)idev, np, path);
+		if (err != 0) {
+			free(idev);
+			return (err);
+		}
+		break;
+
+#ifdef EFI_ZFS_BOOT
+	case DEVT_ZFS:
 		idev = malloc(sizeof(struct zfs_devdesc));
 		if (idev == NULL)
 			return (ENOMEM);
 
 		err = zfs_parsedev((struct zfs_devdesc*)idev, np, path);
 		if (err != 0) {
 			free(idev);
 			return (err);
 		}
-		cp = strchr(np + 1, ':');
-	} else
+		break;
 #endif
-	{
+	case DEVT_NONE:
+		/* Must not leave idev unset: parse as a generic device. */
+	default:
 		idev = malloc(sizeof(struct devdesc));
 		if (idev == NULL)
 			return (ENOMEM);
 
-		idev->d_dev = dv;
-		idev->d_type = dv->dv_type;
 		idev->d_unit = -1;
+		cp = (char *)np;
 		if (*np != '\0' && *np != ':') {
 			idev->d_unit = strtol(np, &cp, 0);
 			if (cp == np) {
-				idev->d_unit = -1;
 				free(idev);
 				return (EUNIT);
 			}
 		}
-	}
+		if (*cp != '\0' && *cp != ':') {
+			free(idev);
+			return (EINVAL);
+		}
 
-	if (*cp != '\0' && *cp != ':') {
-		free(idev);
-		return (EINVAL);
+		if (path != NULL)
+			*path = (*cp == 0) ? cp : cp + 1;
+		break;
 	}
 
-	if (path != NULL)
-		*path = (*cp == 0) ? cp : cp + 1;
+	idev->d_dev = dv;
+	idev->d_type = dv->dv_type;
+
 	if (dev != NULL)
 		*dev = idev;
 	else
 		free(idev);
 	return (0);
 }
 
 char *
 efi_fmtdev(void *vdev)
 {
 	struct devdesc *dev = (struct devdesc *)vdev;
 	static char buf[SPECNAMELEN + 1];
 
 	switch(dev->d_type) {
-#ifdef EFI_ZFS_BOOT
-	case DEVT_ZFS:
-		return (zfs_fmtdev(dev));
-#endif
 	case DEVT_NONE:
 		strcpy(buf, "(no device)");
 		break;
 
+	case DEVT_DISK:
+		return (disk_fmtdev(vdev));
+
+#ifdef EFI_ZFS_BOOT
+	case DEVT_ZFS:
+		return (zfs_fmtdev(dev));
+#endif
 	default:
 		sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit);
 		break;
 	}
 
 	return (buf);
 }
 
 /*
  * Set currdev to suit the value being supplied in (value)
  */
 int
 efi_setcurrdev(struct env_var *ev, int flags, const void *value)
 {
 	struct devdesc *ncurr;
 	int rv;
 
 	rv = efi_parsedev(&ncurr, value, NULL);
 	if (rv != 0)
 		return (rv);
 
 	free(ncurr);
 	env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL);
 	return (0);
 }
Index: stable/11/sys/boot/efi/loader/main.c
===================================================================
--- stable/11/sys/boot/efi/loader/main.c	(revision 329098)
+++ stable/11/sys/boot/efi/loader/main.c	(revision 329099)
@@ -1,1110 +1,841 @@
 /*-
  * Copyright (c) 2008-2010 Rui Paulo
  * Copyright (c) 2006 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/reboot.h>
 #include <sys/boot.h>
 #include <inttypes.h>
 #include <stand.h>
 #include <string.h>
 #include <setjmp.h>
+#include <disk.h>
 
 #include <efi.h>
 #include <efilib.h>
 
 #include <uuid.h>
 
 #include <bootstrap.h>
 #include <smbios.h>
 
 #ifdef EFI_ZFS_BOOT
 #include <libzfs.h>
 #endif
 
 #include "loader_efi.h"
 
 extern char bootprog_info[];
 
-#ifdef BOOT_FORTH
-/*
- * Normally, efi.o from libefi.a would be brought in due to a function we call
- * there that's defined there.  However, none of its functions are callable from
- * here since it just adds words to the FORTH environment or implement those
- * words. So, add a reference to a symbol in efi.o to force it to be be brought
- * in so the init function there gets added to the "compile" linker set happens
- * correctly.
- *
- * This assumes there's no global analysys that notices dummy1 isn't used
- * anywhere and tries to eliminate it.
- */
-extern int efi_variable_support;
-int *dummy1 = &efi_variable_support;
-#endif
-
 struct arch_switch archsw;	/* MI/MD interface boundary */
 
 EFI_GUID acpi = ACPI_TABLE_GUID;
 EFI_GUID acpi20 = ACPI_20_TABLE_GUID;
 EFI_GUID devid = DEVICE_PATH_PROTOCOL;
 EFI_GUID imgid = LOADED_IMAGE_PROTOCOL;
 EFI_GUID mps = MPS_TABLE_GUID;
 EFI_GUID netid = EFI_SIMPLE_NETWORK_PROTOCOL;
 EFI_GUID smbios = SMBIOS_TABLE_GUID;
 EFI_GUID dxe = DXE_SERVICES_TABLE_GUID;
 EFI_GUID hoblist = HOB_LIST_TABLE_GUID;
 EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID;
 EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID;
 EFI_GUID fdtdtb = FDT_TABLE_GUID;
 EFI_GUID inputid = SIMPLE_TEXT_INPUT_PROTOCOL;
 
 #ifdef EFI_ZFS_BOOT
 static void efi_zfs_probe(void);
+static uint64_t pool_guid;
 #endif
 
-/*
- * cpy8to16 copies a traditional C string into a CHAR16 string and
- * 0 terminates it. len is the size of *dst in bytes.
- */
-static void
-cpy8to16(const char *src, CHAR16 *dst, size_t len)
-{
-	len <<= 1;		/* Assume CHAR16 is 2 bytes */
-	while (len > 0 && *src) {
-		*dst++ = *src++;
-		len--;
-	}
-	*dst++ = (CHAR16)0;
-}
-
-static void
-cpy16to8(const CHAR16 *src, char *dst, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len && src[i]; i++)
-		dst[i] = (char)src[i];
-	if (i < len)
-		dst[i] = '\0';
-}
-
 static int
 has_keyboard(void)
 {
 	EFI_STATUS status;
 	EFI_DEVICE_PATH *path;
 	EFI_HANDLE *hin, *hin_end, *walker;
 	UINTN sz;
 	int retval = 0;
 	
 	/*
 	 * Find all the handles that support the SIMPLE_TEXT_INPUT_PROTOCOL and
 	 * do the typical dance to get the right sized buffer.
 	 */
 	sz = 0;
 	hin = NULL;
 	status = BS->LocateHandle(ByProtocol, &inputid, 0, &sz, 0);
 	if (status == EFI_BUFFER_TOO_SMALL) {
 		hin = (EFI_HANDLE *)malloc(sz);
 		status = BS->LocateHandle(ByProtocol, &inputid, 0, &sz,
 		    hin);
 		if (EFI_ERROR(status))
 			free(hin);
 	}
 	if (EFI_ERROR(status))
 		return retval;
 
 	/*
 	 * Look at each of the handles. If it supports the device path protocol,
 	 * use it to get the device path for this handle. Then see if that
 	 * device path matches either the USB device path for keyboards or the
 	 * legacy device path for keyboards.
 	 */
 	hin_end = &hin[sz / sizeof(*hin)];
 	for (walker = hin; walker < hin_end; walker++) {
 		status = BS->HandleProtocol(*walker, &devid, (VOID **)&path);
 		if (EFI_ERROR(status))
 			continue;
 
 		while (!IsDevicePathEnd(path)) {
 			/*
 			 * Check for the ACPI keyboard node. All PNP3xx nodes
 			 * are keyboards of different flavors. Note: It is
 			 * unclear of there's always a keyboard node when
 			 * there's a keyboard controller, or if there's only one
 			 * when a keyboard is detected at boot.
 			 */
 			if (DevicePathType(path) == ACPI_DEVICE_PATH &&
 			    (DevicePathSubType(path) == ACPI_DP ||
 				DevicePathSubType(path) == ACPI_EXTENDED_DP)) {
 				ACPI_HID_DEVICE_PATH  *acpi;
 
 				acpi = (ACPI_HID_DEVICE_PATH *)(void *)path;
 				if ((EISA_ID_TO_NUM(acpi->HID) & 0xff00) == 0x300 &&
 				    (acpi->HID & 0xffff) == PNP_EISA_ID_CONST) {
 					retval = 1;
 					goto out;
 				}
 			/*
 			 * Check for USB keyboard node, if present. Unlike a
 			 * PS/2 keyboard, these definitely only appear when
 			 * connected to the system.
 			 */
 			} else if (DevicePathType(path) == MESSAGING_DEVICE_PATH &&
 			    DevicePathSubType(path) == MSG_USB_CLASS_DP) {
 				USB_CLASS_DEVICE_PATH *usb;
 			       
 				usb = (USB_CLASS_DEVICE_PATH *)(void *)path;
 				if (usb->DeviceClass == 3 && /* HID */
 				    usb->DeviceSubClass == 1 && /* Boot devices */
 				    usb->DeviceProtocol == 1) { /* Boot keyboards */
 					retval = 1;
 					goto out;
 				}
 			}
 			path = NextDevicePathNode(path);
 		}
 	}
 out:
 	free(hin);
 	return retval;
 }
 
+static void
+set_devdesc_currdev(struct devsw *dev, int unit)
+{
+	struct devdesc desc;
+	char *name;
+
+	/* Describe unit `unit' of `dev' and format it as a device name. */
+	desc.d_dev = dev;
+	desc.d_type = dev->dv_type;
+	desc.d_unit = unit;
+	desc.d_opendata = NULL;
+	name = efi_fmtdev(&desc);
+	/* Publish the formatted name as both currdev and loaddev. */
+	env_setenv("currdev", EV_VOLATILE, name, efi_setcurrdev, env_nounset);
+	env_setenv("loaddev", EV_VOLATILE, name, env_noset, env_nounset);
+}
+
 static int
-find_currdev(EFI_LOADED_IMAGE *img, struct devsw **dev, int *unit,
-    uint64_t *extra)
+find_currdev(EFI_LOADED_IMAGE *img)
 {
+	pdinfo_list_t *pdi_list;
+	pdinfo_t *dp, *pp;
 	EFI_DEVICE_PATH *devpath, *copy;
 	EFI_HANDLE h;
+	char *devname;
+	struct devsw *dev;
+	int unit;
+	uint64_t extra;
 
+#ifdef EFI_ZFS_BOOT
+	/* Did efi_zfs_probe() detect the boot pool? */
+	if (pool_guid != 0) {
+		struct zfs_devdesc currdev;
+
+		currdev.d_dev = &zfs_dev;
+		currdev.d_unit = 0;
+		currdev.d_type = currdev.d_dev->dv_type;
+		currdev.d_opendata = NULL;
+		currdev.pool_guid = pool_guid;
+		currdev.root_guid = 0;
+		devname = efi_fmtdev(&currdev);
+
+		env_setenv("currdev", EV_VOLATILE, devname, efi_setcurrdev,
+		    env_nounset);
+		env_setenv("loaddev", EV_VOLATILE, devname, env_noset,
+		    env_nounset);
+		init_zfs_bootenv(devname);
+		return (0);
+	}
+#endif /* EFI_ZFS_BOOT */
+
+	/* We have device lists for hd, cd, fd, walk them all. */
+	pdi_list = efiblk_get_pdinfo_list(&efipart_hddev);
+	STAILQ_FOREACH(dp, pdi_list, pd_link) {
+		struct disk_devdesc currdev;
+
+		currdev.d_dev = &efipart_hddev;
+		currdev.d_type = currdev.d_dev->dv_type;
+		currdev.d_unit = dp->pd_unit;
+		currdev.d_opendata = NULL;
+		currdev.d_slice = -1;
+		currdev.d_partition = -1;
+
+		if (dp->pd_handle == img->DeviceHandle) {
+			devname = efi_fmtdev(&currdev);
+
+			env_setenv("currdev", EV_VOLATILE, devname,
+			    efi_setcurrdev, env_nounset);
+			env_setenv("loaddev", EV_VOLATILE, devname,
+			    env_noset, env_nounset);
+			return (0);
+		}
+		/* Assuming GPT partitioning. */
+		STAILQ_FOREACH(pp, &dp->pd_part, pd_link) {
+			if (pp->pd_handle == img->DeviceHandle) {
+				currdev.d_slice = pp->pd_unit;
+				currdev.d_partition = 255;
+				devname = efi_fmtdev(&currdev);
+
+				env_setenv("currdev", EV_VOLATILE, devname,
+				    efi_setcurrdev, env_nounset);
+				env_setenv("loaddev", EV_VOLATILE, devname,
+				    env_noset, env_nounset);
+				return (0);
+			}
+		}
+	}
+
+	pdi_list = efiblk_get_pdinfo_list(&efipart_cddev);
+	STAILQ_FOREACH(dp, pdi_list, pd_link) {
+		if (dp->pd_handle == img->DeviceHandle ||
+		    dp->pd_alias == img->DeviceHandle) {
+			set_devdesc_currdev(&efipart_cddev, dp->pd_unit);
+			return (0);
+		}
+	}
+
+	pdi_list = efiblk_get_pdinfo_list(&efipart_fddev);
+	STAILQ_FOREACH(dp, pdi_list, pd_link) {
+		if (dp->pd_handle == img->DeviceHandle) {
+			set_devdesc_currdev(&efipart_fddev, dp->pd_unit);
+			return (0);
+		}
+	}
+
 	/*
 	 * Try the device handle from our loaded image first.  If that
 	 * fails, use the device path from the loaded image and see if
 	 * any of the nodes in that path match one of the enumerated
 	 * handles.
 	 */
-	if (efi_handle_lookup(img->DeviceHandle, dev, unit, extra) == 0)
+	if (efi_handle_lookup(img->DeviceHandle, &dev, &unit, &extra) == 0) {
+		set_devdesc_currdev(dev, unit);
 		return (0);
+	}
 
 	copy = NULL;
 	devpath = efi_lookup_image_devpath(IH);
 	while (devpath != NULL) {
 		h = efi_devpath_handle(devpath);
 		if (h == NULL)
 			break;
 
-		if (efi_handle_lookup(h, dev, unit, extra) == 0) {
-			if (copy != NULL)
-				free(copy);
+		free(copy);
+		copy = NULL;
+
+		if (efi_handle_lookup(h, &dev, &unit, &extra) == 0) {
+			set_devdesc_currdev(dev, unit);
 			return (0);
 		}
 
-		if (copy != NULL)
-			free(copy);
 		devpath = efi_lookup_devpath(h);
 		if (devpath != NULL) {
 			copy = efi_devpath_trim(devpath);
 			devpath = copy;
 		}
 	}
+	free(copy);
 
-	/* Try to fallback on first device */
-	if (devsw[0] != NULL) {
-		*dev = devsw[0];
-		return (0);
-	}
 	return (ENOENT);
 }
 
 EFI_STATUS
 main(int argc, CHAR16 *argv[])
 {
 	char var[128];
 	EFI_LOADED_IMAGE *img;
 	EFI_GUID *guid;
-	int i, j, vargood, unit, howto;
-	struct devsw *dev;
-	uint64_t pool_guid;
+	int i, j, vargood, howto;
 	UINTN k;
 	int has_kbd;
 	char buf[40];
 
 	archsw.arch_autoload = efi_autoload;
 	archsw.arch_getdev = efi_getdev;
 	archsw.arch_copyin = efi_copyin;
 	archsw.arch_copyout = efi_copyout;
 	archsw.arch_readin = efi_readin;
 #ifdef EFI_ZFS_BOOT
 	/* Note this needs to be set before ZFS init. */
 	archsw.arch_zfs_probe = efi_zfs_probe;
 #endif
 
 	/* Init the time source */
 	efi_time_init();
 
 	has_kbd = has_keyboard();
 
 	/*
 	 * XXX Chicken-and-egg problem; we want to have console output
 	 * early, but some console attributes may depend on reading from
 	 * eg. the boot device, which we can't do yet.  We can use
 	 * printf() etc. once this is done.
 	 */
 	cons_probe();
 
 	/*
 	 * Initialise the block cache. Set the upper limit.
 	 */
 	bcache_init(32768, 512);
 
 	/*
 	 * Parse the args to set the console settings, etc
 	 * boot1.efi passes these in, if it can read /boot.config or /boot/config
 	 * or iPXE may be setup to pass these in.
 	 *
 	 * Loop through the args, and for each one that contains an '=' that is
 	 * not the first character, add it to the environment.  This allows
 	 * loader and kernel env vars to be passed on the command line.  Convert
 	 * args from UCS-2 to ASCII (16 to 8 bit) as they are copied.
 	 */
 	howto = 0;
 	for (i = 1; i < argc; i++) {
 		if (argv[i][0] == '-') {
 			for (j = 1; argv[i][j] != 0; j++) {
 				int ch;
 
 				ch = argv[i][j];
 				switch (ch) {
 				case 'a':
 					howto |= RB_ASKNAME;
 					break;
 				case 'd':
 					howto |= RB_KDB;
 					break;
 				case 'D':
 					howto |= RB_MULTIPLE;
 					break;
 				case 'h':
 					howto |= RB_SERIAL;
 					break;
 				case 'm':
 					howto |= RB_MUTE;
 					break;
 				case 'p':
 					howto |= RB_PAUSE;
 					break;
 				case 'P':
 					if (!has_kbd)
 						howto |= RB_SERIAL | RB_MULTIPLE;
 					break;
 				case 'r':
 					howto |= RB_DFLTROOT;
 					break;
 				case 's':
 					howto |= RB_SINGLE;
 					break;
 				case 'S':
 					if (argv[i][j + 1] == 0) {
 						if (i + 1 == argc) {
 							setenv("comconsole_speed", "115200", 1);
 						} else {
 							cpy16to8(&argv[i + 1][0], var,
 							    sizeof(var));
 							setenv("comconsole_speed", var, 1);
 						}
 						i++;
 						break;
 					} else {
 						cpy16to8(&argv[i][j + 1], var,
 						    sizeof(var));
 						setenv("comconsole_speed", var, 1);
 						break;
 					}
 				case 'v':
 					howto |= RB_VERBOSE;
 					break;
 				}
 			}
 		} else {
 			vargood = 0;
 			for (j = 0; argv[i][j] != 0; j++) {
 				if (j == sizeof(var)) {
 					vargood = 0;
 					break;
 				}
 				if (j > 0 && argv[i][j] == '=')
 					vargood = 1;
 				var[j] = (char)argv[i][j];
 			}
 			if (vargood) {
 				var[j] = 0;
 				putenv(var);
 			}
 		}
 	}
 	for (i = 0; howto_names[i].ev != NULL; i++)
 		if (howto & howto_names[i].mask)
 			setenv(howto_names[i].ev, "YES", 1);
 	if (howto & RB_MULTIPLE) {
 		if (howto & RB_SERIAL)
 			setenv("console", "comconsole efi" , 1);
 		else
 			setenv("console", "efi comconsole" , 1);
 	} else if (howto & RB_SERIAL) {
 		setenv("console", "comconsole" , 1);
 	}
 
 	if (efi_copy_init()) {
 		printf("failed to allocate staging area\n");
 		return (EFI_BUFFER_TOO_SMALL);
 	}
 
 	/*
 	 * March through the device switch probing for things.
 	 */
 	for (i = 0; devsw[i] != NULL; i++)
 		if (devsw[i]->dv_init != NULL)
 			(devsw[i]->dv_init)();
 
 	/* Get our loaded image protocol interface structure. */
 	BS->HandleProtocol(IH, &imgid, (VOID**)&img);
 
 	printf("Command line arguments:");
 	for (i = 0; i < argc; i++)
 		printf(" %S", argv[i]);
 	printf("\n");
 
 	printf("Image base: 0x%lx\n", (u_long)img->ImageBase);
 	printf("EFI version: %d.%02d\n", ST->Hdr.Revision >> 16,
 	    ST->Hdr.Revision & 0xffff);
 	printf("EFI Firmware: %S (rev %d.%02d)\n", ST->FirmwareVendor,
 	    ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff);
 
 	printf("\n%s", bootprog_info);
 
 	/*
 	 * Disable the watchdog timer. By default the boot manager sets
 	 * the timer to 5 minutes before invoking a boot option. If we
 	 * want to return to the boot manager, we have to disable the
 	 * watchdog timer and since we're an interactive program, we don't
 	 * want to wait until the user types "quit". The timer may have
 	 * fired by then. We don't care if this fails. It does not prevent
 	 * normal functioning in any way...
 	 */
 	BS->SetWatchdogTimer(0, 0, 0, NULL);
 
-	if (find_currdev(img, &dev, &unit, &pool_guid) != 0)
+	if (find_currdev(img) != 0)
 		return (EFI_NOT_FOUND);
 
-	switch (dev->dv_type) {
-#ifdef EFI_ZFS_BOOT
-	case DEVT_ZFS: {
-		struct zfs_devdesc currdev;
-
-		currdev.d_dev = dev;
-		currdev.d_unit = unit;
-		currdev.d_type = currdev.d_dev->dv_type;
-		currdev.d_opendata = NULL;
-		currdev.pool_guid = pool_guid;
-		currdev.root_guid = 0;
-		env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev),
-			   efi_setcurrdev, env_nounset);
-		env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset,
-			   env_nounset);
-		init_zfs_bootenv(zfs_fmtdev(&currdev));
-		break;
-	}
-#endif
-	default: {
-		struct devdesc currdev;
-
-		currdev.d_dev = dev;
-		currdev.d_unit = unit;
-		currdev.d_opendata = NULL;
-		currdev.d_type = currdev.d_dev->dv_type;
-		env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev),
-			   efi_setcurrdev, env_nounset);
-		env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset,
-			   env_nounset);
-		break;
-	}
-	}
-
-	snprintf(var, sizeof(var), "%d.%02d", ST->Hdr.Revision >> 16,
-	    ST->Hdr.Revision & 0xffff);
-	env_setenv("efi-version", EV_VOLATILE, var, env_noset, env_nounset);
+	efi_init_environment();
 	setenv("LINES", "24", 1);	/* optional */
 
 	for (k = 0; k < ST->NumberOfTableEntries; k++) {
 		guid = &ST->ConfigurationTable[k].VendorGuid;
 		if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) {
 			snprintf(buf, sizeof(buf), "%p",
 			    ST->ConfigurationTable[k].VendorTable);
 			setenv("hint.smbios.0.mem", buf, 1);
 			smbios_detect(ST->ConfigurationTable[k].VendorTable);
 			break;
 		}
 	}
 
 	interact(NULL);			/* doesn't return */
 
 	return (EFI_SUCCESS);		/* keep compiler happy */
 }
 
-/* XXX move to lib stand ? */
-static int
-wcscmp(CHAR16 *a, CHAR16 *b)
-{
-
-	while (*a && *b && *a == *b) {
-		a++;
-		b++;
-	}
-	return *a - *b;
-}
-
-
 COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot);
 
 static int
 command_reboot(int argc, char *argv[])
 {
 	int i;
 
 	for (i = 0; devsw[i] != NULL; ++i)
 		if (devsw[i]->dv_cleanup != NULL)
 			(devsw[i]->dv_cleanup)();
 
 	RS->ResetSystem(EfiResetCold, EFI_SUCCESS, 23,
 	    (CHAR16 *)"Reboot from the loader");
 
 	/* NOTREACHED */
 	return (CMD_ERROR);
 }
 
 COMMAND_SET(quit, "quit", "exit the loader", command_quit);
 
 static int
 command_quit(int argc, char *argv[])
 {
 	exit(0);
 	return (CMD_OK);
 }
 
 COMMAND_SET(memmap, "memmap", "print memory map", command_memmap);
 
 static int
 command_memmap(int argc, char *argv[])
 {
 	UINTN sz;
 	EFI_MEMORY_DESCRIPTOR *map, *p;
 	UINTN key, dsz;
 	UINT32 dver;
 	EFI_STATUS status;
 	int i, ndesc;
 	char line[80];
 	static char *types[] = {
 	    "Reserved",
 	    "LoaderCode",
 	    "LoaderData",
 	    "BootServicesCode",
 	    "BootServicesData",
 	    "RuntimeServicesCode",
 	    "RuntimeServicesData",
 	    "ConventionalMemory",
 	    "UnusableMemory",
 	    "ACPIReclaimMemory",
 	    "ACPIMemoryNVS",
 	    "MemoryMappedIO",
 	    "MemoryMappedIOPortSpace",
 	    "PalCode"
 	};
 
 	sz = 0;
 	status = BS->GetMemoryMap(&sz, 0, &key, &dsz, &dver);
 	if (status != EFI_BUFFER_TOO_SMALL) {
 		printf("Can't determine memory map size\n");
 		return (CMD_ERROR);
 	}
 	map = malloc(sz);
 	status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
 	if (EFI_ERROR(status)) {
 		printf("Can't read memory map\n");
 		return (CMD_ERROR);
 	}
 
 	ndesc = sz / dsz;
 	snprintf(line, sizeof(line), "%23s %12s %12s %8s %4s\n",
 	    "Type", "Physical", "Virtual", "#Pages", "Attr");
 	pager_open();
 	if (pager_output(line)) {
 		pager_close();
 		return (CMD_OK);
 	}
 
 	for (i = 0, p = map; i < ndesc;
 	     i++, p = NextMemoryDescriptor(p, dsz)) {
 		printf("%23s %012jx %012jx %08jx ", types[p->Type],
 		    (uintmax_t)p->PhysicalStart, (uintmax_t)p->VirtualStart,
 		    (uintmax_t)p->NumberOfPages);
 		if (p->Attribute & EFI_MEMORY_UC)
 			printf("UC ");
 		if (p->Attribute & EFI_MEMORY_WC)
 			printf("WC ");
 		if (p->Attribute & EFI_MEMORY_WT)
 			printf("WT ");
 		if (p->Attribute & EFI_MEMORY_WB)
 			printf("WB ");
 		if (p->Attribute & EFI_MEMORY_UCE)
 			printf("UCE ");
 		if (p->Attribute & EFI_MEMORY_WP)
 			printf("WP ");
 		if (p->Attribute & EFI_MEMORY_RP)
 			printf("RP ");
 		if (p->Attribute & EFI_MEMORY_XP)
 			printf("XP ");
 		if (pager_output("\n"))
 			break;
 	}
 
 	pager_close();
 	return (CMD_OK);
 }
 
 COMMAND_SET(configuration, "configuration", "print configuration tables",
     command_configuration);
 
 static const char *
 guid_to_string(EFI_GUID *guid)
 {
 	static char buf[40];
 
 	sprintf(buf, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
 	    guid->Data1, guid->Data2, guid->Data3, guid->Data4[0],
 	    guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4],
 	    guid->Data4[5], guid->Data4[6], guid->Data4[7]);
 	return (buf);
 }
 
 static int
 command_configuration(int argc, char *argv[])
 {
 	char line[80];
 	UINTN i;
 
 	snprintf(line, sizeof(line), "NumberOfTableEntries=%lu\n",
 		(unsigned long)ST->NumberOfTableEntries);
 	pager_open();
 	if (pager_output(line)) {
 		pager_close();
 		return (CMD_OK);
 	}
 
 	for (i = 0; i < ST->NumberOfTableEntries; i++) {
 		EFI_GUID *guid;
 
 		printf("  ");
 		guid = &ST->ConfigurationTable[i].VendorGuid;
 		if (!memcmp(guid, &mps, sizeof(EFI_GUID)))
 			printf("MPS Table");
 		else if (!memcmp(guid, &acpi, sizeof(EFI_GUID)))
 			printf("ACPI Table");
 		else if (!memcmp(guid, &acpi20, sizeof(EFI_GUID)))
 			printf("ACPI 2.0 Table");
 		else if (!memcmp(guid, &smbios, sizeof(EFI_GUID)))
 			printf("SMBIOS Table %p",
 			    ST->ConfigurationTable[i].VendorTable);
 		else if (!memcmp(guid, &dxe, sizeof(EFI_GUID)))
 			printf("DXE Table");
 		else if (!memcmp(guid, &hoblist, sizeof(EFI_GUID)))
 			printf("HOB List Table");
 		else if (!memcmp(guid, &memtype, sizeof(EFI_GUID)))
 			printf("Memory Type Information Table");
 		else if (!memcmp(guid, &debugimg, sizeof(EFI_GUID)))
 			printf("Debug Image Info Table");
 		else if (!memcmp(guid, &fdtdtb, sizeof(EFI_GUID)))
 			printf("FDT Table");
 		else
 			printf("Unknown Table (%s)", guid_to_string(guid));
 		snprintf(line, sizeof(line), " at %p\n",
 		    ST->ConfigurationTable[i].VendorTable);
 		if (pager_output(line))
 			break;
 	}
 
 	pager_close();
 	return (CMD_OK);
 }
 
 
 COMMAND_SET(mode, "mode", "change or display EFI text modes", command_mode);
 
 static int
 command_mode(int argc, char *argv[])
 {
 	UINTN cols, rows;
 	unsigned int mode;
 	int i;
 	char *cp;
 	char rowenv[8];
 	EFI_STATUS status;
 	SIMPLE_TEXT_OUTPUT_INTERFACE *conout;
 	extern void HO(void);
 
 	conout = ST->ConOut;
 
 	if (argc > 1) {
 		mode = strtol(argv[1], &cp, 0);
 		if (cp[0] != '\0') {
 			printf("Invalid mode\n");
 			return (CMD_ERROR);
 		}
 		status = conout->QueryMode(conout, mode, &cols, &rows);
 		if (EFI_ERROR(status)) {
 			printf("invalid mode %d\n", mode);
 			return (CMD_ERROR);
 		}
 		status = conout->SetMode(conout, mode);
 		if (EFI_ERROR(status)) {
 			printf("couldn't set mode %d\n", mode);
 			return (CMD_ERROR);
 		}
 		sprintf(rowenv, "%u", (unsigned)rows);
 		setenv("LINES", rowenv, 1);
 		HO();		/* set cursor */
 		return (CMD_OK);
 	}
 
 	printf("Current mode: %d\n", conout->Mode->Mode);
 	for (i = 0; i <= conout->Mode->MaxMode; i++) {
 		status = conout->QueryMode(conout, i, &cols, &rows);
 		if (EFI_ERROR(status))
 			continue;
 		printf("Mode %d: %u columns, %u rows\n", i, (unsigned)cols,
 		    (unsigned)rows);
 	}
 
 	if (i != 0)
 		printf("Select a mode with the command \"mode <number>\"\n");
 
 	return (CMD_OK);
 }
 
 #ifdef EFI_ZFS_BOOT
 COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset",
     command_lszfs);
 
 static int
 command_lszfs(int argc, char *argv[])
 {
 	int err;
 
 	if (argc != 2) {
 		command_errmsg = "wrong number of arguments";
 		return (CMD_ERROR);
 	}
 
 	err = zfs_list(argv[1]);
 	if (err != 0) {
 		command_errmsg = strerror(err);
 		return (CMD_ERROR);
 	}
 	return (CMD_OK);
 }
 
 COMMAND_SET(reloadbe, "reloadbe", "refresh the list of ZFS Boot Environments",
 	    command_reloadbe);
 
 static int
 command_reloadbe(int argc, char *argv[])
 {
 	int err;
 	char *root;
 
 	if (argc > 2) {
 		command_errmsg = "wrong number of arguments";
 		return (CMD_ERROR);
 	}
 
 	if (argc == 2) {
 		err = zfs_bootenv(argv[1]);
 	} else {
 		root = getenv("zfs_be_root");
 		if (root == NULL) {
 			return (CMD_OK);
 		}
 		err = zfs_bootenv(root);
 	}
 
 	if (err != 0) {
 		command_errmsg = strerror(err);
 		return (CMD_ERROR);
 	}
 
 	return (CMD_OK);
 }
 #endif
 
-COMMAND_SET(efishow, "efi-show", "print some or all EFI variables", command_efi_show);
-
-static int
-efi_print_var(CHAR16 *varnamearg, EFI_GUID *matchguid, int lflag)
-{
-	UINTN		datasz, i;
-	EFI_STATUS	status;
-	UINT32		attr;
-	CHAR16		*data;
-	char		*str;
-	uint32_t	uuid_status;
-	int		is_ascii;
-
-	datasz = 0;
-	status = RS->GetVariable(varnamearg, matchguid, &attr,
-	    &datasz, NULL);
-	if (status != EFI_BUFFER_TOO_SMALL) {
-		printf("Can't get the variable: error %#lx\n", status);
-		return (CMD_ERROR);
-	}
-	data = malloc(datasz);
-	status = RS->GetVariable(varnamearg, matchguid, &attr,
-	    &datasz, data);
-	if (status != EFI_SUCCESS) {
-		printf("Can't get the variable: error %#lx\n", status);
-		return (CMD_ERROR);
-	}
-	uuid_to_string((uuid_t *)matchguid, &str, &uuid_status);
-	if (lflag) {
-		printf("%s 0x%x %S", str, attr, varnamearg);
-	} else {
-		printf("%s 0x%x %S=", str, attr, varnamearg);
-		is_ascii = 1;
-		free(str);
-		str = (char *)data;
-		for (i = 0; i < datasz - 1; i++) {
-			/* Quick hack to see if this ascii-ish string printable range plus tab, cr and lf */
-			if ((str[i] < 32 || str[i] > 126) && str[i] != 9 && str[i] != 10 && str[i] != 13) {
-				is_ascii = 0;
-				break;
-			}
-		}
-		if (str[datasz - 1] != '\0')
-			is_ascii = 0;
-		if (is_ascii)
-			printf("%s", str);
-		else {
-			for (i = 0; i < datasz / 2; i++) {
-				if (isalnum(data[i]) || isspace(data[i]))
-					printf("%c", data[i]);
-				else
-					printf("\\x%02x", data[i]);
-			}
-		}
-	}
-	free(data);
-	if (pager_output("\n"))
-		return (CMD_WARN);
-	return (CMD_OK);
-}
-
-static int
-command_efi_show(int argc, char *argv[])
-{
-	/*
-	 * efi-show [-a]
-	 *	print all the env
-	 * efi-show -u UUID
-	 *	print all the env vars tagged with UUID
-	 * efi-show -v var
-	 *	search all the env vars and print the ones matching var
-	 * eif-show -u UUID -v var
-	 * eif-show UUID var
-	 *	print all the env vars that match UUID and var
-	 */
-	/* NB: We assume EFI_GUID is the same as uuid_t */
-	int		aflag = 0, gflag = 0, lflag = 0, vflag = 0;
-	int		ch, rv;
-	unsigned	i;
-	EFI_STATUS	status;
-	EFI_GUID	varguid = { 0,0,0,{0,0,0,0,0,0,0,0} };
-	EFI_GUID	matchguid = { 0,0,0,{0,0,0,0,0,0,0,0} };
-	uint32_t	uuid_status;
-	CHAR16		*varname;
-	CHAR16		*newnm;
-	CHAR16		varnamearg[128];
-	UINTN		varalloc;
-	UINTN		varsz;
-
-	while ((ch = getopt(argc, argv, "ag:lv:")) != -1) {
-		switch (ch) {
-		case 'a':
-			aflag = 1;
-			break;
-		case 'g':
-			gflag = 1;
-			uuid_from_string(optarg, (uuid_t *)&matchguid,
-			    &uuid_status);
-			if (uuid_status != uuid_s_ok) {
-				printf("uid %s could not be parsed\n", optarg);
-				return (CMD_ERROR);
-			}
-			break;
-		case 'l':
-			lflag = 1;
-			break;
-		case 'v':
-			vflag = 1;
-			if (strlen(optarg) >= nitems(varnamearg)) {
-				printf("Variable %s is longer than %zd characters\n",
-				    optarg, nitems(varnamearg));
-				return (CMD_ERROR);
-			}
-			for (i = 0; i < strlen(optarg); i++)
-				varnamearg[i] = optarg[i];
-			varnamearg[i] = 0;
-			break;
-		default:
-			printf("Invalid argument %c\n", ch);
-			return (CMD_ERROR);
-		}
-	}
-
-	if (aflag && (gflag || vflag)) {
-		printf("-a isn't compatible with -v or -u\n");
-		return (CMD_ERROR);
-	}
-
-	if (aflag && optind < argc) {
-		printf("-a doesn't take any args");
-		return (CMD_ERROR);
-	}
-
-	if (optind == argc)
-		aflag = 1;
-
-	argc -= optind;
-	argv += optind;
-
-	pager_open();
-	if (vflag && gflag) {
-		rv = efi_print_var(varnamearg, &matchguid, lflag);
-		pager_close();
-		return (rv);
-	}
-
-	if (argc == 2) {
-		optarg = argv[0];
-		if (strlen(optarg) >= nitems(varnamearg)) {
-			printf("Variable %s is longer than %zd characters\n",
-			    optarg, nitems(varnamearg));
-			pager_close();
-			return (CMD_ERROR);
-		}
-		for (i = 0; i < strlen(optarg); i++)
-			varnamearg[i] = optarg[i];
-		varnamearg[i] = 0;
-		optarg = argv[1];
-		uuid_from_string(optarg, (uuid_t *)&matchguid,
-		    &uuid_status);
-		if (uuid_status != uuid_s_ok) {
-			printf("uid %s could not be parsed\n", optarg);
-			pager_close();
-			return (CMD_ERROR);
-		}
-		rv = efi_print_var(varnamearg, &matchguid, lflag);
-		pager_close();
-		return (rv);
-	}
-
-	if (argc > 0) {
-		printf("Too many args %d\n", argc);
-		pager_close();
-		return (CMD_ERROR);
-	}
-
-	/*
-	 * Initiate the search -- note the standard takes pain
-	 * to specify the initial call must be a poiner to a NULL
-	 * character.
-	 */
-	varalloc = 1024;
-	varname = malloc(varalloc);
-	if (varname == NULL) {
-		printf("Can't allocate memory to get variables\n");
-		pager_close();
-		return (CMD_ERROR);
-	}
-	varname[0] = 0;
-	while (1) {
-		varsz = varalloc;
-		status = RS->GetNextVariableName(&varsz, varname, &varguid);
-		if (status == EFI_BUFFER_TOO_SMALL) {
-			varalloc = varsz;
-			newnm = malloc(varalloc);
-			if (newnm == NULL) {
-				printf("Can't allocate memory to get variables\n");
-				free(varname);
-				pager_close();
-				return (CMD_ERROR);
-			}
-			memcpy(newnm, varname, varsz);
-			free(varname);
-			varname = newnm;
-			continue; /* Try again with bigger buffer */
-		}
-		if (status != EFI_SUCCESS)
-			break;
-		if (aflag) {
-			if (efi_print_var(varname, &varguid, lflag) != CMD_OK)
-				break;
-			continue;
-		}
-		if (vflag) {
-			if (wcscmp(varnamearg, varname) == 0) {
-				if (efi_print_var(varname, &varguid, lflag) != CMD_OK)
-					break;
-				continue;
-			}
-		}
-		if (gflag) {
-			if (memcmp(&varguid, &matchguid, sizeof(varguid)) == 0) {
-				if (efi_print_var(varname, &varguid, lflag) != CMD_OK)
-					break;
-				continue;
-			}
-		}
-	}
-	free(varname);
-	pager_close();
-
-	return (CMD_OK);
-}
-
-COMMAND_SET(efiset, "efi-set", "set EFI variables", command_efi_set);
-
-static int
-command_efi_set(int argc, char *argv[])
-{
-	char *uuid, *var, *val;
-	CHAR16 wvar[128];
-	EFI_GUID guid;
-	uint32_t status;
-	EFI_STATUS err;
-
-	if (argc != 4) {
-		printf("efi-set uuid var new-value\n");
-		return (CMD_ERROR);
-	}
-	uuid = argv[1];
-	var = argv[2];
-	val = argv[3];
-	uuid_from_string(uuid, (uuid_t *)&guid, &status);
-	if (status != uuid_s_ok) {
-		printf("Invalid uuid %s %d\n", uuid, status);
-		return (CMD_ERROR);
-	}
-	cpy8to16(var, wvar, sizeof(wvar));
-	err = RS->SetVariable(wvar, &guid,
-	    EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_RUNTIME_ACCESS | EFI_VARIABLE_BOOTSERVICE_ACCESS,
-	    strlen(val) + 1, val);
-	if (EFI_ERROR(err)) {
-		printf("Failed to set variable: error %lu\n", EFI_ERROR_CODE(err));
-		return (CMD_ERROR);
-	}
-	return (CMD_OK);
-}
-
-COMMAND_SET(efiunset, "efi-unset", "delete / unset EFI variables", command_efi_unset);
-
-static int
-command_efi_unset(int argc, char *argv[])
-{
-	char *uuid, *var;
-	CHAR16 wvar[128];
-	EFI_GUID guid;
-	uint32_t status;
-	EFI_STATUS err;
-
-	if (argc != 3) {
-		printf("efi-unset uuid var\n");
-		return (CMD_ERROR);
-	}
-	uuid = argv[1];
-	var = argv[2];
-	uuid_from_string(uuid, (uuid_t *)&guid, &status);
-	if (status != uuid_s_ok) {
-		printf("Invalid uuid %s\n", uuid);
-		return (CMD_ERROR);
-	}
-	cpy8to16(var, wvar, sizeof(wvar));
-	err = RS->SetVariable(wvar, &guid, 0, 0, NULL);
-	if (EFI_ERROR(err)) {
-		printf("Failed to unset variable: error %lu\n", EFI_ERROR_CODE(err));
-		return (CMD_ERROR);
-	}
-	return (CMD_OK);
-}
-
 #ifdef LOADER_FDT_SUPPORT
 extern int command_fdt_internal(int argc, char *argv[]);
 
 /*
  * Since proper fdt command handling function is defined in fdt_loader_cmd.c,
  * and declaring it as extern is in contradiction with COMMAND_SET() macro
  * (which uses static pointer), we're defining wrapper function, which
  * calls the proper fdt handling routine.
  */
 static int
 command_fdt(int argc, char *argv[])
 {
 
 	return (command_fdt_internal(argc, argv));
 }
 
 COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);
 #endif
 
 #ifdef EFI_ZFS_BOOT
 static void
 efi_zfs_probe(void)
 {
-	EFI_HANDLE h;
-	u_int unit;
-	int i;
-	char dname[SPECNAMELEN + 1];
-	uint64_t guid;
+	pdinfo_list_t *hdi;
+	pdinfo_t *hd, *pd = NULL;
+	EFI_GUID imgid = LOADED_IMAGE_PROTOCOL;
+	EFI_LOADED_IMAGE *img = NULL;	/* NULL if lookup fails */
+	EFI_HANDLE boot_disk = NULL;
+	char devname[SPECNAMELEN + 1];
+	uint64_t *guidp = NULL;
 
-	unit = 0;
-	h = efi_find_handle(&efipart_dev, 0);
-	for (i = 0; h != NULL; h = efi_find_handle(&efipart_dev, ++i)) {
-		snprintf(dname, sizeof(dname), "%s%d:", efipart_dev.dv_name, i);
-		if (zfs_probe_dev(dname, &guid) == 0)
-			(void)efi_handle_update_dev(h, &zfs_dev, unit++, guid);
+	BS->HandleProtocol(IH, &imgid, (VOID**)&img);
+
+	/* Find the disk holding the partition we were loaded from, if any. */
+	hdi = efiblk_get_pdinfo_list(&efipart_hddev);
+	STAILQ_FOREACH(hd, hdi, pd_link) {
+		STAILQ_FOREACH(pd, &hd->pd_part, pd_link) {
+			if (img != NULL &&
+			    pd->pd_handle == img->DeviceHandle)
+				boot_disk = hd->pd_handle;
+		}
+	}
+
+	/*
+	 * Probe every partition of every disk.  Starting at the boot disk,
+	 * pass &pool_guid to zfs_probe_dev() until pool_guid is non-zero;
+	 * all other probes get a NULL guid pointer.  This is a heuristic:
+	 * we assume the boot pool is the first pool found on the boot disk.
+	 */
+	STAILQ_FOREACH(hd, hdi, pd_link) {
+		if (hd->pd_handle == boot_disk)
+			guidp = &pool_guid;
+
+		STAILQ_FOREACH(pd, &hd->pd_part, pd_link) {
+			snprintf(devname, sizeof(devname), "%s%dp%d:",
+			    efipart_hddev.dv_name, hd->pd_unit, pd->pd_unit);
+			(void) zfs_probe_dev(devname, guidp);
+			if (guidp != NULL && pool_guid != 0)
+				guidp = NULL;
+		}
 	}
 }
 #endif
Index: stable/11/sys/boot/forth/beastie.4th.8
===================================================================
--- stable/11/sys/boot/forth/beastie.4th.8	(revision 329098)
+++ stable/11/sys/boot/forth/beastie.4th.8	(revision 329099)
@@ -1,173 +1,173 @@
 .\" Copyright (c) 2011-2012 Devin Teske
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
 .Dd January 6, 2016
 .Dt BEASTIE.4TH 8
 .Os
 .Sh NAME
 .Nm beastie.4th
 .Nd FreeBSD ASCII art boot module
 .Sh DESCRIPTION
 The file that goes by the name of
 .Nm
 is a set of commands designed to draw the ASCII art FreeBSD mascot
-.Nd known simply as
-.Ic beastie
-.Nd to the right of the boot loader menu.
+\(en known simply as
+.Em beastie
+\(en to the right of the boot loader menu.
 The commands of
 .Nm
 by themselves are not enough for most uses.
 Please refer to the
 examples below for the most common situations, and to
 .Xr loader 8
 for additional commands.
 .Pp
 Before using any of the commands provided in
 .Nm ,
 it must be included
 through the command:
 .Pp
 .Dl include beastie.4th
 .Pp
 This line is present in the default
 .Pa /boot/loader.rc
 file, so it is not needed (and should not be re-issued) in a normal setup.
 .Pp
 The commands provided by it are:
 .Pp
 .Bl -tag -width disable-module_module -compact -offset indent
 .It Ic draw-beastie
 Draws the FreeBSD logo.
 .Pp
 The logo that is drawn is configured by setting the
 .Ic loader_logo
 variable in
 .Xr loader.conf 5
 to one of
 .Dq Li beastie ,
 .Dq Li beastiebw ,
 .Dq Li fbsdbw ,
 .Dq Li orb ,
 and
 .Dq Li orbbw
 (the default).
 .Pp
 The position of the logo can be configured by setting the
 .Ic loader_logo_x
 and
 .Ic loader_logo_y
 variables in
 .Xr loader.conf 5 .
 The default values are 46 (x) and 4 (y).
 .Pp
 .It Ic clear-beastie
 Clears the screen of beastie.
 .Pp
 .It Ic beastie-start
 Initializes the interactive boot loader menu.
 .Pp
 The
 .Ic loader_delay
 variable can be configured in
 .Xr loader.conf 5
 to the number of seconds you would like to delay loading the boot menu.
 During the delay the user can press Ctrl-C to fall back to
 .Ic autoboot
 or ENTER to proceed.
 The default behavior is to not delay.
 .El
 .Pp
 The environment variables that effect its behavior are:
 .Bl -tag -width bootfile -offset indent
 .It Va loader_logo
 Selects the desired logo in the beastie boot menu. Possible values are:
 .Dq Li fbsdbw ,
 .Dq Li beastie ,
 .Dq Li beastiebw ,
 .Dq Li orb ,
 .Dq Li orbbw
 (default), and
 .Dq Li none .
 .It Va loader_logo_x
 Sets the desired column position of the logo. Default is 46.
 .It Va loader_logo_y
 Sets the desired row position of the logo. Default is 4.
 .It Va beastie_disable
 If set to
 .Dq YES ,
 the beastie boot menu will be skipped.
 The beastie boot menu is always skipped if running non-x86 hardware.
 .It Va loader_delay
 If set to a number higher than zero, introduces a delay before starting the
 beastie boot menu. During the delay the user can press either Ctrl-C to skip
 the menu or ENTER to proceed to the menu. The default is to not delay when
 loading the menu.
 .El
 .Sh FILES
 .Bl -tag -width /boot/loader.4th -compact
 .It Pa /boot/loader
 The
 .Xr loader 8 .
 .It Pa /boot/beastie.4th
 .Nm
 itself.
 .It Pa /boot/loader.rc
 .Xr loader 8
 bootstrapping script.
 .El
 .Sh EXAMPLES
 Standard i386
 .Pa /boot/loader.rc :
 .Pp
 .Bd -literal -offset indent -compact
 include /boot/beastie.4th
 beastie-start
 .Ed
 .Pp
 Set a different logo in
 .Xr loader.conf 5 :
 .Pp
 .Bd -literal -offset indent -compact
 loader_logo="beastie"
 .Ed
 .Sh SEE ALSO
 .Xr loader.conf 5 ,
 .Xr loader 8 ,
 .Xr loader.4th 8
 .Sh HISTORY
 The
 .Nm
 set of commands first appeared in
 .Fx 5.1 .
 .Sh AUTHORS
 The
 .Nm
 set of commands was written by
 .An -nosplit
 .An Scott Long Aq scottl@FreeBSD.org ,
 .An Aleksander Fafula Aq alex@fafula.com
 and
 .An Devin Teske Aq dteske@FreeBSD.org .
Index: stable/11/sys/boot/forth/loader.4th
===================================================================
--- stable/11/sys/boot/forth/loader.4th	(revision 329098)
+++ stable/11/sys/boot/forth/loader.4th	(revision 329099)
@@ -1,266 +1,266 @@
 \ Copyright (c) 1999 Daniel C. Sobral <dcs@FreeBSD.org>
 \ Copyright (c) 2011-2015 Devin Teske <dteske@FreeBSD.org>
 \ All rights reserved.
 \
 \ Redistribution and use in source and binary forms, with or without
 \ modification, are permitted provided that the following conditions
 \ are met:
 \ 1. Redistributions of source code must retain the above copyright
 \    notice, this list of conditions and the following disclaimer.
 \ 2. Redistributions in binary form must reproduce the above copyright
 \    notice, this list of conditions and the following disclaimer in the
 \    documentation and/or other materials provided with the distribution.
 \
 \ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 \ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 \ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 \ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 \ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 \ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 \ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 \ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 \ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 \ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 \ SUCH DAMAGE.
 \
 \ $FreeBSD$
 
 only forth definitions
 
 s" arch-i386" environment? [if] [if]
 	s" loader_version" environment?  [if]
 		11 < [if]
 			.( Loader version 1.1+ required) cr
 			abort
 		[then]
 	[else]
 		.( Could not get loader version!) cr
 		abort
 	[then]
 [then] [then]
 
 256 dictthreshold !  \ 256 cells minimum free space
 2048 dictincrease !  \ 2048 additional cells each time
 
 include /boot/support.4th
 include /boot/color.4th
 include /boot/delay.4th
 include /boot/check-password.4th
-s" efi-boot" environment? [if] [if]
+s" efi-version" getenv? [if]
 	include /boot/efi.4th
-[then] [then]
+[then]
 
 only forth definitions
 
 : bootmsg ( -- )
   loader_color? dup ( -- bool bool )
   if 7 fg 4 bg then
   ." Booting..."
   if me then
   cr
 ;
 
 : try-menu-unset
   \ menu-unset may not be present
   s" beastie_disable" getenv
   dup -1 <> if
     s" YES" compare-insensitive 0= if
       exit
     then
   else
     drop
   then
   s" menu-unset"
   sfind if
     execute
   else
     drop
   then
   s" menusets-unset"
   sfind if
     execute
   else
     drop
   then
 ;
 
 only forth also support-functions also builtins definitions
 
 : boot
   0= if ( interpreted ) get_arguments then
 
   \ Unload only if a path was passed
   dup if
     >r over r> swap
     c@ [char] - <> if
       0 1 unload drop
     else
       s" kernelname" getenv? if ( a kernel has been loaded )
         try-menu-unset
         bootmsg 1 boot exit
       then
       load_kernel_and_modules
       ?dup if exit then
       try-menu-unset
       bootmsg 0 1 boot exit
     then
   else
     s" kernelname" getenv? if ( a kernel has been loaded )
       try-menu-unset
       bootmsg 1 boot exit
     then
     load_kernel_and_modules
     ?dup if exit then
     try-menu-unset
     bootmsg 0 1 boot exit
   then
   load_kernel_and_modules
   ?dup 0= if bootmsg 0 1 boot then
 ;
 
 \ ***** boot-conf
 \
 \	Prepares to boot as specified by loaded configuration files.
 
 : boot-conf
   0= if ( interpreted ) get_arguments then
   0 1 unload drop
   load_kernel_and_modules
   ?dup 0= if 0 1 autoboot then
 ;
 
 also forth definitions previous
 
 builtin: boot
 builtin: boot-conf
 
 only forth definitions also support-functions
 
 \ ***** start
 \
 \       Initializes support.4th global variables, sets loader_conf_files,
 \       processes conf files, and, if any one such file was successfully
 \       read to the end, loads kernel and modules.
 
 : start  ( -- ) ( throws: abort & user-defined )
   s" /boot/defaults/loader.conf" initialize
   include_conf_files
   include_nextboot_file
   \ If the user defined a post-initialize hook, call it now
   s" post-initialize" sfind if execute else drop then
   \ Will *NOT* try to load kernel and modules if no configuration file
   \ was successfully loaded!
   any_conf_read? if
     s" loader_delay" getenv -1 = if
       load_xen_throw
       load_kernel
       load_modules
     else
       drop
       ." Loading Kernel and Modules (Ctrl-C to Abort)" cr
       s" also support-functions" evaluate
       s" set delay_command='load_xen_throw load_kernel load_modules'" evaluate
       s" set delay_showdots" evaluate
       delay_execute
     then
   then
 ;
 
 \ ***** initialize
 \
 \	Overrides support.4th initialization word with one that does
 \	everything start one does, short of loading the kernel and
 \	modules. Returns a flag.
 
 : initialize ( -- flag )
   s" /boot/defaults/loader.conf" initialize
   include_conf_files
   include_nextboot_file
   \ If the user defined a post-initialize hook, call it now
   s" post-initialize" sfind if execute else drop then
   any_conf_read?
 ;
 
 \ ***** read-conf
 \
 \	Read a configuration file, whose name was specified on the command
 \	line, if interpreted, or given on the stack, if compiled in.
 
 : (read-conf)  ( addr len -- )
   conf_files string=
   include_conf_files \ Will recurse on new loader_conf_files definitions
 ;
 
 : read-conf  ( <filename> | addr len -- ) ( throws: abort & user-defined )
   state @ if
     \ Compiling
     postpone (read-conf)
   else
     \ Interpreting
     bl parse (read-conf)
   then
 ; immediate
 
 \ show, enable, disable, toggle module loading. They all take module from
 \ the next word
 
 : set-module-flag ( module_addr val -- ) \ set and print flag
   over module.flag !
   dup module.name strtype
   module.flag @ if ."  will be loaded" else ."  will not be loaded" then cr
 ;
 
 : enable-module find-module ?dup if true set-module-flag then ;
 
 : disable-module find-module ?dup if false set-module-flag then ;
 
 : toggle-module find-module ?dup if dup module.flag @ 0= set-module-flag then ;
 
 \ ***** show-module
 \
 \	Show loading information about a module.
 
 : show-module ( <module> -- ) find-module ?dup if show-one-module then ;
 
 \ Words to be used inside configuration files
 
 : retry false ;         \ For use in load error commands
 : ignore true ;         \ For use in load error commands
 
 \ Return to strict forth vocabulary
 
 : #type
   over - >r
   type
   r> spaces
 ;
 
 : .? 2 spaces 2swap 15 #type 2 spaces type cr ;
 
 \ Execute the ? command to print all the commands defined in
 \ C, then list the ones we support here. Please note that this
 \ doesn't use pager_* routines that the C implementation of ?
 \ does, so these will always appear, even if you stop early
 \ there. And they may cause the commands to scroll off the
 \ screen if the number of commands modulus LINES is close
 \ to LINEs....
 : ?
   ['] ? execute
   s" boot-conf" s" load kernel and modules, then autoboot" .?
   s" read-conf" s" read a configuration file" .?
   s" enable-module" s" enable loading of a module" .?
   s" disable-module" s" disable loading of a module" .?
   s" toggle-module" s" toggle loading of a module" .?
   s" show-module" s" show module load data" .?
   s" try-include" s" try to load/interpret files" .?
 ;
 
 : try-include ( -- ) \ see loader.4th(8)
   ['] include ( -- xt ) \ get the execution token of `include'
   catch ( xt -- exception# | 0 ) if \ failed
     LF parse ( c -- s-addr/u ) 2drop \ advance >in to EOL (drop data)
     \ ... prevents words unused by `include' from being interpreted
   then
 ; immediate \ interpret immediately for access to `source' (aka tib)
 
 only forth definitions
Index: stable/11/sys/boot/geli/Makefile
===================================================================
--- stable/11/sys/boot/geli/Makefile	(revision 329098)
+++ stable/11/sys/boot/geli/Makefile	(revision 329099)
@@ -1,56 +1,53 @@
 # $FreeBSD$
 # libgeliboot
 
 MAN=
 
 .include <src.opts.mk>
 MK_SSP=		no
 
 LIB=		geliboot
 INTERNALLIB=
 MK_PROFILE=	no
 NO_PIC=
 
 .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
 CFLAGS+=        -march=i386
 .endif
 .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "powerpc64"
 CFLAGS+=        -m32
 .endif
 
 WARNS?=		0
 
 # string functions from libc
 .PATH: ${.CURDIR}/../../../lib/libc/string
 SRCS+=  bcmp.c bcopy.c bzero.c
 
-# need explicit_bzero for crypto
-.PATH: ${.CURDIR}/../../../sys/libkern
-SRCS+=  explicit_bzero.c
-
 # Our password input method
 SRCS+=  pwgets.c
 
 # sha256 and sha512 from sys/crypto
 .PATH: ${SRCTOP}/sys/crypto/sha2
 CFLAGS+=	-DWEAK_REFS
 SRCS+=		sha256c.c sha512c.c
 
 # md5 from libmd
 .PATH: ${SRCTOP}/lib/libmd
 SRCS+=		md5c.c
 
 # AES implementation from sys/crypto
 .PATH: ${SRCTOP}/sys/crypto/rijndael
 CFLAGS+=	-I${.CURDIR}/../../
+CFLAGS+=	-I${.CURDIR}/../common/
 # Remove asserts
 CFLAGS+=	-DNDEBUG
 SRCS+=		rijndael-alg-fst.c rijndael-api-fst.c rijndael-api.c
 
 # local GELI Implementation
 .PATH: ${SRCTOP}/sys/geom/eli
 CFLAGS+=	-D_STAND
 SRCS+=		geliboot_crypto.c g_eli_hmac.c g_eli_key.c g_eli_key_cache.c pkcs5v2.c
 
 .include <bsd.stand.mk>
 .include <bsd.lib.mk>
Index: stable/11/sys/boot/geli/geliboot.c
===================================================================
--- stable/11/sys/boot/geli/geliboot.c	(revision 329098)
+++ stable/11/sys/boot/geli/geliboot.c	(revision 329099)
@@ -1,318 +1,437 @@
 /*-
  * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
  * Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
+#include "geliboot_internal.h"
 #include "geliboot.h"
 
 SLIST_HEAD(geli_list, geli_entry) geli_head = SLIST_HEAD_INITIALIZER(geli_head);
 struct geli_list *geli_headp;
 
+typedef u_char geli_ukey[G_ELI_USERKEYLEN];
+
+static geli_ukey saved_keys[GELI_MAX_KEYS];
+static unsigned int nsaved_keys = 0;
+
+/*
+ * Copy keys from local storage to the keybuf struct.
+ * Destroy the local storage when finished.
+ */
+void
+geli_fill_keybuf(struct keybuf *fkeybuf)
+{
+	unsigned int i;
+
+	for (i = 0; i < nsaved_keys; i++) {
+		fkeybuf->kb_ents[i].ke_type = KEYBUF_TYPE_GELI;
+		memcpy(fkeybuf->kb_ents[i].ke_data, saved_keys[i],
+		    G_ELI_USERKEYLEN);
+	}
+	fkeybuf->kb_nents = nsaved_keys;
+	explicit_bzero(saved_keys, sizeof(saved_keys));
+}
+
+/*
+ * Copy up to GELI_MAX_KEYS keys from a keybuf struct into local storage,
+ * zeroing each source entry as it is consumed, then mark the keybuf empty.
+ */
+void
+geli_save_keybuf(struct keybuf *skeybuf)
+{
+	unsigned int i;
+
+	for (i = 0; i < skeybuf->kb_nents && i < GELI_MAX_KEYS; i++) {
+		memcpy(saved_keys[i], skeybuf->kb_ents[i].ke_data,
+		    G_ELI_USERKEYLEN);
+		explicit_bzero(skeybuf->kb_ents[i].ke_data,
+		    G_ELI_USERKEYLEN);
+		skeybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE;
+	}
+	nsaved_keys = i;	/* count actually copied (<= GELI_MAX_KEYS) */
+	skeybuf->kb_nents = 0;
+}
+
+static void
+save_key(geli_ukey key)
+{
+
+	/*
+	 * If we run out of key space, the worst that will happen is
+	 * it will ask the user for the password again.
+	 */
+	if (nsaved_keys < GELI_MAX_KEYS) {
+		memcpy(saved_keys[nsaved_keys], key, G_ELI_USERKEYLEN);
+		nsaved_keys++;
+	}
+}
+
 static int
 geli_same_device(struct geli_entry *ge, struct dsk *dskp)
 {
 
-	if (geli_e->dsk->drive == dskp->drive &&
-	    dskp->part == 255 && geli_e->dsk->part == dskp->slice) {
+	if (ge->dsk->drive == dskp->drive &&
+	    dskp->part == 255 && ge->dsk->part == dskp->slice) {
 		/*
 		 * Sometimes slice = slice, and sometimes part = slice
 		 * If the incoming struct dsk has part=255, it means look at
 		 * the slice instead of the part number
 		 */
 		return (0);
 	}
 
 	/* Is this the same device? */
-	if (geli_e->dsk->drive != dskp->drive ||
-	    geli_e->dsk->slice != dskp->slice ||
-	    geli_e->dsk->part != dskp->part) {
+	if (ge->dsk->drive != dskp->drive ||
+	    ge->dsk->slice != dskp->slice ||
+	    ge->dsk->part != dskp->part) {
 		return (1);
 	}
 
 	return (0);
 }
 
+static int
+geli_findkey(struct geli_entry *ge, struct dsk *dskp, u_char *mkey)
+{
+	u_int keynum;
+	int i;
+
+	if (ge->keybuf_slot >= 0) {
+		if (g_eli_mkey_decrypt(&ge->md, saved_keys[ge->keybuf_slot],
+		    mkey, &keynum) == 0) {
+			return (0);
+		}
+	}
+
+	for (i = 0; i < nsaved_keys; i++) {
+		if (g_eli_mkey_decrypt(&ge->md, saved_keys[i], mkey,
+		    &keynum) == 0) {
+			ge->keybuf_slot = i;
+			return (0);
+		}
+	}
+
+	return (1);
+}
+
 void
 geli_init(void)
 {
 
 	geli_count = 0;
 	SLIST_INIT(&geli_head);
 }
 
 /*
  * Read the last sector of the drive or partition pointed to by dsk and see
  * if it is GELI encrypted
  */
 int
 geli_taste(int read_func(void *vdev, void *priv, off_t off, void *buf,
     size_t bytes), struct dsk *dskp, daddr_t lastsector)
 {
 	struct g_eli_metadata md;
 	u_char buf[DEV_GELIBOOT_BSIZE];
 	int error;
 	off_t alignsector;
 
 	alignsector = rounddown2(lastsector * DEV_BSIZE, DEV_GELIBOOT_BSIZE);
 	if (alignsector + DEV_GELIBOOT_BSIZE > ((lastsector + 1) * DEV_BSIZE)) {
 		/* Don't read past the end of the disk */
 		alignsector = (lastsector * DEV_BSIZE) + DEV_BSIZE
 		    - DEV_GELIBOOT_BSIZE;
 	}
 	error = read_func(NULL, dskp, alignsector, &buf, DEV_GELIBOOT_BSIZE);
 	if (error != 0) {
 		return (error);
 	}
 	/* Extract the last 4k sector of the disk. */
 	error = eli_metadata_decode(buf, &md);
 	if (error != 0) {
 		/* Try the last 512 byte sector instead. */
 		error = eli_metadata_decode(buf +
 		    (DEV_GELIBOOT_BSIZE - DEV_BSIZE), &md);
 		if (error != 0) {
 			return (error);
 		}
 	}
 
 	if (!(md.md_flags & G_ELI_FLAG_GELIBOOT)) {
 		/* The GELIBOOT feature is not activated */
 		return (1);
 	}
 	if ((md.md_flags & G_ELI_FLAG_ONETIME)) {
 		/* Swap device, skip it. */
 		return (1);
 	}
 	if (md.md_iterations < 0) {
 		/* XXX TODO: Support loading key files. */
 		/* Disk does not have a passphrase, skip it. */
 		return (1);
 	}
 	geli_e = malloc(sizeof(struct geli_entry));
 	if (geli_e == NULL)
 		return (2);
 
 	geli_e->dsk = malloc(sizeof(struct dsk));
 	if (geli_e->dsk == NULL)
 		return (2);
 	memcpy(geli_e->dsk, dskp, sizeof(struct dsk));
 	geli_e->part_end = lastsector;
 	if (dskp->part == 255) {
 		geli_e->dsk->part = dskp->slice;
 	}
+	geli_e->keybuf_slot = -1;
 
 	geli_e->md = md;
 	eli_metadata_softc(&geli_e->sc, &md, DEV_BSIZE,
 	    (lastsector + DEV_BSIZE) * DEV_BSIZE);
 
 	SLIST_INSERT_HEAD(&geli_head, geli_e, entries);
 	geli_count++;
 
 	return (0);
 }
 
 /*
  * Attempt to decrypt the device
  */
 int
-geli_attach(struct dsk *dskp, const char *passphrase)
+geli_attach(struct dsk *dskp, const char *passphrase, const u_char *mkeyp)
 {
 	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN], *mkp;
 	u_int keynum;
 	struct hmac_ctx ctx;
 	int error;
 
+	if (mkeyp != NULL) {
+		memcpy(&mkey, mkeyp, G_ELI_DATAIVKEYLEN);
+		explicit_bzero(mkeyp, G_ELI_DATAIVKEYLEN);
+	}
+
 	SLIST_FOREACH_SAFE(geli_e, &geli_head, entries, geli_e_tmp) {
 		if (geli_same_device(geli_e, dskp) != 0) {
 			continue;
 		}
 
+		if (mkeyp != NULL || geli_findkey(geli_e, dskp, mkey) == 0) {
+			goto found_key;
+		}
+
 		g_eli_crypto_hmac_init(&ctx, NULL, 0);
 		/*
 		 * Prepare Derived-Key from the user passphrase.
 		 */
 		if (geli_e->md.md_iterations < 0) {
 			/* XXX TODO: Support loading key files. */
 			return (1);
 		} else if (geli_e->md.md_iterations == 0) {
 			g_eli_crypto_hmac_update(&ctx, geli_e->md.md_salt,
 			    sizeof(geli_e->md.md_salt));
 			g_eli_crypto_hmac_update(&ctx, passphrase,
 			    strlen(passphrase));
 		} else if (geli_e->md.md_iterations > 0) {
 			printf("Calculating GELI Decryption Key disk%dp%d @ %d"
 			    " iterations...\n", dskp->unit,
 			    (dskp->slice > 0 ? dskp->slice : dskp->part),
 			    geli_e->md.md_iterations);
 			u_char dkey[G_ELI_USERKEYLEN];
 
 			pkcs5v2_genkey(dkey, sizeof(dkey), geli_e->md.md_salt,
 			    sizeof(geli_e->md.md_salt), passphrase,
 			    geli_e->md.md_iterations);
 			g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
-			bzero(&dkey, sizeof(dkey));
+			explicit_bzero(dkey, sizeof(dkey));
 		}
 
 		g_eli_crypto_hmac_final(&ctx, key, 0);
 
 		error = g_eli_mkey_decrypt(&geli_e->md, key, mkey, &keynum);
-		bzero(&key, sizeof(key));
 		if (error == -1) {
-			bzero(&mkey, sizeof(mkey));
-			printf("Bad GELI key: %d\n", error);
+			explicit_bzero(mkey, sizeof(mkey));
+			explicit_bzero(key, sizeof(key));
+			printf("Bad GELI key: bad password?\n");
 			return (error);
 		} else if (error != 0) {
-			bzero(&mkey, sizeof(mkey));
-			printf("Failed to decrypt GELI master key: %d\n", error);
+			explicit_bzero(mkey, sizeof(mkey));
+			explicit_bzero(key, sizeof(key));
+                        printf("Failed to decrypt GELI master key: %d\n", error);
 			return (error);
-		}
+		} else {
+                        /* Add key to keychain */
+                        save_key(key);
+                        explicit_bzero(&key, sizeof(key));
+                }
 
+found_key:
 		/* Store the keys */
 		bcopy(mkey, geli_e->sc.sc_mkey, sizeof(geli_e->sc.sc_mkey));
 		bcopy(mkey, geli_e->sc.sc_ivkey, sizeof(geli_e->sc.sc_ivkey));
 		mkp = mkey + sizeof(geli_e->sc.sc_ivkey);
 		if ((geli_e->sc.sc_flags & G_ELI_FLAG_AUTH) == 0) {
 			bcopy(mkp, geli_e->sc.sc_ekey, G_ELI_DATAKEYLEN);
 		} else {
 			/*
 			 * The encryption key is: ekey = HMAC_SHA512(Data-Key, 0x10)
 			 */
 			g_eli_crypto_hmac(mkp, G_ELI_MAXKEYLEN, "\x10", 1,
 			    geli_e->sc.sc_ekey, 0);
 		}
-		bzero(&mkey, sizeof(mkey));
+		explicit_bzero(mkey, sizeof(mkey));
 
 		/* Initialize the per-sector IV. */
 		switch (geli_e->sc.sc_ealgo) {
 		case CRYPTO_AES_XTS:
 			break;
 		default:
 			SHA256_Init(&geli_e->sc.sc_ivctx);
 			SHA256_Update(&geli_e->sc.sc_ivctx, geli_e->sc.sc_ivkey,
 			    sizeof(geli_e->sc.sc_ivkey));
 			break;
 		}
 
 		return (0);
 	}
 
 	/* Disk not found. */
 	return (2);
 }
 
 int
 is_geli(struct dsk *dskp)
 {
 	SLIST_FOREACH_SAFE(geli_e, &geli_head, entries, geli_e_tmp) {
 		if (geli_same_device(geli_e, dskp) == 0) {
 			return (0);
 		}
 	}
-	
+
 	return (1);
 }
 
 int
 geli_read(struct dsk *dskp, off_t offset, u_char *buf, size_t bytes)
 {
 	u_char iv[G_ELI_IVKEYLEN];
 	u_char *pbuf;
 	int error;
 	off_t dstoff;
 	uint64_t keyno;
 	size_t n, nsec, secsize;
 	struct g_eli_key gkey;
 
 	pbuf = buf;
 	SLIST_FOREACH_SAFE(geli_e, &geli_head, entries, geli_e_tmp) {
 		if (geli_same_device(geli_e, dskp) != 0) {
 			continue;
 		}
 
 		secsize = geli_e->sc.sc_sectorsize;
 		nsec = bytes / secsize;
 		if (nsec == 0) {
 			/*
 			 * A read of less than the GELI sector size has been
 			 * requested. The caller provided destination buffer may
 			 * not be big enough to boost the read to a full sector,
 			 * so just attempt to decrypt the truncated sector.
 			 */
 			secsize = bytes;
 			nsec = 1;
 		}
 
 		for (n = 0, dstoff = offset; n < nsec; n++, dstoff += secsize) {
 
 			g_eli_crypto_ivgen(&geli_e->sc, dstoff, iv,
 			    G_ELI_IVKEYLEN);
 
 			/* Get the key that corresponds to this offset. */
 			keyno = (dstoff >> G_ELI_KEY_SHIFT) / secsize;
 			g_eli_key_fill(&geli_e->sc, &gkey, keyno);
 
 			error = geliboot_crypt(geli_e->sc.sc_ealgo, 0, pbuf,
 			    secsize, gkey.gek_key,
 			    geli_e->sc.sc_ekeylen, iv);
 
 			if (error != 0) {
-				bzero(&gkey, sizeof(gkey));
+				explicit_bzero(&gkey, sizeof(gkey));
 				printf("Failed to decrypt in geli_read()!");
 				return (error);
 			}
 			pbuf += secsize;
 		}
-		bzero(&gkey, sizeof(gkey));
+		explicit_bzero(&gkey, sizeof(gkey));
 		return (0);
 	}
 
 	printf("GELI provider not found\n");
 	return (1);
 }
 
 int
+geli_havekey(struct dsk *dskp)
+{
+	u_char mkey[G_ELI_DATAIVKEYLEN];
+
+	SLIST_FOREACH_SAFE(geli_e, &geli_head, entries, geli_e_tmp) {
+		if (geli_same_device(geli_e, dskp) != 0) {
+			continue;
+		}
+
+		if (geli_findkey(geli_e, dskp, mkey) == 0) {
+			if (geli_attach(dskp, NULL, mkey) == 0) {
+				return (0);
+			}
+		}
+	}
+	explicit_bzero(mkey, sizeof(mkey));
+
+	return (1);
+}
+
+int
 geli_passphrase(char *pw, int disk, int parttype, int part, struct dsk *dskp)
 {
 	int i;
 
 	/* TODO: Implement GELI keyfile(s) support */
 	for (i = 0; i < 3; i++) {
 		/* Try cached passphrase */
 		if (i == 0 && pw[0] != '\0') {
-			if (geli_attach(dskp, pw) == 0) {
+			if (geli_attach(dskp, pw, NULL) == 0) {
 				return (0);
 			}
 		}
 		printf("GELI Passphrase for disk%d%c%d: ", disk, parttype, part);
 		pwgets(pw, GELI_PW_MAXLEN);
 		printf("\n");
-		if (geli_attach(dskp, pw) == 0) {
+		if (geli_attach(dskp, pw, NULL) == 0) {
 			return (0);
 		}
 	}
 
 	return (1);
 }
Index: stable/11/sys/boot/geli/geliboot.h
===================================================================
--- stable/11/sys/boot/geli/geliboot.h	(revision 329098)
+++ stable/11/sys/boot/geli/geliboot.h	(revision 329099)
@@ -1,92 +1,68 @@
 /*-
  * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
  * Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
-#include <sys/endian.h>
-#include <sys/queue.h>
+#include <crypto/intake.h>
 
 #ifndef _GELIBOOT_H_
 #define _GELIBOOT_H_
 
-#define _STRING_H_
-#define _STRINGS_H_
-#define _STDIO_H_
-#include <geom/eli/g_eli.h>
-#include <geom/eli/pkcs5v2.h>
-
-/* Pull in the md5, sha256, and sha512 implementations */
-#include <md5.h>
-#include <crypto/sha2/sha256.h>
-#include <crypto/sha2/sha512.h>
-
-/* Pull in AES implementation */
-#include <crypto/rijndael/rijndael-api-fst.h>
-
-/* AES-XTS implementation */
-#define _STAND
-#define STAND_H /* We don't want stand.h in {gpt,zfs,gptzfs}boot */
-#include <opencrypto/xform_enc.h>
-
 #ifndef DEV_BSIZE
 #define DEV_BSIZE 			512
 #endif
 #ifndef DEV_GELIBOOT_BSIZE
 #define DEV_GELIBOOT_BSIZE		4096
 #endif
 
 #ifndef MIN
 #define    MIN(a,b) (((a) < (b)) ? (a) : (b))
 #endif
 
+#define	GELI_MAX_KEYS			64
 #define GELI_PW_MAXLEN			256
+
 extern void pwgets(char *buf, int n);
 
-struct geli_entry {
-	struct dsk		*dsk;
-	off_t			part_end;
-	struct g_eli_softc	sc;
-	struct g_eli_metadata	md;
-	SLIST_ENTRY(geli_entry)	entries;
-} *geli_e, *geli_e_tmp;
-
-int geli_count;
-
 void geli_init(void);
 int geli_taste(int read_func(void *vdev, void *priv, off_t off,
     void *buf, size_t bytes), struct dsk *dsk, daddr_t lastsector);
-int geli_attach(struct dsk *dskp, const char *passphrase);
+int geli_attach(struct dsk *dskp, const char *passphrase, const u_char *mkeyp);
 int is_geli(struct dsk *dsk);
 int geli_read(struct dsk *dsk, off_t offset, u_char *buf, size_t bytes);
 int geli_decrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize, const uint8_t* iv);
+int geli_havekey(struct dsk *dskp);
 int geli_passphrase(char *pw, int disk, int parttype, int part, struct dsk *dskp);
 
 int geliboot_crypt(u_int algo, int enc, u_char *data, size_t datasize,
     const u_char *key, size_t keysize, u_char *iv);
+
+void geli_fill_keybuf(struct keybuf *keybuf);
+void geli_save_keybuf(struct keybuf *keybuf);
 
 #endif /* _GELIBOOT_H_ */
Index: stable/11/sys/boot/geli/geliboot_crypto.c
===================================================================
--- stable/11/sys/boot/geli/geliboot_crypto.c	(revision 329098)
+++ stable/11/sys/boot/geli/geliboot_crypto.c	(revision 329099)
@@ -1,139 +1,140 @@
 /*-
  * Copyright (c) 2005-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <stdio.h>
 #include <string.h>
 #include <strings.h>
 
+#include "geliboot_internal.h"
 #include "geliboot.h"
 
 int
 geliboot_crypt(u_int algo, int enc, u_char *data, size_t datasize,
     const u_char *key, size_t keysize, u_char *iv)
 {
 	keyInstance aeskey;
 	cipherInstance cipher;
 	struct aes_xts_ctx xtsctx, *ctxp;
 	size_t xts_len;
 	int err, blks, i;
 
 	switch (algo) {
 	case CRYPTO_AES_CBC:
 		err = rijndael_makeKey(&aeskey, !enc, keysize, 
 		    (const char *)key);
 		if (err < 0) {
 			printf("Failed to setup decryption keys: %d\n", err);
 			return (err);
 		}
 
 		err = rijndael_cipherInit(&cipher, MODE_CBC, iv);
 		if (err < 0) {
 			printf("Failed to setup IV: %d\n", err);
 			return (err);
 		}
 
 		switch (enc) {
 		case 0: /* decrypt */
 			blks = rijndael_blockDecrypt(&cipher, &aeskey, data, 
 			    datasize * 8, data);
 			break;
 		case 1: /* encrypt */
 			blks = rijndael_blockEncrypt(&cipher, &aeskey, data, 
 			    datasize * 8, data);
 			break;
 		}
 		if (datasize != (blks / 8)) {
 			printf("Failed to decrypt the entire input: "
 			    "%u != %u\n", blks, datasize);
 			return (1);
 		}
 		break;
 	case CRYPTO_AES_XTS:
 		xts_len = keysize << 1;
 		ctxp = &xtsctx;
 
 		rijndael_set_key(&ctxp->key1, key, xts_len / 2);
 		rijndael_set_key(&ctxp->key2, key + (xts_len / 16), xts_len / 2);
 
 		enc_xform_aes_xts.reinit(ctxp, iv);
 
 		switch (enc) {
 		case 0: /* decrypt */
 			for (i = 0; i < datasize; i += AES_XTS_BLOCKSIZE) {
 				enc_xform_aes_xts.decrypt(ctxp, data + i);
 			}
 			break;
 		case 1: /* encrypt */
 			for (i = 0; i < datasize; i += AES_XTS_BLOCKSIZE) {
 				enc_xform_aes_xts.encrypt(ctxp, data + i);
 			}
 			break;
 		}
 		break;
 	default:
 		printf("Unsupported crypto algorithm #%d\n", algo);
 		return (1);
 	}
 
 	return (0);
 }
 
 static int
 g_eli_crypto_cipher(u_int algo, int enc, u_char *data, size_t datasize,
     const u_char *key, size_t keysize)
 {
 	u_char iv[keysize];
 
-	bzero(iv, sizeof(iv));
+	explicit_bzero(iv, sizeof(iv));
 	return (geliboot_crypt(algo, enc, data, datasize, key, keysize, iv));
 }
 
 int
 g_eli_crypto_encrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize)
 {
 
 	/* We prefer AES-CBC for metadata protection. */
 	if (algo == CRYPTO_AES_XTS)
 		algo = CRYPTO_AES_CBC;
 
 	return (g_eli_crypto_cipher(algo, 1, data, datasize, key, keysize));
 }
 
 int
 g_eli_crypto_decrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize)
 {
 
 	/* We prefer AES-CBC for metadata protection. */
 	if (algo == CRYPTO_AES_XTS)
 		algo = CRYPTO_AES_CBC;
 
 	return (g_eli_crypto_cipher(algo, 0, data, datasize, key, keysize));
 }
Index: stable/11/sys/boot/geli/geliboot_internal.h
===================================================================
--- stable/11/sys/boot/geli/geliboot_internal.h	(nonexistent)
+++ stable/11/sys/boot/geli/geliboot_internal.h	(revision 329099)
@@ -0,0 +1,69 @@
+/*-
+ * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
+ * Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _GELIBOOT_INTERNAL_H_
+#define _GELIBOOT_INTERNAL_H_
+
+#define _STRING_H_
+#define _STRINGS_H_
+#define _STDIO_H_
+
+#include <sys/endian.h>
+#include <sys/queue.h>
+
+#include <geom/eli/g_eli.h>
+#include <geom/eli/pkcs5v2.h>
+
+#include <bootstrap.h>
+
+/* Pull in the md5, sha256, and sha512 implementations */
+#include <md5.h>
+#include <crypto/sha2/sha256.h>
+#include <crypto/sha2/sha512.h>
+
+/* Pull in AES implementation */
+#include <crypto/rijndael/rijndael-api-fst.h>
+
+/* AES-XTS implementation */
+#define _STAND
+#define STAND_H /* We don't want stand.h in {gpt,zfs,gptzfs}boot */
+#include <opencrypto/xform_enc.h>
+
+struct geli_entry {
+	struct dsk		*dsk;
+	off_t			part_end;
+	struct g_eli_softc	sc;
+	struct g_eli_metadata	md;
+	int			keybuf_slot;
+	SLIST_ENTRY(geli_entry)	entries;
+} *geli_e, *geli_e_tmp;
+
+static int geli_count;
+
+#endif /* _GELIBOOT_INTERNAL_H_ */

Property changes on: stable/11/sys/boot/geli/geliboot_internal.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: stable/11/sys/boot/geli/pwgets.c
===================================================================
--- stable/11/sys/boot/geli/pwgets.c	(revision 329098)
+++ stable/11/sys/boot/geli/pwgets.c	(revision 329099)
@@ -1,83 +1,75 @@
 /*	$NetBSD: gets.c,v 1.6 1995/10/11 21:16:57 pk Exp $	*/
 
 /*-
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)gets.c	8.1 (Berkeley) 6/11/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "stand.h"
 
 /* gets() with constrained input length, for passwords */
 
 void
 pwgets(char *buf, int n)
 {
     int c;
     char *lp;
 
     for (lp = buf;;)
 	switch (c = getchar() & 0177) {
 	case '\n':
 	case '\r':
 	    *lp = '\0';
 	    putchar('\n');
 	    return;
 	case '\b':
 	case '\177':
 	    if (lp > buf) {
 		lp--;
 		putchar('\b');
 		putchar(' ');
 		putchar('\b');
 	    }
 	    break;
-	case 'r'&037: {
-	    char *p;
-
-	    putchar('\n');
-	    for (p = buf; p < lp; ++p)
-		putchar(*p);
-	    break;
-	}
 	case 'u'&037:
 	case 'w'&037:
 	    lp = buf;
 	    putchar('\n');
 	    break;
 	default:
 	    if ((n < 1) || ((lp - buf) < n - 1)) {
 		*lp++ = c;
 		putchar('*');
 	    }
 	}
     /*NOTREACHED*/
 }
Index: stable/11/sys/boot/i386/boot2/Makefile
===================================================================
--- stable/11/sys/boot/i386/boot2/Makefile	(revision 329098)
+++ stable/11/sys/boot/i386/boot2/Makefile	(revision 329099)
@@ -1,120 +1,109 @@
 # $FreeBSD$
 
 .include <bsd.own.mk>
 
 FILES=		boot boot1 boot2
 
 NM?=		nm
 
 # A value of 0x80 enables LBA support.
 BOOT_BOOT1_FLAGS?=	0x80
 
 BOOT_COMCONSOLE_PORT?= 0x3f8
 BOOT_COMCONSOLE_SPEED?= 9600
 B2SIOFMT?=	0x3
 
 REL1=	0x700
 ORG1=	0x7c00
 ORG2=	0x2000
 
 # Decide level of UFS support.
 BOOT2_UFS?=	UFS1_AND_UFS2
 #BOOT2_UFS?=	UFS2_ONLY
 #BOOT2_UFS?=	UFS1_ONLY
 
 CFLAGS=	-fomit-frame-pointer \
 	-mrtd \
 	-mregparm=3 \
-	-DUSE_XREAD \
 	-D${BOOT2_UFS} \
 	-DFLAGS=${BOOT_BOOT1_FLAGS} \
 	-DSIOPRT=${BOOT_COMCONSOLE_PORT} \
 	-DSIOFMT=${B2SIOFMT} \
 	-DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
 	-I${.CURDIR}/../../common \
 	-I${.CURDIR}/../btx/lib -I. \
 	-Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \
 	-Wmissing-declarations -Wmissing-prototypes -Wnested-externs \
 	-Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings \
 	-Winline
 
 CFLAGS.gcc+=	-Os \
 		-fno-asynchronous-unwind-tables \
-		-fno-guess-branch-probability \
-		-fno-unit-at-a-time \
 		--param max-inline-insns-single=100
 .if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} <= 40201
 CFLAGS.gcc+=   -mno-align-long-strings
 .endif
 
 CFLAGS.clang+=	-Oz ${CLANG_OPT_SMALL}
 
 LD_FLAGS=${LD_FLAGS_BIN}
 
 # Pick up ../Makefile.inc early.
 .include <bsd.init.mk>
 
 CLEANFILES=	boot
 
 boot: boot1 boot2
 	cat boot1 boot2 > boot
 
 CLEANFILES+=	boot1 boot1.out boot1.o
 
 boot1: boot1.out
 	${OBJCOPY} -S -O binary boot1.out ${.TARGET}
 
 boot1.out: boot1.o
 	${LD} ${LD_FLAGS} -e start -Ttext ${ORG1} -o ${.TARGET} boot1.o
 
 CLEANFILES+=	boot2 boot2.ld boot2.ldr boot2.bin boot2.out boot2.o \
-		boot2.s boot2.s.tmp boot2.h sio.o
+		boot2.h sio.o
 
 BOOT2SIZE=	7680
 
 boot2: boot2.ld
 	@set -- `ls -l ${.ALLSRC}`; x=$$((${BOOT2SIZE}-$$5)); \
 	    echo "$$x bytes available"; test $$x -ge 0
 	${DD} if=${.ALLSRC} of=${.TARGET} obs=${BOOT2SIZE} conv=osync
 
 boot2.ld: boot2.ldr boot2.bin ${BTXKERN}
 	btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l boot2.ldr \
 	    -o ${.TARGET} -P 1 boot2.bin
 
 boot2.ldr:
 	${DD} if=/dev/zero of=${.TARGET} bs=512 count=1
 
 boot2.bin: boot2.out
 	${OBJCOPY} -S -O binary boot2.out ${.TARGET}
 
 boot2.out: ${BTXCRT} boot2.o sio.o
 	${LD} ${LD_FLAGS} -Ttext ${ORG2} -o ${.TARGET} ${.ALLSRC}
 
-boot2.o: boot2.s
-	${CC} ${ACFLAGS} -c boot2.s
-
 SRCS=	boot2.c boot2.h
-
-boot2.s: boot2.c boot2.h ${.CURDIR}/../../common/ufsread.c
-	${CC} ${CFLAGS} -S -o boot2.s.tmp ${.CURDIR}/boot2.c
-	sed -e '/align/d' -e '/nop/d' < boot2.s.tmp > boot2.s
-	rm -f boot2.s.tmp
 
 boot2.h: boot1.out
 	${NM} -t d ${.ALLSRC} | awk '/([0-9])+ T xread/ \
 	    { x = $$1 - ORG1; \
 	    printf("#define XREADORG %#x\n", REL1 + x) }' \
 	    ORG1=`printf "%d" ${ORG1}` \
 	    REL1=`printf "%d" ${REL1}` > ${.TARGET}
 
 .if ${MACHINE_CPUARCH} == "amd64"
-beforedepend boot2.s: machine
+beforedepend boot2.o: machine
 CLEANFILES+=	machine
 machine: ${.CURDIR}/../../../i386/include .NOMETA
 	ln -sf ${.ALLSRC} ${.TARGET}
 .endif
 
 .include <bsd.prog.mk>
 
 # XXX: clang integrated-as doesn't grok .codeNN directives yet
 CFLAGS.boot1.S=		${CLANG_NO_IAS}
Index: stable/11/sys/boot/i386/boot2/boot2.c
===================================================================
--- stable/11/sys/boot/i386/boot2/boot2.c	(revision 329098)
+++ stable/11/sys/boot/i386/boot2/boot2.c	(revision 329099)
@@ -1,646 +1,646 @@
 /*-
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/disklabel.h>
 #include <sys/diskmbr.h>
 #include <sys/dirent.h>
 #include <sys/reboot.h>
 
 #include <machine/bootinfo.h>
 #include <machine/elf.h>
 
 #include <stdarg.h>
 
 #include <a.out.h>
 
 #include <btxv86.h>
 
 #include "boot2.h"
 #include "lib.h"
 #include "paths.h"
 #include "rbx.h"
 
 /* Define to 0 to omit serial support */
 #ifndef SERIAL
 #define SERIAL 1
 #endif
 
 #define IO_KEYBOARD	1
 #define IO_SERIAL	2
 
 #if SERIAL
 #define DO_KBD (ioctrl & IO_KEYBOARD)
 #define DO_SIO (ioctrl & IO_SERIAL)
 #else
 #define DO_KBD (1)
 #define DO_SIO (0)
 #endif
 
 #define SECOND		18	/* Circa that many ticks in a second. */
 
 #define ARGS		0x900
 #define NOPT		14
 #define NDEV		3
 #define MEM_BASE	0x12
 #define MEM_EXT 	0x15
 
 #define DRV_HARD	0x80
 #define DRV_MASK	0x7f
 
 #define TYPE_AD		0
 #define TYPE_DA		1
 #define TYPE_MAXHARD	TYPE_DA
 #define TYPE_FD		2
 
 extern uint32_t _end;
 
 static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
 static const unsigned char flags[NOPT] = {
     RBX_DUAL,
     RBX_SERIAL,
     RBX_ASKNAME,
     RBX_CDROM,
     RBX_CONFIG,
     RBX_KDB,
     RBX_GDB,
     RBX_MUTE,
     RBX_NOINTR,
     RBX_PAUSE,
     RBX_QUIET,
     RBX_DFLTROOT,
     RBX_SINGLE,
     RBX_VERBOSE
 };
 
 static const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
 static const unsigned char dev_maj[NDEV] = {30, 4, 2};
 
 static struct dsk {
     unsigned drive;
     unsigned type;
     unsigned unit;
     uint8_t slice;
     uint8_t part;
     unsigned start;
     int init;
 } dsk;
 static char cmd[512], cmddup[512], knamebuf[1024];
 static const char *kname;
 uint32_t opts;
 static struct bootinfo bootinfo;
 #if SERIAL
 static int comspeed = SIOSPD;
 static uint8_t ioctrl = IO_KEYBOARD;
 #endif
 
 int main(void);
 void exit(int);
 static void load(void);
 static int parse(void);
 static int dskread(void *, unsigned, unsigned);
 static void printf(const char *,...);
 static void putchar(int);
 static int drvread(void *, unsigned, unsigned);
 static int keyhit(unsigned);
 static int xputc(int);
 static int xgetc(int);
 static inline int getc(int);
 
 /*
  * Private byte-at-a-time memcpy: copies len bytes from src to dst, walking
  * both buffers forward.  Unlike the libc function it returns nothing.
  * NOTE(review): len is a signed int and the loop is `while (len--)`, so
  * callers are expected to pass a non-negative length.
  */
 static void memcpy(void *, const void *, int);
 static void
 memcpy(void *dst, const void *src, int len)
 {
     const char *s = src;
     char *d = dst;
 
     while (len--)
         *d++ = *s++;
 }
 
 /*
  * Compare two NUL-terminated strings.  Returns 0 when they are equal,
  * otherwise the difference between the first pair of differing bytes,
  * each taken as unsigned char.
  */
 static inline int
 strcmp(const char *s1, const char *s2)
 {
     /* Advance while the bytes match and the end of s1 is not reached. */
     for (; *s1 == *s2 && *s1; s1++, s2++);
     return (unsigned char)*s1 - (unsigned char)*s2;
 }
 
 #define	UFS_SMALL_CGBASE
 #include "ufsread.c"
 
 /*
  * Read exactly nbyte bytes of inode into buf through fsread().
  * Returns 0 on success; when fsread() reports any other byte count it
  * prints "Invalid format" and returns -1.
  */
-static inline int
+static int
 xfsread(ufs_ino_t inode, void *buf, size_t nbyte)
 {
     if ((size_t)fsread(inode, buf, nbyte) != nbyte) {
 	printf("Invalid %s\n", "format");
 	return -1;
     }
     return 0;
 }
 
 /*
  * Read one line from the console into the global cmd[] buffer.
  * Backspace/DEL erase the previous character, newline or carriage return
  * terminate the string and return.  Any other character is stored while
  * there is room (one byte is kept for the terminator) and is echoed in
  * every case, even when the buffer is already full.
  */
 static inline void
 getstr(void)
 {
     char *s;
     int c;
 
     s = cmd;
     for (;;) {
 	switch (c = xgetc(0)) {
 	case 0:
 	    /* Nothing usable was read; keep polling. */
 	    break;
 	case '\177':
 	case '\b':
 	    if (s > cmd) {
 		s--;
 		/* Rub out the character on screen. */
 		printf("\b \b");
 	    }
 	    break;
 	case '\n':
 	case '\r':
 	    *s = 0;
 	    return;
 	default:
 	    if (s - cmd < sizeof(cmd) - 1)
 		*s++ = c;
 	    putchar(c);
 	}
     }
 }
 
 /*
  * Write one character to the screen through the v86 interface:
  * interrupt 0x10 with AH = 0x0e, AL = c and EBX = 7.
  * NOTE(review): presumably the BIOS teletype-output service with
  * attribute 7 -- confirm against the BIOS video documentation.
  */
 static inline void
 putc(int c)
 {
     v86.addr = 0x10;
     v86.eax = 0xe00 | (c & 0xff);
     v86.ebx = 0x7;
     v86int();
 }
 
 /*
  * boot2 entry point.  Sets up the disk and bootinfo state from the
  * arguments left at ARGS, applies the configuration file if there is one,
  * tries to start the stage 3 loader and finally drops into the interactive
  * "boot:" prompt loop, which never returns.
  */
 int
 main(void)
 {
     uint8_t autoboot;
     ufs_ino_t ino;
     size_t nbyte;
 
     /*
      * Place dmadat at the first 64 KiB-aligned physical address at or
      * above _end, converted back to a client virtual address.
      */
     dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
     v86.ctl = V86_FLAGS;
     v86.efl = PSL_RESERVED_DEFAULT | PSL_I;
     /* The byte at ARGS holds the drive number, ARGS + 1 the slice. */
     dsk.drive = *(uint8_t *)PTOV(ARGS);
     dsk.type = dsk.drive & DRV_HARD ? TYPE_AD : TYPE_FD;
     dsk.unit = dsk.drive & DRV_MASK;
     dsk.slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
     bootinfo.bi_version = BOOTINFO_VERSION;
     bootinfo.bi_size = sizeof(bootinfo);
 
     /* Process configuration file */
 
     autoboot = 1;
 
     /*
      * NOTE(review): the fsread() result is stored in a size_t and used as
      * an index without a check; gptboot guards against a negative result
      * before indexing.  Confirm fsread() cannot fail here.
      */
     if ((ino = lookup(PATH_CONFIG)) ||
         (ino = lookup(PATH_DOTCONFIG))) {
 	nbyte = fsread(ino, cmd, sizeof(cmd) - 1);
 	cmd[nbyte] = '\0';
     }
 
     if (*cmd) {
 	/* parse() modifies cmd in place, so keep a copy for display. */
 	memcpy(cmddup, cmd, sizeof(cmd));
 	if (parse())
 	    autoboot = 0;
 	if (!OPT_CHECK(RBX_QUIET))
 	    printf("%s: %s", PATH_CONFIG, cmddup);
 	/* Do not process this command twice */
 	*cmd = 0;
     }
 
     /*
      * Try to exec stage 3 boot loader. If interrupted by a keypress,
      * or in case of failure, try to load a kernel directly instead.
      */
 
     if (!kname) {
 	kname = PATH_LOADER;
 	if (autoboot && !keyhit(3*SECOND)) {
 	    load();
 	    kname = PATH_KERNEL;
 	}
     }
 
     /* Present the user with the boot2 prompt. */
 
     for (;;) {
 	if (!autoboot || !OPT_CHECK(RBX_QUIET))
 	    printf("\nFreeBSD/x86 boot\n"
 		   "Default: %u:%s(%u,%c)%s\n"
 		   "boot: ",
 		   dsk.drive & DRV_MASK, dev_nm[dsk.type], dsk.unit,
 		   'a' + dsk.part, kname);
 	if (DO_SIO)
 	    sio_flush();
 	/* Read a command unless autoboot times out without a keypress. */
 	if (!autoboot || keyhit(3*SECOND))
 	    getstr();
 	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
 	    putchar('\n');
 	autoboot = 0;
 	/* A parse error rings the bell; otherwise attempt the boot. */
 	if (parse())
 	    putchar('\a');
 	else
 	    load();
     }
 }
 
 /*
  * XXX - Needed for btxld to link the boot2 binary; do not remove.
  * Intentionally empty: the argument is ignored and the function returns.
  */
 void
 exit(int x)
 {
 }
 
 /*
  * Load the file named by kname from the current disk and start it with
  * __exec().  Two formats are recognised from the header: a.out (ZMAGIC)
  * and ELF.  On any lookup, read or format error the function just returns;
  * read and format errors print a message first, a failed lookup prints one
  * only when ls is clear.
  */
 static void
 load(void)
 {
     union {
 	struct exec ex;
 	Elf32_Ehdr eh;
     } hdr;
     static Elf32_Phdr ep[2];
     static Elf32_Shdr es[2];
     caddr_t p;
     ufs_ino_t ino;
     uint32_t addr;
     int k;
     uint8_t i, j;
 
     if (!(ino = lookup(kname))) {
 	if (!ls)
 	    printf("No %s\n", kname);
 	return;
     }
     if (xfsread(ino, &hdr, sizeof(hdr)))
 	return;
 
     if (N_GETMAGIC(hdr.ex) == ZMAGIC) {
 	/*
 	 * a.out: text is read from file offset PAGE_SIZE to the entry
 	 * address (masked to 24 bits); data follows at the page-rounded
 	 * end of text.
 	 */
 	addr = hdr.ex.a_entry & 0xffffff;
 	p = PTOV(addr);
 	fs_off = PAGE_SIZE;
 	if (xfsread(ino, p, hdr.ex.a_text))
 	    return;
 	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
 	if (xfsread(ino, p, hdr.ex.a_data))
 	    return;
     } else if (IS_ELF(hdr.eh)) {
 	/* Collect the first two PT_LOAD program headers into ep[]. */
 	fs_off = hdr.eh.e_phoff;
 	for (j = k = 0; k < hdr.eh.e_phnum && j < 2; k++) {
 	    if (xfsread(ino, ep + j, sizeof(ep[0])))
 		return;
 	    if (ep[j].p_type == PT_LOAD)
 		j++;
 	}
 	/*
 	 * Load both segments at their physical address masked to 24 bits.
 	 * NOTE(review): this always uses ep[0] and ep[1], even when fewer
 	 * than two PT_LOAD headers were found above -- confirm that is
 	 * acceptable for the files this is expected to load.
 	 */
 	for (i = 0; i < 2; i++) {
 	    p = PTOV(ep[i].p_paddr & 0xffffff);
 	    fs_off = ep[i].p_offset;
 	    if (xfsread(ino, p, ep[i].p_filesz))
 		return;
 	}
 	/* Symbol data goes after the page-rounded second segment. */
 	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
 	bootinfo.bi_symtab = VTOP(p);
 	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
 	    /*
 	     * Read the two section headers that follow e_shstrndx and copy
 	     * each section, prefixed by its size.
 	     */
 	    fs_off = hdr.eh.e_shoff + sizeof(es[0]) *
 		(hdr.eh.e_shstrndx + 1);
 	    if (xfsread(ino, &es, sizeof(es)))
 		return;
 	    for (i = 0; i < 2; i++) {
 		*(Elf32_Word *)p = es[i].sh_size;
 		p += sizeof(es[i].sh_size);
 		fs_off = es[i].sh_offset;
 		if (xfsread(ino, p, es[i].sh_size))
 		    return;
 		p += es[i].sh_size;
 	    }
 	}
 	addr = hdr.eh.e_entry & 0xffffff;
 	bootinfo.bi_esymtab = VTOP(p);
     } else {
 	printf("Invalid %s\n", "format");
 	return;
     }
 
     /* Hand over to the loaded image, passing bootinfo by physical address. */
     bootinfo.bi_kernelname = VTOP(kname);
     bootinfo.bi_bios_dev = dsk.drive;
     __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
 	   MAKEBOOTDEV(dev_maj[dsk.type], dsk.slice, dsk.unit, dsk.part),
 	   0, 0, 0, VTOP(&bootinfo));
 }
 
 /*
  * Parse the command line held in the global cmd[] buffer, in place.
  * Words are separated by space, tab or newline.  A word starting with '-'
  * is a list of option letters; any other word is an optional device
  * specification of the form [drive:]dev(unit,[slice,]part) followed by an
  * optional file name, which is copied to knamebuf and becomes kname.
  * Returns 0 on success and -1 on a syntax error.
  */
 static int
 parse()
 {
     char *arg = cmd;
     char *ep, *p, *q;
     const char *cp;
     unsigned int drv;
     int c, i, j;
     size_t k;
 
     while ((c = *arg++)) {
 	if (c == ' ' || c == '\t' || c == '\n')
 	    continue;
 	/* Find the end of this word and terminate it. */
 	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
 	ep = p;
 	if (*p)
 	    *p++ = 0;
 	if (c == '-') {
 	    while ((c = *arg++)) {
 		if (c == 'P') {
 		    /*
 		     * Probe: test bit 0x10 of the byte at physical 0x496;
 		     * when it is clear, select dual and serial console.
 		     */
 		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
 			cp = "yes";
 		    } else {
 			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
 			cp = "no";
 		    }
 		    printf("Keyboard: %s\n", cp);
 		    continue;
 #if SERIAL
 		} else if (c == 'S') {
 		    /* -S<number>: serial speed; must end the word. */
 		    j = 0;
 		    while ((unsigned int)(i = *arg++ - '0') <= 9)
 			j = j * 10 + i;
 		    if (j > 0 && i == -'0') {
 			comspeed = j;
 			break;
 		    }
 		    /* Fall through to error below ('S' not in optstr[]). */
 #endif
 		}
 		/* Look the letter up in optstr[] and toggle its flag. */
 		for (i = 0; c != optstr[i]; i++)
 		    if (i == NOPT - 1)
 			return -1;
 		opts ^= OPT_SET(flags[i]);
 	    }
 #if SERIAL
 	    /* Recompute the console selection from the option bits. */
 	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
 		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
 	    if (DO_SIO) {
 	        if (sio_init(115200 / comspeed) != 0)
 		    ioctrl &= ~IO_SERIAL;
 	    }
 #endif
 	} else {
 	    /* A '(' anywhere in the word means a device spec is present. */
 	    for (q = arg--; *q && *q != '('; q++);
 	    if (*q) {
 		drv = -1;
 		if (arg[1] == ':') {
 		    drv = *arg - '0';
 		    if (drv > 9)
 			return (-1);
 		    arg += 2;
 		}
 		/* The device name must be exactly two characters. */
 		if (q - arg != 2)
 		    return -1;
 		for (i = 0; arg[0] != dev_nm[i][0] ||
 			    arg[1] != dev_nm[i][1]; i++)
 		    if (i == NDEV - 1)
 			return -1;
 		dsk.type = i;
 		arg += 3;
 		dsk.unit = *arg - '0';
 		if (arg[1] != ',' || dsk.unit > 9)
 		    return -1;
 		arg += 2;
 		/* The slice is optional; default to the whole disk. */
 		dsk.slice = WHOLE_DISK_SLICE;
 		if (arg[1] == ',') {
 		    dsk.slice = *arg - '0' + 1;
 		    if (dsk.slice > NDOSPART + 1)
 			return -1;
 		    arg += 2;
 		}
 		if (arg[1] != ')')
 		    return -1;
 		dsk.part = *arg - 'a';
 		if (dsk.part > 7)
 		    return (-1);
 		arg += 2;
 		/* Without an explicit drive number, use the unit. */
 		if (drv == -1)
 		    drv = dsk.unit;
 		dsk.drive = (dsk.type <= TYPE_MAXHARD
 			     ? DRV_HARD : 0) + drv;
 		/* Force dskread() to re-read the disk metadata. */
 		dsk_meta = 0;
 	    }
 	    /* Whatever is left of the word is the file name. */
 	    k = ep - arg;
 	    if (k > 0) {
 		if (k >= sizeof(knamebuf))
 		    return -1;
 		memcpy(knamebuf, arg, k + 1);
 		kname = knamebuf;
 	    }
 	}
 	arg = p;
     }
     return 0;
 }
 
 /*
  * Read nblk sectors starting at lba, relative to the selected partition,
  * into buf.  While dsk_meta is clear the MBR slice table and the disklabel
  * are read first to compute dsk.start.  Returns the drvread() result, or -1
  * after printing "Invalid slice/label/partition" when the metadata does not
  * match the selection.
  */
 static int
 dskread(void *buf, unsigned lba, unsigned nblk)
 {
     struct dos_partition *dp;
     struct disklabel *d;
     char *sec;
     unsigned i;
     uint8_t sl;
     const char *reason;
 
     if (!dsk_meta) {
 	sec = dmadat->secbuf;
 	dsk.start = 0;
 	if (drvread(sec, DOSBBSECTOR, 1))
 	    return -1;
 	dp = (void *)(sec + DOSPARTOFF);
 	sl = dsk.slice;
 	if (sl < BASE_SLICE) {
 	    /*
 	     * No explicit slice: scan for a 386BSD-type entry, stopping
 	     * at one with flag bit 0x80 set (or at the first match for
 	     * the compatibility slice).
 	     */
 	    for (i = 0; i < NDOSPART; i++)
 		if (dp[i].dp_typ == DOSPTYP_386BSD &&
 		    (dp[i].dp_flag & 0x80 || sl < BASE_SLICE)) {
 		    sl = BASE_SLICE + i;
 		    if (dp[i].dp_flag & 0x80 ||
 			dsk.slice == COMPATIBILITY_SLICE)
 			break;
 		}
 	    if (dsk.slice == WHOLE_DISK_SLICE)
 		dsk.slice = sl;
 	}
 	if (sl != WHOLE_DISK_SLICE) {
 	    if (sl != COMPATIBILITY_SLICE)
 		dp += sl - BASE_SLICE;
 	    if (dp->dp_typ != DOSPTYP_386BSD) {
 		reason = "slice";
 		goto error;
 	    }
 	    dsk.start = dp->dp_start;
 	}
 	/* Read the disklabel of the slice (or of the whole disk). */
 	if (drvread(sec, dsk.start + LABELSECTOR, 1))
 		return -1;
 	d = (void *)(sec + LABELOFFSET);
 	if (d->d_magic != DISKMAGIC || d->d_magic2 != DISKMAGIC) {
 	    /* Without a valid label only the raw partition is usable. */
 	    if (dsk.part != RAW_PART) {
 		reason = "label";
 		goto error;
 	    }
 	} else {
 	    if (!dsk.init) {
 		/* First valid label: switch to "da" for SCSI disks. */
 		if (d->d_type == DTYPE_SCSI)
 		    dsk.type = TYPE_DA;
 		dsk.init++;
 	    }
 	    if (dsk.part >= d->d_npartitions ||
 		!d->d_partitions[dsk.part].p_size) {
 		reason = "partition";
 		goto error;
 	    }
 	    /* Partition offset taken relative to the raw partition. */
 	    dsk.start += d->d_partitions[dsk.part].p_offset;
 	    dsk.start -= d->d_partitions[RAW_PART].p_offset;
 	}
     }
     return drvread(buf, dsk.start + lba, nblk);
 error:
     printf("Invalid %s\n", reason);
     return -1;
 }
 
 /*
  * Minimal printf: understands only %c, %s and %u.  Any other character
  * following '%' is printed as is (the '%' itself is dropped).  Output goes
  * through putchar().
  */
 static void
 printf(const char *fmt,...)
 {
     va_list ap;
     /*
      * Holds the decimal digits of one %u value; no terminator is stored.
      * NOTE(review): 10 bytes is exactly enough assuming unsigned is 32 bits.
      */
     static char buf[10];
     char *s;
     unsigned u;
     int c;
 
     va_start(ap, fmt);
     while ((c = *fmt++)) {
 	if (c == '%') {
 	    c = *fmt++;
 	    switch (c) {
 	    case 'c':
 		putchar(va_arg(ap, int));
 		continue;
 	    case 's':
 		for (s = va_arg(ap, char *); *s; s++)
 		    putchar(*s);
 		continue;
 	    case 'u':
 		u = va_arg(ap, unsigned);
 		/* Generate digits least significant first ... */
 		s = buf;
 		do
 		    *s++ = '0' + u % 10U;
 		while (u /= 10U);
 		/* ... then emit them in reverse order. */
 		while (--s >= buf)
 		    putchar(*s);
 		continue;
 	    }
 	}
 	putchar(c);
     }
     va_end(ap);
     return;
 }
 
 /*
  * Output one character through xputc(), sending a carriage return before
  * every newline.
  */
 static void
 putchar(int c)
 {
     if (c == '\n')
 	xputc('\r');
     xputc(c);
 }
 
 /*
  * Read nblk sectors starting at absolute sector lba from dsk.drive into buf
  * by far-calling the xread routine of boot1 (at XREADORG) through the v86
  * interface.  Unless RBX_QUIET is set, a one-character spinner is drawn
  * first.  Returns 0 on success; when the carry flag comes back set it prints
  * the error byte and the lba and returns -1.
  */
 static int
 drvread(void *buf, unsigned lba, unsigned nblk)
 {
     /* The four bytes are the spinner frames '-', '\', '|' and '/'. */
     static unsigned c = 0x2d5c7c2f;
 
     if (!OPT_CHECK(RBX_QUIET)) {
 	/* Rotate by one byte and print the frame now in the low byte. */
 	xputc(c = c << 8 | c >> 24);
 	xputc('\b');
     }
     v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS;
     v86.addr = XREADORG;		/* call to xread in boot1 */
     v86.es = VTOPSEG(buf);
     v86.eax = lba;
     v86.ebx = VTOPOFF(buf);
     v86.ecx = lba >> 16;
     v86.edx = nblk << 8 | dsk.drive;
     v86int();
     /* Restore the default control flags for later interrupt calls. */
     v86.ctl = V86_FLAGS;
     if (V86_CY(v86.efl)) {
 	printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
 	return -1;
     }
     return 0;
 }
 
 /*
  * Wait up to `ticks` timer ticks for input.  Returns 1 as soon as xgetc(1)
  * reports pending input, 0 on timeout, and 0 immediately when RBX_NOINTR is
  * set.
  * NOTE(review): the counter read from physical address 0x46c is presumably
  * the BIOS timer tick count (see SECOND) -- confirm.
  */
 static int
 keyhit(unsigned ticks)
 {
     uint32_t t0, t1;
 
     if (OPT_CHECK(RBX_NOINTR))
 	return 0;
     t0 = 0;
     for (;;) {
 	if (xgetc(1))
 	    return 1;
 	t1 = *(uint32_t *)PTOV(0x46c);
 	/* The first (non-zero) sample becomes the start time. */
 	if (!t0)
 	    t0 = t1;
 	if ((uint32_t)(t1 - t0) >= ticks)
 	    return 0;
     }
 }
 
 /*
  * Send c to every enabled console: the screen when DO_KBD is true and the
  * serial port when DO_SIO is true.  Returns c.
  */
 static int
 xputc(int c)
 {
     if (DO_KBD)
 	putc(c);
     if (DO_SIO)
 	sio_putc(c);
     return c;
 }
 
 /*
  * Keyboard access through interrupt 0x16 with AH = fn.
  * For fn == 0 the low byte of EAX is returned; for any other fn the result
  * is non-zero when the zero flag came back clear.
  * NOTE(review): presumably fn 0 reads a key and fn 1 polls for one --
  * confirm against the BIOS keyboard service documentation.
  */
 static int
 getc(int fn)
 {
     v86.addr = 0x16;
     v86.eax = fn << 8;
     v86int();
     return fn == 0 ? v86.eax & 0xff : !V86_ZR(v86.efl);
 }
 
 /*
  * Console input across keyboard and serial port.
  * With fn != 0 this is a poll: returns 1 if either enabled console has
  * input pending, otherwise 0.  With fn == 0 it loops until a character is
  * available and returns it.  Always returns 0 when RBX_NOINTR is set.
  */
 static int
 xgetc(int fn)
 {
     if (OPT_CHECK(RBX_NOINTR))
 	return 0;
     for (;;) {
 	if (DO_KBD && getc(1))
 	    return fn ? 1 : getc(0);
 	if (DO_SIO && sio_ischar())
 	    return fn ? 1 : sio_getc();
 	/* Polling only: nothing pending on either console. */
 	if (fn)
 	    return 0;
     }
 }
Index: stable/11/sys/boot/i386/btx/lib/btxv86.h
===================================================================
--- stable/11/sys/boot/i386/btx/lib/btxv86.h	(revision 329098)
+++ stable/11/sys/boot/i386/btx/lib/btxv86.h	(revision 329099)
@@ -1,67 +1,75 @@
 /*
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 /*
  * $FreeBSD$
  */
 
 #ifndef _BTXV86_H_
 #define _BTXV86_H_
 
 #include <sys/types.h>
 #include <machine/psl.h>
 
+/*
+ * Memory buffer space for real mode IO.
+ * Just one page is not much, but the space is rather limited.
+ * See ../btx/btx.S for details.
+ */
+#define	V86_IO_BUFFER		0x8000
+#define	V86_IO_BUFFER_SIZE	0x1000
+
 #define V86_ADDR   0x10000	/* Segment:offset address */
 #define V86_CALLF  0x20000	/* Emulate far call */
 #define V86_FLAGS  0x40000	/* Return flags */
 
 /*
  * Register and control block shared with __v86int(): callers fill in the
  * control flags, the interrupt number or address and the input registers,
  * invoke v86int(), then read results back from the same fields.
  */
 struct __v86 {
     uint32_t ctl;		/* Control flags */
     uint32_t addr;		/* Interrupt number or address */
     uint32_t es;		/* V86 ES register */
     uint32_t ds;		/* V86 DS register */
     uint32_t fs;		/* V86 FS register */
     uint32_t gs;		/* V86 GS register */
     uint32_t eax;		/* V86 EAX register */
     uint32_t ecx;		/* V86 ECX register */
     uint32_t edx;		/* V86 EDX register */
     uint32_t ebx;		/* V86 EBX register */
     uint32_t efl;		/* V86 eflags register */
     uint32_t ebp;		/* V86 EBP register */
     uint32_t esi;		/* V86 ESI register */
     uint32_t edi;		/* V86 EDI register */
 };
 
 extern struct __v86 __v86;	/* V86 interface structure */
 void __v86int(void);
 
 #define v86	__v86
 #define v86int	__v86int
 
 extern u_int32_t	__base;
 extern u_int32_t	__args;
 
 /*
  * Address conversion helpers.  PTOV/VTOP translate between physical and
  * client virtual addresses by subtracting/adding __base.  VTOPSEG/VTOPOFF
  * split the physical address of va into a real-mode segment (address >> 4)
  * and offset (low 4 bits).
  * NOTE(review): in VTOPSEG/VTOPOFF the argument is not parenthesised inside
  * the cast, so pass a simple expression (a name or &object), not a sum.
  */
 #define	PTOV(pa)	((caddr_t)(pa) - __base)
 #define	VTOP(va)	((vm_offset_t)(va) + __base)
 #define	VTOPSEG(va)	(u_int16_t)(VTOP((caddr_t)va) >> 4)
 #define	VTOPOFF(va)	(u_int16_t)(VTOP((caddr_t)va) & 0xf)
 
 #define	V86_CY(x)	((x) & PSL_C)
 #define	V86_ZR(x)	((x) & PSL_Z)
 
 void __exit(int) __attribute__((__noreturn__));
 void __exec(caddr_t, ...);
 
 #endif /* !_BTXV86_H_ */
Index: stable/11/sys/boot/i386/common/bootargs.h
===================================================================
--- stable/11/sys/boot/i386/common/bootargs.h	(revision 329098)
+++ stable/11/sys/boot/i386/common/bootargs.h	(revision 329099)
@@ -1,75 +1,93 @@
 /*-
  * Copyright (c) 2012 Andriy Gapon <avg@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  *
  * $FreeBSD$
  */
 
 #ifndef _BOOT_I386_ARGS_H_
 #define	_BOOT_I386_ARGS_H_
 
 #define	KARGS_FLAGS_CD		0x1
 #define	KARGS_FLAGS_PXE		0x2
 #define	KARGS_FLAGS_ZFS		0x4
 #define	KARGS_FLAGS_EXTARG	0x8	/* variably sized extended argument */
 
 #define	BOOTARGS_SIZE	24	/* sizeof(struct bootargs) */
 #define	BA_BOOTFLAGS	8	/* offsetof(struct bootargs, bootflags) */
 #define	BA_BOOTINFO	20	/* offsetof(struct bootargs, bootinfo) */
 #define	BI_SIZE		48	/* offsetof(struct bootinfo, bi_size) */
 
 /*
  * We reserve some space above BTX allocated stack for the arguments
  * and certain data that could hang off them.  Currently only struct bootinfo
  * is supported in that category.  The bootinfo is placed at the top
  * of the arguments area and the actual arguments are placed at ARGOFF offset
  * from the top and grow towards the top.  Hopefully we have enough space
  * for bootinfo and the arguments to not run into each other.
  * Arguments area below ARGOFF is reserved for future use.
  */
 #define	ARGSPACE	0x1000	/* total size of the BTX args area */
 #define	ARGOFF		0x800	/* actual args offset within the args area */
 #define	ARGADJ		(ARGSPACE - ARGOFF)
 
 #ifndef __ASSEMBLER__
 
 /*
  * Fixed-layout argument block.  The BOOTARGS_SIZE, BA_BOOTFLAGS and
  * BA_BOOTINFO constants defined earlier in this header mirror its size and
  * field offsets, so they must be kept in step with any layout change.
  */
 struct bootargs
 {
 	uint32_t			howto;
 	uint32_t			bootdev;
 	uint32_t			bootflags;
 	/* Either PXE information or a ZFS pool value occupies these 8 bytes. */
 	union {
 		struct {
 			uint32_t	pxeinfo;
 			uint32_t	reserved;
 		};
 		uint64_t		zfspool;
 	};
 	uint32_t			bootinfo;
 
 	/*
 	 * If KARGS_FLAGS_EXTARG is set in bootflags, then the above fields
 	 * are followed by a uint32_t field that specifies a size of the
 	 * extended arguments (including the size field).
 	 */
 };
 
+#ifdef LOADER_GELI_SUPPORT
+#include <crypto/intake.h>
+#endif
+
 /*
  * GELI extended boot arguments.  After the size field the payload is a
  * union: either a passphrase in gelipw[], or a leading NUL byte (notapw)
  * followed by a sentinel value and a pointer to a key buffer.  The pointer
  * is typed as struct keybuf * only when LOADER_GELI_SUPPORT is defined and
  * as void * otherwise.
  */
 struct geli_boot_args
 {
     uint32_t		size;
-    char		gelipw[256];
+    union {
+        char            gelipw[256];
+        struct {
+            char                notapw;	/* 
+					 * single null byte to stop keybuf
+					 * being interpreted as a password
+					 */
+            uint32_t            keybuf_sentinel;
+#ifdef LOADER_GELI_SUPPORT
+            struct keybuf       *keybuf;
+#else
+            void                *keybuf;
+#endif
+        };
+    };
 };
 
 #endif /*__ASSEMBLER__*/
 
 #endif	/* !_BOOT_I386_ARGS_H_ */
Index: stable/11/sys/boot/i386/common/drv.c
===================================================================
--- stable/11/sys/boot/i386/common/drv.c	(revision 329098)
+++ stable/11/sys/boot/i386/common/drv.c	(revision 329099)
@@ -1,117 +1,102 @@
 /*-
  * Copyright (c) 1998 Robert Nordier
  * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 
 #include <btxv86.h>
 
 #include "rbx.h"
 #include "util.h"
 #include "drv.h"
 #include "edd.h"
-#ifdef USE_XREAD
-#include "xreadorg.h"
-#endif
 
 static struct edd_params params;
 
 /*
  * Query the size of dskp->drive through interrupt 0x13 with AH = 0x48,
  * using the file-scope edd_params buffer.  Returns the sector count reported
  * in params.sectors, or 0 (after printing the error byte) when the carry
  * flag comes back set.
  */
 uint64_t
 drvsize(struct dsk *dskp)
 {
 
 	params.len = sizeof(struct edd_params);
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x13;
 	v86.eax = 0x4800;
 	v86.edx = dskp->drive;
 	/* DS:SI points at the parameter buffer. */
 	v86.ds = VTOPSEG(&params);
 	v86.esi = VTOPOFF(&params);
 	v86int();
 	if (V86_CY(v86.efl)) {
 		printf("error %u\n", v86.eax >> 8 & 0xff);
 		return (0);
 	}
 	return (params.sectors);
 }
 
-#ifndef USE_XREAD
 static struct edd_packet packet;
-#endif
 
 /*
  * Read nblk sectors starting at lba from dskp->drive into buf, using
  * interrupt 0x13 with AH = 0x42 and the file-scope EDD packet.  Unless
  * RBX_QUIET is set, a one-character spinner is drawn first.
  * Returns 0 on success; when the carry flag comes back set it prints the
  * error byte and the lba and returns -1.
  */
 int
 drvread(struct dsk *dskp, void *buf, daddr_t lba, unsigned nblk)
 {
 	/* The four bytes are the spinner frames '-', '\', '|' and '/'. */
 	static unsigned c = 0x2d5c7c2f;
 
 	if (!OPT_CHECK(RBX_QUIET))
 		printf("%c\b", c = c << 8 | c >> 24);
-#ifndef USE_XREAD
 	packet.len = sizeof(struct edd_packet);
 	packet.count = nblk;
 	packet.off = VTOPOFF(buf);
 	packet.seg = VTOPSEG(buf);
 	packet.lba = lba;
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x13;
 	v86.eax = 0x4200;
 	v86.edx = dskp->drive;
 	v86.ds = VTOPSEG(&packet);
 	v86.esi = VTOPOFF(&packet);
-#else	/* USE_XREAD */
-	v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS;
-	v86.addr = XREADORG;		/* call to xread in boot1 */
-	v86.es = VTOPSEG(buf);
-	v86.eax = lba;
-	v86.ebx = VTOPOFF(buf);
-	v86.ecx = lba >> 32;
-	v86.edx = nblk << 8 | dskp->drive;
-#endif	/* USE_XREAD */
 	v86int();
 	if (V86_CY(v86.efl)) {
 		/*
 		 * daddr_t is presumably wider than unsigned (the removed
 		 * code shifted lba by 32); %u consumes an unsigned, so
 		 * convert explicitly instead of passing a mismatched
 		 * variadic argument.
 		 */
 		printf("%s: error %u lba %u\n",
-		    BOOTPROG, v86.eax >> 8 & 0xff, lba);
+		    BOOTPROG, v86.eax >> 8 & 0xff, (unsigned)lba);
 		return (-1);
 	}
 	return (0);
 }
 
 #if defined(GPT) || defined(ZFS)
 /*
  * Write nblk sectors from buf to dskp->drive starting at lba, using
  * interrupt 0x13 with AH = 0x43 and the file-scope EDD packet.
  * Returns 0 on success; when the carry flag comes back set it prints the
  * error byte and the lba and returns -1.
  */
 int
 drvwrite(struct dsk *dskp, void *buf, daddr_t lba, unsigned nblk)
 {
 
 	packet.len = sizeof(struct edd_packet);
 	packet.count = nblk;
 	packet.off = VTOPOFF(buf);
 	packet.seg = VTOPSEG(buf);
 	packet.lba = lba;
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x13;
 	v86.eax = 0x4300;
 	v86.edx = dskp->drive;
 	v86.ds = VTOPSEG(&packet);
 	v86.esi = VTOPOFF(&packet);
 	v86int();
 	if (V86_CY(v86.efl)) {
 		/* Same explicit conversion as in drvread(): %u needs unsigned. */
-		printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
+		printf("error %u lba %u\n", v86.eax >> 8 & 0xff,
+		    (unsigned)lba);
 		return (-1);
 	}
 	return (0);
 }
 #endif	/* GPT || ZFS */
Index: stable/11/sys/boot/i386/gptboot/Makefile
===================================================================
--- stable/11/sys/boot/i386/gptboot/Makefile	(revision 329098)
+++ stable/11/sys/boot/i386/gptboot/Makefile	(revision 329099)
@@ -1,92 +1,93 @@
 # $FreeBSD$
 
 .PATH:		${.CURDIR}/../boot2 ${.CURDIR}/../common ${.CURDIR}/../../common
 
 FILES=		gptboot
 MAN=		gptboot.8
 
 NM?=		nm
 
 BOOT_COMCONSOLE_PORT?= 0x3f8
 BOOT_COMCONSOLE_SPEED?= 9600
 B2SIOFMT?=	0x3
 
 REL1=	0x700
 ORG1=	0x7c00
 ORG2=	0x0
 
 # Decide level of UFS support.
 GPTBOOT_UFS?=	UFS1_AND_UFS2
 #GPTBOOT_UFS?=	UFS2_ONLY
 #GPTBOOT_UFS?=	UFS1_ONLY
 
 CFLAGS=	-DBOOTPROG=\"gptboot\" \
 	-O1 \
 	-DGPT \
 	-D${GPTBOOT_UFS} \
 	-DSIOPRT=${BOOT_COMCONSOLE_PORT} \
 	-DSIOFMT=${B2SIOFMT} \
 	-DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
 	-I${.CURDIR}/../../common \
 	-I${.CURDIR}/../common \
 	-I${.CURDIR}/../btx/lib -I. \
 	-I${.CURDIR}/../boot2 \
 	-I${.CURDIR}/../../.. \
 	-Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \
 	-Wmissing-declarations -Wmissing-prototypes -Wnested-externs \
 	-Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings \
 	-Winline -Wno-pointer-sign
 
 CFLAGS.gcc+=	--param max-inline-insns-single=100
 
 .if !defined(LOADER_NO_GELI_SUPPORT)
 CFLAGS+=	-DLOADER_GELI_SUPPORT
 CFLAGS+=	-I${.CURDIR}/../../geli
+CFLAGS+=	-I${.CURDIR}/../../..
 LIBGELIBOOT=	${.OBJDIR}/../../geli/libgeliboot.a
 .PATH:		${.CURDIR}/../../../opencrypto
 OPENCRYPTO_XTS=	xform_aes_xts.o
 .endif
 
 LD_FLAGS=${LD_FLAGS_BIN}
 
 LIBSTAND=	${.OBJDIR}/../../libstand32/libstand.a
 
 # Pick up ../Makefile.inc early.
 .include <bsd.init.mk>
 
 CLEANFILES=	gptboot
 
 gptboot: gptldr.bin gptboot.bin ${BTXKERN}
 	btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l gptldr.bin \
 	    -o ${.TARGET} gptboot.bin
 
 CLEANFILES+=	gptldr.bin gptldr.out gptldr.o
 
 gptldr.bin: gptldr.out
 	${OBJCOPY} -S -O binary gptldr.out ${.TARGET}
 
 gptldr.out: gptldr.o
 	${LD} ${LD_FLAGS} -e start -Ttext ${ORG1} -o ${.TARGET} gptldr.o
 
 CLEANFILES+=	gptboot.bin gptboot.out gptboot.o sio.o crc32.o drv.o \
 		cons.o util.o ${OPENCRYPTO_XTS}
 
 gptboot.bin: gptboot.out
 	${OBJCOPY} -S -O binary gptboot.out ${.TARGET}
 
 gptboot.out: ${BTXCRT} gptboot.o sio.o crc32.o drv.o cons.o util.o ${OPENCRYPTO_XTS}
 	${LD} ${LD_FLAGS} -Ttext ${ORG2} -o ${.TARGET} ${.ALLSRC} ${LIBGELIBOOT} ${LIBSTAND}
 
 gptboot.o: ${.CURDIR}/../../common/ufsread.c
 
 # On amd64 build hosts, create a local "machine" symlink to the i386 include
 # directory before gptboot.o is built, so that <machine/...> resolves to the
 # i386 headers (CFLAGS contains -I.).
 .if ${MACHINE_CPUARCH} == "amd64"
 beforedepend gptboot.o: machine
 CLEANFILES+=	machine
 machine: .NOMETA
 	ln -sf ${.CURDIR}/../../../i386/include machine
 .endif
 
 .include <bsd.prog.mk>
 
 # XXX: clang integrated-as doesn't grok .codeNN directives yet
 CFLAGS.gptldr.S=	${CLANG_NO_IAS}
Index: stable/11/sys/boot/i386/gptboot/gptboot.c
===================================================================
--- stable/11/sys/boot/i386/gptboot/gptboot.c	(revision 329098)
+++ stable/11/sys/boot/i386/gptboot/gptboot.c	(revision 329099)
@@ -1,638 +1,648 @@
 /*-
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/gpt.h>
 #include <sys/dirent.h>
 #include <sys/reboot.h>
 
 #include <machine/bootinfo.h>
 #include <machine/elf.h>
 #include <machine/pc/bios.h>
 #include <machine/psl.h>
 
 #include <stdarg.h>
 
 #include <a.out.h>
 
 #include <btxv86.h>
 
 #include "bootargs.h"
 #include "lib.h"
 #include "rbx.h"
 #include "drv.h"
 #include "util.h"
 #include "cons.h"
 #include "gpt.h"
 #include "paths.h"
 
 #define ARGS		0x900
 #define NOPT		14
 #define NDEV		3
 #define MEM_BASE	0x12
 #define MEM_EXT 	0x15
 
 #define DRV_HARD	0x80
 #define DRV_MASK	0x7f
 
 #define TYPE_AD		0
 #define TYPE_DA		1
 #define TYPE_MAXHARD	TYPE_DA
 #define TYPE_FD		2
 
 extern uint32_t _end;
 
 static const uuid_t freebsd_ufs_uuid = GPT_ENT_TYPE_FREEBSD_UFS;
 static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
 static const unsigned char flags[NOPT] = {
 	RBX_DUAL,
 	RBX_SERIAL,
 	RBX_ASKNAME,
 	RBX_CDROM,
 	RBX_CONFIG,
 	RBX_KDB,
 	RBX_GDB,
 	RBX_MUTE,
 	RBX_NOINTR,
 	RBX_PAUSE,
 	RBX_QUIET,
 	RBX_DFLTROOT,
 	RBX_SINGLE,
 	RBX_VERBOSE
 };
 uint32_t opts;
 
 static const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
 static const unsigned char dev_maj[NDEV] = {30, 4, 2};
 
 static struct dsk dsk;
 static char kname[1024];
 static int comspeed = SIOSPD;
 static struct bootinfo bootinfo;
+#ifdef LOADER_GELI_SUPPORT
 static struct geli_boot_args geliargs;
+#endif
 
 static vm_offset_t	high_heap_base;
 static uint32_t		bios_basemem, bios_extmem, high_heap_size;
 
 static struct bios_smap smap;
 
 /*
  * The minimum amount of memory to reserve in bios_extmem for the heap.
  */
 #define	HEAP_MIN	(3 * 1024 * 1024)
 
 static char *heap_next;
 static char *heap_end;
 
 void exit(int);
 static void load(void);
-static int parse(char *, int *);
+static int parse_cmds(char *, int *);
 static int dskread(void *, daddr_t, unsigned);
 void *malloc(size_t n);
 void free(void *ptr);
 #ifdef LOADER_GELI_SUPPORT
 static int vdev_read(void *vdev __unused, void *priv, off_t off, void *buf,
 	size_t bytes);
 #endif
 
 /*
  * Trivial bump allocator over [heap_next, heap_end).  Returns the current
  * heap_next and advances it by n; no rounding or alignment is applied.
  * When the request does not fit it prints "malloc failure" and spins
  * forever, so it never actually returns NULL.
  */
 void *
 malloc(size_t n)
 {
 	char *p = heap_next;
 	if (p + n > heap_end) {
 		printf("malloc failure\n");
 		for (;;)
 		    ;
 		/* NOTREACHED */
 		return (0);
 	}
 	heap_next += n;
 	return (p);
 }
 
 /*
  * No-op counterpart of the bump allocator above: memory is never returned
  * to the heap and ptr is ignored.
  */
 void
 free(void *ptr)
 {
 
 	return;
 }
 
 #include "ufsread.c"
 #include "gpt.c"
 #ifdef LOADER_GELI_SUPPORT
 #include "geliboot.c"
 static char gelipw[GELI_PW_MAXLEN];
+static struct keybuf *gelibuf;
 #endif
 
 /*
  * Read exactly nbyte bytes of inode into buf through fsread().
  * Returns 0 on success; when fsread() reports any other byte count it
  * prints "Invalid format" and returns -1.
  */
 static inline int
 xfsread(ufs_ino_t inode, void *buf, size_t nbyte)
 {
 
 	if ((size_t)fsread(inode, buf, nbyte) != nbyte) {
 		printf("Invalid %s\n", "format");
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Determine memory sizes through the v86 interface and store them in the
  * file-scope variables bios_basemem, bios_extmem, high_heap_base and
  * high_heap_size.  The int 0x15/0xe820 memory map is walked first; older
  * calls (int 0x12, int 0x15/0xe801, int 0x15/0x88) are used as fallbacks
  * for values the map did not provide.
  */
 static void
 bios_getmem(void)
 {
     uint64_t size;
 
     /* Parse system memory map */
     v86.ebx = 0;
     do {
 	v86.ctl = V86_FLAGS;
 	v86.addr = MEM_EXT;		/* int 0x15 function 0xe820*/
 	v86.eax = 0xe820;
 	v86.ecx = sizeof(struct bios_smap);
 	v86.edx = SMAP_SIG;
 	v86.es = VTOPSEG(&smap);
 	v86.edi = VTOPOFF(&smap);
 	v86int();
 	/* Stop on carry (bit 0 of eflags) or a missing signature in EAX. */
 	if ((v86.efl & 1) || (v86.eax != SMAP_SIG))
 	    break;
 	/* look for a low-memory segment that's large enough */
 	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
 	    (smap.length >= (512 * 1024)))
 	    bios_basemem = smap.length;
 	/* look for the first segment in 'extended' memory */
 	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
 	    bios_extmem = smap.length;
 	}
 
 	/*
 	 * Look for the largest segment in 'extended' memory beyond
 	 * 1MB but below 4GB.
 	 */
 	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
 	    (smap.base < 0x100000000ull)) {
 	    size = smap.length;
 
 	    /*
 	     * If this segment crosses the 4GB boundary, truncate it.
 	     */
 	    if (smap.base + size > 0x100000000ull)
 		size = 0x100000000ull - smap.base;
 
 	    if (size > high_heap_size) {
 		high_heap_size = size;
 		high_heap_base = smap.base;
 	    }
 	}
     /* EBX carries the continuation value; zero ends the walk. */
     } while (v86.ebx != 0);
 
     /* Fall back to the old compatibility function for base memory */
     if (bios_basemem == 0) {
 	v86.ctl = 0;
 	v86.addr = 0x12;		/* int 0x12 */
 	v86int();
 
 	/* The low 16 bits of EAX are taken as a size in KiB. */
 	bios_basemem = (v86.eax & 0xffff) * 1024;
     }
 
     /* Fall back through several compatibility functions for extended memory */
     if (bios_extmem == 0) {
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
 	v86.eax = 0xe801;
 	v86int();
 	if (!(v86.efl & 1)) {
 	    /* CX is counted in 1 KiB units and DX in 64 KiB units. */
 	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
 	}
     }
     if (bios_extmem == 0) {
 	v86.ctl = 0;
 	v86.addr = 0x15;		/* int 0x15 function 0x88*/
 	v86.eax = 0x8800;
 	v86int();
 	bios_extmem = (v86.eax & 0xffff) * 1024;
     }
 
     /*
      * If we have extended memory and did not find a suitable heap
      * region in the SMAP, use the last 3MB of 'extended' memory as a
      * high heap candidate.
      */
     if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
 	high_heap_size = HEAP_MIN;
 	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
     }
 }
 
 /*
  * Read the GPT of the current disk and select a FreeBSD UFS partition.
  * With GELI support, a partition that geli_taste() accepts is unlocked
  * first: the passphrase prompt is only tried when geli_havekey() returns
  * non-zero.  Clears dsk_meta on success.
  * Returns 0 on success and -1 (after printing a message) on failure.
  * NOTE(review): presumably geli_havekey() returns 0 when an already known
  * key unlocks the provider -- confirm against geliboot.c.
  */
 static int
 gptinit(void)
 {
 
 	if (gptread(&freebsd_ufs_uuid, &dsk, dmadat->secbuf) == -1) {
 		printf("%s: unable to load GPT\n", BOOTPROG);
 		return (-1);
 	}
 	if (gptfind(&freebsd_ufs_uuid, &dsk, dsk.part) == -1) {
 		printf("%s: no UFS partition was found\n", BOOTPROG);
 		return (-1);
 	}
 #ifdef LOADER_GELI_SUPPORT
 	if (geli_taste(vdev_read, &dsk, (gpttable[curent].ent_lba_end -
 	    gpttable[curent].ent_lba_start)) == 0) {
-		if (geli_passphrase(&gelipw, dsk.unit, 'p', curent + 1, &dsk) != 0) {
+		if (geli_havekey(&dsk) != 0 && geli_passphrase(&gelipw,
+		    dsk.unit, 'p', curent + 1, &dsk) != 0) {
 			printf("%s: unable to decrypt GELI key\n", BOOTPROG);
 			return (-1);
 		}
 	}
 #endif
 
 	dsk_meta = 0;
 	return (0);
 }
 
 int
 main(void)
 {
 	char cmd[512], cmdtmp[512];
 	ssize_t sz;
 	int autoboot, dskupdated;
 	ufs_ino_t ino;
 
 	dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
 
 	bios_getmem();
 
 	if (high_heap_size > 0) {
 		heap_end = PTOV(high_heap_base + high_heap_size);
 		heap_next = PTOV(high_heap_base);
 	} else {
 		heap_next = (char *)dmadat + sizeof(*dmadat);
 		heap_end = (char *)PTOV(bios_basemem);
 	}
 
 	v86.ctl = V86_FLAGS;
 	v86.efl = PSL_RESERVED_DEFAULT | PSL_I;
 	dsk.drive = *(uint8_t *)PTOV(ARGS);
 	dsk.type = dsk.drive & DRV_HARD ? TYPE_AD : TYPE_FD;
 	dsk.unit = dsk.drive & DRV_MASK;
 	dsk.part = -1;
 	dsk.start = 0;
 	bootinfo.bi_version = BOOTINFO_VERSION;
 	bootinfo.bi_size = sizeof(bootinfo);
 	bootinfo.bi_basemem = bios_basemem / 1024;
 	bootinfo.bi_extmem = bios_extmem / 1024;
 	bootinfo.bi_memsizes_valid++;
 	bootinfo.bi_bios_dev = dsk.drive;
 
 #ifdef LOADER_GELI_SUPPORT
 	geli_init();
 #endif
 	/* Process configuration file */
 
 	if (gptinit() != 0)
 		return (-1);
 
 	autoboot = 1;
 	*cmd = '\0';
 
 	for (;;) {
 		*kname = '\0';
 		if ((ino = lookup(PATH_CONFIG)) ||
 		    (ino = lookup(PATH_DOTCONFIG))) {
 			sz = fsread(ino, cmd, sizeof(cmd) - 1);
 			cmd[(sz < 0) ? 0 : sz] = '\0';
 		}
 		if (*cmd != '\0') {
 			memcpy(cmdtmp, cmd, sizeof(cmdtmp));
-			if (parse(cmdtmp, &dskupdated))
+			if (parse_cmds(cmdtmp, &dskupdated))
 				break;
 			if (dskupdated && gptinit() != 0)
 				break;
 			if (!OPT_CHECK(RBX_QUIET))
 				printf("%s: %s", PATH_CONFIG, cmd);
 			*cmd = '\0';
 		}
 
 		if (autoboot && keyhit(3)) {
 			if (*kname == '\0')
 				memcpy(kname, PATH_LOADER, sizeof(PATH_LOADER));
 			break;
 		}
 		autoboot = 0;
 
 		/*
 		 * Try to exec stage 3 boot loader. If interrupted by a
 		 * keypress, or in case of failure, try to load a kernel
 		 * directly instead.
 		 */
 		if (*kname != '\0')
 			load();
 		memcpy(kname, PATH_LOADER, sizeof(PATH_LOADER));
 		load();
 		memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
 		load();
 		gptbootfailed(&dsk);
 		if (gptfind(&freebsd_ufs_uuid, &dsk, -1) == -1)
 			break;
 		dsk_meta = 0;
 	}
 
 	/* Present the user with the boot2 prompt. */
 
 	for (;;) {
 		if (!OPT_CHECK(RBX_QUIET)) {
 			printf("\nFreeBSD/x86 boot\n"
 			    "Default: %u:%s(%up%u)%s\n"
 			    "boot: ",
 			    dsk.drive & DRV_MASK, dev_nm[dsk.type], dsk.unit,
 			    dsk.part, kname);
 		}
 		if (ioctrl & IO_SERIAL)
 			sio_flush();
 		*cmd = '\0';
 		if (keyhit(0))
 			getstr(cmd, sizeof(cmd));
 		else if (!OPT_CHECK(RBX_QUIET))
 			putchar('\n');
-		if (parse(cmd, &dskupdated)) {
+		if (parse_cmds(cmd, &dskupdated)) {
 			putchar('\a');
 			continue;
 		}
 		if (dskupdated && gptinit() != 0)
 			continue;
 		load();
 	}
 	/* NOTREACHED */
 }
 
 /* XXX - Needed for btxld to link the boot2 binary; do not remove. */
 void
 exit(int x)
 {
 }
 
 static void
 load(void)
 {
     union {
 	struct exec ex;
 	Elf32_Ehdr eh;
     } hdr;
     static Elf32_Phdr ep[2];
     static Elf32_Shdr es[2];
     caddr_t p;
     ufs_ino_t ino;
     uint32_t addr, x;
     int fmt, i, j;
 
     if (!(ino = lookup(kname))) {
 	if (!ls) {
 	    printf("%s: No %s on %u:%s(%up%u)\n", BOOTPROG,
 		kname, dsk.drive & DRV_MASK, dev_nm[dsk.type], dsk.unit,
 		dsk.part);
 	}
 	return;
     }
     if (xfsread(ino, &hdr, sizeof(hdr)))
 	return;
     if (N_GETMAGIC(hdr.ex) == ZMAGIC)
 	fmt = 0;
     else if (IS_ELF(hdr.eh))
 	fmt = 1;
     else {
 	printf("Invalid %s\n", "format");
 	return;
     }
     if (fmt == 0) {
 	addr = hdr.ex.a_entry & 0xffffff;
 	p = PTOV(addr);
 	fs_off = PAGE_SIZE;
 	if (xfsread(ino, p, hdr.ex.a_text))
 	    return;
 	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
 	if (xfsread(ino, p, hdr.ex.a_data))
 	    return;
 	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
 	bootinfo.bi_symtab = VTOP(p);
 	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
 	p += sizeof(hdr.ex.a_syms);
 	if (hdr.ex.a_syms) {
 	    if (xfsread(ino, p, hdr.ex.a_syms))
 		return;
 	    p += hdr.ex.a_syms;
 	    if (xfsread(ino, p, sizeof(int)))
 		return;
 	    x = *(uint32_t *)p;
 	    p += sizeof(int);
 	    x -= sizeof(int);
 	    if (xfsread(ino, p, x))
 		return;
 	    p += x;
 	}
     } else {
 	fs_off = hdr.eh.e_phoff;
 	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
 	    if (xfsread(ino, ep + j, sizeof(ep[0])))
 		return;
 	    if (ep[j].p_type == PT_LOAD)
 		j++;
 	}
 	for (i = 0; i < 2; i++) {
 	    p = PTOV(ep[i].p_paddr & 0xffffff);
 	    fs_off = ep[i].p_offset;
 	    if (xfsread(ino, p, ep[i].p_filesz))
 		return;
 	}
 	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
 	bootinfo.bi_symtab = VTOP(p);
 	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
 	    fs_off = hdr.eh.e_shoff + sizeof(es[0]) *
 		(hdr.eh.e_shstrndx + 1);
 	    if (xfsread(ino, &es, sizeof(es)))
 		return;
 	    for (i = 0; i < 2; i++) {
 		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
 		p += sizeof(es[i].sh_size);
 		fs_off = es[i].sh_offset;
 		if (xfsread(ino, p, es[i].sh_size))
 		    return;
 		p += es[i].sh_size;
 	    }
 	}
 	addr = hdr.eh.e_entry & 0xffffff;
     }
     bootinfo.bi_esymtab = VTOP(p);
     bootinfo.bi_kernelname = VTOP(kname);
     bootinfo.bi_bios_dev = dsk.drive;
-    geliargs.size = sizeof(geliargs);
 #ifdef LOADER_GELI_SUPPORT
-    bcopy(gelipw, geliargs.gelipw, sizeof(geliargs.gelipw));
-    bzero(gelipw, sizeof(gelipw));
-#else
-	geliargs.gelipw[0] = '\0';
+    geliargs.size = sizeof(geliargs);
+    explicit_bzero(gelipw, sizeof(gelipw));
+    gelibuf = malloc(sizeof(struct keybuf) + (GELI_MAX_KEYS * sizeof(struct keybuf_ent)));
+    geli_fill_keybuf(gelibuf);
+    geliargs.notapw = '\0';
+    geliargs.keybuf_sentinel = KEYBUF_SENTINEL;
+    geliargs.keybuf = gelibuf;
 #endif
     __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
 	   MAKEBOOTDEV(dev_maj[dsk.type], dsk.part + 1, dsk.unit, 0xff),
-	   KARGS_FLAGS_EXTARG, 0, 0, VTOP(&bootinfo), geliargs);
+	   KARGS_FLAGS_EXTARG, 0, 0, VTOP(&bootinfo)
+#ifdef LOADER_GELI_SUPPORT
+	   , geliargs
+#endif
+	   );
 }
 
 static int
-parse(char *cmdstr, int *dskupdated)
+parse_cmds(char *cmdstr, int *dskupdated)
 {
     char *arg = cmdstr;
     char *ep, *p, *q;
     const char *cp;
     unsigned int drv;
     int c, i, j;
 
     *dskupdated = 0;
     while ((c = *arg++)) {
 	if (c == ' ' || c == '\t' || c == '\n')
 	    continue;
 	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
 	ep = p;
 	if (*p)
 	    *p++ = 0;
 	if (c == '-') {
 	    while ((c = *arg++)) {
 		if (c == 'P') {
 		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
 			cp = "yes";
 		    } else {
 			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
 			cp = "no";
 		    }
 		    printf("Keyboard: %s\n", cp);
 		    continue;
 		} else if (c == 'S') {
 		    j = 0;
 		    while ((unsigned int)(i = *arg++ - '0') <= 9)
 			j = j * 10 + i;
 		    if (j > 0 && i == -'0') {
 			comspeed = j;
 			break;
 		    }
 		    /* Fall through to error below ('S' not in optstr[]). */
 		}
 		for (i = 0; c != optstr[i]; i++)
 		    if (i == NOPT - 1)
 			return -1;
 		opts ^= OPT_SET(flags[i]);
 	    }
 	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
 		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
 	    if (ioctrl & IO_SERIAL) {
 	        if (sio_init(115200 / comspeed) != 0)
 		    ioctrl &= ~IO_SERIAL;
 	    }
 	} else {
 	    for (q = arg--; *q && *q != '('; q++);
 	    if (*q) {
 		drv = -1;
 		if (arg[1] == ':') {
 		    drv = *arg - '0';
 		    if (drv > 9)
 			return (-1);
 		    arg += 2;
 		}
 		if (q - arg != 2)
 		    return -1;
 		for (i = 0; arg[0] != dev_nm[i][0] ||
 			    arg[1] != dev_nm[i][1]; i++)
 		    if (i == NDEV - 1)
 			return -1;
 		dsk.type = i;
 		arg += 3;
 		dsk.unit = *arg - '0';
 		if (arg[1] != 'p' || dsk.unit > 9)
 		    return -1;
 		arg += 2;
 		dsk.part = *arg - '0';
 		if (dsk.part < 1 || dsk.part > 9)
 		    return -1;
 		arg++;
 		if (arg[0] != ')')
 		    return -1;
 		arg++;
 		if (drv == -1)
 		    drv = dsk.unit;
 		dsk.drive = (dsk.type <= TYPE_MAXHARD
 			     ? DRV_HARD : 0) + drv;
 		*dskupdated = 1;
 	    }
 	    if ((i = ep - arg)) {
 		if ((size_t)i >= sizeof(kname))
 		    return -1;
 		memcpy(kname, arg, i + 1);
 	    }
 	}
 	arg = p;
     }
     return 0;
 }
 
 static int
 dskread(void *buf, daddr_t lba, unsigned nblk)
 {
 	int err;
 
 	err = drvread(&dsk, buf, lba + dsk.start, nblk);
 
 #ifdef LOADER_GELI_SUPPORT
 	if (err == 0 && is_geli(&dsk) == 0) {
 		/* Decrypt */
 		if (geli_read(&dsk, lba * DEV_BSIZE, buf, nblk * DEV_BSIZE))
 			return (err);
 	}
 #endif
 
 	return (err);
 }
 
 #ifdef LOADER_GELI_SUPPORT
 /*
  * Read function compartible with the ZFS callback, required to keep the GELI
  * Implementation the same for both UFS and ZFS
  */
 static int
 vdev_read(void *vdev __unused, void *priv, off_t off, void *buf, size_t bytes)
 {
 	char *p;
 	daddr_t lba;
 	unsigned int nb;
 	struct dsk *dskp = (struct dsk *) priv;
 
 	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
 		return (-1);
 
 	p = buf;
 	lba = off / DEV_BSIZE;
 	lba += dskp->start;
 
 	while (bytes > 0) {
 		nb = bytes / DEV_BSIZE;
 		if (nb > VBLKSIZE / DEV_BSIZE)
 			nb = VBLKSIZE / DEV_BSIZE;
 		if (drvread(dskp, dmadat->blkbuf, lba, nb))
 			return (-1);
 		memcpy(p, dmadat->blkbuf, nb * DEV_BSIZE);
 		p += nb * DEV_BSIZE;
 		lba += nb;
 		bytes -= nb * DEV_BSIZE;
 	}
 
 	return (0);
 }
 #endif /* LOADER_GELI_SUPPORT */
Index: stable/11/sys/boot/i386/gptzfsboot/Makefile
===================================================================
--- stable/11/sys/boot/i386/gptzfsboot/Makefile	(revision 329098)
+++ stable/11/sys/boot/i386/gptzfsboot/Makefile	(revision 329099)
@@ -1,102 +1,104 @@
 # $FreeBSD$
 
 .include <bsd.own.mk>
 
 .PATH:		${.CURDIR}/../boot2 ${.CURDIR}/../gptboot \
 		${.CURDIR}/../zfsboot ${.CURDIR}/../common \
 		${.CURDIR}/../../common ${.CURDIR}/../../../crypto/skein
 
 FILES=		gptzfsboot
 MAN=		gptzfsboot.8
 
 NM?=		nm
 
 BOOT_COMCONSOLE_PORT?= 0x3f8
 BOOT_COMCONSOLE_SPEED?= 9600
 B2SIOFMT?=	0x3
 
 REL1=	0x700
 ORG1=	0x7c00
 ORG2=	0x0
 
 CFLAGS=	-DBOOTPROG=\"gptzfsboot\" \
 	-O1 \
 	-DGPT -DZFS -DBOOT2 \
 	-DSIOPRT=${BOOT_COMCONSOLE_PORT} \
 	-DSIOFMT=${B2SIOFMT} \
 	-DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
 	-I${.CURDIR}/../../common \
 	-I${.CURDIR}/../common \
 	-I${.CURDIR}/../../zfs \
 	-I${.CURDIR}/../../../cddl/boot/zfs \
 	-I${.CURDIR}/../../../crypto/skein \
 	-I${.CURDIR}/../btx/lib -I. \
 	-I${.CURDIR}/../boot2 \
 	-I${.CURDIR}/../../.. \
-	-Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \
+	-Wall -Waggregate-return -Wbad-function-cast \
 	-Wmissing-declarations -Wmissing-prototypes -Wnested-externs \
 	-Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings \
 	-Winline -Wno-pointer-sign
+
+NO_WCAST_ALIGN=
 
 .if ${COMPILER_TYPE} == "clang" || \
     (${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} > 40201)
 CFLAGS+=	-Wno-tentative-definition-incomplete-type
 .endif
 
 # Do not unroll skein loops, reduce code size
 CFLAGS+=	-DSKEIN_LOOP=111
 
 .if !defined(LOADER_NO_GELI_SUPPORT)
 CFLAGS+=	-DLOADER_GELI_SUPPORT
 CFLAGS+=	-I${.CURDIR}/../../geli
 LIBGELIBOOT=	${.OBJDIR}/../../geli/libgeliboot.a
 .PATH:		${.CURDIR}/../../../opencrypto
 OPENCRYPTO_XTS=	xform_aes_xts.o
 .endif
 
 CFLAGS.gcc+=	--param max-inline-insns-single=100
 
 LD_FLAGS=${LD_FLAGS_BIN}
 
 LIBSTAND=	${.OBJDIR}/../../libstand32/libstand.a
 
 # Pick up ../Makefile.inc early.
 .include <bsd.init.mk>
 
 CLEANFILES=	gptzfsboot
 
 gptzfsboot: gptldr.bin gptzfsboot.bin ${BTXKERN}
 	btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l gptldr.bin \
 	    -o ${.TARGET} gptzfsboot.bin
 
 CLEANFILES+=	gptldr.bin gptldr.out gptldr.o
 
 gptldr.bin: gptldr.out
 	${OBJCOPY} -S -O binary gptldr.out ${.TARGET}
 
 gptldr.out: gptldr.o
 	${LD} ${LD_FLAGS} -e start -Ttext ${ORG1} -o ${.TARGET} gptldr.o
 
 CLEANFILES+=	gptzfsboot.bin gptzfsboot.out zfsboot.o sio.o cons.o \
 		drv.o gpt.o util.o skein.o skein_block.o ${OPENCRYPTO_XTS}
 
 gptzfsboot.bin: gptzfsboot.out
 	${OBJCOPY} -S -O binary gptzfsboot.out ${.TARGET}
 
 gptzfsboot.out: ${BTXCRT} zfsboot.o sio.o gpt.o drv.o cons.o util.o \
 	skein.o skein_block.o ${OPENCRYPTO_XTS}
 	${LD} ${LD_FLAGS} -Ttext ${ORG2} -o ${.TARGET} ${.ALLSRC} ${LIBGELIBOOT} ${LIBSTAND}
 
 zfsboot.o: ${.CURDIR}/../../zfs/zfsimpl.c
 
 .if ${MACHINE_CPUARCH} == "amd64"
 beforedepend zfsboot.o: machine
 CLEANFILES+=	machine
 machine: .NOMETA
 	ln -sf ${.CURDIR}/../../../i386/include machine
 .endif
 
 .include <bsd.prog.mk>
 
 # XXX: clang integrated-as doesn't grok .codeNN directives yet
 CFLAGS.gptldr.S=	${CLANG_NO_IAS}
Index: stable/11/sys/boot/i386/libi386/bioscd.c
===================================================================
--- stable/11/sys/boot/i386/libi386/bioscd.c	(revision 329098)
+++ stable/11/sys/boot/i386/libi386/bioscd.c	(revision 329099)
@@ -1,454 +1,452 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2001 John H. Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * BIOS CD device handling for CD's that have been booted off of via no
  * emulation booting as defined in the El Torito standard.
  * 
  * Ideas and algorithms from:
  *
  * - FreeBSD libi386/biosdisk.c
  *
  */
 
 #include <stand.h>
 
 #include <sys/param.h>
 #include <machine/bootinfo.h>
 
 #include <stdarg.h>
 
 #include <bootstrap.h>
 #include <btxv86.h>
 #include <edd.h>
 #include "libi386.h"
 
 #define BIOSCD_SECSIZE		2048
 #define BUFSIZE			(1 * BIOSCD_SECSIZE)
 #define	MAXBCDEV		1
 
 /* Major numbers for devices we frontend for. */
 #define ACDMAJOR		117
 #define	CDMAJOR			15
 
 #ifdef DISK_DEBUG
 # define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
 #else
 # define DEBUG(fmt, args...)
 #endif
 
 struct specification_packet {
 	u_char	sp_size;
 	u_char	sp_bootmedia;
 	u_char	sp_drive;
 	u_char	sp_controller;
 	u_int	sp_lba;
 	u_short	sp_devicespec;
 	u_short	sp_buffersegment;
 	u_short	sp_loadsegment;
 	u_short	sp_sectorcount;
 	u_short	sp_cylsec;
 	u_char	sp_head;
 };
 
 /*
  * List of BIOS devices, translation from disk unit number to
  * BIOS unit number.
  */
 static struct bcinfo {
 	int	bc_unit;		/* BIOS unit number */
 	struct specification_packet bc_sp;
 	int	bc_open;		/* reference counter */
 	void	*bc_bcache;		/* buffer cache data */
 } bcinfo [MAXBCDEV];
 static int nbcinfo = 0;
 
 #define	BC(dev)	(bcinfo[(dev)->d_unit])
 
 static int	bc_read(int unit, daddr_t dblk, int blks, caddr_t dest);
 static int	bc_init(void);
 static int	bc_strategy(void *devdata, int flag, daddr_t dblk,
     size_t size, char *buf, size_t *rsize);
 static int	bc_realstrategy(void *devdata, int flag, daddr_t dblk,
     size_t size, char *buf, size_t *rsize);
 static int	bc_open(struct open_file *f, ...);
 static int	bc_close(struct open_file *f);
 static int	bc_print(int verbose);
 
 struct devsw bioscd = {
 	"cd", 
 	DEVT_CD, 
 	bc_init,
 	bc_strategy, 
 	bc_open, 
 	bc_close, 
 	noioctl,
 	bc_print,
 	NULL
 };
 
 /*
  * Translate between BIOS device numbers and our private unit numbers.
  */
 int
 bc_bios2unit(int biosdev)
 {
 	int i;
     
 	DEBUG("looking for bios device 0x%x", biosdev);
 	for (i = 0; i < nbcinfo; i++) {
 		DEBUG("bc unit %d is BIOS device 0x%x", i, bcinfo[i].bc_unit);
 		if (bcinfo[i].bc_unit == biosdev)
 			return(i);
 	}
 	return(-1);
 }
 
 int
 bc_unit2bios(int unit)
 {
 	if ((unit >= 0) && (unit < nbcinfo))
 		return(bcinfo[unit].bc_unit);
 	return(-1);
 }
 
 /*    
  * We can't quiz, we have to be told what device to use, so this functoin
  * doesn't do anything.  Instead, the loader calls bc_add() with the BIOS
  * device number to add.
  */
 static int
 bc_init(void) 
 {
 
 	return (0);
 }
 
 int
 bc_add(int biosdev)
 {
 
 	if (nbcinfo >= MAXBCDEV)
 		return (-1);
 	bcinfo[nbcinfo].bc_unit = biosdev;
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x13;
 	v86.eax = 0x4b01;
 	v86.edx = biosdev;
 	v86.ds = VTOPSEG(&bcinfo[nbcinfo].bc_sp);
 	v86.esi = VTOPOFF(&bcinfo[nbcinfo].bc_sp);
 	v86int();
 	if ((v86.eax & 0xff00) != 0)
 		return (-1);
 
 	printf("BIOS CD is cd%d\n", nbcinfo);
 	nbcinfo++;
 	bcache_add_dev(nbcinfo);	/* register cd device in bcache */
 	return(0);
 }
 
 /*
  * Print information about disks
  */
 static int
 bc_print(int verbose)
 {
 	char line[80];
 	int i, ret = 0;
 
 	if (nbcinfo == 0)
 		return (0);
 
 	printf("%s devices:", bioscd.dv_name);
 	if ((ret = pager_output("\n")) != 0)
 		return (ret);
 
 	for (i = 0; i < nbcinfo; i++) {
 		snprintf(line, sizeof(line), "    cd%d: Device 0x%x\n", i,
 		    bcinfo[i].bc_sp.sp_devicespec);
 		if ((ret = pager_output(line)) != 0)
 			break;
 	}
 	return (ret);
 }
 
 /*
  * Attempt to open the disk described by (dev) for use by (f).
  */
 static int 
 bc_open(struct open_file *f, ...)
 {
 	va_list ap;
 	struct i386_devdesc *dev;
 
 	va_start(ap, f);
 	dev = va_arg(ap, struct i386_devdesc *);
 	va_end(ap);
 	if (dev->d_unit >= nbcinfo) {
 		DEBUG("attempt to open nonexistent disk");
 		return(ENXIO);
 	}
 
 	BC(dev).bc_open++;
 	if (BC(dev).bc_bcache == NULL)
 		BC(dev).bc_bcache = bcache_allocate();
 	return(0);
 }
  
 static int 
 bc_close(struct open_file *f)
 {
 	struct i386_devdesc *dev;
 
 	dev = (struct i386_devdesc *)f->f_devdata;
 	BC(dev).bc_open--;
 	if (BC(dev).bc_open == 0) {
 		bcache_free(BC(dev).bc_bcache);
 		BC(dev).bc_bcache = NULL;
 	}
 	return(0);
 }
 
 static int
 bc_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
 	struct bcache_devdata bcd;
 	struct i386_devdesc *dev;
 
 	dev = (struct i386_devdesc *)devdata;
 	bcd.dv_strategy = bc_realstrategy;
 	bcd.dv_devdata = devdata;
 	bcd.dv_cache = BC(dev).bc_bcache;
 
 	return (bcache_strategy(&bcd, rw, dblk, size, buf, rsize));
 }
 
 static int 
 bc_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
 	struct i386_devdesc *dev;
 	int unit;
 	int blks;
 #ifdef BD_SUPPORT_FRAGS
 	char fragbuf[BIOSCD_SECSIZE];
 	size_t fragsize;
 
 	fragsize = size % BIOSCD_SECSIZE;
 #else
 	if (size % BIOSCD_SECSIZE)
 		return (EINVAL);
 #endif
 
 	if (rw != F_READ)
 		return(EROFS);
 	dev = (struct i386_devdesc *)devdata;
 	unit = dev->d_unit;
 	blks = size / BIOSCD_SECSIZE;
 	if (dblk % (BIOSCD_SECSIZE / DEV_BSIZE) != 0)
 		return (EINVAL);
 	dblk /= (BIOSCD_SECSIZE / DEV_BSIZE);
 	DEBUG("read %d from %lld to %p", blks, dblk, buf);
 
 	if (rsize)
 		*rsize = 0;
 	if ((blks = bc_read(unit, dblk, blks, buf)) < 0) {
 		DEBUG("read error");
 		return (EIO);
 	} else {
 		if (size / BIOSCD_SECSIZE > blks) {
 			if (rsize)
 				*rsize = blks * BIOSCD_SECSIZE;
 			return (0);
 		}
 	}
 #ifdef BD_SUPPORT_FRAGS
 	DEBUG("frag read %d from %lld+%d to %p", 
 	    fragsize, dblk, blks, buf + (blks * BIOSCD_SECSIZE));
 	if (fragsize && bc_read(unit, dblk + blks, 1, fragbuf) != 1) {
 		if (blks) {
 			if (rsize)
 				*rsize = blks * BIOSCD_SECSIZE;
 			return (0);
 		}
 		DEBUG("frag read error");
 		return(EIO);
 	}
 	bcopy(fragbuf, buf + (blks * BIOSCD_SECSIZE), fragsize);
 #endif	
 	if (rsize)
 		*rsize = size;
 	return (0);
 }
 
-/* Max number of sectors to bounce-buffer at a time. */
-#define	CD_BOUNCEBUF	8
-
 /* return negative value for an error, otherwise blocks read */
 static int
 bc_read(int unit, daddr_t dblk, int blks, caddr_t dest)
 {
 	u_int maxfer, resid, result, retry, x;
 	caddr_t bbuf, p, xp;
 	static struct edd_packet packet;
 	int biosdev;
 #ifdef DISK_DEBUG
 	int error;
 #endif
     
 	/* Just in case some idiot actually tries to read -1 blocks... */
 	if (blks < 0)
 		return (-1);
 
 	/* If nothing to do, just return succcess. */
 	if (blks == 0)
 		return (0);
 
 	/* Decide whether we have to bounce */
 	if (VTOP(dest) >> 20 != 0) {
 		/* 
 		 * The destination buffer is above first 1MB of
 		 * physical memory so we have to arrange a suitable
 		 * bounce buffer.
 		 */
-		x = min(CD_BOUNCEBUF, (unsigned)blks);
-		bbuf = alloca(x * BIOSCD_SECSIZE);
+		x = V86_IO_BUFFER_SIZE / BIOSCD_SECSIZE;
+		x = min(x, (unsigned)blks);
+		bbuf = PTOV(V86_IO_BUFFER);
 		maxfer = x;
 	} else {
 		bbuf = NULL;
 		maxfer = 0;
 	}
 	
 	biosdev = bc_unit2bios(unit);
 	resid = blks;
 	p = dest;
 
 	while (resid > 0) {
 		if (bbuf)
 			xp = bbuf;
 		else
 			xp = p;
 		x = resid;
 		if (maxfer > 0)
 			x = min(x, maxfer);
 
 		/*
 		 * Loop retrying the operation a couple of times.  The BIOS
 		 * may also retry.
 		 */
 		for (retry = 0; retry < 3; retry++) {
 			/* If retrying, reset the drive */
 			if (retry > 0) {
 				v86.ctl = V86_FLAGS;
 				v86.addr = 0x13;
 				v86.eax = 0;
 				v86.edx = biosdev;
 				v86int();
 			}
 
 			packet.len = sizeof(struct edd_packet);
 			packet.count = x;
 			packet.off = VTOPOFF(xp);
 			packet.seg = VTOPSEG(xp);
 			packet.lba = dblk;
 			v86.ctl = V86_FLAGS;
 			v86.addr = 0x13;
 			v86.eax = 0x4200;
 			v86.edx = biosdev;
 			v86.ds = VTOPSEG(&packet);
 			v86.esi = VTOPOFF(&packet);
 			v86int();
 			result = V86_CY(v86.efl);
 			if (result == 0)
 				break;
 			/* fall back to 1 sector read */
 			x = 1;
 		}
 	
 #ifdef DISK_DEBUG
 		error = (v86.eax >> 8) & 0xff;
 #endif
 		DEBUG("%d sectors from %lld to %p (0x%x) %s", x, dblk, p,
 		    VTOP(p), result ? "failed" : "ok");
 		DEBUG("unit %d  status 0x%x", unit, error);
 
 		/* still an error? break off */
 		if (result != 0)
 			break;
 
 		if (bbuf != NULL)
 			bcopy(bbuf, p, x * BIOSCD_SECSIZE);
 		p += (x * BIOSCD_SECSIZE);
 		dblk += x;
 		resid -= x;
 	}
 	
 /*	hexdump(dest, (blks * BIOSCD_SECSIZE)); */
 
 	if (blks - resid == 0)
 		return (-1);		/* read failed */
 
 	return (blks - resid);
 }
 
 /*
  * Return a suitable dev_t value for (dev).
  */
 int
 bc_getdev(struct i386_devdesc *dev)
 {
     int biosdev, unit;
     int major;
     int rootdev;
 
     unit = dev->d_unit;
     biosdev = bc_unit2bios(unit);
     DEBUG("unit %d BIOS device %d", unit, biosdev);
     if (biosdev == -1)				/* not a BIOS device */
 	return(-1);
 
     /*
      * XXX: Need to examine device spec here to figure out if SCSI or
      * ATAPI.  No idea on how to figure out device number.  All we can
      * really pass to the kernel is what bus and device on which bus we
      * were booted from, which dev_t isn't well suited to since those
      * number don't match to unit numbers very well.  We may just need
      * to engage in a hack where we pass -C to the boot args if we are
      * the boot device.
      */
     major = ACDMAJOR;
     unit = 0;	/* XXX */
 
     /* XXX: Assume partition 'a'. */
     rootdev = MAKEBOOTDEV(major, 0, unit, 0);
     DEBUG("dev is 0x%x\n", rootdev);
     return(rootdev);
 }
Index: stable/11/sys/boot/i386/libi386/biosdisk.c
===================================================================
--- stable/11/sys/boot/i386/libi386/biosdisk.c	(revision 329098)
+++ stable/11/sys/boot/i386/libi386/biosdisk.c	(revision 329099)
@@ -1,946 +1,1013 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * BIOS disk device handling.
  * 
  * Ideas and algorithms from:
  *
  * - NetBSD libi386/biosdisk.c
  * - FreeBSD biosboot/disk.c
  *
  */
 
 #include <sys/disk.h>
+#include <sys/limits.h>
 #include <stand.h>
 #include <machine/bootinfo.h>
 #include <stdarg.h>
 
 #include <bootstrap.h>
 #include <btxv86.h>
 #include <edd.h>
 #include "disk.h"
 #include "libi386.h"
 
 #ifdef LOADER_GELI_SUPPORT
 #include "cons.h"
 #include "drv.h"
 #include "gpt.h"
 #include "part.h"
 #include <uuid.h>
 struct pentry {
 	struct ptable_entry	part;
 	uint64_t		flags;
 	union {
 		uint8_t bsd;
 		uint8_t	mbr;
 		uuid_t	gpt;
 		uint16_t vtoc8;
 	} type;
 	STAILQ_ENTRY(pentry)	entry;
 };
 struct ptable {
 	enum ptable_type	type;
 	uint16_t		sectorsize;
 	uint64_t		sectors;
 
 	STAILQ_HEAD(, pentry)	entries;
 };
 
 #include "geliboot.c"
 #endif /* LOADER_GELI_SUPPORT */
 
 CTASSERT(sizeof(struct i386_devdesc) >= sizeof(struct disk_devdesc));
 
 #define BIOS_NUMDRIVES		0x475
 #define BIOSDISK_SECSIZE	512
 #define BUFSIZE			(1 * BIOSDISK_SECSIZE)
 
 #define DT_ATAPI		0x10		/* disk type for ATAPI floppies */
 #define WDMAJOR			0		/* major numbers for devices we frontend for */
 #define WFDMAJOR		1
 #define FDMAJOR			2
 #define DAMAJOR			4
 
 #ifdef DISK_DEBUG
 # define DEBUG(fmt, args...)	printf("%s: " fmt "\n" , __func__ , ## args)
 #else
 # define DEBUG(fmt, args...)
 #endif
 
 /*
  * List of BIOS devices, translation from disk unit number to
  * BIOS unit number.
  */
 static struct bdinfo
 {
 	int		bd_unit;	/* BIOS unit number */
 	int		bd_cyl;		/* BIOS geometry */
 	int		bd_hds;
 	int		bd_sec;
 	int		bd_flags;
 #define	BD_MODEINT13	0x0000
 #define	BD_MODEEDD1	0x0001
 #define	BD_MODEEDD3	0x0002
 #define	BD_MODEMASK	0x0003
 #define	BD_FLOPPY	0x0004
 	int		bd_type;	/* BIOS 'drive type' (floppy only) */
 	uint16_t	bd_sectorsize;	/* Sector size */
 	uint64_t	bd_sectors;	/* Disk size */
 	int		bd_open;	/* reference counter */
 	void		*bd_bcache;	/* buffer cache data */
 } bdinfo [MAXBDDEV];
 static int nbdinfo = 0;
 
 #define	BD(dev)		(bdinfo[(dev)->d_unit])
 
 static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks,
     caddr_t dest);
 static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks,
     caddr_t dest);
 static int bd_int13probe(struct bdinfo *bd);
 
 static int bd_init(void);
 static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
     char *buf, size_t *rsize);
 static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t size,
     char *buf, size_t *rsize);
 static int bd_open(struct open_file *f, ...);
 static int bd_close(struct open_file *f);
 static int bd_ioctl(struct open_file *f, u_long cmd, void *data);
 static int bd_print(int verbose);
-static void bd_cleanup(void);
 
 #ifdef LOADER_GELI_SUPPORT
 static enum isgeli {
 	ISGELI_UNKNOWN,
 	ISGELI_NO,
 	ISGELI_YES
 };
 static enum isgeli geli_status[MAXBDDEV][MAXTBLENTS];
 
 int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf,
     size_t bytes);
 #endif /* LOADER_GELI_SUPPORT */
 
 struct devsw biosdisk = {
 	"disk",
 	DEVT_DISK,
 	bd_init,
 	bd_strategy,
 	bd_open,
 	bd_close,
 	bd_ioctl,
 	bd_print,
-	bd_cleanup
+	NULL
 };
 
 /*
  * Translate between BIOS device numbers and our private unit numbers.
  */
 int
 bd_bios2unit(int biosdev)
 {
 	int i;
 
 	DEBUG("looking for bios device 0x%x", biosdev);
 	for (i = 0; i < nbdinfo; i++) {
 		DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit);
 		if (bdinfo[i].bd_unit == biosdev)
 			return (i);
 	}
 	return (-1);
 }
 
 int
 bd_unit2bios(int unit)
 {
 
 	if ((unit >= 0) && (unit < nbdinfo))
 		return (bdinfo[unit].bd_unit);
 	return (-1);
 }
 
 /*
  * Quiz the BIOS for disk devices, save a little info about them.
  */
 static int
 bd_init(void)
 {
 	int base, unit, nfd = 0;
 
 #ifdef LOADER_GELI_SUPPORT
 	geli_init();
 #endif
 	/* sequence 0, 0x80 */
 	for (base = 0; base <= 0x80; base += 0x80) {
 		for (unit = base; (nbdinfo < MAXBDDEV); unit++) {
 #ifndef VIRTUALBOX
 			/*
 			 * Check the BIOS equipment list for number
 			 * of fixed disks.
 			 */
 			if(base == 0x80 &&
 			    (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES)))
 				break;
 #endif
 			bdinfo[nbdinfo].bd_open = 0;
 			bdinfo[nbdinfo].bd_bcache = NULL;
 			bdinfo[nbdinfo].bd_unit = unit;
 			bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0;
 			if (!bd_int13probe(&bdinfo[nbdinfo]))
 				break;
 
 			/* XXX we need "disk aliases" to make this simpler */
 			printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ?
 			    ('A' + unit): ('C' + unit - 0x80), nbdinfo);
 			nbdinfo++;
 			if (base == 0x80)
 				nfd++;
 		}
 	}
 	bcache_add_dev(nbdinfo);
 	return(0);
 }
 
-static void
-bd_cleanup(void)
-{
-
-	disk_cleanup(&biosdisk);
-}
-
 /*
  * Try to detect a device supported by the legacy int13 BIOS
  */
 static int
 bd_int13probe(struct bdinfo *bd)
 {
 	struct edd_params params;
 	int ret = 1;	/* assume success */
 
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x13;
 	v86.eax = 0x800;
 	v86.edx = bd->bd_unit;
 	v86int();
 
 	/* Don't error out if we get bad sector number, try EDD as well */
 	if (V86_CY(v86.efl) ||	/* carry set */
 	    (v86.edx & 0xff) <= (unsigned)(bd->bd_unit & 0x7f))	/* unit # bad */
 		return (0);	/* skip device */
 
 	if ((v86.ecx & 0x3f) == 0) /* absurd sector number */
 		ret = 0;	/* set error */
 
 	/* Convert max cyl # -> # of cylinders */
 	bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1;
 	/* Convert max head # -> # of heads */
 	bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1;
 	bd->bd_sec = v86.ecx & 0x3f;
 	bd->bd_type = v86.ebx & 0xff;
 	bd->bd_flags |= BD_MODEINT13;
 
 	/* Calculate sectors count from the geometry */
 	bd->bd_sectors = bd->bd_cyl * bd->bd_hds * bd->bd_sec;
 	bd->bd_sectorsize = BIOSDISK_SECSIZE;
 	DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl,
 	    bd->bd_hds, bd->bd_sec);
 
 	/* Determine if we can use EDD with this device. */
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x13;
 	v86.eax = 0x4100;
 	v86.edx = bd->bd_unit;
 	v86.ebx = 0x55aa;
 	v86int();
 	if (V86_CY(v86.efl) ||	/* carry set */
 	    (v86.ebx & 0xffff) != 0xaa55 || /* signature */
 	    (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0)
 		return (ret);	/* return code from int13 AH=08 */
 
 	/* EDD supported */
 	bd->bd_flags |= BD_MODEEDD1;
 	if ((v86.eax & 0xff00) >= 0x3000)
 		bd->bd_flags |= BD_MODEEDD3;
 	/* Get disk params */
 	params.len = sizeof(struct edd_params);
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x13;
 	v86.eax = 0x4800;
 	v86.edx = bd->bd_unit;
 	v86.ds = VTOPSEG(&params);
 	v86.esi = VTOPOFF(&params);
 	v86int();
 	if (!V86_CY(v86.efl)) {
 		uint64_t total;
 
-		if (params.sectors != 0)
-			bd->bd_sectors = params.sectors;
+		/*
+		 * Sector size must be a multiple of 512 bytes.
+		 * An alternate test would be to check power of 2,
+		 * powerof2(params.sector_size).
+		 */
+		if (params.sector_size % BIOSDISK_SECSIZE)
+			bd->bd_sectorsize = BIOSDISK_SECSIZE;
+		else
+			bd->bd_sectorsize = params.sector_size;
 
+		total = bd->bd_sectorsize * params.sectors;
+		if (params.sectors != 0) {
+			/* Only update if we did not overflow. */
+			if (total > params.sectors)
+				bd->bd_sectors = params.sectors;
+		}
+
 		total = (uint64_t)params.cylinders *
 		    params.heads * params.sectors_per_track;
 		if (bd->bd_sectors < total)
 			bd->bd_sectors = total;
 
-		bd->bd_sectorsize = params.sector_size;
 		ret = 1;
 	}
 	DEBUG("unit 0x%x flags %x, sectors %llu, sectorsize %u",
 	    bd->bd_unit, bd->bd_flags, bd->bd_sectors, bd->bd_sectorsize);
 	return (ret);
 }
 
 /*
  * Print information about disks
  */
 static int
 bd_print(int verbose)
 {
 	static char line[80];
 	struct disk_devdesc dev;
 	int i, ret = 0;
 
 	if (nbdinfo == 0)
 		return (0);
 
 	printf("%s devices:", biosdisk.dv_name);
 	if ((ret = pager_output("\n")) != 0)
 		return (ret);
 
 	for (i = 0; i < nbdinfo; i++) {
 		snprintf(line, sizeof(line),
 		    "    disk%d:   BIOS drive %c (%ju X %u):\n", i,
 		    (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit):
 		    ('C' + bdinfo[i].bd_unit - 0x80),
 		    (uintmax_t)bdinfo[i].bd_sectors,
 		    bdinfo[i].bd_sectorsize);
 		if ((ret = pager_output(line)) != 0)
 			break;
 		dev.d_dev = &biosdisk;
 		dev.d_unit = i;
 		dev.d_slice = -1;
 		dev.d_partition = -1;
 		if (disk_open(&dev,
 		    bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors,
-		    bdinfo[i].bd_sectorsize,
-		    (bdinfo[i].bd_flags & BD_FLOPPY) ?
-		    DISK_F_NOCACHE: 0) == 0) {
+		    bdinfo[i].bd_sectorsize) == 0) {
 			snprintf(line, sizeof(line), "    disk%d", i);
 			ret = disk_print(&dev, line, verbose);
 			disk_close(&dev);
 			if (ret != 0)
 			    return (ret);
 		}
 	}
 	return (ret);
 }
 
 /*
  * Attempt to open the disk described by (dev) for use by (f).
  *
  * Note that the philosophy here is "give them exactly what
  * they ask for".  This is necessary because being too "smart"
  * about what the user might want leads to complications.
  * (eg. given no slice or partition value, with a disk that is
  *  sliced - are they after the first BSD slice, or the DOS
  *  slice before it?)
  */
 static int
 bd_open(struct open_file *f, ...)
 {
 	struct disk_devdesc *dev, rdev;
+	struct disk_devdesc disk;
 	int err, g_err;
 	va_list ap;
+	uint64_t size;
 
 	va_start(ap, f);
 	dev = va_arg(ap, struct disk_devdesc *);
 	va_end(ap);
 
 	if (dev->d_unit < 0 || dev->d_unit >= nbdinfo)
 		return (EIO);
 	BD(dev).bd_open++;
 	if (BD(dev).bd_bcache == NULL)
 	    BD(dev).bd_bcache = bcache_allocate();
+
+	/*
+	 * Read disk size from partition.
+	 * This is needed to work around buggy BIOS systems returning
+	 * wrong (truncated) disk media size.
+	 * During bd_probe() we tested if the multiplication of bd_sectors
+	 * would overflow so it should be safe to perform here.
+	 */
+	disk.d_dev = dev->d_dev;
+	disk.d_type = dev->d_type;
+	disk.d_unit = dev->d_unit;
+	disk.d_opendata = NULL;
+	disk.d_slice = -1;
+	disk.d_partition = -1;
+	disk.d_offset = 0;
+	if (disk_open(&disk, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
+	    BD(dev).bd_sectorsize) == 0) {
+
+		if (disk_ioctl(&disk, DIOCGMEDIASIZE, &size) == 0) {
+			size /= BD(dev).bd_sectorsize;
+			if (size > BD(dev).bd_sectors)
+				BD(dev).bd_sectors = size;
+		}
+		disk_close(&disk);
+	}
+
 	err = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
-	    BD(dev).bd_sectorsize, (BD(dev).bd_flags & BD_FLOPPY) ?
-	    DISK_F_NOCACHE: 0);
+	    BD(dev).bd_sectorsize);
 
 #ifdef LOADER_GELI_SUPPORT
 	static char gelipw[GELI_PW_MAXLEN];
 	char *passphrase;
 
 	if (err)
 		return (err);
 
 	/* if we already know there is no GELI, skip the rest */
 	if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_UNKNOWN)
 		return (err);
 
 	struct dsk dskp;
 	struct ptable *table = NULL;
 	struct ptable_entry part;
 	struct pentry *entry;
 	int geli_part = 0;
 
 	dskp.drive = bd_unit2bios(dev->d_unit);
 	dskp.type = dev->d_type;
 	dskp.unit = dev->d_unit;
 	dskp.slice = dev->d_slice;
 	dskp.part = dev->d_partition;
 	dskp.start = dev->d_offset;
 
 	memcpy(&rdev, dev, sizeof(rdev));
 	/* to read the GPT table, we need to read the first sector */
 	rdev.d_offset = 0;
 	/* We need the LBA of the end of the partition */
 	table = ptable_open(&rdev, BD(dev).bd_sectors,
 	    BD(dev).bd_sectorsize, ptblread);
 	if (table == NULL) {
 		DEBUG("Can't read partition table");
 		/* soft failure, return the exit status of disk_open */
 		return (err);
 	}
 
 	if (table->type == PTABLE_GPT)
 		dskp.part = 255;
 
 	STAILQ_FOREACH(entry, &table->entries, entry) {
 		dskp.slice = entry->part.index;
 		dskp.start = entry->part.start;
 		if (is_geli(&dskp) == 0) {
 			geli_status[dev->d_unit][dskp.slice] = ISGELI_YES;
 			return (0);
 		}
 		if (geli_taste(bios_read, &dskp,
 		    entry->part.end - entry->part.start) == 0) {
+			if (geli_havekey(&dskp) == 0) {
+				geli_status[dev->d_unit][dskp.slice] = ISGELI_YES;
+				geli_part++;
+				continue;
+			}
 			if ((passphrase = getenv("kern.geom.eli.passphrase"))
 			    != NULL) {
 				/* Use the cached passphrase */
 				bcopy(passphrase, &gelipw, GELI_PW_MAXLEN);
 			}
 			if (geli_passphrase(&gelipw, dskp.unit, 'p',
 				    (dskp.slice > 0 ? dskp.slice : dskp.part),
 				    &dskp) == 0) {
 				setenv("kern.geom.eli.passphrase", &gelipw, 1);
 				bzero(gelipw, sizeof(gelipw));
 				geli_status[dev->d_unit][dskp.slice] = ISGELI_YES;
 				geli_part++;
+				continue;
 			}
 		} else
 			geli_status[dev->d_unit][dskp.slice] = ISGELI_NO;
 	}
 
 	/* none of the partitions on this disk have GELI */
 	if (geli_part == 0) {
 		/* found no GELI */
 		geli_status[dev->d_unit][dev->d_slice] = ISGELI_NO;
 	}
 #endif /* LOADER_GELI_SUPPORT */
 
 	return (err);
 }
 
 static int
 bd_close(struct open_file *f)
 {
 	struct disk_devdesc *dev;
 
 	dev = (struct disk_devdesc *)f->f_devdata;
 	BD(dev).bd_open--;
 	if (BD(dev).bd_open == 0) {
 	    bcache_free(BD(dev).bd_bcache);
 	    BD(dev).bd_bcache = NULL;
 	}
 	return (disk_close(dev));
 }
 
 static int
 bd_ioctl(struct open_file *f, u_long cmd, void *data)
 {
 	struct disk_devdesc *dev;
+	int rc;
 
 	dev = (struct disk_devdesc *)f->f_devdata;
+
+	rc = disk_ioctl(dev, cmd, data);
+	if (rc != ENOTTY)
+		return (rc);
+
 	switch (cmd) {
 	case DIOCGSECTORSIZE:
 		*(u_int *)data = BD(dev).bd_sectorsize;
 		break;
 	case DIOCGMEDIASIZE:
-		*(off_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize;
+		*(uint64_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize;
 		break;
 	default:
 		return (ENOTTY);
 	}
 	return (0);
 }
 
 static int
 bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
 	struct bcache_devdata bcd;
 	struct disk_devdesc *dev;
 
 	dev = (struct disk_devdesc *)devdata;
 	bcd.dv_strategy = bd_realstrategy;
 	bcd.dv_devdata = devdata;
 	bcd.dv_cache = BD(dev).bd_bcache;
 	return (bcache_strategy(&bcd, rw, dblk + dev->d_offset,
 	    size, buf, rsize));
 }
 
 static int
 bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
     struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
-    int			blks, remaining;
+    uint64_t		disk_blocks;
+    int			blks, rc;
 #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */
     char		fragbuf[BIOSDISK_SECSIZE];
     size_t		fragsize;
 
     fragsize = size % BIOSDISK_SECSIZE;
 #else
     if (size % BD(dev).bd_sectorsize)
 	panic("bd_strategy: %d bytes I/O not multiple of block size", size);
 #endif
 
     DEBUG("open_disk %p", dev);
+
+    /*
+     * Check the value of the size argument. We have a fairly small
+     * heap (64MB) but no known good upper limit, so we check against
+     * INT_MAX here. This also protects us against possible overflows
+     * while translating block count to bytes.
+     */
+    if (size > INT_MAX) {
+	DEBUG("too large read: %zu bytes", size);
+	return (EIO);
+    }
+
     blks = size / BD(dev).bd_sectorsize;
+    if (dblk > dblk + blks)
+	return (EIO);
+
     if (rsize)
 	*rsize = 0;
 
+    /* Get disk blocks, this value is either for whole disk or for partition */
+    if (disk_ioctl(dev, DIOCGMEDIASIZE, &disk_blocks) == 0) {
+	/* disk_ioctl() returns 0 on success; DIOCGMEDIASIZE yields bytes. */
+	disk_blocks /= BD(dev).bd_sectorsize;
+    } else {
+	/* We should not get here. Just try to survive. */
+	disk_blocks = BD(dev).bd_sectors - dev->d_offset;
+    }
+
+    /* Validate source block address. */
+    if (dblk < dev->d_offset || dblk >= dev->d_offset + disk_blocks)
+	return (EIO);
+
     /*
-     * Perform partial read to prevent read-ahead crossing
-     * the end of disk - or any 32 bit aliases of the end.
-     * Signed arithmetic is used to handle wrap-around cases
-     * like we do for TCP sequence numbers.
+     * Truncate if we are crossing disk or partition end.
      */
-    remaining = (int)(BD(dev).bd_sectors - dblk);	/* truncate */
-    if (remaining > 0 && remaining < blks) {
-	blks = remaining;
+    if (dblk + blks >= dev->d_offset + disk_blocks) {
+	blks = dev->d_offset + disk_blocks - dblk;
 	size = blks * BD(dev).bd_sectorsize;
 	DEBUG("short read %d", blks);
     }
 
     switch(rw){
     case F_READ:
 	DEBUG("read %d from %lld to %p", blks, dblk, buf);
 
-	if (blks && bd_read(dev, dblk, blks, buf)) {
-	    DEBUG("read error");
+	if (blks && (rc = bd_read(dev, dblk, blks, buf))) {
+	    /* Stay quiet only for floppy units reporting error 0x20 */
+	    if ((BD(dev).bd_flags & BD_FLOPPY) == 0 || rc != 0x20) {
+		printf("read %d from %lld to %p, error: 0x%x\n", blks, dblk,
+		    buf, rc);
+	    }
 	    return (EIO);
 	}
 #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */
 	DEBUG("bd_strategy: frag read %d from %d+%d to %p",
 	    fragsize, dblk, blks, buf + (blks * BIOSDISK_SECSIZE));
 	if (fragsize && bd_read(od, dblk + blks, 1, fragsize)) {
 	    DEBUG("frag read error");
 	    return(EIO);
 	}
 	bcopy(fragbuf, buf + (blks * BIOSDISK_SECSIZE), fragsize);
 #endif
 	break;
     case F_WRITE :
 	DEBUG("write %d from %d to %p", blks, dblk, buf);
 
 	if (blks && bd_write(dev, dblk, blks, buf)) {
 	    DEBUG("write error");
 	    return (EIO);
 	}
 #ifdef BD_SUPPORT_FRAGS
 	if(fragsize) {
 	    DEBUG("Attempted to write a frag");
 	    return (EIO);
 	}
 #endif
 	break;
     default:
 	/* DO NOTHING */
 	return (EROFS);
     }
 
     if (rsize)
 	*rsize = size;
     return (0);
 }
 
 
-/* Max number of sectors to bounce-buffer if the request crosses a 64k boundary */
-#define FLOPPY_BOUNCEBUF	18
-
 static int
 bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
     int write)
 {
     static struct edd_packet packet;
 
     packet.len = sizeof(struct edd_packet);
     packet.count = blks;
     packet.off = VTOPOFF(dest);
     packet.seg = VTOPSEG(dest);
     packet.lba = dblk;
     v86.ctl = V86_FLAGS;
     v86.addr = 0x13;
     if (write)
 	/* Should we Write with verify ?? 0x4302 ? */
 	v86.eax = 0x4300;
     else
 	v86.eax = 0x4200;
     v86.edx = BD(dev).bd_unit;
     v86.ds = VTOPSEG(&packet);
     v86.esi = VTOPOFF(&packet);
     v86int();
-    return (V86_CY(v86.efl));
+    if (V86_CY(v86.efl))
+	return (v86.eax >> 8);
+    return (0);
 }
 
 static int
 bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest,
     int write)
 {
     u_int	x, bpc, cyl, hd, sec;
 
     bpc = BD(dev).bd_sec * BD(dev).bd_hds;	/* blocks per cylinder */
     x = dblk;
     cyl = x / bpc;			/* block # / blocks per cylinder */
     x %= bpc;				/* block offset into cylinder */
     hd = x / BD(dev).bd_sec;		/* offset / blocks per track */
     sec = x % BD(dev).bd_sec;		/* offset into track */
 
     /* correct sector number for 1-based BIOS numbering */
     sec++;
 
     if (cyl > 1023)
 	/* CHS doesn't support cylinders > 1023. */
 	return (1);
 
     v86.ctl = V86_FLAGS;
     v86.addr = 0x13;
     if (write)
 	v86.eax = 0x300 | blks;
     else
 	v86.eax = 0x200 | blks;
     v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec;
     v86.edx = (hd << 8) | BD(dev).bd_unit;
     v86.es = VTOPSEG(dest);
     v86.ebx = VTOPOFF(dest);
     v86int();
-    return (V86_CY(v86.efl));
+    if (V86_CY(v86.efl))
+	return (v86.eax >> 8);
+    return (0);
 }
 
 static int
 bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write)
 {
     u_int	x, sec, result, resid, retry, maxfer;
-    caddr_t	p, xp, bbuf, breg;
+    caddr_t	p, xp, bbuf;
     
     /* Just in case some idiot actually tries to read/write -1 blocks... */
     if (blks < 0)
 	return (-1);
 
     resid = blks;
     p = dest;
 
     /* Decide whether we have to bounce */
     if (VTOP(dest) >> 20 != 0 || (BD(dev).bd_unit < 0x80 &&
 	(VTOP(dest) >> 16) != (VTOP(dest +
 	blks * BD(dev).bd_sectorsize) >> 16))) {
 
 	/* 
 	 * There is a 64k physical boundary somewhere in the
 	 * destination buffer, or the destination buffer is above
 	 * first 1MB of physical memory so we have to arrange a
 	 * suitable bounce buffer.  Allocate a buffer twice as large
 	 * as we need to.  Use the bottom half unless there is a break
 	 * there, in which case we use the top half.
 	 */
-	x = min(FLOPPY_BOUNCEBUF, (unsigned)blks);
-	bbuf = alloca(x * 2 * BD(dev).bd_sectorsize);
-	if (((u_int32_t)VTOP(bbuf) & 0xffff0000) ==
-	    ((u_int32_t)VTOP(bbuf + x * BD(dev).bd_sectorsize) & 0xffff0000)) {
-	    breg = bbuf;
-	} else {
-	    breg = bbuf + x * BD(dev).bd_sectorsize;
-	}
+	x = V86_IO_BUFFER_SIZE / BD(dev).bd_sectorsize;
+	x = min(x, (unsigned)blks);
+	bbuf = PTOV(V86_IO_BUFFER);
 	maxfer = x;		/* limit transfers to bounce region size */
     } else {
-	breg = bbuf = NULL;
+	bbuf = NULL;
 	maxfer = 0;
     }
     
     while (resid > 0) {
 	/*
 	 * Play it safe and don't cross track boundaries.
 	 * (XXX this is probably unnecessary)
 	 */
 	sec = dblk % BD(dev).bd_sec;	/* offset into track */
 	x = min(BD(dev).bd_sec - sec, resid);
 	if (maxfer > 0)
 	    x = min(x, maxfer);		/* fit bounce buffer */
 
 	/* where do we transfer to? */
-	xp = bbuf == NULL ? p : breg;
+	xp = bbuf == NULL ? p : bbuf;
 
 	/*
 	 * Put your Data In, Put your Data out,
 	 * Put your Data In, and shake it all about 
 	 */
 	if (write && bbuf != NULL)
-	    bcopy(p, breg, x * BD(dev).bd_sectorsize);
+	    bcopy(p, bbuf, x * BD(dev).bd_sectorsize);
 
 	/*
 	 * Loop retrying the operation a couple of times.  The BIOS
 	 * may also retry.
 	 */
 	for (retry = 0; retry < 3; retry++) {
 	    /* if retrying, reset the drive */
 	    if (retry > 0) {
 		v86.ctl = V86_FLAGS;
 		v86.addr = 0x13;
 		v86.eax = 0;
 		v86.edx = BD(dev).bd_unit;
 		v86int();
 	    }
 
 	    if (BD(dev).bd_flags & BD_MODEEDD1)
 		result = bd_edd_io(dev, dblk, x, xp, write);
 	    else
 		result = bd_chs_io(dev, dblk, x, xp, write);
 	    if (result == 0)
 		break;
 	}
 
 	if (write)
 	    DEBUG("Write %d sector(s) from %p (0x%x) to %lld %s", x,
 		p, VTOP(p), dblk, result ? "failed" : "ok");
 	else
 	    DEBUG("Read %d sector(s) from %lld to %p (0x%x) %s", x,
 		dblk, p, VTOP(p), result ? "failed" : "ok");
 	if (result) {
-	    return(-1);
+	    return (result);
 	}
 	if (!write && bbuf != NULL)
-	    bcopy(breg, p, x * BD(dev).bd_sectorsize);
+	    bcopy(bbuf, p, x * BD(dev).bd_sectorsize);
 	p += (x * BD(dev).bd_sectorsize);
 	dblk += x;
 	resid -= x;
     }
 
 /*    hexdump(dest, (blks * BD(dev).bd_sectorsize)); */
     return(0);
 }
 
 static int
 bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest)
 {
 #ifdef LOADER_GELI_SUPPORT
 	struct dsk dskp;
 	off_t p_off, diff;
 	daddr_t alignlba;
 	int err, n, alignblks;
 	char *tmpbuf;
 
 	/* if we already know there is no GELI, skip the rest */
 	if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_YES)
 		return (bd_io(dev, dblk, blks, dest, 0));
 
 	if (geli_status[dev->d_unit][dev->d_slice] == ISGELI_YES) {
 		/*
 		 * Align reads to DEV_GELIBOOT_BSIZE bytes because partial
 		 * sectors cannot be decrypted. Round the requested LBA down to
 		 * nearest multiple of DEV_GELIBOOT_BSIZE bytes.
 		 */
 		alignlba = rounddown2(dblk * BD(dev).bd_sectorsize,
 		    DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize;
 		/*
 		 * Round number of blocks to read up to nearest multiple of
 		 * DEV_GELIBOOT_BSIZE
 		 */
 		diff = (dblk - alignlba) * BD(dev).bd_sectorsize;
 		alignblks = roundup2(blks * BD(dev).bd_sectorsize + diff,
 		    DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize;
 
 		/*
 		 * If the read is rounded up to a larger size, use a temporary
 		 * buffer here because the buffer provided by the caller may be
 		 * too small.
 		 */
 		if (diff == 0) {
 			tmpbuf = dest;
 		} else {
 			tmpbuf = malloc(alignblks * BD(dev).bd_sectorsize);
 			if (tmpbuf == NULL) {
 				return (-1);
 			}
 		}
 
 		err = bd_io(dev, alignlba, alignblks, tmpbuf, 0);
 		if (err)
 			return (err);
 
 		dskp.drive = bd_unit2bios(dev->d_unit);
 		dskp.type = dev->d_type;
 		dskp.unit = dev->d_unit;
 		dskp.slice = dev->d_slice;
 		dskp.part = dev->d_partition;
 		dskp.start = dev->d_offset;
 
 		/* GELI needs the offset relative to the partition start */
 		p_off = alignlba - dskp.start;
 
 		err = geli_read(&dskp, p_off * BD(dev).bd_sectorsize, tmpbuf,
 		    alignblks * BD(dev).bd_sectorsize);
 		if (err)
 			return (err);
 
 		if (tmpbuf != dest) {
 			bcopy(tmpbuf + diff, dest, blks * BD(dev).bd_sectorsize);
 			free(tmpbuf);
 		}
 		return (0);
 	}
 #endif /* LOADER_GELI_SUPPORT */
 
 	return (bd_io(dev, dblk, blks, dest, 0));
 }
 
 static int
 bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest)
 {
 
 	return (bd_io(dev, dblk, blks, dest, 1));
 }
 
 /*
  * Return the BIOS geometry of a given "fixed drive" in a format
  * suitable for the legacy bootinfo structure.  Since the kernel is
  * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we
  * prefer to get the information directly, rather than rely on being
  * able to put it together from information already maintained for
  * different purposes and for a probably different number of drives.
  *
  * For valid drives, the geometry is expected in the format (31..0)
  * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are
  * indicated by returning the geometry of a "1.2M" PC-format floppy
  * disk.  And, incidentally, what is returned is not the geometry as
  * such but the highest valid cylinder, head, and sector numbers.
  */
 u_int32_t
 bd_getbigeom(int bunit)
 {
 
     v86.ctl = V86_FLAGS;
     v86.addr = 0x13;
     v86.eax = 0x800;
     v86.edx = 0x80 + bunit;
     v86int();
     if (V86_CY(v86.efl))
 	return 0x4f010f;
     return ((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) |
 	   (v86.edx & 0xff00) | (v86.ecx & 0x3f);
 }
 
 /*
  * Return a suitable dev_t value for (dev).
  *
  * In the case where it looks like (dev) is a SCSI disk, we allow the number of
  * IDE disks to be specified in $num_ide_disks.  There should be a Better Way.
  */
 int
 bd_getdev(struct i386_devdesc *d)
 {
     struct disk_devdesc		*dev;
     int				biosdev;
     int 			major;
     int				rootdev;
     char			*nip, *cp;
     int				i, unit;
 
     dev = (struct disk_devdesc *)d;
     biosdev = bd_unit2bios(dev->d_unit);
     DEBUG("unit %d BIOS device %d", dev->d_unit, biosdev);
     if (biosdev == -1)				/* not a BIOS device */
 	return(-1);
     if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize,
-	BD(dev).bd_sectorsize,(BD(dev).bd_flags & BD_FLOPPY) ?
-	DISK_F_NOCACHE: 0) != 0)		/* oops, not a viable device */
+	BD(dev).bd_sectorsize) != 0)		/* oops, not a viable device */
 	    return (-1);
     else
 	disk_close(dev);
 
     if (biosdev < 0x80) {
 	/* floppy (or emulated floppy) or ATAPI device */
 	if (bdinfo[dev->d_unit].bd_type == DT_ATAPI) {
 	    /* is an ATAPI disk */
 	    major = WFDMAJOR;
 	} else {
 	    /* is a floppy disk */
 	    major = FDMAJOR;
 	}
     } else {
 	    /* assume an IDE disk */
 	    major = WDMAJOR;
     }
     /* default root disk unit number */
     unit = biosdev & 0x7f;
 
     /* XXX a better kludge to set the root disk unit number */
     if ((nip = getenv("root_disk_unit")) != NULL) {
 	i = strtol(nip, &cp, 0);
 	/* check for parse error */
 	if ((cp != nip) && (*cp == 0))
 	    unit = i;
     }
 
     rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition);
     DEBUG("dev is 0x%x\n", rootdev);
     return(rootdev);
 }
 
 #ifdef LOADER_GELI_SUPPORT
 int
 bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes)
 {
 	struct disk_devdesc dev;
 
 	dev.d_dev = &biosdisk;
 	dev.d_type = priv->type;
 	dev.d_unit = priv->unit;
 	dev.d_slice = priv->slice;
 	dev.d_partition = priv->part;
 	dev.d_offset = priv->start;
 
 	off = off / BD(&dev).bd_sectorsize;
 	/* GELI gives us the offset relative to the partition start */
 	off += dev.d_offset;
 	bytes = bytes / BD(&dev).bd_sectorsize;
 
 	return (bd_io(&dev, off, bytes, buf, 0));
 }
 #endif /* LOADER_GELI_SUPPORT */
Index: stable/11/sys/boot/i386/libi386/bootinfo32.c
===================================================================
--- stable/11/sys/boot/i386/libi386/bootinfo32.c	(revision 329098)
+++ stable/11/sys/boot/i386/libi386/bootinfo32.c	(revision 329099)
@@ -1,274 +1,291 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <sys/param.h>
 #include <sys/reboot.h>
 #include <sys/linker.h>
 #include <machine/bootinfo.h>
+#include <machine/metadata.h>
 #include "bootstrap.h"
 #include "libi386.h"
 #include "btxv86.h"
 
+#ifdef LOADER_GELI_SUPPORT
+#include "geliboot.h"
+
+static const size_t keybuf_size = sizeof(struct keybuf) +
+    (GELI_MAX_KEYS * sizeof(struct keybuf_ent));
+#endif
+
 static struct bootinfo  bi;
 
 /*
  * Copy module-related data into the load area, where it can be
  * used as a directory for loaded modules.
  *
  * Module data is presented in a self-describing format.  Each datum
  * is preceded by a 32-bit identifier and a 32-bit size field.
  *
  * Currently, the following data are saved:
  *
  * MOD_NAME	(variable)		module name (string)
  * MOD_TYPE	(variable)		module type (string)
  * MOD_ARGS	(variable)		module parameters (string)
  * MOD_ADDR	sizeof(vm_offset_t)	module load address
  * MOD_SIZE	sizeof(size_t)		module size
  * MOD_METADATA	(variable)		type-specific metadata
  */
 #define COPY32(v, a, c) {			\
     u_int32_t	x = (v);			\
     if (c)					\
 	i386_copyin(&x, a, sizeof(x));		\
     a += sizeof(x);				\
 }
 
 #define MOD_STR(t, a, s, c) {			\
     COPY32(t, a, c);				\
     COPY32(strlen(s) + 1, a, c);		\
     if (c)					\
 	i386_copyin(s, a, strlen(s) + 1);	\
     a += roundup(strlen(s) + 1, sizeof(u_long));\
 }
 
 #define MOD_NAME(a, s, c)	MOD_STR(MODINFO_NAME, a, s, c)
 #define MOD_TYPE(a, s, c)	MOD_STR(MODINFO_TYPE, a, s, c)
 #define MOD_ARGS(a, s, c)	MOD_STR(MODINFO_ARGS, a, s, c)
 
 #define MOD_VAR(t, a, s, c) {			\
     COPY32(t, a, c);				\
     COPY32(sizeof(s), a, c);			\
     if (c)					\
 	i386_copyin(&s, a, sizeof(s));		\
     a += roundup(sizeof(s), sizeof(u_long));	\
 }
 
 #define MOD_ADDR(a, s, c)	MOD_VAR(MODINFO_ADDR, a, s, c)
 #define MOD_SIZE(a, s, c)	MOD_VAR(MODINFO_SIZE, a, s, c)
 
 #define MOD_METADATA(a, mm, c) {		\
     COPY32(MODINFO_METADATA | mm->md_type, a, c); \
     COPY32(mm->md_size, a, c);			\
     if (c)					\
 	i386_copyin(mm->md_data, a, mm->md_size); \
     a += roundup(mm->md_size, sizeof(u_long));\
 }
 
 #define MOD_END(a, c) {				\
     COPY32(MODINFO_END, a, c);			\
     COPY32(0, a, c);				\
 }
 
 static vm_offset_t
 bi_copymodules32(vm_offset_t addr)
 {
     struct preloaded_file	*fp;
     struct file_metadata	*md;
     int				c;
 
     c = addr != 0;
     /* start with the first module on the list, should be the kernel */
     for (fp = file_findfile(NULL, NULL); fp != NULL; fp = fp->f_next) {
 
 	MOD_NAME(addr, fp->f_name, c);	/* this field must come first */
 	MOD_TYPE(addr, fp->f_type, c);
 	if (fp->f_args)
 	    MOD_ARGS(addr, fp->f_args, c);
 	MOD_ADDR(addr, fp->f_addr, c);
 	MOD_SIZE(addr, fp->f_size, c);
 	for (md = fp->f_metadata; md != NULL; md = md->md_next)
 	    if (!(md->md_type & MODINFOMD_NOCOPY))
 		MOD_METADATA(addr, md, c);
     }
     MOD_END(addr, c);
     return(addr);
 }
 
 /*
  * Load the information expected by an i386 kernel.
  *
  * - The 'boothowto' argument is constructed
  * - The 'bootdev' argument is constructed
  * - The 'bootinfo' struct is constructed, and copied into the kernel space.
  * - The kernel environment is copied into kernel space.
  * - Module metadata are formatted and placed in kernel space.
  */
 int
 bi_load32(char *args, int *howtop, int *bootdevp, vm_offset_t *bip, vm_offset_t *modulep, vm_offset_t *kernendp)
 {
     struct preloaded_file	*xp, *kfp;
     struct i386_devdesc		*rootdev;
     struct file_metadata	*md;
     vm_offset_t			addr;
     vm_offset_t			kernend;
     vm_offset_t			envp;
     vm_offset_t			size;
     vm_offset_t			ssym, esym;
     char			*rootdevname;
     int				bootdevnr, i, howto;
     char			*kernelname;
     const char			*kernelpath;
+#ifdef LOADER_GELI_SUPPORT
+    char                        buf[keybuf_size];
+    struct keybuf               *keybuf = (struct keybuf *)buf;
+#endif
 
     howto = bi_getboothowto(args);
 
-    /* 
-     * Allow the environment variable 'rootdev' to override the supplied device 
+    /*
+     * Allow the environment variable 'rootdev' to override the supplied device
      * This should perhaps go to MI code and/or have $rootdev tested/set by
      * MI code before launching the kernel.
      */
     rootdevname = getenv("rootdev");
     i386_getdev((void **)(&rootdev), rootdevname, NULL);
     if (rootdev == NULL) {		/* bad $rootdev/$currdev */
 	printf("can't determine root device\n");
 	return(EINVAL);
     }
 
     /* Try reading the /etc/fstab file to select the root device */
     getrootmount(i386_fmtdev((void *)rootdev));
 
     /* Do legacy rootdev guessing */
 
     /* XXX - use a default bootdev of 0.  Is this ok??? */
     bootdevnr = 0;
 
     switch(rootdev->d_type) {
     case DEVT_CD:
 	    /* Pass in BIOS device number. */
 	    bi.bi_bios_dev = bc_unit2bios(rootdev->d_unit);
 	    bootdevnr = bc_getdev(rootdev);
 	    break;
 
     case DEVT_DISK:
 	/* pass in the BIOS device number of the current disk */
 	bi.bi_bios_dev = bd_unit2bios(rootdev->d_unit);
 	bootdevnr = bd_getdev(rootdev);
 	break;
 
     case DEVT_NET:
     case DEVT_ZFS:
 	    break;
-	    
+
     default:
 	printf("WARNING - don't know how to boot from device type %d\n", rootdev->d_type);
     }
     if (bootdevnr == -1) {
 	printf("root device %s invalid\n", i386_fmtdev(rootdev));
 	return (EINVAL);
     }
     free(rootdev);
 
     /* find the last module in the chain */
     addr = 0;
     for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) {
 	if (addr < (xp->f_addr + xp->f_size))
 	    addr = xp->f_addr + xp->f_size;
     }
     /* pad to a page boundary */
     addr = roundup(addr, PAGE_SIZE);
 
     /* copy our environment */
     envp = addr;
     addr = bi_copyenv(addr);
 
     /* pad to a page boundary */
     addr = roundup(addr, PAGE_SIZE);
 
     kfp = file_findfile(NULL, "elf kernel");
     if (kfp == NULL)
       kfp = file_findfile(NULL, "elf32 kernel");
     if (kfp == NULL)
 	panic("can't find kernel file");
     kernend = 0;	/* fill it in later */
     file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto);
     file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp);
     file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend);
     bios_addsmapdata(kfp);
+#ifdef LOADER_GELI_SUPPORT
+    geli_fill_keybuf(keybuf);
+    file_addmetadata(kfp, MODINFOMD_KEYBUF, keybuf_size, buf);
+    bzero(buf, sizeof(buf));
+#endif
 
     /* Figure out the size and location of the metadata */
     *modulep = addr;
     size = bi_copymodules32(0);
     kernend = roundup(addr + size, PAGE_SIZE);
     *kernendp = kernend;
 
     /* patch MODINFOMD_KERNEND */
     md = file_findmetadata(kfp, MODINFOMD_KERNEND);
     bcopy(&kernend, md->md_data, sizeof kernend);
 
     /* copy module list and metadata */
     (void)bi_copymodules32(addr);
 
     ssym = esym = 0;
     md = file_findmetadata(kfp, MODINFOMD_SSYM);
     if (md != NULL)
 	ssym = *((vm_offset_t *)&(md->md_data));
     md = file_findmetadata(kfp, MODINFOMD_ESYM);
     if (md != NULL)
 	esym = *((vm_offset_t *)&(md->md_data));
     if (ssym == 0 || esym == 0)
 	ssym = esym = 0;		/* sanity */
 
     /* legacy bootinfo structure */
     kernelname = getenv("kernelname");
     i386_getdev(NULL, kernelname, &kernelpath);
     bi.bi_version = BOOTINFO_VERSION;
     bi.bi_kernelname = 0;		/* XXX char * -> kernel name */
     bi.bi_nfs_diskless = 0;		/* struct nfs_diskless * */
     bi.bi_n_bios_used = 0;		/* XXX would have to hook biosdisk driver for these */
     for (i = 0; i < N_BIOS_GEOM; i++)
         bi.bi_bios_geom[i] = bd_getbigeom(i);
     bi.bi_size = sizeof(bi);
     bi.bi_memsizes_valid = 1;
     bi.bi_basemem = bios_basemem / 1024;
     bi.bi_extmem = bios_extmem / 1024;
     bi.bi_envp = envp;
     bi.bi_modulep = *modulep;
     bi.bi_kernend = kernend;
     bi.bi_kernelname = VTOP(kernelpath);
     bi.bi_symtab = ssym;       /* XXX this is only the primary kernel symtab */
     bi.bi_esymtab = esym;
 
     /* legacy boot arguments */
     *howtop = howto | RB_BOOTINFO;
     *bootdevp = bootdevnr;
     *bip = VTOP(&bi);
 
     return(0);
 }
Index: stable/11/sys/boot/i386/libi386/bootinfo64.c
===================================================================
--- stable/11/sys/boot/i386/libi386/bootinfo64.c	(revision 329098)
+++ stable/11/sys/boot/i386/libi386/bootinfo64.c	(revision 329099)
@@ -1,263 +1,280 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <sys/param.h>
 #include <sys/reboot.h>
 #include <sys/linker.h>
 #include <machine/bootinfo.h>
 #include <machine/cpufunc.h>
 #include <machine/metadata.h>
 #include <machine/psl.h>
 #include <machine/specialreg.h>
 #include "bootstrap.h"
 #include "libi386.h"
 #include "btxv86.h"
 
+#ifdef LOADER_GELI_SUPPORT
+#include "geliboot.h"
+
+static const size_t keybuf_size = sizeof(struct keybuf) +
+    (GELI_MAX_KEYS * sizeof(struct keybuf_ent));
+#endif
+
 /*
  * Copy module-related data into the load area, where it can be
  * used as a directory for loaded modules.
  *
  * Module data is presented in a self-describing format.  Each datum
  * is preceded by a 32-bit identifier and a 32-bit size field.
  *
  * Currently, the following data are saved:
  *
  * MOD_NAME	(variable)		module name (string)
  * MOD_TYPE	(variable)		module type (string)
  * MOD_ARGS	(variable)		module parameters (string)
  * MOD_ADDR	sizeof(vm_offset_t)	module load address
  * MOD_SIZE	sizeof(size_t)		module size
  * MOD_METADATA	(variable)		type-specific metadata
  */
 #define COPY32(v, a, c) {			\
     u_int32_t	x = (v);			\
     if (c)					\
 	i386_copyin(&x, a, sizeof(x));		\
     a += sizeof(x);				\
 }
 
 #define MOD_STR(t, a, s, c) {			\
     COPY32(t, a, c);				\
     COPY32(strlen(s) + 1, a, c);		\
     if (c)					\
 	i386_copyin(s, a, strlen(s) + 1);	\
     a += roundup(strlen(s) + 1, sizeof(u_int64_t));\
 }
 
 #define MOD_NAME(a, s, c)	MOD_STR(MODINFO_NAME, a, s, c)
 #define MOD_TYPE(a, s, c)	MOD_STR(MODINFO_TYPE, a, s, c)
 #define MOD_ARGS(a, s, c)	MOD_STR(MODINFO_ARGS, a, s, c)
 
 #define MOD_VAR(t, a, s, c) {			\
     COPY32(t, a, c);				\
     COPY32(sizeof(s), a, c);			\
     if (c)					\
 	i386_copyin(&s, a, sizeof(s));		\
     a += roundup(sizeof(s), sizeof(u_int64_t));	\
 }
 
 #define MOD_ADDR(a, s, c)	MOD_VAR(MODINFO_ADDR, a, s, c)
 #define MOD_SIZE(a, s, c)	MOD_VAR(MODINFO_SIZE, a, s, c)
 
 #define MOD_METADATA(a, mm, c) {		\
     COPY32(MODINFO_METADATA | mm->md_type, a, c); \
     COPY32(mm->md_size, a, c);			\
     if (c)					\
 	i386_copyin(mm->md_data, a, mm->md_size); \
     a += roundup(mm->md_size, sizeof(u_int64_t));\
 }
 
 #define MOD_END(a, c) {				\
     COPY32(MODINFO_END, a, c);			\
     COPY32(0, a, c);				\
 }
 
 static vm_offset_t
 bi_copymodules64(vm_offset_t addr)
 {
     struct preloaded_file	*fp;
     struct file_metadata	*md;
     int				c;
     u_int64_t			v;
 
     c = addr != 0;
     /* start with the first module on the list, should be the kernel */
     for (fp = file_findfile(NULL, NULL); fp != NULL; fp = fp->f_next) {
 
 	MOD_NAME(addr, fp->f_name, c);	/* this field must come first */
 	MOD_TYPE(addr, fp->f_type, c);
 	if (fp->f_args)
 	    MOD_ARGS(addr, fp->f_args, c);
 	v = fp->f_addr;
 	MOD_ADDR(addr, v, c);
 	v = fp->f_size;
 	MOD_SIZE(addr, v, c);
 	for (md = fp->f_metadata; md != NULL; md = md->md_next)
 	    if (!(md->md_type & MODINFOMD_NOCOPY))
 		MOD_METADATA(addr, md, c);
     }
     MOD_END(addr, c);
     return(addr);
 }
 
 /*
  * Check to see if this CPU supports long mode.
  */
 static int
 bi_checkcpu(void)
 {
     char *cpu_vendor;
     int vendor[3];
     int eflags;
     unsigned int regs[4];
 
     /* Check for presence of "cpuid". */
     eflags = read_eflags();
     write_eflags(eflags ^ PSL_ID);
     if (!((eflags ^ read_eflags()) & PSL_ID))
 	return (0);
 
     /* Fetch the vendor string. */
     do_cpuid(0, regs);
     vendor[0] = regs[1];
     vendor[1] = regs[3];
     vendor[2] = regs[2];
     cpu_vendor = (char *)vendor;
 
     /* Check for vendors that support AMD features. */
     if (strncmp(cpu_vendor, INTEL_VENDOR_ID, 12) != 0 &&
 	strncmp(cpu_vendor, AMD_VENDOR_ID, 12) != 0 &&
 	strncmp(cpu_vendor, CENTAUR_VENDOR_ID, 12) != 0)
 	return (0);
 
     /* Has to support AMD features. */
     do_cpuid(0x80000000, regs);
     if (!(regs[0] >= 0x80000001))
 	return (0);
 
     /* Check for long mode. */
     do_cpuid(0x80000001, regs);
     return (regs[3] & AMDID_LM);
 }
 
 /*
  * Load the information expected by an amd64 kernel.
  *
  * - The 'boothowto' argument is constructed
  * - The 'bootdev' argument is constructed
  * - The 'bootinfo' struct is constructed, and copied into the kernel space.
  * - The kernel environment is copied into kernel space.
  * - Module metadata are formatted and placed in kernel space.
  */
 int
 bi_load64(char *args, vm_offset_t addr, vm_offset_t *modulep,
     vm_offset_t *kernendp, int add_smap)
 {
     struct preloaded_file	*xp, *kfp;
     struct i386_devdesc		*rootdev;
     struct file_metadata	*md;
     u_int64_t			kernend;
     u_int64_t			envp;
     u_int64_t			module;
     vm_offset_t			size;
     char			*rootdevname;
     int				howto;
+#ifdef LOADER_GELI_SUPPORT
+    char                        buf[keybuf_size];
+    struct keybuf               *keybuf = (struct keybuf *)buf;
+#endif
 
     if (!bi_checkcpu()) {
 	printf("CPU doesn't support long mode\n");
 	return (EINVAL);
     }
 
     howto = bi_getboothowto(args);
 
-    /* 
-     * Allow the environment variable 'rootdev' to override the supplied device 
+    /*
+     * Allow the environment variable 'rootdev' to override the supplied device
      * This should perhaps go to MI code and/or have $rootdev tested/set by
      * MI code before launching the kernel.
      */
     rootdevname = getenv("rootdev");
     i386_getdev((void **)(&rootdev), rootdevname, NULL);
     if (rootdev == NULL) {		/* bad $rootdev/$currdev */
 	printf("can't determine root device\n");
 	return(EINVAL);
     }
 
     /* Try reading the /etc/fstab file to select the root device */
     getrootmount(i386_fmtdev((void *)rootdev));
 
     if (addr == 0) {
         /* find the last module in the chain */
         for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) {
             if (addr < (xp->f_addr + xp->f_size))
                 addr = xp->f_addr + xp->f_size;
         }
     }
     /* pad to a page boundary */
     addr = roundup(addr, PAGE_SIZE);
 
     /* place the metadata before anything */
     module = *modulep = addr;
 
     kfp = file_findfile(NULL, "elf kernel");
     if (kfp == NULL)
       kfp = file_findfile(NULL, "elf64 kernel");
     if (kfp == NULL)
 	panic("can't find kernel file");
     kernend = 0;	/* fill it in later */
     file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto);
     file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp);
     file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend);
     file_addmetadata(kfp, MODINFOMD_MODULEP, sizeof module, &module);
     if (add_smap != 0)
         bios_addsmapdata(kfp);
+
+#ifdef LOADER_GELI_SUPPORT
+    geli_fill_keybuf(keybuf);
+    file_addmetadata(kfp, MODINFOMD_KEYBUF, keybuf_size, buf);
+    bzero(buf, sizeof(buf));
+#endif
 
     size = bi_copymodules64(0);
 
     /* copy our environment */
     envp = roundup(addr + size, PAGE_SIZE);
     addr = bi_copyenv(envp);
 
     /* set kernend */
     kernend = roundup(addr, PAGE_SIZE);
     *kernendp = kernend;
 
     /* patch MODINFOMD_KERNEND */
     md = file_findmetadata(kfp, MODINFOMD_KERNEND);
     bcopy(&kernend, md->md_data, sizeof kernend);
 
     /* patch MODINFOMD_ENVP */
     md = file_findmetadata(kfp, MODINFOMD_ENVP);
     bcopy(&envp, md->md_data, sizeof envp);
 
     /* copy module list and metadata */
     (void)bi_copymodules64(*modulep);
 
     return(0);
 }
Index: stable/11/sys/boot/i386/libi386/pxe.c
===================================================================
--- stable/11/sys/boot/i386/libi386/pxe.c	(revision 329098)
+++ stable/11/sys/boot/i386/libi386/pxe.c	(revision 329099)
@@ -1,727 +1,690 @@
 /*-
  * Copyright (c) 2000 Alfred Perlstein <alfred@freebsd.org>
  * Copyright (c) 2000 Paul Saab <ps@freebsd.org>
  * Copyright (c) 2000 John Baldwin <jhb@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <string.h>
 #include <stdarg.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/udp.h>
 
 #include <net.h>
 #include <netif.h>
 #include <nfsv2.h>
 #include <iodesc.h>
 
 #include <bootp.h>
 #include <bootstrap.h>
 #include "btxv86.h"
 #include "pxe.h"
 
 /*
  * Allocate the PXE buffers statically instead of sticking grimy fingers into
- * BTX's private data area.  The scratch buffer is used to send information to
+ * BTX's private data area. The scratch buffer is used to send information to
  * the PXE BIOS, and the data buffer is used to receive data from the PXE BIOS.
  */
 #define	PXE_BUFFER_SIZE		0x2000
 #define	PXE_TFTP_BUFFER_SIZE	512
 static char	scratch_buffer[PXE_BUFFER_SIZE];
 static char	data_buffer[PXE_BUFFER_SIZE];
 
-static pxenv_t	*pxenv_p = NULL;        /* PXENV+ */
-static pxe_t	*pxe_p   = NULL;	/* !PXE */
-static BOOTPLAYER	bootplayer;	/* PXE Cached information. */
+static pxenv_t *pxenv_p = NULL;	/* PXENV+ */
+static pxe_t *pxe_p = NULL;		/* !PXE */
+static BOOTPLAYER bootplayer;	/* PXE Cached information. */
 
-static int 	pxe_debug = 0;
+static int	pxe_debug = 0;
 static int	pxe_sock = -1;
 static int	pxe_opens = 0;
 
 void		pxe_enable(void *pxeinfo);
 static void	(*pxe_call)(int func);
 static void	pxenv_call(int func);
 static void	bangpxe_call(int func);
 
 static int	pxe_init(void);
 static int	pxe_strategy(void *devdata, int flag, daddr_t dblk,
-			     size_t size, char *buf, size_t *rsize);
+			    size_t size, char *buf, size_t *rsize);
 static int	pxe_open(struct open_file *f, ...);
 static int	pxe_close(struct open_file *f);
 static int	pxe_print(int verbose);
 static void	pxe_cleanup(void);
 static void	pxe_setnfshandle(char *rootpath);
 
 static void	pxe_perror(int error);
 static int	pxe_netif_match(struct netif *nif, void *machdep_hint);
 static int	pxe_netif_probe(struct netif *nif, void *machdep_hint);
 static void	pxe_netif_init(struct iodesc *desc, void *machdep_hint);
 static int	pxe_netif_get(struct iodesc *desc, void *pkt, size_t len,
-			      time_t timeout);
+			    time_t timeout);
 static int	pxe_netif_put(struct iodesc *desc, void *pkt, size_t len);
 static void	pxe_netif_end(struct netif *nif);
 
-#ifdef OLD_NFSV2
-int nfs_getrootfh(struct iodesc*, char*, u_char*);
-#else
 int nfs_getrootfh(struct iodesc*, char*, uint32_t*, u_char*);
-#endif
 
 extern struct netif_stats	pxe_st[];
 extern u_int16_t		__bangpxeseg;
 extern u_int16_t		__bangpxeoff;
 extern void			__bangpxeentry(void);
 extern u_int16_t		__pxenvseg;
 extern u_int16_t		__pxenvoff;
 extern void			__pxenventry(void);
+extern struct in_addr		servip;
 
 struct netif_dif pxe_ifs[] = {
-/*      dif_unit        dif_nsel        dif_stats       dif_private     */
+/*	dif_unit        dif_nsel        dif_stats       dif_private     */
 	{0,             1,              &pxe_st[0],     0}
 };
 
 struct netif_stats pxe_st[NENTS(pxe_ifs)];
 
 struct netif_driver pxenetif = {
 	"pxenet",
 	pxe_netif_match,
 	pxe_netif_probe,
 	pxe_netif_init,
 	pxe_netif_get,
 	pxe_netif_put,
 	pxe_netif_end,
 	pxe_ifs,
 	NENTS(pxe_ifs)
 };
 
 struct netif_driver *netif_drivers[] = {
 	&pxenetif,
 	NULL
 };
 
 struct devsw pxedisk = {
-	"pxe", 
+	"pxe",
 	DEVT_NET,
 	pxe_init,
-	pxe_strategy, 
-	pxe_open, 
-	pxe_close, 
+	pxe_strategy,
+	pxe_open,
+	pxe_close,
 	noioctl,
 	pxe_print,
 	pxe_cleanup
 };
 
 /*
  * This function is called by the loader to enable PXE support if we
- * are booted by PXE.  The passed in pointer is a pointer to the
- * PXENV+ structure.
+ * are booted by PXE. The passed in pointer is a pointer to the PXENV+
+ * structure.
  */
 void
 pxe_enable(void *pxeinfo)
 {
 	pxenv_p  = (pxenv_t *)pxeinfo;
 	pxe_p    = (pxe_t *)PTOV(pxenv_p->PXEPtr.segment * 16 +
 				 pxenv_p->PXEPtr.offset);
 	pxe_call = NULL;
 }
 
-/* 
+/*
  * return true if pxe structures are found/initialized,
- * also figures out our IP information via the pxe cached info struct 
+ * also figures out our IP information via the pxe cached info struct
  */
 static int
 pxe_init(void)
 {
-	t_PXENV_GET_CACHED_INFO	*gci_p;
-	int	counter;
+	t_PXENV_GET_CACHED_INFO *gci_p;
+	int counter;
 	uint8_t checksum;
 	uint8_t *checkptr;
-	
-	if(pxenv_p == NULL)
+
+	if (pxenv_p == NULL)
 		return (0);
 
-	/*  look for "PXENV+" */
+	/* look for "PXENV+" */
 	if (bcmp((void *)pxenv_p->Signature, S_SIZE("PXENV+"))) {
 		pxenv_p = NULL;
 		return (0);
 	}
 
 	/* make sure the size is something we can handle */
 	if (pxenv_p->Length > sizeof(*pxenv_p)) {
-	  	printf("PXENV+ structure too large, ignoring\n");
+		printf("PXENV+ structure too large, ignoring\n");
 		pxenv_p = NULL;
 		return (0);
 	}
-	    
-	/* 
+
+	/*
 	 * do byte checksum:
 	 * add up each byte in the structure, the total should be 0
 	 */
-	checksum = 0;	
+	checksum = 0;
 	checkptr = (uint8_t *) pxenv_p;
 	for (counter = 0; counter < pxenv_p->Length; counter++)
 		checksum += *checkptr++;
 	if (checksum != 0) {
 		printf("PXENV+ structure failed checksum, ignoring\n");
 		pxenv_p = NULL;
 		return (0);
 	}
 
-	
 	/*
 	 * PXENV+ passed, so use that if !PXE is not available or
 	 * the checksum fails.
 	 */
 	pxe_call = pxenv_call;
 	if (pxenv_p->Version >= 0x0200) {
 		for (;;) {
 			if (bcmp((void *)pxe_p->Signature, S_SIZE("!PXE"))) {
 				pxe_p = NULL;
 				break;
 			}
 			checksum = 0;
 			checkptr = (uint8_t *)pxe_p;
 			for (counter = 0; counter < pxe_p->StructLength;
-			     counter++)
+			    counter++)
 				checksum += *checkptr++;
 			if (checksum != 0) {
 				pxe_p = NULL;
 				break;
 			}
 			pxe_call = bangpxe_call;
 			break;
 		}
 	}
 	
 	printf("\nPXE version %d.%d, real mode entry point ",
-	       (uint8_t) (pxenv_p->Version >> 8),
-	       (uint8_t) (pxenv_p->Version & 0xFF));
+	    (uint8_t) (pxenv_p->Version >> 8),
+	    (uint8_t) (pxenv_p->Version & 0xFF));
 	if (pxe_call == bangpxe_call)
 		printf("@%04x:%04x\n",
-		       pxe_p->EntryPointSP.segment,
-		       pxe_p->EntryPointSP.offset);
+		    pxe_p->EntryPointSP.segment,
+		    pxe_p->EntryPointSP.offset);
 	else
 		printf("@%04x:%04x\n",
-		       pxenv_p->RMEntry.segment, pxenv_p->RMEntry.offset);
+		    pxenv_p->RMEntry.segment, pxenv_p->RMEntry.offset);
 
 	gci_p = (t_PXENV_GET_CACHED_INFO *) scratch_buffer;
 	bzero(gci_p, sizeof(*gci_p));
-	gci_p->PacketType =  PXENV_PACKET_TYPE_BINL_REPLY;
+	gci_p->PacketType = PXENV_PACKET_TYPE_BINL_REPLY;
 	pxe_call(PXENV_GET_CACHED_INFO);
 	if (gci_p->Status != 0) {
 		pxe_perror(gci_p->Status);
 		pxe_p = NULL;
 		return (0);
 	}
 	bcopy(PTOV((gci_p->Buffer.segment << 4) + gci_p->Buffer.offset),
-	      &bootplayer, gci_p->BufferSize);
+	    &bootplayer, gci_p->BufferSize);
 	return (1);
 }
 
 
 static int
 pxe_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
 	return (EIO);
 }
 
 static int
 pxe_open(struct open_file *f, ...)
 {
-    va_list args;
-    char *devname;		/* Device part of file name (or NULL). */
-    char temp[FNAME_SIZE];
-    int error = 0;
-    int i;
-	
-    va_start(args, f);
-    devname = va_arg(args, char*);
-    va_end(args);
+	va_list args;
+	char *devname;		/* Device part of file name (or NULL). */
+	char temp[FNAME_SIZE];
+	int error = 0;
+	int i;
 
-    /* On first open, do netif open, mount, etc. */
-    if (pxe_opens == 0) {
-	/* Find network interface. */
-	if (pxe_sock < 0) {
-	    pxe_sock = netif_open(devname);
-	    if (pxe_sock < 0) {
-		printf("pxe_open: netif_open() failed\n");
-		return (ENXIO);
-	    }
-	    if (pxe_debug)
-		printf("pxe_open: netif_open() succeeded\n");
-	}
-	if (rootip.s_addr == 0) {
-		/*
-		 * Do a bootp/dhcp request to find out where our
-		 * NFS/TFTP server is.  Even if we dont get back
-		 * the proper information, fall back to the server
-		 * which brought us to life and a default rootpath.
-		 */
-		bootp(pxe_sock, BOOTP_PXE);
-		if (rootip.s_addr == 0)
-			rootip.s_addr = bootplayer.sip;
+	va_start(args, f);
+	devname = va_arg(args, char*);
+	va_end(args);
 
-		netproto = NET_NFS;
-		if (tftpip.s_addr != 0) {
-			netproto = NET_TFTP;
-			rootip.s_addr = tftpip.s_addr;
-		}
+	/* On first open, do netif open, mount, etc. */
+	if (pxe_opens == 0) {
+		/* Find network interface. */
+		if (pxe_sock < 0) {
+			pxe_sock = netif_open(devname);
+			if (pxe_sock < 0) {
+				printf("pxe_open: netif_open() failed\n");
+				return (ENXIO);
+			}
+			if (pxe_debug)
+				printf("pxe_open: netif_open() succeeded\n");
 
-		if (netproto == NET_NFS && !rootpath[0])
-			strcpy(rootpath, PXENFSROOTPATH);
+			if (socktodesc(pxe_sock) == NULL) {
+				printf("pxe_open: bad socket %d\n", pxe_sock);
+				return (ENXIO);
+			}
 
-		for (i = 0; rootpath[i] != '\0' && i < FNAME_SIZE; i++)
-			if (rootpath[i] == ':')
-				break;
-		if (i && i != FNAME_SIZE && rootpath[i] == ':') {
-			rootpath[i++] = '\0';
-			if (inet_addr(&rootpath[0]) != INADDR_NONE)
-				rootip.s_addr = inet_addr(&rootpath[0]);
-			bcopy(&rootpath[i], &temp[0], strlen(&rootpath[i])+1);
-			bcopy(&temp[0], &rootpath[0], strlen(&rootpath[i])+1);
 		}
-		setenv("boot.netif.ip", inet_ntoa(myip), 1);
-		setenv("boot.netif.netmask", intoa(netmask), 1);
-		setenv("boot.netif.gateway", inet_ntoa(gateip), 1);
-		setenv("boot.netif.server", inet_ntoa(rootip), 1);
-		if (bootplayer.Hardware == ETHER_TYPE) {
-		    sprintf(temp, "%6D", bootplayer.CAddr, ":");
-		    setenv("boot.netif.hwaddr", temp, 1);
-		}
-		if (intf_mtu != 0) {
-			char mtu[16];
-			sprintf(mtu, "%u", intf_mtu);
-			setenv("boot.netif.mtu", mtu, 1);
-		}
-		printf("pxe_open: server addr: %s\n", inet_ntoa(rootip));
-		printf("pxe_open: server path: %s\n", rootpath);
-		printf("pxe_open: gateway ip:  %s\n", inet_ntoa(gateip));
+		if (rootip.s_addr == 0) {
+			/*
+			 * Try to extract the RFC1048 data from PXE.
+			 * If that fails, do a bootp/dhcp request to find out
+			 * where our NFS/TFTP server is. Even if we don't get
+			 * back the proper information, fall back to the server
+			 * which brought us to life and a default rootpath.
+			 */
 
-		if (netproto == NET_TFTP) {
-			setenv("boot.tftproot.server", inet_ntoa(rootip), 1);
-			setenv("boot.tftproot.path", rootpath, 1);
-		} else if (netproto == NET_NFS) {
-			setenv("boot.nfsroot.server", inet_ntoa(rootip), 1);
-			setenv("boot.nfsroot.path", rootpath, 1);
-		}
-		setenv("dhcp.host-name", hostname, 1);
+			if (dhcp_try_rfc1048(bootplayer.vendor.d, BOOTP_DHCPVEND) < 0) {
+				if (pxe_debug)
+					printf("pxe_open: no RFC1048 data in PXE Cache\n");
+				bootp(pxe_sock, BOOTP_PXE);
+			} else if (pxe_debug) {
+				printf("pxe_open: loaded RFC1048 data from PXE Cache\n");
+			}
 
-		setenv("pxeboot.ip", inet_ntoa(myip), 1);
-		if (bootplayer.Hardware == ETHER_TYPE) {
-		    sprintf(temp, "%6D", bootplayer.CAddr, ":");
-		    setenv("pxeboot.hwaddr", temp, 1);
+#ifdef LOADER_TFTP_SUPPORT
+			bootp(pxe_sock, BOOTP_PXE);
+#endif
+			if (rootip.s_addr == 0)
+				rootip.s_addr = bootplayer.sip;
+			if (gateip.s_addr == 0)
+				gateip.s_addr = bootplayer.gip;
+			if (myip.s_addr == 0)
+				myip.s_addr = bootplayer.yip;
+			if (servip.s_addr == 0)
+				servip = rootip;
+
+			netproto = NET_NFS;
+			if (tftpip.s_addr != 0) {
+				netproto = NET_TFTP;
+				rootip.s_addr = tftpip.s_addr;
+			}
+
+			if (netproto == NET_NFS && !rootpath[0])
+				strcpy(rootpath, PXENFSROOTPATH);
+
+			for (i = 0; rootpath[i] != '\0' && i < FNAME_SIZE; i++)
+				if (rootpath[i] == ':')
+					break;
+			if (i && i != FNAME_SIZE && rootpath[i] == ':') {
+				rootpath[i++] = '\0';
+				if (inet_addr(&rootpath[0]) != INADDR_NONE)
+					rootip.s_addr = inet_addr(&rootpath[0]);
+				bcopy(&rootpath[i], &temp[0], strlen(&rootpath[i]) + 1);
+				bcopy(&temp[0], &rootpath[0], strlen(&rootpath[i]) + 1);
+			}
+			setenv("boot.netif.ip", inet_ntoa(myip), 1);
+			setenv("boot.netif.netmask", intoa(netmask), 1);
+			setenv("boot.netif.gateway", inet_ntoa(gateip), 1);
+			setenv("boot.netif.server", inet_ntoa(rootip), 1);
+			if (bootplayer.Hardware == ETHER_TYPE) {
+				sprintf(temp, "%6D", bootplayer.CAddr, ":");
+				setenv("boot.netif.hwaddr", temp, 1);
+			}
+			if (intf_mtu != 0) {
+				char mtu[16];	/* decimal MTU string */
+				snprintf(mtu, sizeof(mtu), "%u", intf_mtu);
+				setenv("boot.netif.mtu", mtu, 1);
+			}
+			printf("pxe_open: server addr: %s\n", inet_ntoa(rootip));
+			printf("pxe_open: server path: %s\n", rootpath);
+			printf("pxe_open: gateway ip:  %s\n", inet_ntoa(gateip));
+			printf("pxe_open: my ip:       %s\n", inet_ntoa(myip));
+			printf("pxe_open: netmask:     %s\n", intoa(netmask));
+			printf("pxe_open: servip:      %s\n", inet_ntoa(servip));
+
+			if (netproto == NET_TFTP) {
+				setenv("boot.tftproot.server", inet_ntoa(rootip), 1);
+				setenv("boot.tftproot.path", rootpath, 1);
+			} else if (netproto == NET_NFS) {
+				setenv("boot.nfsroot.server", inet_ntoa(rootip), 1);
+				setenv("boot.nfsroot.path", rootpath, 1);
+			}
+			setenv("dhcp.host-name", hostname, 1);
+
+			setenv("pxeboot.ip", inet_ntoa(myip), 1);
+			if (bootplayer.Hardware == ETHER_TYPE) {
+				sprintf(temp, "%6D", bootplayer.CAddr, ":");
+				setenv("pxeboot.hwaddr", temp, 1);
+			}
 		}
 	}
-    }
-    pxe_opens++;
-    f->f_devdata = &pxe_sock;
-    return (error);
+	pxe_opens++;
+	f->f_devdata = &pxe_sock;
+	return (error);
 }
 
 static int
 pxe_close(struct open_file *f)
 {
 
 #ifdef	PXE_DEBUG
-    if (pxe_debug)
-	printf("pxe_close: opens=%d\n", pxe_opens);
+	if (pxe_debug)
+		printf("pxe_close: opens=%d\n", pxe_opens);
 #endif
 
-    /* On last close, do netif close, etc. */
-    f->f_devdata = NULL;
-    /* Extra close call? */
-    if (pxe_opens <= 0)
-	return (0);
-    pxe_opens--;
-    /* Not last close? */
-    if (pxe_opens > 0)
-	return(0);
+	/* On last close, do netif close, etc. */
+	f->f_devdata = NULL;
+	/* Extra close call? */
+	if (pxe_opens <= 0)
+		return (0);
+	pxe_opens--;
+	/* Not last close? */
+	if (pxe_opens > 0)
+		return (0);
 
-    if (netproto == NET_NFS) {
-	/* get an NFS filehandle for our root filesystem */
-	pxe_setnfshandle(rootpath);
-    }
+	if (netproto == NET_NFS) {
+		/* get an NFS filehandle for our root filesystem */
+		pxe_setnfshandle(rootpath);
+	}
 
-    if (pxe_sock >= 0) {
+	if (pxe_sock >= 0) {
 
 #ifdef PXE_DEBUG
 	if (pxe_debug)
-	    printf("pxe_close: calling netif_close()\n");
+		printf("pxe_close: calling netif_close()\n");
 #endif
 	netif_close(pxe_sock);
 	pxe_sock = -1;
-    }
-    return (0);
+	}
+	return (0);
 }
 
 static int
 pxe_print(int verbose)
 {
 	char line[255];
 	if (pxe_call == NULL)
 		return (0);
 
 	printf("%s devices:", pxedisk.dv_name);
 	if (pager_output("\n") != 0)
 		return (1);
 	if (verbose) {
 		snprintf(line, sizeof(line), "    pxe0:    %s:%s\n",
 		    inet_ntoa(rootip), rootpath);
 	} else {
 		snprintf(line, sizeof(line), "    pxe0:\n");
 	}
 	return (pager_output(line));
 }
 
 static void
 pxe_cleanup(void)
 {
 #ifdef PXE_DEBUG
 	t_PXENV_UNLOAD_STACK *unload_stack_p =
-	    (t_PXENV_UNLOAD_STACK *)scratch_buffer;
+		(t_PXENV_UNLOAD_STACK *)scratch_buffer;
 	t_PXENV_UNDI_SHUTDOWN *undi_shutdown_p =
-	    (t_PXENV_UNDI_SHUTDOWN *)scratch_buffer;
+		(t_PXENV_UNDI_SHUTDOWN *)scratch_buffer;
 #endif
 
 	if (pxe_call == NULL)
 		return;
 
 	pxe_call(PXENV_UNDI_SHUTDOWN);
 
 #ifdef PXE_DEBUG
 	if (pxe_debug && undi_shutdown_p->Status != 0)
 		printf("pxe_cleanup: UNDI_SHUTDOWN failed %x\n",
-		       undi_shutdown_p->Status);
+		    undi_shutdown_p->Status);
 #endif
 
 	pxe_call(PXENV_UNLOAD_STACK);
 
-#ifdef PXE_DEBUG	
+#ifdef PXE_DEBUG
 	if (pxe_debug && unload_stack_p->Status != 0)
 		printf("pxe_cleanup: UNLOAD_STACK failed %x\n",
 		    unload_stack_p->Status);
 #endif
 }
 
 void
 pxe_perror(int err)
 {
 	return;
 }
 
 /*
  * Reach inside the libstand NFS code and dig out an NFS handle
  * for the root filesystem.
  */
-#ifdef OLD_NFSV2
-struct nfs_iodesc {
-	struct	iodesc	*iodesc;
-	off_t	off;
-	u_char	fh[NFS_FHSIZE];
-	/* structure truncated here */
-};
-extern struct	nfs_iodesc nfs_root_node;
-extern int      rpc_port;
-
-static void
-pxe_rpcmountcall()
-{
-	struct	iodesc *d;
-	int     error;
-
-	if (!(d = socktodesc(pxe_sock)))
-		return;
-        d->myport = htons(--rpc_port);
-        d->destip = rootip;
-	if ((error = nfs_getrootfh(d, rootpath, nfs_root_node.fh)) != 0) 
-		printf("NFS MOUNT RPC error: %d\n", error);
-	nfs_root_node.iodesc = d;
-}
-
-static void
-pxe_setnfshandle(char *rootpath)
-{
-	int	i;
-	u_char	*fh;
-	char	buf[2 * NFS_FHSIZE + 3], *cp;
-
-	/*
-	 * If NFS files were never opened, we need to do mount call
-	 * ourselves. Use nfs_root_node.iodesc as flag indicating
-	 * previous NFS usage.
-	 */
-	if (nfs_root_node.iodesc == NULL)
-		pxe_rpcmountcall();
-
-	fh = &nfs_root_node.fh[0];
-	buf[0] = 'X';
-	cp = &buf[1];
-	for (i = 0; i < NFS_FHSIZE; i++, cp += 2)
-		sprintf(cp, "%02x", fh[i]);
-	sprintf(cp, "X");
-	setenv("boot.nfsroot.nfshandle", buf, 1);
-}
-#else	/* !OLD_NFSV2 */
-
 #define	NFS_V3MAXFHSIZE		64
 
 struct nfs_iodesc {
 	struct iodesc *iodesc;
 	off_t off;
 	uint32_t fhsize;
 	u_char fh[NFS_V3MAXFHSIZE];
 	/* structure truncated */
 };
 extern struct nfs_iodesc nfs_root_node;
 extern int rpc_port;
 
 static void
 pxe_rpcmountcall()
 {
 	struct iodesc *d;
 	int error;
 
 	if (!(d = socktodesc(pxe_sock)))
 		return;
-        d->myport = htons(--rpc_port);
-        d->destip = rootip;
+	d->myport = htons(--rpc_port);
+	d->destip = rootip;
 	if ((error = nfs_getrootfh(d, rootpath, &nfs_root_node.fhsize,
-	    nfs_root_node.fh)) != 0) {
+		nfs_root_node.fh)) != 0) {
 		printf("NFS MOUNT RPC error: %d\n", error);
 		nfs_root_node.fhsize = 0;
 	}
 	nfs_root_node.iodesc = d;
 }
 
 static void
 pxe_setnfshandle(char *rootpath)
 {
 	int i;
 	u_char *fh;
 	char buf[2 * NFS_V3MAXFHSIZE + 3], *cp;
 
 	/*
 	 * If NFS files were never opened, we need to do mount call
 	 * ourselves. Use nfs_root_node.iodesc as flag indicating
 	 * previous NFS usage.
 	 */
 	if (nfs_root_node.iodesc == NULL)
 		pxe_rpcmountcall();
 
 	fh = &nfs_root_node.fh[0];
 	buf[0] = 'X';
 	cp = &buf[1];
 	for (i = 0; i < nfs_root_node.fhsize; i++, cp += 2)
 		sprintf(cp, "%02x", fh[i]);
 	sprintf(cp, "X");
 	setenv("boot.nfsroot.nfshandle", buf, 1);
 	sprintf(buf, "%d", nfs_root_node.fhsize);
 	setenv("boot.nfsroot.nfshandlelen", buf, 1);
 }
-#endif	/* OLD_NFSV2 */
 
 void
 pxenv_call(int func)
 {
 #ifdef PXE_DEBUG
 	if (pxe_debug)
 		printf("pxenv_call %x\n", func);
 #endif
 	
 	bzero(&v86, sizeof(v86));
 	bzero(data_buffer, sizeof(data_buffer));
 
 	__pxenvseg = pxenv_p->RMEntry.segment;
 	__pxenvoff = pxenv_p->RMEntry.offset;
 	
 	v86.ctl  = V86_ADDR | V86_CALLF | V86_FLAGS;
 	v86.es   = VTOPSEG(scratch_buffer);
 	v86.edi  = VTOPOFF(scratch_buffer);
 	v86.addr = (VTOPSEG(__pxenventry) << 16) | VTOPOFF(__pxenventry);
 	v86.ebx  = func;
 	v86int();
 	v86.ctl  = V86_FLAGS;
 }
 
 void
 bangpxe_call(int func)
 {
 #ifdef PXE_DEBUG
 	if (pxe_debug)
 		printf("bangpxe_call %x\n", func);
 #endif
-	
+
 	bzero(&v86, sizeof(v86));
 	bzero(data_buffer, sizeof(data_buffer));
 
 	__bangpxeseg = pxe_p->EntryPointSP.segment;
 	__bangpxeoff = pxe_p->EntryPointSP.offset;
-	
+
 	v86.ctl  = V86_ADDR | V86_CALLF | V86_FLAGS;
 	v86.edx  = VTOPSEG(scratch_buffer);
 	v86.eax  = VTOPOFF(scratch_buffer);
 	v86.addr = (VTOPSEG(__bangpxeentry) << 16) | VTOPOFF(__bangpxeentry);
 	v86.ebx  = func;
 	v86int();
 	v86.ctl  = V86_FLAGS;
 }
 
 
-time_t
-getsecs(void)
-{
-	time_t n = 0;
-	time(&n);
-	return n;
-}
-
 static int
 pxe_netif_match(struct netif *nif, void *machdep_hint)
 {
 	return 1;
 }
 
-
 static int
 pxe_netif_probe(struct netif *nif, void *machdep_hint)
 {
 	t_PXENV_UDP_OPEN *udpopen_p = (t_PXENV_UDP_OPEN *)scratch_buffer;
 
 	if (pxe_call == NULL)
 		return -1;
 
 	bzero(udpopen_p, sizeof(*udpopen_p));
 	udpopen_p->src_ip = bootplayer.yip;
 	pxe_call(PXENV_UDP_OPEN);
 
 	if (udpopen_p->status != 0) {
 		printf("pxe_netif_probe: failed %x\n", udpopen_p->status);
 		return -1;
 	}
 	return 0;
 }
 
 static void
 pxe_netif_end(struct netif *nif)
 {
 	t_PXENV_UDP_CLOSE *udpclose_p = (t_PXENV_UDP_CLOSE *)scratch_buffer;
 	bzero(udpclose_p, sizeof(*udpclose_p));
 
 	pxe_call(PXENV_UDP_CLOSE);
 	if (udpclose_p->status != 0)
 		printf("pxe_end failed %x\n", udpclose_p->status);
 }
 
 static void
 pxe_netif_init(struct iodesc *desc, void *machdep_hint)
 {
 	int i;
 	for (i = 0; i < 6; ++i)
 		desc->myea[i] = bootplayer.CAddr[i];
 	desc->xid = bootplayer.ident;
 }
 
 static int
 pxe_netif_get(struct iodesc *desc, void *pkt, size_t len, time_t timeout)
 {
 	return len;
 }
 
 static int
 pxe_netif_put(struct iodesc *desc, void *pkt, size_t len)
 {
 	return len;
 }
 
 ssize_t
 sendudp(struct iodesc *h, void *pkt, size_t len)
 {
 	t_PXENV_UDP_WRITE *udpwrite_p = (t_PXENV_UDP_WRITE *)scratch_buffer;
 	bzero(udpwrite_p, sizeof(*udpwrite_p));
-	
+
 	udpwrite_p->ip             = h->destip.s_addr;
 	udpwrite_p->dst_port       = h->destport;
 	udpwrite_p->src_port       = h->myport;
 	udpwrite_p->buffer_size    = len;
 	udpwrite_p->buffer.segment = VTOPSEG(pkt);
 	udpwrite_p->buffer.offset  = VTOPOFF(pkt);
 
 	if (netmask == 0 || SAMENET(myip, h->destip, netmask))
 		udpwrite_p->gw = 0;
 	else
 		udpwrite_p->gw = gateip.s_addr;
 
 	pxe_call(PXENV_UDP_WRITE);
 
 #if 0
 	/* XXX - I dont know why we need this. */
 	delay(1000);
 #endif
 	if (udpwrite_p->status != 0) {
-		/* XXX: This happens a lot.  It shouldn't. */
+		/* XXX: This happens a lot. It shouldn't. */
 		if (udpwrite_p->status != 1)
 			printf("sendudp failed %x\n", udpwrite_p->status);
 		return -1;
 	}
 	return len;
 }
 
 ssize_t
 readudp(struct iodesc *h, void *pkt, size_t len, time_t timeout)
 {
 	t_PXENV_UDP_READ *udpread_p = (t_PXENV_UDP_READ *)scratch_buffer;
 	struct udphdr *uh = NULL;
-	
+
 	uh = (struct udphdr *) pkt - 1;
 	bzero(udpread_p, sizeof(*udpread_p));
-	
+
 	udpread_p->dest_ip        = h->myip.s_addr;
 	udpread_p->d_port         = h->myport;
 	udpread_p->buffer_size    = len;
 	udpread_p->buffer.segment = VTOPSEG(data_buffer);
 	udpread_p->buffer.offset  = VTOPOFF(data_buffer);
 
 	pxe_call(PXENV_UDP_READ);
 
 #if 0
 	/* XXX - I dont know why we need this. */
 	delay(1000);
 #endif
 	if (udpread_p->status != 0) {
-		/* XXX: This happens a lot.  It shouldn't. */
+		/* XXX: This happens a lot. It shouldn't. */
 		if (udpread_p->status != 1)
 			printf("readudp failed %x\n", udpread_p->status);
 		return -1;
 	}
 	bcopy(data_buffer, pkt, udpread_p->buffer_size);
 	uh->uh_sport = udpread_p->s_port;
 	return udpread_p->buffer_size;
 }
Index: stable/11/sys/boot/i386/libi386/pxe.h
===================================================================
--- stable/11/sys/boot/i386/libi386/pxe.h	(revision 329098)
+++ stable/11/sys/boot/i386/libi386/pxe.h	(revision 329099)
@@ -1,513 +1,513 @@
 /*
  * Copyright (c) 2000 Alfred Perlstein <alfred@freebsd.org>
  * All rights reserved.
  * Copyright (c) 2000 Paul Saab <ps@freebsd.org>
  * All rights reserved.
  * Copyright (c) 2000 John Baldwin <jhb@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * The typedefs and structures declared in this file
  * clearly violate style(9), the reason for this is to conform to the
  * typedefs/structure-names used in the Intel literature to avoid confusion.
  *
  * It's for your own good. :)
  */
 
 /* It seems that intel didn't think about ABI,
  * either that or 16bit ABI != 32bit ABI (which seems reasonable)
  * I have to thank Intel for the hair loss I incurred trying to figure
  * out why PXE was mis-reading structures I was passing it (at least
  * from my point of view)
  *
  * Solution: use gcc's '__packed' to correctly align
  * structures passed into PXE
  * Question: does this really work for PXE's expected ABI?
  */
 #define	PACKED		__packed
 
 #define	S_SIZE(s)	s, sizeof(s) - 1
 
 #define	PXENFSROOTPATH	"/pxeroot"
 
 typedef struct {
 	uint16_t		offset;
 	uint16_t		segment;
 } SEGOFF16_t;
 
 typedef struct {
 	uint16_t		Seg_Addr;
 	uint32_t		Phy_Addr;
 	uint16_t		Seg_Size;
 } SEGDESC_t;
 
 typedef	uint16_t		SEGSEL_t;
 typedef	uint16_t		PXENV_STATUS_t;
 typedef	uint32_t		IP4_t;
 typedef	uint32_t		ADDR32_t;
 typedef	uint16_t		UDP_PORT_t;
 
 #define	MAC_ADDR_LEN		16
 typedef	uint8_t			MAC_ADDR[MAC_ADDR_LEN];
 
 /* PXENV+ */
 typedef struct {
 	uint8_t		Signature[6];	/* 'PXENV+' */
 	uint16_t	Version;	/* MSB = major, LSB = minor */
 	uint8_t		Length;		/* structure length */
 	uint8_t		Checksum;	/* checksum pad */
 	SEGOFF16_t	RMEntry;	/* SEG:OFF to PXE entry point */
 	/* don't use PMOffset and PMSelector (from the 2.1 PXE manual) */
 	uint32_t	PMOffset;	/* Protected mode entry */
 	SEGSEL_t	PMSelector;	/* Protected mode selector */
 	SEGSEL_t	StackSeg;	/* Stack segment address */
 	uint16_t	StackSize;	/* Stack segment size (bytes) */
 	SEGSEL_t	BC_CodeSeg;	/* BC Code segment address */
 	uint16_t	BC_CodeSize;	/* BC Code segment size (bytes) */
 	SEGSEL_t	BC_DataSeg;	/* BC Data segment address */
 	uint16_t	BC_DataSize;	/* BC Data segment size (bytes) */
 	SEGSEL_t	UNDIDataSeg;	/* UNDI Data segment address */
 	uint16_t	UNDIDataSize;	/* UNDI Data segment size (bytes) */
 	SEGSEL_t	UNDICodeSeg;	/* UNDI Code segment address */
 	uint16_t	UNDICodeSize;	/* UNDI Code segment size (bytes) */
 	SEGOFF16_t	PXEPtr;		/* SEG:OFF to !PXE struct, 
 					   only present when Version > 2.1 */
 } PACKED pxenv_t;
 
 /* !PXE */
 typedef struct {
 	uint8_t		Signature[4];
 	uint8_t		StructLength;
 	uint8_t		StructCksum;
 	uint8_t		StructRev;
 	uint8_t		reserved_1;
 	SEGOFF16_t	UNDIROMID;
 	SEGOFF16_t	BaseROMID;
 	SEGOFF16_t	EntryPointSP;
 	SEGOFF16_t	EntryPointESP;
 	SEGOFF16_t	StatusCallout;
 	uint8_t		reserved_2;
 	uint8_t		SegDescCn;
 	SEGSEL_t	FirstSelector;
 	SEGDESC_t	Stack;
 	SEGDESC_t	UNDIData;
 	SEGDESC_t	UNDICode;
 	SEGDESC_t	UNDICodeWrite;
 	SEGDESC_t	BC_Data;
 	SEGDESC_t	BC_Code;
 	SEGDESC_t	BC_CodeWrite;
 } PACKED pxe_t;
 
 #define	PXENV_START_UNDI		0x0000
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint16_t	ax;
 	uint16_t	bx;
 	uint16_t	dx;
 	uint16_t	di;
 	uint16_t	es;
 } PACKED t_PXENV_START_UNDI;
 
 #define	PXENV_UNDI_STARTUP		0x0001
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_UNDI_STARTUP;
 
 #define	PXENV_UNDI_CLEANUP		0x0002
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_UNDI_CLEANUP;
 
 #define	PXENV_UNDI_INITIALIZE		0x0003
 typedef struct {
 	PXENV_STATUS_t	Status;
 	ADDR32_t	ProtocolIni;	/* Phys addr of a copy of the driver module */
 	uint8_t		reserved[8];
 } PACKED t_PXENV_UNDI_INITALIZE;
 
 
 #define	MAXNUM_MCADDR		8
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint16_t	MCastAddrCount;
 	MAC_ADDR	McastAddr[MAXNUM_MCADDR];
 } PACKED t_PXENV_UNDI_MCAST_ADDRESS;
 
 #define	PXENV_UNDI_RESET_ADAPTER	0x0004		
 typedef struct {
 	PXENV_STATUS_t	Status;
 	t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf;
 } PACKED t_PXENV_UNDI_RESET;
 
 #define	PXENV_UNDI_SHUTDOWN		0x0005
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_UNDI_SHUTDOWN;
 
 #define	PXENV_UNDI_OPEN			0x0006
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint16_t	OpenFlag;
 	uint16_t	PktFilter;
 #	define FLTR_DIRECTED	0x0001
 #	define FLTR_BRDCST	0x0002
-#	define FLTR_PRMSCS	0x0003
-#	define FLTR_SRC_RTG	0x0004
+#	define FLTR_PRMSCS	0x0004
+#	define FLTR_SRC_RTG	0x0008
 
 	t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf;
 } PACKED t_PXENV_UNDI_OPEN;
 
 #define	PXENV_UNDI_CLOSE		0x0007
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_UNDI_CLOSE;
 
 #define	PXENV_UNDI_TRANSMIT		0x0008
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint8_t		Protocol;
 #	define P_UNKNOWN	0
 #	define P_IP		1
 #	define P_ARP		2
 #	define P_RARP		3
 
 	uint8_t		XmitFlag;
 #	define XMT_DESTADDR	0x0000
 #	define XMT_BROADCAST	0x0001
 
 	SEGOFF16_t	DestAddr;
 	SEGOFF16_t	TBD;
 	uint32_t	Reserved[2];
 } PACKED t_PXENV_UNDI_TRANSMIT;
 
 #define	MAX_DATA_BLKS		8
 typedef struct {
 	uint16_t	ImmedLength;
 	SEGOFF16_t	Xmit;
 	uint16_t	DataBlkCount;
 	struct	DataBlk {
 		uint8_t		TDPtrType;
 		uint8_t		TDRsvdByte;
 		uint16_t	TDDataLen;
 		SEGOFF16_t	TDDataPtr;
 	} DataBlock[MAX_DATA_BLKS];
 } PACKED t_PXENV_UNDI_TBD;
 
 #define	PXENV_UNDI_SET_MCAST_ADDRESS	0x0009
 typedef struct {
 	PXENV_STATUS_t	Status;
 	t_PXENV_UNDI_MCAST_ADDRESS R_Mcast_Buf;
 } PACKED t_PXENV_UNDI_SET_MCAST_ADDR;
 
 #define	PXENV_UNDI_SET_STATION_ADDRESS	0x000A
 typedef struct {
 	PXENV_STATUS_t	Status;
 	MAC_ADDR	StationAddress;		/* Temp MAC address to use */
 } PACKED t_PXENV_UNDI_SET_STATION_ADDR;
 
 #define	PXENV_UNDI_SET_PACKET_FILTER	0x000B
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint8_t		filter;			/* see UNDI_OPEN (0x0006) */
 } PACKED t_PXENV_UNDI_SET_PACKET_FILTER;
 
 #define	PXENV_UNDI_GET_INFORMATION	0x000C
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint16_t	BaseIo;			/* Adapter base I/O address */
 	uint16_t	IntNumber;		/* Adapter IRQ number */
 	uint16_t	MaxTranUnit;		/* Adapter maximum transmit unit */
 	uint16_t	HwType;			/* Type of protocol at the hardware addr */
 #	define ETHER_TYPE	1
 #	define EXP_ETHER_TYPE	2
 #	define IEEE_TYPE	6
 #	define ARCNET_TYPE	7
 
 	uint16_t	HwAddrLen;		/* Length of hardware address */
 	MAC_ADDR	CurrentNodeAddress;	/* Current hardware address */
 	MAC_ADDR	PermNodeAddress;	/* Permanent hardware address */
 	SEGSEL_t	ROMAddress;		/* Real mode ROM segment address */
 	uint16_t	RxBufCt;		/* Receive queue length */
 	uint16_t	TxBufCt;		/* Transmit queue length */
 } PACKED t_PXENV_UNDI_GET_INFORMATION;
 
 #define	PXENV_UNDI_GET_STATISTICS	0x000D
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint32_t	XmitGoodFrames;		/* Number of successful transmissions */
 	uint32_t	RcvGoodFrames;		/* Number of good frames received */
 	uint32_t	RcvCRCErrors;		/* Number of frames with CRC errors */
 	uint32_t	RcvResourceErrors;	/* Number of frames dropped */
 } PACKED t_PXENV_UNDI_GET_STATISTICS;
 
 #define	PXENV_UNDI_CLEAR_STATISTICS	0x000E
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_UNDI_CLEAR_STATISTICS;
 
 #define	PXENV_UNDI_INITIATE_DIAGS	0x000F
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_UNDI_INITIATE_DIAGS;
 
 #define	PXENV_UNDI_FORCE_INTERRUPT	0x0010
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_UNDI_FORCE_INTERRUPT;
 
 #define	PXENV_UNDI_GET_MCAST_ADDRESS	0x0011
 typedef struct {
 	PXENV_STATUS_t	Status;
 	IP4_t		InetAddr;		/* IP mulicast address */
 	MAC_ADDR	MediaAddr;		/* MAC multicast address */
 } PACKED t_PXENV_UNDI_GET_MCAST_ADDR;
 
 #define	PXENV_UNDI_GET_NIC_TYPE		0x0012
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint8_t		NicType;		/* Type of NIC */
 #	define PCI_NIC		2
 #	define PnP_NIC		3
 #	define CardBus_NIC	4
 
 	union {
 		struct {
 			uint16_t	Vendor_ID;
 			uint16_t	Dev_ID;
 			uint8_t		Base_Class;
 			uint8_t		Sub_Class;
 			uint8_t		Prog_Intf;
 			uint8_t		Rev;
 			uint16_t	BusDevFunc;
 			uint16_t	SubVendor_ID;
 			uint16_t	SubDevice_ID;
 		} pci, cardbus;
 		struct {
 			uint32_t	EISA_Dev_ID;
 			uint8_t		Base_Class;
 			uint8_t		Sub_Class;
 			uint8_t		Prog_Intf;
 			uint16_t	CardSelNum;
 		} pnp;
 	} info;
 } PACKED t_PXENV_UNDI_GET_NIC_TYPE;
 
 #define	PXENV_UNDI_GET_IFACE_INFO	0x0013
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint8_t		IfaceType[16];		/* Name of MAC type in ASCII. */
 	uint32_t	LinkSpeed;		/* Defined in NDIS 2.0 spec */
 	uint32_t	ServiceFlags;		/* Defined in NDIS 2.0 spec */
 	uint32_t	Reserved[4];		/* must be 0 */
 } PACKED t_PXENV_UNDI_GET_NDIS_INFO;
 
 #define	PXENV_UNDI_ISR			0x0014
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint16_t	FuncFlag;		/* PXENV_UNDI_ISR_OUT_xxx */
 	uint16_t	BufferLength;		/* Length of Frame */
 	uint16_t	FrameLength;		/* Total length of receiver frame */
 	uint16_t	FrameHeaderLength;	/* Length of the media header in Frame */
 	SEGOFF16_t	Frame;			/* receive buffer */
 	uint8_t		ProtType;		/* Protocol type */
 	uint8_t		PktType;		/* Packet Type */
 #	define PXENV_UNDI_ISR_IN_START		1
 #	define PXENV_UNDI_ISR_IN_PROCESS	2
 #	define PXENV_UNDI_ISR_IN_GET_NEXT	3
 
 	/* one of these will be returned for PXENV_UNDI_ISR_IN_START */
 #	define PXENV_UNDI_ISR_OUT_OURS		0
 #	define PXENV_UNDI_ISR_OUT_NOT_OUTS	1
 
 	/*
 	 * one of these will bre returned for PXEND_UNDI_ISR_IN_PROCESS
 	 * and PXENV_UNDI_ISR_IN_GET_NEXT
 	 */
 #	define PXENV_UNDI_ISR_OUT_DONE		0
 #	define PXENV_UNDI_ISR_OUT_TRANSMIT	2
 #	define PXENV_UNDI_ISR_OUT_RECIEVE	3
 #	define PXENV_UNDI_ISR_OUT_BUSY		4
 } PACKED t_PXENV_UNDI_ISR;
 
 #define	PXENV_STOP_UNDI			0x0015
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_STOP_UNDI;
 
 #define	PXENV_TFTP_OPEN			0x0020
 typedef struct {
 	PXENV_STATUS_t	Status;
 	IP4_t		ServerIPAddress;
 	IP4_t		GatewayIPAddress;
 	uint8_t		FileName[128];
 	UDP_PORT_t	TFTPPort;
 	uint16_t	PacketSize;
 } PACKED t_PXENV_TFTP_OPEN;
 
 #define	PXENV_TFTP_CLOSE		0x0021
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_TFTP_CLOSE;
 
 #define	PXENV_TFTP_READ			0x0022
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint16_t	PacketNumber;
 	uint16_t	BufferSize;
 	SEGOFF16_t	Buffer;
 } PACKED t_PXENV_TFTP_READ;
 
 #define	PXENV_TFTP_READ_FILE		0x0023
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint8_t		FileName[128];
 	uint32_t	BufferSize;
 	ADDR32_t	Buffer;
 	IP4_t		ServerIPAddress;
 	IP4_t		GatewayIPAdress;
 	IP4_t		McastIPAdress;
 	UDP_PORT_t	TFTPClntPort;
 	UDP_PORT_t	TFTPSrvPort;
 	uint16_t	TFTPOpenTimeOut;
 	uint16_t	TFTPReopenDelay;
 } PACKED t_PXENV_TFTP_READ_FILE;
 
 #define	PXENV_TFTP_GET_FSIZE		0x0025
 typedef struct {
 	PXENV_STATUS_t	Status;
 	IP4_t		ServerIPAddress;
 	IP4_t		GatewayIPAdress;
 	uint8_t		FileName[128];
 	uint32_t	FileSize;
 } PACKED t_PXENV_TFTP_GET_FSIZE;
 
 #define	PXENV_UDP_OPEN			0x0030
 typedef struct {
 	PXENV_STATUS_t	status;
 	IP4_t		src_ip;		/* IP address of this station */
 } PACKED t_PXENV_UDP_OPEN;
 
 #define	PXENV_UDP_CLOSE			0x0031
 typedef struct {
 	PXENV_STATUS_t	status;
 } PACKED t_PXENV_UDP_CLOSE;
 
 #define	PXENV_UDP_READ			0x0032
 typedef struct {
 	PXENV_STATUS_t	status;
 	IP4_t		src_ip;		/* IP of sender */
 	IP4_t		dest_ip;	/* Only accept packets sent to this IP */
 	UDP_PORT_t	s_port;		/* UDP source port of sender */
 	UDP_PORT_t	d_port;		/* Only accept packets sent to this port */
 	uint16_t	buffer_size;	/* Size of the packet buffer */
 	SEGOFF16_t	buffer;		/* SEG:OFF to the packet buffer */
 } PACKED t_PXENV_UDP_READ;
 
 #define	PXENV_UDP_WRITE			0x0033
 typedef struct {
 	PXENV_STATUS_t	status;
 	IP4_t		ip;		/* dest ip addr */
 	IP4_t		gw;		/* ip gateway */
 	UDP_PORT_t	src_port;	/* source udp port */
 	UDP_PORT_t	dst_port;	/* destination udp port */
 	uint16_t	buffer_size;	/* Size of the packet buffer */
 	SEGOFF16_t	buffer;		/* SEG:OFF to the packet buffer */
 } PACKED t_PXENV_UDP_WRITE;
 
 #define	PXENV_UNLOAD_STACK		0x0070
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint8_t		reserved[10];
 } PACKED t_PXENV_UNLOAD_STACK;
 
 
 #define	PXENV_GET_CACHED_INFO		0x0071
 typedef struct {
 	PXENV_STATUS_t	Status;
 	uint16_t	PacketType;	/* type (defined right here) */
 #	define PXENV_PACKET_TYPE_DHCP_DISCOVER  1
 #	define PXENV_PACKET_TYPE_DHCP_ACK       2
 #	define PXENV_PACKET_TYPE_BINL_REPLY     3
 	uint16_t	BufferSize;	/* max to copy, leave at 0 for pointer */
 	SEGOFF16_t	Buffer;		/* copy to, leave at 0 for pointer */
 	uint16_t	BufferLimit;	/* max size of buffer in BC dataseg ? */
 } PACKED t_PXENV_GET_CACHED_INFO;
 
 
 /* structure filled in by PXENV_GET_CACHED_INFO 
  * (how we determine which IP we downloaded the initial bootstrap from)
  * words can't describe...
  */
 typedef struct {
 	uint8_t		opcode;
 #	define BOOTP_REQ	1
 #	define BOOTP_REP	2
 	uint8_t		Hardware;	/* hardware type */
 	uint8_t		Hardlen;	/* hardware addr len */
 	uint8_t		Gatehops;	/* zero it */
 	uint32_t	ident;		/* random number chosen by client */
 	uint16_t	seconds;	/* seconds since did initial bootstrap */
 	uint16_t	Flags;		/* seconds since did initial bootstrap */
 #	define BOOTP_BCAST	0x8000		/* ? */
 	IP4_t		cip;		/* Client IP */
 	IP4_t		yip;		/* Your IP */
 	IP4_t		sip;		/* IP to use for next boot stage */
 	IP4_t		gip;		/* Relay IP ? */
 	MAC_ADDR	CAddr;		/* Client hardware address */
 	uint8_t		Sname[64];	/* Server's hostname (Optional) */
 	uint8_t		bootfile[128];	/* boot filename */
 	union {
 #		if 1
 #		define BOOTP_DHCPVEND  1024    /* DHCP extended vendor field size */
 #		else
 #		define BOOTP_DHCPVEND  312	/* DHCP standard vendor field size */
 #		endif
 		uint8_t		d[BOOTP_DHCPVEND];	/* raw array of vendor/dhcp options */
 		struct {
 			uint8_t		magic[4];	/* DHCP magic cookie */
 #			ifndef		VM_RFC1048
 #			define		VM_RFC1048	0x63825363L	/* ? */
 #			endif
 			uint32_t	flags;		/* bootp flags/opcodes */
 			uint8_t		pad[56];	/* I don't think intel knows what a
 							   union does... */
 		} v;
 	} vendor;
 } PACKED BOOTPLAYER;
 
 #define	PXENV_RESTART_TFTP		0x0073
 #define	t_PXENV_RESTART_TFTP		t_PXENV_TFTP_READ_FILE
 
 #define	PXENV_START_BASE		0x0075
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_START_BASE;
 
 #define	PXENV_STOP_BASE			0x0076
 typedef struct {
 	PXENV_STATUS_t	Status;
 } PACKED t_PXENV_STOP_BASE;
Index: stable/11/sys/boot/i386/libi386/time.c
===================================================================
--- stable/11/sys/boot/i386/libi386/time.c	(revision 329098)
+++ stable/11/sys/boot/i386/libi386/time.c	(revision 329099)
@@ -1,109 +1,118 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <btxv86.h>
 #include "bootstrap.h"
 #include "libi386.h"
 
+time_t		getsecs(void);
 static int	bios_seconds(void);
 
 /*
  * Return the BIOS time-of-day value.
  *
  * XXX uses undocumented BCD support from libstand.
  */
 static int
 bios_seconds(void)
 {
     int			hr, minute, sec;
     
     v86.ctl = 0;
     v86.addr = 0x1a;		/* int 0x1a, function 2 */
     v86.eax = 0x0200;
     v86int();
 
     hr = bcd2bin((v86.ecx & 0xff00) >> 8);	/* hour in %ch */
     minute = bcd2bin(v86.ecx & 0xff);		/* minute in %cl */
     sec = bcd2bin((v86.edx & 0xff00) >> 8);	/* second in %dh */
     
     return (hr * 3600 + minute * 60 + sec);
 }
 
 /*
  * Return the time in seconds since the beginning of the day.
  *
  * Some BIOSes (notably qemu) don't correctly read the RTC
  * registers in an atomic way, sometimes returning bogus values.
  * Therefore we "debounce" the reading by accepting it only when
  * we got 8 identical values in succession.
  *
  * If we pass midnight, don't wrap back to 0.
  */
 time_t
 time(time_t *t)
 {
     static time_t lasttime;
     time_t now, check;
     int same, try;
 
     same = try = 0;
     check = bios_seconds();
     do {
 	now = check;
 	check = bios_seconds();
 	if (check != now)
 	    same = 0;
     } while (++same < 8 && ++try < 1000);
 
     if (now < lasttime)
 	now += 24 * 3600;
     lasttime = now;
     
     if (t != NULL)
 	*t = now;
     return(now);
+}
+
+time_t
+getsecs(void)
+{
+	time_t now = 0;		/* filled in by time() below */
+	(void)time(&now);	/* return value duplicates *t; ignore it */
+	return (now);
 }
 
 /*
  * Use the BIOS Wait function to pause for (period) microseconds.
  *
  * Resolution of this function is variable, but typically around
  * 1ms.
  */
 void
 delay(int period)
 {
     v86.ctl = 0;
     v86.addr = 0x15;		/* int 0x15, function 0x86 */
     v86.eax = 0x8600;
     v86.ecx = period >> 16;
     v86.edx = period & 0xffff;
     v86int();
 }
Index: stable/11/sys/boot/i386/loader/Makefile
===================================================================
--- stable/11/sys/boot/i386/loader/Makefile	(revision 329098)
+++ stable/11/sys/boot/i386/loader/Makefile	(revision 329099)
@@ -1,133 +1,134 @@
 # $FreeBSD$
 
 .include <src.opts.mk>
 MK_SSP=		no
 
 LOADER?=	loader
 PROG=		${LOADER}.sym
 MAN=	
 INTERNALPROG=
 NEWVERSWHAT?=	"bootstrap loader" x86
 VERSION_FILE=	${.CURDIR}/../loader/version
 
 # architecture-specific loader code
 SRCS=		main.c conf.c vers.c
 
 # Put LOADER_FIREWIRE_SUPPORT=yes in /etc/make.conf for FireWire/dcons support
 .if defined(LOADER_FIREWIRE_SUPPORT)
 CFLAGS+=	-DLOADER_FIREWIRE_SUPPORT
 LIBFIREWIRE=	${.OBJDIR}/../libfirewire/libfirewire.a
 .endif
 
 # Set by zfsloader Makefile
 .if defined(LOADER_ZFS_SUPPORT)
 CFLAGS+=	-DLOADER_ZFS_SUPPORT
 LIBZFSBOOT=	${.OBJDIR}/../../zfs/libzfsboot.a
 .endif
 
 # Enable PXE TFTP or NFS support, not both.
 .if defined(LOADER_TFTP_SUPPORT)
 CFLAGS+=	-DLOADER_TFTP_SUPPORT
 .else
 CFLAGS+=	-DLOADER_NFS_SUPPORT
 .endif
 
 # Include bcache code.
 HAVE_BCACHE=	yes
 
 # Enable PnP and ISA-PnP code.
 HAVE_PNP=	yes
 HAVE_ISABUS=	yes
 
 .if ${MK_FORTH} != "no"
 # Enable BootForth
 BOOT_FORTH=	yes
 CFLAGS+=	-DBOOT_FORTH -I${.CURDIR}/../../ficl -I${.CURDIR}/../../ficl/i386
 .if ${MACHINE_CPUARCH} == "amd64"
 LIBFICL=	${.OBJDIR}/../../ficl32/libficl.a
 .else
 LIBFICL=	${.OBJDIR}/../../ficl/libficl.a
 .endif
 .endif
 
 .if defined(LOADER_BZIP2_SUPPORT)
 CFLAGS+=	-DLOADER_BZIP2_SUPPORT
 .endif
 .if !defined(LOADER_NO_GZIP_SUPPORT)
 CFLAGS+=	-DLOADER_GZIP_SUPPORT
 .endif
 .if defined(LOADER_NANDFS_SUPPORT)
 CFLAGS+=	-DLOADER_NANDFS_SUPPORT
 .endif
 .if !defined(LOADER_NO_GELI_SUPPORT)
 CFLAGS+=	-DLOADER_GELI_SUPPORT
+CFLAGS+=	-I${.CURDIR}/../../geli
 LIBGELIBOOT=	${.OBJDIR}/../../geli/libgeliboot.a
 .PATH:		${.CURDIR}/../../../opencrypto
 SRCS+=		xform_aes_xts.c
 CFLAGS+=	-I${.CURDIR}/../../.. -D_STAND
 .endif
 
 # Always add MI sources
 .PATH:		${.CURDIR}/../../common
 .include	"${.CURDIR}/../../common/Makefile.inc"
 CFLAGS+=	-I${.CURDIR}/../../common
 CFLAGS+=	-I.
 
 CLEANFILES=	${LOADER} ${LOADER}.bin loader.help
 
 CFLAGS+=	-Wall
 LDFLAGS=	-static -Ttext 0x0
 
 # i386 standalone support library
 LIBI386=	${.OBJDIR}/../libi386/libi386.a
 CFLAGS+=	-I${.CURDIR}/..
 
 LIBSTAND=	${.OBJDIR}/../../libstand32/libstand.a
 
 # BTX components
 CFLAGS+=	-I${.CURDIR}/../btx/lib
 
 # Debug me!
 #CFLAGS+=	-g
 #LDFLAGS+=	-g
 
 # Pick up ../Makefile.inc early.
 .include <bsd.init.mk>
 
 ${LOADER}: ${LOADER}.bin ${BTXLDR} ${BTXKERN}
 	btxld -v -f aout -e ${LOADER_ADDRESS} -o ${.TARGET} -l ${BTXLDR} \
 		-b ${BTXKERN} ${LOADER}.bin
 
 ${LOADER}.bin: ${LOADER}.sym
 	strip -R .comment -R .note -o ${.TARGET} ${.ALLSRC}
 
 loader.help: help.common help.i386
 	cat ${.ALLSRC} | awk -f ${.CURDIR}/../../common/merge_help.awk > ${.TARGET}
 
 FILES=	${LOADER}
 # XXX INSTALLFLAGS_loader= -b
 FILESMODE_${LOADER}= ${BINMODE} -b
 
 .if !defined(LOADER_ONLY)
 .PATH: ${.CURDIR}/../../forth
 .include	"${.CURDIR}/../../forth/Makefile.inc"
 FILES+=	pcibios.4th
 
 FILES+=	loader.rc menu.rc
 .endif
 
 # XXX crt0.o needs to be first for pxeboot(8) to work
 OBJS=	${BTXCRT}
 
 DPADD=	${LIBFICL} ${LIBFIREWIRE} ${LIBZFSBOOT} ${LIBI386} ${LIBGELIBOOT} ${LIBSTAND}
 LDADD=	${LIBFICL} ${LIBFIREWIRE} ${LIBZFSBOOT} ${LIBI386} ${LIBGELIBOOT} ${LIBSTAND}
 
 .include <bsd.prog.mk>
 
 .if ${MACHINE_CPUARCH} == "amd64"
 beforedepend ${OBJS}: machine
 CLEANFILES+=	machine
 CFLAGS+=	-DLOADER_PREFER_AMD64
 machine: .NOMETA
 	ln -sf ${.CURDIR}/../../../i386/include machine
 .endif
Index: stable/11/sys/boot/i386/loader/main.c
===================================================================
--- stable/11/sys/boot/i386/loader/main.c	(revision 329098)
+++ stable/11/sys/boot/i386/loader/main.c	(revision 329099)
@@ -1,457 +1,466 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * MD bootstrap main() and assorted miscellaneous
  * commands.
  */
 
 #include <stand.h>
 #include <stddef.h>
 #include <string.h>
 #include <machine/bootinfo.h>
 #include <machine/cpufunc.h>
 #include <machine/psl.h>
 #include <sys/reboot.h>
+#include <common/drv.h>
 
 #include "bootstrap.h"
 #include "common/bootargs.h"
 #include "libi386/libi386.h"
 #include "libi386/smbios.h"
 #include "btxv86.h"
 
 #ifdef LOADER_ZFS_SUPPORT
 #include "../zfs/libzfs.h"
 #endif
 
 CTASSERT(sizeof(struct bootargs) == BOOTARGS_SIZE);
 CTASSERT(offsetof(struct bootargs, bootinfo) == BA_BOOTINFO);
 CTASSERT(offsetof(struct bootargs, bootflags) == BA_BOOTFLAGS);
 CTASSERT(offsetof(struct bootinfo, bi_size) == BI_SIZE);
 
 /* Arguments passed in from the boot1/boot2 loader */
 static struct bootargs *kargs;
 
 static u_int32_t	initial_howto;
 static u_int32_t	initial_bootdev;
 static struct bootinfo	*initial_bootinfo;
 
 struct arch_switch	archsw;		/* MI/MD interface boundary */
 
 static void		extract_currdev(void);
 static int		isa_inb(int port);
 static void		isa_outb(int port, int value);
 void			exit(int code);
 #ifdef LOADER_GELI_SUPPORT
+#include "geliboot.h"
 struct geli_boot_args	*gargs;
 #endif
 #ifdef LOADER_ZFS_SUPPORT
 struct zfs_boot_args	*zargs;
 static void		i386_zfs_probe(void);
 #endif
 
 /* from vers.c */
 extern	char bootprog_info[];
 
 /* XXX debugging */
 extern char end[];
 
 static void *heap_top;
 static void *heap_bottom;
 
 int
 main(void)
 {
     int			i;
 
     /* Pick up arguments */
     kargs = (void *)__args;
     initial_howto = kargs->howto;
     initial_bootdev = kargs->bootdev;
     initial_bootinfo = kargs->bootinfo ? (struct bootinfo *)PTOV(kargs->bootinfo) : NULL;
 
     /* Initialize the v86 register set to a known-good state. */
     bzero(&v86, sizeof(v86));
     v86.efl = PSL_RESERVED_DEFAULT | PSL_I;
 
     /* 
      * Initialise the heap as early as possible.  Once this is done, malloc() is usable.
      */
     bios_getmem();
 
 #if defined(LOADER_BZIP2_SUPPORT) || defined(LOADER_FIREWIRE_SUPPORT) || \
     defined(LOADER_GPT_SUPPORT) || defined(LOADER_ZFS_SUPPORT)
     if (high_heap_size > 0) {
 	heap_top = PTOV(high_heap_base + high_heap_size);
 	heap_bottom = PTOV(high_heap_base);
 	if (high_heap_base < memtop_copyin)
 	    memtop_copyin = high_heap_base;
     } else
 #endif
     {
 	heap_top = (void *)PTOV(bios_basemem);
 	heap_bottom = (void *)end;
     }
     setheap(heap_bottom, heap_top);
 
     /*
      * XXX Chicken-and-egg problem; we want to have console output early, but some
      * console attributes may depend on reading from eg. the boot device, which we
      * can't do yet.
      *
      * We can use printf() etc. once this is done.
      * If the previous boot stage has requested a serial console, prefer that.
      */
     bi_setboothowto(initial_howto);
     if (initial_howto & RB_MULTIPLE) {
 	if (initial_howto & RB_SERIAL)
 	    setenv("console", "comconsole vidconsole", 1);
 	else
 	    setenv("console", "vidconsole comconsole", 1);
     } else if (initial_howto & RB_SERIAL)
 	setenv("console", "comconsole", 1);
     else if (initial_howto & RB_MUTE)
 	setenv("console", "nullconsole", 1);
     cons_probe();
 
     /*
      * Initialise the block cache. Set the upper limit.
      */
     bcache_init(32768, 512);
 
     /*
      * Special handling for PXE and CD booting.
      */
     if (kargs->bootinfo == 0) {
 	/*
 	 * We only want the PXE disk to try to init itself in the below
 	 * walk through devsw if we actually booted off of PXE.
 	 */
 	if (kargs->bootflags & KARGS_FLAGS_PXE)
 	    pxe_enable(kargs->pxeinfo ? PTOV(kargs->pxeinfo) : NULL);
 	else if (kargs->bootflags & KARGS_FLAGS_CD)
 	    bc_add(initial_bootdev);
     }
 
     archsw.arch_autoload = i386_autoload;
     archsw.arch_getdev = i386_getdev;
     archsw.arch_copyin = i386_copyin;
     archsw.arch_copyout = i386_copyout;
     archsw.arch_readin = i386_readin;
     archsw.arch_isainb = isa_inb;
     archsw.arch_isaoutb = isa_outb;
 #ifdef LOADER_ZFS_SUPPORT
     archsw.arch_zfs_probe = i386_zfs_probe;
 
 #ifdef LOADER_GELI_SUPPORT
     if ((kargs->bootflags & KARGS_FLAGS_EXTARG) != 0) {
 	zargs = (struct zfs_boot_args *)(kargs + 1);
 	if (zargs != NULL && zargs->size >= offsetof(struct zfs_boot_args, gelipw)) {
+	    if (zargs->size >= offsetof(struct zfs_boot_args, keybuf_sentinel) &&
+	      zargs->keybuf_sentinel == KEYBUF_SENTINEL) {
+		geli_save_keybuf(zargs->keybuf);
+	    }
 	    if (zargs->gelipw[0] != '\0') {
 		setenv("kern.geom.eli.passphrase", zargs->gelipw, 1);
-		bzero(zargs->gelipw, sizeof(zargs->gelipw));
+		explicit_bzero(zargs->gelipw, sizeof(zargs->gelipw));
 	    }
 	}
     }
 #endif /* LOADER_GELI_SUPPORT */
 #else /* !LOADER_ZFS_SUPPORT */
 #ifdef LOADER_GELI_SUPPORT
     if ((kargs->bootflags & KARGS_FLAGS_EXTARG) != 0) {
 	gargs = (struct geli_boot_args *)(kargs + 1);
 	if (gargs != NULL && gargs->size >= offsetof(struct geli_boot_args, gelipw)) {
+	    if (gargs->size >= offsetof(struct geli_boot_args, keybuf_sentinel) &&
+	      gargs->keybuf_sentinel == KEYBUF_SENTINEL)	/* size-checked, as for zargs */
+		geli_save_keybuf(gargs->keybuf);
 	    if (gargs->gelipw[0] != '\0') {
 		setenv("kern.geom.eli.passphrase", gargs->gelipw, 1);
-		bzero(gargs->gelipw, sizeof(gargs->gelipw));
+		explicit_bzero(gargs->gelipw, sizeof(gargs->gelipw));
 	    }
 	}
     }
 #endif /* LOADER_GELI_SUPPORT */
 #endif /* LOADER_ZFS_SUPPORT */
 
     /*
      * March through the device switch probing for things.
      */
     for (i = 0; devsw[i] != NULL; i++)
 	if (devsw[i]->dv_init != NULL)
 	    (devsw[i]->dv_init)();
     printf("BIOS %dkB/%dkB available memory\n", bios_basemem / 1024, bios_extmem / 1024);
     if (initial_bootinfo != NULL) {
 	initial_bootinfo->bi_basemem = bios_basemem / 1024;
 	initial_bootinfo->bi_extmem = bios_extmem / 1024;
     }
 
     /* detect ACPI for future reference */
     biosacpi_detect();
 
     /* detect SMBIOS for future reference */
     smbios_detect(NULL);
 
     /* detect PCI BIOS for future reference */
     biospci_detect();
 
     printf("\n%s", bootprog_info);
 
     extract_currdev();				/* set $currdev and $loaddev */
     setenv("LINES", "24", 1);			/* optional */
     
     bios_getsmap();
 
     interact(NULL);
 
     /* if we ever get here, it is an error */
     return (1);
 }
 
 /*
  * Set the 'current device' by (if possible) recovering the boot device as
  * supplied by the initial bootstrap, then publish it through the "currdev"
  * and "loaddev" environment variables.
  *
  * Inputs are the file-level kargs, initial_bootdev and initial_bootinfo.
  * When the boot disk cannot be identified, unit 0 of the BIOS disk driver
  * is used and a diagnostic is printed.
  *
  * XXX should be extended for netbooting.
  */
 static void
 extract_currdev(void)
 {
     struct i386_devdesc		new_currdev;
 #ifdef LOADER_ZFS_SUPPORT
     /*
      * Receives values formatted with "%llu".  A 64-bit value needs up to
      * 20 decimal digits plus the terminating NUL, so 21 bytes are required;
      * a 20-byte buffer is overrun by one byte for values >= 10^19.
      */
     char			buf[21];
 #endif
     int				biosdev = -1;
 
     /* Assume we are booting from a BIOS disk by default */
     new_currdev.d_dev = &biosdisk;
 
     /* new-style boot loaders such as pxeldr and cdldr */
     if (kargs->bootinfo == 0) {
         if ((kargs->bootflags & KARGS_FLAGS_CD) != 0) {
 	    /* we are booting from a CD with cdboot */
 	    new_currdev.d_dev = &bioscd;
 	    new_currdev.d_unit = bc_bios2unit(initial_bootdev);
 	} else if ((kargs->bootflags & KARGS_FLAGS_PXE) != 0) {
 	    /* we are booting from pxeldr */
 	    new_currdev.d_dev = &pxedisk;
 	    new_currdev.d_unit = 0;
 	} else {
 	    /* we don't know what our boot device is */
 	    new_currdev.d_kind.biosdisk.slice = -1;
 	    new_currdev.d_kind.biosdisk.partition = 0;
 	    biosdev = -1;
 	}
 #ifdef LOADER_ZFS_SUPPORT
     } else if ((kargs->bootflags & KARGS_FLAGS_ZFS) != 0) {
 	zargs = NULL;
 	/* check for new style extended argument */
 	if ((kargs->bootflags & KARGS_FLAGS_EXTARG) != 0)
 	    zargs = (struct zfs_boot_args *)(kargs + 1);
 
 	if (zargs != NULL &&
 	    zargs->size >= offsetof(struct zfs_boot_args, primary_pool)) {
 	    /* sufficient data is provided */
 	    new_currdev.d_kind.zfs.pool_guid = zargs->pool;
 	    new_currdev.d_kind.zfs.root_guid = zargs->root;
 	    /* Export the primary pool/vdev only if the full struct was passed. */
 	    if (zargs->size >= sizeof(*zargs) && zargs->primary_vdev != 0) {
 		sprintf(buf, "%llu", zargs->primary_pool);
 		setenv("vfs.zfs.boot.primary_pool", buf, 1);
 		sprintf(buf, "%llu", zargs->primary_vdev);
 		setenv("vfs.zfs.boot.primary_vdev", buf, 1);
 	    }
 	} else {
 	    /* old style zfsboot block */
 	    new_currdev.d_kind.zfs.pool_guid = kargs->zfspool;
 	    new_currdev.d_kind.zfs.root_guid = 0;
 	}
 	new_currdev.d_dev = &zfs_dev;
 #endif
     } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) {
 	/* The passed-in boot device is bad */
 	new_currdev.d_kind.biosdisk.slice = -1;
 	new_currdev.d_kind.biosdisk.partition = 0;
 	biosdev = -1;
     } else {
 	new_currdev.d_kind.biosdisk.slice = B_SLICE(initial_bootdev) - 1;
 	new_currdev.d_kind.biosdisk.partition = B_PARTITION(initial_bootdev);
 	biosdev = initial_bootinfo->bi_bios_dev;
 
 	/*
 	 * If we are booted by an old bootstrap, we have to guess at the BIOS
 	 * unit number.  We will lose if there is more than one disk type
 	 * and we are not booting from the lowest-numbered disk type 
 	 * (ie. SCSI when IDE also exists).
 	 */
 	if ((biosdev == 0) && (B_TYPE(initial_bootdev) != 2))	/* biosdev doesn't match major */
 	    biosdev = 0x80 + B_UNIT(initial_bootdev);		/* assume harddisk */
     }
     new_currdev.d_type = new_currdev.d_dev->dv_type;
 
     /*
      * If we are booting off of a BIOS disk and we didn't succeed in determining
      * which one we booted off of, just use disk0: as a reasonable default.
      */
     if ((new_currdev.d_type == biosdisk.dv_type) &&
 	((new_currdev.d_unit = bd_bios2unit(biosdev)) == -1)) {
 	printf("Can't work out which disk we are booting from.\n"
 	       "Guessed BIOS device 0x%x not found by probes, defaulting to disk0:\n", biosdev);
 	new_currdev.d_unit = 0;
     }
 
 #ifdef LOADER_ZFS_SUPPORT
     if (new_currdev.d_type == DEVT_ZFS)
 	init_zfs_bootenv(zfs_fmtdev(&new_currdev));
 #endif
 
     /* currdev may be changed later (via i386_setcurrdev); loaddev may not. */
     env_setenv("currdev", EV_VOLATILE, i386_fmtdev(&new_currdev),
 	       i386_setcurrdev, env_nounset);
     env_setenv("loaddev", EV_VOLATILE, i386_fmtdev(&new_currdev), env_noset,
 	       env_nounset);
 }
 
 COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot);
 
 /*
  * "reboot" command handler: run every registered device cleanup hook,
  * announce the reboot, pause, then leave through __exit(0).
  * argc and argv are not used.
  */
 static int
 command_reboot(int argc, char *argv[])
 {
     int idx = 0;
 
     /* devsw[] is walked until its NULL terminator. */
     while (devsw[idx] != NULL) {
 	if (devsw[idx]->dv_cleanup != NULL)
 	    devsw[idx]->dv_cleanup();
 	++idx;
     }
 
     printf("Rebooting...\n");
     delay(1000000);
     __exit(0);
 }
 
 /*
  * panic needs an exit(), and the startup code does not supply one,
  * so forward to __exit() here.
  */
 void
 exit(int code)
 {
 
     __exit(code);
 }
 
 COMMAND_SET(heap, "heap", "show heap usage", command_heap);
 
 /*
  * "heap" command handler: print the allocator statistics followed by the
  * heap bottom, the current break (sbrk(0)) and the heap top.
  * Always returns CMD_OK; argc and argv are not used.
  */
 static int
 command_heap(int argc, char *argv[])
 {
 
     mallocstats();
     printf("heap base at %p, top at %p, upper limit at %p\n",
 	heap_bottom, sbrk(0), heap_top);
     return (CMD_OK);
 }
 
 #ifdef LOADER_ZFS_SUPPORT
 COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset",
     command_lszfs);
 
 /*
  * "lszfs" command handler: pass the single argument to zfs_list().
  * Returns CMD_OK on success; otherwise sets command_errmsg and returns
  * CMD_ERROR.
  */
 static int
 command_lszfs(int argc, char *argv[])
 {
     int rv;
 
     if (argc == 2) {
 	rv = zfs_list(argv[1]);
 	if (rv == 0)
 	    return (CMD_OK);
 	/* zfs_list() hands back an errno-style code. */
 	command_errmsg = strerror(rv);
     } else {
 	command_errmsg = "wrong number of arguments";
     }
 
     return (CMD_ERROR);
 }
 
 COMMAND_SET(reloadbe, "reloadbe", "refresh the list of ZFS Boot Environments",
     command_reloadbe);
 
 /*
  * "reloadbe" command handler: refresh the boot environment list by
  * calling zfs_bootenv() on either the optional argument or the value of
  * the "zfs_be_root" environment variable.
  *
  * Returns CMD_OK on success, and also when no argument is given and
  * "zfs_be_root" is unset.  On failure sets command_errmsg and returns
  * CMD_ERROR.
  */
 static int
 command_reloadbe(int argc, char *argv[])
 {
     int err;
     char *root;
 
     if (argc > 2) {
 	command_errmsg = "wrong number of arguments";
 	return (CMD_ERROR);
     }
 
     if (argc == 2) {
 	err = zfs_bootenv(argv[1]);
     } else {
 	root = getenv("zfs_be_root");
 	if (root == NULL) {
 	    /* There does not appear to be a ZFS pool here, exit without error */
 	    return (CMD_OK);
 	}
 	/* Use the value already fetched instead of a second getenv(). */
 	err = zfs_bootenv(root);
     }
 
     if (err != 0) {
 	command_errmsg = strerror(err);
 	return (CMD_ERROR);
     }
 
     return (CMD_OK);
 }
 #endif
 
 /* ISA bus access functions for PnP. */
 /* Hand the port number to inb() and return whatever it yields. */
 static int
 isa_inb(int port)
 {
     int value;
 
     value = inb(port);
     return (value);
 }
 
 /* Hand the port number and value to outb(); nothing is returned. */
 static void
 isa_outb(int port, int value)
 {
     outb(port, value);
 }
 
 #ifdef LOADER_ZFS_SUPPORT
 /*
  * Offer every BIOS disk unit to the ZFS code as "diskN:" so pools can be
  * reconstructed from them.  Stops at the first unit for which
  * bd_unit2bios() returns -1, or after MAXBDDEV units.
  */
 static void
 i386_zfs_probe(void)
 {
     char name[32];
     int n;
 
     n = 0;
     while (n < MAXBDDEV && bd_unit2bios(n) != -1) {
 	sprintf(name, "disk%d:", n);
 	zfs_probe_dev(name, NULL);
 	n++;
     }
 }
 #endif
Index: stable/11/sys/boot/i386/zfsboot/zfsboot.c
===================================================================
--- stable/11/sys/boot/i386/zfsboot/zfsboot.c	(revision 329098)
+++ stable/11/sys/boot/i386/zfsboot/zfsboot.c	(revision 329099)
@@ -1,1072 +1,1080 @@
 /*-
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/diskmbr.h>
 #ifdef GPT
 #include <sys/gpt.h>
 #endif
 #include <sys/reboot.h>
 #include <sys/queue.h>
 
 #include <machine/bootinfo.h>
 #include <machine/elf.h>
 #include <machine/pc/bios.h>
 
 #include <stdarg.h>
 #include <stddef.h>
 
 #include <a.out.h>
 
 #include <btxv86.h>
 
 #include "lib.h"
 #include "rbx.h"
 #include "drv.h"
 #include "util.h"
 #include "cons.h"
 #include "bootargs.h"
 #include "paths.h"
 
 #include "libzfs.h"
 
 #define ARGS			0x900
 #define NOPT			14
 #define NDEV			3
 
 #define BIOS_NUMDRIVES		0x475
 #define DRV_HARD		0x80
 #define DRV_MASK		0x7f
 
 #define TYPE_AD			0
 #define TYPE_DA			1
 #define TYPE_MAXHARD		TYPE_DA
 #define TYPE_FD			2
 
 #define DEV_GELIBOOT_BSIZE	4096
 
 extern uint32_t _end;
 
 #ifdef GPT
 static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
 #endif
 static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
 static const unsigned char flags[NOPT] = {
     RBX_DUAL,
     RBX_SERIAL,
     RBX_ASKNAME,
     RBX_CDROM,
     RBX_CONFIG,
     RBX_KDB,
     RBX_GDB,
     RBX_MUTE,
     RBX_NOINTR,
     RBX_PAUSE,
     RBX_QUIET,
     RBX_DFLTROOT,
     RBX_SINGLE,
     RBX_VERBOSE
 };
 uint32_t opts;
 
 static const unsigned char dev_maj[NDEV] = {30, 4, 2};
 
 static char cmd[512];
 static char cmddup[512];
 static char kname[1024];
 static char rootname[256];
 static int comspeed = SIOSPD;
 static struct bootinfo bootinfo;
 static uint32_t bootdev;
 static struct zfs_boot_args zfsargs;
 static struct zfsmount zfsmount;
 
 vm_offset_t	high_heap_base;
 uint32_t	bios_basemem, bios_extmem, high_heap_size;
 
 static struct bios_smap smap;
 
 /*
  * The minimum amount of memory to reserve in bios_extmem for the heap.
  */
 #define	HEAP_MIN		(64 * 1024 * 1024)
 
 static char *heap_next;
 static char *heap_end;
 
 /*
  * Buffers that must not span a 64k boundary.
  *
  * In this file rdbuf is the bounce buffer for vdev_read()/vdev_write(),
  * and secbuf receives the partition-table sectors read by probe_drive().
  * main() points dmadat at a 64k-aligned address past _end.
  */
 #define READ_BUF_SIZE		8192
 struct dmadat {
 	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
 	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
 };
 static struct dmadat *dmadat;
 
 void exit(int);
 void reboot(void);
 static void load(void);
-static int parse(void);
+static int parse_cmd(void);
 static void bios_getmem(void);
 void *malloc(size_t n);
 void free(void *ptr);
 
 /*
  * Minimal bump allocator: hand out the next n bytes between heap_next and
  * heap_end.  There is no failure return in practice; when the request does
  * not fit, a message is printed and the function spins forever.
  */
 void *
 malloc(size_t n)
 {
 	char *chunk;
 
 	chunk = heap_next;
 	if (chunk + n <= heap_end) {
 		heap_next = chunk + n;
 		return (chunk);
 	}
 
 	printf("malloc failure\n");
 	for (;;)
 		;
 	/* NOTREACHED */
 	return (0);
 }
 
 /*
  * Deliberately empty: memory handed out by malloc() above is never
  * reclaimed.
  */
 void
 free(void *ptr)
 {
 }
 
 /*
  * Return a copy of the NUL-terminated string s in memory obtained from
  * malloc().  The result of malloc() is not checked here.
  */
 static char *
 strdup(const char *s)
 {
 	char *copy;
 
 	copy = malloc(strlen(s) + 1);
 	strcpy(copy, s);
 	return (copy);
 }
 
 #ifdef LOADER_GELI_SUPPORT
 #include "geliboot.c"
 static char gelipw[GELI_PW_MAXLEN];
+static struct keybuf *gelibuf;
 #endif
 
 #include "zfsimpl.c"
 
 /*
  * Read from a dnode (which must be from a ZPL filesystem).
  *
  * Reads up to size bytes at *offp into start, clipping the request at the
  * file size recorded in the znode.  On success *offp is advanced and the
  * number of bytes read is returned; -1 is returned if dnode_read() fails.
  */
 static int
 zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
 {
 	const znode_phys_t *znode;
 	size_t len;
 
 	znode = (const znode_phys_t *)dnode->dn_bonus;
 
 	/* Do not read past the end of the file. */
 	len = size;
 	if (*offp + len > znode->zp_size)
 		len = znode->zp_size - *offp;
 
 	if (dnode_read(spa, dnode, *offp, start, len) != 0)
 		return (-1);
 
 	*offp += len;
 	return (len);
 }
 
 /*
  * Current ZFS pool
  */
 static spa_t *spa;
 static spa_t *primary_spa;
 static vdev_t *primary_vdev;
 
 /*
  * A wrapper for dskread that doesn't have to worry about whether the
  * buffer pointer crosses a 64k boundary.
  *
  * All disk I/O goes through dmadat->rdbuf and is then copied into buf.
  * priv is the struct dsk to read from; vdev itself is not used.
  * off and bytes must both be multiples of DEV_BSIZE.
  * Returns 0 on success and -1 on a misaligned request or a failed read.
  */
 static int
 vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
 {
 	char *p;
 	daddr_t lba, alignlba;
 	off_t diff;
 	unsigned int nb, alignnb;
 	struct dsk *dsk = (struct dsk *) priv;
 
 	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
 		return -1;
 
 	p = buf;
 	lba = off / DEV_BSIZE;
 	lba += dsk->start;
 	/*
 	 * Align reads to 4k else 4k sector GELIs will not decrypt.
 	 * Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes.
 	 */
 	alignlba = rounddown2(off, DEV_GELIBOOT_BSIZE) / DEV_BSIZE;
 	/*
 	 * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the
 	 * start of the GELI partition, not the start of the actual disk.
 	 */
 	alignlba += dsk->start;
 	/* Byte offset of the requested data inside the first aligned read. */
 	diff = (lba - alignlba) * DEV_BSIZE;
 
 	while (bytes > 0) {
 		nb = bytes / DEV_BSIZE;
 		/*
 		 * Ensure that the read size plus the leading offset does not
 		 * exceed the size of the read buffer.
 		 */
 		if (nb > (READ_BUF_SIZE - diff) / DEV_BSIZE)
 			nb = (READ_BUF_SIZE - diff) / DEV_BSIZE;
 		/*
 		 * Round the number of blocks to read up to the nearest multiple
 		 * of DEV_GELIBOOT_BSIZE.
 		 */
 		alignnb = roundup2(nb * DEV_BSIZE + diff, DEV_GELIBOOT_BSIZE)
 		    / DEV_BSIZE;
 
 		if (drvread(dsk, dmadat->rdbuf, alignlba, alignnb))
 			return -1;
 #ifdef LOADER_GELI_SUPPORT
 		/* decrypt (is_geli() returning 0 selects this path) */
 		if (is_geli(dsk) == 0) {
 			if (geli_read(dsk, ((alignlba - dsk->start) *
 			    DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE))
 				return (-1);
 		}
 #endif
 		/* Hand back only the nb blocks the caller asked for. */
 		memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE);
 		p += nb * DEV_BSIZE;
 		lba += nb;
 		alignlba += alignnb;
 		bytes -= nb * DEV_BSIZE;
 		/* Don't need the leading offset after the first block. */
 		diff = 0;
 	}
 
 	return 0;
 }
 
 /*
  * Write bytes from buf to the disk described by priv, starting at byte
  * offset off relative to dsk->start.  Data is staged through
  * dmadat->rdbuf in pieces of at most READ_BUF_SIZE bytes.
  *
  * off and bytes must both be multiples of DEV_BSIZE; vdev is not used.
  * Returns 0 on success, -1 on a misaligned request or a failed write.
  */
 static int
 vdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
 {
 	struct dsk *disk;
 	char *src;
 	daddr_t blk;
 	unsigned int cnt;
 
 	if ((off & (DEV_BSIZE - 1)) != 0)
 		return (-1);
 	if ((bytes & (DEV_BSIZE - 1)) != 0)
 		return (-1);
 
 	disk = (struct dsk *)priv;
 	src = buf;
 	blk = off / DEV_BSIZE;
 	blk += disk->start;
 
 	for (; bytes > 0; bytes -= cnt * DEV_BSIZE) {
 		/* Never stage more than the bounce buffer can hold. */
 		cnt = bytes / DEV_BSIZE;
 		if (cnt > READ_BUF_SIZE / DEV_BSIZE)
 			cnt = READ_BUF_SIZE / DEV_BSIZE;
 
 		memcpy(dmadat->rdbuf, src, cnt * DEV_BSIZE);
 		if (drvwrite(disk, dmadat->rdbuf, blk, cnt) != 0)
 			return (-1);
 
 		src += cnt * DEV_BSIZE;
 		blk += cnt;
 	}
 
 	return (0);
 }
 
 /*
  * Read exactly nbyte bytes from dnode at *offp through zfs_read() on the
  * current pool.  A short or failed read prints "Invalid format" and
  * returns -1; a full read returns 0.
  */
 static int
 xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
 {
     size_t got;
 
     got = (size_t)zfs_read(spa, dnode, offp, buf, nbyte);
     if (got == nbyte)
 	return (0);
 
     printf("Invalid format\n");
     return (-1);
 }
 
 /*
  * Read Pad2 (formerly "Boot Block Header") area of the first
  * vdev label of the given vdev.
  *
  * The whole VDEV_PAD_SIZE area is read into zap_scratch and at most size
  * bytes of it are copied to buf.  Returns 0 on success, EIO if the
  * physical read fails.
  */
 static int
 vdev_read_pad2(vdev_t *vdev, char *buf, size_t size)
 {
 	blkptr_t bp;
 	char *scratch;
 	off_t pad2_off;
 
 	scratch = zap_scratch;
 	pad2_off = offsetof(vdev_label_t, vl_pad2);
 
 	/* Describe the pad area with a synthetic block pointer. */
 	BP_ZERO(&bp);
 	BP_SET_LSIZE(&bp, VDEV_PAD_SIZE);
 	BP_SET_PSIZE(&bp, VDEV_PAD_SIZE);
 	BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
 	BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
 	DVA_SET_OFFSET(BP_IDENTITY(&bp), pad2_off);
 	if (vdev_read_phys(vdev, &bp, scratch, pad2_off, 0) != 0)
 		return (EIO);
 
 	/* Never copy more than the pad area holds. */
 	if (size > VDEV_PAD_SIZE)
 		size = VDEV_PAD_SIZE;
 	memcpy(buf, scratch, size);
 	return (0);
 }
 
 /*
  * Overwrite the Pad2 area of the vdev's first label with zeros followed
  * by a matching trailer, using zap_scratch as the staging buffer.
  * Returns 0 on success, EIO if vdev_write() fails.
  */
 static int
 vdev_clear_pad2(vdev_t *vdev)
 {
 	char *pad = zap_scratch;
 	uint64_t *trailer;
 	off_t pad2_off = offsetof(vdev_label_t, vl_pad2);
 
 	memset(pad, 0, VDEV_PAD_SIZE);
 
 	/*
 	 * The last five 64-bit words of the area: ZIO_CHECKSUM_LABEL magic
 	 * and pre-calculated checksum for all zeros.
 	 */
 	trailer = (uint64_t *)(pad + VDEV_PAD_SIZE) - 5;
 	trailer[0] = 0x0210da7ab10c7a11;
 	trailer[1] = 0x97f48f807f6e2a3f;
 	trailer[2] = 0xaf909f1658aacefc;
 	trailer[3] = 0xcbd1ea57ff6db48b;
 	trailer[4] = 0x6ec692db0d465fab;
 
 	if (vdev_write(vdev, vdev->v_read_priv, pad2_off, pad,
 	    VDEV_PAD_SIZE) != 0)
 		return (EIO);
 	return (0);
 }
 
 /*
  * Query the BIOS for the memory layout and fill in the file-level
  * bios_basemem, bios_extmem, high_heap_base and high_heap_size.
  *
  * The int 0x15/0xe820 system memory map is walked first; older BIOS
  * calls (int 0x12, int 0x15/0xe801, int 0x15/0x88) are used as fallbacks
  * for whatever the map did not provide.
  */
 static void
 bios_getmem(void)
 {
     uint64_t size;
 
     /* Parse system memory map */
     v86.ebx = 0;
     do {
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x15;		/* int 0x15 function 0xe820*/
 	v86.eax = 0xe820;
 	v86.ecx = sizeof(struct bios_smap);
 	v86.edx = SMAP_SIG;
 	v86.es = VTOPSEG(&smap);
 	v86.edi = VTOPOFF(&smap);
 	v86int();
 	if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG))
 	    break;
 	/* look for a low-memory segment that's large enough */
 	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
 	    (smap.length >= (512 * 1024)))
 	    bios_basemem = smap.length;
 	/* look for the first segment in 'extended' memory */
 	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
 	    bios_extmem = smap.length;
 	}
 
 	/*
 	 * Look for the largest segment in 'extended' memory beyond
 	 * 1MB but below 4GB.
 	 */
 	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
 	    (smap.base < 0x100000000ull)) {
 	    size = smap.length;
 
 	    /*
 	     * If this segment crosses the 4GB boundary, truncate it.
 	     */
 	    if (smap.base + size > 0x100000000ull)
 		size = 0x100000000ull - smap.base;
 
 	    if (size > high_heap_size) {
 		high_heap_size = size;
 		high_heap_base = smap.base;
 	    }
 	}
     } while (v86.ebx != 0);	/* loop until ebx comes back as zero */
 
     /* Fall back to the old compatibility function for base memory */
     if (bios_basemem == 0) {
 	v86.ctl = 0;
 	v86.addr = 0x12;		/* int 0x12 */
 	v86int();
 	
 	bios_basemem = (v86.eax & 0xffff) * 1024;
     }
 
     /* Fall back through several compatibility functions for extended memory */
     if (bios_extmem == 0) {
 	v86.ctl = V86_FLAGS;
 	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
 	v86.eax = 0xe801;
 	v86int();
 	if (!V86_CY(v86.efl)) {
 	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
 	}
     }
     if (bios_extmem == 0) {
 	v86.ctl = 0;
 	v86.addr = 0x15;		/* int 0x15 function 0x88*/
 	v86.eax = 0x8800;
 	v86int();
 	bios_extmem = (v86.eax & 0xffff) * 1024;
     }
 
     /*
      * If we have extended memory and did not find a suitable heap
      * region in the SMAP, use the last HEAP_MIN bytes of 'extended'
      * memory as a high heap candidate.
      */
     if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
 	high_heap_size = HEAP_MIN;
 	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
     }
 }
 
 /*
  * Try to detect a device supported by the legacy int13 BIOS.
  *
  * Issues int 0x13 with eax 0x800 for the given drive number.
  * Returns 1 if the drive passes all checks below, 0 otherwise.
  */
 static int
 int13probe(int drive)
 {
     v86.ctl = V86_FLAGS;
     v86.addr = 0x13;
     v86.eax = 0x800;
     v86.edx = drive;
     v86int();
 
     /* The call failed: carry is set. */
     if (V86_CY(v86.efl))
 	return (0);
 
     /* Unit # check failed. */
     if ((v86.edx & 0xff) == (drive & DRV_MASK))
 	return (0);
 
     /* Absurd sector size: skip device. */
     if ((v86.ecx & 0x3f) == 0)
 	return (0);
 
     return (1);
 }
 
 /*
  * We call this when we find a ZFS vdev - ZFS consumes the dsk
  * structure so we must make a new one.
  *
  * Returns a freshly allocated copy of *dsk.
  */
 static struct dsk *
 copy_dsk(struct dsk *dsk)
 {
     struct dsk *clone = malloc(sizeof(struct dsk));
 
     *clone = *dsk;
     return (clone);
 }
 
 /*
  * Look for ZFS vdevs on one drive.
  *
  * The probe order is: the whole disk; with LOADER_GELI_SUPPORT, the whole
  * disk as a GELI provider; with GPT, every GPT partition whose type is
  * freebsd_zfs_uuid; otherwise every non-empty MBR slice.  Whenever
  * vdev_probe() accepts a partition, dsk is replaced by a copy because the
  * vdev keeps the structure it was given.
  */
 static void
 probe_drive(struct dsk *dsk)
 {
 #ifdef GPT
     struct gpt_hdr hdr;
     struct gpt_ent *ent;
     unsigned part, entries_per_sec;
     daddr_t slba;
 #endif
 #if defined(GPT) || defined(LOADER_GELI_SUPPORT)
     daddr_t elba;
 #endif
 
     struct dos_partition *dp;
     char *sec;
     unsigned i;
 
     /*
      * If we find a vdev on the whole disk, stop here.
      */
     if (vdev_probe(vdev_read, dsk, NULL) == 0)
 	return;
 
 #ifdef LOADER_GELI_SUPPORT
     /*
      * Taste the disk, if it is GELI encrypted, decrypt it and check to see if
      * it is a usable vdev then. Otherwise dig
      * out the partition table and probe each slice/partition
      * in turn for a vdev or GELI encrypted vdev.
      */
     elba = drvsize(dsk);
     if (elba > 0) {
 	elba--;
     }
     if (geli_taste(vdev_read, dsk, elba) == 0) {
-	if (geli_passphrase(&gelipw, dsk->unit, ':', 0, dsk) == 0) {
+	if (geli_havekey(dsk) == 0 || geli_passphrase(&gelipw, dsk->unit,
+	  ':', 0, dsk) == 0) {
 	    if (vdev_probe(vdev_read, dsk, NULL) == 0) {
 		return;
 	    }
 	}
     }
 #endif /* LOADER_GELI_SUPPORT */
 
     sec = dmadat->secbuf;
     dsk->start = 0;
 
 #ifdef GPT
     /*
      * First check for GPT.
      */
     if (drvread(dsk, sec, 1, 1)) {
 	return;
     }
     memcpy(&hdr, sec, sizeof(hdr));
     /* Anything that does not look like a sane GPT header falls back to MBR. */
     if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
 	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
 	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
 	goto trymbr;
     }
 
     /*
      * Probe all GPT partitions for the presence of ZFS pools. We
      * return the spa_t for the first we find (if requested). This
      * will have the effect of booting from the first pool on the
      * disk.
      *
      * If no vdev is found, GELI decrypting the device and try again
      */
     entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
     slba = hdr.hdr_lba_table;
     elba = slba + hdr.hdr_entries / entries_per_sec;
     while (slba < elba) {
 	/* The table sector is addressed relative to the start of the disk. */
 	dsk->start = 0;
 	if (drvread(dsk, sec, slba, 1))
 	    return;
 	for (part = 0; part < entries_per_sec; part++) {
 	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
 	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
 		     sizeof(uuid_t)) == 0) {
 		dsk->start = ent->ent_lba_start;
 		dsk->slice = part + 1;
 		dsk->part = 255;
 		if (vdev_probe(vdev_read, dsk, NULL) == 0) {
 		    /*
 		     * This slice had a vdev. We need a new dsk
 		     * structure now since the vdev now owns this one.
 		     */
 		    dsk = copy_dsk(dsk);
 		}
 #ifdef LOADER_GELI_SUPPORT
 		else if (geli_taste(vdev_read, dsk, ent->ent_lba_end -
 			 ent->ent_lba_start) == 0) {
-		    if (geli_passphrase(&gelipw, dsk->unit, 'p', dsk->slice, dsk) == 0) {
+		    if (geli_havekey(dsk) == 0 || geli_passphrase(&gelipw,
+		      dsk->unit, 'p', dsk->slice, dsk) == 0) {
 			/*
 			 * This slice has GELI, check it for ZFS.
 			 */
 			if (vdev_probe(vdev_read, dsk, NULL) == 0) {
 			    /*
 			     * This slice had a vdev. We need a new dsk
 			     * structure now since the vdev now owns this one.
 			     */
 			    dsk = copy_dsk(dsk);
 			}
 			/* Leaves the scan of this table sector. */
 			break;
 		    }
 		}
 #endif /* LOADER_GELI_SUPPORT */
 	    }
 	}
 	slba++;
     }
     return;
 trymbr:
 #endif /* GPT */
 
     if (drvread(dsk, sec, DOSBBSECTOR, 1))
 	return;
     dp = (void *)(sec + DOSPARTOFF);
 
     for (i = 0; i < NDOSPART; i++) {
 	/* A zero type marks an unused slice entry. */
 	if (!dp[i].dp_typ)
 	    continue;
 	dsk->start = dp[i].dp_start;
 	dsk->slice = i + 1;
 	if (vdev_probe(vdev_read, dsk, NULL) == 0) {
 	    dsk = copy_dsk(dsk);
 	}
 #ifdef LOADER_GELI_SUPPORT
 	/*
 	 * NOTE(review): the GPT path passes (end - start) to geli_taste();
 	 * here dp_size - dp_start is passed, and geli_passphrase() gets i
 	 * rather than dsk->slice.  Confirm both are intended.
 	 */
 	else if (geli_taste(vdev_read, dsk, dp[i].dp_size -
 		 dp[i].dp_start) == 0) {
-	    if (geli_passphrase(&gelipw, dsk->unit, 's', i, dsk) == 0) {
+	    if (geli_havekey(dsk) == 0 || geli_passphrase(&gelipw, dsk->unit,
+	      's', i, dsk) == 0) {
 		/*
 		 * This slice has GELI, check it for ZFS.
 		 */
 		if (vdev_probe(vdev_read, dsk, NULL) == 0) {
 		    /*
 		     * This slice had a vdev. We need a new dsk
 		     * structure now since the vdev now owns this one.
 		     */
 		    dsk = copy_dsk(dsk);
 		}
 		break;
 	    }
 	}
 #endif /* LOADER_GELI_SUPPORT */
     }
 }
 
 /*
  * Boot block entry point.
  *
  * Sets up the DMA buffers and the heap, probes the boot drive and then the
  * other BIOS drives for ZFS pools, honours a one-shot "nextboot" command
  * stored in the primary vdev's pad2 area, reads the configuration file,
  * and finally tries to load the loader or a kernel, falling back to an
  * interactive "boot:" prompt.  The prompt loop never terminates.
  */
 int
 main(void)
 {
     dnode_phys_t dn;
     off_t off;
     struct dsk *dsk;
     int autoboot, i;
     int nextboot;
     int rc;
 
     /* Place the DMA buffers at the next 64k boundary past _end. */
     dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
 
     bios_getmem();
 
     /* Prefer the high heap; otherwise use what follows dmadat in base memory. */
     if (high_heap_size > 0) {
 	heap_end = PTOV(high_heap_base + high_heap_size);
 	heap_next = PTOV(high_heap_base);
     } else {
 	heap_next = (char *)dmadat + sizeof(*dmadat);
 	heap_end = (char *)PTOV(bios_basemem);
     }
 
     /* The boot drive and slice are read from the argument area at ARGS. */
     dsk = malloc(sizeof(struct dsk));
     dsk->drive = *(uint8_t *)PTOV(ARGS);
     dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
     dsk->unit = dsk->drive & DRV_MASK;
     dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
     dsk->part = 0;
     dsk->start = 0;
     dsk->init = 0;
 
     bootinfo.bi_version = BOOTINFO_VERSION;
     bootinfo.bi_size = sizeof(bootinfo);
     bootinfo.bi_basemem = bios_basemem / 1024;
     bootinfo.bi_extmem = bios_extmem / 1024;
     bootinfo.bi_memsizes_valid++;
     bootinfo.bi_bios_dev = dsk->drive;
 
     bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
 			  dsk->slice, dsk->unit, dsk->part);
 
     /* Process configuration file */
 
     autoboot = 1;
 
 #ifdef LOADER_GELI_SUPPORT
     geli_init();
 #endif
     zfs_init();
 
     /*
      * Probe the boot drive first - we will try to boot from whatever
      * pool we find on that drive.
      */
     probe_drive(dsk);
 
     /*
      * Probe the rest of the drives that the bios knows about. This
      * will find any other available pools and it may fill in missing
      * vdevs for the boot pool.
      */
 #ifndef VIRTUALBOX
     for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++)
 #else
     for (i = 0; i < MAXBDDEV; i++)
 #endif
     {
 	/* The boot drive has already been probed above. */
 	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
 	    continue;
 
 	if (!int13probe(i | DRV_HARD))
 	    break;
 
 	dsk = malloc(sizeof(struct dsk));
 	dsk->drive = i | DRV_HARD;
 	/* TYPE_AD is 0, so this always yields 0 (TYPE_AD). */
 	dsk->type = dsk->drive & TYPE_AD;
 	dsk->unit = i;
 	dsk->slice = 0;
 	dsk->part = 0;
 	dsk->start = 0;
 	dsk->init = 0;
 	probe_drive(dsk);
     }
 
     /*
      * The first discovered pool, if any, is the pool.
      */
     spa = spa_get_primary();
     if (!spa) {
 	printf("%s: No ZFS pools located, can't boot\n", BOOTPROG);
 	for (;;)
 	    ;
     }
 
     primary_spa = spa;
     primary_vdev = spa_get_primary_vdev(spa);
 
     /*
      * The pad2 area is read and then cleared regardless of whether the
      * read succeeded, so a stored command is used at most once.
      */
     nextboot = 0;
     rc  = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd));
     if (vdev_clear_pad2(primary_vdev))
 	printf("failed to clear pad2 area of primary vdev\n");
     if (rc == 0) {
 	if (*cmd) {
 	    /*
 	     * We could find an old-style ZFS Boot Block header here.
 	     * Simply ignore it.
 	     */
 	    if (*(uint64_t *)cmd != 0x2f5b007b10c) {
 		/*
 		 * Note that parse_cmd() is destructive to cmd[] and we also
 		 * want to honor RBX_QUIET option that could be present in
 		 * cmd[].
 		 */
 		nextboot = 1;
 		memcpy(cmddup, cmd, sizeof(cmd));
-		if (parse()) {
+		if (parse_cmd()) {
 		    printf("failed to parse pad2 area of primary vdev\n");
 		    reboot();
 		}
 		if (!OPT_CHECK(RBX_QUIET))
 		    printf("zfs nextboot: %s\n", cmddup);
 	    }
 	    /* Do not process this command twice */
 	    *cmd = 0;
 	}
     } else
 	printf("failed to read pad2 area of primary vdev\n");
 
     /* Mount ZFS only if it's not already mounted via nextboot parsing. */
     if (zfsmount.spa == NULL &&
 	(zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) {
 	printf("%s: failed to mount default pool %s\n",
 	    BOOTPROG, spa->spa_name);
 	autoboot = 0;
     } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 ||
         zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) {
 	/* Load the configuration file contents into cmd[]. */
 	off = 0;
 	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
     }
 
     if (*cmd) {
 	/*
-	 * Note that parse() is destructive to cmd[] and we also want
+	 * Note that parse_cmd() is destructive to cmd[] and we also want
 	 * to honor RBX_QUIET option that could be present in cmd[].
 	 */
 	memcpy(cmddup, cmd, sizeof(cmd));
-	if (parse())
+	if (parse_cmd())
 	    autoboot = 0;
 	if (!OPT_CHECK(RBX_QUIET))
 	    printf("%s: %s\n", PATH_CONFIG, cmddup);
 	/* Do not process this command twice */
 	*cmd = 0;
     }
 
     /* Do not risk waiting at the prompt forever. */
     if (nextboot && !autoboot)
 	reboot();
 
     /*
      * Try to exec /boot/loader. If interrupted by a keypress,
      * or in case of failure, try to load a kernel directly instead.
      */
 
     if (autoboot && !*kname) {
 	memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS));
 	if (!keyhit(3)) {
 	    load();
 	    /* load() returned, so the loader could not be started. */
 	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
 	}
     }
 
     /* Present the user with the boot2 prompt. */
 
     for (;;) {
 	if (!autoboot || !OPT_CHECK(RBX_QUIET)) {
 	    printf("\nFreeBSD/x86 boot\n");
 	    if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0)
 		printf("Default: %s/<0x%llx>:%s\n"
 		       "boot: ",
 		       spa->spa_name, zfsmount.rootobj, kname);
 	    else if (rootname[0] != '\0')
 		printf("Default: %s/%s:%s\n"
 		       "boot: ",
 		       spa->spa_name, rootname, kname);
 	    else
 		printf("Default: %s:%s\n"
 		       "boot: ",
 		       spa->spa_name, kname);
 	}
 	if (ioctrl & IO_SERIAL)
 	    sio_flush();
 	if (!autoboot || keyhit(5))
 	    getstr(cmd, sizeof(cmd));
 	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
 	    putchar('\n');
 	/* Only the first pass through the prompt is automatic. */
 	autoboot = 0;
-	if (parse())
+	if (parse_cmd())
 	    putchar('\a');
 	else
 	    load();
     }
 }
 
 /*
  * XXX - Needed for btxld to link the boot2 binary; do not remove.
  * Simply forwards the status to __exit().
  */
 void
 exit(int x)
 {
 
     __exit(x);
 }
 
 /* Leave the boot program through __exit() with status 0. */
 void
 reboot(void)
 {
 
     __exit(0);
 }
 
 /*
  * Load the file named by kname from the mounted ZFS dataset and start it.
  *
  * Both a.out (ZMAGIC) and ELF images are handled.  The image, and its
  * symbol data when present, are copied to the addresses taken from its
  * headers; bootinfo and zfsargs are filled in; and control is passed with
  * __exec().  The function returns only when the file cannot be found or
  * read, or is of an unknown format.
  */
 static void
 load(void)
 {
     union {
 	struct exec ex;
 	Elf32_Ehdr eh;
     } hdr;
     static Elf32_Phdr ep[2];
     static Elf32_Shdr es[2];
     caddr_t p;
     dnode_phys_t dn;
     off_t off;
     uint32_t addr, x;
     int fmt, i, j;
 
     if (zfs_lookup(&zfsmount, kname, &dn)) {
 	printf("\nCan't find %s\n", kname);
 	return;
     }
     off = 0;
     if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
 	return;
     /* fmt: 0 for a.out, 1 for ELF. */
     if (N_GETMAGIC(hdr.ex) == ZMAGIC)
 	fmt = 0;
     else if (IS_ELF(hdr.eh))
 	fmt = 1;
     else {
 	printf("Invalid %s\n", "format");
 	return;
     }
     if (fmt == 0) {
 	/* a.out: text, then data, then the symbol table size and symbols. */
 	addr = hdr.ex.a_entry & 0xffffff;
 	p = PTOV(addr);
 	off = PAGE_SIZE;
 	if (xfsread(&dn, &off, p, hdr.ex.a_text))
 	    return;
 	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
 	if (xfsread(&dn, &off, p, hdr.ex.a_data))
 	    return;
 	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
 	bootinfo.bi_symtab = VTOP(p);
 	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
 	p += sizeof(hdr.ex.a_syms);
 	if (hdr.ex.a_syms) {
 	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
 		return;
 	    p += hdr.ex.a_syms;
 	    /* The next word gives the length of what follows, itself included. */
 	    if (xfsread(&dn, &off, p, sizeof(int)))
 		return;
 	    x = *(uint32_t *)p;
 	    p += sizeof(int);
 	    x -= sizeof(int);
 	    if (xfsread(&dn, &off, p, x))
 		return;
 	    p += x;
 	}
     } else {
 	/* ELF: collect at most the first two PT_LOAD program headers. */
 	off = hdr.eh.e_phoff;
 	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
 	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
 		return;
 	    if (ep[j].p_type == PT_LOAD)
 		j++;
 	}
 	/*
 	 * NOTE(review): both ep[] entries are loaded even if fewer than two
 	 * PT_LOAD headers were found above (j < 2) - confirm that images
 	 * handled here always have two.
 	 */
 	for (i = 0; i < 2; i++) {
 	    p = PTOV(ep[i].p_paddr & 0xffffff);
 	    off = ep[i].p_offset;
 	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
 		return;
 	}
 	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
 	bootinfo.bi_symtab = VTOP(p);
 	/* Copy the two sections after the string table, each prefixed by its size. */
 	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
 	    off = hdr.eh.e_shoff + sizeof(es[0]) *
 		(hdr.eh.e_shstrndx + 1);
 	    if (xfsread(&dn, &off, &es, sizeof(es)))
 		return;
 	    for (i = 0; i < 2; i++) {
 		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
 		p += sizeof(es[i].sh_size);
 		off = es[i].sh_offset;
 		if (xfsread(&dn, &off, p, es[i].sh_size))
 		    return;
 		p += es[i].sh_size;
 	    }
 	}
 	addr = hdr.eh.e_entry & 0xffffff;
     }
     bootinfo.bi_esymtab = VTOP(p);
     bootinfo.bi_kernelname = VTOP(kname);
     /* Describe the pool and root dataset for the next stage. */
     zfsargs.size = sizeof(zfsargs);
     zfsargs.pool = zfsmount.spa->spa_guid;
     zfsargs.root = zfsmount.rootobj;
     zfsargs.primary_pool = primary_spa->spa_guid;
 #ifdef LOADER_GELI_SUPPORT
-    bcopy(gelipw, zfsargs.gelipw, sizeof(zfsargs.gelipw));
-    bzero(gelipw, sizeof(gelipw));
+    explicit_bzero(gelipw, sizeof(gelipw));
+    gelibuf = malloc(sizeof(struct keybuf) + (GELI_MAX_KEYS * sizeof(struct keybuf_ent)));
+    geli_fill_keybuf(gelibuf);
+    zfsargs.notapw = '\0';
+    zfsargs.keybuf_sentinel = KEYBUF_SENTINEL;
+    zfsargs.keybuf = gelibuf;
 #else
     zfsargs.gelipw[0] = '\0';
 #endif
     if (primary_vdev != NULL)
 	zfsargs.primary_vdev = primary_vdev->v_guid;
     else
 	printf("failed to detect primary vdev\n");
     /* The 64-bit pool GUID is passed as two 32-bit halves, low half first. */
     __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
 	   bootdev,
 	   KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
 	   (uint32_t) spa->spa_guid,
 	   (uint32_t) (spa->spa_guid >> 32),
 	   VTOP(&bootinfo),
 	   zfsargs);
 }
 
 /*
  * Mount the dataset named by dsname, given as "pool" or "pool/dataset".
  *
  * dsname is modified in place: the first '/' is replaced by a NUL.
  * On success zfsmount is updated, the file-level spa is switched to the
  * named pool and 0 is returned.  On any failure -1 is returned.
  */
 static int
 zfs_mount_ds(char *dsname)
 {
     uint64_t rootobj = 0;
     spa_t *pool;
     char *dataset;
 
     /* Split the pool name from the optional dataset path. */
     dataset = strchr(dsname, '/');
     if (dataset != NULL)
 	*dataset++ = '\0';
 
     pool = spa_find_by_name(dsname);
     if (pool == NULL) {
 	printf("\nCan't find ZFS pool %s\n", dsname);
 	return (-1);
     }
 
     if (zfs_spa_init(pool) != 0)
 	return (-1);
 
     if (dataset != NULL &&
 	zfs_lookup_dataset(pool, dataset, &rootobj) != 0) {
 	printf("\nCan't find dataset %s in ZFS pool %s\n",
 		dataset, pool->spa_name);
 	return (-1);
     }
 
     if (zfs_mount(pool, rootobj, &zfsmount) != 0) {
 	printf("\nCan't mount ZFS dataset\n");
 	return (-1);
     }
 
     spa = pool;
     return (0);
 }
 
 /*
  * Parse the command line held in cmd[].
  *
  * Words are separated by spaces, tabs or newlines.  A word starting with
  * '-' is a set of option letters, a word starting with '?' lists a
  * directory, "status" prints pool status, and anything else is taken as
  * "[zfs:][pool[/dataset]:]path" and stored in kname.
  *
  * cmd[] is modified in place (separators are overwritten with NULs).
  * Returns 0 on success and -1 on error; the '?' and "status" words also
  * return -1.
  */
 static int
-parse(void)
+parse_cmd(void)
 {
     char *arg = cmd;
     char *ep, *p, *q;
     const char *cp;
     int c, i, j;
 
     while ((c = *arg++)) {
 	if (c == ' ' || c == '\t' || c == '\n')
 	    continue;
 	/* Find the end of this word and terminate it. */
 	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
 	ep = p;
 	if (*p)
 	    *p++ = 0;
 	if (c == '-') {
 	    while ((c = *arg++)) {
 		if (c == 'P') {
 		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
 			cp = "yes";
 		    } else {
 			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
 			cp = "no";
 		    }
 		    printf("Keyboard: %s\n", cp);
 		    continue;
 		} else if (c == 'S') {
 		    /* -S<digits>: the number must run to the end of the word. */
 		    j = 0;
 		    while ((unsigned int)(i = *arg++ - '0') <= 9)
 			j = j * 10 + i;
 		    if (j > 0 && i == -'0') {
 			comspeed = j;
 			break;
 		    }
 		    /* Fall through to error below ('S' not in optstr[]). */
 		}
 		/* Each listed option letter toggles its flag. */
 		for (i = 0; c != optstr[i]; i++)
 		    if (i == NOPT - 1)
 			return -1;
 		opts ^= OPT_SET(flags[i]);
 	    }
 	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
 		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
 	    if (ioctrl & IO_SERIAL) {
 	        if (sio_init(115200 / comspeed) != 0)
 		    ioctrl &= ~IO_SERIAL;
 	    }
 	/*
 	 * NOTE(review): there is no "else" before the "if" below, so after
 	 * an option word the final else branch also runs (with arg left on
 	 * the word's terminating NUL).  Confirm this is intended.
 	 */
 	} if (c == '?') {
 	    dnode_phys_t dn;
 
 	    if (zfs_lookup(&zfsmount, arg, &dn) == 0) {
 		zap_list(spa, &dn);
 	    }
 	    return -1;
 	} else {
 	    /* Step back onto the first character of the word. */
 	    arg--;
 
 	    /*
 	     * Report pool status if the comment is 'status'. Lets
 	     * hope no-one wants to load /status as a kernel.
 	     */
 	    if (!strcmp(arg, "status")) {
 		spa_all_status();
 		return -1;
 	    }
 
 	    /*
 	     * If there is "zfs:" prefix simply ignore it.
 	     */
 	    if (strncmp(arg, "zfs:", 4) == 0)
 		arg += 4;
 
 	    /*
 	     * If there is a colon, switch pools.
 	     */
 	    q = strchr(arg, ':');
 	    if (q) {
 		*q++ = '\0';
 		if (zfs_mount_ds(arg) != 0)
 		    return -1;
 		arg = q;
 	    }
 	    /* Whatever remains of the word is the file name. */
 	    if ((i = ep - arg)) {
 		if ((size_t)i >= sizeof(kname))
 		    return -1;
 		memcpy(kname, arg, i + 1);
 	    }
 	}
 	arg = p;
     }
     return 0;
 }
Index: stable/11/sys/boot/mips/beri/boot2/Makefile
===================================================================
--- stable/11/sys/boot/mips/beri/boot2/Makefile	(revision 329098)
+++ stable/11/sys/boot/mips/beri/boot2/Makefile	(revision 329099)
@@ -1,89 +1,89 @@
 #-
 # Copyright (c) 2013-2014 Robert N. M. Watson
 # All rights reserved.
 #
 # This software was developed by SRI International and the University of
 # Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
 # ("CTSRD"), as part of the DARPA CRASH research programme.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 # $FreeBSD$
 
 BINDIR?=	/boot
 INSTALLFLAGS=	-b
 
 LOADERS=	flashboot jtagboot
 FILES=		${LOADERS} ${LOADERS:S/$/.md5/}
 
 SRCS=		relocate.S		\
 		start.S			\
 		boot2.c			\
 		altera_jtag_uart.c	\
 		cfi.c			\
 		sdcard.c
 
 MAN=
 
 AFLAGS=		-G0
 
 CFLAGS=		-ffreestanding			\
 		-I${.CURDIR}			\
 		-I${.CURDIR}/../../../common	\
 		-I${.CURDIR}/../../../..	\
 		-D_KERNEL			\
 		-Wall				\
-		-G0 -Xassembler -G0		\
+		-G0 				\
 		-fno-pic -mno-abicalls		\
 		-msoft-float			\
 		-g
 
 # where to get libstand from
 CFLAGS+=	-I${.CURDIR}/../../../../../lib/libstand/
 LIBSTAND=	${.OBJDIR}/../../../../../lib/libstand/libstand.a
 
 LDFLAGS=	-nostdlib			\
 		-static				\
 		-N				\
 		-G0				\
 		-L${.CURDIR}
 
 .PATH:		${.CURDIR}/../common
 CFLAGS+=	-I${.CURDIR}/../common
 
 flashboot.elf: relocate.o start.o boot2.o altera_jtag_uart.o cfi.o sdcard.o
-	${LD} ${_LDFLAGS} -T ${.CURDIR}/flashboot.ldscript -o ${.TARGET}	\
+	${CC} ${LDFLAGS} -T ${.CURDIR}/flashboot.ldscript -o ${.TARGET}	\
 	    ${.ALLSRC} ${LIBSTAND}
 flashboot: flashboot.elf
 	${OBJCOPY} -S -O binary ${.TARGET}.elf ${.TARGET}
 flashboot.md5: flashboot
 	md5 flashboot > flashboot.md5
 
 jtagboot: start.o boot2.o altera_jtag_uart.o cfi.o sdcard.o
-	${LD} ${_LDFLAGS} -T ${.CURDIR}/jtagboot.ldscript -o ${.TARGET}	\
+	${CC} ${LDFLAGS} -T ${.CURDIR}/jtagboot.ldscript -o ${.TARGET}	\
 	    ${.ALLSRC} ${LIBSTAND}
 jtagboot.md5: jtagboot
 	md5 jtagboot > jtagboot.md5
 
 CLEANFILES+=	flashboot.elf
 
 .include <bsd.prog.mk>
Index: stable/11/sys/boot/mips/beri/common/common.ldscript
===================================================================
--- stable/11/sys/boot/mips/beri/common/common.ldscript	(revision 329098)
+++ stable/11/sys/boot/mips/beri/common/common.ldscript	(revision 329099)
@@ -1,77 +1,76 @@
 /*-
  * Copyright (c) 2011-2014 Robert N. M. Watson
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * MIPS segment definitions.
  */
 __mips_ckseg_cached__ = 0xffffffff80000000;	/* BSD kernel here. */
 __mips64_xkphys_cached__ = 0x9800000000000000;	/* Device memory here. */
 __mips64_xkphys_uncached__ = 0x9000000000000000; /* Device I/O here. */
 
 /*
  * Physical addresses of various peripherals.
  */
 __cheri_flash_base__ = 0x74000000;
 __cheri_sdcard_base__ = 0x7f008000;
 
 /*
  * Location of boot2 in flash.
  */
 __cheri_flash_boot_loader_base_ = 0x03fe0000;
 __cheri_flash_boot_loader_vaddr__ = __mips64_xkphys_cached__ +
     __cheri_flash_base__ + __cheri_flash_boot_loader_base_;
 
 /*
  * Location of boot file system in flash.
  */
 __cheri_flash_bootfs_base__ = 0x1820000;
 __cheri_flash_bootfs_len__ = 0x27c0000;
 __cheri_flash_bootfs_vaddr__ = __mips64_xkphys_cached__ +
     __cheri_flash_base__ + __cheri_flash_bootfs_base__;
 
 /*
  * Location of SD card controller.
  */
 __cheri_sdcard_vaddr__ = __mips64_xkphys_uncached__ + __cheri_sdcard_base__;
 
 /*
  * Location where the production kernel gets put.  This must agree with other
  * definitions, such as in the kernel's own linker script.
  *
  * (As it happens, in the short run, we also place boot2 here, as Miniboot
  * expects to find an ELF binary there -- but that will change.)
  */
 __kernel_base__ = 0x100000;
 __kernel_vaddr__ = __mips64_xkphys_cached__ + __kernel_base__;
 
-OUTPUT_FORMAT("elf64-tradbigmips");
 OUTPUT_ARCH(mips)
Index: stable/11/sys/boot/mips/beri/loader/beri_disk_cfi.c
===================================================================
--- stable/11/sys/boot/mips/beri/loader/beri_disk_cfi.c	(revision 329098)
+++ stable/11/sys/boot/mips/beri/loader/beri_disk_cfi.c	(revision 329099)
@@ -1,149 +1,140 @@
 /*-
  * Copyright (c) 2013-2014 Robert N. M. Watson
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 
 #include <bootstrap.h>
 #include <stdarg.h>
 
 #include <stand.h>
 #include <disk.h>
 
 #include <cfi.h>
 
 static int	beri_cfi_disk_init(void);
 static int	beri_cfi_disk_open(struct open_file *, ...);
 static int	beri_cfi_disk_close(struct open_file *);
-static void	beri_cfi_disk_cleanup(void);
 static int	beri_cfi_disk_strategy(void *, int, daddr_t, size_t,
 		    char *, size_t *);
 static int	beri_cfi_disk_print(int);
 
 struct devsw beri_cfi_disk = {
 	.dv_name = "cfi",
 	.dv_type = DEVT_DISK,
 	.dv_init = beri_cfi_disk_init,
 	.dv_strategy = beri_cfi_disk_strategy,
 	.dv_open = beri_cfi_disk_open,
 	.dv_close = beri_cfi_disk_close,
 	.dv_ioctl = noioctl,
 	.dv_print = beri_cfi_disk_print,
-	.dv_cleanup = beri_cfi_disk_cleanup,
+	.dv_cleanup = NULL,
 };
 
 static int
 beri_cfi_disk_init(void)
 {
 
 	return (0);
 }
 
 static int
 beri_cfi_disk_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
     char *buf, size_t *rsizep)
 {
 	int error;
 
 	if (flag == F_WRITE)
 		return (EROFS);
 	if (flag != F_READ)
 		return (EINVAL);
 	if (rsizep != NULL)
 		*rsizep = 0;
 	error = cfi_read(buf, dblk, size >> 9);
 	if (error == 0 && rsizep != NULL)
 		*rsizep = size;
 	else if (error != 0)
 		printf("%s: error %d\n", __func__, error);
 	return (error);
 }
 
 static int
 beri_cfi_disk_open(struct open_file *f, ...)
 {
 	va_list ap;
 	struct disk_devdesc *dev;
 
 	va_start(ap, f);
 	dev = va_arg(ap, struct disk_devdesc *);
 	va_end(ap);
 
 	if (dev->d_unit != 0)
 		return (EIO);
-	return (disk_open(dev, cfi_get_mediasize(), cfi_get_sectorsize(), 0));
+	return (disk_open(dev, cfi_get_mediasize(), cfi_get_sectorsize()));
 }
 
 static int
 beri_cfi_disk_close(struct open_file *f)
 {
 	struct disk_devdesc *dev;
 
 	dev = (struct disk_devdesc *)f->f_devdata;
 	return (disk_close(dev));
 }
 
 static int
 beri_cfi_disk_print(int verbose)
 {
 	struct disk_devdesc dev;
 	char line[80];
 	int ret;
 
 	printf("%s devices:", beri_cfi_disk.dv_name);
 	if ((ret = pager_output("\n")) != 0)
 		return (ret);
 
 	snprintf(line, sizeof(line), "    cfi%d   CFI flash device\n", 0);
 	ret = pager_output(line);
 	if (ret != 0)
 		return (ret);
 	dev.d_dev = &beri_cfi_disk;
 	dev.d_unit = 0;
 	dev.d_slice = -1;
 	dev.d_partition = -1;
-	if (disk_open(&dev, cfi_get_mediasize(),
-	    cfi_get_sectorsize(), 0) == 0) {
+	if (disk_open(&dev, cfi_get_mediasize(), cfi_get_sectorsize()) == 0) {
 		snprintf(line, sizeof(line), "    cfi%d", 0);
 		ret = disk_print(&dev, line, verbose);
 		disk_close(&dev);
 	}
 
 	return (ret);
-}
-
-static void
-beri_cfi_disk_cleanup(void)
-{
-
-	disk_cleanup(&beri_cfi_disk);
 }
Index: stable/11/sys/boot/mips/beri/loader/beri_disk_sdcard.c
===================================================================
--- stable/11/sys/boot/mips/beri/loader/beri_disk_sdcard.c	(revision 329098)
+++ stable/11/sys/boot/mips/beri/loader/beri_disk_sdcard.c	(revision 329099)
@@ -1,154 +1,146 @@
 /*-
  * Copyright (c) 2013-2014 Robert N. M. Watson
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 
 #include <bootstrap.h>
 #include <stdarg.h>
 
 #include <stand.h>
 #include <disk.h>
 
 #include <sdcard.h>
 
 static int	beri_sdcard_disk_init(void);
 static int	beri_sdcard_disk_open(struct open_file *, ...);
 static int	beri_sdcard_disk_close(struct open_file *);
-static void	beri_sdcard_disk_cleanup(void);
 static int	beri_sdcard_disk_strategy(void *, int, daddr_t, size_t,
 		    char *, size_t *);
 static int	beri_sdcard_disk_print(int);
 
 struct devsw beri_sdcard_disk = {
 	.dv_name = "sdcard",
 	.dv_type = DEVT_DISK,
 	.dv_init = beri_sdcard_disk_init,
 	.dv_strategy = beri_sdcard_disk_strategy,
 	.dv_open = beri_sdcard_disk_open,
 	.dv_close = beri_sdcard_disk_close,
 	.dv_ioctl = noioctl,
 	.dv_print = beri_sdcard_disk_print,
- 	.dv_cleanup = beri_sdcard_disk_cleanup,
+	.dv_cleanup = NULL,
 };
 
 static int
 beri_sdcard_disk_init(void)
 {
 
 	return (0);
 }
 
 static int
 beri_sdcard_disk_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
     char *buf, size_t *rsizep)
 {
 	int error;
 
 	if (flag == F_WRITE)
 		return (EROFS);
 	if (flag != F_READ)
 		return (EINVAL);
 	if (rsizep != NULL)
 		*rsizep = 0;
 	error = altera_sdcard_read(buf, dblk, size >> 9);
 	if (error == 0 && rsizep != NULL)
 		*rsizep = size;
 	else if (error != 0)
 		printf("%s: error %d\n", __func__, error);
 	return (error);
 }
 
 static int
 beri_sdcard_disk_open(struct open_file *f, ...)
 {
 	va_list ap;
 	struct disk_devdesc *dev;
 
 	va_start(ap, f);
 	dev = va_arg(ap, struct disk_devdesc *);
 	va_end(ap);
 
 	if (!(altera_sdcard_get_present())) {
 		printf("SD card not present or not supported\n");
 		return (ENXIO);
 	}
 
 	if (dev->d_unit != 0)
 		return (EIO);
 	return (disk_open(dev, altera_sdcard_get_mediasize(),
-	    altera_sdcard_get_sectorsize(), 0));
+	    altera_sdcard_get_sectorsize()));
 }
 
 static int
 beri_sdcard_disk_close(struct open_file *f)
 {
 	struct disk_devdesc *dev;
 
 	dev = (struct disk_devdesc *)f->f_devdata;
 	return (disk_close(dev));
 }
 
 static int
 beri_sdcard_disk_print(int verbose)
 {
 	struct disk_devdesc dev;
 	char line[80];
 	int ret;
 
 	printf("%s devices:", beri_sdcard_disk.dv_name);
 	if ((ret = pager_output("\n")) != 0)
 		return (ret);
 	
 	snprintf(line, sizeof(line), "    sdcard%d   Altera SD card drive\n", 0);
 	ret = pager_output(line);
 	if (ret != 0)
 	    return (ret);
 	dev.d_dev = &beri_sdcard_disk;
 	dev.d_unit = 0;
 	dev.d_slice = -1;
 	dev.d_partition = -1;
 	if (disk_open(&dev, altera_sdcard_get_mediasize(),
-	    altera_sdcard_get_sectorsize(), 0) == 0) {
+	    altera_sdcard_get_sectorsize()) == 0) {
 		snprintf(line, sizeof(line), "    sdcard%d", 0);
 		ret = disk_print(&dev, line, verbose);
 		disk_close(&dev);
 	}
 	return (ret);
-}
-
-static void
-beri_sdcard_disk_cleanup(void)
-{
-
-	disk_cleanup(&beri_sdcard_disk);
 }
Index: stable/11/sys/boot/mips/beri/loader/loader.ldscript
===================================================================
--- stable/11/sys/boot/mips/beri/loader/loader.ldscript	(revision 329098)
+++ stable/11/sys/boot/mips/beri/loader/loader.ldscript	(revision 329099)
@@ -1,85 +1,84 @@
 /*-
  * Copyright (c) 2011-2014 Robert N. M. Watson
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 INCLUDE ../common/common.ldscript
 
 /*
  * Location where loader will execute.
  */
 __loader_base__ = 0x20000;
 __loader_base_vaddr__ = __mips64_xkphys_cached__ + __loader_base__;
 
 /*
  * Highest address the loader is allowed to use below the kernel.
  */
 __loader_end__ = 0x100000;
 __loader_end_vaddr__ = __mips64_xkphys_cached__ + __loader_end__;
 
-OUTPUT_FORMAT("elf64-tradbigmips");
 OUTPUT_ARCH(mips)
 ENTRY(start)
 SECTIONS
 {
 	/*
 	 * We rely on boot2 having (a) configured a stack, and (b) loaded us
 	 * to an appropriate bit of physical/virtual memory such that no
 	 * self-relocating code is required here.
 	 */
 	. = __loader_base_vaddr__;
 	. += SIZEOF_HEADERS;
 
 	.text ALIGN(0x10): {
 		start.o(.text*)
 		*(EXCLUDE_FILE (start.o) .text*)
 		*(.rodata*)
 
 		__start_set_Xcommand_set = .;
 		KEEP(*(set_Xcommand_set))
 		__stop_set_Xcommand_set = .;
 
 		__start_set_Xficl_compile_set = .;
 		KEEP(*(set_Xficl_compile_set))
 		__stop_set_Xficl_compile_set = .;
 	}
 	.data ALIGN(0x10): { *(.data*)}
 	.bss ALIGN(0x10): { *(.bss*) }
 
 	__heap = ALIGN(0x8);	/* 64-bit aligned heap pointer */
 	__data_end = .;
 	__boot_loader_len__ = . - __loader_base_vaddr__;
 	__bss_start = ADDR(.bss);
 	__bss_end = ALIGN(__bss_start + SIZEOF(.bss), 0x8);
 
 	__heap_start = .;
 	__heap_end = __loader_end_vaddr__;
 	__heap_len = __heap_end - __heap_start;
 }
Index: stable/11/sys/boot/sparc64/loader/Makefile
===================================================================
--- stable/11/sys/boot/sparc64/loader/Makefile	(revision 329098)
+++ stable/11/sys/boot/sparc64/loader/Makefile	(revision 329099)
@@ -1,97 +1,100 @@
 # $FreeBSD$
 
 .include <src.opts.mk>
 MK_SSP=		no
 MAN=
 
 PROG?=		loader
 NEWVERSWHAT?=	"bootstrap loader" sparc64
 VERSION_FILE=	${.CURDIR}/../loader/version
 INSTALLFLAGS=	-b
 
 # Architecture-specific loader code
 SRCS=		locore.S main.c metadata.c vers.c
 
 LOADER_DISK_SUPPORT?=	yes
 LOADER_UFS_SUPPORT?=	yes
 LOADER_CD9660_SUPPORT?=	yes
 LOADER_ZFS_SUPPORT?=	no
 LOADER_NET_SUPPORT?=	yes
 LOADER_NFS_SUPPORT?=	yes
 LOADER_TFTP_SUPPORT?=	yes
 LOADER_GZIP_SUPPORT?=	yes
 LOADER_BZIP2_SUPPORT?=	no
 LOADER_DEBUG?=		no
 
 .if ${LOADER_DEBUG} == "yes"
 CFLAGS+=	-DLOADER_DEBUG
 .endif
 .if ${LOADER_DISK_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_DISK_SUPPORT
 .endif
 .if ${LOADER_UFS_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_UFS_SUPPORT
 .endif
 .if ${LOADER_CD9660_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_CD9660_SUPPORT
 .endif
 .if ${LOADER_ZFS_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_ZFS_SUPPORT
 CFLAGS+=	-I${.CURDIR}/../../zfs
 CFLAGS+=	-I${.CURDIR}/../../../cddl/boot/zfs
 LIBZFSBOOT=	${.OBJDIR}/../../zfs/libzfsboot.a
 .endif
 .if ${LOADER_GZIP_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_GZIP_SUPPORT
 .endif
 .if ${LOADER_BZIP2_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_BZIP2_SUPPORT
 .endif
 .if ${LOADER_NET_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_NET_SUPPORT
 .endif
 .if ${LOADER_NFS_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_NFS_SUPPORT
 .endif
 .if ${LOADER_TFTP_SUPPORT} == "yes"
 CFLAGS+=	-DLOADER_TFTP_SUPPORT
 .endif
 
 .if ${MK_FORTH} != "no"
 # Enable BootForth
 BOOT_FORTH=	yes
 CFLAGS+=	-DBOOT_FORTH -I${.CURDIR}/../../ficl
 CFLAGS+=	-I${.CURDIR}/../../ficl/sparc64
 LIBFICL=	${.OBJDIR}/../../ficl/libficl.a
 .endif
 
 # Always add MI sources
 .PATH:		${.CURDIR}/../../common
 .include	"${.CURDIR}/../../common/Makefile.inc"
 CFLAGS+=	-I${.CURDIR}/../../common
 CFLAGS+=	-I.
 
 CLEANFILES+=	loader.help
 
 LDFLAGS=	-static
 
 # Open Firmware standalone support library
 LIBOFW=		${.OBJDIR}/../../ofw/libofw/libofw.a
 CFLAGS+=	-I${.CURDIR}/../../ofw/libofw/
 
 # where to get libstand from
 CFLAGS+=	-I${.CURDIR}/../../../../lib/libstand/
 
+# Need sys/ for crypto/intake.h
+CFLAGS+=	-I${SRCTOP}/sys
+
 DPADD=		${LIBFICL} ${LIBZFSBOOT} ${LIBOFW} ${LIBSTAND}
 LDADD=		${LIBFICL} ${LIBZFSBOOT} ${LIBOFW} -lstand
 
 loader.help: help.common help.sparc64
 	cat ${.ALLSRC} | \
 	    awk -f ${.CURDIR}/../../common/merge_help.awk > ${.TARGET}
 
 .PATH: ${.CURDIR}/../../forth
 .include	"${.CURDIR}/../../forth/Makefile.inc"
 
 FILES+= loader.rc menu.rc
 
 .include <bsd.prog.mk>
Index: stable/11/sys/boot/uboot/lib/disk.c
===================================================================
--- stable/11/sys/boot/uboot/lib/disk.c	(revision 329098)
+++ stable/11/sys/boot/uboot/lib/disk.c	(revision 329099)
@@ -1,315 +1,314 @@
 /*-
  * Copyright (c) 2008 Semihalf, Rafal Jaworowski
  * Copyright (c) 2009 Semihalf, Piotr Ziecik
  * Copyright (c) 2012 Andrey V. Elsukov <ae@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 /*
  * Block storage I/O routines for U-Boot
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/disk.h>
 #include <machine/stdarg.h>
 #include <stand.h>
 
 #include "api_public.h"
 #include "bootstrap.h"
 #include "disk.h"
 #include "glue.h"
 #include "libuboot.h"
 
 #define stor_printf(fmt, args...) do {			\
     printf("%s%d: ", dev->d_dev->dv_name, dev->d_unit);	\
     printf(fmt, ##args);				\
 } while (0)
 
 #ifdef DEBUG
 #define debugf(fmt, args...) do { printf("%s(): ", __func__);	\
     printf(fmt,##args); } while (0)
 #else
 #define debugf(fmt, args...)
 #endif
 
 static struct {
 	int		opened;	/* device is opened */
 	int		handle;	/* storage device handle */
 	int		type;	/* storage type */
 	off_t		blocks;	/* block count */
 	u_int		bsize;	/* block size */
 } stor_info[UB_MAX_DEV];
 
 #define	SI(dev)		(stor_info[(dev)->d_unit])
 
 static int stor_info_no = 0;
 static int stor_opendev(struct disk_devdesc *);
 static int stor_readdev(struct disk_devdesc *, daddr_t, size_t, char *);
 
 /* devsw I/F */
 static int stor_init(void);
 static int stor_strategy(void *, int, daddr_t, size_t, char *, size_t *);
 static int stor_open(struct open_file *, ...);
 static int stor_close(struct open_file *);
 static int stor_ioctl(struct open_file *f, u_long cmd, void *data);
 static int stor_print(int);
 static void stor_cleanup(void);
 
 struct devsw uboot_storage = {
 	"disk",
 	DEVT_DISK,
 	stor_init,
 	stor_strategy,
 	stor_open,
 	stor_close,
 	stor_ioctl,
 	stor_print,
 	stor_cleanup
 };
 
 static int
 stor_init(void)
 {
 	struct device_info *di;
 	int i;
 
 	if (devs_no == 0) {
 		printf("No U-Boot devices! Really enumerated?\n");
 		return (-1);
 	}
 
 	for (i = 0; i < devs_no; i++) {
 		di = ub_dev_get(i);
 		if ((di != NULL) && (di->type & DEV_TYP_STOR)) {
 			if (stor_info_no >= UB_MAX_DEV) {
 				printf("Too many storage devices: %d\n",
 				    stor_info_no);
 				return (-1);
 			}
 			stor_info[stor_info_no].handle = i;
 			stor_info[stor_info_no].opened = 0;
 			stor_info[stor_info_no].type = di->type;
 			stor_info[stor_info_no].blocks =
 			    di->di_stor.block_count;
 			stor_info[stor_info_no].bsize =
 			    di->di_stor.block_size;
 			stor_info_no++;
 		}
 	}
 
 	if (!stor_info_no) {
 		debugf("No storage devices\n");
 		return (-1);
 	}
 
 	debugf("storage devices found: %d\n", stor_info_no);
 	return (0);
 }
 
 static void
 stor_cleanup(void)
 {
 	int i;
 
 	for (i = 0; i < stor_info_no; i++)
 		if (stor_info[i].opened > 0)
 			ub_dev_close(stor_info[i].handle);
-	disk_cleanup(&uboot_storage);
 }
 
 static int
 stor_strategy(void *devdata, int rw, daddr_t blk, size_t size,
     char *buf, size_t *rsize)
 {
 	struct disk_devdesc *dev = (struct disk_devdesc *)devdata;
 	daddr_t bcount;
 	int err;
 
 	if (rw != F_READ) {
 		stor_printf("write attempt, operation not supported!\n");
 		return (EROFS);
 	}
 
 	if (size % SI(dev).bsize) {
 		stor_printf("size=%zu not multiple of device "
 		    "block size=%d\n",
 		    size, SI(dev).bsize);
 		return (EIO);
 	}
 	bcount = size / SI(dev).bsize;
 	if (rsize)
 		*rsize = 0;
 
 	err = stor_readdev(dev, blk + dev->d_offset, bcount, buf);
 	if (!err && rsize)
 		*rsize = size;
 
 	return (err);
 }
 
 static int
 stor_open(struct open_file *f, ...)
 {
 	va_list ap;
 	struct disk_devdesc *dev;
 
 	va_start(ap, f);
 	dev = va_arg(ap, struct disk_devdesc *);
 	va_end(ap);
 
 	return (stor_opendev(dev));
 }
 
 static int
 stor_opendev(struct disk_devdesc *dev)
 {
 	int err;
 
 	if (dev->d_unit < 0 || dev->d_unit >= stor_info_no)
 		return (EIO);
 
 	if (SI(dev).opened == 0) {
 		err = ub_dev_open(SI(dev).handle);
 		if (err != 0) {
 			stor_printf("device open failed with error=%d, "
 			    "handle=%d\n", err, SI(dev).handle);
 			return (ENXIO);
 		}
 		SI(dev).opened++;
 	}
 	return (disk_open(dev, SI(dev).blocks * SI(dev).bsize,
-	    SI(dev).bsize, 0));
+	    SI(dev).bsize));
 }
 
 static int
 stor_close(struct open_file *f)
 {
 	struct disk_devdesc *dev;
 
 	dev = (struct disk_devdesc *)(f->f_devdata);
 	return (disk_close(dev));
 }
 
 static int
 stor_readdev(struct disk_devdesc *dev, daddr_t blk, size_t size, char *buf)
 {
 	lbasize_t real_size;
 	int err;
 
 	debugf("reading blk=%d size=%d @ 0x%08x\n", (int)blk, size, (uint32_t)buf);
 
 	err = ub_dev_read(SI(dev).handle, buf, size, blk, &real_size);
 	if (err != 0) {
 		stor_printf("read failed, error=%d\n", err);
 		return (EIO);
 	}
 
 	if (real_size != size) {
 		stor_printf("real size != size\n");
 		err = EIO;
 	}
 
 	return (err);
 }
 
 static int
 stor_print(int verbose)
 {
 	struct disk_devdesc dev;
 	static char line[80];
 	int i, ret = 0;
 
 	if (stor_info_no == 0)
 		return (ret);
 
 	printf("%s devices:", uboot_storage.dv_name);
 	if ((ret = pager_output("\n")) != 0)
 		return (ret);
 
 	for (i = 0; i < stor_info_no; i++) {
 		dev.d_dev = &uboot_storage;
 		dev.d_unit = i;
 		dev.d_slice = -1;
 		dev.d_partition = -1;
 		snprintf(line, sizeof(line), "\tdisk%d (%s)\n", i,
 		    ub_stor_type(SI(&dev).type));
 		if ((ret = pager_output(line)) != 0)
 			break;
 		if (stor_opendev(&dev) == 0) {
 			sprintf(line, "\tdisk%d", i);
 			ret = disk_print(&dev, line, verbose);
 			disk_close(&dev);
 			if (ret != 0)
 				break;
 		}
 	}
 	return (ret);
 }
 
 static int
 stor_ioctl(struct open_file *f, u_long cmd, void *data)
 {
 	struct disk_devdesc *dev;
 
 	dev = (struct disk_devdesc *)f->f_devdata;
 	switch (cmd) {
 	case DIOCGSECTORSIZE:
 		*(u_int *)data = SI(dev).bsize;
 		break;
 	case DIOCGMEDIASIZE:
-		*(off_t *)data = SI(dev).bsize * SI(dev).blocks;
+		*(uint64_t *)data = SI(dev).bsize * SI(dev).blocks;
 		break;
 	default:
 		return (ENOTTY);
 	}
 	return (0);
 }
 
 
 /*
  * Return the device unit number for the given type and type-relative unit
  * number.
  */
 int
 uboot_diskgetunit(int type, int type_unit)
 {
 	int local_type_unit;
 	int i;
 
 	local_type_unit = 0;
 	for (i = 0; i < stor_info_no; i++) {
 		if ((stor_info[i].type & type) == type) {
 			if (local_type_unit == type_unit) {
 				return (i);
 			}
 			local_type_unit++;
 		}
 	}
 
 	return (-1);
 }
Index: stable/11/sys/boot/usb/storage/umass_loader.c
===================================================================
--- stable/11/sys/boot/usb/storage/umass_loader.c	(revision 329098)
+++ stable/11/sys/boot/usb/storage/umass_loader.c	(revision 329099)
@@ -1,229 +1,238 @@
 /* $FreeBSD$ */
 /*-
  * Copyright (c) 2014 Hans Petter Selasky <hselasky@FreeBSD.org>
  * All rights reserved.
  *
  * This software was developed by SRI International and the University of
  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
  * ("CTSRD"), as part of the DARPA CRASH research programme.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/param.h>
 
 #include <bootstrap.h>
 #include <stdarg.h>
 
 #include <stand.h>
 #include <disk.h>
 
 #define	HAVE_STANDARD_DEFS
 
 #include USB_GLOBAL_INCLUDE_FILE
 
 #include "umass_common.h"
 
 static int umass_disk_init(void);
 static int umass_disk_open(struct open_file *,...);
 static int umass_disk_close(struct open_file *);
 static void umass_disk_cleanup(void);
 static int umass_disk_ioctl(struct open_file *, u_long, void *);
 static int umass_disk_strategy(void *, int, daddr_t, size_t, char *, size_t *);
 static int umass_disk_print(int);
 
 /*
  * Device switch entry for the USB mass storage disk.  It is registered
  * under the name "umass" as a DEVT_DISK device and wires every operation
  * to the umass_disk_*() routines in this file.
  */
 struct devsw umass_disk = {
 	.dv_name = "umass",
 	.dv_type = DEVT_DISK,
 	.dv_init = umass_disk_init,
 	.dv_strategy = umass_disk_strategy,
 	.dv_open = umass_disk_open,
 	.dv_close = umass_disk_close,
 	.dv_ioctl = umass_disk_ioctl,
 	.dv_print = umass_disk_print,
 	.dv_cleanup = umass_disk_cleanup,
 };
 
 /*
  * Bring up the USB stack and wait for a mass storage device to attach.
  *
  * The loop polls once per tick for up to 8 seconds (8 * hz ticks) and
  * returns as soon as umass_uaa.device becomes non-NULL.  The function
  * always returns 0; a missing device is reported by the open and strategy
  * routines, which return ENXIO.
  */
 static int
 umass_disk_init(void)
 {
 	uint32_t time;
 
 	usb_init();
 	usb_needs_explore_all();
 
 	/* wait 8 seconds for a USB mass storage device to appear */
 	for (time = 0; time < (8 * hz); time++) {
 		usb_idle();
 		/*
 		 * One tick per iteration.  The counter is advanced by the
 		 * loop header only; bumping it here as well would halve the
 		 * timeout.
 		 */
 		delay(1000000 / hz);
 		callout_process(1);
 		if (umass_uaa.device != NULL)
 			return (0);
 	}
 	return (0);
 }
 
 /*
  * Transfer `size' bytes between buf and the attached device starting at
  * block dblk.  The byte count is converted to a block count by shifting
  * right by 9.
  *
  * Returns ENXIO when no device is attached, EROFS for a flag that is
  * neither F_WRITE nor F_READ, EINVAL when the transfer fails, and 0 on
  * success.  *rsizep (when supplied) is 0 on failure and `size' on success.
  */
 static int
 umass_disk_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
     char *buf, size_t *rsizep)
 {
 	if (umass_uaa.device == NULL)
 		return (ENXIO);
 	if (rsizep != NULL)
 		*rsizep = 0;
 
 	/* Anything that is neither a write nor a read is refused. */
 	if (flag != F_WRITE && flag != F_READ)
 		return (EROFS);
 
 	if (flag == F_WRITE) {
 		if (usb_msc_write_10(umass_uaa.device, 0, dblk, size >> 9, buf) != 0)
 			return (EINVAL);
 	} else {
 		if (usb_msc_read_10(umass_uaa.device, 0, dblk, size >> 9, buf) != 0)
 			return (EINVAL);
 	}
 
 	if (rsizep != NULL)
 		*rsizep = size;
 	return (0);
 }
 
 /*
  * Query the capacity of the attached device and open it through the
  * generic disk layer.
  *
  * The media size handed to disk_open() is (nblock + 1) * blocksize,
  * computed in 64 bits.  Returns EINVAL if the capacity cannot be read,
  * otherwise the result of disk_open().
  */
 static int
 umass_disk_open_sub(struct disk_devdesc *dev)
 {
 	uint32_t nblock;
 	uint32_t blocksize;
 
 	if (usb_msc_read_capacity(umass_uaa.device, 0, &nblock, &blocksize) != 0)
 		return (EINVAL);
 
-	return (disk_open(dev, ((uint64_t)nblock + 1) * (uint64_t)blocksize, blocksize, 0));
+	return (disk_open(dev, ((uint64_t)nblock + 1) * (uint64_t)blocksize, blocksize));
 }
 
 /*
  * devsw open entry point.  The disk descriptor arrives as the single
  * variadic argument.  Fails with ENXIO when no device is attached and with
  * EIO for any unit other than 0; otherwise returns the result of
  * umass_disk_open_sub().
  */
 static int
 umass_disk_open(struct open_file *f,...)
 {
 	struct disk_devdesc *desc;
 	va_list args;
 
 	va_start(args, f);
 	desc = va_arg(args, struct disk_devdesc *);
 	va_end(args);
 
 	if (umass_uaa.device == NULL)
 		return (ENXIO);
 	return (desc->d_unit == 0 ? umass_disk_open_sub(desc) : EIO);
 }
 
 /*
  * ioctl handler for the USB mass storage disk.
  *
  * The request is first offered to disk_ioctl(); only when that answers
  * ENOTTY is it handled here.  DIOCGSECTORSIZE stores the device block size
  * as a uint32_t.  DIOCGMEDIASIZE stores the media size in bytes as a
  * uint64_t, computed the same way umass_disk_open_sub() computes it.
  *
  * Returns EINVAL when f carries no descriptor or the capacity cannot be
  * read, ENXIO for an unknown command, and 0 on success.
  */
 static int
-umass_disk_ioctl(struct open_file *f __unused, u_long cmd, void *buf)
+umass_disk_ioctl(struct open_file *f, u_long cmd, void *buf)
 {
+	struct disk_devdesc *dev;
 	uint32_t nblock;
 	uint32_t blocksize;
+	int rc;
 
+	dev = (struct disk_devdesc *)(f->f_devdata);
+	if (dev == NULL)
+		return (EINVAL);
+
+	rc = disk_ioctl(dev, cmd, buf);
+	if (rc != ENOTTY)
+		return (rc);
+
 	switch (cmd) {
-	case IOCTL_GET_BLOCK_SIZE:
-	case IOCTL_GET_BLOCKS:
+	case DIOCGSECTORSIZE:
+	case DIOCGMEDIASIZE:
 		if (usb_msc_read_capacity(umass_uaa.device, 0,
 		    &nblock, &blocksize) != 0)
 			return (EINVAL);
 
-		if (cmd == IOCTL_GET_BLOCKS)
-			*(uint32_t*)buf = nblock;
+		if (cmd == DIOCGMEDIASIZE)
+			/* Bytes, not the raw value read from the device. */
+			*(uint64_t*)buf = ((uint64_t)nblock + 1) *
+			    (uint64_t)blocksize;
 		else
 			*(uint32_t*)buf = blocksize;
 
 		return (0);
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * devsw close entry point: hands the disk descriptor stored in
  * f->f_devdata to disk_close() and returns its result.
  */
 static int
 umass_disk_close(struct open_file *f)
 {
 
 	return (disk_close((struct disk_devdesc *)f->f_devdata));
 }
 
 /*
  * Print the USB mass storage device through the pager.
  *
  * A fixed summary line for umass0 is emitted; if the device can be opened,
  * disk_print() is asked to describe its contents.  Returns 1 if the pager
  * stops after the heading, otherwise the value from pager_output() or
  * disk_print() (0 on success).
  */
 static int
 umass_disk_print(int verbose)
 {
 	struct disk_devdesc dev;
 	int ret;	/* result of the pager / disk_print() calls */
 
 	printf("%s devices:", umass_disk.dv_name);
 	if (pager_output("\n") != 0)
 		return (1);
 
 	memset(&dev, 0, sizeof(dev));
 
 	ret = pager_output("    umass0   UMASS device\n");
 	if (ret != 0)
 		return (ret);
 	dev.d_dev = &umass_disk;
 	dev.d_unit = 0;
 	dev.d_slice = -1;
 	dev.d_partition = -1;
 
 	if (umass_disk_open_sub(&dev) == 0) {
 		ret = disk_print(&dev, "    umass0", verbose);
 		disk_close(&dev);
 	}
 	return (ret);
 }
 
 /*
  * devsw cleanup entry point: shuts the USB stack down with usb_uninit().
  */
 static void
 umass_disk_cleanup(void)
 {
-	disk_cleanup(&umass_disk);
 
 	usb_uninit();
 }
 
 
 /* USB specific functions */
 
 extern void callout_process(int);
 extern void usb_idle(void);
 extern void usb_init(void);
 extern void usb_uninit(void);
 
 /*
  * DELAY() wrapper: forwards the requested delay unchanged to delay().
  */
 void
 DELAY(unsigned int usdelay)
 {
 	delay(usdelay);
 }
 
 /*
  * Busy-wait replacement for pause(): delays for `timeout' ticks, where one
  * tick is 1000000 / hz as passed to delay().  A timeout of 0 is treated as
  * one tick.  `what' is unused.  Always returns 0.
  */
 int
 pause(const char *what, int timeout)
 {
 	int ticks = (timeout == 0) ? 1 : timeout;
 
 	delay((1000000 / hz) * ticks);
 	return (0);
 }
Index: stable/11/sys/boot/userboot/userboot/userboot_disk.c
===================================================================
--- stable/11/sys/boot/userboot/userboot/userboot_disk.c	(revision 329098)
+++ stable/11/sys/boot/userboot/userboot/userboot_disk.c	(revision 329099)
@@ -1,237 +1,236 @@
 /*-
  * Copyright (c) 2011 Google, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Userboot disk image handling.
  */
 
 #include <sys/disk.h>
 #include <stand.h>
 #include <stdarg.h>
 #include <bootstrap.h>
 
 #include "disk.h"
 #include "libuserboot.h"
 
 /* Per-unit state of a guest disk image, filled in by userdisk_init(). */
 struct userdisk_info {
 	uint64_t	mediasize;	/* value returned by DIOCGMEDIASIZE */
 	uint16_t	sectorsize;	/* value returned by DIOCGSECTORSIZE */
 	int		ud_open;	/* reference counter */
 	void		*ud_bcache;	/* buffer cache data */
 };
 
 int userboot_disk_maxunit = 0;
 
 static int userdisk_maxunit = 0;
 static struct userdisk_info	*ud_info;
 
 static int	userdisk_init(void);
 static void	userdisk_cleanup(void);
 static int	userdisk_strategy(void *devdata, int flag, daddr_t dblk,
 		    size_t size, char *buf, size_t *rsize);
 static int	userdisk_realstrategy(void *devdata, int flag, daddr_t dblk,
 		    size_t size, char *buf, size_t *rsize);
 static int	userdisk_open(struct open_file *f, ...);
 static int	userdisk_close(struct open_file *f);
 static int	userdisk_ioctl(struct open_file *f, u_long cmd, void *data);
 static int	userdisk_print(int verbose);
 
 /*
  * Device switch entry for userboot guest disks, using a positional
  * initializer.
  * NOTE(review): presumably the members are, in order, name, type, init,
  * strategy, open, close, ioctl, print and cleanup - confirm against the
  * declaration of struct devsw.
  */
 struct devsw userboot_disk = {
 	"disk",
 	DEVT_DISK,
 	userdisk_init,
 	userdisk_strategy,
 	userdisk_open,
 	userdisk_close,
 	userdisk_ioctl,
 	userdisk_print,
 	userdisk_cleanup
 };
 
 /*
  * Initialize userdisk_info structure for each disk.
  *
  * The number of units is taken from userboot_disk_maxunit.  For every unit
  * the sector size and media size are queried through the diskioctl
  * callback and recorded, and the unit starts out closed with no block
  * cache.  Finally the units are announced to the block cache with
  * bcache_add_dev().
  *
  * Returns 0 on success, ENOMEM if the table cannot be allocated and ENXIO
  * if either query fails for any unit.
  */
 static int
 userdisk_init(void)
 {
 	off_t mediasize;
 	u_int sectorsize;
 	int i;
 
 	userdisk_maxunit = userboot_disk_maxunit;
 	if (userdisk_maxunit > 0) {
 		ud_info = malloc(sizeof(*ud_info) * userdisk_maxunit);
 		if (ud_info == NULL)
 			return (ENOMEM);
 		for (i = 0; i < userdisk_maxunit; i++) {
 			/* The callback yields an integer error code. */
 			if (CALLBACK(diskioctl, i, DIOCGSECTORSIZE,
 			    &sectorsize) != 0 || CALLBACK(diskioctl, i,
 			    DIOCGMEDIASIZE, &mediasize) != 0)
 				return (ENXIO);
 			ud_info[i].mediasize = mediasize;
 			ud_info[i].sectorsize = sectorsize;
 			ud_info[i].ud_open = 0;
 			ud_info[i].ud_bcache = NULL;
 		}
 	}
 	bcache_add_dev(userdisk_maxunit);
 	return(0);
 }
 
 /*
  * devsw cleanup entry point: releases the per-unit table allocated by
  * userdisk_init() when at least one unit was configured.
  */
 static void
 userdisk_cleanup(void)
 {
 
 	if (userdisk_maxunit > 0)
 		free(ud_info);
-	disk_cleanup(&userboot_disk);
 }
 
 /*
  * Print information about disks
  *
  * Emits one summary line per unit through the pager and, when the unit can
  * be opened with disk_open(), lets disk_print() describe it.  Returns 0 on
  * success or when there are no units, otherwise the first non-zero value
  * from pager_output() or disk_print().
  */
 static int
 userdisk_print(int verbose)
 {
 	struct disk_devdesc dev;
 	char line[80];
 	int i, ret = 0;
 
 	if (userdisk_maxunit == 0)
 		return (0);
 
 	printf("%s devices:", userboot_disk.dv_name);
 	if ((ret = pager_output("\n")) != 0)
 		return (ret);
 
 	for (i = 0; i < userdisk_maxunit; i++) {
 		snprintf(line, sizeof(line),
 		    "    disk%d:   Guest drive image\n", i);
 		ret = pager_output(line);
 		if (ret != 0)
 			break;
 		/* Describe the whole disk: no slice, no partition. */
 		dev.d_dev = &userboot_disk;
 		dev.d_unit = i;
 		dev.d_slice = -1;
 		dev.d_partition = -1;
 		if (disk_open(&dev, ud_info[i].mediasize,
-		    ud_info[i].sectorsize, 0) == 0) {
+		    ud_info[i].sectorsize) == 0) {
 			snprintf(line, sizeof(line), "    disk%d", i);
 			ret = disk_print(&dev, line, verbose);
 			disk_close(&dev);
 			if (ret != 0)
 				break;
 		}
 	}
 	return (ret);
 }
 
 /*
  * Attempt to open the disk described by (dev) for use by (f).
  *
  * The descriptor is taken from the variadic argument list.  A unit number
  * outside [0, userdisk_maxunit) yields EIO.  The unit's reference count is
  * bumped and its block cache allocated on first use before disk_open() is
  * called; the result of disk_open() is returned.
  *
  * NOTE(review): the reference count is not rolled back when disk_open()
  * fails - confirm whether callers invoke userdisk_close() in that case.
  */
 static int
 userdisk_open(struct open_file *f, ...)
 {
 	va_list			ap;
 	struct disk_devdesc	*dev;
 
 	va_start(ap, f);
 	dev = va_arg(ap, struct disk_devdesc *);
 	va_end(ap);
 
 	if (dev->d_unit < 0 || dev->d_unit >= userdisk_maxunit)
 		return (EIO);
 	ud_info[dev->d_unit].ud_open++;
 	if (ud_info[dev->d_unit].ud_bcache == NULL)
 		ud_info[dev->d_unit].ud_bcache = bcache_allocate();
 	return (disk_open(dev, ud_info[dev->d_unit].mediasize,
-	    ud_info[dev->d_unit].sectorsize, 0));
+	    ud_info[dev->d_unit].sectorsize));
 }
 
 static int
 userdisk_close(struct open_file *f)
 {
 	struct disk_devdesc *dev;
 
 	dev = (struct disk_devdesc *)f->f_devdata;
 	ud_info[dev->d_unit].ud_open--;
 	if (ud_info[dev->d_unit].ud_open == 0) {
 		bcache_free(ud_info[dev->d_unit].ud_bcache);
 		ud_info[dev->d_unit].ud_bcache = NULL;
 	}
 	return (disk_close(dev));
 }
 
 /*
  * devsw strategy entry point.  Routes the request through the block cache
  * of the unit, with userdisk_realstrategy() as the backing routine.  The
  * block number is offset by the descriptor's d_offset.
  */
 static int
 userdisk_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
 	struct disk_devdesc *desc = (struct disk_devdesc *)devdata;
 	struct bcache_devdata cache;
 
 	cache.dv_cache = ud_info[desc->d_unit].ud_bcache;
 	cache.dv_devdata = devdata;
 	cache.dv_strategy = userdisk_realstrategy;
 
 	return (bcache_strategy(&cache, rw, desc->d_offset + dblk, size, buf,
 	    rsize));
 }
 
 /*
  * Backing strategy routine: reads `size' bytes at block dblk of the unit
  * through the diskread callback.
  *
  * Writes are refused with EROFS and any other non-read request with
  * EINVAL.  On success *rsize (when supplied) receives the number of bytes
  * actually read; on a callback error it is left at 0 and the callback's
  * error is returned.
  */
 static int
 userdisk_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
 	struct disk_devdesc *desc = devdata;
 	size_t		left;
 	uint64_t	byteoff;
 	int		error;
 
 	if (rw == F_WRITE)
 		return (EROFS);
 	if (rw != F_READ)
 		return (EINVAL);
 	if (rsize)
 		*rsize = 0;
 
 	/* Convert the block number into a byte offset. */
 	byteoff = dblk * ud_info[desc->d_unit].sectorsize;
 	error = CALLBACK(diskread, desc->d_unit, byteoff, buf, size, &left);
 	if (error == 0 && rsize)
 		*rsize = size - left;
 	return (error);
 }
 
 /*
  * devsw ioctl entry point: forwards the command and its argument for the
  * unit to the diskioctl callback and returns the callback's result.
  */
 static int
 userdisk_ioctl(struct open_file *f, u_long cmd, void *data)
 {
 	struct disk_devdesc *dev;
 
 	dev = (struct disk_devdesc *)f->f_devdata;
 	return (CALLBACK(diskioctl, dev->d_unit, cmd, data));
 }
Index: stable/11/sys/boot/zfs/libzfs.h
===================================================================
--- stable/11/sys/boot/zfs/libzfs.h	(revision 329098)
+++ stable/11/sys/boot/zfs/libzfs.h	(revision 329099)
@@ -1,74 +1,92 @@
 /*-
  * Copyright (c) 2012 Andriy Gapon <avg@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _BOOT_LIBZFS_H_
 #define _BOOT_LIBZFS_H_
 
 #define	ZFS_MAXNAMELEN	256
 
 /*
  * ZFS fully-qualified device descriptor.
  * Note, this must match the 'struct devdesc' declaration in bootstrap.h.
  * Arch-specific device descriptors should be binary compatible with this
  * structure if they are to support ZFS.
  *
  * A pool_guid of 0 makes zfs_dev_open() and zfs_fmtdev() fall back to the
  * first pool on the pool list; a root_guid of 0 makes zfs_fmtdev() look up
  * the root filesystem of the pool.
  */
 struct zfs_devdesc
 {
     struct devsw	*d_dev;
     int			d_type;
     int			d_unit;
     void		*d_opendata;
     uint64_t		pool_guid;
     uint64_t		root_guid;
 };
 
+#ifdef LOADER_GELI_SUPPORT
+#include <crypto/intake.h>
+#endif
+
 /*
  * Argument block for booting from ZFS.
  *
  * The trailing union overlays the 256-byte gelipw buffer with a record
  * made of notapw, keybuf_sentinel and keybuf.  keybuf is typed as a
  * struct keybuf pointer only when LOADER_GELI_SUPPORT is defined and as a
  * void pointer otherwise.
  */
 struct zfs_boot_args
 {
     uint32_t		size;
     uint32_t		reserved;
     uint64_t		pool;
     uint64_t		root;
     uint64_t		primary_pool;
     uint64_t		primary_vdev;
-    char		gelipw[256];
+    union {
+	char		gelipw[256];
+	struct {
+            char                notapw;	/* 
+					 * single null byte to stop keybuf
+					 * being interpreted as a password
+					 */
+	    uint32_t		keybuf_sentinel;
+#ifdef LOADER_GELI_SUPPORT
+	    struct keybuf	*keybuf;
+#else
+	    void		*keybuf;
+#endif
+	};
+    };
 };
 
 int	zfs_parsedev(struct zfs_devdesc *dev, const char *devspec,
 		     const char **path);
 char	*zfs_fmtdev(void *vdev);
 int	zfs_probe_dev(const char *devname, uint64_t *pool_guid);
 int	zfs_list(const char *name);
 void	init_zfs_bootenv(char *currdev);
 int	zfs_bootenv(const char *name);
 int	zfs_belist_add(const char *name, uint64_t __unused);
 int	zfs_set_env(void);
 
 extern struct devsw zfs_dev;
 extern struct fs_ops zfs_fsops;
 
 #endif /*_BOOT_LIBZFS_H_*/
Index: stable/11/sys/boot/zfs/zfs.c
===================================================================
--- stable/11/sys/boot/zfs/zfs.c	(revision 329098)
+++ stable/11/sys/boot/zfs/zfs.c	(revision 329099)
@@ -1,907 +1,912 @@
 /*-
  * Copyright (c) 2007 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Stand-alone file reading package.
  */
 
 #include <sys/disk.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/queue.h>
 #include <part.h>
 #include <stddef.h>
 #include <stdarg.h>
 #include <string.h>
 #include <stand.h>
 #include <bootstrap.h>
 
 #include "libzfs.h"
 
 #include "zfsimpl.c"
 
 /* Define the range of indexes to be populated with ZFS Boot Environments */
 #define		ZFS_BE_FIRST	4
 #define		ZFS_BE_LAST	8
 
 static int	zfs_open(const char *path, struct open_file *f);
 static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
 static int	zfs_close(struct open_file *f);
 static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
 static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
 static int	zfs_stat(struct open_file *f, struct stat *sb);
 static int	zfs_readdir(struct open_file *f, struct dirent *d);
 
 struct devsw zfs_dev;
 
 /*
  * Filesystem switch entry for ZFS, named "zfs", wiring the file operations
  * to the zfs_*() routines in this file (positional initializer).
  */
 struct fs_ops zfs_fsops = {
 	"zfs",
 	zfs_open,
 	zfs_close,
 	zfs_read,
 	zfs_write,
 	zfs_seek,
 	zfs_stat,
 	zfs_readdir
 };
 
 /*
  * In-core open file.
  *
  * Allocated and zeroed by zfs_open().  The three ZAP members are only used
  * by zfs_readdir(), which also allocates f_zap_leaf.
  */
 struct file {
 	off_t		f_seekp;	/* seek pointer */
 	dnode_phys_t	f_dnode;
 	uint64_t	f_zap_type;	/* zap type for readdir */
 	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
 	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
 };
 
 /*
  * State for the boot environment listing.  zfs_bootenv() reinitializes the
  * list head and resets zfs_env_count to 0 before using them.
  */
 static int	zfs_env_index;
 static int	zfs_env_count;
 
 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
 struct zfs_be_list *zfs_be_headp;
 /* One list element per boot environment name. */
 struct zfs_be_entry {
 	const char *name;
 	SLIST_ENTRY(zfs_be_entry) entries;
 } *zfs_be, *zfs_be_tmp;
 
 /*
  * Open a file.
  *
  * Allocates the per-file state, resolves upath within the mounted dataset
  * and stores the state in f->f_fsdata.  Returns 0 on success, EINVAL if f
  * is not backed by the ZFS device, ENOMEM if the state cannot be allocated,
  * or the error from zfs_lookup(), in which case f->f_fsdata is reset to
  * NULL.
  */
 static int
 zfs_open(const char *upath, struct open_file *f)
 {
 	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
 	struct file *fp;
 	int rc;
 
 	if (f->f_dev != &zfs_dev)
 		return (EINVAL);
 
 	/* allocate file system specific data structure */
 	fp = malloc(sizeof(struct file));
 	if (fp == NULL)
 		return (ENOMEM);
 	bzero(fp, sizeof(struct file));
 	f->f_fsdata = (void *)fp;
 
 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
 	fp->f_seekp = 0;
 	if (rc) {
 		f->f_fsdata = NULL;
 		free(fp);
 	}
 	return (rc);
 }
 
 static int
 zfs_close(struct open_file *f)
 {
 	struct file *fp = (struct file *)f->f_fsdata;
 
 	dnode_cache_obj = NULL;
 	f->f_fsdata = (void *)0;
 	if (fp == (struct file *)0)
 		return (0);
 
 	free(fp);
 	return (0);
 }
 
 /*
  * Copy a portion of a file into kernel memory.
  * Cross block boundaries when necessary.
  *
  * Reads up to `size' bytes at the current seek pointer into `start' and
  * advances the seek pointer by the amount read.  The request is clipped at
  * the end of the file; a seek pointer at or beyond the end reads nothing.
  * When resid is supplied it receives the number of bytes that were
  * requested but not read.  Returns 0 on success or the error from
  * zfs_stat() / dnode_read().
  */
 static int
 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 	struct stat sb;
 	size_t n;
 	int rc;
 
 	rc = zfs_stat(f, &sb);
 	if (rc)
 		return (rc);
 	n = size;
 	/*
 	 * Check for a position past EOF first, so the subtraction below can
 	 * never go negative and wrap to a huge length.
 	 */
 	if (fp->f_seekp >= sb.st_size)
 		n = 0;
 	else if (fp->f_seekp + n > sb.st_size)
 		n = sb.st_size - fp->f_seekp;
 
 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
 	if (rc)
 		return (rc);
 
 	fp->f_seekp += n;
 	if (resid)
 		*resid = size - n;
 
 	return (0);
 }
 
 /*
  * Don't be silly - the bootstrap has no business writing anything.
  *
  * Every write request is refused with EROFS; none of the arguments is
  * examined.
  */
 static int
 zfs_write(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
 {
 
 	return (EROFS);
 }
 
 /*
  * Reposition the seek pointer of an open file and return the new position.
  *
  * SEEK_SET uses `offset' as is and SEEK_CUR adds it to the current
  * position.  SEEK_END places the pointer `offset' bytes before the end of
  * the file, i.e. file size minus offset.  On failure -1 is returned and
  * errno is set to the zfs_stat() error, or to EINVAL for an unknown
  * `where'; the seek pointer is left untouched.
  */
 static off_t
 zfs_seek(struct open_file *f, off_t offset, int where)
 {
 	struct file *fp = (struct file *)f->f_fsdata;
 	struct stat sb;
 	off_t pos;
 	int error;
 
 	if (where == SEEK_SET) {
 		pos = offset;
 	} else if (where == SEEK_CUR) {
 		pos = fp->f_seekp + offset;
 	} else if (where == SEEK_END) {
 		error = zfs_stat(f, &sb);
 		if (error != 0) {
 			errno = error;
 			return (-1);
 		}
 		pos = sb.st_size - offset;
 	} else {
 		errno = EINVAL;
 		return (-1);
 	}
 
 	fp->f_seekp = pos;
 	return (pos);
 }
 
 /*
  * Fill in sb for an open file by passing the pool of the mount and the
  * file's dnode to zfs_dnode_stat(); returns that function's result.
  */
 static int
 zfs_stat(struct open_file *f, struct stat *sb)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 
 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
 }
 
 /*
  * Return the next entry of the directory open on f in d.
  *
  * f_seekp doubles as the iteration cursor.  On the first call
  * (f_seekp == 0) the ZAP type is read from the start of the object.  For a
  * micro ZAP the cursor is then a byte offset into the entry array; for a
  * fat ZAP it combines the byte offset of the current leaf block with a
  * chunk index kept in its low bits.
  *
  * Returns 0 with d filled in, ENOENT once the entries are exhausted,
  * ENOTDIR if f is not a directory, or the error from zfs_stat() or
  * dnode_read().
  */
 static int
 zfs_readdir(struct open_file *f, struct dirent *d)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 	mzap_ent_phys_t mze;
 	struct stat sb;
 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
 	int rc;
 
 	rc = zfs_stat(f, &sb);
 	if (rc)
 		return (rc);
 	if (!S_ISDIR(sb.st_mode))
 		return (ENOTDIR);
 
 	/*
 	 * If this is the first read, get the zap type.
 	 */
 	if (fp->f_seekp == 0) {
 		rc = dnode_read(spa, &fp->f_dnode,
 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
 		if (rc)
 			return (rc);
 
 		if (fp->f_zap_type == ZBT_MICRO) {
 			/* Position the cursor on the first micro ZAP entry. */
 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
 		} else {
 			rc = dnode_read(spa, &fp->f_dnode,
 					offsetof(zap_phys_t, zap_num_leafs),
 					&fp->f_num_leafs,
 					sizeof(fp->f_num_leafs));
 			if (rc)
 				return (rc);
 
 			/* Start at the block following the first one. */
 			fp->f_seekp = bsize;
 			/* NOTE(review): the malloc() result is not checked. */
 			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
 			rc = dnode_read(spa, &fp->f_dnode,
 					fp->f_seekp,
 					fp->f_zap_leaf,
 					bsize);
 			if (rc)
 				return (rc);
 		}
 	}
 
 	if (fp->f_zap_type == ZBT_MICRO) {
 	mzap_next:
 		if (fp->f_seekp >= bsize)
 			return (ENOENT);
 
 		rc = dnode_read(spa, &fp->f_dnode,
 				fp->f_seekp, &mze, sizeof(mze));
 		if (rc)
 			return (rc);
 		fp->f_seekp += sizeof(mze);
 
 		/* Entries with an empty name are skipped. */
 		if (!mze.mze_name[0])
 			goto mzap_next;
 
 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
 		strcpy(d->d_name, mze.mze_name);
 		d->d_namlen = strlen(d->d_name);
 		return (0);
 	} else {
 		zap_leaf_t zl;
 		zap_leaf_chunk_t *zc, *nc;
 		int chunk;
 		size_t namelen;
 		char *p;
 		uint64_t value;
 
 		/*
 		 * Initialise this so we can use the ZAP size
 		 * calculating macros.
 		 */
 		zl.l_bs = ilog2(bsize);
 		zl.l_phys = fp->f_zap_leaf;
 
 		/*
 		 * Figure out which chunk we are currently looking at
 		 * and consider seeking to the next leaf. We use the
 		 * low bits of f_seekp as a simple chunk index.
 		 */
 	fzap_next:
 		chunk = fp->f_seekp & (bsize - 1);
 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
 			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
 			chunk = 0;
 
 			/*
 			 * Check for EOF and read the new leaf.
 			 */
 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
 				return (ENOENT);
 
 			rc = dnode_read(spa, &fp->f_dnode,
 					fp->f_seekp,
 					fp->f_zap_leaf,
 					bsize);
 			if (rc)
 				return (rc);
 		}
 
 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
 		fp->f_seekp++;
 		/* Only entry chunks describe a directory entry. */
 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
 			goto fzap_next;
 
 		/* Clip the name to what fits in d_name. */
 		namelen = zc->l_entry.le_name_numints;
 		if (namelen > sizeof(d->d_name))
 			namelen = sizeof(d->d_name);
 
 		/*
 		 * Paste the name back together.
 		 */
 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
 		p = d->d_name;
 		while (namelen > 0) {
 			int len;
 			len = namelen;
 			if (len > ZAP_LEAF_ARRAY_BYTES)
 				len = ZAP_LEAF_ARRAY_BYTES;
 			memcpy(p, nc->l_array.la_array, len);
 			p += len;
 			namelen -= len;
 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
 		}
 		/* Guarantees termination when the name was clipped. */
 		d->d_name[sizeof(d->d_name) - 1] = 0;
 
 		/*
 		 * Assume the first eight bytes of the value are
 		 * a uint64_t.
 		 */
 		value = fzap_leaf_value(&zl, zc);
 
 		d->d_fileno = ZFS_DIRENT_OBJ(value);
 		d->d_type = ZFS_DIRENT_TYPE(value);
 		d->d_namlen = strlen(d->d_name);
 
 		return (0);
 	}
 }
 
 /*
  * Read callback for vdevs backed by a file descriptor.  `priv' carries the
  * descriptor; `size' bytes are read at byte `offset' into buf.  Returns 0
  * when exactly `size' bytes were read and EIO otherwise.  The vdev argument
  * is not used.
  */
 static int
 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
 {
 	int fd = (uintptr_t) priv;
 
 	lseek(fd, offset, SEEK_SET);
 	if (read(fd, buf, size) != size)
 		return (EIO);
 	return 0;
 }
 
 /*
  * devsw init entry point.  Initializes the ZFS code, lets the architecture
  * hook probe for pools and then drops every pool for which zfs_spa_init()
  * fails from the pool list.
  *
  * Returns ENXIO when no probe hook is installed, otherwise 0.
  */
 static int
 zfs_dev_init(void)
 {
 	spa_t *spa, *next, *prev;
 
 	zfs_init();
 	if (archsw.arch_zfs_probe == NULL)
 		return (ENXIO);
 	archsw.arch_zfs_probe();
 
 	/* `prev' trails behind as the last pool that was kept. */
 	prev = NULL;
 	for (spa = STAILQ_FIRST(&zfs_pools); spa != NULL; spa = next) {
 		next = STAILQ_NEXT(spa, spa_link);
 		if (zfs_spa_init(spa) == 0) {
 			prev = spa;
 			continue;
 		}
 		if (prev == NULL)
 			STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
 		else
 			STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
 	}
 	return (0);
 }
 
 /* Context handed to the partition table callbacks while probing. */
 struct zfs_probe_args {
 	int		fd;		/* descriptor read by zfs_diskread() */
 	const char	*devname;	/* name of the device being probed */
 	uint64_t	*pool_guid;	/* receives the pool GUID; may be NULL */
 	u_int		secsz;		/* sector size used to scale offsets */
 };
 
 /*
  * Read callback passed to ptable_open(): `blocks' and `offset' are in
  * sectors and are scaled by the sector size recorded in the probe
  * arguments before the read is delegated to vdev_read().
  */
 static int
-zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset)
+zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
 {
 	struct zfs_probe_args *ppa;
 
 	ppa = (struct zfs_probe_args *)arg;
 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
 }
 
 static int
 zfs_probe(int fd, uint64_t *pool_guid)
 {
 	spa_t *spa;
 	int ret;
 
 	ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
 	if (ret == 0 && pool_guid != NULL)
 		*pool_guid = spa->spa_guid;
 	return (ret);
 }
 
 /*
  * Partition table callback: probe one partition for a ZFS pool.
  *
  * Only freebsd and freebsd-zfs partitions are considered.  The partition
  * device name is the parent name without its last character, followed by
  * partname and a ':'.  A freebsd partition that holds no pool itself is
  * searched for a nested partition table, and the function recurses into
  * it.  Always returns 0.
  */
 static int
 zfs_probe_partition(void *arg, const char *partname,
     const struct ptable_entry *part)
 {
 	struct zfs_probe_args *ppa, pa;
 	struct ptable *table;
 	char devname[32];
 	size_t len;
 	int ret;
 
 	/* Probe only freebsd-zfs and freebsd partitions */
 	if (part->type != PART_FREEBSD &&
 	    part->type != PART_FREEBSD_ZFS)
 		return (0);
 
 	ppa = (struct zfs_probe_args *)arg;
 	/*
 	 * Build the name without ever overrunning devname and without
 	 * passing devname to a formatting call as both source and
 	 * destination.  An over-long name is truncated, which simply makes
 	 * the open below fail.
 	 */
 	len = strlen(ppa->devname);
 	if (len > 0)
 		len--;			/* drop the last character */
 	if (len >= sizeof(devname))
 		len = sizeof(devname) - 1;
 	memcpy(devname, ppa->devname, len);
 	snprintf(devname + len, sizeof(devname) - len, "%s:", partname);
 	pa.fd = open(devname, O_RDONLY);
 	if (pa.fd == -1)
 		return (0);
 	ret = zfs_probe(pa.fd, ppa->pool_guid);
 	/*
 	 * NOTE(review): the descriptor stays open on success - presumably
 	 * the probed vdev keeps reading through it, since it was handed to
 	 * vdev_probe() as the read cookie; confirm.
 	 */
 	if (ret == 0)
 		return (0);
 	/* Do we have BSD label here? */
 	if (part->type == PART_FREEBSD) {
 		pa.devname = devname;
 		pa.pool_guid = ppa->pool_guid;
 		pa.secsz = ppa->secsz;
 		table = ptable_open(&pa, part->end - part->start + 1,
 		    ppa->secsz, zfs_diskread);
 		if (table != NULL) {
 			ptable_iterate(table, &pa, zfs_probe_partition);
 			ptable_close(table);
 		}
 	}
 	close(pa.fd);
 	return (0);
 }
 
 /*
  * Probe devname for a ZFS pool, first as a whole disk and then partition
  * by partition.
  *
  * When pool_guid is non-NULL it is cleared first and later receives the
  * GUID of a pool that was found.  Returns 0 when the whole-disk probe
  * succeeds, ENXIO if the device cannot be opened or (with pool_guid
  * supplied) no pool GUID was recorded, otherwise the result of the media
  * size / sector size ioctls.
  */
 int
 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
 {
 	struct ptable *table;
 	struct zfs_probe_args pa;
-	off_t mediasz;
+	uint64_t mediasz;
 	int ret;
 
+	if (pool_guid)
+		*pool_guid = 0;
 	pa.fd = open(devname, O_RDONLY);
 	if (pa.fd == -1)
 		return (ENXIO);
 	/* Probe the whole disk */
 	ret = zfs_probe(pa.fd, pool_guid);
 	/*
 	 * NOTE(review): the descriptor stays open on success - presumably
 	 * the probed vdev keeps reading through it, since it was handed to
 	 * vdev_probe() as the read cookie; confirm.
 	 */
 	if (ret == 0)
 		return (0);
+
 	/* Probe each partition */
 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
 	if (ret == 0)
 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
 	if (ret == 0) {
 		pa.devname = devname;
 		pa.pool_guid = pool_guid;
 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
 		    zfs_diskread);
 		if (table != NULL) {
 			ptable_iterate(table, &pa, zfs_probe_partition);
 			ptable_close(table);
 		}
 	}
 	close(pa.fd);
+	if (pool_guid && *pool_guid == 0)
+		ret = ENXIO;
 	return (ret);
 }
 
 /*
  * Print information about ZFS pools
  *
  * Nothing is printed when the pool list is empty.  In verbose mode the
  * result of spa_all_status() is returned after the heading; otherwise one
  * "zfs:<pool name>" line per pool goes through the pager.  Returns 0 on
  * success or the first non-zero pager_output() result.
  */
 static int
 zfs_dev_print(int verbose)
 {
 	char line[80];
 	spa_t *spa;
 	int ret;
 
 	if (STAILQ_EMPTY(&zfs_pools))
 		return (0);
 
 	printf("%s devices:", zfs_dev.dv_name);
 	ret = pager_output("\n");
 	if (ret != 0)
 		return (ret);
 
 	if (verbose)
 		return (spa_all_status());
 
 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
 		ret = pager_output(line);
 		if (ret != 0)
 			return (ret);
 	}
 	return (ret);
 }
 
 /*
  * Attempt to open the pool described by (dev) for use by (f).
  *
  * The descriptor is taken from the variadic argument list.  A pool_guid of
  * 0 selects the first pool on the list.  On success the mount state is
  * stored in f->f_devdata and the descriptor itself is freed.
  *
  * Returns ENXIO if the pool is not found, ENOMEM if the mount state cannot
  * be allocated, the zfs_mount() error, or EIO if the object set is not a
  * ZFS filesystem.
  */
 static int
 zfs_dev_open(struct open_file *f, ...)
 {
 	va_list		args;
 	struct zfs_devdesc	*dev;
 	struct zfsmount	*mount;
 	spa_t		*spa;
 	int		rv;
 
 	va_start(args, f);
 	dev = va_arg(args, struct zfs_devdesc *);
 	va_end(args);
 
 	if (dev->pool_guid == 0)
 		spa = STAILQ_FIRST(&zfs_pools);
 	else
 		spa = spa_find_by_guid(dev->pool_guid);
 	if (!spa)
 		return (ENXIO);
 	mount = malloc(sizeof(*mount));
 	if (mount == NULL)
 		return (ENOMEM);
 	rv = zfs_mount(spa, dev->root_guid, mount);
 	if (rv != 0) {
 		free(mount);
 		return (rv);
 	}
 	if (mount->objset.os_type != DMU_OST_ZFS) {
 		printf("Unexpected object set type %ju\n",
 		    (uintmax_t)mount->objset.os_type);
 		free(mount);
 		return (EIO);
 	}
 	f->f_devdata = mount;
 	/* The descriptor is released on the success path only. */
 	free(dev);
 	return (0);
 }
 
 /*
  * devsw close entry point: frees the mount state stored in f->f_devdata
  * by zfs_dev_open() and clears the pointer.  Always returns 0.
  */
 static int
 zfs_dev_close(struct open_file *f)
 {
 
 	free(f->f_devdata);
 	f->f_devdata = NULL;
 	return (0);
 }
 
 /*
  * devsw strategy entry point.  Block I/O is not implemented for the ZFS
  * device: every request fails with ENOSYS.
  */
 static int
 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
 {
 
 	return (ENOSYS);
 }
 
 /*
  * Device switch entry for ZFS pools, registered under the name "zfs" as a
  * DEVT_ZFS device.  It has no cleanup routine and uses noioctl as its
  * ioctl handler.
  */
 struct devsw zfs_dev = {
 	.dv_name = "zfs",
 	.dv_type = DEVT_ZFS,
 	.dv_init = zfs_dev_init,
 	.dv_strategy = zfs_dev_strategy,
 	.dv_open = zfs_dev_open,
 	.dv_close = zfs_dev_close,
 	.dv_ioctl = noioctl,
 	.dv_print = zfs_dev_print,
 	.dv_cleanup = NULL
 };
 
 /*
  * Parse a device specification of the form ":pool[/dataset]:[path]".
  *
  * devspec must start with ':' and contain a second ':' that ends the pool
  * and dataset part.  On success dev is filled in with the pool GUID, the
  * object id of the dataset, the ZFS devsw and its type; when path is
  * non-NULL it is set to the text after the second ':'.
  *
  * Returns EINVAL for a malformed specification or a pool or dataset name
  * that does not fit in ZFS_MAXNAMELEN, ENXIO if the pool is unknown, or
  * the error from zfs_lookup_dataset().
  */
 int
 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
 {
 	static char	rootname[ZFS_MAXNAMELEN];
 	static char	poolname[ZFS_MAXNAMELEN];
 	spa_t		*spa;
 	const char	*end;
 	const char	*np;
 	const char	*sep;
 	int		rv;
 
 	np = devspec;
 	if (*np != ':')
 		return (EINVAL);
 	np++;
 	end = strchr(np, ':');
 	if (end == NULL)
 		return (EINVAL);
 	sep = strchr(np, '/');
 	if (sep == NULL || sep >= end)
 		sep = end;
 	/* Reject names that would overrun the fixed-size buffers. */
 	if ((size_t)(sep - np) >= sizeof(poolname))
 		return (EINVAL);
 	memcpy(poolname, np, sep - np);
 	poolname[sep - np] = '\0';
 	if (sep < end) {
 		sep++;
 		if ((size_t)(end - sep) >= sizeof(rootname))
 			return (EINVAL);
 		memcpy(rootname, sep, end - sep);
 		rootname[end - sep] = '\0';
 	}
 	else
 		rootname[0] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	dev->pool_guid = spa->spa_guid;
 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
 	if (rv != 0)
 		return (rv);
 	if (path != NULL)
 		*path = (*end == '\0') ? end : end + 1;
 	dev->d_dev = &zfs_dev;
 	dev->d_type = zfs_dev.dv_type;
 	return (0);
 }
 
 /*
  * Format a ZFS device descriptor as "<devname>:<pool>[/<dataset>]:".
  *
  * A pool_guid of 0 is replaced by the GUID of the first pool on the list,
  * and a root_guid of 0 by the root filesystem of the pool; both are stored
  * back into the descriptor.  The result lives in a static buffer that is
  * overwritten by the next call.  An empty string is returned when the
  * descriptor is not a ZFS device or any lookup fails.
  */
 char *
 zfs_fmtdev(void *vdev)
 {
 	static char		rootname[ZFS_MAXNAMELEN];
 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
 	spa_t			*spa;
 
 	buf[0] = '\0';
 	if (dev->d_type != DEVT_ZFS)
 		return (buf);
 
 	if (dev->pool_guid == 0) {
 		spa = STAILQ_FIRST(&zfs_pools);
 		/* The list may be empty; the check below reports it. */
 		if (spa != NULL)
 			dev->pool_guid = spa->spa_guid;
 	} else
 		spa = spa_find_by_guid(dev->pool_guid);
 	if (spa == NULL) {
 		printf("ZFS: can't find pool by guid\n");
 		return (buf);
 	}
 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
 		printf("ZFS: can't find root filesystem\n");
 		return (buf);
 	}
 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
 		printf("ZFS: can't find filesystem by guid\n");
 		return (buf);
 	}
 
 	if (rootname[0] == '\0')
 		sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name);
 	else
 		sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name,
 		    rootname);
 	return (buf);
 }
 
 /*
  * List the children of the dataset named "pool[/dataset]".
  *
  * Returns EINVAL if the pool component does not fit in the name buffer,
  * ENXIO if the pool is unknown, the error from zfs_lookup_dataset() if the
  * dataset cannot be resolved, and otherwise whatever zfs_list_dataset()
  * returns.
  */
 int
 zfs_list(const char *name)
 {
 	static char	poolname[ZFS_MAXNAMELEN];
 	uint64_t	objid;
 	spa_t		*spa;
 	const char	*dsname;
 	int		len;
 	int		rv;
 
 	/* Split at the first '/': pool name before it, dataset after it. */
 	len = strlen(name);
 	dsname = strchr(name, '/');
 	if (dsname != NULL) {
 		len = dsname - name;
 		dsname++;
 	} else
 		dsname = "";
 	/* The pool name plus its NUL must fit in poolname[]. */
 	if (len < 0 || (size_t)len >= sizeof(poolname))
 		return (EINVAL);
 	memcpy(poolname, name, len);
 	poolname[len] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	rv = zfs_lookup_dataset(spa, dsname, &objid);
 	if (rv != 0)
 		return (rv);
 
 	return (zfs_list_dataset(spa, objid));
 }
 
 /*
  * Seed the boot-environment variables from the current device string.
  *
  * currdev - writable string of the form "zfs:pool[/dataset...]:"; it is
  *           modified in place.  Strings that do not start with "zfs:" or
  *           that carry nothing after that prefix are ignored.
  *
  * Sets zfs_be_active to the device without its trailing ':',
  * zfs_be_currpage to "1", and zfs_be_root to the dataset path with its
  * last '/'-separated element removed.
  */
 void
 init_zfs_bootenv(char *currdev)
 {
 	char	*beroot;
 	size_t	devlen;
 
 	devlen = strlen(currdev);
 	/*
 	 * Require at least one character after "zfs:".  With a bare "zfs:"
 	 * the trailing-colon strip below would delete the only ':' and leave
 	 * nothing to parse.
 	 */
 	if (devlen <= 4 || strncmp(currdev, "zfs:", 4) != 0)
 		return;
 	/* Remove the trailing : */
 	currdev[devlen - 1] = '\0';
 	setenv("zfs_be_active", currdev, 1);
 	setenv("zfs_be_currpage", "1", 1);
 	/* Forward past zfs: (the prefix was verified above) */
 	currdev += 4;
 	/* Remove the last element (current bootenv) */
 	beroot = strrchr(currdev, '/');
 	if (beroot != NULL)
 		beroot[0] = '\0';
 	setenv("zfs_be_root", currdev, 1);
 }
 
 /*
  * Rebuild the boot-environment menu variables for the dataset "name"
  * ("pool[/dataset]").
  *
  * Updates zfs_be_root if it differs from name, collects the children of
  * the dataset through zfs_belist_add(), stores the page count in
  * zfs_be_pages, resets zfs_be_currpage to "1" when it exceeds the page
  * count, and calls zfs_set_env() to publish the menu entries.
  *
  * Returns EINVAL if name is NULL, zfs_be_root is unset or the pool name is
  * too long; ENXIO if the pool is unknown; ENOMEM if an environment
  * variable cannot be set; otherwise the result of the dataset lookup or
  * of zfs_callback_dataset().
  */
 int
 zfs_bootenv(const char *name)
 {
 	static char	poolname[ZFS_MAXNAMELEN], *dsname, *root;
 	char		becount[4];
 	char		*pagestr;
 	uint64_t	objid;
 	spa_t		*spa;
 	int		len, rv, pages, perpage, currpage;
 
 	if (name == NULL)
 		return (EINVAL);
 	if ((root = getenv("zfs_be_root")) == NULL)
 		return (EINVAL);
 
 	if (strcmp(name, root) != 0) {
 		if (setenv("zfs_be_root", name, 1) != 0)
 			return (ENOMEM);
 	}
 
 	SLIST_INIT(&zfs_be_head);
 	zfs_env_count = 0;
 	/* Split at the first '/': pool name before it, dataset after it. */
 	len = strlen(name);
 	dsname = strchr(name, '/');
 	if (dsname != NULL) {
 		len = dsname - name;
 		dsname++;
 	} else
 		dsname = "";
 	/* The pool name plus its NUL must fit in poolname[]. */
 	if (len < 0 || (size_t)len >= sizeof(poolname))
 		return (EINVAL);
 	memcpy(poolname, name, len);
 	poolname[len] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	rv = zfs_lookup_dataset(spa, dsname, &objid);
 	if (rv != 0)
 		return (rv);
 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
 
 	/* Calculate and store the number of pages of BEs */
 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
 	snprintf(becount, sizeof(becount), "%d", pages);
 	if (setenv("zfs_be_pages", becount, 1) != 0) {
 		rv = ENOMEM;
 		goto out;
 	}
 
 	/*
 	 * Roll over the page counter if it has exceeded the maximum.  An
 	 * unset counter is treated as page 1, matching zfs_set_env().
 	 */
 	pagestr = getenv("zfs_be_currpage");
 	currpage = (pagestr != NULL) ? strtol(pagestr, NULL, 10) : 1;
 	if (currpage > pages) {
 		if (setenv("zfs_be_currpage", "1", 1) != 0) {
 			rv = ENOMEM;
 			goto out;
 		}
 	}
 
 	/* Populate the menu environment variables */
 	zfs_set_env();
 
 out:
 	/* Clean up the SLIST of ZFS BEs (also on the error paths above) */
 	while (!SLIST_EMPTY(&zfs_be_head)) {
 		zfs_be = SLIST_FIRST(&zfs_be_head);
 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
 		free(zfs_be);
 	}
 
 	return (rv);
 }
 
 /*
  * Callback that records one dataset name in the boot-environment list.
  * Names beginning with '$' are ignored.  Returns 0 on success (or when the
  * name is ignored) and ENOMEM if no list entry can be allocated.
  */
 int
 zfs_belist_add(const char *name, uint64_t value __unused)
 {
 
 	/* Special datasets are marked by a leading '$'; leave them out. */
 	if (name[0] == '$')
 		return (0);
 
 	/* Allocate a new entry and push it onto the front of the list. */
 	zfs_be = malloc(sizeof(struct zfs_be_entry));
 	if (zfs_be == NULL)
 		return (ENOMEM);
 	zfs_be->name = name;
 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
 	zfs_env_count++;
 
 	return (0);
 }
 
 /*
  * Publish one page of boot environments as menu variables.
  *
  * For each entry on the requested page (zfs_be_currpage, defaulting to 1)
  * this sets bootenvmenu_caption[N], bootenvansi_caption[N],
  * bootenvmenu_command[N] and bootenv_root[N], with N running from
  * ZFS_BE_FIRST.  Slots left over up to ZFS_BE_LAST are unset.
  *
  * Returns 1 if zfs_be_root is not set, otherwise the status of the last
  * setenv() attempted (0 when all succeeded).
  */
 int
 zfs_set_env(void)
 {
 	char	key[32], val[256];
 	char	*root_ds, *page_str;
 	int	status, cur_page, seen;
 
 	root_ds = getenv("zfs_be_root");
 	if (root_ds == NULL)
 		return (1);
 
 	page_str = getenv("zfs_be_currpage");
 	cur_page = (page_str == NULL) ? 1 : strtol(page_str, NULL, 10);
 
 	seen = 1;
 	status = 0;
 	zfs_env_index = ZFS_BE_FIRST;
 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
 		/* Entries belonging to earlier pages are counted and skipped. */
 		if (seen <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (cur_page - 1))) {
 			seen++;
 			continue;
 		}
 
 		/* Both captions carry the bare boot environment name. */
 		snprintf(key, sizeof(key), "bootenvmenu_caption[%d]", zfs_env_index);
 		snprintf(val, sizeof(val), "%s", zfs_be->name);
 		if ((status = setenv(key, val, 1)) != 0)
 			break;
 
 		snprintf(key, sizeof(key), "bootenvansi_caption[%d]", zfs_env_index);
 		if ((status = setenv(key, val, 1)) != 0)
 			break;
 
 		snprintf(key, sizeof(key), "bootenvmenu_command[%d]", zfs_env_index);
 		if ((status = setenv(key, "set_bootenv", 1)) != 0)
 			break;
 
 		/* The root variable names the full dataset under the BE root. */
 		snprintf(key, sizeof(key), "bootenv_root[%d]", zfs_env_index);
 		snprintf(val, sizeof(val), "zfs:%s/%s", root_ds, zfs_be->name);
 		if ((status = setenv(key, val, 1)) != 0)
 			break;
 
 		/* Stop once the page is full. */
 		if (++zfs_env_index > ZFS_BE_LAST)
 			break;
 	}
 
 	/* Clear whatever slots this page did not fill. */
 	while (zfs_env_index <= ZFS_BE_LAST) {
 		snprintf(key, sizeof(key), "bootenvmenu_caption[%d]", zfs_env_index);
 		(void)unsetenv(key);
 		snprintf(key, sizeof(key), "bootenvansi_caption[%d]", zfs_env_index);
 		(void)unsetenv(key);
 		snprintf(key, sizeof(key), "bootenvmenu_command[%d]", zfs_env_index);
 		(void)unsetenv(key);
 		snprintf(key, sizeof(key), "bootenv_root[%d]", zfs_env_index);
 		(void)unsetenv(key);
 		zfs_env_index++;
 	}
 
 	return (status);
 }
Index: stable/11/sys/crypto/intake.h
===================================================================
--- stable/11/sys/crypto/intake.h	(nonexistent)
+++ stable/11/sys/crypto/intake.h	(revision 329099)
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 2016 Eric McCorkle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _INTAKE_H_
+#define _INTAKE_H_
+
+#include  <sys/param.h>
+
+/*
+ * This file provides an interface for providing keys to the kernel
+ * during boot time.
+ */
+
+#define MAX_KEY_BITS	4096
+#define	MAX_KEY_BYTES	(MAX_KEY_BITS / NBBY)
+
+#define KEYBUF_SENTINEL	0xcee54b5d	/* KEYS4BSD */
+
+enum {	/* values stored in keybuf_ent.ke_type */
+        KEYBUF_TYPE_NONE,	/* entry holds no key */
+        KEYBUF_TYPE_GELI	/* entry holds a GELI key */
+};
+
+struct keybuf_ent {	/* one key slot in the intake buffer */
+        unsigned int ke_type;	/* one of the KEYBUF_TYPE_* values */
+        char ke_data[MAX_KEY_BYTES];	/* key bytes; room for MAX_KEY_BITS bits */
+};
+
+struct keybuf {	/* the key intake buffer: a count followed by its entries */
+        unsigned int kb_nents;	/* number of elements in kb_ents[] */
+        struct keybuf_ent kb_ents[];	/* flexible array member, kb_nents long */
+};
+
+#ifdef _KERNEL
+/* Get the key intake buffer */
+extern struct keybuf* get_keybuf(void);
+#endif
+
+#endif

Property changes on: stable/11/sys/crypto/intake.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: stable/11/sys/geom/eli/g_eli.c
===================================================================
--- stable/11/sys/geom/eli/g_eli.c	(revision 329098)
+++ stable/11/sys/geom/eli/g_eli.c	(revision 329099)
@@ -1,1270 +1,1316 @@
 /*-
  * Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cons.h>
 #include <sys/kernel.h>
 #include <sys/linker.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/eventhandler.h>
 #include <sys/kthread.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/uio.h>
 #include <sys/vnode.h>
 
 #include <vm/uma.h>
 
 #include <geom/geom.h>
 #include <geom/eli/g_eli.h>
 #include <geom/eli/pkcs5v2.h>
 
+#include <crypto/intake.h>
+
 FEATURE(geom_eli, "GEOM crypto module");
 
 MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data");
 
 SYSCTL_DECL(_kern_geom);
 SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW, 0, "GEOM_ELI stuff");
 static int g_eli_version = G_ELI_VERSION;
 SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0,
     "GELI version");
 int g_eli_debug = 0;
 SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0,
     "Debug level");
 static u_int g_eli_tries = 3;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0,
     "Number of tries for entering the passphrase");
 static u_int g_eli_visible_passphrase = GETS_NOECHO;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN,
     &g_eli_visible_passphrase, 0,
     "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)");
 u_int g_eli_overwrites = G_ELI_OVERWRITES;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites,
     0, "Number of times on-disk keys should be overwritten when destroying them");
 static u_int g_eli_threads = 0;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0,
     "Number of threads doing crypto work");
 u_int g_eli_batch = 0;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0,
     "Use crypto operations batching");
 
 /*
  * Passphrase cached during boot, in order to be more user-friendly if
  * there are multiple providers using the same passphrase.
  */
 static char cached_passphrase[256];
 static u_int g_eli_boot_passcache = 1;
 TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache);
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD,
     &g_eli_boot_passcache, 0,
     "Passphrases are cached during boot process for possible reuse");
 static void
 fetch_loader_passphrase(void * dummy)
 {
 	char * env_passphrase;
 
 	KASSERT(dynamic_kenv, ("need dynamic kenv"));
 
 	if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) {
 		/* Extract passphrase from the environment. */
 		strlcpy(cached_passphrase, env_passphrase,
 		    sizeof(cached_passphrase));
 		freeenv(env_passphrase);
 
 		/* Wipe the passphrase from the environment. */
 		kern_unsetenv("kern.geom.eli.passphrase");
 	}
 }
 SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY,
     fetch_loader_passphrase, NULL);
+
 static void
-zero_boot_passcache(void * dummy)
+zero_boot_passcache(void)
 {
 
-	memset(cached_passphrase, 0, sizeof(cached_passphrase));
+        explicit_bzero(cached_passphrase, sizeof(cached_passphrase));	/* wipe the whole cached passphrase buffer */
 }
-EVENTHANDLER_DEFINE(mountroot, zero_boot_passcache, NULL, 0);
 
+/*
+ * Wipe every GELI key held in the key intake buffer and mark its slot as
+ * KEYBUF_TYPE_NONE.  Entries of other types are left alone.  Does nothing
+ * when get_keybuf() reports no buffer.
+ */
+static void
+zero_geli_intake_keys(void)
+{
+	struct keybuf *keybuf;
+	u_int i;	/* unsigned, to match the type of kb_nents */
+
+	keybuf = get_keybuf();
+	if (keybuf == NULL)
+		return;
+
+	/* Scan the key buffer, clear all GELI keys. */
+	for (i = 0; i < keybuf->kb_nents; i++) {
+		if (keybuf->kb_ents[i].ke_type != KEYBUF_TYPE_GELI)
+			continue;
+		explicit_bzero(keybuf->kb_ents[i].ke_data,
+		    sizeof(keybuf->kb_ents[i].ke_data));
+		keybuf->kb_ents[i].ke_type = KEYBUF_TYPE_NONE;
+	}
+}
+
+static void
+zero_intake_passcache(void *dummy)	/* mountroot event handler; argument unused */
+{
+        zero_boot_passcache();	/* wipe the cached boot passphrase */
+        zero_geli_intake_keys();	/* wipe GELI keys in the intake buffer */
+}
+EVENTHANDLER_DEFINE(mountroot, zero_intake_passcache, NULL, 0);
+
 static eventhandler_tag g_eli_pre_sync = NULL;
 
 static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 static void g_eli_init(struct g_class *mp);
 static void g_eli_fini(struct g_class *mp);
 
 static g_taste_t g_eli_taste;
 static g_dumpconf_t g_eli_dumpconf;
 
 /*
  * GEOM class descriptor for ELI: names the class and wires up its control
  * request, taste, geom-destroy, init and fini methods.
  */
 struct g_class g_eli_class = {
 	.name = G_ELI_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_eli_config,
 	.taste = g_eli_taste,
 	.destroy_geom = g_eli_destroy_geom,
 	.init = g_eli_init,
 	.fini = g_eli_fini
 };
 
 
 /*
  * Code paths:
  * BIO_READ:
  *	g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  * BIO_WRITE:
  *	g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
  */
 
 
 /*
  * EAGAIN from crypto(9) means, that we were probably balanced to another crypto
  * accelerator or something like this.
  * The function updates the SID and rerun the operation.
  */
 int
 g_eli_crypto_rerun(struct cryptop *crp)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct bio *bp;
 	int error;
 
 	bp = (struct bio *)crp->crp_opaque;
 	sc = bp->bio_to->geom->softc;
 	LIST_FOREACH(wr, &sc->sc_workers, w_next) {
 		if (wr->w_number == bp->bio_pflags)
 			break;
 	}
 	KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags));
 	G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %ju -> %ju).",
 	    bp->bio_cmd == BIO_READ ? "READ" : "WRITE", (uintmax_t)wr->w_sid,
 	    (uintmax_t)crp->crp_sid);
 	wr->w_sid = crp->crp_sid;
 	crp->crp_etype = 0;
 	error = crypto_dispatch(crp);
 	if (error == 0)
 		return (0);
 	G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error);
 	crp->crp_etype = error;
 	return (error);
 }
 
 /*
  * The function is called after reading encrypted data from the provider.
  *
  * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  *
  * bp is a completed child bio.  Once all children of the parent have
  * completed, the parent is either failed upward (on error) or placed on
  * the softc queue and the workers are woken.
  */
 void
 g_eli_read_done(struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct bio *pbp;
 
 	G_ELI_LOGREQ(2, bp, "Request done.");
 	pbp = bp->bio_parent;
 	/* Record the child's error in the parent unless one is already set. */
 	if (pbp->bio_error == 0 && bp->bio_error != 0)
 		pbp->bio_error = bp->bio_error;
 	g_destroy_bio(bp);
 	/*
 	 * Do we have all sectors already?
 	 */
 	pbp->bio_inbed++;
 	if (pbp->bio_inbed < pbp->bio_children)
 		return;
 	sc = pbp->bio_to->geom->softc;
 	if (pbp->bio_error != 0) {
 		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
 		    pbp->bio_error);
 		pbp->bio_completed = 0;
 		/* Release the per-request buffer hanging off the parent, if any. */
 		if (pbp->bio_driver2 != NULL) {
 			free(pbp->bio_driver2, M_ELI);
 			pbp->bio_driver2 = NULL;
 		}
 		g_io_deliver(pbp, pbp->bio_error);
 		/* The request is finished; it no longer counts as in flight. */
 		atomic_subtract_int(&sc->sc_inflight, 1);
 		return;
 	}
 	/* Success: queue the parent and wake the threads sleeping on sc. */
 	mtx_lock(&sc->sc_queue_mtx);
 	bioq_insert_tail(&sc->sc_queue, pbp);
 	mtx_unlock(&sc->sc_queue_mtx);
 	wakeup(sc);
 }
 
 /*
  * The function is called after we encrypt and write data.
  *
  * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
  *
  * bp is a completed child bio.  Once all children of the parent have
  * completed, the parent is delivered upward with the accumulated error.
  */
 void
 g_eli_write_done(struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct bio *pbp;
 
 	G_ELI_LOGREQ(2, bp, "Request done.");
 	pbp = bp->bio_parent;
 	/* Record the child's error in the parent unless one is already set. */
 	if (pbp->bio_error == 0 && bp->bio_error != 0)
 		pbp->bio_error = bp->bio_error;
 	g_destroy_bio(bp);
 	/*
 	 * Do we have all sectors already?
 	 */
 	pbp->bio_inbed++;
 	if (pbp->bio_inbed < pbp->bio_children)
 		return;
 	/* All children are done; the per-request buffer is no longer needed. */
 	free(pbp->bio_driver2, M_ELI);
 	pbp->bio_driver2 = NULL;
 	if (pbp->bio_error != 0) {
 		G_ELI_LOGREQ(0, pbp, "%s() failed (error=%d)", __func__,
 		    pbp->bio_error);
 		pbp->bio_completed = 0;
 	} else
 		pbp->bio_completed = pbp->bio_length;
 
 	/*
 	 * Write is finished, send it up.
 	 */
 	/* sc is fetched before the delivery and used after it. */
 	sc = pbp->bio_to->geom->softc;
 	g_io_deliver(pbp, pbp->bio_error);
 	atomic_subtract_int(&sc->sc_inflight, 1);
 }
 
 /*
  * This function should never be called; it is installed as the ->orphan()
  * and ->spoiled() method of the short-lived geom created by
  * g_eli_read_metadata(), and panics if it is ever invoked.
  */
 static void
 g_eli_orphan_spoil_assert(struct g_consumer *cp)
 {
 
 	panic("Function %s() called for %s.", __func__, cp->geom->name);
 }
 
 /*
  * Orphan/spoil handler: forcibly destroys the ELI device that owns the
  * consumer, if its geom still has a softc attached.
  */
 static void
 g_eli_orphan(struct g_consumer *cp)
 {
 	struct g_eli_softc *softc;
 
 	g_topology_assert();
 	softc = cp->geom->softc;
 	if (softc != NULL)
 		g_eli_destroy(softc, TRUE);
 }
 
 /*
  * GEOM start method: entry point for every request sent to the provider.
  *
  * BIO_READ:
  *	G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  * BIO_WRITE:
  *	G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
  */
 static void
 g_eli_start(struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct g_consumer *cp;
 	struct bio *cbp;
 
 	sc = bp->bio_to->geom->softc;
 	KASSERT(sc != NULL,
 	    ("Provider's error should be set (error=%d)(device=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 	G_ELI_LOGREQ(2, bp, "Request received.");
 
 	/* First pass: reject commands that are not supported at all. */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_GETATTR:
 	case BIO_FLUSH:
 	case BIO_ZONE:
 		break;
 	case BIO_DELETE:
 		/*
 		 * If the user hasn't set the NODELETE flag, we just pass
 		 * it down the stack and let the layers beneath us do (or
 		 * not) whatever they do with it.  If they have, we
 		 * reject it.  A possible extension would be an
 		 * additional flag to take it as a hint to shred the data
 		 * with [multiple?] overwrites.
 		 */
 		if (!(sc->sc_flags & G_ELI_FLAG_NODELETE))
 			break;
 		/* FALLTHROUGH */
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 	/* Remember the clone and tag the request as not yet processed. */
 	bp->bio_driver1 = cbp;
 	bp->bio_pflags = G_ELI_NEW_BIO;
 	/* Second pass: route the request. */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		/* Without the AUTH flag the read is started right here. */
 		if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) {
 			g_eli_crypto_read(sc, bp, 0);
 			break;
 		}
 		/* FALLTHROUGH */
 	case BIO_WRITE:
 		/* Queue the request and wake the threads sleeping on sc. */
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_tail(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 		wakeup(sc);
 		break;
 	case BIO_GETATTR:
 	case BIO_FLUSH:
 	case BIO_DELETE:
 	case BIO_ZONE:
 		/* No crypto involved: send the clone to the first consumer's provider. */
 		cbp->bio_done = g_std_done;
 		cp = LIST_FIRST(&sc->sc_geom->consumer);
 		cbp->bio_to = cp->provider;
 		G_ELI_LOGREQ(2, cbp, "Sending request.");
 		g_io_request(cbp, cp);
 		break;
 	}
 }
 
 /*
  * Open a crypto(9) session for a worker and store its ID in wr->w_sid.
  *
  * The session covers the encryption algorithm and, when the AUTH flag is
  * set, a chained authentication algorithm.  Returns the result of
  * crypto_newsession().
  */
 static int
 g_eli_newsession(struct g_eli_worker *wr)
 {
 	struct g_eli_softc *sc;
 	struct cryptoini crie, cria;
 	int error;
 
 	sc = wr->w_softc;
 
 	/* Describe the encryption algorithm. */
 	bzero(&crie, sizeof(crie));
 	crie.cri_alg = sc->sc_ealgo;
 	crie.cri_klen = sc->sc_ekeylen;
 	/* For AES-XTS the key length passed to crypto(9) is doubled. */
 	if (sc->sc_ealgo == CRYPTO_AES_XTS)
 		crie.cri_klen <<= 1;
 	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
 		/* Take a reference on key 0; it is dropped again before returning. */
 		crie.cri_key = g_eli_key_hold(sc, 0,
 		    LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize);
 	} else {
 		crie.cri_key = sc->sc_ekey;
 	}
 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
 		/* Chain the authentication algorithm behind the cipher. */
 		bzero(&cria, sizeof(cria));
 		cria.cri_alg = sc->sc_aalgo;
 		cria.cri_klen = sc->sc_akeylen;
 		cria.cri_key = sc->sc_akey;
 		crie.cri_next = &cria;
 	}
 
 	switch (sc->sc_crypto) {
 	case G_ELI_CRYPTO_SW:
 		error = crypto_newsession(&wr->w_sid, &crie,
 		    CRYPTOCAP_F_SOFTWARE);
 		break;
 	case G_ELI_CRYPTO_HW:
 		error = crypto_newsession(&wr->w_sid, &crie,
 		    CRYPTOCAP_F_HARDWARE);
 		break;
 	case G_ELI_CRYPTO_UNKNOWN:
 		/*
 		 * Not decided yet: try hardware first and fall back to
 		 * software.  The outcome is recorded in sc_crypto under the
 		 * queue mutex, unless it has been decided in the meantime.
 		 */
 		error = crypto_newsession(&wr->w_sid, &crie,
 		    CRYPTOCAP_F_HARDWARE);
 		if (error == 0) {
 			mtx_lock(&sc->sc_queue_mtx);
 			if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
 				sc->sc_crypto = G_ELI_CRYPTO_HW;
 			mtx_unlock(&sc->sc_queue_mtx);
 		} else {
 			error = crypto_newsession(&wr->w_sid, &crie,
 			    CRYPTOCAP_F_SOFTWARE);
 			mtx_lock(&sc->sc_queue_mtx);
 			if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
 				sc->sc_crypto = G_ELI_CRYPTO_SW;
 			mtx_unlock(&sc->sc_queue_mtx);
 		}
 		break;
 	default:
 		panic("%s: invalid condition", __func__);
 	}
 
 	/* Release the key reference taken above. */
 	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0)
 		g_eli_key_drop(sc, crie.cri_key);
 
 	return (error);
 }
 
 /*
  * Release the crypto(9) session whose ID is stored in wr->w_sid.
  */
 static void
 g_eli_freesession(struct g_eli_worker *wr)
 {
 
 	crypto_freesession(wr->w_sid);
 }
 
 /*
  * Fail every request still sitting on the softc queue with ENXIO.
  * The queue mutex must be held by the caller.  Only requests that have
  * not been started yet (G_ELI_NEW_BIO) are expected on the queue.
  */
 static void
 g_eli_cancel(struct g_eli_softc *sc)
 {
 	struct bio *pending;
 
 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
 
 	for (pending = bioq_takefirst(&sc->sc_queue); pending != NULL;
 	    pending = bioq_takefirst(&sc->sc_queue)) {
 		KASSERT(pending->bio_pflags == G_ELI_NEW_BIO,
 		    ("Not new bio when canceling (bp=%p).", pending));
 		g_io_deliver(pending, ENXIO);
 	}
 }
 
 /*
  * Remove and return the next request a worker may process, or NULL if
  * there is none.  The queue mutex must be held by the caller.
  *
  * Normally this is simply the head of the queue.  While the device is
  * suspended, requests still tagged G_ELI_NEW_BIO are passed over and the
  * first request that is already in progress is returned instead.
  */
 static struct bio *
 g_eli_takefirst(struct g_eli_softc *sc)
 {
 	struct bio *candidate;
 
 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
 
 	if ((sc->sc_flags & G_ELI_FLAG_SUSPEND) != 0) {
 		/*
 		 * Device suspended, so we skip new I/O requests.
 		 */
 		TAILQ_FOREACH(candidate, &sc->sc_queue.queue, bio_queue) {
 			if (candidate->bio_pflags != G_ELI_NEW_BIO) {
 				bioq_remove(&sc->sc_queue, candidate);
 				return (candidate);
 			}
 		}
 		return (NULL);
 	}
 	return (bioq_takefirst(&sc->sc_queue));
 }
 
 /*
  * This is the main function for kernel worker thread when we don't have
  * hardware acceleration and we have to do cryptography in software.
  * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM
  * threads with crypto work.
  *
  * arg is the struct g_eli_worker describing this thread.  The function
  * loops forever, taking requests off the softc queue, and only leaves via
  * kproc_exit() once the DESTROY flag is set and the queue is drained.
  */
 static void
 g_eli_worker(void *arg)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct bio *bp;
 	int error;
 
 	wr = arg;
 	sc = wr->w_softc;
 #ifdef EARLY_AP_STARTUP
 	MPASS(!sc->sc_cpubind || smp_started);
 #elif defined(SMP)
 	/* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */
 	if (sc->sc_cpubind) {
 		while (!smp_started)
 			tsleep(wr, 0, "geli:smp", hz / 4);
 	}
 #endif
 	/* Set the thread priority and, if requested, pin it to a CPU. */
 	thread_lock(curthread);
 	sched_prio(curthread, PUSER);
 	if (sc->sc_cpubind)
 		sched_bind(curthread, wr->w_number % mp_ncpus);
 	thread_unlock(curthread);
 
 	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);
 
 	for (;;) {
 		mtx_lock(&sc->sc_queue_mtx);
 again:
 		bp = g_eli_takefirst(sc);
 		if (bp == NULL) {
 			if (sc->sc_flags & G_ELI_FLAG_DESTROY) {
 				/*
 				 * Shutting down: fail what is left on the
 				 * queue, unlink and free this worker, and
 				 * wake whoever is sleeping on sc_workers
 				 * before exiting.
 				 */
 				g_eli_cancel(sc);
 				LIST_REMOVE(wr, w_next);
 				g_eli_freesession(wr);
 				free(wr, M_ELI);
 				G_ELI_DEBUG(1, "Thread %s exiting.",
 				    curthread->td_proc->p_comm);
 				wakeup(&sc->sc_workers);
 				mtx_unlock(&sc->sc_queue_mtx);
 				kproc_exit(0);
 			}
 			while (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
 				if (sc->sc_inflight > 0) {
 					G_ELI_DEBUG(0, "inflight=%d",
 					    sc->sc_inflight);
 					/*
 					 * We still have inflight BIOs, so
 					 * sleep and retry.
 					 */
 					msleep(sc, &sc->sc_queue_mtx, PRIBIO,
 					    "geli:inf", hz / 5);
 					goto again;
 				}
 				/*
 				 * Suspend requested, mark the worker as
 				 * suspended and go to sleep.
 				 */
 				if (wr->w_active) {
 					g_eli_freesession(wr);
 					wr->w_active = FALSE;
 				}
 				wakeup(&sc->sc_workers);
 				msleep(sc, &sc->sc_queue_mtx, PRIBIO,
 				    "geli:suspend", 0);
 				/* Woken with SUSPEND cleared: open a fresh session. */
 				if (!wr->w_active &&
 				    !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) {
 					error = g_eli_newsession(wr);
 					KASSERT(error == 0,
 					    ("g_eli_newsession() failed on resume (error=%d)",
 					    error));
 					wr->w_active = TRUE;
 				}
 				goto again;
 			}
 			/*
 			 * Nothing to do: sleep until woken.  PDROP leaves
 			 * the mutex released on return; the top of the loop
 			 * takes it again.
 			 */
 			msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0);
 			continue;
 		}
 		/* A request seen for the first time starts counting as in flight. */
 		if (bp->bio_pflags == G_ELI_NEW_BIO)
 			atomic_add_int(&sc->sc_inflight, 1);
 		mtx_unlock(&sc->sc_queue_mtx);
 		if (bp->bio_pflags == G_ELI_NEW_BIO) {
 			/* New request: reads start with the read step, writes with the crypto step. */
 			bp->bio_pflags = 0;
 			if (sc->sc_flags & G_ELI_FLAG_AUTH) {
 				if (bp->bio_cmd == BIO_READ)
 					g_eli_auth_read(sc, bp);
 				else
 					g_eli_auth_run(wr, bp);
 			} else {
 				if (bp->bio_cmd == BIO_READ)
 					g_eli_crypto_read(sc, bp, 1);
 				else
 					g_eli_crypto_run(wr, bp);
 			}
 		} else {
 			/* Request already in progress: go straight to the crypto step. */
 			if (sc->sc_flags & G_ELI_FLAG_AUTH)
 				g_eli_auth_run(wr, bp);
 			else
 				g_eli_crypto_run(wr, bp);
 		}
 	}
 }
 
 /*
  * Read and decode the ELI metadata stored in the last sector of provider
  * pp, using a temporary geom and consumer that are destroyed again before
  * returning.
  *
  * mp - class under which the temporary geom is created.
  * pp - provider to read from.
  * md - receives the decoded metadata on success.
  *
  * Returns 0 on success, otherwise the error from g_attach(), g_access(),
  * g_read_data() or eli_metadata_decode().  The topology lock must be held
  * on entry; it is dropped around the read and re-taken afterwards.
  */
 int
 g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
     struct g_eli_metadata *md)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	u_char *buf = NULL;
 	int error;
 
 	g_topology_assert();
 
 	gp = g_new_geomf(mp, "eli:taste");
 	gp->start = g_eli_start;
 	gp->access = g_std_access;
 	/*
 	 * g_eli_read_metadata() is always called from the event thread.
 	 * Our geom is created and destroyed in the same event, so there
 	 * could be no orphan nor spoil event in the meantime.
 	 */
 	gp->orphan = g_eli_orphan_spoil_assert;
 	gp->spoiled = g_eli_orphan_spoil_assert;
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 	if (error != 0)
 		goto end;
 	/* Open for reading only. */
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		goto end;
 	g_topology_unlock();
 	/* The metadata occupies the last sector of the provider. */
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	/* NOTE(review): relies on g_read_data() setting error when it returns NULL. */
 	if (buf == NULL)
 		goto end;
 	error = eli_metadata_decode(buf, md);
 	if (error != 0)
 		goto end;
 	/* Metadata was read and decoded successfully. */
 end:
 	/* Common cleanup: undo only the steps that actually succeeded. */
 	if (buf != NULL)
 		g_free(buf);
 	if (cp->provider != NULL) {
 		if (cp->acr == 1)
 			g_access(cp, -1, 0, 0);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	return (error);
 }
 
 /*
  * This function is called after the last close of the provider, when the
  * user has requested that the device be detached in that situation.
  * It is posted as a GEOM event by g_eli_access(); arg is the geom.
  */
 static void
 g_eli_last_close(void *arg, int flags __unused)
 {
 	struct g_geom *gp;
 	char gpname[64];
 	int error;
 
 	g_topology_assert();
 	gp = arg;
 	/*
 	 * Copy the name first; presumably gp must not be referenced once
 	 * g_eli_destroy() has run.
 	 */
 	strlcpy(gpname, gp->name, sizeof(gpname));
 	error = g_eli_destroy(gp->softc, TRUE);
 	KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
 	    gpname, error));
 	G_ELI_DEBUG(0, "Detached %s on last close.", gpname);
 }
 
 /*
  * GEOM access method used when write attempts must be policed or the
  * device should detach itself on last close.
  *
  * dr, dw, de are the requested changes to the read, write and exclusive
  * counts.  Returns EROFS when write access is requested on a read-only
  * device, otherwise 0.
  */
 int
 g_eli_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_eli_softc *softc;
 	struct g_geom *geom;
 
 	geom = pp->geom;
 	softc = geom->softc;
 
 	if (dw > 0) {
 		/* Deny write attempts on a read-only device. */
 		if ((softc->sc_flags & G_ELI_FLAG_RO) != 0)
 			return (EROFS);
 		/* Someone is opening us for write, we need to remember that. */
 		softc->sc_flags |= G_ELI_FLAG_WOPEN;
 		return (0);
 	}
 	/* Still open in some mode after this change?  Then not the last close. */
 	if (pp->acr + dr > 0 || pp->acw + dw > 0 || pp->ace + de > 0)
 		return (0);
 
 	/*
 	 * Automatically detach on last close if requested.
 	 */
 	if ((softc->sc_flags &
 	    (G_ELI_FLAG_RW_DETACH | G_ELI_FLAG_WOPEN)) != 0)
 		g_post_event(g_eli_last_close, geom, M_WAITOK, NULL);
 	return (0);
 }
 
 /*
  * Report whether the given CPU is set in hlt_cpus_mask.  On kernels built
  * without SMP this always returns 0.
  */
 static int
 g_eli_cpu_is_disabled(int cpu)
 {
 #ifdef SMP
 	return (CPU_ISSET(cpu, &hlt_cpus_mask));
 #else
 	return (0);
 #endif
 }
 
 /*
  * Create an ELI device on top of provider bpp.
  *
  * req  - control request to report errors to, or NULL to log them with
  *        G_ELI_DEBUG() instead.
  * mp   - GEOM class the new geom belongs to.
  * bpp  - underlying (encrypted) provider.
  * md   - decoded metadata describing the device.
  * mkey - master key, propagated into the softc.
  * nkey - number of the key in use, stored in sc_nkey.
  *
  * On success returns the new geom, which has one worker thread per
  * enabled CPU slot and a provider named "<bpp name><G_ELI_SUFFIX>".
  * On failure everything set up so far is torn down and NULL is returned.
  */
 struct g_geom *
 g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
     const struct g_eli_metadata *md, const u_char *mkey, int nkey)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	u_int i, threads;
 	int dcw, error;
 
 	G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX);
 
 	gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX);
 	sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO);
 	gp->start = g_eli_start;
 	/*
 	 * Spoiling can happen even though we have the provider open
 	 * exclusively, e.g. through media change events.
 	 */
 	gp->spoiled = g_eli_orphan;
 	gp->orphan = g_eli_orphan;
 	gp->dumpconf = g_eli_dumpconf;
 	/*
 	 * If detach-on-last-close feature is not enabled and we don't operate
 	 * on read-only provider, we can simply use g_std_access().
 	 */
 	if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO))
 		gp->access = g_eli_access;
 	else
 		gp->access = g_std_access;
 
 	eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize);
 	sc->sc_nkey = nkey;
 
 	gp->softc = sc;
 	sc->sc_geom = gp;
 
 	bioq_init(&sc->sc_queue);
 	mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF);
 	mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF);
 
 	pp = NULL;
 	cp = g_new_consumer(gp);
 	/*
 	 * Write count we will request on the underlying provider; the
 	 * failure path below must release exactly the same amount.
 	 */
 	dcw = (sc->sc_flags & G_ELI_FLAG_RO) ? 0 : 1;
 	error = g_attach(cp, bpp);
 	if (error != 0) {
 		if (req != NULL) {
 			gctl_error(req, "Cannot attach to %s (error=%d).",
 			    bpp->name, error);
 		} else {
 			G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).",
 			    bpp->name, error);
 		}
 		goto failed;
 	}
 	/*
 	 * Keep provider open all the time, so we can run critical tasks,
 	 * like Master Keys deletion, without wondering if we can open
 	 * provider or not.
 	 * We don't open provider for writing only when user requested read-only
 	 * access.
 	 */
 	error = g_access(cp, 1, dcw, 1);
 	if (error != 0) {
 		if (req != NULL) {
 			gctl_error(req, "Cannot access %s (error=%d).",
 			    bpp->name, error);
 		} else {
 			G_ELI_DEBUG(1, "Cannot access %s (error=%d).",
 			    bpp->name, error);
 		}
 		goto failed;
 	}
 
 	/*
 	 * Remember the keys in our softc structure.
 	 */
 	g_eli_mkey_propagate(sc, mkey);
 
 	LIST_INIT(&sc->sc_workers);
 
 	/* Default to one worker per CPU when no thread count is configured. */
 	threads = g_eli_threads;
 	if (threads == 0)
 		threads = mp_ncpus;
 	/* Workers are bound to CPUs only when there is exactly one per CPU. */
 	sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus);
 	for (i = 0; i < threads; i++) {
 		if (g_eli_cpu_is_disabled(i)) {
 			G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.",
 			    bpp->name, i);
 			continue;
 		}
 		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
 		wr->w_softc = sc;
 		wr->w_number = i;
 		wr->w_active = TRUE;
 
 		error = g_eli_newsession(wr);
 		if (error != 0) {
 			free(wr, M_ELI);
 			if (req != NULL) {
 				gctl_error(req, "Cannot set up crypto session "
 				    "for %s (error=%d).", bpp->name, error);
 			} else {
 				G_ELI_DEBUG(1, "Cannot set up crypto session "
 				    "for %s (error=%d).", bpp->name, error);
 			}
 			goto failed;
 		}
 
 		error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
 		    "g_eli[%u] %s", i, bpp->name);
 		if (error != 0) {
 			g_eli_freesession(wr);
 			free(wr, M_ELI);
 			if (req != NULL) {
 				gctl_error(req, "Cannot create kernel thread "
 				    "for %s (error=%d).", bpp->name, error);
 			} else {
 				G_ELI_DEBUG(1, "Cannot create kernel thread "
 				    "for %s (error=%d).", bpp->name, error);
 			}
 			goto failed;
 		}
 		LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next);
 	}
 
 	/*
 	 * Create decrypted provider.
 	 */
 	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
 	pp->mediasize = sc->sc_mediasize;
 	pp->sectorsize = sc->sc_sectorsize;
 
 	g_error_provider(pp, 0);
 
 	G_ELI_DEBUG(0, "Device %s created.", pp->name);
 	G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo),
 	    sc->sc_ekeylen);
 	if (sc->sc_flags & G_ELI_FLAG_AUTH)
 		G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo));
 	G_ELI_DEBUG(0, "    Crypto: %s",
 	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
 	return (gp);
 failed:
 	/* Ask any workers already started to exit. */
 	mtx_lock(&sc->sc_queue_mtx);
 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
 	wakeup(sc);
 	/*
 	 * Wait for kernel threads self destruction.
 	 */
 	while (!LIST_EMPTY(&sc->sc_workers)) {
 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
 		    "geli:destroy", 0);
 	}
 	mtx_destroy(&sc->sc_queue_mtx);
 	if (cp->provider != NULL) {
 		/*
 		 * Release exactly what was acquired above: a read-only
 		 * device never took a write reference.
 		 */
 		if (cp->acr == 1)
 			g_access(cp, -1, -dcw, -1);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	g_eli_key_destroy(sc);
 	/* The softc held key material; wipe it before handing it back. */
 	explicit_bzero(sc, sizeof(*sc));
 	free(sc, M_ELI);
 	return (NULL);
 }
 
 /*
  * Destroy the GELI device described by sc.
  *
  * The GEOM topology lock must be held (checked by g_topology_assert()).
  *
  * Returns ENXIO if sc is NULL, EBUSY if the provider is still open, and 0
  * once the softc has been wiped and freed.  With force set, an open
  * provider is additionally flagged G_ELI_FLAG_RW_DETACH and withered
  * before EBUSY is returned.
  */
 int
 g_eli_destroy(struct g_eli_softc *sc, boolean_t force)
 {
 	struct g_geom *gp;
 	struct g_provider *pp;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	gp = sc->sc_geom;
 	pp = LIST_FIRST(&gp->provider);
 	/* Any non-zero access count means somebody still has the device open. */
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		if (force) {
 			G_ELI_DEBUG(1, "Device %s is still open, so it "
 			    "cannot be definitely removed.", pp->name);
 			/* Arrange for the detach to happen on last close. */
 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
 			gp->access = g_eli_access;
 			g_wither_provider(pp, ENXIO);
 			return (EBUSY);
 		} else {
 			G_ELI_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 	}
 
 	/*
 	 * Set the destroy flag, wake the workers and sleep until the worker
 	 * list is empty (presumably each worker unlinks itself on exit --
 	 * confirm against g_eli_worker()).
 	 */
 	mtx_lock(&sc->sc_queue_mtx);
 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
 	wakeup(sc);
 	while (!LIST_EMPTY(&sc->sc_workers)) {
 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
 		    "geli:destroy", 0);
 	}
 	mtx_destroy(&sc->sc_queue_mtx);
 	gp->softc = NULL;
 	/* Wipe the whole softc before handing the memory back. */
 	g_eli_key_destroy(sc);
 	bzero(sc, sizeof(*sc));
 	free(sc, M_ELI);
 
 	/* sc is gone at this point; only gp and pp may be used below. */
 	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
 		G_ELI_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom_close(gp, ENXIO);
 
 	return (0);
 }
 
 /*
  * GEOM class destroy_geom method: a non-forced g_eli_destroy() of the
  * softc attached to gp.
  */
 static int
 g_eli_destroy_geom(struct gctl_req *req __unused,
     struct g_class *mp __unused, struct g_geom *gp)
 {
 
 	return (g_eli_destroy(gp->softc, FALSE));
 }
 
 /*
  * Feed every preloaded key file of the given provider into the HMAC
  * context.
  *
  * Key files are looked up by preload type "<provider>:geli_keyfile<N>" for
  * N = 0, 1, ...; for N == 0 the unnumbered "<provider>:geli_keyfile" is
  * accepted as well.  Returns the number of key files consumed, or 0 as soon
  * as one of them lacks data, size or a name.
  */
 static int
 g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider)
 {
 	u_char *mod, *addr;
 	char *fname, type[64];
 	size_t len;
 	int count;
 
 	count = 0;
 	for (;;) {
 		snprintf(type, sizeof(type), "%s:geli_keyfile%d", provider,
 		    count);
 		mod = preload_search_by_type(type);
 		if (mod == NULL && count == 0) {
 			/* A lone key file may go by the simpler name. */
 			snprintf(type, sizeof(type), "%s:geli_keyfile",
 			    provider);
 			mod = preload_search_by_type(type);
 		}
 		if (mod == NULL)
 			break;
 
 		addr = preload_fetch_addr(mod);
 		if (addr == NULL) {
 			G_ELI_DEBUG(0, "Cannot find key file data for %s.",
 			    type);
 			return (0);
 		}
 		len = preload_fetch_size(mod);
 		if (len == 0) {
 			G_ELI_DEBUG(0, "Cannot find key file size for %s.",
 			    type);
 			return (0);
 		}
 		fname = preload_search_info(mod, MODINFO_NAME);
 		if (fname == NULL) {
 			G_ELI_DEBUG(0, "Cannot find key file name for %s.",
 			    type);
 			return (0);
 		}
 		G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", fname,
 		    provider, type);
 		g_eli_crypto_hmac_update(ctx, addr, len);
 		count++;
 	}
 
 	/* Number of key files that went into the HMAC. */
 	return (count);
 }
 
 /*
  * Zero the preloaded key file data belonging to the given provider.
  *
  * The lookup mirrors g_eli_keyfiles_load(): numbered types
  * "<provider>:geli_keyfile<N>" are walked from N = 0 until one is missing,
  * and for N == 0 the unnumbered "<provider>:geli_keyfile" is tried as well,
  * so a key file loaded under the simple name does not stay in memory.
  */
 static void
 g_eli_keyfiles_clear(const char *provider)
 {
 	u_char *keyfile, *data;
 	char name[64];
 	size_t size;
 	int i;
 
 	for (i = 0; ; i++) {
 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
 		keyfile = preload_search_by_type(name);
 		if (keyfile == NULL && i == 0) {
 			/*
 			 * If there is only one keyfile, allow simpler name.
 			 */
 			snprintf(name, sizeof(name), "%s:geli_keyfile", provider);
 			keyfile = preload_search_by_type(name);
 		}
 		if (keyfile == NULL)
 			return;
 		data = preload_fetch_addr(keyfile);
 		size = preload_fetch_size(keyfile);
 		/* Entries without data or size have nothing to wipe. */
 		if (data != NULL && size != 0)
 			bzero(data, size);
 	}
 }
 
 /*
  * Tasting is only made on boot.
  * We detect providers which should be attached before root is mounted.
+ *
+ * Keys returned by get_keybuf() are tried first; only when none of them
+ * decrypts the Master Key do we fall back to key files and/or the
+ * passphrase.  Returns the new geom, or NULL if the provider is not ours
+ * or could not be attached.
  */
 static struct g_geom *
 g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_eli_metadata md;
 	struct g_geom *gp;
 	struct hmac_ctx ctx;
 	char passphrase[256];
 	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
 	u_int i, nkey, nkeyfiles, tries;
 	int error;
+        struct keybuf *keybuf;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 
 	if (root_mounted() || g_eli_tries == 0)
 		return (NULL);
 
 	G_ELI_DEBUG(3, "Tasting %s.", pp->name);
 
 	error = g_eli_read_metadata(mp, pp, &md);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	if (strcmp(md.md_magic, G_ELI_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version > G_ELI_VERSION) {
 		printf("geom_eli.ko module is too old to handle %s.\n",
 		    pp->name);
 		return (NULL);
 	}
 	if (md.md_provsize != pp->mediasize)
 		return (NULL);
 	/* Should we attach it on boot? */
 	if (!(md.md_flags & G_ELI_FLAG_BOOT))
 		return (NULL);
 	if (md.md_keys == 0x00) {
 		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
 		return (NULL);
 	}
 	if (md.md_iterations == -1) {
 		/* If there is no passphrase, we try only once. */
 		tries = 1;
 	} else {
 		/* Ask for the passphrase no more than g_eli_tries times. */
 		tries = g_eli_tries;
 	}
 
-	for (i = 0; i <= tries; i++) {
-		g_eli_crypto_hmac_init(&ctx, NULL, 0);
+        if ((keybuf = get_keybuf()) != NULL) {
+                /* Scan the key buffer, try all GELI keys. */
+                for (i = 0; i < keybuf->kb_nents; i++) {
+                         if (keybuf->kb_ents[i].ke_type == KEYBUF_TYPE_GELI) {
+                                 memcpy(key, keybuf->kb_ents[i].ke_data,
+                                     sizeof(key));
 
-		/*
-		 * Load all key files.
-		 */
-		nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name);
+                                 if (g_eli_mkey_decrypt(&md, key,
+                                     mkey, &nkey) == 0) {
+                                         explicit_bzero(key, sizeof(key));
+                                         goto have_key;
+                                 }
+                         }
+                }
+                /* Nothing matched: do not leave the last candidate behind. */
+                explicit_bzero(key, sizeof(key));
+        }
 
-		if (nkeyfiles == 0 && md.md_iterations == -1) {
-			/*
-			 * No key files and no passphrase, something is
-			 * definitely wrong here.
-			 * geli(8) doesn't allow for such situation, so assume
-			 * that there was really no passphrase and in that case
-			 * key files are no properly defined in loader.conf.
-			 */
-			G_ELI_DEBUG(0,
-			    "Found no key files in loader.conf for %s.",
-			    pp->name);
-			return (NULL);
-		}
+        for (i = 0; i <= tries; i++) {
+                g_eli_crypto_hmac_init(&ctx, NULL, 0);
 
-		/* Ask for the passphrase if defined. */
-		if (md.md_iterations >= 0) {
-			/* Try first with cached passphrase. */
-			if (i == 0) {
-				if (!g_eli_boot_passcache)
-					continue;
-				memcpy(passphrase, cached_passphrase,
-				    sizeof(passphrase));
-			} else {
-				printf("Enter passphrase for %s: ", pp->name);
-				cngets(passphrase, sizeof(passphrase),
-				    g_eli_visible_passphrase);
-				memcpy(cached_passphrase, passphrase,
-				    sizeof(passphrase));
-			}
-		}
+                /*
+                 * Load all key files.
+                 */
+                nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name);
 
-		/*
-		 * Prepare Derived-Key from the user passphrase.
-		 */
-		if (md.md_iterations == 0) {
-			g_eli_crypto_hmac_update(&ctx, md.md_salt,
-			    sizeof(md.md_salt));
-			g_eli_crypto_hmac_update(&ctx, passphrase,
-			    strlen(passphrase));
-			bzero(passphrase, sizeof(passphrase));
-		} else if (md.md_iterations > 0) {
-			u_char dkey[G_ELI_USERKEYLEN];
+                if (nkeyfiles == 0 && md.md_iterations == -1) {
+                        /*
+                         * No key files and no passphrase, something is
+                         * definitely wrong here.
+                         * geli(8) doesn't allow for such situation, so assume
+                         * that there was really no passphrase and in that case
+                         * key files are not properly defined in loader.conf.
+                         */
+                        G_ELI_DEBUG(0,
+                            "Found no key files in loader.conf for %s.",
+                            pp->name);
+                        return (NULL);
+                }
 
-			pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
-			    sizeof(md.md_salt), passphrase, md.md_iterations);
-			bzero(passphrase, sizeof(passphrase));
-			g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
-			bzero(dkey, sizeof(dkey));
-		}
+                /* Ask for the passphrase if defined. */
+                if (md.md_iterations >= 0) {
+                        /* Try first with cached passphrase. */
+                        if (i == 0) {
+                                if (!g_eli_boot_passcache)
+                                        continue;
+                                memcpy(passphrase, cached_passphrase,
+                                    sizeof(passphrase));
+                        } else {
+                                printf("Enter passphrase for %s: ", pp->name);
+                                cngets(passphrase, sizeof(passphrase),
+                                    g_eli_visible_passphrase);
+                                memcpy(cached_passphrase, passphrase,
+                                    sizeof(passphrase));
+                        }
+                }
 
-		g_eli_crypto_hmac_final(&ctx, key, 0);
+                /*
+                 * Prepare Derived-Key from the user passphrase.
+                 */
+                if (md.md_iterations == 0) {
+                        g_eli_crypto_hmac_update(&ctx, md.md_salt,
+                            sizeof(md.md_salt));
+                        g_eli_crypto_hmac_update(&ctx, passphrase,
+                            strlen(passphrase));
+                        explicit_bzero(passphrase, sizeof(passphrase));
+                } else if (md.md_iterations > 0) {
+                        u_char dkey[G_ELI_USERKEYLEN];
 
-		/*
-		 * Decrypt Master-Key.
-		 */
-		error = g_eli_mkey_decrypt(&md, key, mkey, &nkey);
-		bzero(key, sizeof(key));
-		if (error == -1) {
-			if (i == tries) {
-				G_ELI_DEBUG(0,
-				    "Wrong key for %s. No tries left.",
-				    pp->name);
-				g_eli_keyfiles_clear(pp->name);
-				return (NULL);
-			}
-			if (i > 0) {
-				G_ELI_DEBUG(0,
-				    "Wrong key for %s. Tries left: %u.",
-				    pp->name, tries - i);
-			}
-			/* Try again. */
-			continue;
-		} else if (error > 0) {
-			G_ELI_DEBUG(0,
-			    "Cannot decrypt Master Key for %s (error=%d).",
-			    pp->name, error);
-			g_eli_keyfiles_clear(pp->name);
-			return (NULL);
-		}
-		g_eli_keyfiles_clear(pp->name);
-		G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
-		break;
-	}
+                        pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
+                            sizeof(md.md_salt), passphrase, md.md_iterations);
+                        explicit_bzero(passphrase, sizeof(passphrase));
+                        g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
+                        explicit_bzero(dkey, sizeof(dkey));
+                }
+
+                g_eli_crypto_hmac_final(&ctx, key, 0);
+
+                /*
+                 * Decrypt Master-Key.
+                 */
+                error = g_eli_mkey_decrypt(&md, key, mkey, &nkey);
+                explicit_bzero(key, sizeof(key));
+                if (error == -1) {
+                        if (i == tries) {
+                                G_ELI_DEBUG(0,
+                                    "Wrong key for %s. No tries left.",
+                                    pp->name);
+                                g_eli_keyfiles_clear(pp->name);
+                                return (NULL);
+                        }
+                        if (i > 0) {
+                                G_ELI_DEBUG(0,
+                                    "Wrong key for %s. Tries left: %u.",
+                                    pp->name, tries - i);
+                        }
+                        /* Try again. */
+                        continue;
+                } else if (error > 0) {
+                        G_ELI_DEBUG(0,
+                            "Cannot decrypt Master Key for %s (error=%d).",
+                            pp->name, error);
+                        g_eli_keyfiles_clear(pp->name);
+                        return (NULL);
+                }
+                g_eli_keyfiles_clear(pp->name);
+                G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
+                break;
+        }
+have_key:
 
 	/*
 	 * We have correct key, let's attach provider.
 	 */
 	gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey);
-	bzero(mkey, sizeof(mkey));
-	bzero(&md, sizeof(md));
+	/* Neither buffer is read again, so use the non-elidable wipe. */
+	explicit_bzero(mkey, sizeof(mkey));
+	explicit_bzero(&md, sizeof(md));
 	if (gp == NULL) {
 		G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name,
 		    G_ELI_SUFFIX);
 		return (NULL);
 	}
 	return (gp);
 }
 
 /*
  * Write the geom-level configuration of a GELI device into sb as a series
  * of XML elements, each line prefixed with indent.
  *
  * Nothing is written when the geom has no softc, or when the call is for a
  * particular provider or consumer (pp or cp non-NULL).
  */
 static void
 g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_eli_softc *sc;
 
 	g_topology_assert();
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	if (pp != NULL || cp != NULL)
 		return;	/* Nothing here. */
 
 	sbuf_printf(sb, "%s<KeysTotal>%ju</KeysTotal>\n", indent,
 	    (uintmax_t)sc->sc_ekeys_total);
 	sbuf_printf(sb, "%s<KeysAllocated>%ju</KeysAllocated>\n", indent,
 	    (uintmax_t)sc->sc_ekeys_allocated);
 	sbuf_printf(sb, "%s<Flags>", indent);
 	if (sc->sc_flags == 0)
 		sbuf_printf(sb, "NONE");
 	else {
 		int first = 1;
 
 /*
  * Append name to the output when flag is set in sc_flags; every name after
  * the first one is preceded by ", ".
  */
 #define ADD_FLAG(flag, name)	do {					\
 	if (sc->sc_flags & (flag)) {					\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
 		ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND");
 		ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY");
 		ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER");
 		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
 		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
 		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
 		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
 		ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH");
 		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
 		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
 		ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY");
 		ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE");
 		ADD_FLAG(G_ELI_FLAG_GELIBOOT, "GELIBOOT");
 #undef  ADD_FLAG
 	}
 	sbuf_printf(sb, "</Flags>\n");
 
 	/* The key number is not reported for one-time providers. */
 	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) {
 		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
 		    sc->sc_nkey);
 	}
 	sbuf_printf(sb, "%s<Version>%u</Version>\n", indent, sc->sc_version);
 	sbuf_printf(sb, "%s<Crypto>", indent);
 	switch (sc->sc_crypto) {
 	case G_ELI_CRYPTO_HW:
 		sbuf_printf(sb, "hardware");
 		break;
 	case G_ELI_CRYPTO_SW:
 		sbuf_printf(sb, "software");
 		break;
 	default:
 		sbuf_printf(sb, "UNKNOWN");
 		break;
 	}
 	sbuf_printf(sb, "</Crypto>\n");
 	/* The authentication algorithm only exists with G_ELI_FLAG_AUTH. */
 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
 		sbuf_printf(sb,
 		    "%s<AuthenticationAlgorithm>%s</AuthenticationAlgorithm>\n",
 		    indent, g_eli_algo2str(sc->sc_aalgo));
 	}
 	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent,
 	    sc->sc_ekeylen);
 	sbuf_printf(sb, "%s<EncryptionAlgorithm>%s</EncryptionAlgorithm>\n",
 	    indent, g_eli_algo2str(sc->sc_ealgo));
 	sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 	    (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE");
 }
 
 static void
 g_eli_shutdown_pre_sync(void *arg, int howto)
 {
 	struct g_class *mp;
 	struct g_geom *gp, *gp2;
 	struct g_provider *pp;
 	struct g_eli_softc *sc;
 	int error;
 
 	mp = arg;
 	g_topology_lock();
 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		pp = LIST_FIRST(&gp->provider);
 		KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name));
 		if (pp->acr + pp->acw + pp->ace == 0)
 			error = g_eli_destroy(sc, TRUE);
 		else {
 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
 			gp->access = g_eli_access;
 		}
 	}
 	g_topology_unlock();
 }
 
 /*
  * GEOM class init method: hook g_eli_shutdown_pre_sync() into the
  * shutdown_pre_sync event, passing the class as its argument.  A failed
  * registration is only reported.
  */
 static void
 g_eli_init(struct g_class *mp)
 {
 
 	g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
 	    g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
 	if (g_eli_pre_sync != NULL)
 		return;
 	G_ELI_DEBUG(0, "Warning! Cannot register shutdown event.");
 }
 
 /*
  * GEOM class fini method: drop the shutdown_pre_sync hook, if one was
  * registered.
  */
 static void
 g_eli_fini(struct g_class *mp)
 {
 
 	if (g_eli_pre_sync == NULL)
 		return;
 	EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync);
 }
 
 /* Register the ELI GEOM class and declare the module's dependency on crypto. */
 DECLARE_GEOM_CLASS(g_eli_class, g_eli);
 MODULE_DEPEND(g_eli, crypto, 1, 1, 1);
Index: stable/11/sys/geom/eli/g_eli.h
===================================================================
--- stable/11/sys/geom/eli/g_eli.h	(revision 329098)
+++ stable/11/sys/geom/eli/g_eli.h	(revision 329099)
@@ -1,709 +1,714 @@
 /*-
  * Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_G_ELI_H_
 #define	_G_ELI_H_
 
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/malloc.h>
 #include <crypto/sha2/sha256.h>
 #include <crypto/sha2/sha512.h>
 #include <opencrypto/cryptodev.h>
 #ifdef _KERNEL
 #include <sys/bio.h>
 #include <sys/libkern.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <geom/geom.h>
+#include <crypto/intake.h>
 #else
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
 #include <strings.h>
 #endif
 #include <sys/queue.h>
 #include <sys/tree.h>
 #ifndef _OpenSSL_
 #include <sys/md5.h>
 #endif
 
 #define	G_ELI_CLASS_NAME	"ELI"
 #define	G_ELI_MAGIC		"GEOM::ELI"
 #define	G_ELI_SUFFIX		".eli"
 
 /*
  * Version history:
  * 0 - Initial version number.
  * 1 - Added data authentication support (md_aalgo field and
  *     G_ELI_FLAG_AUTH flag).
  * 2 - Added G_ELI_FLAG_READONLY.
  * 3 - Added 'configure' subcommand.
  * 4 - IV is generated from offset converted to little-endian
  *     (the G_ELI_FLAG_NATIVE_BYTE_ORDER flag will be set for older versions).
  * 5 - Added multiple encryption keys and AES-XTS support.
  * 6 - Fixed usage of multiple keys for authenticated providers (the
  *     G_ELI_FLAG_FIRST_KEY flag will be set for older versions).
  * 7 - Encryption keys are now generated from the Data Key and not from the
  *     IV Key (the G_ELI_FLAG_ENC_IVKEY flag will be set for older versions).
  */
 #define	G_ELI_VERSION_00	0
 #define	G_ELI_VERSION_01	1
 #define	G_ELI_VERSION_02	2
 #define	G_ELI_VERSION_03	3
 #define	G_ELI_VERSION_04	4
 #define	G_ELI_VERSION_05	5
 #define	G_ELI_VERSION_06	6
 #define	G_ELI_VERSION_07	7
 #define	G_ELI_VERSION		G_ELI_VERSION_07
 
 /* ON DISK FLAGS. */
 /* Use random, onetime keys. */
 #define	G_ELI_FLAG_ONETIME		0x00000001
 /* Ask for the passphrase from the kernel, before mounting root. */
 #define	G_ELI_FLAG_BOOT			0x00000002
 /* Detach on last close, if we were open for writing. */
 #define	G_ELI_FLAG_WO_DETACH		0x00000004
 /* Detach on last close. */
 #define	G_ELI_FLAG_RW_DETACH		0x00000008
 /* Provide data authentication. */
 #define	G_ELI_FLAG_AUTH			0x00000010
 /* Provider is read-only, we should deny all write attempts. */
 #define	G_ELI_FLAG_RO			0x00000020
 /* Don't pass through BIO_DELETE requests. */
 #define	G_ELI_FLAG_NODELETE		0x00000040
 /* This GELI supports GELIBoot */
 #define	G_ELI_FLAG_GELIBOOT		0x00000080
 /* RUNTIME FLAGS. */
 /* Provider was open for writing. */
 #define	G_ELI_FLAG_WOPEN		0x00010000
 /* Destroy device. */
 #define	G_ELI_FLAG_DESTROY		0x00020000
 /* Provider uses native byte-order for IV generation. */
 #define	G_ELI_FLAG_NATIVE_BYTE_ORDER	0x00040000
 /* Provider uses single encryption key. */
 #define	G_ELI_FLAG_SINGLE_KEY		0x00080000
 /* Device suspended. */
 #define	G_ELI_FLAG_SUSPEND		0x00100000
 /* Provider uses first encryption key. */
 #define	G_ELI_FLAG_FIRST_KEY		0x00200000
 /* Provider uses IV-Key for encryption key generation. */
 #define	G_ELI_FLAG_ENC_IVKEY		0x00400000
 
 #define	G_ELI_NEW_BIO	255
 
 #define	SHA512_MDLEN		64
 #define	G_ELI_AUTH_SECKEYLEN	SHA256_DIGEST_LENGTH
 
 #define	G_ELI_MAXMKEYS		2
 #define	G_ELI_MAXKEYLEN		64
 #define	G_ELI_USERKEYLEN	G_ELI_MAXKEYLEN
 #define	G_ELI_DATAKEYLEN	G_ELI_MAXKEYLEN
 #define	G_ELI_AUTHKEYLEN	G_ELI_MAXKEYLEN
 #define	G_ELI_IVKEYLEN		G_ELI_MAXKEYLEN
 #define	G_ELI_SALTLEN		64
 #define	G_ELI_DATAIVKEYLEN	(G_ELI_DATAKEYLEN + G_ELI_IVKEYLEN)
 /* Data-Key, IV-Key, HMAC_SHA512(Derived-Key, Data-Key+IV-Key) */
 #define	G_ELI_MKEYLEN		(G_ELI_DATAIVKEYLEN + SHA512_MDLEN)
 #define	G_ELI_OVERWRITES	5
 /* Switch data encryption key every 2^20 blocks. */
 #define	G_ELI_KEY_SHIFT		20
 
 #define	G_ELI_CRYPTO_UNKNOWN	0
 #define	G_ELI_CRYPTO_HW		1
 #define	G_ELI_CRYPTO_SW		2
 
 #ifdef _KERNEL
+#if (MAX_KEY_BYTES < G_ELI_DATAIVKEYLEN)
+#error "MAX_KEY_BYTES is less than G_ELI_DATAKEYLEN"
+#endif
+
 extern int g_eli_debug;
 extern u_int g_eli_overwrites;
 extern u_int g_eli_batch;
 
 /*
  * Print a printf-style message, prefixed with "GEOM_ELI" and terminated by
  * a newline, when g_eli_debug is at least lvl.  The level itself is shown
  * in brackets whenever g_eli_debug is greater than zero.
  */
 #define	G_ELI_DEBUG(lvl, ...)	do {					\
 	if (g_eli_debug >= (lvl)) {					\
 		printf("GEOM_ELI");					\
 		if (g_eli_debug > 0)					\
 			printf("[%u]", lvl);				\
 		printf(": ");						\
 		printf(__VA_ARGS__);					\
 		printf("\n");						\
 	}								\
 } while (0)
 /*
  * Like G_ELI_DEBUG(), but the message is followed by a space and the
  * output of g_print_bio(bp) before the newline.
  */
 #define	G_ELI_LOGREQ(lvl, bp, ...)	do {				\
 	if (g_eli_debug >= (lvl)) {					\
 		printf("GEOM_ELI");					\
 		if (g_eli_debug > 0)					\
 			printf("[%u]", lvl);				\
 		printf(": ");						\
 		printf(__VA_ARGS__);					\
 		printf(" ");						\
 		g_print_bio(bp);					\
 		printf("\n");						\
 	}								\
 } while (0)
 
 /*
  * Per-thread state of one GELI worker.  Instances are linked on the
  * sc_workers list of struct g_eli_softc.
  */
 struct g_eli_worker {
 	struct g_eli_softc	*w_softc;	/* Owning device. */
 	struct proc		*w_proc;	/* Kernel process of the worker. */
 	u_int			 w_number;	/* Worker index. */
 	uint64_t		 w_sid;		/* Presumably the crypto session id -- confirm. */
 	boolean_t		 w_active;
 	LIST_ENTRY(g_eli_worker) w_next;	/* Linkage on sc_workers. */
 };
 
 #endif	/* _KERNEL */
 
 /*
  * Per-device state.  The structure is shared between the kernel and
  * userland; the members below sc_cpubind exist in the kernel only.
  */
 struct g_eli_softc {
 	struct g_geom	*sc_geom;	/* Geom this softc belongs to. */
 	u_int		 sc_version;
 	u_int		 sc_crypto;	/* One of G_ELI_CRYPTO_*. */
 	uint8_t		 sc_mkey[G_ELI_DATAIVKEYLEN];
 	uint8_t		 sc_ekey[G_ELI_DATAKEYLEN];
 	/* Encryption keys (struct g_eli_key), as a queue and as a tree. */
 	TAILQ_HEAD(, g_eli_key) sc_ekeys_queue;
 	RB_HEAD(g_eli_key_tree, g_eli_key) sc_ekeys_tree;
 	struct mtx	 sc_ekeys_lock;
 	uint64_t	 sc_ekeys_total;
 	uint64_t	 sc_ekeys_allocated;
 	u_int		 sc_ealgo;	/* Encryption algorithm. */
 	u_int		 sc_ekeylen;	/* Encryption key length. */
 	uint8_t		 sc_akey[G_ELI_AUTHKEYLEN];
 	u_int		 sc_aalgo;	/* Authentication algorithm. */
 	u_int		 sc_akeylen;
 	u_int		 sc_alen;
 	SHA256_CTX	 sc_akeyctx;
 	uint8_t		 sc_ivkey[G_ELI_IVKEYLEN];
 	SHA256_CTX	 sc_ivctx;
 	int		 sc_nkey;	/* Number of the Master Key in use. */
 	uint32_t	 sc_flags;	/* G_ELI_FLAG_* bits. */
 	int		 sc_inflight;
 	off_t		 sc_mediasize;
 	size_t		 sc_sectorsize;
 	u_int		 sc_bytes_per_sector;
 	u_int		 sc_data_per_sector;
 #ifndef _KERNEL
 	int		 sc_cpubind;
 #else /* _KERNEL */
 	boolean_t	 sc_cpubind;
 
 	/* Only for software cryptography. */
 	struct bio_queue_head sc_queue;
 	struct mtx	 sc_queue_mtx;
 	LIST_HEAD(, g_eli_worker) sc_workers;	/* Worker threads. */
 #endif /* _KERNEL */
 };
 #define	sc_name		 sc_geom->name
 
 #define	G_ELI_KEY_MAGIC	0xe11341c
 
 /*
  * One data encryption key.  The entry carries linkage for both a TAILQ and
  * an RB tree (cf. sc_ekeys_queue and sc_ekeys_tree in struct g_eli_softc).
  */
 struct g_eli_key {
 	/* Key value, must be first in the structure. */
 	uint8_t		gek_key[G_ELI_DATAKEYLEN];
 	/* Magic (presumably G_ELI_KEY_MAGIC -- confirm). */
 	int		gek_magic;
 	/* Key number. */
 	uint64_t	gek_keyno;
 	/* Reference counter. */
 	int		gek_count;
 	/* Keeps keys sorted by most recent use. */
 	TAILQ_ENTRY(g_eli_key) gek_next;
 	/* Keeps keys sorted by number. */
 	RB_ENTRY(g_eli_key) gek_link;
 };
 
 /*
  * Provider metadata.  eli_metadata_encode() serializes it field by field
  * (integers little-endian) and eli_metadata_decode() parses it back;
  * md_hash is the MD5 of all serialized bytes that precede it.
  */
 struct g_eli_metadata {
 	char		md_magic[16];	/* Magic value. */
 	uint32_t	md_version;	/* Version number. */
 	uint32_t	md_flags;	/* Additional flags. */
 	uint16_t	md_ealgo;	/* Encryption algorithm. */
 	uint16_t	md_keylen;	/* Key length. */
 	uint16_t	md_aalgo;	/* Authentication algorithm. */
 	uint64_t	md_provsize;	/* Provider's size. */
 	uint32_t	md_sectorsize;	/* Sector size. */
 	uint8_t		md_keys;	/* Available keys. */
 	int32_t		md_iterations;	/* Number of iterations for PKCS#5v2. */
 	uint8_t		md_salt[G_ELI_SALTLEN]; /* Salt. */
 			/* Encrypted master key (IV-key, Data-key, HMAC). */
 	uint8_t		md_mkeys[G_ELI_MAXMKEYS * G_ELI_MKEYLEN];
 	u_char		md_hash[16];	/* MD5 hash. */
 } __packed;
 #ifndef _OpenSSL_
 /*
  * Serialize the version-0 metadata body (everything after the magic and
  * the version number) at *datap and advance *datap past it.  Version 0
  * carries no md_aalgo field.
  */
 static __inline void
 eli_metadata_encode_v0(struct g_eli_metadata *md, u_char **datap)
 {
 	u_char *out;
 
 	out = *datap;
 
 	/* Integers are stored little-endian. */
 	le32enc(out, md->md_flags);
 	out += sizeof(md->md_flags);
 	le16enc(out, md->md_ealgo);
 	out += sizeof(md->md_ealgo);
 	le16enc(out, md->md_keylen);
 	out += sizeof(md->md_keylen);
 	le64enc(out, md->md_provsize);
 	out += sizeof(md->md_provsize);
 	le32enc(out, md->md_sectorsize);
 	out += sizeof(md->md_sectorsize);
 	out[0] = md->md_keys;
 	out += sizeof(md->md_keys);
 	le32enc(out, md->md_iterations);
 	out += sizeof(md->md_iterations);
 
 	/* Byte arrays are copied verbatim. */
 	bcopy(md->md_salt, out, sizeof(md->md_salt));
 	out += sizeof(md->md_salt);
 	bcopy(md->md_mkeys, out, sizeof(md->md_mkeys));
 	out += sizeof(md->md_mkeys);
 
 	*datap = out;
 }
 /*
  * Serialize the metadata body used by versions 1 through 7 at *datap and
  * advance *datap past it.  Compared to version 0 the body additionally
  * contains md_aalgo, right after md_keylen.
  */
 static __inline void
 eli_metadata_encode_v1v2v3v4v5v6v7(struct g_eli_metadata *md, u_char **datap)
 {
 	u_char *out;
 
 	out = *datap;
 
 	/* Integers are stored little-endian. */
 	le32enc(out, md->md_flags);
 	out += sizeof(md->md_flags);
 	le16enc(out, md->md_ealgo);
 	out += sizeof(md->md_ealgo);
 	le16enc(out, md->md_keylen);
 	out += sizeof(md->md_keylen);
 	le16enc(out, md->md_aalgo);
 	out += sizeof(md->md_aalgo);
 	le64enc(out, md->md_provsize);
 	out += sizeof(md->md_provsize);
 	le32enc(out, md->md_sectorsize);
 	out += sizeof(md->md_sectorsize);
 	out[0] = md->md_keys;
 	out += sizeof(md->md_keys);
 	le32enc(out, md->md_iterations);
 	out += sizeof(md->md_iterations);
 
 	/* Byte arrays are copied verbatim. */
 	bcopy(md->md_salt, out, sizeof(md->md_salt));
 	out += sizeof(md->md_salt);
 	bcopy(md->md_mkeys, out, sizeof(md->md_mkeys));
 	out += sizeof(md->md_mkeys);
 
 	*datap = out;
 }
 /*
  * Serialize md into data: magic, version, the version-specific body and
  * finally the MD5 of everything written before it.  The hash is also
  * stored back into md->md_hash.  An unknown version panics in the kernel
  * and trips an assertion in userland.
  */
 static __inline void
 eli_metadata_encode(struct g_eli_metadata *md, u_char *data)
 {
 	MD5_CTX md5;
 	u_char *cur;
 	uint32_t ver;
 
 	/* Common header: magic string, then the version number. */
 	cur = data;
 	bcopy(md->md_magic, cur, sizeof(md->md_magic));
 	cur += sizeof(md->md_magic);
 	le32enc(cur, md->md_version);
 	cur += sizeof(md->md_version);
 
 	/* Version-specific body. */
 	ver = md->md_version;
 	if (ver == G_ELI_VERSION_00) {
 		eli_metadata_encode_v0(md, &cur);
 	} else if (ver >= G_ELI_VERSION_01 && ver <= G_ELI_VERSION_07) {
 		eli_metadata_encode_v1v2v3v4v5v6v7(md, &cur);
 	} else {
 #ifdef _KERNEL
 		panic("%s: Unsupported version %u.", __func__,
 		    (u_int)md->md_version);
 #else
 		assert(!"Unsupported metadata version.");
 #endif
 	}
 
 	/* Trailer: MD5 over all bytes emitted so far. */
 	MD5Init(&md5);
 	MD5Update(&md5, data, cur - data);
 	MD5Final(md->md_hash, &md5);
 	bcopy(md->md_hash, cur, sizeof(md->md_hash));
 }
 /*
  * Parse the version-0 metadata body from data into md and verify the
  * trailing MD5.  Returns 0 on success and EINVAL on a hash mismatch.
  * md_aalgo is not part of this layout and is left untouched.
  */
 static __inline int
 eli_metadata_decode_v0(const u_char *data, struct g_eli_metadata *md)
 {
 	MD5_CTX md5;
 	const u_char *in;
 
 	/* The body starts right after the magic and the version number. */
 	in = data + sizeof(md->md_magic) + sizeof(md->md_version);
 
 	md->md_flags = le32dec(in);
 	in += sizeof(md->md_flags);
 	md->md_ealgo = le16dec(in);
 	in += sizeof(md->md_ealgo);
 	md->md_keylen = le16dec(in);
 	in += sizeof(md->md_keylen);
 	md->md_provsize = le64dec(in);
 	in += sizeof(md->md_provsize);
 	md->md_sectorsize = le32dec(in);
 	in += sizeof(md->md_sectorsize);
 	md->md_keys = in[0];
 	in += sizeof(md->md_keys);
 	md->md_iterations = le32dec(in);
 	in += sizeof(md->md_iterations);
 	bcopy(in, md->md_salt, sizeof(md->md_salt));
 	in += sizeof(md->md_salt);
 	bcopy(in, md->md_mkeys, sizeof(md->md_mkeys));
 	in += sizeof(md->md_mkeys);
 
 	/* Hash everything consumed so far and compare with the stored MD5. */
 	MD5Init(&md5);
 	MD5Update(&md5, data, in - data);
 	MD5Final(md->md_hash, &md5);
 	return (bcmp(md->md_hash, in, sizeof(md->md_hash)) == 0 ? 0 : EINVAL);
 }
 
 /*
  * Parse the metadata body used by versions 1 through 7 from data into md
  * and verify the trailing MD5.  Returns 0 on success and EINVAL on a hash
  * mismatch.
  */
 static __inline int
 eli_metadata_decode_v1v2v3v4v5v6v7(const u_char *data, struct g_eli_metadata *md)
 {
 	MD5_CTX md5;
 	const u_char *in;
 
 	/* The body starts right after the magic and the version number. */
 	in = data + sizeof(md->md_magic) + sizeof(md->md_version);
 
 	md->md_flags = le32dec(in);
 	in += sizeof(md->md_flags);
 	md->md_ealgo = le16dec(in);
 	in += sizeof(md->md_ealgo);
 	md->md_keylen = le16dec(in);
 	in += sizeof(md->md_keylen);
 	md->md_aalgo = le16dec(in);
 	in += sizeof(md->md_aalgo);
 	md->md_provsize = le64dec(in);
 	in += sizeof(md->md_provsize);
 	md->md_sectorsize = le32dec(in);
 	in += sizeof(md->md_sectorsize);
 	md->md_keys = in[0];
 	in += sizeof(md->md_keys);
 	md->md_iterations = le32dec(in);
 	in += sizeof(md->md_iterations);
 	bcopy(in, md->md_salt, sizeof(md->md_salt));
 	in += sizeof(md->md_salt);
 	bcopy(in, md->md_mkeys, sizeof(md->md_mkeys));
 	in += sizeof(md->md_mkeys);
 
 	/* Hash everything consumed so far and compare with the stored MD5. */
 	MD5Init(&md5);
 	MD5Update(&md5, data, in - data);
 	MD5Final(md->md_hash, &md5);
 	return (bcmp(md->md_hash, in, sizeof(md->md_hash)) == 0 ? 0 : EINVAL);
 }
 /*
  * Parse serialized metadata from data into md.
  *
  * Returns EINVAL when the magic does not match (or the version-specific
  * decoder rejects the hash), EOPNOTSUPP for an unknown version, and
  * otherwise whatever the version-specific decoder returns.
  */
 static __inline int
 eli_metadata_decode(const u_char *data, struct g_eli_metadata *md)
 {
 
 	bcopy(data, md->md_magic, sizeof(md->md_magic));
 	if (strcmp(md->md_magic, G_ELI_MAGIC) != 0)
 		return (EINVAL);
 
 	/* The version number follows the magic and selects the layout. */
 	md->md_version = le32dec(data + sizeof(md->md_magic));
 	if (md->md_version == G_ELI_VERSION_00)
 		return (eli_metadata_decode_v0(data, md));
 	if (md->md_version >= G_ELI_VERSION_01 &&
 	    md->md_version <= G_ELI_VERSION_07)
 		return (eli_metadata_decode_v1v2v3v4v5v6v7(data, md));
 	return (EOPNOTSUPP);
 }
 #endif	/* !_OpenSSL_ */
 
 /*
  * Map an encryption algorithm name (case-insensitive) to its CRYPTO_*
  * identifier.  Unknown names yield CRYPTO_ALGORITHM_MIN - 1.
  */
 static __inline u_int
 g_eli_str2ealgo(const char *name)
 {
 	static const struct {
 		const char	*alias;
 		u_int		 algo;
 	} ealgos[] = {
 		{ "null",		CRYPTO_NULL_CBC },
 		{ "null-cbc",		CRYPTO_NULL_CBC },
 		{ "aes",		CRYPTO_AES_XTS },
 		{ "aes-cbc",		CRYPTO_AES_CBC },
 		{ "aes-xts",		CRYPTO_AES_XTS },
 		{ "blowfish",		CRYPTO_BLF_CBC },
 		{ "blowfish-cbc",	CRYPTO_BLF_CBC },
 		{ "camellia",		CRYPTO_CAMELLIA_CBC },
 		{ "camellia-cbc",	CRYPTO_CAMELLIA_CBC },
 		{ "3des",		CRYPTO_3DES_CBC },
 		{ "3des-cbc",		CRYPTO_3DES_CBC },
 	};
 	size_t idx;
 
 	for (idx = 0; idx < sizeof(ealgos) / sizeof(ealgos[0]); idx++) {
 		if (strcasecmp(ealgos[idx].alias, name) == 0)
 			return (ealgos[idx].algo);
 	}
 	return (CRYPTO_ALGORITHM_MIN - 1);
 }
 
 /*
  * Map an authentication algorithm name (case-insensitive) to its CRYPTO_*
  * identifier.  Unknown names yield CRYPTO_ALGORITHM_MIN - 1.
  */
 static __inline u_int
 g_eli_str2aalgo(const char *name)
 {
 	static const struct {
 		const char	*alias;
 		u_int		 algo;
 	} aalgos[] = {
 		{ "hmac/md5",		CRYPTO_MD5_HMAC },
 		{ "hmac/sha1",		CRYPTO_SHA1_HMAC },
 		{ "hmac/ripemd160",	CRYPTO_RIPEMD160_HMAC },
 		{ "hmac/sha256",	CRYPTO_SHA2_256_HMAC },
 		{ "hmac/sha384",	CRYPTO_SHA2_384_HMAC },
 		{ "hmac/sha512",	CRYPTO_SHA2_512_HMAC },
 	};
 	size_t idx;
 
 	for (idx = 0; idx < sizeof(aalgos) / sizeof(aalgos[0]); idx++) {
 		if (strcasecmp(aalgos[idx].alias, name) == 0)
 			return (aalgos[idx].algo);
 	}
 	return (CRYPTO_ALGORITHM_MIN - 1);
 }
 
 /*
  * Return the display name of a CRYPTO_* encryption or authentication
  * algorithm, or "unknown" for anything not listed.
  */
 static __inline const char *
 g_eli_algo2str(u_int algo)
 {
 	const char *label;
 
 	switch (algo) {
 	case CRYPTO_NULL_CBC:
 		label = "NULL";
 		break;
 	case CRYPTO_AES_CBC:
 		label = "AES-CBC";
 		break;
 	case CRYPTO_AES_XTS:
 		label = "AES-XTS";
 		break;
 	case CRYPTO_BLF_CBC:
 		label = "Blowfish-CBC";
 		break;
 	case CRYPTO_CAMELLIA_CBC:
 		label = "CAMELLIA-CBC";
 		break;
 	case CRYPTO_3DES_CBC:
 		label = "3DES-CBC";
 		break;
 	case CRYPTO_MD5_HMAC:
 		label = "HMAC/MD5";
 		break;
 	case CRYPTO_SHA1_HMAC:
 		label = "HMAC/SHA1";
 		break;
 	case CRYPTO_RIPEMD160_HMAC:
 		label = "HMAC/RIPEMD160";
 		break;
 	case CRYPTO_SHA2_256_HMAC:
 		label = "HMAC/SHA256";
 		break;
 	case CRYPTO_SHA2_384_HMAC:
 		label = "HMAC/SHA384";
 		break;
 	case CRYPTO_SHA2_512_HMAC:
 		label = "HMAC/SHA512";
 		break;
 	default:
 		label = "unknown";
 		break;
 	}
 	return (label);
 }
 
 /*
  * Print every field of the metadata block with printf(), one per line.
  * The salt, the master-key area and the first 16 bytes of the hash are
  * rendered as lowercase hex strings.
  */
 static __inline void
 eli_metadata_dump(const struct g_eli_metadata *md)
 {
 	static const char hex[] = "0123456789abcdef";
 	/*
 	 * One scratch buffer is reused for all three hex dumps.  It is sized
 	 * for md_mkeys; assumes md_salt and the 16 hash bytes are no larger
 	 * than md_mkeys -- TODO confirm against struct g_eli_metadata.
 	 */
 	char str[sizeof(md->md_mkeys) * 2 + 1];
 	u_int i;
 
 	printf("     magic: %s\n", md->md_magic);
 	printf("   version: %u\n", (u_int)md->md_version);
 	printf("     flags: 0x%x\n", (u_int)md->md_flags);
 	printf("     ealgo: %s\n", g_eli_algo2str(md->md_ealgo));
 	printf("    keylen: %u\n", (u_int)md->md_keylen);
 	/* The authentication algorithm is shown only when auth is enabled. */
 	if (md->md_flags & G_ELI_FLAG_AUTH)
 		printf("     aalgo: %s\n", g_eli_algo2str(md->md_aalgo));
 	printf("  provsize: %ju\n", (uintmax_t)md->md_provsize);
 	printf("sectorsize: %u\n", (u_int)md->md_sectorsize);
 	printf("      keys: 0x%02x\n", (u_int)md->md_keys);
 	printf("iterations: %d\n", (int)md->md_iterations);
 	/* Zeroing before each pass guarantees the string is NUL-terminated. */
 	bzero(str, sizeof(str));
 	for (i = 0; i < sizeof(md->md_salt); i++) {
 		str[i * 2] = hex[md->md_salt[i] >> 4];
 		str[i * 2 + 1] = hex[md->md_salt[i] & 0x0f];
 	}
 	printf("      Salt: %s\n", str);
 	bzero(str, sizeof(str));
 	for (i = 0; i < sizeof(md->md_mkeys); i++) {
 		str[i * 2] = hex[md->md_mkeys[i] >> 4];
 		str[i * 2 + 1] = hex[md->md_mkeys[i] & 0x0f];
 	}
 	printf("Master Key: %s\n", str);
 	bzero(str, sizeof(str));
 	/* Exactly 16 bytes of md_hash are printed (labelled as an MD5 hash). */
 	for (i = 0; i < 16; i++) {
 		str[i * 2] = hex[md->md_hash[i] >> 4];
 		str[i * 2 + 1] = hex[md->md_hash[i] & 0x0f];
 	}
 	printf("  MD5 hash: %s\n", str);
 }
 
 static __inline u_int
 g_eli_keylen(u_int algo, u_int keylen)
 {
 
 	/*
 	 * Validate a key length (in bits) for the given cipher.  A keylen of
 	 * 0 asks for the cipher's default; the result is the length to use,
 	 * or 0 when the algorithm or the length is not acceptable.
 	 */
 	if (algo == CRYPTO_NULL_CBC) {
 		/* Default and upper bound are both 64 * 8 bits. */
 		if (keylen == 0)
 			return (64 * 8);
 		return (keylen > 64 * 8 ? 0 : keylen);
 	}
 	if (algo == CRYPTO_AES_CBC || algo == CRYPTO_CAMELLIA_CBC) {
 		if (keylen == 0)
 			return (128);
 		if (keylen == 128 || keylen == 192 || keylen == 256)
 			return (keylen);
 		return (0);
 	}
 	if (algo == CRYPTO_AES_XTS) {
 		if (keylen == 0)
 			return (128);
 		if (keylen == 128 || keylen == 256)
 			return (keylen);
 		return (0);
 	}
 	if (algo == CRYPTO_BLF_CBC) {
 		if (keylen == 0)
 			return (128);
 		/* 128..448 bits, in steps of 32. */
 		if (keylen < 128 || keylen > 448 || (keylen % 32) != 0)
 			return (0);
 		return (keylen);
 	}
 	if (algo == CRYPTO_3DES_CBC)
 		return ((keylen == 0 || keylen == 192) ? 192 : 0);
 
 	/* Not an encryption algorithm handled here. */
 	return (0);
 }
 
 static __inline u_int
 g_eli_hashlen(u_int algo)
 {
 	u_int nbytes;
 
 	/* Length in bytes associated with each HMAC algorithm; 0 if unknown. */
 	switch (algo) {
 	case CRYPTO_MD5_HMAC:
 		nbytes = 16;
 		break;
 	case CRYPTO_SHA1_HMAC:
 	case CRYPTO_RIPEMD160_HMAC:
 		nbytes = 20;
 		break;
 	case CRYPTO_SHA2_256_HMAC:
 		nbytes = 32;
 		break;
 	case CRYPTO_SHA2_384_HMAC:
 		nbytes = 48;
 		break;
 	case CRYPTO_SHA2_512_HMAC:
 		nbytes = 64;
 		break;
 	default:
 		nbytes = 0;
 		break;
 	}
 	return (nbytes);
 }
 
 /*
  * Fill in the softc from on-disk metadata.
  *
  *   sc         - softc to initialize (version, flags, algorithms, sizes).
  *   md         - decoded metadata the values are taken from.
  *   sectorsize - sector size used for the authentication layout and for
  *                the space subtracted from the media size.
  *   mediasize  - size of the underlying media, in bytes.
  *
  * Flags implied by older metadata versions are OR-ed into sc_flags so the
  * rest of the code only has to look at flags.
  */
 static __inline void
 eli_metadata_softc(struct g_eli_softc *sc, const struct g_eli_metadata *md,
     u_int sectorsize, off_t mediasize)
 {
 
 	sc->sc_version = md->md_version;
 	sc->sc_inflight = 0;
 	sc->sc_crypto = G_ELI_CRYPTO_UNKNOWN;
 	sc->sc_flags = md->md_flags;
 	/* Backward compatibility. */
 	if (md->md_version < G_ELI_VERSION_04)
 		sc->sc_flags |= G_ELI_FLAG_NATIVE_BYTE_ORDER;
 	if (md->md_version < G_ELI_VERSION_05)
 		sc->sc_flags |= G_ELI_FLAG_SINGLE_KEY;
 	if (md->md_version < G_ELI_VERSION_06 &&
 	    (sc->sc_flags & G_ELI_FLAG_AUTH) != 0) {
 		sc->sc_flags |= G_ELI_FLAG_FIRST_KEY;
 	}
 	if (md->md_version < G_ELI_VERSION_07)
 		sc->sc_flags |= G_ELI_FLAG_ENC_IVKEY;
 	sc->sc_ealgo = md->md_ealgo;
 
 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
 		sc->sc_akeylen = sizeof(sc->sc_akey) * 8;
 		sc->sc_aalgo = md->md_aalgo;
 		sc->sc_alen = g_eli_hashlen(sc->sc_aalgo);
 
 		/* Payload left in one sector once the hash is stored in it. */
 		sc->sc_data_per_sector = sectorsize - sc->sc_alen;
 		/*
 		 * Some hash functions (like SHA1 and RIPEMD160) generates hash
 		 * which length is not multiple of 128 bits, but we want data
 		 * length to be multiple of 128, so we can encrypt without
 		 * padding. The line below rounds down data length to multiple
 		 * of 128 bits.
 		 */
 		sc->sc_data_per_sector -= sc->sc_data_per_sector % 16;
 
 		/*
 		 * Number of sectors (rounded up) needed to hold one
 		 * md_sectorsize worth of data, then converted to bytes.
 		 * NOTE(review): nothing here guards against
 		 * sc_data_per_sector being 0 -- presumably callers validate
 		 * the sector size first; confirm.
 		 */
 		sc->sc_bytes_per_sector =
 		    (md->md_sectorsize - 1) / sc->sc_data_per_sector + 1;
 		sc->sc_bytes_per_sector *= sectorsize;
 	}
 	sc->sc_sectorsize = md->md_sectorsize;
 	sc->sc_mediasize = mediasize;
 	/* Unless the ONETIME flag is set, one sector is taken off the size. */
 	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME))
 		sc->sc_mediasize -= sectorsize;
 	if (!(sc->sc_flags & G_ELI_FLAG_AUTH))
 		/* Round down to a whole number of sc_sectorsize sectors. */
 		sc->sc_mediasize -= (sc->sc_mediasize % sc->sc_sectorsize);
 	else {
 		/* Whole sc_bytes_per_sector groups, each one sc_sectorsize. */
 		sc->sc_mediasize /= sc->sc_bytes_per_sector;
 		sc->sc_mediasize *= sc->sc_sectorsize;
 	}
 	sc->sc_ekeylen = md->md_keylen;
 }
 
 #ifdef _KERNEL
 int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
     struct g_eli_metadata *md);
 struct g_geom *g_eli_create(struct gctl_req *req, struct g_class *mp,
     struct g_provider *bpp, const struct g_eli_metadata *md,
     const u_char *mkey, int nkey);
 int g_eli_destroy(struct g_eli_softc *sc, boolean_t force);
 
 int g_eli_access(struct g_provider *pp, int dr, int dw, int de);
 void g_eli_config(struct gctl_req *req, struct g_class *mp, const char *verb);
 
 void g_eli_read_done(struct bio *bp);
 void g_eli_write_done(struct bio *bp);
 int g_eli_crypto_rerun(struct cryptop *crp);
 
 void g_eli_crypto_read(struct g_eli_softc *sc, struct bio *bp, boolean_t fromworker);
 void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp);
 
 void g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp);
 void g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp);
 #endif
 void g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
     size_t size);
 
 void g_eli_mkey_hmac(unsigned char *mkey, const unsigned char *key);
 int g_eli_mkey_decrypt(const struct g_eli_metadata *md,
     const unsigned char *key, unsigned char *mkey, unsigned *nkeyp);
 int g_eli_mkey_encrypt(unsigned algo, const unsigned char *key, unsigned keylen,
     unsigned char *mkey);
 #ifdef _KERNEL
 void g_eli_mkey_propagate(struct g_eli_softc *sc, const unsigned char *mkey);
 #endif
 
 int g_eli_crypto_encrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize);
 int g_eli_crypto_decrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize);
 
 /*
  * Context passed to the g_eli_crypto_hmac_*() routines.  It carries two
  * SHA512 contexts; going by their names they presumably hold the inner
  * and outer hash state of the HMAC construction.
  */
 struct hmac_ctx {
 	SHA512_CTX	innerctx;
 	SHA512_CTX	outerctx;
 };
 
 void g_eli_crypto_hmac_init(struct hmac_ctx *ctx, const uint8_t *hkey,
     size_t hkeylen);
 void g_eli_crypto_hmac_update(struct hmac_ctx *ctx, const uint8_t *data,
     size_t datasize);
 void g_eli_crypto_hmac_final(struct hmac_ctx *ctx, uint8_t *md, size_t mdsize);
 void g_eli_crypto_hmac(const uint8_t *hkey, size_t hkeysize,
     const uint8_t *data, size_t datasize, uint8_t *md, size_t mdsize);
 
 void g_eli_key_fill(struct g_eli_softc *sc, struct g_eli_key *key,
     uint64_t keyno);
 #ifdef _KERNEL
 void g_eli_key_init(struct g_eli_softc *sc);
 void g_eli_key_destroy(struct g_eli_softc *sc);
 uint8_t *g_eli_key_hold(struct g_eli_softc *sc, off_t offset, size_t blocksize);
 void g_eli_key_drop(struct g_eli_softc *sc, uint8_t *rawkey);
 #endif
 #endif	/* !_G_ELI_H_ */
Index: stable/11/sys/opencrypto/crypto.c
===================================================================
--- stable/11/sys/opencrypto/crypto.c	(revision 329098)
+++ stable/11/sys/opencrypto/crypto.c	(revision 329099)
@@ -1,1577 +1,1614 @@
 /*-
  * Copyright (c) 2002-2006 Sam Leffler.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Cryptographic Subsystem.
  *
  * This code is derived from the OpenBSD Cryptographic Framework (OCF)
  * that has the copyright shown below.  Very little of the original
  * code remains.
  */
 
 /*-
  * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu)
  *
  * This code was written by Angelos D. Keromytis in Athens, Greece, in
  * February 2000. Network Security Technologies Inc. (NSTI) kindly
  * supported the development of this code.
  *
  * Copyright (c) 2000, 2001 Angelos D. Keromytis
  *
  * Permission to use, copy, and modify this software with or without fee
  * is hereby granted, provided that this entire notice is included in
  * all source code copies of any software which is or includes a copy or
  * modification of this software.
  *
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
  * PURPOSE.
  */
 
 #define	CRYPTO_TIMING				/* enable timing support */
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
+#include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/sysctl.h>
 
 #include <ddb/ddb.h>
 
 #include <vm/uma.h>
+#include <crypto/intake.h>
 #include <opencrypto/cryptodev.h>
 #include <opencrypto/xform.h>			/* XXX for M_XDATA */
 
 #include <sys/kobj.h>
 #include <sys/bus.h>
 #include "cryptodev_if.h"
 
 #if defined(__i386__) || defined(__amd64__)
 #include <machine/pcb.h>
 #endif
+#include <machine/metadata.h>
 
 SDT_PROVIDER_DEFINE(opencrypto);
 
 /*
  * Crypto drivers register themselves by allocating a slot in the
  * crypto_drivers table with crypto_get_driverid() and then registering
  * each algorithm they support with crypto_register() and crypto_kregister().
  *
  * The macros below take, release and assert ownership of the mutex that
  * protects that table.
  */
 static	struct mtx crypto_drivers_mtx;		/* lock on driver table */
 #define	CRYPTO_DRIVER_LOCK()	mtx_lock(&crypto_drivers_mtx)
 #define	CRYPTO_DRIVER_UNLOCK()	mtx_unlock(&crypto_drivers_mtx)
 #define	CRYPTO_DRIVER_ASSERT()	mtx_assert(&crypto_drivers_mtx, MA_OWNED)
 
 /*
  * Crypto device/driver capabilities structure.
  *
  * Synchronization:
  * (d) - protected by CRYPTO_DRIVER_LOCK()
  * (q) - protected by CRYPTO_Q_LOCK()
  * Not tagged fields are read-only.
  */
 struct cryptocap {
 	device_t	cc_dev;			/* (d) device/driver */
 	u_int32_t	cc_sessions;		/* (d) # of sessions */
 	u_int32_t	cc_koperations;		/* (d) # of asym operations */
 	/*
 	 * Largest possible operator length (in bits) for each type of
 	 * encryption algorithm. XXX not used
 	 */
 	u_int16_t	cc_max_op_len[CRYPTO_ALGORITHM_MAX + 1];
 	/* Per-algorithm flags, indexed by algorithm id; 0 means unsupported. */
 	u_int8_t	cc_alg[CRYPTO_ALGORITHM_MAX + 1];
 	/* Same, for the key (asymmetric) operations. */
 	u_int8_t	cc_kalg[CRK_ALGORITHM_MAX + 1];
 
 	int		cc_flags;		/* (d) flags */
 #define CRYPTOCAP_F_CLEANUP	0x80000000	/* needs resource cleanup */
 	int		cc_qblocked;		/* (q) symmetric q blocked */
 	int		cc_kqblocked;		/* (q) asymmetric q blocked */
 };
 /* Driver table and its current number of slots; the slot index is the driver id. */
 static	struct cryptocap *crypto_drivers = NULL;
 static	int crypto_drivers_num = 0;
 
 /*
  * There are two queues for crypto requests; one for symmetric (e.g.
  * cipher) operations and one for asymmetric (e.g. MOD) operations.
  * A single mutex is used to lock access to both queues.  We could
  * have one per-queue but having one simplifies handling of block/unblock
  * operations.
  */
 static	int crp_sleep = 0;	/* checked before wakeup_one(&crp_q) */
 static	TAILQ_HEAD(,cryptop) crp_q;		/* request queues */
 static	TAILQ_HEAD(,cryptkop) crp_kq;
 static	struct mtx crypto_q_mtx;
 #define	CRYPTO_Q_LOCK()		mtx_lock(&crypto_q_mtx)
 #define	CRYPTO_Q_UNLOCK()	mtx_unlock(&crypto_q_mtx)
 
 /*
  * There are two queues for processing completed crypto requests; one
  * for the symmetric and one for the asymmetric ops.  We only need one
  * but have two to avoid type futzing (cryptop vs. cryptkop).  A single
  * mutex is used to lock access to both queues.  Note that this lock
  * must be separate from the lock on request queues to ensure driver
  * callbacks don't generate lock order reversals.
  */
 static	TAILQ_HEAD(,cryptop) crp_ret_q;		/* callback queues */
 static	TAILQ_HEAD(,cryptkop) crp_ret_kq;
 static	struct mtx crypto_ret_q_mtx;
 #define	CRYPTO_RETQ_LOCK()	mtx_lock(&crypto_ret_q_mtx)
 #define	CRYPTO_RETQ_UNLOCK()	mtx_unlock(&crypto_ret_q_mtx)
 /* True only when both completion queues are empty. */
 #define	CRYPTO_RETQ_EMPTY()	(TAILQ_EMPTY(&crp_ret_q) && TAILQ_EMPTY(&crp_ret_kq))
 
 /* UMA zones for struct cryptop and struct cryptodesc, created in crypto_init(). */
 static	uma_zone_t cryptop_zone;
 static	uma_zone_t cryptodesc_zone;
 
 /* Run-time tunables, exported read/write under the kern sysctl node. */
 int	crypto_userasymcrypto = 1;	/* userland may do asym crypto reqs */
 SYSCTL_INT(_kern, OID_AUTO, userasymcrypto, CTLFLAG_RW,
 	   &crypto_userasymcrypto, 0,
 	   "Enable/disable user-mode access to asymmetric crypto support");
 int	crypto_devallowsoft = 0;	/* only use hardware crypto */
 SYSCTL_INT(_kern, OID_AUTO, cryptodevallowsoft, CTLFLAG_RW,
 	   &crypto_devallowsoft, 0,
 	   "Enable/disable use of software crypto by /dev/crypto");
 
 MALLOC_DEFINE(M_CRYPTO_DATA, "crypto", "crypto session records");
 
 /* Worker entry points and their process handles; both started by crypto_init(). */
 static	void crypto_proc(void);
 static	struct proc *cryptoproc;
 static	void crypto_ret_proc(void);
 static	struct proc *cryptoretproc;
 static	void crypto_destroy(void);
 static	int crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint);
 static	int crypto_kinvoke(struct cryptkop *krp, int flags);
 
 /* Framework-wide counters, exported as the kern.crypto_stats sysctl. */
 static	struct cryptostats cryptostats;
 SYSCTL_STRUCT(_kern, OID_AUTO, crypto_stats, CTLFLAG_RW, &cryptostats,
 	    cryptostats, "Crypto system statistics");
 
 #ifdef CRYPTO_TIMING
 /* When non-zero, crypto_dispatch() timestamps each request (debug.crypto_timing). */
 static	int crypto_timing = 0;
 SYSCTL_INT(_debug, OID_AUTO, crypto_timing, CTLFLAG_RW,
 	   &crypto_timing, 0, "Enable/disable crypto timing support");
 #endif
 
+/*
+ * Try to avoid directly exposing the key buffer as a symbol: the pointer
+ * is file-local and handed out only through get_keybuf().
+ */
+static struct keybuf *keybuf;
+
+/* Fallback with zero entries, used when the boot metadata carries no key buffer. */
+static struct keybuf empty_keybuf = {
+        .kb_nents = 0
+};
+
+/*
+ * Locate the key buffer in the preloaded kernel's metadata and remember
+ * it; when none is found, point at the empty placeholder instead so that
+ * the stored pointer is never NULL afterwards.
+ */
+static void
+keybuf_init(void)
+{
+	caddr_t kernel_md;
+	struct keybuf *found;
+
+	/* The kernel may be registered under either type name. */
+	kernel_md = preload_search_by_type("elf kernel");
+	if (kernel_md == NULL)
+		kernel_md = preload_search_by_type("elf64 kernel");
+
+	found = (struct keybuf *)preload_search_info(kernel_md,
+	    MODINFO_METADATA | MODINFOMD_KEYBUF);
+	keybuf = (found != NULL) ? found : &empty_keybuf;
+}
+
+/*
+ * Accessor for the file-local key buffer pointer.
+ * It'd be nice if we could store these in some kind of secure memory...
+ */
+struct keybuf *
+get_keybuf(void)
+{
+
+	return (keybuf);
+}
+
 /*
  * Bring up the crypto framework: initialize the three mutexes and the
  * four queues, create the cryptop/cryptodesc UMA zones, allocate the
  * initial driver table, start the "crypto" and "crypto returns" kernel
  * processes and finally pick up the boot-time key buffer.
  *
  * Returns 0 on success.  On any failure everything is torn down again
  * through crypto_destroy() and the error code is returned.
  */
 static int
 crypto_init(void)
 {
 	int error;
 
 	mtx_init(&crypto_drivers_mtx, "crypto", "crypto driver table",
 		MTX_DEF|MTX_QUIET);
 
 	TAILQ_INIT(&crp_q);
 	TAILQ_INIT(&crp_kq);
 	mtx_init(&crypto_q_mtx, "crypto", "crypto op queues", MTX_DEF);
 
 	TAILQ_INIT(&crp_ret_q);
 	TAILQ_INIT(&crp_ret_kq);
 	mtx_init(&crypto_ret_q_mtx, "crypto", "crypto return queues", MTX_DEF);
 
 	cryptop_zone = uma_zcreate("cryptop", sizeof (struct cryptop),
 				    0, 0, 0, 0,
 				    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 	cryptodesc_zone = uma_zcreate("cryptodesc", sizeof (struct cryptodesc),
 				    0, 0, 0, 0,
 				    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 	if (cryptodesc_zone == NULL || cryptop_zone == NULL) {
 		printf("crypto_init: cannot setup crypto zones\n");
 		error = ENOMEM;
 		goto bad;
 	}
 
 	crypto_drivers_num = CRYPTO_DRIVERS_INITIAL;
 	crypto_drivers = malloc(crypto_drivers_num *
 	    sizeof(struct cryptocap), M_CRYPTO_DATA, M_NOWAIT | M_ZERO);
 	if (crypto_drivers == NULL) {
 		printf("crypto_init: cannot setup crypto drivers\n");
 		error = ENOMEM;
 		goto bad;
 	}
 
 	error = kproc_create((void (*)(void *)) crypto_proc, NULL,
 		    &cryptoproc, 0, 0, "crypto");
 	if (error) {
 		/* Terminate the line so the next console message starts fresh. */
 		printf("crypto_init: cannot start crypto thread; error %d\n",
 			error);
 		goto bad;
 	}
 
 	error = kproc_create((void (*)(void *)) crypto_ret_proc, NULL,
 		    &cryptoretproc, 0, 0, "crypto returns");
 	if (error) {
 		printf("crypto_init: cannot start cryptoret thread; error %d\n",
 			error);
 		goto bad;
 	}
+
+        keybuf_init();
+
 	return 0;
 bad:
 	crypto_destroy();
 	return error;
 }
 
 /*
  * Signal a crypto thread to terminate.  We use the driver
  * table lock to synchronize the sleep/wakeups so that we
  * are sure the threads have terminated before we release
  * the data structures they use.  See crypto_finis below
  * for the other half of this song-and-dance.
  *
  *   pp - address of the process pointer; it is cleared here.
  *   q  - channel passed to wakeup_one() to rouse the thread.
  *
  * Must be called with the driver table lock held; the lock is dropped
  * while waiting and re-acquired before returning.
  */
 static void
 crypto_terminate(struct proc **pp, void *q)
 {
 	struct proc *p;
 
 	mtx_assert(&crypto_drivers_mtx, MA_OWNED);
 	p = *pp;
 	*pp = NULL;
 	/* Nothing to do if the process was never started (or already reaped). */
 	if (p) {
 		wakeup_one(q);
 		PROC_LOCK(p);		/* NB: insure we don't miss wakeup */
 		CRYPTO_DRIVER_UNLOCK();	/* let crypto_finis progress */
 		/* Sleep on the process itself until it is woken. */
 		msleep(p, &p->p_mtx, PWAIT, "crypto_destroy", 0);
 		PROC_UNLOCK(p);
 		CRYPTO_DRIVER_LOCK();
 	}
 }
 
 /*
  * Tear down everything crypto_init() set up: stop both worker processes,
  * free the driver table, destroy the UMA zones and the three mutexes.
  * Also used by crypto_init() to unwind after a partial failure, which is
  * why the table and zones are checked for NULL before being released.
  */
 static void
 crypto_destroy(void)
 {
 	/*
 	 * Terminate any crypto threads.
 	 */
 	CRYPTO_DRIVER_LOCK();
 	crypto_terminate(&cryptoproc, &crp_q);
 	crypto_terminate(&cryptoretproc, &crp_ret_q);
 	CRYPTO_DRIVER_UNLOCK();
 
 	/* XXX flush queues??? */
 
-	/* 
+	/*
 	 * Reclaim dynamically allocated resources.
 	 */
 	if (crypto_drivers != NULL)
 		free(crypto_drivers, M_CRYPTO_DATA);
 
 	if (cryptodesc_zone != NULL)
 		uma_zdestroy(cryptodesc_zone);
 	if (cryptop_zone != NULL)
 		uma_zdestroy(cryptop_zone);
 	mtx_destroy(&crypto_q_mtx);
 	mtx_destroy(&crypto_ret_q_mtx);
 	mtx_destroy(&crypto_drivers_mtx);
 }
 
 static struct cryptocap *
 crypto_checkdriver(u_int32_t hid)
 {
 	/* No table yet, or an id past its end: there is no such driver. */
 	if (crypto_drivers == NULL || hid >= crypto_drivers_num)
 		return NULL;
 	return &crypto_drivers[hid];
 }
 
 /*
  * Compare a driver's list of supported algorithms against another
  * list; return non-zero if all algorithms are supported.
  */
 static int
 driver_suitable(const struct cryptocap *cap, const struct cryptoini *cri)
 {
 	const struct cryptoini *cr;
 
 	/* See if all the algorithms are supported. */
 	for (cr = cri; cr; cr = cr->cri_next)
 		if (cap->cc_alg[cr->cri_alg] == 0)
 			return 0;
 	return 1;
 }
 
 /*
  * Select a driver for a new session that supports the specified
  * algorithms and, optionally, is constrained according to the flags.
  * The algorithm we use here is pretty stupid; just use the
  * first driver that supports all the algorithms we need. If there
  * are multiple drivers we choose the driver with the fewest active
  * sessions.  We prefer hardware-backed drivers to software ones.
  *
  * XXX We need more smarts here (in real life too, but that's
  * XXX another story altogether).
  *
  * Returns the chosen driver table entry, or NULL if no driver qualifies.
  * The caller must hold the driver table lock.
  */
 static struct cryptocap *
 crypto_select_driver(const struct cryptoini *cri, int flags)
 {
 	struct cryptocap *cap, *best;
 	int match, hid;
 
 	CRYPTO_DRIVER_ASSERT();
 
 	/*
 	 * Look first for hardware crypto devices if permitted.
 	 */
 	if (flags & CRYPTOCAP_F_HARDWARE)
 		match = CRYPTOCAP_F_HARDWARE;
 	else
 		match = CRYPTOCAP_F_SOFTWARE;
 	best = NULL;
 again:
 	for (hid = 0; hid < crypto_drivers_num; hid++) {
 		cap = &crypto_drivers[hid];
 		/*
 		 * If it's not initialized, is in the process of
 		 * going away, or is not appropriate (hardware
 		 * or software based on match), then skip.
 		 */
 		if (cap->cc_dev == NULL ||
 		    (cap->cc_flags & CRYPTOCAP_F_CLEANUP) ||
 		    (cap->cc_flags & match) == 0)
 			continue;
 
 		/* verify all the algorithms are supported. */
 		if (driver_suitable(cap, cri)) {
 			/* Keep the candidate with the fewest sessions. */
 			if (best == NULL ||
 			    cap->cc_sessions < best->cc_sessions)
 				best = cap;
 		}
 	}
 	/*
 	 * Nothing found among hardware drivers and software is allowed
 	 * too: run the scan a second time over the software drivers.
 	 */
 	if (best == NULL && match == CRYPTOCAP_F_HARDWARE &&
 	    (flags & CRYPTOCAP_F_SOFTWARE)) {
 		/* sort of an Algol 68-style for loop */
 		match = CRYPTOCAP_F_SOFTWARE;
 		goto again;
 	}
 	return best;
 }
 
 /*
  * Create a new session.  The crid argument specifies a crypto
  * driver to use or constraints on a driver to select (hardware
  * only, software only, either).  Whatever driver is selected
  * must be capable of the requested crypto algorithms.
  *
  * On success 0 is returned and *sid receives the session id.  EINVAL is
  * returned when no suitable driver exists; otherwise the error from the
  * driver's newsession method is passed through.
  */
 int
 crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int crid)
 {
 	struct cryptocap *cap;
 	u_int32_t hid, lid;
 	int err;
 
 	CRYPTO_DRIVER_LOCK();
 	/* Without either selection flag, crid is taken as a driver id. */
 	if ((crid & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) {
 		/*
 		 * Use specified driver; verify it is capable.
 		 */
 		cap = crypto_checkdriver(crid);
 		if (cap != NULL && !driver_suitable(cap, cri))
 			cap = NULL;
 	} else {
 		/*
 		 * No requested driver; select based on crid flags.
 		 */
 		cap = crypto_select_driver(cri, crid);
 		/*
 		 * if NULL then can't do everything in one session.
 		 * XXX Fix this. We need to inject a "virtual" session
 		 * XXX layer right about here.
 		 */
 	}
 	if (cap != NULL) {
 		/* Call the driver initialization routine. */
 		hid = cap - crypto_drivers;	/* index in the driver table */
 		lid = hid;		/* Pass the driver ID. */
 		err = CRYPTODEV_NEWSESSION(cap->cc_dev, &lid, cri);
 		if (err == 0) {
 			/*
 			 * Session id layout: the upper 32 bits hold the top
 			 * byte of the driver flags plus the 24-bit driver
 			 * id, the lower 32 bits hold lid as left by the
 			 * driver.
 			 */
 			(*sid) = (cap->cc_flags & 0xff000000)
 			       | (hid & 0x00ffffff);
 			(*sid) <<= 32;
 			(*sid) |= (lid & 0xffffffff);
 			cap->cc_sessions++;
 		} else
 			CRYPTDEB("dev newsession failed");
 	} else {
 		CRYPTDEB("no driver");
 		err = EINVAL;
 	}
 	CRYPTO_DRIVER_UNLOCK();
 	return err;
 }
 
 static void
 crypto_remove(struct cryptocap *cap)
 {
 
 	mtx_assert(&crypto_drivers_mtx, MA_OWNED);
 	/* The slot stays as it is while anything still refers to it. */
 	if (cap->cc_sessions != 0 || cap->cc_koperations != 0)
 		return;
 	/* Last user gone: wipe the slot so it can be handed out again. */
 	bzero(cap, sizeof(*cap));
 }
 
 /*
  * Delete an existing session (or a reserved session on an unregistered
  * driver).
  *
  * Returns EINVAL if there is no driver table, ENOENT if the driver id
  * encoded in sid is out of range, otherwise the result of the driver's
  * freesession method.
  */
 int
 crypto_freesession(u_int64_t sid)
 {
 	struct cryptocap *cap;
 	u_int32_t hid;
 	int err;
 
 	CRYPTO_DRIVER_LOCK();
 
 	if (crypto_drivers == NULL) {
 		err = EINVAL;
 		goto done;
 	}
 
 	/* Extract the driver id from the session id. */
 	hid = CRYPTO_SESID2HID(sid);
 
 	if (hid >= crypto_drivers_num) {
 		err = ENOENT;
 		goto done;
 	}
 	cap = &crypto_drivers[hid];
 
 	/* Guard against dropping the count below zero. */
 	if (cap->cc_sessions)
 		cap->cc_sessions--;
 
 	/* Call the driver cleanup routine, if available. */
 	err = CRYPTODEV_FREESESSION(cap->cc_dev, sid);
 
 	/* A driver that was unregistered meanwhile may now be reclaimable. */
 	if (cap->cc_flags & CRYPTOCAP_F_CLEANUP)
 		crypto_remove(cap);
 
 done:
 	CRYPTO_DRIVER_UNLOCK();
 	return err;
 }
 
 /*
  * Return an unused driver id.  Used by drivers prior to registering
  * support for the algorithms they handle.
  *
  *   dev   - device to record in the allocated slot.
  *   flags - must contain CRYPTOCAP_F_HARDWARE and/or CRYPTOCAP_F_SOFTWARE.
  *
  * Returns the slot index (the driver id), or -1 if no flag was given or
  * the table could not be grown.
  */
 int32_t
 crypto_get_driverid(device_t dev, int flags)
 {
 	struct cryptocap *newdrv;
 	int i;
 
 	if ((flags & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) {
 		printf("%s: no flags specified when registering driver\n",
 		    device_get_nameunit(dev));
 		return -1;
 	}
 
 	CRYPTO_DRIVER_LOCK();
 
 	/* First slot that has no device and is not awaiting cleanup. */
 	for (i = 0; i < crypto_drivers_num; i++) {
 		if (crypto_drivers[i].cc_dev == NULL &&
 		    (crypto_drivers[i].cc_flags & CRYPTOCAP_F_CLEANUP) == 0) {
 			break;
 		}
 	}
 
 	/* Out of entries, allocate some more. */
 	if (i == crypto_drivers_num) {
 		/* Be careful about wrap-around. */
 		if (2 * crypto_drivers_num <= crypto_drivers_num) {
 			CRYPTO_DRIVER_UNLOCK();
 			printf("crypto: driver count wraparound!\n");
 			return -1;
 		}
 
 		/* Double the table; M_NOWAIT because the mutex is held. */
 		newdrv = malloc(2 * crypto_drivers_num *
 		    sizeof(struct cryptocap), M_CRYPTO_DATA, M_NOWAIT|M_ZERO);
 		if (newdrv == NULL) {
 			CRYPTO_DRIVER_UNLOCK();
 			printf("crypto: no space to expand driver table!\n");
 			return -1;
 		}
 
 		bcopy(crypto_drivers, newdrv,
 		    crypto_drivers_num * sizeof(struct cryptocap));
 
 		crypto_drivers_num *= 2;
 
 		free(crypto_drivers, M_CRYPTO_DATA);
 		crypto_drivers = newdrv;
 	}
 
 	/* NB: state is zero'd on free */
 	/*
 	 * The session count of 1 is a placeholder; crypto_register()
 	 * resets it to 0 once an algorithm has been registered.
 	 */
 	crypto_drivers[i].cc_sessions = 1;	/* Mark */
 	crypto_drivers[i].cc_dev = dev;
 	crypto_drivers[i].cc_flags = flags;
 	if (bootverbose)
 		printf("crypto: assign %s driver id %u, flags %u\n",
 		    device_get_nameunit(dev), i, flags);
 
 	CRYPTO_DRIVER_UNLOCK();
 
 	return i;
 }
 
 /*
  * Lookup a driver by name.  We match against the full device
  * name and unit, and against just the name.  The latter gives
  * us a simple wildcarding by device name.  On success return the
  * driver/hardware identifier; otherwise return -1.
  *
  * Only the first strlen(match) characters are compared, so match acts
  * as a prefix of the device name.
  */
 int
 crypto_find_driver(const char *match)
 {
 	int i, len = strlen(match);
 
 	CRYPTO_DRIVER_LOCK();
 	for (i = 0; i < crypto_drivers_num; i++) {
 		device_t dev = crypto_drivers[i].cc_dev;
 		/* Skip empty slots and drivers that are going away. */
 		if (dev == NULL ||
 		    (crypto_drivers[i].cc_flags & CRYPTOCAP_F_CLEANUP))
 			continue;
 		if (strncmp(match, device_get_nameunit(dev), len) == 0 ||
 		    strncmp(match, device_get_name(dev), len) == 0)
 			break;
 	}
 	CRYPTO_DRIVER_UNLOCK();
 	/* i reached the table size only if the loop found no match. */
 	return i < crypto_drivers_num ? i : -1;
 }
 
 /*
  * Map a driver identifier to the device_t stored in its table slot.
  * An identifier that does not name a slot yields NULL.
  */
 device_t
 crypto_find_device_byhid(int hid)
 {
 	struct cryptocap *drv;
 
 	drv = crypto_checkdriver(hid);
 	if (drv == NULL)
 		return NULL;
 	return drv->cc_dev;
 }
 
 /*
  * Fetch the capability flags (cc_flags) of the given driver.
  * An identifier that does not name a slot yields 0.
  */
 int
 crypto_getcaps(int hid)
 {
 	struct cryptocap *drv;
 
 	drv = crypto_checkdriver(hid);
 	if (drv == NULL)
 		return 0;
 	return drv->cc_flags;
 }
 
 /*
  * Register support for a key-related algorithm.  This routine
  * is called once for each algorithm supported a driver.
  *
  * Returns 0 on success, EINVAL if driverid does not name a table slot
  * or kalg is outside [CRK_ALGORITM_MIN, CRK_ALGORITHM_MAX].
  */
 int
 crypto_kregister(u_int32_t driverid, int kalg, u_int32_t flags)
 {
 	struct cryptocap *cap;
 	int err;
 
 	CRYPTO_DRIVER_LOCK();
 
 	cap = crypto_checkdriver(driverid);
 	if (cap != NULL &&
 	    (CRK_ALGORITM_MIN <= kalg && kalg <= CRK_ALGORITHM_MAX)) {
 		/*
 		 * XXX Do some performance testing to determine placing.
 		 * XXX We probably need an auxiliary data structure that
 		 * XXX describes relative performances.
 		 */
 
 		/* Record the caller's flags and mark the operation supported. */
 		cap->cc_kalg[kalg] = flags | CRYPTO_ALG_FLAG_SUPPORTED;
 		if (bootverbose)
 			printf("crypto: %s registers key alg %u flags %u\n"
 				, device_get_nameunit(cap->cc_dev)
 				, kalg
 				, flags
 			);
 		err = 0;
 	} else
 		err = EINVAL;
 
 	CRYPTO_DRIVER_UNLOCK();
 	return err;
 }
 
 /*
  * Register support for a non-key-related algorithm.  This routine
  * is called once for each such algorithm supported by a driver.
  *
  * Returns 0 on success, EINVAL if driverid does not name a table slot
  * or alg is outside [CRYPTO_ALGORITHM_MIN, CRYPTO_ALGORITHM_MAX].
  */
 int
 crypto_register(u_int32_t driverid, int alg, u_int16_t maxoplen,
     u_int32_t flags)
 {
 	struct cryptocap *cap;
 	int err;
 
 	CRYPTO_DRIVER_LOCK();
 
 	cap = crypto_checkdriver(driverid);
 	/* NB: algorithms are in the range [1..max] */
 	if (cap != NULL &&
 	    (CRYPTO_ALGORITHM_MIN <= alg && alg <= CRYPTO_ALGORITHM_MAX)) {
 		/*
 		 * XXX Do some performance testing to determine placing.
 		 * XXX We probably need an auxiliary data structure that
 		 * XXX describes relative performances.
 		 */
 
 		/* Record the caller's flags and mark the algorithm supported. */
 		cap->cc_alg[alg] = flags | CRYPTO_ALG_FLAG_SUPPORTED;
 		cap->cc_max_op_len[alg] = maxoplen;
 		if (bootverbose)
 			printf("crypto: %s registers alg %u flags %u maxoplen %u\n"
 				, device_get_nameunit(cap->cc_dev)
 				, alg
 				, flags
 				, maxoplen
 			);
 		/* Clears the placeholder count set by crypto_get_driverid(). */
 		cap->cc_sessions = 0;		/* Unmark */
 		err = 0;
 	} else
 		err = EINVAL;
 
 	CRYPTO_DRIVER_UNLOCK();
 	return err;
 }
 
 /*
  * Retire a driver table entry.  The slot is always wiped; if sessions or
  * asymmetric operations were still outstanding, their counts are put
  * back and the slot is flagged CRYPTOCAP_F_CLEANUP so it is neither
  * reused nor selected until those references are gone.
  * The caller must hold the driver table lock.
  */
 static void
 driver_finis(struct cryptocap *cap)
 {
 	u_int32_t ses, kops;
 
 	CRYPTO_DRIVER_ASSERT();
 
 	/* Save the counters, they are about to be zeroed with the rest. */
 	ses = cap->cc_sessions;
 	kops = cap->cc_koperations;
 	bzero(cap, sizeof(*cap));
 	if (ses != 0 || kops != 0) {
 		/*
 		 * If there are pending sessions,
 		 * just mark as invalid.
 		 */
 		cap->cc_flags |= CRYPTOCAP_F_CLEANUP;
 		cap->cc_sessions = ses;
 		cap->cc_koperations = kops;
 	}
 }
 
 /*
  * Unregister a crypto driver. If there are pending sessions using it,
  * leave enough information around so that subsequent calls using those
  * sessions will correctly detect the driver has been unregistered and
  * reroute requests.
  *
  * Only the single algorithm alg is dropped; the driver entry itself is
  * retired once no registered algorithm remains.  Returns 0 on success,
  * EINVAL if the driver id is invalid, alg is out of range, or alg was
  * not registered for this driver.
  */
 int
 crypto_unregister(u_int32_t driverid, int alg)
 {
 	struct cryptocap *cap;
 	int i, err;
 
 	CRYPTO_DRIVER_LOCK();
 	cap = crypto_checkdriver(driverid);
 	if (cap != NULL &&
 	    (CRYPTO_ALGORITHM_MIN <= alg && alg <= CRYPTO_ALGORITHM_MAX) &&
 	    cap->cc_alg[alg] != 0) {
 		cap->cc_alg[alg] = 0;
 		cap->cc_max_op_len[alg] = 0;
 
 		/* Was this the last algorithm ? */
 		for (i = 1; i <= CRYPTO_ALGORITHM_MAX; i++)
 			if (cap->cc_alg[i] != 0)
 				break;
 
 		/* The loop ran to completion: nothing is registered any more. */
 		if (i == CRYPTO_ALGORITHM_MAX + 1)
 			driver_finis(cap);
 		err = 0;
 	} else
 		err = EINVAL;
 	CRYPTO_DRIVER_UNLOCK();
 
 	return err;
 }
 
 /*
  * Drop every algorithm of a crypto driver at once by retiring its
  * table entry.  Sessions that are still pending keep enough state in
  * the entry for later calls to notice the driver is gone.
  * Returns 0 on success, EINVAL for an invalid driver id.
  */
 int
 crypto_unregister_all(u_int32_t driverid)
 {
 	struct cryptocap *drv;
 	int rc = EINVAL;
 
 	CRYPTO_DRIVER_LOCK();
 	drv = crypto_checkdriver(driverid);
 	if (drv != NULL) {
 		driver_finis(drv);
 		rc = 0;
 	}
 	CRYPTO_DRIVER_UNLOCK();
 
 	return rc;
 }
 
 /*
  * Lift the blocked state of a driver.  The what bitmask selects the
  * symmetric queue (CRYPTO_SYMQ), the asymmetric queue (CRYPTO_ASYMQ) or
  * both.  Returns 0 on success, EINVAL for an invalid driver id.
  */
 int
 crypto_unblock(u_int32_t driverid, int what)
 {
 	struct cryptocap *drv;
 
 	CRYPTO_Q_LOCK();
 	drv = crypto_checkdriver(driverid);
 	if (drv == NULL) {
 		CRYPTO_Q_UNLOCK();
 		return EINVAL;
 	}
 
 	if (what & CRYPTO_SYMQ)
 		drv->cc_qblocked = 0;
 	if (what & CRYPTO_ASYMQ)
 		drv->cc_kqblocked = 0;
 	/* Kick the request thread when the sleep flag is set. */
 	if (crp_sleep)
 		wakeup_one(&crp_q);
 	CRYPTO_Q_UNLOCK();
 
 	return 0;
 }
 
 /*
  * Add a crypto request to a queue, to be processed by the kernel thread.
  *
  * A request without CRYPTO_F_BATCH is first handed straight to its
  * driver; it is queued only if the driver is blocked or answers
  * ERESTART.  Returns the driver's result for a directly dispatched
  * request, or 0 once the request has been queued.
  */
 int
 crypto_dispatch(struct cryptop *crp)
 {
 	struct cryptocap *cap;
 	u_int32_t hid;
 	int result;
 
 	cryptostats.cs_ops++;
 
 #ifdef CRYPTO_TIMING
 	if (crypto_timing)
 		binuptime(&crp->crp_tstamp);
 #endif
 
 	/* The driver id is encoded in the session id. */
 	hid = CRYPTO_SESID2HID(crp->crp_sid);
 
 	if ((crp->crp_flags & CRYPTO_F_BATCH) == 0) {
 		/*
 		 * Caller marked the request to be processed
 		 * immediately; dispatch it directly to the
 		 * driver unless the driver is currently blocked.
 		 */
 		cap = crypto_checkdriver(hid);
 		/* The driver cannot disappear while there is an active session. */
 		KASSERT(cap != NULL, ("%s: Driver disappeared.", __func__));
 		if (!cap->cc_qblocked) {
 			result = crypto_invoke(cap, crp, 0);
 			if (result != ERESTART)
 				return (result);
 			/*
 			 * The driver ran out of resources, put the request on
 			 * the queue.
 			 */
 		}
 	}
 	CRYPTO_Q_LOCK();
 	TAILQ_INSERT_TAIL(&crp_q, crp, crp_next);
 	if (crp_sleep)
 		wakeup_one(&crp_q);
 	CRYPTO_Q_UNLOCK();
 	return 0;
 }
 
 /*
  * Submit an asymmetric crypto request.  The request is tried right
  * away; only when that attempt answers ERESTART is it appended to the
  * asymmetric queue for the kernel thread, in which case 0 is returned.
  * Any other result of the attempt is returned unchanged.
  */
 int
 crypto_kdispatch(struct cryptkop *krp)
 {
 	int rc;
 
 	cryptostats.cs_kops++;
 
 	rc = crypto_kinvoke(krp, krp->krp_crid);
 	if (rc != ERESTART)
 		return rc;
 
 	CRYPTO_Q_LOCK();
 	TAILQ_INSERT_TAIL(&crp_kq, krp, krp_next);
 	if (crp_sleep)
 		wakeup_one(&crp_q);
 	CRYPTO_Q_UNLOCK();
 	return 0;
 }
 
 /*
  * Tell whether the driver has the request's operation (krp_op) flagged
  * as supported in its cc_kalg table: 1 if so, 0 otherwise.
  */
 static __inline int
 kdriver_suitable(const struct cryptocap *cap, const struct cryptkop *krp)
 {
 	if (cap->cc_kalg[krp->krp_op] & CRYPTO_ALG_FLAG_SUPPORTED)
 		return 1;
 	return 0;
 }
 
 /*
  * Select a driver for an asym operation.  The driver must
  * support the necessary algorithm.  The caller can constrain
  * which device is selected with the flags parameter.
  *
  * Drivers are scanned in up to two passes.  The first pass looks at
  * hardware drivers when CRYPTOCAP_F_HARDWARE is set in flags, and at
  * software drivers otherwise.  If a hardware pass finds nothing and
  * CRYPTOCAP_F_SOFTWARE is also set, a second pass looks at software
  * drivers.  Within a pass, the suitable driver with the fewest active
  * key operations (cc_koperations) wins; ties go to the lowest index.
  *
  * Must be called with the driver table lock held (asserted below).
  * Returns the chosen driver, or NULL if none is suitable.
  */
 static struct cryptocap *
 crypto_select_kdriver(const struct cryptkop *krp, int flags)
 {
 	struct cryptocap *cap, *best;
 	int match, hid;
 
 	CRYPTO_DRIVER_ASSERT();
 
 	/*
 	 * Look first for hardware crypto devices if permitted.
 	 */
 	if (flags & CRYPTOCAP_F_HARDWARE)
 		match = CRYPTOCAP_F_HARDWARE;
 	else
 		match = CRYPTOCAP_F_SOFTWARE;
 	best = NULL;
 	for (;;) {
 		for (hid = 0; hid < crypto_drivers_num; hid++) {
 			cap = &crypto_drivers[hid];
 			/*
 			 * If it's not initialized, is in the process of
 			 * going away, or is not appropriate (hardware
 			 * or software based on match), then skip.
 			 */
 			if (cap->cc_dev == NULL ||
 			    (cap->cc_flags & CRYPTOCAP_F_CLEANUP) ||
 			    (cap->cc_flags & match) == 0)
 				continue;
 
 			/* The driver must support the requested operation. */
 			if (!kdriver_suitable(cap, krp))
 				continue;
 
 			/* Prefer the least loaded of the suitable drivers. */
 			if (best == NULL ||
 			    cap->cc_koperations < best->cc_koperations)
 				best = cap;
 		}
 		if (best != NULL)
 			return best;
 
 		/*
 		 * Nothing found.  Retry with software drivers only if this
 		 * was the hardware pass and the caller allows software.
 		 */
 		if (match != CRYPTOCAP_F_HARDWARE ||
 		    (flags & CRYPTOCAP_F_SOFTWARE) == 0)
 			return NULL;
 		match = CRYPTOCAP_F_SOFTWARE;
 	}
 }
 
 /*
  * Dispatch an asymmetric crypto request.
  *
  * crid either names a specific driver (neither CRYPTOCAP_F_HARDWARE nor
  * CRYPTOCAP_F_SOFTWARE set) or carries selection flags that are handed
  * to crypto_select_kdriver().
  *
  * Returns ERESTART only when the driver's KPROCESS method itself
  * reported ERESTART; in that case the request has not been completed
  * and the caller is expected to requeue it.  In every other case 0 is
  * returned; if an error occurred it is stored in krp->krp_status and
  * the request is completed through crypto_kdone().
  */
 static int
 crypto_kinvoke(struct cryptkop *krp, int crid)
 {
 	struct cryptocap *cap = NULL;
 	int error;
 
 	KASSERT(krp != NULL, ("%s: krp == NULL", __func__));
 	KASSERT(krp->krp_callback != NULL,
 	    ("%s: krp->crp_callback == NULL", __func__));
 
 	CRYPTO_DRIVER_LOCK();
 	if ((crid & (CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE)) == 0) {
 		/* No selection flags: crid is treated as a driver id. */
 		cap = crypto_checkdriver(crid);
 		if (cap != NULL) {
 			/*
 			 * Driver present, it must support the necessary
 			 * algorithm and, if s/w drivers are excluded,
 			 * it must be registered as hardware-backed.
 			 */
 			if (!kdriver_suitable(cap, krp) ||
 			    (!crypto_devallowsoft &&
 			     (cap->cc_flags & CRYPTOCAP_F_HARDWARE) == 0))
 				cap = NULL;
 		}
 	} else {
 		/*
 		 * No requested driver; select based on crid flags.
 		 */
 		if (!crypto_devallowsoft)	/* NB: disallow s/w drivers */
 			crid &= ~CRYPTOCAP_F_SOFTWARE;
 		cap = crypto_select_kdriver(krp, crid);
 	}
 	if (cap != NULL && !cap->cc_kqblocked) {
 		/* Record the chosen driver and account for the new operation. */
 		krp->krp_hid = cap - crypto_drivers;
 		cap->cc_koperations++;
 		/* The driver method is called without the driver lock held. */
 		CRYPTO_DRIVER_UNLOCK();
 		error = CRYPTODEV_KPROCESS(cap->cc_dev, krp, 0);
 		CRYPTO_DRIVER_LOCK();
 		if (error == ERESTART) {
 			/* Not started: undo the accounting, let caller requeue. */
 			cap->cc_koperations--;
 			CRYPTO_DRIVER_UNLOCK();
 			return (error);
 		}
 	} else {
 		/*
 		 * NB: cap is !NULL if device is blocked; in
 		 *     that case return ERESTART so the operation
 		 *     is resubmitted if possible.
 		 *
 		 * NOTE(review): the ERESTART set here is not returned.
 		 * Control falls through to the error path below, which
 		 * completes the request with krp_status = ERESTART and
 		 * returns 0.  crypto_kdone() will also decrement
 		 * cc_koperations for whatever krp_hid currently holds,
 		 * although this path never incremented it.  Confirm
 		 * whether that is intended.
 		 */
 		error = (cap == NULL) ? ENODEV : ERESTART;
 	}
 	CRYPTO_DRIVER_UNLOCK();
 
 	if (error) {
 		/* Report the failure through the normal completion path. */
 		krp->krp_status = error;
 		crypto_kdone(krp);
 	}
 	return 0;
 }
 
 #ifdef CRYPTO_TIMING
 static void
 crypto_tstat(struct cryptotstat *ts, struct bintime *bt)
 {
 	struct bintime now, delta;
 	struct timespec t;
 	uint64_t u;
 
 	binuptime(&now);
 	u = now.frac;
 	delta.frac = now.frac - bt->frac;
 	delta.sec = now.sec - bt->sec;
 	if (u < delta.frac)
 		delta.sec--;
 	bintime2timespec(&delta, &t);
 	timespecadd(&ts->acc, &t);
 	if (timespeccmp(&t, &ts->min, <))
 		ts->min = t;
 	if (timespeccmp(&t, &ts->max, >))
 		ts->max = t;
 	ts->count++;
 
 	*bt = now;
 }
 #endif
 
 /*
  * Dispatch a crypto request to the appropriate crypto devices.
  *
  * If the driver is not being torn down, the request is passed to its
  * PROCESS method and that method's result is returned.  If the driver
  * is flagged CRYPTOCAP_F_CLEANUP, the session is freed, a replacement
  * session is attempted, and the request is completed with
  * crp_etype = EAGAIN via crypto_done(); 0 is returned in that case.
  */
 static int
 crypto_invoke(struct cryptocap *cap, struct cryptop *crp, int hint)
 {
 
 	KASSERT(crp != NULL, ("%s: crp == NULL", __func__));
 	KASSERT(crp->crp_callback != NULL,
 	    ("%s: crp->crp_callback == NULL", __func__));
 	KASSERT(crp->crp_desc != NULL, ("%s: crp->crp_desc == NULL", __func__));
 
 #ifdef CRYPTO_TIMING
 	if (crypto_timing)
 		crypto_tstat(&cryptostats.cs_invoke, &crp->crp_tstamp);
 #endif
 	if (cap->cc_flags & CRYPTOCAP_F_CLEANUP) {
 		struct cryptodesc *crd;
 		u_int64_t nid;
 
 		/*
 		 * Driver has unregistered; migrate the session and return
 		 * an error to the caller so they'll resubmit the op.
 		 *
 		 * XXX: What if there are more already queued requests for this
 		 *      session?
 		 */
 		crypto_freesession(crp->crp_sid);
 
 		/*
 		 * Chain the per-descriptor init records together so the
 		 * whole descriptor list can be passed to crypto_newsession().
 		 */
 		for (crd = crp->crp_desc; crd->crd_next; crd = crd->crd_next)
 			crd->CRD_INI.cri_next = &(crd->crd_next->CRD_INI);
 
 		/* XXX propagate flags from initial session? */
 		/* On failure crp_sid keeps the id of the session just freed. */
 		if (crypto_newsession(&nid, &(crp->crp_desc->CRD_INI),
 		    CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE) == 0)
 			crp->crp_sid = nid;
 
 		crp->crp_etype = EAGAIN;
 		crypto_done(crp);
 		return 0;
 	} else {
 		/*
 		 * Invoke the driver to process the request.
 		 */
 		return CRYPTODEV_PROCESS(cap->cc_dev, crp, hint);
 	}
 }
 
 /*
  * Release a crypto request together with every descriptor chained off
  * crp_desc.  A NULL request is ignored.  DIAGNOSTIC kernels first
  * assert that the request is on neither crp_q nor crp_ret_q.
  */
 void
 crypto_freereq(struct cryptop *crp)
 {
 	struct cryptodesc *crd;
 
 	if (crp == NULL)
 		return;
 
 #ifdef DIAGNOSTIC
 	{
 		struct cryptop *queued;
 
 		/* The request must not still sit on the dispatch queue... */
 		CRYPTO_Q_LOCK();
 		TAILQ_FOREACH(queued, &crp_q, crp_next) {
 			KASSERT(queued != crp,
 			    ("Freeing cryptop from the crypto queue (%p).",
 			    crp));
 		}
 		CRYPTO_Q_UNLOCK();
 		/* ...nor on the return queue. */
 		CRYPTO_RETQ_LOCK();
 		TAILQ_FOREACH(queued, &crp_ret_q, crp_next) {
 			KASSERT(queued != crp,
 			    ("Freeing cryptop from the return queue (%p).",
 			    crp));
 		}
 		CRYPTO_RETQ_UNLOCK();
 	}
 #endif
 
 	/* Pop descriptors off the head of the chain one at a time. */
 	crd = crp->crp_desc;
 	while (crd != NULL) {
 		crp->crp_desc = crd->crd_next;
 		uma_zfree(cryptodesc_zone, crd);
 		crd = crp->crp_desc;
 	}
 	uma_zfree(cryptop_zone, crp);
 }
 
 /*
  * Acquire a crypto request with num zeroed descriptors chained off
  * crp_desc.
  *
  * Allocations are made with M_NOWAIT.  Returns NULL if num is negative
  * or if any allocation fails; a partially built request is released
  * again before returning.  A num of 0 yields a request with no
  * descriptors.
  */
 struct cryptop *
 crypto_getreq(int num)
 {
 	struct cryptodesc *crd;
 	struct cryptop *crp;
 
 	/*
 	 * A negative count would otherwise keep allocating descriptors
 	 * until the zone is exhausted (or the counter overflows).
 	 */
 	if (num < 0)
 		return NULL;
 
 	crp = uma_zalloc(cryptop_zone, M_NOWAIT|M_ZERO);
 	if (crp == NULL)
 		return NULL;
 
 	while (num-- > 0) {
 		crd = uma_zalloc(cryptodesc_zone, M_NOWAIT|M_ZERO);
 		if (crd == NULL) {
 			/* Frees crp and the descriptors linked so far. */
 			crypto_freereq(crp);
 			return NULL;
 		}
 
 		/* Push the new descriptor onto the head of the chain. */
 		crd->crd_next = crp->crp_desc;
 		crp->crp_desc = crd;
 	}
 	return crp;
 }
 
 /*
  * Invoke the callback on behalf of the driver.
  *
  * Marks the request CRYPTO_F_DONE (asserting it was not already done),
  * counts an error if crp_etype is non-zero, and then either runs
  * crp_callback directly or appends the request to crp_ret_q so that
  * crypto_ret_proc() runs the callback later.
  */
 void
 crypto_done(struct cryptop *crp)
 {
 	KASSERT((crp->crp_flags & CRYPTO_F_DONE) == 0,
 		("crypto_done: op already done, flags 0x%x", crp->crp_flags));
 	crp->crp_flags |= CRYPTO_F_DONE;
 	if (crp->crp_etype != 0)
 		cryptostats.cs_errs++;
 #ifdef CRYPTO_TIMING
 	if (crypto_timing)
 		crypto_tstat(&cryptostats.cs_done, &crp->crp_tstamp);
 #endif
 	/*
 	 * CBIMM means unconditionally do the callback immediately;
 	 * CBIFSYNC means do the callback immediately only if the
 	 * operation was done synchronously.  Both are used to avoid
 	 * doing extraneous context switches; the latter is mostly
 	 * used with the software crypto driver.
 	 */
 	if ((crp->crp_flags & CRYPTO_F_CBIMM) ||
 	    ((crp->crp_flags & CRYPTO_F_CBIFSYNC) &&
 	     (CRYPTO_SESID2CAPS(crp->crp_sid) & CRYPTOCAP_F_SYNC))) {
 		/*
 		 * Do the callback directly.  This is ok when the
 		 * callback routine does very little (e.g. the
 		 * /dev/crypto callback method just does a wakeup).
 		 */
 #ifdef CRYPTO_TIMING
 		if (crypto_timing) {
 			/*
 			 * NB: We must copy the timestamp before
 			 * doing the callback as the cryptop is
 			 * likely to be reclaimed.
 			 */
 			struct bintime t = crp->crp_tstamp;
 			crypto_tstat(&cryptostats.cs_cb, &t);
 			crp->crp_callback(crp);
 			crypto_tstat(&cryptostats.cs_finis, &t);
 		} else
 #endif
 			crp->crp_callback(crp);
 	} else {
 		/*
 		 * Normal case; queue the callback for the thread.
 		 * The wakeup is issued only when the return queues were
 		 * empty; it happens before the insert, but both are done
 		 * under the return-queue lock.
 		 */
 		CRYPTO_RETQ_LOCK();
 		if (CRYPTO_RETQ_EMPTY())
 			wakeup_one(&crp_ret_q);	/* shared wait channel */
 		TAILQ_INSERT_TAIL(&crp_ret_q, crp, crp_next);
 		CRYPTO_RETQ_UNLOCK();
 	}
 }
 
 /*
  * Invoke the callback on behalf of the driver (key operations).
  *
  * Counts an error if krp_status is non-zero, drops the per-driver
  * active key-operation count for krp->krp_hid, and appends the request
  * to crp_ret_kq so that crypto_ret_proc() runs krp_callback.
  */
 void
 crypto_kdone(struct cryptkop *krp)
 {
 	struct cryptocap *cap;
 
 	if (krp->krp_status != 0)
 		cryptostats.cs_kerrs++;
 	CRYPTO_DRIVER_LOCK();
 	/* XXX: What if driver is loaded in the meantime? */
 	if (krp->krp_hid < crypto_drivers_num) {
 		cap = &crypto_drivers[krp->krp_hid];
 		KASSERT(cap->cc_koperations > 0, ("cc_koperations == 0"));
 		cap->cc_koperations--;
 		/* Driver is flagged for cleanup: hand it to crypto_remove(). */
 		if (cap->cc_flags & CRYPTOCAP_F_CLEANUP)
 			crypto_remove(cap);
 	}
 	CRYPTO_DRIVER_UNLOCK();
 	CRYPTO_RETQ_LOCK();
 	/* Wake the return thread only if it may have had nothing to do. */
 	if (CRYPTO_RETQ_EMPTY())
 		wakeup_one(&crp_ret_q);		/* shared wait channel */
 	TAILQ_INSERT_TAIL(&crp_ret_kq, krp, krp_next);
 	CRYPTO_RETQ_UNLOCK();
 }
 
 /*
  * Build a bitmask of the key (asymmetric) algorithms supported by the
  * registered drivers: bit N is set when some driver flags algorithm N
  * as supported.  Software drivers are ignored while crypto_devallowsoft
  * is zero.  The mask is stored in *featp; the return value is always 0.
  */
 int
 crypto_getfeat(int *featp)
 {
 	const struct cryptocap *cap;
 	int hid, kalg, feat;
 
 	feat = 0;
 	CRYPTO_DRIVER_LOCK();
 	for (hid = 0; hid < crypto_drivers_num; hid++) {
 		cap = &crypto_drivers[hid];
 
 		/* Skip software drivers unless they are allowed. */
 		if (!crypto_devallowsoft &&
 		    (cap->cc_flags & CRYPTOCAP_F_SOFTWARE))
 			continue;
 
 		for (kalg = 0; kalg < CRK_ALGORITHM_MAX; kalg++) {
 			if (cap->cc_kalg[kalg] & CRYPTO_ALG_FLAG_SUPPORTED)
 				feat |=  1 << kalg;
 		}
 	}
 	CRYPTO_DRIVER_UNLOCK();
 	*featp = feat;
 	return (0);
 }
 
 /*
  * Terminate a thread at module unload.  The process that
  * initiated this is waiting for us to signal that we're gone;
  * wake it up and exit.  We use the driver table lock to ensure
  * we don't do the wakeup before they're waiting.  There is no
  * race here because the waiter sleeps on the proc lock for the
  * thread so it gets notified at the right time because of an
  * extra wakeup that's done in exit1().
  *
  * chan is the wait channel to wake; the callers in this file pass the
  * address of the queue their thread was servicing.
  */
 static void
 crypto_finis(void *chan)
 {
 	CRYPTO_DRIVER_LOCK();
 	wakeup_one(chan);
 	CRYPTO_DRIVER_UNLOCK();
 	kproc_exit(0);
 }
 
 /*
  * Crypto thread, dispatches crypto requests.
  *
  * Each iteration takes at most one symmetric request from crp_q and at
  * most one key request from crp_kq and submits them.  A request that
  * comes back with ERESTART marks its driver blocked and is put back at
  * the head of its queue.  When neither queue yielded work the thread
  * sleeps on &crp_q; after waking it exits if cryptoproc has been
  * cleared.  The queue lock is held throughout except while asleep.
  */
 static void
 crypto_proc(void)
 {
 	struct cryptop *crp, *submit;
 	struct cryptkop *krp;
 	struct cryptocap *cap;
 	u_int32_t hid;
 	int result, hint;
 
 #if defined(__i386__) || defined(__amd64__)
 	fpu_kern_thread(FPU_KERN_NORMAL);
 #endif
 
 	CRYPTO_Q_LOCK();
 	for (;;) {
 		/*
 		 * Find the first element in the queue that can be
 		 * processed and look-ahead to see if multiple ops
 		 * are ready for the same driver.
 		 */
 		submit = NULL;
 		hint = 0;
 		TAILQ_FOREACH(crp, &crp_q, crp_next) {
 			hid = CRYPTO_SESID2HID(crp->crp_sid);
 			cap = crypto_checkdriver(hid);
 			/*
 			 * Driver cannot disappear when there is an active
 			 * session.
 			 */
 			KASSERT(cap != NULL, ("%s:%u Driver disappeared.",
 			    __func__, __LINE__));
 			if (cap == NULL || cap->cc_dev == NULL) {
 				/* Op needs to be migrated, process it. */
 				if (submit == NULL)
 					submit = crp;
 				break;
 			}
 			if (!cap->cc_qblocked) {
 				if (submit != NULL) {
 					/*
 					 * We stop on finding another op,
 					 * regardless whether its for the same
 					 * driver or not.  We could keep
 					 * searching the queue but it might be
 					 * better to just use a per-driver
 					 * queue instead.
 					 */
 					if (CRYPTO_SESID2HID(submit->crp_sid) == hid)
 						hint = CRYPTO_HINT_MORE;
 					break;
 				} else {
 					submit = crp;
 					if ((submit->crp_flags & CRYPTO_F_BATCH) == 0)
 						break;
 					/* keep scanning for more are q'd */
 				}
 			}
 		}
 		if (submit != NULL) {
 			TAILQ_REMOVE(&crp_q, submit, crp_next);
 			hid = CRYPTO_SESID2HID(submit->crp_sid);
 			cap = crypto_checkdriver(hid);
 			KASSERT(cap != NULL, ("%s:%u Driver disappeared.",
 			    __func__, __LINE__));
 			result = crypto_invoke(cap, submit, hint);
 			if (result == ERESTART) {
 				/*
 				 * The driver ran out of resources, mark the
 				 * driver ``blocked'' for cryptop's and put
 				 * the request back in the queue.  It would
 				 * best to put the request back where we got
 				 * it but that's hard so for now we put it
 				 * at the front.  This should be ok; putting
 				 * it at the end does not work.
 				 */
 				/* XXX validate sid again? */
 				crypto_drivers[CRYPTO_SESID2HID(submit->crp_sid)].cc_qblocked = 1;
 				TAILQ_INSERT_HEAD(&crp_q, submit, crp_next);
 				cryptostats.cs_blocks++;
 			}
 		}
 
 		/*
 		 * As above, but for key ops.  The loop leaves krp pointing
 		 * at the request to submit, or NULL when the whole queue
 		 * was walked without finding one.
 		 */
 		TAILQ_FOREACH(krp, &crp_kq, krp_next) {
 			cap = crypto_checkdriver(krp->krp_hid);
 			if (cap == NULL || cap->cc_dev == NULL) {
 				/*
 				 * Operation needs to be migrated, invalidate
 				 * the assigned device so it will reselect a
 				 * new one below.  Propagate the original
 				 * crid selection flags if supplied.
 				 */
 				krp->krp_hid = krp->krp_crid &
 				    (CRYPTOCAP_F_SOFTWARE|CRYPTOCAP_F_HARDWARE);
 				if (krp->krp_hid == 0)
 					krp->krp_hid =
 				    CRYPTOCAP_F_SOFTWARE|CRYPTOCAP_F_HARDWARE;
 				break;
 			}
 			if (!cap->cc_kqblocked)
 				break;
 		}
 		if (krp != NULL) {
 			TAILQ_REMOVE(&crp_kq, krp, krp_next);
 			result = crypto_kinvoke(krp, krp->krp_hid);
 			if (result == ERESTART) {
 				/*
 				 * The driver ran out of resources, mark the
 				 * driver ``blocked'' for cryptkop's and put
 				 * the request back in the queue.  It would
 				 * best to put the request back where we got
 				 * it but that's hard so for now we put it
 				 * at the front.  This should be ok; putting
 				 * it at the end does not work.
 				 */
 				/* XXX validate sid again? */
 				/*
 				 * NOTE(review): krp_hid is used as an array
 				 * index here without a bounds check.  On the
 				 * migration path above it holds selection
 				 * flags; crypto_kinvoke() overwrites it with
 				 * a driver index before it can return
 				 * ERESTART -- confirm this holds on all paths.
 				 */
 				crypto_drivers[krp->krp_hid].cc_kqblocked = 1;
 				TAILQ_INSERT_HEAD(&crp_kq, krp, krp_next);
 				cryptostats.cs_kblocks++;
 			}
 		}
 
 		if (submit == NULL && krp == NULL) {
 			/*
 			 * Nothing more to be processed.  Sleep until we're
 			 * woken because there are more ops to process.
 			 * This happens either by submission or by a driver
 			 * becoming unblocked and notifying us through
 			 * crypto_unblock.  Note that when we wakeup we
 			 * start processing each queue again from the
 			 * front. It's not clear that it's important to
 			 * preserve this ordering since ops may finish
 			 * out of order if dispatched to different devices
 			 * and some become blocked while others do not.
 			 */
 			/* crp_sleep tells submitters that a wakeup is needed. */
 			crp_sleep = 1;
 			msleep(&crp_q, &crypto_q_mtx, PWAIT, "crypto_wait", 0);
 			crp_sleep = 0;
 			/* cryptoproc cleared: leave the loop and exit. */
 			if (cryptoproc == NULL)
 				break;
 			cryptostats.cs_intrs++;
 		}
 	}
 	CRYPTO_Q_UNLOCK();
 
 	crypto_finis(&crp_q);
 }
 
 /*
  * Crypto returns thread, does callbacks for processed crypto requests.
  * Callbacks are done here, rather than in the crypto drivers, because
  * callbacks typically are expensive and would slow interrupt handling.
  *
  * Each iteration removes at most one entry from crp_ret_q and one from
  * crp_ret_kq, drops the return-queue lock, and runs their callbacks.
  * With both queues empty the thread sleeps on &crp_ret_q; after waking
  * it exits if cryptoretproc has been cleared.
  */
 static void
 crypto_ret_proc(void)
 {
 	struct cryptop *crpt;
 	struct cryptkop *krpt;
 
 	CRYPTO_RETQ_LOCK();
 	for (;;) {
 		/* Harvest return q's for completed ops */
 		crpt = TAILQ_FIRST(&crp_ret_q);
 		if (crpt != NULL)
 			TAILQ_REMOVE(&crp_ret_q, crpt, crp_next);
 
 		krpt = TAILQ_FIRST(&crp_ret_kq);
 		if (krpt != NULL)
 			TAILQ_REMOVE(&crp_ret_kq, krpt, krp_next);
 
 		if (crpt != NULL || krpt != NULL) {
 			CRYPTO_RETQ_UNLOCK();
 			/*
 			 * Run callbacks unlocked.
 			 */
 			if (crpt != NULL) {
 #ifdef CRYPTO_TIMING
 				if (crypto_timing) {
 					/*
 					 * NB: We must copy the timestamp before
 					 * doing the callback as the cryptop is
 					 * likely to be reclaimed.
 					 */
 					struct bintime t = crpt->crp_tstamp;
 					crypto_tstat(&cryptostats.cs_cb, &t);
 					crpt->crp_callback(crpt);
 					crypto_tstat(&cryptostats.cs_finis, &t);
 				} else
 #endif
 					crpt->crp_callback(crpt);
 			}
 			if (krpt != NULL)
 				krpt->krp_callback(krpt);
 			/* Retake the lock before looking at the queues again. */
 			CRYPTO_RETQ_LOCK();
 		} else {
 			/*
 			 * Nothing more to be processed.  Sleep until we're
 			 * woken because there are more returns to process.
 			 */
 			msleep(&crp_ret_q, &crypto_ret_q_mtx, PWAIT,
 				"crypto_ret_wait", 0);
 			/* cryptoretproc cleared: leave the loop and exit. */
 			if (cryptoretproc == NULL)
 				break;
 			cryptostats.cs_rets++;
 		}
 	}
 	CRYPTO_RETQ_UNLOCK();
 
 	crypto_finis(&crp_ret_q);
 }
 
 #ifdef DDB
 /*
  * DDB helper: print one line per registered crypto driver (entries
  * without a device are skipped) showing its name, session count,
  * active key-op count, flags and the two "queue blocked" indicators.
  */
 static void
 db_show_drivers(void)
 {
 	const struct cryptocap *cap;
 	int hid;
 
 	db_printf("%12s %4s %4s %8s %2s %2s\n",
 	    "Device", "Ses", "Kops", "Flags", "QB", "KB");
 	for (hid = 0; hid < crypto_drivers_num; hid++) {
 		cap = &crypto_drivers[hid];
 		if (cap->cc_dev != NULL) {
 			db_printf("%-12s %4u %4u %08x %2u %2u\n",
 			    device_get_nameunit(cap->cc_dev),
 			    cap->cc_sessions,
 			    cap->cc_koperations,
 			    cap->cc_flags,
 			    cap->cc_qblocked,
 			    cap->cc_kqblocked);
 		}
 	}
 }
 
 DB_SHOW_COMMAND(crypto, db_show_crypto)
 {
 	struct cryptop *crp;
 
 	db_show_drivers();
 	db_printf("\n");
 
 	db_printf("%4s %8s %4s %4s %4s %4s %8s %8s\n",
 	    "HID", "Caps", "Ilen", "Olen", "Etype", "Flags",
 	    "Desc", "Callback");
 	TAILQ_FOREACH(crp, &crp_q, crp_next) {
 		db_printf("%4u %08x %4u %4u %4u %04x %8p %8p\n"
 		    , (int) CRYPTO_SESID2HID(crp->crp_sid)
 		    , (int) CRYPTO_SESID2CAPS(crp->crp_sid)
 		    , crp->crp_ilen, crp->crp_olen
 		    , crp->crp_etype
 		    , crp->crp_flags
 		    , crp->crp_desc
 		    , crp->crp_callback
 		);
 	}
 	if (!TAILQ_EMPTY(&crp_ret_q)) {
 		db_printf("\n%4s %4s %4s %8s\n",
 		    "HID", "Etype", "Flags", "Callback");
 		TAILQ_FOREACH(crp, &crp_ret_q, crp_next) {
 			db_printf("%4u %4u %04x %8p\n"
 			    , (int) CRYPTO_SESID2HID(crp->crp_sid)
 			    , crp->crp_etype
 			    , crp->crp_flags
 			    , crp->crp_callback
 			);
 		}
 	}
 }
 
 /*
  * DDB "show kcrypto": list the drivers, then every key (asymmetric)
  * request waiting on the dispatch queue (crp_kq) and, if there are any,
  * every key request waiting on the key return queue (crp_ret_kq).
  */
 DB_SHOW_COMMAND(kcrypto, db_show_kcrypto)
 {
 	struct cryptkop *krp;
 
 	db_show_drivers();
 	db_printf("\n");
 
 	db_printf("%4s %5s %4s %4s %8s %4s %8s\n",
 	    "Op", "Status", "#IP", "#OP", "CRID", "HID", "Callback");
 	TAILQ_FOREACH(krp, &crp_kq, krp_next) {
 		db_printf("%4u %5u %4u %4u %08x %4u %8p\n"
 		    , krp->krp_op
 		    , krp->krp_status
 		    , krp->krp_iparams, krp->krp_oparams
 		    , krp->krp_crid, krp->krp_hid
 		    , krp->krp_callback
 		);
 	}
 	/*
 	 * Test the key return queue, which is the one dumped below; the
 	 * symmetric return queue (crp_ret_q) is unrelated to this listing.
 	 */
 	if (!TAILQ_EMPTY(&crp_ret_kq)) {
 		db_printf("%4s %5s %8s %4s %8s\n",
 		    "Op", "Status", "CRID", "HID", "Callback");
 		TAILQ_FOREACH(krp, &crp_ret_kq, krp_next) {
 			db_printf("%4u %5u %08x %4u %8p\n"
 			    , krp->krp_op
 			    , krp->krp_status
 			    , krp->krp_crid, krp->krp_hid
 			    , krp->krp_callback
 			);
 		}
 	}
 }
 #endif
 
 int crypto_modevent(module_t mod, int type, void *unused);
 
 /*
  * Initialization code, both for static and dynamic loading.
  * Note this is not invoked with the usual MODULE_DECLARE
  * mechanism but instead is listed as a dependency by the
  * cryptosoft driver.  This guarantees proper ordering of
  * calls on module load/unload.
  *
  * MOD_LOAD returns the result of crypto_init(); MOD_UNLOAD tears the
  * framework down and returns 0; any other event returns EINVAL.
  */
 int
 crypto_modevent(module_t mod, int type, void *unused)
 {
 	int error;
 
 	if (type == MOD_LOAD) {
 		error = crypto_init();
 		if (error == 0 && bootverbose)
 			printf("crypto: <crypto core>\n");
 		return error;
 	}
 	if (type == MOD_UNLOAD) {
 		/*XXX disallow if active sessions */
 		crypto_destroy();
 		return 0;
 	}
 	return EINVAL;
 }
 MODULE_VERSION(crypto, 1);
 MODULE_DEPEND(crypto, zlib, 1, 1, 1);
Index: stable/11/sys/sys/linker.h
===================================================================
--- stable/11/sys/sys/linker.h	(revision 329098)
+++ stable/11/sys/sys/linker.h	(revision 329099)
@@ -1,353 +1,354 @@
 /*-
  * Copyright (c) 1997-2000 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_LINKER_H_
 #define _SYS_LINKER_H_
 
 #ifdef _KERNEL
 
 #include <machine/elf.h>
 #include <sys/kobj.h>
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_LINKER);
 #endif
 
 struct mod_depend;
 
 /*
  * Object representing a file which has been loaded by the linker.
  */
 typedef struct linker_file* linker_file_t;
 typedef TAILQ_HEAD(, linker_file) linker_file_list_t;
 
 typedef caddr_t linker_sym_t;		/* opaque symbol */
 typedef c_caddr_t c_linker_sym_t;	/* const opaque symbol */
 typedef int (*linker_function_name_callback_t)(const char *, void *);
 
 /*
  * Expanded-out form of a linker_sym_t: the name, value and size of a
  * symbol.
  */
 typedef struct linker_symval {
     const char*		name;
     caddr_t		value;
     size_t		size;
 } linker_symval_t;
 
 typedef int (*linker_function_nameval_callback_t)(linker_file_t, int, linker_symval_t *, void *);
 
 /*
  * One entry on a linker file's list of common symbols (the "common"
  * list head in struct linker_file).
  */
 struct common_symbol {
     STAILQ_ENTRY(common_symbol) link;	/* linkage on the common list */
     char*		name;
     caddr_t		address;
 };
 
 /*
  * Per-file state for a file which has been loaded by the linker; this
  * is the structure that linker_file_t points to.
  */
 struct linker_file {
     KOBJ_FIELDS;
     int			refs;		/* reference count */
     int			userrefs;	/* kldload(2) count */
     int			flags;
 #define LINKER_FILE_LINKED	0x1	/* file has been fully linked */
     TAILQ_ENTRY(linker_file) link;	/* list of all loaded files */
     char*		filename;	/* file which was loaded */
     char*		pathname;	/* file name with full path */
     int			id;		/* unique id */
     caddr_t		address;	/* load address */
     size_t		size;		/* size of file */
     caddr_t		ctors_addr;	/* address of .ctors */
     size_t		ctors_size;	/* size of .ctors */
     int			ndeps;		/* number of dependencies */
     linker_file_t*	deps;		/* list of dependencies */
     STAILQ_HEAD(, common_symbol) common; /* list of common symbols */
     TAILQ_HEAD(, module) modules;	/* modules in this file */
     TAILQ_ENTRY(linker_file) loaded;	/* preload dependency support */
     int			loadcnt;	/* load counter value */
 
     /*
      * Function Boundary Tracing (FBT) or Statically Defined Tracing (SDT)
      * fields.
      */
     int			nenabled;	/* number of enabled probes. */
     int			fbt_nentries;	/* number of fbt entries created. */
 };
 
 /*
  * Object implementing a class of file (a.out, elf, etc.)
  */
 typedef struct linker_class *linker_class_t;
 typedef TAILQ_HEAD(, linker_class) linker_class_list_t;
 
 /*
  * A class of loadable file (a.out, elf, etc.); this is the structure
  * that linker_class_t points to.
  */
 struct linker_class {
     KOBJ_CLASS_FIELDS;
     TAILQ_ENTRY(linker_class) link;	/* list of all file classes */
 };
 
 /*
  * Function type used when iterating over the list of linker files.
  */
 typedef int linker_predicate_t(linker_file_t, void *);
 
 /*
  * The "file" for the kernel.
  */
 extern linker_file_t	linker_kernel_file;
 
 /*
  * Obtain a reference to a module, loading it if required.
  */
 int linker_reference_module(const char* _modname, struct mod_depend *_verinfo,
 			    linker_file_t* _result);
 
 /*
  * Release a reference to a module, unloading it if there are no more
  * references.  Note that one should either provide a module name and
  * optional version info or a linker file, but not both.
  */
 int linker_release_module(const char *_modname, struct mod_depend *_verinfo,
 			  linker_file_t _file);
 
 /*
  * Iterate over all of the currently loaded linker files calling the
  * predicate function while the function returns 0.  Returns the value
  * returned by the last predicate function.
  */
 int linker_file_foreach(linker_predicate_t *_predicate, void *_context);
 
 /*
  * Lookup a symbol in a file.  If deps is TRUE, look in dependencies
  * if not found in file.
  */
-caddr_t linker_file_lookup_symbol(linker_file_t _file, const char* _name, 
+caddr_t linker_file_lookup_symbol(linker_file_t _file, const char* _name,
 				  int _deps);
 
 /*
  * Lookup a linker set in a file.  Return pointers to the first entry,
  * last + 1, and count of entries.  Use: for (p = start; p < stop; p++) {}
  * void *start is really: "struct yoursetmember ***start;"
  */
 int linker_file_lookup_set(linker_file_t _file, const char *_name,
 			   void *_start, void *_stop, int *_count);
 
 /*
  * List all functions in a file.
  */
-int linker_file_function_listall(linker_file_t, 
+int linker_file_function_listall(linker_file_t,
 				 linker_function_nameval_callback_t, void *);
 
 /*
  * Functions solely for use by the linker class handlers.
  */
 int linker_add_class(linker_class_t _cls);
 int linker_file_unload(linker_file_t _file, int flags);
 int linker_load_dependencies(linker_file_t _lf);
 linker_file_t linker_make_file(const char* _filename, linker_class_t _cls);
 
 /*
  * DDB Helpers, tuned specifically for ddb/db_kld.c
  */
 int linker_ddb_lookup(const char *_symstr, c_linker_sym_t *_sym);
 int linker_ddb_search_symbol(caddr_t _value, c_linker_sym_t *_sym,
 			     long *_diffp);
 int linker_ddb_symbol_values(c_linker_sym_t _sym, linker_symval_t *_symval);
 int linker_ddb_search_symbol_name(caddr_t value, char *buf, u_int buflen,
 				  long *offset);
 
 /*
  * stack(9) helper for situations where kernel locking is required.
  */
 int linker_search_symbol_name(caddr_t value, char *buf, u_int buflen,
     long *offset);
 
 
 /* HWPMC helper */
 void *linker_hwpmc_list_objects(void);
 
 #endif	/* _KERNEL */
 
 /*
  * Module information subtypes
  */
 #define MODINFO_END		0x0000		/* End of list */
 #define MODINFO_NAME		0x0001		/* Name of module (string) */
 #define MODINFO_TYPE		0x0002		/* Type of module (string) */
 #define MODINFO_ADDR		0x0003		/* Loaded address */
 #define MODINFO_SIZE		0x0004		/* Size of module */
 #define MODINFO_EMPTY		0x0005		/* Has been deleted */
 #define MODINFO_ARGS		0x0006		/* Parameters string */
 #define MODINFO_METADATA	0x8000		/* Module-specific */
 
 #define MODINFOMD_AOUTEXEC	0x0001		/* a.out exec header */
 #define MODINFOMD_ELFHDR	0x0002		/* ELF header */
 #define MODINFOMD_SSYM		0x0003		/* start of symbols */
 #define MODINFOMD_ESYM		0x0004		/* end of symbols */
 #define MODINFOMD_DYNAMIC	0x0005		/* _DYNAMIC pointer */
 /* These values are MD on these two platforms */
 #if !defined(__sparc64__) && !defined(__powerpc__)
 #define MODINFOMD_ENVP		0x0006		/* envp[] */
 #define MODINFOMD_HOWTO		0x0007		/* boothowto */
 #define MODINFOMD_KERNEND	0x0008		/* kernend */
 #endif
 #define MODINFOMD_SHDR		0x0009		/* section header table */
 #define MODINFOMD_CTORS_ADDR	0x000a		/* address of .ctors */
 #define MODINFOMD_CTORS_SIZE	0x000b		/* size of .ctors */
 #define MODINFOMD_FW_HANDLE	0x000c		/* Firmware dependent handle */
+#define MODINFOMD_KEYBUF	0x000d		/* Crypto key intake buffer */
 #define MODINFOMD_NOCOPY	0x8000		/* don't copy this metadata to the kernel */
 
 #define MODINFOMD_DEPLIST	(0x4001 | MODINFOMD_NOCOPY)	/* depends on */
 
 #ifdef _KERNEL
 /*
  * Fetch a metadata item of the given type from preloaded module mdp.
  * Looks up MODINFO_METADATA | info with preload_search_info() and
  * evaluates to the pointed-to value, or to 0 if the lookup returns
  * NULL.  Implemented as a statement expression, so (mdp) and (info) are
  * each evaluated once.
  */
 #define MD_FETCH(mdp, info, type) ({ \
 	type *__p; \
 	__p = (type *)preload_search_info((mdp), MODINFO_METADATA | (info)); \
 	__p ? *__p : 0; \
 })
 #endif
 
 #define	LINKER_HINTS_VERSION	1		/* linker.hints file version */
 #define	LINKER_HINTS_MAX	(1 << 20)	/* Allow at most 1MB for linker.hints */
 
 #ifdef _KERNEL
 
 /*
  * Module lookup
  */
 extern vm_offset_t	preload_addr_relocate;
 extern caddr_t		preload_metadata;
 
 extern void *		preload_fetch_addr(caddr_t _mod);
 extern size_t		preload_fetch_size(caddr_t _mod);
 extern caddr_t		preload_search_by_name(const char *_name);
 extern caddr_t		preload_search_by_type(const char *_type);
 extern caddr_t		preload_search_next_name(caddr_t _base);
 extern caddr_t		preload_search_info(caddr_t _mod, int _inf);
 extern void		preload_delete_name(const char *_name);
 extern void		preload_bootstrap_relocate(vm_offset_t _offset);
 
 #ifdef KLD_DEBUG
 
 extern int kld_debug;
 #define KLD_DEBUG_FILE	1	/* file load/unload */
 #define KLD_DEBUG_SYM	2	/* symbol lookup */
 
 /*
  * Debug printf: prints when the KLD_DEBUG_<cat> bit is set in
  * kld_debug.  args must be a complete, parenthesized printf argument
  * list, e.g. KLD_DPF(FILE, ("loaded %s\n", name)).
  */
 #define KLD_DPF(cat, args)					\
 	do {							\
 		if (kld_debug & KLD_DEBUG_##cat) printf args;	\
 	} while (0)
 
 #else
 
 #define KLD_DPF(cat, args)
 
 #endif
 
 typedef int elf_lookup_fn(linker_file_t, Elf_Size, int, Elf_Addr *);
 
 /* Support functions */
 int	elf_reloc(linker_file_t _lf, Elf_Addr base, const void *_rel, int _type, elf_lookup_fn _lu);
 int	elf_reloc_local(linker_file_t _lf, Elf_Addr base, const void *_rel, int _type, elf_lookup_fn _lu);
 Elf_Addr elf_relocaddr(linker_file_t _lf, Elf_Addr addr);
 const Elf_Sym *elf_get_sym(linker_file_t _lf, Elf_Size _symidx);
 const char *elf_get_symname(linker_file_t _lf, Elf_Size _symidx);
 
 /*
  * CTF data plus the symbol and string tables that go with it, for one
  * linker file.
  * NOTE(review): presumably filled in by linker_ctf_get() -- confirm
  * against its implementation.
  */
 typedef struct linker_ctf {
 	const uint8_t 	*ctftab;	/* Decompressed CTF data. */
 	int 		ctfcnt;		/* Number of CTF data bytes. */
 	const Elf_Sym	*symtab;	/* Ptr to the symbol table. */
 	int		nsym;		/* Number of symbols. */
 	const char	*strtab;	/* Ptr to the string table. */
 	int 		strcnt;		/* Number of string bytes. */
 	uint32_t	**ctfoffp;	/* Ptr to array of obj/fnc offsets. */
 	uint32_t	**typoffp;	/* Ptr to array of type offsets. */
 	long		*typlenp;	/* Ptr to number of type data entries. */
 } linker_ctf_t;
 
 int	linker_ctf_get(linker_file_t, linker_ctf_t *);
 
 int elf_cpu_load_file(linker_file_t);
 int elf_cpu_unload_file(linker_file_t);
 
 /* values for type */
 #define ELF_RELOC_REL	1
 #define ELF_RELOC_RELA	2
 
 /*
  * This is version 1 of the KLD file status structure. It is identified
  * by its _size_ in the version field.
  */
 /* Same leading fields as struct kld_file_stat, without pathname. */
 struct kld_file_stat_1 {
     int		version;	/* set to sizeof(struct kld_file_stat_1) */
     char        name[MAXPATHLEN];
     int		refs;
     int		id;
     caddr_t	address;	/* load address */
     size_t	size;		/* size in bytes */
 };
 #endif /* _KERNEL */
 
 /*
  * KLD file status structure, as passed to kldstat().  The version
  * field carries the structure's size, which identifies the layout in
  * use; this layout adds pathname to the fields of kld_file_stat_1.
  */
 struct kld_file_stat {
     int		version;	/* set to sizeof(struct kld_file_stat) */
     char        name[MAXPATHLEN];
     int		refs;
     int		id;
     caddr_t	address;	/* load address */
     size_t	size;		/* size in bytes */
     char        pathname[MAXPATHLEN];
 };
 
 /*
  * Symbol lookup request/result.
  * NOTE(review): presumably the _data argument of kldsym() when _cmd is
  * KLDSYM_LOOKUP, with symvalue/symsize filled in on return -- confirm.
  */
 struct kld_sym_lookup {
     int		version;	/* set to sizeof(struct kld_sym_lookup) */
     char	*symname;	/* Symbol name we are looking up */
     u_long	symvalue;
     size_t	symsize;
 };
 #define KLDSYM_LOOKUP	1
 
 /*
  * Flags for kldunloadf() and linker_file_unload()
  */
 #define LINKER_UNLOAD_NORMAL	0
 #define LINKER_UNLOAD_FORCE	1
 
 #ifndef _KERNEL
 
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
 int	kldload(const char* _file);
 int	kldunload(int _fileid);
 int	kldunloadf(int _fileid, int flags);
 int	kldfind(const char* _file);
 int	kldnext(int _fileid);
 int	kldstat(int _fileid, struct kld_file_stat* _stat);
 int	kldfirstmod(int _fileid);
 int	kldsym(int _fileid, int _cmd, void *_data);
 __END_DECLS
 
 #endif
 
 #endif /* !_SYS_LINKER_H_ */
Index: stable/11/usr.sbin/makefs/ffs/buf.c
===================================================================
--- stable/11/usr.sbin/makefs/ffs/buf.c	(revision 329098)
+++ stable/11/usr.sbin/makefs/ffs/buf.c	(revision 329099)
@@ -1,222 +1,228 @@
-/*	$NetBSD: buf.c,v 1.12 2004/06/20 22:20:18 jmc Exp $	*/
+/*	$NetBSD: buf.c,v 1.13 2004/06/20 22:20:18 jmc Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
  * All rights reserved.
  *
  * Written by Luke Mewburn for Wasabi Systems, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed for the NetBSD Project by
  *      Wasabi Systems, Inc.
  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  *    or promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/time.h>
 
 #include <assert.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 
 #include "makefs.h"
 
 #include <ufs/ufs/dinode.h>
 #include <ufs/ffs/fs.h>
 
 #include "ffs/buf.h"
 #include "ffs/ufs_inode.h"
 
 extern int sectorsize;		/* XXX: from ffs.c & mkfs.c */
 
 TAILQ_HEAD(buftailhead,buf) buftail;
 
 /*
  * Read `size' bytes for block `blkno' of the image described by `vp' into
  * a buffer obtained from getblk(), and hand that buffer back through *bpp.
  *
  * The byte offset in the image is the buffer's b_blkno multiplied by the
  * global sectorsize.  A failed lseek(), a failed read() and a short read
  * all terminate the program, so the only value ever returned is 0.  The
  * short read is reported with errx(): read() itself succeeded in that
  * case, so errno holds nothing worth appending to the message.
  *
  * The ucred argument is ignored.
  */
 int
-bread(int fd, struct fs *fs, daddr_t blkno, int size, struct buf **bpp)
+bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *u1 __unused,
+    struct buf **bpp)
 {
 	off_t	offset;
 	ssize_t	rv;
+	struct fs *fs = vp->fs;
 
 	assert (fs != NULL);
 	assert (bpp != NULL);
 
 	if (debug & DEBUG_BUF_BREAD)
 		printf("bread: fs %p blkno %lld size %d\n",
 		    fs, (long long)blkno, size);
-	*bpp = getblk(fd, fs, blkno, size);
+	*bpp = getblk(vp, blkno, size, 0, 0, 0);
 	offset = (*bpp)->b_blkno * sectorsize;	/* XXX */
 	if (debug & DEBUG_BUF_BREAD)
 		printf("bread: bp %p blkno %lld offset %lld bcount %ld\n",
 		    (*bpp), (long long)(*bpp)->b_blkno, (long long) offset,
 		    (*bpp)->b_bcount);
 	if (lseek((*bpp)->b_fd, offset, SEEK_SET) == -1)
 		err(1, "bread: lseek %lld (%lld)",
 		    (long long)(*bpp)->b_blkno, (long long)offset);
 	rv = read((*bpp)->b_fd, (*bpp)->b_data, (*bpp)->b_bcount);
 	if (debug & DEBUG_BUF_BREAD)
 		printf("bread: read %ld (%lld) returned %d\n",
 		    (*bpp)->b_bcount, (long long)offset, (int)rv);
 	if (rv == -1)				/* read error */
 		err(1, "bread: read %ld (%lld) returned %d",
 		    (*bpp)->b_bcount, (long long)offset, (int)rv);
 	else if (rv != (*bpp)->b_bcount)	/* short read */
-		err(1, "bread: read %ld (%lld) returned %d",
+		errx(1, "bread: read %ld (%lld) returned %d",
 		    (*bpp)->b_bcount, (long long)offset, (int)rv);
 	else
 		return (0);
 }
 
 /*
  * Release a buffer.
  *
  * A buffer whose b_lblkno is negative stays on buftail with its data; only
  * its b_bcount is reset to 0 (see the XXX note below).  Any other buffer
  * is unlinked from buftail and freed together with its b_data.  The second
  * argument is ignored.
  */
 void
-brelse(struct buf *bp)
+brelse(struct buf *bp, int u1 __unused)
 {
 
 	assert (bp != NULL);
 	assert (bp->b_data != NULL);
 
 	if (bp->b_lblkno < 0) {
 		/*
 		 * XXX	don't remove any buffers with negative logical block
 		 *	numbers (lblkno), so that we retain the mapping
 		 *	of negative lblkno -> real blkno that ffs_balloc()
 		 *	sets up.
 		 *
 		 *	if we instead released these buffers, and implemented
 		 *	ufs_strategy() (and ufs_bmaparray()) and called those
 		 *	from bread() and bwrite() to convert the lblkno to
 		 *	a real blkno, we'd add a lot more code & complexity
 		 *	and reading off disk, for little gain, because this
 		 *	simple hack works for our purpose.
 		 */
 		bp->b_bcount = 0;
 		return;
 	}
 
 	TAILQ_REMOVE(&buftail, bp, b_tailq);
 	free(bp->b_data);
 	free(bp);
 }
 
 /*
  * Write bp->b_bcount bytes from bp->b_data to bp->b_fd at byte offset
  * b_blkno * sectorsize.
  *
  * Returns 0 when the whole buffer was written, errno when lseek() or
  * write() fails, and EAGAIN when write() transferred fewer bytes than
  * requested.  The buffer itself is neither modified nor released here.
  */
 int
 bwrite(struct buf *bp)
 {
 	off_t	offset;
 	ssize_t	rv;
 
 	assert (bp != NULL);
 	offset = bp->b_blkno * sectorsize;	/* XXX */
 	if (debug & DEBUG_BUF_BWRITE)
 		printf("bwrite: bp %p blkno %lld offset %lld bcount %ld\n",
 		    bp, (long long)bp->b_blkno, (long long) offset,
 		    bp->b_bcount);
 	if (lseek(bp->b_fd, offset, SEEK_SET) == -1)
 		return (errno);
 	rv = write(bp->b_fd, bp->b_data, bp->b_bcount);
 	if (debug & DEBUG_BUF_BWRITE)
 		printf("bwrite: write %ld (offset %lld) returned %lld\n",
 		    bp->b_bcount, (long long)offset, (long long)rv);
 	if (rv == bp->b_bcount)
 		return (0);
 	else if (rv == -1)		/* write error */
 		return (errno);
 	else				/* short write ? */
 		return (EAGAIN);
 }
 
 /*
  * Report every buffer that is still linked on buftail to stdout.
  *
  * Nothing is printed when the list is empty.  The buffers are only
  * listed; this function does not free or unlink them.
  */
 void
 bcleanup(void)
 {
 	struct buf *bp;
 
 	/*
 	 * XXX	this really shouldn't be necessary, but i'm curious to
 	 *	know why there's still some buffers lying around that
 	 *	aren't brelse()d
 	 */
 
 	if (TAILQ_EMPTY(&buftail))
 		return;
 
 	printf("bcleanup: unflushed buffers:\n");
 	TAILQ_FOREACH(bp, &buftail, b_tailq) {
 		printf("\tlblkno %10lld  blkno %10lld  count %6ld  bufsize %6ld\n",
 		    (long long)bp->b_lblkno, (long long)bp->b_blkno,
 		    bp->b_bcount, bp->b_bufsize);
 	}
 	printf("bcleanup: done\n");
 }
 
 /*
  * Find or create the buffer for block `blkno' of the image behind `vp'.
  *
  * vp->offset is added to blkno first; the result is matched against
  * b_lblkno of the buffers on buftail (the descriptor and the fs pointer
  * are not part of the comparison).  When nothing matches, a zeroed buf is
  * allocated, given b_blkno == b_lblkno == blkno, and inserted at the head
  * of the list.  Either way b_bcount becomes `size', and b_data is
  * (re)allocated when it is missing or smaller than `size'.
  *
  * Allocation failures end the program through err(), so the return value
  * is never NULL.  u1, u2 and u3 are ignored.
  */
 struct buf *
-getblk(int fd, struct fs *fs, daddr_t blkno, int size)
+getblk(struct vnode *vp, daddr_t blkno, int size, int u1 __unused,
+    int u2 __unused, int u3 __unused)
 {
 	static int buftailinitted;
 	struct buf *bp;
 	void *n;
+	int fd = vp->fd;
+	struct fs *fs = vp->fs;
 
+	blkno += vp->offset;
 	assert (fs != NULL);
 	if (debug & DEBUG_BUF_GETBLK)
 		printf("getblk: fs %p blkno %lld size %d\n", fs,
 		    (long long)blkno, size);
 
 	bp = NULL;
 	/* The list head is initialised lazily, on the first call. */
 	if (!buftailinitted) {
 		if (debug & DEBUG_BUF_GETBLK)
 			printf("getblk: initialising tailq\n");
 		TAILQ_INIT(&buftail);
 		buftailinitted = 1;
 	} else {
 		/* bp is left NULL when the loop finishes without a match. */
 		TAILQ_FOREACH(bp, &buftail, b_tailq) {
 			if (bp->b_lblkno != blkno)
 				continue;
 			break;
 		}
 	}
 	if (bp == NULL) {
 		if ((bp = calloc(1, sizeof(struct buf))) == NULL)
 			err(1, "getblk: calloc");
 
 		bp->b_bufsize = 0;
 		bp->b_blkno = bp->b_lblkno = blkno;
 		bp->b_fd = fd;
 		bp->b_fs = fs;
 		bp->b_data = NULL;
 		TAILQ_INSERT_HEAD(&buftail, bp, b_tailq);
 	}
 	bp->b_bcount = size;
 	/* Grow only; a buffer that is already large enough is reused as is. */
 	if (bp->b_data == NULL || bp->b_bcount > bp->b_bufsize) {
 		n = realloc(bp->b_data, size);
 		if (n == NULL)
 			err(1, "getblk: realloc b_data %ld", bp->b_bcount);
 		bp->b_data = n;
 		bp->b_bufsize = size;
 	}
 
 	return (bp);
 }
Index: stable/11/usr.sbin/makefs/ffs/buf.h
===================================================================
--- stable/11/usr.sbin/makefs/ffs/buf.h	(revision 329098)
+++ stable/11/usr.sbin/makefs/ffs/buf.h	(revision 329099)
@@ -1,67 +1,77 @@
-/*	$NetBSD: buf.h,v 1.2 2001/11/02 03:12:49 lukem Exp $	*/
+/*	$NetBSD: buf.h,v 1.3 2001/11/02 03:12:49 lukem Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
  * All rights reserved.
  *
  * Written by Luke Mewburn for Wasabi Systems, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed for the NetBSD Project by
  *      Wasabi Systems, Inc.
  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  *    or promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _FFS_BUF_H
 #define	_FFS_BUF_H
 
 #include <sys/param.h>
 #include <sys/queue.h>
 
+struct ucred;
+
+struct vnode {		/* argument bundle taken by bread() and getblk() */
+	int fd;		/* getblk() copies this into buf.b_fd */
+	void *fs;	/* read by bread()/getblk() as a struct fs * */
+	void *v_data;	/* not referenced by buf.c */
+	int offset;	/* getblk() adds this to the requested blkno */
+};
+
 struct buf {
 	void *		b_data;		/* heap storage, (re)allocated by getblk() */
 	long		b_bufsize;	/* bytes currently allocated for b_data */
 	long		b_bcount;	/* bytes bread()/bwrite() transfer */
 	daddr_t		b_blkno;	/* times sectorsize gives the file offset */
 	daddr_t		b_lblkno;	/* key getblk() searches buftail for */
 	int		b_fd;		/* descriptor used for lseek/read/write */
 	struct fs *	b_fs;		/* fs pointer recorded by getblk() */
 
 	TAILQ_ENTRY(buf)	b_tailq;	/* linkage on the buftail list */
 };
 
 void		bcleanup(void);
-int		bread(int, struct fs *, daddr_t, int, struct buf **);
-void		brelse(struct buf *);
+int		bread(struct vnode *, daddr_t, int, struct ucred *,
+    struct buf **);
+void		brelse(struct buf *, int);
 int		bwrite(struct buf *);
-struct buf *	getblk(int, struct fs *, daddr_t, int);
+struct buf *	getblk(struct vnode *, daddr_t, int, int, int, int);
 
 #define	bdwrite(bp)	bwrite(bp)
 #define	clrbuf(bp)	memset((bp)->b_data, 0, (u_int)(bp)->b_bcount)
 
 #endif	/* _FFS_BUF_H */
Index: stable/11/usr.sbin/makefs/ffs/ffs_alloc.c
===================================================================
--- stable/11/usr.sbin/makefs/ffs/ffs_alloc.c	(revision 329098)
+++ stable/11/usr.sbin/makefs/ffs/ffs_alloc.c	(revision 329099)
@@ -1,681 +1,683 @@
 /*	$NetBSD: ffs_alloc.c,v 1.14 2004/06/20 22:20:18 jmc Exp $	*/
 /* From: NetBSD: ffs_alloc.c,v 1.50 2001/09/06 02:16:01 lukem Exp */
 
 /*
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Marshall
  * Kirk McKusick and Network Associates Laboratories, the Security
  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
  * research program
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_alloc.c	8.19 (Berkeley) 7/13/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/time.h>
 
 #include <errno.h>
 #include <stdint.h>
 
 #include "makefs.h"
 
 #include <ufs/ufs/dinode.h>
 #include <ufs/ffs/fs.h>
 
 #include "ffs/ufs_bswap.h"
 #include "ffs/buf.h"
 #include "ffs/ufs_inode.h"
 #include "ffs/ffs_extern.h"
 
 static int scanc(u_int, const u_char *, const u_char *, int);
 
 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
 static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int,
 		     daddr_t (*)(struct inode *, int, daddr_t, int));
 static int32_t ffs_mapsearch(struct fs *, struct cg *, daddr_t, int);
 
 /*
  * Allocate a block in the file system.
  * 
  * The size of the requested block is given, which must be some
  * multiple of fs_fsize and <= fs_bsize.
  * A preference may be optionally specified. If a preference is given
  * the following hierarchy is used to allocate a block:
  *   1) allocate the requested block.
  *   2) allocate a rotationally optimal block in the same cylinder.
  *   3) allocate a block in the same cylinder group.
  *   4) quadratically rehash into other cylinder groups, until an
  *      available block is located.
  * If no block preference is given the following hierarchy is used
  * to allocate a block:
  *   1) allocate a block in the cylinder group that contains the
  *      inode for the file.
  *   2) quadratically rehash into other cylinder groups, until an
  *      available block is located.
  */
 int
 ffs_alloc(struct inode *ip, daddr_t lbn __unused, daddr_t bpref, int size,
     daddr_t *bnp)
 {
 	struct fs *fs = ip->i_fs;
 	daddr_t bno;
 	int cg;
 	
 	/* *bnp stays 0 unless an allocation succeeds. */
 	*bnp = 0;
 	if (size > fs->fs_bsize || fragoff(fs, size) != 0) {
 		errx(1, "ffs_alloc: bad size: bsize %d size %d",
 		    fs->fs_bsize, size);
 	}
 	/* A whole block cannot be satisfied once no free blocks remain. */
 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
 		goto nospace;
 	/* An out-of-range preference is treated as "no preference". */
 	if (bpref >= fs->fs_size)
 		bpref = 0;
 	/* Starting group: the inode's own, or the one holding bpref. */
 	if (bpref == 0)
 		cg = ino_to_cg(fs, ip->i_number);
 	else
 		cg = dtog(fs, bpref);
 	bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
 	if (bno > 0) {
 		/* Charge the inode in DEV_BSIZE units (UFS1 or UFS2 field). */
 		if (ip->i_fs->fs_magic == FS_UFS1_MAGIC)
 			ip->i_ffs1_blocks += size / DEV_BSIZE;
 		else
 			ip->i_ffs2_blocks += size / DEV_BSIZE;
 		*bnp = bno;
 		return (0);
 	}
 nospace:
 	/* Reached by the goto above and when ffs_hashalloc() found nothing. */
 	return (ENOSPC);
 }
 
 /*
  * Select the desired position for the next block in a file.  The file is
  * logically divided into sections. The first section is composed of the
  * direct blocks. Each additional section contains fs_maxbpg blocks.
  * 
  * If no blocks have been allocated in the first section, the policy is to
  * request a block in the same cylinder group as the inode that describes
  * the file. If no blocks have been allocated in any other section, the
  * policy is to place the section in a cylinder group with a greater than
  * average number of free blocks.  An appropriate cylinder group is found
  * by using a rotor that sweeps the cylinder groups. When a new group of
  * blocks is needed, the sweep begins in the cylinder group following the
  * cylinder group from which the previous allocation was made. The sweep
  * continues until a cylinder group with greater than the average number
  * of free blocks is found. If the allocation is for the first block in an
  * indirect block, the information on the previous allocation is unavailable;
  * here a best guess is made based upon the logical block number being
  * allocated.
  * 
  * If a section is already partially allocated, the policy is to
  * contiguously allocate fs_maxcontig blocks.  The end of one of these
  * contiguous blocks and the beginning of the next is physically separated
  * so that the disk head will be in transit between them for at least
  * fs_rotdelay milliseconds.  This is to allow time for the processor to
  * schedule another I/O transfer.
  */
 /* XXX ondisk32 */
 daddr_t
 ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int32_t *bap)
 {
 	struct fs *fs;
 	int cg;
 	int avgbfree, startcg;
 
 	fs = ip->i_fs;
 	/*
 	 * indx % fs_maxbpg is tested first, so bap[indx - 1] is not read
 	 * when indx is 0.
 	 */
 	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
 		/*
 		 * Logical blocks below NDADDR + NINDIR(fs) stay in the
 		 * inode's own cylinder group.
 		 */
 		if (lbn < NDADDR + NINDIR(fs)) {
 			cg = ino_to_cg(fs, ip->i_number);
 			return (fs->fs_fpg * cg + fs->fs_frag);
 		}
 		/*
 		 * Find a cylinder with greater than average number of
 		 * unused data blocks.
 		 */
 		if (indx == 0 || bap[indx - 1] == 0)
 			startcg =
 			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
 		else
 			startcg = dtog(fs,
 				ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
 		startcg %= fs->fs_ncg;
 		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 		/* Sweep startcg .. last group, then wrap to 0 .. startcg. */
 		for (cg = startcg; cg < fs->fs_ncg; cg++)
 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
 				return (fs->fs_fpg * cg + fs->fs_frag);
 		for (cg = 0; cg <= startcg; cg++)
 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
 				return (fs->fs_fpg * cg + fs->fs_frag);
 		/* No group reaches the average: report no preference. */
 		return (0);
 	}
 	/*
 	 * We just always try to lay things out contiguously.
 	 */
 	return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
 }
 
 /*
  * UFS2 counterpart of ffs_blkpref_ufs1(): same policy, but the block
  * pointers in bap are 64 bits wide and are converted with ufs_rw64().
  */
 daddr_t
 ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
 {
 	struct fs *fs;
 	int cg;
 	int avgbfree, startcg;
 
 	fs = ip->i_fs;
 	/*
 	 * indx % fs_maxbpg is tested first, so bap[indx - 1] is not read
 	 * when indx is 0.
 	 */
 	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
 		if (lbn < NDADDR + NINDIR(fs)) {
 			cg = ino_to_cg(fs, ip->i_number);
 			return (fs->fs_fpg * cg + fs->fs_frag);
 		}
 		/*
 		 * Find a cylinder with greater than average number of
 		 * unused data blocks.
 		 */
 		if (indx == 0 || bap[indx - 1] == 0)
 			startcg =
 			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
 		else
 			startcg = dtog(fs,
 				ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
 		startcg %= fs->fs_ncg;
 		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 		/*
 		 * Sweep startcg .. last group, then wrap to the groups below
 		 * startcg (startcg itself was already tested by the first
 		 * loop, so the second loop stops short of it).
 		 */
 		for (cg = startcg; cg < fs->fs_ncg; cg++)
 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 				return (fs->fs_fpg * cg + fs->fs_frag);
 			}
 		for (cg = 0; cg < startcg; cg++)
 			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 				return (fs->fs_fpg * cg + fs->fs_frag);
 			}
 		/* No group reaches the average: report no preference. */
 		return (0);
 	}
 	/*
 	 * We just always try to lay things out contiguously.
 	 */
 	return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
 }
 
 /*
  * Implement the cylinder overflow algorithm.
  *
  * The policy implemented by this algorithm is:
  *   1) allocate the block in its requested cylinder group.
  *   2) quadratically rehash on the cylinder group number.
  *   3) brute force search for a free block.
  *
  * `size':	size for data blocks, mode for inodes
  */
 /*VARARGS5*/
 static daddr_t
 ffs_hashalloc(struct inode *ip, int cg, daddr_t pref, int size,
     daddr_t (*allocator)(struct inode *, int, daddr_t, int))
 {
 	struct fs *fs;
 	daddr_t result;
 	int i, icg = cg;	/* icg remembers the group we started from */
 
 	fs = ip->i_fs;
 	/*
 	 * 1: preferred cylinder group
 	 */
 	result = (*allocator)(ip, cg, pref, size);
 	if (result)
 		return (result);
 	/*
 	 * 2: quadratic rehash
 	 */
 	/* Only the first attempt carries `pref'; later ones pass 0. */
 	for (i = 1; i < fs->fs_ncg; i *= 2) {
 		cg += i;
 		if (cg >= fs->fs_ncg)
 			cg -= fs->fs_ncg;
 		result = (*allocator)(ip, cg, 0, size);
 		if (result)
 			return (result);
 	}
 	/*
 	 * 3: brute force search
 	 * Note that we start at i == 2, since 0 was checked initially,
 	 * and 1 is always checked in the quadratic rehash.
 	 */
 	cg = (icg + 2) % fs->fs_ncg;
 	for (i = 2; i < fs->fs_ncg; i++) {
 		result = (*allocator)(ip, cg, 0, size);
 		if (result)
 			return (result);
 		cg++;
 		if (cg == fs->fs_ncg)
 			cg = 0;
 	}
 	/* Every group was tried and the allocator returned 0 each time. */
 	return (0);
 }
 
 /*
  * Determine whether a block can be allocated.
  *
  * Check to see if a block of the appropriate size is available,
  * and if it is, allocate it.
  */
 static daddr_t
 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
 {
 	struct cg *cgp;
 	struct buf *bp;
 	daddr_t bno, blkno;
 	int error, frags, allocsiz, i;
 	struct fs *fs = ip->i_fs;
 	const int needswap = UFS_FSNEEDSWAP(fs);
 	/* Throw-away vnode for bread(): { fd, fs, v_data, offset }. */
+	struct vnode vp = { ip->i_fd, ip->i_fs, NULL, 0 };
 
 	/* Cheap reject from the in-core summary, before touching the image. */
 	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
 		return (0);
 	/* Read the cylinder group block itself. */
-	error = bread(ip->i_fd, ip->i_fs, fsbtodb(fs, cgtod(fs, cg)),
-		(int)fs->fs_cgsize, &bp);
+	error = bread(&vp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
+	    NULL, &bp);
 	if (error) {
-		brelse(bp);
+		brelse(bp, 0);
 		return (0);
 	}
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic_swap(cgp, needswap) ||
 	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
-		brelse(bp);
+		brelse(bp, 0);
 		return (0);
 	}
 	/* Whole block requested: ffs_alloccgblk() does the work. */
 	if (size == fs->fs_bsize) {
 		bno = ffs_alloccgblk(ip, bp, bpref);
 		bdwrite(bp);
 		return (bno);
 	}
 	/*
 	 * check to see if any fragments are already available
 	 * allocsiz is the size which will be allocated, hacking
 	 * it down to a smaller size if necessary
 	 */
 	frags = numfrags(fs, size);
 	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
 		if (cgp->cg_frsum[allocsiz] != 0)
 			break;
 	if (allocsiz == fs->fs_frag) {
 		/*
 		 * no fragments were available, so a block will be 
 		 * allocated, and hacked up
 		 */
 		if (cgp->cg_cs.cs_nbfree == 0) {
-			brelse(bp);
+			brelse(bp, 0);
 			return (0);
 		}
 		bno = ffs_alloccgblk(ip, bp, bpref);
 		bpref = dtogd(fs, bno);
 		/* Mark the unused tail of the new block as free fragments. */
 		for (i = frags; i < fs->fs_frag; i++)
 			setbit(cg_blksfree_swap(cgp, needswap), bpref + i);
 		i = fs->fs_frag - frags;
 		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
 		fs->fs_cstotal.cs_nffree += i;
 		fs->fs_cs(fs, cg).cs_nffree += i;
 		fs->fs_fmod = 1;
 		ufs_add32(cgp->cg_frsum[i], 1, needswap);
 		bdwrite(bp);
 		return (bno);
 	}
 	/* A free run of allocsiz fragments exists: take its first `frags'. */
 	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
 	for (i = 0; i < frags; i++)
 		clrbit(cg_blksfree_swap(cgp, needswap), bno + i);
 	ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
 	fs->fs_cstotal.cs_nffree -= frags;
 	fs->fs_cs(fs, cg).cs_nffree -= frags;
 	fs->fs_fmod = 1;
 	/* One run of allocsiz is gone; any remainder is a shorter run. */
 	ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
 	if (frags != allocsiz)
 		ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
 	/* bno is relative to this group's map; add the group's base. */
 	blkno = cg * fs->fs_fpg + bno;
 	bdwrite(bp);
 	return blkno;
 }
 
 /*
  * Allocate a block in a cylinder group.
  *
  * This algorithm implements the following policy:
  *   1) allocate the requested block.
  *   2) allocate a rotationally optimal block in the same cylinder.
  *   3) allocate the next available block on the block rotor for the
  *      specified cylinder group.
  * Note that this routine only allocates fs_bsize blocks; these
  * blocks may be fragmented by the routine that allocates them.
  */
 static daddr_t
 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
 {
 	struct cg *cgp;
 	daddr_t blkno;
 	int32_t bno;
 	struct fs *fs = ip->i_fs;
 	const int needswap = UFS_FSNEEDSWAP(fs);
 	u_int8_t *blksfree_swap;
 
 	/* bp holds the cylinder group block read by the caller. */
 	cgp = (struct cg *)bp->b_data;
 	blksfree_swap = cg_blksfree_swap(cgp, needswap);
 	/*
 	 * Without a preference, or with one that lies in another group,
 	 * start searching from this group's rotor.
 	 */
 	if (bpref == 0 || (uint32_t)dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
 		bpref = ufs_rw32(cgp->cg_rotor, needswap);
 	} else {
 		bpref = blknum(fs, bpref);
 		bno = dtogd(fs, bpref);
 		/*
 		 * if the requested block is available, use it
 		 */
 		if (ffs_isblock(fs, blksfree_swap, fragstoblks(fs, bno)))
 			goto gotit;
 	}
 	/*
 	 * Take the next available one in this cylinder group.
 	 */
 	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
 	if (bno < 0)
 		return (0);
 	cgp->cg_rotor = ufs_rw32(bno, needswap);
 gotit:
 	/*
 	 * Mark the block allocated in the free map and the cluster map, and
 	 * take it off the free-block counts kept in the group and in core.
 	 */
 	blkno = fragstoblks(fs, bno);
 	ffs_clrblock(fs, blksfree_swap, (long)blkno);
 	ffs_clusteracct(fs, cgp, blkno, -1);
 	ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
 	fs->fs_cstotal.cs_nbfree--;
 	fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
 	fs->fs_fmod = 1;
 	/* bno is relative to this group's map; add the group's base. */
 	blkno = ufs_rw32(cgp->cg_cgx, needswap) * fs->fs_fpg + bno;
 	return (blkno);
 }
 
 /*
  * Free a block or fragment.
  *
  * The specified block or fragment is placed back in the
  * free map. If a fragment is deallocated, a possible 
  * block reassembly is checked.
  */
 void
 ffs_blkfree(struct inode *ip, daddr_t bno, long size)
 {
 	struct cg *cgp;
 	struct buf *bp;
 	int32_t fragno, cgbno;
 	int i, error, cg, blk, frags, bbase;
 	struct fs *fs = ip->i_fs;
 	const int needswap = UFS_FSNEEDSWAP(fs);
 	/* Throw-away vnode for bread(): { fd, fs, v_data, offset }. */
+	struct vnode vp = { ip->i_fd, ip->i_fs, NULL, 0 };
 
 	/* A bad size, or a range crossing a block boundary, is fatal. */
 	if (size > fs->fs_bsize || fragoff(fs, size) != 0 ||
 	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
 		errx(1, "blkfree: bad size: bno %lld bsize %d size %ld",
 		    (long long)bno, fs->fs_bsize, size);
 	}
 	cg = dtog(fs, bno);
 	/* An out-of-range block number is only warned about and ignored. */
 	if (bno >= fs->fs_size) {
 		warnx("bad block %lld, ino %ju", (long long)bno,
 		    (uintmax_t)ip->i_number);
 		return;
 	}
 	/* Read the cylinder group block that owns bno. */
-	error = bread(ip->i_fd, ip->i_fs, fsbtodb(fs, cgtod(fs, cg)),
-		(int)fs->fs_cgsize, &bp);
+	error = bread(&vp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
+	    NULL, &bp);
 	if (error) {
-		brelse(bp);
+		brelse(bp, 0);
 		return;
 	}
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic_swap(cgp, needswap)) {
-		brelse(bp);
+		brelse(bp, 0);
 		return;
 	}
 	cgbno = dtogd(fs, bno);
 	if (size == fs->fs_bsize) {
 		/* Whole block: it must currently be marked allocated. */
 		fragno = fragstoblks(fs, cgbno);
 		if (!ffs_isfreeblock(fs, cg_blksfree_swap(cgp, needswap), fragno)) {
 			errx(1, "blkfree: freeing free block %lld",
 			    (long long)bno);
 		}
 		ffs_setblock(fs, cg_blksfree_swap(cgp, needswap), fragno);
 		ffs_clusteracct(fs, cgp, fragno, 1);
 		ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
 		fs->fs_cstotal.cs_nbfree++;
 		fs->fs_cs(fs, cg).cs_nbfree++;
 	} else {
 		/* bbase: first fragment of the block that contains cgbno. */
 		bbase = cgbno - fragnum(fs, cgbno);
 		/*
 		 * decrement the counts associated with the old frags
 		 */
 		blk = blkmap(fs, cg_blksfree_swap(cgp, needswap), bbase);
 		ffs_fragacct_swap(fs, blk, cgp->cg_frsum, -1, needswap);
 		/*
 		 * deallocate the fragment
 		 */
 		frags = numfrags(fs, size);
 		for (i = 0; i < frags; i++) {
 			if (isset(cg_blksfree_swap(cgp, needswap), cgbno + i)) {
 				errx(1, "blkfree: freeing free frag: block %lld",
 				    (long long)(cgbno + i));
 			}
 			setbit(cg_blksfree_swap(cgp, needswap), cgbno + i);
 		}
 		/* The loop above leaves i == frags. */
 		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
 		fs->fs_cstotal.cs_nffree += i;
 		fs->fs_cs(fs, cg).cs_nffree += i;
 		/*
 		 * add back in counts associated with the new frags
 		 */
 		blk = blkmap(fs, cg_blksfree_swap(cgp, needswap), bbase);
 		ffs_fragacct_swap(fs, blk, cgp->cg_frsum, 1, needswap);
 		/*
 		 * if a complete block has been reassembled, account for it
 		 */
 		fragno = fragstoblks(fs, bbase);
 		if (ffs_isblock(fs, cg_blksfree_swap(cgp, needswap), fragno)) {
 			/* Move fs_frag fragments from the frag to the block count. */
 			ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
 			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
 			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
 			ffs_clusteracct(fs, cgp, fragno, 1);
 			ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
 			fs->fs_cstotal.cs_nbfree++;
 			fs->fs_cs(fs, cg).cs_nbfree++;
 		}
 	}
 	fs->fs_fmod = 1;
 	bdwrite(bp);
 }
 
 
 /*
  * Scan `size' bytes starting at `cp' and stop at the first byte whose
  * `table' entry has any bit of `mask' set.
  *
  * Returns the number of bytes from that byte to the end of the range, so
  * 0 means no byte matched.
  */
 static int
 scanc(u_int size, const u_char *cp, const u_char table[], int mask)
 {
 	const u_char *end = &cp[size];
 
 	while (cp < end && (table[*cp] & mask) == 0)
 		cp++;
 	return (end - cp);
 }
 
 /*
  * Find a block of the specified size in the specified cylinder group.
  *
  * It is a panic if a request is made to find a block if none are
  * available.
  */
 static int32_t
 ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
 {
 	int32_t bno;
 	int start, len, loc, i;
 	int blk, field, subfield, pos;
 	int ostart, olen;
 	const int needswap = UFS_FSNEEDSWAP(fs);
 
 	/*
 	 * find the fragment by searching through the free block
 	 * map for an appropriate bit pattern
 	 */
 	if (bpref)
 		start = dtogd(fs, bpref) / NBBY;
 	else
 		start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
 	len = howmany(fs->fs_fpg, NBBY) - start;
 	/* Keep the first pass's range for the error message below. */
 	ostart = start;
 	olen = len;
 	/* First pass: from the starting byte to the end of the map. */
 	loc = scanc((u_int)len,
 		(const u_char *)&cg_blksfree_swap(cgp, needswap)[start],
 		(const u_char *)fragtbl[fs->fs_frag],
 		(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
 	if (loc == 0) {
 		/* Second pass: wrap around, bytes 0 .. old start inclusive. */
 		len = start + 1;
 		start = 0;
 		loc = scanc((u_int)len,
 			(const u_char *)&cg_blksfree_swap(cgp, needswap)[0],
 			(const u_char *)fragtbl[fs->fs_frag],
 			(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
 		if (loc == 0) {
 			errx(1,
     "ffs_alloccg: map corrupted: start %d len %d offset %d %ld",
 				ostart, olen,
 				ufs_rw32(cgp->cg_freeoff, needswap),
 				(long)cg_blksfree_swap(cgp, needswap) - (long)cgp);
 			/* NOTREACHED */
 		}
 	}
 	/*
 	 * scanc() returned how many bytes were left when it stopped, so
 	 * start + len - loc is the index of the matching map byte.
 	 */
 	bno = (start + len - loc) * NBBY;
 	cgp->cg_frotor = ufs_rw32(bno, needswap);
 	/*
 	 * found the byte in the map
 	 * sift through the bits to find the selected frag
 	 */
 	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
 		blk = blkmap(fs, cg_blksfree_swap(cgp, needswap), bno);
 		blk <<= 1;
 		field = around[allocsiz];
 		subfield = inside[allocsiz];
 		/* Slide the pattern across every position it can occupy. */
 		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
 			if ((blk & field) == subfield)
 				return (bno + pos);
 			field <<= 1;
 			subfield <<= 1;
 		}
 	}
 	/* errx() does not return; the return below is never reached. */
 	errx(1, "ffs_alloccg: block not in map: bno %lld", (long long)bno);
 	return (-1);
 }
 
 /*
  * Update the cluster map because of an allocation or free.
  *
  * Cnt == 1 means free; cnt == -1 means allocating.
  */
 void
 ffs_clusteracct(struct fs *fs, struct cg *cgp, int32_t blkno, int cnt)
 {
 	int32_t *sump;
 	int32_t *lp;
 	u_char *freemapp, *mapp;
 	int i, start, end, forw, back, map, bit;
 	const int needswap = UFS_FSNEEDSWAP(fs);
 
 	/* Nothing to maintain when fs_contigsumsize is not positive. */
 	if (fs->fs_contigsumsize <= 0)
 		return;
 	freemapp = cg_clustersfree_swap(cgp, needswap);
 	sump = cg_clustersum_swap(cgp, needswap);
 	/*
 	 * Allocate or clear the actual block.
 	 */
 	if (cnt > 0)
 		setbit(freemapp, blkno);
 	else
 		clrbit(freemapp, blkno);
 	/*
 	 * Find the size of the cluster going forward.
 	 */
 	start = blkno + 1;
 	end = start + fs->fs_contigsumsize;
 	/* Do not scan past the group's cg_nclusterblks. */
 	if ((unsigned)end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
 		end = ufs_rw32(cgp->cg_nclusterblks, needswap);
 	mapp = &freemapp[start / NBBY];
 	map = *mapp++;
 	bit = 1 << (start % NBBY);
 	for (i = start; i < end; i++) {
 		if ((map & bit) == 0)
 			break;
 		/* Advance within the byte, or load the next map byte. */
 		if ((i & (NBBY - 1)) != (NBBY - 1)) {
 			bit <<= 1;
 		} else {
 			map = *mapp++;
 			bit = 1;
 		}
 	}
 	forw = i - start;
 	/*
 	 * Find the size of the cluster going backward.
 	 */
 	/*
 	 * NOTE(review): for blkno == 0, start is -1 here, so the shift count
 	 * used for `bit' below is negative.  The loop then runs zero times
 	 * and bit is never used, but the shift itself looks undefined --
 	 * confirm.
 	 */
 	start = blkno - 1;
 	end = start - fs->fs_contigsumsize;
 	if (end < 0)
 		end = -1;
 	mapp = &freemapp[start / NBBY];
 	map = *mapp--;
 	bit = 1 << (start % NBBY);
 	for (i = start; i > end; i--) {
 		if ((map & bit) == 0)
 			break;
 		/* Step down within the byte, or load the previous map byte. */
 		if ((i & (NBBY - 1)) != 0) {
 			bit >>= 1;
 		} else {
 			map = *mapp--;
 			bit = 1 << (NBBY - 1);
 		}
 	}
 	back = start - i;
 	/*
 	 * Account for old cluster and the possibly new forward and
 	 * back clusters.
 	 */
 	i = back + forw + 1;
 	/* Lengths above fs_contigsumsize all share the last bucket. */
 	if (i > fs->fs_contigsumsize)
 		i = fs->fs_contigsumsize;
 	ufs_add32(sump[i], cnt, needswap);
 	if (back > 0)
 		ufs_add32(sump[back], -cnt, needswap);
 	if (forw > 0)
 		ufs_add32(sump[forw], -cnt, needswap);
 
 	/*
 	 * Update cluster summary information.
 	 */
 	/* Largest bucket index with a positive count, or 0 when none has. */
 	lp = &sump[fs->fs_contigsumsize];
 	for (i = fs->fs_contigsumsize; i > 0; i--)
 		if (ufs_rw32(*lp--, needswap) > 0)
 			break;
 	fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
 }
Index: stable/11/usr.sbin/makefs/ffs/ffs_balloc.c
===================================================================
--- stable/11/usr.sbin/makefs/ffs/ffs_balloc.c	(revision 329098)
+++ stable/11/usr.sbin/makefs/ffs/ffs_balloc.c	(revision 329099)
@@ -1,578 +1,576 @@
 /*	$NetBSD: ffs_balloc.c,v 1.13 2004/06/20 22:20:18 jmc Exp $	*/
 /* From NetBSD: ffs_balloc.c,v 1.25 2001/08/08 08:36:36 lukem Exp */
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/time.h>
 
 #include <assert.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "makefs.h"
 
 #include <ufs/ufs/dinode.h>
 #include <ufs/ffs/fs.h>
 
 #include "ffs/ufs_bswap.h"
 #include "ffs/buf.h"
 #include "ffs/ufs_inode.h"
 #include "ffs/ffs_extern.h"
 
 static int ffs_balloc_ufs1(struct inode *, off_t, int, struct buf **);
 static int ffs_balloc_ufs2(struct inode *, off_t, int, struct buf **);
 
 /*
  * Balloc defines the structure of file system storage
  * by allocating the physical blocks on a device given
  * the inode and the logical block number in a file.
  *
  * Assume: flags == B_SYNC | B_CLRBUF
  */
 
 int
 ffs_balloc(struct inode *ip, off_t offset, int bufsize, struct buf **bpp)
 {
 	if (ip->i_fs->fs_magic == FS_UFS2_MAGIC)
 		return ffs_balloc_ufs2(ip, offset, bufsize, bpp);
 	else
 		return ffs_balloc_ufs1(ip, offset, bufsize, bpp);
 }
 
 static int
 ffs_balloc_ufs1(struct inode *ip, off_t offset, int bufsize, struct buf **bpp)
 {
 	daddr_t lbn, lastlbn;
 	int size;
 	int32_t nb;
 	struct buf *bp, *nbp;
 	struct fs *fs = ip->i_fs;
 	struct indir indirs[NIADDR + 2];
 	daddr_t newb, pref;
 	int32_t *bap;
 	int osize, nsize, num, i, error;
 	int32_t *allocblk, allociblk[NIADDR + 1];
 	int32_t *allocib;
 	const int needswap = UFS_FSNEEDSWAP(fs);
+	struct vnode vp = { ip->i_fd, ip->i_fs, NULL, 0 };
 
 	lbn = lblkno(fs, offset);
 	size = blkoff(fs, offset) + bufsize;
 	if (bpp != NULL) {
 		*bpp = NULL;
 	}
 
 	assert(size <= fs->fs_bsize);
 	if (lbn < 0)
 		return (EFBIG);
 
 	/*
 	 * If the next write will extend the file into a new block,
 	 * and the file is currently composed of a fragment
 	 * this fragment has to be extended to be a full block.
 	 */
 
 	lastlbn = lblkno(fs, ip->i_ffs1_size);
 	if (lastlbn < NDADDR && lastlbn < lbn) {
 		nb = lastlbn;
 		osize = blksize(fs, ip, nb);
 		if (osize < fs->fs_bsize && osize > 0) {
 			warnx("need to ffs_realloccg; not supported!");
 			abort();
 		}
 	}
 
 	/*
 	 * The first NDADDR blocks are direct blocks
 	 */
 
 	if (lbn < NDADDR) {
 		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
 		if (nb != 0 && ip->i_ffs1_size >= lblktosize(fs, lbn + 1)) {
 
 			/*
 			 * The block is an already-allocated direct block
 			 * and the file already extends past this block,
 			 * thus this must be a whole block.
 			 * Just read the block (if requested).
 			 */
 
 			if (bpp != NULL) {
-				error = bread(ip->i_fd, ip->i_fs, lbn,
-				    fs->fs_bsize, bpp);
+				error = bread(&vp, lbn, fs->fs_bsize, NULL,
+				    bpp);
 				if (error) {
-					brelse(*bpp);
+					brelse(*bpp, 0);
 					return (error);
 				}
 			}
 			return (0);
 		}
 		if (nb != 0) {
 
 			/*
 			 * Consider need to reallocate a fragment.
 			 */
 
 			osize = fragroundup(fs, blkoff(fs, ip->i_ffs1_size));
 			nsize = fragroundup(fs, size);
 			if (nsize <= osize) {
 
 				/*
 				 * The existing block is already
 				 * at least as big as we want.
 				 * Just read the block (if requested).
 				 */
 
 				if (bpp != NULL) {
-					error = bread(ip->i_fd, ip->i_fs, lbn,
-					    osize, bpp);
+					error = bread(&vp, lbn, osize, NULL,
+					    bpp);
 					if (error) {
-						brelse(*bpp);
+						brelse(*bpp, 0);
 						return (error);
 					}
 				}
 				return 0;
 			} else {
 				warnx("need to ffs_realloccg; not supported!");
 				abort();
 			}
 		} else {
 
 			/*
 			 * the block was not previously allocated,
 			 * allocate a new block or fragment.
 			 */
 
 			if (ip->i_ffs1_size < lblktosize(fs, lbn + 1))
 				nsize = fragroundup(fs, size);
 			else
 				nsize = fs->fs_bsize;
 			error = ffs_alloc(ip, lbn,
 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
 				&ip->i_ffs1_db[0]),
 				nsize, &newb);
 			if (error)
 				return (error);
 			if (bpp != NULL) {
-				bp = getblk(ip->i_fd, ip->i_fs, lbn, nsize);
+				bp = getblk(&vp, lbn, nsize, 0, 0, 0);
 				bp->b_blkno = fsbtodb(fs, newb);
 				clrbuf(bp);
 				*bpp = bp;
 			}
 		}
 		ip->i_ffs1_db[lbn] = ufs_rw32((int32_t)newb, needswap);
 		return (0);
 	}
 
 	/*
 	 * Determine the number of levels of indirection.
 	 */
 
 	pref = 0;
 	if ((error = ufs_getlbns(ip, lbn, indirs, &num)) != 0)
 		return (error);
 
 	if (num < 1) {
 		warnx("ffs_balloc: ufs_getlbns returned indirect block");
 		abort();
 	}
 
 	/*
 	 * Fetch the first indirect block allocating if necessary.
 	 */
 
 	--num;
 	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
 	allocib = NULL;
 	allocblk = allociblk;
 	if (nb == 0) {
 		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb);
 		if (error)
 			return error;
 		nb = newb;
 		*allocblk++ = nb;
-		bp = getblk(ip->i_fd, ip->i_fs, indirs[1].in_lbn, fs->fs_bsize);
+		bp = getblk(&vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
 		bp->b_blkno = fsbtodb(fs, nb);
 		clrbuf(bp);
 		/*
 		 * Write synchronously so that indirect blocks
 		 * never point at garbage.
 		 */
 		if ((error = bwrite(bp)) != 0)
 			return error;
 		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
 		*allocib = ufs_rw32((int32_t)nb, needswap);
 	}
 
 	/*
 	 * Fetch through the indirect blocks, allocating as necessary.
 	 */
 
 	for (i = 1;;) {
-		error = bread(ip->i_fd, ip->i_fs, indirs[i].in_lbn, 
-		    fs->fs_bsize, &bp);
+		error = bread(&vp, indirs[i].in_lbn, fs->fs_bsize, NULL, &bp);
 		if (error) {
-			brelse(bp);
+			brelse(bp, 0);
 			return error;
 		}
 		bap = (int32_t *)bp->b_data;
 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
 		if (i == num)
 			break;
 		i++;
 		if (nb != 0) {
-			brelse(bp);
+			brelse(bp, 0);
 			continue;
 		}
 		if (pref == 0)
 			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb);
 		if (error) {
-			brelse(bp);
+			brelse(bp, 0);
 			return error;
 		}
 		nb = newb;
 		*allocblk++ = nb;
-		nbp = getblk(ip->i_fd, ip->i_fs, indirs[i].in_lbn,
-		    fs->fs_bsize);
+		nbp = getblk(&vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
 		nbp->b_blkno = fsbtodb(fs, nb);
 		clrbuf(nbp);
 		/*
 		 * Write synchronously so that indirect blocks
 		 * never point at garbage.
 		 */
 
 		if ((error = bwrite(nbp)) != 0) {
-			brelse(bp);
+			brelse(bp, 0);
 			return error;
 		}
 		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
 
 		bwrite(bp);
 	}
 
 	/*
 	 * Get the data block, allocating if necessary.
 	 */
 
 	if (nb == 0) {
 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb);
 		if (error) {
-			brelse(bp);
+			brelse(bp, 0);
 			return error;
 		}
 		nb = newb;
 		*allocblk++ = nb;
 		if (bpp != NULL) {
-			nbp = getblk(ip->i_fd, ip->i_fs, lbn, fs->fs_bsize);
+			nbp = getblk(&vp, lbn, fs->fs_bsize, 0, 0, 0);
 			nbp->b_blkno = fsbtodb(fs, nb);
 			clrbuf(nbp);
 			*bpp = nbp;
 		}
 		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
 
 		/*
 		 * If required, write synchronously, otherwise use
 		 * delayed write.
 		 */
 		bwrite(bp);
 		return (0);
 	}
-	brelse(bp);
+	brelse(bp, 0);
 	if (bpp != NULL) {
-		error = bread(ip->i_fd, ip->i_fs, lbn, (int)fs->fs_bsize, &nbp);
+		error = bread(&vp, lbn, (int)fs->fs_bsize, NULL, &nbp);
 		if (error) {
-			brelse(nbp);
+			brelse(nbp, 0);
 			return error;
 		}
 		*bpp = nbp;
 	}
 	return (0);
 }
 
 static int
 ffs_balloc_ufs2(struct inode *ip, off_t offset, int bufsize, struct buf **bpp)
 {
 	daddr_t lbn, lastlbn;
 	int size;
 	struct buf *bp, *nbp;
 	struct fs *fs = ip->i_fs;
 	struct indir indirs[NIADDR + 2];
 	daddr_t newb, pref, nb;
 	int64_t *bap;
 	int osize, nsize, num, i, error;
 	int64_t *allocblk, allociblk[NIADDR + 1];
 	int64_t *allocib;
 	const int needswap = UFS_FSNEEDSWAP(fs);
+	struct vnode vp = { ip->i_fd, ip->i_fs, NULL, 0 };
 
 	lbn = lblkno(fs, offset);
 	size = blkoff(fs, offset) + bufsize;
 	if (bpp != NULL) {
 		*bpp = NULL;
 	}
 
 	assert(size <= fs->fs_bsize);
 	if (lbn < 0)
 		return (EFBIG);
 
 	/*
 	 * If the next write will extend the file into a new block,
 	 * and the file is currently composed of a fragment
 	 * this fragment has to be extended to be a full block.
 	 */
 
 	lastlbn = lblkno(fs, ip->i_ffs2_size);
 	if (lastlbn < NDADDR && lastlbn < lbn) {
 		nb = lastlbn;
 		osize = blksize(fs, ip, nb);
 		if (osize < fs->fs_bsize && osize > 0) {
 			warnx("need to ffs_realloccg; not supported!");
 			abort();
 		}
 	}
 
 	/*
 	 * The first NDADDR blocks are direct blocks
 	 */
 
 	if (lbn < NDADDR) {
 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
 		if (nb != 0 && ip->i_ffs2_size >= lblktosize(fs, lbn + 1)) {
 
 			/*
 			 * The block is an already-allocated direct block
 			 * and the file already extends past this block,
 			 * thus this must be a whole block.
 			 * Just read the block (if requested).
 			 */
 
 			if (bpp != NULL) {
-				error = bread(ip->i_fd, ip->i_fs, lbn,
-				    fs->fs_bsize, bpp);
+				error = bread(&vp, lbn, fs->fs_bsize, NULL,
+				    bpp);
 				if (error) {
-					brelse(*bpp);
+					brelse(*bpp, 0);
 					return (error);
 				}
 			}
 			return (0);
 		}
 		if (nb != 0) {
 
 			/*
 			 * Consider need to reallocate a fragment.
 			 */
 
 			osize = fragroundup(fs, blkoff(fs, ip->i_ffs2_size));
 			nsize = fragroundup(fs, size);
 			if (nsize <= osize) {
 
 				/*
 				 * The existing block is already
 				 * at least as big as we want.
 				 * Just read the block (if requested).
 				 */
 
 				if (bpp != NULL) {
-					error = bread(ip->i_fd, ip->i_fs, lbn,
-					    osize, bpp);
+					error = bread(&vp, lbn, osize, NULL,
+					    bpp);
 					if (error) {
-						brelse(*bpp);
+						brelse(*bpp, 0);
 						return (error);
 					}
 				}
 				return 0;
 			} else {
 				warnx("need to ffs_realloccg; not supported!");
 				abort();
 			}
 		} else {
 
 			/*
 			 * the block was not previously allocated,
 			 * allocate a new block or fragment.
 			 */
 
 			if (ip->i_ffs2_size < lblktosize(fs, lbn + 1))
 				nsize = fragroundup(fs, size);
 			else
 				nsize = fs->fs_bsize;
 			error = ffs_alloc(ip, lbn,
 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
 				&ip->i_ffs2_db[0]),
 				nsize, &newb);
 			if (error)
 				return (error);
 			if (bpp != NULL) {
-				bp = getblk(ip->i_fd, ip->i_fs, lbn, nsize);
+				bp = getblk(&vp, lbn, nsize, 0, 0, 0);
 				bp->b_blkno = fsbtodb(fs, newb);
 				clrbuf(bp);
 				*bpp = bp;
 			}
 		}
 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
 		return (0);
 	}
 
 	/*
 	 * Determine the number of levels of indirection.
 	 */
 
 	pref = 0;
 	if ((error = ufs_getlbns(ip, lbn, indirs, &num)) != 0)
 		return (error);
 
 	if (num < 1) {
 		warnx("ffs_balloc: ufs_getlbns returned indirect block");
 		abort();
 	}
 
 	/*
 	 * Fetch the first indirect block allocating if necessary.
 	 */
 
 	--num;
 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
 	allocib = NULL;
 	allocblk = allociblk;
 	if (nb == 0) {
 		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb);
 		if (error)
 			return error;
 		nb = newb;
 		*allocblk++ = nb;
-		bp = getblk(ip->i_fd, ip->i_fs, indirs[1].in_lbn, fs->fs_bsize);
+		bp = getblk(&vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
 		bp->b_blkno = fsbtodb(fs, nb);
 		clrbuf(bp);
 		/*
 		 * Write synchronously so that indirect blocks
 		 * never point at garbage.
 		 */
 		if ((error = bwrite(bp)) != 0)
 			return error;
 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
 		*allocib = ufs_rw64(nb, needswap);
 	}
 
 	/*
 	 * Fetch through the indirect blocks, allocating as necessary.
 	 */
 
 	for (i = 1;;) {
-		error = bread(ip->i_fd, ip->i_fs, indirs[i].in_lbn, 
-		    fs->fs_bsize, &bp);
+		error = bread(&vp, indirs[i].in_lbn, fs->fs_bsize, NULL, &bp);
 		if (error) {
-			brelse(bp);
+			brelse(bp, 0);
 			return error;
 		}
 		bap = (int64_t *)bp->b_data;
 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
 		if (i == num)
 			break;
 		i++;
 		if (nb != 0) {
-			brelse(bp);
+			brelse(bp, 0);
 			continue;
 		}
 		if (pref == 0)
 			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb);
 		if (error) {
-			brelse(bp);
+			brelse(bp, 0);
 			return error;
 		}
 		nb = newb;
 		*allocblk++ = nb;
-		nbp = getblk(ip->i_fd, ip->i_fs, indirs[i].in_lbn,
-		    fs->fs_bsize);
+		nbp = getblk(&vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
 		nbp->b_blkno = fsbtodb(fs, nb);
 		clrbuf(nbp);
 		/*
 		 * Write synchronously so that indirect blocks
 		 * never point at garbage.
 		 */
 
 		if ((error = bwrite(nbp)) != 0) {
-			brelse(bp);
+			brelse(bp, 0);
 			return error;
 		}
 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
 
 		bwrite(bp);
 	}
 
 	/*
 	 * Get the data block, allocating if necessary.
 	 */
 
 	if (nb == 0) {
 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb);
 		if (error) {
-			brelse(bp);
+			brelse(bp, 0);
 			return error;
 		}
 		nb = newb;
 		*allocblk++ = nb;
 		if (bpp != NULL) {
-			nbp = getblk(ip->i_fd, ip->i_fs, lbn, fs->fs_bsize);
+			nbp = getblk(&vp, lbn, fs->fs_bsize, 0, 0, 0);
 			nbp->b_blkno = fsbtodb(fs, nb);
 			clrbuf(nbp);
 			*bpp = nbp;
 		}
 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
 
 		/*
 		 * If required, write synchronously, otherwise use
 		 * delayed write.
 		 */
 		bwrite(bp);
 		return (0);
 	}
-	brelse(bp);
+	brelse(bp, 0);
 	if (bpp != NULL) {
-		error = bread(ip->i_fd, ip->i_fs, lbn, (int)fs->fs_bsize, &nbp);
+		error = bread(&vp, lbn, (int)fs->fs_bsize, NULL, &nbp);
 		if (error) {
-			brelse(nbp);
+			brelse(nbp, 0);
 			return error;
 		}
 		*bpp = nbp;
 	}
 	return (0);
 }
Index: stable/11/usr.sbin/makefs/ffs.c
===================================================================
--- stable/11/usr.sbin/makefs/ffs.c	(revision 329098)
+++ stable/11/usr.sbin/makefs/ffs.c	(revision 329099)
@@ -1,1185 +1,1185 @@
 /*	$NetBSD: ffs.c,v 1.44 2009/04/28 22:49:26 joerg Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
  * All rights reserved.
  *
  * Written by Luke Mewburn for Wasabi Systems, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed for the NetBSD Project by
  *      Wasabi Systems, Inc.
  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  *    or promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_alloc.c	8.19 (Berkeley) 7/13/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 
 #include <sys/mount.h>
 
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 
 #include "makefs.h"
 #include "ffs.h"
 
 #if HAVE_STRUCT_STATVFS_F_IOSIZE && HAVE_FSTATVFS
 #include <sys/statvfs.h>
 #endif
 
 #include <ufs/ufs/dinode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ffs/fs.h>
 
 
 #include "ffs/ufs_bswap.h"
 #include "ffs/ufs_inode.h"
 #include "ffs/newfs_extern.h"
 #include "ffs/ffs_extern.h"
 
 #undef DIP
 #define DIP(dp, field) \
 	((ffs_opts->version == 1) ? \
 	(dp)->ffs1_din.di_##field : (dp)->ffs2_din.di_##field)
 
 /*
  * Various file system defaults (cribbed from newfs(8)).
  */
 #define	DFL_FRAGSIZE		1024		/* fragment size */
 #define	DFL_BLKSIZE		8192		/* block size */
 #define	DFL_SECSIZE		512		/* sector size */
 #define	DFL_CYLSPERGROUP	65536		/* cylinders per group */
 #define	DFL_FRAGSPERINODE	4		/* fragments per inode */
 #define	DFL_ROTDELAY		0		/* rotational delay */
 #define	DFL_NRPOS		1		/* rotational positions */
 #define	DFL_RPM			3600		/* rpm of disk */
 #define	DFL_NSECTORS		64		/* # of sectors */
 #define	DFL_NTRACKS		16		/* # of tracks */
 
 
 typedef struct {
 	u_char		*buf;		/* buf for directory */
 	doff_t		size;		/* full size of buf */
 	doff_t		cur;		/* offset of current entry */
 } dirbuf_t;
 
 
 static	int	ffs_create_image(const char *, fsinfo_t *);
 static	void	ffs_dump_fsinfo(fsinfo_t *);
 static	void	ffs_dump_dirbuf(dirbuf_t *, const char *, int);
 static	void	ffs_make_dirbuf(dirbuf_t *, const char *, fsnode *, int);
 static	int	ffs_populate_dir(const char *, fsnode *, fsinfo_t *);
 static	void	ffs_size_dir(fsnode *, fsinfo_t *);
 static	void	ffs_validate(const char *, fsnode *, fsinfo_t *);
 static	void	ffs_write_file(union dinode *, uint32_t, void *, fsinfo_t *);
 static	void	ffs_write_inode(union dinode *, uint32_t, const fsinfo_t *);
 static  void	*ffs_build_dinode1(struct ufs1_dinode *, dirbuf_t *, fsnode *,
 				 fsnode *, fsinfo_t *);
 static  void	*ffs_build_dinode2(struct ufs2_dinode *, dirbuf_t *, fsnode *,
 				 fsnode *, fsinfo_t *);
 
 
 
 int	sectorsize;		/* XXX: for buf.c::getblk() */
 
 	/* publicly visible functions */
 
 void
 ffs_prep_opts(fsinfo_t *fsopts)
 {
 	ffs_opt_t *ffs_opts;
 
 	if ((ffs_opts = calloc(1, sizeof(ffs_opt_t))) == NULL)
 		err(1, "Allocating memory for ffs_options");
 
 	fsopts->fs_specific = ffs_opts;
 
 	ffs_opts->bsize= -1;
 	ffs_opts->fsize= -1;
 	ffs_opts->cpg= -1;
 	ffs_opts->density= -1;
 	ffs_opts->minfree= -1;
 	ffs_opts->optimization= -1;
 	ffs_opts->maxcontig= -1;
 	ffs_opts->maxbpg= -1;
 	ffs_opts->avgfilesize= -1;
 	ffs_opts->avgfpdir= -1;
 	ffs_opts->version = 1;
 }
 
 void
 ffs_cleanup_opts(fsinfo_t *fsopts)
 {
 	if (fsopts->fs_specific)
 		free(fsopts->fs_specific);
 }
 
 int
 ffs_parse_opts(const char *option, fsinfo_t *fsopts)
 {
 	ffs_opt_t	*ffs_opts = fsopts->fs_specific;
 
 	option_t ffs_options[] = {
 		{ "bsize",	&ffs_opts->bsize,	1,	INT_MAX,
 					"block size" },
 		{ "fsize",	&ffs_opts->fsize,	1,	INT_MAX,
 					"fragment size" },
 		{ "density",	&ffs_opts->density,	1,	INT_MAX,
 					"bytes per inode" },
 		{ "minfree",	&ffs_opts->minfree,	0,	99,
 					"minfree" },
 		{ "maxbpg",	&ffs_opts->maxbpg,	1,	INT_MAX,
 					"max blocks per file in a cg" },
 		{ "avgfilesize", &ffs_opts->avgfilesize,1,	INT_MAX,
 					"expected average file size" },
 		{ "avgfpdir",	&ffs_opts->avgfpdir,	1,	INT_MAX,
 					"expected # of files per directory" },
 		{ "extent",	&ffs_opts->maxbsize,	1,	INT_MAX,
 					"maximum # extent size" },
 		{ "maxbpcg",	&ffs_opts->maxblkspercg,1,	INT_MAX,
 					"max # of blocks per group" },
 		{ "version",	&ffs_opts->version,	1,	2,
 					"UFS version" },
 		{ .name = NULL }
 	};
 
 	char	*var, *val;
 	int	rv;
 
 	assert(option != NULL);
 	assert(fsopts != NULL);
 	assert(ffs_opts != NULL);
 
 	if (debug & DEBUG_FS_PARSE_OPTS)
 		printf("ffs_parse_opts: got `%s'\n", option);
 
 	if ((var = strdup(option)) == NULL)
 		err(1, "Allocating memory for copy of option string");
 	rv = 0;
 
 	if ((val = strchr(var, '=')) == NULL) {
 		warnx("Option `%s' doesn't contain a value", var);
 		goto leave_ffs_parse_opts;
 	}
 	*val++ = '\0';
 
 	if (strcmp(var, "optimization") == 0) {
 		if (strcmp(val, "time") == 0) {
 			ffs_opts->optimization = FS_OPTTIME;
 		} else if (strcmp(val, "space") == 0) {
 			ffs_opts->optimization = FS_OPTSPACE;
 		} else {
 			warnx("Invalid optimization `%s'", val);
 			goto leave_ffs_parse_opts;
 		}
 		rv = 1;
 	} else if (strcmp(var, "label") == 0) {
 		strlcpy(ffs_opts->label, val, sizeof(ffs_opts->label));
 		rv = 1;
 	} else
 		rv = set_option(ffs_options, var, val);
 
  leave_ffs_parse_opts:
 	if (var)
 		free(var);
 	return (rv);
 }
 
 
 void
 ffs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts)
 {
 	struct fs	*superblock;
 	struct timeval	start;
 
 	assert(image != NULL);
 	assert(dir != NULL);
 	assert(root != NULL);
 	assert(fsopts != NULL);
 
 	if (debug & DEBUG_FS_MAKEFS)
 		printf("ffs_makefs: image %s directory %s root %p\n",
 		    image, dir, root);
 
 		/* validate tree and options */
 	TIMER_START(start);
 	ffs_validate(dir, root, fsopts);
 	TIMER_RESULTS(start, "ffs_validate");
 
 	printf("Calculated size of `%s': %lld bytes, %lld inodes\n",
 	    image, (long long)fsopts->size, (long long)fsopts->inodes);
 
 		/* create image */
 	TIMER_START(start);
 	if (ffs_create_image(image, fsopts) == -1)
 		errx(1, "Image file `%s' not created.", image);
 	TIMER_RESULTS(start, "ffs_create_image");
 
 	fsopts->curinode = ROOTINO;
 
 	if (debug & DEBUG_FS_MAKEFS)
 		putchar('\n');
 
 		/* populate image */
 	printf("Populating `%s'\n", image);
 	TIMER_START(start);
 	if (! ffs_populate_dir(dir, root, fsopts))
 		errx(1, "Image file `%s' not populated.", image);
 	TIMER_RESULTS(start, "ffs_populate_dir");
 
 		/* ensure no outstanding buffers remain */
 	if (debug & DEBUG_FS_MAKEFS)
 		bcleanup();
 
 		/* update various superblock parameters */
 	superblock = fsopts->superblock;
 	superblock->fs_fmod = 0;
 	superblock->fs_old_cstotal.cs_ndir   = superblock->fs_cstotal.cs_ndir;
 	superblock->fs_old_cstotal.cs_nbfree = superblock->fs_cstotal.cs_nbfree;
 	superblock->fs_old_cstotal.cs_nifree = superblock->fs_cstotal.cs_nifree;
 	superblock->fs_old_cstotal.cs_nffree = superblock->fs_cstotal.cs_nffree;
 
 		/* write out superblock; image is now complete */
 	ffs_write_superblock(fsopts->superblock, fsopts);
 	if (close(fsopts->fd) == -1)
 		err(1, "Closing `%s'", image);
 	fsopts->fd = -1;
 	printf("Image `%s' complete\n", image);
 }
 
 	/* end of public functions */
 
 
 static void
 ffs_validate(const char *dir, fsnode *root, fsinfo_t *fsopts)
 {
 	int32_t	ncg = 1;
 #if notyet
 	int32_t	spc, nspf, ncyl, fssize;
 #endif
 	ffs_opt_t	*ffs_opts = fsopts->fs_specific;
 
 	assert(dir != NULL);
 	assert(root != NULL);
 	assert(fsopts != NULL);
 	assert(ffs_opts != NULL);
 
 	if (debug & DEBUG_FS_VALIDATE) {
 		printf("ffs_validate: before defaults set:\n");
 		ffs_dump_fsinfo(fsopts);
 	}
 
 		/* set FFS defaults */
 	if (fsopts->sectorsize == -1)
 		fsopts->sectorsize = DFL_SECSIZE;
 	if (ffs_opts->fsize == -1)
 		ffs_opts->fsize = MAX(DFL_FRAGSIZE, fsopts->sectorsize);
 	if (ffs_opts->bsize == -1)
 		ffs_opts->bsize = MIN(DFL_BLKSIZE, 8 * ffs_opts->fsize);
 	if (ffs_opts->cpg == -1)
 		ffs_opts->cpg = DFL_CYLSPERGROUP;
 	else
 		ffs_opts->cpgflg = 1;
 				/* fsopts->density is set below */
 	if (ffs_opts->nsectors == -1)
 		ffs_opts->nsectors = DFL_NSECTORS;
 	if (ffs_opts->minfree == -1)
 		ffs_opts->minfree = MINFREE;
 	if (ffs_opts->optimization == -1)
 		ffs_opts->optimization = DEFAULTOPT;
 	if (ffs_opts->maxcontig == -1)
 		ffs_opts->maxcontig =
 		    MAX(1, MIN(MAXPHYS, FFS_MAXBSIZE) / ffs_opts->bsize);
 	/* XXX ondisk32 */
 	if (ffs_opts->maxbpg == -1)
 		ffs_opts->maxbpg = ffs_opts->bsize / sizeof(int32_t);
 	if (ffs_opts->avgfilesize == -1)
 		ffs_opts->avgfilesize = AVFILESIZ;
 	if (ffs_opts->avgfpdir == -1)
 		ffs_opts->avgfpdir = AFPDIR;
 
 	if (fsopts->maxsize > 0 &&
 	    roundup(fsopts->minsize, ffs_opts->bsize) > fsopts->maxsize)
 		errx(1, "`%s' minsize of %lld rounded up to ffs bsize of %d "
 		    "exceeds maxsize %lld.  Lower bsize, or round the minimum "
 		    "and maximum sizes to bsize.", dir,
 		    (long long)fsopts->minsize, ffs_opts->bsize,
 		    (long long)fsopts->maxsize);
 
 		/* calculate size of tree */
 	ffs_size_dir(root, fsopts);
 	fsopts->inodes += ROOTINO;		/* include first two inodes */
 
 	if (debug & DEBUG_FS_VALIDATE)
 		printf("ffs_validate: size of tree: %lld bytes, %lld inodes\n",
 		    (long long)fsopts->size, (long long)fsopts->inodes);
 
 		/* add requested slop */
 	fsopts->size += fsopts->freeblocks;
 	fsopts->inodes += fsopts->freefiles;
 	if (fsopts->freefilepc > 0)
 		fsopts->inodes =
 		    fsopts->inodes * (100 + fsopts->freefilepc) / 100;
 	if (fsopts->freeblockpc > 0)
 		fsopts->size =
 		    fsopts->size * (100 + fsopts->freeblockpc) / 100;
 
 		/* add space needed for superblocks */
 	/*
 	 * The old SBOFF (SBLOCK_UFS1) is used here because makefs is
 	 * typically used for small filesystems where space matters.
 	 * XXX make this an option.
 	 */
 	fsopts->size += (SBLOCK_UFS1 + SBLOCKSIZE) * ncg;
 		/* add space needed to store inodes, x3 for blockmaps, etc */
 	if (ffs_opts->version == 1)
 		fsopts->size += ncg * DINODE1_SIZE *
 		    roundup(fsopts->inodes / ncg, 
 			ffs_opts->bsize / DINODE1_SIZE);
 	else
 		fsopts->size += ncg * DINODE2_SIZE *
 		    roundup(fsopts->inodes / ncg, 
 			ffs_opts->bsize / DINODE2_SIZE);
 
 		/* add minfree */
 	if (ffs_opts->minfree > 0)
 		fsopts->size =
 		    fsopts->size * (100 + ffs_opts->minfree) / 100;
 	/*
 	 * XXX	any other fs slop to add, such as csum's, bitmaps, etc ??
 	 */
 
 	if (fsopts->size < fsopts->minsize)	/* ensure meets minimum size */
 		fsopts->size = fsopts->minsize;
 
 		/* round up to the next block */
 	fsopts->size = roundup(fsopts->size, ffs_opts->bsize);
 
 		/* round up to requested block size, if any */
 	if (fsopts->roundup > 0)
 		fsopts->size = roundup(fsopts->size, fsopts->roundup);
 
 		/* calculate density if necessary */
 	if (ffs_opts->density == -1)
 		ffs_opts->density = fsopts->size / fsopts->inodes + 1;
 
 	if (debug & DEBUG_FS_VALIDATE) {
 		printf("ffs_validate: after defaults set:\n");
 		ffs_dump_fsinfo(fsopts);
 		printf("ffs_validate: dir %s; %lld bytes, %lld inodes\n",
 		    dir, (long long)fsopts->size, (long long)fsopts->inodes);
 	}
 	sectorsize = fsopts->sectorsize;	/* XXX - see earlier */
 
 		/* now check calculated sizes vs requested sizes */
 	if (fsopts->maxsize > 0 && fsopts->size > fsopts->maxsize) {
 		errx(1, "`%s' size of %lld is larger than the maxsize of %lld.",
 		    dir, (long long)fsopts->size, (long long)fsopts->maxsize);
 	}
 }
 
 
 static void
 ffs_dump_fsinfo(fsinfo_t *f)
 {
 
 	ffs_opt_t	*fs = f->fs_specific;
 
 	printf("fsopts at %p\n", f);
 
 	printf("\tsize %lld, inodes %lld, curinode %u\n",
 	    (long long)f->size, (long long)f->inodes, f->curinode);
 
 	printf("\tminsize %lld, maxsize %lld\n",
 	    (long long)f->minsize, (long long)f->maxsize);
 	printf("\tfree files %lld, freefile %% %d\n",
 	    (long long)f->freefiles, f->freefilepc);
 	printf("\tfree blocks %lld, freeblock %% %d\n",
 	    (long long)f->freeblocks, f->freeblockpc);
 	printf("\tneedswap %d, sectorsize %d\n", f->needswap, f->sectorsize);
 
 	printf("\tbsize %d, fsize %d, cpg %d, density %d\n",
 	    fs->bsize, fs->fsize, fs->cpg, fs->density);
 	printf("\tnsectors %d, rpm %d, minfree %d\n",
 	    fs->nsectors, fs->rpm, fs->minfree);
 	printf("\tmaxcontig %d, maxbpg %d\n",
 	    fs->maxcontig, fs->maxbpg);
 	printf("\toptimization %s\n",
 	    fs->optimization == FS_OPTSPACE ? "space" : "time");
 }
 
 
 static int
 ffs_create_image(const char *image, fsinfo_t *fsopts)
 {
 #if HAVE_STRUCT_STATVFS_F_IOSIZE && HAVE_FSTATVFS
 	struct statvfs	sfs;
 #endif
 	struct fs	*fs;
 	char	*buf;
 	int	i, bufsize;
 	off_t	bufrem;
 	time_t	tstamp;
 
 	assert (image != NULL);
 	assert (fsopts != NULL);
 
 		/* create image */
 	if ((fsopts->fd = open(image, O_RDWR | O_CREAT | O_TRUNC, 0666))
 	    == -1) {
 		warn("Can't open `%s' for writing", image);
 		return (-1);
 	}
 
 		/* zero image */
 #if HAVE_STRUCT_STATVFS_F_IOSIZE && HAVE_FSTATVFS
 	if (fstatvfs(fsopts->fd, &sfs) == -1) {
 #endif
 		bufsize = 8192;
 #if HAVE_STRUCT_STATVFS_F_IOSIZE && HAVE_FSTATVFS
 		warn("can't fstatvfs `%s', using default %d byte chunk",
 		    image, bufsize);
 	} else
 		bufsize = sfs.f_iosize;
 #endif
 	bufrem = fsopts->size;
 	if (fsopts->sparse) {
 		if (ftruncate(fsopts->fd, bufrem) == -1) {
 			warn("sparse option disabled.\n");
 			fsopts->sparse = 0;
 		}
 	}
 	if (fsopts->sparse) {
 		/* File truncated at bufrem. Remaining is 0 */
 		bufrem = 0;
 		buf = NULL;
 	} else {
 		if (debug & DEBUG_FS_CREATE_IMAGE)
 			printf("zero-ing image `%s', %lld sectors, "
 			    "using %d byte chunks\n", image, (long long)bufrem,
 			    bufsize);
 		if ((buf = calloc(1, bufsize)) == NULL) {
 			warn("Can't create buffer for sector");
 			return (-1);
 		}
 	}
 	while (bufrem > 0) {
 		i = write(fsopts->fd, buf, MIN(bufsize, bufrem));
 		if (i == -1) {
 			warn("zeroing image, %lld bytes to go",
 			    (long long)bufrem);
 			free(buf);
 			return (-1);
 		}
 		bufrem -= i;
 	}
 	if (buf)
 		free(buf);
 
 		/* make the file system */
 	if (debug & DEBUG_FS_CREATE_IMAGE)
 		printf("calling mkfs(\"%s\", ...)\n", image);
 
 	if (stampst.st_ino != 0)
 		tstamp = stampst.st_ctime;
 	else
 		tstamp = start_time.tv_sec;
 
 	srandom(tstamp);
 
 	fs = ffs_mkfs(image, fsopts, tstamp);
 	fsopts->superblock = (void *)fs;
 	if (debug & DEBUG_FS_CREATE_IMAGE) {
 		time_t t;
 
 		t = (time_t)((struct fs *)fsopts->superblock)->fs_time;
 		printf("mkfs returned %p; fs_time %s",
 		    fsopts->superblock, ctime(&t));
 		printf("fs totals: nbfree %lld, nffree %lld, nifree %lld, ndir %lld\n",
 		    (long long)fs->fs_cstotal.cs_nbfree,
 		    (long long)fs->fs_cstotal.cs_nffree,
 		    (long long)fs->fs_cstotal.cs_nifree,
 		    (long long)fs->fs_cstotal.cs_ndir);
 	}
 
 	if (fs->fs_cstotal.cs_nifree + ROOTINO < fsopts->inodes) {
 		warnx(
 		"Image file `%s' has %lld free inodes; %lld are required.",
 		    image,
 		    (long long)(fs->fs_cstotal.cs_nifree + ROOTINO),
 		    (long long)fsopts->inodes);
 		return (-1);
 	}
 	return (fsopts->fd);
 }
 
 
 static void
 ffs_size_dir(fsnode *root, fsinfo_t *fsopts)
 {
 	struct direct	tmpdir;
 	fsnode *	node;
 	int		curdirsize, this;
 	ffs_opt_t	*ffs_opts = fsopts->fs_specific;
 
 	/* node may be NULL (empty directory) */
 	assert(fsopts != NULL);
 	assert(ffs_opts != NULL);
 
 	if (debug & DEBUG_FS_SIZE_DIR)
 		printf("ffs_size_dir: entry: bytes %lld inodes %lld\n",
 		    (long long)fsopts->size, (long long)fsopts->inodes);
 
 #define	ADDDIRENT(e) do {						\
 	tmpdir.d_namlen = strlen((e));					\
 	this = DIRSIZ_SWAP(0, &tmpdir, 0);					\
 	if (debug & DEBUG_FS_SIZE_DIR_ADD_DIRENT)			\
 		printf("ADDDIRENT: was: %s (%d) this %d cur %d\n",	\
 		    e, tmpdir.d_namlen, this, curdirsize);		\
 	if (this + curdirsize > roundup(curdirsize, DIRBLKSIZ))		\
 		curdirsize = roundup(curdirsize, DIRBLKSIZ);		\
 	curdirsize += this;						\
 	if (debug & DEBUG_FS_SIZE_DIR_ADD_DIRENT)			\
 		printf("ADDDIRENT: now: %s (%d) this %d cur %d\n",	\
 		    e, tmpdir.d_namlen, this, curdirsize);		\
 } while (0);
 
 	/*
 	 * XXX	this needs to take into account extra space consumed
 	 *	by indirect blocks, etc.
 	 */
 #define	ADDSIZE(x) do {							\
 	fsopts->size += roundup((x), ffs_opts->fsize);			\
 } while (0);
 
 	curdirsize = 0;
 	for (node = root; node != NULL; node = node->next) {
 		ADDDIRENT(node->name);
 		if (node == root) {			/* we're at "." */
 			assert(strcmp(node->name, ".") == 0);
 			ADDDIRENT("..");
 		} else if ((node->inode->flags & FI_SIZED) == 0) {
 				/* don't count duplicate names */
 			node->inode->flags |= FI_SIZED;
 			if (debug & DEBUG_FS_SIZE_DIR_NODE)
 				printf("ffs_size_dir: `%s' size %lld\n",
 				    node->name,
 				    (long long)node->inode->st.st_size);
 			fsopts->inodes++;
 			if (node->type == S_IFREG)
 				ADDSIZE(node->inode->st.st_size);
 			if (node->type == S_IFLNK) {
 				int	slen;
 
 				slen = strlen(node->symlink) + 1;
 				if (slen >= (ffs_opts->version == 1 ?
 						MAXSYMLINKLEN_UFS1 :
 						MAXSYMLINKLEN_UFS2))
 					ADDSIZE(slen);
 			}
 		}
 		if (node->type == S_IFDIR)
 			ffs_size_dir(node->child, fsopts);
 	}
 	ADDSIZE(curdirsize);
 
 	if (debug & DEBUG_FS_SIZE_DIR)
 		printf("ffs_size_dir: exit: size %lld inodes %lld\n",
 		    (long long)fsopts->size, (long long)fsopts->inodes);
 }
 
 static void *
 ffs_build_dinode1(struct ufs1_dinode *dinp, dirbuf_t *dbufp, fsnode *cur,
 		 fsnode *root, fsinfo_t *fsopts)
 {
 	int slen;
 	void *membuf;
 	struct stat *st = stampst.st_ino != 0 ? &stampst : &cur->inode->st;
 
 	memset(dinp, 0, sizeof(*dinp));
 	dinp->di_mode = cur->inode->st.st_mode;
 	dinp->di_nlink = cur->inode->nlink;
 	dinp->di_size = cur->inode->st.st_size;
 #if HAVE_STRUCT_STAT_ST_FLAGS
 	dinp->di_flags = cur->inode->st.st_flags;
 #endif
 #if HAVE_STRUCT_STAT_ST_GEN
 	dinp->di_gen = cur->inode->st.st_gen;
 #endif
 	dinp->di_uid = cur->inode->st.st_uid;
 	dinp->di_gid = cur->inode->st.st_gid;
 
 	dinp->di_atime = st->st_atime;
 	dinp->di_mtime = st->st_mtime;
 	dinp->di_ctime = st->st_ctime;
 #if HAVE_STRUCT_STAT_ST_MTIMENSEC
 	dinp->di_atimensec = st->st_atimensec;
 	dinp->di_mtimensec = st->st_mtimensec;
 	dinp->di_ctimensec = st->st_ctimensec;
 #endif
 		/* not set: di_db, di_ib, di_blocks, di_spare */
 
 	membuf = NULL;
 	if (cur == root) {			/* "."; write dirbuf */
 		membuf = dbufp->buf;
 		dinp->di_size = dbufp->size;
 	} else if (S_ISBLK(cur->type) || S_ISCHR(cur->type)) {
 		dinp->di_size = 0;	/* a device */
 		dinp->di_rdev =
 		    ufs_rw32(cur->inode->st.st_rdev, fsopts->needswap);
 	} else if (S_ISLNK(cur->type)) {	/* symlink */
 		slen = strlen(cur->symlink);
 		if (slen < MAXSYMLINKLEN_UFS1) {	/* short link */
 			memcpy(dinp->di_db, cur->symlink, slen);
 		} else
 			membuf = cur->symlink;
 		dinp->di_size = slen;
 	}
 	return membuf;
 }
 
 static void *
 ffs_build_dinode2(struct ufs2_dinode *dinp, dirbuf_t *dbufp, fsnode *cur,
 		 fsnode *root, fsinfo_t *fsopts)
 {
 	int slen;
 	void *membuf;
 	struct stat *st = stampst.st_ino != 0 ? &stampst : &cur->inode->st;
 
 	memset(dinp, 0, sizeof(*dinp));
 	dinp->di_mode = cur->inode->st.st_mode;
 	dinp->di_nlink = cur->inode->nlink;
 	dinp->di_size = cur->inode->st.st_size;
 #if HAVE_STRUCT_STAT_ST_FLAGS
 	dinp->di_flags = cur->inode->st.st_flags;
 #endif
 #if HAVE_STRUCT_STAT_ST_GEN
 	dinp->di_gen = cur->inode->st.st_gen;
 #endif
 	dinp->di_uid = cur->inode->st.st_uid;
 	dinp->di_gid = cur->inode->st.st_gid;
 
 	dinp->di_atime = st->st_atime;
 	dinp->di_mtime = st->st_mtime;
 	dinp->di_ctime = st->st_ctime;
 #if HAVE_STRUCT_STAT_ST_MTIMENSEC
 	dinp->di_atimensec = st->st_atimensec;
 	dinp->di_mtimensec = st->st_mtimensec;
 	dinp->di_ctimensec = st->st_ctimensec;
 #endif
 #if HAVE_STRUCT_STAT_BIRTHTIME
 	dinp->di_birthtime = st->st_birthtime;
 	dinp->di_birthnsec = st->st_birthtimensec;
 #endif
 		/* not set: di_db, di_ib, di_blocks, di_spare */
 
 	membuf = NULL;
 	if (cur == root) {			/* "."; write dirbuf */
 		membuf = dbufp->buf;
 		dinp->di_size = dbufp->size;
 	} else if (S_ISBLK(cur->type) || S_ISCHR(cur->type)) {
 		dinp->di_size = 0;	/* a device */
 		dinp->di_rdev =
 		    ufs_rw64(cur->inode->st.st_rdev, fsopts->needswap);
 	} else if (S_ISLNK(cur->type)) {	/* symlink */
 		slen = strlen(cur->symlink);
 		if (slen < MAXSYMLINKLEN_UFS2) {	/* short link */
 			memcpy(dinp->di_db, cur->symlink, slen);
 		} else
 			membuf = cur->symlink;
 		dinp->di_size = slen;
 	}
 	return membuf;
 }
 
 static int
 ffs_populate_dir(const char *dir, fsnode *root, fsinfo_t *fsopts)
 {
 	fsnode		*cur;
 	dirbuf_t	dirbuf;
 	union dinode	din;
 	void		*membuf;
 	char		path[MAXPATHLEN + 1];
 	ffs_opt_t	*ffs_opts = fsopts->fs_specific;
 
 	assert(dir != NULL);
 	assert(root != NULL);
 	assert(fsopts != NULL);
 	assert(ffs_opts != NULL);
 
 	(void)memset(&dirbuf, 0, sizeof(dirbuf));
 
 	if (debug & DEBUG_FS_POPULATE)
 		printf("ffs_populate_dir: PASS 1  dir %s node %p\n", dir, root);
 
 		/*
 		 * pass 1: allocate inode numbers, build directory `file'
 		 */
 	for (cur = root; cur != NULL; cur = cur->next) {
 		if ((cur->inode->flags & FI_ALLOCATED) == 0) {
 			cur->inode->flags |= FI_ALLOCATED;
 			if (cur == root && cur->parent != NULL)
 				cur->inode->ino = cur->parent->inode->ino;
 			else {
 				cur->inode->ino = fsopts->curinode;
 				fsopts->curinode++;
 			}
 		}
 		ffs_make_dirbuf(&dirbuf, cur->name, cur, fsopts->needswap);
 		if (cur == root) {		/* we're at "."; add ".." */
 			ffs_make_dirbuf(&dirbuf, "..",
 			    cur->parent == NULL ? cur : cur->parent->first,
 			    fsopts->needswap);
 			root->inode->nlink++;	/* count my parent's link */
 		} else if (cur->child != NULL)
 			root->inode->nlink++;	/* count my child's link */
 
 		/*
 		 * XXX	possibly write file and long symlinks here,
 		 *	ensuring that blocks get written before inodes?
 		 *	otoh, this isn't a real filesystem, so who
 		 *	cares about ordering? :-)
 		 */
 	}
 	if (debug & DEBUG_FS_POPULATE_DIRBUF)
 		ffs_dump_dirbuf(&dirbuf, dir, fsopts->needswap);
 
 		/*
 		 * pass 2: write out dirbuf, then non-directories at this level
 		 */
 	if (debug & DEBUG_FS_POPULATE)
 		printf("ffs_populate_dir: PASS 2  dir %s\n", dir);
 	for (cur = root; cur != NULL; cur = cur->next) {
 		if (cur->inode->flags & FI_WRITTEN)
 			continue;		/* skip hard-linked entries */
 		cur->inode->flags |= FI_WRITTEN;
 
 		if (cur->contents == NULL) {
 			if (snprintf(path, sizeof(path), "%s/%s/%s", cur->root,
 			    cur->path, cur->name) >= (int)sizeof(path))
 				errx(1, "Pathname too long.");
 		}
 
 		if (cur->child != NULL)
 			continue;		/* child creates own inode */
 
 				/* build on-disk inode */
 		if (ffs_opts->version == 1)
 			membuf = ffs_build_dinode1(&din.ffs1_din, &dirbuf, cur,
 			    root, fsopts);
 		else
 			membuf = ffs_build_dinode2(&din.ffs2_din, &dirbuf, cur,
 			    root, fsopts);
 
 		if (debug & DEBUG_FS_POPULATE_NODE) {
 			printf("ffs_populate_dir: writing ino %d, %s",
 			    cur->inode->ino, inode_type(cur->type));
 			if (cur->inode->nlink > 1)
 				printf(", nlink %d", cur->inode->nlink);
 			putchar('\n');
 		}
 
 		if (membuf != NULL) {
 			ffs_write_file(&din, cur->inode->ino, membuf, fsopts);
 		} else if (S_ISREG(cur->type)) {
 			ffs_write_file(&din, cur->inode->ino,
 			    (cur->contents) ?  cur->contents : path, fsopts);
 		} else {
 			assert (! S_ISDIR(cur->type));
 			ffs_write_inode(&din, cur->inode->ino, fsopts);
 		}
 	}
 
 		/*
 		 * pass 3: write out sub-directories
 		 */
 	if (debug & DEBUG_FS_POPULATE)
 		printf("ffs_populate_dir: PASS 3  dir %s\n", dir);
 	for (cur = root; cur != NULL; cur = cur->next) {
 		if (cur->child == NULL)
 			continue;
 		if (snprintf(path, sizeof(path), "%s/%s", dir, cur->name)
 		    >= sizeof(path))
 			errx(1, "Pathname too long.");
 		if (! ffs_populate_dir(path, cur->child, fsopts))
 			return (0);
 	}
 
 	if (debug & DEBUG_FS_POPULATE)
 		printf("ffs_populate_dir: DONE dir %s\n", dir);
 
 		/* cleanup */
 	if (dirbuf.buf != NULL)
 		free(dirbuf.buf);
 	return (1);
 }
 
 
 static void
 ffs_write_file(union dinode *din, uint32_t ino, void *buf, fsinfo_t *fsopts)
 {
 	int 	isfile, ffd;
 	char	*fbuf, *p;
 	off_t	bufleft, chunk, offset;
 	ssize_t nread;
 	struct inode	in;
 	struct buf *	bp;
 	ffs_opt_t	*ffs_opts = fsopts->fs_specific;
 
 	assert (din != NULL);
 	assert (buf != NULL);
 	assert (fsopts != NULL);
 	assert (ffs_opts != NULL);
 
 	isfile = S_ISREG(DIP(din, mode));
 	fbuf = NULL;
 	ffd = -1;
 	p = NULL;
 
 	in.i_fs = (struct fs *)fsopts->superblock;
 
 	if (debug & DEBUG_FS_WRITE_FILE) {
 		printf(
 		    "ffs_write_file: ino %u, din %p, isfile %d, %s, size %lld",
 		    ino, din, isfile, inode_type(DIP(din, mode) & S_IFMT),
 		    (long long)DIP(din, size));
 		if (isfile)
 			printf(", file '%s'\n", (char *)buf);
 		else
 			printf(", buffer %p\n", buf);
 	}
 
 	in.i_number = ino;
 	in.i_size = DIP(din, size);
 	if (ffs_opts->version == 1)
 		memcpy(&in.i_din.ffs1_din, &din->ffs1_din,
 		    sizeof(in.i_din.ffs1_din));
 	else
 		memcpy(&in.i_din.ffs2_din, &din->ffs2_din,
 		    sizeof(in.i_din.ffs2_din));
 	in.i_fd = fsopts->fd;
 
 	if (DIP(din, size) == 0)
 		goto write_inode_and_leave;		/* mmm, cheating */
 
 	if (isfile) {
 		if ((fbuf = malloc(ffs_opts->bsize)) == NULL)
 			err(1, "Allocating memory for write buffer");
 		if ((ffd = open((char *)buf, O_RDONLY, 0444)) == -1) {
 			warn("Can't open `%s' for reading", (char *)buf);
 			goto leave_ffs_write_file;
 		}
 	} else {
 		p = buf;
 	}
 
 	chunk = 0;
 	for (bufleft = DIP(din, size); bufleft > 0; bufleft -= chunk) {
 		chunk = MIN(bufleft, ffs_opts->bsize);
 		if (!isfile)
 			;
 		else if ((nread = read(ffd, fbuf, chunk)) == -1)
 			err(EXIT_FAILURE, "Reading `%s', %lld bytes to go",
 			    (char *)buf, (long long)bufleft);
 		else if (nread != chunk)
 			errx(EXIT_FAILURE, "Reading `%s', %lld bytes to go, "
 			    "read %zd bytes, expected %ju bytes, does "
 			    "metalog size= attribute mismatch source size?",
 			    (char *)buf, (long long)bufleft, nread,
 			    (uintmax_t)chunk);
 		else
 			p = fbuf;
 		offset = DIP(din, size) - bufleft;
 		if (debug & DEBUG_FS_WRITE_FILE_BLOCK)
 			printf(
 		"ffs_write_file: write %p offset %lld size %lld left %lld\n",
 			    p, (long long)offset,
 			    (long long)chunk, (long long)bufleft);
 	/*
 	 * XXX	if holey support is desired, do the check here
 	 *
 	 * XXX	might need to write out last bit in fragroundup
 	 *	sized chunk. however, ffs_balloc() handles this for us
 	 */
 		errno = ffs_balloc(&in, offset, chunk, &bp);
  bad_ffs_write_file:
 		if (errno != 0)
 			err(1,
 			    "Writing inode %d (%s), bytes %lld + %lld",
 			    ino,
 			    isfile ? (char *)buf :
 			      inode_type(DIP(din, mode) & S_IFMT),
 			    (long long)offset, (long long)chunk);
 		memcpy(bp->b_data, p, chunk);
 		errno = bwrite(bp);
 		if (errno != 0)
 			goto bad_ffs_write_file;
-		brelse(bp);
+		brelse(bp, 0);
 		if (!isfile)
 			p += chunk;
 	}
   
  write_inode_and_leave:
 	ffs_write_inode(&in.i_din, in.i_number, fsopts);
 
  leave_ffs_write_file:
 	if (fbuf)
 		free(fbuf);
 	if (ffd != -1)
 		close(ffd);
 }
 
 
 static void
 ffs_dump_dirbuf(dirbuf_t *dbuf, const char *dir, int needswap)
 {
 	doff_t		i;
 	struct direct	*de;
 	uint16_t	reclen;
 
 	assert (dbuf != NULL);
 	assert (dir != NULL);
 	printf("ffs_dump_dirbuf: dir %s size %d cur %d\n",
 	    dir, dbuf->size, dbuf->cur);
 
 	for (i = 0; i < dbuf->size; ) {
 		de = (struct direct *)(dbuf->buf + i);
 		reclen = ufs_rw16(de->d_reclen, needswap);
 		printf(
 	    " inode %4d %7s offset %4d reclen %3d namlen %3d name %s\n",
 		    ufs_rw32(de->d_ino, needswap),
 		    inode_type(DTTOIF(de->d_type)), i, reclen,
 		    de->d_namlen, de->d_name);
 		i += reclen;
 		assert(reclen > 0);
 	}
 }
 
 static void
 ffs_make_dirbuf(dirbuf_t *dbuf, const char *name, fsnode *node, int needswap)
 {
 	struct direct	de, *dp;
 	uint16_t	llen, reclen;
 	u_char		*newbuf;
 
 	assert (dbuf != NULL);
 	assert (name != NULL);
 	assert (node != NULL);
 					/* create direct entry */
 	(void)memset(&de, 0, sizeof(de));
 	de.d_ino = ufs_rw32(node->inode->ino, needswap);
 	de.d_type = IFTODT(node->type);
 	de.d_namlen = (uint8_t)strlen(name);
 	strcpy(de.d_name, name);
 	reclen = DIRSIZ_SWAP(0, &de, needswap);
 	de.d_reclen = ufs_rw16(reclen, needswap);
 
 	dp = (struct direct *)(dbuf->buf + dbuf->cur);
 	llen = 0;
 	if (dp != NULL)
 		llen = DIRSIZ_SWAP(0, dp, needswap);
 
 	if (debug & DEBUG_FS_MAKE_DIRBUF)
 		printf(
 		    "ffs_make_dirbuf: dbuf siz %d cur %d lastlen %d\n"
 		    "  ino %d type %d reclen %d namlen %d name %.30s\n",
 		    dbuf->size, dbuf->cur, llen,
 		    ufs_rw32(de.d_ino, needswap), de.d_type, reclen,
 		    de.d_namlen, de.d_name);
 
 	if (reclen + dbuf->cur + llen > roundup(dbuf->size, DIRBLKSIZ)) {
 		if (debug & DEBUG_FS_MAKE_DIRBUF)
 			printf("ffs_make_dirbuf: growing buf to %d\n",
 			    dbuf->size + DIRBLKSIZ);
 		if ((newbuf = realloc(dbuf->buf, dbuf->size + DIRBLKSIZ)) == NULL)
 			err(1, "Allocating memory for directory buffer");
 		dbuf->buf = newbuf;
 		dbuf->size += DIRBLKSIZ;
 		memset(dbuf->buf + dbuf->size - DIRBLKSIZ, 0, DIRBLKSIZ);
 		dbuf->cur = dbuf->size - DIRBLKSIZ;
 	} else if (dp) {			/* shrink end of previous */
 		dp->d_reclen = ufs_rw16(llen,needswap);
 		dbuf->cur += llen;
 	}
 	dp = (struct direct *)(dbuf->buf + dbuf->cur);
 	memcpy(dp, &de, reclen);
 	dp->d_reclen = ufs_rw16(dbuf->size - dbuf->cur, needswap);
 }
 
 /*
  * cribbed from sys/ufs/ffs/ffs_alloc.c
  */
 static void
 ffs_write_inode(union dinode *dp, uint32_t ino, const fsinfo_t *fsopts)
 {
 	char 		*buf;
 	struct ufs1_dinode *dp1;
 	struct ufs2_dinode *dp2, *dip;
 	struct cg	*cgp;
 	struct fs	*fs;
 	int		cg, cgino, i;
 	daddr_t		d;
 	char		sbbuf[FFS_MAXBSIZE];
 	int32_t		initediblk;
 	ffs_opt_t	*ffs_opts = fsopts->fs_specific;
 
 	assert (dp != NULL);
 	assert (ino > 0);
 	assert (fsopts != NULL);
 	assert (ffs_opts != NULL);
 
 	fs = (struct fs *)fsopts->superblock;
 	cg = ino_to_cg(fs, ino);
 	cgino = ino % fs->fs_ipg;
 	if (debug & DEBUG_FS_WRITE_INODE)
 		printf("ffs_write_inode: din %p ino %u cg %d cgino %d\n",
 		    dp, ino, cg, cgino);
 
 	ffs_rdfs(fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, &sbbuf,
 	    fsopts);
 	cgp = (struct cg *)sbbuf;
 	if (!cg_chkmagic_swap(cgp, fsopts->needswap))
 		errx(1, "ffs_write_inode: cg %d: bad magic number", cg);
 
 	assert (isclr(cg_inosused_swap(cgp, fsopts->needswap), cgino));
 
 	buf = malloc(fs->fs_bsize);
 	if (buf == NULL)
 		errx(1, "ffs_write_inode: cg %d: can't alloc inode block", cg);
 
 	dp1 = (struct ufs1_dinode *)buf;
 	dp2 = (struct ufs2_dinode *)buf;
 
 	if (fs->fs_cstotal.cs_nifree == 0)
 		errx(1, "ffs_write_inode: fs out of inodes for ino %u",
 		    ino);
 	if (fs->fs_cs(fs, cg).cs_nifree == 0)
 		errx(1,
 		    "ffs_write_inode: cg %d out of inodes for ino %u",
 		    cg, ino);
 	setbit(cg_inosused_swap(cgp, fsopts->needswap), cgino);
 	ufs_add32(cgp->cg_cs.cs_nifree, -1, fsopts->needswap);
 	fs->fs_cstotal.cs_nifree--;
 	fs->fs_cs(fs, cg).cs_nifree--;
 	if (S_ISDIR(DIP(dp, mode))) {
 		ufs_add32(cgp->cg_cs.cs_ndir, 1, fsopts->needswap);
 		fs->fs_cstotal.cs_ndir++;
 		fs->fs_cs(fs, cg).cs_ndir++; 
 	}
 
 	/*
 	 * Initialize inode blocks on the fly for UFS2.
 	 */
 	initediblk = ufs_rw32(cgp->cg_initediblk, fsopts->needswap);
 	while (ffs_opts->version == 2 && cgino + INOPB(fs) > initediblk &&
 	    initediblk < ufs_rw32(cgp->cg_niblk, fsopts->needswap)) {
 		memset(buf, 0, fs->fs_bsize);
 		dip = (struct ufs2_dinode *)buf;
 		for (i = 0; i < INOPB(fs); i++) {
 			dip->di_gen = random();
 			dip++;
 		}
 		ffs_wtfs(fsbtodb(fs, ino_to_fsba(fs,
 				  cg * fs->fs_ipg + initediblk)),
 		    fs->fs_bsize, buf, fsopts);
 		initediblk += INOPB(fs);
 		cgp->cg_initediblk = ufs_rw32(initediblk, fsopts->needswap);
 	}
 
 
 	ffs_wtfs(fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, &sbbuf,
 	    fsopts);
 
 					/* now write inode */
 	d = fsbtodb(fs, ino_to_fsba(fs, ino));
 	ffs_rdfs(d, fs->fs_bsize, buf, fsopts);
 	if (fsopts->needswap) {
 		if (ffs_opts->version == 1)
 			ffs_dinode1_swap(&dp->ffs1_din,
 			    &dp1[ino_to_fsbo(fs, ino)]);
 		else
 			ffs_dinode2_swap(&dp->ffs2_din,
 			    &dp2[ino_to_fsbo(fs, ino)]);
 	} else {
 		if (ffs_opts->version == 1)
 			dp1[ino_to_fsbo(fs, ino)] = dp->ffs1_din;
 		else
 			dp2[ino_to_fsbo(fs, ino)] = dp->ffs2_din;
 	}
 	ffs_wtfs(d, fs->fs_bsize, buf, fsopts);
 	free(buf);
 }
 
 void
 panic(const char *fmt, ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
 	vwarnx(fmt, ap);
 	va_end(ap);
 	exit(1);
 }
Index: stable/11
===================================================================
--- stable/11	(revision 329098)
+++ stable/11	(revision 329099)

Property changes on: stable/11
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r311458,312237,312314,312374,312947,313042,313047,313166,313328,313332-313333,313337,313348-313349,313389,313442,313451,313575,313645,313710,314114,314213,314275,314945,314948,315008,315408,315427,315645-315646,315648,315653,315850,316064,316078-316079,316100,316104,316111-316112,316171,316279-316280,316287,316311,316343,316424,316436