Index: head/stand/efi/libefi/Makefile =================================================================== --- head/stand/efi/libefi/Makefile +++ head/stand/efi/libefi/Makefile @@ -52,6 +52,8 @@ CFLAGS.teken.c+= -I${SRCTOP}/sys/teken .if ${MK_LOADER_ZFS} != "no" CFLAGS+= -I${ZFSSRC} +CFLAGS+= -I${SYSDIR}/cddl/boot/zfs +CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common CFLAGS+= -DEFI_ZFS_BOOT .endif Index: head/stand/efi/loader/main.c =================================================================== --- head/stand/efi/loader/main.c +++ head/stand/efi/loader/main.c @@ -260,6 +260,8 @@ { char *devname; struct zfs_devdesc currdev; + char *buf = NULL; + bool rv; currdev.dd.d_dev = &zfs_dev; currdev.dd.d_unit = 0; @@ -269,7 +271,18 @@ devname = efi_fmtdev(&currdev); init_zfs_bootenv(devname); - return (sanity_check_currdev()); + rv = sanity_check_currdev(); + if (rv) { + buf = malloc(VDEV_PAD_SIZE); + if (buf != NULL) { + if (zfs_nextboot(&currdev, buf, VDEV_PAD_SIZE) == 0) { + printf("zfs nextboot: %s\n", buf); + set_currdev(buf); + } + free(buf); + } + } + return (rv); } #endif Index: head/stand/i386/gptzfsboot/Makefile =================================================================== --- head/stand/i386/gptzfsboot/Makefile +++ head/stand/i386/gptzfsboot/Makefile @@ -4,7 +4,7 @@ .PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/gptboot \ ${BOOTSRC}/i386/zfsboot ${BOOTSRC}/i386/common \ - ${SASRC} + ${BOOTSRC}/common FILES= gptzfsboot MAN= gptzfsboot.8 @@ -19,12 +19,16 @@ CFLAGS+=-DBOOTPROG=\"gptzfsboot\" \ -O1 \ - -DGPT -DZFS -DBOOT2 \ + -DBOOT2 \ + -DLOADER_GPT_SUPPORT \ + -DLOADER_MBR_SUPPORT \ + -DLOADER_ZFS_SUPPORT \ -DSIOPRT=${BOOT_COMCONSOLE_PORT} \ -DSIOFMT=${B2SIOFMT} \ -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ -I${LDRSRC} \ -I${BOOTSRC}/i386/common \ + -I${BOOTSRC}/i386/libi386 \ -I${ZFSSRC} \ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ @@ -60,15 +64,18 @@ gptldr.out: gptldr.o ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o 
${.TARGET} gptldr.o -CLEANFILES+= gptzfsboot.bin gptzfsboot.out zfsboot.o sio.o cons.o \ - drv.o gpt.o ${OPENCRYPTO_XTS} +OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o +CLEANFILES+= gptzfsboot.bin gptzfsboot.out ${OBJS} ${OPENCRYPTO_XTS} +# i386 standalone support library +LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a + gptzfsboot.bin: gptzfsboot.out ${OBJCOPY} -S -O binary gptzfsboot.out ${.TARGET} -gptzfsboot.out: ${BTXCRT} zfsboot.o sio.o gpt.o drv.o cons.o \ +gptzfsboot.out: ${BTXCRT} ${OBJS} \ ${OPENCRYPTO_XTS} - ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBSA32} + ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32} zfsboot.o: ${ZFSSRC}/zfsimpl.c Index: head/stand/i386/libi386/Makefile =================================================================== --- head/stand/i386/libi386/Makefile +++ head/stand/i386/libi386/Makefile @@ -37,6 +37,8 @@ CFLAGS+= -I${BOOTSRC}/ficl -I${BOOTSRC}/ficl/i386 \ -I${LDRSRC} -I${BOOTSRC}/i386/common \ + -I${SYSDIR}/cddl/boot/zfs \ + -I${SYSDIR}/cddl/contrib/opensolaris/uts/common \ -I${SYSDIR}/contrib/dev/acpica/include # Handle FreeBSD specific %b and %D printf format specifiers Index: head/stand/i386/zfsboot/Makefile =================================================================== --- head/stand/i386/zfsboot/Makefile +++ head/stand/i386/zfsboot/Makefile @@ -2,7 +2,7 @@ .include -.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${SASRC} +.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${BOOTSRC}/common FILES= zfsboot MAN= zfsboot.8 @@ -17,13 +17,17 @@ CFLAGS+=-DBOOTPROG=\"zfsboot\" \ -O1 \ - -DZFS -DBOOT2 \ + -DBOOT2 \ + -DLOADER_GPT_SUPPORT \ + -DLOADER_MBR_SUPPORT \ + -DLOADER_ZFS_SUPPORT \ + -DLOADER_UFS_SUPPORT \ -DSIOPRT=${BOOT_COMCONSOLE_PORT} \ -DSIOFMT=${B2SIOFMT} \ -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ -I${LDRSRC} \ -I${BOOTSRC}/i386/common \ - -I${BOOTSRC}/i386 \ + -I${BOOTSRC}/i386/libi386 \ 
-I${ZFSSRC} \ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ @@ -34,6 +38,8 @@ -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \ -Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings +CFLAGS.part.c+= -DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib + CFLAGS.gcc+= --param max-inline-insns-single=100 LD_FLAGS+=${LD_FLAGS_BIN} @@ -51,14 +57,18 @@ zfsldr.out: zfsldr.o ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} zfsldr.o +OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o CLEANFILES+= zfsboot2 zfsboot.ld zfsboot.ldr zfsboot.bin zfsboot.out \ - zfsboot.o zfsboot.s zfsboot.s.tmp sio.o cons.o drv.o + ${OBJS} # We currently allow 256k bytes for zfsboot - in practice it could be # any size up to 3.5Mb but keeping it fixed size simplifies zfsldr. # BOOT2SIZE= 262144 +# i386 standalone support library +LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a + zfsboot2: zfsboot.ld @set -- `ls -l ${.ALLSRC}`; x=$$((${BOOT2SIZE}-$$5)); \ echo "$$x bytes available"; test $$x -ge 0 @@ -74,8 +84,8 @@ zfsboot.bin: zfsboot.out ${OBJCOPY} -S -O binary zfsboot.out ${.TARGET} -zfsboot.out: ${BTXCRT} zfsboot.o sio.o drv.o cons.o - ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBSA32} +zfsboot.out: ${BTXCRT} ${OBJS} + ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32} SRCS= zfsboot.c Index: head/stand/i386/zfsboot/zfsboot.c =================================================================== --- head/stand/i386/zfsboot/zfsboot.c +++ head/stand/i386/zfsboot/zfsboot.c @@ -16,7 +16,7 @@ #include __FBSDID("$FreeBSD$"); -#include "stand.h" +#include #include #include @@ -35,15 +35,16 @@ #include #include - +#include "bootstrap.h" +#include "libi386.h" #include #include "lib.h" #include "rbx.h" -#include "drv.h" -#include "edd.h" #include "cons.h" #include "bootargs.h" +#include "disk.h" +#include "part.h" #include "paths.h" #include "libzfs.h" @@ 
-61,13 +62,8 @@ #define TYPE_MAXHARD TYPE_DA #define TYPE_FD 2 -#define DEV_GELIBOOT_BSIZE 4096 - extern uint32_t _end; -#ifdef GPT -static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS; -#endif static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ static const unsigned char flags[NOPT] = { RBX_DUAL, @@ -107,785 +103,153 @@ static const unsigned char dev_maj[NDEV] = {30, 4, 2}; +static struct i386_devdesc *bdev; static char cmd[512]; static char cmddup[512]; static char kname[1024]; -static char rootname[256]; static int comspeed = SIOSPD; static struct bootinfo bootinfo; static uint32_t bootdev; static struct zfs_boot_args zfsargs; +#ifdef LOADER_GELI_SUPPORT +static struct geli_boot_args geliargs; +#endif -vm_offset_t high_heap_base; -uint32_t bios_basemem, bios_extmem, high_heap_size; +extern vm_offset_t high_heap_base; +extern uint32_t bios_basemem, bios_extmem, high_heap_size; -static struct bios_smap smap; +static char *heap_top; +static char *heap_bottom; -/* - * The minimum amount of memory to reserve in bios_extmem for the heap. - */ -#define HEAP_MIN (64 * 1024 * 1024) - -static char *heap_next; -static char *heap_end; - -/* Buffers that must not span a 64k boundary. 
*/ -#define READ_BUF_SIZE 8192 -struct dmadat { - char rdbuf[READ_BUF_SIZE]; /* for reading large things */ - char secbuf[READ_BUF_SIZE]; /* for MBR/disklabel */ -}; -static struct dmadat *dmadat; - void exit(int); -void reboot(void); +static void i386_zfs_probe(void); static void load(void); static int parse_cmd(void); -static void bios_getmem(void); -int main(void); #ifdef LOADER_GELI_SUPPORT #include "geliboot.h" static char gelipw[GELI_PW_MAXLEN]; #endif -struct zfsdsk { - struct dsk dsk; -#ifdef LOADER_GELI_SUPPORT - struct geli_dev *gdev; +struct arch_switch archsw; /* MI/MD interface boundary */ +static char boot_devname[2 * ZFS_MAXNAMELEN + 8]; /* disk or pool:dataset */ + +struct devsw *devsw[] = { + &bioshd, +#if defined(LOADER_ZFS_SUPPORT) + &zfs_dev, #endif + NULL }; -#include "zfsimpl.c" - -/* - * Read from a dnode (which must be from a ZPL filesystem). - */ -static int -zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, - size_t size) -{ - const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus; - size_t n; - int rc; - - n = size; - if (*offp + n > zp->zp_size) - n = zp->zp_size - *offp; - - rc = dnode_read(spa, dnode, *offp, start, n); - if (rc) - return (-1); - *offp += n; - - return (n); -} - -/* - * Current ZFS pool - */ -static spa_t *spa; -static spa_t *primary_spa; -static vdev_t *primary_vdev; - -/* - * A wrapper for dskread that doesn't have to worry about whether the - * buffer pointer crosses a 64k boundary. - */ -static int -vdev_read(void *xvdev, void *priv, off_t off, void *buf, size_t bytes) -{ - char *p; - daddr_t lba, alignlba; - off_t diff; - unsigned int nb, alignnb; - struct zfsdsk *zdsk = priv; - - if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) - return (-1); - - p = buf; - lba = off / DEV_BSIZE; - lba += zdsk->dsk.start; - /* - * Align reads to 4k else 4k sector GELIs will not decrypt. - * Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes. 
- */ - alignlba = rounddown2(off, DEV_GELIBOOT_BSIZE) / DEV_BSIZE; - /* - * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the - * start of the GELI partition, not the start of the actual disk. - */ - alignlba += zdsk->dsk.start; - diff = (lba - alignlba) * DEV_BSIZE; - - while (bytes > 0) { - nb = bytes / DEV_BSIZE; - /* - * Ensure that the read size plus the leading offset does not - * exceed the size of the read buffer. - */ - if (nb > (READ_BUF_SIZE - diff) / DEV_BSIZE) - nb = (READ_BUF_SIZE - diff) / DEV_BSIZE; - /* - * Round the number of blocks to read up to the nearest multiple - * of DEV_GELIBOOT_BSIZE. - */ - alignnb = roundup2(nb * DEV_BSIZE + diff, DEV_GELIBOOT_BSIZE) - / DEV_BSIZE; - - if (zdsk->dsk.size > 0 && alignlba + alignnb > - zdsk->dsk.size + zdsk->dsk.start) { - printf("Shortening read at %lld from %d to %lld\n", - alignlba, alignnb, - (zdsk->dsk.size + zdsk->dsk.start) - alignlba); - alignnb = (zdsk->dsk.size + zdsk->dsk.start) - alignlba; - } - - if (drvread(&zdsk->dsk, dmadat->rdbuf, alignlba, alignnb)) - return (-1); -#ifdef LOADER_GELI_SUPPORT - /* decrypt */ - if (zdsk->gdev != NULL) { - if (geli_read(zdsk->gdev, - ((alignlba - zdsk->dsk.start) * DEV_BSIZE), - dmadat->rdbuf, alignnb * DEV_BSIZE)) - return (-1); - } +struct fs_ops *file_system[] = { +#if defined(LOADER_ZFS_SUPPORT) + &zfs_fsops, #endif - memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE); - p += nb * DEV_BSIZE; - lba += nb; - alignlba += alignnb; - bytes -= nb * DEV_BSIZE; - /* Don't need the leading offset after the first block. 
*/ - diff = 0; - } - - return (0); -} -/* Match the signature exactly due to signature madness */ -static int -vdev_read2(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) -{ - return (vdev_read(vdev, priv, off, buf, bytes)); -} - - -static int -vdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) -{ - char *p; - daddr_t lba; - unsigned int nb; - struct zfsdsk *zdsk = priv; - - if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) - return (-1); - - p = buf; - lba = off / DEV_BSIZE; - lba += zdsk->dsk.start; - while (bytes > 0) { - nb = bytes / DEV_BSIZE; - if (nb > READ_BUF_SIZE / DEV_BSIZE) - nb = READ_BUF_SIZE / DEV_BSIZE; - memcpy(dmadat->rdbuf, p, nb * DEV_BSIZE); - if (drvwrite(&zdsk->dsk, dmadat->rdbuf, lba, nb)) - return (-1); - p += nb * DEV_BSIZE; - lba += nb; - bytes -= nb * DEV_BSIZE; - } - - return (0); -} - -static int -xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte) -{ - if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) { - printf("Invalid format\n"); - return (-1); - } - return (0); -} - -/* - * Read Pad2 (formerly "Boot Block Header") area of the first - * vdev label of the given vdev. 
- */ -static int -vdev_read_pad2(vdev_t *vdev, char *buf, size_t size) -{ - blkptr_t bp; - char *tmp; - off_t off = offsetof(vdev_label_t, vl_pad2); - int rc; - - if (size > VDEV_PAD_SIZE) - size = VDEV_PAD_SIZE; - - tmp = malloc(VDEV_PAD_SIZE); - if (tmp == NULL) - return (ENOMEM); - - BP_ZERO(&bp); - BP_SET_LSIZE(&bp, VDEV_PAD_SIZE); - BP_SET_PSIZE(&bp, VDEV_PAD_SIZE); - BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); - BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); - DVA_SET_OFFSET(BP_IDENTITY(&bp), off); - rc = vdev_read_phys(vdev, &bp, tmp, off, 0); - if (rc == 0) - memcpy(buf, tmp, size); - free(tmp); - return (rc); -} - -static int -vdev_clear_pad2(vdev_t *vdev) -{ - char *zeroes; - uint64_t *end; - off_t off = offsetof(vdev_label_t, vl_pad2); - int rc; - - zeroes = malloc(VDEV_PAD_SIZE); - if (zeroes == NULL) - return (ENOMEM); - - memset(zeroes, 0, VDEV_PAD_SIZE); - end = (uint64_t *)(zeroes + VDEV_PAD_SIZE); - /* ZIO_CHECKSUM_LABEL magic and pre-calcualted checksum for all zeros */ - end[-5] = 0x0210da7ab10c7a11; - end[-4] = 0x97f48f807f6e2a3f; - end[-3] = 0xaf909f1658aacefc; - end[-2] = 0xcbd1ea57ff6db48b; - end[-1] = 0x6ec692db0d465fab; - rc = vdev_write(vdev, vdev->v_read_priv, off, zeroes, VDEV_PAD_SIZE); - free(zeroes); - return (rc); -} - -static void -bios_getmem(void) -{ - uint64_t size; - - /* Parse system memory map */ - v86.ebx = 0; - do { - v86.ctl = V86_FLAGS; - v86.addr = 0x15; /* int 0x15 function 0xe820 */ - v86.eax = 0xe820; - v86.ecx = sizeof(struct bios_smap); - v86.edx = SMAP_SIG; - v86.es = VTOPSEG(&smap); - v86.edi = VTOPOFF(&smap); - v86int(); - if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG)) - break; - /* look for a low-memory segment that's large enough */ - if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) && - (smap.length >= (512 * 1024))) - bios_basemem = smap.length; - /* look for the first segment in 'extended' memory */ - if ((smap.type == SMAP_TYPE_MEMORY) && - (smap.base == 0x100000)) { - bios_extmem = smap.length; - } - - /* - * 
Look for the largest segment in 'extended' memory beyond - * 1MB but below 4GB. - */ - if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) && - (smap.base < 0x100000000ull)) { - size = smap.length; - - /* - * If this segment crosses the 4GB boundary, - * truncate it. - */ - if (smap.base + size > 0x100000000ull) - size = 0x100000000ull - smap.base; - - if (size > high_heap_size) { - high_heap_size = size; - high_heap_base = smap.base; - } - } - } while (v86.ebx != 0); - - /* Fall back to the old compatibility function for base memory */ - if (bios_basemem == 0) { - v86.ctl = 0; - v86.addr = 0x12; /* int 0x12 */ - v86int(); - - bios_basemem = (v86.eax & 0xffff) * 1024; - } - - /* - * Fall back through several compatibility functions for extended - * memory. - */ - if (bios_extmem == 0) { - v86.ctl = V86_FLAGS; - v86.addr = 0x15; /* int 0x15 function 0xe801 */ - v86.eax = 0xe801; - v86int(); - if (!V86_CY(v86.efl)) { - bios_extmem = ((v86.ecx & 0xffff) + - ((v86.edx & 0xffff) * 64)) * 1024; - } - } - if (bios_extmem == 0) { - v86.ctl = 0; - v86.addr = 0x15; /* int 0x15 function 0x88 */ - v86.eax = 0x8800; - v86int(); - bios_extmem = (v86.eax & 0xffff) * 1024; - } - - /* - * If we have extended memory and did not find a suitable heap - * region in the SMAP, use the last 3MB of 'extended' memory as a - * high heap candidate. 
- */ - if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) { - high_heap_size = HEAP_MIN; - high_heap_base = bios_extmem + 0x100000 - HEAP_MIN; - } -} - -/* - * Try to detect a device supported by the legacy int13 BIOS - */ -static int -int13probe(int drive) -{ - v86.ctl = V86_FLAGS; - v86.addr = 0x13; - v86.eax = 0x800; - v86.edx = drive; - v86int(); - - if (!V86_CY(v86.efl) && /* carry clear */ - ((v86.edx & 0xff) != (drive & DRV_MASK))) { /* unit # OK */ - if ((v86.ecx & 0x3f) == 0) { /* absurd sector size */ - return (0); /* skip device */ - } - return (1); - } - return (0); -} - -/* - * We call this when we find a ZFS vdev - ZFS consumes the dsk - * structure so we must make a new one. - */ -static struct zfsdsk * -copy_dsk(struct zfsdsk *zdsk) -{ - struct zfsdsk *newdsk; - - newdsk = malloc(sizeof(struct zfsdsk)); - *newdsk = *zdsk; - return (newdsk); -} - -/* - * Get disk size from GPT. - */ -static uint64_t -drvsize_gpt(struct dsk *dskp) -{ -#ifdef GPT - struct gpt_hdr hdr; - char *sec; - - sec = dmadat->secbuf; - if (drvread(dskp, sec, 1, 1)) - return (0); - - memcpy(&hdr, sec, sizeof(hdr)); - if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || - hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || - hdr.hdr_entsz < sizeof(struct gpt_ent) || - DEV_BSIZE % hdr.hdr_entsz != 0) { - return (0); - } - return (hdr.hdr_lba_alt + 1); -#else - return (0); +#if defined(LOADER_UFS_SUPPORT) + &ufs_fsops, #endif -} + NULL +}; -/* - * Get disk size from eax=0x800 and 0x4800. We need to probe both - * because 0x4800 may not be available and we would like to get more - * or less correct disk size - if it is possible at all. - * Note we do not really want to touch drv.c because that code is shared - * with boot2 and we can not afford to grow that code. 
- */ -static uint64_t -drvsize_ext(struct zfsdsk *zdsk) +caddr_t +ptov(uintptr_t x) { - struct dsk *dskp; - uint64_t size, tmp; - int cyl, hds, sec; - - dskp = &zdsk->dsk; - - /* Try to read disk size from GPT */ - size = drvsize_gpt(dskp); - if (size != 0) - return (size); - - v86.ctl = V86_FLAGS; - v86.addr = 0x13; - v86.eax = 0x800; - v86.edx = dskp->drive; - v86int(); - - /* Don't error out if we get bad sector number, try EDD as well */ - if (V86_CY(v86.efl) || /* carry set */ - (v86.edx & 0xff) <= (unsigned)(dskp->drive & 0x7f)) /* unit # bad */ - return (0); - cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; - /* Convert max head # -> # of heads */ - hds = ((v86.edx & 0xff00) >> 8) + 1; - sec = v86.ecx & 0x3f; - - size = (uint64_t)cyl * hds * sec; - - /* Determine if we can use EDD with this device. */ - v86.ctl = V86_FLAGS; - v86.addr = 0x13; - v86.eax = 0x4100; - v86.edx = dskp->drive; - v86.ebx = 0x55aa; - v86int(); - if (V86_CY(v86.efl) || /* carry set */ - (v86.ebx & 0xffff) != 0xaa55 || /* signature */ - (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) - return (size); - - tmp = drvsize(dskp); - if (tmp > size) - size = tmp; - - return (size); + return (PTOV(x)); } -/* - * The "layered" ioctl to read disk/partition size. Unfortunately - * the zfsboot case is hardest, because we do not have full software - * stack available, so we need to do some manual work here. 
- */ -uint64_t -ldi_get_size(void *priv) -{ - struct zfsdsk *zdsk = priv; - uint64_t size = zdsk->dsk.size; - - if (zdsk->dsk.start == 0) - size = drvsize_ext(zdsk); - - return (size * DEV_BSIZE); -} - -static void -probe_drive(struct zfsdsk *zdsk) -{ -#ifdef GPT - struct gpt_hdr hdr; - struct gpt_ent *ent; - unsigned part, entries_per_sec; - daddr_t slba; -#endif -#if defined(GPT) || defined(LOADER_GELI_SUPPORT) - daddr_t elba; -#endif - - struct dos_partition *dp; - char *sec; - unsigned i; - -#ifdef LOADER_GELI_SUPPORT - /* - * Taste the disk, if it is GELI encrypted, decrypt it then dig out the - * partition table and probe each slice/partition in turn for a vdev or - * GELI encrypted vdev. - */ - elba = drvsize_ext(zdsk); - if (elba > 0) { - elba--; - } - zdsk->gdev = geli_taste(vdev_read, zdsk, elba, "disk%u:0:"); - if ((zdsk->gdev != NULL) && (geli_havekey(zdsk->gdev) == 0)) - geli_passphrase(zdsk->gdev, gelipw); -#endif /* LOADER_GELI_SUPPORT */ - - sec = dmadat->secbuf; - zdsk->dsk.start = 0; - -#ifdef GPT - /* - * First check for GPT. - */ - if (drvread(&zdsk->dsk, sec, 1, 1)) { - return; - } - memcpy(&hdr, sec, sizeof(hdr)); - if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || - hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || - hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) { - goto trymbr; - } - - /* - * Probe all GPT partitions for the presence of ZFS pools. We - * return the spa_t for the first we find (if requested). This - * will have the effect of booting from the first pool on the - * disk. 
- * - * If no vdev is found, GELI decrypting the device and try again - */ - entries_per_sec = DEV_BSIZE / hdr.hdr_entsz; - slba = hdr.hdr_lba_table; - elba = slba + hdr.hdr_entries / entries_per_sec; - while (slba < elba) { - zdsk->dsk.start = 0; - if (drvread(&zdsk->dsk, sec, slba, 1)) - return; - for (part = 0; part < entries_per_sec; part++) { - ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz); - if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, - sizeof(uuid_t)) == 0) { - zdsk->dsk.start = ent->ent_lba_start; - zdsk->dsk.size = - ent->ent_lba_end - ent->ent_lba_start + 1; - zdsk->dsk.slice = part + 1; - zdsk->dsk.part = 255; - if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { - /* - * This slice had a vdev. We need a new - * dsk structure now since the vdev now - * owns this one. - */ - zdsk = copy_dsk(zdsk); - } -#ifdef LOADER_GELI_SUPPORT - else if ((zdsk->gdev = geli_taste(vdev_read, - zdsk, ent->ent_lba_end - ent->ent_lba_start, - "disk%up%u:", zdsk->dsk.unit, - zdsk->dsk.slice)) != NULL) { - if (geli_havekey(zdsk->gdev) == 0 || - geli_passphrase(zdsk->gdev, gelipw) - == 0) { - /* - * This slice has GELI, - * check it for ZFS. - */ - if (vdev_probe(vdev_read2, - zdsk, NULL) == 0) { - /* - * This slice had a - * vdev. We need a new - * dsk structure now - * since the vdev now - * owns this one. 
- */ - zdsk = copy_dsk(zdsk); - } - break; - } - } -#endif /* LOADER_GELI_SUPPORT */ - } - } - slba++; - } - return; -trymbr: -#endif /* GPT */ - - if (drvread(&zdsk->dsk, sec, DOSBBSECTOR, 1)) - return; - dp = (void *)(sec + DOSPARTOFF); - - for (i = 0; i < NDOSPART; i++) { - if (!dp[i].dp_typ) - continue; - zdsk->dsk.start = dp[i].dp_start; - zdsk->dsk.size = dp[i].dp_size; - zdsk->dsk.slice = i + 1; - if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { - zdsk = copy_dsk(zdsk); - } -#ifdef LOADER_GELI_SUPPORT - else if ((zdsk->gdev = geli_taste(vdev_read, zdsk, - dp[i].dp_size - dp[i].dp_start, "disk%us%u:")) != NULL) { - if (geli_havekey(zdsk->gdev) == 0 || - geli_passphrase(zdsk->gdev, gelipw) == 0) { - /* - * This slice has GELI, check it for ZFS. - */ - if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { - /* - * This slice had a vdev. We need a new - * dsk structure now since the vdev now - * owns this one. - */ - zdsk = copy_dsk(zdsk); - } - break; - } - } -#endif /* LOADER_GELI_SUPPORT */ - } -} - int main(void) { - dnode_phys_t dn; - off_t off; - struct zfsdsk *zdsk; - int autoboot, i; - int nextboot; - int rc; + unsigned i; + int auto_boot, fd, nextboot = 0; + struct disk_devdesc devdesc; - dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); - bios_getmem(); if (high_heap_size > 0) { - heap_end = PTOV(high_heap_base + high_heap_size); - heap_next = PTOV(high_heap_base); + heap_top = PTOV(high_heap_base + high_heap_size); + heap_bottom = PTOV(high_heap_base); } else { - heap_next = (char *)dmadat + sizeof(*dmadat); - heap_end = (char *)PTOV(bios_basemem); + heap_bottom = (char *) + (roundup2(__base + (int32_t)&_end, 0x10000) - __base); + heap_top = (char *)PTOV(bios_basemem); } - setheap(heap_next, heap_end); + setheap(heap_bottom, heap_top); - zdsk = calloc(1, sizeof(struct zfsdsk)); - zdsk->dsk.drive = *(uint8_t *)PTOV(ARGS); - zdsk->dsk.type = zdsk->dsk.drive & DRV_HARD ? 
TYPE_AD : TYPE_FD; - zdsk->dsk.unit = zdsk->dsk.drive & DRV_MASK; - zdsk->dsk.slice = *(uint8_t *)PTOV(ARGS + 1) + 1; - zdsk->dsk.part = 0; - zdsk->dsk.start = 0; - zdsk->dsk.size = drvsize_ext(zdsk); + /* + * Initialise the block cache. Set the upper limit. + */ + bcache_init(32768, 512); + archsw.arch_autoload = NULL; + archsw.arch_getdev = i386_getdev; + archsw.arch_copyin = NULL; + archsw.arch_copyout = NULL; + archsw.arch_readin = NULL; + archsw.arch_isainb = NULL; + archsw.arch_isaoutb = NULL; + archsw.arch_zfs_probe = i386_zfs_probe; + bootinfo.bi_version = BOOTINFO_VERSION; bootinfo.bi_size = sizeof(bootinfo); bootinfo.bi_basemem = bios_basemem / 1024; bootinfo.bi_extmem = bios_extmem / 1024; bootinfo.bi_memsizes_valid++; - bootinfo.bi_bios_dev = zdsk->dsk.drive; + bootinfo.bi_bios_dev = *(uint8_t *)PTOV(ARGS); - bootdev = MAKEBOOTDEV(dev_maj[zdsk->dsk.type], - zdsk->dsk.slice, zdsk->dsk.unit, zdsk->dsk.part); + /* Set up fall back device name. */ + snprintf(boot_devname, sizeof (boot_devname), "disk%d:", + bd_bios2unit(bootinfo.bi_bios_dev)); - /* Process configuration file */ + for (i = 0; devsw[i] != NULL; i++) + if (devsw[i]->dv_init != NULL) + (devsw[i]->dv_init)(); - autoboot = 1; + disk_parsedev(&devdesc, boot_devname + 4, NULL); - zfs_init(); + bootdev = MAKEBOOTDEV(dev_maj[DEVT_DISK], devdesc.d_slice + 1, + devdesc.dd.d_unit, + devdesc.d_partition >= 0 ? devdesc.d_partition : 0xff); /* - * Probe the boot drive first - we will try to boot from whatever - * pool we find on that drive. + * zfs_fmtdev() can be called only after dv_init */ - probe_drive(zdsk); - - /* - * Probe the rest of the drives that the bios knows about. This - * will find any other available pools and it may fill in missing - * vdevs for the boot pool. 
- */ -#ifndef VIRTUALBOX - for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++) -#else - for (i = 0; i < MAXBDDEV; i++) -#endif - { - if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS)) - continue; - - if (!int13probe(i | DRV_HARD)) - break; - - zdsk = calloc(1, sizeof(struct zfsdsk)); - zdsk->dsk.drive = i | DRV_HARD; - zdsk->dsk.type = zdsk->dsk.drive & TYPE_AD; - zdsk->dsk.unit = i; - zdsk->dsk.slice = 0; - zdsk->dsk.part = 0; - zdsk->dsk.start = 0; - zdsk->dsk.size = drvsize_ext(zdsk); - probe_drive(zdsk); - } - - /* - * The first discovered pool, if any, is the pool. - */ - spa = spa_get_primary(); - if (!spa) { - printf("%s: No ZFS pools located, can't boot\n", BOOTPROG); - for (;;) - ; - } - - primary_spa = spa; - primary_vdev = spa_get_primary_vdev(spa); - - nextboot = 0; - rc = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd)); - if (vdev_clear_pad2(primary_vdev)) - printf("failed to clear pad2 area of primary vdev\n"); - if (rc == 0) { - if (*cmd) { - /* - * We could find an old-style ZFS Boot Block header - * here. Simply ignore it. - */ - if (*(uint64_t *)cmd != 0x2f5b007b10c) { - /* - * Note that parse() is destructive to cmd[] - * and we also want to honor RBX_QUIET option - * that could be present in cmd[]. 
*/ - nextboot = 1; - memcpy(cmddup, cmd, sizeof(cmd)); - if (parse_cmd()) { - printf("failed to parse pad2 area of " - "primary vdev\n"); - reboot(); - } + if (bdev != NULL && bdev->dd.d_dev->dv_type == DEVT_ZFS) { + /* set up proper device name string for ZFS */ + strlcpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname)); + if (zfs_nextboot(bdev, cmd, sizeof(cmd)) == 0) { + nextboot = 1; + memcpy(cmddup, cmd, sizeof(cmd)); + if (parse_cmd()) { if (!OPT_CHECK(RBX_QUIET)) - printf("zfs nextboot: %s\n", cmddup); + printf("failed to parse pad2 area\n"); + exit(0); } + if (!OPT_CHECK(RBX_QUIET)) + printf("zfs nextboot: %s\n", cmddup); /* Do not process this command twice */ *cmd = 0; } - } else - printf("failed to read pad2 area of primary vdev\n"); + } - /* Mount ZFS only if it's not already mounted via nextboot parsing. */ - if (zfsmount.spa == NULL && - (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) { - printf("%s: failed to mount default pool %s\n", - BOOTPROG, spa->spa_name); - autoboot = 0; - } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 || - zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) { - off = 0; - zfs_read(spa, &dn, &off, cmd, sizeof(cmd)); + /* now make sure we have bdev in all cases */ + free(bdev); + i386_getdev((void **)&bdev, boot_devname, NULL); + + env_setenv("currdev", EV_VOLATILE, boot_devname, i386_setcurrdev, + env_nounset); + + /* Process configuration file */ + auto_boot = 1; + + fd = open(PATH_CONFIG, O_RDONLY); + if (fd == -1) + fd = open(PATH_DOTCONFIG, O_RDONLY); + + if (fd != -1) { + read(fd, cmd, sizeof (cmd)); + close(fd); } if (*cmd) { @@ -896,7 +260,7 @@ */ memcpy(cmddup, cmd, sizeof(cmd)); if (parse_cmd()) - autoboot = 0; + auto_boot = 0; if (!OPT_CHECK(RBX_QUIET)) printf("%s: %s\n", PATH_CONFIG, cmddup); /* Do not process this command twice */ @@ -904,10 +268,10 @@ } /* Do not risk waiting at the prompt forever. 
*/ - if (nextboot && !autoboot) - reboot(); + if (nextboot && !auto_boot) + exit(0); - if (autoboot && !*kname) { + if (auto_boot && !*kname) { /* * Iterate through the list of loader and kernel paths, * trying to load. If interrupted by a keypress, or in case of @@ -924,28 +288,17 @@ /* Present the user with the boot2 prompt. */ for (;;) { - if (!autoboot || !OPT_CHECK(RBX_QUIET)) { + if (!auto_boot || !OPT_CHECK(RBX_QUIET)) { printf("\nFreeBSD/x86 boot\n"); - if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0) - printf("Default: %s/<0x%llx>:%s\n" - "boot: ", - spa->spa_name, zfsmount.rootobj, kname); - else if (rootname[0] != '\0') - printf("Default: %s/%s:%s\n" - "boot: ", - spa->spa_name, rootname, kname); - else - printf("Default: %s:%s\n" - "boot: ", - spa->spa_name, kname); + printf("Default: %s%s\nboot: ", boot_devname, kname); } if (ioctrl & IO_SERIAL) sio_flush(); - if (!autoboot || keyhit(5)) + if (!auto_boot || keyhit(5)) getstr(cmd, sizeof(cmd)); - else if (!autoboot || !OPT_CHECK(RBX_QUIET)) + else if (!auto_boot || !OPT_CHECK(RBX_QUIET)) putchar('\n'); - autoboot = 0; + auto_boot = 0; if (parse_cmd()) putchar('\a'); else @@ -960,12 +313,6 @@ __exit(x); } -void -reboot(void) -{ - __exit(0); -} - static void load(void) { @@ -976,155 +323,229 @@ static Elf32_Phdr ep[2]; static Elf32_Shdr es[2]; caddr_t p; - dnode_phys_t dn; - off_t off; uint32_t addr, x; - int fmt, i, j; + int fd, fmt, i, j; + ssize_t size; - if (zfs_lookup(&zfsmount, kname, &dn)) { + if ((fd = open(kname, O_RDONLY)) == -1) { printf("\nCan't find %s\n", kname); return; } - off = 0; - if (xfsread(&dn, &off, &hdr, sizeof(hdr))) + + size = sizeof(hdr); + if (read(fd, &hdr, sizeof (hdr)) != size) { + close(fd); return; - if (N_GETMAGIC(hdr.ex) == ZMAGIC) + } + if (N_GETMAGIC(hdr.ex) == ZMAGIC) { fmt = 0; - else if (IS_ELF(hdr.eh)) + } else if (IS_ELF(hdr.eh)) { fmt = 1; - else { + } else { printf("Invalid %s\n", "format"); + close(fd); return; } if (fmt == 0) { addr = hdr.ex.a_entry & 
0xffffff; p = PTOV(addr); - off = PAGE_SIZE; - if (xfsread(&dn, &off, p, hdr.ex.a_text)) + lseek(fd, PAGE_SIZE, SEEK_SET); + size = hdr.ex.a_text; + if (read(fd, p, hdr.ex.a_text) != size) { + close(fd); return; + } p += roundup2(hdr.ex.a_text, PAGE_SIZE); - if (xfsread(&dn, &off, p, hdr.ex.a_data)) + size = hdr.ex.a_data; + if (read(fd, p, hdr.ex.a_data) != size) { + close(fd); return; + } p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); p += sizeof(hdr.ex.a_syms); if (hdr.ex.a_syms) { - if (xfsread(&dn, &off, p, hdr.ex.a_syms)) + size = hdr.ex.a_syms; + if (read(fd, p, hdr.ex.a_syms) != size) { + close(fd); return; + } p += hdr.ex.a_syms; - if (xfsread(&dn, &off, p, sizeof(int))) + size = sizeof (int); + if (read(fd, p, sizeof (int)) != size) { + close(fd); return; + } x = *(uint32_t *)p; p += sizeof(int); x -= sizeof(int); - if (xfsread(&dn, &off, p, x)) + size = x; + if (read(fd, p, x) != size) { + close(fd); return; + } p += x; } } else { - off = hdr.eh.e_phoff; + lseek(fd, hdr.eh.e_phoff, SEEK_SET); for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { - if (xfsread(&dn, &off, ep + j, sizeof(ep[0]))) + size = sizeof (ep[0]); + if (read(fd, ep + j, sizeof (ep[0])) != size) { + close(fd); return; + } if (ep[j].p_type == PT_LOAD) j++; } for (i = 0; i < 2; i++) { p = PTOV(ep[i].p_paddr & 0xffffff); - off = ep[i].p_offset; - if (xfsread(&dn, &off, p, ep[i].p_filesz)) + lseek(fd, ep[i].p_offset, SEEK_SET); + size = ep[i].p_filesz; + if (read(fd, p, ep[i].p_filesz) != size) { + close(fd); return; + } } p += roundup2(ep[1].p_memsz, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { - off = hdr.eh.e_shoff + sizeof(es[0]) * - (hdr.eh.e_shstrndx + 1); - if (xfsread(&dn, &off, &es, sizeof(es))) + lseek(fd, hdr.eh.e_shoff + + sizeof (es[0]) * (hdr.eh.e_shstrndx + 1), + SEEK_SET); + size = sizeof(es); + if (read(fd, &es, sizeof (es)) != size) { + 
close(fd); return; + } for (i = 0; i < 2; i++) { memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); p += sizeof(es[i].sh_size); - off = es[i].sh_offset; - if (xfsread(&dn, &off, p, es[i].sh_size)) + lseek(fd, es[i].sh_offset, SEEK_SET); + size = es[i].sh_size; + if (read(fd, p, es[i].sh_size) != size) { + close(fd); return; + } p += es[i].sh_size; } } addr = hdr.eh.e_entry & 0xffffff; } + close(fd); + bootinfo.bi_esymtab = VTOP(p); bootinfo.bi_kernelname = VTOP(kname); - zfsargs.size = sizeof(zfsargs); - zfsargs.pool = zfsmount.spa->spa_guid; - zfsargs.root = zfsmount.rootobj; - zfsargs.primary_pool = primary_spa->spa_guid; #ifdef LOADER_GELI_SUPPORT explicit_bzero(gelipw, sizeof(gelipw)); - export_geli_boot_data(&zfsargs.gelidata); #endif - if (primary_vdev != NULL) - zfsargs.primary_vdev = primary_vdev->v_guid; - else - printf("failed to detect primary vdev\n"); - /* - * Note that the zfsargs struct is passed by value, not by pointer. - * Code in btxldr.S copies the values from the entry stack to a fixed - * location within loader(8) at startup due to the presence of - * KARGS_FLAGS_EXTARG. - */ - __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), - bootdev, - KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, - (uint32_t)spa->spa_guid, - (uint32_t)(spa->spa_guid >> 32), - VTOP(&bootinfo), - zfsargs); + + if (bdev->dd.d_dev->dv_type == DEVT_ZFS) { + zfsargs.size = sizeof(zfsargs); + zfsargs.pool = bdev->d_kind.zfs.pool_guid; + zfsargs.root = bdev->d_kind.zfs.root_guid; +#ifdef LOADER_GELI_SUPPORT + export_geli_boot_data(&zfsargs.gelidata); +#endif + /* + * Note that the zfsargs struct is passed by value, not by + * pointer. Code in btxldr.S copies the values from the entry + * stack to a fixed location within loader(8) at startup due + * to the presence of KARGS_FLAGS_EXTARG. 
+ */ + __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), + bootdev, + KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, + (uint32_t)bdev->d_kind.zfs.pool_guid, + (uint32_t)(bdev->d_kind.zfs.pool_guid >> 32), + VTOP(&bootinfo), + zfsargs); + } else { +#ifdef LOADER_GELI_SUPPORT + geliargs.size = sizeof(geliargs); + export_geli_boot_data(&geliargs.gelidata); +#endif + + /* + * Note that the geliargs struct is passed by value, not by + * pointer. Code in btxldr.S copies the values from the entry + * stack to a fixed location within loader(8) at startup due + * to the presence of the KARGS_FLAGS_EXTARG flag. + */ + __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), + bootdev, +#ifdef LOADER_GELI_SUPPORT + KARGS_FLAGS_GELI | KARGS_FLAGS_EXTARG, 0, 0, + VTOP(&bootinfo), geliargs +#else + 0, 0, 0, VTOP(&bootinfo) +#endif + ); + } } static int -zfs_mount_ds(char *dsname) +mount_root(char *arg) { - uint64_t newroot; - spa_t *newspa; - char *q; + char *root; + struct i386_devdesc *ddesc; + uint8_t part; - q = strchr(dsname, '/'); - if (q) - *q++ = '\0'; - newspa = spa_find_by_name(dsname); - if (newspa == NULL) { - printf("\nCan't find ZFS pool %s\n", dsname); - return (-1); + if (asprintf(&root, "%s:", arg) < 0) + return (1); + + if (i386_getdev((void **)&ddesc, root, NULL)) { + free(root); + return (1); } - if (zfs_spa_init(newspa)) - return (-1); - - newroot = 0; - if (q) { - if (zfs_lookup_dataset(newspa, q, &newroot)) { - printf("\nCan't find dataset %s in ZFS pool %s\n", - q, newspa->spa_name); - return (-1); - } + /* we should have new device descriptor, free old and replace it. 
*/ + free(bdev); + bdev = ddesc; + if (bdev->dd.d_dev->dv_type == DEVT_DISK) { + if (bdev->d_kind.biosdisk.partition == -1) + part = 0xff; + else + part = bdev->d_kind.biosdisk.partition; + bootdev = MAKEBOOTDEV(dev_maj[bdev->dd.d_dev->dv_type], + bdev->d_kind.biosdisk.slice + 1, + bdev->dd.d_unit, part); + bootinfo.bi_bios_dev = bd_unit2bios(bdev); } - if (zfs_mount(newspa, newroot, &zfsmount)) { - printf("\nCan't mount ZFS dataset\n"); - return (-1); - } - spa = newspa; + strncpy(boot_devname, root, sizeof (boot_devname)); + setenv("currdev", root, 1); + free(root); return (0); } +static void +fs_list(char *arg) +{ + int fd; + struct dirent *d; + char line[80]; + + fd = open(arg, O_RDONLY); + if (fd < 0) + return; + pager_open(); + while ((d = readdirfd(fd)) != NULL) { + sprintf(line, "%s\n", d->d_name); + if (pager_output(line)) + break; + } + pager_close(); + close(fd); +} + static int parse_cmd(void) { char *arg = cmd; char *ep, *p, *q; const char *cp; + char line[80]; int c, i, j; while ((c = *arg++)) { @@ -1173,13 +594,15 @@ ioctrl &= ~IO_SERIAL; } } if (c == '?') { - dnode_phys_t dn; - - if (zfs_lookup(&zfsmount, arg, &dn) == 0) { - zap_list(spa, &dn); - } + printf("\n"); + if (*arg == '\0') + arg = (char *)"/"; + fs_list(arg); + zfs_list(arg); return (-1); } else { + char *ptr; + printf("\n"); arg--; /* @@ -1187,24 +610,39 @@ * hope no-one wants to load /status as a kernel. */ if (strcmp(arg, "status") == 0) { - spa_all_status(); + pager_open(); + for (i = 0; devsw[i] != NULL; i++) { + if (devsw[i]->dv_print != NULL) { + if (devsw[i]->dv_print(1)) + break; + } else { + snprintf(line, sizeof(line), + "%s: (unknown)\n", + devsw[i]->dv_name); + if (pager_output(line)) + break; + } + } + pager_close(); return (-1); } /* * If there is "zfs:" prefix simply ignore it. */ - if (strncmp(arg, "zfs:", 4) == 0) - arg += 4; + ptr = arg; + if (strncmp(ptr, "zfs:", 4) == 0) + ptr += 4; /* * If there is a colon, switch pools. 
*/ - q = strchr(arg, ':'); + q = strchr(ptr, ':'); if (q) { *q++ = '\0'; - if (zfs_mount_ds(arg) != 0) + if (mount_root(arg) != 0) { return (-1); + } arg = q; } if ((i = ep - arg)) { @@ -1216,4 +654,44 @@ arg = p; } return (0); +} + +/* + * Probe all disks to discover ZFS pools. The idea is to walk all possible + * disk devices; however, we also need to identify the possible boot pool. + * For boot pool detection we use the boot disk passed to us by the BIOS, + * recorded in bootinfo.bi_bios_dev. + */ +static void +i386_zfs_probe(void) +{ + char devname[32]; + int boot_unit; + struct i386_devdesc dev; + uint64_t pool_guid = 0; + + dev.dd.d_dev = &bioshd; + /* Translate bios dev to our unit number. */ + boot_unit = bd_bios2unit(bootinfo.bi_bios_dev); + + /* + * Open all the disks we can find and see if we can reconstruct + * ZFS pools from them. + */ + for (dev.dd.d_unit = 0; bd_unit2bios(&dev) >= 0; dev.dd.d_unit++) { + snprintf(devname, sizeof (devname), "%s%d:", bioshd.dv_name, + dev.dd.d_unit); + /* If this is not the boot disk, use generic probe. 
*/ + if (dev.dd.d_unit != boot_unit) + zfs_probe_dev(devname, NULL); + else + zfs_probe_dev(devname, &pool_guid); + + if (pool_guid != 0 && bdev == NULL) { + bdev = malloc(sizeof (struct i386_devdesc)); + bzero(bdev, sizeof (struct i386_devdesc)); + bdev->dd.d_dev = &zfs_dev; + bdev->d_kind.zfs.pool_guid = pool_guid; + } + } } Index: head/stand/libsa/zfs/Makefile.inc =================================================================== --- head/stand/libsa/zfs/Makefile.inc +++ head/stand/libsa/zfs/Makefile.inc @@ -1,7 +1,7 @@ # $FreeBSD$ .PATH: ${ZFSSRC} -SRCS+= zfs.c skein.c skein_block.c list.c +SRCS+= zfs.c nvlist.c skein.c skein_block.c list.c # Do not unroll skein loops, reduce code size CFLAGS+= -DSKEIN_LOOP=111 .PATH: ${SYSDIR}/crypto/skein Index: head/stand/libsa/zfs/libzfs.h =================================================================== --- head/stand/libsa/zfs/libzfs.h +++ head/stand/libsa/zfs/libzfs.h @@ -26,6 +26,12 @@ * $FreeBSD$ */ +#include + +#ifdef LOADER_GELI_SUPPORT +#include +#endif + #ifndef _BOOT_LIBZFS_H_ #define _BOOT_LIBZFS_H_ @@ -40,13 +46,80 @@ uint64_t root_guid; }; -#ifdef LOADER_GELI_SUPPORT -#include -#endif +/* nvp implementation version */ +#define NV_VERSION 0 +/* nvlist persistent unique name flags, stored in nvl_nvflags */ +#define NV_UNIQUE_NAME 0x1 +#define NV_UNIQUE_NAME_TYPE 0x2 + +#define NV_ALIGN4(x) (((x) + 3) & ~3) + +/* + * nvlist header. + * nvlist has 4 bytes header followed by version and flags, then nvpairs + * and the list is terminated by double zero. + */ +typedef struct { + char nvh_encoding; + char nvh_endian; + char nvh_reserved1; + char nvh_reserved2; +} nvs_header_t; + +typedef struct { + nvs_header_t nv_header; + size_t nv_asize; + size_t nv_size; + uint8_t *nv_data; + uint8_t *nv_idx; +} nvlist_t; + +/* + * nvpair header. 
+ * nvpair has encoded and decoded size + * name string (size and data) + * data type and number of elements + * data + */ +typedef struct { + unsigned encoded_size; + unsigned decoded_size; +} nvp_header_t; + +/* + * nvlist stream head. + */ +typedef struct { + unsigned nvl_version; + unsigned nvl_nvflag; + nvp_header_t nvl_pair; +} nvs_data_t; + +typedef struct { + unsigned nv_size; + uint8_t nv_data[]; /* NV_ALIGN4(string) */ +} nv_string_t; + +typedef struct { + unsigned nv_type; /* data_type_t */ + unsigned nv_nelem; /* number of elements */ + uint8_t nv_data[]; /* data stream */ +} nv_pair_data_t; + +nvlist_t *nvlist_create(int); +void nvlist_destroy(nvlist_t *); +nvlist_t *nvlist_import(const uint8_t *, char, char); +int nvlist_remove(nvlist_t *, const char *, data_type_t); +void nvlist_print(nvlist_t *, unsigned int); +int nvlist_find(const nvlist_t *, const char *, data_type_t, + int *, void *, int *); +int nvlist_next(nvlist_t *); + int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path); char *zfs_fmtdev(void *vdev); +int zfs_nextboot(void *vdev, char *buf, size_t size); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); uint64_t ldi_get_size(void *); Index: head/stand/libsa/zfs/nvlist.c =================================================================== --- head/stand/libsa/zfs/nvlist.c +++ head/stand/libsa/zfs/nvlist.c @@ -0,0 +1,601 @@ +/*- + * Copyright 2020 Toomas Soome + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include "libzfs.h" + +typedef struct xdr { + int (*xdr_getint)(const struct xdr *, const void *, int *); +} xdr_t; + +static int xdr_int(const xdr_t *, const void *, int *); +static int mem_int(const xdr_t *, const void *, int *); +static void nvlist_decode_nvlist(const xdr_t *, nvlist_t *); +static int nvlist_size(const xdr_t *, const uint8_t *); + +/* + * transform data from network to host. + */ +xdr_t ntoh = { + .xdr_getint = xdr_int +}; + +/* + * transform data from host to host. + */ +xdr_t native = { + .xdr_getint = mem_int +}; + +/* + * transform data from host to network. 
+ */ +xdr_t hton = { + .xdr_getint = xdr_int +}; + +static int +xdr_short(const xdr_t *xdr, const uint8_t *buf, short *ip) +{ + int i, rv; + + rv = xdr->xdr_getint(xdr, buf, &i); + *ip = i; + return (rv); +} + +static int +xdr_u_short(const xdr_t *xdr, const uint8_t *buf, unsigned short *ip) +{ + unsigned u; + int rv; + + rv = xdr->xdr_getint(xdr, buf, &u); + *ip = u; + return (rv); +} + +static int +xdr_int(const xdr_t *xdr __unused, const void *buf, int *ip) +{ + *ip = be32dec(buf); + return (sizeof(int)); +} + +static int +xdr_u_int(const xdr_t *xdr __unused, const void *buf, unsigned *ip) +{ + *ip = be32dec(buf); + return (sizeof(unsigned)); +} + +static int +xdr_string(const xdr_t *xdr, const void *buf, nv_string_t *s) +{ + int size; + + size = xdr->xdr_getint(xdr, buf, &s->nv_size); + size = NV_ALIGN4(size + s->nv_size); + return (size); +} + +static int +xdr_int64(const xdr_t *xdr, const uint8_t *buf, int64_t *lp) +{ + int hi, rv; + unsigned lo; + + rv = xdr->xdr_getint(xdr, buf, &hi); + rv += xdr->xdr_getint(xdr, buf + rv, &lo); + *lp = (((int64_t)hi) << 32) | lo; + return (rv); +} + +static int +xdr_uint64(const xdr_t *xdr, const uint8_t *buf, uint64_t *lp) +{ + unsigned hi, lo; + int rv; + + rv = xdr->xdr_getint(xdr, buf, &hi); + rv += xdr->xdr_getint(xdr, buf + rv, &lo); + *lp = (((int64_t)hi) << 32) | lo; + return (rv); +} + +static int +xdr_char(const xdr_t *xdr, const uint8_t *buf, char *cp) +{ + int i, rv; + + rv = xdr->xdr_getint(xdr, buf, &i); + *cp = i; + return (rv); +} + +/* + * read native data. + */ +static int +mem_int(const xdr_t *xdr, const void *buf, int *i) +{ + *i = *(int *)buf; + return (sizeof(int)); +} + +void +nvlist_destroy(nvlist_t *nvl) +{ + if (nvl != NULL) { + /* Free data if it was allocated by us. 
*/ + if (nvl->nv_asize > 0) + free(nvl->nv_data); + } + free(nvl); +} + +char * +nvstring_get(nv_string_t *nvs) +{ + char *s; + + s = malloc(nvs->nv_size + 1); + if (s != NULL) { + bcopy(nvs->nv_data, s, nvs->nv_size); + s[nvs->nv_size] = '\0'; + } + return (s); +} + +/* + * Create empty nvlist. + * The nvlist is terminated by 2x zeros (8 bytes). + */ +nvlist_t * +nvlist_create(int flag) +{ + nvlist_t *nvl; + nvs_data_t *nvs; + + nvl = calloc(1, sizeof(*nvl)); + if (nvl == NULL) + return (nvl); + + nvl->nv_header.nvh_encoding = NV_ENCODE_XDR; + nvl->nv_header.nvh_endian = _BYTE_ORDER == _LITTLE_ENDIAN; + + nvl->nv_asize = nvl->nv_size = sizeof(*nvs); + nvs = calloc(1, nvl->nv_asize); + if (nvs == NULL) { + free(nvl); + return (NULL); + } + /* data in nvlist is byte stream */ + nvl->nv_data = (uint8_t *)nvs; + + nvs->nvl_version = NV_VERSION; + nvs->nvl_nvflag = flag; + return (nvl); +} + +static void +nvlist_nvp_decode(const xdr_t *xdr, nvlist_t *nvl, nvp_header_t *nvph) +{ + nv_string_t *nv_string; + nv_pair_data_t *nvp_data; + nvlist_t nvlist; + + nv_string = (nv_string_t *)nvl->nv_idx; + nvl->nv_idx += xdr_string(xdr, &nv_string->nv_size, nv_string); + nvp_data = (nv_pair_data_t *)nvl->nv_idx; + + nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_type, &nvp_data->nv_type); + nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_nelem, &nvp_data->nv_nelem); + + switch (nvp_data->nv_type) { + case DATA_TYPE_NVLIST: + case DATA_TYPE_NVLIST_ARRAY: + bzero(&nvlist, sizeof (nvlist)); + nvlist.nv_data = &nvp_data->nv_data[0]; + nvlist.nv_idx = nvlist.nv_data; + for (int i = 0; i < nvp_data->nv_nelem; i++) { + nvlist.nv_asize = + nvlist_size(xdr, nvlist.nv_data); + nvlist_decode_nvlist(xdr, &nvlist); + nvl->nv_idx = nvlist.nv_idx; + nvlist.nv_data = nvlist.nv_idx; + } + break; + + case DATA_TYPE_BOOLEAN: + /* BOOLEAN does not take value space */ + break; + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + nvl->nv_idx += xdr_char(xdr, &nvp_data->nv_data[0], + (char 
*)&nvp_data->nv_data[0]); + break; + + case DATA_TYPE_INT16: + nvl->nv_idx += xdr_short(xdr, &nvp_data->nv_data[0], + (short *)&nvp_data->nv_data[0]); + break; + + case DATA_TYPE_UINT16: + nvl->nv_idx += xdr_u_short(xdr, &nvp_data->nv_data[0], + (unsigned short *)&nvp_data->nv_data[0]); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_INT32: + nvl->nv_idx += xdr_int(xdr, &nvp_data->nv_data[0], + (int *)&nvp_data->nv_data[0]); + break; + + case DATA_TYPE_UINT32: + nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_data[0], + (unsigned *)&nvp_data->nv_data[0]); + break; + + case DATA_TYPE_INT64: + nvl->nv_idx += xdr_int64(xdr, &nvp_data->nv_data[0], + (int64_t *)&nvp_data->nv_data[0]); + break; + + case DATA_TYPE_UINT64: + nvl->nv_idx += xdr_uint64(xdr, &nvp_data->nv_data[0], + (uint64_t *)&nvp_data->nv_data[0]); + break; + + case DATA_TYPE_STRING: + nv_string = (nv_string_t *)&nvp_data->nv_data[0]; + nvl->nv_idx += xdr_string(xdr, &nvp_data->nv_data[0], + nv_string); + + break; + } +} + +static void +nvlist_decode_nvlist(const xdr_t *xdr, nvlist_t *nvl) +{ + nvp_header_t *nvph; + nvs_data_t *nvs = (nvs_data_t *)nvl->nv_data; + + nvl->nv_idx = nvl->nv_data; + nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_version, + &nvs->nvl_version); + nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_nvflag, + &nvs->nvl_nvflag); + + nvph = &nvs->nvl_pair; + nvl->nv_idx += xdr->xdr_getint(xdr, + (const uint8_t *)&nvph->encoded_size, &nvph->encoded_size); + nvl->nv_idx += xdr->xdr_getint(xdr, + (const uint8_t *)&nvph->decoded_size, &nvph->decoded_size); + + while (nvph->encoded_size && nvph->decoded_size) { + nvlist_nvp_decode(xdr, nvl, nvph); + + nvph = (nvp_header_t *)(nvl->nv_idx); + nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->encoded_size, + &nvph->encoded_size); + nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->decoded_size, + &nvph->decoded_size); + } +} + +static int +nvlist_size(const xdr_t *xdr, const uint8_t *stream) +{ + const uint8_t *p, *pair; 
+ unsigned encoded_size, decoded_size; + + p = stream; + p += 2 * sizeof(unsigned); + + pair = p; + p += xdr->xdr_getint(xdr, p, &encoded_size); + p += xdr->xdr_getint(xdr, p, &decoded_size); + while (encoded_size && decoded_size) { + p = pair + encoded_size; + pair = p; + p += xdr->xdr_getint(xdr, p, &encoded_size); + p += xdr->xdr_getint(xdr, p, &decoded_size); + } + return (p - stream); +} + +/* + * Import nvlist from byte stream. + * Determine the stream size and allocate private copy. + * Then translate the data. + */ +nvlist_t * +nvlist_import(const uint8_t *stream, char encoding, char endian) +{ + nvlist_t *nvl; + + if (encoding != NV_ENCODE_XDR) + return (NULL); + + nvl = malloc(sizeof(*nvl)); + if (nvl == NULL) + return (nvl); + + nvl->nv_asize = nvl->nv_size = nvlist_size(&ntoh, stream); + nvl->nv_data = malloc(nvl->nv_asize); + if (nvl->nv_data == NULL) { + free(nvl); + return (NULL); + } + nvl->nv_idx = nvl->nv_data; + bcopy(stream, nvl->nv_data, nvl->nv_asize); + + nvlist_decode_nvlist(&ntoh, nvl); + nvl->nv_idx = nvl->nv_data; + return (nvl); +} + +/* + * remove pair from this nvlist. + */ +int +nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type) +{ + uint8_t *head, *tail; + nvs_data_t *data; + nvp_header_t *nvp; + nv_string_t *nvp_name; + nv_pair_data_t *nvp_data; + size_t size; + + if (nvl == NULL || nvl->nv_data == NULL || name == NULL) + return (EINVAL); + + head = nvl->nv_data; + data = (nvs_data_t *)head; + nvp = &data->nvl_pair; /* first pair in nvlist */ + head = (uint8_t *)nvp; + + while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { + nvp_name = (nv_string_t *)(head + sizeof(*nvp)); + + nvp_data = (nv_pair_data_t *) + NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + + nvp_name->nv_size); + + if (memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && + nvp_data->nv_type == type) { + /* + * set tail to point to next nvpair and size + * is the length of the tail. 
+ */ + tail = head + nvp->encoded_size; + size = nvl->nv_data + nvl->nv_size - tail; + + /* adjust the size of the nvlist. */ + nvl->nv_size -= nvp->encoded_size; + bcopy(tail, head, size); + return (0); + } + /* Not our pair, skip to next. */ + head = head + nvp->encoded_size; + nvp = (nvp_header_t *)head; + } + return (ENOENT); +} + +int +nvlist_find(const nvlist_t *nvl, const char *name, data_type_t type, + int *elementsp, void *valuep, int *sizep) +{ + nvs_data_t *data; + nvp_header_t *nvp; + nv_string_t *nvp_name; + nv_pair_data_t *nvp_data; + nvlist_t *nvlist; + + if (nvl == NULL || nvl->nv_data == NULL || name == NULL) + return (EINVAL); + + data = (nvs_data_t *)nvl->nv_data; + nvp = &data->nvl_pair; /* first pair in nvlist */ + + while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { + nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof(*nvp)); + + nvp_data = (nv_pair_data_t *) + NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + + nvp_name->nv_size); + + if (memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && + nvp_data->nv_type == type) { + if (elementsp != NULL) + *elementsp = nvp_data->nv_nelem; + switch (nvp_data->nv_type) { + case DATA_TYPE_UINT64: + *(uint64_t *)valuep = + *(uint64_t *)nvp_data->nv_data; + return (0); + case DATA_TYPE_STRING: + nvp_name = (nv_string_t *)nvp_data->nv_data; + if (sizep != NULL) { + *sizep = nvp_name->nv_size; + } + *(const uint8_t **)valuep = + &nvp_name->nv_data[0]; + return (0); + case DATA_TYPE_NVLIST: + case DATA_TYPE_NVLIST_ARRAY: + nvlist = malloc(sizeof(*nvlist)); + if (nvlist != NULL) { + nvlist->nv_header = nvl->nv_header; + nvlist->nv_asize = 0; + nvlist->nv_size = 0; + nvlist->nv_idx = NULL; + nvlist->nv_data = &nvp_data->nv_data[0]; + *(nvlist_t **)valuep = nvlist; + return (0); + } + return (ENOMEM); + } + return (EIO); + } + /* Not our pair, skip to next. */ + nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + } + return (ENOENT); +} + +/* + * Return the next nvlist in an nvlist array. 
+ */ +int +nvlist_next(nvlist_t *nvl) +{ + nvs_data_t *data; + nvp_header_t *nvp; + + if (nvl == NULL || nvl->nv_data == NULL || nvl->nv_asize != 0) + return (EINVAL); + + data = (nvs_data_t *)nvl->nv_data; + nvp = &data->nvl_pair; /* first pair in nvlist */ + + while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { + nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + } + nvl->nv_data = (uint8_t *)nvp + sizeof(*nvp); + return (0); +} + +void +nvlist_print(nvlist_t *nvl, unsigned int indent) +{ + static const char *typenames[] = { + "DATA_TYPE_UNKNOWN", + "DATA_TYPE_BOOLEAN", + "DATA_TYPE_BYTE", + "DATA_TYPE_INT16", + "DATA_TYPE_UINT16", + "DATA_TYPE_INT32", + "DATA_TYPE_UINT32", + "DATA_TYPE_INT64", + "DATA_TYPE_UINT64", + "DATA_TYPE_STRING", + "DATA_TYPE_BYTE_ARRAY", + "DATA_TYPE_INT16_ARRAY", + "DATA_TYPE_UINT16_ARRAY", + "DATA_TYPE_INT32_ARRAY", + "DATA_TYPE_UINT32_ARRAY", + "DATA_TYPE_INT64_ARRAY", + "DATA_TYPE_UINT64_ARRAY", + "DATA_TYPE_STRING_ARRAY", + "DATA_TYPE_HRTIME", + "DATA_TYPE_NVLIST", + "DATA_TYPE_NVLIST_ARRAY", + "DATA_TYPE_BOOLEAN_VALUE", + "DATA_TYPE_INT8", + "DATA_TYPE_UINT8", + "DATA_TYPE_BOOLEAN_ARRAY", + "DATA_TYPE_INT8_ARRAY", + "DATA_TYPE_UINT8_ARRAY" + }; + nvs_data_t *data; + nvp_header_t *nvp; + nv_string_t *nvp_name; + nv_pair_data_t *nvp_data; + nvlist_t nvlist; + int i, j; + + data = (nvs_data_t *)nvl->nv_data; + nvp = &data->nvl_pair; /* first pair in nvlist */ + while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { + nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof(*nvp)); + nvp_data = (nv_pair_data_t *) + NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + + nvp_name->nv_size); + + for (int i = 0; i < indent; i++) + printf(" "); + + printf("%s [%d] %.*s", typenames[nvp_data->nv_type], + nvp_data->nv_nelem, nvp_name->nv_size, nvp_name->nv_data); + + switch (nvp_data->nv_type) { + case DATA_TYPE_UINT64: { + uint64_t val; + + val = *(uint64_t *)nvp_data->nv_data; + printf(" = 0x%jx\n", (uintmax_t)val); + break; + } + + 
case DATA_TYPE_STRING: { + nvp_name = (nv_string_t *)&nvp_data->nv_data[0]; + printf(" = \"%.*s\"\n", nvp_name->nv_size, + nvp_name->nv_data ); + break; + } + + case DATA_TYPE_NVLIST: + printf("\n"); + nvlist.nv_data = &nvp_data->nv_data[0]; + nvlist_print(&nvlist, indent + 2); + break; + + case DATA_TYPE_NVLIST_ARRAY: + nvlist.nv_data = &nvp_data->nv_data[0]; + for (j = 0; j < nvp_data->nv_nelem; j++) { + data = (nvs_data_t *)nvlist.nv_data; + printf("[%d]\n", j); + nvlist_print(&nvlist, indent + 2); + if (j != nvp_data->nv_nelem - 1) { + for (i = 0; i < indent; i++) + printf(" "); + printf("%s %.*s", + typenames[nvp_data->nv_type], + nvp_name->nv_size, + nvp_name->nv_data); + } + nvlist.nv_data = (uint8_t *)data + + nvlist_size(&native, nvlist.nv_data); + } + break; + + default: + printf("\n"); + } + nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + } + printf("%*s\n", indent + 13, "End of nvlist"); +} Index: head/stand/libsa/zfs/zfs.c =================================================================== --- head/stand/libsa/zfs/zfs.c +++ head/stand/libsa/zfs/zfs.c @@ -483,6 +483,215 @@ } static int +vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, + size_t bytes) +{ + int fd, ret; + size_t head, tail, total_size, full_sec_size; + unsigned secsz, do_tail_write; + off_t start_sec; + ssize_t res; + char *outbuf, *bouncebuf; + + fd = (uintptr_t)priv; + outbuf = (char *) buf; + bouncebuf = NULL; + + ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); + if (ret != 0) + return (ret); + + start_sec = offset / secsz; + head = offset % secsz; + total_size = roundup2(head + bytes, secsz); + tail = total_size - (head + bytes); + do_tail_write = ((tail > 0) && (head + bytes > secsz)); + full_sec_size = total_size; + if (head > 0) + full_sec_size -= secsz; + if (do_tail_write) + full_sec_size -= secsz; + + /* Partial sector write requires a bounce buffer. 
*/ + if ((head > 0) || do_tail_write || bytes < secsz) { + bouncebuf = malloc(secsz); + if (bouncebuf == NULL) { + printf("vdev_write: out of memory\n"); + return (ENOMEM); + } + } + + if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) { + ret = errno; + goto error; + } + + /* Partial data for first sector */ + if (head > 0) { + res = read(fd, bouncebuf, secsz); + if (res != secsz) { + ret = EIO; + goto error; + } + memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes)); + (void) lseek(fd, -secsz, SEEK_CUR); + res = write(fd, bouncebuf, secsz); + if (res != secsz) { + ret = EIO; + goto error; + } + outbuf += min(secsz - head, bytes); + } + + /* + * Full data write to sectors. + * Note, there is still corner case where we write + * to sector boundary, but less than sector size, e.g. write 512B + * to 4k sector. + */ + if (full_sec_size > 0) { + if (bytes < full_sec_size) { + res = read(fd, bouncebuf, secsz); + if (res != secsz) { + ret = EIO; + goto error; + } + memcpy(bouncebuf, outbuf, bytes); + (void) lseek(fd, -secsz, SEEK_CUR); + res = write(fd, bouncebuf, secsz); + if (res != secsz) { + ret = EIO; + goto error; + } + } else { + res = write(fd, outbuf, full_sec_size); + if (res != full_sec_size) { + ret = EIO; + goto error; + } + outbuf += full_sec_size; + } + } + + /* Partial data write to last sector */ + if (do_tail_write) { + res = read(fd, bouncebuf, secsz); + if (res != secsz) { + ret = EIO; + goto error; + } + memcpy(bouncebuf, outbuf, secsz - tail); + (void) lseek(fd, -secsz, SEEK_CUR); + res = write(fd, bouncebuf, secsz); + if (res != secsz) { + ret = EIO; + goto error; + } + } + + ret = 0; +error: + free(bouncebuf); + return (ret); +} + +static void +vdev_clear_pad2(vdev_t *vdev) +{ + vdev_t *kid; + vdev_boot_envblock_t *be; + off_t off = offsetof(vdev_label_t, vl_be); + zio_checksum_info_t *ci; + zio_cksum_t cksum; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + 
vdev_clear_pad2(kid); + } + + if (!STAILQ_EMPTY(&vdev->v_children)) + return; + + be = calloc(1, sizeof (*be)); + if (be == NULL) { + printf("failed to clear be area: out of memory\n"); + return; + } + + ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; + be->vbe_zbt.zec_magic = ZEC_MAGIC; + zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); + ci->ci_func[0](be, sizeof (*be), NULL, &cksum); + be->vbe_zbt.zec_cksum = cksum; + + if (vdev_write(vdev, vdev->v_read_priv, off, be, VDEV_PAD_SIZE)) { + printf("failed to clear be area of primary vdev: %d\n", + errno); + } + free(be); +} + +/* + * Read the next boot command from pad2. + * If any instance of pad2 is set to empty string, or the returned string + * values are not the same, we consider next boot not to be set. + */ +static char * +vdev_read_pad2(vdev_t *vdev) +{ + vdev_t *kid; + char *tmp, *result = NULL; + vdev_boot_envblock_t *be; + off_t off = offsetof(vdev_label_t, vl_be); + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + tmp = vdev_read_pad2(kid); + if (tmp == NULL) + continue; + + /* The next boot is not set, we are done. */ + if (*tmp == '\0') { + free(result); + return (tmp); + } + if (result == NULL) { + result = tmp; + continue; + } + /* Are the next boot strings different? */ + if (strcmp(result, tmp) != 0) { + free(tmp); + *result = '\0'; + break; + } + free(tmp); + } + if (result != NULL) + return (result); + + be = malloc(sizeof (*be)); + if (be == NULL) + return (NULL); + + if (vdev_read(vdev, vdev->v_read_priv, off, be, sizeof (*be))) { + return (NULL); + } + + switch (be->vbe_version) { + case VB_RAW: + case VB_NVLIST: + result = strdup(be->vbe_bootenv); + default: + /* Backward compatibility with initial nextboot feature. 
*/ + result = strdup((char *)be); + } + return (result); +} + +static int zfs_dev_init(void) { spa_t *spa; @@ -558,7 +767,7 @@ strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); devname[strlen(ppa->devname) - 1] = '\0'; sprintf(devname, "%s%s:", devname, partname); - pa.fd = open(devname, O_RDONLY); + pa.fd = open(devname, O_RDWR); if (pa.fd == -1) return (0); ret = zfs_probe(pa.fd, ppa->pool_guid); @@ -581,6 +790,57 @@ } int +zfs_nextboot(void *vdev, char *buf, size_t size) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + vdev_t *vd; + char *result = NULL; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (1); + + if (dev->pool_guid == 0) + spa = STAILQ_FIRST(&zfs_pools); + else + spa = spa_find_by_guid(dev->pool_guid); + + if (spa == NULL) { + printf("ZFS: can't find pool by guid\n"); + return (1); + } + + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { + char *tmp = vdev_read_pad2(vd); + + /* Continue on error. */ + if (tmp == NULL) + continue; + /* Nextboot is not set. 
*/ + if (*tmp == '\0') { + free(result); + free(tmp); + return (1); + } + if (result == NULL) { + result = tmp; + continue; + } + free(tmp); + } + if (result == NULL) + return (1); + + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { + vdev_clear_pad2(vd); + } + + strlcpy(buf, result, size); + free(result); + return (0); +} + +int zfs_probe_dev(const char *devname, uint64_t *pool_guid) { struct disk_devdesc *dev; @@ -591,7 +851,7 @@ if (pool_guid) *pool_guid = 0; - pa.fd = open(devname, O_RDONLY); + pa.fd = open(devname, O_RDWR); if (pa.fd == -1) return (ENXIO); /* Index: head/stand/libsa/zfs/zfsimpl.c =================================================================== --- head/stand/libsa/zfs/zfsimpl.c +++ head/stand/libsa/zfs/zfsimpl.c @@ -170,284 +170,48 @@ } static int -xdr_int(const unsigned char **xdr, int *ip) +nvlist_check_features_for_read(nvlist_t *nvl) { - *ip = be32dec(*xdr); - (*xdr) += 4; - return (0); -} - -static int -xdr_u_int(const unsigned char **xdr, u_int *ip) -{ - *ip = be32dec(*xdr); - (*xdr) += 4; - return (0); -} - -static int -xdr_uint64_t(const unsigned char **xdr, uint64_t *lp) -{ - u_int hi, lo; - - xdr_u_int(xdr, &hi); - xdr_u_int(xdr, &lo); - *lp = (((uint64_t)hi) << 32) | lo; - return (0); -} - -static int -nvlist_find(const unsigned char *nvlist, const char *name, int type, - int *elementsp, void *valuep, int *sizep) -{ - const unsigned char *p, *pair; - int junk; - int encoded_size, decoded_size; - - p = nvlist; - xdr_int(&p, &junk); - xdr_int(&p, &junk); - - pair = p; - xdr_int(&p, &encoded_size); - xdr_int(&p, &decoded_size); - while (encoded_size && decoded_size) { - int namelen, pairtype, elements; - const char *pairname; - - xdr_int(&p, &namelen); - pairname = (const char *)p; - p += roundup(namelen, 4); - xdr_int(&p, &pairtype); - - if (memcmp(name, pairname, namelen) == 0 && type == pairtype) { - xdr_int(&p, &elements); - if (elementsp) - *elementsp = elements; - if (type == DATA_TYPE_UINT64) { - 
xdr_uint64_t(&p, (uint64_t *)valuep); - return (0); - } else if (type == DATA_TYPE_STRING) { - int len; - xdr_int(&p, &len); - if (sizep != NULL) - *sizep = len; - (*(const char **)valuep) = (const char *)p; - return (0); - } else if (type == DATA_TYPE_NVLIST || - type == DATA_TYPE_NVLIST_ARRAY) { - (*(const unsigned char **)valuep) = - (const unsigned char *)p; - return (0); - } else { - return (EIO); - } - } else { - /* - * Not the pair we are looking for, skip to the - * next one. - */ - p = pair + encoded_size; - } - - pair = p; - xdr_int(&p, &encoded_size); - xdr_int(&p, &decoded_size); - } - - return (EIO); -} - -static int -nvlist_check_features_for_read(const unsigned char *nvlist) -{ - const unsigned char *p, *pair; - int junk; - int encoded_size, decoded_size; + nvlist_t *features = NULL; + nvs_data_t *data; + nvp_header_t *nvp; + nv_string_t *nvp_name; int rc; - rc = 0; + rc = nvlist_find(nvl, ZPOOL_CONFIG_FEATURES_FOR_READ, + DATA_TYPE_NVLIST, NULL, &features, NULL); + if (rc != 0) + return (rc); - p = nvlist; - xdr_int(&p, &junk); - xdr_int(&p, &junk); + data = (nvs_data_t *)features->nv_data; + nvp = &data->nvl_pair; /* first pair in nvlist */ - pair = p; - xdr_int(&p, &encoded_size); - xdr_int(&p, &decoded_size); - while (encoded_size && decoded_size) { - int namelen, pairtype; - const char *pairname; + while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { int i, found; + nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof(*nvp)); found = 0; - xdr_int(&p, &namelen); - pairname = (const char *)p; - p += roundup(namelen, 4); - xdr_int(&p, &pairtype); - for (i = 0; features_for_read[i] != NULL; i++) { - if (memcmp(pairname, features_for_read[i], - namelen) == 0) { + if (memcmp(nvp_name->nv_data, features_for_read[i], + nvp_name->nv_size) == 0) { found = 1; break; } } if (!found) { - printf("ZFS: unsupported feature: %s\n", pairname); + printf("ZFS: unsupported feature: %.*s\n", + nvp_name->nv_size, nvp_name->nv_data); rc = EIO; } - - p = pair + 
encoded_size; - - pair = p; - xdr_int(&p, &encoded_size); - xdr_int(&p, &decoded_size); + nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); } + nvlist_destroy(features); return (rc); } -/* - * Return the next nvlist in an nvlist array. - */ -static const unsigned char * -nvlist_next(const unsigned char *nvlist) -{ - const unsigned char *p, *pair; - int junk; - int encoded_size, decoded_size; - - p = nvlist; - xdr_int(&p, &junk); - xdr_int(&p, &junk); - - pair = p; - xdr_int(&p, &encoded_size); - xdr_int(&p, &decoded_size); - while (encoded_size && decoded_size) { - p = pair + encoded_size; - - pair = p; - xdr_int(&p, &encoded_size); - xdr_int(&p, &decoded_size); - } - - return (p); -} - -#ifdef TEST - -static const unsigned char * -nvlist_print(const unsigned char *nvlist, unsigned int indent) -{ - static const char *typenames[] = { - "DATA_TYPE_UNKNOWN", - "DATA_TYPE_BOOLEAN", - "DATA_TYPE_BYTE", - "DATA_TYPE_INT16", - "DATA_TYPE_UINT16", - "DATA_TYPE_INT32", - "DATA_TYPE_UINT32", - "DATA_TYPE_INT64", - "DATA_TYPE_UINT64", - "DATA_TYPE_STRING", - "DATA_TYPE_BYTE_ARRAY", - "DATA_TYPE_INT16_ARRAY", - "DATA_TYPE_UINT16_ARRAY", - "DATA_TYPE_INT32_ARRAY", - "DATA_TYPE_UINT32_ARRAY", - "DATA_TYPE_INT64_ARRAY", - "DATA_TYPE_UINT64_ARRAY", - "DATA_TYPE_STRING_ARRAY", - "DATA_TYPE_HRTIME", - "DATA_TYPE_NVLIST", - "DATA_TYPE_NVLIST_ARRAY", - "DATA_TYPE_BOOLEAN_VALUE", - "DATA_TYPE_INT8", - "DATA_TYPE_UINT8", - "DATA_TYPE_BOOLEAN_ARRAY", - "DATA_TYPE_INT8_ARRAY", - "DATA_TYPE_UINT8_ARRAY" - }; - - unsigned int i, j; - const unsigned char *p, *pair; - int junk; - int encoded_size, decoded_size; - - p = nvlist; - xdr_int(&p, &junk); - xdr_int(&p, &junk); - - pair = p; - xdr_int(&p, &encoded_size); - xdr_int(&p, &decoded_size); - while (encoded_size && decoded_size) { - int namelen, pairtype, elements; - const char *pairname; - - xdr_int(&p, &namelen); - pairname = (const char *)p; - p += roundup(namelen, 4); - xdr_int(&p, &pairtype); - - for (i = 0; i < indent; i++) 
- printf(" "); - printf("%s %.*s", typenames[pairtype], namelen, pairname); - - xdr_int(&p, &elements); - switch (pairtype) { - case DATA_TYPE_UINT64: { - uint64_t val; - xdr_uint64_t(&p, &val); - printf(" = 0x%jx\n", (uintmax_t)val); - break; - } - - case DATA_TYPE_STRING: { - int len; - xdr_int(&p, &len); - printf(" = \"%.*s\"\n", len, p); - break; - } - - case DATA_TYPE_NVLIST: - printf("\n"); - nvlist_print(p, indent + 1); - break; - - case DATA_TYPE_NVLIST_ARRAY: - for (j = 0; j < elements; j++) { - printf("[%d]\n", j); - p = nvlist_print(p, indent + 1); - if (j != elements - 1) { - for (i = 0; i < indent; i++) - printf(" "); - printf("%s %.*s", typenames[pairtype], - namelen, pairname); - } - } - break; - - default: - printf("\n"); - } - - p = pair + encoded_size; - - pair = p; - xdr_int(&p, &encoded_size); - xdr_int(&p, &decoded_size); - } - - return (p); -} - -#endif - static int vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t size) @@ -1082,7 +846,7 @@ } static void -vdev_set_initial_state(vdev_t *vdev, const unsigned char *nvlist) +vdev_set_initial_state(vdev_t *vdev, const nvlist_t *nvlist) { uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present; uint64_t is_log; @@ -1117,7 +881,7 @@ } static int -vdev_init(uint64_t guid, const unsigned char *nvlist, vdev_t **vdevp) +vdev_init(uint64_t guid, const nvlist_t *nvlist, vdev_t **vdevp) { uint64_t id, ashift, asize, nparity; const char *path; @@ -1128,8 +892,8 @@ if (nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, NULL, &id, NULL) || - nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, - NULL, &type, &len)) { + nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, NULL, + &type, &len)) { return (ENOENT); } @@ -1306,10 +1070,10 @@ } static int -vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const unsigned char *nvlist) +vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *top_vdev, *vdev; - const unsigned char *kids; 
+ nvlist_t *kids = NULL; int rc, nkids; /* Get top vdev. */ @@ -1332,8 +1096,10 @@ rc = nvlist_find(kids, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); - if (rc != 0) + if (rc != 0) { + nvlist_destroy(kids); return (rc); + } rc = vdev_init(guid, kids, &vdev); if (rc != 0) return (rc); @@ -1342,7 +1108,7 @@ vdev->v_top = top_vdev; vdev_insert(top_vdev, vdev); - kids = nvlist_next(kids); + rc = nvlist_next(kids); } } else { /* @@ -1351,15 +1117,17 @@ */ rc = 0; } + nvlist_destroy(kids); return (rc); } static int -vdev_init_from_label(spa_t *spa, const unsigned char *nvlist) +vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist) { uint64_t pool_guid, top_guid; - const unsigned char *vdevs; + nvlist_t *vdevs; + int rc; if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, NULL, &pool_guid, NULL) || @@ -1371,7 +1139,9 @@ return (ENOENT); } - return (vdev_from_nvlist(spa, top_guid, vdevs)); + rc = vdev_from_nvlist(spa, top_guid, vdevs); + nvlist_destroy(vdevs); + return (rc); } static void @@ -1420,10 +1190,10 @@ } static int -vdev_update_from_nvlist(uint64_t top_guid, const unsigned char *nvlist) +vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *vdev; - const unsigned char *kids; + nvlist_t *kids = NULL; int rc, nkids; /* Update top vdev. */ @@ -1447,20 +1217,21 @@ if (vdev != NULL) vdev_set_initial_state(vdev, kids); - kids = nvlist_next(kids); + rc = nvlist_next(kids); } } else { rc = 0; } + nvlist_destroy(kids); return (rc); } static int -vdev_init_from_nvlist(spa_t *spa, const unsigned char *nvlist) +vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) { uint64_t pool_guid, vdev_children; - const unsigned char *vdevs, *kids; + nvlist_t *vdevs = NULL, *kids = NULL; int rc, nkids; if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, @@ -1474,13 +1245,16 @@ } /* Wrong guid?! 
*/ - if (spa->spa_guid != pool_guid) + if (spa->spa_guid != pool_guid) { + nvlist_destroy(vdevs); return (EINVAL); + } spa->spa_root_vdev->v_nchildren = vdev_children; rc = nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, &nkids, &kids, NULL); + nvlist_destroy(vdevs); /* * MOS config has at least one child for root vdev. @@ -1506,8 +1280,9 @@ rc = vdev_update_from_nvlist(guid, kids); if (rc != 0) break; - kids = nvlist_next(kids); + nvlist_next(kids); } + nvlist_destroy(kids); /* * Re-evaluate top-level vdev state. @@ -1819,26 +1594,20 @@ return (vdev_read_phys(vd, &bp, buf, off, size)); } -static unsigned char * +static nvlist_t * vdev_label_read_config(vdev_t *vd, uint64_t txg) { vdev_phys_t *label; uint64_t best_txg = 0; uint64_t label_txg = 0; uint64_t asize; - unsigned char *nvl; - size_t nvl_size; + nvlist_t *nvl = NULL, *tmp; int error; label = malloc(sizeof (vdev_phys_t)); if (label == NULL) return (NULL); - nvl_size = VDEV_PHYS_SIZE - sizeof (zio_eck_t) - 4; - nvl = malloc(nvl_size); - if (nvl == NULL) - goto done; - for (int l = 0; l < VDEV_LABELS; l++) { const unsigned char *nvlist; @@ -1847,35 +1616,40 @@ sizeof (vdev_phys_t))) continue; - if (label->vp_nvlist[0] != NV_ENCODE_XDR) + nvlist = (const unsigned char *) label->vp_nvlist; + tmp = nvlist_import(nvlist + 4, nvlist[0], nvlist[1]); + if (tmp == NULL) continue; - nvlist = (const unsigned char *) label->vp_nvlist + 4; - error = nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, + error = nvlist_find(tmp, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, NULL, &label_txg, NULL); if (error != 0 || label_txg == 0) { - memcpy(nvl, nvlist, nvl_size); + nvlist_destroy(nvl); + nvl = tmp; goto done; } if (label_txg <= txg && label_txg > best_txg) { best_txg = label_txg; - memcpy(nvl, nvlist, nvl_size); + nvlist_destroy(nvl); + nvl = tmp; + tmp = NULL; /* * Use asize from pool config. We need this * because we can get bad value from BIOS. 
*/ - if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE, + if (nvlist_find(nvl, ZPOOL_CONFIG_ASIZE, DATA_TYPE_UINT64, NULL, &asize, NULL) == 0) { vd->v_psize = asize + VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; } } + nvlist_destroy(tmp); } if (best_txg == 0) { - free(nvl); + nvlist_destroy(nvl); nvl = NULL; } done: @@ -1914,12 +1688,11 @@ vdev_t vtmp; spa_t *spa; vdev_t *vdev; - unsigned char *nvlist; + nvlist_t *nvl; uint64_t val; uint64_t guid, vdev_children; uint64_t pool_txg, pool_guid; const char *pool_name; - const unsigned char *features; int rc, namelen; /* @@ -1936,54 +1709,53 @@ if (vtmp.v_psize < SPA_MINDEVSIZE) return (EIO); - nvlist = vdev_label_read_config(&vtmp, UINT64_MAX); - if (nvlist == NULL) + nvl = vdev_label_read_config(&vtmp, UINT64_MAX); + if (nvl == NULL) return (EIO); - if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64, + if (nvlist_find(nvl, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64, NULL, &val, NULL) != 0) { - free(nvlist); + nvlist_destroy(nvl); return (EIO); } if (!SPA_VERSION_IS_SUPPORTED(val)) { printf("ZFS: unsupported ZFS version %u (should be %u)\n", (unsigned)val, (unsigned)SPA_VERSION); - free(nvlist); + nvlist_destroy(nvl); return (EIO); } /* Check ZFS features for read */ - if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, - DATA_TYPE_NVLIST, NULL, &features, NULL) == 0 && - nvlist_check_features_for_read(features) != 0) { - free(nvlist); + rc = nvlist_check_features_for_read(nvl); + if (rc != 0) { + nvlist_destroy(nvl); return (EIO); } - if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64, + if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64, NULL, &val, NULL) != 0) { - free(nvlist); + nvlist_destroy(nvl); return (EIO); } if (val == POOL_STATE_DESTROYED) { /* We don't boot only from destroyed pools. 
*/ - free(nvlist); + nvlist_destroy(nvl); return (EIO); } - if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, + if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, NULL, &pool_txg, NULL) != 0 || - nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, + nvlist_find(nvl, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, NULL, &pool_guid, NULL) != 0 || - nvlist_find(nvlist, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING, + nvlist_find(nvl, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING, NULL, &pool_name, &namelen) != 0) { /* * Cache and spare devices end up here - just ignore * them. */ - free(nvlist); + nvlist_destroy(nvl); return (EIO); } @@ -1994,11 +1766,11 @@ if (spa == NULL) { char *name; - nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_CHILDREN, + nvlist_find(nvl, ZPOOL_CONFIG_VDEV_CHILDREN, DATA_TYPE_UINT64, NULL, &vdev_children, NULL); name = malloc(namelen + 1); if (name == NULL) { - free(nvlist); + nvlist_destroy(nvl); return (ENOMEM); } bcopy(pool_name, name, namelen); @@ -2006,7 +1778,7 @@ spa = spa_create(pool_guid, name); free(name); if (spa == NULL) { - free(nvlist); + nvlist_destroy(nvl); return (ENOMEM); } spa->spa_root_vdev->v_nchildren = vdev_children; @@ -2020,20 +1792,20 @@ * be some kind of alias (overlapping slices, dangerously dedicated * disks etc). */ - if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + if (nvlist_find(nvl, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL) != 0) { - free(nvlist); + nvlist_destroy(nvl); return (EIO); } vdev = vdev_find(guid); /* Has this vdev already been inited? 
*/ if (vdev && vdev->v_phys_read) { - free(nvlist); + nvlist_destroy(nvl); return (EIO); } - rc = vdev_init_from_label(spa, nvlist); - free(nvlist); + rc = vdev_init_from_label(spa, nvl); + nvlist_destroy(nvl); if (rc != 0) return (rc); @@ -2211,6 +1983,8 @@ BP_GET_PSIZE(bp), buf, BP_GET_LSIZE(bp)); else if (size != BP_GET_PSIZE(bp)) bcopy(pbuf, buf, BP_GET_PSIZE(bp)); + } else { + printf("zio_read error: %d\n", error); } if (buf != pbuf) free(pbuf); @@ -3307,7 +3081,7 @@ } static int -load_nvlist(spa_t *spa, uint64_t obj, unsigned char **value) +load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) { dnode_phys_t dir; size_t size; @@ -3336,7 +3110,8 @@ nv = NULL; return (rc); } - *value = nv; + *value = nvlist_import(nv + 4, nv[0], nv[1]); + free(nv); return (rc); } @@ -3345,7 +3120,7 @@ { dnode_phys_t dir; uint64_t config_object; - unsigned char *nvlist; + nvlist_t *nvlist; int rc; if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) { @@ -3383,13 +3158,12 @@ rc = load_nvlist(spa, config_object, &nvlist); if (rc != 0) return (rc); - /* * Update vdevs from MOS config. Note, we do skip encoding bytes * here. See also vdev_label_read_config(). 
*/ - rc = vdev_init_from_nvlist(spa, nvlist + 4); - free(nvlist); + rc = vdev_init_from_nvlist(spa, nvlist); + nvlist_destroy(nvlist); return (rc); } Index: head/stand/loader.mk =================================================================== --- head/stand/loader.mk +++ head/stand/loader.mk @@ -136,6 +136,7 @@ CFLAGS+= -DLOADER_ZFS_SUPPORT CFLAGS+= -I${ZFSSRC} CFLAGS+= -I${SYSDIR}/cddl/boot/zfs +CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common SRCS+= zfs_cmd.c .endif Index: head/stand/userboot/userboot/Makefile =================================================================== --- head/stand/userboot/userboot/Makefile +++ head/stand/userboot/userboot/Makefile @@ -34,6 +34,7 @@ CFLAGS+= -Wall CFLAGS+= -I${BOOTSRC}/userboot +CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common CWARNFLAGS.main.c += -Wno-implicit-function-declaration LDFLAGS+= -nostdlib -Wl,-Bsymbolic Index: head/sys/cddl/boot/zfs/zfsimpl.h =================================================================== --- head/sys/cddl/boot/zfs/zfsimpl.h +++ head/sys/cddl/boot/zfs/zfsimpl.h @@ -56,9 +56,16 @@ * Copyright 2013 by Saso Kiselkov. All rights reserved. */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2020 by Delphix. All rights reserved. */ +#include +#include +#include + +#ifndef _ZFSIMPL_H_ +#define _ZFSIMPL_H_ + #define MAXNAMELEN 256 #define _NOTE(s) @@ -493,7 +500,7 @@ #define VDEV_RAIDZ_MAXPARITY 3 #define VDEV_PAD_SIZE (8 << 10) -/* 2 padding areas (vl_pad1 and vl_pad2) to skip */ +/* 2 padding areas (vl_pad1 and vl_be) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) @@ -519,9 +526,29 @@ zio_eck_t vp_zbt; } vdev_phys_t; +typedef enum vbe_vers { + /* The bootenv file is stored as ascii text in the envblock */ + VB_RAW = 0, + + /* + * The bootenv file is converted to an nvlist and then packed into the + * envblock. 
+ */ + VB_NVLIST = 1 +} vbe_vers_t; + +typedef struct vdev_boot_envblock { + uint64_t vbe_version; + char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) - + sizeof (zio_eck_t)]; + zio_eck_t vbe_zbt; +} vdev_boot_envblock_t; + +CTASSERT(sizeof (vdev_boot_envblock_t) == VDEV_PAD_SIZE); + typedef struct vdev_label { char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ - char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ + vdev_boot_envblock_t vl_be; /* 8K */ vdev_phys_t vl_vdev_phys; /* 112K */ char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ } vdev_label_t; /* 256K total */ @@ -1811,3 +1838,5 @@ } zio_t; static void decode_embedded_bp_compressed(const blkptr_t *, void *); + +#endif /* _ZFSIMPL_H_ */