Index: head/sys/boot/geli/geliboot.c =================================================================== --- head/sys/boot/geli/geliboot.c (revision 297628) +++ head/sys/boot/geli/geliboot.c (revision 297629) @@ -1,292 +1,310 @@ /*- * Copyright (c) 2015 Allan Jude * Copyright (c) 2005-2011 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "geliboot.h" SLIST_HEAD(geli_list, geli_entry) geli_head = SLIST_HEAD_INITIALIZER(geli_head); struct geli_list *geli_headp; static int geli_same_device(struct geli_entry *ge, struct dsk *dskp) { if (geli_e->dsk->drive == dskp->drive && dskp->part == 255 && geli_e->dsk->part == dskp->slice) { /* * Sometimes slice = slice, and sometimes part = slice * If the incoming struct dsk has part=255, it means look at * the slice instead of the part number */ return (0); } /* Is this the same device? */ if (geli_e->dsk->drive != dskp->drive || geli_e->dsk->slice != dskp->slice || geli_e->dsk->part != dskp->part) { return (1); } return (0); } void geli_init(void) { geli_count = 0; SLIST_INIT(&geli_head); } /* * Read the last sector of the drive or partition pointed to by dsk and see * if it is GELI encrypted */ int geli_taste(int read_func(void *vdev, void *priv, off_t off, void *buf, size_t bytes), struct dsk *dskp, daddr_t lastsector) { struct g_eli_metadata md; - u_char buf[DEV_BSIZE]; + u_char buf[DEV_GELIBOOT_BSIZE]; int error; + off_t alignsector; - error = read_func(NULL, dskp, (off_t) lastsector * DEV_BSIZE, &buf, - (size_t) DEV_BSIZE); + alignsector = (lastsector * DEV_BSIZE) & + ~(off_t)(DEV_GELIBOOT_BSIZE - 1); + error = read_func(NULL, dskp, alignsector, &buf, DEV_GELIBOOT_BSIZE); if (error != 0) { return (error); } - error = eli_metadata_decode(buf, &md); + /* Extract the last DEV_BSIZE bytes from the block. */ + error = eli_metadata_decode(buf + (DEV_GELIBOOT_BSIZE - DEV_BSIZE), + &md); if (error != 0) { return (error); } if ((md.md_flags & G_ELI_FLAG_ONETIME)) { - /* Swap device, skip it */ + /* Swap device, skip it. */ return (1); } if (!(md.md_flags & G_ELI_FLAG_BOOT)) { - /* Disk is not GELI boot device, skip it */ + /* Disk is not GELI boot device, skip it. */ return (1); } if (md.md_iterations < 0) { - /* XXX TODO: Support loading key files */ - /* Disk does not have a passphrase, skip it */ + /* XXX TODO: Support loading key files. 
*/ + /* Disk does not have a passphrase, skip it. */ return (1); } geli_e = malloc(sizeof(struct geli_entry)); if (geli_e == NULL) return (2); geli_e->dsk = malloc(sizeof(struct dsk)); if (geli_e->dsk == NULL) return (2); memcpy(geli_e->dsk, dskp, sizeof(struct dsk)); geli_e->part_end = lastsector; if (dskp->part == 255) { geli_e->dsk->part = dskp->slice; } geli_e->md = md; eli_metadata_softc(&geli_e->sc, &md, DEV_BSIZE, (lastsector + DEV_BSIZE) * DEV_BSIZE); SLIST_INSERT_HEAD(&geli_head, geli_e, entries); geli_count++; return (0); } /* * Attempt to decrypt the device */ int geli_attach(struct dsk *dskp, const char *passphrase) { u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN], *mkp; u_int keynum; struct hmac_ctx ctx; int error; SLIST_FOREACH_SAFE(geli_e, &geli_head, entries, geli_e_tmp) { if (geli_same_device(geli_e, dskp) != 0) { continue; } g_eli_crypto_hmac_init(&ctx, NULL, 0); /* * Prepare Derived-Key from the user passphrase. */ if (geli_e->md.md_iterations < 0) { - /* XXX TODO: Support loading key files */ + /* XXX TODO: Support loading key files. */ return (1); } else if (geli_e->md.md_iterations == 0) { g_eli_crypto_hmac_update(&ctx, geli_e->md.md_salt, sizeof(geli_e->md.md_salt)); g_eli_crypto_hmac_update(&ctx, passphrase, strlen(passphrase)); } else if (geli_e->md.md_iterations > 0) { - printf("Calculating GELI Decryption Key disk%dp%d @ %lu " - "iterations...\n", dskp->unit, + printf("Calculating GELI Decryption Key disk%dp%d @ %lu" + " iterations...\n", dskp->unit, (dskp->slice > 0 ? 
dskp->slice : dskp->part), geli_e->md.md_iterations); u_char dkey[G_ELI_USERKEYLEN]; pkcs5v2_genkey(dkey, sizeof(dkey), geli_e->md.md_salt, sizeof(geli_e->md.md_salt), passphrase, geli_e->md.md_iterations); g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey)); bzero(&dkey, sizeof(dkey)); } g_eli_crypto_hmac_final(&ctx, key, 0); error = g_eli_mkey_decrypt(&geli_e->md, key, mkey, &keynum); bzero(&key, sizeof(key)); if (error == -1) { bzero(&mkey, sizeof(mkey)); printf("Bad GELI key: %d\n", error); return (error); } else if (error != 0) { bzero(&mkey, sizeof(mkey)); printf("Failed to decrypt GELI master key: %d\n", error); return (error); } /* Store the keys */ bcopy(mkey, geli_e->sc.sc_mkey, sizeof(geli_e->sc.sc_mkey)); bcopy(mkey, geli_e->sc.sc_ivkey, sizeof(geli_e->sc.sc_ivkey)); mkp = mkey + sizeof(geli_e->sc.sc_ivkey); if ((geli_e->sc.sc_flags & G_ELI_FLAG_AUTH) == 0) { bcopy(mkp, geli_e->sc.sc_ekey, G_ELI_DATAKEYLEN); } else { /* * The encryption key is: ekey = HMAC_SHA512(Data-Key, 0x10) */ g_eli_crypto_hmac(mkp, G_ELI_MAXKEYLEN, "\x10", 1, geli_e->sc.sc_ekey, 0); } bzero(&mkey, sizeof(mkey)); - /* Initialize the per-sector IV */ + /* Initialize the per-sector IV. */ switch (geli_e->sc.sc_ealgo) { case CRYPTO_AES_XTS: break; default: SHA256_Init(&geli_e->sc.sc_ivctx); SHA256_Update(&geli_e->sc.sc_ivctx, geli_e->sc.sc_ivkey, sizeof(geli_e->sc.sc_ivkey)); break; } return (0); } - /* Disk not found */ + /* Disk not found. 
*/ return (2); } int is_geli(struct dsk *dskp) { SLIST_FOREACH_SAFE(geli_e, &geli_head, entries, geli_e_tmp) { if (geli_same_device(geli_e, dskp) == 0) { return (0); } } return (1); } int geli_read(struct dsk *dskp, off_t offset, u_char *buf, size_t bytes) { u_char iv[G_ELI_IVKEYLEN]; u_char *pbuf; int error; - off_t os; + off_t dstoff; uint64_t keyno; - size_t n, nb; + size_t n, nsec, secsize; struct g_eli_key gkey; + pbuf = buf; SLIST_FOREACH_SAFE(geli_e, &geli_head, entries, geli_e_tmp) { if (geli_same_device(geli_e, dskp) != 0) { continue; } - nb = bytes / DEV_BSIZE; - for (n = 0; n < nb; n++) { - os = offset + (n * DEV_BSIZE); - pbuf = buf + (n * DEV_BSIZE); + secsize = geli_e->sc.sc_sectorsize; + nsec = bytes / secsize; + if (nsec == 0) { + /* + * A read of less than the GELI sector size has been + * requested. The caller provided destination buffer may + * not be big enough to boost the read to a full sector, + * so just attempt to decrypt the truncated sector. + */ + secsize = bytes; + nsec = 1; + } - g_eli_crypto_ivgen(&geli_e->sc, os, iv, G_ELI_IVKEYLEN); + for (n = 0, dstoff = offset; n < nsec; n++, dstoff += secsize) { - /* Get the key that corresponds to this offset */ - keyno = (os >> G_ELI_KEY_SHIFT) / DEV_BSIZE; + g_eli_crypto_ivgen(&geli_e->sc, dstoff, iv, + G_ELI_IVKEYLEN); + + /* Get the key that corresponds to this offset. 
*/ + keyno = (dstoff >> G_ELI_KEY_SHIFT) / secsize; g_eli_key_fill(&geli_e->sc, &gkey, keyno); error = geliboot_crypt(geli_e->sc.sc_ealgo, 0, pbuf, - DEV_BSIZE, gkey.gek_key, geli_e->sc.sc_ekeylen, iv); + secsize, gkey.gek_key, + geli_e->sc.sc_ekeylen, iv); if (error != 0) { bzero(&gkey, sizeof(gkey)); printf("Failed to decrypt in geli_read()!"); return (error); } + pbuf += secsize; } bzero(&gkey, sizeof(gkey)); return (0); } printf("GELI provider not found\n"); return (1); } int geli_passphrase(char *pw, int disk, int parttype, int part, struct dsk *dskp) { int i; /* TODO: Implement GELI keyfile(s) support */ for (i = 0; i < 3; i++) { /* Try cached passphrase */ if (i == 0 && pw[0] != '\0') { if (geli_attach(dskp, pw) == 0) { return (0); } } printf("GELI Passphrase for disk%d%c%d: ", disk, parttype, part); pwgets(pw, GELI_PW_MAXLEN); printf("\n"); if (geli_attach(dskp, pw) == 0) { return (0); } } return (1); } Index: head/sys/boot/geli/geliboot.h =================================================================== --- head/sys/boot/geli/geliboot.h (revision 297628) +++ head/sys/boot/geli/geliboot.h (revision 297629) @@ -1,86 +1,89 @@ /*- * Copyright (c) 2015 Allan Jude * Copyright (c) 2005-2011 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #ifndef _GELIBOOT_H_ #define _GELIBOOT_H_ #define _STRING_H_ #define _STRINGS_H_ #define _STDIO_H_ #include #include /* Pull in the md5, sha256, and sha512 implementations */ #include #include #include /* Pull in AES implementation */ #include /* AES-XTS implementation */ #define _STAND #define STAND_H /* We don't want stand.h in {gpt,zfs,gptzfs}boot */ #include #ifndef DEV_BSIZE #define DEV_BSIZE 512 #endif +#ifndef DEV_GELIBOOT_BSIZE +#define DEV_GELIBOOT_BSIZE 4096 +#endif #ifndef MIN #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) #endif #define GELI_PW_MAXLEN 256 extern void pwgets(char *buf, int n); struct geli_entry { struct dsk *dsk; off_t part_end; struct g_eli_softc sc; struct g_eli_metadata md; SLIST_ENTRY(geli_entry) entries; } *geli_e, *geli_e_tmp; int geli_count; void geli_init(void); int geli_taste(int read_func(void *vdev, void *priv, off_t off, void *buf, size_t bytes), struct dsk *dsk, daddr_t lastsector); int geli_attach(struct dsk *dskp, const char *passphrase); int is_geli(struct dsk *dsk); int geli_read(struct dsk *dsk, off_t offset, u_char *buf, size_t bytes); int geli_decrypt(u_int algo, u_char *data, size_t datasize, const u_char *key, size_t keysize, const uint8_t* iv); int geli_passphrase(char *pw, int disk, int parttype, int part, struct dsk *dskp); #endif /* _GELIBOOT_H_ */ Index: head/sys/boot/i386/libi386/biosdisk.c =================================================================== --- head/sys/boot/i386/libi386/biosdisk.c (revision 297628) +++ head/sys/boot/i386/libi386/biosdisk.c (revision 297629) @@ -1,856 +1,880 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2012 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * BIOS disk device handling. * * Ideas and algorithms from: * * - NetBSD libi386/biosdisk.c * - FreeBSD biosboot/disk.c * */ #include #include #include #include #include #include #include #include "disk.h" #include "libi386.h" #ifdef LOADER_GELI_SUPPORT #include "cons.h" #include "drv.h" #include "gpt.h" #include "part.h" #include struct pentry { struct ptable_entry part; uint64_t flags; union { uint8_t bsd; uint8_t mbr; uuid_t gpt; uint16_t vtoc8; } type; STAILQ_ENTRY(pentry) entry; }; struct ptable { enum ptable_type type; uint16_t sectorsize; uint64_t sectors; STAILQ_HEAD(, pentry) entries; }; #include "geliboot.c" #endif /* LOADER_GELI_SUPPORT */ CTASSERT(sizeof(struct i386_devdesc) >= sizeof(struct disk_devdesc)); #define BIOS_NUMDRIVES 0x475 #define BIOSDISK_SECSIZE 512 #define BUFSIZE (1 * BIOSDISK_SECSIZE) #define DT_ATAPI 0x10 /* disk type for ATAPI floppies */ #define WDMAJOR 0 /* major numbers for devices we frontend for */ #define WFDMAJOR 1 #define FDMAJOR 2 #define DAMAJOR 4 #ifdef DISK_DEBUG # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DEBUG(fmt, args...) #endif /* * List of BIOS devices, translation from disk unit number to * BIOS unit number. 
*/ static struct bdinfo { int bd_unit; /* BIOS unit number */ int bd_cyl; /* BIOS geometry */ int bd_hds; int bd_sec; int bd_flags; #define BD_MODEINT13 0x0000 #define BD_MODEEDD1 0x0001 #define BD_MODEEDD3 0x0002 #define BD_MODEMASK 0x0003 #define BD_FLOPPY 0x0004 int bd_type; /* BIOS 'drive type' (floppy only) */ uint16_t bd_sectorsize; /* Sector size */ uint64_t bd_sectors; /* Disk size */ } bdinfo [MAXBDDEV]; static int nbdinfo = 0; #define BD(dev) (bdinfo[(dev)->d_unit]) static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest); static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest); static int bd_int13probe(struct bdinfo *bd); static int bd_init(void); static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size, char *buf, size_t *rsize); static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t size, char *buf, size_t *rsize); static int bd_open(struct open_file *f, ...); static int bd_close(struct open_file *f); static int bd_ioctl(struct open_file *f, u_long cmd, void *data); static void bd_print(int verbose); static void bd_cleanup(void); #ifdef LOADER_GELI_SUPPORT static enum isgeli { ISGELI_UNKNOWN, ISGELI_NO, ISGELI_YES }; static enum isgeli geli_status[MAXBDDEV][MAXTBLENTS]; int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes); #endif /* LOADER_GELI_SUPPORT */ struct devsw biosdisk = { "disk", DEVT_DISK, bd_init, bd_strategy, bd_open, bd_close, bd_ioctl, bd_print, bd_cleanup }; /* * Translate between BIOS device numbers and our private unit numbers. 
*/ int bd_bios2unit(int biosdev) { int i; DEBUG("looking for bios device 0x%x", biosdev); for (i = 0; i < nbdinfo; i++) { DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit); if (bdinfo[i].bd_unit == biosdev) return (i); } return (-1); } int bd_unit2bios(int unit) { if ((unit >= 0) && (unit < nbdinfo)) return (bdinfo[unit].bd_unit); return (-1); } /* * Quiz the BIOS for disk devices, save a little info about them. */ static int bd_init(void) { int base, unit, nfd = 0; #ifdef LOADER_GELI_SUPPORT geli_init(); #endif /* sequence 0, 0x80 */ for (base = 0; base <= 0x80; base += 0x80) { for (unit = base; (nbdinfo < MAXBDDEV); unit++) { #ifndef VIRTUALBOX /* * Check the BIOS equipment list for number * of fixed disks. */ if(base == 0x80 && (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES))) break; #endif bdinfo[nbdinfo].bd_unit = unit; bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0; if (!bd_int13probe(&bdinfo[nbdinfo])) break; /* XXX we need "disk aliases" to make this simpler */ printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ? 
('A' + unit): ('C' + unit - 0x80), nbdinfo); nbdinfo++; if (base == 0x80) nfd++; } } return(0); } static void bd_cleanup(void) { disk_cleanup(&biosdisk); } /* * Try to detect a device supported by the legacy int13 BIOS */ static int bd_int13probe(struct bdinfo *bd) { struct edd_params params; v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = bd->bd_unit; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ecx & 0x3f) == 0 || /* absurd sector number */ (v86.edx & 0xff) <= (unsigned)(bd->bd_unit & 0x7f)) /* unit # bad */ return (0); /* skip device */ /* Convert max cyl # -> # of cylinders */ bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; /* Convert max head # -> # of heads */ bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1; bd->bd_sec = v86.ecx & 0x3f; bd->bd_type = v86.ebx & 0xff; bd->bd_flags |= BD_MODEINT13; /* Calculate sectors count from the geometry */ bd->bd_sectors = bd->bd_cyl * bd->bd_hds * bd->bd_sec; bd->bd_sectorsize = BIOSDISK_SECSIZE; DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl, bd->bd_hds, bd->bd_sec); /* Determine if we can use EDD with this device. 
*/ v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4100; v86.edx = bd->bd_unit; v86.ebx = 0x55aa; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ebx & 0xffff) != 0xaa55 || /* signature */ (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) return (1); /* EDD supported */ bd->bd_flags |= BD_MODEEDD1; if ((v86.eax & 0xff00) >= 0x3000) bd->bd_flags |= BD_MODEEDD3; /* Get disk params */ params.len = sizeof(struct edd_params); v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4800; v86.edx = bd->bd_unit; v86.ds = VTOPSEG(&params); v86.esi = VTOPOFF(&params); v86int(); if (!V86_CY(v86.efl)) { bd->bd_sectors = params.sectors; bd->bd_sectorsize = params.sector_size; } DEBUG("unit 0x%x flags %x, sectors %llu, sectorsize %u", bd->bd_unit, bd->bd_flags, bd->bd_sectors, bd->bd_sectorsize); return (1); } /* * Print information about disks */ static void bd_print(int verbose) { static char line[80]; struct disk_devdesc dev; int i; for (i = 0; i < nbdinfo; i++) { sprintf(line, " disk%d: BIOS drive %c:\n", i, (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit): ('C' + bdinfo[i].bd_unit - 0x80)); pager_output(line); dev.d_dev = &biosdisk; dev.d_unit = i; dev.d_slice = -1; dev.d_partition = -1; if (disk_open(&dev, bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors, bdinfo[i].bd_sectorsize, (bdinfo[i].bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0) == 0) { sprintf(line, " disk%d", i); disk_print(&dev, line, verbose); disk_close(&dev); } } } /* * Attempt to open the disk described by (dev) for use by (f). * * Note that the philosophy here is "give them exactly what * they ask for". This is necessary because being too "smart" * about what the user might want leads to complications. * (eg. given no slice or partition value, with a disk that is * sliced - are they after the first BSD slice, or the DOS * slice before it?) */ static int bd_open(struct open_file *f, ...) 
{ struct disk_devdesc *dev, rdev; int err, g_err; va_list ap; va_start(ap, f); dev = va_arg(ap, struct disk_devdesc *); va_end(ap); if (dev->d_unit < 0 || dev->d_unit >= nbdinfo) return (EIO); err = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize, (BD(dev).bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0); #ifdef LOADER_GELI_SUPPORT static char gelipw[GELI_PW_MAXLEN]; char *passphrase; if (err) return (err); /* if we already know there is no GELI, skip the rest */ if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_UNKNOWN) return (err); struct dsk dskp; struct ptable *table = NULL; struct ptable_entry part; struct pentry *entry; int geli_part = 0; dskp.drive = bd_unit2bios(dev->d_unit); dskp.type = dev->d_type; dskp.unit = dev->d_unit; dskp.slice = dev->d_slice; dskp.part = dev->d_partition; dskp.start = dev->d_offset; memcpy(&rdev, dev, sizeof(rdev)); /* to read the GPT table, we need to read the first sector */ rdev.d_offset = 0; /* We need the LBA of the end of the partition */ table = ptable_open(&rdev, BD(dev).bd_sectors, BD(dev).bd_sectorsize, ptblread); if (table == NULL) { DEBUG("Can't read partition table"); /* soft failure, return the exit status of disk_open */ return (err); } if (table->type == PTABLE_GPT) dskp.part = 255; STAILQ_FOREACH(entry, &table->entries, entry) { dskp.slice = entry->part.index; dskp.start = entry->part.start; if (is_geli(&dskp) == 0) { geli_status[dev->d_unit][dskp.slice] = ISGELI_YES; return (0); } if (geli_taste(bios_read, &dskp, entry->part.end - entry->part.start) == 0) { if ((passphrase = getenv("kern.geom.eli.passphrase")) != NULL) { /* Use the cached passphrase */ bcopy(passphrase, &gelipw, GELI_PW_MAXLEN); } if (geli_passphrase(&gelipw, dskp.unit, 'p', (dskp.slice > 0 ? 
dskp.slice : dskp.part), &dskp) == 0) { setenv("kern.geom.eli.passphrase", &gelipw, 1); bzero(gelipw, sizeof(gelipw)); geli_status[dev->d_unit][dskp.slice] = ISGELI_YES; geli_part++; } } else geli_status[dev->d_unit][dskp.slice] = ISGELI_NO; } /* none of the partitions on this disk have GELI */ if (geli_part == 0) { /* found no GELI */ geli_status[dev->d_unit][dev->d_slice] = ISGELI_NO; } #endif /* LOADER_GELI_SUPPORT */ return (err); } static int bd_close(struct open_file *f) { struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; return (disk_close(dev)); } static int bd_ioctl(struct open_file *f, u_long cmd, void *data) { struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = BD(dev).bd_sectorsize; break; case DIOCGMEDIASIZE: *(off_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize; break; default: return (ENOTTY); } return (0); } static int bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) { struct bcache_devdata bcd; struct disk_devdesc *dev; dev = (struct disk_devdesc *)devdata; bcd.dv_strategy = bd_realstrategy; bcd.dv_devdata = devdata; return (bcache_strategy(&bcd, BD(dev).bd_unit, rw, dblk + dev->d_offset, size, buf, rsize)); } static int bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) { struct disk_devdesc *dev = (struct disk_devdesc *)devdata; int blks; #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */ char fragbuf[BIOSDISK_SECSIZE]; size_t fragsize; fragsize = size % BIOSDISK_SECSIZE; #else if (size % BD(dev).bd_sectorsize) panic("bd_strategy: %d bytes I/O not multiple of block size", size); #endif DEBUG("open_disk %p", dev); blks = size / BD(dev).bd_sectorsize; if (rsize) *rsize = 0; switch(rw){ case F_READ: DEBUG("read %d from %lld to %p", blks, dblk, buf); if (blks && bd_read(dev, dblk, blks, buf)) { DEBUG("read error"); return (EIO); } #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */ 
DEBUG("bd_strategy: frag read %d from %d+%d to %p", fragsize, dblk, blks, buf + (blks * BIOSDISK_SECSIZE)); if (fragsize && bd_read(od, dblk + blks, 1, fragsize)) { DEBUG("frag read error"); return(EIO); } bcopy(fragbuf, buf + (blks * BIOSDISK_SECSIZE), fragsize); #endif break; case F_WRITE : DEBUG("write %d from %d to %p", blks, dblk, buf); if (blks && bd_write(dev, dblk, blks, buf)) { DEBUG("write error"); return (EIO); } #ifdef BD_SUPPORT_FRAGS if(fragsize) { DEBUG("Attempted to write a frag"); return (EIO); } #endif break; default: /* DO NOTHING */ return (EROFS); } if (rsize) *rsize = size; return (0); } /* Max number of sectors to bounce-buffer if the request crosses a 64k boundary */ #define FLOPPY_BOUNCEBUF 18 static int bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { static struct edd_packet packet; packet.len = sizeof(struct edd_packet); packet.count = blks; packet.off = VTOPOFF(dest); packet.seg = VTOPSEG(dest); packet.lba = dblk; v86.ctl = V86_FLAGS; v86.addr = 0x13; if (write) /* Should we Write with verify ?? 0x4302 ? */ v86.eax = 0x4300; else v86.eax = 0x4200; v86.edx = BD(dev).bd_unit; v86.ds = VTOPSEG(&packet); v86.esi = VTOPOFF(&packet); v86int(); return (V86_CY(v86.efl)); } static int bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { u_int x, bpc, cyl, hd, sec; bpc = BD(dev).bd_sec * BD(dev).bd_hds; /* blocks per cylinder */ x = dblk; cyl = x / bpc; /* block # / blocks per cylinder */ x %= bpc; /* block offset into cylinder */ hd = x / BD(dev).bd_sec; /* offset / blocks per track */ sec = x % BD(dev).bd_sec; /* offset into track */ /* correct sector number for 1-based BIOS numbering */ sec++; if (cyl > 1023) /* CHS doesn't support cylinders > 1023. 
*/ return (1); v86.ctl = V86_FLAGS; v86.addr = 0x13; if (write) v86.eax = 0x300 | blks; else v86.eax = 0x200 | blks; v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec; v86.edx = (hd << 8) | BD(dev).bd_unit; v86.es = VTOPSEG(dest); v86.ebx = VTOPOFF(dest); v86int(); return (V86_CY(v86.efl)); } static int bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { u_int x, sec, result, resid, retry, maxfer; caddr_t p, xp, bbuf, breg; /* Just in case some idiot actually tries to read/write -1 blocks... */ if (blks < 0) return (-1); resid = blks; p = dest; /* Decide whether we have to bounce */ if (VTOP(dest) >> 20 != 0 || (BD(dev).bd_unit < 0x80 && (VTOP(dest) >> 16) != (VTOP(dest + blks * BD(dev).bd_sectorsize) >> 16))) { /* * There is a 64k physical boundary somewhere in the * destination buffer, or the destination buffer is above * first 1MB of physical memory so we have to arrange a * suitable bounce buffer. Allocate a buffer twice as large * as we need to. Use the bottom half unless there is a break * there, in which case we use the top half. */ x = min(FLOPPY_BOUNCEBUF, (unsigned)blks); bbuf = alloca(x * 2 * BD(dev).bd_sectorsize); if (((u_int32_t)VTOP(bbuf) & 0xffff0000) == ((u_int32_t)VTOP(bbuf + x * BD(dev).bd_sectorsize) & 0xffff0000)) { breg = bbuf; } else { breg = bbuf + x * BD(dev).bd_sectorsize; } maxfer = x; /* limit transfers to bounce region size */ } else { breg = bbuf = NULL; maxfer = 0; } while (resid > 0) { /* * Play it safe and don't cross track boundaries. * (XXX this is probably unnecessary) */ sec = dblk % BD(dev).bd_sec; /* offset into track */ x = min(BD(dev).bd_sec - sec, resid); if (maxfer > 0) x = min(x, maxfer); /* fit bounce buffer */ /* where do we transfer to? */ xp = bbuf == NULL ? p : breg; /* * Put your Data In, Put your Data out, * Put your Data In, and shake it all about */ if (write && bbuf != NULL) bcopy(p, breg, x * BD(dev).bd_sectorsize); /* * Loop retrying the operation a couple of times. 
The BIOS * may also retry. */ for (retry = 0; retry < 3; retry++) { /* if retrying, reset the drive */ if (retry > 0) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0; v86.edx = BD(dev).bd_unit; v86int(); } if (BD(dev).bd_flags & BD_MODEEDD1) result = bd_edd_io(dev, dblk, x, xp, write); else result = bd_chs_io(dev, dblk, x, xp, write); if (result == 0) break; } if (write) DEBUG("Write %d sector(s) from %p (0x%x) to %lld %s", x, p, VTOP(p), dblk, result ? "failed" : "ok"); else DEBUG("Read %d sector(s) from %lld to %p (0x%x) %s", x, dblk, p, VTOP(p), result ? "failed" : "ok"); if (result) { return(-1); } if (!write && bbuf != NULL) bcopy(breg, p, x * BD(dev).bd_sectorsize); p += (x * BD(dev).bd_sectorsize); dblk += x; resid -= x; } /* hexdump(dest, (blks * BD(dev).bd_sectorsize)); */ return(0); } static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest) { #ifdef LOADER_GELI_SUPPORT struct dsk dskp; - off_t p_off; - int err, n; + off_t p_off, diff; + daddr_t alignlba; + int err, n, alignblks; + char *tmpbuf; /* if we already know there is no GELI, skip the rest */ if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_YES) return (bd_io(dev, dblk, blks, dest, 0)); if (geli_status[dev->d_unit][dev->d_slice] == ISGELI_YES) { - err = bd_io(dev, dblk, blks, dest, 0); + /* + * Align reads to DEV_GELIBOOT_BSIZE bytes because partial + * sectors cannot be decrypted. Round the requested LBA down to + * nearest multiple of DEV_GELIBOOT_BSIZE bytes. + */ + alignlba = dblk & + ~(daddr_t)((DEV_GELIBOOT_BSIZE / BIOSDISK_SECSIZE) - 1); + /* + * Round number of blocks to read up to nearest multiple of + * DEV_GELIBOOT_BSIZE + */ + alignblks = blks + (dblk - alignlba) + + ((DEV_GELIBOOT_BSIZE / BIOSDISK_SECSIZE) - 1) & + ~(int)((DEV_GELIBOOT_BSIZE / BIOSDISK_SECSIZE) - 1); + diff = (dblk - alignlba) * BIOSDISK_SECSIZE; + /* + * Use a temporary buffer here because the buffer provided by + * the caller may be too small. 
+ */ + tmpbuf = alloca(alignblks * BIOSDISK_SECSIZE); + + err = bd_io(dev, alignlba, alignblks, tmpbuf, 0); if (err) return (err); dskp.drive = bd_unit2bios(dev->d_unit); dskp.type = dev->d_type; dskp.unit = dev->d_unit; dskp.slice = dev->d_slice; dskp.part = dev->d_partition; dskp.start = dev->d_offset; /* GELI needs the offset relative to the partition start */ - p_off = dblk - dskp.start; + p_off = alignlba - dskp.start; - err = geli_read(&dskp, p_off * BIOSDISK_SECSIZE, dest, - blks * BIOSDISK_SECSIZE); + err = geli_read(&dskp, p_off * BIOSDISK_SECSIZE, tmpbuf, + alignblks * BIOSDISK_SECSIZE); if (err) return (err); + bcopy(tmpbuf + diff, dest, blks * BIOSDISK_SECSIZE); return (0); } #endif /* LOADER_GELI_SUPPORT */ return (bd_io(dev, dblk, blks, dest, 0)); } static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest) { return (bd_io(dev, dblk, blks, dest, 1)); } /* * Return the BIOS geometry of a given "fixed drive" in a format * suitable for the legacy bootinfo structure. Since the kernel is * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we * prefer to get the information directly, rather than rely on being * able to put it together from information already maintained for * different purposes and for a probably different number of drives. * * For valid drives, the geometry is expected in the format (31..0) * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are * indicated by returning the geometry of a "1.2M" PC-format floppy * disk. And, incidentally, what is returned is not the geometry as * such but the highest valid cylinder, head, and sector numbers. */ u_int32_t bd_getbigeom(int bunit) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = 0x80 + bunit; v86int(); if (V86_CY(v86.efl)) return 0x4f010f; return ((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) | (v86.edx & 0xff00) | (v86.ecx & 0x3f); } /* * Return a suitable dev_t value for (dev). 
* * In the case where it looks like (dev) is a SCSI disk, we allow the number of * IDE disks to be specified in $num_ide_disks. There should be a Better Way. */ int bd_getdev(struct i386_devdesc *d) { struct disk_devdesc *dev; int biosdev; int major; int rootdev; char *nip, *cp; int i, unit; dev = (struct disk_devdesc *)d; biosdev = bd_unit2bios(dev->d_unit); DEBUG("unit %d BIOS device %d", dev->d_unit, biosdev); if (biosdev == -1) /* not a BIOS device */ return(-1); if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize,(BD(dev).bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0) != 0) /* oops, not a viable device */ return (-1); else disk_close(dev); if (biosdev < 0x80) { /* floppy (or emulated floppy) or ATAPI device */ if (bdinfo[dev->d_unit].bd_type == DT_ATAPI) { /* is an ATAPI disk */ major = WFDMAJOR; } else { /* is a floppy disk */ major = FDMAJOR; } } else { /* assume an IDE disk */ major = WDMAJOR; } /* default root disk unit number */ unit = biosdev & 0x7f; /* XXX a better kludge to set the root disk unit number */ if ((nip = getenv("root_disk_unit")) != NULL) { i = strtol(nip, &cp, 0); /* check for parse error */ if ((cp != nip) && (*cp == 0)) unit = i; } rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition); DEBUG("dev is 0x%x\n", rootdev); return(rootdev); } #ifdef LOADER_GELI_SUPPORT int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes) { struct disk_devdesc dev; dev.d_dev = &biosdisk; dev.d_type = priv->type; dev.d_unit = priv->unit; dev.d_slice = priv->slice; dev.d_partition = priv->part; dev.d_offset = priv->start; off = off / BIOSDISK_SECSIZE; /* GELI gives us the offset relative to the partition start */ off += dev.d_offset; bytes = bytes / BIOSDISK_SECSIZE; return (bd_io(&dev, off, bytes, buf, 0)); } #endif /* LOADER_GELI_SUPPORT */ Index: head/sys/boot/i386/zfsboot/zfsboot.c =================================================================== --- 
head/sys/boot/i386/zfsboot/zfsboot.c (revision 297628) +++ head/sys/boot/i386/zfsboot/zfsboot.c (revision 297629) @@ -1,919 +1,950 @@ /*- * Copyright (c) 1998 Robert Nordier * All rights reserved. * * Redistribution and use in source and binary forms are freely * permitted provided that the above copyright notice and this * paragraph and the following disclaimer are duplicated in all * such forms. * * This software is provided "AS IS" and without any express or * implied warranties, including, without limitation, the implied * warranties of merchantability and fitness for a particular * purpose. */ #include __FBSDID("$FreeBSD$"); #include #include #include #ifdef GPT #include #endif #include #include #include #include #include #include #include #include #include #include "lib.h" #include "rbx.h" #include "drv.h" #include "util.h" #include "cons.h" #include "bootargs.h" #include "paths.h" #include "libzfs.h" -#define ARGS 0x900 -#define NOPT 14 -#define NDEV 3 +#define ARGS 0x900 +#define NOPT 14 +#define NDEV 3 -#define BIOS_NUMDRIVES 0x475 -#define DRV_HARD 0x80 -#define DRV_MASK 0x7f +#define BIOS_NUMDRIVES 0x475 +#define DRV_HARD 0x80 +#define DRV_MASK 0x7f -#define TYPE_AD 0 -#define TYPE_DA 1 -#define TYPE_MAXHARD TYPE_DA -#define TYPE_FD 2 +#define TYPE_AD 0 +#define TYPE_DA 1 +#define TYPE_MAXHARD TYPE_DA +#define TYPE_FD 2 +#define DEV_GELIBOOT_BSIZE 4096 + extern uint32_t _end; #ifdef GPT static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS; #endif static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ static const unsigned char flags[NOPT] = { RBX_DUAL, RBX_SERIAL, RBX_ASKNAME, RBX_CDROM, RBX_CONFIG, RBX_KDB, RBX_GDB, RBX_MUTE, RBX_NOINTR, RBX_PAUSE, RBX_QUIET, RBX_DFLTROOT, RBX_SINGLE, RBX_VERBOSE }; uint32_t opts; static const char *const dev_nm[NDEV] = {"ad", "da", "fd"}; static const unsigned char dev_maj[NDEV] = {30, 4, 2}; static char cmd[512]; static char cmddup[512]; static char kname[1024]; static char rootname[256]; 
static int comspeed = SIOSPD; static struct bootinfo bootinfo; static uint32_t bootdev; static struct zfs_boot_args zfsargs; static struct zfsmount zfsmount; vm_offset_t high_heap_base; uint32_t bios_basemem, bios_extmem, high_heap_size; static struct bios_smap smap; /* * The minimum amount of memory to reserve in bios_extmem for the heap. */ -#define HEAP_MIN (3 * 1024 * 1024) +#define HEAP_MIN (3 * 1024 * 1024) static char *heap_next; static char *heap_end; /* Buffers that must not span a 64k boundary. */ -#define READ_BUF_SIZE 8192 +#define READ_BUF_SIZE 8192 struct dmadat { char rdbuf[READ_BUF_SIZE]; /* for reading large things */ char secbuf[READ_BUF_SIZE]; /* for MBR/disklabel */ }; static struct dmadat *dmadat; void exit(int); static void load(void); static int parse(void); static void bios_getmem(void); void *malloc(size_t n); void free(void *ptr); void * malloc(size_t n) { char *p = heap_next; if (p + n > heap_end) { printf("malloc failure\n"); for (;;) ; /* NOTREACHED */ return (0); } heap_next += n; return (p); } void free(void *ptr) { return; } static char * strdup(const char *s) { char *p = malloc(strlen(s) + 1); strcpy(p, s); return (p); } #ifdef LOADER_GELI_SUPPORT #include "geliboot.c" static char gelipw[GELI_PW_MAXLEN]; #endif #include "zfsimpl.c" /* * Read from a dnode (which must be from a ZPL filesystem). */ static int zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size) { const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus; size_t n; int rc; n = size; if (*offp + n > zp->zp_size) n = zp->zp_size - *offp; rc = dnode_read(spa, dnode, *offp, start, n); if (rc) return (-1); *offp += n; return (n); } /* * Current ZFS pool */ static spa_t *spa; static spa_t *primary_spa; static vdev_t *primary_vdev; /* * A wrapper for dskread that doesn't have to worry about whether the * buffer pointer crosses a 64k boundary. 
*/ static int vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) { char *p; - daddr_t lba; - unsigned int nb; + daddr_t lba, alignlba; + off_t alignoff, diff; + unsigned int nb, alignnb; struct dsk *dsk = (struct dsk *) priv; if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) return -1; p = buf; lba = off / DEV_BSIZE; lba += dsk->start; + /* Align reads to 4k else 4k sector GELIs will not decrypt. */ + alignoff = off & ~ (off_t)(DEV_GELIBOOT_BSIZE - 1); + /* Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes. */ + alignlba = alignoff / DEV_BSIZE; + /* + * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the + * start of the GELI partition, not the start of the actual disk. + */ + alignlba += dsk->start; + diff = (lba - alignlba) * DEV_BSIZE; + while (bytes > 0) { nb = bytes / DEV_BSIZE; if (nb > READ_BUF_SIZE / DEV_BSIZE) nb = READ_BUF_SIZE / DEV_BSIZE; - if (drvread(dsk, dmadat->rdbuf, lba, nb)) + /* + * Ensure that the read size plus the leading offset does not + * exceed the size of the read buffer. + */ + if (nb * DEV_BSIZE + diff > READ_BUF_SIZE) + nb -= diff / DEV_BSIZE; + /* + * Round the number of blocks to read up to the nearest multiple + * of DEV_GELIBOOT_BSIZE. + */ + alignnb = nb + (diff / DEV_BSIZE) + + (DEV_GELIBOOT_BSIZE / DEV_BSIZE - 1) & ~ + (unsigned int)(DEV_GELIBOOT_BSIZE / DEV_BSIZE - 1); + + if (drvread(dsk, dmadat->rdbuf, alignlba, alignnb)) return -1; #ifdef LOADER_GELI_SUPPORT /* decrypt */ if (is_geli(dsk) == 0) { - if (geli_read(dsk, ((lba - dsk->start) * DEV_BSIZE), - dmadat->rdbuf, nb * DEV_BSIZE)) - return (-1); + if (geli_read(dsk, ((alignlba - dsk->start) * + DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE)) + return (-1); } #endif - memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE); + memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE); p += nb * DEV_BSIZE; lba += nb; + alignlba += alignnb; bytes -= nb * DEV_BSIZE; + /* Don't need the leading offset after the first block. 
*/ + diff = 0; } return 0; } static int xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte) { if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) { printf("Invalid format\n"); return -1; } return 0; } static void bios_getmem(void) { uint64_t size; /* Parse system memory map */ v86.ebx = 0; do { v86.ctl = V86_FLAGS; v86.addr = 0x15; /* int 0x15 function 0xe820*/ v86.eax = 0xe820; v86.ecx = sizeof(struct bios_smap); v86.edx = SMAP_SIG; v86.es = VTOPSEG(&smap); v86.edi = VTOPOFF(&smap); v86int(); if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG)) break; /* look for a low-memory segment that's large enough */ if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) && (smap.length >= (512 * 1024))) bios_basemem = smap.length; /* look for the first segment in 'extended' memory */ if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) { bios_extmem = smap.length; } /* * Look for the largest segment in 'extended' memory beyond * 1MB but below 4GB. */ if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) && (smap.base < 0x100000000ull)) { size = smap.length; /* * If this segment crosses the 4GB boundary, truncate it. 
*/ if (smap.base + size > 0x100000000ull) size = 0x100000000ull - smap.base; if (size > high_heap_size) { high_heap_size = size; high_heap_base = smap.base; } } } while (v86.ebx != 0); /* Fall back to the old compatibility function for base memory */ if (bios_basemem == 0) { v86.ctl = 0; v86.addr = 0x12; /* int 0x12 */ v86int(); bios_basemem = (v86.eax & 0xffff) * 1024; } /* Fall back through several compatibility functions for extended memory */ if (bios_extmem == 0) { v86.ctl = V86_FLAGS; v86.addr = 0x15; /* int 0x15 function 0xe801*/ v86.eax = 0xe801; v86int(); if (!V86_CY(v86.efl)) { bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024; } } if (bios_extmem == 0) { v86.ctl = 0; v86.addr = 0x15; /* int 0x15 function 0x88*/ v86.eax = 0x8800; v86int(); bios_extmem = (v86.eax & 0xffff) * 1024; } /* * If we have extended memory and did not find a suitable heap * region in the SMAP, use the last 3MB of 'extended' memory as a * high heap candidate. */ if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) { high_heap_size = HEAP_MIN; high_heap_base = bios_extmem + 0x100000 - HEAP_MIN; } } /* * Try to detect a device supported by the legacy int13 BIOS */ static int int13probe(int drive) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = drive; v86int(); if (!V86_CY(v86.efl) && /* carry clear */ ((v86.edx & 0xff) != (drive & DRV_MASK))) { /* unit # OK */ if ((v86.ecx & 0x3f) == 0) { /* absurd sector size */ return(0); /* skip device */ } return (1); } return(0); } /* * We call this when we find a ZFS vdev - ZFS consumes the dsk * structure so we must make a new one. 
*/ static struct dsk * copy_dsk(struct dsk *dsk) { struct dsk *newdsk; newdsk = malloc(sizeof(struct dsk)); *newdsk = *dsk; return (newdsk); } static void probe_drive(struct dsk *dsk) { #ifdef GPT struct gpt_hdr hdr; struct gpt_ent *ent; unsigned part, entries_per_sec; #endif daddr_t slba, elba; struct dos_partition *dp; char *sec; unsigned i; /* * If we find a vdev on the whole disk, stop here. */ if (vdev_probe(vdev_read, dsk, NULL) == 0) return; #ifdef LOADER_GELI_SUPPORT /* * Taste the disk, if it is GELI encrypted, decrypt it and check to see if * it is a usable vdev then. Otherwise dig * out the partition table and probe each slice/partition * in turn for a vdev or GELI encrypted vdev. */ elba = drvsize(dsk); if (elba > 0) { elba--; } if (geli_taste(vdev_read, dsk, elba) == 0) { if (geli_passphrase(&gelipw, dsk->unit, ':', 0, dsk) == 0) { if (vdev_probe(vdev_read, dsk, NULL) == 0) { return; } } } #endif /* LOADER_GELI_SUPPORT */ sec = dmadat->secbuf; dsk->start = 0; #ifdef GPT /* * First check for GPT. */ if (drvread(dsk, sec, 1, 1)) { return; } memcpy(&hdr, sec, sizeof(hdr)); if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) { goto trymbr; } /* * Probe all GPT partitions for the presense of ZFS pools. We * return the spa_t for the first we find (if requested). This * will have the effect of booting from the first pool on the * disk. 
* * If no vdev is found, GELI decrypting the device and try again */ entries_per_sec = DEV_BSIZE / hdr.hdr_entsz; slba = hdr.hdr_lba_table; elba = slba + hdr.hdr_entries / entries_per_sec; while (slba < elba) { dsk->start = 0; if (drvread(dsk, sec, slba, 1)) return; for (part = 0; part < entries_per_sec; part++) { ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz); if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, sizeof(uuid_t)) == 0) { dsk->start = ent->ent_lba_start; dsk->slice = part + 1; dsk->part = 255; if (vdev_probe(vdev_read, dsk, NULL) == 0) { /* * This slice had a vdev. We need a new dsk * structure now since the vdev now owns this one. */ dsk = copy_dsk(dsk); } #ifdef LOADER_GELI_SUPPORT else if (geli_taste(vdev_read, dsk, ent->ent_lba_end - ent->ent_lba_start) == 0) { if (geli_passphrase(&gelipw, dsk->unit, 'p', dsk->slice, dsk) == 0) { /* * This slice has GELI, check it for ZFS. */ if (vdev_probe(vdev_read, dsk, NULL) == 0) { /* * This slice had a vdev. We need a new dsk * structure now since the vdev now owns this one. */ dsk = copy_dsk(dsk); } break; } } #endif /* LOADER_GELI_SUPPORT */ } } slba++; } return; trymbr: #endif /* GPT */ if (drvread(dsk, sec, DOSBBSECTOR, 1)) return; dp = (void *)(sec + DOSPARTOFF); for (i = 0; i < NDOSPART; i++) { if (!dp[i].dp_typ) continue; dsk->start = dp[i].dp_start; dsk->slice = i + 1; if (vdev_probe(vdev_read, dsk, NULL) == 0) { dsk = copy_dsk(dsk); } #ifdef LOADER_GELI_SUPPORT else if (geli_taste(vdev_read, dsk, dp[i].dp_size - dp[i].dp_start) == 0) { if (geli_passphrase(&gelipw, dsk->unit, 's', i, dsk) == 0) { /* * This slice has GELI, check it for ZFS. */ if (vdev_probe(vdev_read, dsk, NULL) == 0) { /* * This slice had a vdev. We need a new dsk * structure now since the vdev now owns this one. 
*/ dsk = copy_dsk(dsk); } break; } } #endif /* LOADER_GELI_SUPPORT */ } } int main(void) { int autoboot, i; dnode_phys_t dn; off_t off; struct dsk *dsk; dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); bios_getmem(); if (high_heap_size > 0) { heap_end = PTOV(high_heap_base + high_heap_size); heap_next = PTOV(high_heap_base); } else { heap_next = (char *)dmadat + sizeof(*dmadat); heap_end = (char *)PTOV(bios_basemem); } dsk = malloc(sizeof(struct dsk)); dsk->drive = *(uint8_t *)PTOV(ARGS); dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD; dsk->unit = dsk->drive & DRV_MASK; dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1; dsk->part = 0; dsk->start = 0; dsk->init = 0; bootinfo.bi_version = BOOTINFO_VERSION; bootinfo.bi_size = sizeof(bootinfo); bootinfo.bi_basemem = bios_basemem / 1024; bootinfo.bi_extmem = bios_extmem / 1024; bootinfo.bi_memsizes_valid++; bootinfo.bi_bios_dev = dsk->drive; bootdev = MAKEBOOTDEV(dev_maj[dsk->type], dsk->slice, dsk->unit, dsk->part), /* Process configuration file */ autoboot = 1; #ifdef LOADER_GELI_SUPPORT geli_init(); #endif zfs_init(); /* * Probe the boot drive first - we will try to boot from whatever * pool we find on that drive. */ probe_drive(dsk); /* * Probe the rest of the drives that the bios knows about. This * will find any other available pools and it may fill in missing * vdevs for the boot pool. */ #ifndef VIRTUALBOX for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++) #else for (i = 0; i < MAXBDDEV; i++) #endif { if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS)) continue; if (!int13probe(i | DRV_HARD)) break; dsk = malloc(sizeof(struct dsk)); dsk->drive = i | DRV_HARD; dsk->type = dsk->drive & TYPE_AD; dsk->unit = i; dsk->slice = 0; dsk->part = 0; dsk->start = 0; dsk->init = 0; probe_drive(dsk); } /* * The first discovered pool, if any, is the pool. 
*/ spa = spa_get_primary(); if (!spa) { printf("%s: No ZFS pools located, can't boot\n", BOOTPROG); for (;;) ; } primary_spa = spa; primary_vdev = spa_get_primary_vdev(spa); if (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0) { printf("%s: failed to mount default pool %s\n", BOOTPROG, spa->spa_name); autoboot = 0; } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 || zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) { off = 0; zfs_read(spa, &dn, &off, cmd, sizeof(cmd)); } if (*cmd) { /* * Note that parse() is destructive to cmd[] and we also want * to honor RBX_QUIET option that could be present in cmd[]. */ memcpy(cmddup, cmd, sizeof(cmd)); if (parse()) autoboot = 0; if (!OPT_CHECK(RBX_QUIET)) printf("%s: %s\n", PATH_CONFIG, cmddup); /* Do not process this command twice */ *cmd = 0; } /* * Try to exec /boot/loader. If interrupted by a keypress, * or in case of failure, try to load a kernel directly instead. */ if (autoboot && !*kname) { memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS)); if (!keyhit(3)) { load(); memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL)); } } /* Present the user with the boot2 prompt. */ for (;;) { if (!autoboot || !OPT_CHECK(RBX_QUIET)) { printf("\nFreeBSD/x86 boot\n"); if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0) printf("Default: %s/<0x%llx>:%s\n" "boot: ", spa->spa_name, zfsmount.rootobj, kname); else if (rootname[0] != '\0') printf("Default: %s/%s:%s\n" "boot: ", spa->spa_name, rootname, kname); else printf("Default: %s:%s\n" "boot: ", spa->spa_name, kname); } if (ioctrl & IO_SERIAL) sio_flush(); if (!autoboot || keyhit(5)) getstr(cmd, sizeof(cmd)); else if (!autoboot || !OPT_CHECK(RBX_QUIET)) putchar('\n'); autoboot = 0; if (parse()) putchar('\a'); else load(); } } /* XXX - Needed for btxld to link the boot2 binary; do not remove. 
*/ void exit(int x) { } static void load(void) { union { struct exec ex; Elf32_Ehdr eh; } hdr; static Elf32_Phdr ep[2]; static Elf32_Shdr es[2]; caddr_t p; dnode_phys_t dn; off_t off; uint32_t addr, x; int fmt, i, j; if (zfs_lookup(&zfsmount, kname, &dn)) { printf("\nCan't find %s\n", kname); return; } off = 0; if (xfsread(&dn, &off, &hdr, sizeof(hdr))) return; if (N_GETMAGIC(hdr.ex) == ZMAGIC) fmt = 0; else if (IS_ELF(hdr.eh)) fmt = 1; else { printf("Invalid %s\n", "format"); return; } if (fmt == 0) { addr = hdr.ex.a_entry & 0xffffff; p = PTOV(addr); off = PAGE_SIZE; if (xfsread(&dn, &off, p, hdr.ex.a_text)) return; p += roundup2(hdr.ex.a_text, PAGE_SIZE); if (xfsread(&dn, &off, p, hdr.ex.a_data)) return; p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); p += sizeof(hdr.ex.a_syms); if (hdr.ex.a_syms) { if (xfsread(&dn, &off, p, hdr.ex.a_syms)) return; p += hdr.ex.a_syms; if (xfsread(&dn, &off, p, sizeof(int))) return; x = *(uint32_t *)p; p += sizeof(int); x -= sizeof(int); if (xfsread(&dn, &off, p, x)) return; p += x; } } else { off = hdr.eh.e_phoff; for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { if (xfsread(&dn, &off, ep + j, sizeof(ep[0]))) return; if (ep[j].p_type == PT_LOAD) j++; } for (i = 0; i < 2; i++) { p = PTOV(ep[i].p_paddr & 0xffffff); off = ep[i].p_offset; if (xfsread(&dn, &off, p, ep[i].p_filesz)) return; } p += roundup2(ep[1].p_memsz, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { off = hdr.eh.e_shoff + sizeof(es[0]) * (hdr.eh.e_shstrndx + 1); if (xfsread(&dn, &off, &es, sizeof(es))) return; for (i = 0; i < 2; i++) { memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); p += sizeof(es[i].sh_size); off = es[i].sh_offset; if (xfsread(&dn, &off, p, es[i].sh_size)) return; p += es[i].sh_size; } } addr = hdr.eh.e_entry & 0xffffff; } bootinfo.bi_esymtab = VTOP(p); bootinfo.bi_kernelname = VTOP(kname); zfsargs.size = 
sizeof(zfsargs); zfsargs.pool = zfsmount.spa->spa_guid; zfsargs.root = zfsmount.rootobj; zfsargs.primary_pool = primary_spa->spa_guid; #ifdef LOADER_GELI_SUPPORT bcopy(gelipw, zfsargs.gelipw, sizeof(zfsargs.gelipw)); bzero(gelipw, sizeof(gelipw)); #else zfsargs.gelipw[0] = '\0'; #endif if (primary_vdev != NULL) zfsargs.primary_vdev = primary_vdev->v_guid; else printf("failed to detect primary vdev\n"); __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), bootdev, KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, (uint32_t) spa->spa_guid, (uint32_t) (spa->spa_guid >> 32), VTOP(&bootinfo), zfsargs); } static int zfs_mount_ds(char *dsname) { uint64_t newroot; spa_t *newspa; char *q; q = strchr(dsname, '/'); if (q) *q++ = '\0'; newspa = spa_find_by_name(dsname); if (newspa == NULL) { printf("\nCan't find ZFS pool %s\n", dsname); return -1; } if (zfs_spa_init(newspa)) return -1; newroot = 0; if (q) { if (zfs_lookup_dataset(newspa, q, &newroot)) { printf("\nCan't find dataset %s in ZFS pool %s\n", q, newspa->spa_name); return -1; } } if (zfs_mount(newspa, newroot, &zfsmount)) { printf("\nCan't mount ZFS dataset\n"); return -1; } spa = newspa; return (0); } static int parse(void) { char *arg = cmd; char *ep, *p, *q; const char *cp; int c, i, j; while ((c = *arg++)) { if (c == ' ' || c == '\t' || c == '\n') continue; for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++); ep = p; if (*p) *p++ = 0; if (c == '-') { while ((c = *arg++)) { if (c == 'P') { if (*(uint8_t *)PTOV(0x496) & 0x10) { cp = "yes"; } else { opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL); cp = "no"; } printf("Keyboard: %s\n", cp); continue; } else if (c == 'S') { j = 0; while ((unsigned int)(i = *arg++ - '0') <= 9) j = j * 10 + i; if (j > 0 && i == -'0') { comspeed = j; break; } /* Fall through to error below ('S' not in optstr[]). */ } for (i = 0; c != optstr[i]; i++) if (i == NOPT - 1) return -1; opts ^= OPT_SET(flags[i]); } ioctrl = OPT_CHECK(RBX_DUAL) ? 
(IO_SERIAL|IO_KEYBOARD) : OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD; if (ioctrl & IO_SERIAL) { if (sio_init(115200 / comspeed) != 0) ioctrl &= ~IO_SERIAL; } } if (c == '?') { dnode_phys_t dn; if (zfs_lookup(&zfsmount, arg, &dn) == 0) { zap_list(spa, &dn); } return -1; } else { arg--; /* * Report pool status if the comment is 'status'. Lets * hope no-one wants to load /status as a kernel. */ if (!strcmp(arg, "status")) { spa_all_status(); return -1; } /* * If there is "zfs:" prefix simply ignore it. */ if (strncmp(arg, "zfs:", 4) == 0) arg += 4; /* * If there is a colon, switch pools. */ q = strchr(arg, ':'); if (q) { *q++ = '\0'; if (zfs_mount_ds(arg) != 0) return -1; arg = q; } if ((i = ep - arg)) { if ((size_t)i >= sizeof(kname)) return -1; memcpy(kname, arg, i + 1); } } arg = p; } return 0; }