diff --git a/stand/common/bootstrap.h b/stand/common/bootstrap.h index eb4e50203133..ea03519f5b39 100644 --- a/stand/common/bootstrap.h +++ b/stand/common/bootstrap.h @@ -1,407 +1,411 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _BOOTSTRAP_H_ #define _BOOTSTRAP_H_ +#include #include #include #include #include #include "readin.h" /* Commands and return values; nonzero return sets command_errmsg != NULL */ typedef int (bootblk_cmd_t)(int argc, char *argv[]); #define COMMAND_ERRBUFSZ (256) extern const char *command_errmsg; extern char command_errbuf[COMMAND_ERRBUFSZ]; #define CMD_OK 0 #define CMD_WARN 1 #define CMD_ERROR 2 #define CMD_CRIT 3 #define CMD_FATAL 4 /* interp.c */ void interact(void); void interp_emit_prompt(void); int interp_builtin_cmd(int argc, char *argv[]); /* Called by interp.c for interp_*.c embedded interpreters */ int interp_include(const char *); /* Execute commands from filename */ void interp_init(void); /* Initialize interpreter */ int interp_run(const char *); /* Run a single command */ /* interp_backslash.c */ char *backslash(const char *str); /* interp_parse.c */ int parse(int *argc, char ***argv, const char *str); /* boot.c */ void autoboot_maybe(void); int getrootmount(char *rootdev); /* misc.c */ char *unargv(int argc, char *argv[]); size_t strlenout(vm_offset_t str); char *strdupout(vm_offset_t str); void kern_bzero(vm_offset_t dest, size_t len); int kern_pread(readin_handle_t fd, vm_offset_t dest, size_t len, off_t off); void *alloc_pread(readin_handle_t fd, off_t off, size_t len); /* bcache.c */ void bcache_init(size_t nblks, size_t bsize); void bcache_add_dev(int); void *bcache_allocate(void); void bcache_free(void *); int bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize); /* * Disk block cache */ struct bcache_devdata { int (*dv_strategy)(void *, int, daddr_t, size_t, char *, size_t *); void *dv_devdata; void *dv_cache; }; /* * Modular console support. 
*/ struct console { const char *c_name; const char *c_desc; int c_flags; #define C_PRESENTIN (1<<0) /* console can provide input */ #define C_PRESENTOUT (1<<1) /* console can provide output */ #define C_ACTIVEIN (1<<2) /* user wants input from console */ #define C_ACTIVEOUT (1<<3) /* user wants output to console */ #define C_WIDEOUT (1<<4) /* c_out routine groks wide chars */ /* set c_flags to match hardware */ void (* c_probe)(struct console *cp); /* reinit XXX may need more args */ int (* c_init)(int arg); /* emit c */ void (* c_out)(int c); /* wait for and return input */ int (* c_in)(void); /* return nonzero if input waiting */ int (* c_ready)(void); }; extern struct console *consoles[]; void cons_probe(void); bool cons_update_mode(bool); void autoload_font(bool); /* * Plug-and-play enumerator/configurator interface. */ struct pnphandler { const char *pp_name; /* handler/bus name */ void (*pp_enumerate)(void); /* enumerate PnP devices, add to chain */ }; struct pnpident { /* ASCII identifier, actual format varies with bus/handler */ char *id_ident; STAILQ_ENTRY(pnpident) id_link; }; struct pnpinfo { char *pi_desc; /* ASCII description, optional */ int pi_revision; /* optional revision (or -1) if not supported */ char *pi_module; /* module/args nominated to handle device */ int pi_argc; /* module arguments */ char **pi_argv; struct pnphandler *pi_handler; /* handler which detected this device */ STAILQ_HEAD(, pnpident) pi_ident; /* list of identifiers */ STAILQ_ENTRY(pnpinfo) pi_link; }; STAILQ_HEAD(pnpinfo_stql, pnpinfo); extern struct pnphandler *pnphandlers[]; /* provided by MD code */ void pnp_addident(struct pnpinfo *pi, char *ident); struct pnpinfo *pnp_allocinfo(void); void pnp_freeinfo(struct pnpinfo *pi); void pnp_addinfo(struct pnpinfo *pi); char *pnp_eisaformat(uint8_t *data); /* * < 0 - No ISA in system * == 0 - Maybe ISA, search for read data port * > 0 - ISA in system, value is read data port address */ extern int isapnp_readport; /* * Version 
information */ extern char bootprog_info[]; /* * Interpreter information */ extern const char bootprog_interp[]; #define INTERP_DEFINE(interpstr) \ const char bootprog_interp[] = "$Interpreter:" interpstr /* * Preloaded file metadata header. * * Metadata are allocated on our heap, and copied into kernel space * before executing the kernel. */ struct file_metadata { size_t md_size; uint16_t md_type; struct file_metadata *md_next; char md_data[1]; /* data are immediately appended */ }; struct preloaded_file; struct mod_depend; struct kernel_module { char *m_name; /* module name */ int m_version; /* module version */ /* char *m_args; */ /* arguments for the module */ struct preloaded_file *m_fp; struct kernel_module *m_next; }; /* * Preloaded file information. Depending on type, file can contain * additional units called 'modules'. * * At least one file (the kernel) must be loaded in order to boot. * The kernel is always loaded first. * * String fields (f_name, f_type) should be dynamically allocated. */ struct preloaded_file { char *f_name; /* file name */ char *f_type; /* verbose file type, eg 'ELF kernel', 'pnptable', etc. 
*/ char *f_args; /* arguments for the file */ /* metadata that will be placed in the module directory */ struct file_metadata *f_metadata; int f_loader; /* index of the loader that read the file */ vm_offset_t f_addr; /* load address */ size_t f_size; /* file size */ struct kernel_module *f_modules; /* list of modules if any */ struct preloaded_file *f_next; /* next file */ #ifdef __amd64__ bool f_kernphys_relocatable; #endif }; struct file_format { /* * Load function must return EFTYPE if it can't handle * the module supplied */ int (*l_load)(char *, uint64_t, struct preloaded_file **); /* * Only a loader that will load a kernel (first module) * should have an exec handler */ int (*l_exec)(struct preloaded_file *); }; extern struct file_format *file_formats[]; /* supplied by consumer */ extern struct preloaded_file *preloaded_files; int mod_load(char *name, struct mod_depend *verinfo, int argc, char *argv[]); int mod_loadkld(const char *name, int argc, char *argv[]); void unload(void); struct preloaded_file *file_alloc(void); struct preloaded_file *file_findfile(const char *name, const char *type); struct file_metadata *file_findmetadata(struct preloaded_file *fp, int type); struct preloaded_file *file_loadraw(const char *name, char *type, int insert); void file_discard(struct preloaded_file *fp); void file_addmetadata(struct preloaded_file *, int, size_t, void *); int file_addmodule(struct preloaded_file *, char *, int, struct kernel_module **); void file_removemetadata(struct preloaded_file *fp); int file_addbuf(const char *name, const char *type, size_t len, void *buf); int tslog_init(void); int tslog_publish(void); vm_offset_t build_font_module(vm_offset_t); /* MI module loaders */ #ifdef __elfN /* Relocation types. 
*/ #define ELF_RELOC_REL 1 #define ELF_RELOC_RELA 2 /* Relocation offset for some architectures */ extern uint64_t __elfN(relocation_offset); struct elf_file; typedef Elf_Addr (symaddr_fn)(struct elf_file *ef, Elf_Size symidx); int __elfN(loadfile)(char *, uint64_t, struct preloaded_file **); int __elfN(obj_loadfile)(char *, uint64_t, struct preloaded_file **); int __elfN(reloc)(struct elf_file *ef, symaddr_fn *symaddr, const void *reldata, int reltype, Elf_Addr relbase, Elf_Addr dataaddr, void *data, size_t len); int __elfN(loadfile_raw)(char *, uint64_t, struct preloaded_file **, int); int __elfN(load_modmetadata)(struct preloaded_file *, uint64_t); #endif /* * Support for commands */ struct bootblk_command { const char *c_name; const char *c_desc; bootblk_cmd_t *c_fn; }; #define COMMAND_SET(tag, key, desc, func) \ static bootblk_cmd_t func; \ static struct bootblk_command _cmd_ ## tag = { key, desc, func }; \ DATA_SET(Xcommand_set, _cmd_ ## tag) SET_DECLARE(Xcommand_set, struct bootblk_command); /* * The intention of the architecture switch is to provide a convenient * encapsulation of the interface between the bootstrap MI and MD code. * MD code may selectively populate the switch at runtime based on the * actual configuration of the target system. 
*/ struct arch_switch { /* Automatically load modules as required by detected hardware */ int (*arch_autoload)(void); /* Locate the device for (name), return pointer to tail in (*path) */ int (*arch_getdev)(void **dev, const char *name, const char **path); /* * Copy from local address space to module address space, * similar to bcopy() */ ssize_t (*arch_copyin)(const void *, vm_offset_t, const size_t); /* * Copy to local address space from module address space, * similar to bcopy() */ ssize_t (*arch_copyout)(const vm_offset_t, void *, const size_t); /* Read from file to module address space, same semantics as read() */ ssize_t (*arch_readin)(readin_handle_t, vm_offset_t, const size_t); /* Perform ISA byte port I/O (only for systems with ISA) */ int (*arch_isainb)(int port); void (*arch_isaoutb)(int port, int value); /* * Interface to adjust the load address according to the "object" * being loaded. */ uint64_t (*arch_loadaddr)(u_int type, void *data, uint64_t addr); #define LOAD_ELF 1 /* data points to the ELF header. */ #define LOAD_RAW 2 /* data points to the file name. */ /* * Interface to inform MD code about a loaded (ELF) segment. This * can be used to flush caches and/or set up translations. */ #ifdef __elfN void (*arch_loadseg)(Elf_Ehdr *eh, Elf_Phdr *ph, uint64_t delta); #else void (*arch_loadseg)(void *eh, void *ph, uint64_t delta); #endif /* Probe ZFS pool(s), if needed. */ void (*arch_zfs_probe)(void); /* Return the hypervisor name/type or NULL if not virtualized. */ const char *(*arch_hypervisor)(void); /* For kexec-type loaders, get ksegment structure */ void (*arch_kexec_kseg_get)(int *nseg, void **kseg); }; extern struct arch_switch archsw; /* This must be provided by the MD code, but should it be in the archsw? */ void delay(int delay); void dev_cleanup(void); /* * nvstore API. 
*/ typedef int (nvstore_getter_cb_t)(void *, const char *, void **); typedef int (nvstore_setter_cb_t)(void *, int, const char *, const void *, size_t); typedef int (nvstore_setter_str_cb_t)(void *, const char *, const char *, const char *); typedef int (nvstore_unset_cb_t)(void *, const char *); typedef int (nvstore_print_cb_t)(void *, void *); typedef int (nvstore_iterate_cb_t)(void *, int (*)(void *, void *)); typedef struct nvs_callbacks { nvstore_getter_cb_t *nvs_getter; nvstore_setter_cb_t *nvs_setter; nvstore_setter_str_cb_t *nvs_setter_str; nvstore_unset_cb_t *nvs_unset; nvstore_print_cb_t *nvs_print; nvstore_iterate_cb_t *nvs_iterate; } nvs_callbacks_t; int nvstore_init(const char *, nvs_callbacks_t *, void *); int nvstore_fini(const char *); void *nvstore_get_store(const char *); int nvstore_print(void *); int nvstore_get_var(void *, const char *, void **); int nvstore_set_var(void *, int, const char *, void *, size_t); int nvstore_set_var_from_string(void *, const char *, const char *, const char *); int nvstore_unset_var(void *, const char *); +/* common code to set currdev variable. */ +extern int mount_currdev(struct env_var *, int, const void *); + #ifndef CTASSERT #define CTASSERT(x) _Static_assert(x, "compile-time assertion failed") #endif #endif /* !_BOOTSTRAP_H_ */ diff --git a/stand/common/misc.c b/stand/common/misc.c index 9e6bea1e244c..9cb5550344ca 100644 --- a/stand/common/misc.c +++ b/stand/common/misc.c @@ -1,181 +1,203 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include /* * Concatenate the (argc) elements of (argv) into a single string, and return * a copy of same. */ char * unargv(int argc, char *argv[]) { size_t hlong; int i; char *cp; for (i = 0, hlong = 0; i < argc; i++) hlong += strlen(argv[i]) + 2; if(hlong == 0) return(NULL); cp = malloc(hlong); cp[0] = 0; for (i = 0; i < argc; i++) { strcat(cp, argv[i]); if (i < (argc - 1)) strcat(cp, " "); } return(cp); } /* * Get the length of a string in kernel space */ size_t strlenout(vm_offset_t src) { char c; size_t len; for (len = 0; ; len++) { archsw.arch_copyout(src++, &c, 1); if (c == 0) break; } return(len); } /* * Make a duplicate copy of a string in kernel space */ char * strdupout(vm_offset_t str) { char *result, *cp; result = malloc(strlenout(str) + 1); for (cp = result; ;cp++) { archsw.arch_copyout(str++, cp, 1); if (*cp == 0) break; } return(result); } /* Zero a region in kernel space. 
*/ void kern_bzero(vm_offset_t dest, size_t len) { char buf[256]; size_t chunk, resid; bzero(buf, sizeof(buf)); resid = len; while (resid > 0) { chunk = min(sizeof(buf), resid); archsw.arch_copyin(buf, dest, chunk); resid -= chunk; dest += chunk; } } /* * Read the specified part of a file to kernel space. Unlike regular * pread, the file pointer is advanced to the end of the read data, * and it just returns 0 if successful. */ int kern_pread(readin_handle_t fd, vm_offset_t dest, size_t len, off_t off) { if (VECTX_LSEEK(fd, off, SEEK_SET) == -1) { #ifdef DEBUG printf("\nlseek failed\n"); #endif return (-1); } if ((size_t)archsw.arch_readin(fd, dest, len) != len) { #ifdef DEBUG printf("\nreadin failed\n"); #endif return (-1); } return (0); } /* * Read the specified part of a file to a malloced buffer. The file * pointer is advanced to the end of the read data. */ /* coverity[ -tainted_data_return ] */ void * alloc_pread(readin_handle_t fd, off_t off, size_t len) { void *buf; buf = malloc(len); if (buf == NULL) { #ifdef DEBUG printf("\nmalloc(%d) failed\n", (int)len); #endif errno = ENOMEM; return (NULL); } if (VECTX_LSEEK(fd, off, SEEK_SET) == -1) { #ifdef DEBUG printf("\nlseek failed\n"); #endif free(buf); return (NULL); } if ((size_t)VECTX_READ(fd, buf, len) != len) { #ifdef DEBUG printf("\nread failed\n"); #endif free(buf); return (NULL); } return (buf); } void dev_cleanup(void) { int i; /* Call cleanup routines */ for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); } + +/* + * mount new rootfs and unmount old, set "currdev" environment variable. + */ +int mount_currdev(struct env_var *ev, int flags, const void *value) +{ + int rv; + + /* mount new rootfs */ + rv = mount(value, "/", 0, NULL); + if (rv == 0) { + /* + * Note we unmount any previously mounted fs only after + * successfully mounting the new because we do not want to + * end up with unmounted rootfs. 
+ */ + if (ev->ev_value != NULL) + unmount(ev->ev_value, 0); + env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); + } + return (rv); +} diff --git a/stand/efi/boot1/zfs_module.c b/stand/efi/boot1/zfs_module.c index 583d6b1d101f..7aaca72e30f1 100644 --- a/stand/efi/boot1/zfs_module.c +++ b/stand/efi/boot1/zfs_module.c @@ -1,245 +1,245 @@ /*- * Copyright (c) 2015 Eric McCorkle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #include #include #include "boot_module.h" #include "libzfs.h" #include "zfsimpl.c" static dev_info_t *devices; uint64_t ldi_get_size(void *priv) { dev_info_t *devinfo = priv; return (devinfo->dev->Media->BlockSize * (devinfo->dev->Media->LastBlock + 1)); } static int vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) { dev_info_t *devinfo; uint64_t lba; size_t size, remainder, rb_size, blksz; char *bouncebuf = NULL, *rb_buf; EFI_STATUS status; devinfo = (dev_info_t *)priv; lba = off / devinfo->dev->Media->BlockSize; remainder = off % devinfo->dev->Media->BlockSize; rb_buf = buf; rb_size = bytes; /* * If we have remainder from off, we need to add remainder part. * Since buffer must be multiple of the BlockSize, round it all up. */ size = roundup2(bytes + remainder, devinfo->dev->Media->BlockSize); blksz = size; if (remainder != 0 || size != bytes) { rb_size = devinfo->dev->Media->BlockSize; bouncebuf = malloc(rb_size); if (bouncebuf == NULL) { printf("vdev_read: out of memory\n"); return (-1); } rb_buf = bouncebuf; blksz = rb_size - remainder; } while (bytes > 0) { status = devinfo->dev->ReadBlocks(devinfo->dev, devinfo->dev->Media->MediaId, lba, rb_size, rb_buf); if (EFI_ERROR(status)) goto error; if (bytes < blksz) blksz = bytes; if (bouncebuf != NULL) memcpy(buf, rb_buf + remainder, blksz); buf = (void *)((uintptr_t)buf + blksz); bytes -= blksz; lba++; remainder = 0; blksz = rb_size; } free(bouncebuf); return (0); error: free(bouncebuf); DPRINTF("vdev_read: failed dev: %p, id: %u, lba: %ju, size: %zu," " rb_size: %zu, status: %lu\n", devinfo->dev, devinfo->dev->Media->MediaId, (uintmax_t)lba, bytes, rb_size, EFI_ERROR_CODE(status)); return (-1); } static EFI_STATUS probe(dev_info_t *dev) { spa_t *spa; dev_info_t *tdev; /* ZFS consumes the dev on success so we need a copy. 
*/ tdev = malloc(sizeof(*dev)); if (tdev == NULL) { DPRINTF("Failed to allocate tdev\n"); return (EFI_OUT_OF_RESOURCES); } memcpy(tdev, dev, sizeof(*dev)); if (vdev_probe(vdev_read, NULL, tdev, &spa) != 0) { free(tdev); return (EFI_UNSUPPORTED); } dev->devdata = spa; add_device(&devices, dev); return (EFI_SUCCESS); } static EFI_STATUS load(const char *filepath, dev_info_t *devinfo, void **bufp, size_t *bufsize) { spa_t *spa; struct zfsmount zmount; dnode_phys_t dn; struct stat st; int err; void *buf; spa = devinfo->devdata; #ifdef EFI_DEBUG { CHAR16 *text = efi_devpath_name(devinfo->devpath); DPRINTF("load: '%s' spa: '%s', devpath: %S\n", filepath, spa->spa_name, text); efi_free_devpath_name(text); } #endif if ((err = zfs_spa_init(spa)) != 0) { DPRINTF("Failed to load pool '%s' (%d)\n", spa->spa_name, err); return (EFI_NOT_FOUND); } - if ((err = zfs_mount(spa, 0, &zmount)) != 0) { + if ((err = zfs_mount_impl(spa, 0, &zmount)) != 0) { DPRINTF("Failed to mount pool '%s' (%d)\n", spa->spa_name, err); return (EFI_NOT_FOUND); } if ((err = zfs_lookup(&zmount, filepath, &dn)) != 0) { if (err == ENOENT) { DPRINTF("Failed to find '%s' on pool '%s' (%d)\n", filepath, spa->spa_name, err); return (EFI_NOT_FOUND); } printf("Failed to lookup '%s' on pool '%s' (%d)\n", filepath, spa->spa_name, err); return (EFI_INVALID_PARAMETER); } if ((err = zfs_dnode_stat(spa, &dn, &st)) != 0) { printf("Failed to stat '%s' on pool '%s' (%d)\n", filepath, spa->spa_name, err); return (EFI_INVALID_PARAMETER); } buf = malloc(st.st_size); if (buf == NULL) { printf("Failed to allocate load buffer %jd for pool '%s' for '%s' ", (intmax_t)st.st_size, spa->spa_name, filepath); return (EFI_INVALID_PARAMETER); } if ((err = dnode_read(spa, &dn, 0, buf, st.st_size)) != 0) { printf("Failed to read node from %s (%d)\n", spa->spa_name, err); free(buf); return (EFI_INVALID_PARAMETER); } *bufsize = st.st_size; *bufp = buf; return (EFI_SUCCESS); } static void status(void) { spa_t *spa; spa = 
STAILQ_FIRST(&zfs_pools); if (spa == NULL) { printf("%s found no pools\n", zfs_module.name); return; } printf("%s found the following pools:", zfs_module.name); STAILQ_FOREACH(spa, &zfs_pools, spa_link) printf(" %s", spa->spa_name); printf("\n"); } static void init(void) { zfs_init(); } static dev_info_t * _devices(void) { return (devices); } const boot_module_t zfs_module = { .name = "ZFS", .init = init, .probe = probe, .load = load, .status = status, .devices = _devices }; diff --git a/stand/efi/libefi/devicename.c b/stand/efi/libefi/devicename.c index fbce0016338c..cdf4830697fd 100644 --- a/stand/efi/libefi/devicename.c +++ b/stand/efi/libefi/devicename.c @@ -1,216 +1,215 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include static int efi_parsedev(struct devdesc **, const char *, const char **); /* * Point (dev) at an allocated device specifier for the device matching the * path in (devspec). If it contains an explicit device specification, * use that. If not, use the default device. */ int efi_getdev(void **vdev, const char *devspec, const char **path) { struct devdesc **dev = (struct devdesc **)vdev; int rv; /* * If it looks like this is just a path and no device, then * use the current device instead. */ if (devspec == NULL || *devspec == '/' || !strchr(devspec, ':')) { rv = efi_parsedev(dev, getenv("currdev"), NULL); if (rv == 0 && path != NULL) *path = devspec; return (rv); } /* Parse the device name off the beginning of the devspec. */ return (efi_parsedev(dev, devspec, path)); } /* * Point (dev) at an allocated device specifier matching the string version * at the beginning of (devspec). Return a pointer to the remaining * text in (path). * * In all cases, the beginning of (devspec) is compared to the names * of known devices in the device switch, and then any following text * is parsed according to the rules applied to the device type. 
* * For disk-type devices, the syntax is: * * fs: */ static int efi_parsedev(struct devdesc **dev, const char *devspec, const char **path) { struct devdesc *idev; struct devsw *dv; int i, unit, err; char *cp; const char *np; /* minimum length check */ if (strlen(devspec) < 2) return (EINVAL); /* look for a device that matches */ for (i = 0; devsw[i] != NULL; i++) { dv = devsw[i]; if (!strncmp(devspec, dv->dv_name, strlen(dv->dv_name))) break; } if (devsw[i] == NULL) return (ENOENT); np = devspec + strlen(dv->dv_name); idev = NULL; err = 0; switch (dv->dv_type) { case DEVT_NONE: break; case DEVT_DISK: idev = malloc(sizeof(struct disk_devdesc)); if (idev == NULL) return (ENOMEM); err = disk_parsedev((struct disk_devdesc *)idev, np, path); if (err != 0) goto fail; break; #ifdef EFI_ZFS_BOOT case DEVT_ZFS: idev = malloc(sizeof(struct zfs_devdesc)); if (idev == NULL) return (ENOMEM); err = zfs_parsedev((struct zfs_devdesc*)idev, np, path); if (err != 0) goto fail; break; #endif default: idev = malloc(sizeof(struct devdesc)); if (idev == NULL) return (ENOMEM); unit = 0; cp = (char *)np; if (*np != '\0' && *np != ':') { errno = 0; unit = strtol(np, &cp, 0); if (errno != 0 || cp == np) { err = EUNIT; goto fail; } } if (*cp != '\0' && *cp != ':') { err = EINVAL; goto fail; } idev->d_unit = unit; if (path != NULL) *path = (*cp == 0) ? 
cp : cp + 1; break; } idev->d_dev = dv; if (dev != NULL) *dev = idev; else free(idev); return (0); fail: free(idev); return (err); } char * efi_fmtdev(void *vdev) { struct devdesc *dev = (struct devdesc *)vdev; static char buf[SPECNAMELEN + 1]; switch(dev->d_dev->dv_type) { case DEVT_NONE: strcpy(buf, "(no device)"); break; case DEVT_DISK: return (disk_fmtdev(vdev)); #ifdef EFI_ZFS_BOOT case DEVT_ZFS: return (zfs_fmtdev(dev)); #endif default: sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit); break; } return (buf); } /* * Set currdev to suit the value being supplied in (value) */ int efi_setcurrdev(struct env_var *ev, int flags, const void *value) { struct devdesc *ncurr; int rv; rv = efi_parsedev(&ncurr, value, NULL); if (rv != 0) return (rv); - free(ncurr); - env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); - return (0); + + return (mount_currdev(ev, flags, value)); } diff --git a/stand/efi/loader/main.c b/stand/efi/loader/main.c index 32b278950745..97bdb7f31e97 100644 --- a/stand/efi/loader/main.c +++ b/stand/efi/loader/main.c @@ -1,1634 +1,1634 @@ /*- * Copyright (c) 2008-2010 Rui Paulo * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Copyright (c) 2016-2019 Netflix, Inc. written by M. Warner Losh * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef EFI_ZFS_BOOT #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "efizfs.h" #include "loader_efi.h" struct arch_switch archsw; /* MI/MD interface boundary */ EFI_GUID acpi = ACPI_TABLE_GUID; EFI_GUID acpi20 = ACPI_20_TABLE_GUID; EFI_GUID devid = DEVICE_PATH_PROTOCOL; EFI_GUID imgid = LOADED_IMAGE_PROTOCOL; EFI_GUID mps = MPS_TABLE_GUID; EFI_GUID netid = EFI_SIMPLE_NETWORK_PROTOCOL; EFI_GUID smbios = SMBIOS_TABLE_GUID; EFI_GUID smbios3 = SMBIOS3_TABLE_GUID; EFI_GUID dxe = DXE_SERVICES_TABLE_GUID; EFI_GUID hoblist = HOB_LIST_TABLE_GUID; EFI_GUID lzmadecomp = LZMA_DECOMPRESSION_GUID; EFI_GUID mpcore = ARM_MP_CORE_INFO_TABLE_GUID; EFI_GUID esrt = ESRT_TABLE_GUID; EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID; EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID; EFI_GUID fdtdtb = FDT_TABLE_GUID; EFI_GUID inputid = SIMPLE_TEXT_INPUT_PROTOCOL; /* * Number of seconds to wait for a keystroke before exiting with failure * in the event no currdev is found. -2 means always break, -1 means * never break, 0 means poll once and then reboot, > 0 means wait for * that many seconds. "fail_timeout" can be set in the environment as * well. */ static int fail_timeout = 5; /* * Current boot variable */ UINT16 boot_current; /* * Image that we booted from. 
*/ EFI_LOADED_IMAGE *boot_img; static bool has_keyboard(void) { EFI_STATUS status; EFI_DEVICE_PATH *path; EFI_HANDLE *hin, *hin_end, *walker; UINTN sz; bool retval = false; /* * Find all the handles that support the SIMPLE_TEXT_INPUT_PROTOCOL and * do the typical dance to get the right sized buffer. */ sz = 0; hin = NULL; status = BS->LocateHandle(ByProtocol, &inputid, 0, &sz, 0); if (status == EFI_BUFFER_TOO_SMALL) { hin = (EFI_HANDLE *)malloc(sz); status = BS->LocateHandle(ByProtocol, &inputid, 0, &sz, hin); if (EFI_ERROR(status)) free(hin); } if (EFI_ERROR(status)) return retval; /* * Look at each of the handles. If it supports the device path protocol, * use it to get the device path for this handle. Then see if that * device path matches either the USB device path for keyboards or the * legacy device path for keyboards. */ hin_end = &hin[sz / sizeof(*hin)]; for (walker = hin; walker < hin_end; walker++) { status = OpenProtocolByHandle(*walker, &devid, (void **)&path); if (EFI_ERROR(status)) continue; while (!IsDevicePathEnd(path)) { /* * Check for the ACPI keyboard node. All PNP3xx nodes * are keyboards of different flavors. Note: It is * unclear of there's always a keyboard node when * there's a keyboard controller, or if there's only one * when a keyboard is detected at boot. */ if (DevicePathType(path) == ACPI_DEVICE_PATH && (DevicePathSubType(path) == ACPI_DP || DevicePathSubType(path) == ACPI_EXTENDED_DP)) { ACPI_HID_DEVICE_PATH *acpi; acpi = (ACPI_HID_DEVICE_PATH *)(void *)path; if ((EISA_ID_TO_NUM(acpi->HID) & 0xff00) == 0x300 && (acpi->HID & 0xffff) == PNP_EISA_ID_CONST) { retval = true; goto out; } /* * Check for USB keyboard node, if present. Unlike a * PS/2 keyboard, these definitely only appear when * connected to the system. 
*/ } else if (DevicePathType(path) == MESSAGING_DEVICE_PATH && DevicePathSubType(path) == MSG_USB_CLASS_DP) { USB_CLASS_DEVICE_PATH *usb; usb = (USB_CLASS_DEVICE_PATH *)(void *)path; if (usb->DeviceClass == 3 && /* HID */ usb->DeviceSubClass == 1 && /* Boot devices */ usb->DeviceProtocol == 1) { /* Boot keyboards */ retval = true; goto out; } } path = NextDevicePathNode(path); } } out: free(hin); return retval; } static void set_currdev(const char *devname) { + env_setenv("currdev", EV_VOLATILE, devname, efi_setcurrdev, + env_nounset); /* - * Don't execute hooks here; we may need to try setting these more than - * once here if we're probing for the ZFS pool we're supposed to boot. - * The currdev hook is intended to just validate user input anyways, - * while the loaddev hook makes it immutable once we've determined what - * the proper currdev is. + * Don't execute hook here; the loaddev hook makes it immutable + * once we've determined what the proper currdev is. */ - env_setenv("currdev", EV_VOLATILE | EV_NOHOOK, devname, efi_setcurrdev, - env_nounset); env_setenv("loaddev", EV_VOLATILE | EV_NOHOOK, devname, env_noset, env_nounset); } static void set_currdev_devdesc(struct devdesc *currdev) { const char *devname; devname = efi_fmtdev(currdev); printf("Setting currdev to %s\n", devname); set_currdev(devname); } static void set_currdev_devsw(struct devsw *dev, int unit) { struct devdesc currdev; currdev.d_dev = dev; currdev.d_unit = unit; set_currdev_devdesc(&currdev); } static void set_currdev_pdinfo(pdinfo_t *dp) { /* * Disks are special: they have partitions. if the parent * pointer is non-null, we're a partition not a full disk * and we need to adjust currdev appropriately. 
*/ if (dp->pd_devsw->dv_type == DEVT_DISK) { struct disk_devdesc currdev; currdev.dd.d_dev = dp->pd_devsw; if (dp->pd_parent == NULL) { currdev.dd.d_unit = dp->pd_unit; currdev.d_slice = D_SLICENONE; currdev.d_partition = D_PARTNONE; } else { currdev.dd.d_unit = dp->pd_parent->pd_unit; currdev.d_slice = dp->pd_unit; currdev.d_partition = D_PARTISGPT; /* XXX Assumes GPT */ } set_currdev_devdesc((struct devdesc *)&currdev); } else { set_currdev_devsw(dp->pd_devsw, dp->pd_unit); } } static bool sanity_check_currdev(void) { struct stat st; return (stat(PATH_DEFAULTS_LOADER_CONF, &st) == 0 || #ifdef PATH_BOOTABLE_TOKEN stat(PATH_BOOTABLE_TOKEN, &st) == 0 || /* non-standard layout */ #endif stat(PATH_KERNEL, &st) == 0); } #ifdef EFI_ZFS_BOOT static bool probe_zfs_currdev(uint64_t guid) { char *devname; struct zfs_devdesc currdev; char *buf = NULL; bool rv; currdev.dd.d_dev = &zfs_dev; currdev.dd.d_unit = 0; currdev.pool_guid = guid; currdev.root_guid = 0; set_currdev_devdesc((struct devdesc *)&currdev); devname = efi_fmtdev(&currdev); init_zfs_boot_options(devname); rv = sanity_check_currdev(); if (rv) { buf = malloc(VDEV_PAD_SIZE); if (buf != NULL) { if (zfs_get_bootonce(&currdev, OS_BOOTONCE, buf, VDEV_PAD_SIZE) == 0) { printf("zfs bootonce: %s\n", buf); set_currdev(buf); setenv("zfs-bootonce", buf, 1); } free(buf); (void) zfs_attach_nvstore(&currdev); } } return (rv); } #endif static bool try_as_currdev(pdinfo_t *hd, pdinfo_t *pp) { uint64_t guid; #ifdef EFI_ZFS_BOOT /* * If there's a zpool on this device, try it as a ZFS * filesystem, which has somewhat different setup than all * other types of fs due to imperfect loader integration. * This all stems from ZFS being both a device (zpool) and * a filesystem, plus the boot env feature. */ if (efizfs_get_guid_by_handle(pp->pd_handle, &guid)) return (probe_zfs_currdev(guid)); #endif /* * All other filesystems just need the pdinfo * initialized in the standard way. 
*/ set_currdev_pdinfo(pp); return (sanity_check_currdev()); } /* * Sometimes we get filenames that are all upper case * and/or have backslashes in them. Filter all this out * if it looks like we need to do so. */ static void fix_dosisms(char *p) { while (*p) { if (isupper(*p)) *p = tolower(*p); else if (*p == '\\') *p = '/'; p++; } } #define SIZE(dp, edp) (size_t)((intptr_t)(void *)edp - (intptr_t)(void *)dp) enum { BOOT_INFO_OK = 0, BAD_CHOICE = 1, NOT_SPECIFIC = 2 }; static int match_boot_info(char *boot_info, size_t bisz) { uint32_t attr; uint16_t fplen; size_t len; char *walker, *ep; EFI_DEVICE_PATH *dp, *edp, *first_dp, *last_dp; pdinfo_t *pp; CHAR16 *descr; char *kernel = NULL; FILEPATH_DEVICE_PATH *fp; struct stat st; CHAR16 *text; /* * FreeBSD encodes its boot loading path into the boot loader * BootXXXX variable. We look for the last one in the path * and use that to load the kernel. However, if we only find * one DEVICE_PATH, then there's nothing specific and we should * fall back. * * In an ideal world, we'd look at the image handle we were * passed, match up with the loader we are and then return the * next one in the path. This would be most flexible and cover * many chain booting scenarios where you need to use this * boot loader to get to the next boot loader. However, that * doesn't work. We rarely have the path to the image booted * (just the device) so we can't count on that. So, we do the * next best thing: we look through the device path(s) passed * in the BootXXXX variable. If there's only one, we return * NOT_SPECIFIC. Otherwise, we look at the last one and try to * load that. If we can, we return BOOT_INFO_OK. Otherwise we * return BAD_CHOICE for the caller to sort out. 
*/ if (bisz < sizeof(attr) + sizeof(fplen) + sizeof(CHAR16)) return NOT_SPECIFIC; walker = boot_info; ep = walker + bisz; memcpy(&attr, walker, sizeof(attr)); walker += sizeof(attr); memcpy(&fplen, walker, sizeof(fplen)); walker += sizeof(fplen); descr = (CHAR16 *)(intptr_t)walker; len = ucs2len(descr); walker += (len + 1) * sizeof(CHAR16); last_dp = first_dp = dp = (EFI_DEVICE_PATH *)walker; edp = (EFI_DEVICE_PATH *)(walker + fplen); if ((char *)edp > ep) return NOT_SPECIFIC; while (dp < edp && SIZE(dp, edp) > sizeof(EFI_DEVICE_PATH)) { text = efi_devpath_name(dp); if (text != NULL) { printf(" BootInfo Path: %S\n", text); efi_free_devpath_name(text); } last_dp = dp; dp = (EFI_DEVICE_PATH *)((char *)dp + efi_devpath_length(dp)); } /* * If there's only one item in the list, then nothing was * specified. Or if the last path doesn't have a media * path in it. Those show up as various VenHw() nodes * which are basically opaque to us. Don't count those * as something specifc. */ if (last_dp == first_dp) { printf("Ignoring Boot%04x: Only one DP found\n", boot_current); return NOT_SPECIFIC; } if (efi_devpath_to_media_path(last_dp) == NULL) { printf("Ignoring Boot%04x: No Media Path\n", boot_current); return NOT_SPECIFIC; } /* * OK. At this point we either have a good path or a bad one. * Let's check. */ pp = efiblk_get_pdinfo_by_device_path(last_dp); if (pp == NULL) { printf("Ignoring Boot%04x: Device Path not found\n", boot_current); return BAD_CHOICE; } set_currdev_pdinfo(pp); if (!sanity_check_currdev()) { printf("Ignoring Boot%04x: sanity check failed\n", boot_current); return BAD_CHOICE; } /* * OK. We've found a device that matches, next we need to check the last * component of the path. If it's a file, then we set the default kernel * to that. Otherwise, just use this as the default root. * * Reminder: we're running very early, before we've parsed the defaults * file, so we may need to have a hack override. 
*/ dp = efi_devpath_last_node(last_dp); if (DevicePathType(dp) != MEDIA_DEVICE_PATH || DevicePathSubType(dp) != MEDIA_FILEPATH_DP) { printf("Using Boot%04x for root partition\n", boot_current); return (BOOT_INFO_OK); /* use currdir, default kernel */ } fp = (FILEPATH_DEVICE_PATH *)dp; ucs2_to_utf8(fp->PathName, &kernel); if (kernel == NULL) { printf("Not using Boot%04x: can't decode kernel\n", boot_current); return (BAD_CHOICE); } if (*kernel == '\\' || isupper(*kernel)) fix_dosisms(kernel); if (stat(kernel, &st) != 0) { free(kernel); printf("Not using Boot%04x: can't find %s\n", boot_current, kernel); return (BAD_CHOICE); } setenv("kernel", kernel, 1); free(kernel); text = efi_devpath_name(last_dp); if (text) { printf("Using Boot%04x %S + %s\n", boot_current, text, kernel); efi_free_devpath_name(text); } return (BOOT_INFO_OK); } /* * Look at the passed-in boot_info, if any. If we find it then we need * to see if we can find ourselves in the boot chain. If we can, and * there's another specified thing to boot next, assume that the file * is loaded from / and use that for the root filesystem. If can't * find the specified thing, we must fail the boot. If we're last on * the list, then we fallback to looking for the first available / * candidate (ZFS, if there's a bootable zpool, otherwise a UFS * partition that has either /boot/defaults/loader.conf on it or * /boot/kernel/kernel (the default kernel) that we can use. * * We always fail if we can't find the right thing. However, as * a concession to buggy UEFI implementations, like u-boot, if * we have determined that the host is violating the UEFI boot * manager protocol, we'll signal the rest of the program that * a drop to the OK boot loader prompt is possible. 
*/ static int find_currdev(bool do_bootmgr, bool is_last, char *boot_info, size_t boot_info_sz) { pdinfo_t *dp, *pp; EFI_DEVICE_PATH *devpath, *copy; EFI_HANDLE h; CHAR16 *text; struct devsw *dev; int unit; uint64_t extra; int rv; char *rootdev; /* * First choice: if rootdev is already set, use that, even if * it's wrong. */ rootdev = getenv("rootdev"); if (rootdev != NULL) { printf(" Setting currdev to configured rootdev %s\n", rootdev); set_currdev(rootdev); return (0); } /* * Second choice: If uefi_rootdev is set, translate that UEFI device * path to the loader's internal name and use that. */ do { rootdev = getenv("uefi_rootdev"); if (rootdev == NULL) break; devpath = efi_name_to_devpath(rootdev); if (devpath == NULL) break; dp = efiblk_get_pdinfo_by_device_path(devpath); efi_devpath_free(devpath); if (dp == NULL) break; printf(" Setting currdev to UEFI path %s\n", rootdev); set_currdev_pdinfo(dp); return (0); } while (0); /* * Third choice: If we can find out image boot_info, and there's * a follow-on boot image in that boot_info, use that. In this * case root will be the partition specified in that image and * we'll load the kernel specified by the file path. Should there * not be a filepath, we use the default. This filepath overrides * loader.conf. */ if (do_bootmgr) { rv = match_boot_info(boot_info, boot_info_sz); switch (rv) { case BOOT_INFO_OK: /* We found it */ return (0); case BAD_CHOICE: /* specified file not found -> error */ /* XXX do we want to have an escape hatch for last in boot order? */ return (ENOENT); } /* Nothing specified, try normal match */ } #ifdef EFI_ZFS_BOOT /* * Did efi_zfs_probe() detect the boot pool? If so, use the zpool * it found, if it's sane. ZFS is the only thing that looks for * disks and pools to boot. This may change in the future, however, * if we allow specifying which pool to boot from via UEFI variables * rather than the bootenv stuff that FreeBSD uses today. 
*/ if (pool_guid != 0) { printf("Trying ZFS pool\n"); if (probe_zfs_currdev(pool_guid)) return (0); } #endif /* EFI_ZFS_BOOT */ /* * Try to find the block device by its handle based on the * image we're booting. If we can't find a sane partition, * search all the other partitions of the disk. We do not * search other disks because it's a violation of the UEFI * boot protocol to do so. We fail and let UEFI go on to * the next candidate. */ dp = efiblk_get_pdinfo_by_handle(boot_img->DeviceHandle); if (dp != NULL) { text = efi_devpath_name(dp->pd_devpath); if (text != NULL) { printf("Trying ESP: %S\n", text); efi_free_devpath_name(text); } set_currdev_pdinfo(dp); if (sanity_check_currdev()) return (0); if (dp->pd_parent != NULL) { pdinfo_t *espdp = dp; dp = dp->pd_parent; STAILQ_FOREACH(pp, &dp->pd_part, pd_link) { /* Already tried the ESP */ if (espdp == pp) continue; /* * Roll up the ZFS special case * for those partitions that have * zpools on them. */ text = efi_devpath_name(pp->pd_devpath); if (text != NULL) { printf("Trying: %S\n", text); efi_free_devpath_name(text); } if (try_as_currdev(dp, pp)) return (0); } } } /* * Try the device handle from our loaded image first. If that * fails, use the device path from the loaded image and see if * any of the nodes in that path match one of the enumerated * handles. Currently, this handle list is only for netboot. 
*/ if (efi_handle_lookup(boot_img->DeviceHandle, &dev, &unit, &extra) == 0) { set_currdev_devsw(dev, unit); if (sanity_check_currdev()) return (0); } copy = NULL; devpath = efi_lookup_image_devpath(IH); while (devpath != NULL) { h = efi_devpath_handle(devpath); if (h == NULL) break; free(copy); copy = NULL; if (efi_handle_lookup(h, &dev, &unit, &extra) == 0) { set_currdev_devsw(dev, unit); if (sanity_check_currdev()) return (0); } devpath = efi_lookup_devpath(h); if (devpath != NULL) { copy = efi_devpath_trim(devpath); devpath = copy; } } free(copy); return (ENOENT); } static bool interactive_interrupt(const char *msg) { time_t now, then, last; last = 0; now = then = getsecs(); printf("%s\n", msg); if (fail_timeout == -2) /* Always break to OK */ return (true); if (fail_timeout == -1) /* Never break to OK */ return (false); do { if (last != now) { printf("press any key to interrupt reboot in %d seconds\r", fail_timeout - (int)(now - then)); last = now; } /* XXX no pause or timeout wait for char */ if (ischar()) return (true); now = getsecs(); } while (now - then < fail_timeout); return (false); } static int parse_args(int argc, CHAR16 *argv[]) { int i, j, howto; bool vargood; char var[128]; /* * Parse the args to set the console settings, etc * boot1.efi passes these in, if it can read /boot.config or /boot/config * or iPXE may be setup to pass these in. Or the optional argument in the * boot environment was used to pass these arguments in (in which case * neither /boot.config nor /boot/config are consulted). * * Loop through the args, and for each one that contains an '=' that is * not the first character, add it to the environment. This allows * loader and kernel env vars to be passed on the command line. Convert * args from UCS-2 to ASCII (16 to 8 bit) as they are copied (though this * method is flawed for non-ASCII characters). 
*/ howto = 0; for (i = 1; i < argc; i++) { cpy16to8(argv[i], var, sizeof(var)); howto |= boot_parse_arg(var); } return (howto); } static void setenv_int(const char *key, int val) { char buf[20]; snprintf(buf, sizeof(buf), "%d", val); setenv(key, buf, 1); } /* * Parse ConOut (the list of consoles active) and see if we can find a * serial port and/or a video port. It would be nice to also walk the * ACPI name space to map the UID for the serial port to a port. The * latter is especially hard. */ int parse_uefi_con_out(void) { int how, rv; int vid_seen = 0, com_seen = 0, seen = 0; size_t sz; char buf[4096], *ep; EFI_DEVICE_PATH *node; ACPI_HID_DEVICE_PATH *acpi; UART_DEVICE_PATH *uart; bool pci_pending; how = 0; sz = sizeof(buf); rv = efi_global_getenv("ConOut", buf, &sz); if (rv != EFI_SUCCESS) rv = efi_global_getenv("ConOutDev", buf, &sz); if (rv != EFI_SUCCESS) { /* If we don't have any ConOut default to serial */ how = RB_SERIAL; goto out; } ep = buf + sz; node = (EFI_DEVICE_PATH *)buf; while ((char *)node < ep) { if (IsDevicePathEndType(node)) { if (pci_pending && vid_seen == 0) vid_seen = ++seen; } pci_pending = false; if (DevicePathType(node) == ACPI_DEVICE_PATH && (DevicePathSubType(node) == ACPI_DP || DevicePathSubType(node) == ACPI_EXTENDED_DP)) { /* Check for Serial node */ acpi = (void *)node; if (EISA_ID_TO_NUM(acpi->HID) == 0x501) { setenv_int("efi_8250_uid", acpi->UID); com_seen = ++seen; } } else if (DevicePathType(node) == MESSAGING_DEVICE_PATH && DevicePathSubType(node) == MSG_UART_DP) { com_seen = ++seen; uart = (void *)node; setenv_int("efi_com_speed", uart->BaudRate); } else if (DevicePathType(node) == ACPI_DEVICE_PATH && DevicePathSubType(node) == ACPI_ADR_DP) { /* Check for AcpiAdr() Node for video */ vid_seen = ++seen; } else if (DevicePathType(node) == HARDWARE_DEVICE_PATH && DevicePathSubType(node) == HW_PCI_DP) { /* * Note, vmware fusion has a funky console device * PciRoot(0x0)/Pci(0xf,0x0) * which we can only detect at the end since we 
also * have to cope with: * PciRoot(0x0)/Pci(0x1f,0x0)/Serial(0x1) * so only match it if it's last. */ pci_pending = true; } node = NextDevicePathNode(node); } /* * Truth table for RB_MULTIPLE | RB_SERIAL * Value Result * 0 Use only video console * RB_SERIAL Use only serial console * RB_MULTIPLE Use both video and serial console * (but video is primary so gets rc messages) * both Use both video and serial console * (but serial is primary so gets rc messages) * * Try to honor this as best we can. If only one of serial / video * found, then use that. Otherwise, use the first one we found. * This also implies if we found nothing, default to video. */ how = 0; if (vid_seen && com_seen) { how |= RB_MULTIPLE; if (com_seen < vid_seen) how |= RB_SERIAL; } else if (com_seen) how |= RB_SERIAL; out: return (how); } void parse_loader_efi_config(EFI_HANDLE h, const char *env_fn) { pdinfo_t *dp; struct stat st; int fd = -1; char *env = NULL; dp = efiblk_get_pdinfo_by_handle(h); if (dp == NULL) return; set_currdev_pdinfo(dp); if (stat(env_fn, &st) != 0) return; fd = open(env_fn, O_RDONLY); if (fd == -1) return; env = malloc(st.st_size + 1); if (env == NULL) goto out; if (read(fd, env, st.st_size) != st.st_size) goto out; env[st.st_size] = '\0'; boot_parse_cmdline(env); out: free(env); close(fd); } static void read_loader_env(const char *name, char *def_fn, bool once) { UINTN len; char *fn, *freeme = NULL; len = 0; fn = def_fn; if (efi_freebsd_getenv(name, NULL, &len) == EFI_BUFFER_TOO_SMALL) { freeme = fn = malloc(len + 1); if (fn != NULL) { if (efi_freebsd_getenv(name, fn, &len) != EFI_SUCCESS) { free(fn); fn = NULL; printf( "Can't fetch FreeBSD::%s we know is there\n", name); } else { /* * if tagged as 'once' delete the env variable so we * only use it once. */ if (once) efi_freebsd_delenv(name); /* * We malloced 1 more than len above, then redid the call. 
* so now we have room at the end of the string to NUL terminate * it here, even if the typical idium would have '- 1' here to * not overflow. len should be the same on return both times. */ fn[len] = '\0'; } } else { printf( "Can't allocate %d bytes to fetch FreeBSD::%s env var\n", len, name); } } if (fn) { printf(" Reading loader env vars from %s\n", fn); parse_loader_efi_config(boot_img->DeviceHandle, fn); } } caddr_t ptov(uintptr_t x) { return ((caddr_t)x); } EFI_STATUS main(int argc, CHAR16 *argv[]) { EFI_GUID *guid; int howto, i, uhowto; UINTN k; bool has_kbd, is_last; char *s; EFI_DEVICE_PATH *imgpath; CHAR16 *text; EFI_STATUS rv; size_t sz, bosz = 0, bisz = 0; UINT16 boot_order[100]; char boot_info[4096]; char buf[32]; bool uefi_boot_mgr; archsw.arch_autoload = efi_autoload; archsw.arch_getdev = efi_getdev; archsw.arch_copyin = efi_copyin; archsw.arch_copyout = efi_copyout; #ifdef __amd64__ archsw.arch_hypervisor = x86_hypervisor; #endif archsw.arch_readin = efi_readin; archsw.arch_zfs_probe = efi_zfs_probe; /* Get our loaded image protocol interface structure. */ (void) OpenProtocolByHandle(IH, &imgid, (void **)&boot_img); /* * Chicken-and-egg problem; we want to have console output early, but * some console attributes may depend on reading from eg. the boot * device, which we can't do yet. We can use printf() etc. once this is * done. So, we set it to the efi console, then call console init. This * gets us printf early, but also primes the pump for all future console * changes to take effect, regardless of where they come from. */ setenv("console", "efi", 1); uhowto = parse_uefi_con_out(); #if defined(__aarch64__) || defined(__arm__) || defined(__riscv) if ((uhowto & RB_SERIAL) != 0) setenv("console", "comconsole", 1); #endif cons_probe(); + /* Set up currdev variable to have hooks in place. */ + env_setenv("currdev", EV_VOLATILE, "", efi_setcurrdev, env_nounset); + /* Init the time source */ efi_time_init(); /* * Initialise the block cache. 
Set the upper limit. */ bcache_init(32768, 512); /* * Scan the BLOCK IO MEDIA handles then * march through the device switch probing for things. */ i = efipart_inithandles(); if (i != 0 && i != ENOENT) { printf("efipart_inithandles failed with ERRNO %d, expect " "failures\n", i); } for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); /* * Detect console settings two different ways: one via the command * args (eg -h) or via the UEFI ConOut variable. */ has_kbd = has_keyboard(); howto = parse_args(argc, argv); if (!has_kbd && (howto & RB_PROBE)) howto |= RB_SERIAL | RB_MULTIPLE; howto &= ~RB_PROBE; /* * Read additional environment variables from the boot device's * "LoaderEnv" file. Any boot loader environment variable may be set * there, which are subtly different than loader.conf variables. Only * the 'simple' ones may be set so things like foo_load="YES" won't work * for two reasons. First, the parser is simplistic and doesn't grok * quotes. Second, because the variables that cause an action to happen * are parsed by the lua, 4th or whatever code that's not yet * loaded. This is relative to the root directory when loader.efi is * loaded off the UFS root drive (when chain booted), or from the ESP * when directly loaded by the BIOS. * * We also read in NextLoaderEnv if it was specified. This allows next boot * functionality to be implemented and to override anything in LoaderEnv. */ read_loader_env("LoaderEnv", "/efi/freebsd/loader.env", false); read_loader_env("NextLoaderEnv", NULL, true); /* * We now have two notions of console. howto should be viewed as * overrides. If console is already set, don't set it again. 
*/ #define VIDEO_ONLY 0 #define SERIAL_ONLY RB_SERIAL #define VID_SER_BOTH RB_MULTIPLE #define SER_VID_BOTH (RB_SERIAL | RB_MULTIPLE) #define CON_MASK (RB_SERIAL | RB_MULTIPLE) if (strcmp(getenv("console"), "efi") == 0) { if ((howto & CON_MASK) == 0) { /* No override, uhowto is controlling and efi cons is perfect */ howto = howto | (uhowto & CON_MASK); } else if ((howto & CON_MASK) == (uhowto & CON_MASK)) { /* override matches what UEFI told us, efi console is perfect */ } else if ((uhowto & (CON_MASK)) != 0) { /* * We detected a serial console on ConOut. All possible * overrides include serial. We can't really override what efi * gives us, so we use it knowing it's the best choice. */ /* Do nothing */ } else { /* * We detected some kind of serial in the override, but ConOut * has no serial, so we have to sort out which case it really is. */ switch (howto & CON_MASK) { case SERIAL_ONLY: setenv("console", "comconsole", 1); break; case VID_SER_BOTH: setenv("console", "efi comconsole", 1); break; case SER_VID_BOTH: setenv("console", "comconsole efi", 1); break; /* case VIDEO_ONLY can't happen -- it's the first if above */ } } } /* * howto is set now how we want to export the flags to the kernel, so * set the env based on it. */ boot_howto_to_env(howto); if (efi_copy_init()) { printf("failed to allocate staging area\n"); return (EFI_BUFFER_TOO_SMALL); } if ((s = getenv("fail_timeout")) != NULL) fail_timeout = strtol(s, NULL, 10); printf("%s\n", bootprog_info); printf(" Command line arguments:"); for (i = 0; i < argc; i++) printf(" %S", argv[i]); printf("\n"); printf(" Image base: 0x%lx\n", (unsigned long)boot_img->ImageBase); printf(" EFI version: %d.%02d\n", ST->Hdr.Revision >> 16, ST->Hdr.Revision & 0xffff); printf(" EFI Firmware: %S (rev %d.%02d)\n", ST->FirmwareVendor, ST->FirmwareRevision >> 16, ST->FirmwareRevision & 0xffff); printf(" Console: %s (%#x)\n", getenv("console"), howto); /* Determine the devpath of our image so we can prefer it. 
*/ text = efi_devpath_name(boot_img->FilePath); if (text != NULL) { printf(" Load Path: %S\n", text); efi_setenv_freebsd_wcs("LoaderPath", text); efi_free_devpath_name(text); } rv = OpenProtocolByHandle(boot_img->DeviceHandle, &devid, (void **)&imgpath); if (rv == EFI_SUCCESS) { text = efi_devpath_name(imgpath); if (text != NULL) { printf(" Load Device: %S\n", text); efi_setenv_freebsd_wcs("LoaderDev", text); efi_free_devpath_name(text); } } if (getenv("uefi_ignore_boot_mgr") != NULL) { printf(" Ignoring UEFI boot manager\n"); uefi_boot_mgr = false; } else { uefi_boot_mgr = true; boot_current = 0; sz = sizeof(boot_current); rv = efi_global_getenv("BootCurrent", &boot_current, &sz); if (rv == EFI_SUCCESS) printf(" BootCurrent: %04x\n", boot_current); else { boot_current = 0xffff; uefi_boot_mgr = false; } sz = sizeof(boot_order); rv = efi_global_getenv("BootOrder", &boot_order, &sz); if (rv == EFI_SUCCESS) { printf(" BootOrder:"); for (i = 0; i < sz / sizeof(boot_order[0]); i++) printf(" %04x%s", boot_order[i], boot_order[i] == boot_current ? "[*]" : ""); printf("\n"); is_last = boot_order[(sz / sizeof(boot_order[0])) - 1] == boot_current; bosz = sz; } else if (uefi_boot_mgr) { /* * u-boot doesn't set BootOrder, but otherwise participates in the * boot manager protocol. So we fake it here and don't consider it * a failure. */ bosz = sizeof(boot_order[0]); boot_order[0] = boot_current; is_last = true; } } /* * Next, find the boot info structure the UEFI boot manager is * supposed to setup. We need this so we can walk through it to * find where we are in the booting process and what to try to * boot next. */ if (uefi_boot_mgr) { snprintf(buf, sizeof(buf), "Boot%04X", boot_current); sz = sizeof(boot_info); rv = efi_global_getenv(buf, &boot_info, &sz); if (rv == EFI_SUCCESS) bisz = sz; else uefi_boot_mgr = false; } /* * Disable the watchdog timer. By default the boot manager sets * the timer to 5 minutes before invoking a boot option. 
If we * want to return to the boot manager, we have to disable the * watchdog timer and since we're an interactive program, we don't * want to wait until the user types "quit". The timer may have * fired by then. We don't care if this fails. It does not prevent * normal functioning in any way... */ BS->SetWatchdogTimer(0, 0, 0, NULL); /* * Initialize the trusted/forbidden certificates from UEFI. * They will be later used to verify the manifest(s), * which should contain hashes of verified files. * This needs to be initialized before any configuration files * are loaded. */ #ifdef EFI_SECUREBOOT ve_efi_init(); #endif /* * Try and find a good currdev based on the image that was booted. * It might be desirable here to have a short pause to allow falling * through to the boot loader instead of returning instantly to follow * the boot protocol and also allow an escape hatch for users wishing * to try something different. */ if (find_currdev(uefi_boot_mgr, is_last, boot_info, bisz) != 0) if (uefi_boot_mgr && !interactive_interrupt("Failed to find bootable partition")) return (EFI_NOT_FOUND); autoload_font(false); /* Set up the font list for console. 
*/ efi_init_environment(); #if !defined(__arm__) for (k = 0; k < ST->NumberOfTableEntries; k++) { guid = &ST->ConfigurationTable[k].VendorGuid; if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) { char buf[40]; snprintf(buf, sizeof(buf), "%p", ST->ConfigurationTable[k].VendorTable); setenv("hint.smbios.0.mem", buf, 1); smbios_detect(ST->ConfigurationTable[k].VendorTable); break; } } #endif interact(); /* doesn't return */ return (EFI_SUCCESS); /* keep compiler happy */ } COMMAND_SET(poweroff, "poweroff", "power off the system", command_poweroff); static int command_poweroff(int argc __unused, char *argv[] __unused) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); RS->ResetSystem(EfiResetShutdown, EFI_SUCCESS, 0, NULL); /* NOTREACHED */ return (CMD_ERROR); } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); RS->ResetSystem(EfiResetCold, EFI_SUCCESS, 0, NULL); /* NOTREACHED */ return (CMD_ERROR); } COMMAND_SET(quit, "quit", "exit the loader", command_quit); static int command_quit(int argc, char *argv[]) { exit(0); return (CMD_OK); } COMMAND_SET(memmap, "memmap", "print memory map", command_memmap); static int command_memmap(int argc __unused, char *argv[] __unused) { UINTN sz; EFI_MEMORY_DESCRIPTOR *map, *p; UINTN key, dsz; UINT32 dver; EFI_STATUS status; int i, ndesc; char line[80]; sz = 0; status = BS->GetMemoryMap(&sz, 0, &key, &dsz, &dver); if (status != EFI_BUFFER_TOO_SMALL) { printf("Can't determine memory map size\n"); return (CMD_ERROR); } map = malloc(sz); status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); if (EFI_ERROR(status)) { printf("Can't read memory map\n"); return (CMD_ERROR); } ndesc = sz / dsz; snprintf(line, sizeof(line), "%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); pager_open(); if 
(pager_output(line)) { pager_close(); return (CMD_OK); } for (i = 0, p = map; i < ndesc; i++, p = NextMemoryDescriptor(p, dsz)) { snprintf(line, sizeof(line), "%23s %012jx %012jx %08jx ", efi_memory_type(p->Type), (uintmax_t)p->PhysicalStart, (uintmax_t)p->VirtualStart, (uintmax_t)p->NumberOfPages); if (pager_output(line)) break; if (p->Attribute & EFI_MEMORY_UC) printf("UC "); if (p->Attribute & EFI_MEMORY_WC) printf("WC "); if (p->Attribute & EFI_MEMORY_WT) printf("WT "); if (p->Attribute & EFI_MEMORY_WB) printf("WB "); if (p->Attribute & EFI_MEMORY_UCE) printf("UCE "); if (p->Attribute & EFI_MEMORY_WP) printf("WP "); if (p->Attribute & EFI_MEMORY_RP) printf("RP "); if (p->Attribute & EFI_MEMORY_XP) printf("XP "); if (p->Attribute & EFI_MEMORY_NV) printf("NV "); if (p->Attribute & EFI_MEMORY_MORE_RELIABLE) printf("MR "); if (p->Attribute & EFI_MEMORY_RO) printf("RO "); if (pager_output("\n")) break; } pager_close(); return (CMD_OK); } COMMAND_SET(configuration, "configuration", "print configuration tables", command_configuration); static int command_configuration(int argc, char *argv[]) { UINTN i; char *name; printf("NumberOfTableEntries=%lu\n", (unsigned long)ST->NumberOfTableEntries); for (i = 0; i < ST->NumberOfTableEntries; i++) { EFI_GUID *guid; printf(" "); guid = &ST->ConfigurationTable[i].VendorGuid; if (efi_guid_to_name(guid, &name) == true) { printf(name); free(name); } else { printf("Error while translating UUID to name"); } printf(" at %p\n", ST->ConfigurationTable[i].VendorTable); } return (CMD_OK); } COMMAND_SET(mode, "mode", "change or display EFI text modes", command_mode); static int command_mode(int argc, char *argv[]) { UINTN cols, rows; unsigned int mode; int i; char *cp; EFI_STATUS status; SIMPLE_TEXT_OUTPUT_INTERFACE *conout; conout = ST->ConOut; if (argc > 1) { mode = strtol(argv[1], &cp, 0); if (cp[0] != '\0') { printf("Invalid mode\n"); return (CMD_ERROR); } status = conout->QueryMode(conout, mode, &cols, &rows); if (EFI_ERROR(status)) { 
printf("invalid mode %d\n", mode); return (CMD_ERROR); } status = conout->SetMode(conout, mode); if (EFI_ERROR(status)) { printf("couldn't set mode %d\n", mode); return (CMD_ERROR); } (void) cons_update_mode(true); return (CMD_OK); } printf("Current mode: %d\n", conout->Mode->Mode); for (i = 0; i <= conout->Mode->MaxMode; i++) { status = conout->QueryMode(conout, i, &cols, &rows); if (EFI_ERROR(status)) continue; printf("Mode %d: %u columns, %u rows\n", i, (unsigned)cols, (unsigned)rows); } if (i != 0) printf("Select a mode with the command \"mode \"\n"); return (CMD_OK); } COMMAND_SET(lsefi, "lsefi", "list EFI handles", command_lsefi); static int command_lsefi(int argc __unused, char *argv[] __unused) { char *name; EFI_HANDLE *buffer = NULL; EFI_HANDLE handle; UINTN bufsz = 0, i, j; EFI_STATUS status; int ret = 0; status = BS->LocateHandle(AllHandles, NULL, NULL, &bufsz, buffer); if (status != EFI_BUFFER_TOO_SMALL) { snprintf(command_errbuf, sizeof (command_errbuf), "unexpected error: %lld", (long long)status); return (CMD_ERROR); } if ((buffer = malloc(bufsz)) == NULL) { sprintf(command_errbuf, "out of memory"); return (CMD_ERROR); } status = BS->LocateHandle(AllHandles, NULL, NULL, &bufsz, buffer); if (EFI_ERROR(status)) { free(buffer); snprintf(command_errbuf, sizeof (command_errbuf), "LocateHandle() error: %lld", (long long)status); return (CMD_ERROR); } pager_open(); for (i = 0; i < (bufsz / sizeof (EFI_HANDLE)); i++) { UINTN nproto = 0; EFI_GUID **protocols = NULL; handle = buffer[i]; printf("Handle %p", handle); if (pager_output("\n")) break; /* device path */ status = BS->ProtocolsPerHandle(handle, &protocols, &nproto); if (EFI_ERROR(status)) { snprintf(command_errbuf, sizeof (command_errbuf), "ProtocolsPerHandle() error: %lld", (long long)status); continue; } for (j = 0; j < nproto; j++) { if (efi_guid_to_name(protocols[j], &name) == true) { printf(" %s", name); free(name); } else { printf("Error while translating UUID to name"); } if ((ret = 
pager_output("\n")) != 0) break; } BS->FreePool(protocols); if (ret != 0) break; } pager_close(); free(buffer); return (CMD_OK); } #ifdef LOADER_FDT_SUPPORT extern int command_fdt_internal(int argc, char *argv[]); /* * Since proper fdt command handling function is defined in fdt_loader_cmd.c, * and declaring it as extern is in contradiction with COMMAND_SET() macro * (which uses static pointer), we're defining wrapper function, which * calls the proper fdt handling routine. */ static int command_fdt(int argc, char *argv[]) { return (command_fdt_internal(argc, argv)); } COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); #endif /* * Chain load another efi loader. */ static int command_chain(int argc, char *argv[]) { EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL; EFI_HANDLE loaderhandle; EFI_LOADED_IMAGE *loaded_image; EFI_STATUS status; struct stat st; struct devdesc *dev; char *name, *path; void *buf; int fd; if (argc < 2) { command_errmsg = "wrong number of arguments"; return (CMD_ERROR); } name = argv[1]; if ((fd = open(name, O_RDONLY)) < 0) { command_errmsg = "no such file"; return (CMD_ERROR); } #ifdef LOADER_VERIEXEC if (verify_file(fd, name, 0, VE_MUST, __func__) < 0) { sprintf(command_errbuf, "can't verify: %s", name); close(fd); return (CMD_ERROR); } #endif if (fstat(fd, &st) < -1) { command_errmsg = "stat failed"; close(fd); return (CMD_ERROR); } status = BS->AllocatePool(EfiLoaderCode, (UINTN)st.st_size, &buf); if (status != EFI_SUCCESS) { command_errmsg = "failed to allocate buffer"; close(fd); return (CMD_ERROR); } if (read(fd, buf, st.st_size) != st.st_size) { command_errmsg = "error while reading the file"; (void)BS->FreePool(buf); close(fd); return (CMD_ERROR); } close(fd); status = BS->LoadImage(FALSE, IH, NULL, buf, st.st_size, &loaderhandle); (void)BS->FreePool(buf); if (status != EFI_SUCCESS) { command_errmsg = "LoadImage failed"; return (CMD_ERROR); } status = OpenProtocolByHandle(loaderhandle, &LoadedImageGUID, (void 
**)&loaded_image); if (argc > 2) { int i, len = 0; CHAR16 *argp; for (i = 2; i < argc; i++) len += strlen(argv[i]) + 1; len *= sizeof (*argp); loaded_image->LoadOptions = argp = malloc (len); loaded_image->LoadOptionsSize = len; for (i = 2; i < argc; i++) { char *ptr = argv[i]; while (*ptr) *(argp++) = *(ptr++); *(argp++) = ' '; } *(--argv) = 0; } if (efi_getdev((void **)&dev, name, (const char **)&path) == 0) { #ifdef EFI_ZFS_BOOT struct zfs_devdesc *z_dev; #endif struct disk_devdesc *d_dev; pdinfo_t *hd, *pd; switch (dev->d_dev->dv_type) { #ifdef EFI_ZFS_BOOT case DEVT_ZFS: z_dev = (struct zfs_devdesc *)dev; loaded_image->DeviceHandle = efizfs_get_handle_by_guid(z_dev->pool_guid); break; #endif case DEVT_NET: loaded_image->DeviceHandle = efi_find_handle(dev->d_dev, dev->d_unit); break; default: hd = efiblk_get_pdinfo(dev); if (STAILQ_EMPTY(&hd->pd_part)) { loaded_image->DeviceHandle = hd->pd_handle; break; } d_dev = (struct disk_devdesc *)dev; STAILQ_FOREACH(pd, &hd->pd_part, pd_link) { /* * d_partition should be 255 */ if (pd->pd_unit == (uint32_t)d_dev->d_slice) { loaded_image->DeviceHandle = pd->pd_handle; break; } } break; } } dev_cleanup(); status = BS->StartImage(loaderhandle, NULL, NULL); if (status != EFI_SUCCESS) { command_errmsg = "StartImage failed"; free(loaded_image->LoadOptions); loaded_image->LoadOptions = NULL; status = BS->UnloadImage(loaded_image); return (CMD_ERROR); } return (CMD_ERROR); /* not reached */ } COMMAND_SET(chain, "chain", "chain load file", command_chain); extern struct in_addr servip; static int command_netserver(int argc, char *argv[]) { char *proto; n_long rootaddr; if (argc > 2) { command_errmsg = "wrong number of arguments"; return (CMD_ERROR); } if (argc < 2) { proto = netproto == NET_TFTP ? 
"tftp://" : "nfs://"; printf("Netserver URI: %s%s%s\n", proto, intoa(rootip.s_addr), rootpath); return (CMD_OK); } if (argc == 2) { strncpy(rootpath, argv[1], sizeof(rootpath)); rootpath[sizeof(rootpath) -1] = '\0'; if ((rootaddr = net_parse_rootpath()) != INADDR_NONE) servip.s_addr = rootip.s_addr = rootaddr; return (CMD_OK); } return (CMD_ERROR); /* not reached */ } COMMAND_SET(netserver, "netserver", "change or display netserver URI", command_netserver); diff --git a/stand/i386/gptzfsboot/Makefile b/stand/i386/gptzfsboot/Makefile index aa8b497f8f3a..09615c5f97c1 100644 --- a/stand/i386/gptzfsboot/Makefile +++ b/stand/i386/gptzfsboot/Makefile @@ -1,84 +1,84 @@ # $FreeBSD$ .include .PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/gptboot \ ${BOOTSRC}/i386/zfsboot ${BOOTSRC}/i386/common \ ${BOOTSRC}/common FILES= gptzfsboot MAN= gptzfsboot.8 BOOT_COMCONSOLE_PORT?= 0x3f8 BOOT_COMCONSOLE_SPEED?= 9600 B2SIOFMT?= 0x3 REL1= 0x700 ORG1= 0x7c00 ORG2= 0x0 CFLAGS+=-DBOOTPROG=\"gptzfsboot\" \ -O1 \ -DBOOT2 \ -DLOADER_GPT_SUPPORT \ -DLOADER_MBR_SUPPORT \ -DLOADER_ZFS_SUPPORT \ -DSIOPRT=${BOOT_COMCONSOLE_PORT} \ -DSIOFMT=${B2SIOFMT} \ -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ -I${LDRSRC} \ -I${BOOTSRC}/i386/common \ -I${BOOTSRC}/i386/libi386 \ -I${ZFSSRC} \ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ -I${SYSDIR}/contrib/openzfs/include \ -I${SYSDIR}/contrib/openzfs/include/os/freebsd/spl \ -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs \ -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \ -I${BOOTSRC}/i386/btx/lib \ -I${BOOTSRC}/i386/boot2 \ -DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib \ -Wall -Waggregate-return -Wbad-function-cast \ -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \ -Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings \ -Wno-pointer-sign CFLAGS.clang+= -Wno-tentative-definition-incomplete-type NO_WCAST_ALIGN= CFLAGS.gcc+= --param max-inline-insns-single=100 LD_FLAGS+=${LD_FLAGS_BIN} CLEANFILES+= gptzfsboot gptzfsboot: gptldr.bin 
gptzfsboot.bin ${BTXKERN} btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l gptldr.bin \ -o ${.TARGET} gptzfsboot.bin CLEANFILES+= gptldr.bin gptldr.out gptldr.o gptldr.bin: gptldr.out ${OBJCOPY} -S -O binary gptldr.out ${.TARGET} gptldr.out: gptldr.o ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o -OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o +OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o misc.o CLEANFILES+= gptzfsboot.bin gptzfsboot.out ${OBJS} ${OPENCRYPTO_XTS} # i386 standalone support library LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a gptzfsboot.bin: gptzfsboot.out ${OBJCOPY} -S -O binary gptzfsboot.out ${.TARGET} gptzfsboot.out: ${BTXCRT} ${OBJS} \ ${OPENCRYPTO_XTS} ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32} zfsboot.o: ${ZFSSRC}/zfsimpl.c .include diff --git a/stand/i386/libi386/devicename.c b/stand/i386/libi386/devicename.c index 926a2c5a850e..061f2ba9ce9f 100644 --- a/stand/i386/libi386/devicename.c +++ b/stand/i386/libi386/devicename.c @@ -1,215 +1,215 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include "bootstrap.h" #include "disk.h" #include "libi386.h" #include "libzfs.h" static int i386_parsedev(struct i386_devdesc **dev, const char *devspec, const char **path); /* * Point (dev) at an allocated device specifier for the device matching the * path in (devspec). If it contains an explicit device specification, * use that. If not, use the default device. */ int i386_getdev(void **vdev, const char *devspec, const char **path) { struct i386_devdesc **dev = (struct i386_devdesc **)vdev; int rv; /* * If it looks like this is just a path and no * device, go with the current device. */ if ((devspec == NULL) || (devspec[0] == '/') || (strchr(devspec, ':') == NULL)) { if (((rv = i386_parsedev(dev, getenv("currdev"), NULL)) == 0) && (path != NULL)) *path = devspec; return(rv); } /* * Try to parse the device name off the beginning of the devspec */ return(i386_parsedev(dev, devspec, path)); } /* * Point (dev) at an allocated device specifier matching the string version * at the beginning of (devspec). Return a pointer to the remaining * text in (path). * * In all cases, the beginning of (devspec) is compared to the names * of known devices in the device switch, and then any following text * is parsed according to the rules applied to the device type. 
* * For disk-type devices, the syntax is: * * disk[s][]: * */ static int i386_parsedev(struct i386_devdesc **dev, const char *devspec, const char **path) { struct i386_devdesc *idev; struct devsw *dv; int i, unit, err; char *cp; const char *np; /* minimum length check */ if (strlen(devspec) < 2) return(EINVAL); /* look for a device that matches */ for (i = 0, dv = NULL; devsw[i] != NULL; i++) { if (!strncmp(devspec, devsw[i]->dv_name, strlen(devsw[i]->dv_name))) { dv = devsw[i]; break; } } if (dv == NULL) return(ENOENT); np = (devspec + strlen(dv->dv_name)); idev = NULL; err = 0; switch(dv->dv_type) { case DEVT_NONE: break; case DEVT_DISK: idev = malloc(sizeof(struct i386_devdesc)); if (idev == NULL) return (ENOMEM); err = disk_parsedev((struct disk_devdesc *)idev, np, path); if (err != 0) goto fail; break; case DEVT_ZFS: idev = malloc(sizeof (struct zfs_devdesc)); if (idev == NULL) return (ENOMEM); err = zfs_parsedev((struct zfs_devdesc *)idev, np, path); if (err != 0) goto fail; break; default: idev = malloc(sizeof (struct devdesc)); if (idev == NULL) return (ENOMEM); unit = 0; cp = (char *)np; if (*np && (*np != ':')) { unit = strtol(np, &cp, 0); /* get unit number if present */ if (cp == np) { err = EUNIT; goto fail; } } if (*cp && (*cp != ':')) { err = EINVAL; goto fail; } idev->dd.d_unit = unit; if (path != NULL) *path = (*cp == 0) ? cp : cp + 1; break; } idev->dd.d_dev = dv; if (dev != NULL) *dev = idev; else free(idev); return(0); fail: free(idev); return(err); } char * i386_fmtdev(void *vdev) { struct i386_devdesc *dev = (struct i386_devdesc *)vdev; static char buf[128]; /* XXX device length constant? 
*/ switch(dev->dd.d_dev->dv_type) { case DEVT_NONE: strcpy(buf, "(no device)"); break; case DEVT_CD: case DEVT_NET: sprintf(buf, "%s%d:", dev->dd.d_dev->dv_name, dev->dd.d_unit); break; case DEVT_DISK: return (disk_fmtdev(vdev)); case DEVT_ZFS: return(zfs_fmtdev(vdev)); } return(buf); } /* * Set currdev to suit the value being supplied in (value) */ int i386_setcurrdev(struct env_var *ev, int flags, const void *value) { - struct i386_devdesc *ncurr; - int rv; + struct i386_devdesc *ncurr; + int rv; - if ((rv = i386_parsedev(&ncurr, value, NULL)) != 0) - return(rv); - free(ncurr); - env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); - return(0); + if ((rv = i386_parsedev(&ncurr, value, NULL)) != 0) + return (rv); + free(ncurr); + + return (mount_currdev(ev, flags, value)); } diff --git a/stand/i386/loader/main.c b/stand/i386/loader/main.c index 6b81ef411f7f..cb716d7a9f21 100644 --- a/stand/i386/loader/main.c +++ b/stand/i386/loader/main.c @@ -1,464 +1,468 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * MD bootstrap main() and assorted miscellaneous * commands. */ #include #include #include #include #include #include #include #include #include #include "bootstrap.h" #include "common/bootargs.h" #include "libi386/libi386.h" #include #include "btxv86.h" #ifdef LOADER_ZFS_SUPPORT #include #include "libzfs.h" #endif CTASSERT(sizeof(struct bootargs) == BOOTARGS_SIZE); CTASSERT(offsetof(struct bootargs, bootinfo) == BA_BOOTINFO); CTASSERT(offsetof(struct bootargs, bootflags) == BA_BOOTFLAGS); CTASSERT(offsetof(struct bootinfo, bi_size) == BI_SIZE); /* Arguments passed in from the boot1/boot2 loader */ static struct bootargs *kargs; static uint32_t initial_howto; static uint32_t initial_bootdev; static struct bootinfo *initial_bootinfo; struct arch_switch archsw; /* MI/MD interface boundary */ static void extract_currdev(void); static int isa_inb(int port); static void isa_outb(int port, int value); void exit(int code); #ifdef LOADER_GELI_SUPPORT #include "geliboot.h" struct geli_boot_args *gargs; struct geli_boot_data *gbdata; #endif #ifdef LOADER_ZFS_SUPPORT struct zfs_boot_args *zargs; static void i386_zfs_probe(void); #endif /* XXX debugging */ extern char end[]; static void *heap_top; static void *heap_bottom; caddr_t ptov(uintptr_t x) { return (PTOV(x)); } int main(void) { int i; /* Pick up arguments */ kargs = (void *)__args; initial_howto = kargs->howto; initial_bootdev = kargs->bootdev; initial_bootinfo = 
kargs->bootinfo ? (struct bootinfo *)PTOV(kargs->bootinfo) : NULL; /* Initialize the v86 register set to a known-good state. */ bzero(&v86, sizeof(v86)); v86.efl = PSL_RESERVED_DEFAULT | PSL_I; /* * Initialise the heap as early as possible. * Once this is done, malloc() is usable. */ bios_getmem(); #if defined(LOADER_BZIP2_SUPPORT) || defined(LOADER_FIREWIRE_SUPPORT) || \ defined(LOADER_GPT_SUPPORT) || defined(LOADER_ZFS_SUPPORT) if (high_heap_size > 0) { heap_top = PTOV(high_heap_base + high_heap_size); heap_bottom = PTOV(high_heap_base); if (high_heap_base < memtop_copyin) memtop_copyin = high_heap_base; } else #endif { heap_top = (void *)PTOV(bios_basemem); heap_bottom = (void *)end; } setheap(heap_bottom, heap_top); /* * Now that malloc is usable, allocate a buffer for tslog and start * logging timestamps during the boot process. */ tslog_init(); /* * detect ACPI for future reference. This may set console to comconsole * if we do have ACPI SPCR table. */ biosacpi_detect(); /* * XXX Chicken-and-egg problem; we want to have console output early, * but some console attributes may depend on reading from eg. the boot * device, which we can't do yet. * * We can use printf() etc. once this is done. * If the previous boot stage has requested a serial console, * prefer that. */ bi_setboothowto(initial_howto); if (initial_howto & RB_MULTIPLE) { if (initial_howto & RB_SERIAL) setenv("console", "comconsole vidconsole", 1); else setenv("console", "vidconsole comconsole", 1); } else if (initial_howto & RB_SERIAL) { setenv("console", "comconsole", 1); } else if (initial_howto & RB_MUTE) { setenv("console", "nullconsole", 1); } cons_probe(); + /* Set up currdev variable to have hooks in place. */ + env_setenv("currdev", EV_VOLATILE | EV_NOHOOK, "", + i386_setcurrdev, env_nounset); + /* * Initialise the block cache. Set the upper limit. */ bcache_init(32768, 512); /* * Special handling for PXE and CD booting. 
*/ if (kargs->bootinfo == 0) { /* * We only want the PXE disk to try to init itself in the below * walk through devsw if we actually booted off of PXE. */ if (kargs->bootflags & KARGS_FLAGS_PXE) pxe_enable(kargs->pxeinfo ? PTOV(kargs->pxeinfo) : NULL); else if (kargs->bootflags & KARGS_FLAGS_CD) bc_add(initial_bootdev); } archsw.arch_autoload = i386_autoload; archsw.arch_getdev = i386_getdev; archsw.arch_copyin = i386_copyin; archsw.arch_copyout = i386_copyout; archsw.arch_readin = i386_readin; archsw.arch_isainb = isa_inb; archsw.arch_isaoutb = isa_outb; archsw.arch_hypervisor = x86_hypervisor; #ifdef LOADER_ZFS_SUPPORT archsw.arch_zfs_probe = i386_zfs_probe; /* * zfsboot and gptzfsboot have always passed KARGS_FLAGS_ZFS, * so if that is set along with KARGS_FLAGS_EXTARG we know we * can interpret the extarg data as a struct zfs_boot_args. */ #define KARGS_EXTARGS_ZFS (KARGS_FLAGS_EXTARG | KARGS_FLAGS_ZFS) if ((kargs->bootflags & KARGS_EXTARGS_ZFS) == KARGS_EXTARGS_ZFS) { zargs = (struct zfs_boot_args *)(kargs + 1); } #endif /* LOADER_ZFS_SUPPORT */ #ifdef LOADER_GELI_SUPPORT /* * If we decided earlier that we have zfs_boot_args extarg data, * and it is big enough to contain the embedded geli data * (the early zfs_boot_args structs weren't), then init the gbdata * pointer accordingly. If there is extarg data which isn't * zfs_boot_args data, determine whether it is geli_boot_args data. * Recent versions of gptboot set KARGS_FLAGS_GELI to indicate that. * Earlier versions didn't, but we presume that's what we * have if the extarg size exactly matches the size of the * geli_boot_args struct during that pre-flag era. 
*/ #define LEGACY_GELI_ARGS_SIZE 260 /* This can never change */ #ifdef LOADER_ZFS_SUPPORT if (zargs != NULL) { if (zargs->size > offsetof(struct zfs_boot_args, gelidata)) { gbdata = &zargs->gelidata; } } else #endif /* LOADER_ZFS_SUPPORT */ if ((kargs->bootflags & KARGS_FLAGS_EXTARG) != 0) { gargs = (struct geli_boot_args *)(kargs + 1); if ((kargs->bootflags & KARGS_FLAGS_GELI) || gargs->size == LEGACY_GELI_ARGS_SIZE) { gbdata = &gargs->gelidata; } } if (gbdata != NULL) import_geli_boot_data(gbdata); #endif /* LOADER_GELI_SUPPORT */ /* * March through the device switch probing for things. */ for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); printf("BIOS %dkB/%dkB available memory\n", bios_basemem / 1024, bios_extmem / 1024); if (initial_bootinfo != NULL) { initial_bootinfo->bi_basemem = bios_basemem / 1024; initial_bootinfo->bi_extmem = bios_extmem / 1024; } /* detect SMBIOS for future reference */ smbios_detect(NULL); /* detect PCI BIOS for future reference */ biospci_detect(); printf("\n%s", bootprog_info); extract_currdev(); /* set $currdev and $loaddev */ autoload_font(true); bios_getsmap(); interact(); /* if we ever get here, it is an error */ return (1); } /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. * * XXX should be extended for netbooting. 
*/ static void extract_currdev(void) { struct i386_devdesc new_currdev; #ifdef LOADER_ZFS_SUPPORT char buf[20]; char *bootonce; #endif int biosdev = -1; /* Assume we are booting from a BIOS disk by default */ new_currdev.dd.d_dev = &bioshd; /* new-style boot loaders such as pxeldr and cdldr */ if (kargs->bootinfo == 0) { if ((kargs->bootflags & KARGS_FLAGS_CD) != 0) { /* we are booting from a CD with cdboot */ new_currdev.dd.d_dev = &bioscd; new_currdev.dd.d_unit = bd_bios2unit(initial_bootdev); } else if ((kargs->bootflags & KARGS_FLAGS_PXE) != 0) { /* we are booting from pxeldr */ new_currdev.dd.d_dev = &pxedisk; new_currdev.dd.d_unit = 0; } else { /* we don't know what our boot device is */ new_currdev.d_kind.biosdisk.slice = -1; new_currdev.d_kind.biosdisk.partition = 0; biosdev = -1; } #ifdef LOADER_ZFS_SUPPORT } else if ((kargs->bootflags & KARGS_FLAGS_ZFS) != 0) { /* * zargs was set in main() if we have new style extended * argument */ if (zargs != NULL && zargs->size >= offsetof(struct zfs_boot_args, primary_pool)) { /* sufficient data is provided */ new_currdev.d_kind.zfs.pool_guid = zargs->pool; new_currdev.d_kind.zfs.root_guid = zargs->root; if (zargs->size >= sizeof(*zargs) && zargs->primary_vdev != 0) { sprintf(buf, "%llu", zargs->primary_pool); setenv("vfs.zfs.boot.primary_pool", buf, 1); sprintf(buf, "%llu", zargs->primary_vdev); setenv("vfs.zfs.boot.primary_vdev", buf, 1); } } else { /* old style zfsboot block */ new_currdev.d_kind.zfs.pool_guid = kargs->zfspool; new_currdev.d_kind.zfs.root_guid = 0; } new_currdev.dd.d_dev = &zfs_dev; if ((bootonce = malloc(VDEV_PAD_SIZE)) != NULL) { if (zfs_get_bootonce(&new_currdev, OS_BOOTONCE_USED, bootonce, VDEV_PAD_SIZE) == 0) { setenv("zfs-bootonce", bootonce, 1); } free(bootonce); (void) zfs_attach_nvstore(&new_currdev); } #endif } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) { /* The passed-in boot device is bad */ new_currdev.d_kind.biosdisk.slice = -1; new_currdev.d_kind.biosdisk.partition = 0; 
biosdev = -1; } else { new_currdev.d_kind.biosdisk.slice = B_SLICE(initial_bootdev) - 1; new_currdev.d_kind.biosdisk.partition = B_PARTITION(initial_bootdev); biosdev = initial_bootinfo->bi_bios_dev; /* * If we are booted by an old bootstrap, we have to guess at * the BIOS unit number. We will lose if there is more than * one disk type and we are not booting from the * lowest-numbered disk type (ie. SCSI when IDE also exists). */ if ((biosdev == 0) && (B_TYPE(initial_bootdev) != 2)) { /* * biosdev doesn't match major, assume harddisk */ biosdev = 0x80 + B_UNIT(initial_bootdev); } } /* * If we are booting off of a BIOS disk and we didn't succeed * in determining which one we booted off of, just use disk0: * as a reasonable default. */ if ((new_currdev.dd.d_dev->dv_type == bioshd.dv_type) && ((new_currdev.dd.d_unit = bd_bios2unit(biosdev)) == -1)) { printf("Can't work out which disk we are booting " "from.\nGuessed BIOS device 0x%x not found by " "probes, defaulting to disk0:\n", biosdev); new_currdev.dd.d_unit = 0; } #ifdef LOADER_ZFS_SUPPORT if (new_currdev.dd.d_dev->dv_type == DEVT_ZFS) init_zfs_boot_options(zfs_fmtdev(&new_currdev)); #endif env_setenv("currdev", EV_VOLATILE, i386_fmtdev(&new_currdev), i386_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, i386_fmtdev(&new_currdev), env_noset, env_nounset); } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { int i; for (i = 0; devsw[i] != NULL; ++i) if (devsw[i]->dv_cleanup != NULL) (devsw[i]->dv_cleanup)(); printf("Rebooting...\n"); delay(1000000); __exit(0); } /* provide this for panic, as it's not in the startup code */ void exit(int code) { __exit(code); } COMMAND_SET(heap, "heap", "show heap usage", command_heap); static int command_heap(int argc, char *argv[]) { mallocstats(); printf("heap base at %p, top at %p, upper limit at %p\n", heap_bottom, sbrk(0), heap_top); return (CMD_OK); } /* ISA bus access functions for PnP. 
*/ static int isa_inb(int port) { return (inb(port)); } static void isa_outb(int port, int value) { outb(port, value); } #ifdef LOADER_ZFS_SUPPORT static void i386_zfs_probe(void) { char devname[32]; struct i386_devdesc dev; /* * Open all the disks we can find and see if we can reconstruct * ZFS pools from them. */ dev.dd.d_dev = &bioshd; for (dev.dd.d_unit = 0; bd_unit2bios(&dev) >= 0; dev.dd.d_unit++) { snprintf(devname, sizeof(devname), "%s%d:", bioshd.dv_name, dev.dd.d_unit); zfs_probe_dev(devname, NULL); } } #endif diff --git a/stand/i386/zfsboot/Makefile b/stand/i386/zfsboot/Makefile index 7e362b43a39c..243b5e99287c 100644 --- a/stand/i386/zfsboot/Makefile +++ b/stand/i386/zfsboot/Makefile @@ -1,94 +1,94 @@ # $FreeBSD$ .include .PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${BOOTSRC}/common FILES= zfsboot MAN= zfsboot.8 BOOT_COMCONSOLE_PORT?= 0x3f8 BOOT_COMCONSOLE_SPEED?= 9600 B2SIOFMT?= 0x3 REL1= 0x700 ORG1= 0x7c00 ORG2= 0x2000 CFLAGS+=-DBOOTPROG=\"zfsboot\" \ -O1 \ -DBOOT2 \ -DLOADER_GPT_SUPPORT \ -DLOADER_MBR_SUPPORT \ -DLOADER_ZFS_SUPPORT \ -DLOADER_UFS_SUPPORT \ -DSIOPRT=${BOOT_COMCONSOLE_PORT} \ -DSIOFMT=${B2SIOFMT} \ -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ -I${LDRSRC} \ -I${BOOTSRC}/i386/common \ -I${BOOTSRC}/i386/libi386 \ -I${ZFSSRC} \ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ -I${SYSDIR}/contrib/openzfs/include \ -I${SYSDIR}/contrib/openzfs/include/os/freebsd/spl \ -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs \ -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \ -I${BOOTSRC}/i386/boot2 \ -Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \ -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \ -Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings CFLAGS.part.c+= -DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib CFLAGS.gcc+= --param max-inline-insns-single=100 LD_FLAGS+=${LD_FLAGS_BIN} CLEANFILES+= zfsboot zfsboot: zfsboot1 zfsboot2 cat zfsboot1 zfsboot2 > zfsboot CLEANFILES+= zfsboot1 zfsldr.out zfsldr.o 
zfsboot1: zfsldr.out ${OBJCOPY} -S -O binary zfsldr.out ${.TARGET} zfsldr.out: zfsldr.o ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} zfsldr.o -OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o +OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o misc.o CLEANFILES+= zfsboot2 zfsboot.ld zfsboot.ldr zfsboot.bin zfsboot.out \ ${OBJS} # We currently allow 256k bytes for zfsboot - in practice it could be # any size up to 3.5Mb but keeping it fixed size simplifies zfsldr. # BOOT2SIZE= 262144 # i386 standalone support library LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a zfsboot2: zfsboot.ld @set -- `ls -l ${.ALLSRC}`; x=$$((${BOOT2SIZE}-$$5)); \ echo "$$x bytes available"; test $$x -ge 0 ${DD} if=${.ALLSRC} of=${.TARGET} bs=${BOOT2SIZE} conv=sync zfsboot.ld: zfsboot.ldr zfsboot.bin ${BTXKERN} btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l zfsboot.ldr \ -o ${.TARGET} -P 1 zfsboot.bin zfsboot.ldr: :> ${.TARGET} zfsboot.bin: zfsboot.out ${OBJCOPY} -S -O binary zfsboot.out ${.TARGET} zfsboot.out: ${BTXCRT} ${OBJS} ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32} SRCS= zfsboot.c .include diff --git a/stand/i386/zfsboot/zfsboot.c b/stand/i386/zfsboot/zfsboot.c index 1067c4c6cd42..8dd3066f15e5 100644 --- a/stand/i386/zfsboot/zfsboot.c +++ b/stand/i386/zfsboot/zfsboot.c @@ -1,717 +1,721 @@ /*- * Copyright (c) 1998 Robert Nordier * All rights reserved. * * Redistribution and use in source and binary forms are freely * permitted provided that the above copyright notice and this * paragraph and the following disclaimer are duplicated in all * such forms. * * This software is provided "AS IS" and without any express or * implied warranties, including, without limitation, the implied * warranties of merchantability and fitness for a particular * purpose. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #ifdef GPT #include #endif #include #include #ifdef LOADER_ZFS_SUPPORT #include #endif #include #include #include #include #include #include #include "bootstrap.h" #include "libi386.h" #include #include "lib.h" #include "rbx.h" #include "cons.h" #include "bootargs.h" #include "disk.h" #include "part.h" #include "paths.h" #include "libzfs.h" #define ARGS 0x900 #define NOPT 14 #define NDEV 3 #define BIOS_NUMDRIVES 0x475 #define DRV_HARD 0x80 #define DRV_MASK 0x7f #define TYPE_AD 0 #define TYPE_DA 1 #define TYPE_MAXHARD TYPE_DA #define TYPE_FD 2 extern uint32_t _end; static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ static const unsigned char flags[NOPT] = { RBX_DUAL, RBX_SERIAL, RBX_ASKNAME, RBX_CDROM, RBX_CONFIG, RBX_KDB, RBX_GDB, RBX_MUTE, RBX_NOINTR, RBX_PAUSE, RBX_QUIET, RBX_DFLTROOT, RBX_SINGLE, RBX_VERBOSE }; uint32_t opts; /* * Paths to try loading before falling back to the boot2 prompt. * * /boot/zfsloader must be tried before /boot/loader in order to remain * backward compatible with ZFS boot environments where /boot/loader exists * but does not have ZFS support, which was the case before FreeBSD 12. * * If no loader is found, try to load a kernel directly instead. 
*/ static const struct string { const char *p; size_t len; } loadpath[] = { { PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS) }, { PATH_LOADER, sizeof(PATH_LOADER) }, { PATH_KERNEL, sizeof(PATH_KERNEL) }, }; static const unsigned char dev_maj[NDEV] = {30, 4, 2}; static struct i386_devdesc *bdev; static char cmd[512]; static char cmddup[512]; static char kname[1024]; static int comspeed = SIOSPD; static struct bootinfo bootinfo; static uint32_t bootdev; static struct zfs_boot_args zfsargs; #ifdef LOADER_GELI_SUPPORT static struct geli_boot_args geliargs; #endif extern vm_offset_t high_heap_base; extern uint32_t bios_basemem, bios_extmem, high_heap_size; static char *heap_top; static char *heap_bottom; void exit(int); static void i386_zfs_probe(void); static void load(void); static int parse_cmd(void); #ifdef LOADER_GELI_SUPPORT #include "geliboot.h" static char gelipw[GELI_PW_MAXLEN]; #endif struct arch_switch archsw; /* MI/MD interface boundary */ static char boot_devname[2 * ZFS_MAXNAMELEN + 8]; /* disk or pool:dataset */ struct devsw *devsw[] = { &bioshd, #if defined(LOADER_ZFS_SUPPORT) &zfs_dev, #endif NULL }; struct fs_ops *file_system[] = { #if defined(LOADER_ZFS_SUPPORT) &zfs_fsops, #endif #if defined(LOADER_UFS_SUPPORT) &ufs_fsops, #endif NULL }; caddr_t ptov(uintptr_t x) { return (PTOV(x)); } int main(void); int main(void) { unsigned i; int auto_boot, fd, nextboot = 0; struct disk_devdesc devdesc; bios_getmem(); if (high_heap_size > 0) { heap_top = PTOV(high_heap_base + high_heap_size); heap_bottom = PTOV(high_heap_base); } else { heap_bottom = (char *) (roundup2(__base + (int32_t)&_end, 0x10000) - __base); heap_top = (char *)PTOV(bios_basemem); } setheap(heap_bottom, heap_top); /* * Initialise the block cache. Set the upper limit. 
*/ bcache_init(32768, 512); archsw.arch_autoload = NULL; archsw.arch_getdev = i386_getdev; archsw.arch_copyin = NULL; archsw.arch_copyout = NULL; archsw.arch_readin = NULL; archsw.arch_isainb = NULL; archsw.arch_isaoutb = NULL; archsw.arch_zfs_probe = i386_zfs_probe; bootinfo.bi_version = BOOTINFO_VERSION; bootinfo.bi_size = sizeof(bootinfo); bootinfo.bi_basemem = bios_basemem / 1024; bootinfo.bi_extmem = bios_extmem / 1024; bootinfo.bi_memsizes_valid++; bootinfo.bi_bios_dev = *(uint8_t *)PTOV(ARGS); /* Set up fall back device name. */ snprintf(boot_devname, sizeof (boot_devname), "disk%d:", bd_bios2unit(bootinfo.bi_bios_dev)); + /* Set up currdev variable to have hooks in place. */ + env_setenv("currdev", EV_VOLATILE, "", i386_setcurrdev, + env_nounset); + for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); disk_parsedev(&devdesc, boot_devname + 4, NULL); bootdev = MAKEBOOTDEV(dev_maj[DEVT_DISK], devdesc.d_slice + 1, devdesc.dd.d_unit, devdesc.d_partition >= 0 ? 
devdesc.d_partition : 0xff); /* * zfs_fmtdev() can be called only after dv_init */ if (bdev != NULL && bdev->dd.d_dev->dv_type == DEVT_ZFS) { /* set up proper device name string for ZFS */ strncpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname)); if (zfs_get_bootonce(bdev, OS_BOOTONCE, cmd, sizeof(cmd)) == 0) { nvlist_t *benv; nextboot = 1; memcpy(cmddup, cmd, sizeof(cmd)); if (parse_cmd()) { if (!OPT_CHECK(RBX_QUIET)) printf("failed to parse bootonce " "command\n"); exit(0); } if (!OPT_CHECK(RBX_QUIET)) printf("zfs bootonce: %s\n", cmddup); if (zfs_get_bootenv(bdev, &benv) == 0) { nvlist_add_string(benv, OS_BOOTONCE_USED, cmddup); zfs_set_bootenv(bdev, benv); } /* Do not process this command twice */ *cmd = 0; } } /* now make sure we have bdev on all cases */ free(bdev); i386_getdev((void **)&bdev, boot_devname, NULL); env_setenv("currdev", EV_VOLATILE, boot_devname, i386_setcurrdev, env_nounset); /* Process configuration file */ auto_boot = 1; fd = open(PATH_CONFIG, O_RDONLY); if (fd == -1) fd = open(PATH_DOTCONFIG, O_RDONLY); if (fd != -1) { ssize_t cmdlen; if ((cmdlen = read(fd, cmd, sizeof(cmd))) > 0) cmd[cmdlen] = '\0'; else *cmd = '\0'; close(fd); } if (*cmd) { /* * Note that parse_cmd() is destructive to cmd[] and we also * want to honor RBX_QUIET option that could be present in * cmd[]. */ memcpy(cmddup, cmd, sizeof(cmd)); if (parse_cmd()) auto_boot = 0; if (!OPT_CHECK(RBX_QUIET)) printf("%s: %s\n", PATH_CONFIG, cmddup); /* Do not process this command twice */ *cmd = 0; } /* Do not risk waiting at the prompt forever. */ if (nextboot && !auto_boot) exit(0); if (auto_boot && !*kname) { /* * Iterate through the list of loader and kernel paths, * trying to load. If interrupted by a keypress, or in case of * failure, drop the user to the boot2 prompt. */ for (i = 0; i < nitems(loadpath); i++) { memcpy(kname, loadpath[i].p, loadpath[i].len); if (keyhit(3)) break; load(); } } /* Present the user with the boot2 prompt. 
*/ for (;;) { if (!auto_boot || !OPT_CHECK(RBX_QUIET)) { printf("\nFreeBSD/x86 boot\n"); printf("Default: %s%s\nboot: ", boot_devname, kname); } if (ioctrl & IO_SERIAL) sio_flush(); if (!auto_boot || keyhit(5)) getstr(cmd, sizeof(cmd)); else if (!auto_boot || !OPT_CHECK(RBX_QUIET)) putchar('\n'); auto_boot = 0; if (parse_cmd()) putchar('\a'); else load(); } } /* XXX - Needed for btxld to link the boot2 binary; do not remove. */ void exit(int x) { __exit(x); } static void load(void) { union { struct exec ex; Elf32_Ehdr eh; } hdr; static Elf32_Phdr ep[2]; static Elf32_Shdr es[2]; caddr_t p; uint32_t addr, x; int fd, fmt, i, j; ssize_t size; if ((fd = open(kname, O_RDONLY)) == -1) { printf("\nCan't find %s\n", kname); return; } size = sizeof(hdr); if (read(fd, &hdr, sizeof (hdr)) != size) { close(fd); return; } if (N_GETMAGIC(hdr.ex) == ZMAGIC) { fmt = 0; } else if (IS_ELF(hdr.eh)) { fmt = 1; } else { printf("Invalid %s\n", "format"); close(fd); return; } if (fmt == 0) { addr = hdr.ex.a_entry & 0xffffff; p = PTOV(addr); lseek(fd, PAGE_SIZE, SEEK_SET); size = hdr.ex.a_text; if (read(fd, p, hdr.ex.a_text) != size) { close(fd); return; } p += roundup2(hdr.ex.a_text, PAGE_SIZE); size = hdr.ex.a_data; if (read(fd, p, hdr.ex.a_data) != size) { close(fd); return; } p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); p += sizeof(hdr.ex.a_syms); if (hdr.ex.a_syms) { size = hdr.ex.a_syms; if (read(fd, p, hdr.ex.a_syms) != size) { close(fd); return; } p += hdr.ex.a_syms; size = sizeof (int); if (read(fd, p, sizeof (int)) != size) { close(fd); return; } x = *(uint32_t *)p; p += sizeof(int); x -= sizeof(int); size = x; if (read(fd, p, x) != size) { close(fd); return; } p += x; } } else { lseek(fd, hdr.eh.e_phoff, SEEK_SET); for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { size = sizeof (ep[0]); if (read(fd, ep + j, sizeof (ep[0])) != size) { close(fd); return; } if (ep[j].p_type == PT_LOAD) j++; 
} for (i = 0; i < 2; i++) { p = PTOV(ep[i].p_paddr & 0xffffff); lseek(fd, ep[i].p_offset, SEEK_SET); size = ep[i].p_filesz; if (read(fd, p, ep[i].p_filesz) != size) { close(fd); return; } } p += roundup2(ep[1].p_memsz, PAGE_SIZE); bootinfo.bi_symtab = VTOP(p); if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { lseek(fd, hdr.eh.e_shoff + sizeof (es[0]) * (hdr.eh.e_shstrndx + 1), SEEK_SET); size = sizeof(es); if (read(fd, &es, sizeof (es)) != size) { close(fd); return; } for (i = 0; i < 2; i++) { memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); p += sizeof(es[i].sh_size); lseek(fd, es[i].sh_offset, SEEK_SET); size = es[i].sh_size; if (read(fd, p, es[i].sh_size) != size) { close(fd); return; } p += es[i].sh_size; } } addr = hdr.eh.e_entry & 0xffffff; } close(fd); bootinfo.bi_esymtab = VTOP(p); bootinfo.bi_kernelname = VTOP(kname); #ifdef LOADER_GELI_SUPPORT explicit_bzero(gelipw, sizeof(gelipw)); #endif if (bdev->dd.d_dev->dv_type == DEVT_ZFS) { zfsargs.size = sizeof(zfsargs); zfsargs.pool = bdev->d_kind.zfs.pool_guid; zfsargs.root = bdev->d_kind.zfs.root_guid; #ifdef LOADER_GELI_SUPPORT export_geli_boot_data(&zfsargs.gelidata); #endif /* * Note that the zfsargs struct is passed by value, not by * pointer. Code in btxldr.S copies the values from the entry * stack to a fixed location within loader(8) at startup due * to the presence of KARGS_FLAGS_EXTARG. */ __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), bootdev, KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, (uint32_t)bdev->d_kind.zfs.pool_guid, (uint32_t)(bdev->d_kind.zfs.pool_guid >> 32), VTOP(&bootinfo), zfsargs); } else { #ifdef LOADER_GELI_SUPPORT geliargs.size = sizeof(geliargs); export_geli_boot_data(&geliargs.gelidata); #endif /* * Note that the geliargs struct is passed by value, not by * pointer. Code in btxldr.S copies the values from the entry * stack to a fixed location within loader(8) at startup due * to the presence of the KARGS_FLAGS_EXTARG flag. 
*/ __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), bootdev, #ifdef LOADER_GELI_SUPPORT KARGS_FLAGS_GELI | KARGS_FLAGS_EXTARG, 0, 0, VTOP(&bootinfo), geliargs #else 0, 0, 0, VTOP(&bootinfo) #endif ); } } static int mount_root(char *arg) { char *root; struct i386_devdesc *ddesc; uint8_t part; if (asprintf(&root, "%s:", arg) < 0) return (1); if (i386_getdev((void **)&ddesc, root, NULL)) { free(root); return (1); } /* we should have new device descriptor, free old and replace it. */ free(bdev); bdev = ddesc; if (bdev->dd.d_dev->dv_type == DEVT_DISK) { if (bdev->d_kind.biosdisk.partition == -1) part = 0xff; else part = bdev->d_kind.biosdisk.partition; bootdev = MAKEBOOTDEV(dev_maj[bdev->dd.d_dev->dv_type], bdev->d_kind.biosdisk.slice + 1, bdev->dd.d_unit, part); bootinfo.bi_bios_dev = bd_unit2bios(bdev); } strncpy(boot_devname, root, sizeof (boot_devname)); setenv("currdev", root, 1); free(root); return (0); } static void fs_list(char *arg) { int fd; struct dirent *d; char line[80]; fd = open(arg, O_RDONLY); if (fd < 0) return; pager_open(); while ((d = readdirfd(fd)) != NULL) { sprintf(line, "%s\n", d->d_name); if (pager_output(line)) break; } pager_close(); close(fd); } static int parse_cmd(void) { char *arg = cmd; char *ep, *p, *q; const char *cp; char line[80]; int c, i, j; while ((c = *arg++)) { if (c == ' ' || c == '\t' || c == '\n') continue; for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++) ; ep = p; if (*p) *p++ = 0; if (c == '-') { while ((c = *arg++)) { if (c == 'P') { if (*(uint8_t *)PTOV(0x496) & 0x10) { cp = "yes"; } else { opts |= OPT_SET(RBX_DUAL); opts |= OPT_SET(RBX_SERIAL); cp = "no"; } printf("Keyboard: %s\n", cp); continue; } else if (c == 'S') { j = 0; while ((unsigned int) (i = *arg++ - '0') <= 9) j = j * 10 + i; if (j > 0 && i == -'0') { comspeed = j; break; } /* * Fall through to error below * ('S' not in optstr[]). 
*/ } for (i = 0; c != optstr[i]; i++) if (i == NOPT - 1) return (-1); opts ^= OPT_SET(flags[i]); } ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) : OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD; if (ioctrl & IO_SERIAL) { if (sio_init(115200 / comspeed) != 0) ioctrl &= ~IO_SERIAL; } } if (c == '?') { printf("\n"); if (*arg == '\0') arg = (char *)"/"; fs_list(arg); zfs_list(arg); return (-1); } else { char *ptr; printf("\n"); arg--; /* * Report pool status if the comment is 'status'. Lets * hope no-one wants to load /status as a kernel. */ if (strcmp(arg, "status") == 0) { pager_open(); for (i = 0; devsw[i] != NULL; i++) { if (devsw[i]->dv_print != NULL) { if (devsw[i]->dv_print(1)) break; } else { snprintf(line, sizeof(line), "%s: (unknown)\n", devsw[i]->dv_name); if (pager_output(line)) break; } } pager_close(); return (-1); } /* * If there is "zfs:" prefix simply ignore it. */ ptr = arg; if (strncmp(ptr, "zfs:", 4) == 0) ptr += 4; /* * If there is a colon, switch pools. */ q = strchr(ptr, ':'); if (q) { *q++ = '\0'; if (mount_root(arg) != 0) { return (-1); } arg = q; } if ((i = ep - arg)) { if ((size_t)i >= sizeof(kname)) return (-1); memcpy(kname, arg, i + 1); } } arg = p; } return (0); } /* * Probe all disks to discover ZFS pools. The idea is to walk all possible * disk devices, however, we also need to identify possible boot pool. * For boot pool detection we have boot disk passed us from BIOS, recorded * in bootinfo.bi_bios_dev. */ static void i386_zfs_probe(void) { char devname[32]; int boot_unit; struct i386_devdesc dev; uint64_t pool_guid = 0; dev.dd.d_dev = &bioshd; /* Translate bios dev to our unit number. */ boot_unit = bd_bios2unit(bootinfo.bi_bios_dev); /* * Open all the disks we can find and see if we can reconstruct * ZFS pools from them. */ for (dev.dd.d_unit = 0; bd_unit2bios(&dev) >= 0; dev.dd.d_unit++) { snprintf(devname, sizeof (devname), "%s%d:", bioshd.dv_name, dev.dd.d_unit); /* If this is not boot disk, use generic probe. 
*/ if (dev.dd.d_unit != boot_unit) zfs_probe_dev(devname, NULL); else zfs_probe_dev(devname, &pool_guid); if (pool_guid != 0 && bdev == NULL) { bdev = malloc(sizeof (struct i386_devdesc)); bzero(bdev, sizeof (struct i386_devdesc)); bdev->dd.d_dev = &zfs_dev; bdev->d_kind.zfs.pool_guid = pool_guid; } } } diff --git a/stand/libofw/devicename.c b/stand/libofw/devicename.c index 786e5549c0fc..11c0a1719ff3 100644 --- a/stand/libofw/devicename.c +++ b/stand/libofw/devicename.c @@ -1,146 +1,146 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include "bootstrap.h" #include "libofw.h" #include "libzfs.h" static int ofw_parsedev(struct ofw_devdesc **, const char *, const char **); /* * Point (dev) at an allocated device specifier for the device matching the * path in (devspec). If it contains an explicit device specification, * use that. If not, use the default device. */ int ofw_getdev(void **vdev, const char *devspec, const char **path) { struct ofw_devdesc **dev = (struct ofw_devdesc **)vdev; int rv; /* * If it looks like this is just a path and no * device, go with the current device. */ if ((devspec == NULL) || ((strchr(devspec, '@') == NULL) && (strchr(devspec, ':') == NULL))) { if (((rv = ofw_parsedev(dev, getenv("currdev"), NULL)) == 0) && (path != NULL)) *path = devspec; return(rv); } /* * Try to parse the device name off the beginning of the devspec */ return(ofw_parsedev(dev, devspec, path)); } /* * Point (dev) at an allocated device specifier matching the string version * at the beginning of (devspec). Return a pointer to the remaining * text in (path). 
*/ static int ofw_parsedev(struct ofw_devdesc **dev, const char *devspec, const char **path) { struct ofw_devdesc *idev; struct devsw *dv; phandle_t handle; const char *p; const char *s; char *ep; char name[256]; char type[64]; int err; int len; int i; for (p = s = devspec; *s != '\0'; p = s) { if ((s = strchr(p + 1, '/')) == NULL) s = strchr(p, '\0'); len = s - devspec; bcopy(devspec, name, len); name[len] = '\0'; if ((handle = OF_finddevice(name)) == -1) { bcopy(name, type, len); type[len] = '\0'; } else if (OF_getprop(handle, "device_type", type, sizeof(type)) == -1) continue; for (i = 0; (dv = devsw[i]) != NULL; i++) { if (strncmp(dv->dv_name, type, strlen(dv->dv_name)) == 0) goto found; } } return(ENOENT); found: if (path != NULL) *path = s; idev = malloc(sizeof(struct ofw_devdesc)); if (idev == NULL) { printf("ofw_parsedev: malloc failed\n"); return ENOMEM; } strcpy(idev->d_path, name); idev->dd.d_dev = dv; if (dv->dv_type == DEVT_ZFS) { p = devspec + strlen(dv->dv_name); err = zfs_parsedev((struct zfs_devdesc *)idev, p, path); if (err != 0) { free(idev); return (err); } } if (dev == NULL) { free(idev); } else { *dev = idev; } return(0); } int ofw_setcurrdev(struct env_var *ev, int flags, const void *value) { - struct ofw_devdesc *ncurr; - int rv; + struct ofw_devdesc *ncurr; + int rv; - if ((rv = ofw_parsedev(&ncurr, value, NULL)) != 0) - return rv; + if ((rv = ofw_parsedev(&ncurr, value, NULL)) != 0) + return (rv); - free(ncurr); - env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); - return 0; + free(ncurr); + + return (mount_currdev(ev, flags, value)); } diff --git a/stand/libsa/Makefile b/stand/libsa/Makefile index 815f479b5a0a..6fe145d681db 100644 --- a/stand/libsa/Makefile +++ b/stand/libsa/Makefile @@ -1,191 +1,191 @@ # $FreeBSD$ # Originally from $NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $ # # Notes: # - We don't use the libc strerror/sys_errlist because the string table is # quite large. 
# .include LIBSA_CPUARCH?=${MACHINE_CPUARCH} LIB?= sa # standalone components and stuff we have modified locally SRCS+= gzguts.h zutil.h __main.c abort.c assert.c bcd.c environment.c \ getopt.c gets.c globals.c \ hexdump.c pager.c panic.c printf.c strdup.c strerror.c \ random.c sbrk.c tslog.c twiddle.c zalloc.c zalloc_malloc.c # private (pruned) versions of libc string functions SRCS+= strcasecmp.c .PATH: ${LIBCSRC}/net SRCS+= ntoh.c # string functions from libc .PATH: ${LIBCSRC}/string SRCS+= bcmp.c bcopy.c bzero.c ffs.c fls.c \ memccpy.c memchr.c memcmp.c memcpy.c memmove.c memset.c \ strcat.c strchr.c strchrnul.c strcmp.c strcpy.c stpcpy.c stpncpy.c \ strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \ strnlen.c strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c # stdlib functions from libc .PATH: ${LIBCSRC}/stdlib SRCS+= abs.c strtol.c strtoll.c strtoul.c strtoull.c # common boot code .PATH: ${SYSDIR}/kern SRCS+= subr_boot.c .if ${MACHINE_CPUARCH} == "arm" .PATH: ${LIBCSRC}/arm/gen # Do not generate movt/movw, because the relocation fixup for them does not # translate to the -Bsymbolic -pie format required by self_reloc() in loader(8). # Also, the fpu is not available in a standalone environment. 
CFLAGS.clang+= -mno-movt CFLAGS.clang+= -mfpu=none .PATH: ${SRCTOP}/contrib/llvm-project/compiler-rt/lib/builtins/arm/ SRCS+= aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S SRCS+= aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "riscv" .PATH: ${LIBCSRC}/${MACHINE_CPUARCH}/gen .endif # Compiler support functions .PATH: ${SRCTOP}/contrib/llvm-project/compiler-rt/lib/builtins/ # __clzsi2 and ctzsi2 for various builtin functions SRCS+= clzsi2.c ctzsi2.c # Divide and modulus functions called by the compiler SRCS+= divmoddi4.c divmodsi4.c divdi3.c divsi3.c moddi3.c modsi3.c SRCS+= udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c SRCS+= ashldi3.c ashrdi3.c lshrdi3.c .if ${MACHINE_CPUARCH:Namd64:Ni386} == "" .PATH: ${SASRC}/x86 SRCS+= hypervisor.c .endif .if ${MACHINE_CPUARCH} == "powerpc" SRCS+= syncicache.c .endif # uuid functions from libc .PATH: ${LIBCSRC}/uuid SRCS+= uuid_create_nil.c uuid_equal.c uuid_from_string.c uuid_is_nil.c uuid_to_string.c # _setjmp/_longjmp .PATH: ${SASRC}/${LIBSA_CPUARCH} SRCS+= _setjmp.S # decompression functionality from libbz2 # NOTE: to actually test this functionality after libbz2 upgrade compile # loader(8) with LOADER_BZIP2_SUPPORT defined .PATH: ${SRCTOP}/contrib/bzip2 CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS SRCS+=bzlib.c crctable.c decompress.c huffman.c randtable.c # decompression functionality from zlib .PATH: ${SRCTOP}/sys/contrib/zlib CFLAGS+=-DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib SRCS+= adler32.c crc32.c SRCS+= infback.c inffast.c inflate.c inftrees.c zutil.c # lz4 decompression functionality .PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 SRCS+= lz4.c CFLAGS.lz4.c+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 # Create a subset of includes that are safe, as well as adjusting those that aren't # The lists may drive people nuts, but they are explicitly opt-in FAKE_DIRS=xlocale arpa 
SAFE_INCS=a.out.h assert.h elf.h limits.h nlist.h setjmp.h stddef.h stdbool.h string.h strings.h time.h unistd.h uuid.h STAND_H_INC=ctype.h fcntl.h signal.h stdio.h stdlib.h OTHER_INC=stdarg.h errno.h stdint.h beforedepend: mkdir -p ${FAKE_DIRS}; \ for i in ${SAFE_INCS}; do \ ln -sf ${SRCTOP}/include/$$i $$i; \ done; \ ln -sf ${SYSDIR}/${MACHINE}/include/stdarg.h stdarg.h; \ ln -sf ${SYSDIR}/sys/errno.h errno.h; \ ln -sf ${SYSDIR}/sys/stdint.h stdint.h; \ ln -sf ${SRCTOP}/include/arpa/inet.h arpa/inet.h; \ ln -sf ${SRCTOP}/include/arpa/tftp.h arpa/tftp.h; \ for i in _time.h _strings.h _string.h; do \ [ -f xlocale/$$i ] || :> xlocale/$$i; \ done; \ for i in ${STAND_H_INC}; do \ ln -sf ${SASRC}/stand.h $$i; \ done CLEANDIRS+=${FAKE_DIRS} CLEANFILES+= ${SAFE_INCS} ${STAND_H_INC} ${OTHER_INC} # io routines -SRCS+= closeall.c dev.c ioctl.c nullfs.c stat.c \ +SRCS+= closeall.c dev.c ioctl.c nullfs.c stat.c mount.c \ fstat.c close.c lseek.c open.c read.c write.c readdir.c # SMBios routines SRCS+= smbios.c .if !defined(BOOT_HIDE_SERIAL_NUMBERS) # Export serial numbers, UUID, and asset tag from loader. CFLAGS.smbios.c+= -DSMBIOS_SERIAL_NUMBERS .if defined(BOOT_LITTLE_ENDIAN_UUID) # Use little-endian UUID format as defined in SMBIOS 2.6. CFLAGS.smbios.c+= -DSMBIOS_LITTLE_ENDIAN_UUID .elif defined(BOOT_NETWORK_ENDIAN_UUID) # Use network-endian UUID format for backward compatibility. 
CFLAGS.smbios.c+= -DSMBIOS_NETWORK_ENDIAN_UUID .endif .endif # network routines SRCS+= arp.c ether.c ip.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c # network info services: SRCS+= bootp.c rarp.c bootparam.c # boot filesystems SRCS+= ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c SRCS+= dosfs.c ext2fs.c SRCS+= splitfs.c SRCS+= pkgfs.c # Time support SRCS+= time.c # kernel ufs support .PATH: ${SRCTOP}/sys/ufs/ffs SRCS+=ffs_subr.c ffs_tables.c CFLAGS.bzipfs.c+= -I${SRCTOP}/contrib/bzip2 # explicit_bzero and calculate_crc32c .PATH: ${SYSDIR}/libkern SRCS+= explicit_bzero.c crc32_libkern.c # Maybe GELI .if ${MK_LOADER_GELI} == "yes" .include "${SASRC}/geli/Makefile.inc" .endif .if ${MK_LOADER_VERIEXEC} == "yes" && ${MK_BEARSSL} == "yes" .include "${SRCTOP}/lib/libbearssl/Makefile.libsa.inc" .include "${SRCTOP}/lib/libsecureboot/Makefile.libsa.inc" .endif # Maybe ZFS .if ${MK_LOADER_ZFS} == "yes" .include "${SASRC}/zfs/Makefile.inc" .endif .include diff --git a/stand/libsa/cd9660.c b/stand/libsa/cd9660.c index c339678d96fe..57e4916c0fb5 100644 --- a/stand/libsa/cd9660.c +++ b/stand/libsa/cd9660.c @@ -1,623 +1,731 @@ /* $NetBSD: cd9660.c,v 1.5 1997/06/26 19:11:33 drochner Exp $ */ /* * Copyright (C) 1996 Wolfgang Solfrank. * Copyright (C) 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. 
The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone ISO9660 file reading package. * * Note: This doesn't support Rock Ridge extensions, extended attributes, * blocksizes other than 2048 bytes, multi-extent files, etc. 
*/ #include #include #include #include #include #include #include "stand.h" #define SUSP_CONTINUATION "CE" #define SUSP_PRESENT "SP" #define SUSP_STOP "ST" #define SUSP_EXTREF "ER" #define RRIP_NAME "NM" typedef struct { ISO_SUSP_HEADER h; u_char signature [ISODCL ( 5, 6)]; u_char len_skp [ISODCL ( 7, 7)]; /* 711 */ } ISO_SUSP_PRESENT; static int buf_read_file(struct open_file *f, char **buf_p, size_t *size_p); static int cd9660_open(const char *path, struct open_file *f); static int cd9660_close(struct open_file *f); static int cd9660_read(struct open_file *f, void *buf, size_t size, size_t *resid); static off_t cd9660_seek(struct open_file *f, off_t offset, int where); static int cd9660_stat(struct open_file *f, struct stat *sb); static int cd9660_readdir(struct open_file *f, struct dirent *d); +static int cd9660_mount(const char *, const char *, void **); +static int cd9660_unmount(const char *, void *); static int dirmatch(struct open_file *f, const char *path, struct iso_directory_record *dp, int use_rrip, int lenskip); static int rrip_check(struct open_file *f, struct iso_directory_record *dp, int *lenskip); static char *rrip_lookup_name(struct open_file *f, struct iso_directory_record *dp, int lenskip, size_t *len); static ISO_SUSP_HEADER *susp_lookup_record(struct open_file *f, const char *identifier, struct iso_directory_record *dp, int lenskip); struct fs_ops cd9660_fsops = { - "cd9660", - cd9660_open, - cd9660_close, - cd9660_read, - null_write, - cd9660_seek, - cd9660_stat, - cd9660_readdir + .fs_name = "cd9660", + .fo_open = cd9660_open, + .fo_close = cd9660_close, + .fo_read = cd9660_read, + .fo_write = null_write, + .fo_seek = cd9660_seek, + .fo_stat = cd9660_stat, + .fo_readdir = cd9660_readdir, + .fo_mount = cd9660_mount, + .fo_unmount = cd9660_unmount }; +typedef struct cd9660_mnt { + struct devdesc *cd_dev; + int cd_fd; + struct iso_directory_record cd_rec; + STAILQ_ENTRY(cd9660_mnt) cd_link; +} cd9660_mnt_t; + +typedef 
STAILQ_HEAD(cd9660_mnt_list, cd9660_mnt) cd9660_mnt_list_t; +static cd9660_mnt_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list); + #define F_ISDIR 0x0001 /* Directory */ #define F_ROOTDIR 0x0002 /* Root directory */ #define F_RR 0x0004 /* Rock Ridge on this volume */ struct file { int f_flags; /* file flags */ off_t f_off; /* Current offset within file */ daddr_t f_bno; /* Starting block number */ off_t f_size; /* Size of file */ daddr_t f_buf_blkno; /* block number of data block */ char *f_buf; /* buffer for data block */ int f_susp_skip; /* len_skip for SUSP records */ }; struct ptable_ent { char namlen [ISODCL( 1, 1)]; /* 711 */ char extlen [ISODCL( 2, 2)]; /* 711 */ char block [ISODCL( 3, 6)]; /* 732 */ char parent [ISODCL( 7, 8)]; /* 722 */ char name [1]; }; #define PTFIXSZ 8 #define PTSIZE(pp) roundup(PTFIXSZ + isonum_711((pp)->namlen), 2) #define cdb2devb(bno) ((bno) * ISO_DEFAULT_BLOCK_SIZE / DEV_BSIZE) static ISO_SUSP_HEADER * susp_lookup_record(struct open_file *f, const char *identifier, struct iso_directory_record *dp, int lenskip) { static char susp_buffer[ISO_DEFAULT_BLOCK_SIZE]; ISO_SUSP_HEADER *sh; ISO_RRIP_CONT *shc; char *p, *end; int error; size_t read; p = dp->name + isonum_711(dp->name_len) + lenskip; /* Names of even length have a padding byte after the name. */ if ((isonum_711(dp->name_len) & 1) == 0) p++; end = (char *)dp + isonum_711(dp->length); while (p + 3 < end) { sh = (ISO_SUSP_HEADER *)p; if (bcmp(sh->type, identifier, 2) == 0) return (sh); if (bcmp(sh->type, SUSP_STOP, 2) == 0) return (NULL); if (bcmp(sh->type, SUSP_CONTINUATION, 2) == 0) { shc = (ISO_RRIP_CONT *)sh; error = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(isonum_733(shc->location)), ISO_DEFAULT_BLOCK_SIZE, susp_buffer, &read); /* Bail if it fails. */ if (error != 0 || read != ISO_DEFAULT_BLOCK_SIZE) return (NULL); p = susp_buffer + isonum_733(shc->offset); end = p + isonum_733(shc->length); } else { /* Ignore this record and skip to the next. 
*/ p += isonum_711(sh->length); /* Avoid infinite loops with corrupted file systems */ if (isonum_711(sh->length) == 0) return (NULL); } } return (NULL); } static char * rrip_lookup_name(struct open_file *f, struct iso_directory_record *dp, int lenskip, size_t *len) { ISO_RRIP_ALTNAME *p; if (len == NULL) return (NULL); p = (ISO_RRIP_ALTNAME *)susp_lookup_record(f, RRIP_NAME, dp, lenskip); if (p == NULL) return (NULL); switch (*p->flags) { case ISO_SUSP_CFLAG_CURRENT: *len = 1; return ("."); case ISO_SUSP_CFLAG_PARENT: *len = 2; return (".."); case 0: *len = isonum_711(p->h.length) - 5; return ((char *)p + 5); default: /* * We don't handle hostnames or continued names as they are * too hard, so just bail and use the default name. */ return (NULL); } } static int rrip_check(struct open_file *f, struct iso_directory_record *dp, int *lenskip) { ISO_SUSP_PRESENT *sp; ISO_RRIP_EXTREF *er; char *p; /* First, see if we can find a SP field. */ p = dp->name + isonum_711(dp->name_len); if (p > (char *)dp + isonum_711(dp->length)) return (0); sp = (ISO_SUSP_PRESENT *)p; if (bcmp(sp->h.type, SUSP_PRESENT, 2) != 0) return (0); if (isonum_711(sp->h.length) != sizeof(ISO_SUSP_PRESENT)) return (0); if (sp->signature[0] != 0xbe || sp->signature[1] != 0xef) return (0); *lenskip = isonum_711(sp->len_skp); /* * Now look for an ER field. If RRIP is present, then there must * be at least one of these. It would be more pedantic to walk * through the list of fields looking for a Rock Ridge ER field. 
*/ er = (ISO_RRIP_EXTREF *)susp_lookup_record(f, SUSP_EXTREF, dp, 0); if (er == NULL) return (0); return (1); } static int dirmatch(struct open_file *f, const char *path, struct iso_directory_record *dp, int use_rrip, int lenskip) { size_t len, plen; char *cp, *sep; int i, icase; if (use_rrip) cp = rrip_lookup_name(f, dp, lenskip, &len); else cp = NULL; if (cp == NULL) { len = isonum_711(dp->name_len); cp = dp->name; icase = 1; } else icase = 0; sep = strchr(path, '/'); if (sep != NULL) { plen = sep - path; } else { plen = strlen(path); } if (plen != len) return (0); for (i = len; --i >= 0; path++, cp++) { if (!*path || *path == '/') break; if (*path == *cp) continue; if (!icase && toupper(*path) == *cp) continue; return 0; } if (*path && *path != '/') return 0; /* * Allow stripping of trailing dots and the version number. * Note that this will find the first instead of the last version * of a file. */ if (i >= 0 && (*cp == ';' || *cp == '.')) { /* This is to prevent matching of numeric extensions */ if (*cp == '.' 
&& cp[1] != ';') return 0; while (--i >= 0) if (*++cp != ';' && (*cp < '0' || *cp > '9')) return 0; } return 1; } static int -cd9660_open(const char *path, struct open_file *f) +cd9660_read_dr(struct open_file *f, struct iso_directory_record *rec) { - struct file *fp = NULL; - void *buf; struct iso_primary_descriptor *vd; - size_t read, dsize, off; - daddr_t bno, boff; - struct iso_directory_record rec; - struct iso_directory_record *dp = NULL; - int rc, first, use_rrip, lenskip; - bool isdir = false; + size_t read; + daddr_t bno; + int rc; - /* First find the volume descriptor */ - buf = malloc(MAX(ISO_DEFAULT_BLOCK_SIZE, + errno = 0; + vd = malloc(MAX(ISO_DEFAULT_BLOCK_SIZE, sizeof(struct iso_primary_descriptor))); - vd = buf; + if (vd == NULL) + return (errno); + for (bno = 16;; bno++) { twiddle(1); rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno), - ISO_DEFAULT_BLOCK_SIZE, buf, &read); + ISO_DEFAULT_BLOCK_SIZE, (char *)vd, &read); if (rc) goto out; if (read != ISO_DEFAULT_BLOCK_SIZE) { rc = EIO; goto out; } rc = EINVAL; - if (bcmp(vd->id, ISO_STANDARD_ID, sizeof vd->id) != 0) + if (bcmp(vd->id, ISO_STANDARD_ID, sizeof(vd->id)) != 0) goto out; if (isonum_711(vd->type) == ISO_VD_END) goto out; if (isonum_711(vd->type) == ISO_VD_PRIMARY) break; } - if (isonum_723(vd->logical_block_size) != ISO_DEFAULT_BLOCK_SIZE) + if (isonum_723(vd->logical_block_size) == ISO_DEFAULT_BLOCK_SIZE) { + bcopy(vd->root_directory_record, rec, sizeof(*rec)); + rc = 0; + } +out: + free(vd); + return (rc); +} + +static int +cd9660_open(const char *path, struct open_file *f) +{ + struct file *fp = NULL; + void *buf; + size_t read, dsize, off; + daddr_t bno, boff; + struct iso_directory_record rec; + struct iso_directory_record *dp = NULL; + int rc, first, use_rrip, lenskip; + bool isdir = false; + struct devdesc *dev; + cd9660_mnt_t *mnt; + + /* First find the volume descriptor */ + errno = 0; + buf = malloc(MAX(ISO_DEFAULT_BLOCK_SIZE, + sizeof(struct 
iso_primary_descriptor))); + if (buf == NULL) + return (errno); + + dev = f->f_devdata; + STAILQ_FOREACH(mnt, &mnt_list, cd_link) { + if (dev->d_dev->dv_type == mnt->cd_dev->d_dev->dv_type && + dev->d_unit == mnt->cd_dev->d_unit) + break; + } + + rc = 0; + if (mnt == NULL) + rc = cd9660_read_dr(f, &rec); + else + rec = mnt->cd_rec; + + if (rc != 0) goto out; - bcopy(vd->root_directory_record, &rec, sizeof(rec)); - if (*path == '/') path++; /* eat leading '/' */ + if (*path == '/') + path++; /* eat leading '/' */ first = 1; use_rrip = 0; lenskip = 0; while (*path) { bno = isonum_733(rec.extent) + isonum_711(rec.ext_attr_length); dsize = isonum_733(rec.size); off = 0; boff = 0; while (off < dsize) { if ((off % ISO_DEFAULT_BLOCK_SIZE) == 0) { twiddle(1); rc = f->f_dev->dv_strategy (f->f_devdata, F_READ, cdb2devb(bno + boff), ISO_DEFAULT_BLOCK_SIZE, buf, &read); if (rc) goto out; if (read != ISO_DEFAULT_BLOCK_SIZE) { rc = EIO; goto out; } boff++; dp = (struct iso_directory_record *) buf; } if (isonum_711(dp->length) == 0) { /* skip to next block, if any */ off = boff * ISO_DEFAULT_BLOCK_SIZE; continue; } /* See if RRIP is in use. */ if (first) use_rrip = rrip_check(f, dp, &lenskip); if (dirmatch(f, path, dp, use_rrip, first ? 0 : lenskip)) { first = 0; break; } else first = 0; dp = (struct iso_directory_record *) ((char *) dp + isonum_711(dp->length)); /* If the new block has zero length, it is padding. */ if (isonum_711(dp->length) == 0) { /* Skip to next block, if any. */ off = boff * ISO_DEFAULT_BLOCK_SIZE; continue; } off += isonum_711(dp->length); } if (off >= dsize) { rc = ENOENT; goto out; } rec = *dp; while (*path && *path != '/') /* look for next component */ path++; if (*path) /* this component was directory */ isdir = true; while (*path == '/') path++; /* skip '/' */ if (*path) /* We do have next component. */ isdir = false; } /* * if the path had trailing / but the path does point to file, * report the error ENOTDIR. 
*/ if (isdir == true && (isonum_711(rec.flags) & 2) == 0) { rc = ENOTDIR; goto out; } /* allocate file system specific data structure */ fp = malloc(sizeof(struct file)); bzero(fp, sizeof(struct file)); f->f_fsdata = (void *)fp; if ((isonum_711(rec.flags) & 2) != 0) { fp->f_flags = F_ISDIR; } if (first) { fp->f_flags |= F_ROOTDIR; /* Check for Rock Ridge since we didn't in the loop above. */ bno = isonum_733(rec.extent) + isonum_711(rec.ext_attr_length); twiddle(1); rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno), ISO_DEFAULT_BLOCK_SIZE, buf, &read); if (rc) goto out; if (read != ISO_DEFAULT_BLOCK_SIZE) { rc = EIO; goto out; } dp = (struct iso_directory_record *)buf; use_rrip = rrip_check(f, dp, &lenskip); } if (use_rrip) { fp->f_flags |= F_RR; fp->f_susp_skip = lenskip; } fp->f_off = 0; fp->f_bno = isonum_733(rec.extent) + isonum_711(rec.ext_attr_length); fp->f_size = isonum_733(rec.size); free(buf); return 0; out: free(fp); free(buf); return rc; } static int cd9660_close(struct open_file *f) { struct file *fp = (struct file *)f->f_fsdata; f->f_fsdata = NULL; free(fp); return 0; } static int buf_read_file(struct open_file *f, char **buf_p, size_t *size_p) { struct file *fp = (struct file *)f->f_fsdata; daddr_t blkno, blkoff; int rc = 0; size_t read; blkno = fp->f_off / ISO_DEFAULT_BLOCK_SIZE + fp->f_bno; blkoff = fp->f_off % ISO_DEFAULT_BLOCK_SIZE; if (blkno != fp->f_buf_blkno) { if (fp->f_buf == (char *)0) fp->f_buf = malloc(ISO_DEFAULT_BLOCK_SIZE); twiddle(16); rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(blkno), ISO_DEFAULT_BLOCK_SIZE, fp->f_buf, &read); if (rc) return (rc); if (read != ISO_DEFAULT_BLOCK_SIZE) return (EIO); fp->f_buf_blkno = blkno; } *buf_p = fp->f_buf + blkoff; *size_p = ISO_DEFAULT_BLOCK_SIZE - blkoff; if (*size_p > fp->f_size - fp->f_off) *size_p = fp->f_size - fp->f_off; return (rc); } static int cd9660_read(struct open_file *f, void *start, size_t size, size_t *resid) { struct file *fp = (struct file 
*)f->f_fsdata; char *buf, *addr; size_t buf_size, csize; int rc = 0; addr = start; while (size) { if (fp->f_off < 0 || fp->f_off >= fp->f_size) break; rc = buf_read_file(f, &buf, &buf_size); if (rc) break; csize = size > buf_size ? buf_size : size; bcopy(buf, addr, csize); fp->f_off += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } static int cd9660_readdir(struct open_file *f, struct dirent *d) { struct file *fp = (struct file *)f->f_fsdata; struct iso_directory_record *ep; size_t buf_size, reclen, namelen; int error = 0; int lenskip; char *buf, *name; again: if (fp->f_off >= fp->f_size) return (ENOENT); error = buf_read_file(f, &buf, &buf_size); if (error) return (error); ep = (struct iso_directory_record *)buf; if (isonum_711(ep->length) == 0) { daddr_t blkno; /* skip to next block, if any */ blkno = fp->f_off / ISO_DEFAULT_BLOCK_SIZE; fp->f_off = (blkno + 1) * ISO_DEFAULT_BLOCK_SIZE; goto again; } if (fp->f_flags & F_RR) { if (fp->f_flags & F_ROOTDIR && fp->f_off == 0) lenskip = 0; else lenskip = fp->f_susp_skip; name = rrip_lookup_name(f, ep, lenskip, &namelen); } else name = NULL; if (name == NULL) { namelen = isonum_711(ep->name_len); name = ep->name; if (namelen == 1) { if (ep->name[0] == 0) name = "."; else if (ep->name[0] == 1) { namelen = 2; name = ".."; } } } reclen = sizeof(struct dirent) - (MAXNAMLEN+1) + namelen + 1; reclen = (reclen + 3) & ~3; d->d_fileno = isonum_733(ep->extent); d->d_reclen = reclen; if (isonum_711(ep->flags) & 2) d->d_type = DT_DIR; else d->d_type = DT_REG; d->d_namlen = namelen; bcopy(name, d->d_name, d->d_namlen); d->d_name[d->d_namlen] = 0; fp->f_off += isonum_711(ep->length); return (0); } static off_t cd9660_seek(struct open_file *f, off_t offset, int where) { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_off = offset; break; case SEEK_CUR: fp->f_off += offset; break; case SEEK_END: fp->f_off = fp->f_size - offset; break; default: return -1; } return 
fp->f_off; } static int cd9660_stat(struct open_file *f, struct stat *sb) { struct file *fp = (struct file *)f->f_fsdata; /* only important stuff */ sb->st_mode = S_IRUSR | S_IRGRP | S_IROTH; if (fp->f_flags & F_ISDIR) sb->st_mode |= S_IFDIR; else sb->st_mode |= S_IFREG; sb->st_uid = sb->st_gid = 0; sb->st_size = fp->f_size; return 0; }
+
+/*
+ * Mount hook for cd9660: open dev+path, verify that the file was opened
+ * by the "cd9660" file system and keep the record filled in by
+ * cd9660_read_dr() in a cd9660_mnt_t.
+ *
+ * On success the record is linked into mnt_list, *data points at it and
+ * 0 is returned.  On failure an errno value is returned, the descriptor
+ * is closed and nothing is left on the list.
+ */
+static int
+cd9660_mount(const char *dev, const char *path, void **data)
+{
+	cd9660_mnt_t *mnt;
+	struct open_file *f;
+	char *fs;
+	int rc;
+
+	errno = 0;
+	mnt = calloc(1, sizeof(*mnt));
+	if (mnt == NULL)
+		return (errno);
+	mnt->cd_fd = -1;
+
+	/* Do not rely on asprintf() having set errno when it fails. */
+	if (asprintf(&fs, "%s%s", dev, path) < 0) {
+		rc = ENOMEM;
+		goto done;
+	}
+
+	mnt->cd_fd = open(fs, O_RDONLY);
+	rc = errno;		/* capture open()'s error before free() */
+	free(fs);
+	if (mnt->cd_fd == -1) {
+		if (rc == 0)
+			rc = ENXIO;
+		goto done;
+	}
+
+	f = fd2open_file(mnt->cd_fd);
+	/* Is it cd9660 file system? */
+	if (strcmp(f->f_ops->fs_name, "cd9660") == 0) {
+		mnt->cd_dev = f->f_devdata;
+		rc = cd9660_read_dr(f, &mnt->cd_rec);
+	} else {
+		rc = ENXIO;
+	}
+
+done:
+	if (rc != 0) {
+		/*
+		 * cd_dev only aliases the open file's f_devdata, so it is
+		 * not freed here (cd9660_unmount() does not free it either).
+		 */
+		if (mnt->cd_fd >= 0)
+			close(mnt->cd_fd);
+		free(mnt);
+	} else {
+		/* Publish the record only once it is completely set up. */
+		STAILQ_INSERT_TAIL(&mnt_list, mnt, cd_link);
+		*data = mnt;
+	}
+	errno = rc;
+	return (rc);
+}
+
+/*
+ * Unmount hook for cd9660: unlink the record handed out by
+ * cd9660_mount(), close the descriptor it kept open and free the record.
+ * Always returns 0.
+ */
+static int
+cd9660_unmount(const char *dev __unused, void *data)
+{
+	cd9660_mnt_t *mnt = data;
+
+	STAILQ_REMOVE(&mnt_list, mnt, cd9660_mnt, cd_link);
+	close(mnt->cd_fd);
+	free(mnt);
+	return (0);
+}
diff --git a/stand/libsa/dosfs.c b/stand/libsa/dosfs.c index 656af3642c88..452a79ae12dc 100644 --- a/stand/libsa/dosfs.c +++ b/stand/libsa/dosfs.c @@ -1,883 +1,969 @@ /* * Copyright (c) 1996, 1998 Robert Nordier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Readonly filesystem for Microsoft FAT12/FAT16/FAT32 filesystems, * also supports VFAT. 
*/ #include #include #include #include "stand.h" +#include "disk.h" #include "dosfs.h" +typedef struct dos_mnt { + char *dos_dev; + DOS_FS *dos_fs; + int dos_fd; + STAILQ_ENTRY(dos_mnt) dos_link; +} dos_mnt_t; + +typedef STAILQ_HEAD(dos_mnt_list, dos_mnt) dos_mnt_list_t; +static dos_mnt_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list); static int dos_open(const char *path, struct open_file *fd); static int dos_close(struct open_file *fd); static int dos_read(struct open_file *fd, void *buf, size_t size, size_t *resid); static off_t dos_seek(struct open_file *fd, off_t offset, int whence); static int dos_stat(struct open_file *fd, struct stat *sb); static int dos_readdir(struct open_file *fd, struct dirent *d); +static int dos_mount(const char *dev, const char *path, void **data); +static int dos_unmount(const char *dev, void *data); struct fs_ops dosfs_fsops = { - "dosfs", - dos_open, - dos_close, - dos_read, - null_write, - dos_seek, - dos_stat, - dos_readdir + .fs_name = "dosfs", + .fo_open = dos_open, + .fo_close = dos_close, + .fo_read = dos_read, + .fo_write = null_write, + .fo_seek = dos_seek, + .fo_stat = dos_stat, + .fo_readdir = dos_readdir, + .fo_mount = dos_mount, + .fo_unmount = dos_unmount }; #define SECSIZ 512 /* sector size */ #define SSHIFT 9 /* SECSIZ shift */ #define DEPSEC 16 /* directory entries per sector */ #define DSHIFT 4 /* DEPSEC shift */ #define LOCLUS 2 /* lowest cluster number */ #define FATBLKSZ 0x20000 /* size of block in the FAT cache buffer */ /* DOS "BIOS Parameter Block" */ typedef struct { u_char secsiz[2]; /* sector size */ u_char spc; /* sectors per cluster */ u_char ressec[2]; /* reserved sectors */ u_char fats; /* FATs */ u_char dirents[2]; /* root directory entries */ u_char secs[2]; /* total sectors */ u_char media; /* media descriptor */ u_char spf[2]; /* sectors per FAT */ u_char spt[2]; /* sectors per track */ u_char heads[2]; /* drive heads */ u_char hidsec[4]; /* hidden sectors */ u_char lsecs[4]; /* huge sectors */ 
u_char lspf[4]; /* huge sectors per FAT */ u_char xflg[2]; /* flags */ u_char vers[2]; /* filesystem version */ u_char rdcl[4]; /* root directory start cluster */ u_char infs[2]; /* filesystem info sector */ u_char bkbs[2]; /* backup boot sector */ } DOS_BPB; /* Initial portion of DOS boot sector */ typedef struct { u_char jmp[3]; /* usually 80x86 'jmp' opcode */ u_char oem[8]; /* OEM name and version */ DOS_BPB bpb; /* BPB */ } DOS_BS; /* Supply missing "." and ".." root directory entries */ static const char *const dotstr[2] = {".", ".."}; static DOS_DE dot[2] = { {". ", " ", FA_DIR, {0, 0, {0, 0}, {0, 0}, {0, 0}, {0, 0}}, {0, 0}, {0x21, 0}, {0, 0}, {0, 0, 0, 0}}, {".. ", " ", FA_DIR, {0, 0, {0, 0}, {0, 0}, {0, 0}, {0, 0}}, {0, 0}, {0x21, 0}, {0, 0}, {0, 0, 0, 0}} }; /* The usual conversion macros to avoid multiplication and division */ #define bytsec(n) ((n) >> SSHIFT) #define secbyt(s) ((s) << SSHIFT) #define entsec(e) ((e) >> DSHIFT) #define bytblk(fs, n) ((n) >> (fs)->bshift) #define blkbyt(fs, b) ((b) << (fs)->bshift) #define secblk(fs, s) ((s) >> ((fs)->bshift - SSHIFT)) #define blksec(fs, b) ((b) << ((fs)->bshift - SSHIFT)) /* Convert cluster number to offset within filesystem */ #define blkoff(fs, b) (secbyt((fs)->lsndta) + blkbyt(fs, (b) - LOCLUS)) /* Convert cluster number to logical sector number */ #define blklsn(fs, b) ((fs)->lsndta + blksec(fs, (b) - LOCLUS)) /* Convert cluster number to offset within FAT */ #define fatoff(sz, c) ((sz) == 12 ? (c) + ((c) >> 1) : \ (sz) == 16 ? (c) << 1 : \ (c) << 2) /* Does cluster number reference a valid data cluster? */ #define okclus(fs, c) ((c) >= LOCLUS && (c) <= (fs)->xclus) /* Get start cluster from directory entry */ #define stclus(sz, de) ((sz) != 32 ? 
cv2((de)->clus) : \ ((u_int)cv2((de)->dex.h_clus) << 16) | \ cv2((de)->clus)) static int parsebs(DOS_FS *, DOS_BS *); static int namede(DOS_FS *, const char *, DOS_DE **); static int lookup(DOS_FS *, u_int, const char *, DOS_DE **); static void cp_xdnm(u_char *, DOS_XDE *); static void cp_sfn(u_char *, DOS_DE *); static off_t fsize(DOS_FS *, DOS_DE *); static int fatcnt(DOS_FS *, u_int); static int fatget(DOS_FS *, u_int *); static int fatend(u_int, u_int); static int ioread(DOS_FS *, u_int, void *, size_t); static int ioget(struct open_file *, daddr_t, void *, size_t); static int dos_read_fatblk(DOS_FS *fs, struct open_file *fd, u_int blknum) { int err; size_t io_size; daddr_t offset_in_fat, max_offset_in_fat; offset_in_fat = ((daddr_t)blknum) * FATBLKSZ; max_offset_in_fat = secbyt(fs->spf); io_size = FATBLKSZ; if (offset_in_fat > max_offset_in_fat) offset_in_fat = max_offset_in_fat; if (offset_in_fat + io_size > max_offset_in_fat) io_size = ((size_t)(max_offset_in_fat - offset_in_fat)); if (io_size != 0) { err = ioget(fd, fs->lsnfat + bytsec(offset_in_fat), fs->fatbuf, io_size); if (err != 0) { fs->fatbuf_blknum = ((u_int)(-1)); return (err); } } if (io_size < FATBLKSZ) memset(fs->fatbuf + io_size, 0, FATBLKSZ - io_size); fs->fatbuf_blknum = blknum; return (0); } /* * Mount DOS filesystem */ static int -dos_mount(DOS_FS *fs, struct open_file *fd) +dos_mount_impl(DOS_FS *fs, struct open_file *fd) { int err; u_char *buf; - bzero(fs, sizeof(DOS_FS)); fs->fd = fd; if ((buf = malloc(secbyt(1))) == NULL) return (errno); if ((err = ioget(fs->fd, 0, buf, secbyt(1))) || (err = parsebs(fs, (DOS_BS *)buf))) { free(buf); return (err); } free(buf); if ((fs->fatbuf = malloc(FATBLKSZ)) == NULL) return (errno); err = dos_read_fatblk(fs, fd, 0); if (err != 0) { free(fs->fatbuf); return (err); } fs->root = dot[0]; fs->root.name[0] = ' '; if (fs->fatsz == 32) { fs->root.clus[0] = fs->rdcl & 0xff; fs->root.clus[1] = (fs->rdcl >> 8) & 0xff; fs->root.dex.h_clus[0] = (fs->rdcl >> 16) & 
0xff; fs->root.dex.h_clus[1] = (fs->rdcl >> 24) & 0xff; } return (0); }
+
+/*
+ * Mount hook for dosfs: open dev+path, verify that the file was opened
+ * by the "dosfs" file system and remember its DOS_FS so later dos_open()
+ * calls on the same device can share it.
+ *
+ * On success the record is linked into mnt_list, *data points at it and
+ * 0 is returned.  On failure an errno value is returned and every
+ * resource acquired here is released again.
+ */
+static int
+dos_mount(const char *dev, const char *path, void **data)
+{
+	char *fs;
+	dos_mnt_t *mnt;
+	struct open_file *f;
+	DOS_FILE *df;
+	int rc;
+
+	errno = 0;
+	mnt = calloc(1, sizeof(*mnt));
+	if (mnt == NULL)
+		return (errno);
+	mnt->dos_fd = -1;
+	mnt->dos_dev = strdup(dev);
+	if (mnt->dos_dev == NULL) {
+		rc = ENOMEM;
+		goto done;
+	}
+
+	/* Do not rely on asprintf() having set errno when it fails. */
+	if (asprintf(&fs, "%s%s", dev, path) < 0) {
+		rc = ENOMEM;
+		goto done;
+	}
+
+	mnt->dos_fd = open(fs, O_RDONLY);
+	rc = errno;		/* capture open()'s error before free() */
+	free(fs);
+	if (mnt->dos_fd == -1) {
+		if (rc == 0)
+			rc = ENXIO;
+		goto done;
+	}
+
+	f = fd2open_file(mnt->dos_fd);
+	if (strcmp(f->f_ops->fs_name, "dosfs") == 0) {
+		df = f->f_fsdata;
+		mnt->dos_fs = df->fs;
+		rc = 0;
+	} else {
+		rc = ENXIO;
+	}
+
+done:
+	if (rc != 0) {
+		free(mnt->dos_dev);
+		if (mnt->dos_fd >= 0)
+			close(mnt->dos_fd);
+		free(mnt);
+	} else {
+		/* Publish the record only once it is completely set up. */
+		STAILQ_INSERT_TAIL(&mnt_list, mnt, dos_link);
+		*data = mnt;
+	}
+
+	errno = rc;
+	return (rc);
+}
+
+/*
+ * Unmount hook for dosfs: unlink the record handed out by dos_mount(),
+ * free its device name, close the descriptor it kept open and free the
+ * record.  Always returns 0.
+ */
+static int
+dos_unmount(const char *dev __unused, void *data)
+{
+	dos_mnt_t *mnt = data;
+
+	STAILQ_REMOVE(&mnt_list, mnt, dos_mnt, dos_link);
+	free(mnt->dos_dev);
+	close(mnt->dos_fd);
+	free(mnt);
+	return (0);
+}
+
 /*
  * Unmount mounted filesystem
  */
 static int
-dos_unmount(DOS_FS *fs)
+dos_unmount_impl(DOS_FS *fs)
 {
 	if (fs->links)
 		return (EBUSY);
 	free(fs->fatbuf);
 	free(fs);
 	return (0);
 }
 
 /*
  * Open DOS file
  */
 static int
 dos_open(const char *path, struct open_file *fd)
 {
 	DOS_DE *de;
 	DOS_FILE *f;
 	DOS_FS *fs;
+	dos_mnt_t *mnt;
+	const char *dev;
 	u_int size, clus;
 	int err;
 
-	/* Allocate mount structure, associate with open */
-	if ((fs = malloc(sizeof(DOS_FS))) == NULL)
-		return (errno);
-	if ((err = dos_mount(fs, fd))) {
-		free(fs);
-		return (err);
+	/* Reuse the DOS_FS of an existing mount of this device, if any. */
+	dev = disk_fmtdev(fd->f_devdata);
+	STAILQ_FOREACH(mnt, &mnt_list, dos_link) {
+		if (strcmp(dev, mnt->dos_dev) == 0)
+			break;
+	}
+
+	if (mnt == NULL) {
+		/*
+		 * Allocate mount structure, associate with open.  It has to
+		 * start out zeroed: dos_mount_impl() does not clear it, and
+		 * fields such as links are only ever adjusted afterwards.
+		 */
+		if ((fs = calloc(1, sizeof(DOS_FS))) == NULL)
+			return (errno);
+		if ((err = dos_mount_impl(fs, fd))) {
+			free(fs);
+			return (err);
+		}
+	} else {
+		fs = mnt->dos_fs;
 	}
 
 	if ((err = namede(fs, path, &de))) {
-		dos_unmount(fs);
+		if (mnt == NULL)
+			dos_unmount_impl(fs);
 		return (err);
 	}
 
 	clus = stclus(fs->fatsz, de);
 	size = cv4(de->size);
 
 	if ((!(de->attr & FA_DIR) && (!clus != !size)) ||
 	    ((de->attr & FA_DIR) && size) ||
 	    (clus && !okclus(fs, clus))) {
-		dos_unmount(fs);
+		if (mnt == NULL)
+			dos_unmount_impl(fs);
 		return (EINVAL);
 	}
-	if ((f = malloc(sizeof(DOS_FILE))) == NULL) {
+	if ((f = calloc(1, sizeof(DOS_FILE))) == NULL) {
 		err = errno;
-		dos_unmount(fs);
+		if (mnt == NULL)
+			dos_unmount_impl(fs);
 		return (err);
 	}
-	bzero(f, sizeof(DOS_FILE));
 	f->fs = fs;
 	fs->links++;
 	f->de = *de;
-	fd->f_fsdata = (void *)f;
+	fd->f_fsdata = f;
 	return (0);
 }
 /* * Read from file */ static int dos_read(struct open_file *fd, void *buf, size_t nbyte, size_t *resid) { off_t size; u_int nb, off, clus, c, cnt, n; DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; int err = 0; /* * as ioget() can be called *a lot*, use twiddle here. * also 4 seems to be good value not to slow loading down too much: * with 270MB file (~540k ioget() calls, twiddle can easily waste * 4-5 sec. */ twiddle(4); nb = (u_int)nbyte; if ((size = fsize(f->fs, &f->de)) == -1) return (EINVAL); if (nb > (n = size - f->offset)) nb = n; off = f->offset; if ((clus = stclus(f->fs->fatsz, &f->de))) off &= f->fs->bsize - 1; c = f->c; cnt = nb; while (cnt) { n = 0; if (!c) { if ((c = clus)) n = bytblk(f->fs, f->offset); } else if (!off) n++; while (n--) { if ((err = fatget(f->fs, &c))) goto out; if (!okclus(f->fs, c)) { err = EINVAL; goto out; } } if (!clus || (n = f->fs->bsize - off) > cnt) n = cnt; if ((err = ioread(f->fs, (c ?
blkoff(f->fs, c) : secbyt(f->fs->lsndir)) + off, buf, n))) goto out; f->offset += n; f->c = c; off = 0; buf = (char *)buf + n; cnt -= n; } out: if (resid) *resid = nbyte - nb + cnt; return (err); } /* * Reposition within file */ static off_t dos_seek(struct open_file *fd, off_t offset, int whence) { off_t off; u_int size; DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; size = cv4(f->de.size); switch (whence) { case SEEK_SET: off = 0; break; case SEEK_CUR: off = f->offset; break; case SEEK_END: off = size; break; default: errno = EINVAL; return (-1); } off += offset; if (off < 0 || off > size) { errno = EINVAL; return (-1); } f->offset = (u_int)off; f->c = 0; return (off); } /* * Close open file */ static int dos_close(struct open_file *fd) { DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; DOS_FS *fs = f->fs; f->fs->links--; free(f); - dos_unmount(fs); + dos_unmount_impl(fs); return (0); } /* * Return some stat information on a file. */ static int dos_stat(struct open_file *fd, struct stat *sb) { DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; /* only important stuff */ sb->st_mode = f->de.attr & FA_DIR ? 
S_IFDIR | 0555 : S_IFREG | 0444; sb->st_nlink = 1; sb->st_uid = 0; sb->st_gid = 0; if ((sb->st_size = fsize(f->fs, &f->de)) == -1) return (EINVAL); return (0); } static int dos_checksum(unsigned char *name, unsigned char *ext) { int x, i; char buf[11]; bcopy(name, buf, 8); bcopy(ext, buf+8, 3); x = 0; for (i = 0; i < 11; i++) { x = ((x & 1) << 7) | (x >> 1); x += buf[i]; x &= 0xff; } return (x); } static int dos_readdir(struct open_file *fd, struct dirent *d) { /* DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; */ u_char fn[261]; DOS_DIR dd; size_t res; u_int chk, x, xdn; int err; x = chk = 0; for (;;) { xdn = x; x = 0; err = dos_read(fd, &dd, sizeof(dd), &res); if (err) return (err); if (res == sizeof(dd)) return (ENOENT); if (dd.de.name[0] == 0) return (ENOENT); /* Skip deleted entries */ if (dd.de.name[0] == 0xe5) continue; /* Check if directory entry is volume label */ if (dd.de.attr & FA_LABEL) { /* * If volume label set, check if the current entry is * extended entry (FA_XDE) for long file names. */ if ((dd.de.attr & FA_MASK) == FA_XDE) { /* * Read through all following extended entries * to get the long file name. 0x40 marks the * last entry containing part of long file name. */ if (dd.xde.seq & 0x40) chk = dd.xde.chk; else if (dd.xde.seq != xdn - 1 || dd.xde.chk != chk) continue; x = dd.xde.seq & ~0x40; if (x < 1 || x > 20) { x = 0; continue; } cp_xdnm(fn, &dd.xde); } else { /* skip only volume label entries */ continue; } } else { if (xdn == 1) { x = dos_checksum(dd.de.name, dd.de.ext); if (x == chk) break; } else { cp_sfn(fn, &dd.de); break; } x = 0; } } d->d_fileno = (dd.de.clus[1] << 8) + dd.de.clus[0]; d->d_reclen = sizeof(*d); d->d_type = (dd.de.attr & FA_DIR) ? 
DT_DIR : DT_REG; memcpy(d->d_name, fn, sizeof(d->d_name)); return (0); } /* * Parse DOS boot sector */ static int parsebs(DOS_FS *fs, DOS_BS *bs) { u_int sc; if ((bs->jmp[0] != 0x69 && bs->jmp[0] != 0xe9 && (bs->jmp[0] != 0xeb || bs->jmp[2] != 0x90)) || bs->bpb.media < 0xf0) return (EINVAL); if (cv2(bs->bpb.secsiz) != SECSIZ) return (EINVAL); if (!(fs->spc = bs->bpb.spc) || fs->spc & (fs->spc - 1)) return (EINVAL); fs->bsize = secbyt(fs->spc); fs->bshift = ffs(fs->bsize) - 1; if ((fs->spf = cv2(bs->bpb.spf))) { if (bs->bpb.fats != 2) return (EINVAL); if (!(fs->dirents = cv2(bs->bpb.dirents))) return (EINVAL); } else { if (!(fs->spf = cv4(bs->bpb.lspf))) return (EINVAL); if (!bs->bpb.fats || bs->bpb.fats > 16) return (EINVAL); if ((fs->rdcl = cv4(bs->bpb.rdcl)) < LOCLUS) return (EINVAL); } if (!(fs->lsnfat = cv2(bs->bpb.ressec))) return (EINVAL); fs->lsndir = fs->lsnfat + fs->spf * bs->bpb.fats; fs->lsndta = fs->lsndir + entsec(fs->dirents); if (!(sc = cv2(bs->bpb.secs)) && !(sc = cv4(bs->bpb.lsecs))) return (EINVAL); if (fs->lsndta > sc) return (EINVAL); if ((fs->xclus = secblk(fs, sc - fs->lsndta) + 1) < LOCLUS) return (EINVAL); fs->fatsz = fs->dirents ? fs->xclus < 0xff6 ? 
12 : 16 : 32; sc = (secbyt(fs->spf) << 1) / (fs->fatsz >> 2) - 1; if (fs->xclus > sc) fs->xclus = sc; return (0); } /* * Return directory entry from path */ static int namede(DOS_FS *fs, const char *path, DOS_DE **dep) { char name[256]; DOS_DE *de; char *s; size_t n; int err; err = 0; de = &fs->root; while (*path) { while (*path == '/') path++; if (*path == '\0') break; if (!(s = strchr(path, '/'))) s = strchr(path, 0); if ((n = s - path) > 255) return (ENAMETOOLONG); memcpy(name, path, n); name[n] = 0; path = s; if (!(de->attr & FA_DIR)) return (ENOTDIR); if ((err = lookup(fs, stclus(fs->fatsz, de), name, &de))) return (err); } *dep = de; return (0); } /* * Lookup path segment */ static int lookup(DOS_FS *fs, u_int clus, const char *name, DOS_DE **dep) { static DOS_DIR dir[DEPSEC]; u_char lfn[261]; u_char sfn[13]; u_int nsec, lsec, xdn, chk, sec, ent, x; int err, ok; if (!clus) for (ent = 0; ent < 2; ent++) if (!strcasecmp(name, dotstr[ent])) { *dep = dot + ent; return (0); } if (!clus && fs->fatsz == 32) clus = fs->rdcl; nsec = !clus ? 
entsec(fs->dirents) : fs->spc; lsec = 0; xdn = chk = 0; for (;;) { if (!clus && !lsec) lsec = fs->lsndir; else if (okclus(fs, clus)) lsec = blklsn(fs, clus); else return (EINVAL); for (sec = 0; sec < nsec; sec++) { if ((err = ioget(fs->fd, lsec + sec, dir, secbyt(1)))) return (err); for (ent = 0; ent < DEPSEC; ent++) { if (!*dir[ent].de.name) return (ENOENT); if (*dir[ent].de.name != 0xe5) { if ((dir[ent].de.attr & FA_MASK) == FA_XDE) { x = dir[ent].xde.seq; if (x & 0x40 || (x + 1 == xdn && dir[ent].xde.chk == chk)) { if (x & 0x40) { chk = dir[ent].xde.chk; x &= ~0x40; } if (x >= 1 && x <= 20) { cp_xdnm(lfn, &dir[ent].xde); xdn = x; continue; } } } else if (!(dir[ent].de.attr & FA_LABEL)) { if ((ok = xdn == 1)) { x = dos_checksum( dir[ent].de.name, dir[ent].de.ext); ok = chk == x && !strcasecmp(name, (const char *)lfn); } if (!ok) { cp_sfn(sfn, &dir[ent].de); ok = !strcasecmp(name, (const char *)sfn); } if (ok) { *dep = &dir[ent].de; return (0); } } } xdn = 0; } } if (!clus) break; if ((err = fatget(fs, &clus))) return (err); if (fatend(fs->fatsz, clus)) break; } return (ENOENT); } /* * Copy name from extended directory entry */ static void cp_xdnm(u_char *lfn, DOS_XDE *xde) { static struct { u_int off; u_int dim; } ix[3] = { {offsetof(DOS_XDE, name1), sizeof(xde->name1) / 2}, {offsetof(DOS_XDE, name2), sizeof(xde->name2) / 2}, {offsetof(DOS_XDE, name3), sizeof(xde->name3) / 2} }; u_char *p; u_int n, x, c; lfn += 13 * ((xde->seq & ~0x40) - 1); for (n = 0; n < 3; n++) for (p = (u_char *)xde + ix[n].off, x = ix[n].dim; x; p += 2, x--) { if ((c = cv2(p)) && (c < 32 || c > 127)) c = '?'; if (!(*lfn++ = c)) return; } if (xde->seq & 0x40) *lfn = 0; } /* * Copy short filename */ static void cp_sfn(u_char *sfn, DOS_DE *de) { u_char *p; int j, i; p = sfn; if (*de->name != ' ') { for (j = 7; de->name[j] == ' '; j--) ; for (i = 0; i <= j; i++) *p++ = de->name[i]; if (*de->ext != ' ') { *p++ = '.'; for (j = 2; de->ext[j] == ' '; j--) ; for (i = 0; i <= j; i++) *p++ = 
de->ext[i]; } } *p = 0; if (*sfn == 5) *sfn = 0xe5; } /* * Return size of file in bytes */ static off_t fsize(DOS_FS *fs, DOS_DE *de) { u_long size; u_int c; int n; if (!(size = cv4(de->size)) && de->attr & FA_DIR) { if (!(c = cv2(de->clus))) { size = fs->dirents * sizeof(DOS_DE); } else { if ((n = fatcnt(fs, c)) == -1) return (n); size = blkbyt(fs, n); } } return (size); } /* * Count number of clusters in chain */ static int fatcnt(DOS_FS *fs, u_int c) { int n; for (n = 0; okclus(fs, c); n++) if (fatget(fs, &c)) return (-1); return (fatend(fs->fatsz, c) ? n : -1); } /* * Get next cluster in cluster chain. Use in core fat cache unless * the number of current 128K block in FAT has changed. */ static int fatget(DOS_FS *fs, u_int *c) { u_int val_in, val_out, offset, blknum, nbyte; const u_char *p_entry; int err; /* check input value to prevent overflow in fatoff() */ val_in = *c; if (val_in & 0xf0000000) return (EINVAL); /* ensure that current 128K FAT block is cached */ offset = fatoff(fs->fatsz, val_in); nbyte = fs->fatsz != 32 ? 2 : 4; if (offset + nbyte > secbyt(fs->spf)) return (EINVAL); blknum = offset / FATBLKSZ; offset %= FATBLKSZ; if (offset + nbyte > FATBLKSZ) return (EINVAL); if (blknum != fs->fatbuf_blknum) { err = dos_read_fatblk(fs, fs->fd, blknum); if (err != 0) return (err); } p_entry = fs->fatbuf + offset; /* extract cluster number from FAT entry */ switch (fs->fatsz) { case 32: val_out = cv4(p_entry); val_out &= 0x0fffffff; break; case 16: val_out = cv2(p_entry); break; case 12: val_out = cv2(p_entry); if (val_in & 1) val_out >>= 4; else val_out &= 0xfff; break; default: return (EINVAL); } *c = val_out; return (0); } /* * Is cluster an end-of-chain marker? */ static int fatend(u_int sz, u_int c) { return (c > (sz == 12 ? 0xff7U : sz == 16 ? 
0xfff7U : 0xffffff7)); } /* * Offset-based I/O primitive */ static int ioread(DOS_FS *fs, u_int offset, void *buf, size_t nbyte) { char *s; u_int off, n; int err; u_char local_buf[SECSIZ]; s = buf; if ((off = offset & (SECSIZ - 1))) { offset -= off; if ((n = SECSIZ - off) > nbyte) n = nbyte; err = ioget(fs->fd, bytsec(offset), local_buf, sizeof(local_buf)); if (err != 0) return (err); memcpy(s, local_buf + off, n); offset += SECSIZ; s += n; nbyte -= n; } n = nbyte & (SECSIZ - 1); if (nbyte -= n) { if ((err = ioget(fs->fd, bytsec(offset), s, nbyte))) return (err); offset += nbyte; s += nbyte; } if (n != 0) { err = ioget(fs->fd, bytsec(offset), local_buf, sizeof(local_buf)); if (err != 0) return (err); memcpy(s, local_buf, n); } return (0); } /* * Sector-based I/O primitive */ static int ioget(struct open_file *fd, daddr_t lsec, void *buf, size_t size) { size_t rsize; int rv; /* Make sure we get full read or error. */ rsize = 0; rv = (fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, size, buf, &rsize); if ((rv == 0) && (size != rsize)) rv = EIO; return (rv); } diff --git a/stand/libsa/mount.c b/stand/libsa/mount.c new file mode 100644 index 000000000000..c9abe6b945fe --- /dev/null +++ b/stand/libsa/mount.c @@ -0,0 +1,163 @@ +/*- + * Copyright 2021 Toomas Soome + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +/* + * While setting "currdev" environment variable, also "mount" the + * new root file system. This is done to hold disk device open + * in between file accesses, and thus preserve block cache for + * this device. Additionally, this allows us to optimize filesystem + * access by sharing filesystem metadata (like superblock). + */ + +typedef STAILQ_HEAD(mnt_info_list, mnt_info) mnt_info_list_t; + +typedef struct mnt_info { + STAILQ_ENTRY(mnt_info) mnt_link; /* link in mount list */ + const struct fs_ops *mnt_fs; + char *mnt_dev; + char *mnt_path; + unsigned mnt_refcount; + void *mnt_data; /* Private state */ +} mnt_info_t; + +/* list of mounted filesystems.
*/
+static mnt_info_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list);
+
+/* Free a mount record together with the device and path strings it owns. */
+static void
+free_mnt(mnt_info_t *mnt)
+{
+	free(mnt->mnt_dev);
+	free(mnt->mnt_path);
+	free(mnt);
+}
+
+/*
+ * Append a record for (fs, dev, path) to the mount list with a reference
+ * count of 1.  dev and path are duplicated; data is stored as given.
+ * Returns 0 on success or ENOMEM when an allocation fails.
+ */
+static int
+add_mnt_info(struct fs_ops *fs, const char *dev, const char *path, void *data)
+{
+	mnt_info_t *mnt;
+
+	mnt = malloc(sizeof(*mnt));
+	if (mnt == NULL)
+		return (ENOMEM);
+
+	mnt->mnt_fs = fs;
+	mnt->mnt_dev = strdup(dev);
+	mnt->mnt_path = strdup(path);
+	mnt->mnt_data = data;
+	mnt->mnt_refcount = 1;
+
+	if (mnt->mnt_dev == NULL || mnt->mnt_path == NULL) {
+		free_mnt(mnt);
+		return (ENOMEM);
+	}
+	STAILQ_INSERT_TAIL(&mnt_list, mnt, mnt_link);
+	return (0);
+}
+
+/* Unlink a record from the mount list and free it. */
+static void
+delete_mnt_info(mnt_info_t *mnt)
+{
+	STAILQ_REMOVE(&mnt_list, mnt, mnt_info, mnt_link);
+	free_mnt(mnt);
+}
+
+/*
+ * Mount dev at path.
+ *
+ * An existing mount of the same dev/path pair only gains a reference.
+ * Otherwise the file systems in file_system[] that provide fo_mount()
+ * are tried in order, and the first one that accepts the device is
+ * recorded in the mount list.
+ *
+ * Returns 0 on success, and also when no file system took the device;
+ * a nonzero value is returned only when recording the mount fails.
+ */
+int
+mount(const char *dev, const char *path, int flags __unused, void *data)
+{
+	mnt_info_t *mnt;
+	int rc = -1;
+
+	/* Is it already mounted? */
+	STAILQ_FOREACH(mnt, &mnt_list, mnt_link) {
+		if (strcmp(dev, mnt->mnt_dev) == 0 &&
+		    strcmp(path, mnt->mnt_path) == 0) {
+			mnt->mnt_refcount++;
+			return (0);
+		}
+	}
+
+	for (int i = 0; file_system[i] != NULL; i++) {
+		struct fs_ops *fs;
+
+		fs = file_system[i];
+		if (fs->fo_mount == NULL)
+			continue;
+
+		if (fs->fo_mount(dev, path, &data) != 0)
+			continue;
+
+		rc = add_mnt_info(fs, dev, path, data);
+		/*
+		 * The lookup loop above ran to completion, so mnt is NULL
+		 * here.  Roll back through fs, the file system that has
+		 * just mounted the device.
+		 */
+		if (rc != 0 && fs->fo_unmount != NULL) {
+			printf("failed to mount %s: %s\n", dev,
+			    strerror(rc));
+			(void)fs->fo_unmount(dev, data);
+		}
+		break;
+	}
+
+	/*
+	 * if rc is -1, it means we have no file system with fo_mount()
+	 * callback, or all fo_mount() calls failed. As long as we
+	 * have missing fo_mount() callbacks, we allow mount() to return 0.
+	 */
+	if (rc == -1)
+		rc = 0;
+
+	return (rc);
+}
+
+/*
+ * Drop one reference to the first mount record whose device is dev.
+ * When the last reference goes, the file system's fo_unmount() (if any)
+ * is called and the record is removed.  A failing fo_unmount() is only
+ * reported; 0 is always returned.
+ */
+int
+unmount(const char *dev, int flags __unused)
+{
+	mnt_info_t *mnt;
+	int rv;
+
+	rv = 0;
+	STAILQ_FOREACH(mnt, &mnt_list, mnt_link) {
+		if (strcmp(dev, mnt->mnt_dev) == 0) {
+			if (mnt->mnt_refcount > 1) {
+				mnt->mnt_refcount--;
+				break;
+			}
+
+			if (mnt->mnt_fs->fo_unmount != NULL)
+				rv = mnt->mnt_fs->fo_unmount(dev,
+				    mnt->mnt_data);
+			delete_mnt_info(mnt);
+			break;
+		}
+	}
+
+	if (rv != 0)
+		printf("failed to unmount %s: %d\n", dev, rv);
+	return (0);
+}
diff --git a/stand/libsa/netif.c b/stand/libsa/netif.c index 2d32ccd9de7e..f199da3b6e95 100644 --- a/stand/libsa/netif.c +++ b/stand/libsa/netif.c @@ -1,388 +1,387 @@ /* $NetBSD: netif.c,v 1.10 1997/09/06 13:57:14 drochner Exp $ */ /* * Copyright (c) 1993 Adam Glass * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Adam Glass. * 4. The name of the Author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Adam Glass ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include #include "stand.h" #include "net.h" #include "netif.h" typedef TAILQ_HEAD(socket_list, iodesc) socket_list_t; /* * Open socket list. The current implementation and assumption is, * we only remove entries from tail and we only add new entries to tail. * This decision is to keep iodesc id management simple - we get list * entries ordered by continiously growing io_id field. * If we do have multiple sockets open and we do close socket not from tail, * this entry will be marked unused. netif_open() will reuse unused entry, or * netif_close() will free all unused tail entries. 
*/ static socket_list_t sockets = TAILQ_HEAD_INITIALIZER(sockets); #ifdef NETIF_DEBUG int netif_debug = 0; #endif /* * netif_init: * * initialize the generic network interface layer */ void netif_init(void) { struct netif_driver *drv; int d, i; #ifdef NETIF_DEBUG if (netif_debug) printf("netif_init: called\n"); #endif for (d = 0; netif_drivers[d]; d++) { drv = netif_drivers[d]; for (i = 0; i < drv->netif_nifs; i++) drv->netif_ifs[i].dif_used = 0; } } int netif_match(struct netif *nif, void *machdep_hint) { struct netif_driver *drv = nif->nif_driver; #if NETIF_DEBUG if (netif_debug) printf("%s%d: netif_match (%d)\n", drv->netif_bname, nif->nif_unit, nif->nif_sel); #endif return drv->netif_match(nif, machdep_hint); } struct netif * netif_select(void *machdep_hint) { int d, u, unit_done, s; struct netif_driver *drv; struct netif cur_if; static struct netif best_if; int best_val; int val; best_val = 0; best_if.nif_driver = NULL; for (d = 0; netif_drivers[d] != NULL; d++) { cur_if.nif_driver = netif_drivers[d]; drv = cur_if.nif_driver; for (u = 0; u < drv->netif_nifs; u++) { cur_if.nif_unit = u; unit_done = 0; #ifdef NETIF_DEBUG if (netif_debug) printf("\t%s%d:", drv->netif_bname, cur_if.nif_unit); #endif for (s = 0; s < drv->netif_ifs[u].dif_nsel; s++) { cur_if.nif_sel = s; if (drv->netif_ifs[u].dif_used & (1 << s)) { #ifdef NETIF_DEBUG if (netif_debug) printf(" [%d used]", s); #endif continue; } val = netif_match(&cur_if, machdep_hint); #ifdef NETIF_DEBUG if (netif_debug) printf(" [%d -> %d]", s, val); #endif if (val > best_val) { best_val = val; best_if = cur_if; } } #ifdef NETIF_DEBUG if (netif_debug) printf("\n"); #endif } } if (best_if.nif_driver == NULL) return NULL; best_if.nif_driver-> netif_ifs[best_if.nif_unit].dif_used |= (1 << best_if.nif_sel); #ifdef NETIF_DEBUG if (netif_debug) printf("netif_select: %s%d(%d) wins\n", best_if.nif_driver->netif_bname, best_if.nif_unit, best_if.nif_sel); #endif return &best_if; } int netif_probe(struct netif *nif, void 
*machdep_hint) { struct netif_driver *drv = nif->nif_driver; #ifdef NETIF_DEBUG if (netif_debug) printf("%s%d: netif_probe\n", drv->netif_bname, nif->nif_unit); #endif return drv->netif_probe(nif, machdep_hint); } void netif_attach(struct netif *nif, struct iodesc *desc, void *machdep_hint) { struct netif_driver *drv = nif->nif_driver; #ifdef NETIF_DEBUG if (netif_debug) printf("%s%d: netif_attach\n", drv->netif_bname, nif->nif_unit); #endif desc->io_netif = nif; #ifdef PARANOID if (drv->netif_init == NULL) panic("%s%d: no netif_init support", drv->netif_bname, nif->nif_unit); #endif drv->netif_init(desc, machdep_hint); bzero(drv->netif_ifs[nif->nif_unit].dif_stats, sizeof(struct netif_stats)); } void netif_detach(struct netif *nif) { struct netif_driver *drv = nif->nif_driver; #ifdef NETIF_DEBUG if (netif_debug) printf("%s%d: netif_detach\n", drv->netif_bname, nif->nif_unit); #endif #ifdef PARANOID if (drv->netif_end == NULL) panic("%s%d: no netif_end support", drv->netif_bname, nif->nif_unit); #endif drv->netif_end(nif); } ssize_t netif_get(struct iodesc *desc, void **pkt, time_t timo) { #ifdef NETIF_DEBUG struct netif *nif = desc->io_netif; #endif struct netif_driver *drv = desc->io_netif->nif_driver; ssize_t rv; #ifdef NETIF_DEBUG if (netif_debug) printf("%s%d: netif_get\n", drv->netif_bname, nif->nif_unit); #endif #ifdef PARANOID if (drv->netif_get == NULL) panic("%s%d: no netif_get support", drv->netif_bname, nif->nif_unit); #endif rv = drv->netif_get(desc, pkt, timo); #ifdef NETIF_DEBUG if (netif_debug) printf("%s%d: netif_get returning %d\n", drv->netif_bname, nif->nif_unit, (int)rv); #endif return (rv); } ssize_t netif_put(struct iodesc *desc, void *pkt, size_t len) { #ifdef NETIF_DEBUG struct netif *nif = desc->io_netif; #endif struct netif_driver *drv = desc->io_netif->nif_driver; ssize_t rv; #ifdef NETIF_DEBUG if (netif_debug) printf("%s%d: netif_put\n", drv->netif_bname, nif->nif_unit); #endif #ifdef PARANOID if (drv->netif_put == NULL) panic("%s%d: no 
netif_put support", drv->netif_bname, nif->nif_unit); #endif rv = drv->netif_put(desc, pkt, len); #ifdef NETIF_DEBUG if (netif_debug) printf("%s%d: netif_put returning %d\n", drv->netif_bname, nif->nif_unit, (int)rv); #endif return (rv); } /* * socktodesc_impl: * * Walk socket list and return pointer to iodesc structure. * if id is < 0, return first unused iodesc. */ static struct iodesc * socktodesc_impl(int socket) { struct iodesc *s; TAILQ_FOREACH(s, &sockets, io_link) { /* search by socket id */ if (socket >= 0) { if (s->io_id == socket) break; continue; } /* search for first unused entry */ if (s->io_netif == NULL) break; } return (s); } struct iodesc * socktodesc(int sock) { struct iodesc *desc; if (sock < 0) desc = NULL; else desc = socktodesc_impl(sock); if (desc == NULL) errno = EBADF; return (desc); } int netif_open(void *machdep_hint) { struct iodesc *s; struct netif *nif; /* find a free socket */ s = socktodesc_impl(-1); if (s == NULL) { struct iodesc *last; s = calloc(1, sizeof (*s)); if (s == NULL) return (-1); last = TAILQ_LAST(&sockets, socket_list); if (last != NULL) s->io_id = last->io_id + 1; TAILQ_INSERT_TAIL(&sockets, s, io_link); } netif_init(); nif = netif_select(machdep_hint); if (!nif) panic("netboot: no interfaces left untried"); if (netif_probe(nif, machdep_hint)) { printf("netboot: couldn't probe %s%d\n", nif->nif_driver->netif_bname, nif->nif_unit); errno = EINVAL; return (-1); } netif_attach(nif, s, machdep_hint); return (s->io_id); } int netif_close(int sock) { struct iodesc *s, *last; int err; err = 0; s = socktodesc_impl(sock); if (s == NULL || sock < 0) { err = EBADF; return (-1); } netif_detach(s->io_netif); bzero(&s->destip, sizeof (s->destip)); bzero(&s->myip, sizeof (s->myip)); s->destport = 0; s->myport = 0; s->xid = 0; bzero(s->myea, sizeof (s->myea)); s->io_netif = NULL; /* free unused entries from tail. 
*/ TAILQ_FOREACH_REVERSE_SAFE(last, &sockets, socket_list, io_link, s) { if (last->io_netif != NULL) break; TAILQ_REMOVE(&sockets, last, io_link); free(last); } if (err) { errno = err; return (-1); } return (0); } diff --git a/stand/libsa/stand.h b/stand/libsa/stand.h index 535fee31d586..ab0961b77086 100644 --- a/stand/libsa/stand.h +++ b/stand/libsa/stand.h @@ -1,489 +1,493 @@ /* * Copyright (c) 1998 Michael Smith. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * From $NetBSD: stand.h,v 1.22 1997/06/26 19:17:40 drochner Exp $ */ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)stand.h 8.1 (Berkeley) 6/11/93 */ #ifndef STAND_H #define STAND_H #include #include #include #include #include /* this header intentionally exports NULL from */ #include #define strcoll(a, b) strcmp((a), (b)) #define CHK(fmt, args...) printf("%s(%d): " fmt "\n", __func__, __LINE__ , ##args) #define PCHK(fmt, args...) 
{printf("%s(%d): " fmt "\n", __func__, __LINE__ , ##args); getchar();} #include /* special stand error codes */ #define EADAPT (ELAST+1) /* bad adaptor */ #define ECTLR (ELAST+2) /* bad controller */ #define EUNIT (ELAST+3) /* bad unit */ #define ESLICE (ELAST+4) /* bad slice */ #define EPART (ELAST+5) /* bad partition */ #define ERDLAB (ELAST+6) /* can't read disk label */ #define EUNLAB (ELAST+7) /* unlabeled disk */ #define EOFFSET (ELAST+8) /* relative seek not supported */ #define ESALAST (ELAST+8) /* */ /* Partial signal emulation for sig_atomic_t */ #include struct open_file; /* * This structure is used to define file system operations in a file system * independent way. * * XXX note that filesystem providers should export a pointer to their fs_ops * struct, so that consumers can reference this and thus include the * filesystems that they require. */ struct fs_ops { const char *fs_name; int (*fo_open)(const char *path, struct open_file *f); int (*fo_close)(struct open_file *f); int (*fo_read)(struct open_file *f, void *buf, size_t size, size_t *resid); int (*fo_write)(struct open_file *f, const void *buf, size_t size, size_t *resid); off_t (*fo_seek)(struct open_file *f, off_t offset, int where); int (*fo_stat)(struct open_file *f, struct stat *sb); int (*fo_readdir)(struct open_file *f, struct dirent *d); + int (*fo_mount)(const char *, const char *, void **); + int (*fo_unmount)(const char *, void *); }; /* * libstand-supplied filesystems */ extern struct fs_ops ufs_fsops; extern struct fs_ops tftp_fsops; extern struct fs_ops nfs_fsops; extern struct fs_ops cd9660_fsops; extern struct fs_ops gzipfs_fsops; extern struct fs_ops bzipfs_fsops; extern struct fs_ops dosfs_fsops; extern struct fs_ops ext2fs_fsops; extern struct fs_ops splitfs_fsops; extern struct fs_ops pkgfs_fsops; extern struct fs_ops efihttp_fsops; /* where values for lseek(2) */ #define SEEK_SET 0 /* set file offset to offset */ #define SEEK_CUR 1 /* set file offset to current plus offset */ 
#define SEEK_END 2 /* set file offset to EOF plus offset */ /* * Device switch */ struct devsw { const char dv_name[8]; int dv_type; /* opaque type constant, arch-dependant */ #define DEVT_NONE 0 #define DEVT_DISK 1 #define DEVT_NET 2 #define DEVT_CD 3 #define DEVT_ZFS 4 #define DEVT_FD 5 int (*dv_init)(void); /* early probe call */ int (*dv_strategy)(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize); int (*dv_open)(struct open_file *f, ...); int (*dv_close)(struct open_file *f); int (*dv_ioctl)(struct open_file *f, u_long cmd, void *data); int (*dv_print)(int verbose); /* print device information */ void (*dv_cleanup)(void); }; /* * libstand-supplied device switch */ extern struct devsw netdev; extern int errno; /* * Generic device specifier; architecture-dependent * versions may be larger, but should be allowed to * overlap. */ struct devdesc { struct devsw *d_dev; int d_unit; void *d_opendata; }; struct open_file { int f_flags; /* see F_* below */ struct devsw *f_dev; /* pointer to device operations */ void *f_devdata; /* device specific data */ struct fs_ops *f_ops; /* pointer to file system operations */ void *f_fsdata; /* file system specific data */ off_t f_offset; /* current file offset */ char *f_rabuf; /* readahead buffer pointer */ size_t f_ralen; /* valid data in readahead buffer */ off_t f_raoffset; /* consumer offset in readahead buffer */ int f_id; /* file number */ TAILQ_ENTRY(open_file) f_link; /* next entry */ #define SOPEN_RASIZE 512 }; typedef TAILQ_HEAD(file_list, open_file) file_list_t; extern file_list_t files; extern struct open_file *fd2open_file(int); /* f_flags values */ #define F_READ 0x0001 /* file opened for reading */ #define F_WRITE 0x0002 /* file opened for writing */ #define F_RAW 0x0004 /* raw device open - no file system */ #define F_NODEV 0x0008 /* network open - no device */ #define F_MASK 0xFFFF /* Mode modifier for strategy() */ #define F_NORA (0x01 << 16) /* Disable Read-Ahead */ #define isascii(c) 
(((c) & ~0x7F) == 0) static __inline int isupper(int c) { return c >= 'A' && c <= 'Z'; } static __inline int islower(int c) { return c >= 'a' && c <= 'z'; } static __inline int isspace(int c) { return c == ' ' || (c >= 0x9 && c <= 0xd); } static __inline int isdigit(int c) { return c >= '0' && c <= '9'; } static __inline int isxdigit(int c) { return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } static __inline int isalpha(int c) { return isupper(c) || islower(c); } static __inline int isalnum(int c) { return isalpha(c) || isdigit(c); } static __inline int iscntrl(int c) { return (c >= 0 && c < ' ') || c == 127; } static __inline int isgraph(int c) { return c >= '!' && c <= '~'; } static __inline int ispunct(int c) { return (c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~'); } static __inline int toupper(int c) { return islower(c) ? c - 'a' + 'A' : c; } static __inline int tolower(int c) { return isupper(c) ? c - 'A' + 'a' : c; } /* sbrk emulation */ extern void setheap(void *base, void *top); extern char *sbrk(int incr); extern int printf(const char *fmt, ...) __printflike(1, 2); extern int asprintf(char **buf, const char *cfmt, ...) __printflike(2, 3); extern int sprintf(char *buf, const char *cfmt, ...) __printflike(2, 3); extern int snprintf(char *buf, size_t size, const char *cfmt, ...) 
__printflike(3, 4); extern int vprintf(const char *fmt, __va_list); extern int vsprintf(char *buf, const char *cfmt, __va_list); extern int vsnprintf(char *buf, size_t size, const char *cfmt, __va_list); extern void twiddle(u_int callerdiv); extern void twiddle_divisor(u_int globaldiv); extern void ngets(char *, int); #define gets(x) ngets((x), 0) extern int fgetstr(char *buf, int size, int fd); +extern int mount(const char *dev, const char *path, int flags, void *data); +extern int unmount(const char *dev, int flags); extern int open(const char *, int); #define O_RDONLY 0x0 #define O_WRONLY 0x1 #define O_RDWR 0x2 #define O_ACCMODE 0x3 /* NOT IMPLEMENTED */ #define O_CREAT 0x0200 /* create if nonexistent */ #define O_TRUNC 0x0400 /* truncate to zero length */ extern int close(int); extern void closeall(void); extern ssize_t read(int, void *, size_t); extern ssize_t write(int, const void *, size_t); extern struct dirent *readdirfd(int); extern void srandom(unsigned int); extern long random(void); /* imports from stdlib, locally modified */ extern char *optarg; /* getopt(3) external variables */ extern int optind, opterr, optopt, optreset; extern int getopt(int, char * const [], const char *); /* pager.c */ extern void pager_open(void); extern void pager_close(void); extern int pager_output(const char *lines); extern int pager_file(const char *fname); /* No signal state to preserve */ #define setjmp _setjmp #define longjmp _longjmp /* environment.c */ #define EV_DYNAMIC (1<<0) /* value was dynamically allocated, free if changed/unset */ #define EV_VOLATILE (1<<1) /* value is volatile, make a copy of it */ #define EV_NOHOOK (1<<2) /* don't call hook when setting */ struct env_var; typedef char *(ev_format_t)(struct env_var *ev); typedef int (ev_sethook_t)(struct env_var *ev, int flags, const void *value); typedef int (ev_unsethook_t)(struct env_var *ev); struct env_var { char *ev_name; int ev_flags; void *ev_value; ev_sethook_t *ev_sethook; ev_unsethook_t 
*ev_unsethook; struct env_var *ev_next, *ev_prev; }; extern struct env_var *environ; extern struct env_var *env_getenv(const char *name); extern int env_setenv(const char *name, int flags, const void *value, ev_sethook_t sethook, ev_unsethook_t unsethook); extern void env_discard(struct env_var *); extern char *getenv(const char *name); extern int setenv(const char *name, const char *value, int overwrite); extern int putenv(char *string); extern int unsetenv(const char *name); extern ev_sethook_t env_noset; /* refuse set operation */ extern ev_unsethook_t env_nounset; /* refuse unset operation */ /* stdlib.h routines */ extern int abs(int a); extern void abort(void) __dead2; extern long strtol(const char * __restrict, char ** __restrict, int); extern long long strtoll(const char * __restrict, char ** __restrict, int); extern unsigned long strtoul(const char * __restrict, char ** __restrict, int); extern unsigned long long strtoull(const char * __restrict, char ** __restrict, int); /* BCD conversions (undocumented) */ extern u_char const bcd2bin_data[]; extern u_char const bin2bcd_data[]; extern char const hex2ascii_data[]; #define bcd2bin(bcd) (bcd2bin_data[bcd]) #define bin2bcd(bin) (bin2bcd_data[bin]) #define hex2ascii(hex) (hex2ascii_data[hex]) #define validbcd(bcd) (bcd == 0 || (bcd > 0 && bcd <= 0x99 && bcd2bin_data[bcd] != 0)) /* min/max (undocumented) */ static __inline int imax(int a, int b) { return (a > b ? a : b); } static __inline int imin(int a, int b) { return (a < b ? a : b); } static __inline long lmax(long a, long b) { return (a > b ? a : b); } static __inline long lmin(long a, long b) { return (a < b ? a : b); } static __inline u_int max(u_int a, u_int b) { return (a > b ? a : b); } static __inline u_int min(u_int a, u_int b) { return (a < b ? a : b); } static __inline quad_t qmax(quad_t a, quad_t b) { return (a > b ? a : b); } static __inline quad_t qmin(quad_t a, quad_t b) { return (a < b ? 
a : b); } static __inline u_long ulmax(u_long a, u_long b) { return (a > b ? a : b); } static __inline u_long ulmin(u_long a, u_long b) { return (a < b ? a : b); } /* null functions for device/filesystem switches (undocumented) */ extern int nodev(void); extern int noioctl(struct open_file *, u_long, void *); extern void nullsys(void); extern int null_open(const char *path, struct open_file *f); extern int null_close(struct open_file *f); extern int null_read(struct open_file *f, void *buf, size_t size, size_t *resid); extern int null_write(struct open_file *f, const void *buf, size_t size, size_t *resid); extern off_t null_seek(struct open_file *f, off_t offset, int where); extern int null_stat(struct open_file *f, struct stat *sb); extern int null_readdir(struct open_file *f, struct dirent *d); /* * Machine dependent functions and data, must be provided or stubbed by * the consumer */ extern void exit(int) __dead2; extern int getchar(void); extern int ischar(void); extern void putchar(int); extern int devopen(struct open_file *, const char *, const char **); extern int devclose(struct open_file *f); extern void panic(const char *, ...) __dead2 __printflike(1, 2); extern void panic_action(void) __weak_symbol __dead2; extern time_t getsecs(void); extern struct fs_ops *file_system[]; extern struct fs_ops *exclusive_file_system; extern struct devsw *devsw[]; /* * Time routines */ time_t time(time_t *); /* * Expose byteorder(3) functions. 
*/ #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED extern uint32_t htonl(uint32_t); extern uint16_t htons(uint16_t); extern uint32_t ntohl(uint32_t); extern uint16_t ntohs(uint16_t); #endif #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif void *Malloc(size_t, const char *, int); void *Memalign(size_t, size_t, const char *, int); void *Calloc(size_t, size_t, const char *, int); void *Realloc(void *, size_t, const char *, int); void *Reallocf(void *, size_t, const char *, int); void Free(void *, const char *, int); extern void mallocstats(void); const char *x86_hypervisor(void); #ifdef USER_MALLOC extern void *malloc(size_t); extern void *memalign(size_t, size_t); extern void *calloc(size_t, size_t); extern void free(void *); extern void *realloc(void *, size_t); extern void *reallocf(void *, size_t); #elif defined(DEBUG_MALLOC) #define malloc(x) Malloc(x, __FILE__, __LINE__) #define memalign(x, y) Memalign(x, y, __FILE__, __LINE__) #define calloc(x, y) Calloc(x, y, __FILE__, __LINE__) #define free(x) Free(x, __FILE__, __LINE__) #define realloc(x, y) Realloc(x, y, __FILE__, __LINE__) #define reallocf(x, y) Reallocf(x, y, __FILE__, __LINE__) #else #define malloc(x) Malloc(x, NULL, 0) #define memalign(x, y) Memalign(x, y, NULL, 0) #define calloc(x, y) Calloc(x, y, NULL, 0) #define free(x) Free(x, NULL, 0) #define realloc(x, y) Realloc(x, y, NULL, 0) #define reallocf(x, y) Reallocf(x, y, NULL, 0) #endif /* * va <-> pa routines. MD code must supply. 
*/ caddr_t ptov(uintptr_t); /* hexdump.c */ void hexdump(caddr_t region, size_t len); /* tslog.c */ #define TSRAW(a, b, c) tslog(a, b, c) #define TSENTER() TSRAW("ENTER", __func__, NULL) #define TSEXIT() TSRAW("EXIT", __func__, NULL) #define TSLINE() TSRAW("EVENT", __FILE__, __XSTRING(__LINE__)) void tslog(const char *, const char *, const char *); void tslog_setbuf(void * buf, size_t len); void tslog_getbuf(void ** buf, size_t * len); #endif /* STAND_H */ diff --git a/stand/libsa/ufs.c b/stand/libsa/ufs.c index 02d13aa29722..a4015dea74c2 100644 --- a/stand/libsa/ufs.c +++ b/stand/libsa/ufs.c @@ -1,896 +1,966 @@ /* $NetBSD: ufs.c,v 1.20 1998/03/01 07:15:39 ross Exp $ */ /*- * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * * Copyright (c) 1990, 1991 Carnegie Mellon University * All Rights Reserved. * * Author: David Golub * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone file reading package. 
*/ #include #include #include #include #include #include #include "stand.h" +#include "disk.h" #include "string.h" static int ufs_open(const char *path, struct open_file *f); static int ufs_write(struct open_file *f, const void *buf, size_t size, size_t *resid); static int ufs_close(struct open_file *f); static int ufs_read(struct open_file *f, void *buf, size_t size, size_t *resid); static off_t ufs_seek(struct open_file *f, off_t offset, int where); static int ufs_stat(struct open_file *f, struct stat *sb); static int ufs_readdir(struct open_file *f, struct dirent *d); +static int ufs_mount(const char *dev, const char *path, void **data); +static int ufs_unmount(const char *dev, void *data); struct fs_ops ufs_fsops = { - "ufs", - ufs_open, - ufs_close, - ufs_read, - ufs_write, - ufs_seek, - ufs_stat, - ufs_readdir + .fs_name = "ufs", + .fo_open = ufs_open, + .fo_close = ufs_close, + .fo_read = ufs_read, + .fo_write = ufs_write, + .fo_seek = ufs_seek, + .fo_stat = ufs_stat, + .fo_readdir = ufs_readdir, + .fo_mount = ufs_mount, + .fo_unmount = ufs_unmount }; /* * In-core open file. */ struct file { off_t f_seekp; /* seek pointer */ struct fs *f_fs; /* pointer to super-block */ union dinode { struct ufs1_dinode di1; struct ufs2_dinode di2; } f_di; /* copy of on-disk inode */ int f_nindir[UFS_NIADDR]; /* number of blocks mapped by indirect block at level i */ char *f_blk[UFS_NIADDR]; /* buffer for indirect block at level i */ size_t f_blksize[UFS_NIADDR]; /* size of buffer */ ufs2_daddr_t f_blkno[UFS_NIADDR];/* disk address of block in buffer */ ufs2_daddr_t f_buf_blkno; /* block number of data block */ char *f_buf; /* buffer for data block */ size_t f_buf_size; /* size of data block */ int f_inumber; /* inumber */ }; #define DIP(fp, field) \ ((fp)->f_fs->fs_magic == FS_UFS1_MAGIC ? 
\ (fp)->f_di.di1.field : (fp)->f_di.di2.field) +typedef struct ufs_mnt { + char *um_dev; + int um_fd; + STAILQ_ENTRY(ufs_mnt) um_link; +} ufs_mnt_t; + +typedef STAILQ_HEAD(ufs_mnt_list, ufs_mnt) ufs_mnt_list_t; +static ufs_mnt_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list); + static int read_inode(ino_t, struct open_file *); static int block_map(struct open_file *, ufs2_daddr_t, ufs2_daddr_t *); static int buf_read_file(struct open_file *, char **, size_t *); static int buf_write_file(struct open_file *, const char *, size_t *); static int search_directory(char *, struct open_file *, ino_t *); static int ufs_use_sa_read(void *, off_t, void **, int); /* from ffs_subr.c */ int ffs_sbget(void *, struct fs **, off_t, char *, int (*)(void *, off_t, void **, int)); /* * Request standard superblock location in ffs_sbget */ #define STDSB -1 /* Fail if check-hash is bad */ #define STDSB_NOHASHFAIL -2 /* Ignore check-hash failure */ /* * Read a new inode into a file structure. */ static int -read_inode(inumber, f) - ino_t inumber; - struct open_file *f; +read_inode(ino_t inumber, struct open_file *f) { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; char *buf; size_t rsize; int rc; if (fs == NULL) panic("fs == NULL"); /* * Read inode and save it. 
*/ buf = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize, buf, &rsize); if (rc) goto out; if (rsize != fs->fs_bsize) { rc = EIO; goto out; } if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) fp->f_di.di1 = ((struct ufs1_dinode *)buf) [ino_to_fsbo(fs, inumber)]; else fp->f_di.di2 = ((struct ufs2_dinode *)buf) [ino_to_fsbo(fs, inumber)]; /* * Clear out the old buffers */ { int level; for (level = 0; level < UFS_NIADDR; level++) fp->f_blkno[level] = -1; fp->f_buf_blkno = -1; } fp->f_seekp = 0; fp->f_inumber = inumber; out: free(buf); return (rc); } /* * Given an offset in a file, find the disk block number that * contains that block. */ static int -block_map(f, file_block, disk_block_p) - struct open_file *f; - ufs2_daddr_t file_block; - ufs2_daddr_t *disk_block_p; /* out */ +block_map(struct open_file *f, ufs2_daddr_t file_block, + ufs2_daddr_t *disk_block_p) { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; int level; int idx; ufs2_daddr_t ind_block_num; int rc; /* * Index structure of an inode: * * di_db[0..UFS_NDADDR-1] hold block numbers for blocks * 0..UFS_NDADDR-1 * * di_ib[0] index block 0 is the single indirect block * holds block numbers for blocks * UFS_NDADDR .. UFS_NDADDR + NINDIR(fs)-1 * * di_ib[1] index block 1 is the double indirect block * holds block numbers for INDEX blocks for blocks * UFS_NDADDR + NINDIR(fs) .. * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1 * * di_ib[2] index block 2 is the triple indirect block * holds block numbers for double-indirect * blocks for blocks * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 .. * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 * + NINDIR(fs)**3 - 1 */ if (file_block < UFS_NDADDR) { /* Direct block. 
*/ *disk_block_p = DIP(fp, di_db[file_block]); return (0); } file_block -= UFS_NDADDR; /* * nindir[0] = NINDIR * nindir[1] = NINDIR**2 * nindir[2] = NINDIR**3 * etc */ for (level = 0; level < UFS_NIADDR; level++) { if (file_block < fp->f_nindir[level]) break; file_block -= fp->f_nindir[level]; } if (level == UFS_NIADDR) { /* Block number too high */ return (EFBIG); } ind_block_num = DIP(fp, di_ib[level]); for (; level >= 0; level--) { if (ind_block_num == 0) { *disk_block_p = 0; /* missing */ return (0); } if (fp->f_blkno[level] != ind_block_num) { if (fp->f_blk[level] == (char *)0) fp->f_blk[level] = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fp->f_fs, ind_block_num), fs->fs_bsize, fp->f_blk[level], &fp->f_blksize[level]); if (rc) return (rc); if (fp->f_blksize[level] != fs->fs_bsize) return (EIO); fp->f_blkno[level] = ind_block_num; } if (level > 0) { idx = file_block / fp->f_nindir[level - 1]; file_block %= fp->f_nindir[level - 1]; } else idx = file_block; if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) ind_block_num = ((ufs1_daddr_t *)fp->f_blk[level])[idx]; else ind_block_num = ((ufs2_daddr_t *)fp->f_blk[level])[idx]; } *disk_block_p = ind_block_num; return (0); } /* * Write a portion of a file from an internal buffer. */ static int -buf_write_file(f, buf_p, size_p) - struct open_file *f; - const char *buf_p; - size_t *size_p; /* out */ +buf_write_file(struct open_file *f, const char *buf_p, size_t *size_p) { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; long off; ufs_lbn_t file_block; ufs2_daddr_t disk_block; size_t block_size; int rc; /* * Calculate the starting block address and offset. 
*/ off = blkoff(fs, fp->f_seekp); file_block = lblkno(fs, fp->f_seekp); block_size = sblksize(fs, DIP(fp, di_size), file_block); rc = block_map(f, file_block, &disk_block); if (rc) return (rc); if (disk_block == 0) /* Because we can't allocate space on the drive */ return (EFBIG); /* * Truncate buffer at end of file, and at the end of * this block. */ if (*size_p > DIP(fp, di_size) - fp->f_seekp) *size_p = DIP(fp, di_size) - fp->f_seekp; if (*size_p > block_size - off) *size_p = block_size - off; /* * If we don't entirely occlude the block and it's not * in memory already, read it in first. */ if (((off > 0) || (*size_p + off < block_size)) && (file_block != fp->f_buf_blkno)) { if (fp->f_buf == (char *)0) fp->f_buf = malloc(fs->fs_bsize); twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); fp->f_buf_blkno = file_block; } /* * Copy the user data into the cached block. */ bcopy(buf_p, fp->f_buf + off, *size_p); /* * Write the block out to storage. */ twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); return (rc); } /* * Read a portion of a file into an internal buffer. Return * the location in the buffer and the amount in the buffer. 
*/
/*
 * *buf_p receives the address inside the cached data block that
 * corresponds to the current seek pointer and *size_p the number of
 * bytes available from there, limited by the end of the block and the
 * end of the file.  Returns 0 on success or an errno value.
 */
static int
buf_read_file(struct open_file *f, char **buf_p, size_t *size_p)
{
	struct file *fp = (struct file *)f->f_fsdata;
	struct fs *fs = fp->f_fs;
	long off;
	ufs_lbn_t file_block;
	ufs2_daddr_t disk_block;
	size_t block_size;
	int rc;

	off = blkoff(fs, fp->f_seekp);
	file_block = lblkno(fs, fp->f_seekp);
	block_size = sblksize(fs, DIP(fp, di_size), file_block);

	if (file_block != fp->f_buf_blkno) {
		if (fp->f_buf == NULL) {
			fp->f_buf = malloc(fs->fs_bsize);
			if (fp->f_buf == NULL)
				return (ENOMEM);
		}

		rc = block_map(f, file_block, &disk_block);
		if (rc)
			return (rc);

		/*
		 * The buffer is about to be refilled: drop its tag so a
		 * failed read cannot leave it claiming the previous block.
		 */
		fp->f_buf_blkno = -1;

		if (disk_block == 0) {
			/* block_map() reports a missing block as 0: read as zeroes */
			bzero(fp->f_buf, block_size);
			fp->f_buf_size = block_size;
		} else {
			twiddle(4);
			rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
			    fsbtodb(fs, disk_block), block_size,
			    fp->f_buf, &fp->f_buf_size);
			if (rc)
				return (rc);
		}

		fp->f_buf_blkno = file_block;
	}

	/*
	 * Return address of byte in buffer corresponding to
	 * offset, and size of remainder of buffer after that
	 * byte.
	 */
	*buf_p = fp->f_buf + off;
	*size_p = block_size - off;

	/*
	 * But truncate buffer at end of file.
	 */
	if (*size_p > DIP(fp, di_size) - fp->f_seekp)
		*size_p = DIP(fp, di_size) - fp->f_seekp;

	return (0);
}

/*
 * Search a directory for a name and return its
 * i_number.
*/ static int -search_directory(name, f, inumber_p) - char *name; - struct open_file *f; - ino_t *inumber_p; /* out */ +search_directory(char *name, struct open_file *f, ino_t *inumber_p) { struct file *fp = (struct file *)f->f_fsdata; struct direct *dp; struct direct *edp; char *buf; size_t buf_size; int namlen, length; int rc; length = strlen(name); fp->f_seekp = 0; while (fp->f_seekp < DIP(fp, di_size)) { rc = buf_read_file(f, &buf, &buf_size); if (rc) return (rc); dp = (struct direct *)buf; edp = (struct direct *)(buf + buf_size); while (dp < edp) { if (dp->d_ino == (ino_t)0) goto next; #if BYTE_ORDER == LITTLE_ENDIAN if (fp->f_fs->fs_maxsymlinklen <= 0) namlen = dp->d_type; else #endif namlen = dp->d_namlen; if (namlen == length && !strcmp(name, dp->d_name)) { /* found entry */ *inumber_p = dp->d_ino; return (0); } next: dp = (struct direct *)((char *)dp + dp->d_reclen); } fp->f_seekp += buf_size; } return (ENOENT); } /* * Open a file. */ static int -ufs_open(upath, f) - const char *upath; - struct open_file *f; +ufs_open(const char *upath, struct open_file *f) { char *cp, *ncp; int c; ino_t inumber, parent_inumber; struct file *fp; struct fs *fs; int rc; int nlinks = 0; char namebuf[MAXPATHLEN+1]; char *buf = NULL; char *path = NULL; + const char *dev; + ufs_mnt_t *mnt; /* allocate file system specific data structure */ - fp = malloc(sizeof(struct file)); - bzero(fp, sizeof(struct file)); + errno = 0; + fp = calloc(1, sizeof(struct file)); + if (fp == NULL) + return (errno); f->f_fsdata = (void *)fp; - /* read super block */ - twiddle(1); - if ((rc = ffs_sbget(f, &fs, STDSB_NOHASHFAIL, "stand", - ufs_use_sa_read)) != 0) - goto out; + dev = disk_fmtdev(f->f_devdata); + /* Is this device mounted? 
*/ + STAILQ_FOREACH(mnt, &mnt_list, um_link) { + if (strcmp(dev, mnt->um_dev) == 0) + break; + } + + if (mnt == NULL) { + /* read super block */ + twiddle(1); + if ((rc = ffs_sbget(f, &fs, STDSB_NOHASHFAIL, "stand", + ufs_use_sa_read)) != 0) { + goto out; + } + } else { + struct open_file *sbf; + struct file *sfp; + + /* get superblock from mounted file system */ + sbf = fd2open_file(mnt->um_fd); + sfp = sbf->f_fsdata; + fs = sfp->f_fs; + } fp->f_fs = fs; + /* * Calculate indirect block levels. */ { ufs2_daddr_t mult; int level; mult = 1; for (level = 0; level < UFS_NIADDR; level++) { mult *= NINDIR(fs); fp->f_nindir[level] = mult; } } inumber = UFS_ROOTINO; if ((rc = read_inode(inumber, f)) != 0) goto out; cp = path = strdup(upath); if (path == NULL) { rc = ENOMEM; goto out; } while (*cp) { /* * Remove extra separators */ while (*cp == '/') cp++; if (*cp == '\0') break; /* * Check that current node is a directory. */ if ((DIP(fp, di_mode) & IFMT) != IFDIR) { rc = ENOTDIR; goto out; } /* * Get next component of path name. */ { int len = 0; ncp = cp; while ((c = *cp) != '\0' && c != '/') { if (++len > UFS_MAXNAMLEN) { rc = ENOENT; goto out; } cp++; } *cp = '\0'; } /* * Look up component in current directory. * Save directory inumber in case we find a * symbolic link. */ parent_inumber = inumber; rc = search_directory(ncp, f, &inumber); *cp = c; if (rc) goto out; /* * Open next component. */ if ((rc = read_inode(inumber, f)) != 0) goto out; /* * Check for symbolic link. 
*/ if ((DIP(fp, di_mode) & IFMT) == IFLNK) { int link_len = DIP(fp, di_size); int len; len = strlen(cp); if (link_len + len > MAXPATHLEN || ++nlinks > MAXSYMLINKS) { rc = ENOENT; goto out; } bcopy(cp, &namebuf[link_len], len + 1); if (link_len < fs->fs_maxsymlinklen) { if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) cp = (caddr_t)(fp->f_di.di1.di_db); else cp = (caddr_t)(fp->f_di.di2.di_db); bcopy(cp, namebuf, (unsigned) link_len); } else { /* * Read file for symbolic link */ size_t buf_size; ufs2_daddr_t disk_block; struct fs *fs = fp->f_fs; if (!buf) buf = malloc(fs->fs_bsize); rc = block_map(f, (ufs2_daddr_t)0, &disk_block); if (rc) goto out; twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), fs->fs_bsize, buf, &buf_size); if (rc) goto out; bcopy((char *)buf, namebuf, (unsigned)link_len); } /* * If relative pathname, restart at parent directory. * If absolute pathname, restart at root. */ cp = namebuf; if (*cp != '/') inumber = parent_inumber; else inumber = (ino_t)UFS_ROOTINO; if ((rc = read_inode(inumber, f)) != 0) goto out; } } /* * Found terminal component. */ rc = 0; fp->f_seekp = 0; out: - if (buf) - free(buf); - if (path) - free(path); + free(buf); + free(path); if (rc) { - if (fp->f_buf) - free(fp->f_buf); - if (fp->f_fs != NULL) { + free(fp->f_buf); + + if (mnt == NULL && fp->f_fs != NULL) { free(fp->f_fs->fs_csp); free(fp->f_fs->fs_si); free(fp->f_fs); } free(fp); } return (rc); } /* * A read function for use by standalone-layer routines. 
*/ static int ufs_use_sa_read(void *devfd, off_t loc, void **bufp, int size) { struct open_file *f; size_t buf_size; int error; f = (struct open_file *)devfd; if ((*bufp = malloc(size)) == NULL) return (ENOSPC); error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, loc / DEV_BSIZE, size, *bufp, &buf_size); if (error != 0) return (error); if (buf_size != size) return (EIO); return (0); } static int -ufs_close(f) - struct open_file *f; +ufs_close(struct open_file *f) { + ufs_mnt_t *mnt; struct file *fp = (struct file *)f->f_fsdata; int level; + char *dev; - f->f_fsdata = (void *)0; - if (fp == (struct file *)0) + f->f_fsdata = NULL; + if (fp == NULL) return (0); for (level = 0; level < UFS_NIADDR; level++) { - if (fp->f_blk[level]) - free(fp->f_blk[level]); + free(fp->f_blk[level]); } - if (fp->f_buf) - free(fp->f_buf); - if (fp->f_fs != NULL) { + free(fp->f_buf); + + dev = disk_fmtdev(f->f_devdata); + STAILQ_FOREACH(mnt, &mnt_list, um_link) { + if (strcmp(dev, mnt->um_dev) == 0) + break; + } + + if (mnt == NULL && fp->f_fs != NULL) { free(fp->f_fs->fs_csp); free(fp->f_fs->fs_si); free(fp->f_fs); } + free(fp); return (0); } /* * Copy a portion of a file into kernel memory. * Cross block boundaries when necessary. */ static int -ufs_read(f, start, size, resid) - struct open_file *f; - void *start; - size_t size; - size_t *resid; /* out */ +ufs_read(struct open_file *f, void *start, size_t size, size_t *resid) { struct file *fp = (struct file *)f->f_fsdata; size_t csize; char *buf; size_t buf_size; int rc = 0; char *addr = start; while (size != 0) { if (fp->f_seekp >= DIP(fp, di_size)) break; rc = buf_read_file(f, &buf, &buf_size); if (rc) break; csize = size; if (csize > buf_size) csize = buf_size; bcopy(buf, addr, csize); fp->f_seekp += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } /* * Write to a portion of an already allocated file. * Cross block boundaries when necessary. Can not * extend the file. 
*/ static int -ufs_write(f, start, size, resid) - struct open_file *f; - const void *start; - size_t size; - size_t *resid; /* out */ +ufs_write(struct open_file *f, const void *start, size_t size, size_t *resid) { struct file *fp = (struct file *)f->f_fsdata; size_t csize; int rc = 0; const char *addr = start; csize = size; while ((size != 0) && (csize != 0)) { if (fp->f_seekp >= DIP(fp, di_size)) break; if (csize >= 512) csize = 512; /* XXX */ rc = buf_write_file(f, addr, &csize); if (rc) break; fp->f_seekp += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } static off_t -ufs_seek(f, offset, where) - struct open_file *f; - off_t offset; - int where; +ufs_seek(struct open_file *f, off_t offset, int where) { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_seekp = offset; break; case SEEK_CUR: fp->f_seekp += offset; break; case SEEK_END: fp->f_seekp = DIP(fp, di_size) - offset; break; default: errno = EINVAL; return (-1); } return (fp->f_seekp); } static int -ufs_stat(f, sb) - struct open_file *f; - struct stat *sb; +ufs_stat(struct open_file *f, struct stat *sb) { struct file *fp = (struct file *)f->f_fsdata; /* only important stuff */ sb->st_mode = DIP(fp, di_mode); sb->st_uid = DIP(fp, di_uid); sb->st_gid = DIP(fp, di_gid); sb->st_size = DIP(fp, di_size); sb->st_mtime = DIP(fp, di_mtime); /* * The items below are ufs specific! * Other fs types will need their own solution * if these fields are needed. */ sb->st_ino = fp->f_inumber; /* * We need something to differentiate devs. * fs_id is unique but 64bit, we xor the two * halves to squeeze it into 32bits. 
*/ sb->st_dev = (dev_t)(fp->f_fs->fs_id[0] ^ fp->f_fs->fs_id[1]); return (0); } static int ufs_readdir(struct open_file *f, struct dirent *d) { struct file *fp = (struct file *)f->f_fsdata; struct direct *dp; char *buf; size_t buf_size; int error; /* * assume that a directory entry will not be split across blocks */ again: if (fp->f_seekp >= DIP(fp, di_size)) return (ENOENT); error = buf_read_file(f, &buf, &buf_size); if (error) return (error); dp = (struct direct *)buf; fp->f_seekp += dp->d_reclen; if (dp->d_ino == (ino_t)0) goto again; d->d_type = dp->d_type; strcpy(d->d_name, dp->d_name); return (0); } + +static int +ufs_mount(const char *dev, const char *path, void **data) +{ + char *fs; + ufs_mnt_t *mnt; + struct open_file *f; + + errno = 0; + mnt = calloc(1, sizeof(*mnt)); + if (mnt == NULL) + return (errno); + mnt->um_fd = -1; + mnt->um_dev = strdup(dev); + if (mnt->um_dev == NULL) + goto done; + + if (asprintf(&fs, "%s%s", dev, path) < 0) + goto done; + + mnt->um_fd = open(fs, O_RDONLY); + free(fs); + if (mnt->um_fd == -1) + goto done; + + /* Is it ufs file system? */ + f = fd2open_file(mnt->um_fd); + if (strcmp(f->f_ops->fs_name, "ufs") == 0) + STAILQ_INSERT_TAIL(&mnt_list, mnt, um_link); + else + errno = ENXIO; + +done: + if (errno != 0) { + free(mnt->um_dev); + if (mnt->um_fd >= 0) + close(mnt->um_fd); + free(mnt); + } else { + *data = mnt; + } + + return (errno); +} + +static int +ufs_unmount(const char *dev __unused, void *data) +{ + ufs_mnt_t *mnt = data; + + STAILQ_REMOVE(&mnt_list, mnt, ufs_mnt, um_link); + free(mnt->um_dev); + close(mnt->um_fd); + free(mnt); + return (0); +} diff --git a/stand/libsa/zfs/zfs.c b/stand/libsa/zfs/zfs.c index da7c63e099d1..633ef3b18784 100644 --- a/stand/libsa/zfs/zfs.c +++ b/stand/libsa/zfs/zfs.c @@ -1,1957 +1,2042 @@ /*- * Copyright (c) 2007 Doug Rabson * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone file reading package. 
*/ #include #include #include #include #include #include #include #include #include #include #include "libzfs.h" #include "zfsimpl.c" /* Define the range of indexes to be populated with ZFS Boot Environments */ #define ZFS_BE_FIRST 4 #define ZFS_BE_LAST 8 static int zfs_open(const char *path, struct open_file *f); static int zfs_close(struct open_file *f); static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid); static off_t zfs_seek(struct open_file *f, off_t offset, int where); static int zfs_stat(struct open_file *f, struct stat *sb); static int zfs_readdir(struct open_file *f, struct dirent *d); +static int zfs_mount(const char *dev, const char *path, void **data); +static int zfs_unmount(const char *dev, void *data); static void zfs_bootenv_initial(const char *envname, spa_t *spa, const char *name, const char *dsname, int checkpoint); static void zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname); struct devsw zfs_dev; struct fs_ops zfs_fsops = { - "zfs", - zfs_open, - zfs_close, - zfs_read, - null_write, - zfs_seek, - zfs_stat, - zfs_readdir + .fs_name = "zfs", + .fo_open = zfs_open, + .fo_close = zfs_close, + .fo_read = zfs_read, + .fo_write = null_write, + .fo_seek = zfs_seek, + .fo_stat = zfs_stat, + .fo_readdir = zfs_readdir, + .fo_mount = zfs_mount, + .fo_unmount = zfs_unmount }; /* * In-core open file. */ struct file { off_t f_seekp; /* seek pointer */ dnode_phys_t f_dnode; uint64_t f_zap_type; /* zap type for readdir */ uint64_t f_num_leafs; /* number of fzap leaf blocks */ zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */ }; static int zfs_env_index; static int zfs_env_count; SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head); struct zfs_be_list *zfs_be_headp; struct zfs_be_entry { char *name; SLIST_ENTRY(zfs_be_entry) entries; } *zfs_be, *zfs_be_tmp; /* * Open a file. 
*/ static int zfs_open(const char *upath, struct open_file *f) { struct zfsmount *mount = (struct zfsmount *)f->f_devdata; struct file *fp; int rc; if (f->f_dev != &zfs_dev) return (EINVAL); /* allocate file system specific data structure */ fp = calloc(1, sizeof(struct file)); if (fp == NULL) return (ENOMEM); f->f_fsdata = fp; rc = zfs_lookup(mount, upath, &fp->f_dnode); fp->f_seekp = 0; if (rc) { f->f_fsdata = NULL; free(fp); } return (rc); } static int zfs_close(struct open_file *f) { struct file *fp = (struct file *)f->f_fsdata; dnode_cache_obj = NULL; f->f_fsdata = NULL; free(fp); return (0); } /* * Copy a portion of a file into kernel memory. * Cross block boundaries when necessary. */ static int zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; struct stat sb; size_t n; int rc; rc = zfs_stat(f, &sb); if (rc) return (rc); n = size; if (fp->f_seekp + n > sb.st_size) n = sb.st_size - fp->f_seekp; rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); if (rc) return (rc); if (0) { int i; for (i = 0; i < n; i++) putchar(((char*) start)[i]); } fp->f_seekp += n; if (resid) *resid = size - n; return (0); } static off_t zfs_seek(struct open_file *f, off_t offset, int where) { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_seekp = offset; break; case SEEK_CUR: fp->f_seekp += offset; break; case SEEK_END: { struct stat sb; int error; error = zfs_stat(f, &sb); if (error != 0) { errno = error; return (-1); } fp->f_seekp = sb.st_size - offset; break; } default: errno = EINVAL; return (-1); } return (fp->f_seekp); } static int zfs_stat(struct open_file *f, struct stat *sb) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; return (zfs_dnode_stat(spa, &fp->f_dnode, sb)); } static int zfs_readdir(struct open_file *f, struct dirent *d) { const 
spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; mzap_ent_phys_t mze; struct stat sb; size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; int rc; rc = zfs_stat(f, &sb); if (rc) return (rc); if (!S_ISDIR(sb.st_mode)) return (ENOTDIR); /* * If this is the first read, get the zap type. */ if (fp->f_seekp == 0) { rc = dnode_read(spa, &fp->f_dnode, 0, &fp->f_zap_type, sizeof(fp->f_zap_type)); if (rc) return (rc); if (fp->f_zap_type == ZBT_MICRO) { fp->f_seekp = offsetof(mzap_phys_t, mz_chunk); } else { rc = dnode_read(spa, &fp->f_dnode, offsetof(zap_phys_t, zap_num_leafs), &fp->f_num_leafs, sizeof(fp->f_num_leafs)); if (rc) return (rc); fp->f_seekp = bsize; fp->f_zap_leaf = malloc(bsize); if (fp->f_zap_leaf == NULL) return (ENOMEM); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, fp->f_zap_leaf, bsize); if (rc) return (rc); } } if (fp->f_zap_type == ZBT_MICRO) { mzap_next: if (fp->f_seekp >= bsize) return (ENOENT); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, &mze, sizeof(mze)); if (rc) return (rc); fp->f_seekp += sizeof(mze); if (!mze.mze_name[0]) goto mzap_next; d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value); d->d_type = ZFS_DIRENT_TYPE(mze.mze_value); strcpy(d->d_name, mze.mze_name); d->d_namlen = strlen(d->d_name); return (0); } else { zap_leaf_t zl; zap_leaf_chunk_t *zc, *nc; int chunk; size_t namelen; char *p; uint64_t value; /* * Initialise this so we can use the ZAP size * calculating macros. */ zl.l_bs = ilog2(bsize); zl.l_phys = fp->f_zap_leaf; /* * Figure out which chunk we are currently looking at * and consider seeking to the next leaf. We use the * low bits of f_seekp as a simple chunk index. */ fzap_next: chunk = fp->f_seekp & (bsize - 1); if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) { fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize; chunk = 0; /* * Check for EOF and read the new leaf. 
*/ if (fp->f_seekp >= bsize * fp->f_num_leafs) return (ENOENT); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, fp->f_zap_leaf, bsize); if (rc) return (rc); } zc = &ZAP_LEAF_CHUNK(&zl, chunk); fp->f_seekp++; if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) goto fzap_next; namelen = zc->l_entry.le_name_numints; if (namelen > sizeof(d->d_name)) namelen = sizeof(d->d_name); /* * Paste the name back together. */ nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); p = d->d_name; while (namelen > 0) { int len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); } d->d_name[sizeof(d->d_name) - 1] = 0; /* * Assume the first eight bytes of the value are * a uint64_t. */ value = fzap_leaf_value(&zl, zc); d->d_fileno = ZFS_DIRENT_OBJ(value); d->d_type = ZFS_DIRENT_TYPE(value); d->d_namlen = strlen(d->d_name); return (0); } } +/* + * if path is NULL, create mount structure, but do not add it to list. 
+ */ +static int +zfs_mount(const char *dev, const char *path, void **data) +{ + struct zfs_devdesc *zfsdev; + spa_t *spa; + struct zfsmount *mnt; + int rv; + + errno = 0; + zfsdev = malloc(sizeof(*zfsdev)); + if (zfsdev == NULL) + return (errno); + + rv = zfs_parsedev(zfsdev, dev + 3, NULL); + if (rv != 0) { + free(zfsdev); + return (rv); + } + + spa = spa_find_by_dev(zfsdev); + if (spa == NULL) + return (ENXIO); + + mnt = calloc(1, sizeof(*mnt)); + if (mnt != NULL && path != NULL) + mnt->path = strdup(path); + rv = errno; + + if (mnt != NULL) + rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt); + free(zfsdev); + + if (rv == 0 && mnt != NULL && mnt->objset.os_type != DMU_OST_ZFS) { + printf("Unexpected object set type %ju\n", + (uintmax_t)mnt->objset.os_type); + rv = EIO; + } + + if (rv != 0) { + if (mnt != NULL) + free(mnt->path); + free(mnt); + return (rv); + } + + if (mnt != NULL) { + *data = mnt; + if (path != NULL) + STAILQ_INSERT_TAIL(&zfsmount, mnt, next); + } + + return (rv); +} + +static int +zfs_unmount(const char *dev, void *data) +{ + struct zfsmount *mnt = data; + + STAILQ_REMOVE(&zfsmount, mnt, zfsmount, next); + free(mnt->path); + free(mnt); + return (0); +} + static int vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes) { int fd, ret; size_t res, head, tail, total_size, full_sec_size; unsigned secsz, do_tail_read; off_t start_sec; char *outbuf, *bouncebuf; fd = (uintptr_t) priv; outbuf = (char *) buf; bouncebuf = NULL; ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); if (ret != 0) return (ret); /* * Handling reads of arbitrary offset and size - multi-sector case * and single-sector case. * * Multi-sector Case * (do_tail_read = true if tail > 0) * * |<----------------------total_size--------------------->| * | | * |<--head-->|<--------------bytes------------>|<--tail-->| * | | | | * | | |<~full_sec_size~>| | | * +------------------+ +------------------+ * | |0101010| . . . 
|0101011| | * +------------------+ +------------------+ * start_sec start_sec + n * * * Single-sector Case * (do_tail_read = false) * * |<------total_size = secsz----->| * | | * |<-head->|<---bytes--->|<-tail->| * +-------------------------------+ * | |0101010101010| | * +-------------------------------+ * start_sec */ start_sec = offset / secsz; head = offset % secsz; total_size = roundup2(head + bytes, secsz); tail = total_size - (head + bytes); do_tail_read = ((tail > 0) && (head + bytes > secsz)); full_sec_size = total_size; if (head > 0) full_sec_size -= secsz; if (do_tail_read) full_sec_size -= secsz; /* Return of partial sector data requires a bounce buffer. */ if ((head > 0) || do_tail_read || bytes < secsz) { bouncebuf = malloc(secsz); if (bouncebuf == NULL) { printf("vdev_read: out of memory\n"); return (ENOMEM); } } if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) { ret = errno; goto error; } /* Partial data return from first sector */ if (head > 0) { res = read(fd, bouncebuf, secsz); if (res != secsz) { ret = EIO; goto error; } memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes)); outbuf += min(secsz - head, bytes); } /* * Full data return from read sectors. * Note, there is still corner case where we read * from sector boundary, but less than sector size, e.g. reading 512B * from 4k sector. 
*/ if (full_sec_size > 0) { if (bytes < full_sec_size) { res = read(fd, bouncebuf, secsz); if (res != secsz) { ret = EIO; goto error; } memcpy(outbuf, bouncebuf, bytes); } else { res = read(fd, outbuf, full_sec_size); if (res != full_sec_size) { ret = EIO; goto error; } outbuf += full_sec_size; } } /* Partial data return from last sector */ if (do_tail_read) { res = read(fd, bouncebuf, secsz); if (res != secsz) { ret = EIO; goto error; } memcpy(outbuf, bouncebuf, secsz - tail); } ret = 0; error: free(bouncebuf); return (ret); } static int vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes) { int fd, ret; size_t head, tail, total_size, full_sec_size; unsigned secsz, do_tail_write; off_t start_sec; ssize_t res; char *outbuf, *bouncebuf; fd = (uintptr_t)vdev->v_priv; outbuf = (char *)buf; bouncebuf = NULL; ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); if (ret != 0) return (ret); start_sec = offset / secsz; head = offset % secsz; total_size = roundup2(head + bytes, secsz); tail = total_size - (head + bytes); do_tail_write = ((tail > 0) && (head + bytes > secsz)); full_sec_size = total_size; if (head > 0) full_sec_size -= secsz; if (do_tail_write) full_sec_size -= secsz; /* Partial sector write requires a bounce buffer. */ if ((head > 0) || do_tail_write || bytes < secsz) { bouncebuf = malloc(secsz); if (bouncebuf == NULL) { printf("vdev_write: out of memory\n"); return (ENOMEM); } } if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) { ret = errno; goto error; } /* Partial data for first sector */ if (head > 0) { res = read(fd, bouncebuf, secsz); if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes)); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); if ((unsigned)res != secsz) { ret = EIO; goto error; } outbuf += min(secsz - head, bytes); } /* * Full data write to sectors. * Note, there is still corner case where we write * to sector boundary, but less than sector size, e.g. 
write 512B * to 4k sector. */ if (full_sec_size > 0) { if (bytes < full_sec_size) { res = read(fd, bouncebuf, secsz); if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, bytes); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); if ((unsigned)res != secsz) { ret = EIO; goto error; } } else { res = write(fd, outbuf, full_sec_size); if ((unsigned)res != full_sec_size) { ret = EIO; goto error; } outbuf += full_sec_size; } } /* Partial data write to last sector */ if (do_tail_write) { res = read(fd, bouncebuf, secsz); if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, secsz - tail); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); if ((unsigned)res != secsz) { ret = EIO; goto error; } } ret = 0; error: free(bouncebuf); return (ret); } static int zfs_dev_init(void) { spa_t *spa; spa_t *next; spa_t *prev; zfs_init(); if (archsw.arch_zfs_probe == NULL) return (ENXIO); archsw.arch_zfs_probe(); prev = NULL; spa = STAILQ_FIRST(&zfs_pools); while (spa != NULL) { next = STAILQ_NEXT(spa, spa_link); if (zfs_spa_init(spa)) { if (prev == NULL) STAILQ_REMOVE_HEAD(&zfs_pools, spa_link); else STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link); } else prev = spa; spa = next; } return (0); } struct zfs_probe_args { int fd; const char *devname; uint64_t *pool_guid; u_int secsz; }; static int zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset) { struct zfs_probe_args *ppa; ppa = (struct zfs_probe_args *)arg; return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd, offset * ppa->secsz, buf, blocks * ppa->secsz)); } static int zfs_probe(int fd, uint64_t *pool_guid) { spa_t *spa; int ret; spa = NULL; ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa); if (ret == 0 && pool_guid != NULL) if (*pool_guid == 0) *pool_guid = spa->spa_guid; return (ret); } static int zfs_probe_partition(void *arg, const char *partname, const struct ptable_entry *part) { struct 
zfs_probe_args *ppa, pa; struct ptable *table; char devname[32]; int ret; /* Probe only freebsd-zfs and freebsd partitions */ if (part->type != PART_FREEBSD && part->type != PART_FREEBSD_ZFS) return (0); ppa = (struct zfs_probe_args *)arg; strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); devname[strlen(ppa->devname) - 1] = '\0'; snprintf(devname, sizeof(devname), "%s%s:", devname, partname); pa.fd = open(devname, O_RDWR); if (pa.fd == -1) return (0); ret = zfs_probe(pa.fd, ppa->pool_guid); if (ret == 0) return (0); /* Do we have BSD label here? */ if (part->type == PART_FREEBSD) { pa.devname = devname; pa.pool_guid = ppa->pool_guid; pa.secsz = ppa->secsz; table = ptable_open(&pa, part->end - part->start + 1, ppa->secsz, zfs_diskread); if (table != NULL) { ptable_iterate(table, &pa, zfs_probe_partition); ptable_close(table); } } close(pa.fd); return (0); } /* * Return bootenv nvlist from pool label. */ int zfs_get_bootenv(void *vdev, nvlist_t **benvp) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; nvlist_t *benv = NULL; vdev_t *vd; spa_t *spa; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (ENOTSUP); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); if (spa->spa_bootenv == NULL) { STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { benv = vdev_read_bootenv(vd); if (benv != NULL) break; } spa->spa_bootenv = benv; } else { benv = spa->spa_bootenv; } if (benv == NULL) return (ENOENT); *benvp = benv; return (0); } /* * Store nvlist to pool label bootenv area. Also updates cached pointer in spa. */ int zfs_set_bootenv(void *vdev, nvlist_t *benv) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; vdev_t *vd; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (ENOTSUP); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { vdev_write_bootenv(vd, benv); } spa->spa_bootenv = benv; return (0); } /* * Get bootonce value by key. 
The bootonce pair is removed * from the bootenv nvlist and the remaining nvlist is committed back to disk. */ int zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size) { nvlist_t *benv; char *result = NULL; int result_size, rv; if ((rv = zfs_get_bootenv(vdev, &benv)) != 0) return (rv); if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL, &result, &result_size)) == 0) { if (result_size == 0) { /* ignore empty string */ rv = ENOENT; } else { size = MIN((size_t)result_size + 1, size); strlcpy(buf, result, size); } (void) nvlist_remove(benv, key, DATA_TYPE_STRING); (void) zfs_set_bootenv(vdev, benv); } return (rv); } /* * nvstore backend. */ static int zfs_nvstore_setter(void *, int, const char *, const void *, size_t); static int zfs_nvstore_setter_str(void *, const char *, const char *, const char *); static int zfs_nvstore_unset_impl(void *, const char *, bool); static int zfs_nvstore_setenv(void *, void *); /* * nvstore is only present for current rootfs pool. */ static int zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value) { struct zfs_devdesc *dev; int rv; archsw.arch_getdev((void **)&dev, NULL, NULL); if (dev == NULL) return (ENXIO); rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value); free(dev); return (rv); } /* * nvstore is only present for current rootfs pool. 
*/ static int zfs_nvstore_unsethook(struct env_var *ev) { struct zfs_devdesc *dev; int rv; archsw.arch_getdev((void **)&dev, NULL, NULL); if (dev == NULL) return (ENXIO); rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false); free(dev); return (rv); } static int zfs_nvstore_getter(void *vdev, const char *name, void **data) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; nvlist_t *nv; char *str, **ptr; int size; int rv; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (ENOTSUP); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); if (spa->spa_bootenv == NULL) return (ENXIO); if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, NULL, &nv, NULL) != 0) return (ENOENT); rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size); if (rv == 0) { ptr = (char **)data; asprintf(ptr, "%.*s", size, str); if (*data == NULL) rv = ENOMEM; } nvlist_destroy(nv); return (rv); } static int zfs_nvstore_setter(void *vdev, int type, const char *name, const void *data, size_t size) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; nvlist_t *nv; int rv; bool env_set = true; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (ENOTSUP); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); if (spa->spa_bootenv == NULL) return (ENXIO); if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, NULL, &nv, NULL) != 0) { nv = nvlist_create(NV_UNIQUE_NAME); if (nv == NULL) return (ENOMEM); } rv = 0; switch (type) { case DATA_TYPE_INT8: if (size != sizeof (int8_t)) { rv = EINVAL; break; } rv = nvlist_add_int8(nv, name, *(int8_t *)data); break; case DATA_TYPE_INT16: if (size != sizeof (int16_t)) { rv = EINVAL; break; } rv = nvlist_add_int16(nv, name, *(int16_t *)data); break; case DATA_TYPE_INT32: if (size != sizeof (int32_t)) { rv = EINVAL; break; } rv = nvlist_add_int32(nv, name, *(int32_t *)data); break; case DATA_TYPE_INT64: if (size != sizeof (int64_t)) { rv = EINVAL; break; } rv = nvlist_add_int64(nv, name, *(int64_t *)data); 
break; case DATA_TYPE_BYTE: if (size != sizeof (uint8_t)) { rv = EINVAL; break; } rv = nvlist_add_byte(nv, name, *(int8_t *)data); break; case DATA_TYPE_UINT8: if (size != sizeof (uint8_t)) { rv = EINVAL; break; } rv = nvlist_add_uint8(nv, name, *(int8_t *)data); break; case DATA_TYPE_UINT16: if (size != sizeof (uint16_t)) { rv = EINVAL; break; } rv = nvlist_add_uint16(nv, name, *(uint16_t *)data); break; case DATA_TYPE_UINT32: if (size != sizeof (uint32_t)) { rv = EINVAL; break; } rv = nvlist_add_uint32(nv, name, *(uint32_t *)data); break; case DATA_TYPE_UINT64: if (size != sizeof (uint64_t)) { rv = EINVAL; break; } rv = nvlist_add_uint64(nv, name, *(uint64_t *)data); break; case DATA_TYPE_STRING: rv = nvlist_add_string(nv, name, data); break; case DATA_TYPE_BOOLEAN_VALUE: if (size != sizeof (boolean_t)) { rv = EINVAL; break; } rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data); break; default: rv = EINVAL; break; } if (rv == 0) { rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv); if (rv == 0) { rv = zfs_set_bootenv(vdev, spa->spa_bootenv); } if (rv == 0) { if (env_set) { rv = zfs_nvstore_setenv(vdev, nvpair_find(nv, name)); } else { env_discard(env_getenv(name)); rv = 0; } } } nvlist_destroy(nv); return (rv); } static int get_int64(const char *data, int64_t *ip) { char *end; int64_t val; errno = 0; val = strtoll(data, &end, 0); if (errno != 0 || *data == '\0' || *end != '\0') return (EINVAL); *ip = val; return (0); } static int get_uint64(const char *data, uint64_t *ip) { char *end; uint64_t val; errno = 0; val = strtoull(data, &end, 0); if (errno != 0 || *data == '\0' || *end != '\0') return (EINVAL); *ip = val; return (0); } /* * Translate textual data to data type. If type is not set, and we are * creating new pair, use DATA_TYPE_STRING. 
*/ static int zfs_nvstore_setter_str(void *vdev, const char *type, const char *name, const char *data) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; nvlist_t *nv; int rv; data_type_t dt; int64_t val; uint64_t uval; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (ENOTSUP); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); if (spa->spa_bootenv == NULL) return (ENXIO); if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, NULL, &nv, NULL) != 0) { nv = NULL; } if (type == NULL) { nvp_header_t *nvh; /* * if there is no existing pair, default to string. * Otherwise, use type from existing pair. */ nvh = nvpair_find(nv, name); if (nvh == NULL) { dt = DATA_TYPE_STRING; } else { nv_string_t *nvp_name; nv_pair_data_t *nvp_data; nvp_name = (nv_string_t *)(nvh + 1); nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] + NV_ALIGN4(nvp_name->nv_size)); dt = nvp_data->nv_type; } } else { dt = nvpair_type_from_name(type); } nvlist_destroy(nv); rv = 0; switch (dt) { case DATA_TYPE_INT8: rv = get_int64(data, &val); if (rv == 0) { int8_t v = val; rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); } break; case DATA_TYPE_INT16: rv = get_int64(data, &val); if (rv == 0) { int16_t v = val; rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); } break; case DATA_TYPE_INT32: rv = get_int64(data, &val); if (rv == 0) { int32_t v = val; rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); } break; case DATA_TYPE_INT64: rv = get_int64(data, &val); if (rv == 0) { rv = zfs_nvstore_setter(vdev, dt, name, &val, sizeof (val)); } break; case DATA_TYPE_BYTE: rv = get_uint64(data, &uval); if (rv == 0) { uint8_t v = uval; rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); } break; case DATA_TYPE_UINT8: rv = get_uint64(data, &uval); if (rv == 0) { uint8_t v = uval; rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); } break; case DATA_TYPE_UINT16: rv = get_uint64(data, &uval); if (rv == 0) { uint16_t v = uval; rv = zfs_nvstore_setter(vdev, 
dt, name, &v, sizeof (v)); } break; case DATA_TYPE_UINT32: rv = get_uint64(data, &uval); if (rv == 0) { uint32_t v = uval; rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); } break; case DATA_TYPE_UINT64: rv = get_uint64(data, &uval); if (rv == 0) { rv = zfs_nvstore_setter(vdev, dt, name, &uval, sizeof (uval)); } break; case DATA_TYPE_STRING: rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1); break; case DATA_TYPE_BOOLEAN_VALUE: rv = get_int64(data, &val); if (rv == 0) { boolean_t v = val; rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); } default: rv = EINVAL; } return (rv); } static int zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; nvlist_t *nv; int rv; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (ENOTSUP); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); if (spa->spa_bootenv == NULL) return (ENXIO); if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, NULL, &nv, NULL) != 0) return (ENOENT); rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN); if (rv == 0) { if (nvlist_next_nvpair(nv, NULL) == NULL) { rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST); } else { rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv); } if (rv == 0) rv = zfs_set_bootenv(vdev, spa->spa_bootenv); } if (unset_env) env_discard(env_getenv(name)); return (rv); } static int zfs_nvstore_unset(void *vdev, const char *name) { return (zfs_nvstore_unset_impl(vdev, name, true)); } static int zfs_nvstore_print(void *vdev __unused, void *ptr) { nvpair_print(ptr, 0); return (0); } /* * Create environment variable from nvpair. * set hook will update nvstore with new value, unset hook will remove * variable from nvstore. 
*/ static int zfs_nvstore_setenv(void *vdev __unused, void *ptr) { nvp_header_t *nvh = ptr; nv_string_t *nvp_name, *nvp_value; nv_pair_data_t *nvp_data; char *name, *value; int rv = 0; if (nvh == NULL) return (ENOENT); nvp_name = (nv_string_t *)(nvh + 1); nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] + NV_ALIGN4(nvp_name->nv_size)); if ((name = nvstring_get(nvp_name)) == NULL) return (ENOMEM); value = NULL; switch (nvp_data->nv_type) { case DATA_TYPE_BYTE: case DATA_TYPE_UINT8: (void) asprintf(&value, "%uc", *(unsigned *)&nvp_data->nv_data[0]); if (value == NULL) rv = ENOMEM; break; case DATA_TYPE_INT8: (void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]); if (value == NULL) rv = ENOMEM; break; case DATA_TYPE_INT16: (void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]); if (value == NULL) rv = ENOMEM; break; case DATA_TYPE_UINT16: (void) asprintf(&value, "%hu", *(unsigned short *)&nvp_data->nv_data[0]); if (value == NULL) rv = ENOMEM; break; case DATA_TYPE_BOOLEAN_VALUE: case DATA_TYPE_INT32: (void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]); if (value == NULL) rv = ENOMEM; break; case DATA_TYPE_UINT32: (void) asprintf(&value, "%u", *(unsigned *)&nvp_data->nv_data[0]); if (value == NULL) rv = ENOMEM; break; case DATA_TYPE_INT64: (void) asprintf(&value, "%jd", (intmax_t)*(int64_t *)&nvp_data->nv_data[0]); if (value == NULL) rv = ENOMEM; break; case DATA_TYPE_UINT64: (void) asprintf(&value, "%ju", (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]); if (value == NULL) rv = ENOMEM; break; case DATA_TYPE_STRING: nvp_value = (nv_string_t *)&nvp_data->nv_data[0]; if ((value = nvstring_get(nvp_value)) == NULL) { rv = ENOMEM; break; } break; default: rv = EINVAL; break; } if (value != NULL) { rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value, zfs_nvstore_sethook, zfs_nvstore_unsethook); free(value); } free(name); return (rv); } static int zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *)) { struct zfs_devdesc *dev = (struct 
zfs_devdesc *)vdev; spa_t *spa; nvlist_t *nv; nvp_header_t *nvh; int rv; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (ENOTSUP); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); if (spa->spa_bootenv == NULL) return (ENXIO); if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, NULL, &nv, NULL) != 0) return (ENOENT); rv = 0; nvh = NULL; while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) { rv = cb(vdev, nvh); if (rv != 0) break; } return (rv); } nvs_callbacks_t nvstore_zfs_cb = { .nvs_getter = zfs_nvstore_getter, .nvs_setter = zfs_nvstore_setter, .nvs_setter_str = zfs_nvstore_setter_str, .nvs_unset = zfs_nvstore_unset, .nvs_print = zfs_nvstore_print, .nvs_iterate = zfs_nvstore_iterate }; int zfs_attach_nvstore(void *vdev) { struct zfs_devdesc *dev = vdev; spa_t *spa; uint64_t version; int rv; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (ENOTSUP); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64, NULL, &version, NULL); if (rv != 0 || version != VB_NVLIST) { return (ENXIO); } dev = malloc(sizeof (*dev)); if (dev == NULL) return (ENOMEM); memcpy(dev, vdev, sizeof (*dev)); rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev); if (rv != 0) free(dev); else rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv); return (rv); } int zfs_probe_dev(const char *devname, uint64_t *pool_guid) { struct ptable *table; struct zfs_probe_args pa; uint64_t mediasz; int ret; if (pool_guid) *pool_guid = 0; pa.fd = open(devname, O_RDWR); if (pa.fd == -1) return (ENXIO); /* Probe the whole disk */ ret = zfs_probe(pa.fd, pool_guid); if (ret == 0) return (0); /* Probe each partition */ ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); if (ret == 0) ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz); if (ret == 0) { pa.devname = devname; pa.pool_guid = pool_guid; table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz, zfs_diskread); if (table != NULL) { ptable_iterate(table, &pa, zfs_probe_partition); 
ptable_close(table); } } close(pa.fd); if (pool_guid && *pool_guid == 0) ret = ENXIO; return (ret); } /* * Print information about ZFS pools */ static int zfs_dev_print(int verbose) { spa_t *spa; char line[80]; int ret = 0; if (STAILQ_EMPTY(&zfs_pools)) return (0); printf("%s devices:", zfs_dev.dv_name); if ((ret = pager_output("\n")) != 0) return (ret); if (verbose) { return (spa_all_status()); } STAILQ_FOREACH(spa, &zfs_pools, spa_link) { snprintf(line, sizeof(line), " zfs:%s\n", spa->spa_name); ret = pager_output(line); if (ret != 0) break; } return (ret); } /* * Attempt to open the pool described by (dev) for use by (f). */ static int zfs_dev_open(struct open_file *f, ...) { va_list args; struct zfs_devdesc *dev; struct zfsmount *mount; spa_t *spa; int rv; va_start(args, f); dev = va_arg(args, struct zfs_devdesc *); va_end(args); if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); - mount = malloc(sizeof(*mount)); - if (mount == NULL) - rv = ENOMEM; - else - rv = zfs_mount(spa, dev->root_guid, mount); - if (rv != 0) { - free(mount); - return (rv); + STAILQ_FOREACH(mount, &zfsmount, next) { + if (spa->spa_guid == mount->spa->spa_guid) + break; } - if (mount->objset.os_type != DMU_OST_ZFS) { - printf("Unexpected object set type %ju\n", - (uintmax_t)mount->objset.os_type); - free(mount); - return (EIO); + + rv = 0; + /* This device is not set as currdev, mount us private copy. 
*/ + if (mount == NULL) + rv = zfs_mount(zfs_fmtdev(dev), NULL, (void **)&mount); + + if (rv == 0) { + f->f_devdata = mount; + free(dev); } - f->f_devdata = mount; - free(dev); - return (0); + return (rv); } static int zfs_dev_close(struct open_file *f) { + struct zfsmount *mnt, *mount; + + mnt = f->f_devdata; + + STAILQ_FOREACH(mount, &zfsmount, next) { + if (mnt->spa->spa_guid == mount->spa->spa_guid) + break; + } + + /* + * devclose() will free f->f_devdata, but since we do have + * pointer to zfsmount structure in f->f_devdata, and + * zfs_unmount() will also free the zfsmount structure, + * we will get double free. To prevent double free, + * we must set f_devdata to NULL there. + */ + if (mount != NULL) + f->f_devdata = NULL; - free(f->f_devdata); - f->f_devdata = NULL; return (0); } static int zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) { return (ENOSYS); } struct devsw zfs_dev = { .dv_name = "zfs", .dv_type = DEVT_ZFS, .dv_init = zfs_dev_init, .dv_strategy = zfs_dev_strategy, .dv_open = zfs_dev_open, .dv_close = zfs_dev_close, .dv_ioctl = noioctl, .dv_print = zfs_dev_print, .dv_cleanup = NULL }; int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) { static char rootname[ZFS_MAXNAMELEN]; static char poolname[ZFS_MAXNAMELEN]; spa_t *spa; const char *end; const char *np; const char *sep; int rv; np = devspec; if (*np != ':') return (EINVAL); np++; end = strrchr(np, ':'); if (end == NULL) return (EINVAL); sep = strchr(np, '/'); if (sep == NULL || sep >= end) sep = end; memcpy(poolname, np, sep - np); poolname[sep - np] = '\0'; if (sep < end) { sep++; memcpy(rootname, sep, end - sep); rootname[end - sep] = '\0'; } else rootname[0] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); dev->pool_guid = spa->spa_guid; rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid); if (rv != 0) return (rv); if (path != NULL) *path = (*end == '\0') ? 
end : end + 1; dev->dd.d_dev = &zfs_dev; return (0); } char * zfs_fmtdev(void *vdev) { static char rootname[ZFS_MAXNAMELEN]; static char buf[2 * ZFS_MAXNAMELEN + 8]; struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; buf[0] = '\0'; if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (buf); /* Do we have any pools? */ spa = STAILQ_FIRST(&zfs_pools); if (spa == NULL) return (buf); if (dev->pool_guid == 0) dev->pool_guid = spa->spa_guid; else spa = spa_find_by_guid(dev->pool_guid); if (spa == NULL) { printf("ZFS: can't find pool by guid\n"); return (buf); } if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) { printf("ZFS: can't find root filesystem\n"); return (buf); } if (zfs_rlookup(spa, dev->root_guid, rootname)) { printf("ZFS: can't find filesystem by guid\n"); return (buf); } if (rootname[0] == '\0') snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name); else snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name, rootname); return (buf); } static int split_devname(const char *name, char *poolname, size_t size, const char **dsnamep) { const char *dsname; size_t len; ASSERT(name != NULL); ASSERT(poolname != NULL); len = strlen(name); dsname = strchr(name, '/'); if (dsname != NULL) { len = dsname - name; dsname++; } else dsname = ""; if (len + 1 > size) return (EINVAL); strlcpy(poolname, name, len + 1); if (dsnamep != NULL) *dsnamep = dsname; return (0); } int zfs_list(const char *name) { static char poolname[ZFS_MAXNAMELEN]; uint64_t objid; spa_t *spa; const char *dsname; int rv; if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0) return (EINVAL); spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return (rv); return (zfs_list_dataset(spa, objid)); } void init_zfs_boot_options(const char *currdev_in) { char poolname[ZFS_MAXNAMELEN]; char *beroot, *currdev; spa_t *spa; int currdev_len; const char *dsname; currdev = 
NULL; currdev_len = strlen(currdev_in); if (currdev_len == 0) return; if (strncmp(currdev_in, "zfs:", 4) != 0) return; currdev = strdup(currdev_in); if (currdev == NULL) return; /* Remove the trailing : */ currdev[currdev_len - 1] = '\0'; setenv("zfs_be_active", currdev, 1); setenv("zfs_be_currpage", "1", 1); /* Remove the last element (current bootenv) */ beroot = strrchr(currdev, '/'); if (beroot != NULL) beroot[0] = '\0'; beroot = strchr(currdev, ':') + 1; setenv("zfs_be_root", beroot, 1); if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0) return; spa = spa_find_by_name(poolname); if (spa == NULL) return; zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0); zfs_checkpoints_initial(spa, beroot, dsname); free(currdev); } static void zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname) { char envname[32]; if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) { snprintf(envname, sizeof(envname), "zpool_checkpoint"); setenv(envname, name, 1); spa->spa_uberblock = &spa->spa_uberblock_checkpoint; spa->spa_mos = &spa->spa_mos_checkpoint; zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1); spa->spa_uberblock = &spa->spa_uberblock_master; spa->spa_mos = &spa->spa_mos_master; } } static void zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname, const char *dsname, int checkpoint) { char envname[32], envval[256]; uint64_t objid; int bootenvs_idx, rv; SLIST_INIT(&zfs_be_head); zfs_env_count = 0; rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return; rv = zfs_callback_dataset(spa, objid, zfs_belist_add); bootenvs_idx = 0; /* Populate the initial environment variables */ SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { /* Enumerate all bootenvs for general usage */ snprintf(envname, sizeof(envname), "%s[%d]", envprefix, bootenvs_idx); snprintf(envval, sizeof(envval), "zfs:%s%s/%s", checkpoint ? "!" 
: "", rootname, zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0) break; bootenvs_idx++; } snprintf(envname, sizeof(envname), "%s_count", envprefix); snprintf(envval, sizeof(envval), "%d", bootenvs_idx); setenv(envname, envval, 1); /* Clean up the SLIST of ZFS BEs */ while (!SLIST_EMPTY(&zfs_be_head)) { zfs_be = SLIST_FIRST(&zfs_be_head); SLIST_REMOVE_HEAD(&zfs_be_head, entries); free(zfs_be->name); free(zfs_be); } } int zfs_bootenv(const char *name) { char poolname[ZFS_MAXNAMELEN], *root; const char *dsname; char becount[4]; uint64_t objid; spa_t *spa; int rv, pages, perpage, currpage; if (name == NULL) return (EINVAL); if ((root = getenv("zfs_be_root")) == NULL) return (EINVAL); if (strcmp(name, root) != 0) { if (setenv("zfs_be_root", name, 1) != 0) return (ENOMEM); } SLIST_INIT(&zfs_be_head); zfs_env_count = 0; if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0) return (EINVAL); spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return (rv); rv = zfs_callback_dataset(spa, objid, zfs_belist_add); /* Calculate and store the number of pages of BEs */ perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1); pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 
1 : 0); snprintf(becount, 4, "%d", pages); if (setenv("zfs_be_pages", becount, 1) != 0) return (ENOMEM); /* Roll over the page counter if it has exceeded the maximum */ currpage = strtol(getenv("zfs_be_currpage"), NULL, 10); if (currpage > pages) { if (setenv("zfs_be_currpage", "1", 1) != 0) return (ENOMEM); } /* Populate the menu environment variables */ zfs_set_env(); /* Clean up the SLIST of ZFS BEs */ while (!SLIST_EMPTY(&zfs_be_head)) { zfs_be = SLIST_FIRST(&zfs_be_head); SLIST_REMOVE_HEAD(&zfs_be_head, entries); free(zfs_be->name); free(zfs_be); } return (rv); } int zfs_belist_add(const char *name, uint64_t value __unused) { /* Skip special datasets that start with a $ character */ if (strncmp(name, "$", 1) == 0) { return (0); } /* Add the boot environment to the head of the SLIST */ zfs_be = malloc(sizeof(struct zfs_be_entry)); if (zfs_be == NULL) { return (ENOMEM); } zfs_be->name = strdup(name); if (zfs_be->name == NULL) { free(zfs_be); return (ENOMEM); } SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries); zfs_env_count++; return (0); } int zfs_set_env(void) { char envname[32], envval[256]; char *beroot, *pagenum; int rv, page, ctr; beroot = getenv("zfs_be_root"); if (beroot == NULL) { return (1); } pagenum = getenv("zfs_be_currpage"); if (pagenum != NULL) { page = strtol(pagenum, NULL, 10); } else { page = 1; } ctr = 1; rv = 0; zfs_env_index = ZFS_BE_FIRST; SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { /* Skip to the requested page number */ if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) { ctr++; continue; } snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); snprintf(envval, sizeof(envval), "%s", zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0) { break; } snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); rv = setenv(envname, envval, 1); if (rv != 0){ break; } snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); rv = setenv(envname, 
"set_bootenv", 1); if (rv != 0){ break; } snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0){ break; } zfs_env_index++; if (zfs_env_index > ZFS_BE_LAST) { break; } } for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) { snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); (void)unsetenv(envname); } return (rv); } diff --git a/stand/libsa/zfs/zfsimpl.c b/stand/libsa/zfs/zfsimpl.c index 7036b508fa3c..ceaeeb2e77f3 100644 --- a/stand/libsa/zfs/zfsimpl.c +++ b/stand/libsa/zfs/zfsimpl.c @@ -1,3797 +1,3801 @@ /*- * Copyright (c) 2007 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone ZFS file reader. */ #include #include #include #include #include #include #include #include "zfsimpl.h" #include "zfssubr.c" #ifdef HAS_ZSTD_ZFS extern int zstd_init(void); #endif struct zfsmount { - const spa_t *spa; - objset_phys_t objset; - uint64_t rootobj; + char *path; + const spa_t *spa; + objset_phys_t objset; + uint64_t rootobj; + STAILQ_ENTRY(zfsmount) next; }; -static struct zfsmount zfsmount __unused; + +typedef STAILQ_HEAD(zfs_mnt_list, zfsmount) zfs_mnt_list_t; +static zfs_mnt_list_t zfsmount = STAILQ_HEAD_INITIALIZER(zfsmount); /* * The indirect_child_t represents the vdev that we will read from, when we * need to read all copies of the data (e.g. for scrub or reconstruction). * For plain (non-mirror) top-level vdevs (i.e. is_vdev is not a mirror), * ic_vdev is the same as is_vdev. However, for mirror top-level vdevs, * ic_vdev is a child of the mirror. */ typedef struct indirect_child { void *ic_data; vdev_t *ic_vdev; } indirect_child_t; /* * The indirect_split_t represents one mapped segment of an i/o to the * indirect vdev. For non-split (contiguously-mapped) blocks, there will be * only one indirect_split_t, with is_split_offset==0 and is_size==io_size. * For split blocks, there will be several of these. */ typedef struct indirect_split { list_node_t is_node; /* link on iv_splits */ /* * is_split_offset is the offset into the i/o. * This is the sum of the previous splits' is_size's. 
*/ uint64_t is_split_offset; vdev_t *is_vdev; /* top-level vdev */ uint64_t is_target_offset; /* offset on is_vdev */ uint64_t is_size; int is_children; /* number of entries in is_child[] */ /* * is_good_child is the child that we are currently using to * attempt reconstruction. */ int is_good_child; indirect_child_t is_child[1]; /* variable-length */ } indirect_split_t; /* * The indirect_vsd_t is associated with each i/o to the indirect vdev. * It is the "Vdev-Specific Data" in the zio_t's io_vsd. */ typedef struct indirect_vsd { boolean_t iv_split_block; boolean_t iv_reconstruct; list_t iv_splits; /* list of indirect_split_t's */ } indirect_vsd_t; /* * List of all vdevs, chained through v_alllink. */ static vdev_list_t zfs_vdevs; /* * List of ZFS features supported for read */ static const char *features_for_read[] = { "org.illumos:lz4_compress", "com.delphix:hole_birth", "com.delphix:extensible_dataset", "com.delphix:embedded_data", "org.open-zfs:large_blocks", "org.illumos:sha512", "org.illumos:skein", "org.zfsonlinux:large_dnode", "com.joyent:multi_vdev_crash_dump", "com.delphix:spacemap_histogram", "com.delphix:zpool_checkpoint", "com.delphix:spacemap_v2", "com.datto:encryption", "com.datto:bookmark_v2", "org.zfsonlinux:allocation_classes", "com.datto:resilver_defer", "com.delphix:device_removal", "com.delphix:obsolete_counts", "com.intel:allocation_classes", "org.freebsd:zstd_compress", "com.delphix:bookmark_written", NULL }; /* * List of all pools, chained through spa_link. 
*/ static spa_list_t zfs_pools; static const dnode_phys_t *dnode_cache_obj; static uint64_t dnode_cache_bn; static char *dnode_cache_buf; static int zio_read(const spa_t *spa, const blkptr_t *bp, void *buf); static int zfs_get_root(const spa_t *spa, uint64_t *objid); static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result); static int zap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t integer_size, uint64_t num_integers, void *value); static int objset_get_dnode(const spa_t *, const objset_phys_t *, uint64_t, dnode_phys_t *); static int dnode_read(const spa_t *, const dnode_phys_t *, off_t, void *, size_t); static int vdev_indirect_read(vdev_t *, const blkptr_t *, void *, off_t, size_t); static int vdev_mirror_read(vdev_t *, const blkptr_t *, void *, off_t, size_t); vdev_indirect_mapping_t *vdev_indirect_mapping_open(spa_t *, objset_phys_t *, uint64_t); vdev_indirect_mapping_entry_phys_t * vdev_indirect_mapping_duplicate_adjacent_entries(vdev_t *, uint64_t, uint64_t, uint64_t *); static void zfs_init(void) { STAILQ_INIT(&zfs_vdevs); STAILQ_INIT(&zfs_pools); dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE); zfs_init_crc(); #ifdef HAS_ZSTD_ZFS zstd_init(); #endif }

/*
 * Check the pool's features-for-read list against features_for_read[].
 *
 * Every pair name in the ZPOOL_CONFIG_FEATURES_FOR_READ list of nvl must
 * match one table entry exactly; each name that does not is reported with
 * printf().
 * Returns 0 if the list is absent or every name is supported, EIO if at
 * least one name is unsupported, or the nvlist_find() error.
 */
static int
nvlist_check_features_for_read(nvlist_t *nvl)
{
	nvlist_t *features = NULL;
	nvs_data_t *data;
	nvp_header_t *nvp;
	nv_string_t *nvp_name;
	int rc;

	rc = nvlist_find(nvl, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    DATA_TYPE_NVLIST, NULL, &features, NULL);
	switch (rc) {
	case 0:
		break;		/* Continue with checks */

	case ENOENT:
		return (0);	/* All features are disabled */

	default:
		return (rc);	/* Error while reading nvlist */
	}

	data = (nvs_data_t *)features->nv_data;
	nvp = &data->nvl_pair;	/* first pair in nvlist */

	while (nvp->encoded_size != 0 && nvp->decoded_size != 0) {
		int i, found;

		nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof(*nvp));
		found = 0;

		for (i = 0; features_for_read[i] != NULL; i++) {
			/*
			 * Compare lengths first.  memcmp() over nv_size
			 * bytes alone would accept a name that is merely a
			 * prefix of a supported feature, and could read
			 * past the end of a shorter table string.
			 */
			if (strlen(features_for_read[i]) ==
			    (size_t)nvp_name->nv_size &&
			    memcmp(nvp_name->nv_data, features_for_read[i],
			    nvp_name->nv_size) == 0) {
				found = 1;
				break;
			}
		}

		if (!found) {
			printf("ZFS: unsupported feature: %.*s\n",
			    nvp_name->nv_size, nvp_name->nv_data);
			rc = EIO;
		}
		nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size);
	}
	nvlist_destroy(features);

	return (rc);
}

static int vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t size) { size_t psize; int rc; if (vdev->v_phys_read == NULL) return (ENOTSUP); if (bp) { psize = BP_GET_PSIZE(bp); } else { psize = size; } rc = vdev->v_phys_read(vdev, vdev->v_priv, offset, buf, psize); if (rc == 0) { if (bp != NULL) rc = zio_checksum_verify(vdev->v_spa, bp, buf); } return (rc); } static int vdev_write_phys(vdev_t *vdev, void *buf, off_t offset, size_t size) { if (vdev->v_phys_write == NULL) return (ENOTSUP); return (vdev->v_phys_write(vdev, offset, buf, size)); } typedef struct remap_segment { vdev_t *rs_vd; uint64_t rs_offset; uint64_t rs_asize; uint64_t rs_split_offset; list_node_t rs_node; } remap_segment_t; static remap_segment_t * rs_alloc(vdev_t *vd, uint64_t offset, uint64_t asize, uint64_t split_offset) { remap_segment_t *rs = malloc(sizeof (remap_segment_t)); if (rs != NULL) { rs->rs_vd = vd; rs->rs_offset = offset; rs->rs_asize = asize; rs->rs_split_offset = split_offset; } return (rs); } vdev_indirect_mapping_t * vdev_indirect_mapping_open(spa_t *spa, objset_phys_t *os, uint64_t mapping_object) { vdev_indirect_mapping_t *vim; vdev_indirect_mapping_phys_t *vim_phys; int rc; vim = calloc(1, sizeof (*vim)); if (vim == NULL) return (NULL); vim->vim_dn = calloc(1, sizeof (*vim->vim_dn)); if (vim->vim_dn == NULL) { free(vim); return (NULL); } rc = objset_get_dnode(spa, os, mapping_object, vim->vim_dn); if (rc != 0) { free(vim->vim_dn); free(vim); return (NULL); } vim->vim_spa = spa; vim->vim_phys = malloc(sizeof (*vim->vim_phys)); if (vim->vim_phys == NULL) { free(vim->vim_dn); free(vim); return (NULL); } vim_phys = (vdev_indirect_mapping_phys_t *)DN_BONUS(vim->vim_dn); *vim->vim_phys = *vim_phys; vim->vim_objset = 
os; vim->vim_object = mapping_object;
/* No entries are loaded by this function; the entry buffer starts out empty. */
vim->vim_entries = NULL;
/*
 * NOTE(review): a bonus buffer larger than VDEV_INDIRECT_MAPPING_SIZE_V0
 * presumably means the mapping carries the newer counts data - confirm.
 */
vim->vim_havecounts = (vim->vim_dn->dn_bonuslen > VDEV_INDIRECT_MAPPING_SIZE_V0); return (vim); } /* * Compare an offset with an indirect mapping entry; there are three * possible scenarios: * * 1. The offset is "less than" the mapping entry; meaning the * offset is less than the source offset of the mapping entry. In * this case, there is no overlap between the offset and the * mapping entry and -1 will be returned. * * 2. The offset is "greater than" the mapping entry; meaning the * offset is greater than the mapping entry's source offset plus * the entry's size. In this case, there is no overlap between * the offset and the mapping entry and 1 will be returned. * * NOTE: If the offset is actually equal to the entry's offset * plus size, this is considered to be "greater" than the entry, * and this case applies (i.e. 1 will be returned). Thus, the * entry's "range" can be considered to be inclusive at its * start, but exclusive at its end: e.g. [src, src + size). * * 3. The last case to consider is if the offset actually falls * within the mapping entry's range. If this is the case, the * offset is considered to be "equal to" the mapping entry and * 0 will be returned. * * NOTE: If the offset is equal to the entry's source offset, * this case applies and 0 will be returned. If the offset is * equal to the entry's source plus its size, this case does * *not* apply (see "NOTE" above for scenario 2), and 1 will be * returned. */ static int dva_mapping_overlap_compare(const void *v_key, const void *v_array_elem) { const uint64_t *key = v_key; const vdev_indirect_mapping_entry_phys_t *array_elem = v_array_elem; uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem); if (*key < src_offset) { /* scenario 1: key lies before the entry */ return (-1); } else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) { /* scenario 3: key is inside [src, src + size) */ return (0); } else { /* scenario 2: key is at or past the end of the entry */ return (1); } } /* * Return array entry. 
*/ static vdev_indirect_mapping_entry_phys_t * vdev_indirect_mapping_entry(vdev_indirect_mapping_t *vim, uint64_t index) { uint64_t size; off_t offset = 0; int rc; if (vim->vim_phys->vimp_num_entries == 0) return (NULL); if (vim->vim_entries == NULL) { uint64_t bsize; bsize = vim->vim_dn->dn_datablkszsec << SPA_MINBLOCKSHIFT; size = vim->vim_phys->vimp_num_entries * sizeof (*vim->vim_entries); if (size > bsize) { size = bsize / sizeof (*vim->vim_entries); size *= sizeof (*vim->vim_entries); } vim->vim_entries = malloc(size); if (vim->vim_entries == NULL) return (NULL); vim->vim_num_entries = size / sizeof (*vim->vim_entries); offset = index * sizeof (*vim->vim_entries); } /* We have data in vim_entries */ if (offset == 0) { if (index >= vim->vim_entry_offset && index <= vim->vim_entry_offset + vim->vim_num_entries) { index -= vim->vim_entry_offset; return (&vim->vim_entries[index]); } offset = index * sizeof (*vim->vim_entries); } vim->vim_entry_offset = index; size = vim->vim_num_entries * sizeof (*vim->vim_entries); rc = dnode_read(vim->vim_spa, vim->vim_dn, offset, vim->vim_entries, size); if (rc != 0) { /* Read error, invalidate vim_entries. */ free(vim->vim_entries); vim->vim_entries = NULL; return (NULL); } index -= vim->vim_entry_offset; return (&vim->vim_entries[index]); } /* * Returns the mapping entry for the given offset. * * It's possible that the given offset will not be in the mapping table * (i.e. no mapping entries contain this offset), in which case, the * return value value depends on the "next_if_missing" parameter. * * If the offset is not found in the table and "next_if_missing" is * B_FALSE, then NULL will always be returned. The behavior is intended * to allow consumers to get the entry corresponding to the offset * parameter, iff the offset overlaps with an entry in the table. 
* * If the offset is not found in the table and "next_if_missing" is * B_TRUE, then the entry nearest to the given offset will be returned, * such that the entry's source offset is greater than the offset * passed in (i.e. the "next" mapping entry in the table is returned, if * the offset is missing from the table). If there are no entries whose * source offset is greater than the passed in offset, NULL is returned. */ static vdev_indirect_mapping_entry_phys_t * vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim, uint64_t offset) { ASSERT(vim->vim_phys->vimp_num_entries > 0); vdev_indirect_mapping_entry_phys_t *entry; uint64_t last = vim->vim_phys->vimp_num_entries - 1; uint64_t base = 0; /* * We don't define these inside of the while loop because we use * their value in the case that offset isn't in the mapping. */ uint64_t mid; int result; while (last >= base) { mid = base + ((last - base) >> 1); entry = vdev_indirect_mapping_entry(vim, mid); if (entry == NULL) break; result = dva_mapping_overlap_compare(&offset, entry); if (result == 0) { break; } else if (result < 0) { last = mid - 1; } else { base = mid + 1; } } return (entry); } /* * Given an indirect vdev and an extent on that vdev, it duplicates the * physical entries of the indirect mapping that correspond to the extent * to a new array and returns a pointer to it. In addition, copied_entries * is populated with the number of mapping entries that were duplicated. * * Finally, since we are doing an allocation, it is up to the caller to * free the array allocated in this function. 
*/ vdev_indirect_mapping_entry_phys_t * vdev_indirect_mapping_duplicate_adjacent_entries(vdev_t *vd, uint64_t offset, uint64_t asize, uint64_t *copied_entries) { vdev_indirect_mapping_entry_phys_t *duplicate_mappings = NULL; vdev_indirect_mapping_t *vim = vd->v_mapping; uint64_t entries = 0; vdev_indirect_mapping_entry_phys_t *first_mapping = vdev_indirect_mapping_entry_for_offset(vim, offset); ASSERT3P(first_mapping, !=, NULL); vdev_indirect_mapping_entry_phys_t *m = first_mapping; while (asize > 0) { uint64_t size = DVA_GET_ASIZE(&m->vimep_dst); uint64_t inner_offset = offset - DVA_MAPPING_GET_SRC_OFFSET(m); uint64_t inner_size = MIN(asize, size - inner_offset); offset += inner_size; asize -= inner_size; entries++; m++; } size_t copy_length = entries * sizeof (*first_mapping); duplicate_mappings = malloc(copy_length); if (duplicate_mappings != NULL) bcopy(first_mapping, duplicate_mappings, copy_length); else entries = 0; *copied_entries = entries; return (duplicate_mappings); } static vdev_t * vdev_lookup_top(spa_t *spa, uint64_t vdev) { vdev_t *rvd; vdev_list_t *vlist; vlist = &spa->spa_root_vdev->v_children; STAILQ_FOREACH(rvd, vlist, v_childlink) if (rvd->v_id == vdev) break; return (rvd); } /* * This is a callback for vdev_indirect_remap() which allocates an * indirect_split_t for each split segment and adds it to iv_splits. 
*/ static void vdev_indirect_gather_splits(uint64_t split_offset, vdev_t *vd, uint64_t offset, uint64_t size, void *arg) { int n = 1; zio_t *zio = arg; indirect_vsd_t *iv = zio->io_vsd; if (vd->v_read == vdev_indirect_read) return; if (vd->v_read == vdev_mirror_read) n = vd->v_nchildren; indirect_split_t *is = malloc(offsetof(indirect_split_t, is_child[n])); if (is == NULL) { zio->io_error = ENOMEM; return; } bzero(is, offsetof(indirect_split_t, is_child[n])); is->is_children = n; is->is_size = size; is->is_split_offset = split_offset; is->is_target_offset = offset; is->is_vdev = vd; /* * Note that we only consider multiple copies of the data for * *mirror* vdevs. We don't for "replacing" or "spare" vdevs, even * though they use the same ops as mirror, because there's only one * "good" copy under the replacing/spare. */ if (vd->v_read == vdev_mirror_read) { int i = 0; vdev_t *kid; STAILQ_FOREACH(kid, &vd->v_children, v_childlink) { is->is_child[i++].ic_vdev = kid; } } else { is->is_child[0].ic_vdev = vd; } list_insert_tail(&iv->iv_splits, is); } static void vdev_indirect_remap(vdev_t *vd, uint64_t offset, uint64_t asize, void *arg) { list_t stack; spa_t *spa = vd->v_spa; zio_t *zio = arg; remap_segment_t *rs; list_create(&stack, sizeof (remap_segment_t), offsetof(remap_segment_t, rs_node)); rs = rs_alloc(vd, offset, asize, 0); if (rs == NULL) { printf("vdev_indirect_remap: out of memory.\n"); zio->io_error = ENOMEM; } for (; rs != NULL; rs = list_remove_head(&stack)) { vdev_t *v = rs->rs_vd; uint64_t num_entries = 0; /* vdev_indirect_mapping_t *vim = v->v_mapping; */ vdev_indirect_mapping_entry_phys_t *mapping = vdev_indirect_mapping_duplicate_adjacent_entries(v, rs->rs_offset, rs->rs_asize, &num_entries); if (num_entries == 0) zio->io_error = ENOMEM; for (uint64_t i = 0; i < num_entries; i++) { vdev_indirect_mapping_entry_phys_t *m = &mapping[i]; uint64_t size = DVA_GET_ASIZE(&m->vimep_dst); uint64_t dst_offset = DVA_GET_OFFSET(&m->vimep_dst); uint64_t dst_vdev = 
DVA_GET_VDEV(&m->vimep_dst); uint64_t inner_offset = rs->rs_offset - DVA_MAPPING_GET_SRC_OFFSET(m); uint64_t inner_size = MIN(rs->rs_asize, size - inner_offset); vdev_t *dst_v = vdev_lookup_top(spa, dst_vdev); if (dst_v->v_read == vdev_indirect_read) { remap_segment_t *o; o = rs_alloc(dst_v, dst_offset + inner_offset, inner_size, rs->rs_split_offset); if (o == NULL) { printf("vdev_indirect_remap: " "out of memory.\n"); zio->io_error = ENOMEM; break; } list_insert_head(&stack, o); } vdev_indirect_gather_splits(rs->rs_split_offset, dst_v, dst_offset + inner_offset, inner_size, arg); /* * vdev_indirect_gather_splits can have memory * allocation error, we can not recover from it. */ if (zio->io_error != 0) break; rs->rs_offset += inner_size; rs->rs_asize -= inner_size; rs->rs_split_offset += inner_size; } free(mapping); free(rs); if (zio->io_error != 0) break; } list_destroy(&stack); } static void vdev_indirect_map_free(zio_t *zio) { indirect_vsd_t *iv = zio->io_vsd; indirect_split_t *is; while ((is = list_head(&iv->iv_splits)) != NULL) { for (int c = 0; c < is->is_children; c++) { indirect_child_t *ic = &is->is_child[c]; free(ic->ic_data); } list_remove(&iv->iv_splits, is); free(is); } free(iv); } static int vdev_indirect_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { zio_t zio; spa_t *spa = vdev->v_spa; indirect_vsd_t *iv; indirect_split_t *first; int rc = EIO; iv = calloc(1, sizeof(*iv)); if (iv == NULL) return (ENOMEM); list_create(&iv->iv_splits, sizeof (indirect_split_t), offsetof(indirect_split_t, is_node)); bzero(&zio, sizeof(zio)); zio.io_spa = spa; zio.io_bp = (blkptr_t *)bp; zio.io_data = buf; zio.io_size = bytes; zio.io_offset = offset; zio.io_vd = vdev; zio.io_vsd = iv; if (vdev->v_mapping == NULL) { vdev_indirect_config_t *vic; vic = &vdev->vdev_indirect_config; vdev->v_mapping = vdev_indirect_mapping_open(spa, spa->spa_mos, vic->vic_mapping_object); } vdev_indirect_remap(vdev, offset, bytes, &zio); if (zio.io_error != 0) 
return (zio.io_error); first = list_head(&iv->iv_splits); if (first->is_size == zio.io_size) { /* * This is not a split block; we are pointing to the entire * data, which will checksum the same as the original data. * Pass the BP down so that the child i/o can verify the * checksum, and try a different location if available * (e.g. on a mirror). * * While this special case could be handled the same as the * general (split block) case, doing it this way ensures * that the vast majority of blocks on indirect vdevs * (which are not split) are handled identically to blocks * on non-indirect vdevs. This allows us to be less strict * about performance in the general (but rare) case. */ rc = first->is_vdev->v_read(first->is_vdev, zio.io_bp, zio.io_data, first->is_target_offset, bytes); } else { iv->iv_split_block = B_TRUE; /* * Read one copy of each split segment, from the * top-level vdev. Since we don't know the * checksum of each split individually, the child * zio can't ensure that we get the right data. * E.g. if it's a mirror, it will just read from a * random (healthy) leaf vdev. We have to verify * the checksum in vdev_indirect_io_done(). 
*/ for (indirect_split_t *is = list_head(&iv->iv_splits); is != NULL; is = list_next(&iv->iv_splits, is)) { char *ptr = zio.io_data; rc = is->is_vdev->v_read(is->is_vdev, zio.io_bp, ptr + is->is_split_offset, is->is_target_offset, is->is_size); } if (zio_checksum_verify(spa, zio.io_bp, zio.io_data)) rc = ECKSUM; else rc = 0; } vdev_indirect_map_free(&zio); if (rc == 0) rc = zio.io_error; return (rc); } static int vdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { return (vdev_read_phys(vdev, bp, buf, offset + VDEV_LABEL_START_SIZE, bytes)); } static int vdev_missing_read(vdev_t *vdev __unused, const blkptr_t *bp __unused, void *buf __unused, off_t offset __unused, size_t bytes __unused) { return (ENOTSUP); } static int vdev_mirror_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { vdev_t *kid; int rc; rc = EIO; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state != VDEV_STATE_HEALTHY) continue; rc = kid->v_read(kid, bp, buf, offset, bytes); if (!rc) return (0); } return (rc); } static int vdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { vdev_t *kid; /* * Here we should have two kids: * First one which is the one we are replacing and we can trust * only this one to have valid data, but it might not be present. * Second one is that one we are replacing with. It is most likely * healthy, but we can't trust it has needed data, so we won't use it. 
*/ kid = STAILQ_FIRST(&vdev->v_children); if (kid == NULL) return (EIO); if (kid->v_state != VDEV_STATE_HEALTHY) return (EIO); return (kid->v_read(kid, bp, buf, offset, bytes)); } static vdev_t * vdev_find(uint64_t guid) { vdev_t *vdev; STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink) if (vdev->v_guid == guid) return (vdev); return (0); } static vdev_t * vdev_create(uint64_t guid, vdev_read_t *_read) { vdev_t *vdev; vdev_indirect_config_t *vic; vdev = calloc(1, sizeof(vdev_t)); if (vdev != NULL) { STAILQ_INIT(&vdev->v_children); vdev->v_guid = guid; vdev->v_read = _read; /* * root vdev has no read function, we use this fact to * skip setting up data we do not need for root vdev. * We only point root vdev from spa. */ if (_read != NULL) { vic = &vdev->vdev_indirect_config; vic->vic_prev_indirect_vdev = UINT64_MAX; STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink); } } return (vdev); } static void vdev_set_initial_state(vdev_t *vdev, const nvlist_t *nvlist) { uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present; uint64_t is_log; is_offline = is_removed = is_faulted = is_degraded = isnt_present = 0; is_log = 0; (void) nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, NULL, &is_offline, NULL); (void) nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, NULL, &is_removed, NULL); (void) nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, DATA_TYPE_UINT64, NULL, &is_faulted, NULL); (void) nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, NULL, &is_degraded, NULL); (void) nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64, NULL, &isnt_present, NULL); (void) nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, NULL, &is_log, NULL); if (is_offline != 0) vdev->v_state = VDEV_STATE_OFFLINE; else if (is_removed != 0) vdev->v_state = VDEV_STATE_REMOVED; else if (is_faulted != 0) vdev->v_state = VDEV_STATE_FAULTED; else if (is_degraded != 0) vdev->v_state = VDEV_STATE_DEGRADED; else if (isnt_present != 0) vdev->v_state = 
VDEV_STATE_CANT_OPEN; vdev->v_islog = is_log != 0; } static int vdev_init(uint64_t guid, const nvlist_t *nvlist, vdev_t **vdevp) { uint64_t id, ashift, asize, nparity; const char *path; const char *type; int len, pathlen; char *name; vdev_t *vdev; if (nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, NULL, &id, NULL) || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, NULL, &type, &len)) { return (ENOENT); } if (memcmp(type, VDEV_TYPE_MIRROR, len) != 0 && memcmp(type, VDEV_TYPE_DISK, len) != 0 && #ifdef ZFS_TEST memcmp(type, VDEV_TYPE_FILE, len) != 0 && #endif memcmp(type, VDEV_TYPE_RAIDZ, len) != 0 && memcmp(type, VDEV_TYPE_INDIRECT, len) != 0 && memcmp(type, VDEV_TYPE_REPLACING, len) != 0 && memcmp(type, VDEV_TYPE_HOLE, len) != 0) { printf("ZFS: can only boot from disk, mirror, raidz1, " "raidz2 and raidz3 vdevs, got: %.*s\n", len, type); return (EIO); } if (memcmp(type, VDEV_TYPE_MIRROR, len) == 0) vdev = vdev_create(guid, vdev_mirror_read); else if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) vdev = vdev_create(guid, vdev_raidz_read); else if (memcmp(type, VDEV_TYPE_REPLACING, len) == 0) vdev = vdev_create(guid, vdev_replacing_read); else if (memcmp(type, VDEV_TYPE_INDIRECT, len) == 0) { vdev_indirect_config_t *vic; vdev = vdev_create(guid, vdev_indirect_read); if (vdev != NULL) { vdev->v_state = VDEV_STATE_HEALTHY; vic = &vdev->vdev_indirect_config; nvlist_find(nvlist, ZPOOL_CONFIG_INDIRECT_OBJECT, DATA_TYPE_UINT64, NULL, &vic->vic_mapping_object, NULL); nvlist_find(nvlist, ZPOOL_CONFIG_INDIRECT_BIRTHS, DATA_TYPE_UINT64, NULL, &vic->vic_births_object, NULL); nvlist_find(nvlist, ZPOOL_CONFIG_PREV_INDIRECT_VDEV, DATA_TYPE_UINT64, NULL, &vic->vic_prev_indirect_vdev, NULL); } } else if (memcmp(type, VDEV_TYPE_HOLE, len) == 0) { vdev = vdev_create(guid, vdev_missing_read); } else { vdev = vdev_create(guid, vdev_disk_read); } if (vdev == NULL) return (ENOMEM); vdev_set_initial_state(vdev, nvlist); vdev->v_id = id; if (nvlist_find(nvlist, 
ZPOOL_CONFIG_ASHIFT, DATA_TYPE_UINT64, NULL, &ashift, NULL) == 0) vdev->v_ashift = ashift; if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE, DATA_TYPE_UINT64, NULL, &asize, NULL) == 0) { vdev->v_psize = asize + VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; } if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY, DATA_TYPE_UINT64, NULL, &nparity, NULL) == 0) vdev->v_nparity = nparity; if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH, DATA_TYPE_STRING, NULL, &path, &pathlen) == 0) { char prefix[] = "/dev/"; len = strlen(prefix); if (len < pathlen && memcmp(path, prefix, len) == 0) { path += len; pathlen -= len; } name = malloc(pathlen + 1); bcopy(path, name, pathlen); name[pathlen] = '\0'; vdev->v_name = name; } else { name = NULL; if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) { if (vdev->v_nparity < 1 || vdev->v_nparity > 3) { printf("ZFS: invalid raidz parity: %d\n", vdev->v_nparity); return (EIO); } (void) asprintf(&name, "%.*s%d-%" PRIu64, len, type, vdev->v_nparity, id); } else { (void) asprintf(&name, "%.*s-%" PRIu64, len, type, id); } vdev->v_name = name; } *vdevp = vdev; return (0); } /* * Find slot for vdev. We return either NULL to signal to use * STAILQ_INSERT_HEAD, or we return link element to be used with * STAILQ_INSERT_AFTER. */ static vdev_t * vdev_find_previous(vdev_t *top_vdev, vdev_t *vdev) { vdev_t *v, *previous; if (STAILQ_EMPTY(&top_vdev->v_children)) return (NULL); previous = NULL; STAILQ_FOREACH(v, &top_vdev->v_children, v_childlink) { if (v->v_id > vdev->v_id) return (previous); if (v->v_id == vdev->v_id) return (v); if (v->v_id < vdev->v_id) previous = v; } return (previous); } static size_t vdev_child_count(vdev_t *vdev) { vdev_t *v; size_t count; count = 0; STAILQ_FOREACH(v, &vdev->v_children, v_childlink) { count++; } return (count); } /* * Insert vdev into top_vdev children list. List is ordered by v_id. 
*/ static void vdev_insert(vdev_t *top_vdev, vdev_t *vdev) { vdev_t *previous; size_t count; /* * The top level vdev can appear in random order, depending how * the firmware is presenting the disk devices. * However, we will insert vdev to create list ordered by v_id, * so we can use either STAILQ_INSERT_HEAD or STAILQ_INSERT_AFTER * as STAILQ does not have insert before. */ previous = vdev_find_previous(top_vdev, vdev); if (previous == NULL) { STAILQ_INSERT_HEAD(&top_vdev->v_children, vdev, v_childlink); } else if (previous->v_id == vdev->v_id) { /* * This vdev was configured from label config, * do not insert duplicate. */ return; } else { STAILQ_INSERT_AFTER(&top_vdev->v_children, previous, vdev, v_childlink); } count = vdev_child_count(top_vdev); if (top_vdev->v_nchildren < count) top_vdev->v_nchildren = count; } static int vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *top_vdev, *vdev; nvlist_t **kids = NULL; int rc, nkids; /* Get top vdev. */ top_vdev = vdev_find(top_guid); if (top_vdev == NULL) { rc = vdev_init(top_guid, nvlist, &top_vdev); if (rc != 0) return (rc); top_vdev->v_spa = spa; top_vdev->v_top = top_vdev; vdev_insert(spa->spa_root_vdev, top_vdev); } /* Add children if there are any. */ rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, &nkids, &kids, NULL); if (rc == 0) { for (int i = 0; i < nkids; i++) { uint64_t guid; rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); if (rc != 0) goto done; rc = vdev_init(guid, kids[i], &vdev); if (rc != 0) goto done; vdev->v_spa = spa; vdev->v_top = top_vdev; vdev_insert(top_vdev, vdev); } } else { /* * When there are no children, nvlist_find() does return * error, reset it because leaf devices have no children. 
*/ rc = 0; } done: if (kids != NULL) { for (int i = 0; i < nkids; i++) nvlist_destroy(kids[i]); free(kids); } return (rc); } static int vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist) { uint64_t pool_guid, top_guid; nvlist_t *vdevs; int rc; if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, NULL, &pool_guid, NULL) || nvlist_find(nvlist, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64, NULL, &top_guid, NULL) || nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, NULL, &vdevs, NULL)) { printf("ZFS: can't find vdev details\n"); return (ENOENT); } rc = vdev_from_nvlist(spa, top_guid, vdevs); nvlist_destroy(vdevs); return (rc); } static void vdev_set_state(vdev_t *vdev) { vdev_t *kid; int good_kids; int bad_kids; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { vdev_set_state(kid); } /* * A mirror or raidz is healthy if all its kids are healthy. A * mirror is degraded if any of its kids is healthy; a raidz * is degraded if at most nparity kids are offline. */ if (STAILQ_FIRST(&vdev->v_children)) { good_kids = 0; bad_kids = 0; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state == VDEV_STATE_HEALTHY) good_kids++; else bad_kids++; } if (bad_kids == 0) { vdev->v_state = VDEV_STATE_HEALTHY; } else { if (vdev->v_read == vdev_mirror_read) { if (good_kids) { vdev->v_state = VDEV_STATE_DEGRADED; } else { vdev->v_state = VDEV_STATE_OFFLINE; } } else if (vdev->v_read == vdev_raidz_read) { if (bad_kids > vdev->v_nparity) { vdev->v_state = VDEV_STATE_OFFLINE; } else { vdev->v_state = VDEV_STATE_DEGRADED; } } } } } static int vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *vdev; nvlist_t **kids = NULL; int rc, nkids; /* Update top vdev. */ vdev = vdev_find(top_guid); if (vdev != NULL) vdev_set_initial_state(vdev, nvlist); /* Update children if there are any. 
*/ rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, &nkids, &kids, NULL); if (rc == 0) { for (int i = 0; i < nkids; i++) { uint64_t guid; rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); if (rc != 0) break; vdev = vdev_find(guid); if (vdev != NULL) vdev_set_initial_state(vdev, kids[i]); } } else { rc = 0; } if (kids != NULL) { for (int i = 0; i < nkids; i++) nvlist_destroy(kids[i]); free(kids); } return (rc); } static int vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) { uint64_t pool_guid, vdev_children; nvlist_t *vdevs = NULL, **kids = NULL; int rc, nkids; if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, NULL, &pool_guid, NULL) || nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_CHILDREN, DATA_TYPE_UINT64, NULL, &vdev_children, NULL) || nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, NULL, &vdevs, NULL)) { printf("ZFS: can't find vdev details\n"); return (ENOENT); } /* Wrong guid?! */ if (spa->spa_guid != pool_guid) { nvlist_destroy(vdevs); return (EINVAL); } spa->spa_root_vdev->v_nchildren = vdev_children; rc = nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, &nkids, &kids, NULL); nvlist_destroy(vdevs); /* * MOS config has at least one child for root vdev. */ if (rc != 0) return (rc); for (int i = 0; i < nkids; i++) { uint64_t guid; vdev_t *vdev; rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); if (rc != 0) break; vdev = vdev_find(guid); /* * Top level vdev is missing, create it. */ if (vdev == NULL) rc = vdev_from_nvlist(spa, guid, kids[i]); else rc = vdev_update_from_nvlist(guid, kids[i]); if (rc != 0) break; } if (kids != NULL) { for (int i = 0; i < nkids; i++) nvlist_destroy(kids[i]); free(kids); } /* * Re-evaluate top-level vdev state. 
*/ vdev_set_state(spa->spa_root_vdev); return (rc); } static spa_t * spa_find_by_guid(uint64_t guid) { spa_t *spa; STAILQ_FOREACH(spa, &zfs_pools, spa_link) if (spa->spa_guid == guid) return (spa); return (NULL); } static spa_t * spa_find_by_name(const char *name) { spa_t *spa; STAILQ_FOREACH(spa, &zfs_pools, spa_link) if (strcmp(spa->spa_name, name) == 0) return (spa); return (NULL); } static spa_t * spa_find_by_dev(struct zfs_devdesc *dev) { if (dev->dd.d_dev->dv_type != DEVT_ZFS) return (NULL); if (dev->pool_guid == 0) return (STAILQ_FIRST(&zfs_pools)); return (spa_find_by_guid(dev->pool_guid)); } static spa_t * spa_create(uint64_t guid, const char *name) { spa_t *spa; if ((spa = calloc(1, sizeof(spa_t))) == NULL) return (NULL); if ((spa->spa_name = strdup(name)) == NULL) { free(spa); return (NULL); } spa->spa_uberblock = &spa->spa_uberblock_master; spa->spa_mos = &spa->spa_mos_master; spa->spa_guid = guid; spa->spa_root_vdev = vdev_create(guid, NULL); if (spa->spa_root_vdev == NULL) { free(spa->spa_name); free(spa); return (NULL); } spa->spa_root_vdev->v_name = strdup("root"); STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link); return (spa); } static const char * state_name(vdev_state_t state) { static const char *names[] = { "UNKNOWN", "CLOSED", "OFFLINE", "REMOVED", "CANT_OPEN", "FAULTED", "DEGRADED", "ONLINE" }; return (names[state]); } #ifdef BOOT2 #define pager_printf printf #else static int pager_printf(const char *fmt, ...) 
{ char line[80]; va_list args; va_start(args, fmt); vsnprintf(line, sizeof(line), fmt, args); va_end(args); return (pager_output(line)); } #endif #define STATUS_FORMAT " %s %s\n" static int print_state(int indent, const char *name, vdev_state_t state) { int i; char buf[512]; buf[0] = 0; for (i = 0; i < indent; i++) strcat(buf, " "); strcat(buf, name); return (pager_printf(STATUS_FORMAT, buf, state_name(state))); } static int vdev_status(vdev_t *vdev, int indent) { vdev_t *kid; int ret; if (vdev->v_islog) { (void) pager_output(" logs\n"); indent++; } ret = print_state(indent, vdev->v_name, vdev->v_state); if (ret != 0) return (ret); STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { ret = vdev_status(kid, indent + 1); if (ret != 0) return (ret); } return (ret); } static int spa_status(spa_t *spa) { static char bootfs[ZFS_MAXNAMELEN]; uint64_t rootid; vdev_list_t *vlist; vdev_t *vdev; int good_kids, bad_kids, degraded_kids, ret; vdev_state_t state; ret = pager_printf(" pool: %s\n", spa->spa_name); if (ret != 0) return (ret); if (zfs_get_root(spa, &rootid) == 0 && zfs_rlookup(spa, rootid, bootfs) == 0) { if (bootfs[0] == '\0') ret = pager_printf("bootfs: %s\n", spa->spa_name); else ret = pager_printf("bootfs: %s/%s\n", spa->spa_name, bootfs); if (ret != 0) return (ret); } ret = pager_printf("config:\n\n"); if (ret != 0) return (ret); ret = pager_printf(STATUS_FORMAT, "NAME", "STATE"); if (ret != 0) return (ret); good_kids = 0; degraded_kids = 0; bad_kids = 0; vlist = &spa->spa_root_vdev->v_children; STAILQ_FOREACH(vdev, vlist, v_childlink) { if (vdev->v_state == VDEV_STATE_HEALTHY) good_kids++; else if (vdev->v_state == VDEV_STATE_DEGRADED) degraded_kids++; else bad_kids++; } state = VDEV_STATE_CLOSED; if (good_kids > 0 && (degraded_kids + bad_kids) == 0) state = VDEV_STATE_HEALTHY; else if ((good_kids + degraded_kids) > 0) state = VDEV_STATE_DEGRADED; ret = print_state(0, spa->spa_name, state); if (ret != 0) return (ret); STAILQ_FOREACH(vdev, vlist, v_childlink) { 
ret = vdev_status(vdev, 1); if (ret != 0) return (ret); } return (ret); } static int spa_all_status(void) { spa_t *spa; int first = 1, ret = 0; STAILQ_FOREACH(spa, &zfs_pools, spa_link) { if (!first) { ret = pager_printf("\n"); if (ret != 0) return (ret); } first = 0; ret = spa_status(spa); if (ret != 0) return (ret); } return (ret); } static uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset) { uint64_t label_offset; if (l < VDEV_LABELS / 2) label_offset = 0; else label_offset = psize - VDEV_LABELS * sizeof (vdev_label_t); return (offset + l * sizeof (vdev_label_t) + label_offset); } static int vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2) { unsigned int seq1 = 0; unsigned int seq2 = 0; int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg); if (cmp != 0) return (cmp); cmp = AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp); if (cmp != 0) return (cmp); if (MMP_VALID(ub1) && MMP_SEQ_VALID(ub1)) seq1 = MMP_SEQ(ub1); if (MMP_VALID(ub2) && MMP_SEQ_VALID(ub2)) seq2 = MMP_SEQ(ub2); return (AVL_CMP(seq1, seq2)); } static int uberblock_verify(uberblock_t *ub) { if (ub->ub_magic == BSWAP_64((uint64_t)UBERBLOCK_MAGIC)) { byteswap_uint64_array(ub, sizeof (uberblock_t)); } if (ub->ub_magic != UBERBLOCK_MAGIC || !SPA_VERSION_IS_SUPPORTED(ub->ub_version)) return (EINVAL); return (0); } static int vdev_label_read(vdev_t *vd, int l, void *buf, uint64_t offset, size_t size) { blkptr_t bp; off_t off; off = vdev_label_offset(vd->v_psize, l, offset); BP_ZERO(&bp); BP_SET_LSIZE(&bp, size); BP_SET_PSIZE(&bp, size); BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); DVA_SET_OFFSET(BP_IDENTITY(&bp), off); ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); return (vdev_read_phys(vd, &bp, buf, off, size)); } /* * We do need to be sure we write to correct location. * Our vdev label does consist of 4 fields: * pad1 (8k), reserved. * bootenv (8k), checksummed, previously reserved, may contian garbage. 
* vdev_phys (112k), checksummed * uberblock ring (128k), checksummed. * * Since bootenv area may contain garbage, we can not reliably read it, as * we can get checksum errors. * Next best thing is vdev_phys - it is just after bootenv. It still may * be corrupted, but in such case we will miss this one write. */ static int vdev_label_write_validate(vdev_t *vd, int l, uint64_t offset) { uint64_t off, o_phys; void *buf; size_t size = VDEV_PHYS_SIZE; int rc; o_phys = offsetof(vdev_label_t, vl_vdev_phys); off = vdev_label_offset(vd->v_psize, l, o_phys); /* off should be 8K from bootenv */ if (vdev_label_offset(vd->v_psize, l, offset) + VDEV_PAD_SIZE != off) return (EINVAL); buf = malloc(size); if (buf == NULL) return (ENOMEM); /* Read vdev_phys */ rc = vdev_label_read(vd, l, buf, o_phys, size); free(buf); return (rc); } static int vdev_label_write(vdev_t *vd, int l, vdev_boot_envblock_t *be, uint64_t offset) { zio_checksum_info_t *ci; zio_cksum_t cksum; off_t off; size_t size = VDEV_PAD_SIZE; int rc; if (vd->v_phys_write == NULL) return (ENOTSUP); off = vdev_label_offset(vd->v_psize, l, offset); rc = vdev_label_write_validate(vd, l, offset); if (rc != 0) { return (rc); } ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; be->vbe_zbt.zec_magic = ZEC_MAGIC; zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); ci->ci_func[0](be, size, NULL, &cksum); be->vbe_zbt.zec_cksum = cksum; return (vdev_write_phys(vd, be, off, size)); } static int vdev_write_bootenv_impl(vdev_t *vdev, vdev_boot_envblock_t *be) { vdev_t *kid; int rv = 0, rc; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state != VDEV_STATE_HEALTHY) continue; rc = vdev_write_bootenv_impl(kid, be); if (rv == 0) rv = rc; } /* * Non-leaf vdevs do not have v_phys_write. 
*/ if (vdev->v_phys_write == NULL) return (rv); for (int l = 0; l < VDEV_LABELS; l++) { rc = vdev_label_write(vdev, l, be, offsetof(vdev_label_t, vl_be)); if (rc != 0) { printf("failed to write bootenv to %s label %d: %d\n", vdev->v_name ? vdev->v_name : "unknown", l, rc); rv = rc; } } return (rv); } int vdev_write_bootenv(vdev_t *vdev, nvlist_t *nvl) { vdev_boot_envblock_t *be; nvlist_t nv, *nvp; uint64_t version; int rv; if (nvl->nv_size > sizeof(be->vbe_bootenv)) return (E2BIG); version = VB_RAW; nvp = vdev_read_bootenv(vdev); if (nvp != NULL) { nvlist_find(nvp, BOOTENV_VERSION, DATA_TYPE_UINT64, NULL, &version, NULL); nvlist_destroy(nvp); } be = calloc(1, sizeof(*be)); if (be == NULL) return (ENOMEM); be->vbe_version = version; switch (version) { case VB_RAW: /* * If there is no envmap, we will just wipe bootenv. */ nvlist_find(nvl, GRUB_ENVMAP, DATA_TYPE_STRING, NULL, be->vbe_bootenv, NULL); rv = 0; break; case VB_NVLIST: nv.nv_header = nvl->nv_header; nv.nv_asize = nvl->nv_asize; nv.nv_size = nvl->nv_size; bcopy(&nv.nv_header, be->vbe_bootenv, sizeof(nv.nv_header)); nv.nv_data = be->vbe_bootenv + sizeof(nvs_header_t); bcopy(nvl->nv_data, nv.nv_data, nv.nv_size); rv = nvlist_export(&nv); break; default: rv = EINVAL; break; } if (rv == 0) { be->vbe_version = htobe64(be->vbe_version); rv = vdev_write_bootenv_impl(vdev, be); } free(be); return (rv); } /* * Read the bootenv area from pool label, return the nvlist from it. * We return from first successful read. 
*/ nvlist_t * vdev_read_bootenv(vdev_t *vdev) { vdev_t *kid; nvlist_t *benv; vdev_boot_envblock_t *be; char *command; bool ok; int rv; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state != VDEV_STATE_HEALTHY) continue; benv = vdev_read_bootenv(kid); if (benv != NULL) return (benv); } be = malloc(sizeof (*be)); if (be == NULL) return (NULL); rv = 0; for (int l = 0; l < VDEV_LABELS; l++) { rv = vdev_label_read(vdev, l, be, offsetof(vdev_label_t, vl_be), sizeof (*be)); if (rv == 0) break; } if (rv != 0) { free(be); return (NULL); } be->vbe_version = be64toh(be->vbe_version); switch (be->vbe_version) { case VB_RAW: /* * we have textual data in vbe_bootenv, create nvlist * with key "envmap". */ benv = nvlist_create(NV_UNIQUE_NAME); if (benv != NULL) { if (*be->vbe_bootenv == '\0') { nvlist_add_uint64(benv, BOOTENV_VERSION, VB_NVLIST); break; } nvlist_add_uint64(benv, BOOTENV_VERSION, VB_RAW); be->vbe_bootenv[sizeof (be->vbe_bootenv) - 1] = '\0'; nvlist_add_string(benv, GRUB_ENVMAP, be->vbe_bootenv); } break; case VB_NVLIST: benv = nvlist_import(be->vbe_bootenv, sizeof(be->vbe_bootenv)); break; default: command = (char *)be; ok = false; /* Check for legacy zfsbootcfg command string */ for (int i = 0; command[i] != '\0'; i++) { if (iscntrl(command[i])) { ok = false; break; } else { ok = true; } } benv = nvlist_create(NV_UNIQUE_NAME); if (benv != NULL) { if (ok) nvlist_add_string(benv, FREEBSD_BOOTONCE, command); else nvlist_add_uint64(benv, BOOTENV_VERSION, VB_NVLIST); } break; } free(be); return (benv); } static uint64_t vdev_get_label_asize(nvlist_t *nvl) { nvlist_t *vdevs; uint64_t asize; const char *type; int len; asize = 0; /* Get vdev tree */ if (nvlist_find(nvl, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, NULL, &vdevs, NULL) != 0) return (asize); /* * Get vdev type. We will calculate asize for raidz, mirror and disk. * For raidz, the asize is raw size of all children. 
*/ if (nvlist_find(vdevs, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, NULL, &type, &len) != 0) goto done; if (memcmp(type, VDEV_TYPE_MIRROR, len) != 0 && memcmp(type, VDEV_TYPE_DISK, len) != 0 && memcmp(type, VDEV_TYPE_RAIDZ, len) != 0) goto done; if (nvlist_find(vdevs, ZPOOL_CONFIG_ASIZE, DATA_TYPE_UINT64, NULL, &asize, NULL) != 0) goto done; if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) { nvlist_t **kids; int nkids; if (nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, &nkids, &kids, NULL) != 0) { asize = 0; goto done; } asize /= nkids; for (int i = 0; i < nkids; i++) nvlist_destroy(kids[i]); free(kids); } asize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; done: nvlist_destroy(vdevs); return (asize); } static nvlist_t * vdev_label_read_config(vdev_t *vd, uint64_t txg) { vdev_phys_t *label; uint64_t best_txg = 0; uint64_t label_txg = 0; uint64_t asize; nvlist_t *nvl = NULL, *tmp; int error; label = malloc(sizeof (vdev_phys_t)); if (label == NULL) return (NULL); for (int l = 0; l < VDEV_LABELS; l++) { if (vdev_label_read(vd, l, label, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t))) continue; tmp = nvlist_import(label->vp_nvlist, sizeof(label->vp_nvlist)); if (tmp == NULL) continue; error = nvlist_find(tmp, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, NULL, &label_txg, NULL); if (error != 0 || label_txg == 0) { nvlist_destroy(nvl); nvl = tmp; goto done; } if (label_txg <= txg && label_txg > best_txg) { best_txg = label_txg; nvlist_destroy(nvl); nvl = tmp; tmp = NULL; /* * Use asize from pool config. We need this * because we can get bad value from BIOS. 
*/ asize = vdev_get_label_asize(nvl); if (asize != 0) { vd->v_psize = asize; } } nvlist_destroy(tmp); } if (best_txg == 0) { nvlist_destroy(nvl); nvl = NULL; } done: free(label); return (nvl); } static void vdev_uberblock_load(vdev_t *vd, uberblock_t *ub) { uberblock_t *buf; buf = malloc(VDEV_UBERBLOCK_SIZE(vd)); if (buf == NULL) return; for (int l = 0; l < VDEV_LABELS; l++) { for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { if (vdev_label_read(vd, l, buf, VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd))) continue; if (uberblock_verify(buf) != 0) continue; if (vdev_uberblock_compare(buf, ub) > 0) *ub = *buf; } } free(buf); } static int vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, spa_t **spap) { vdev_t vtmp; spa_t *spa; vdev_t *vdev; nvlist_t *nvl; uint64_t val; uint64_t guid, vdev_children; uint64_t pool_txg, pool_guid; const char *pool_name; int rc, namelen; /* * Load the vdev label and figure out which * uberblock is most current. */ memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = _read; vtmp.v_phys_write = _write; vtmp.v_priv = priv; vtmp.v_psize = P2ALIGN(ldi_get_size(priv), (uint64_t)sizeof (vdev_label_t)); /* Test for minimum device size. */ if (vtmp.v_psize < SPA_MINDEVSIZE) return (EIO); nvl = vdev_label_read_config(&vtmp, UINT64_MAX); if (nvl == NULL) return (EIO); if (nvlist_find(nvl, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64, NULL, &val, NULL) != 0) { nvlist_destroy(nvl); return (EIO); } if (!SPA_VERSION_IS_SUPPORTED(val)) { printf("ZFS: unsupported ZFS version %u (should be %u)\n", (unsigned)val, (unsigned)SPA_VERSION); nvlist_destroy(nvl); return (EIO); } /* Check ZFS features for read */ rc = nvlist_check_features_for_read(nvl); if (rc != 0) { nvlist_destroy(nvl); return (EIO); } if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64, NULL, &val, NULL) != 0) { nvlist_destroy(nvl); return (EIO); } if (val == POOL_STATE_DESTROYED) { /* We don't boot only from destroyed pools. 
*/ nvlist_destroy(nvl); return (EIO); } if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, NULL, &pool_txg, NULL) != 0 || nvlist_find(nvl, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, NULL, &pool_guid, NULL) != 0 || nvlist_find(nvl, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING, NULL, &pool_name, &namelen) != 0) { /* * Cache and spare devices end up here - just ignore * them. */ nvlist_destroy(nvl); return (EIO); } /* * Create the pool if this is the first time we've seen it. */ spa = spa_find_by_guid(pool_guid); if (spa == NULL) { char *name; nvlist_find(nvl, ZPOOL_CONFIG_VDEV_CHILDREN, DATA_TYPE_UINT64, NULL, &vdev_children, NULL); name = malloc(namelen + 1); if (name == NULL) { nvlist_destroy(nvl); return (ENOMEM); } bcopy(pool_name, name, namelen); name[namelen] = '\0'; spa = spa_create(pool_guid, name); free(name); if (spa == NULL) { nvlist_destroy(nvl); return (ENOMEM); } spa->spa_root_vdev->v_nchildren = vdev_children; } if (pool_txg > spa->spa_txg) spa->spa_txg = pool_txg; /* * Get the vdev tree and create our in-core copy of it. * If we already have a vdev with this guid, this must * be some kind of alias (overlapping slices, dangerously dedicated * disks etc). */ if (nvlist_find(nvl, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL) != 0) { nvlist_destroy(nvl); return (EIO); } vdev = vdev_find(guid); /* Has this vdev already been inited? */ if (vdev && vdev->v_phys_read) { nvlist_destroy(nvl); return (EIO); } rc = vdev_init_from_label(spa, nvl); nvlist_destroy(nvl); if (rc != 0) return (rc); /* * We should already have created an incomplete vdev for this * vdev. Find it and initialise it with our read proc. */ vdev = vdev_find(guid); if (vdev != NULL) { vdev->v_phys_read = _read; vdev->v_phys_write = _write; vdev->v_priv = priv; vdev->v_psize = vtmp.v_psize; /* * If no other state is set, mark vdev healthy. 
*/ if (vdev->v_state == VDEV_STATE_UNKNOWN) vdev->v_state = VDEV_STATE_HEALTHY; } else { printf("ZFS: inconsistent nvlist contents\n"); return (EIO); } if (vdev->v_islog) spa->spa_with_log = vdev->v_islog; /* * Re-evaluate top-level vdev state. */ vdev_set_state(vdev->v_top); /* * Ok, we are happy with the pool so far. Lets find * the best uberblock and then we can actually access * the contents of the pool. */ vdev_uberblock_load(vdev, spa->spa_uberblock); if (spap != NULL) *spap = spa; return (0); } static int ilog2(int n) { int v; for (v = 0; v < 32; v++) if (n == (1 << v)) return (v); return (-1); } static int zio_read_gang(const spa_t *spa, const blkptr_t *bp, void *buf) { blkptr_t gbh_bp; zio_gbh_phys_t zio_gb; char *pbuf; int i; /* Artificial BP for gang block header. */ gbh_bp = *bp; BP_SET_PSIZE(&gbh_bp, SPA_GANGBLOCKSIZE); BP_SET_LSIZE(&gbh_bp, SPA_GANGBLOCKSIZE); BP_SET_CHECKSUM(&gbh_bp, ZIO_CHECKSUM_GANG_HEADER); BP_SET_COMPRESS(&gbh_bp, ZIO_COMPRESS_OFF); for (i = 0; i < SPA_DVAS_PER_BP; i++) DVA_SET_GANG(&gbh_bp.blk_dva[i], 0); /* Read gang header block using the artificial BP. 
*/ if (zio_read(spa, &gbh_bp, &zio_gb)) return (EIO); pbuf = buf; for (i = 0; i < SPA_GBH_NBLKPTRS; i++) { blkptr_t *gbp = &zio_gb.zg_blkptr[i]; if (BP_IS_HOLE(gbp)) continue; if (zio_read(spa, gbp, pbuf)) return (EIO); pbuf += BP_GET_PSIZE(gbp); } if (zio_checksum_verify(spa, bp, buf)) return (EIO); return (0); } static int zio_read(const spa_t *spa, const blkptr_t *bp, void *buf) { int cpfunc = BP_GET_COMPRESS(bp); uint64_t align, size; void *pbuf; int i, error; /* * Process data embedded in block pointer */ if (BP_IS_EMBEDDED(bp)) { ASSERT(BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA); size = BPE_GET_PSIZE(bp); ASSERT(size <= BPE_PAYLOAD_SIZE); if (cpfunc != ZIO_COMPRESS_OFF) pbuf = malloc(size); else pbuf = buf; if (pbuf == NULL) return (ENOMEM); decode_embedded_bp_compressed(bp, pbuf); error = 0; if (cpfunc != ZIO_COMPRESS_OFF) { error = zio_decompress_data(cpfunc, pbuf, size, buf, BP_GET_LSIZE(bp)); free(pbuf); } if (error != 0) printf("ZFS: i/o error - unable to decompress " "block pointer data, error %d\n", error); return (error); } error = EIO; for (i = 0; i < SPA_DVAS_PER_BP; i++) { const dva_t *dva = &bp->blk_dva[i]; vdev_t *vdev; vdev_list_t *vlist; uint64_t vdevid; off_t offset; if (!dva->dva_word[0] && !dva->dva_word[1]) continue; vdevid = DVA_GET_VDEV(dva); offset = DVA_GET_OFFSET(dva); vlist = &spa->spa_root_vdev->v_children; STAILQ_FOREACH(vdev, vlist, v_childlink) { if (vdev->v_id == vdevid) break; } if (!vdev || !vdev->v_read) continue; size = BP_GET_PSIZE(bp); if (vdev->v_read == vdev_raidz_read) { align = 1ULL << vdev->v_ashift; if (P2PHASE(size, align) != 0) size = P2ROUNDUP(size, align); } if (size != BP_GET_PSIZE(bp) || cpfunc != ZIO_COMPRESS_OFF) pbuf = malloc(size); else pbuf = buf; if (pbuf == NULL) { error = ENOMEM; break; } if (DVA_GET_GANG(dva)) error = zio_read_gang(spa, bp, pbuf); else error = vdev->v_read(vdev, bp, pbuf, offset, size); if (error == 0) { if (cpfunc != ZIO_COMPRESS_OFF) error = zio_decompress_data(cpfunc, pbuf, 
BP_GET_PSIZE(bp), buf, BP_GET_LSIZE(bp)); else if (size != BP_GET_PSIZE(bp)) bcopy(pbuf, buf, BP_GET_PSIZE(bp)); } else { printf("zio_read error: %d\n", error); } if (buf != pbuf) free(pbuf); if (error == 0) break; } if (error != 0) printf("ZFS: i/o error - all block copies unavailable\n"); return (error); } static int dnode_read(const spa_t *spa, const dnode_phys_t *dnode, off_t offset, void *buf, size_t buflen) { int ibshift = dnode->dn_indblkshift - SPA_BLKPTRSHIFT; int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; int nlevels = dnode->dn_nlevels; int i, rc; if (bsize > SPA_MAXBLOCKSIZE) { printf("ZFS: I/O error - blocks larger than %llu are not " "supported\n", SPA_MAXBLOCKSIZE); return (EIO); } /* * Note: bsize may not be a power of two here so we need to do an * actual divide rather than a bitshift. */ while (buflen > 0) { uint64_t bn = offset / bsize; int boff = offset % bsize; int ibn; const blkptr_t *indbp; blkptr_t bp; if (bn > dnode->dn_maxblkid) return (EIO); if (dnode == dnode_cache_obj && bn == dnode_cache_bn) goto cached; indbp = dnode->dn_blkptr; for (i = 0; i < nlevels; i++) { /* * Copy the bp from the indirect array so that * we can re-use the scratch buffer for multi-level * objects. */ ibn = bn >> ((nlevels - i - 1) * ibshift); ibn &= ((1 << ibshift) - 1); bp = indbp[ibn]; if (BP_IS_HOLE(&bp)) { memset(dnode_cache_buf, 0, bsize); break; } rc = zio_read(spa, &bp, dnode_cache_buf); if (rc) return (rc); indbp = (const blkptr_t *) dnode_cache_buf; } dnode_cache_obj = dnode; dnode_cache_bn = bn; cached: /* * The buffer contains our data block. Copy what we * need from it and loop. */ i = bsize - boff; if (i > buflen) i = buflen; memcpy(buf, &dnode_cache_buf[boff], i); buf = ((char *)buf) + i; offset += i; buflen -= i; } return (0); } /* * Lookup a value in a microzap directory. 
*/ static int mzap_lookup(const mzap_phys_t *mz, size_t size, const char *name, uint64_t *value) { const mzap_ent_phys_t *mze; int chunks, i; /* * Microzap objects use exactly one block. Read the whole * thing. */ chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; if (strcmp(mze->mze_name, name) == 0) { *value = mze->mze_value; return (0); } } return (ENOENT); } /* * Compare a name with a zap leaf entry. Return non-zero if the name * matches. */ static int fzap_name_equal(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, const char *name) { size_t namelen; const zap_leaf_chunk_t *nc; const char *p; namelen = zc->l_entry.le_name_numints; nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk); p = name; while (namelen > 0) { size_t len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; if (memcmp(p, nc->l_array.la_array, len)) return (0); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next); } return (1); } /* * Extract a uint64_t value from a zap leaf entry. */ static uint64_t fzap_leaf_value(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc) { const zap_leaf_chunk_t *vc; int i; uint64_t value; const uint8_t *p; vc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_value_chunk); for (i = 0, value = 0, p = vc->l_array.la_array; i < 8; i++) { value = (value << 8) | p[i]; } return (value); } static void stv(int len, void *addr, uint64_t value) { switch (len) { case 1: *(uint8_t *)addr = value; return; case 2: *(uint16_t *)addr = value; return; case 4: *(uint32_t *)addr = value; return; case 8: *(uint64_t *)addr = value; return; } } /* * Extract a array from a zap leaf entry. 
*/ static void fzap_leaf_array(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, uint64_t integer_size, uint64_t num_integers, void *buf) { uint64_t array_int_len = zc->l_entry.le_value_intlen; uint64_t value = 0; uint64_t *u64 = buf; char *p = buf; int len = MIN(zc->l_entry.le_value_numints, num_integers); int chunk = zc->l_entry.le_value_chunk; int byten = 0; if (integer_size == 8 && len == 1) { *u64 = fzap_leaf_value(zl, zc); return; } while (len > 0) { struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(zl, chunk).l_array; int i; ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(zl)); for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) { value = (value << 8) | la->la_array[i]; byten++; if (byten == array_int_len) { stv(integer_size, p, value); byten = 0; len--; if (len == 0) return; p += integer_size; } } chunk = la->la_next; } } static int fzap_check_size(uint64_t integer_size, uint64_t num_integers) { switch (integer_size) { case 1: case 2: case 4: case 8: break; default: return (EINVAL); } if (integer_size * num_integers > ZAP_MAXVALUELEN) return (E2BIG); return (0); } static void zap_leaf_free(zap_leaf_t *leaf) { free(leaf->l_phys); free(leaf); } static int zap_get_leaf_byblk(fat_zap_t *zap, uint64_t blk, zap_leaf_t **lp) { int bs = FZAP_BLOCK_SHIFT(zap); int err; *lp = malloc(sizeof(**lp)); if (*lp == NULL) return (ENOMEM); (*lp)->l_bs = bs; (*lp)->l_phys = malloc(1 << bs); if ((*lp)->l_phys == NULL) { free(*lp); return (ENOMEM); } err = dnode_read(zap->zap_spa, zap->zap_dnode, blk << bs, (*lp)->l_phys, 1 << bs); if (err != 0) { zap_leaf_free(*lp); } return (err); } static int zap_table_load(fat_zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp) { int bs = FZAP_BLOCK_SHIFT(zap); uint64_t blk = idx >> (bs - 3); uint64_t off = idx & ((1 << (bs - 3)) - 1); uint64_t *buf; int rc; buf = malloc(1 << zap->zap_block_shift); if (buf == NULL) return (ENOMEM); rc = dnode_read(zap->zap_spa, zap->zap_dnode, (tbl->zt_blk + blk) << bs, buf, 1 << zap->zap_block_shift); if (rc 
== 0) *valp = buf[off]; free(buf); return (rc); } static int zap_idx_to_blk(fat_zap_t *zap, uint64_t idx, uint64_t *valp) { if (zap->zap_phys->zap_ptrtbl.zt_numblks == 0) { *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx); return (0); } else { return (zap_table_load(zap, &zap->zap_phys->zap_ptrtbl, idx, valp)); } } #define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n)))) static int zap_deref_leaf(fat_zap_t *zap, uint64_t h, zap_leaf_t **lp) { uint64_t idx, blk; int err; idx = ZAP_HASH_IDX(h, zap->zap_phys->zap_ptrtbl.zt_shift); err = zap_idx_to_blk(zap, idx, &blk); if (err != 0) return (err); return (zap_get_leaf_byblk(zap, blk, lp)); } #define CHAIN_END 0xffff /* end of the chunk chain */ #define LEAF_HASH(l, h) \ ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \ ((h) >> \ (64 - ZAP_LEAF_HASH_SHIFT(l) - (l)->l_phys->l_hdr.lh_prefix_len))) #define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)]) static int zap_leaf_lookup(zap_leaf_t *zl, uint64_t hash, const char *name, uint64_t integer_size, uint64_t num_integers, void *value) { int rc; uint16_t *chunkp; struct zap_leaf_entry *le; /* * Make sure this chunk matches our hash. */ if (zl->l_phys->l_hdr.lh_prefix_len > 0 && zl->l_phys->l_hdr.lh_prefix != hash >> (64 - zl->l_phys->l_hdr.lh_prefix_len)) return (EIO); rc = ENOENT; for (chunkp = LEAF_HASH_ENTPTR(zl, hash); *chunkp != CHAIN_END; chunkp = &le->le_next) { zap_leaf_chunk_t *zc; uint16_t chunk = *chunkp; le = ZAP_LEAF_ENTRY(zl, chunk); if (le->le_hash != hash) continue; zc = &ZAP_LEAF_CHUNK(zl, chunk); if (fzap_name_equal(zl, zc, name)) { if (zc->l_entry.le_value_intlen > integer_size) { rc = EINVAL; } else { fzap_leaf_array(zl, zc, integer_size, num_integers, value); rc = 0; } break; } } return (rc); } /* * Lookup a value in a fatzap directory. 
*/ static int fzap_lookup(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh, const char *name, uint64_t integer_size, uint64_t num_integers, void *value) { int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; fat_zap_t z; zap_leaf_t *zl; uint64_t hash; int rc; if (zh->zap_magic != ZAP_MAGIC) return (EIO); if ((rc = fzap_check_size(integer_size, num_integers)) != 0) { return (rc); } z.zap_block_shift = ilog2(bsize); z.zap_phys = zh; z.zap_spa = spa; z.zap_dnode = dnode; hash = zap_hash(zh->zap_salt, name); rc = zap_deref_leaf(&z, hash, &zl); if (rc != 0) return (rc); rc = zap_leaf_lookup(zl, hash, name, integer_size, num_integers, value); zap_leaf_free(zl); return (rc); } /* * Lookup a name in a zap object and return its value as a uint64_t. */ static int zap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t integer_size, uint64_t num_integers, void *value) { int rc; zap_phys_t *zap; size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; zap = malloc(size); if (zap == NULL) return (ENOMEM); rc = dnode_read(spa, dnode, 0, zap, size); if (rc) goto done; switch (zap->zap_block_type) { case ZBT_MICRO: rc = mzap_lookup((const mzap_phys_t *)zap, size, name, value); break; case ZBT_HEADER: rc = fzap_lookup(spa, dnode, zap, name, integer_size, num_integers, value); break; default: printf("ZFS: invalid zap_type=%" PRIx64 "\n", zap->zap_block_type); rc = EIO; } done: free(zap); return (rc); } /* * List a microzap directory. */ static int mzap_list(const mzap_phys_t *mz, size_t size, int (*callback)(const char *, uint64_t)) { const mzap_ent_phys_t *mze; int chunks, i, rc; /* * Microzap objects use exactly one block. Read the whole * thing. */ rc = 0; chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; if (mze->mze_name[0]) { rc = callback(mze->mze_name, mze->mze_value); if (rc != 0) break; } } return (rc); } /* * List a fatzap directory. 
*/
static int
fzap_list(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh,
    int (*callback)(const char *, uint64_t))
{
	int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
	fat_zap_t z;
	uint64_t i;
	int j, rc, shift;

	if (zh->zap_magic != ZAP_MAGIC)
		return (EIO);

	/* ilog2() returns -1 for a block size that is not a power of two. */
	shift = ilog2(bsize);
	if (shift < 0)
		return (EIO);

	z.zap_block_shift = shift;
	z.zap_phys = zh;

	/*
	 * This assumes that the leaf blocks start at block 1. The
	 * documentation isn't exactly clear on this.
	 */
	zap_leaf_t zl;
	zl.l_bs = z.zap_block_shift;
	zl.l_phys = malloc(bsize);
	if (zl.l_phys == NULL)
		return (ENOMEM);

	for (i = 0; i < zh->zap_num_leafs; i++) {
		off_t off = ((off_t)(i + 1)) << zl.l_bs;
		char name[256], *p;
		uint64_t value;

		if (dnode_read(spa, dnode, off, zl.l_phys, bsize)) {
			free(zl.l_phys);
			return (EIO);
		}

		for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) {
			zap_leaf_chunk_t *zc, *nc;
			int namelen;

			zc = &ZAP_LEAF_CHUNK(&zl, j);
			if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
				continue;
			/* Leave room for the terminator written below. */
			namelen = zc->l_entry.le_name_numints;
			if (namelen > sizeof(name) - 1)
				namelen = sizeof(name) - 1;

			/*
			 * Paste the name back together.
			 */
			nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
			p = name;
			while (namelen > 0) {
				int len;

				len = namelen;
				if (len > ZAP_LEAF_ARRAY_BYTES)
					len = ZAP_LEAF_ARRAY_BYTES;
				memcpy(p, nc->l_array.la_array, len);
				p += len;
				namelen -= len;
				nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
			}
			/*
			 * Never hand the callback an unterminated (or, for
			 * a zero-length name, uninitialised) buffer.
			 */
			*p = '\0';

			/*
			 * Assume the first eight bytes of the value are
			 * a uint64_t.
			 */
			value = fzap_leaf_value(&zl, zc);

			/* printf("%s 0x%jx\n", name, (uintmax_t)value); */
			rc = callback((const char *)name, value);
			if (rc != 0) {
				free(zl.l_phys);
				return (rc);
			}
		}
	}

	free(zl.l_phys);
	return (0);
}

/* Listing callback: print the entry name on its own line. */
static int
zfs_printf(const char *name, uint64_t value __unused)
{

	printf("%s\n", name);

	return (0);
}

/*
 * List a zap directory.
*/ static int zap_list(const spa_t *spa, const dnode_phys_t *dnode) { zap_phys_t *zap; size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; int rc; zap = malloc(size); if (zap == NULL) return (ENOMEM); rc = dnode_read(spa, dnode, 0, zap, size); if (rc == 0) { if (zap->zap_block_type == ZBT_MICRO) rc = mzap_list((const mzap_phys_t *)zap, size, zfs_printf); else rc = fzap_list(spa, dnode, zap, zfs_printf); } free(zap); return (rc); } static int objset_get_dnode(const spa_t *spa, const objset_phys_t *os, uint64_t objnum, dnode_phys_t *dnode) { off_t offset; offset = objnum * sizeof(dnode_phys_t); return dnode_read(spa, &os->os_meta_dnode, offset, dnode, sizeof(dnode_phys_t)); } /* * Lookup a name in a microzap directory. */ static int mzap_rlookup(const mzap_phys_t *mz, size_t size, char *name, uint64_t value) { const mzap_ent_phys_t *mze; int chunks, i; /* * Microzap objects use exactly one block. Read the whole * thing. */ chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; if (value == mze->mze_value) { strcpy(name, mze->mze_name); return (0); } } return (ENOENT); } static void fzap_name_copy(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, char *name) { size_t namelen; const zap_leaf_chunk_t *nc; char *p; namelen = zc->l_entry.le_name_numints; nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk); p = name; while (namelen > 0) { size_t len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next); } *p = '\0'; } static int fzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, zap_phys_t *zh, char *name, uint64_t value) { int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; fat_zap_t z; uint64_t i; int j, rc; if (zh->zap_magic != ZAP_MAGIC) return (EIO); z.zap_block_shift = ilog2(bsize); z.zap_phys = zh; /* * This assumes that the leaf blocks start at block 1. 
The * documentation isn't exactly clear on this. */ zap_leaf_t zl; zl.l_bs = z.zap_block_shift; zl.l_phys = malloc(bsize); if (zl.l_phys == NULL) return (ENOMEM); for (i = 0; i < zh->zap_num_leafs; i++) { off_t off = ((off_t)(i + 1)) << zl.l_bs; rc = dnode_read(spa, dnode, off, zl.l_phys, bsize); if (rc != 0) goto done; for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) { zap_leaf_chunk_t *zc; zc = &ZAP_LEAF_CHUNK(&zl, j); if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) continue; if (zc->l_entry.le_value_intlen != 8 || zc->l_entry.le_value_numints != 1) continue; if (fzap_leaf_value(&zl, zc) == value) { fzap_name_copy(&zl, zc, name); goto done; } } } rc = ENOENT; done: free(zl.l_phys); return (rc); } static int zap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name, uint64_t value) { zap_phys_t *zap; size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; int rc; zap = malloc(size); if (zap == NULL) return (ENOMEM); rc = dnode_read(spa, dnode, 0, zap, size); if (rc == 0) { if (zap->zap_block_type == ZBT_MICRO) rc = mzap_rlookup((const mzap_phys_t *)zap, size, name, value); else rc = fzap_rlookup(spa, dnode, zap, name, value); } free(zap); return (rc); } static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result) { char name[256]; char component[256]; uint64_t dir_obj, parent_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dataset, dir, parent; dsl_dir_phys_t *dd; dsl_dataset_phys_t *ds; char *p; int len; p = &name[sizeof(name) - 1]; *p = '\0'; if (objset_get_dnode(spa, spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; dir_obj = ds->ds_dir_obj; for (;;) { if (objset_get_dnode(spa, spa->spa_mos, dir_obj, &dir) != 0) return (EIO); dd = (dsl_dir_phys_t *)&dir.dn_bonus; /* Actual loop condition. 
*/ parent_obj = dd->dd_parent_obj; if (parent_obj == 0) break; if (objset_get_dnode(spa, spa->spa_mos, parent_obj, &parent) != 0) return (EIO); dd = (dsl_dir_phys_t *)&parent.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) return (EIO); if (zap_rlookup(spa, &child_dir_zap, component, dir_obj) != 0) return (EIO); len = strlen(component); p -= len; memcpy(p, component, len); --p; *p = '/'; /* Actual loop iteration. */ dir_obj = parent_obj; } if (*p != '\0') ++p; strcpy(result, p); return (0); } static int zfs_lookup_dataset(const spa_t *spa, const char *name, uint64_t *objnum) { char element[256]; uint64_t dir_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dir; dsl_dir_phys_t *dd; const char *p, *q; if (objset_get_dnode(spa, spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) return (EIO); if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, sizeof (dir_obj), 1, &dir_obj)) return (EIO); p = name; for (;;) { if (objset_get_dnode(spa, spa->spa_mos, dir_obj, &dir)) return (EIO); dd = (dsl_dir_phys_t *)&dir.dn_bonus; while (*p == '/') p++; /* Actual loop condition #1. */ if (*p == '\0') break; q = strchr(p, '/'); if (q) { memcpy(element, p, q - p); element[q - p] = '\0'; p = q + 1; } else { strcpy(element, p); p += strlen(p); } child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) return (EIO); /* Actual loop condition #2. 
*/ if (zap_lookup(spa, &child_dir_zap, element, sizeof (dir_obj), 1, &dir_obj) != 0) return (ENOENT); } *objnum = dd->dd_head_dataset_obj; return (0); } #ifndef BOOT2 static int zfs_list_dataset(const spa_t *spa, uint64_t objnum/*, int pos, char *entry*/) { uint64_t dir_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dir, dataset; dsl_dataset_phys_t *ds; dsl_dir_phys_t *dd; if (objset_get_dnode(spa, spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; dir_obj = ds->ds_dir_obj; if (objset_get_dnode(spa, spa->spa_mos, dir_obj, &dir)) { printf("ZFS: can't find dirobj %ju\n", (uintmax_t)dir_obj); return (EIO); } dd = (dsl_dir_phys_t *)&dir.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) { printf("ZFS: can't find child zap %ju\n", (uintmax_t)dir_obj); return (EIO); } return (zap_list(spa, &child_dir_zap) != 0); } int zfs_callback_dataset(const spa_t *spa, uint64_t objnum, int (*callback)(const char *, uint64_t)) { uint64_t dir_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dir, dataset; dsl_dataset_phys_t *ds; dsl_dir_phys_t *dd; zap_phys_t *zap; size_t size; int err; err = objset_get_dnode(spa, spa->spa_mos, objnum, &dataset); if (err != 0) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (err); } ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; dir_obj = ds->ds_dir_obj; err = objset_get_dnode(spa, spa->spa_mos, dir_obj, &dir); if (err != 0) { printf("ZFS: can't find dirobj %ju\n", (uintmax_t)dir_obj); return (err); } dd = (dsl_dir_phys_t *)&dir.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; err = objset_get_dnode(spa, spa->spa_mos, child_dir_zapobj, &child_dir_zap); if (err != 0) { printf("ZFS: can't find child zap %ju\n", (uintmax_t)dir_obj); return (err); } size = child_dir_zap.dn_datablkszsec << SPA_MINBLOCKSHIFT; zap = malloc(size); if (zap != 
NULL) { err = dnode_read(spa, &child_dir_zap, 0, zap, size); if (err != 0) goto done; if (zap->zap_block_type == ZBT_MICRO) err = mzap_list((const mzap_phys_t *)zap, size, callback); else err = fzap_list(spa, &child_dir_zap, zap, callback); } else { err = ENOMEM; } done: free(zap); return (err); } #endif /* * Find the object set given the object number of its dataset object * and return its details in *objset */ static int zfs_mount_dataset(const spa_t *spa, uint64_t objnum, objset_phys_t *objset) { dnode_phys_t dataset; dsl_dataset_phys_t *ds; if (objset_get_dnode(spa, spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; if (zio_read(spa, &ds->ds_bp, objset)) { printf("ZFS: can't read object set for dataset %ju\n", (uintmax_t)objnum); return (EIO); } return (0); } /* * Find the object set pointed to by the BOOTFS property or the root * dataset if there is none and return its details in *objset */ static int zfs_get_root(const spa_t *spa, uint64_t *objid) { dnode_phys_t dir, propdir; uint64_t props, bootfs, root; *objid = 0; /* * Start with the MOS directory object. */ if (objset_get_dnode(spa, spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) { printf("ZFS: can't read MOS object directory\n"); return (EIO); } /* * Lookup the pool_props and see if we can find a bootfs. 
*/ if (zap_lookup(spa, &dir, DMU_POOL_PROPS, sizeof(props), 1, &props) == 0 && objset_get_dnode(spa, spa->spa_mos, props, &propdir) == 0 && zap_lookup(spa, &propdir, "bootfs", sizeof(bootfs), 1, &bootfs) == 0 && bootfs != 0) { *objid = bootfs; return (0); } /* * Lookup the root dataset directory */ if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, sizeof(root), 1, &root) || objset_get_dnode(spa, spa->spa_mos, root, &dir)) { printf("ZFS: can't find root dsl_dir\n"); return (EIO); } /* * Use the information from the dataset directory's bonus buffer * to find the dataset object and from that the object set itself. */ dsl_dir_phys_t *dd = (dsl_dir_phys_t *)&dir.dn_bonus; *objid = dd->dd_head_dataset_obj; return (0); } static int -zfs_mount(const spa_t *spa, uint64_t rootobj, struct zfsmount *mount) +zfs_mount_impl(const spa_t *spa, uint64_t rootobj, struct zfsmount *mount) { mount->spa = spa; /* * Find the root object set if not explicitly provided */ if (rootobj == 0 && zfs_get_root(spa, &rootobj)) { printf("ZFS: can't find root filesystem\n"); return (EIO); } if (zfs_mount_dataset(spa, rootobj, &mount->objset)) { printf("ZFS: can't open root filesystem\n"); return (EIO); } mount->rootobj = rootobj; return (0); } /* * callback function for feature name checks. */ static int check_feature(const char *name, uint64_t value) { int i; if (value == 0) return (0); if (name[0] == '\0') return (0); for (i = 0; features_for_read[i] != NULL; i++) { if (strcmp(name, features_for_read[i]) == 0) return (0); } printf("ZFS: unsupported feature: %s\n", name); return (EIO); } /* * Checks whether the MOS features that are active are supported. 
*/ static int check_mos_features(const spa_t *spa) { dnode_phys_t dir; zap_phys_t *zap; uint64_t objnum; size_t size; int rc; if ((rc = objset_get_dnode(spa, spa->spa_mos, DMU_OT_OBJECT_DIRECTORY, &dir)) != 0) return (rc); if ((rc = zap_lookup(spa, &dir, DMU_POOL_FEATURES_FOR_READ, sizeof (objnum), 1, &objnum)) != 0) { /* * It is older pool without features. As we have already * tested the label, just return without raising the error. */ return (0); } if ((rc = objset_get_dnode(spa, spa->spa_mos, objnum, &dir)) != 0) return (rc); if (dir.dn_type != DMU_OTN_ZAP_METADATA) return (EIO); size = dir.dn_datablkszsec << SPA_MINBLOCKSHIFT; zap = malloc(size); if (zap == NULL) return (ENOMEM); if (dnode_read(spa, &dir, 0, zap, size)) { free(zap); return (EIO); } if (zap->zap_block_type == ZBT_MICRO) rc = mzap_list((const mzap_phys_t *)zap, size, check_feature); else rc = fzap_list(spa, &dir, zap, check_feature); free(zap); return (rc); } static int load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) { dnode_phys_t dir; size_t size; int rc; char *nv; *value = NULL; if ((rc = objset_get_dnode(spa, spa->spa_mos, obj, &dir)) != 0) return (rc); if (dir.dn_type != DMU_OT_PACKED_NVLIST && dir.dn_bonustype != DMU_OT_PACKED_NVLIST_SIZE) { return (EIO); } if (dir.dn_bonuslen != sizeof (uint64_t)) return (EIO); size = *(uint64_t *)DN_BONUS(&dir); nv = malloc(size); if (nv == NULL) return (ENOMEM); rc = dnode_read(spa, &dir, 0, nv, size); if (rc != 0) { free(nv); nv = NULL; return (rc); } *value = nvlist_import(nv, size); free(nv); return (rc); } static int zfs_spa_init(spa_t *spa) { struct uberblock checkpoint; dnode_phys_t dir; uint64_t config_object; nvlist_t *nvlist; int rc; if (zio_read(spa, &spa->spa_uberblock->ub_rootbp, spa->spa_mos)) { printf("ZFS: can't read MOS of pool %s\n", spa->spa_name); return (EIO); } if (spa->spa_mos->os_type != DMU_OST_META) { printf("ZFS: corrupted MOS of pool %s\n", spa->spa_name); return (EIO); } if (objset_get_dnode(spa, &spa->spa_mos_master, 
DMU_POOL_DIRECTORY_OBJECT, &dir)) { printf("ZFS: failed to read pool %s directory object\n", spa->spa_name); return (EIO); } /* this is allowed to fail, older pools do not have salt */ rc = zap_lookup(spa, &dir, DMU_POOL_CHECKSUM_SALT, 1, sizeof (spa->spa_cksum_salt.zcs_bytes), spa->spa_cksum_salt.zcs_bytes); rc = check_mos_features(spa); if (rc != 0) { printf("ZFS: pool %s is not supported\n", spa->spa_name); return (rc); } rc = zap_lookup(spa, &dir, DMU_POOL_CONFIG, sizeof (config_object), 1, &config_object); if (rc != 0) { printf("ZFS: can not read MOS %s\n", DMU_POOL_CONFIG); return (EIO); } rc = load_nvlist(spa, config_object, &nvlist); if (rc != 0) return (rc); rc = zap_lookup(spa, &dir, DMU_POOL_ZPOOL_CHECKPOINT, sizeof(uint64_t), sizeof(checkpoint) / sizeof(uint64_t), &checkpoint); if (rc == 0 && checkpoint.ub_checkpoint_txg != 0) { memcpy(&spa->spa_uberblock_checkpoint, &checkpoint, sizeof(checkpoint)); if (zio_read(spa, &spa->spa_uberblock_checkpoint.ub_rootbp, &spa->spa_mos_checkpoint)) { printf("ZFS: can not read checkpoint data.\n"); return (EIO); } } /* * Update vdevs from MOS config. Note, we do skip encoding bytes * here. See also vdev_label_read_config(). 
*/ rc = vdev_init_from_nvlist(spa, nvlist); nvlist_destroy(nvlist); return (rc); } static int zfs_dnode_stat(const spa_t *spa, dnode_phys_t *dn, struct stat *sb) { if (dn->dn_bonustype != DMU_OT_SA) { znode_phys_t *zp = (znode_phys_t *)dn->dn_bonus; sb->st_mode = zp->zp_mode; sb->st_uid = zp->zp_uid; sb->st_gid = zp->zp_gid; sb->st_size = zp->zp_size; } else { sa_hdr_phys_t *sahdrp; int hdrsize; size_t size = 0; void *buf = NULL; if (dn->dn_bonuslen != 0) sahdrp = (sa_hdr_phys_t *)DN_BONUS(dn); else { if ((dn->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0) { blkptr_t *bp = DN_SPILL_BLKPTR(dn); int error; size = BP_GET_LSIZE(bp); buf = malloc(size); if (buf == NULL) error = ENOMEM; else error = zio_read(spa, bp, buf); if (error != 0) { free(buf); return (error); } sahdrp = buf; } else { return (EIO); } } hdrsize = SA_HDR_SIZE(sahdrp); sb->st_mode = *(uint64_t *)((char *)sahdrp + hdrsize + SA_MODE_OFFSET); sb->st_uid = *(uint64_t *)((char *)sahdrp + hdrsize + SA_UID_OFFSET); sb->st_gid = *(uint64_t *)((char *)sahdrp + hdrsize + SA_GID_OFFSET); sb->st_size = *(uint64_t *)((char *)sahdrp + hdrsize + SA_SIZE_OFFSET); free(buf); } return (0); } static int zfs_dnode_readlink(const spa_t *spa, dnode_phys_t *dn, char *path, size_t psize) { int rc = 0; if (dn->dn_bonustype == DMU_OT_SA) { sa_hdr_phys_t *sahdrp = NULL; size_t size = 0; void *buf = NULL; int hdrsize; char *p; if (dn->dn_bonuslen != 0) { sahdrp = (sa_hdr_phys_t *)DN_BONUS(dn); } else { blkptr_t *bp; if ((dn->dn_flags & DNODE_FLAG_SPILL_BLKPTR) == 0) return (EIO); bp = DN_SPILL_BLKPTR(dn); size = BP_GET_LSIZE(bp); buf = malloc(size); if (buf == NULL) rc = ENOMEM; else rc = zio_read(spa, bp, buf); if (rc != 0) { free(buf); return (rc); } sahdrp = buf; } hdrsize = SA_HDR_SIZE(sahdrp); p = (char *)((uintptr_t)sahdrp + hdrsize + SA_SYMLINK_OFFSET); memcpy(path, p, psize); free(buf); return (0); } /* * Second test is purely to silence bogus compiler * warning about accessing past the end of dn_bonus. 
*/ if (psize + sizeof(znode_phys_t) <= dn->dn_bonuslen && sizeof(znode_phys_t) <= sizeof(dn->dn_bonus)) { memcpy(path, &dn->dn_bonus[sizeof(znode_phys_t)], psize); } else { rc = dnode_read(spa, dn, 0, path, psize); } return (rc); } struct obj_list { uint64_t objnum; STAILQ_ENTRY(obj_list) entry; }; /* * Lookup a file and return its dnode. */ static int zfs_lookup(const struct zfsmount *mount, const char *upath, dnode_phys_t *dnode) { int rc; uint64_t objnum; const spa_t *spa; dnode_phys_t dn; const char *p, *q; char element[256]; char path[1024]; int symlinks_followed = 0; struct stat sb; struct obj_list *entry, *tentry; STAILQ_HEAD(, obj_list) on_cache = STAILQ_HEAD_INITIALIZER(on_cache); spa = mount->spa; if (mount->objset.os_type != DMU_OST_ZFS) { printf("ZFS: unexpected object set type %ju\n", (uintmax_t)mount->objset.os_type); return (EIO); } if ((entry = malloc(sizeof(struct obj_list))) == NULL) return (ENOMEM); /* * Get the root directory dnode. */ rc = objset_get_dnode(spa, &mount->objset, MASTER_NODE_OBJ, &dn); if (rc) { free(entry); return (rc); } rc = zap_lookup(spa, &dn, ZFS_ROOT_OBJ, sizeof(objnum), 1, &objnum); if (rc) { free(entry); return (rc); } entry->objnum = objnum; STAILQ_INSERT_HEAD(&on_cache, entry, entry); rc = objset_get_dnode(spa, &mount->objset, objnum, &dn); if (rc != 0) goto done; p = upath; while (p && *p) { rc = objset_get_dnode(spa, &mount->objset, objnum, &dn); if (rc != 0) goto done; while (*p == '/') p++; if (*p == '\0') break; q = p; while (*q != '\0' && *q != '/') q++; /* skip dot */ if (p + 1 == q && p[0] == '.') { p++; continue; } /* double dot */ if (p + 2 == q && p[0] == '.' 
&& p[1] == '.') { p += 2; if (STAILQ_FIRST(&on_cache) == STAILQ_LAST(&on_cache, obj_list, entry)) { rc = ENOENT; goto done; } entry = STAILQ_FIRST(&on_cache); STAILQ_REMOVE_HEAD(&on_cache, entry); free(entry); objnum = (STAILQ_FIRST(&on_cache))->objnum; continue; } if (q - p + 1 > sizeof(element)) { rc = ENAMETOOLONG; goto done; } memcpy(element, p, q - p); element[q - p] = 0; p = q; if ((rc = zfs_dnode_stat(spa, &dn, &sb)) != 0) goto done; if (!S_ISDIR(sb.st_mode)) { rc = ENOTDIR; goto done; } rc = zap_lookup(spa, &dn, element, sizeof (objnum), 1, &objnum); if (rc) goto done; objnum = ZFS_DIRENT_OBJ(objnum); if ((entry = malloc(sizeof(struct obj_list))) == NULL) { rc = ENOMEM; goto done; } entry->objnum = objnum; STAILQ_INSERT_HEAD(&on_cache, entry, entry); rc = objset_get_dnode(spa, &mount->objset, objnum, &dn); if (rc) goto done; /* * Check for symlink. */ rc = zfs_dnode_stat(spa, &dn, &sb); if (rc) goto done; if (S_ISLNK(sb.st_mode)) { if (symlinks_followed > 10) { rc = EMLINK; goto done; } symlinks_followed++; /* * Read the link value and copy the tail of our * current path onto the end. */ if (sb.st_size + strlen(p) + 1 > sizeof(path)) { rc = ENAMETOOLONG; goto done; } strcpy(&path[sb.st_size], p); rc = zfs_dnode_readlink(spa, &dn, path, sb.st_size); if (rc != 0) goto done; /* * Restart with the new path, starting either at * the root or at the parent depending whether or * not the link is relative. 
*/ p = path; if (*p == '/') { while (STAILQ_FIRST(&on_cache) != STAILQ_LAST(&on_cache, obj_list, entry)) { entry = STAILQ_FIRST(&on_cache); STAILQ_REMOVE_HEAD(&on_cache, entry); free(entry); } } else { entry = STAILQ_FIRST(&on_cache); STAILQ_REMOVE_HEAD(&on_cache, entry); free(entry); } objnum = (STAILQ_FIRST(&on_cache))->objnum; } } *dnode = dn; done: STAILQ_FOREACH_SAFE(entry, &on_cache, entry, tentry) free(entry); return (rc); } diff --git a/stand/mips/beri/loader/devicename.c b/stand/mips/beri/loader/devicename.c index 89eee32567a8..b1f0afd5bc1f 100644 --- a/stand/mips/beri/loader/devicename.c +++ b/stand/mips/beri/loader/devicename.c @@ -1,206 +1,206 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include "bootstrap.h" #include "disk.h" static int beri_arch_parsedev(struct disk_devdesc **dev, const char *devspec, const char **path); /* * Point (dev) at an allocated device specifier for the device matching the * path in (devspec). If it contains an explicit device specification, * use that. If not, use the default device. */ int beri_arch_getdev(void **vdev, const char *devspec, const char **path) { struct disk_devdesc **dev = (struct disk_devdesc **)vdev; int rv; /* * If it looks like this is just a path and no * device, go with the current device. */ if ((devspec == NULL) || (devspec[0] == '/') || (strchr(devspec, ':') == NULL)) { if (((rv = beri_arch_parsedev(dev, getenv("currdev"), NULL)) == 0) && (path != NULL)) *path = devspec; return(rv); } /* * Try to parse the device name off the beginning of the devspec */ return(beri_arch_parsedev(dev, devspec, path)); } /* * Point (dev) at an allocated device specifier matching the string version * at the beginning of (devspec). Return a pointer to the remaining * text in (path). * * In all cases, the beginning of (devspec) is compared to the names * of known devices in the device switch, and then any following text * is parsed according to the rules applied to the device type. * * For disk-type devices, the syntax is: * * disk[s][]: * */ static int beri_arch_parsedev(struct disk_devdesc **dev, const char *devspec, const char **path) { struct disk_devdesc *idev; struct devsw *dv; int i, unit, err; char *cp; const char *np; /* minimum length check */ if (strlen(devspec) < 2) return(EINVAL); /* look for a device that matches */ for (i = 0, dv = NULL; devsw[i] != NULL; i++) { if (!strncmp(devspec, devsw[i]->dv_name, strlen(devsw[i]->dv_name))) { dv = devsw[i]; break; } } if (dv == NULL) return(ENOENT); idev = malloc(sizeof(struct disk_devdesc)); err = 0; np = (devspec + strlen(dv->dv_name)); switch(dv->dv_type) { case DEVT_NONE: /* XXX what to do here? Do we care? 
*/ break; case DEVT_DISK: err = disk_parsedev(idev, np, path); if (err != 0) goto fail; break; case DEVT_CD: case DEVT_NET: case DEVT_ZFS: unit = 0; if (*np && (*np != ':')) { unit = strtol(np, &cp, 0); /* get unit number if present */ if (cp == np) { err = EUNIT; goto fail; } } else { err = EUNIT; goto fail; } if (*cp && (*cp != ':')) { err = EINVAL; goto fail; } idev->dd.d_unit = unit; if (path != NULL) *path = (*cp == 0) ? cp : cp + 1; break; default: err = EINVAL; goto fail; } idev->dd.d_dev = dv; if (dev == NULL) { free(idev); } else { *dev = idev; } return(0); fail: free(idev); return(err); } char * beri_arch_fmtdev(void *vdev) { struct disk_devdesc *dev = (struct disk_devdesc *)vdev; static char buf[128]; /* XXX device length constant? */ switch(dev->dd.d_dev->dv_type) { case DEVT_NONE: strcpy(buf, "(no device)"); break; case DEVT_CD: sprintf(buf, "%s%d:", dev->dd.d_dev->dv_name, dev->dd.d_unit); break; case DEVT_DISK: return (disk_fmtdev(vdev)); case DEVT_NET: case DEVT_ZFS: sprintf(buf, "%s%d:", dev->dd.d_dev->dv_name, dev->dd.d_unit); break; } return(buf); } /* * Set currdev to suit the value being supplied in (value) */ int beri_arch_setcurrdev(struct env_var *ev, int flags, const void *value) { - struct disk_devdesc *ncurr; - int rv; + struct disk_devdesc *ncurr; + int rv; - if ((rv = beri_arch_parsedev(&ncurr, value, NULL)) != 0) - return(rv); - free(ncurr); - env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); - return(0); + if ((rv = beri_arch_parsedev(&ncurr, value, NULL)) != 0) + return (rv); + free(ncurr); + + return (mount_currdev(ev, flags, value)); } diff --git a/stand/powerpc/kboot/main.c b/stand/powerpc/kboot/main.c index 10a5e89adb9b..d25b6216f421 100644 --- a/stand/powerpc/kboot/main.c +++ b/stand/powerpc/kboot/main.c @@ -1,514 +1,513 @@ /*- * Copyright (C) 2010-2014 Nathan Whitehorn * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "host_syscall.h" struct arch_switch archsw; extern void *_end; int kboot_getdev(void **vdev, const char *devspec, const char **path); ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len); ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len); ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len); int kboot_autoload(void); uint64_t kboot_loadaddr(u_int type, void *data, uint64_t addr); -int kboot_setcurrdev(struct env_var *ev, int flags, const void *value); static void kboot_kseg_get(int *nseg, void **ptr); extern int command_fdt_internal(int argc, char *argv[]); struct region_desc { uint64_t start; uint64_t end; }; static uint64_t kboot_get_phys_load_segment(void) { int fd; uint64_t entry[2]; static uint64_t load_segment = ~(0UL); uint64_t val_64; uint32_t val_32; struct region_desc rsvd_reg[32]; int rsvd_reg_cnt = 0; int ret, a, b; uint64_t start, end; if (load_segment == ~(0UL)) { /* Default load address is 0x00000000 */ load_segment = 0UL; /* Read reserved regions */ fd = host_open("/proc/device-tree/reserved-ranges", O_RDONLY, 0); if (fd >= 0) { while (host_read(fd, &entry[0], sizeof(entry)) == sizeof(entry)) { rsvd_reg[rsvd_reg_cnt].start = be64toh(entry[0]); rsvd_reg[rsvd_reg_cnt].end = be64toh(entry[1]) + rsvd_reg[rsvd_reg_cnt].start - 1; rsvd_reg_cnt++; } host_close(fd); } /* Read where the kernel ends */ fd = host_open("/proc/device-tree/chosen/linux,kernel-end", O_RDONLY, 0); if (fd >= 0) { ret = host_read(fd, &val_64, sizeof(val_64)); if (ret == sizeof(uint64_t)) { rsvd_reg[rsvd_reg_cnt].start = 0; rsvd_reg[rsvd_reg_cnt].end = be64toh(val_64) - 1; } else { memcpy(&val_32, &val_64, sizeof(val_32)); rsvd_reg[rsvd_reg_cnt].start = 0; rsvd_reg[rsvd_reg_cnt].end = be32toh(val_32) - 1; } rsvd_reg_cnt++; host_close(fd); } /* Read memory size (SOCKET0 only) */ fd = host_open("/proc/device-tree/memory@0/reg", 
O_RDONLY, 0); if (fd < 0) fd = host_open("/proc/device-tree/memory/reg", O_RDONLY, 0); if (fd >= 0) { ret = host_read(fd, &entry, sizeof(entry)); /* Memory range in start:length format */ entry[0] = be64toh(entry[0]); entry[1] = be64toh(entry[1]); /* Reserve everything what is before start */ if (entry[0] != 0) { rsvd_reg[rsvd_reg_cnt].start = 0; rsvd_reg[rsvd_reg_cnt].end = entry[0] - 1; rsvd_reg_cnt++; } /* Reserve everything what is after end */ if (entry[1] != 0xffffffffffffffffUL) { rsvd_reg[rsvd_reg_cnt].start = entry[0] + entry[1]; rsvd_reg[rsvd_reg_cnt].end = 0xffffffffffffffffUL; rsvd_reg_cnt++; } host_close(fd); } /* Sort entries in ascending order (bubble) */ for (a = rsvd_reg_cnt - 1; a > 0; a--) { for (b = 0; b < a; b++) { if (rsvd_reg[b].start > rsvd_reg[b + 1].start) { struct region_desc tmp; tmp = rsvd_reg[b]; rsvd_reg[b] = rsvd_reg[b + 1]; rsvd_reg[b + 1] = tmp; } } } /* Join overlapping/adjacent regions */ for (a = 0; a < rsvd_reg_cnt - 1; ) { if ((rsvd_reg[a + 1].start >= rsvd_reg[a].start) && ((rsvd_reg[a + 1].start - 1) <= rsvd_reg[a].end)) { /* We have overlapping/adjacent regions! 
*/ rsvd_reg[a].end = MAX(rsvd_reg[a].end, rsvd_reg[a + a].end); for (b = a + 1; b < rsvd_reg_cnt - 1; b++) rsvd_reg[b] = rsvd_reg[b + 1]; rsvd_reg_cnt--; } else a++; } /* Find the first free region */ if (rsvd_reg_cnt > 0) { start = 0; end = rsvd_reg[0].start; for (a = 0; a < rsvd_reg_cnt - 1; a++) { if ((start >= rsvd_reg[a].start) && (start <= rsvd_reg[a].end)) { start = rsvd_reg[a].end + 1; end = rsvd_reg[a + 1].start; } else break; } if (start != end) { uint64_t align = 64UL*1024UL*1024UL; /* Align both to 64MB boundary */ start = (start + align - 1UL) & ~(align - 1UL); end = ((end + 1UL) & ~(align - 1UL)) - 1UL; if (start < end) load_segment = start; } } } return (load_segment); } uint8_t kboot_get_kernel_machine_bits(void) { static uint8_t bits = 0; struct old_utsname utsname; int ret; if (bits == 0) { /* Default is 32-bit kernel */ bits = 32; /* Try to get system type */ memset(&utsname, 0, sizeof(utsname)); ret = host_uname(&utsname); if (ret == 0) { if (strcmp(utsname.machine, "ppc64") == 0) bits = 64; else if (strcmp(utsname.machine, "ppc64le") == 0) bits = 64; } } return (bits); } int kboot_getdev(void **vdev, const char *devspec, const char **path) { int i; const char *devpath, *filepath; struct devsw *dv; struct devdesc *desc; if (strchr(devspec, ':') != NULL) { devpath = devspec; filepath = strchr(devspec, ':') + 1; } else { devpath = getenv("currdev"); filepath = devspec; } for (i = 0; (dv = devsw[i]) != NULL; i++) { if (strncmp(dv->dv_name, devpath, strlen(dv->dv_name)) == 0) goto found; } return (ENOENT); found: if (path != NULL && filepath != NULL) *path = filepath; else if (path != NULL) *path = strchr(devspec, ':') + 1; if (vdev != NULL) { desc = malloc(sizeof(*desc)); desc->d_dev = dv; desc->d_unit = 0; desc->d_opendata = strdup(devpath); *vdev = desc; } return (0); } int main(int argc, const char **argv) { void *heapbase; const size_t heapsize = 15*1024*1024; const char *bootdev; /* * Set the heap to one page after the end of the loader. 
*/ heapbase = host_getmem(heapsize); setheap(heapbase, heapbase + heapsize); /* * Set up console. */ cons_probe(); /* Choose bootdev if provided */ if (argc > 1) bootdev = argv[1]; else bootdev = ""; printf("Boot device: %s\n", bootdev); archsw.arch_getdev = kboot_getdev; archsw.arch_copyin = kboot_copyin; archsw.arch_copyout = kboot_copyout; archsw.arch_readin = kboot_readin; archsw.arch_autoload = kboot_autoload; archsw.arch_loadaddr = kboot_loadaddr; archsw.arch_kexec_kseg_get = kboot_kseg_get; printf("\n%s", bootprog_info); setenv("currdev", bootdev, 1); setenv("loaddev", bootdev, 1); setenv("LINES", "24", 1); setenv("usefdt", "1", 1); interact(); /* doesn't return */ return (0); } void exit(int code) { while (1); /* XXX: host_exit */ __unreachable(); } void delay(int usecs) { struct host_timeval tvi, tv; uint64_t ti, t; host_gettimeofday(&tvi, NULL); ti = tvi.tv_sec*1000000 + tvi.tv_usec; do { host_gettimeofday(&tv, NULL); t = tv.tv_sec*1000000 + tv.tv_usec; } while (t < ti + usecs); } time_t getsecs(void) { struct host_timeval tv; host_gettimeofday(&tv, NULL); return (tv.tv_sec); } time_t time(time_t *tloc) { time_t rv; rv = getsecs(); if (tloc != NULL) *tloc = rv; return (rv); } struct kexec_segment { void *buf; int bufsz; void *mem; int memsz; }; struct kexec_segment loaded_segments[128]; int nkexec_segments = 0; static ssize_t get_phys_buffer(vm_offset_t dest, const size_t len, void **buf) { int i = 0; const size_t segsize = 4*1024*1024; for (i = 0; i < nkexec_segments; i++) { if (dest >= (vm_offset_t)loaded_segments[i].mem && dest < (vm_offset_t)loaded_segments[i].mem + loaded_segments[i].memsz) goto out; } loaded_segments[nkexec_segments].buf = host_getmem(segsize); loaded_segments[nkexec_segments].bufsz = segsize; loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,segsize); loaded_segments[nkexec_segments].memsz = segsize; i = nkexec_segments; nkexec_segments++; out: *buf = loaded_segments[i].buf + (dest - 
(vm_offset_t)loaded_segments[i].mem); return (min(len,loaded_segments[i].bufsz - (dest - (vm_offset_t)loaded_segments[i].mem))); } ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len) { ssize_t segsize, remainder; void *destbuf; remainder = len; do { segsize = get_phys_buffer(dest, remainder, &destbuf); bcopy(src, destbuf, segsize); remainder -= segsize; src += segsize; dest += segsize; } while (remainder > 0); return (len); } ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len) { ssize_t segsize, remainder; void *srcbuf; remainder = len; do { segsize = get_phys_buffer(src, remainder, &srcbuf); bcopy(srcbuf, dest, segsize); remainder -= segsize; src += segsize; dest += segsize; } while (remainder > 0); return (len); } ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) { void *buf; size_t resid, chunk, get; ssize_t got; vm_offset_t p; p = dest; chunk = min(PAGE_SIZE, len); buf = malloc(chunk); if (buf == NULL) { printf("kboot_readin: buf malloc failed\n"); return (0); } for (resid = len; resid > 0; resid -= got, p += got) { get = min(chunk, resid); got = VECTX_READ(fd, buf, get); if (got <= 0) { if (got < 0) printf("kboot_readin: read failed\n"); break; } kboot_copyin(buf, p, got); } free (buf); return (len - resid); } int kboot_autoload(void) { return (0); } uint64_t kboot_loadaddr(u_int type, void *data, uint64_t addr) { if (type == LOAD_ELF) addr = roundup(addr, PAGE_SIZE); else addr += kboot_get_phys_load_segment(); return (addr); } static void kboot_kseg_get(int *nseg, void **ptr) { #if 0 int a; for (a = 0; a < nkexec_segments; a++) { printf("kseg_get: %jx %jx %jx %jx\n", (uintmax_t)loaded_segments[a].buf, (uintmax_t)loaded_segments[a].bufsz, (uintmax_t)loaded_segments[a].mem, (uintmax_t)loaded_segments[a].memsz); } #endif *nseg = nkexec_segments; *ptr = &loaded_segments[0]; } void _start(int argc, const char **argv, char **env) { // This makes error "variable 'sp' is uninitialized" be just a 
warning on clang. // Initializing 'sp' is not desired here as it would overwrite "r1" original value #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic warning "-Wuninitialized" #endif register volatile void **sp asm("r1"); main((int)sp[0], (const char **)&sp[1]); #if defined(__clang__) #pragma clang diagnostic pop #endif } /* * Since proper fdt command handling function is defined in fdt_loader_cmd.c, * and declaring it as extern is in contradiction with COMMAND_SET() macro * (which uses static pointer), we're defining wrapper function, which * calls the proper fdt handling routine. */ static int command_fdt(int argc, char *argv[]) { return (command_fdt_internal(argc, argv)); } COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); diff --git a/stand/powerpc/ofw/main.c b/stand/powerpc/ofw/main.c index 19b73161c18b..d30be975ac41 100644 --- a/stand/powerpc/ofw/main.c +++ b/stand/powerpc/ofw/main.c @@ -1,250 +1,253 @@ /*- * Copyright (c) 2000 Benno Rice * Copyright (c) 2000 Stephane Potvin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include "openfirm.h" #include "libofw.h" #include "bootstrap.h" #include #include struct arch_switch archsw; /* MI/MD interface boundary */ extern char end[]; uint32_t acells, scells; static char bootargs[128]; #define HEAP_SIZE 0x800000 static char heap[HEAP_SIZE]; // In BSS, so uses no space #define OF_puts(fd, text) OF_write(fd, text, strlen(text)) static __inline register_t mfmsr(void) { register_t value; __asm __volatile ("mfmsr %0" : "=r"(value)); return (value); } void init_heap(void) { bzero(heap, HEAP_SIZE); setheap(heap, (void *)((uintptr_t)heap + HEAP_SIZE)); } uint64_t memsize(void) { phandle_t memoryp; cell_t reg[24]; int i, sz; uint64_t memsz; memsz = 0; memoryp = OF_instance_to_package(memory); sz = OF_getencprop(memoryp, "reg", ®[0], sizeof(reg)); sz /= sizeof(reg[0]); for (i = 0; i < sz; i += (acells + scells)) { if (scells > 1) memsz += (uint64_t)reg[i + acells] << 32; memsz += reg[i + acells + scells - 1]; } return (memsz); } #ifdef CAS extern int ppc64_cas(void); static int ppc64_autoload(void) { const char *cas; if ((cas = getenv("cas")) && cas[0] == '1') if (ppc64_cas() != 0) return (-1); return (ofw_autoload()); } #endif #if BYTE_ORDER == LITTLE_ENDIAN /* * In Little-endian, we cannot just branch to the client interface. Since * the client interface is big endian, we have to rfid to it. 
* Likewise, when execution resumes, we are in the wrong endianness so * we must do a fixup before returning to the caller. */ static int (*openfirmware_entry)(void *); extern int openfirmware_trampoline(void *buf, int (*cb)(void *)); /* * Wrapper to pass the real entry point to our trampoline. */ static int openfirmware_docall(void *buf) { return openfirmware_trampoline(buf, openfirmware_entry); } #endif int main(int (*openfirm)(void *)) { phandle_t root; int i; char bootpath[64]; char *ch; int bargc; char **bargv; /* * Initialise the Open Firmware routines by giving them the entry point. */ #if BYTE_ORDER == LITTLE_ENDIAN /* * Use a trampoline entry point for endian fixups. */ openfirmware_entry = openfirm; OF_init(openfirmware_docall); #else OF_init(openfirm); #endif root = OF_finddevice("/"); scells = acells = 1; OF_getencprop(root, "#address-cells", &acells, sizeof(acells)); OF_getencprop(root, "#size-cells", &scells, sizeof(scells)); /* * Initialise the heap as early as possible. Once this is done, * alloc() is usable. The stack is buried inside us, so this is * safe. */ init_heap(); /* * Set up console. */ cons_probe(); + /* Set up currdev variable to have hooks in place. */ + env_setenv("currdev", EV_VOLATILE, "", ofw_setcurrdev, env_nounset); + /* * March through the device switch probing for things. */ for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); printf("\n%s", bootprog_info); printf("Memory: %lldKB\n", memsize() / 1024); OF_getprop(chosen, "bootpath", bootpath, 64); ch = strchr(bootpath, ':'); *ch = '\0'; printf("Booted from: %s\n", bootpath); printf("\n"); /* * Only parse the first bootarg if present. 
It should * be simple to handle extra arguments */ OF_getprop(chosen, "bootargs", bootargs, sizeof(bootargs)); bargc = 0; parse(&bargc, &bargv, bootargs); if (bargc == 1) env_setenv("currdev", EV_VOLATILE, bargv[0], ofw_setcurrdev, env_nounset); else env_setenv("currdev", EV_VOLATILE, bootpath, ofw_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, bootpath, env_noset, env_nounset); setenv("LINES", "24", 1); /* optional */ /* * On non-Apple hardware, where it works reliably, pass flattened * device trees to the kernel by default instead of OF CI pointers. * Apple hardware is the only virtual-mode OF implementation in * existence, so far as I am aware, so use that as a flag. */ if (!(mfmsr() & PSL_DR)) setenv("usefdt", "1", 1); archsw.arch_getdev = ofw_getdev; archsw.arch_copyin = ofw_copyin; archsw.arch_copyout = ofw_copyout; archsw.arch_readin = ofw_readin; #ifdef CAS setenv("cas", "1", 0); archsw.arch_autoload = ppc64_autoload; #else archsw.arch_autoload = ofw_autoload; #endif interact(); /* doesn't return */ OF_exit(); return 0; } COMMAND_SET(halt, "halt", "halt the system", command_halt); static int command_halt(int argc, char *argv[]) { OF_exit(); return (CMD_OK); } COMMAND_SET(memmap, "memmap", "print memory map", command_memmap); int command_memmap(int argc, char **argv) { ofw_memmap(acells); return (CMD_OK); } diff --git a/stand/uboot/common/main.c b/stand/uboot/common/main.c index b8bd1cf05089..85ddf5db1f90 100644 --- a/stand/uboot/common/main.c +++ b/stand/uboot/common/main.c @@ -1,723 +1,726 @@ /*- * Copyright (c) 2000 Benno Rice * Copyright (c) 2000 Stephane Potvin * Copyright (c) 2007-2008 Semihalf, Rafal Jaworowski * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include "api_public.h" #include "bootstrap.h" #include "glue.h" #include "libuboot.h" #ifndef nitems #define nitems(x) (sizeof((x)) / sizeof((x)[0])) #endif #ifndef HEAP_SIZE #define HEAP_SIZE (2 * 1024 * 1024) #endif struct uboot_devdesc currdev; struct arch_switch archsw; /* MI/MD interface boundary */ int devs_no; uintptr_t uboot_heap_start; uintptr_t uboot_heap_end; struct device_type { const char *name; int type; } device_types[] = { { "disk", DEV_TYP_STOR }, { "ide", DEV_TYP_STOR | DT_STOR_IDE }, { "mmc", DEV_TYP_STOR | DT_STOR_MMC }, { "sata", DEV_TYP_STOR | DT_STOR_SATA }, { "scsi", DEV_TYP_STOR | DT_STOR_SCSI }, { "usb", DEV_TYP_STOR | DT_STOR_USB }, { "net", DEV_TYP_NET } }; extern char end[]; extern unsigned char _etext[]; extern unsigned char _edata[]; extern unsigned char __bss_start[]; extern unsigned char __sbss_start[]; extern unsigned char __sbss_end[]; extern unsigned char _end[]; #ifdef LOADER_FDT_SUPPORT extern int command_fdt_internal(int 
argc, char *argv[]); #endif static void dump_sig(struct api_signature *sig) { #ifdef DEBUG printf("signature:\n"); printf(" version\t= %d\n", sig->version); printf(" checksum\t= 0x%08x\n", sig->checksum); printf(" sc entry\t= 0x%08x\n", sig->syscall); #endif } static void dump_addr_info(void) { #ifdef DEBUG printf("\naddresses info:\n"); printf(" _etext (sdata) = 0x%08x\n", (uint32_t)_etext); printf(" _edata = 0x%08x\n", (uint32_t)_edata); printf(" __sbss_start = 0x%08x\n", (uint32_t)__sbss_start); printf(" __sbss_end = 0x%08x\n", (uint32_t)__sbss_end); printf(" __sbss_start = 0x%08x\n", (uint32_t)__bss_start); printf(" _end = 0x%08x\n", (uint32_t)_end); printf(" syscall entry = 0x%08x\n", (uint32_t)syscall_ptr); #endif } static uint64_t memsize(struct sys_info *si, int flags) { uint64_t size; int i; size = 0; for (i = 0; i < si->mr_no; i++) if (si->mr[i].flags == flags && si->mr[i].size) size += (si->mr[i].size); return (size); } static void meminfo(void) { uint64_t size; struct sys_info *si; int t[3] = { MR_ATTR_DRAM, MR_ATTR_FLASH, MR_ATTR_SRAM }; int i; if ((si = ub_get_sys_info()) == NULL) panic("could not retrieve system info"); for (i = 0; i < 3; i++) { size = memsize(si, t[i]); if (size > 0) printf("%s: %juMB\n", ub_mem_type(t[i]), (uintmax_t)(size / 1024 / 1024)); } } static const char * get_device_type(const char *devstr, int *devtype) { int i; int namelen; struct device_type *dt; if (devstr) { for (i = 0; i < nitems(device_types); i++) { dt = &device_types[i]; namelen = strlen(dt->name); if (strncmp(dt->name, devstr, namelen) == 0) { *devtype = dt->type; return (devstr + namelen); } } printf("Unknown device type '%s'\n", devstr); } *devtype = DEV_TYP_NONE; return (NULL); } static const char * device_typename(int type) { int i; for (i = 0; i < nitems(device_types); i++) if (device_types[i].type == type) return (device_types[i].name); return (""); } /* * Parse a device string into type, unit, slice and partition numbers. 
A
 * returned value of -1 for type indicates a search should be done for the
 * first loadable device, otherwise a returned value of -1 for unit
 * indicates a search should be done for the first loadable device of the
 * given type.
 *
 * The returned values for slice and partition are interpreted by
 * disk_open().
 *
 * The device string can be a standard loader(8) disk specifier:
 *
 * disk<unit>s<slice>              disk0s1
 * disk<unit>s<slice><partition>   disk1s2a
 * disk<unit>p<partition>          disk0p4
 *
 * or one of the following formats:
 *
 * Valid device strings:                     For device types:
 *
 * <type_name>                               DEV_TYP_STOR, DEV_TYP_NET
 * <type_name><unit>                         DEV_TYP_STOR, DEV_TYP_NET
 * <type_name><unit>:                        DEV_TYP_STOR, DEV_TYP_NET
 * <type_name><unit>:<slice>                 DEV_TYP_STOR
 * <type_name><unit>:<slice>.                DEV_TYP_STOR
 * <type_name><unit>:<slice>.<partition>     DEV_TYP_STOR
 *
 * For valid type names, see the device_types array, above.
 *
 * Slice numbers are 1-based.  0 is a wildcard.
 *
 * The string is taken from the U-Boot environment variable "loaderdev".
 * If it is unset or names an unknown device type, (*type) is left at
 * DEV_TYP_NONE and the other outputs keep their wildcard defaults.
 */
static void
get_load_device(int *type, int *unit, int *slice, int *partition)
{
	struct disk_devdesc dev;
	char *devstr;
	const char *p;
	char *endp;

	*type = DEV_TYP_NONE;
	*unit = -1;
	*slice = D_SLICEWILD;
	*partition = D_PARTWILD;

	devstr = ub_env_get("loaderdev");
	if (devstr == NULL) {
		printf("U-Boot env: loaderdev not set, will probe all devices.\n");
		return;
	}
	printf("U-Boot env: loaderdev='%s'\n", devstr);

	p = get_device_type(devstr, type);

	/*
	 * get_device_type() returns NULL (and sets *type to DEV_TYP_NONE)
	 * for an unknown type name.  Stop here instead of dereferencing the
	 * NULL pointer below; the outputs already hold the "probe
	 * everything" defaults.
	 */
	if (p == NULL)
		return;

	/*
	 * If type is DEV_TYP_STOR we have a disk-like device.  If the remainder
	 * of the string contains spaces, dots, or a colon in any location other
	 * than the last char, it's legacy format.  Otherwise it might be
	 * standard loader(8) format (e.g., disk0s2a or mmc1p12), so try to
	 * parse the remainder of the string as such, and if it works, return
	 * those results. Otherwise we'll fall through to the code that parses
	 * the legacy format.
	 */
	if (*type & DEV_TYP_STOR) {
		size_t len = strlen(p);
		if (strcspn(p, " .") == len && strcspn(p, ":") >= len - 1 &&
		    disk_parsedev(&dev, p, NULL) == 0) {
			*unit = dev.dd.d_unit;
			*slice = dev.d_slice;
			*partition = dev.d_partition;
			return;
		}
	}

	/* Ignore optional spaces after the device name.
*/ while (*p == ' ') p++; /* Unknown device name, or a known name without unit number. */ if ((*type == DEV_TYP_NONE) || (*p == '\0')) { return; } /* Malformed unit number. */ if (!isdigit(*p)) { *type = DEV_TYP_NONE; return; } /* Guaranteed to extract a number from the string, as *p is a digit. */ *unit = strtol(p, &endp, 10); p = endp; /* Known device name with unit number and nothing else. */ if (*p == '\0') { return; } /* Device string is malformed beyond unit number. */ if (*p != ':') { *type = DEV_TYP_NONE; *unit = -1; return; } p++; /* No slice and partition specification. */ if ('\0' == *p ) return; /* Only DEV_TYP_STOR devices can have a slice specification. */ if (!(*type & DEV_TYP_STOR)) { *type = DEV_TYP_NONE; *unit = -1; return; } *slice = strtoul(p, &endp, 10); /* Malformed slice number. */ if (p == endp) { *type = DEV_TYP_NONE; *unit = -1; *slice = D_SLICEWILD; return; } p = endp; /* No partition specification. */ if (*p == '\0') return; /* Device string is malformed beyond slice number. */ if (*p != '.') { *type = DEV_TYP_NONE; *unit = -1; *slice = D_SLICEWILD; return; } p++; /* No partition specification. */ if (*p == '\0') return; *partition = strtol(p, &endp, 10); p = endp; /* Full, valid device string. */ if (*endp == '\0') return; /* Junk beyond partition number. 
*/ *type = DEV_TYP_NONE; *unit = -1; *slice = D_SLICEWILD; *partition = D_PARTWILD; } static void print_disk_probe_info() { char slice[32]; char partition[32]; if (currdev.d_disk.d_slice == D_SLICENONE) strlcpy(slice, "", sizeof(slice)); else if (currdev.d_disk.d_slice == D_SLICEWILD) strlcpy(slice, "", sizeof(slice)); else snprintf(slice, sizeof(slice), "%d", currdev.d_disk.d_slice); if (currdev.d_disk.d_partition == D_PARTNONE) strlcpy(partition, "", sizeof(partition)); else if (currdev.d_disk.d_partition == D_PARTWILD) strlcpy(partition, "", sizeof(partition)); else snprintf(partition, sizeof(partition), "%d", currdev.d_disk.d_partition); printf(" Checking unit=%d slice=%s partition=%s...", currdev.dd.d_unit, slice, partition); } static int probe_disks(int devidx, int load_type, int load_unit, int load_slice, int load_partition) { int open_result, unit; struct open_file f; currdev.d_disk.d_slice = load_slice; currdev.d_disk.d_partition = load_partition; f.f_devdata = &currdev; open_result = -1; if (load_type == -1) { printf(" Probing all disk devices...\n"); /* Try each disk in succession until one works. */ for (currdev.dd.d_unit = 0; currdev.dd.d_unit < UB_MAX_DEV; currdev.dd.d_unit++) { print_disk_probe_info(); open_result = devsw[devidx]->dv_open(&f, &currdev); if (open_result == 0) { printf(" good.\n"); return (0); } printf("\n"); } return (-1); } if (load_unit == -1) { printf(" Probing all %s devices...\n", device_typename(load_type)); /* Try each disk of given type in succession until one works. 
*/ for (unit = 0; unit < UB_MAX_DEV; unit++) { currdev.dd.d_unit = uboot_diskgetunit(load_type, unit); if (currdev.dd.d_unit == -1) break; print_disk_probe_info(); open_result = devsw[devidx]->dv_open(&f, &currdev); if (open_result == 0) { printf(" good.\n"); return (0); } printf("\n"); } return (-1); } if ((currdev.dd.d_unit = uboot_diskgetunit(load_type, load_unit)) != -1) { print_disk_probe_info(); open_result = devsw[devidx]->dv_open(&f,&currdev); if (open_result == 0) { printf(" good.\n"); return (0); } printf("\n"); } printf(" Requested disk type/unit/slice/partition not found\n"); return (-1); } int main(int argc, char **argv) { struct api_signature *sig = NULL; int load_type, load_unit, load_slice, load_partition; int i; const char *ldev; /* * We first check if a command line argument was passed to us containing * API's signature address. If it wasn't then we try to search for the * API signature via the usual hinted address. * If we can't find the magic signature and related info, exit with a * unique error code that U-Boot reports as "## Application terminated, * rc = 0xnnbadab1". Hopefully 'badab1' looks enough like "bad api" to * provide a clue. It's better than 0xffffffff anyway. */ if (!api_parse_cmdline_sig(argc, argv, &sig) && !api_search_sig(&sig)) return (0x01badab1); syscall_ptr = sig->syscall; if (syscall_ptr == NULL) return (0x02badab1); if (sig->version > API_SIG_VERSION) return (0x03badab1); /* Clear BSS sections */ bzero(__sbss_start, __sbss_end - __sbss_start); bzero(__bss_start, _end - __bss_start); /* * Initialise the heap as early as possible. Once this is done, * alloc() is usable. We are using the stack u-boot set up near the top * of physical ram; hopefully there is sufficient space between the end * of our bss and the bottom of the u-boot stack to avoid overlap. 
*/ uboot_heap_start = round_page((uintptr_t)end); uboot_heap_end = uboot_heap_start + HEAP_SIZE; setheap((void *)uboot_heap_start, (void *)uboot_heap_end); /* * Set up console. */ cons_probe(); printf("Compatible U-Boot API signature found @%p\n", sig); printf("\n%s", bootprog_info); printf("\n"); dump_sig(sig); dump_addr_info(); meminfo(); + /* Set up currdev variable to have hooks in place. */ + env_setenv("currdev", EV_VOLATILE, "", uboot_setcurrdev, env_nounset); + /* * Enumerate U-Boot devices */ if ((devs_no = ub_dev_enum()) == 0) { printf("no U-Boot devices found"); goto do_interact; } printf("Number of U-Boot devices: %d\n", devs_no); get_load_device(&load_type, &load_unit, &load_slice, &load_partition); /* * March through the device switch probing for things. */ for (i = 0; devsw[i] != NULL; i++) { if (devsw[i]->dv_init == NULL) continue; if ((devsw[i]->dv_init)() != 0) continue; printf("Found U-Boot device: %s\n", devsw[i]->dv_name); currdev.dd.d_dev = devsw[i]; currdev.dd.d_unit = 0; if ((load_type == DEV_TYP_NONE || (load_type & DEV_TYP_STOR)) && strcmp(devsw[i]->dv_name, "disk") == 0) { if (probe_disks(i, load_type, load_unit, load_slice, load_partition) == 0) break; } if ((load_type == DEV_TYP_NONE || (load_type & DEV_TYP_NET)) && strcmp(devsw[i]->dv_name, "net") == 0) break; } /* * If we couldn't find a boot device, return an error to u-boot. * U-boot may be running a boot script that can try something different * so returning an error is better than forcing a reboot. 
*/ if (devsw[i] == NULL) { printf("No boot device found!\n"); return (0xbadef1ce); } ldev = uboot_fmtdev(&currdev); env_setenv("currdev", EV_VOLATILE, ldev, uboot_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, ldev, env_noset, env_nounset); printf("Booting from %s\n", ldev); do_interact: setenv("LINES", "24", 1); /* optional */ setenv("prompt", "loader>", 1); #ifdef __powerpc__ setenv("usefdt", "1", 1); #endif archsw.arch_loadaddr = uboot_loadaddr; archsw.arch_getdev = uboot_getdev; archsw.arch_copyin = uboot_copyin; archsw.arch_copyout = uboot_copyout; archsw.arch_readin = uboot_readin; archsw.arch_autoload = uboot_autoload; interact(); /* doesn't return */ return (0); } COMMAND_SET(heap, "heap", "show heap usage", command_heap); static int command_heap(int argc, char *argv[]) { printf("heap base at %p, top at %p, used %td\n", end, sbrk(0), sbrk(0) - end); return (CMD_OK); } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { printf("Resetting...\n"); ub_reset(); printf("Reset failed!\n"); while (1); __unreachable(); } COMMAND_SET(devinfo, "devinfo", "show U-Boot devices", command_devinfo); static int command_devinfo(int argc, char *argv[]) { int i; if ((devs_no = ub_dev_enum()) == 0) { command_errmsg = "no U-Boot devices found!?"; return (CMD_ERROR); } printf("U-Boot devices:\n"); for (i = 0; i < devs_no; i++) { ub_dump_di(i); printf("\n"); } return (CMD_OK); } COMMAND_SET(sysinfo, "sysinfo", "show U-Boot system info", command_sysinfo); static int command_sysinfo(int argc, char *argv[]) { struct sys_info *si; if ((si = ub_get_sys_info()) == NULL) { command_errmsg = "could not retrieve U-Boot sys info!?"; return (CMD_ERROR); } printf("U-Boot system info:\n"); ub_dump_si(si); return (CMD_OK); } enum ubenv_action { UBENV_UNKNOWN, UBENV_SHOW, UBENV_IMPORT }; static void handle_uboot_env_var(enum ubenv_action action, const char * var) { char ldvar[128]; const char *val; char *wrk; int 
len; /* * On an import with the variable name formatted as ldname=ubname, * import the uboot variable ubname into the loader variable ldname, * otherwise the historical behavior is to import to uboot.ubname. */ if (action == UBENV_IMPORT) { len = strcspn(var, "="); if (len == 0) { printf("name cannot start with '=': '%s'\n", var); return; } if (var[len] == 0) { strcpy(ldvar, "uboot."); strncat(ldvar, var, sizeof(ldvar) - 7); } else { len = MIN(len, sizeof(ldvar) - 1); strncpy(ldvar, var, len); ldvar[len] = 0; var = &var[len + 1]; } } /* * If the user prepended "uboot." (which is how they usually see these * names) strip it off as a convenience. */ if (strncmp(var, "uboot.", 6) == 0) { var = &var[6]; } /* If there is no variable name left, punt. */ if (var[0] == 0) { printf("empty variable name\n"); return; } val = ub_env_get(var); if (action == UBENV_SHOW) { if (val == NULL) printf("uboot.%s is not set\n", var); else printf("uboot.%s=%s\n", var, val); } else if (action == UBENV_IMPORT) { if (val != NULL) { setenv(ldvar, val, 1); } } } static int command_ubenv(int argc, char *argv[]) { enum ubenv_action action; const char *var; int i; action = UBENV_UNKNOWN; if (argc > 1) { if (strcasecmp(argv[1], "import") == 0) action = UBENV_IMPORT; else if (strcasecmp(argv[1], "show") == 0) action = UBENV_SHOW; } if (action == UBENV_UNKNOWN) { command_errmsg = "usage: 'ubenv [var ...]"; return (CMD_ERROR); } if (argc > 2) { for (i = 2; i < argc; i++) handle_uboot_env_var(action, argv[i]); } else { var = NULL; for (;;) { if ((var = ub_env_enum(var)) == NULL) break; handle_uboot_env_var(action, var); } } return (CMD_OK); } COMMAND_SET(ubenv, "ubenv", "show or import U-Boot env vars", command_ubenv); #ifdef LOADER_FDT_SUPPORT /* * Since proper fdt command handling function is defined in fdt_loader_cmd.c, * and declaring it as extern is in contradiction with COMMAND_SET() macro * (which uses static pointer), we're defining wrapper function, which * calls the proper fdt handling 
routine. */ static int command_fdt(int argc, char *argv[]) { return (command_fdt_internal(argc, argv)); } COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); #endif diff --git a/stand/uboot/lib/devicename.c b/stand/uboot/lib/devicename.c index cda16ded62a7..b6b0e6e40639 100644 --- a/stand/uboot/lib/devicename.c +++ b/stand/uboot/lib/devicename.c @@ -1,200 +1,200 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include "bootstrap.h" #include "disk.h" #include "libuboot.h" static int uboot_parsedev(struct uboot_devdesc **dev, const char *devspec, const char **path); /* * Point (dev) at an allocated device specifier for the device matching the * path in (devspec). If it contains an explicit device specification, * use that. If not, use the default device. * * A NULL (devspec), one starting with '/', or one without a ':' is treated * as a bare path: the "currdev" environment value is parsed instead and, * when that succeeds and (path) is non-NULL, (*path) is set to (devspec). * Returns the result of uboot_parsedev(), which is 0 on success. */ int uboot_getdev(void **vdev, const char *devspec, const char **path) { struct uboot_devdesc **dev = (struct uboot_devdesc **)vdev; int rv; /* * If it looks like this is just a path and no * device, go with the current device. */ if ((devspec == NULL) || (devspec[0] == '/') || (strchr(devspec, ':') == NULL)) { /* NOTE(review): the getenv() result is passed on unchecked -- confirm "currdev" is always set by the time this runs. */ if (((rv = uboot_parsedev(dev, getenv("currdev"), NULL)) == 0) && (path != NULL)) *path = devspec; return(rv); } /* * Try to parse the device name off the beginning of the devspec. */ return (uboot_parsedev(dev, devspec, path)); } /* * Point (dev) at an allocated device specifier matching the string version * at the beginning of (devspec). Return a pointer to the remaining * text in (path). * * In all cases, the beginning of (devspec) is compared to the names * of known devices in the device switch, and then any following text * is parsed according to the rules applied to the device type. 
* * For disk-type devices, the syntax is: * * disk[]: * */ static int uboot_parsedev(struct uboot_devdesc **dev, const char *devspec, const char **path) { struct uboot_devdesc *idev; struct devsw *dv; char *cp; const char *np; int i, unit, err; /* minimum length check */ if (strlen(devspec) < 2) return(EINVAL); /* look for a device that matches */ for (i = 0, dv = NULL; devsw[i] != NULL; i++) { if (!strncmp(devspec, devsw[i]->dv_name, strlen(devsw[i]->dv_name))) { dv = devsw[i]; break; } } if (dv == NULL) return(ENOENT); idev = malloc(sizeof(struct uboot_devdesc)); err = 0; np = (devspec + strlen(dv->dv_name)); switch(dv->dv_type) { case DEVT_NONE: break; #ifdef LOADER_DISK_SUPPORT case DEVT_DISK: err = disk_parsedev((struct disk_devdesc *)idev, np, path); if (err != 0) goto fail; break; #endif case DEVT_NET: unit = 0; if (*np && (*np != ':')) { /* get unit number if present */ unit = strtol(np, &cp, 0); if (cp == np) { err = EUNIT; goto fail; } } if (*cp && (*cp != ':')) { err = EINVAL; goto fail; } idev->dd.d_unit = unit; if (path != NULL) *path = (*cp == 0) ? cp : cp + 1; break; default: err = EINVAL; goto fail; } idev->dd.d_dev = dv; if (dev == NULL) { free(idev); } else { *dev = idev; } return (0); fail: free(idev); return (err); } char * uboot_fmtdev(void *vdev) { struct uboot_devdesc *dev = (struct uboot_devdesc *)vdev; static char buf[128]; switch(dev->dd.d_dev->dv_type) { case DEVT_NONE: strcpy(buf, "(no device)"); break; case DEVT_DISK: #ifdef LOADER_DISK_SUPPORT return (disk_fmtdev(vdev)); #endif case DEVT_NET: sprintf(buf, "%s%d:", dev->dd.d_dev->dv_name, dev->dd.d_unit); break; } return(buf); } /* * Set currdev to suit the value being supplied in (value). 
*/ int uboot_setcurrdev(struct env_var *ev, int flags, const void *value) { struct uboot_devdesc *ncurr; int rv; if ((rv = uboot_parsedev(&ncurr, value, NULL)) != 0) return (rv); free(ncurr); - env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); - return (0); + + return (mount_currdev(ev, flags, value)); } diff --git a/stand/userboot/userboot/devicename.c b/stand/userboot/userboot/devicename.c index 8819af5ef127..e770e6bf4bb1 100644 --- a/stand/userboot/userboot/devicename.c +++ b/stand/userboot/userboot/devicename.c @@ -1,227 +1,227 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include "bootstrap.h" #include "disk.h" #include "libuserboot.h" #if defined(USERBOOT_ZFS_SUPPORT) #include "libzfs.h" #endif static int userboot_parsedev(struct disk_devdesc **dev, const char *devspec, const char **path); /* * Point (dev) at an allocated device specifier for the device matching the * path in (devspec). If it contains an explicit device specification, * use that. If not, use the default device. * * A NULL (devspec), one starting with '/', or one without a ':' is treated * as a bare path: the "currdev" environment value is parsed instead and, * when that succeeds and (path) is non-NULL, (*path) is set to (devspec). * Returns the result of userboot_parsedev(). */ int userboot_getdev(void **vdev, const char *devspec, const char **path) { struct disk_devdesc **dev = (struct disk_devdesc **)vdev; int rv; /* * If it looks like this is just a path and no * device, go with the current device. */ if ((devspec == NULL) || (devspec[0] == '/') || (strchr(devspec, ':') == NULL)) { /* NOTE(review): the getenv() result is passed on unchecked -- confirm "currdev" is always set by the time this runs. */ rv = userboot_parsedev(dev, getenv("currdev"), NULL); if (rv == 0 && path != NULL) *path = devspec; return (rv); } /* * Try to parse the device name off the beginning of the devspec */ return (userboot_parsedev(dev, devspec, path)); } /* * Point (dev) at an allocated device specifier matching the string version * at the beginning of (devspec). Return a pointer to the remaining * text in (path). * * In all cases, the beginning of (devspec) is compared to the names * of known devices in the device switch, and then any following text * is parsed according to the rules applied to the device type. 
* * For disk-type devices, the syntax is: * * disk[s][]: * */ static int userboot_parsedev(struct disk_devdesc **dev, const char *devspec, const char **path) { struct disk_devdesc *idev; struct devsw *dv; int i, unit, err; const char *cp; const char *np; /* minimum length check */ if (strlen(devspec) < 2) return (EINVAL); /* look for a device that matches */ for (i = 0, dv = NULL; devsw[i] != NULL; i++) { if (strncmp(devspec, devsw[i]->dv_name, strlen(devsw[i]->dv_name)) == 0) { dv = devsw[i]; break; } } if (dv == NULL) return (ENOENT); idev = malloc(sizeof(struct disk_devdesc)); err = 0; np = (devspec + strlen(dv->dv_name)); switch (dv->dv_type) { case DEVT_NONE: /* XXX what to do here? Do we care? */ break; case DEVT_DISK: err = disk_parsedev(idev, np, path); if (err != 0) goto fail; break; case DEVT_CD: case DEVT_NET: unit = 0; if (*np && (*np != ':')) { /* get unit number if present */ unit = strtol(np, (char **)&cp, 0); if (cp == np) { err = EUNIT; goto fail; } } else { cp = np; } if (*cp && (*cp != ':')) { err = EINVAL; goto fail; } idev->dd.d_unit = unit; if (path != NULL) *path = (*cp == 0) ? cp : cp + 1; break; case DEVT_ZFS: #if defined(USERBOOT_ZFS_SUPPORT) err = zfs_parsedev((struct zfs_devdesc *)idev, np, path); if (err != 0) goto fail; break; #else /* FALLTHROUGH */ #endif default: err = EINVAL; goto fail; } idev->dd.d_dev = dv; if (dev == NULL) { free(idev); } else { *dev = idev; } return (0); fail: free(idev); return (err); } char * userboot_fmtdev(void *vdev) { struct devdesc *dev = (struct devdesc *)vdev; static char buf[128]; /* XXX device length constant? 
*/ switch(dev->d_dev->dv_type) { case DEVT_NONE: strcpy(buf, "(no device)"); break; case DEVT_CD: sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit); break; case DEVT_DISK: return (disk_fmtdev(vdev)); case DEVT_NET: sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit); break; case DEVT_ZFS: #if defined(USERBOOT_ZFS_SUPPORT) return (zfs_fmtdev(vdev)); #else sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit); #endif break; } return (buf); } /* * Set currdev to suit the value being supplied in (value) */ int userboot_setcurrdev(struct env_var *ev, int flags, const void *value) { struct disk_devdesc *ncurr; int rv; if ((rv = userboot_parsedev(&ncurr, value, NULL)) != 0) return (rv); free(ncurr); - env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL); - return (0); + + return (mount_currdev(ev, flags, value)); } diff --git a/stand/userboot/userboot/main.c b/stand/userboot/userboot/main.c index c094e987b0cb..48c4e388b828 100644 --- a/stand/userboot/userboot/main.c +++ b/stand/userboot/userboot/main.c @@ -1,331 +1,335 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 1998,2000 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "bootstrap.h" #include "disk.h" #include "libuserboot.h" #if defined(USERBOOT_ZFS_SUPPORT) #include "libzfs.h" static void userboot_zfs_probe(void); static int userboot_zfs_found; #endif /* Minimum version required */ #define USERBOOT_VERSION USERBOOT_VERSION_3 #define LOADER_PATH "/boot/loader" #define INTERP_MARKER "$Interpreter:" #define MALLOCSZ (64*1024*1024) struct loader_callbacks *callbacks; void *callbacks_arg; static jmp_buf jb; struct arch_switch archsw; /* MI/MD interface boundary */ static void extract_currdev(void); static void check_interpreter(void); void delay(int usec) { CALLBACK(delay, usec); } void exit(int v) { CALLBACK(exit, v); longjmp(jb, 1); } static void check_interpreter(void) { struct stat st; size_t marklen, rdsize; const char *guest_interp, *my_interp; char *buf; int fd; /* * If we can't stat(2) or open(2) LOADER_PATH, then we'll fail by * simply letting us roll on with whatever interpreter we were compiled * with. This is likely not going to be an issue in reality. 
*/ buf = NULL; if (stat(LOADER_PATH, &st) != 0) return; if ((fd = open(LOADER_PATH, O_RDONLY)) < 0) return; rdsize = st.st_size; buf = malloc(rdsize); if (buf == NULL) goto out; if (read(fd, buf, rdsize) < rdsize) goto out; marklen = strlen(INTERP_MARKER); my_interp = bootprog_interp + marklen; /* * Here we make the assumption that a loader binary without the * interpreter marker is a 4th one. All loader binaries going forward * should have this properly specified, so our assumption should always * be a good one. */ if ((guest_interp = memmem(buf, rdsize, INTERP_MARKER, marklen)) != NULL) guest_interp += marklen; else guest_interp = "4th"; /* * The guest interpreter may not have a version of loader that * specifies the interpreter installed. If that's the case, we'll * assume it's legacy (4th) and request a swap to that if we're * a Lua-userboot. */ if (strcmp(my_interp, guest_interp) != 0) CALLBACK(swap_interpreter, guest_interp); out: free(buf); close(fd); return; } void loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) { static char mallocbuf[MALLOCSZ]; char *var; int i; if (version < USERBOOT_VERSION) abort(); callbacks = cb; callbacks_arg = arg; userboot_disk_maxunit = ndisks; /* * initialise the heap as early as possible. Once this is done, * alloc() is usable. */ setheap((void *)mallocbuf, (void *)(mallocbuf + sizeof(mallocbuf))); /* * Hook up the console */ cons_probe(); + /* Set up currdev variable to have hooks in place. 
*/ + env_setenv("currdev", EV_VOLATILE, "", + userboot_setcurrdev, env_nounset); + printf("\n%s", bootprog_info); #if 0 printf("Memory: %ld k\n", memsize() / 1024); #endif setenv("LINES", "24", 1); /* optional */ /* * Set custom environment variables */ i = 0; while (1) { var = CALLBACK(getenv, i++); if (var == NULL) break; putenv(var); } archsw.arch_autoload = userboot_autoload; archsw.arch_getdev = userboot_getdev; archsw.arch_copyin = userboot_copyin; archsw.arch_copyout = userboot_copyout; archsw.arch_readin = userboot_readin; #if defined(USERBOOT_ZFS_SUPPORT) archsw.arch_zfs_probe = userboot_zfs_probe; #endif /* * Initialise the block cache. Set the upper limit. */ bcache_init(32768, 512); /* * March through the device switch probing for things. */ for (i = 0; devsw[i] != NULL; i++) if (devsw[i]->dv_init != NULL) (devsw[i]->dv_init)(); extract_currdev(); /* * Checking the interpreter isn't worth the overhead unless we * actually have the swap_interpreter callback, so we actually version * check here rather than later on. */ if (version >= USERBOOT_VERSION_5) check_interpreter(); if (setjmp(jb)) return; interact(); /* doesn't return */ exit(0); } static void set_currdev(const char *devname) { env_setenv("currdev", EV_VOLATILE, devname, userboot_setcurrdev, env_nounset); env_setenv("loaddev", EV_VOLATILE, devname, env_noset, env_nounset); } /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. 
*/ static void extract_currdev(void) { /* Preference order: ZFS (when compiled in and a pool was found), then disk unit 0, then host_dev. */ struct disk_devdesc dev; struct devdesc *dd; #if defined(USERBOOT_ZFS_SUPPORT) struct zfs_devdesc zdev; char *buf = NULL; if (userboot_zfs_found) { /* Leave the pool/root guid's unassigned */ bzero(&zdev, sizeof(zdev)); zdev.dd.d_dev = &zfs_dev; init_zfs_boot_options(zfs_fmtdev(&zdev)); dd = &zdev.dd; } else #endif if (userboot_disk_maxunit > 0) { /* At least one disk: start from unit 0 with wildcard slice and partition. */ dev.dd.d_dev = &userboot_disk; dev.dd.d_unit = 0; dev.d_slice = D_SLICEWILD; dev.d_partition = D_PARTWILD; /* * If we cannot auto-detect the partition type then * access the disk as a raw device. */ if (dev.dd.d_dev->dv_open(NULL, &dev)) { dev.d_slice = D_SLICENONE; dev.d_partition = D_PARTNONE; } dd = &dev.dd; } else { /* Nothing else available: fall back to host_dev, unit 0. */ dev.dd.d_dev = &host_dev; dev.dd.d_unit = 0; dd = &dev.dd; } /* Publish the formatted name of the chosen device through set_currdev(). */ set_currdev(userboot_fmtdev(dd)); #if defined(USERBOOT_ZFS_SUPPORT) if (userboot_zfs_found) { buf = malloc(VDEV_PAD_SIZE); if (buf != NULL) { if (zfs_get_bootonce(&zdev, OS_BOOTONCE, buf, VDEV_PAD_SIZE) == 0) { /* A bootonce value replaces the device chosen above and is also exported as zfs-bootonce. */ printf("zfs bootonce: %s\n", buf); set_currdev(buf); setenv("zfs-bootonce", buf, 1); } free(buf); (void) zfs_attach_nvstore(&zdev); } } #endif } #if defined(USERBOOT_ZFS_SUPPORT) static void userboot_zfs_probe(void) { char devname[32]; uint64_t pool_guid; int unit; /* * Open all the disks we can find and see if we can reconstruct * ZFS pools from them. Record if any were found. */ for (unit = 0; unit < userboot_disk_maxunit; unit++) { sprintf(devname, "disk%d:", unit); pool_guid = 0; zfs_probe_dev(devname, &pool_guid); /* pool_guid left at 0 presumably means no pool on this unit -- confirm against zfs_probe_dev(). */ if (pool_guid != 0) userboot_zfs_found = 1; } } #endif COMMAND_SET(quit, "quit", "exit the loader", command_quit); static int command_quit(int argc, char *argv[]) { /* This file's exit() finishes with longjmp(), so the return below is never reached. */ exit(USERBOOT_EXIT_QUIT); return (CMD_OK); } COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); static int command_reboot(int argc, char *argv[]) { /* Same path as quit, but passes USERBOOT_EXIT_REBOOT to exit(). */ exit(USERBOOT_EXIT_REBOOT); return (CMD_OK); }