diff --git a/stand/common/bootstrap.h b/stand/common/bootstrap.h index cc22cbcb729d..80c1a2352b3c 100644 --- a/stand/common/bootstrap.h +++ b/stand/common/bootstrap.h @@ -1,393 +1,390 @@ /*- * Copyright (c) 1998 Michael Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _BOOTSTRAP_H_ #define _BOOTSTRAP_H_ #include #include #include #include #include #include "readin.h" /* Commands and return values; nonzero return sets command_errmsg != NULL */ typedef int (bootblk_cmd_t)(int argc, char *argv[]); #define COMMAND_ERRBUFSZ (256) extern const char *command_errmsg; extern char command_errbuf[COMMAND_ERRBUFSZ]; #define CMD_OK 0 #define CMD_WARN 1 #define CMD_ERROR 2 #define CMD_CRIT 3 #define CMD_FATAL 4 /* interp.c */ void interact(void); void interp_emit_prompt(void); int interp_builtin_cmd(int argc, char *argv[]); bool interp_has_builtin_cmd(const char *cmd); /* Called by interp.c for interp_*.c embedded interpreters */ int interp_include(const char *); /* Execute commands from filename */ void interp_init(void); /* Initialize interpreter */ int interp_run(const char *); /* Run a single command */ /* interp_backslash.c */ char *backslash(const char *str); /* interp_parse.c */ int parse(int *argc, char ***argv, const char *str); /* boot.c */ void autoboot_maybe(void); int getrootmount(char *rootdev); /* misc.c */ char *unargv(int argc, char *argv[]); size_t strlenout(vm_offset_t str); char *strdupout(vm_offset_t str); void kern_bzero(vm_offset_t dest, size_t len); int kern_pread(readin_handle_t fd, vm_offset_t dest, size_t len, off_t off); void *alloc_pread(readin_handle_t fd, off_t off, size_t len); /* bcache.c */ void bcache_init(size_t nblks, size_t bsize); void bcache_add_dev(int); void *bcache_allocate(void); void bcache_free(void *); int bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize); /* * Disk block cache */ struct bcache_devdata { int (*dv_strategy)(void *, int, daddr_t, size_t, char *, size_t *); void *dv_devdata; void *dv_cache; }; /* * Modular console support. 
*/ struct console { const char *c_name; const char *c_desc; int c_flags; #define C_PRESENTIN (1<<0) /* console can provide input */ #define C_PRESENTOUT (1<<1) /* console can provide output */ #define C_ACTIVEIN (1<<2) /* user wants input from console */ #define C_ACTIVEOUT (1<<3) /* user wants output to console */ #define C_WIDEOUT (1<<4) /* c_out routine groks wide chars */ /* set c_flags to match hardware */ void (* c_probe)(struct console *cp); /* reinit XXX may need more args */ int (* c_init)(int arg); /* emit c */ void (* c_out)(int c); /* wait for and return input */ int (* c_in)(void); /* return nonzero if input waiting */ int (* c_ready)(void); }; extern struct console *consoles[]; void cons_probe(void); bool cons_update_mode(bool); void autoload_font(bool); extern int module_verbose; enum { MODULE_VERBOSE_SILENT, /* say nothing */ MODULE_VERBOSE_SIZE, /* print name and size */ MODULE_VERBOSE_TWIDDLE, /* show progress */ MODULE_VERBOSE_FULL, /* all we have */ }; /* * Plug-and-play enumerator/configurator interface. 
*/ struct pnphandler { const char *pp_name; /* handler/bus name */ void (*pp_enumerate)(void); /* enumerate PnP devices, add to chain */ }; struct pnpident { /* ASCII identifier, actual format varies with bus/handler */ char *id_ident; STAILQ_ENTRY(pnpident) id_link; }; struct pnpinfo { char *pi_desc; /* ASCII description, optional */ int pi_revision; /* optional revision (or -1) if not supported */ char *pi_module; /* module/args nominated to handle device */ int pi_argc; /* module arguments */ char **pi_argv; struct pnphandler *pi_handler; /* handler which detected this device */ STAILQ_HEAD(, pnpident) pi_ident; /* list of identifiers */ STAILQ_ENTRY(pnpinfo) pi_link; }; STAILQ_HEAD(pnpinfo_stql, pnpinfo); extern struct pnphandler *pnphandlers[]; /* provided by MD code */ void pnp_addident(struct pnpinfo *pi, char *ident); struct pnpinfo *pnp_allocinfo(void); void pnp_freeinfo(struct pnpinfo *pi); void pnp_addinfo(struct pnpinfo *pi); char *pnp_eisaformat(uint8_t *data); /* * < 0 - No ISA in system * == 0 - Maybe ISA, search for read data port * > 0 - ISA in system, value is read data port address */ extern int isapnp_readport; /* * Version information */ extern char bootprog_info[]; /* * Interpreter information */ extern const char bootprog_interp[]; #define INTERP_DEFINE(interpstr) \ const char bootprog_interp[] = "$Interpreter:" interpstr /* * Preloaded file metadata header. * * Metadata are allocated on our heap, and copied into kernel space * before executing the kernel. */ struct file_metadata { size_t md_size; uint16_t md_type; vm_offset_t md_addr; /* Valid after copied to kernel space */ struct file_metadata *md_next; char md_data[1]; /* data are immediately appended */ }; struct preloaded_file; struct mod_depend; struct kernel_module { char *m_name; /* module name */ int m_version; /* module version */ /* char *m_args; */ /* arguments for the module */ struct preloaded_file *m_fp; struct kernel_module *m_next; }; /* * Preloaded file information. 
Depending on type, file can contain * additional units called 'modules'. * * At least one file (the kernel) must be loaded in order to boot. * The kernel is always loaded first. * * String fields (m_name, m_type) should be dynamically allocated. */ struct preloaded_file { char *f_name; /* file name */ char *f_type; /* verbose file type, eg 'ELF kernel', 'pnptable', etc. */ char *f_args; /* arguments for the file */ /* metadata that will be placed in the module directory */ struct file_metadata *f_metadata; int f_loader; /* index of the loader that read the file */ vm_offset_t f_addr; /* load address */ size_t f_size; /* file size */ struct kernel_module *f_modules; /* list of modules if any */ struct preloaded_file *f_next; /* next file */ #ifdef __amd64__ bool f_kernphys_relocatable; #endif #if defined(__i386__) bool f_tg_kernel_support; #endif }; struct file_format { /* * Load function must return EFTYPE if it can't handle * the module supplied */ int (*l_load)(char *, uint64_t, struct preloaded_file **); /* * Only a loader that will load a kernel (first module) * should have an exec handler */ int (*l_exec)(struct preloaded_file *); }; extern struct file_format *file_formats[]; /* supplied by consumer */ extern struct preloaded_file *preloaded_files; int mod_load(char *name, struct mod_depend *verinfo, int argc, char *argv[]); int mod_loadkld(const char *name, int argc, char *argv[]); void unload(void); struct preloaded_file *file_alloc(void); struct preloaded_file *file_findfile(const char *name, const char *type); struct file_metadata *file_findmetadata(struct preloaded_file *fp, int type); struct preloaded_file *file_loadraw(const char *name, char *type, int insert); void file_discard(struct preloaded_file *fp); void file_addmetadata(struct preloaded_file *, int, size_t, void *); int file_addmodule(struct preloaded_file *, char *, int, struct kernel_module **); void file_removemetadata(struct preloaded_file *fp); int file_addbuf(const char *name, const char 
*type, size_t len, void *buf); int tslog_init(void); int tslog_publish(void); vm_offset_t build_font_module(vm_offset_t); /* MI module loaders */ #ifdef __elfN /* Relocation types. */ #define ELF_RELOC_REL 1 #define ELF_RELOC_RELA 2 /* Relocation offset for some architectures */ extern uint64_t __elfN(relocation_offset); struct elf_file; typedef Elf_Addr (symaddr_fn)(struct elf_file *ef, Elf_Size symidx); int __elfN(loadfile)(char *, uint64_t, struct preloaded_file **); int __elfN(obj_loadfile)(char *, uint64_t, struct preloaded_file **); int __elfN(reloc)(struct elf_file *ef, symaddr_fn *symaddr, const void *reldata, int reltype, Elf_Addr relbase, Elf_Addr dataaddr, void *data, size_t len); int __elfN(loadfile_raw)(char *, uint64_t, struct preloaded_file **, int); int __elfN(load_modmetadata)(struct preloaded_file *, uint64_t); #endif /* * Support for commands */ struct bootblk_command { const char *c_name; const char *c_desc; bootblk_cmd_t *c_fn; }; #define COMMAND_SET(tag, key, desc, func) \ static bootblk_cmd_t func; \ static struct bootblk_command _cmd_ ## tag = { key, desc, func }; \ DATA_SET(Xcommand_set, _cmd_ ## tag) SET_DECLARE(Xcommand_set, struct bootblk_command); /* * The intention of the architecture switch is to provide a convenient * encapsulation of the interface between the bootstrap MI and MD code. * MD code may selectively populate the switch at runtime based on the * actual configuration of the target system. 
*/ struct arch_switch { /* Automatically load modules as required by detected hardware */ int (*arch_autoload)(void); /* Locate the device for (name), return pointer to tail in (*path) */ int (*arch_getdev)(void **dev, const char *name, const char **path); /* * Copy from local address space to module address space, * similar to bcopy() */ ssize_t (*arch_copyin)(const void *, vm_offset_t, const size_t); /* * Copy to local address space from module address space, * similar to bcopy() */ ssize_t (*arch_copyout)(const vm_offset_t, void *, const size_t); /* Read from file to module address space, same semantics as read() */ ssize_t (*arch_readin)(readin_handle_t, vm_offset_t, const size_t); /* Perform ISA byte port I/O (only for systems with ISA) */ int (*arch_isainb)(int port); void (*arch_isaoutb)(int port, int value); /* * Interface to adjust the load address according to the "object" * being loaded. */ uint64_t (*arch_loadaddr)(u_int type, void *data, uint64_t addr); #define LOAD_ELF 1 /* data points to the ELF header. */ #define LOAD_RAW 2 /* data points to the file name. */ /* * Interface to inform MD code about a loaded (ELF) segment. This * can be used to flush caches and/or set up translations. */ #ifdef __elfN void (*arch_loadseg)(Elf_Ehdr *eh, Elf_Phdr *ph, uint64_t delta); #else void (*arch_loadseg)(void *eh, void *ph, uint64_t delta); #endif /* Probe ZFS pool(s), if needed. */ void (*arch_zfs_probe)(void); /* Return the hypervisor name/type or NULL if not virtualized. */ const char *(*arch_hypervisor)(void); - - /* For kexec-type loaders, get ksegment structure */ - void (*arch_kexec_kseg_get)(int *nseg, void **kseg); }; extern struct arch_switch archsw; /* This must be provided by the MD code, but should it be in the archsw? */ void delay(int delay); /* common code to set currdev variable. 
*/ int gen_setcurrdev(struct env_var *ev, int flags, const void *value); int mount_currdev(struct env_var *, int, const void *); void set_currdev(const char *devname); #ifndef CTASSERT #define CTASSERT(x) _Static_assert(x, "compile-time assertion failed") #endif #endif /* !_BOOTSTRAP_H_ */ diff --git a/stand/kboot/arch/aarch64/exec.c b/stand/kboot/arch/aarch64/exec.c index b0cb2fcbb531..521e28beb562 100644 --- a/stand/kboot/arch/aarch64/exec.c +++ b/stand/kboot/arch/aarch64/exec.c @@ -1,298 +1,296 @@ /*- * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef EFI #include #include #include "loader_efi.h" #else #include "host_syscall.h" #endif #include #include "bootstrap.h" #include "kboot.h" #include "bootstrap.h" #include "platform/acfreebsd.h" #include "acconfig.h" #define ACPI_SYSTEM_XFACE #include "actypes.h" #include "actbl.h" #include "cache.h" #ifndef EFI #define LOADER_PAGE_SIZE PAGE_SIZE #endif #ifdef EFI static EFI_GUID acpi_guid = ACPI_TABLE_GUID; static EFI_GUID acpi20_guid = ACPI_20_TABLE_GUID; #endif static int elf64_exec(struct preloaded_file *amp); static int elf64_obj_exec(struct preloaded_file *amp); bool do_mem_map = false; extern uint32_t efi_map_size; extern vm_paddr_t efi_map_phys_src; /* From DTB */ extern vm_paddr_t efi_map_phys_dst; /* From our memory map metadata module */ int bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp, bool exit_bs); static struct file_format arm64_elf = { elf64_loadfile, elf64_exec }; struct file_format *file_formats[] = { &arm64_elf, NULL }; #ifndef EFI extern uintptr_t tramp; extern uint32_t tramp_size; extern uint32_t tramp_data_offset; struct trampoline_data { uint64_t entry; // 0 (PA where kernel loaded) uint64_t modulep; // 8 module metadata uint64_t memmap_src; // 16 Linux-provided memory map PA uint64_t memmap_dst; // 24 Module data copy PA uint64_t memmap_len; // 32 Length to copy }; #endif extern vm_offset_t kboot_get_phys_load_segment(void); static int elf64_exec(struct preloaded_file *fp) { vm_offset_t modulep, kernendp; #ifdef EFI vm_offset_t clean_addr; size_t clean_size; void (*entry)(vm_offset_t); #else vm_offset_t trampolinebase; vm_offset_t staging; void *trampcode; uint64_t *trampoline; struct trampoline_data *trampoline_data; int nseg; void *kseg; #endif struct file_metadata *md; Elf_Ehdr *ehdr; int error; #ifdef EFI ACPI_TABLE_RSDP *rsdp; char buf[24]; int revision; /* * Report the RSDP to the kernel. 
The old code used the 'hints' method * to communicate this to the kernel. However, while convenient, the * 'hints' method is fragile and does not work when static hints are * compiled into the kernel. Instead, move to setting different tunables * that start with acpi. The old 'hints' can be removed before we branch * for FreeBSD 15. */ rsdp = efi_get_table(&acpi20_guid); if (rsdp == NULL) { rsdp = efi_get_table(&acpi_guid); } if (rsdp != NULL) { sprintf(buf, "0x%016llx", (unsigned long long)rsdp); setenv("hint.acpi.0.rsdp", buf, 1); setenv("acpi.rsdp", buf, 1); revision = rsdp->Revision; if (revision == 0) revision = 1; sprintf(buf, "%d", revision); setenv("hint.acpi.0.revision", buf, 1); setenv("acpi.revision", buf, 1); strncpy(buf, rsdp->OemId, sizeof(rsdp->OemId)); buf[sizeof(rsdp->OemId)] = '\0'; setenv("hint.acpi.0.oem", buf, 1); setenv("acpi.oem", buf, 1); sprintf(buf, "0x%016x", rsdp->RsdtPhysicalAddress); setenv("hint.acpi.0.rsdt", buf, 1); setenv("acpi.rsdt", buf, 1); if (revision >= 2) { /* XXX extended checksum? */ sprintf(buf, "0x%016llx", (unsigned long long)rsdp->XsdtPhysicalAddress); setenv("hint.acpi.0.xsdt", buf, 1); setenv("acpi.xsdt", buf, 1); sprintf(buf, "%d", rsdp->Length); setenv("hint.acpi.0.xsdt_length", buf, 1); setenv("acpi.xsdt_length", buf, 1); } } #else vm_offset_t rsdp; rsdp = acpi_rsdp(); if (rsdp != 0) { char buf[24]; printf("Found ACPI 2.0 at %#016lx\n", rsdp); sprintf(buf, "0x%016llx", (unsigned long long)rsdp); setenv("hint.acpi.0.rsdp", buf, 1); /* For 13.1R bootability */ setenv("acpi.rsdp", buf, 1); /* Nobody uses the rest of that stuff */ } // XXX Question: why not just use malloc? trampcode = host_getmem(LOADER_PAGE_SIZE); if (trampcode == NULL) { printf("Unable to allocate trampoline\n"); return (ENOMEM); } bzero((void *)trampcode, LOADER_PAGE_SIZE); bcopy((void *)&tramp, (void *)trampcode, tramp_size); trampoline = (void *)trampcode; /* * Figure out where to put it. 
* * Linux does not allow us to kexec_load into any part of memory. Ask * arch_loadaddr to resolve the first available chunk of physical memory * where loading is possible (load_addr). * * The kernel is loaded at the 'base' address in contiguous physical * memory. We use the 2MB in front of the kernel as a place to put our * trampoline, but that's really overkill since we only need ~100 bytes. * The arm64 kernel's entry requirements are only 'load the kernel at a * 2MB alignment' and it figures out the rest, creates the right page * tables, etc. */ staging = kboot_get_phys_load_segment(); printf("Load address at %#jx\n", (uintmax_t)staging); printf("Relocation offset is %#jx\n", (uintmax_t)elf64_relocation_offset); #endif if ((md = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL) return(EFTYPE); ehdr = (Elf_Ehdr *)&(md->md_data); #ifdef EFI entry = efi_translate(ehdr->e_entry); efi_time_fini(); #endif error = bi_load(fp->f_args, &modulep, &kernendp, true); if (error != 0) { #ifdef EFI efi_time_init(); #endif return (error); } dev_cleanup(); #ifdef EFI /* Clean D-cache under kernel area and invalidate whole I-cache */ clean_addr = (vm_offset_t)efi_translate(fp->f_addr); clean_size = (vm_offset_t)efi_translate(kernendp) - clean_addr; cpu_flush_dcache((void *)clean_addr, clean_size); cpu_inval_icache(); (*entry)(modulep); #else /* Linux will flush the caches, just pass this data into our trampoline and go */ trampoline_data = (void *)trampoline + tramp_data_offset; memset(trampoline_data, 0, sizeof(*trampoline_data)); trampoline_data->entry = ehdr->e_entry - fp->f_addr + staging; trampoline_data->modulep = modulep; printf("Modulep = %jx\n", (uintmax_t)modulep); if (efi_map_phys_src != 0) { md = file_findmetadata(fp, MODINFOMD_EFI_MAP); if (md == NULL || md->md_addr == 0) { printf("Need to copy EFI MAP, but EFI MAP not found. 
%p\n", md); } else { printf("Metadata EFI map loaded at VA %lx\n", md->md_addr); efi_map_phys_dst = md->md_addr + staging + roundup2(sizeof(struct efi_map_header), 16) - fp->f_addr; trampoline_data->memmap_src = efi_map_phys_src; trampoline_data->memmap_dst = efi_map_phys_dst; trampoline_data->memmap_len = efi_map_size - roundup2(sizeof(struct efi_map_header), 16); printf("Copying UEFI Memory Map data from %#lx to %#lx %ld bytes\n", efi_map_phys_src, trampoline_data->memmap_dst, trampoline_data->memmap_len); } } /* * Copy the trampoline to the ksegs. Since we're just bouncing off of * this into the kernel, no need to preserve the pages. On arm64, the * kernel sets up the initial page table, so we don't have to preserve * the memory used for the trampoline past when it calls the kernel. */ printf("kernendp = %#llx\n", (long long)kernendp); trampolinebase = staging + (kernendp - fp->f_addr); printf("trampolinebase = %#llx\n", (long long)trampolinebase); archsw.arch_copyin((void *)trampcode, kernendp, tramp_size); printf("Trampoline bouncing to %#llx\n", (long long)trampoline_data->entry); - if (archsw.arch_kexec_kseg_get == NULL) - panic("architecture did not provide kexec segment mapping"); - archsw.arch_kexec_kseg_get(&nseg, &kseg); + kboot_kseg_get(&nseg, &kseg); error = host_kexec_load(trampolinebase, nseg, kseg, HOST_KEXEC_ARCH_AARCH64); if (error != 0) panic("kexec_load returned error: %d", error); host_reboot(HOST_REBOOT_MAGIC1, HOST_REBOOT_MAGIC2, HOST_REBOOT_CMD_KEXEC, 0); #endif panic("exec returned"); } static int elf64_obj_exec(struct preloaded_file *fp) { printf("%s called for preloaded file %p (=%s):\n", __func__, fp, fp->f_name); return (ENOSYS); } diff --git a/stand/kboot/arch/amd64/elf64_freebsd.c b/stand/kboot/arch/amd64/elf64_freebsd.c index 68588c0f2f02..0d950fb61eb6 100644 --- a/stand/kboot/arch/amd64/elf64_freebsd.c +++ b/stand/kboot/arch/amd64/elf64_freebsd.c @@ -1,466 +1,464 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2014 The 
FreeBSD Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 64 #include #include #include #include #include #include #include #include #ifdef EFI #include #include #else #include "host_syscall.h" #endif #include "bootstrap.h" #include "kboot.h" #include "platform/acfreebsd.h" #include "acconfig.h" #define ACPI_SYSTEM_XFACE #include "actypes.h" #include "actbl.h" #ifdef EFI #include "loader_efi.h" static EFI_GUID acpi_guid = ACPI_TABLE_GUID; static EFI_GUID acpi20_guid = ACPI_20_TABLE_GUID; #endif #ifdef EFI #define LOADER_PAGE_SIZE EFI_PAGE_SIZE #else #define LOADER_PAGE_SIZE PAGE_SIZE #endif extern vm_offset_t kboot_get_phys_load_segment(void); extern int bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp, bool exit_bs); static int elf64_exec(struct preloaded_file *amp); static int elf64_obj_exec(struct preloaded_file *amp); static struct file_format amd64_elf = { .l_load = elf64_loadfile, .l_exec = elf64_exec, }; static struct file_format amd64_elf_obj = { .l_load = elf64_obj_loadfile, .l_exec = elf64_obj_exec, }; #ifdef EFI extern struct file_format multiboot2; extern struct file_format multiboot2_obj; #endif struct file_format *file_formats[] = { #ifdef EFI &multiboot2, &multiboot2_obj, #endif &amd64_elf, &amd64_elf_obj, NULL }; #ifndef EFI /* * We create the stack that we want. We have the address of the page tables * we make on top (so we pop that off and set %cr3). We have the entry point * to the kernel (which retq pops off) This leaves the stack that the btext * wants: offset 4 is modulep and offset8 is kernend, with the filler bytes * to keep this aligned. This makes the trampoline very simple. 
*/ struct trampoline_data { uint64_t pt4; // Page table address to pop uint64_t entry; // return address to jump to kernel uint32_t fill1; // 0 uint32_t modulep; // 4 module metadata uint32_t kernend; // 8 kernel end uint32_t fill2; // 12 }; _Static_assert(sizeof(struct trampoline_data) == 32, "Bad size for trampoline data"); #endif static pml4_entry_t *PT4; static pdp_entry_t *PT3_l, *PT3_u; static pd_entry_t *PT2_l0, *PT2_l1, *PT2_l2, *PT2_l3, *PT2_u0, *PT2_u1; #ifdef EFI static pdp_entry_t *PT3; static pd_entry_t *PT2; extern EFI_PHYSICAL_ADDRESS staging; static void (*trampoline)(uint64_t stack, void *copy_finish, uint64_t kernend, uint64_t modulep, pml4_entry_t *pagetable, uint64_t entry); #endif extern uintptr_t tramp; extern uint32_t tramp_size; #ifndef EFI extern uint32_t tramp_data_offset; #endif /* * There is an ELF kernel and one or more ELF modules loaded. * We wish to start executing the kernel image, so make such * preparations as are required, and do so. */ static int elf64_exec(struct preloaded_file *fp) { struct file_metadata *md; Elf_Ehdr *ehdr; vm_offset_t modulep, kernend; int err, i; char buf[24]; #ifdef EFI ACPI_TABLE_RSDP *rsdp = NULL; int revision; int copy_auto; vm_offset_t trampstack, trampcode; #else vm_offset_t rsdp = 0; void *trampcode; int nseg; void *kseg; vm_offset_t trampolinebase; uint64_t *trampoline; struct trampoline_data *trampoline_data; vm_offset_t staging; int error; #endif #ifdef EFI copy_auto = copy_staging == COPY_STAGING_AUTO; if (copy_auto) copy_staging = fp->f_kernphys_relocatable ? COPY_STAGING_DISABLE : COPY_STAGING_ENABLE; #else /* * Figure out where to put it. * * Linux does not allow us to kexec_load into any part of memory. Ask * arch_loadaddr to resolve the first available chunk of physical memory * where loading is possible (load_addr). * * The kernel is loaded at the 'base' address in contiguous physical * pages (using 2MB super pages). 
The first such page is unused by the * kernel and serves as a good place to put not only the trampoline, but * the page table pages that the trampoline needs to setup the proper * kernel starting environment. */ staging = trampolinebase = kboot_get_phys_load_segment(); trampolinebase += 1ULL << 20; /* Copy trampoline to base + 1MB, kernel will wind up at 2MB */ printf("Load address at %#jx\n", (uintmax_t)trampolinebase); printf("Relocation offset is %#jx\n", (uintmax_t)elf64_relocation_offset); #endif /* * Report the RSDP to the kernel. While this can be found with * a BIOS boot, the RSDP may be elsewhere when booted from UEFI. */ #ifdef EFI rsdp = efi_get_table(&acpi20_guid); if (rsdp == NULL) { rsdp = efi_get_table(&acpi_guid); } #else rsdp = acpi_rsdp(); #endif if (rsdp != 0) { sprintf(buf, "0x%016llx", (unsigned long long)rsdp); setenv("acpi.rsdp", buf, 1); } if ((md = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL) return (EFTYPE); ehdr = (Elf_Ehdr *)&(md->md_data); #ifdef EFI trampcode = copy_staging == COPY_STAGING_ENABLE ? (vm_offset_t)0x0000000040000000 /* 1G */ : (vm_offset_t)0x0000000100000000; /* 4G */; err = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData, 1, (EFI_PHYSICAL_ADDRESS *)&trampcode); if (EFI_ERROR(err)) { printf("Unable to allocate trampoline\n"); if (copy_auto) copy_staging = COPY_STAGING_AUTO; return (ENOMEM); } trampstack = trampcode + LOADER_PAGE_SIZE - 8; #else // XXX Question: why not just use malloc? 
trampcode = host_getmem(LOADER_PAGE_SIZE); if (trampcode == NULL) { printf("Unable to allocate trampoline\n"); return (ENOMEM); } #endif bzero((void *)trampcode, LOADER_PAGE_SIZE); bcopy((void *)&tramp, (void *)trampcode, tramp_size); trampoline = (void *)trampcode; #ifdef EFI if (copy_staging == COPY_STAGING_ENABLE) { PT4 = (pml4_entry_t *)0x0000000040000000; err = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData, 3, (EFI_PHYSICAL_ADDRESS *)&PT4); if (EFI_ERROR(err)) { printf("Unable to allocate trampoline page table\n"); BS->FreePages(trampcode, 1); if (copy_auto) copy_staging = COPY_STAGING_AUTO; return (ENOMEM); } bzero(PT4, 3 * LOADER_PAGE_SIZE); PT3 = &PT4[512]; PT2 = &PT3[512]; /* * This is kinda brutal, but every single 1GB VM * memory segment points to the same first 1GB of * physical memory. But it is more than adequate. */ for (i = 0; i < NPTEPG; i++) { /* * Each slot of the L4 pages points to the * same L3 page. */ PT4[i] = (pml4_entry_t)PT3; PT4[i] |= PG_V | PG_RW; /* * Each slot of the L3 pages points to the * same L2 page. */ PT3[i] = (pdp_entry_t)PT2; PT3[i] |= PG_V | PG_RW; /* * The L2 page slots are mapped with 2MB pages for 1GB. 
*/ PT2[i] = (pd_entry_t)i * (2 * 1024 * 1024); PT2[i] |= PG_V | PG_RW | PG_PS; } } else { PT4 = (pml4_entry_t *)0x0000000100000000; /* 4G */ err = BS->AllocatePages(AllocateMaxAddress, EfiLoaderData, 9, (EFI_PHYSICAL_ADDRESS *)&PT4); if (EFI_ERROR(err)) { printf("Unable to allocate trampoline page table\n"); BS->FreePages(trampcode, 9); if (copy_auto) copy_staging = COPY_STAGING_AUTO; return (ENOMEM); } bzero(PT4, 9 * LOADER_PAGE_SIZE); PT3_l = &PT4[NPML4EPG * 1]; PT3_u = &PT4[NPML4EPG * 2]; PT2_l0 = &PT4[NPML4EPG * 3]; PT2_l1 = &PT4[NPML4EPG * 4]; PT2_l2 = &PT4[NPML4EPG * 5]; PT2_l3 = &PT4[NPML4EPG * 6]; PT2_u0 = &PT4[NPML4EPG * 7]; PT2_u1 = &PT4[NPML4EPG * 8]; /* 1:1 mapping of lower 4G */ PT4[0] = (pml4_entry_t)PT3_l | PG_V | PG_RW; PT3_l[0] = (pdp_entry_t)PT2_l0 | PG_V | PG_RW; PT3_l[1] = (pdp_entry_t)PT2_l1 | PG_V | PG_RW; PT3_l[2] = (pdp_entry_t)PT2_l2 | PG_V | PG_RW; PT3_l[3] = (pdp_entry_t)PT2_l3 | PG_V | PG_RW; for (i = 0; i < 4 * NPDEPG; i++) { PT2_l0[i] = ((pd_entry_t)i << PDRSHIFT) | PG_V | PG_RW | PG_PS; } /* mapping of kernel 2G below top */ PT4[NPML4EPG - 1] = (pml4_entry_t)PT3_u | PG_V | PG_RW; PT3_u[NPDPEPG - 2] = (pdp_entry_t)PT2_u0 | PG_V | PG_RW; PT3_u[NPDPEPG - 1] = (pdp_entry_t)PT2_u1 | PG_V | PG_RW; /* compat mapping of phys @0 */ PT2_u0[0] = PG_PS | PG_V | PG_RW; /* this maps past staging area */ for (i = 1; i < 2 * NPDEPG; i++) { PT2_u0[i] = ((pd_entry_t)staging + ((pd_entry_t)i - 1) * NBPDR) | PG_V | PG_RW | PG_PS; } } #else { vm_offset_t pabase, pa_pt3_l, pa_pt3_u, pa_pt2_l0, pa_pt2_l1, pa_pt2_l2, pa_pt2_l3, pa_pt2_u0, pa_pt2_u1; /* We'll find a place for these later */ PT4 = (pml4_entry_t *)host_getmem(9 * LOADER_PAGE_SIZE); bzero(PT4, 9 * LOADER_PAGE_SIZE); PT3_l = &PT4[NPML4EPG * 1]; PT3_u = &PT4[NPML4EPG * 2]; PT2_l0 = &PT4[NPML4EPG * 3]; PT2_l1 = &PT4[NPML4EPG * 4]; PT2_l2 = &PT4[NPML4EPG * 5]; PT2_l3 = &PT4[NPML4EPG * 6]; PT2_u0 = &PT4[NPML4EPG * 7]; PT2_u1 = &PT4[NPML4EPG * 8]; pabase = trampolinebase + LOADER_PAGE_SIZE; pa_pt3_l = 
pabase + LOADER_PAGE_SIZE * 1; pa_pt3_u = pabase + LOADER_PAGE_SIZE * 2; pa_pt2_l0 = pabase + LOADER_PAGE_SIZE * 3; pa_pt2_l1 = pabase + LOADER_PAGE_SIZE * 4; pa_pt2_l2 = pabase + LOADER_PAGE_SIZE * 5; pa_pt2_l3 = pabase + LOADER_PAGE_SIZE * 6; pa_pt2_u0 = pabase + LOADER_PAGE_SIZE * 7; pa_pt2_u1 = pabase + LOADER_PAGE_SIZE * 8; /* 1:1 mapping of lower 4G */ PT4[0] = (pml4_entry_t)pa_pt3_l | PG_V | PG_RW; PT3_l[0] = (pdp_entry_t)pa_pt2_l0 | PG_V | PG_RW; PT3_l[1] = (pdp_entry_t)pa_pt2_l1 | PG_V | PG_RW; PT3_l[2] = (pdp_entry_t)pa_pt2_l2 | PG_V | PG_RW; PT3_l[3] = (pdp_entry_t)pa_pt2_l3 | PG_V | PG_RW; for (i = 0; i < 4 * NPDEPG; i++) { /* we overflow PT2_l0 into _l1, etc */ PT2_l0[i] = ((pd_entry_t)i << PDRSHIFT) | PG_V | PG_RW | PG_PS; } /* mapping of kernel 2G below top */ PT4[NPML4EPG - 1] = (pml4_entry_t)pa_pt3_u | PG_V | PG_RW; PT3_u[NPDPEPG - 2] = (pdp_entry_t)pa_pt2_u0 | PG_V | PG_RW; PT3_u[NPDPEPG - 1] = (pdp_entry_t)pa_pt2_u1 | PG_V | PG_RW; /* compat mapping of phys @0 */ PT2_u0[0] = PG_PS | PG_V | PG_RW; /* this maps past staging area */ /* * Kernel uses the KERNSTART (== KERNBASE + 2MB) entry to figure * out where we loaded the kernel. This is PT2_u0[1] (since * these map 2MB pages. So the PA that this maps has to be * kboot's staging + 2MB. For UEFI we do 'i - 1' since we load * the kernel right at staging (and assume the first address we * load is 2MB in efi_copyin). However for kboot, staging + 1 * * NBPDR == staging + 2MB which is where the kernel starts. Our * trampoline need not be mapped into the kernel space since we * execute PA==VA for that, and the trampoline can just go away * once the kernel is called. * * Staging should likely be as low as possible, though, because * all the 'early' allocations are at kernend (which the kernel * calls physfree). 
*/ for (i = 1; i < 2 * NPDEPG; i++) { /* we overflow PT2_u0 into _u1 */ PT2_u0[i] = ((pd_entry_t)staging + ((pd_entry_t)i) * NBPDR) | PG_V | PG_RW | PG_PS; if (i < 10) printf("Mapping %d to %#lx staging %#lx\n", i, PT2_u0[i], staging); } } #endif #ifdef EFI printf("staging %#lx (%scopying) tramp %p PT4 %p\n", staging, copy_staging == COPY_STAGING_ENABLE ? "" : "not ", trampoline, PT4); #else printf("staging %#lx tramp %p PT4 %p\n", staging, (void *)trampolinebase, (void *)trampolinebase + LOADER_PAGE_SIZE); #endif printf("Start @ 0x%lx ...\n", ehdr->e_entry); #ifdef EFI efi_time_fini(); #endif err = bi_load(fp->f_args, &modulep, &kernend, true); if (err != 0) { #ifdef EFI efi_time_init(); if (copy_auto) copy_staging = COPY_STAGING_AUTO; #endif return (err); } dev_cleanup(); #ifdef EFI trampoline(trampstack, copy_staging == COPY_STAGING_ENABLE ? efi_copy_finish : efi_copy_finish_nop, kernend, modulep, PT4, ehdr->e_entry); #else trampoline_data = (void *)trampoline + tramp_data_offset; trampoline_data->entry = ehdr->e_entry; trampoline_data->pt4 = trampolinebase + LOADER_PAGE_SIZE; /* * So we compute the VA of the module data by modulep + KERNBASE.... * need to make sure that that address is mapped right. We calculate * the start of available memory to allocate via kernend (which is * calculated with a phyaddr of "kernend + PA(PT_u0[1])"), so we better * make sure we're not overwriting the last 2MB of the kernel :). 
*/ trampoline_data->modulep = modulep; /* Offset from KERNBASE */ trampoline_data->kernend = kernend; /* Offset from the load address */ trampoline_data->fill1 = trampoline_data->fill2 = 0; printf("Modulep = %lx kernend %lx\n", modulep, kernend); /* NOTE: when copyting in, it's relative to the start of our 'area' not an abs addr */ /* Copy the trampoline to the ksegs */ archsw.arch_copyin((void *)trampcode, trampolinebase - staging, tramp_size); /* Copy the page table to the ksegs */ archsw.arch_copyin(PT4, trampoline_data->pt4 - staging, 9 * LOADER_PAGE_SIZE); - if (archsw.arch_kexec_kseg_get == NULL) - panic("architecture did not provide kexec segment mapping"); - archsw.arch_kexec_kseg_get(&nseg, &kseg); + kboot_kseg_get(&nseg, &kseg); error = host_kexec_load(trampolinebase, nseg, kseg, HOST_KEXEC_ARCH_X86_64); if (error != 0) panic("kexec_load returned error: %d", error); host_reboot(HOST_REBOOT_MAGIC1, HOST_REBOOT_MAGIC2, HOST_REBOOT_CMD_KEXEC, 0); #endif panic("exec returned"); } static int elf64_obj_exec(struct preloaded_file *fp) { return (EFTYPE); } diff --git a/stand/kboot/arch/powerpc64/ppc64_elf_freebsd.c b/stand/kboot/arch/powerpc64/ppc64_elf_freebsd.c index 3341771be09a..613186ab19cb 100644 --- a/stand/kboot/arch/powerpc64/ppc64_elf_freebsd.c +++ b/stand/kboot/arch/powerpc64/ppc64_elf_freebsd.c @@ -1,181 +1,180 @@ /*- * Copyright (c) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 64 #include #include #include #include #include #include #include "bootstrap.h" #include "syscall_nr.h" #include "host_syscall.h" #include "modinfo.h" +#include "kboot.h" extern char end[]; extern void *kerneltramp; extern size_t szkerneltramp; struct trampoline_data { uint32_t kernel_entry; uint32_t dtb; uint32_t phys_mem_offset; uint32_t of_entry; uint32_t mdp; uint32_t mdp_size; }; int ppc64_elf_loadfile(char *filename, uint64_t dest, struct preloaded_file **result) { int r; r = __elfN(loadfile)(filename, dest, result); if (r != 0) return (r); return (0); } int ppc64_elf_exec(struct preloaded_file *fp) { struct file_metadata *fmp; vm_offset_t mdp, dtb; Elf_Ehdr *e; int error; uint32_t *trampoline; uint64_t entry; uint64_t trampolinebase; struct trampoline_data *trampoline_data; int nseg; void *kseg; if ((fmp = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL) { return(EFTYPE); } e = (Elf_Ehdr *)&fmp->md_data; /* * Figure out where to put it. * * Linux does not allow to do kexec_load into * any part of memory. Ask arch_loadaddr to * resolve the first available chunk of physical * memory where loading is possible (load_addr). 
* * Memory organization is shown below. * It is assumed, that text segment offset of * kernel ELF (KERNPHYSADDR) is non-zero, * which is true for PPC/PPC64 architectures, * where default is 0x100000. * * load_addr: trampoline code * load_addr + KERNPHYSADDR: kernel text segment */ trampolinebase = archsw.arch_loadaddr(LOAD_RAW, NULL, 0); printf("Load address at %#jx\n", (uintmax_t)trampolinebase); printf("Relocation offset is %#jx\n", (uintmax_t)elf64_relocation_offset); /* Set up loader trampoline */ trampoline = malloc(szkerneltramp); memcpy(trampoline, &kerneltramp, szkerneltramp); /* Parse function descriptor for ELFv1 kernels */ if ((e->e_flags & 3) == 2) entry = e->e_entry; else { archsw.arch_copyout(e->e_entry + elf64_relocation_offset, &entry, 8); entry = be64toh(entry); } /* * Placeholder for trampoline data is at trampolinebase + 0x08 * CAUTION: all data must be Big Endian */ trampoline_data = (void*)&trampoline[2]; trampoline_data->kernel_entry = htobe32(entry + elf64_relocation_offset); trampoline_data->phys_mem_offset = htobe32(0); trampoline_data->of_entry = htobe32(0); if ((error = md_load64(fp->f_args, &mdp, &dtb)) != 0) return (error); trampoline_data->dtb = htobe32(dtb); trampoline_data->mdp = htobe32(mdp); trampoline_data->mdp_size = htobe32(0xfb5d104d); printf("Kernel entry at %#jx (%#x) ...\n", entry, be32toh(trampoline_data->kernel_entry)); printf("DTB at %#x, mdp at %#x\n", be32toh(trampoline_data->dtb), be32toh(trampoline_data->mdp)); dev_cleanup(); archsw.arch_copyin(trampoline, trampolinebase, szkerneltramp); free(trampoline); - if (archsw.arch_kexec_kseg_get == NULL) - panic("architecture did not provide kexec segment mapping"); - archsw.arch_kexec_kseg_get(&nseg, &kseg); + kboot_kseg_get(&nseg, &kseg); error = host_kexec_load(trampolinebase, nseg, kseg, HOST_KEXEC_ARCH_PPC64); if (error != 0) panic("kexec_load returned error: %d", error); error = host_reboot(HOST_REBOOT_MAGIC1, HOST_REBOOT_MAGIC2, HOST_REBOOT_CMD_KEXEC, (uintptr_t)NULL); 
if (error != 0) panic("reboot returned error: %d", error); while (1) {} } struct file_format ppc_elf64 = { ppc64_elf_loadfile, ppc64_elf_exec }; /* * Sort formats so that those that can detect based on arguments rather than * reading the file first. */ struct file_format *file_formats[] = { &ppc_elf64, NULL }; diff --git a/stand/kboot/kboot.h b/stand/kboot/kboot.h index 4211f21adcb6..49e5dea25b12 100644 --- a/stand/kboot/kboot.h +++ b/stand/kboot/kboot.h @@ -1,55 +1,58 @@ /*- * Copyright (c) 2022, Netflix, Inc. * * SPDX-License-Identifier: BSD-2-Clause */ #ifndef KBOOT_H #define KBOOT_H #define DEVT_HOSTDISK 1234 struct memory_segments { uint64_t start; uint64_t end; uint64_t type; /* MD defined */ }; bool enumerate_memory_arch(void); struct preloaded_file; void bi_loadsmap(struct preloaded_file *kfp); bool has_acpi(void); vm_offset_t acpi_rsdp(void); void do_init(void); /* Per-platform fdt fixup */ void fdt_arch_fixups(void *fdtp); uint64_t kboot_get_phys_load_segment(void); uint8_t kboot_get_kernel_machine_bits(void); +/* main.c */ +void kboot_kseg_get(int *nseg, void **ptr); + /* hostdisk.c */ extern const char *hostfs_root; const char *hostdisk_gen_probe(void); void hostdisk_zfs_probe(void); bool hostdisk_zfs_find_default(void); /* seg.c */ #define SYSTEM_RAM 1 void init_avail(void); void need_avail(int n); void add_avail(uint64_t start, uint64_t end, uint64_t type); void remove_avail(uint64_t start, uint64_t end, uint64_t type); uint64_t first_avail(uint64_t align, uint64_t min_size, uint64_t type); void print_avail(void); bool populate_avail_from_iomem(void); uint64_t space_avail(uint64_t start); /* util.c */ bool file2str(const char *fn, char *buffer, size_t buflen); bool file2u64(const char *fn, uint64_t *val); #endif /* KBOOT_H */ diff --git a/stand/kboot/main.c b/stand/kboot/main.c index 6631cb38ae22..75c2d55c3f39 100644 --- a/stand/kboot/main.c +++ b/stand/kboot/main.c @@ -1,481 +1,479 @@ /*- * Copyright (C) 2010-2014 Nathan Whitehorn * All rights 
reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "host_syscall.h" #include "kboot.h" #include "stand.h" struct arch_switch archsw; extern void *_end; int kboot_getdev(void **vdev, const char *devspec, const char **path); ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len); ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len); ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len); int kboot_autoload(void); -static void kboot_kseg_get(int *nseg, void **ptr); static void kboot_zfs_probe(void); extern int command_fdt_internal(int argc, char *argv[]); #define PA_INVAL (vm_offset_t)-1 static vm_offset_t pa_start = PA_INVAL; static vm_offset_t padding; static vm_offset_t offset; static uint64_t commit_limit; static uint64_t committed_as; static uint64_t mem_avail; static void memory_limits(void) { int fd; char buf[128]; /* * To properly size the slabs, we need to find how much memory we can * commit to using. commit_limit is the max, while commited_as is the * current total. We can use these later to allocate the largetst amount * of memory possible so we can support larger ram disks than we could * by using fixed segment sizes. We also grab the memory available so * we don't use more than 49% of that. 
*/ fd = open("host:/proc/meminfo", O_RDONLY); if (fd != -1) { while (fgetstr(buf, sizeof(buf), fd) > 0) { if (strncmp(buf, "MemAvailable:", 13) == 0) { mem_avail = strtoll(buf + 13, NULL, 0); mem_avail <<= 10; /* Units are kB */ } else if (strncmp(buf, "CommitLimit:", 12) == 0) { commit_limit = strtoll(buf + 13, NULL, 0); commit_limit <<= 10; /* Units are kB */ } else if (strncmp(buf, "Committed_AS:", 13) == 0) { committed_as = strtoll(buf + 14, NULL, 0); committed_as <<= 10; /* Units are kB */ } } } printf("Commit limit: %lld Committed bytes %lld Available %lld\n", (long long)commit_limit, (long long)committed_as, (long long)mem_avail); close(fd); } /* * NB: getdev should likely be identical to this most places, except maybe * we should move to storing the length of the platform devdesc. */ int kboot_getdev(void **vdev, const char *devspec, const char **path) { struct devdesc **dev = (struct devdesc **)vdev; int rv; /* * If it looks like this is just a path and no device, go with the * current device. */ if (devspec == NULL || strchr(devspec, ':') == NULL) { if (((rv = devparse(dev, getenv("currdev"), NULL)) == 0) && (path != NULL)) *path = devspec; return (rv); } /* * Try to parse the device name off the beginning of the devspec */ return (devparse(dev, devspec, path)); } static int parse_args(int argc, const char **argv) { int howto = 0; /* * When run as init, sometimes argv[0] is a EFI-ESP path, other times * it's the name of the init program, and sometimes it's a placeholder * string, so we exclude it here. For the other args, look for DOS-like * and Unix-like absolte paths and exclude parsing it if we find that, * otherwise parse it as a command arg (so looking for '-X', 'foo' or * 'foo=bar'). This is a little different than EFI where it argv[0] * often times is the first argument passed in. 
There are cases when * linux-booting via EFI that we have the EFI path we used to run * bootXXX.efi as the arguments to init, so we need to exclude the paths * there as well. */ for (int i = 1; i < argc; i++) { if (argv[i][0] != '\\' && argv[i][0] != '/') { howto |= boot_parse_arg(argv[i]); } } return (howto); } static vm_offset_t rsdp; static vm_offset_t kboot_rsdp_from_efi(void) { char buffer[512 + 1]; char *walker, *ep; if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer))) return (0); /* Not an EFI system */ ep = buffer + strlen(buffer); walker = buffer; while (walker < ep) { if (strncmp("ACPI20=", walker, 7) == 0) return((vm_offset_t)strtoull(walker + 7, NULL, 0)); if (strncmp("ACPI=", walker, 5) == 0) return((vm_offset_t)strtoull(walker + 5, NULL, 0)); walker += strcspn(walker, "\n"); } return (0); } static void find_acpi(void) { rsdp = kboot_rsdp_from_efi(); #if 0 /* maybe for amd64 */ if (rsdp == 0) rsdp = find_rsdp_arch(); #endif } vm_offset_t acpi_rsdp(void) { return (rsdp); } bool has_acpi(void) { return rsdp != 0; } int main(int argc, const char **argv) { void *heapbase; const size_t heapsize = 64*1024*1024; const char *bootdev; archsw.arch_getdev = kboot_getdev; archsw.arch_copyin = kboot_copyin; archsw.arch_copyout = kboot_copyout; archsw.arch_readin = kboot_readin; archsw.arch_autoload = kboot_autoload; - archsw.arch_kexec_kseg_get = kboot_kseg_get; archsw.arch_zfs_probe = kboot_zfs_probe; /* Give us a sane world if we're running as init */ do_init(); /* * Setup the heap, 64MB is minimum for ZFS booting */ heapbase = host_getmem(heapsize); setheap(heapbase, heapbase + heapsize); /* Parse the command line args -- ignoring for now the console selection */ parse_args(argc, argv); /* * Set up console. 
*/ cons_probe(); /* Initialize all the devices */ devinit(); bootdev = getenv("bootdev"); if (bootdev == NULL) bootdev = hostdisk_gen_probe(); if (bootdev == NULL) bootdev="zfs:"; hostfs_root = getenv("hostfs_root"); if (hostfs_root == NULL) hostfs_root = "/"; #if defined(LOADER_ZFS_SUPPORT) if (strcmp(bootdev, "zfs:") == 0) { /* * Pseudo device that says go find the right ZFS pool. This will be * the first pool that we find that passes the sanity checks (eg looks * like it might be vbootable) and sets currdev to the right thing based * on active BEs, etc */ hostdisk_zfs_find_default(); } else #endif { /* * Otherwise, honor what's on the command line. If we've been * given a specific ZFS partition, then we'll honor it w/o BE * processing that would otherwise pick a different snapshot to * boot than the default one in the pool. */ set_currdev(bootdev); } printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root); printf("\n%s", bootprog_info); setenv("LINES", "24", 1); memory_limits(); enumerate_memory_arch(); /* * Find acpi, if it exists */ find_acpi(); interact(); /* doesn't return */ return (0); } void exit(int code) { host_exit(code); __unreachable(); } void delay(int usecs) { struct host_timeval tvi, tv; uint64_t ti, t; host_gettimeofday(&tvi, NULL); ti = tvi.tv_sec*1000000 + tvi.tv_usec; do { host_gettimeofday(&tv, NULL); t = tv.tv_sec*1000000 + tv.tv_usec; } while (t < ti + usecs); } time_t getsecs(void) { struct host_timeval tv; host_gettimeofday(&tv, NULL); return (tv.tv_sec); } time_t time(time_t *tloc) { time_t rv; rv = getsecs(); if (tloc != NULL) *tloc = rv; return (rv); } struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX]; int nkexec_segments = 0; #define SEGALIGN (1ul<<20) static ssize_t get_phys_buffer(vm_offset_t dest, const size_t len, void **buf) { int i = 0; const size_t segsize = 64*1024*1024; if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX) panic("Tried to load too many kexec segments"); for (i = 0; i < nkexec_segments; 
i++) { if (dest >= (vm_offset_t)loaded_segments[i].mem && dest < (vm_offset_t)loaded_segments[i].mem + loaded_segments[i].memsz) goto out; } loaded_segments[nkexec_segments].buf = host_getmem(segsize); loaded_segments[nkexec_segments].bufsz = segsize; loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN); loaded_segments[nkexec_segments].memsz = segsize; i = nkexec_segments; nkexec_segments++; out: *buf = loaded_segments[i].buf + (dest - (vm_offset_t)loaded_segments[i].mem); return (min(len,loaded_segments[i].bufsz - (dest - (vm_offset_t)loaded_segments[i].mem))); } ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len) { ssize_t segsize, remainder; void *destbuf; if (pa_start == PA_INVAL) { pa_start = kboot_get_phys_load_segment(); // padding = 2 << 20; /* XXX amd64: revisit this when we make it work */ padding = 0; offset = dest; get_phys_buffer(pa_start, len, &destbuf); } remainder = len; do { segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf); bcopy(src, destbuf, segsize); remainder -= segsize; src += segsize; dest += segsize; } while (remainder > 0); return (len); } ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len) { ssize_t segsize, remainder; void *srcbuf; remainder = len; do { segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf); bcopy(srcbuf, dest, segsize); remainder -= segsize; src += segsize; dest += segsize; } while (remainder > 0); return (len); } ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) { void *buf; size_t resid, chunk, get; ssize_t got; vm_offset_t p; p = dest; chunk = min(PAGE_SIZE, len); buf = malloc(chunk); if (buf == NULL) { printf("kboot_readin: buf malloc failed\n"); return (0); } for (resid = len; resid > 0; resid -= got, p += got) { get = min(chunk, resid); got = VECTX_READ(fd, buf, get); if (got <= 0) { if (got < 0) printf("kboot_readin: read failed\n"); break; } kboot_copyin(buf, p, got); 
} free (buf); return (len - resid); } int kboot_autoload(void) { return (0); } -static void +void kboot_kseg_get(int *nseg, void **ptr) { int a; printf("kseg_get: %d segments\n", nkexec_segments); printf("VA SZ PA MEMSZ\n"); printf("---------------- -------- ---------------- -----\n"); for (a = 0; a < nkexec_segments; a++) { printf("%016jx %08jx %016jx %08jx\n", (uintmax_t)loaded_segments[a].buf, (uintmax_t)loaded_segments[a].bufsz, (uintmax_t)loaded_segments[a].mem, (uintmax_t)loaded_segments[a].memsz); } *nseg = nkexec_segments; *ptr = &loaded_segments[0]; } static void kboot_zfs_probe(void) { #if defined(LOADER_ZFS_SUPPORT) /* * Open all the disks and partitions we can find to see if there are ZFS * pools on them. */ hostdisk_zfs_probe(); #endif } /* * Since proper fdt command handling function is defined in fdt_loader_cmd.c, * and declaring it as extern is in contradiction with COMMAND_SET() macro * (which uses static pointer), we're defining wrapper function, which * calls the proper fdt handling routine. */ static int command_fdt(int argc, char *argv[]) { return (command_fdt_internal(argc, argv)); } COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);