Index: stable/11/stand/efi/loader/bootinfo.c =================================================================== --- stable/11/stand/efi/loader/bootinfo.c (revision 332027) +++ stable/11/stand/efi/loader/bootinfo.c (revision 332028) @@ -1,471 +1,514 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2004, 2006 Marcel Moolenaar * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "bootstrap.h" #include "loader_efi.h" #if defined(__amd64__) #include #endif #include "framebuffer.h" #if defined(LOADER_FDT_SUPPORT) #include #endif int bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp); extern EFI_SYSTEM_TABLE *ST; static const char howto_switches[] = "aCdrgDmphsv"; static int howto_masks[] = { RB_ASKNAME, RB_CDROM, RB_KDB, RB_DFLTROOT, RB_GDB, RB_MULTIPLE, RB_MUTE, RB_PAUSE, RB_SERIAL, RB_SINGLE, RB_VERBOSE }; static int bi_getboothowto(char *kargs) { const char *sw; char *opts; char *console; int howto, i; howto = 0; /* Get the boot options from the environment first. */ for (i = 0; howto_names[i].ev != NULL; i++) { if (getenv(howto_names[i].ev) != NULL) howto |= howto_names[i].mask; } console = getenv("console"); if (console != NULL) { if (strcmp(console, "comconsole") == 0) howto |= RB_SERIAL; if (strcmp(console, "nullconsole") == 0) howto |= RB_MUTE; } /* Parse kargs */ if (kargs == NULL) return (howto); opts = strchr(kargs, '-'); while (opts != NULL) { while (*(++opts) != '\0') { sw = strchr(howto_switches, *opts); if (sw == NULL) break; howto |= howto_masks[sw - howto_switches]; } opts = strchr(opts, '-'); } return (howto); } /* * Copy the environment into the load area starting at (addr). * Each variable is formatted as =, with a single nul * separating each variable, and a double nul terminating the environment. */ static vm_offset_t bi_copyenv(vm_offset_t start) { struct env_var *ep; vm_offset_t addr, last; size_t len; addr = last = start; /* Traverse the environment. */ for (ep = environ; ep != NULL; ep = ep->ev_next) { len = strlen(ep->ev_name); if ((size_t)archsw.arch_copyin(ep->ev_name, addr, len) != len) break; addr += len; if (archsw.arch_copyin("=", addr, 1) != 1) break; addr++; if (ep->ev_value != NULL) { len = strlen(ep->ev_value); if ((size_t)archsw.arch_copyin(ep->ev_value, addr, len) != len) break; addr += len; } if (archsw.arch_copyin("", addr, 1) != 1) break; last = ++addr; } if (archsw.arch_copyin("", last++, 1) != 1) last = start; return(last); } /* * Copy module-related data into the load area, where it can be * used as a directory for loaded modules. * * Module data is presented in a self-describing format. Each datum * is preceded by a 32-bit identifier and a 32-bit size field. * * Currently, the following data are saved: * * MOD_NAME (variable) module name (string) * MOD_TYPE (variable) module type (string) * MOD_ARGS (variable) module parameters (string) * MOD_ADDR sizeof(vm_offset_t) module load address * MOD_SIZE sizeof(size_t) module size * MOD_METADATA (variable) type-specific metadata */ #define COPY32(v, a, c) { \ uint32_t x = (v); \ if (c) \ archsw.arch_copyin(&x, a, sizeof(x)); \ a += sizeof(x); \ } #define MOD_STR(t, a, s, c) { \ COPY32(t, a, c); \ COPY32(strlen(s) + 1, a, c); \ if (c) \ archsw.arch_copyin(s, a, strlen(s) + 1); \ a += roundup(strlen(s) + 1, sizeof(u_long)); \ } #define MOD_NAME(a, s, c) MOD_STR(MODINFO_NAME, a, s, c) #define MOD_TYPE(a, s, c) MOD_STR(MODINFO_TYPE, a, s, c) #define MOD_ARGS(a, s, c) MOD_STR(MODINFO_ARGS, a, s, c) #define MOD_VAR(t, a, s, c) { \ COPY32(t, a, c); \ COPY32(sizeof(s), a, c); \ if (c) \ archsw.arch_copyin(&s, a, sizeof(s)); \ a += roundup(sizeof(s), sizeof(u_long)); \ } #define MOD_ADDR(a, s, c) MOD_VAR(MODINFO_ADDR, a, s, c) #define MOD_SIZE(a, s, c) MOD_VAR(MODINFO_SIZE, a, s, c) #define MOD_METADATA(a, mm, c) { \ COPY32(MODINFO_METADATA | mm->md_type, a, c); \ COPY32(mm->md_size, a, c); \ if (c) \ archsw.arch_copyin(mm->md_data, a, mm->md_size); \ a += roundup(mm->md_size, sizeof(u_long)); \ } #define MOD_END(a, c) { \ COPY32(MODINFO_END, a, c); \ COPY32(0, a, c); \ } static vm_offset_t bi_copymodules(vm_offset_t addr) { struct preloaded_file *fp; struct file_metadata *md; int c; uint64_t v; c = addr != 0; /* Start with the first module on the list, should be the kernel. */ for (fp = file_findfile(NULL, NULL); fp != NULL; fp = fp->f_next) { MOD_NAME(addr, fp->f_name, c); /* This must come first. */ MOD_TYPE(addr, fp->f_type, c); if (fp->f_args) MOD_ARGS(addr, fp->f_args, c); v = fp->f_addr; #if defined(__arm__) v -= __elfN(relocation_offset); #endif MOD_ADDR(addr, v, c); v = fp->f_size; MOD_SIZE(addr, v, c); for (md = fp->f_metadata; md != NULL; md = md->md_next) if (!(md->md_type & MODINFOMD_NOCOPY)) MOD_METADATA(addr, md, c); } MOD_END(addr, c); return(addr); } +static EFI_STATUS +efi_do_vmap(EFI_MEMORY_DESCRIPTOR *mm, UINTN sz, UINTN mmsz, UINT32 mmver) +{ + EFI_MEMORY_DESCRIPTOR *desc, *viter, *vmap; + EFI_STATUS ret; + int curr, ndesc, nset; + + nset = 0; + desc = mm; + ndesc = sz / mmsz; + vmap = malloc(sz); + if (vmap == NULL) + /* This isn't really an EFI error case, but pretend it is */ + return (EFI_OUT_OF_RESOURCES); + viter = vmap; + for (curr = 0; curr < ndesc; + curr++, desc = NextMemoryDescriptor(desc, mmsz)) { + if ((desc->Attribute & EFI_MEMORY_RUNTIME) != 0) { + ++nset; + desc->VirtualStart = desc->PhysicalStart; + *viter = *desc; + viter = NextMemoryDescriptor(viter, mmsz); + } + } + ret = RS->SetVirtualAddressMap(nset * mmsz, mmsz, mmver, vmap); + free(vmap); + return (ret); +} + static int bi_load_efi_data(struct preloaded_file *kfp) { EFI_MEMORY_DESCRIPTOR *mm; EFI_PHYSICAL_ADDRESS addr; EFI_STATUS status; + const char *efi_novmap; size_t efisz; UINTN efi_mapkey; UINTN mmsz, pages, retry, sz; UINT32 mmver; struct efi_map_header *efihdr; + bool do_vmap; #if defined(__amd64__) || defined(__aarch64__) struct efi_fb efifb; if (efi_find_framebuffer(&efifb) == 0) { printf("EFI framebuffer information:\n"); printf("addr, size 0x%jx, 0x%jx\n", efifb.fb_addr, efifb.fb_size); printf("dimensions %d x %d\n", efifb.fb_width, efifb.fb_height); printf("stride %d\n", efifb.fb_stride); printf("masks 0x%08x, 0x%08x, 0x%08x, 0x%08x\n", efifb.fb_mask_red, efifb.fb_mask_green, efifb.fb_mask_blue, efifb.fb_mask_reserved); file_addmetadata(kfp, MODINFOMD_EFI_FB, sizeof(efifb), &efifb); } #endif + do_vmap = true; + efi_novmap = getenv("efi_disable_vmap"); + if (efi_novmap != NULL) + do_vmap = strcasecmp(efi_novmap, "YES") != 0; + efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; /* * Assgin size of EFI_MEMORY_DESCRIPTOR to keep compatible with * u-boot which doesn't fill this value when buffer for memory * descriptors is too small (eg. 0 to obtain memory map size) */ mmsz = sizeof(EFI_MEMORY_DESCRIPTOR); /* * It is possible that the first call to ExitBootServices may change * the map key. Fetch a new map key and retry ExitBootServices in that * case. */ for (retry = 2; retry > 0; retry--) { /* * Allocate enough pages to hold the bootinfo block and the * memory map EFI will return to us. The memory map has an * unknown size, so we have to determine that first. Note that * the AllocatePages call can itself modify the memory map, so * we have to take that into account as well. The changes to * the memory map are caused by splitting a range of free * memory into two (AFAICT), so that one is marked as being * loader data. */ sz = 0; BS->GetMemoryMap(&sz, NULL, &efi_mapkey, &mmsz, &mmver); sz += mmsz; sz = (sz + 0xf) & ~0xf; pages = EFI_SIZE_TO_PAGES(sz + efisz); status = BS->AllocatePages(AllocateAnyPages, EfiLoaderData, pages, &addr); if (EFI_ERROR(status)) { printf("%s: AllocatePages error %lu\n", __func__, EFI_ERROR_CODE(status)); return (ENOMEM); } /* * Read the memory map and stash it after bootinfo. Align the * memory map on a 16-byte boundary (the bootinfo block is page * aligned). */ efihdr = (struct efi_map_header *)addr; mm = (void *)((uint8_t *)efihdr + efisz); sz = (EFI_PAGE_SIZE * pages) - efisz; status = BS->GetMemoryMap(&sz, mm, &efi_mapkey, &mmsz, &mmver); if (EFI_ERROR(status)) { printf("%s: GetMemoryMap error %lu\n", __func__, EFI_ERROR_CODE(status)); return (EINVAL); } status = BS->ExitBootServices(IH, efi_mapkey); if (EFI_ERROR(status) == 0) { + /* + * This may be disabled by setting efi_disable_vmap in + * loader.conf(5). By default we will setup the virtual + * map entries. + */ + if (do_vmap) + efi_do_vmap(mm, sz, mmsz, mmver); efihdr->memory_size = sz; efihdr->descriptor_size = mmsz; efihdr->descriptor_version = mmver; file_addmetadata(kfp, MODINFOMD_EFI_MAP, efisz + sz, efihdr); return (0); } BS->FreePages(addr, pages); } printf("ExitBootServices error %lu\n", EFI_ERROR_CODE(status)); return (EINVAL); } /* * Load the information expected by an amd64 kernel. * * - The 'boothowto' argument is constructed. * - The 'bootdev' argument is constructed. * - The 'bootinfo' struct is constructed, and copied into the kernel space. * - The kernel environment is copied into kernel space. * - Module metadata are formatted and placed in kernel space. */ int bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp) { struct preloaded_file *xp, *kfp; struct devdesc *rootdev; struct file_metadata *md; vm_offset_t addr; uint64_t kernend; uint64_t envp; vm_offset_t size; char *rootdevname; int howto; #if defined(LOADER_FDT_SUPPORT) vm_offset_t dtbp; int dtb_size; #endif #if defined(__arm__) vm_offset_t vaddr; size_t i; /* * These metadata addreses must be converted for kernel after * relocation. */ uint32_t mdt[] = { MODINFOMD_SSYM, MODINFOMD_ESYM, MODINFOMD_KERNEND, MODINFOMD_ENVP, #if defined(LOADER_FDT_SUPPORT) MODINFOMD_DTBP #endif }; #endif howto = bi_getboothowto(args); /* * Allow the environment variable 'rootdev' to override the supplied * device. This should perhaps go to MI code and/or have $rootdev * tested/set by MI code before launching the kernel. */ rootdevname = getenv("rootdev"); archsw.arch_getdev((void**)(&rootdev), rootdevname, NULL); if (rootdev == NULL) { printf("Can't determine root device.\n"); return(EINVAL); } /* Try reading the /etc/fstab file to select the root device */ getrootmount(efi_fmtdev((void *)rootdev)); addr = 0; for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) { if (addr < (xp->f_addr + xp->f_size)) addr = xp->f_addr + xp->f_size; } /* Pad to a page boundary. */ addr = roundup(addr, PAGE_SIZE); /* Copy our environment. */ envp = addr; addr = bi_copyenv(addr); /* Pad to a page boundary. */ addr = roundup(addr, PAGE_SIZE); #if defined(LOADER_FDT_SUPPORT) /* Handle device tree blob */ dtbp = addr; dtb_size = fdt_copy(addr); /* Pad to a page boundary */ if (dtb_size) addr += roundup(dtb_size, PAGE_SIZE); #endif kfp = file_findfile(NULL, "elf kernel"); if (kfp == NULL) kfp = file_findfile(NULL, "elf64 kernel"); if (kfp == NULL) panic("can't find kernel file"); kernend = 0; /* fill it in later */ file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto); file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp); #if defined(LOADER_FDT_SUPPORT) if (dtb_size) file_addmetadata(kfp, MODINFOMD_DTBP, sizeof dtbp, &dtbp); else printf("WARNING! Trying to fire up the kernel, but no " "device tree blob found!\n"); #endif file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend); file_addmetadata(kfp, MODINFOMD_FW_HANDLE, sizeof ST, &ST); bi_load_efi_data(kfp); /* Figure out the size and location of the metadata. */ *modulep = addr; size = bi_copymodules(0); kernend = roundup(addr + size, PAGE_SIZE); *kernendp = kernend; /* patch MODINFOMD_KERNEND */ md = file_findmetadata(kfp, MODINFOMD_KERNEND); bcopy(&kernend, md->md_data, sizeof kernend); #if defined(__arm__) *modulep -= __elfN(relocation_offset); /* Do relocation fixup on metadata of each module. */ for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) { for (i = 0; i < nitems(mdt); i++) { md = file_findmetadata(xp, mdt[i]); if (md) { bcopy(md->md_data, &vaddr, sizeof vaddr); vaddr -= __elfN(relocation_offset); bcopy(&vaddr, md->md_data, sizeof vaddr); } } } #endif /* Copy module list and metadata. */ (void)bi_copymodules(addr); return (0); } Index: stable/11/sys/amd64/amd64/efirt_machdep.c =================================================================== --- stable/11/sys/amd64/amd64/efirt_machdep.c (revision 332027) +++ stable/11/sys/amd64/amd64/efirt_machdep.c (revision 332028) @@ -1,314 +1,314 @@ /*- * Copyright (c) 2004 Marcel Moolenaar * Copyright (c) 2001 Doug Rabson * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static pml4_entry_t *efi_pml4; static vm_object_t obj_1t1_pt; static vm_page_t efi_pml4_page; static vm_pindex_t efi_1t1_idx; void efi_destroy_1t1_map(void) { vm_page_t m; if (obj_1t1_pt != NULL) { VM_OBJECT_RLOCK(obj_1t1_pt); TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) m->wire_count = 0; atomic_subtract_int(&vm_cnt.v_wire_count, obj_1t1_pt->resident_page_count); VM_OBJECT_RUNLOCK(obj_1t1_pt); vm_object_deallocate(obj_1t1_pt); } obj_1t1_pt = NULL; efi_pml4 = NULL; efi_pml4_page = NULL; } static vm_page_t efi_1t1_page(void) { return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | VM_ALLOC_ZERO)); } static pt_entry_t * efi_1t1_pte(vm_offset_t va) { pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte; vm_page_t m; vm_pindex_t pml4_idx, pdp_idx, pd_idx; vm_paddr_t mphys; pml4_idx = pmap_pml4e_index(va); pml4e = &efi_pml4[pml4_idx]; if (*pml4e == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *pml4e = mphys | X86_PG_RW | X86_PG_V; } else { mphys = *pml4e & ~PAGE_MASK; } pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys); pdp_idx = pmap_pdpe_index(va); pdpe += pdp_idx; if (*pdpe == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *pdpe = mphys | X86_PG_RW | X86_PG_V; } else { mphys = *pdpe & ~PAGE_MASK; } pde = (pd_entry_t *)PHYS_TO_DMAP(mphys); pd_idx = pmap_pde_index(va); pde += pd_idx; if (*pde == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *pde = mphys | X86_PG_RW | X86_PG_V; } else { mphys = *pde & ~PAGE_MASK; } pte = (pt_entry_t *)PHYS_TO_DMAP(mphys); pte += pmap_pte_index(va); KASSERT(*pte == 0, ("va %#jx *pt %#jx", va, *pte)); return (pte); } bool efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) { struct efi_md *p; pt_entry_t *pte; vm_offset_t va; uint64_t idx; int bits, i, mode; obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 + NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG), VM_PROT_ALL, 0, NULL); efi_1t1_idx = 0; VM_OBJECT_WLOCK(obj_1t1_pt); efi_pml4_page = efi_1t1_page(); VM_OBJECT_WUNLOCK(obj_1t1_pt); efi_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_pml4_page)); pmap_pinit_pml4(efi_pml4_page); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, descsz)) { if ((p->md_attr & EFI_MD_ATTR_RT) == 0) continue; - if (p->md_virt != NULL) { + if (p->md_virt != NULL && (uint64_t)p->md_virt != p->md_phys) { if (bootverbose) printf("EFI Runtime entry %d is mapped\n", i); goto fail; } if ((p->md_phys & EFI_PAGE_MASK) != 0) { if (bootverbose) printf("EFI Runtime entry %d is not aligned\n", i); goto fail; } if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys || p->md_phys + p->md_pages * EFI_PAGE_SIZE >= VM_MAXUSER_ADDRESS) { printf("EFI Runtime entry %d is not in mappable for RT:" "base %#016jx %#jx pages\n", i, (uintmax_t)p->md_phys, (uintmax_t)p->md_pages); goto fail; } if ((p->md_attr & EFI_MD_ATTR_WB) != 0) mode = VM_MEMATTR_WRITE_BACK; else if ((p->md_attr & EFI_MD_ATTR_WT) != 0) mode = VM_MEMATTR_WRITE_THROUGH; else if ((p->md_attr & EFI_MD_ATTR_WC) != 0) mode = VM_MEMATTR_WRITE_COMBINING; else if ((p->md_attr & EFI_MD_ATTR_WP) != 0) mode = VM_MEMATTR_WRITE_PROTECTED; else if ((p->md_attr & EFI_MD_ATTR_UC) != 0) mode = VM_MEMATTR_UNCACHEABLE; else { if (bootverbose) printf("EFI Runtime entry %d mapping " "attributes unsupported\n", i); mode = VM_MEMATTR_UNCACHEABLE; } bits = pmap_cache_bits(kernel_pmap, mode, FALSE) | X86_PG_RW | X86_PG_V; VM_OBJECT_WLOCK(obj_1t1_pt); for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++, va += PAGE_SIZE) { pte = efi_1t1_pte(va); pte_store(pte, va | bits); } VM_OBJECT_WUNLOCK(obj_1t1_pt); } return (true); fail: efi_destroy_1t1_map(); return (false); } /* * Create an environment for the EFI runtime code call. The most * important part is creating the required 1:1 physical->virtual * mappings for the runtime segments. To do that, we manually create * page table which unmap userspace but gives correct kernel mapping. * The 1:1 mappings for runtime segments usually occupy low 4G of the * physical address map. * * The 1:1 mappings were chosen over the SetVirtualAddressMap() EFI RT * service, because there are some BIOSes which fail to correctly * relocate itself on the call, requiring both 1:1 and virtual * mapping. As result, we must provide 1:1 mapping anyway, so no * reason to bother with the virtual map, and no need to add a * complexity into loader. * * The fpu_kern_enter() call allows firmware to use FPU, as mandated * by the specification. In particular, CR0.TS bit is cleared. Also * it enters critical section, giving us neccessary protection against * context switch. * * There is no need to disable interrupts around the change of %cr3, * the kernel mappings are correct, while we only grabbed the * userspace portion of VA. Interrupts handlers must not access * userspace. Having interrupts enabled fixes the issue with * firmware/SMM long operation, which would negatively affect IPIs, * esp. TLB shootdown requests. */ int efi_arch_enter(void) { pmap_t curpmap; curpmap = PCPU_GET(curpmap); PMAP_LOCK_ASSERT(curpmap, MA_OWNED); /* * IPI TLB shootdown handler invltlb_pcid_handler() reloads * %cr3 from the curpmap->pm_cr3, which would disable runtime * segments mappings. Block the handler's action by setting * curpmap to impossible value. See also comment in * pmap.c:pmap_activate_sw(). */ if (pmap_pcid_enabled && !invpcid_works) PCPU_SET(curpmap, NULL); load_cr3(VM_PAGE_TO_PHYS(efi_pml4_page) | (pmap_pcid_enabled ? curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0)); /* * If PCID is enabled, the clear CR3_PCID_SAVE bit in the loaded %cr3 * causes TLB invalidation. */ if (!pmap_pcid_enabled) invltlb(); return (0); } void efi_arch_leave(void) { pmap_t curpmap; curpmap = &curproc->p_vmspace->vm_pmap; if (pmap_pcid_enabled && !invpcid_works) PCPU_SET(curpmap, curpmap); load_cr3(curpmap->pm_cr3 | (pmap_pcid_enabled ? curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0)); if (!pmap_pcid_enabled) invltlb(); } /* XXX debug stuff */ static int efi_time_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct efi_tm tm; int error, val; val = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); error = efi_get_time(&tm); if (error == 0) { uprintf("EFI reports: Year %d Month %d Day %d Hour %d Min %d " "Sec %d\n", tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); } return (error); } SYSCTL_PROC(_debug, OID_AUTO, efi_time, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, efi_time_sysctl_handler, "I", ""); Index: stable/11/sys/amd64/amd64/fpu.c =================================================================== --- stable/11/sys/amd64/amd64/fpu.c (revision 332027) +++ stable/11/sys/amd64/amd64/fpu.c (revision 332028) @@ -1,1110 +1,1111 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Floating point support. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void ldmxcsr(u_int csr); void stmxcsr(u_int *csr); void xrstor(char *addr, uint64_t mask); void xsave(char *addr, uint64_t mask); #endif /* __GNUCLIKE_ASM && !lint */ #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() CTASSERT(sizeof(struct savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. */ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savefpu, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savefpu)); static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static uma_zone_t fpu_save_area_zone; static struct savefpu *fpu_initialstate; struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; void fpusave(void *addr) { if (use_xsave) xsave((char *)addr, xsave_mask); else fxsave((char *)addr); } void fpurestore(void *addr) { if (use_xsave) xrstor((char *)addr, xsave_mask); else fxrstor((char *)addr); } void fpususpend(void *addr) { u_long cr0; cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } void fpuresume(void *addr) { u_long cr0; cr0 = rcr0(); stop_emulating(); fninit(); if (use_xsave) load_xcr(XCR0, xsave_mask); fpurestore(addr); load_cr0(cr0); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void fpuinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; if ((cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { /* * Patch the XSAVE instruction in the cpu_switch code * to XSAVEOPT. We assume that XSAVE encoding used * REX byte, and set the bit 4 of the r/m byte. */ ctx_switch_xsave[3] |= 0x10; } } /* * Calculate the fpu save area size. */ static void fpuinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(struct savefpu); } /* * Initialize the floating point unit. */ void fpuinit(void) { register_t saveintr; u_int mxcsr; u_short control; if (IS_BSP()) fpuinit_bsp1(); if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (IS_BSP()) fpuinit_bsp2(); /* * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); fninit(); control = __INITIAL_FPUCW__; fldcw(control); mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void fpuinitstate(void *arg __unused) { register_t saveintr; int cp[4], i, max_ext_n; fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); saveintr = intr_disable(); stop_emulating(); fpusave(fpu_initialstate); if (fpu_initialstate->sv_env.en_mxcsr_mask) cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM registers or x87 * registers (MM/ST). The fpusave call dumped the garbage * contained in the registers after reset to the initial state * saved. Clear XMM and x87 registers file image to make the * startup program state and signal handler XMM/x87 register * content predictable. */ bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp)); bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm)); /* * Create a table describing the layout of the CPU Extended * Save Area. */ if (use_xsave) { max_ext_n = flsl(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); start_emulating(); intr_restore(saveintr); } -SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL); +/* EFIRT needs this to be initialized before we can enter our EFI environment */ +SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_FIRST, fpuinitstate, NULL); /* * Free coprocessor (if we have it). */ void fpuexit(struct thread *td) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, NULL); } critical_exit(); } int fpuformat(void) { return (_MC_FPFMT_XMM); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int fputrap_x87(void) { struct savefpu *pcb_save; u_short control, status; critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. */ if (PCPU_GET(fpcurthread) != curthread) { pcb_save = curpcb->pcb_save; control = pcb_save->sv_env.en_cw; status = pcb_save->sv_env.en_sw; } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int fputrap_sse(void) { u_int mxcsr; critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curpcb->pcb_save->sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } /* * Device Not Available (DNA, #NM) exception handler. * * It would be better to switch FP context here (if curthread != * fpcurthread) and not necessarily for every context switch, but it * is too hard to access foreign pcb's. */ void fpudna(void) { /* * This handler is entered with interrupts enabled, so context * switches may occur before critical_enter() is executed. If * a context switch occurs, then when we regain control, our * state will have been completely restored. The CPU may * change underneath us, but the only part of our context that * lives in the CPU is CR0.TS and that will be "restored" by * setting it on the new CPU. */ critical_enter(); KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); if (PCPU_GET(fpcurthread) == curthread) { printf("fpudna: fpcurthread == curthread\n"); stop_emulating(); critical_exit(); return; } if (PCPU_GET(fpcurthread) != NULL) { panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid, curthread, curthread->td_tid); } stop_emulating(); /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); fpu_clean_state(); if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * fpu_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(fpu_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); fpurestore(curpcb->pcb_save); if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(curpcb->pcb_initial_fpucw); if (PCB_USER_FPU(curpcb)) set_pcb_flags(curpcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(curpcb, PCB_FPUINITDONE); } else fpurestore(curpcb->pcb_save); critical_exit(); } void fpudrop(void) { struct thread *td; td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE); start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int fpugetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i, owned; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); get_pcb_user_save_pcb(pcb)->sv_env.en_cw = pcb->pcb_initial_fpucw; fpuuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { fpusave(get_pcb_user_save_pcb(pcb)); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } critical_exit(); if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + offsetof(struct xstate_hdr, xstate_bv)); max_ext_n = flsl(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)fpu_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } return (owned); } void fpuuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); else set_pcb_flags(pcb, PCB_FPUINITDONE); } int fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(struct savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } /* * Set the state of the FPU. */ int fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; addr->sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = fpusetxstate(td, xfpustate, xfpustate_size); if (error != 0) { critical_exit(); return (error); } bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurestore(get_pcb_user_save_td(td)); critical_exit(); set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); } else { critical_exit(); error = fpusetxstate(td, xfpustate, xfpustate_size); if (error != 0) return (error); bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpuuserinited(td); } return (0); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } /* * This really sucks. We want the acpi version only, but it requires * the isa_if.h file in order to get the definitions. */ #include "opt_isa.h" #ifdef DEV_ISA #include /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id fpupnp_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int fpupnp_probe(device_t dev) { int result; result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); if (result <= 0) device_quiet(dev); return (result); } static int fpupnp_attach(device_t dev) { return (0); } static device_method_t fpupnp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, fpupnp_probe), DEVMETHOD(device_attach, fpupnp_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t fpupnp_driver = { "fpupnp", fpupnp_methods, 1, /* no softc */ }; static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_FPUINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 /* avoided save for the kern thread */ #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { struct savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? */ free(ctx, M_FPUKERN_CTX); } static struct savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((struct savefpu *)p); } int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, ("ctx is required when !FPU_KERN_NOCTX")); KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state")); if ((flags & FPU_KERN_NOCTX) != 0) { critical_enter(); stop_emulating(); if (curthread == PCPU_GET(fpcurthread)) { fpusave(curpcb->pcb_save); PCPU_SET(fpcurthread, NULL); } else { KASSERT(PCPU_GET(fpcurthread) == NULL, ("invalid fpcurthread")); } /* * This breaks XSAVEOPT tracker, but * PCB_FPUNOSAVE state is supposed to never need to * save FPU context at all. */ fpurestore(fpu_initialstate); set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE | PCB_FPUINITDONE); return (0); } if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; return (0); } KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_FPUINITDONE; fpuexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); set_pcb_flags(pcb, PCB_KERNFPU); clear_pcb_flags(pcb, PCB_FPUINITDONE); return (0); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) { KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); KASSERT(PCPU_GET(fpcurthread) == NULL, ("non-NULL fpcurthread for PCB_FPUNOSAVE")); CRITICAL_ASSERT(td); clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE); start_emulating(); critical_exit(); } else { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); critical_enter(); if (curthread == PCPU_GET(fpcurthread)) fpudrop(); critical_exit(); pcb->pcb_save = ctx->prev; } if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { set_pcb_flags(pcb, PCB_FPUINITDONE); clear_pcb_flags(pcb, PCB_KERNFPU); } else clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU); } else { if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) set_pcb_flags(pcb, PCB_FPUINITDONE); else clear_pcb_flags(pcb, PCB_FPUINITDONE); KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); set_pcb_flags(curpcb, PCB_KERNFPU); return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNFPU) != 0); } /* * FPU save area alloc/free/init utility routines */ struct savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, 0)); } void fpu_save_area_free(struct savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(struct savefpu *fsa) { bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size); } Index: stable/11/sys/dev/efidev/efidev.c =================================================================== --- stable/11/sys/dev/efidev/efidev.c (revision 332027) +++ stable/11/sys/dev/efidev/efidev.c (revision 332028) @@ -1,221 +1,221 @@ /*- * Copyright (c) 2016 Netflix, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include static d_ioctl_t efidev_ioctl; static struct cdevsw efi_cdevsw = { .d_name = "efi", .d_version = D_VERSION, .d_ioctl = efidev_ioctl, }; static int efidev_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr, int flags __unused, struct thread *td __unused) { int error; switch (cmd) { case EFIIOC_GET_TABLE: { struct efi_get_table_ioc *egtioc = (struct efi_get_table_ioc *)addr; error = efi_get_table(&egtioc->uuid, &egtioc->ptr); break; } case EFIIOC_GET_TIME: { struct efi_tm *tm = (struct efi_tm *)addr; error = efi_get_time(tm); break; } case EFIIOC_SET_TIME: { struct efi_tm *tm = (struct efi_tm *)addr; error = efi_set_time(tm); break; } case EFIIOC_VAR_GET: { struct efi_var_ioc *ev = (struct efi_var_ioc *)addr; void *data; efi_char *name; data = malloc(ev->datasize, M_TEMP, M_WAITOK); name = malloc(ev->namesize, M_TEMP, M_WAITOK); error = copyin(ev->name, name, ev->namesize); if (error) goto vg_out; if (name[ev->namesize / sizeof(efi_char) - 1] != 0) { error = EINVAL; goto vg_out; } error = efi_var_get(name, &ev->vendor, &ev->attrib, &ev->datasize, data); if (error == 0) { error = copyout(data, ev->data, ev->datasize); } else if (error == EOVERFLOW) { /* * Pass back the size we really need, but * convert the error to 0 so the copyout * happens. datasize was updated in the * efi_var_get call. */ ev->data = NULL; error = 0; } vg_out: free(data, M_TEMP); free(name, M_TEMP); break; } case EFIIOC_VAR_NEXT: { struct efi_var_ioc *ev = (struct efi_var_ioc *)addr; efi_char *name; name = malloc(ev->namesize, M_TEMP, M_WAITOK); error = copyin(ev->name, name, ev->namesize); if (error) goto vn_out; /* Note: namesize is the buffer size, not the string lenght */ error = efi_var_nextname(&ev->namesize, name, &ev->vendor); if (error == 0) { error = copyout(name, ev->name, ev->namesize); } else if (error == EOVERFLOW) { ev->name = NULL; error = 0; } vn_out: free(name, M_TEMP); break; } case EFIIOC_VAR_SET: { struct efi_var_ioc *ev = (struct efi_var_ioc *)addr; void *data = NULL; efi_char *name; /* datasize == 0 -> delete (more or less) */ if (ev->datasize > 0) data = malloc(ev->datasize, M_TEMP, M_WAITOK); name = malloc(ev->namesize, M_TEMP, M_WAITOK); if (ev->datasize) { error = copyin(ev->data, data, ev->datasize); if (error) goto vs_out; } error = copyin(ev->name, name, ev->namesize); if (error) goto vs_out; if (name[ev->namesize / sizeof(efi_char) - 1] != 0) { error = EINVAL; goto vs_out; } error = efi_var_set(name, &ev->vendor, ev->attrib, ev->datasize, data); vs_out: free(data, M_TEMP); free(name, M_TEMP); break; } default: error = ENOTTY; break; } return (error); } static struct cdev *efidev; static int efidev_modevents(module_t m, int event, void *arg __unused) { struct make_dev_args mda; int error; switch (event) { case MOD_LOAD: /* * If we have no efi environment, then don't create the device. */ if (efi_rt_ok() != 0) return (0); make_dev_args_init(&mda); mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME; mda.mda_devsw = &efi_cdevsw; mda.mda_uid = UID_ROOT; mda.mda_gid = GID_WHEEL; mda.mda_mode = 0700; error = make_dev_s(&mda, &efidev, "efi"); return (error); case MOD_UNLOAD: if (efidev != NULL) destroy_dev(efidev); efidev = NULL; return (0); case MOD_SHUTDOWN: return (0); default: return (EOPNOTSUPP); } } static moduledata_t efidev_moddata = { .name = "efidev", .evhand = efidev_modevents, .priv = NULL, }; -DECLARE_MODULE(efidev, efidev_moddata, SI_SUB_DEVFS, SI_ORDER_ANY); +DECLARE_MODULE(efidev, efidev_moddata, SI_SUB_DRIVERS, SI_ORDER_ANY); MODULE_VERSION(efidev, 1); MODULE_DEPEND(efidev, efirt, 1, 1, 1); Index: stable/11/sys/dev/efidev/efirt.c =================================================================== --- stable/11/sys/dev/efidev/efirt.c (revision 332027) +++ stable/11/sys/dev/efidev/efirt.c (revision 332028) @@ -1,409 +1,456 @@ /*- * Copyright (c) 2004 Marcel Moolenaar * Copyright (c) 2001 Doug Rabson * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct efi_systbl *efi_systbl; static struct efi_cfgtbl *efi_cfgtbl; static struct efi_rt *efi_runtime; static int efi_status2err[25] = { 0, /* EFI_SUCCESS */ ENOEXEC, /* EFI_LOAD_ERROR */ EINVAL, /* EFI_INVALID_PARAMETER */ ENOSYS, /* EFI_UNSUPPORTED */ EMSGSIZE, /* EFI_BAD_BUFFER_SIZE */ EOVERFLOW, /* EFI_BUFFER_TOO_SMALL */ EBUSY, /* EFI_NOT_READY */ EIO, /* EFI_DEVICE_ERROR */ EROFS, /* EFI_WRITE_PROTECTED */ EAGAIN, /* EFI_OUT_OF_RESOURCES */ EIO, /* EFI_VOLUME_CORRUPTED */ ENOSPC, /* EFI_VOLUME_FULL */ ENXIO, /* EFI_NO_MEDIA */ ESTALE, /* EFI_MEDIA_CHANGED */ ENOENT, /* EFI_NOT_FOUND */ EACCES, /* EFI_ACCESS_DENIED */ ETIMEDOUT, /* EFI_NO_RESPONSE */ EADDRNOTAVAIL, /* EFI_NO_MAPPING */ ETIMEDOUT, /* EFI_TIMEOUT */ EDOOFUS, /* EFI_NOT_STARTED */ EALREADY, /* EFI_ALREADY_STARTED */ ECANCELED, /* EFI_ABORTED */ EPROTO, /* EFI_ICMP_ERROR */ EPROTO, /* EFI_TFTP_ERROR */ EPROTO /* EFI_PROTOCOL_ERROR */ }; +static int efi_enter(void); +static void efi_leave(void); + static int efi_status_to_errno(efi_status status) { u_long code; code = status & 0x3ffffffffffffffful; return (code < nitems(efi_status2err) ? efi_status2err[code] : EDOOFUS); } static struct mtx efi_lock; +static bool +efi_is_in_map(struct efi_md *map, int ndesc, int descsz, vm_offset_t addr) +{ + struct efi_md *p; + int i; + + for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, + descsz)) { + if ((p->md_attr & EFI_MD_ATTR_RT) == 0) + continue; + + if (addr >= (uintptr_t)p->md_virt && + addr < (uintptr_t)p->md_virt + p->md_pages * PAGE_SIZE) + return (true); + } + + return (false); +} + static int efi_init(void) { struct efi_map_header *efihdr; struct efi_md *map; caddr_t kmdp; size_t efisz; mtx_init(&efi_lock, "efi", NULL, MTX_DEF); if (efi_systbl_phys == 0) { if (bootverbose) printf("EFI systbl not available\n"); return (0); } efi_systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys); if (efi_systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) { efi_systbl = NULL; if (bootverbose) printf("EFI systbl signature invalid\n"); return (0); } efi_cfgtbl = (efi_systbl->st_cfgtbl == 0) ? NULL : (struct efi_cfgtbl *)efi_systbl->st_cfgtbl; if (efi_cfgtbl == NULL) { if (bootverbose) printf("EFI config table is not present\n"); } kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr == NULL) { if (bootverbose) printf("EFI map is not present\n"); return (0); } efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return (ENOMEM); if (!efi_create_1t1_map(map, efihdr->memory_size / efihdr->descriptor_size, efihdr->descriptor_size)) { if (bootverbose) printf("EFI cannot create runtime map\n"); return (ENOMEM); } efi_runtime = (efi_systbl->st_rt == 0) ? NULL : (struct efi_rt *)efi_systbl->st_rt; if (efi_runtime == NULL) { if (bootverbose) printf("EFI runtime services table is not present\n"); efi_destroy_1t1_map(); return (ENXIO); } + /* + * Some UEFI implementations have multiple implementations of the + * RS->GetTime function. They switch from one we can only use early + * in the boot process to one valid as a RunTime service only when we + * call RS->SetVirtualAddressMap. As this is not always the case, e.g. + * with an old loader.efi, check if the RS->GetTime function is within + * the EFI map, and fail to attach if not. + * + * We need to enter into the EFI environment as efi_runtime may point + * to an EFI address. + */ + efi_enter(); + if (!efi_is_in_map(map, efihdr->memory_size / efihdr->descriptor_size, + efihdr->descriptor_size, (vm_offset_t)efi_runtime->rt_gettime)) { + efi_leave(); + if (bootverbose) + printf( + "EFI runtime services table has an invalid pointer\n"); + efi_runtime = NULL; + efi_destroy_1t1_map(); + return (ENXIO); + } + efi_leave(); + return (0); } static void efi_uninit(void) { efi_destroy_1t1_map(); efi_systbl = NULL; efi_cfgtbl = NULL; efi_runtime = NULL; mtx_destroy(&efi_lock); } int efi_rt_ok(void) { if (efi_runtime == NULL) return (ENXIO); return (0); } static int efi_enter(void) { struct thread *td; pmap_t curpmap; int error; if (efi_runtime == NULL) return (ENXIO); td = curthread; curpmap = &td->td_proc->p_vmspace->vm_pmap; PMAP_LOCK(curpmap); mtx_lock(&efi_lock); error = fpu_kern_enter(td, NULL, FPU_KERN_NOCTX); if (error != 0) { PMAP_UNLOCK(curpmap); return (error); } return (efi_arch_enter()); } static void efi_leave(void) { struct thread *td; pmap_t curpmap; efi_arch_leave(); curpmap = &curproc->p_vmspace->vm_pmap; td = curthread; fpu_kern_leave(td, NULL); mtx_unlock(&efi_lock); PMAP_UNLOCK(curpmap); } int efi_get_table(struct uuid *uuid, void **ptr) { struct efi_cfgtbl *ct; u_long count; if (efi_cfgtbl == NULL || efi_systbl == NULL) return (ENXIO); count = efi_systbl->st_entries; ct = efi_cfgtbl; while (count--) { if (!bcmp(&ct->ct_uuid, uuid, sizeof(*uuid))) { *ptr = (void *)PHYS_TO_DMAP(ct->ct_data); return (0); } ct++; } return (ENOENT); } static int efi_get_time_locked(struct efi_tm *tm, struct efi_tmcap *tmcap) { efi_status status; int error; EFI_TIME_OWNED() error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_gettime(tm, tmcap); efi_leave(); error = efi_status_to_errno(status); return (error); } int efi_get_time(struct efi_tm *tm) { struct efi_tmcap dummy; int error; if (efi_runtime == NULL) return (ENXIO); EFI_TIME_LOCK() /* * UEFI spec states that the Capabilities argument to GetTime is * optional, but some UEFI implementations choke when passed a NULL * pointer. Pass a dummy efi_dmcap, even though we won't use it, * to workaround such implementations. */ error = efi_get_time_locked(tm, &dummy); EFI_TIME_UNLOCK() return (error); } int efi_get_time_capabilities(struct efi_tmcap *tmcap) { struct efi_tm dummy; int error; if (efi_runtime == NULL) return (ENXIO); EFI_TIME_LOCK() error = efi_get_time_locked(&dummy, tmcap); EFI_TIME_UNLOCK() return (error); } int efi_reset_system(void) { int error; error = efi_enter(); if (error != 0) return (error); efi_runtime->rt_reset(EFI_RESET_WARM, 0, 0, NULL); efi_leave(); return (EIO); } static int efi_set_time_locked(struct efi_tm *tm) { efi_status status; int error; EFI_TIME_OWNED(); error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_settime(tm); efi_leave(); error = efi_status_to_errno(status); return (error); } int efi_set_time(struct efi_tm *tm) { int error; if (efi_runtime == NULL) return (ENXIO); EFI_TIME_LOCK() error = efi_set_time_locked(tm); EFI_TIME_UNLOCK() return (error); } int efi_var_get(efi_char *name, struct uuid *vendor, uint32_t *attrib, size_t *datasize, void *data) { efi_status status; int error; error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_getvar(name, vendor, attrib, datasize, data); efi_leave(); error = efi_status_to_errno(status); return (error); } int efi_var_nextname(size_t *namesize, efi_char *name, struct uuid *vendor) { efi_status status; int error; error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_scanvar(namesize, name, vendor); efi_leave(); error = efi_status_to_errno(status); return (error); } int efi_var_set(efi_char *name, struct uuid *vendor, uint32_t attrib, size_t datasize, void *data) { efi_status status; int error; error = efi_enter(); if (error != 0) return (error); status = efi_runtime->rt_setvar(name, vendor, attrib, datasize, data); efi_leave(); error = efi_status_to_errno(status); return (error); } static int efirt_modevents(module_t m, int event, void *arg __unused) { switch (event) { case MOD_LOAD: return (efi_init()); case MOD_UNLOAD: efi_uninit(); return (0); case MOD_SHUTDOWN: return (0); default: return (EOPNOTSUPP); } } static moduledata_t efirt_moddata = { .name = "efirt", .evhand = efirt_modevents, .priv = NULL, }; -DECLARE_MODULE(efirt, efirt_moddata, SI_SUB_VM_CONF, SI_ORDER_ANY); +/* After fpuinitstate, before efidev */ +DECLARE_MODULE(efirt, efirt_moddata, SI_SUB_DRIVERS, SI_ORDER_SECOND); MODULE_VERSION(efirt, 1); Index: stable/11 =================================================================== --- stable/11 (revision 332027) +++ stable/11 (revision 332028) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r330868,331241,331361,331365