diff --git a/sys/arm64/arm64/efirt_machdep.c b/sys/arm64/arm64/efirt_machdep.c index 52906779dfb9..0c46d2e6dcc6 100644 --- a/sys/arm64/arm64/efirt_machdep.c +++ b/sys/arm64/arm64/efirt_machdep.c @@ -1,284 +1,279 @@ /*- * Copyright (c) 2004 Marcel Moolenaar * Copyright (c) 2001 Doug Rabson * Copyright (c) 2016 The FreeBSD Foundation * Copyright (c) 2017 Andrew Turner * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static vm_object_t obj_1t1_pt; static vm_pindex_t efi_1t1_idx; static pd_entry_t *efi_l0; static uint64_t efi_ttbr0; void efi_destroy_1t1_map(void) { vm_page_t m; if (obj_1t1_pt != NULL) { VM_OBJECT_RLOCK(obj_1t1_pt); TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) m->ref_count = VPRC_OBJREF; vm_wire_sub(obj_1t1_pt->resident_page_count); VM_OBJECT_RUNLOCK(obj_1t1_pt); vm_object_deallocate(obj_1t1_pt); } obj_1t1_pt = NULL; efi_1t1_idx = 0; efi_l0 = NULL; efi_ttbr0 = 0; } static vm_page_t efi_1t1_page(void) { return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | VM_ALLOC_ZERO)); } static pt_entry_t * efi_1t1_l3(vm_offset_t va) { pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; vm_pindex_t l0_idx, l1_idx, l2_idx; vm_page_t m; vm_paddr_t mphys; l0_idx = pmap_l0_index(va); l0 = &efi_l0[l0_idx]; if (*l0 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *l0 = PHYS_TO_PTE(mphys) | L0_TABLE; } else { mphys = PTE_TO_PHYS(*l0); } l1 = (pd_entry_t *)PHYS_TO_DMAP(mphys); l1_idx = pmap_l1_index(va); l1 += l1_idx; if (*l1 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *l1 = PHYS_TO_PTE(mphys) | L1_TABLE; } else { mphys = PTE_TO_PHYS(*l1); } l2 = (pd_entry_t *)PHYS_TO_DMAP(mphys); l2_idx = pmap_l2_index(va); l2 += l2_idx; if (*l2 == 0) { m = efi_1t1_page(); mphys = VM_PAGE_TO_PHYS(m); *l2 = PHYS_TO_PTE(mphys) | L2_TABLE; } else { mphys = PTE_TO_PHYS(*l2); } l3 = (pt_entry_t *)PHYS_TO_DMAP(mphys); l3 += pmap_l3_index(va); KASSERT(*l3 == 0, ("%s: Already mapped: va %#jx *pt %#jx", __func__, va, *l3)); return (l3); } /* * Map a physical address from EFI runtime space into KVA space. Returns 0 to * indicate a failed mapping so that the caller may handle error. 
*/ vm_offset_t efi_phys_to_kva(vm_paddr_t paddr) { - vm_offset_t vaddr; - - if (PHYS_IN_DMAP(paddr)) { - vaddr = PHYS_TO_DMAP(paddr); - if (pmap_klookup(vaddr, NULL)) - return (vaddr); - } + if (PHYS_IN_DMAP(paddr)) + return (PHYS_TO_DMAP(paddr)); /* TODO: Map memory not in the DMAP */ return (0); } /* * Create the 1:1 virtual to physical map for EFI */ bool efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) { struct efi_md *p; pt_entry_t *l3, l3_attr; vm_offset_t va; vm_page_t efi_l0_page; uint64_t idx; int i, mode; obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, L0_ENTRIES + L0_ENTRIES * Ln_ENTRIES + L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES + L0_ENTRIES * Ln_ENTRIES * Ln_ENTRIES * Ln_ENTRIES, VM_PROT_ALL, 0, NULL); VM_OBJECT_WLOCK(obj_1t1_pt); efi_l0_page = efi_1t1_page(); VM_OBJECT_WUNLOCK(obj_1t1_pt); efi_l0 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_l0_page)); efi_ttbr0 = ASID_TO_OPERAND(ASID_RESERVED_FOR_EFI) | VM_PAGE_TO_PHYS(efi_l0_page); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, descsz)) { if ((p->md_attr & EFI_MD_ATTR_RT) == 0) continue; if (p->md_virt != 0 && p->md_virt != p->md_phys) { if (bootverbose) printf("EFI Runtime entry %d is mapped\n", i); goto fail; } if ((p->md_phys & EFI_PAGE_MASK) != 0) { if (bootverbose) printf("EFI Runtime entry %d is not aligned\n", i); goto fail; } if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys || p->md_phys + p->md_pages * EFI_PAGE_SIZE >= VM_MAXUSER_ADDRESS) { printf("EFI Runtime entry %d is not in mappable for RT:" "base %#016jx %#jx pages\n", i, (uintmax_t)p->md_phys, (uintmax_t)p->md_pages); goto fail; } if ((p->md_attr & EFI_MD_ATTR_WB) != 0) mode = VM_MEMATTR_WRITE_BACK; else if ((p->md_attr & EFI_MD_ATTR_WT) != 0) mode = VM_MEMATTR_WRITE_THROUGH; else if ((p->md_attr & EFI_MD_ATTR_WC) != 0) mode = VM_MEMATTR_WRITE_COMBINING; else mode = VM_MEMATTR_DEVICE; if (bootverbose) { printf("MAP %lx mode %x pages %lu\n", p->md_phys, mode, p->md_pages); } l3_attr = ATTR_DEFAULT | ATTR_S1_IDX(mode) | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_nG | L3_PAGE; if (mode == VM_MEMATTR_DEVICE || p->md_attr & EFI_MD_ATTR_XP) l3_attr |= ATTR_S1_XN; VM_OBJECT_WLOCK(obj_1t1_pt); for (va = p->md_phys, idx = 0; idx < p->md_pages; idx += (PAGE_SIZE / EFI_PAGE_SIZE), va += PAGE_SIZE) { l3 = efi_1t1_l3(va); *l3 = va | l3_attr; } VM_OBJECT_WUNLOCK(obj_1t1_pt); } return (true); fail: efi_destroy_1t1_map(); return (false); } int efi_arch_enter(void) { CRITICAL_ASSERT(curthread); /* * Temporarily switch to EFI's page table. However, we leave curpmap * unchanged in order to prevent its ASID from being reclaimed before * we switch back to its page table in efi_arch_leave(). */ set_ttbr0(efi_ttbr0); if (PCPU_GET(bcast_tlbi_workaround) != 0) invalidate_local_icache(); return (0); } void efi_arch_leave(void) { /* * Restore the pcpu pointer. Some UEFI implementations trash it and * we don't store it before calling into them. To fix this we need * to restore it after returning to the kernel context. As reading * curpmap will access x18 we need to restore it before loading * the pmap pointer. 
*/ __asm __volatile( "mrs x18, tpidr_el1 \n" ); set_ttbr0(pmap_to_ttbr0(PCPU_GET(curpmap))); if (PCPU_GET(bcast_tlbi_workaround) != 0) invalidate_local_icache(); } int efi_rt_arch_call(struct efirt_callinfo *ec) { panic("not implemented"); } diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c index d566a8e2298c..517f4e7c2e23 100644 --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -1,1185 +1,1185 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_acpi.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef DEV_ACPI #include #include #endif #ifdef FDT #include #include #endif #include _Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size"); _Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136, "pcb_fpusaved changed offset"); _Static_assert(offsetof(struct pcb, pcb_fpustate) == 192, "pcb_fpustate changed offset"); enum arm64_bus arm64_bus_method = ARM64_BUS_NONE; /* * XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not we * could relocate this, but will need to keep the same virtual address as * it's reverenced by the EARLY_COUNTER macro. */ struct pcpu pcpu0; #if defined(PERTHREAD_SSP) /* * The boot SSP canary. Will be replaced with a per-thread canary when * scheduling has started. */ uintptr_t boot_canary = 0x49a2d892bc05a0b1ul; #endif static struct trapframe proc0_tf; int early_boot = 1; int cold = 1; static int boot_el; static uint64_t hcr_el2; struct kva_md_info kmi; int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */ int has_pan; #if defined(SOCDEV_PA) /* * This is the virtual address used to access SOCDEV_PA. As it's set before * .bss is cleared we need to ensure it's preserved. 
To do this use * __read_mostly as it's only ever set once but read in the putc functions. */ uintptr_t socdev_va __read_mostly; #endif /* * Physical address of the EFI System Table. Stashed from the metadata hints * passed into the kernel and used by the EFI code to call runtime services. */ vm_paddr_t efi_systbl_phys; static struct efi_map_header *efihdr; /* pagezero_* implementations are provided in support.S */ void pagezero_simple(void *); void pagezero_cache(void *); /* pagezero_simple is default pagezero */ void (*pagezero)(void *p) = pagezero_simple; int (*apei_nmi)(void); #if defined(PERTHREAD_SSP_WARNING) static void print_ssp_warning(void *data __unused) { printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n"); } SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL); SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL); #endif static void pan_setup(void) { uint64_t id_aa64mfr1; id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1); if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE) has_pan = 1; } void pan_enable(void) { /* * The LLVM integrated assembler doesn't understand the PAN * PSTATE field. Because of this we need to manually create * the instruction in an asm block. This is equivalent to: * msr pan, #1 * * This sets the PAN bit, stopping the kernel from accessing * memory when userspace can also access it unless the kernel * uses the userspace load/store instructions. */ if (has_pan) { WRITE_SPECIALREG(sctlr_el1, READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN); __asm __volatile(".inst 0xd500409f | (0x1 << 8)"); } } bool has_hyp(void) { /* * XXX The E2H check is wrong, but it's close enough for now. Needs to * be re-evaluated once we're running regularly in EL2. */ return (boot_el == CURRENTEL_EL_EL2 && (hcr_el2 & HCR_E2H) == 0); } bool in_vhe(void) { /* If we are currently in EL2 then must be in VHE */ return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) == CURRENTEL_EL_EL2); } static void cpu_startup(void *dummy) { vm_paddr_t size; int i; printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem), ptoa((uintmax_t)realmem) / 1024 / 1024); if (bootverbose) { printf("Physical memory chunk(s):\n"); for (i = 0; phys_avail[i + 1] != 0; i += 2) { size = phys_avail[i + 1] - phys_avail[i]; printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[i], (uintmax_t)phys_avail[i + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_free_count()), ptoa((uintmax_t)vm_free_count()) / 1024 / 1024); undef_init(); install_cpu_errata(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); static void late_ifunc_resolve(void *dummy __unused) { link_elf_late_ireloc(); } SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); int cpu_idle_wakeup(int cpu) { return (0); } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "dsb sy \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* We should have shutdown by now, if not enter a low power sleep */ intr_disable(); while (1) { __asm __volatile("wfi"); } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* ARM64TODO TBD */ } /* Get current clock frequency for the given CPU ID. 
*/ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { struct pcpu *pc; pc = pcpu_find(cpu_id); if (pc == NULL || rate == NULL) return (EINVAL); if (pc->pc_clock == 0) return (EOPNOTSUPP); *rate = pc->pc_clock; return (0); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; pcpu->pc_mpidr = UINT64_MAX; } void spinlock_enter(void) { struct thread *td; register_t daif; td = curthread; if (td->td_md.md_spinlock_count == 0) { daif = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = daif; critical_enter(); } else td->td_md.md_spinlock_count++; } void spinlock_exit(void) { struct thread *td; register_t daif; td = curthread; daif = td->td_md.md_saved_daif; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { critical_exit(); intr_restore(daif); } } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { int i; /* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */ for (i = 0; i < nitems(pcb->pcb_x); i++) { if (i == PCB_LR) pcb->pcb_x[i] = tf->tf_elr; else pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START]; } pcb->pcb_sp = tf->tf_sp; } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup; pcpup = cpuid_to_pcpu[0]; MPASS(pcpup != NULL); proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_kstack_pages = KSTACK_PAGES; #if defined(PERTHREAD_SSP) thread0.td_md.md_canary = boot_canary; #endif thread0.td_pcb = (struct pcb *)(thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE) - 1; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; ptrauth_thread0(&thread0); pcpup->pc_curpcb = thread0.td_pcb; /* * Unmask SError exceptions. They are used to signal a RAS failure, * or other hardware error. */ serror_enable(); } /* * Get an address to be used to write to kernel data that may be mapped * read-only, e.g. to patch kernel code. */ bool arm64_get_writable_addr(void *addr, void **out) { vm_paddr_t pa; /* Check if the page is writable */ if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) { *out = addr; return (true); } /* * Find the physical address of the given page. */ if (!pmap_klookup((vm_offset_t)addr, &pa)) { return (false); } /* * If it is within the DMAP region and is writable use that. */ - if (PHYS_IN_DMAP(pa)) { + if (PHYS_IN_DMAP_RANGE(pa)) { addr = (void *)PHYS_TO_DMAP(pa); if (PAR_SUCCESS(arm64_address_translate_s1e1w( (vm_offset_t)addr))) { *out = addr; return (true); } } return (false); } typedef void (*efi_map_entry_cb)(struct efi_md *, void *argp); static void foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb, void *argp) { struct efi_md *map, *p; size_t efisz; int ndesc, i; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. 
*/ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { cb(p, argp); } } /* * Handle the EFI memory map list. * * We will make two passes at this, the first (exclude == false) to populate * physmem with valid physical memory ranges from recognized map entry types. * In the second pass we will exclude memory ranges from physmem which must not * be used for general allocations, either because they are used by runtime * firmware or otherwise reserved. * * Adding the runtime-reserved memory ranges to physmem and excluding them * later ensures that they are included in the DMAP, but excluded from * phys_avail[]. * * Entry types not explicitly listed here are ignored and not mapped. */ static void handle_efi_map_entry(struct efi_md *p, void *argp) { bool exclude = *(bool *)argp; switch (p->md_type) { case EFI_MD_TYPE_RECLAIM: /* * The recomended location for ACPI tables. Map into the * DMAP so we can access them from userspace via /dev/mem. */ case EFI_MD_TYPE_RT_CODE: /* * Some UEFI implementations put the system table in the * runtime code section. Include it in the DMAP, but will * be excluded from phys_avail. */ case EFI_MD_TYPE_RT_DATA: /* * Runtime data will be excluded after the DMAP * region is created to stop it from being added * to phys_avail. */ if (exclude) { physmem_exclude_region(p->md_phys, p->md_pages * EFI_PAGE_SIZE, EXFLAG_NOALLOC); break; } /* FALLTHROUGH */ case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ if (!exclude) physmem_hardware_region(p->md_phys, p->md_pages * EFI_PAGE_SIZE); break; default: /* Other types shall not be handled by physmem. 
*/ break; } } static void add_efi_map_entries(struct efi_map_header *efihdr) { bool exclude = false; foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude); } static void exclude_efi_map_entries(struct efi_map_header *efihdr) { bool exclude = true; foreach_efi_map_entry(efihdr, handle_efi_map_entry, &exclude); } static void print_efi_map_entry(struct efi_md *p, void *argp __unused) { const char *type; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode", "PersistentMemory" }; if (p->md_type < nitems(types)) type = types[p->md_type]; else type = ""; printf("%23s %012lx %012lx %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_NV) printf("NV "); if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) printf("MORE_RELIABLE "); if (p->md_attr & EFI_MD_ATTR_RO) printf("RO "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } static void print_efi_map_entries(struct efi_map_header *efihdr) { printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); foreach_efi_map_entry(efihdr, print_efi_map_entry, NULL); } /* * Map the passed in VA in EFI space to a void * using the efi memory table to * find the PA and return it in the DMAP, if it exists. We're used between the * calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG * tables We assume that either the entry you are mapping fits within its page, * or if it spills to the next page, that's contiguous in PA and in the DMAP. * All observed tables obey the first part of this precondition. */ struct early_map_data { vm_offset_t va; vm_offset_t pa; }; static void efi_early_map_entry(struct efi_md *p, void *argp) { struct early_map_data *emdp = argp; vm_offset_t s, e; if (emdp->pa != 0) return; if ((p->md_attr & EFI_MD_ATTR_RT) == 0) return; s = p->md_virt; e = p->md_virt + p->md_pages * EFI_PAGE_SIZE; if (emdp->va < s || emdp->va >= e) return; emdp->pa = p->md_phys + (emdp->va - p->md_virt); } static void * efi_early_map(vm_offset_t va) { struct early_map_data emd = { .va = va }; foreach_efi_map_entry(efihdr, efi_early_map_entry, &emd); if (emd.pa == 0) return NULL; return (void *)PHYS_TO_DMAP(emd.pa); } /* * When booted via kboot, the prior kernel will pass in reserved memory areas in * a EFI config table. We need to find that table and walk through it excluding * the memory ranges in it. btw, this is called too early for the printf to do * anything since msgbufp isn't initialized, let alone a console... 
*/ static void exclude_efi_memreserve(vm_offset_t efi_systbl_phys) { struct efi_systbl *systbl; struct uuid efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE; systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys); if (systbl == NULL) { printf("can't map systbl\n"); return; } if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) { printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig); return; } /* * We don't yet have the pmap system booted enough to create a pmap for * the efi firmware's preferred address space from the GetMemoryMap() * table. The st_cfgtbl is a VA in this space, so we need to do the * mapping ourselves to a kernel VA with efi_early_map. We assume that * the cfgtbl entries don't span a page. Other pointers are PAs, as * noted below. */ if (systbl->st_cfgtbl == 0) /* Failsafe st_entries should == 0 in this case */ return; for (int i = 0; i < systbl->st_entries; i++) { struct efi_cfgtbl *cfgtbl; struct linux_efi_memreserve *mr; cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl)); if (cfgtbl == NULL) panic("Can't map the config table entry %d\n", i); if (memcmp(&cfgtbl->ct_uuid, &efi_memreserve, sizeof(struct uuid)) != 0) continue; /* * cfgtbl points are either VA or PA, depending on the GUID of * the table. memreserve GUID pointers are PA and not converted * after a SetVirtualAddressMap(). The list's mr_next pointer * is also a PA. */ mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP( (vm_offset_t)cfgtbl->ct_data); while (true) { for (int j = 0; j < mr->mr_count; j++) { struct linux_efi_memreserve_entry *mre; mre = &mr->mr_entry[j]; physmem_exclude_region(mre->mre_base, mre->mre_size, EXFLAG_NODUMP | EXFLAG_NOALLOC); } if (mr->mr_next == 0) break; mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next); }; } } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == 0) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (dtbp == (vm_offset_t)NULL) { #ifndef TSLOG printf("ERROR loading DTB\n"); #endif return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); parse_fdt_bootargs(); } #endif static bool bus_probe(void) { bool has_acpi, has_fdt; char *order, *env; has_acpi = has_fdt = false; #ifdef FDT has_fdt = (OF_peer(0) != 0); #endif #ifdef DEV_ACPI has_acpi = (AcpiOsGetRootPointer() != 0); #endif env = kern_getenv("kern.cfg.order"); if (env != NULL) { order = env; while (order != NULL) { if (has_acpi && strncmp(order, "acpi", 4) == 0 && (order[4] == ',' || order[4] == '\0')) { arm64_bus_method = ARM64_BUS_ACPI; break; } if (has_fdt && strncmp(order, "fdt", 3) == 0 && (order[3] == ',' || order[3] == '\0')) { arm64_bus_method = ARM64_BUS_FDT; break; } order = strchr(order, ','); if (order != NULL) order++; /* Skip comma */ } freeenv(env); /* If we set the bus method it is valid */ if (arm64_bus_method != ARM64_BUS_NONE) return (true); } /* If no order or an invalid order was set use the default */ if (arm64_bus_method == ARM64_BUS_NONE) { if (has_fdt) arm64_bus_method = ARM64_BUS_FDT; else if (has_acpi) arm64_bus_method = ARM64_BUS_ACPI; } /* * If no option was set the default is valid, otherwise we are * setting one to get cninit() working, then calling panic to tell * the user about the invalid bus setup. 
*/ return (env == NULL); } static void cache_setup(void) { int dczva_line_shift; uint32_t dczid_el0; identify_cache(READ_SPECIALREG(ctr_el0)); dczid_el0 = READ_SPECIALREG(dczid_el0); /* Check if dc zva is not prohibited */ if (dczid_el0 & DCZID_DZP) dczva_line_size = 0; else { /* Same as with above calculations */ dczva_line_shift = DCZID_BS_SIZE(dczid_el0); dczva_line_size = sizeof(int) << dczva_line_shift; /* Change pagezero function */ pagezero = pagezero_cache; } } int memory_mapping_mode(vm_paddr_t pa) { struct efi_md *map, *p; size_t efisz; int ndesc, i; if (efihdr == NULL) return (VM_MEMATTR_WRITE_BACK); /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return (VM_MEMATTR_WRITE_BACK); ndesc = efihdr->memory_size / efihdr->descriptor_size; for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (pa < p->md_phys || pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE) continue; if (p->md_type == EFI_MD_TYPE_IOMEM || p->md_type == EFI_MD_TYPE_IOPORT) return (VM_MEMATTR_DEVICE); else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 || p->md_type == EFI_MD_TYPE_RECLAIM) return (VM_MEMATTR_WRITE_BACK); else if ((p->md_attr & EFI_MD_ATTR_WT) != 0) return (VM_MEMATTR_WRITE_THROUGH); else if ((p->md_attr & EFI_MD_ATTR_WC) != 0) return (VM_MEMATTR_WRITE_COMBINING); break; } return (VM_MEMATTR_DEVICE); } void initarm(struct arm64_bootparams *abp) { struct efi_fb *efifb; struct pcpu *pcpup; char *env; #ifdef FDT struct mem_region mem_regions[FDT_MEM_REGIONS]; int mem_regions_sz; phandle_t root; char dts_version[255]; #endif vm_offset_t lastaddr; caddr_t kmdp; bool valid; TSRAW(&thread0, TS_ENTER, __func__, NULL); boot_el = abp->boot_el; hcr_el2 = abp->hcr_el2; /* Parse loader or FDT boot parametes. Determine last used address. */ lastaddr = parse_boot_param(abp); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); identify_cpu(0); identify_hypervisor_smbios(); update_special_regs(0); /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &pcpu0; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); /* locore.S sets sp_el0 to &thread0 so no need to set it here. */ PCPU_SET(curthread, &thread0); PCPU_SET(midr, get_midr()); link_elf_ireloc(kmdp); #ifdef FDT try_load_dtb(kmdp); #endif efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); /* Load the physical memory ranges */ efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) add_efi_map_entries(efihdr); #ifdef FDT else { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); physmem_hardware_regions(mem_regions, mem_regions_sz); } if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0) physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); #endif /* Exclude the EFI framebuffer from our view of physical memory. 
*/ efifb = (struct efi_fb *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_FB); if (efifb != NULL) physmem_exclude_region(efifb->fb_addr, efifb->fb_size, EXFLAG_NOALLOC); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); pan_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ pmap_bootstrap(lastaddr - KERNBASE); /* Exclude entries needed in the DMAP region, but not phys_avail */ if (efihdr != NULL) exclude_efi_map_entries(efihdr); /* Do the same for reserve entries in the EFI MEMRESERVE table */ if (efi_systbl_phys != 0) exclude_efi_memreserve(efi_systbl_phys); /* * We carefully bootstrap the sanitizer map after we've excluded * absolutely everything else that could impact phys_avail. There's not * always enough room for the initial shadow map after the kernel, so * we'll end up searching for segments that we can safely use. Those * segments also get excluded from phys_avail. */ #if defined(KASAN) || defined(KMSAN) pmap_bootstrap_san(); #endif physmem_init_kernel_globals(); devmap_bootstrap(0, NULL); valid = bus_probe(); cninit(); set_ttbr0(abp->kern_ttbr0); cpu_tlb_flushID(); if (!valid) panic("Invalid bus configuration: %s", kern_getenv("kern.cfg.order")); /* * Check if pointer authentication is available on this system, and * if so enable its use. This needs to be called before init_proc0 * as that will configure the thread0 pointer authentication keys. */ ptrauth_init(); /* * Dump the boot metadata. We have to wait for cninit() since console * output is required. If it's grossly incorrect the kernel will never * make it this far. */ if (getenv_is_true("debug.dump_modinfo_at_boot")) preload_dump(); init_proc0(abp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); dbg_init(); kdb_init(); #ifdef KDB if ((boothowto & RB_KDB) != 0) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif pan_enable(); kcsan_cpu_init(0); kasan_init(); kmsan_init(); env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); #ifdef FDT if (arm64_bus_method == ARM64_BUS_FDT) { root = OF_finddevice("/"); if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) { if (strcmp(LINUX_DTS_VERSION, dts_version) != 0) printf("WARNING: DTB version is %s while kernel expects %s, " "please update the DTB in the ESP\n", dts_version, LINUX_DTS_VERSION); } else { printf("WARNING: Cannot find freebsd,dts-version property, " "cannot check DTB compliance\n"); } } #endif if (boothowto & RB_VERBOSE) { if (efihdr != NULL) print_efi_map_entries(efihdr); physmem_print_tables(); } early_boot = 0; if (bootverbose && kstack_pages != KSTACK_PAGES) printf("kern.kstack_pages = %d ignored for thread0\n", kstack_pages); TSEXIT(); } void dbg_init(void) { /* Clear OS lock */ WRITE_SPECIALREG(oslar_el1, 0); /* This permits DDB to use debug registers for watchpoints. */ dbg_monitor_init(); /* TODO: Eventually will need to initialize debug registers here. 
*/ } #ifdef DDB #include DB_SHOW_COMMAND(specialregs, db_show_spregs) { #define PRINT_REG(reg) \ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) PRINT_REG(actlr_el1); PRINT_REG(afsr0_el1); PRINT_REG(afsr1_el1); PRINT_REG(aidr_el1); PRINT_REG(amair_el1); PRINT_REG(ccsidr_el1); PRINT_REG(clidr_el1); PRINT_REG(contextidr_el1); PRINT_REG(cpacr_el1); PRINT_REG(csselr_el1); PRINT_REG(ctr_el0); PRINT_REG(currentel); PRINT_REG(daif); PRINT_REG(dczid_el0); PRINT_REG(elr_el1); PRINT_REG(esr_el1); PRINT_REG(far_el1); #if 0 /* ARM64TODO: Enable VFP before reading floating-point registers */ PRINT_REG(fpcr); PRINT_REG(fpsr); #endif PRINT_REG(id_aa64afr0_el1); PRINT_REG(id_aa64afr1_el1); PRINT_REG(id_aa64dfr0_el1); PRINT_REG(id_aa64dfr1_el1); PRINT_REG(id_aa64isar0_el1); PRINT_REG(id_aa64isar1_el1); PRINT_REG(id_aa64pfr0_el1); PRINT_REG(id_aa64pfr1_el1); PRINT_REG(id_afr0_el1); PRINT_REG(id_dfr0_el1); PRINT_REG(id_isar0_el1); PRINT_REG(id_isar1_el1); PRINT_REG(id_isar2_el1); PRINT_REG(id_isar3_el1); PRINT_REG(id_isar4_el1); PRINT_REG(id_isar5_el1); PRINT_REG(id_mmfr0_el1); PRINT_REG(id_mmfr1_el1); PRINT_REG(id_mmfr2_el1); PRINT_REG(id_mmfr3_el1); #if 0 /* Missing from llvm */ PRINT_REG(id_mmfr4_el1); #endif PRINT_REG(id_pfr0_el1); PRINT_REG(id_pfr1_el1); PRINT_REG(isr_el1); PRINT_REG(mair_el1); PRINT_REG(midr_el1); PRINT_REG(mpidr_el1); PRINT_REG(mvfr0_el1); PRINT_REG(mvfr1_el1); PRINT_REG(mvfr2_el1); PRINT_REG(revidr_el1); PRINT_REG(sctlr_el1); PRINT_REG(sp_el0); PRINT_REG(spsel); PRINT_REG(spsr_el1); PRINT_REG(tcr_el1); PRINT_REG(tpidr_el0); PRINT_REG(tpidr_el1); PRINT_REG(tpidrro_el0); PRINT_REG(ttbr0_el1); PRINT_REG(ttbr1_el1); PRINT_REG(vbar_el1); #undef PRINT_REG } DB_SHOW_COMMAND(vtop, db_show_vtop) { uint64_t phys; if (have_addr) { phys = arm64_address_translate_s1e1r(addr); db_printf("EL1 physical address reg (read): 0x%016lx\n", phys); phys = arm64_address_translate_s1e1w(addr); db_printf("EL1 physical address reg (write): 0x%016lx\n", phys); phys = arm64_address_translate_s1e0r(addr); db_printf("EL0 physical address reg (read): 0x%016lx\n", phys); phys = arm64_address_translate_s1e0w(addr); db_printf("EL0 physical address reg (write): 0x%016lx\n", phys); } else db_printf("show vtop \n"); } #endif diff --git a/sys/arm64/arm64/minidump_machdep.c b/sys/arm64/arm64/minidump_machdep.c index 87bf41b27fdf..8ee626953aef 100644 --- a/sys/arm64/arm64/minidump_machdep.c +++ b/sys/arm64/arm64/minidump_machdep.c @@ -1,399 +1,400 @@ /*- * Copyright (c) 2006 Peter Wemm * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); static struct kerneldumpheader kdh; /* Handle chunked writes. */ static size_t fragsz; static void *dump_va; static size_t dumpsize; static uint64_t tmpbuffer[Ln_ENTRIES]; static int blk_flush(struct dumperinfo *di) { int error; if (fragsz == 0) return (0); error = dump_append(di, dump_va, fragsz); fragsz = 0; return (error); } static int blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) { size_t len; int error, c; u_int maxdumpsz; maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); if (maxdumpsz == 0) /* seatbelt */ maxdumpsz = PAGE_SIZE; error = 0; if ((sz % PAGE_SIZE) != 0) { printf("size not page aligned\n"); return (EINVAL); } if (ptr != NULL && pa != 0) { printf("cant have both va and pa!\n"); return (EINVAL); } if ((((uintptr_t)pa) % PAGE_SIZE) != 0) { printf("address not page aligned %p\n", ptr); return (EINVAL); } if (ptr != NULL) { /* * If we're doing a virtual dump, flush any * pre-existing pa pages. */ error = blk_flush(di); if (error) return (error); } while (sz) { len = maxdumpsz - fragsz; if (len > sz) len = sz; dumpsys_pb_progress(len); wdog_kern_pat(WD_LASTVAL); if (ptr) { error = dump_append(di, ptr, len); if (error) return (error); ptr += len; sz -= len; } else { dump_va = (void *)PHYS_TO_DMAP(pa); fragsz += len; pa += len; sz -= len; error = blk_flush(di); if (error) return (error); } /* Check for user abort. */ c = cncheckc(); if (c == 0x03) return (ECANCELED); if (c != -1) printf(" (CTRL-C to abort) "); } return (0); } int cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) { struct minidumphdr mdhdr; struct msgbuf *mbp; pd_entry_t *l0, *l1, l1e, *l2, l2e; pt_entry_t *l3, l3e; vm_offset_t va, kva_end; vm_paddr_t pa; uint32_t pmapsize; int error, i, j, retry_count; retry_count = 0; retry: retry_count++; error = 0; pmapsize = 0; /* Snapshot the KVA upper bound in case it grows. */ kva_end = kernel_vm_end; /* * Walk the kernel page table pages, setting the active entries in the * dump bitmap. * * NB: for a live dump, we may be racing with updates to the page * tables, so care must be taken to read each entry only once. 
*/ for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += L2_SIZE) { pmapsize += PAGE_SIZE; if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) continue; l1e = atomic_load_64(l1); l2e = atomic_load_64(l2); if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) { pa = PTE_TO_PHYS(l1e); for (i = 0; i < Ln_ENTRIES * Ln_ENTRIES; i++, pa += PAGE_SIZE) if (vm_phys_is_dumpable(pa)) vm_page_dump_add(state->dump_bitset, pa); pmapsize += (Ln_ENTRIES - 1) * PAGE_SIZE; va += L1_SIZE - L2_SIZE; } else if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) { pa = PTE_TO_PHYS(l2e); for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) { if (vm_phys_is_dumpable(pa)) vm_page_dump_add(state->dump_bitset, pa); } } else if ((l2e & ATTR_DESCR_MASK) == L2_TABLE) { for (i = 0; i < Ln_ENTRIES; i++) { l3e = atomic_load_64(&l3[i]); if ((l3e & ATTR_DESCR_MASK) != L3_PAGE) continue; pa = PTE_TO_PHYS(l3e); - if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) + if (PHYS_IN_DMAP_RANGE(pa) && + vm_phys_is_dumpable(pa)) vm_page_dump_add(state->dump_bitset, pa); } } } /* Calculate dump size. */ mbp = state->msgbufp; dumpsize = pmapsize; dumpsize += round_page(mbp->msg_size); dumpsize += round_page(sizeof(dump_avail)); dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages)); VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) { - if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) + if (PHYS_IN_DMAP_RANGE(pa) && vm_phys_is_dumpable(pa)) dumpsize += PAGE_SIZE; else vm_page_dump_drop(state->dump_bitset, pa); } dumpsize += PAGE_SIZE; dumpsys_pb_init(dumpsize); /* Initialize mdhdr */ bzero(&mdhdr, sizeof(mdhdr)); strcpy(mdhdr.magic, MINIDUMP_MAGIC); mdhdr.version = MINIDUMP_VERSION; mdhdr.msgbufsize = mbp->msg_size; mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages)); mdhdr.pmapsize = pmapsize; mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; mdhdr.dmapphys = DMAP_MIN_PHYSADDR; mdhdr.dmapbase = DMAP_MIN_ADDRESS; mdhdr.dmapend = DMAP_MAX_ADDRESS; mdhdr.dumpavailsize = round_page(sizeof(dump_avail)); #if PAGE_SIZE == PAGE_SIZE_4K mdhdr.flags = MINIDUMP_FLAG_PS_4K; #elif PAGE_SIZE == PAGE_SIZE_16K mdhdr.flags = MINIDUMP_FLAG_PS_16K; #else #error Unsupported page size #endif dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AARCH64_VERSION, dumpsize); error = dump_start(di, &kdh); if (error != 0) goto fail; printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20, ptoa((uintmax_t)physmem) / 1048576); /* Dump my header */ bzero(&tmpbuffer, sizeof(tmpbuffer)); bcopy(&mdhdr, &tmpbuffer, sizeof(mdhdr)); error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; /* Dump msgbuf up front */ error = blk_write(di, mbp->msg_ptr, 0, round_page(mbp->msg_size)); if (error) goto fail; /* Dump dump_avail */ _Static_assert(sizeof(dump_avail) <= sizeof(tmpbuffer), "Large dump_avail not handled"); bzero(tmpbuffer, sizeof(tmpbuffer)); memcpy(tmpbuffer, dump_avail, sizeof(dump_avail)); error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; /* Dump bitmap */ error = blk_write(di, (char *)state->dump_bitset, 0, round_page(BITSET_SIZE(vm_page_dump_pages))); if (error) goto fail; /* Dump kernel page directory pages */ bzero(&tmpbuffer, sizeof(tmpbuffer)); for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += L2_SIZE) { if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) { /* We always write a page, even if it is zero */ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse tmpbuffer in the same block*/ error = blk_flush(di); if (error) goto fail; continue; } l1e = 
atomic_load_64(l1); l2e = atomic_load_64(l2); if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) { /* * Handle a 1GB block mapping: write out 512 fake L2 * pages. */ pa = PTE_TO_PHYS(l1e) | (va & L1_OFFSET); for (i = 0; i < Ln_ENTRIES; i++) { for (j = 0; j < Ln_ENTRIES; j++) { tmpbuffer[j] = (pa + i * L2_SIZE + j * PAGE_SIZE) | ATTR_DEFAULT | L3_PAGE; } error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; } /* flush, in case we reuse tmpbuffer in the same block*/ error = blk_flush(di); if (error) goto fail; bzero(&tmpbuffer, sizeof(tmpbuffer)); va += L1_SIZE - L2_SIZE; } else if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) { pa = PTE_TO_PHYS(l2e) | (va & L2_OFFSET); /* Generate fake l3 entries based upon the l1 entry */ for (i = 0; i < Ln_ENTRIES; i++) { tmpbuffer[i] = (pa + i * PAGE_SIZE) | ATTR_DEFAULT | L3_PAGE; } error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse fakepd in the same block */ error = blk_flush(di); if (error) goto fail; bzero(&tmpbuffer, sizeof(tmpbuffer)); continue; } else { pa = PTE_TO_PHYS(l2e); /* * We always write a page, even if it is zero. If pa * is malformed, write the zeroed tmpbuffer. */ - if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) + if (PHYS_IN_DMAP_RANGE(pa) && vm_phys_is_dumpable(pa)) error = blk_write(di, NULL, pa, PAGE_SIZE); else error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) goto fail; } } /* Dump memory chunks */ VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) { error = blk_write(di, 0, pa, PAGE_SIZE); if (error) goto fail; } error = blk_flush(di); if (error) goto fail; error = dump_finish(di, &kdh); if (error != 0) goto fail; printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; printf("\n"); if (error == ENOSPC) { printf("Dump map grown while dumping. "); if (retry_count < 5) { printf("Retrying...\n"); goto retry; } printf("Dump failed.\n"); } else if (error == ECANCELED) printf("Dump aborted\n"); else if (error == E2BIG) { printf("Dump failed. Partition too small (about %lluMB were " "needed this time).\n", (long long)dumpsize >> 20); } else printf("** DUMP FAILED (ERROR %d) **\n", error); return (error); } diff --git a/sys/arm64/include/vmparam.h b/sys/arm64/include/vmparam.h index d5d4a5691f37..83c55913f56e 100644 --- a/sys/arm64/include/vmparam.h +++ b/sys/arm64/include/vmparam.h @@ -1,312 +1,324 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * from: FreeBSD: src/sys/i386/include/vmparam.h,v 1.33 2000/03/30 */ #ifdef __arm__ #include #else /* !__arm__ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ /* * Virtual memory related constants, all in bytes */ #ifndef MAXTSIZ #define MAXTSIZ (1*1024*1024*1024) /* max text size */ #endif #ifndef DFLDSIZ #define DFLDSIZ (128*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (1*1024*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (128*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (1*1024*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128*1024) /* amount to grow stack */ #endif /* * The physical address space is sparsely populated. */ #define VM_PHYSSEG_SPARSE /* * The number of PHYSSEG entries. */ #define VM_PHYSSEG_MAX 64 /* * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool * from which physical pages are allocated and VM_FREEPOOL_DIRECT is * the pool from which physical pages for small UMA objects are * allocated. */ #define VM_NFREEPOOL 2 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 1 /* * Create two free page lists: VM_FREELIST_DMA32 is for physical pages that have * physical addresses below 4G, and VM_FREELIST_DEFAULT is for all other * physical pages. */ #define VM_NFREELIST 2 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_DMA32 1 /* * When PAGE_SIZE is 4KB, an allocation size of 16MB is supported in order * to optimize the use of the direct map by UMA. Specifically, a 64-byte * cache line contains at most 8 L2 BLOCK entries, collectively mapping 16MB * of physical memory. By reducing the number of distinct 16MB "pages" that * are used by UMA, the physical memory allocator reduces the likelihood of * both 2MB page TLB misses and cache misses during the page table walk when * a 2MB page TLB miss does occur. * * When PAGE_SIZE is 16KB, an allocation size of 32MB is supported. This * size is used by level 0 reservations and L2 BLOCK mappings. */ #if PAGE_SIZE == PAGE_SIZE_4K #define VM_NFREEORDER 13 #elif PAGE_SIZE == PAGE_SIZE_16K #define VM_NFREEORDER 12 #else #error Unsupported page size #endif /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 512 pages when PAGE_SIZE is 4KB, and * 2048 pages when PAGE_SIZE is 16KB. */ #ifndef VM_LEVEL_0_ORDER #if PAGE_SIZE == PAGE_SIZE_4K #define VM_LEVEL_0_ORDER 9 #elif PAGE_SIZE == PAGE_SIZE_16K #define VM_LEVEL_0_ORDER 11 #else #error Unsupported page size #endif #endif /** * Address space layout. * * ARMv8 implements up to a 48 bit virtual address space. 
The address space is * split into 2 regions at each end of the 64 bit address space, with an * out of range "hole" in the middle. * * We use the full 48 bits for each region, however the kernel may only use * a limited range within this space. * * Upper region: 0xffffffffffffffff Top of virtual memory * * 0xfffffeffffffffff End of DMAP * 0xffffa00000000000 Start of DMAP * * 0xffff027fffffffff End of KMSAN origin map * 0xffff020000000000 Start of KMSAN origin map * * 0xffff017fffffffff End of KMSAN shadow map * 0xffff010000000000 Start of KMSAN shadow map * * 0xffff009fffffffff End of KASAN shadow map * 0xffff008000000000 Start of KASAN shadow map * * 0xffff007fffffffff End of KVA * 0xffff000000000000 Kernel base address & start of KVA * * Hole: 0xfffeffffffffffff * 0x0001000000000000 * * Lower region: 0x0000ffffffffffff End of user address space * 0x0000000000000000 Start of user address space * * We use the upper region for the kernel, and the lower region for userland. * * We define some interesting address constants: * * VM_MIN_ADDRESS and VM_MAX_ADDRESS define the start and end of the entire * 64 bit address space, mostly just for convenience. * * VM_MIN_KERNEL_ADDRESS and VM_MAX_KERNEL_ADDRESS define the start and end of * mappable kernel virtual address space. * * VM_MIN_USER_ADDRESS and VM_MAX_USER_ADDRESS define the start and end of the * user address space. */ #define VM_MIN_ADDRESS (0x0000000000000000UL) #define VM_MAX_ADDRESS (0xffffffffffffffffUL) /* 512 GiB of kernel addresses */ #define VM_MIN_KERNEL_ADDRESS (0xffff000000000000UL) #define VM_MAX_KERNEL_ADDRESS (0xffff008000000000UL) /* 128 GiB KASAN shadow map */ #define KASAN_MIN_ADDRESS (0xffff008000000000UL) #define KASAN_MAX_ADDRESS (0xffff00a000000000UL) /* 512GiB KMSAN shadow map */ #define KMSAN_SHAD_MIN_ADDRESS (0xffff010000000000UL) #define KMSAN_SHAD_MAX_ADDRESS (0xffff018000000000UL) /* 512GiB KMSAN origin map */ #define KMSAN_ORIG_MIN_ADDRESS (0xffff020000000000UL) #define KMSAN_ORIG_MAX_ADDRESS (0xffff028000000000UL) /* The address bits that hold a pointer authentication code */ #define PAC_ADDR_MASK (0xff7f000000000000UL) /* If true addr is in the kernel address space */ #define ADDR_IS_KERNEL(addr) (((addr) & (1ul << 55)) == (1ul << 55)) /* If true addr is in its canonical form (i.e. no TBI, PAC, etc.) */ #define ADDR_IS_CANONICAL(addr) \ (((addr) & 0xffff000000000000UL) == 0 || \ ((addr) & 0xffff000000000000UL) == 0xffff000000000000UL) #define ADDR_MAKE_CANONICAL(addr) ({ \ __typeof(addr) _tmp_addr = (addr); \ \ _tmp_addr &= ~0xffff000000000000UL; \ if (ADDR_IS_KERNEL(addr)) \ _tmp_addr |= 0xffff000000000000UL; \ \ _tmp_addr; \ }) /* 95 TiB maximum for the direct map region */ #define DMAP_MIN_ADDRESS (0xffffa00000000000UL) #define DMAP_MAX_ADDRESS (0xffffff0000000000UL) #define DMAP_MIN_PHYSADDR (dmap_phys_base) #define DMAP_MAX_PHYSADDR (dmap_phys_max) -/* True if pa is in the dmap range */ -#define PHYS_IN_DMAP(pa) ((pa) >= DMAP_MIN_PHYSADDR && \ +/* + * Checks to see if a physical address is in the DMAP range. + * - PHYS_IN_DMAP_RANGE will return true that may be within the DMAP range + * but not accessible through the DMAP, e.g. device memory between two + * DMAP physical address regions. + * - PHYS_IN_DMAP will check if DMAP address is mapped before returning true. + * + * PHYS_IN_DMAP_RANGE should only be used when a check on the address is + * performed, e.g. by checking the physical address is within phys_avail, + * or checking the virtual address is mapped. 
+ */ +#define PHYS_IN_DMAP_RANGE(pa) ((pa) >= DMAP_MIN_PHYSADDR && \ (pa) < DMAP_MAX_PHYSADDR) +#define PHYS_IN_DMAP(pa) (PHYS_IN_DMAP_RANGE(pa) && \ + pmap_klookup(PHYS_TO_DMAP(pa), NULL)) /* True if va is in the dmap range */ #define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ (va) < (dmap_max_addr)) #define PMAP_HAS_DMAP 1 #define PHYS_TO_DMAP(pa) \ ({ \ - KASSERT(PHYS_IN_DMAP(pa), \ + KASSERT(PHYS_IN_DMAP_RANGE(pa), \ ("%s: PA out of range, PA: 0x%lx", __func__, \ (vm_paddr_t)(pa))); \ ((pa) - dmap_phys_base) + DMAP_MIN_ADDRESS; \ }) #define DMAP_TO_PHYS(va) \ ({ \ KASSERT(VIRT_IN_DMAP(va), \ ("%s: VA out of range, VA: 0x%lx", __func__, \ (vm_offset_t)(va))); \ ((va) - DMAP_MIN_ADDRESS) + dmap_phys_base; \ }) #define VM_MIN_USER_ADDRESS (0x0000000000000000UL) #define VM_MAX_USER_ADDRESS (0x0001000000000000UL) #define VM_MINUSER_ADDRESS (VM_MIN_USER_ADDRESS) #define VM_MAXUSER_ADDRESS (VM_MAX_USER_ADDRESS) #define KERNBASE (VM_MIN_KERNEL_ADDRESS) #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (1) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 60% of the * kernel map. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) #endif /* * Initial pagein size of beginning of executable file. */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #if !defined(KASAN) && !defined(KMSAN) #define UMA_MD_SMALL_ALLOC #endif #ifndef LOCORE extern vm_paddr_t dmap_phys_base; extern vm_paddr_t dmap_phys_max; extern vm_offset_t dmap_max_addr; extern vm_offset_t vm_max_kernel_address; #endif #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #define DEVMAP_MAX_VADDR VM_MAX_KERNEL_ADDRESS /* * The pmap can create non-transparent large page mappings. */ #define PMAP_HAS_LARGEPAGES 1 /* * Need a page dump array for minidump. */ #define MINIDUMP_PAGE_TRACKING 1 #endif /* !_MACHINE_VMPARAM_H_ */ #endif /* !__arm__ */
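
Not part of the patch, for illustration only. The core of this change is in sys/arm64/include/vmparam.h: the old PHYS_IN_DMAP() check is split in two. PHYS_IN_DMAP_RANGE(pa) is the cheap test that pa lies between DMAP_MIN_PHYSADDR and DMAP_MAX_PHYSADDR, while PHYS_IN_DMAP(pa) additionally calls pmap_klookup(PHYS_TO_DMAP(pa), NULL) to confirm that the direct-map virtual address is actually mapped, since device memory or holes between RAM segments can fall inside the DMAP physical range without being mapped there. A minimal sketch of the safe, self-contained pattern follows; the helper name dmap_read64 is invented for the example and it assumes an aligned pa:

	/*
	 * Hypothetical helper: read a 64-bit word through the DMAP only if
	 * the physical address is really reachable there.  PHYS_IN_DMAP()
	 * now performs both the range check and the pmap_klookup(), so no
	 * separate lookup is needed at the call site.
	 */
	static bool
	dmap_read64(vm_paddr_t pa, uint64_t *valp)
	{

		if (!PHYS_IN_DMAP(pa))
			return (false);
		*valp = *(uint64_t *)PHYS_TO_DMAP(pa);
		return (true);
	}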
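
The call sites converted from PHYS_IN_DMAP() to PHYS_IN_DMAP_RANGE() already hold independent proof that the page is mapped: arm64_get_writable_addr() obtained the physical address from pmap_klookup() on the original virtual address, and the minidump code only considers physical addresses taken from live kernel page-table entries or the dump bitset, still filtered through vm_phys_is_dumpable(). For such callers the range-only check is sufficient and avoids a redundant page-table walk per page. A sketch of that caller pattern, with the hypothetical name kva_to_dmap_alias standing in for the real call sites:

	/*
	 * Hypothetical helper: return the DMAP alias of a mapped kernel VA.
	 * The successful pmap_klookup() already proves the page is mapped,
	 * so the cheaper PHYS_IN_DMAP_RANGE() check is enough before
	 * PHYS_TO_DMAP() (whose KASSERT performs the same range test).
	 */
	static void *
	kva_to_dmap_alias(vm_offset_t va)
	{
		vm_paddr_t pa;

		if (!pmap_klookup(va, &pa))
			return (NULL);
		if (!PHYS_IN_DMAP_RANGE(pa))
			return (NULL);
		return ((void *)PHYS_TO_DMAP(pa));
	}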
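
In efirt_machdep.c the explicit pmap_klookup() in efi_phys_to_kva() is dropped because PHYS_IN_DMAP() now makes the same check internally, so the function keeps its behaviour of returning 0 for memory that is not reachable through the DMAP (mapping such memory remains a TODO). An illustrative caller, with the hypothetical name efi_copy_out, showing the expected handling of that 0 return; it also assumes the whole range is DMAP-backed:

	/*
	 * Hypothetical caller: copy from an EFI physical address, failing
	 * cleanly when efi_phys_to_kva() cannot provide a DMAP mapping.
	 */
	static int
	efi_copy_out(vm_paddr_t paddr, void *dst, size_t len)
	{
		vm_offset_t kva;

		kva = efi_phys_to_kva(paddr);
		if (kva == 0)
			return (EFAULT);
		memcpy(dst, (void *)kva, len);
		return (0);
	}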