diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S index c8b7ded1bf29..0268ab6ef00c 100644 --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -1,836 +1,835 @@ /*- * Copyright (c) 2012-2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "assym.inc" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #define VIRT_BITS 48 #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) .globl kernbase .set kernbase, KERNBASE /* U-Boot booti related constants. */ #if defined(LINUX_BOOT_ABI) #define FDT_MAGIC 0xEDFE0DD0 /* FDT blob Magic */ #ifndef UBOOT_IMAGE_OFFSET #define UBOOT_IMAGE_OFFSET 0 /* Image offset from start of */ #endif /* 2 MiB page */ #ifndef UBOOT_IMAGE_SIZE /* Total size of image */ #define UBOOT_IMAGE_SIZE _end - _start #endif #ifndef UBOOT_IMAGE_FLAGS #define UBOOT_IMAGE_FLAGS 0 /* LE kernel, unspecified */ #endif /* page size */ #endif /* defined(LINUX_BOOT_ABI) */ /* * We assume: * MMU on with an identity map, or off * D-Cache: off * I-Cache: on or off * We are loaded at a 2MiB aligned address */ .text .globl _start _start: #if defined(LINUX_BOOT_ABI) /* U-boot image header */ b 1f /* code 0 */ .long 0 /* code 1 */ .quad UBOOT_IMAGE_OFFSET /* Image offset in 2 MiB page, LE */ .quad UBOOT_IMAGE_SIZE /* Image size, LE */ .quad UBOOT_IMAGE_FLAGS /* Flags for kernel. LE */ .quad 0 /* Reserved */ .quad 0 /* Reserved */ .quad 0 /* Reserved */ .long 0x644d5241 /* Magic "ARM\x64", LE */ .long 0 /* Reserved for PE COFF offset*/ 1: #endif /* defined(LINUX_BOOT_ABI) */ /* Drop to EL1 */ bl drop_to_el1 /* * Disable the MMU. We may have entered the kernel with it on and * will need to update the tables later. If this has been set up * with anything other than a VA == PA map then this will fail, * but in this case the code to find where we are running from * would have also failed. */ dsb sy mrs x2, sctlr_el1 bic x2, x2, SCTLR_M msr sctlr_el1, x2 isb /* Set the context id */ msr contextidr_el1, xzr /* Get the virt -> phys offset */ bl get_virt_delta /* * At this point: * x29 = PA - VA * x28 = Our physical load address */ /* Create the page tables */ bl create_pagetables /* * At this point: * x27 = TTBR0 table * x26 = Kernel L1 table * x24 = TTBR1 table */ /* Enable the mmu */ bl start_mmu /* Jump to the virtual address space */ ldr x15, .Lvirtdone br x15 virtdone: /* Set up the stack */ adr x25, initstack_end mov sp, x25 sub sp, sp, #PCB_SIZE /* Zero the BSS */ ldr x15, .Lbss ldr x14, .Lend 1: str xzr, [x15], #8 cmp x15, x14 b.lo 1b /* Backup the module pointer */ mov x1, x0 /* Make the page table base a virtual address */ sub x26, x26, x29 sub x24, x24, x29 sub sp, sp, #BOOTPARAMS_SIZE mov x0, sp /* Degate the delda so it is VA -> PA */ neg x29, x29 str x1, [x0, #BP_MODULEP] str x26, [x0, #BP_KERN_L1PT] str x29, [x0, #BP_KERN_DELTA] adr x25, initstack str x25, [x0, #BP_KERN_STACK] str x24, [x0, #BP_KERN_L0PT] str x23, [x0, #BP_BOOT_EL] /* trace back starts here */ mov fp, #0 /* Branch to C code */ bl initarm bl mi_startup /* We should not get here */ brk 0 .align 3 .Lvirtdone: .quad virtdone .Lbss: .quad __bss_start .Lend: .quad _end #ifdef SMP /* * mpentry(unsigned long) * * Called by a core when it is being brought online. * The data in x0 is passed straight to init_secondary. */ ENTRY(mpentry) /* Disable interrupts */ msr daifset, #2 /* Drop to EL1 */ bl drop_to_el1 /* Set the context id */ msr contextidr_el1, xzr /* Load the kernel page table */ adr x24, pagetable_l0_ttbr1 /* Load the identity page table */ adr x27, pagetable_l0_ttbr0 /* Enable the mmu */ bl start_mmu /* Jump to the virtual address space */ ldr x15, =mp_virtdone br x15 mp_virtdone: - ldr x4, =secondary_stacks - mov x5, #(PAGE_SIZE * KSTACK_PAGES) - mul x5, x0, x5 - add sp, x4, x5 - + /* Start using the AP boot stack */ + ldr x4, =bootstack + ldr x4, [x4] + mov sp, x4 b init_secondary END(mpentry) #endif /* * If we are started in EL2, configure the required hypervisor * registers and drop to EL1. */ drop_to_el1: mrs x23, CurrentEL lsr x23, x23, #2 cmp x23, #0x2 b.eq 1f ret 1: /* Configure the Hypervisor */ mov x2, #(HCR_RW) msr hcr_el2, x2 /* Load the Virtualization Process ID Register */ mrs x2, midr_el1 msr vpidr_el2, x2 /* Load the Virtualization Multiprocess ID Register */ mrs x2, mpidr_el1 msr vmpidr_el2, x2 /* Set the bits that need to be 1 in sctlr_el1 */ ldr x2, .Lsctlr_res1 msr sctlr_el1, x2 /* Don't trap to EL2 for exceptions */ mov x2, #CPTR_RES1 msr cptr_el2, x2 /* Don't trap to EL2 for CP15 traps */ msr hstr_el2, xzr /* Enable access to the physical timers at EL1 */ mrs x2, cnthctl_el2 orr x2, x2, #(CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) msr cnthctl_el2, x2 /* Set the counter offset to a known value */ msr cntvoff_el2, xzr /* Hypervisor trap functions */ adr x2, hyp_vectors msr vbar_el2, x2 mov x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h) msr spsr_el2, x2 /* Configure GICv3 CPU interface */ mrs x2, id_aa64pfr0_el1 /* Extract GIC bits from the register */ ubfx x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS /* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */ cmp x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT) b.ne 2f mrs x2, icc_sre_el2 orr x2, x2, #ICC_SRE_EL2_EN /* Enable access from insecure EL1 */ orr x2, x2, #ICC_SRE_EL2_SRE /* Enable system registers */ msr icc_sre_el2, x2 2: /* Set the address to return to our return address */ msr elr_el2, x30 isb eret .align 3 .Lsctlr_res1: .quad SCTLR_RES1 #define VECT_EMPTY \ .align 7; \ 1: b 1b .align 11 hyp_vectors: VECT_EMPTY /* Synchronous EL2t */ VECT_EMPTY /* IRQ EL2t */ VECT_EMPTY /* FIQ EL2t */ VECT_EMPTY /* Error EL2t */ VECT_EMPTY /* Synchronous EL2h */ VECT_EMPTY /* IRQ EL2h */ VECT_EMPTY /* FIQ EL2h */ VECT_EMPTY /* Error EL2h */ VECT_EMPTY /* Synchronous 64-bit EL1 */ VECT_EMPTY /* IRQ 64-bit EL1 */ VECT_EMPTY /* FIQ 64-bit EL1 */ VECT_EMPTY /* Error 64-bit EL1 */ VECT_EMPTY /* Synchronous 32-bit EL1 */ VECT_EMPTY /* IRQ 32-bit EL1 */ VECT_EMPTY /* FIQ 32-bit EL1 */ VECT_EMPTY /* Error 32-bit EL1 */ /* * Get the delta between the physical address we were loaded to and the * virtual address we expect to run from. This is used when building the * initial page table. */ get_virt_delta: /* Load the physical address of virt_map */ adr x29, virt_map /* Load the virtual address of virt_map stored in virt_map */ ldr x28, [x29] /* Find PA - VA as PA' = VA' - VA + PA = VA' + (PA - VA) = VA' + x29 */ sub x29, x29, x28 /* Find the load address for the kernel */ mov x28, #(KERNBASE) add x28, x28, x29 ret .align 3 virt_map: .quad virt_map /* * This builds the page tables containing the identity map, and the kernel * virtual map. * * It relys on: * We were loaded to an address that is on a 2MiB boundary * All the memory must not cross a 1GiB boundaty * x28 contains the physical address we were loaded from * * TODO: This is out of date. * There are at least 5 pages before that address for the page tables * The pages used are: * - The Kernel L2 table * - The Kernel L1 table * - The Kernel L0 table (TTBR1) * - The identity (PA = VA) L1 table * - The identity (PA = VA) L0 table (TTBR0) * - The DMAP L1 tables */ create_pagetables: /* Save the Link register */ mov x5, x30 /* Clean the page table */ adr x6, pagetable mov x26, x6 adr x27, pagetable_end 1: stp xzr, xzr, [x6], #16 stp xzr, xzr, [x6], #16 stp xzr, xzr, [x6], #16 stp xzr, xzr, [x6], #16 cmp x6, x27 b.lo 1b /* * Build the TTBR1 maps. */ /* Find the size of the kernel */ mov x6, #(KERNBASE) #if defined(LINUX_BOOT_ABI) /* X19 is used as 'map FDT data' flag */ mov x19, xzr /* No modules or FDT pointer ? */ cbz x0, booti_no_fdt /* Test if modulep points to modules descriptor or to FDT */ ldr w8, [x0] ldr w7, =FDT_MAGIC cmp w7, w8 b.eq booti_fdt #endif /* Booted with modules pointer */ /* Find modulep - begin */ sub x8, x0, x6 /* Add two 2MiB pages for the module data and round up */ ldr x7, =(3 * L2_SIZE - 1) add x8, x8, x7 b common #if defined(LINUX_BOOT_ABI) booti_fdt: /* Booted by U-Boot booti with FDT data */ /* Set 'map FDT data' flag */ mov x19, #1 booti_no_fdt: /* Booted by U-Boot booti without FTD data */ /* Find the end - begin */ ldr x7, .Lend sub x8, x7, x6 /* * Add one 2MiB page for copy of FDT data (maximum FDT size), * one for metadata and round up */ ldr x7, =(3 * L2_SIZE - 1) add x8, x8, x7 #endif common: /* Get the number of l2 pages to allocate, rounded down */ lsr x10, x8, #(L2_SHIFT) /* Create the kernel space L2 table */ mov x6, x26 mov x7, #VM_MEMATTR_WRITE_BACK mov x8, #(KERNBASE & L2_BLOCK_MASK) mov x9, x28 bl build_l2_block_pagetable /* Move to the l1 table */ add x26, x26, #PAGE_SIZE /* Link the l1 -> l2 table */ mov x9, x6 mov x6, x26 bl link_l1_pagetable /* Move to the l0 table */ add x24, x26, #PAGE_SIZE /* Link the l0 -> l1 table */ mov x9, x6 mov x6, x24 mov x10, #1 bl link_l0_pagetable /* Link the DMAP tables */ ldr x8, =DMAP_MIN_ADDRESS adr x9, pagetable_dmap; mov x10, #DMAP_TABLES bl link_l0_pagetable /* * Build the TTBR0 maps. As TTBR0 maps, they must specify ATTR_S1_nG. * They are only needed early on, so the VA = PA map is uncached. */ add x27, x24, #PAGE_SIZE mov x6, x27 /* The initial page table */ #if defined(SOCDEV_PA) && defined(SOCDEV_VA) /* Create a table for the UART */ mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_DEVICE)) mov x8, #(SOCDEV_VA) /* VA start */ mov x9, #(SOCDEV_PA) /* PA start */ mov x10, #1 bl build_l1_block_pagetable #endif #if defined(LINUX_BOOT_ABI) /* Map FDT data ? */ cbz x19, 1f /* Create the identity mapping for FDT data (2 MiB max) */ mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE)) mov x9, x0 mov x8, x0 /* VA start (== PA start) */ mov x10, #1 bl build_l1_block_pagetable 1: #endif /* Create the VA = PA map */ mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE)) mov x9, x27 mov x8, x9 /* VA start (== PA start) */ mov x10, #1 bl build_l1_block_pagetable /* Move to the l0 table */ add x27, x27, #PAGE_SIZE /* Link the l0 -> l1 table */ mov x9, x6 mov x6, x27 mov x10, #1 bl link_l0_pagetable /* Restore the Link register */ mov x30, x5 ret /* * Builds an L0 -> L1 table descriptor * * This is a link for a 512GiB block of memory with up to 1GiB regions mapped * within it by build_l1_block_pagetable. * * x6 = L0 table * x8 = Virtual Address * x9 = L1 PA (trashed) * x10 = Entry count * x11, x12 and x13 are trashed */ link_l0_pagetable: /* * Link an L0 -> L1 table entry. */ /* Find the table index */ lsr x11, x8, #L0_SHIFT and x11, x11, #L0_ADDR_MASK /* Build the L0 block entry */ mov x12, #L0_TABLE /* Only use the output address bits */ lsr x9, x9, #PAGE_SHIFT 1: orr x13, x12, x9, lsl #PAGE_SHIFT /* Store the entry */ str x13, [x6, x11, lsl #3] sub x10, x10, #1 add x11, x11, #1 add x9, x9, #1 cbnz x10, 1b ret /* * Builds an L1 -> L2 table descriptor * * This is a link for a 1GiB block of memory with up to 2MiB regions mapped * within it by build_l2_block_pagetable. * * x6 = L1 table * x8 = Virtual Address * x9 = L2 PA (trashed) * x11, x12 and x13 are trashed */ link_l1_pagetable: /* * Link an L1 -> L2 table entry. */ /* Find the table index */ lsr x11, x8, #L1_SHIFT and x11, x11, #Ln_ADDR_MASK /* Build the L1 block entry */ mov x12, #L1_TABLE /* Only use the output address bits */ lsr x9, x9, #PAGE_SHIFT orr x13, x12, x9, lsl #PAGE_SHIFT /* Store the entry */ str x13, [x6, x11, lsl #3] ret /* * Builds count 1 GiB page table entry * x6 = L1 table * x7 = Variable lower block attributes * x8 = VA start * x9 = PA start (trashed) * x10 = Entry count * x11, x12 and x13 are trashed */ build_l1_block_pagetable: /* * Build the L1 table entry. */ /* Find the table index */ lsr x11, x8, #L1_SHIFT and x11, x11, #Ln_ADDR_MASK /* Build the L1 block entry */ orr x12, x7, #L1_BLOCK orr x12, x12, #(ATTR_AF) #ifdef SMP orr x12, x12, ATTR_SH(ATTR_SH_IS) #endif /* Only use the output address bits */ lsr x9, x9, #L1_SHIFT /* Set the physical address for this virtual address */ 1: orr x13, x12, x9, lsl #L1_SHIFT /* Store the entry */ str x13, [x6, x11, lsl #3] sub x10, x10, #1 add x11, x11, #1 add x9, x9, #1 cbnz x10, 1b ret /* * Builds count 2 MiB page table entry * x6 = L2 table * x7 = Type (0 = Device, 1 = Normal) * x8 = VA start * x9 = PA start (trashed) * x10 = Entry count * x11, x12 and x13 are trashed */ build_l2_block_pagetable: /* * Build the L2 table entry. */ /* Find the table index */ lsr x11, x8, #L2_SHIFT and x11, x11, #Ln_ADDR_MASK /* Build the L2 block entry */ lsl x12, x7, #2 orr x12, x12, #L2_BLOCK orr x12, x12, #(ATTR_AF) orr x12, x12, #(ATTR_S1_UXN) #ifdef SMP orr x12, x12, ATTR_SH(ATTR_SH_IS) #endif /* Only use the output address bits */ lsr x9, x9, #L2_SHIFT /* Set the physical address for this virtual address */ 1: orr x13, x12, x9, lsl #L2_SHIFT /* Store the entry */ str x13, [x6, x11, lsl #3] sub x10, x10, #1 add x11, x11, #1 add x9, x9, #1 cbnz x10, 1b ret start_mmu: dsb sy /* Load the exception vectors */ ldr x2, =exception_vectors msr vbar_el1, x2 /* Load ttbr0 and ttbr1 */ msr ttbr0_el1, x27 msr ttbr1_el1, x24 isb /* Clear the Monitor Debug System control register */ msr mdscr_el1, xzr /* Invalidate the TLB */ tlbi vmalle1is ldr x2, mair msr mair_el1, x2 /* * Setup TCR according to the PARange and ASIDBits fields * from ID_AA64MMFR0_EL1 and the HAFDBS field from the * ID_AA64MMFR1_EL1. More precisely, set TCR_EL1.AS * to 1 only if the ASIDBits field equals 0b0010. */ ldr x2, tcr mrs x3, id_aa64mmfr0_el1 /* Copy the bottom 3 bits from id_aa64mmfr0_el1 into TCR.IPS */ bfi x2, x3, #(TCR_IPS_SHIFT), #(TCR_IPS_WIDTH) and x3, x3, #(ID_AA64MMFR0_ASIDBits_MASK) /* Check if the HW supports 16 bit ASIDS */ cmp x3, #(ID_AA64MMFR0_ASIDBits_16) /* If so x3 == 1, else x3 == 0 */ cset x3, eq /* Set TCR.AS with x3 */ bfi x2, x3, #(TCR_ASID_SHIFT), #(TCR_ASID_WIDTH) /* * Check if the HW supports access flag and dirty state updates, * and set TCR_EL1.HA and TCR_EL1.HD accordingly. */ mrs x3, id_aa64mmfr1_el1 and x3, x3, #(ID_AA64MMFR1_HAFDBS_MASK) cmp x3, #1 b.ne 1f orr x2, x2, #(TCR_HA) b 2f 1: cmp x3, #2 b.ne 2f orr x2, x2, #(TCR_HA | TCR_HD) 2: msr tcr_el1, x2 /* * Setup SCTLR. */ ldr x2, sctlr_set ldr x3, sctlr_clear mrs x1, sctlr_el1 bic x1, x1, x3 /* Clear the required bits */ orr x1, x1, x2 /* Set the required bits */ msr sctlr_el1, x1 isb ret .align 3 mair: .quad MAIR_ATTR(MAIR_DEVICE_nGnRnE, VM_MEMATTR_DEVICE) | \ MAIR_ATTR(MAIR_NORMAL_NC, VM_MEMATTR_UNCACHEABLE) | \ MAIR_ATTR(MAIR_NORMAL_WB, VM_MEMATTR_WRITE_BACK) | \ MAIR_ATTR(MAIR_NORMAL_WT, VM_MEMATTR_WRITE_THROUGH) tcr: .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | \ TCR_CACHE_ATTRS | TCR_SMP_ATTRS) sctlr_set: /* Bits to set */ .quad (SCTLR_LSMAOE | SCTLR_nTLSMD | SCTLR_UCI | SCTLR_SPAN | \ SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \ SCTLR_I | SCTLR_SED | SCTLR_SA0 | SCTLR_SA | SCTLR_C | \ SCTLR_M | SCTLR_CP15BEN) sctlr_clear: /* Bits to clear */ .quad (SCTLR_EE | SCTLR_EOE | SCTLR_IESB | SCTLR_WXN | SCTLR_UMA | \ SCTLR_ITD | SCTLR_A) .globl abort abort: b abort //.section .init_pagetable .align 12 /* 4KiB aligned */ /* * 3 initial tables (in the following order): * L2 for kernel (High addresses) * L1 for kernel * L1 for user (Low addresses) */ pagetable: .space PAGE_SIZE pagetable_l1_ttbr1: .space PAGE_SIZE pagetable_l0_ttbr1: .space PAGE_SIZE pagetable_l1_ttbr0: .space PAGE_SIZE pagetable_l0_ttbr0: .space PAGE_SIZE .globl pagetable_dmap pagetable_dmap: .space PAGE_SIZE * DMAP_TABLES pagetable_end: el2_pagetable: .space PAGE_SIZE .globl init_pt_va init_pt_va: .quad pagetable /* XXX: Keep page tables VA */ .align 4 initstack: .space (PAGE_SIZE * KSTACK_PAGES) initstack_end: ENTRY(sigcode) mov x0, sp add x0, x0, #SF_UC 1: mov x8, #SYS_sigreturn svc 0 /* sigreturn failed, exit */ mov x8, #SYS_exit svc 0 b 1b END(sigcode) /* This may be copied to the stack, keep it 16-byte aligned */ .align 3 esigcode: .data .align 3 .global szsigcode szsigcode: .quad esigcode - sigcode ENTRY(aarch32_sigcode) .word 0xe1a0000d // mov r0, sp .word 0xe2800040 // add r0, r0, #SIGF_UC .word 0xe59f700c // ldr r7, [pc, #12] .word 0xef000000 // swi #0 .word 0xe59f7008 // ldr r7, [pc, #8] .word 0xef000000 // swi #0 .word 0xeafffffa // b . - 16 END(aarch32_sigcode) .word SYS_sigreturn .word SYS_exit .align 3 aarch32_esigcode: .data .global sz_aarch32_sigcode sz_aarch32_sigcode: .quad aarch32_esigcode - aarch32_sigcode diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c index e16a1f416c93..a9250e3e2cd7 100644 --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -1,840 +1,882 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_acpi.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef DEV_ACPI #include #include #endif #ifdef FDT #include #include #include #include #endif #include #include "pic_if.h" #define MP_QUIRK_CPULIST 0x01 /* The list of cpus may be wrong, */ /* don't panic if one fails to start */ static uint32_t mp_quirks; #ifdef FDT static struct { const char *compat; uint32_t quirks; } fdt_quirks[] = { { "arm,foundation-aarch64", MP_QUIRK_CPULIST }, { "arm,fvp-base", MP_QUIRK_CPULIST }, /* This is incorrect in some DTS files */ { "arm,vfp-base", MP_QUIRK_CPULIST }, { NULL, 0 }, }; #endif typedef void intr_ipi_send_t(void *, cpuset_t, u_int); typedef void intr_ipi_handler_t(void *); #define INTR_IPI_NAMELEN (MAXCOMLEN + 1) struct intr_ipi { intr_ipi_handler_t * ii_handler; void * ii_handler_arg; intr_ipi_send_t * ii_send; void * ii_send_arg; char ii_name[INTR_IPI_NAMELEN]; u_long * ii_count; }; static struct intr_ipi ipi_sources[INTR_IPI_COUNT]; static struct intr_ipi *intr_ipi_lookup(u_int); static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *, void *); static void ipi_ast(void *); static void ipi_hardclock(void *); static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); -struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; /* * Not all systems boot from the first CPU in the device tree. To work around * this we need to find which CPU we have booted from so when we later * enable the secondary CPUs we skip this one. */ static int cpu0 = -1; void mpentry(unsigned long cpuid); void init_secondary(uint64_t); -uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16); +/* Synchronize AP startup. */ +static struct mtx ap_boot_mtx; + +/* Stacks for AP initialization, discarded once idle threads are started. */ +void *bootstack; +static void *bootstacks[MAXCPU]; + +/* Count of started APs, used to synchronize access to bootstack. */ +static volatile int aps_started; /* Set to 1 once we're ready to let the APs out of the pen. */ -volatile int aps_ready = 0; +static volatile int aps_ready; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; static void release_aps(void *dummy __unused) { int i, started; /* Only release CPUs if they exist */ if (mp_ncpus == 1) return; intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL); intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL); intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL); intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); atomic_store_rel_int(&aps_ready, 1); /* Wake up the other CPUs */ __asm __volatile( "dsb ishst \n" "sev \n" ::: "memory"); printf("Release APs..."); started = 0; for (i = 0; i < 2000; i++) { if (smp_started) { printf("done\n"); return; } /* * Don't time out while we are making progress. Some large * systems can take a while to start all CPUs. */ if (smp_cpus > started) { i = 0; started = smp_cpus; } DELAY(1000); } printf("APs not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); void init_secondary(uint64_t cpu) { struct pcpu *pcpup; pmap_t pmap0; pcpup = &__pcpu[cpu]; /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); - /* Spin until the BSP releases the APs */ - while (!aps_ready) + /* Signal the BSP and spin until it has released all APs. */ + atomic_add_int(&aps_started, 1); + while (!atomic_load_int(&aps_ready)) __asm __volatile("wfe"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; - pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* Initialize curpmap to match TTBR0's current setting. */ pmap0 = vmspace_pmap(&vmspace0); KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1), ("pmap0 doesn't match cpu %ld's ttbr0", cpu)); pcpup->pc_curpmap = pmap0; /* * Identify current CPU. This is necessary to setup * affinity registers and to provide support for * runtime chip identification. */ identify_cpu(); install_cpu_errata(); intr_pic_init_secondary(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); #ifdef VFP vfp_init(); #endif dbg_init(); pan_enable(); mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); kcsan_cpu_init(cpu); + /* + * Assert that smp_after_idle_runnable condition is reasonable. + */ + MPASS(PCPU_GET(curpcb) == NULL); + /* Enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to init_secondary"); /* NOTREACHED */ } +static void +smp_after_idle_runnable(void *arg __unused) +{ + struct pcpu *pc; + int cpu; + + for (cpu = 1; cpu < mp_ncpus; cpu++) { + if (bootstacks[cpu] != NULL) { + pc = pcpu_find(cpu); + while (atomic_load_ptr(&pc->pc_curpcb) == NULL) + cpu_spinwait(); + kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); + } + } +} +SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, + smp_after_idle_runnable, NULL); + /* * Send IPI thru interrupt controller. */ static void pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi) { KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi); } /* * Setup IPI handler on interrupt controller. * * Not SMP coherent. */ static void intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand, void *arg) { struct intr_irqsrc *isrc; struct intr_ipi *ii; int error; KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi)); error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc); if (error != 0) return; isrc->isrc_handlers++; ii = intr_ipi_lookup(ipi); KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi)); ii->ii_handler = hand; ii->ii_handler_arg = arg; ii->ii_send = pic_ipi_send; ii->ii_send_arg = isrc; strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN); ii->ii_count = intr_ipi_setup_counters(name); } static void intr_ipi_send(cpuset_t cpus, u_int ipi) { struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); ii->ii_send(ii->ii_send_arg, cpus, ipi); } static void ipi_ast(void *dummy __unused) { CTR0(KTR_SMP, "IPI_AST"); } static void ipi_hardclock(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); } static void ipi_preempt(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); } static void ipi_rendezvous(void *dummy __unused) { CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); } static void ipi_stop(void *dummy __unused) { u_int cpu; CTR0(KTR_SMP, "IPI_STOP"); cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); #ifdef DDB dbg_register_sync(NULL); #endif CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); CTR0(KTR_SMP, "IPI_STOP (restart)"); } struct cpu_group * cpu_topo(void) { return (smp_topo_none()); } /* Determine if we running MP machine */ int cpu_mp_probe(void) { /* ARM64TODO: Read the u bit of mpidr_el1 to determine this */ return (1); } static bool start_cpu(u_int id, uint64_t target_cpu) { struct pcpu *pcpup; vm_paddr_t pa; u_int cpuid; - int err; + int err, naps; /* Check we are able to start this cpu */ if (id > mp_maxid) return (false); KASSERT(id < MAXCPU, ("Too many CPUs")); /* We are already running on cpu 0 */ if (id == cpu0) return (true); /* * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other - * CPUs ordered as the are likely grouped into clusters so it can be + * CPUs ordered as they are likely grouped into clusters so it can be * useful to keep that property, e.g. for the GICv3 driver to send * an IPI to all CPUs in the cluster. */ cpuid = id; if (cpuid < cpu0) cpuid += mp_maxid + 1; cpuid -= cpu0; pcpup = &__pcpu[cpuid]; pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); + bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); + + naps = atomic_load_int(&aps_started); + bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; + printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); - err = psci_cpu_on(target_cpu, pa, cpuid); if (err != PSCI_RETVAL_SUCCESS) { /* * Panic here if INVARIANTS are enabled and PSCI failed to - * start the requested CPU. If psci_cpu_on returns PSCI_MISSING + * start the requested CPU. psci_cpu_on() returns PSCI_MISSING * to indicate we are unable to use it to start the given CPU. */ KASSERT(err == PSCI_MISSING || (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST, - ("Failed to start CPU %u (%lx)\n", id, target_cpu)); + ("Failed to start CPU %u (%lx), error %d\n", + id, target_cpu, err)); pcpu_destroy(pcpup); kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; + kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE); + bootstacks[cpuid] = NULL; mp_ncpus--; /* Notify the user that the CPU failed to start */ - printf("Failed to start CPU %u (%lx)\n", id, target_cpu); - } else + printf("Failed to start CPU %u (%lx), error %d\n", + id, target_cpu, err); + } else { + /* Wait for the AP to switch to its boot stack. */ + while (atomic_load_int(&aps_started) < naps + 1) + cpu_spinwait(); CPU_SET(cpuid, &all_cpus); + } return (true); } #ifdef DEV_ACPI static void madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_GENERIC_INTERRUPT *intr; u_int *cpuid; u_int id; switch(entry->Type) { case ACPI_MADT_TYPE_GENERIC_INTERRUPT: intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; cpuid = arg; id = *cpuid; start_cpu(id, intr->ArmMpidr); __pcpu[id].pc_acpi_id = intr->Uid; (*cpuid)++; break; default: break; } } static void cpu_init_acpi(void) { ACPI_TABLE_MADT *madt; vm_paddr_t physaddr; u_int cpuid; physaddr = acpi_find_table(ACPI_SIG_MADT); if (physaddr == 0) return; madt = acpi_map_table(physaddr, ACPI_SIG_MADT); if (madt == NULL) { printf("Unable to map the MADT, not starting APs\n"); return; } cpuid = 0; acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, madt_handler, &cpuid); acpi_unmap_table(madt); #if MAXMEMDOM > 1 /* set proximity info */ acpi_pxm_set_cpu_locality(); acpi_pxm_free(); #endif } #endif #ifdef FDT static boolean_t cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t target_cpu; int domain; target_cpu = reg[0]; if (addr_size == 2) { target_cpu <<= 32; target_cpu |= reg[1]; } if (!start_cpu(id, target_cpu)) return (FALSE); /* Try to read the numa node of this cpu */ if (vm_ndomains == 1 || OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0) domain = 0; __pcpu[id].pc_domain = domain; if (domain < MAXMEMDOM) CPU_SET(id, &cpuset_domain[domain]); return (TRUE); } #endif /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { #ifdef FDT phandle_t node; int i; #endif mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); CPU_SET(0, &all_cpus); switch(arm64_bus_method) { #ifdef DEV_ACPI case ARM64_BUS_ACPI: mp_quirks = MP_QUIRK_CPULIST; KASSERT(cpu0 >= 0, ("Current CPU was not found")); cpu_init_acpi(); break; #endif #ifdef FDT case ARM64_BUS_FDT: node = OF_peer(0); for (i = 0; fdt_quirks[i].compat != NULL; i++) { if (ofw_bus_node_is_compatible(node, fdt_quirks[i].compat) != 0) { mp_quirks = fdt_quirks[i].quirks; } } KASSERT(cpu0 >= 0, ("Current CPU was not found")); ofw_cpu_early_foreach(cpu_init_fdt, true); break; #endif default: break; } } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } #ifdef DEV_ACPI static void cpu_count_acpi_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_GENERIC_INTERRUPT *intr; u_int *cores = arg; uint64_t mpidr_reg; switch(entry->Type) { case ACPI_MADT_TYPE_GENERIC_INTERRUPT: intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; if (cpu0 < 0) { mpidr_reg = READ_SPECIALREG(mpidr_el1); if ((mpidr_reg & 0xff00fffffful) == intr->ArmMpidr) cpu0 = *cores; } (*cores)++; break; default: break; } } static u_int cpu_count_acpi(void) { ACPI_TABLE_MADT *madt; vm_paddr_t physaddr; u_int cores; physaddr = acpi_find_table(ACPI_SIG_MADT); if (physaddr == 0) return (0); madt = acpi_map_table(physaddr, ACPI_SIG_MADT); if (madt == NULL) { printf("Unable to map the MADT, not starting APs\n"); return (0); } cores = 0; acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, cpu_count_acpi_handler, &cores); acpi_unmap_table(madt); return (cores); } #endif #ifdef FDT static boolean_t cpu_find_cpu0_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t mpidr_fdt, mpidr_reg; if (cpu0 < 0) { mpidr_fdt = reg[0]; if (addr_size == 2) { mpidr_fdt <<= 32; mpidr_fdt |= reg[1]; } mpidr_reg = READ_SPECIALREG(mpidr_el1); if ((mpidr_reg & 0xff00fffffful) == mpidr_fdt) cpu0 = id; } return (TRUE); } #endif void cpu_mp_setmaxid(void) { int cores; mp_ncpus = 1; mp_maxid = 0; switch(arm64_bus_method) { #ifdef DEV_ACPI case ARM64_BUS_ACPI: cores = cpu_count_acpi(); if (cores > 0) { cores = MIN(cores, MAXCPU); if (bootverbose) printf("Found %d CPUs in the ACPI tables\n", cores); mp_ncpus = cores; mp_maxid = cores - 1; } break; #endif #ifdef FDT case ARM64_BUS_FDT: cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false); if (cores > 0) { cores = MIN(cores, MAXCPU); if (bootverbose) printf("Found %d CPUs in the device tree\n", cores); mp_ncpus = cores; mp_maxid = cores - 1; } break; #endif default: if (bootverbose) printf("No CPU data, limiting to 1 core\n"); break; } if (TUNABLE_INT_FETCH("hw.ncpu", &cores)) { if (cores > 0 && cores < mp_ncpus) { mp_ncpus = cores; mp_maxid = cores - 1; } } } /* * Lookup IPI source. */ static struct intr_ipi * intr_ipi_lookup(u_int ipi) { if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); return (&ipi_sources[ipi]); } /* * interrupt controller dispatch function for IPIs. It should * be called straight from the interrupt controller, when associated * interrupt source is learned. Or from anybody who has an interrupt * source mapped. */ void intr_ipi_dispatch(u_int ipi, struct trapframe *tf) { void *arg; struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid)); /* * Supply ipi filter with trapframe argument * if none is registered. */ arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf; ii->ii_handler(arg); } #ifdef notyet /* * Map IPI into interrupt controller. * * Not SMP coherent. */ static int ipi_map(struct intr_irqsrc *isrc, u_int ipi) { boolean_t is_percpu; int error; if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); isrc->isrc_type = INTR_ISRCT_NAMESPACE; isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI; isrc->isrc_nspc_num = ipi_next_num; error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu); if (error == 0) { isrc->isrc_dev = intr_irq_root_dev; ipi_next_num++; } return (error); } /* * Setup IPI handler to interrupt source. * * Note that there could be more ways how to send and receive IPIs * on a platform like fast interrupts for example. In that case, * one can call this function with ASIF_NOALLOC flag set and then * call intr_ipi_dispatch() when appropriate. * * Not SMP coherent. */ int intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter, void *arg, u_int flags) { struct intr_irqsrc *isrc; int error; if (filter == NULL) return(EINVAL); isrc = intr_ipi_lookup(ipi); if (isrc->isrc_ipifilter != NULL) return (EEXIST); if ((flags & AISHF_NOALLOC) == 0) { error = ipi_map(isrc, ipi); if (error != 0) return (error); } isrc->isrc_ipifilter = filter; isrc->isrc_arg = arg; isrc->isrc_handlers = 1; isrc->isrc_count = intr_ipi_setup_counters(name); isrc->isrc_index = 0; /* it should not be used in IPI case */ if (isrc->isrc_dev != NULL) { PIC_ENABLE_INTR(isrc->isrc_dev, isrc); PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc); } return (0); } #endif /* Sending IPI */ void ipi_all_but_self(u_int ipi) { cpuset_t cpus; cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi); intr_ipi_send(cpus, ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } diff --git a/sys/riscv/riscv/locore.S b/sys/riscv/riscv/locore.S index 8d3b89897c3e..a794851bcb6b 100644 --- a/sys/riscv/riscv/locore.S +++ b/sys/riscv/riscv/locore.S @@ -1,346 +1,340 @@ /*- * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "assym.inc" #include #include #include #include #include #include .globl kernbase .set kernbase, KERNBASE /* Trap entries */ .text /* Reset vector */ .text .globl _start _start: /* Set the global pointer */ .option push .option norelax lla gp, __global_pointer$ .option pop /* Get the physical address kernel loaded to */ lla t0, virt_map ld t1, 0(t0) sub t1, t1, t0 li t2, KERNBASE sub s9, t2, t1 /* s9 = physmem base */ /* * a0 = hart id * a1 = dtbp */ /* Pick a hart to run the boot process. */ lla t0, hart_lottery li t1, 1 amoadd.w t0, t1, 0(t0) /* * We must jump to mpentry in the non-BSP case because the offset is * too large to fit in a 12-bit branch immediate. */ beqz t0, 1f j mpentry /* * Page tables */ 1: /* Add L1 entry for kernel */ lla s1, pagetable_l1 lla s2, pagetable_l2 /* Link to next level PN */ srli s2, s2, PAGE_SHIFT li a5, KERNBASE srli a5, a5, L1_SHIFT /* >> L1_SHIFT */ andi a5, a5, 0x1ff /* & 0x1ff */ li t4, PTE_V slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 /* Store L1 PTE entry to position */ li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 sd t6, (t0) /* Level 2 superpages (512 x 2MiB) */ lla s1, pagetable_l2 srli t4, s9, 21 /* Div physmem base by 2 MiB */ li t2, 512 /* Build 512 entries */ add t3, t4, t2 li t5, 0 2: li t0, (PTE_KERN | PTE_X) slli t2, t4, PTE_PPN1_S /* << PTE_PPN1_S */ or t5, t0, t2 sd t5, (s1) /* Store PTE entry to position */ addi s1, s1, PTE_SIZE addi t4, t4, 1 bltu t4, t3, 2b /* Create an L1 page for early devmap */ lla s1, pagetable_l1 lla s2, pagetable_l2_devmap /* Link to next level PN */ srli s2, s2, PAGE_SHIFT li a5, (VM_MAX_KERNEL_ADDRESS - L2_SIZE) srli a5, a5, L1_SHIFT /* >> L1_SHIFT */ andi a5, a5, 0x1ff /* & 0x1ff */ li t4, PTE_V slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 /* Store single level1 PTE entry to position */ li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 sd t6, (t0) /* Create an L2 page superpage for DTB */ lla s1, pagetable_l2_devmap mv s2, a1 srli s2, s2, PAGE_SHIFT li t0, (PTE_KERN) slli t2, s2, PTE_PPN0_S /* << PTE_PPN0_S */ or t0, t0, t2 /* Store PTE entry to position */ li a6, PTE_SIZE li a5, 510 mulw a5, a5, a6 add t1, s1, a5 sd t0, (t1) /* Page tables END */ /* Setup supervisor trap vector */ lla t0, va sub t0, t0, s9 li t1, KERNBASE add t0, t0, t1 csrw stvec, t0 /* Set page tables base register */ lla s2, pagetable_l1 srli s2, s2, PAGE_SHIFT li t0, SATP_MODE_SV39 or s2, s2, t0 sfence.vma csrw satp, s2 .align 2 va: /* Set the global pointer again, this time with the virtual address. */ .option push .option norelax lla gp, __global_pointer$ .option pop /* Setup supervisor trap vector */ la t0, cpu_exception_handler csrw stvec, t0 /* Ensure sscratch is zero */ li t0, 0 csrw sscratch, t0 /* Initialize stack pointer */ la s3, initstack_end mv sp, s3 /* Allocate space for thread0 PCB and riscv_bootparams */ addi sp, sp, -(PCB_SIZE + RISCV_BOOTPARAMS_SIZE) & ~STACKALIGNBYTES /* Clear BSS */ la s0, _C_LABEL(__bss_start) la s1, _C_LABEL(_end) 1: sd zero, 0(s0) addi s0, s0, 8 bltu s0, s1, 1b #ifdef SMP /* Store boot hart id. */ la t0, boot_hart sw a0, 0(t0) #endif /* Fill riscv_bootparams */ la t0, pagetable_l1 sd t0, RISCV_BOOTPARAMS_KERN_L1PT(sp) sd s9, RISCV_BOOTPARAMS_KERN_PHYS(sp) la t0, initstack sd t0, RISCV_BOOTPARAMS_KERN_STACK(sp) li t0, (VM_MAX_KERNEL_ADDRESS - 2 * L2_SIZE) sd t0, RISCV_BOOTPARAMS_DTBP_VIRT(sp) sd a1, RISCV_BOOTPARAMS_DTBP_PHYS(sp) mv a0, sp call _C_LABEL(initriscv) /* Off we go */ call _C_LABEL(mi_startup) .align 4 initstack: .space (PAGE_SIZE * KSTACK_PAGES) initstack_end: ENTRY(sigcode) mv a0, sp addi a0, a0, SF_UC 1: li t0, SYS_sigreturn ecall /* sigreturn failed, exit */ li t0, SYS_exit ecall j 1b END(sigcode) /* This may be copied to the stack, keep it 16-byte aligned */ .align 3 esigcode: .data .align 3 .global szsigcode szsigcode: .quad esigcode - sigcode .align 12 pagetable_l1: .space PAGE_SIZE pagetable_l2: .space PAGE_SIZE pagetable_l2_devmap: .space PAGE_SIZE .align 3 virt_map: .quad virt_map hart_lottery: .space 4 .globl init_pt_va init_pt_va: .quad pagetable_l2 /* XXX: Keep page tables VA */ #ifndef SMP ENTRY(mpentry) 1: wfi j 1b END(mpentry) #else /* * mpentry(unsigned long) * * Called by a core when it is being brought online. */ ENTRY(mpentry) /* * Calculate the offset to __riscv_boot_ap * for the current core, cpuid is in a0. */ li t1, 4 mulw t1, t1, a0 /* Get the pointer */ lla t0, __riscv_boot_ap add t0, t0, t1 1: /* Wait the kernel to be ready */ lw t1, 0(t0) beqz t1, 1b /* Setup stack pointer */ - lla t0, secondary_stacks - li t1, (PAGE_SIZE * KSTACK_PAGES) - mulw t2, t1, a0 - add t0, t0, t2 - add t0, t0, t1 - sub t0, t0, s9 - li t1, KERNBASE - add sp, t0, t1 + lla t0, bootstack + ld sp, 0(t0) /* Setup supervisor trap vector */ lla t0, mpva sub t0, t0, s9 li t1, KERNBASE add t0, t0, t1 csrw stvec, t0 /* Set page tables base register */ lla s2, pagetable_l1 srli s2, s2, PAGE_SHIFT li t0, SATP_MODE_SV39 or s2, s2, t0 sfence.vma csrw satp, s2 .align 2 mpva: /* Set the global pointer again, this time with the virtual address. */ .option push .option norelax lla gp, __global_pointer$ .option pop /* Setup supervisor trap vector */ la t0, cpu_exception_handler csrw stvec, t0 /* Ensure sscratch is zero */ li t0, 0 csrw sscratch, t0 call init_secondary END(mpentry) #endif diff --git a/sys/riscv/riscv/mp_machdep.c b/sys/riscv/riscv/mp_machdep.c index 322121510b3e..113165192735 100644 --- a/sys/riscv/riscv/mp_machdep.c +++ b/sys/riscv/riscv/mp_machdep.c @@ -1,492 +1,530 @@ /*- * Copyright (c) 2015 The FreeBSD Foundation * Copyright (c) 2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_kstack_pages.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif boolean_t ofw_cpu_reg(phandle_t node, u_int, cell_t *); uint32_t __riscv_boot_ap[MAXCPU]; static enum { CPUS_UNKNOWN, #ifdef FDT CPUS_FDT, #endif } cpu_enum_method; static device_identify_t riscv64_cpu_identify; static device_probe_t riscv64_cpu_probe; static device_attach_t riscv64_cpu_attach; static int ipi_handler(void *); -struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; extern uint32_t boot_hart; extern cpuset_t all_harts; #ifdef INVARIANTS static uint32_t cpu_reg[MAXCPU][2]; #endif static device_t cpu_list[MAXCPU]; -void mpentry(unsigned long cpuid); void init_secondary(uint64_t); -uint8_t secondary_stacks[MAXCPU][PAGE_SIZE * KSTACK_PAGES] __aligned(16); +static struct mtx ap_boot_mtx; + +/* Stacks for AP initialization, discarded once idle threads are started. */ +void *bootstack; +static void *bootstacks[MAXCPU]; + +/* Count of started APs, used to synchronize access to bootstack. */ +static volatile int aps_started; /* Set to 1 once we're ready to let the APs out of the pen. */ -volatile int aps_ready = 0; +static volatile int aps_ready; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; static device_method_t riscv64_cpu_methods[] = { /* Device interface */ DEVMETHOD(device_identify, riscv64_cpu_identify), DEVMETHOD(device_probe, riscv64_cpu_probe), DEVMETHOD(device_attach, riscv64_cpu_attach), DEVMETHOD_END }; static devclass_t riscv64_cpu_devclass; static driver_t riscv64_cpu_driver = { "riscv64_cpu", riscv64_cpu_methods, 0 }; DRIVER_MODULE(riscv64_cpu, cpu, riscv64_cpu_driver, riscv64_cpu_devclass, 0, 0); static void riscv64_cpu_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "riscv64_cpu", -1) != NULL) return; if (BUS_ADD_CHILD(parent, 0, "riscv64_cpu", -1) == NULL) device_printf(parent, "add child failed\n"); } static int riscv64_cpu_probe(device_t dev) { u_int cpuid; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); device_quiet(dev); return (0); } static int riscv64_cpu_attach(device_t dev) { const uint32_t *reg; size_t reg_size; u_int cpuid; int i; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid)); reg = cpu_get_cpuid(dev, ®_size); if (reg == NULL) return (EINVAL); if (bootverbose) { device_printf(dev, "register <"); for (i = 0; i < reg_size; i++) printf("%s%x", (i == 0) ? "" : " ", reg[i]); printf(">\n"); } /* Set the device to start it later */ cpu_list[cpuid] = dev; return (0); } static void release_aps(void *dummy __unused) { cpuset_t mask; int i; if (mp_ncpus == 1) return; /* Setup the IPI handler */ riscv_setup_ipihandler(ipi_handler); atomic_store_rel_int(&aps_ready, 1); /* Wake up the other CPUs */ mask = all_harts; CPU_CLR(boot_hart, &mask); printf("Release APs\n"); sbi_send_ipi(mask.__bits); for (i = 0; i < 2000; i++) { if (smp_started) return; DELAY(1000); } printf("APs not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); void init_secondary(uint64_t hart) { struct pcpu *pcpup; u_int cpuid; /* Renumber this cpu */ cpuid = hart; if (cpuid < boot_hart) cpuid += mp_maxid + 1; cpuid -= boot_hart; /* Setup the pcpu pointer */ pcpup = &__pcpu[cpuid]; __asm __volatile("mv tp, %0" :: "r"(pcpup)); /* Workaround: make sure wfi doesn't halt the hart */ csr_set(sie, SIE_SSIE); csr_set(sip, SIE_SSIE); - /* Spin until the BSP releases the APs */ - while (!aps_ready) + /* Signal the BSP and spin until it has released all APs. */ + atomic_add_int(&aps_started, 1); + while (!atomic_load_int(&aps_ready)) __asm __volatile("wfi"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; - pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* * Identify current CPU. This is necessary to setup * affinity registers and to provide support for * runtime chip identification. */ identify_cpu(); /* Enable software interrupts */ riscv_unmask_ipi(); #ifndef EARLY_AP_STARTUP /* Start per-CPU event timers. */ cpu_initclocks_ap(); #endif /* Enable external (PLIC) interrupts */ csr_set(sie, SIE_SEIE); /* Activate process 0's pmap. */ pmap_activate_boot(vmspace_pmap(proc0.p_vmspace)); mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); + /* + * Assert that smp_after_idle_runnable condition is reasonable. + */ + MPASS(PCPU_GET(curpcb) == NULL); + /* Enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to init_secondary"); /* NOTREACHED */ } +static void +smp_after_idle_runnable(void *arg __unused) +{ + struct pcpu *pc; + int cpu; + + for (cpu = 1; cpu < mp_ncpus; cpu++) { + if (bootstacks[cpu] != NULL) { + pc = pcpu_find(cpu); + while (atomic_load_ptr(&pc->pc_curpcb) == NULL) + cpu_spinwait(); + kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); + } + } +} +SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, + smp_after_idle_runnable, NULL); + static int ipi_handler(void *arg) { u_int ipi_bitmap; u_int cpu, ipi; int bit; sbi_clear_ipi(); cpu = PCPU_GET(cpuid); mb(); ipi_bitmap = atomic_readandclear_int(PCPU_PTR(pending_ipis)); if (ipi_bitmap == 0) return (FILTER_HANDLED); while ((bit = ffs(ipi_bitmap))) { bit = (bit - 1); ipi = (1 << bit); ipi_bitmap &= ~ipi; mb(); switch (ipi) { case IPI_AST: CTR0(KTR_SMP, "IPI_AST"); break; case IPI_PREEMPT: CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); break; case IPI_RENDEZVOUS: CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); break; case IPI_STOP: case IPI_STOP_HARD: CTR0(KTR_SMP, (ipi == IPI_STOP) ? "IPI_STOP" : "IPI_STOP_HARD"); savectx(&stoppcbs[cpu]); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); CTR0(KTR_SMP, "IPI_STOP (restart)"); /* * The kernel debugger might have set a breakpoint, * so flush the instruction cache. */ fence_i(); break; case IPI_HARDCLOCK: CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); break; default: panic("Unknown IPI %#0x on cpu %d", ipi, curcpu); } } return (FILTER_HANDLED); } struct cpu_group * cpu_topo(void) { return (smp_topo_none()); } /* Determine if we running MP machine */ int cpu_mp_probe(void) { return (mp_ncpus > 1); } #ifdef FDT static boolean_t cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { struct pcpu *pcpup; uint64_t hart; u_int cpuid; + int naps; /* Check if this hart supports MMU. */ if (OF_getproplen(node, "mmu-type") < 0) return (0); KASSERT(id < MAXCPU, ("Too many CPUs")); KASSERT(addr_size == 1 || addr_size == 2, ("Invalid register size")); #ifdef INVARIANTS cpu_reg[id][0] = reg[0]; if (addr_size == 2) cpu_reg[id][1] = reg[1]; #endif hart = reg[0]; if (addr_size == 2) { hart <<= 32; hart |= reg[1]; } KASSERT(hart < MAXCPU, ("Too many harts.")); /* We are already running on this cpu */ if (hart == boot_hart) return (1); /* * Rotate the CPU IDs to put the boot CPU as CPU 0. * We keep the other CPUs ordered. */ cpuid = hart; if (cpuid < boot_hart) cpuid += mp_maxid + 1; cpuid -= boot_hart; /* Check if we are able to start this cpu */ if (cpuid > mp_maxid) return (0); pcpup = &__pcpu[cpuid]; pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); pcpup->pc_hart = hart; dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); + bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); + + naps = atomic_load_int(&aps_started); + bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; + printf("Starting CPU %u (hart %lx)\n", cpuid, hart); - __riscv_boot_ap[hart] = 1; + atomic_store_32(&__riscv_boot_ap[hart], 1); + + /* Wait for the AP to switch to its boot stack. */ + while (atomic_load_int(&aps_started) < naps + 1) + cpu_spinwait(); CPU_SET(cpuid, &all_cpus); CPU_SET(hart, &all_harts); return (1); } #endif /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); CPU_SET(0, &all_cpus); CPU_SET(boot_hart, &all_harts); switch(cpu_enum_method) { #ifdef FDT case CPUS_FDT: ofw_cpu_early_foreach(cpu_init_fdt, true); break; #endif case CPUS_UNKNOWN: break; } } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } static boolean_t cpu_check_mmu(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { /* Check if this hart supports MMU. */ if (OF_getproplen(node, "mmu-type") < 0) return (0); return (1); } void cpu_mp_setmaxid(void) { #ifdef FDT int cores; cores = ofw_cpu_early_foreach(cpu_check_mmu, true); if (cores > 0) { cores = MIN(cores, MAXCPU); if (bootverbose) printf("Found %d CPUs in the device tree\n", cores); mp_ncpus = cores; mp_maxid = cores - 1; cpu_enum_method = CPUS_FDT; return; } #endif if (bootverbose) printf("No CPU data, limiting to 1 core\n"); mp_ncpus = 1; mp_maxid = 0; }