Index: head/sys/arm/arm/cpuinfo.c =================================================================== --- head/sys/arm/arm/cpuinfo.c (revision 328466) +++ head/sys/arm/arm/cpuinfo.c (revision 328467) @@ -1,341 +1,540 @@ /*- * Copyright 2014 Svatopluk Kraus * Copyright 2014 Michal Meloun * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include #include #include #include #if __ARM_ARCH >= 6 void reinit_mmu(uint32_t ttb, uint32_t aux_clr, uint32_t aux_set); + +int disable_bp_hardening; +int spectre_v2_safe = 1; #endif struct cpuinfo cpuinfo = { /* Use safe defaults for start */ .dcache_line_size = 32, .dcache_line_mask = 31, .icache_line_size = 32, .icache_line_mask = 31, }; static SYSCTL_NODE(_hw, OID_AUTO, cpu, CTLFLAG_RD, 0, "CPU"); static SYSCTL_NODE(_hw_cpu, OID_AUTO, quirks, CTLFLAG_RD, 0, "CPU quirks"); /* * Tunable CPU quirks. * Be careful, ACTLR cannot be changed if CPU is started in secure * mode (world) and a write to ACTLR can cause an exception! * These quirks are intended for optimizing CPU performance, not for * applying errata workarounds. Nobody can expect that a CPU with unfixed * errata is stable enough to execute the kernel until quirks are applied. */ static uint32_t cpu_quirks_actlr_mask; SYSCTL_INT(_hw_cpu_quirks, OID_AUTO, actlr_mask, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &cpu_quirks_actlr_mask, 0, "Bits to be masked in ACTLR"); static uint32_t cpu_quirks_actlr_set; SYSCTL_INT(_hw_cpu_quirks, OID_AUTO, actlr_set, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &cpu_quirks_actlr_set, 0, "Bits to be set in ACTLR"); /* Read and parse CPU id scheme */ void cpuinfo_init(void) { #if __ARM_ARCH >= 6 uint32_t tmp; #endif /* * Prematurely fetch CPU quirks. Standard fetch for tunable * sysctls is handled using SYSINIT, thus too late for boot CPU. * Keep names in sync with sysctls. 
*/ TUNABLE_INT_FETCH("hw.cpu.quirks.actlr_mask", &cpu_quirks_actlr_mask); TUNABLE_INT_FETCH("hw.cpu.quirks.actlr_set", &cpu_quirks_actlr_set); cpuinfo.midr = cp15_midr_get(); /* Test old version id schemes first */ if ((cpuinfo.midr & CPU_ID_IMPLEMENTOR_MASK) == CPU_ID_ARM_LTD) { if (CPU_ID_ISOLD(cpuinfo.midr)) { /* obsolete ARMv2 or ARMv3 CPU */ cpuinfo.midr = 0; return; } if (CPU_ID_IS7(cpuinfo.midr)) { if ((cpuinfo.midr & (1 << 23)) == 0) { /* obsolete ARMv3 CPU */ cpuinfo.midr = 0; return; } /* ARMv4T CPU */ cpuinfo.architecture = 1; cpuinfo.revision = (cpuinfo.midr >> 16) & 0x7F; } else { /* ARM new id scheme */ cpuinfo.architecture = (cpuinfo.midr >> 16) & 0x0F; cpuinfo.revision = (cpuinfo.midr >> 20) & 0x0F; } } else { /* non-ARM -> must be new id scheme */ cpuinfo.architecture = (cpuinfo.midr >> 16) & 0x0F; cpuinfo.revision = (cpuinfo.midr >> 20) & 0x0F; } /* Parse rest of MIDR */ cpuinfo.implementer = (cpuinfo.midr >> 24) & 0xFF; cpuinfo.part_number = (cpuinfo.midr >> 4) & 0xFFF; cpuinfo.patch = cpuinfo.midr & 0x0F; /* CP15 c0,c0 regs 0-7 exist on all CPUs (although aliased with MIDR) */ cpuinfo.ctr = cp15_ctr_get(); cpuinfo.tcmtr = cp15_tcmtr_get(); #if __ARM_ARCH >= 6 cpuinfo.tlbtr = cp15_tlbtr_get(); cpuinfo.mpidr = cp15_mpidr_get(); cpuinfo.revidr = cp15_revidr_get(); #endif /* Done unless CPU uses the ARMv7 CPU id scheme */ if (cpuinfo.architecture != 0xF) return; #if __ARM_ARCH >= 6 cpuinfo.id_pfr0 = cp15_id_pfr0_get(); cpuinfo.id_pfr1 = cp15_id_pfr1_get(); cpuinfo.id_dfr0 = cp15_id_dfr0_get(); cpuinfo.id_afr0 = cp15_id_afr0_get(); cpuinfo.id_mmfr0 = cp15_id_mmfr0_get(); cpuinfo.id_mmfr1 = cp15_id_mmfr1_get(); cpuinfo.id_mmfr2 = cp15_id_mmfr2_get(); cpuinfo.id_mmfr3 = cp15_id_mmfr3_get(); cpuinfo.id_isar0 = cp15_id_isar0_get(); cpuinfo.id_isar1 = cp15_id_isar1_get(); cpuinfo.id_isar2 = cp15_id_isar2_get(); cpuinfo.id_isar3 = cp15_id_isar3_get(); cpuinfo.id_isar4 = cp15_id_isar4_get(); cpuinfo.id_isar5 = cp15_id_isar5_get(); /* Not yet - CBAR only exists on ARM SMP Cortex A CPUs cpuinfo.cbar = cp15_cbar_get(); */ if (CPU_CT_FORMAT(cpuinfo.ctr) == CPU_CT_ARMV7) { cpuinfo.ccsidr = cp15_ccsidr_get(); cpuinfo.clidr = cp15_clidr_get(); } /* Test if revidr is implemented */ if (cpuinfo.revidr == cpuinfo.midr) cpuinfo.revidr = 0; /* parsed bits of above registers */ /* id_mmfr0 */ cpuinfo.outermost_shareability = (cpuinfo.id_mmfr0 >> 8) & 0xF; cpuinfo.shareability_levels = (cpuinfo.id_mmfr0 >> 12) & 0xF; cpuinfo.auxiliary_registers = (cpuinfo.id_mmfr0 >> 20) & 0xF; cpuinfo.innermost_shareability = (cpuinfo.id_mmfr0 >> 28) & 0xF; /* id_mmfr2 */ cpuinfo.mem_barrier = (cpuinfo.id_mmfr2 >> 20) & 0xF; /* id_mmfr3 */ cpuinfo.coherent_walk = (cpuinfo.id_mmfr3 >> 20) & 0xF; cpuinfo.maintenance_broadcast = (cpuinfo.id_mmfr3 >> 12) & 0xF; /* id_pfr1 */ cpuinfo.generic_timer_ext = (cpuinfo.id_pfr1 >> 16) & 0xF; cpuinfo.virtualization_ext = (cpuinfo.id_pfr1 >> 12) & 0xF; cpuinfo.security_ext = (cpuinfo.id_pfr1 >> 4) & 0xF; /* mpidr */ cpuinfo.mp_ext = (cpuinfo.mpidr >> 31u) & 0x1; /* L1 Cache sizes */ if (CPU_CT_FORMAT(cpuinfo.ctr) == CPU_CT_ARMV7) { cpuinfo.dcache_line_size = 1 << (CPU_CT_DMINLINE(cpuinfo.ctr) + 2); cpuinfo.icache_line_size = 1 << (CPU_CT_IMINLINE(cpuinfo.ctr) + 2); } else { cpuinfo.dcache_line_size = 1 << (CPU_CT_xSIZE_LEN(CPU_CT_DSIZE(cpuinfo.ctr)) + 3); cpuinfo.icache_line_size = 1 << (CPU_CT_xSIZE_LEN(CPU_CT_ISIZE(cpuinfo.ctr)) + 3); } cpuinfo.dcache_line_mask = cpuinfo.dcache_line_size - 1; cpuinfo.icache_line_mask = cpuinfo.icache_line_size - 1;
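	/*
	 * [Editorial worked example, not part of the change: decoding a
	 * hypothetical ARMv7-format CTR value of 0x83338003, assuming the
	 * usual layout (IminLine in bits [3:0], DminLine in bits [19:16],
	 * line sizes encoded as log2 of the number of 4-byte words):
	 *   DminLine = (0x83338003 >> 16) & 0xF = 3
	 *       -> dcache_line_size = 1 << (3 + 2) = 32 bytes
	 *   IminLine = 0x83338003 & 0xF = 3
	 *       -> icache_line_size = 1 << (3 + 2) = 32 bytes
	 * Both line masks then become 31 (0x1F).]
	 */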
/* Fill AT_HWCAP bits. */ elf_hwcap |= HWCAP_HALF | HWCAP_FAST_MULT; /* Required for all CPUs */ elf_hwcap |= HWCAP_TLS | HWCAP_EDSP; /* Required for v6+ CPUs */ tmp = (cpuinfo.id_isar0 >> 24) & 0xF; /* Divide_instrs */ if (tmp >= 1) elf_hwcap |= HWCAP_IDIVT; if (tmp >= 2) elf_hwcap |= HWCAP_IDIVA; tmp = (cpuinfo.id_pfr0 >> 4) & 0xF; /* State1 */ if (tmp >= 1) elf_hwcap |= HWCAP_THUMB; tmp = (cpuinfo.id_pfr0 >> 12) & 0xF; /* State3 */ if (tmp >= 1) elf_hwcap |= HWCAP_THUMBEE; tmp = (cpuinfo.id_mmfr0 >> 0) & 0xF; /* VMSA */ if (tmp >= 5) elf_hwcap |= HWCAP_LPAE; /* Fill AT_HWCAP2 bits. */ tmp = (cpuinfo.id_isar5 >> 4) & 0xF; /* AES */ if (tmp >= 1) elf_hwcap2 |= HWCAP2_AES; if (tmp >= 2) elf_hwcap2 |= HWCAP2_PMULL; tmp = (cpuinfo.id_isar5 >> 8) & 0xF; /* SHA1 */ if (tmp >= 1) elf_hwcap2 |= HWCAP2_SHA1; tmp = (cpuinfo.id_isar5 >> 12) & 0xF; /* SHA2 */ if (tmp >= 1) elf_hwcap2 |= HWCAP2_SHA2; tmp = (cpuinfo.id_isar5 >> 16) & 0xF; /* CRC32 */ if (tmp >= 1) elf_hwcap2 |= HWCAP2_CRC32; #endif } #if __ARM_ARCH >= 6 /* * Get bits that must be set or cleared in ACTLR register. * Note: Bits in ACTLR register are IMPLEMENTATION DEFINED. * It's expected that the SCU is in operational state before this * function is called. */ static void cpuinfo_get_actlr_modifier(uint32_t *actlr_mask, uint32_t *actlr_set) { *actlr_mask = 0; *actlr_set = 0; if (cpuinfo.implementer == CPU_IMPLEMENTER_ARM) { switch (cpuinfo.part_number) { + case CPU_ARCH_CORTEX_A75: case CPU_ARCH_CORTEX_A73: case CPU_ARCH_CORTEX_A72: case CPU_ARCH_CORTEX_A57: case CPU_ARCH_CORTEX_A53: /* Nothing to do for AArch32 */ break; case CPU_ARCH_CORTEX_A17: case CPU_ARCH_CORTEX_A12: /* A12 is merged into A17 */ /* * Enable SMP mode */ *actlr_mask = (1 << 6); *actlr_set = (1 << 6); break; case CPU_ARCH_CORTEX_A15: /* * Enable snoop-delayed exclusive handling * Enable SMP mode */ *actlr_mask = (1U << 31) | (1 << 6); *actlr_set = (1U << 31) | (1 << 6); break; case CPU_ARCH_CORTEX_A9: /* * Disable exclusive L1/L2 cache control * Enable SMP mode * Enable Cache and TLB maintenance broadcast */ *actlr_mask = (1 << 7) | (1 << 6) | (1 << 0); *actlr_set = (1 << 6) | (1 << 0); break; case CPU_ARCH_CORTEX_A8: /* * Enable L2 cache * Enable L1 data cache hardware alias checks */ *actlr_mask = (1 << 1) | (1 << 0); *actlr_set = (1 << 1); break; case CPU_ARCH_CORTEX_A7: /* * Enable SMP mode */ *actlr_mask = (1 << 6); *actlr_set = (1 << 6); break; case CPU_ARCH_CORTEX_A5: /* * Disable exclusive L1/L2 cache control * Enable SMP mode * Enable Cache and TLB maintenance broadcast */ *actlr_mask = (1 << 7) | (1 << 6) | (1 << 0); *actlr_set = (1 << 6) | (1 << 0); break; case CPU_ARCH_ARM1176: /* * Restrict cache size to 16KB * Enable the return stack * Enable dynamic branch prediction * Enable static branch prediction */ *actlr_mask = (1 << 6) | (1 << 2) | (1 << 1) | (1 << 0); *actlr_set = (1 << 6) | (1 << 2) | (1 << 1) | (1 << 0); break; } return; } }
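[Editorial illustration, not part of the change. The actlr_mask/actlr_set pairs above, like the hw.cpu.quirks tunables, follow the convention used by reinit_mmu() and modify_actlr() below: every bit selected by the mask is forced to the corresponding value in the set. A minimal standalone sketch of that combination, using the Cortex-A9 values from the table and a hypothetical pre-quirk ACTLR value:]

	#include <stdint.h>
	#include <stdio.h>

	/* Force the bits selected by 'mask' to the values given in 'set'. */
	static uint32_t
	apply_actlr_quirk(uint32_t actlr, uint32_t mask, uint32_t set)
	{

		return ((actlr & ~mask) | set);
	}

	int
	main(void)
	{
		/* Cortex-A9: mask selects bits 7, 6 and 0; set bits 6 and 0. */
		uint32_t mask = (1 << 7) | (1 << 6) | (1 << 0);
		uint32_t set = (1 << 6) | (1 << 0);
		uint32_t actlr = 0x00000080;	/* hypothetical reset value */

		/* Prints "ACTLR: 0x00000080 -> 0x00000041": bit 7 cleared,
		 * bits 6 and 0 set, all other bits left untouched. */
		printf("ACTLR: 0x%08x -> 0x%08x\n", actlr,
		    apply_actlr_quirk(actlr, mask, set));
		return (0);
	}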
/* Reinitialize MMU to final kernel mapping and apply all CPU quirks. */ void cpuinfo_reinit_mmu(uint32_t ttb) { uint32_t actlr_mask; uint32_t actlr_set; cpuinfo_get_actlr_modifier(&actlr_mask, &actlr_set); actlr_mask |= cpu_quirks_actlr_mask; actlr_set |= cpu_quirks_actlr_set; reinit_mmu(ttb, actlr_mask, actlr_set); } + +static bool +modify_actlr(uint32_t clear, uint32_t set) +{ + uint32_t reg, newreg; + + reg = cp15_actlr_get(); + newreg = reg; + newreg &= ~clear; + newreg |= set; + if (reg == newreg) + return (true); + cp15_actlr_set(newreg); + + reg = cp15_actlr_get(); + if (reg == newreg) + return (true); + return (false); +} + +/* Apply/restore BP hardening on current core. */ +static int +apply_bp_hardening(bool enable, int kind, bool actlr, uint32_t set_mask) +{ + if (enable) { + if (actlr && !modify_actlr(0, set_mask)) + return (-1); + PCPU_SET(bp_harden_kind, kind); + } else { + PCPU_SET(bp_harden_kind, PCPU_BP_HARDEN_KIND_NONE); + if (actlr) + modify_actlr(~0, PCPU_GET(original_actlr)); + spectre_v2_safe = 0; + } + return (0); +} + +static void +handle_bp_hardening(bool enable) +{ + int kind; + char *kind_str; + + kind = PCPU_BP_HARDEN_KIND_NONE; + /* + * Note: Access to ACTLR is locked to secure world on most boards. + * This means that full BP hardening depends on updated u-boot/firmware, + * or may be impossible altogether (if the secure monitor is in on-chip ROM). + */ + if (cpuinfo.implementer == CPU_IMPLEMENTER_ARM) { + switch (cpuinfo.part_number) { + case CPU_ARCH_CORTEX_A8: + /* + * For Cortex-A8, the IBE bit must be set, otherwise + * BPIALL is effectively a NOP. + * Unfortunately, Cortex-A8 is also affected by + * ARM erratum 687067, which causes a non-working + * BPIALL if the IBE bit is set and 'Instruction L1 System + * Array Debug Register 0' is not 0. + * This register is not reset on power-up and is + * accessible only from secure world, so we can do + * nothing to detect or fix this issue. + * I'm afraid that the on-chip ROM based secure monitor + * on AM335x (BeagleBone) doesn't reset this debug + * register. + */ + kind = PCPU_BP_HARDEN_KIND_BPIALL; + if (apply_bp_hardening(enable, kind, true, 1 << 6) != 0) + goto actlr_err; + break; + + case CPU_ARCH_CORTEX_A9: + case CPU_ARCH_CORTEX_A12: + case CPU_ARCH_CORTEX_A17: + case CPU_ARCH_CORTEX_A57: + case CPU_ARCH_CORTEX_A72: + case CPU_ARCH_CORTEX_A73: + case CPU_ARCH_CORTEX_A75: + kind = PCPU_BP_HARDEN_KIND_BPIALL; + if (apply_bp_hardening(enable, kind, false, 0) != 0) + goto actlr_err; + break; + + case CPU_ARCH_CORTEX_A15: + /* + * For Cortex-A15, set 'Enable invalidates of BTB' bit. + * Despite this, the BPIALL is still effectively a NOP, + * but with this bit set, the ICIALLU also flushes + * the branch predictor as a side effect. + */ + kind = PCPU_BP_HARDEN_KIND_ICIALLU; + if (apply_bp_hardening(enable, kind, true, 1 << 0) != 0) + goto actlr_err; + break; + + default: + break; + } + } else if (cpuinfo.implementer == CPU_IMPLEMENTER_QCOM) { + printf("!!!WARNING!!! CPU(%d) is vulnerable to speculative " + "branch attacks. !!!\n" + "Qualcomm Krait cores are known (or believed) to be " + "vulnerable to\n" + "speculative branch attacks; no mitigation exists yet.\n", + PCPU_GET(cpuid)); + goto unknown_mitigation; + } else { + goto unknown_mitigation; + } + + if (bootverbose) { + switch (kind) { + case PCPU_BP_HARDEN_KIND_NONE: + kind_str = "not necessary"; + break; + case PCPU_BP_HARDEN_KIND_BPIALL: + kind_str = "BPIALL"; + break; + case PCPU_BP_HARDEN_KIND_ICIALLU: + kind_str = "ICIALLU"; + break; + default: + panic("Unknown BP hardening kind (%d).", kind); + } + printf("CPU(%d) applied BP hardening: %s\n", PCPU_GET(cpuid), + kind_str); + } + + return; + +unknown_mitigation: + PCPU_SET(bp_harden_kind, PCPU_BP_HARDEN_KIND_NONE); + spectre_v2_safe = 0; + return; + +actlr_err: + PCPU_SET(bp_harden_kind, PCPU_BP_HARDEN_KIND_NONE); + spectre_v2_safe = 0; + printf("!!!WARNING!!! CPU(%d) is vulnerable to speculative branch " + "attacks. !!!\n" + "We cannot enable required bit(s) in ACTLR register\n" + "because it's locked by secure monitor and/or firmware.\n", + PCPU_GET(cpuid)); +} + +void +cpuinfo_init_bp_hardening(void) +{ + + /* + * Store the original unmodified ACTLR, so we can restore it when + * BP hardening is disabled by sysctl. + */ + PCPU_SET(original_actlr, cp15_actlr_get()); + handle_bp_hardening(true); +} + +static void +bp_hardening_action(void *arg) +{ + + handle_bp_hardening(disable_bp_hardening == 0); +} + +static int +sysctl_disable_bp_hardening(SYSCTL_HANDLER_ARGS) +{ + int rv; + + rv = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + + if (!rv && req->newptr) { + spectre_v2_safe = 1; + dmb(); +#ifdef SMP + smp_rendezvous_cpus(all_cpus, smp_no_rendezvous_barrier, + bp_hardening_action, NULL, NULL); +#else + bp_hardening_action(NULL); +#endif + } + + return (rv); +} + +SYSCTL_PROC(_machdep, OID_AUTO, disable_bp_hardening, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + &disable_bp_hardening, 0, sysctl_disable_bp_hardening, "I", + "Disable BP hardening mitigation."); + +SYSCTL_INT(_machdep, OID_AUTO, spectre_v2_safe, CTLFLAG_RD, + &spectre_v2_safe, 0, "System is safe against Spectre Version 2 attacks"); #endif /* __ARM_ARCH >= 6 */
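[Editorial illustration, not part of the change. A minimal userland sketch of the two sysctl knobs added above, assuming a kernel built with this patch: machdep.spectre_v2_safe reads as 1 only while every core has a working mitigation applied, and writing machdep.disable_bp_hardening triggers the smp_rendezvous_cpus() path so handle_bp_hardening() re-runs on all CPUs:]

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <err.h>
	#include <stdio.h>

	int
	main(void)
	{
		int disable, safe;
		size_t len = sizeof(safe);

		/* 1 if all cores applied (or do not need) BP hardening. */
		if (sysctlbyname("machdep.spectre_v2_safe", &safe, &len,
		    NULL, 0) != 0)
			err(1, "machdep.spectre_v2_safe");
		printf("spectre_v2_safe: %d\n", safe);

		/* Writing 0 keeps (re-applies) the mitigation; needs root. */
		disable = 0;
		if (sysctlbyname("machdep.disable_bp_hardening", NULL, NULL,
		    &disable, sizeof(disable)) != 0)
			warn("machdep.disable_bp_hardening");
		return (0);
	}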
Index: head/sys/arm/arm/genassym.c =================================================================== --- head/sys/arm/arm/genassym.c (revision 328466) +++ head/sys/arm/arm/genassym.c (revision 328467) @@ -1,174 +1,179 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004 Olivier Houchard * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* For KERNVIRTADDR */ #include #include #include #include #include ASSYM(KERNBASE, KERNBASE); ASSYM(KERNVIRTADDR, KERNVIRTADDR); #if __ARM_ARCH >= 6 ASSYM(CPU_ASID_KERNEL,CPU_ASID_KERNEL); #endif ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); #if __ARM_ARCH < 6 ASSYM(PCB_DACR, offsetof(struct pcb, pcb_dacr)); #endif ASSYM(PCB_PAGEDIR, offsetof(struct pcb, pcb_pagedir)); #if __ARM_ARCH < 6 ASSYM(PCB_L1VEC, offsetof(struct pcb, pcb_l1vec)); ASSYM(PCB_PL1VEC, offsetof(struct pcb, pcb_pl1vec)); #endif ASSYM(PCB_R4, offsetof(struct pcb, pcb_regs.sf_r4)); ASSYM(PCB_R5, offsetof(struct pcb, pcb_regs.sf_r5)); ASSYM(PCB_R6, offsetof(struct pcb, pcb_regs.sf_r6)); ASSYM(PCB_R7, offsetof(struct pcb, pcb_regs.sf_r7)); ASSYM(PCB_R8, offsetof(struct pcb, pcb_regs.sf_r8)); ASSYM(PCB_R9, offsetof(struct pcb, pcb_regs.sf_r9)); ASSYM(PCB_R10, offsetof(struct pcb, pcb_regs.sf_r10)); ASSYM(PCB_R11, offsetof(struct pcb, pcb_regs.sf_r11)); ASSYM(PCB_R12, offsetof(struct pcb, pcb_regs.sf_r12)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_regs.sf_sp)); ASSYM(PCB_LR, offsetof(struct pcb, pcb_regs.sf_lr)); ASSYM(PCB_PC, offsetof(struct pcb, pcb_regs.sf_pc)); #if __ARM_ARCH >= 6 ASSYM(PCB_TPIDRURW, offsetof(struct pcb, pcb_regs.sf_tpidrurw)); #endif ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(M_LEN, offsetof(struct mbuf, m_len)); ASSYM(M_DATA, offsetof(struct mbuf, m_data)); ASSYM(M_NEXT, offsetof(struct mbuf, m_next)); ASSYM(IP_SRC, offsetof(struct ip, ip_src)); ASSYM(IP_DST, offsetof(struct ip, ip_dst)); #if __ARM_ARCH < 6 ASSYM(CF_CONTEXT_SWITCH, offsetof(struct cpu_functions, cf_context_switch)); ASSYM(CF_DCACHE_WB_RANGE, offsetof(struct cpu_functions, cf_dcache_wb_range)); ASSYM(CF_IDCACHE_WBINV_ALL, offsetof(struct cpu_functions, cf_idcache_wbinv_all)); ASSYM(CF_L2CACHE_WBINV_ALL, offsetof(struct cpu_functions, cf_l2cache_wbinv_all)); ASSYM(CF_TLB_FLUSHID_SE, offsetof(struct cpu_functions, cf_tlb_flushID_SE)); #endif ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); #if __ARM_ARCH < 6 ASSYM(MD_TP, offsetof(struct mdthread, md_tp)); ASSYM(MD_RAS_START, offsetof(struct mdthread, md_ras_start)); ASSYM(MD_RAS_END, offsetof(struct mdthread, md_ras_end)); #endif ASSYM(TF_SPSR, offsetof(struct trapframe, tf_spsr)); ASSYM(TF_R0, offsetof(struct trapframe, tf_r0)); ASSYM(TF_R1, offsetof(struct trapframe, tf_r1)); ASSYM(TF_PC, offsetof(struct trapframe, tf_pc)); ASSYM(P_PID, offsetof(struct proc, p_pid)); ASSYM(P_FLAG, offsetof(struct proc, p_flag)); ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); #if 
__ARM_ARCH < 6 ASSYM(ARM_TP_ADDRESS, ARM_TP_ADDRESS); ASSYM(ARM_RAS_START, ARM_RAS_START); ASSYM(ARM_RAS_END, ARM_RAS_END); #endif #ifdef VFP ASSYM(PCB_VFPSTATE, offsetof(struct pcb, pcb_vfpstate)); #endif #if __ARM_ARCH >= 6 ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); +ASSYM(PC_BP_HARDEN_KIND, offsetof(struct pcpu, pc_bp_harden_kind)); +ASSYM(PCPU_BP_HARDEN_KIND_NONE, PCPU_BP_HARDEN_KIND_NONE); +ASSYM(PCPU_BP_HARDEN_KIND_BPIALL, PCPU_BP_HARDEN_KIND_BPIALL); +ASSYM(PCPU_BP_HARDEN_KIND_ICIALLU, PCPU_BP_HARDEN_KIND_ICIALLU); #endif ASSYM(PAGE_SIZE, PAGE_SIZE); #if __ARM_ARCH < 6 ASSYM(PMAP_DOMAIN_KERNEL, PMAP_DOMAIN_KERNEL); #endif #ifdef PMAP_INCLUDE_PTE_SYNC ASSYM(PMAP_INCLUDE_PTE_SYNC, 1); #endif ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(MAXCPU, MAXCPU); ASSYM(_NCPUWORDS, _NCPUWORDS); ASSYM(NIRQ, NIRQ); ASSYM(PCPU_SIZE, sizeof(struct pcpu)); ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(DCACHE_LINE_SIZE, offsetof(struct cpuinfo, dcache_line_size)); ASSYM(DCACHE_LINE_MASK, offsetof(struct cpuinfo, dcache_line_mask)); ASSYM(ICACHE_LINE_SIZE, offsetof(struct cpuinfo, icache_line_size)); ASSYM(ICACHE_LINE_MASK, offsetof(struct cpuinfo, icache_line_mask)); /* * Emit the LOCORE_MAP_MB option as a #define only if the option was set. */ #include "opt_locore.h" #ifdef LOCORE_MAP_MB ASSYM(LOCORE_MAP_MB, LOCORE_MAP_MB); #endif Index: head/sys/arm/arm/machdep.c =================================================================== --- head/sys/arm/arm/machdep.c (revision 328466) +++ head/sys/arm/arm/machdep.c (revision 328467) @@ -1,1272 +1,1274 @@ /* $NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2004 Olivier Houchard * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * for the NetBSD Project. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Machine dependent functions for kernel setup * * Created : 17/09/94 * Updated : 18/04/01 updated for new wscons */ #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include "opt_sched.h" #include "opt_timer.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) || \ defined(COMPAT_FREEBSD9) #error FreeBSD/arm doesn't provide compatibility with releases prior to 10 #endif #if __ARM_ARCH >= 6 && !defined(INTRNG) #error armv6 requires INTRNG #endif struct pcpu __pcpu[MAXCPU]; struct pcpu *pcpup = &__pcpu[0]; static struct trapframe proc0_tf; uint32_t cpu_reset_address = 0; int cold = 1; vm_offset_t vector_page; int (*_arm_memcpy)(void *, void *, int, int) = NULL; int (*_arm_bzero)(void *, int, int) = NULL; int _min_memcpy_size = 0; int _min_bzero_size = 0; extern int *end; #ifdef FDT vm_paddr_t pmap_pa; #if __ARM_ARCH >= 6 vm_offset_t systempage; vm_offset_t irqstack; vm_offset_t undstack; vm_offset_t abtstack; #else /* * This is the number of L2 page tables required for covering max * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf, * stacks etc.), uprounded to be divisible by 4. */ #define KERNEL_PT_MAX 78 static struct pv_addr kernel_pt_table[KERNEL_PT_MAX]; struct pv_addr systempage; static struct pv_addr msgbufpv; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; static struct pv_addr kernelstack; #endif /* __ARM_ARCH >= 6 */ #endif /* FDT */ #ifdef PLATFORM static delay_func *delay_impl; static void *delay_arg; #endif struct kva_md_info kmi; /* * arm32_vector_init: * * Initialize the vector page, and select whether or not to * relocate the vectors. * * NOTE: We expect the vector page to be mapped at its expected * destination. */ extern unsigned int page0[], page0_data[]; void arm_vector_init(vm_offset_t va, int which) { unsigned int *vectors = (int *) va; unsigned int *vectors_data = vectors + (page0_data - page0); int vec; /* * Loop through the vectors we're taking over, and copy the * vector's insn and data word. */ for (vec = 0; vec < ARM_NVEC; vec++) { if ((which & (1 << vec)) == 0) { /* Don't want to take over this vector. */ continue; } vectors[vec] = page0[vec]; vectors_data[vec] = page0_data[vec]; } /* Now sync the vectors. */ icache_sync(va, (ARM_NVEC * 2) * sizeof(u_int)); vector_page = va; #if __ARM_ARCH < 6 if (va == ARM_VECTORS_HIGH) { /* * Enable high vectors in the system control reg (SCTLR). 
* * Assume the MD caller knows what it's doing here, and really * does want the vector page relocated. * * Note: This has to be done here (and not just in * cpu_setup()) because the vector page needs to be * accessible *before* cpu_startup() is called. * Think ddb(9) ... */ cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC); } #endif } static void cpu_startup(void *dummy) { struct pcb *pcb = thread0.td_pcb; const unsigned int mbyte = 1024 * 1024; #if __ARM_ARCH < 6 && !defined(ARM_CACHE_LOCK_ENABLE) vm_page_t m; #endif identify_arm_cpu(); vm_ksubmap_init(&kmi); /* * Display the RAM layout. */ printf("real memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(realmem), (uintmax_t)arm32_ptob(realmem) / mbyte); printf("avail memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(vm_cnt.v_free_count), (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte); if (bootverbose) { arm_physmem_print_tables(); devmap_print_table(); } bufinit(); vm_pager_bufferinit(); pcb->pcb_regs.sf_sp = (u_int)thread0.td_kstack + USPACE_SVC_STACK_TOP; pmap_set_pcb_pagedir(kernel_pmap, pcb); #if __ARM_ARCH < 6 vector_page_setprot(VM_PROT_READ); pmap_postinit(); #ifdef ARM_CACHE_LOCK_ENABLE pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS); arm_lock_cache_line(ARM_TP_ADDRESS); #else m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO); pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m)); #endif *(uint32_t *)ARM_RAS_START = 0; *(uint32_t *)ARM_RAS_END = 0xffffffff; #endif } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { dcache_wb_poc((vm_offset_t)ptr, (vm_paddr_t)vtophys(ptr), len); } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } void cpu_idle(int busy) { CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); spinlock_enter(); #ifndef NO_EVENTTIMERS if (!busy) cpu_idleclock(); #endif if (!sched_runnable()) cpu_sleep(0); #ifndef NO_EVENTTIMERS if (!busy) cpu_activeclock(); #endif spinlock_exit(); CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { return (0); } #ifdef NO_EVENTTIMERS /* * Most ARM platforms don't need to do anything special to init their clocks * (they get initialized during normal device attachment), and by not defining a * cpu_initclocks() function they get this generic one. Any platform that needs * to do something special can just provide its own implementation, which will * override this one due to the weak linkage. 
*/ void arm_generic_initclocks(void) { } __weak_reference(arm_generic_initclocks, cpu_initclocks); #else void cpu_initclocks(void) { #ifdef SMP if (PCPU_GET(cpuid) == 0) cpu_initclocks_bsp(); else cpu_initclocks_ap(); #else cpu_initclocks_bsp(); #endif } #endif #ifdef PLATFORM void arm_set_delay(delay_func *impl, void *arg) { KASSERT(impl != NULL, ("No DELAY implementation")); delay_impl = impl; delay_arg = arg; } void DELAY(int usec) { TSENTER(); delay_impl(usec, delay_arg); TSEXIT(); } #endif void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; register_t cspr; td = curthread; if (td->td_md.md_spinlock_count == 0) { cspr = disable_interrupts(PSR_I | PSR_F); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = cspr; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t cspr; td = curthread; critical_exit(); cspr = td->td_md.md_saved_cspr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) restore_interrupts(cspr); } /* * Clear registers on exec */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(*tf)); tf->tf_usr_sp = stack; tf->tf_usr_lr = imgp->entry_addr; tf->tf_svc_lr = 0x77777777; tf->tf_pc = imgp->entry_addr; tf->tf_spsr = PSR_USR32_MODE; } #ifdef VFP /* * Get machine VFP context. */ void get_vfpcontext(struct thread *td, mcontext_vfp_t *vfp) { struct pcb *pcb; pcb = td->td_pcb; if (td == curthread) { critical_enter(); vfp_store(&pcb->pcb_vfpstate, false); critical_exit(); } else MPASS(TD_IS_SUSPENDED(td)); memcpy(vfp->mcv_reg, pcb->pcb_vfpstate.reg, sizeof(vfp->mcv_reg)); vfp->mcv_fpscr = pcb->pcb_vfpstate.fpscr; } /* * Set machine VFP context. */ void set_vfpcontext(struct thread *td, mcontext_vfp_t *vfp) { struct pcb *pcb; pcb = td->td_pcb; if (td == curthread) { critical_enter(); vfp_discard(td); critical_exit(); } else MPASS(TD_IS_SUSPENDED(td)); memcpy(pcb->pcb_vfpstate.reg, vfp->mcv_reg, sizeof(pcb->pcb_vfpstate.reg)); pcb->pcb_vfpstate.fpscr = vfp->mcv_fpscr; } #endif int arm_get_vfpstate(struct thread *td, void *args) { int rv; struct arm_get_vfpstate_args ua; mcontext_vfp_t mcontext_vfp; rv = copyin(args, &ua, sizeof(ua)); if (rv != 0) return (rv); if (ua.mc_vfp_size != sizeof(mcontext_vfp_t)) return (EINVAL); #ifdef VFP get_vfpcontext(td, &mcontext_vfp); #else bzero(&mcontext_vfp, sizeof(mcontext_vfp)); #endif rv = copyout(&mcontext_vfp, ua.mc_vfp, sizeof(mcontext_vfp)); if (rv != 0) return (rv); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; __greg_t *gr = mcp->__gregs; if (clear_ret & GET_MC_CLEAR_RET) { gr[_REG_R0] = 0; gr[_REG_CPSR] = tf->tf_spsr & ~PSR_C; } else { gr[_REG_R0] = tf->tf_r0; gr[_REG_CPSR] = tf->tf_spsr; } gr[_REG_R1] = tf->tf_r1; gr[_REG_R2] = tf->tf_r2; gr[_REG_R3] = tf->tf_r3; gr[_REG_R4] = tf->tf_r4; gr[_REG_R5] = tf->tf_r5; gr[_REG_R6] = tf->tf_r6; gr[_REG_R7] = tf->tf_r7; gr[_REG_R8] = tf->tf_r8; gr[_REG_R9] = tf->tf_r9; gr[_REG_R10] = tf->tf_r10; gr[_REG_R11] = tf->tf_r11; gr[_REG_R12] = tf->tf_r12; gr[_REG_SP] = tf->tf_usr_sp; gr[_REG_LR] = tf->tf_usr_lr; gr[_REG_PC] = tf->tf_pc; mcp->mc_vfp_size = 0; mcp->mc_vfp_ptr = NULL; memset(&mcp->mc_spare, 0, sizeof(mcp->mc_spare)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
*/ int set_mcontext(struct thread *td, mcontext_t *mcp) { mcontext_vfp_t mc_vfp, *vfp; struct trapframe *tf = td->td_frame; const __greg_t *gr = mcp->__gregs; int spsr; /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. */ spsr = gr[_REG_CPSR]; if ((spsr & PSR_MODE) != PSR_USR32_MODE || (spsr & (PSR_I | PSR_F)) != 0) return (EINVAL); #ifdef WITNESS if (mcp->mc_vfp_size != 0 && mcp->mc_vfp_size != sizeof(mc_vfp)) { printf("%s: %s: Malformed mc_vfp_size: %d (0x%08X)\n", td->td_proc->p_comm, __func__, mcp->mc_vfp_size, mcp->mc_vfp_size); } else if (mcp->mc_vfp_size != 0 && mcp->mc_vfp_ptr == NULL) { printf("%s: %s: c_vfp_size != 0 but mc_vfp_ptr == NULL\n", td->td_proc->p_comm, __func__); } #endif if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != NULL) { if (copyin(mcp->mc_vfp_ptr, &mc_vfp, sizeof(mc_vfp)) != 0) return (EFAULT); vfp = &mc_vfp; } else { vfp = NULL; } tf->tf_r0 = gr[_REG_R0]; tf->tf_r1 = gr[_REG_R1]; tf->tf_r2 = gr[_REG_R2]; tf->tf_r3 = gr[_REG_R3]; tf->tf_r4 = gr[_REG_R4]; tf->tf_r5 = gr[_REG_R5]; tf->tf_r6 = gr[_REG_R6]; tf->tf_r7 = gr[_REG_R7]; tf->tf_r8 = gr[_REG_R8]; tf->tf_r9 = gr[_REG_R9]; tf->tf_r10 = gr[_REG_R10]; tf->tf_r11 = gr[_REG_R11]; tf->tf_r12 = gr[_REG_R12]; tf->tf_usr_sp = gr[_REG_SP]; tf->tf_usr_lr = gr[_REG_LR]; tf->tf_pc = gr[_REG_PC]; tf->tf_spsr = gr[_REG_CPSR]; #ifdef VFP if (vfp != NULL) set_vfpcontext(td, vfp); #endif return (0); } void sendsig(catcher, ksi, mask) sig_t catcher; ksiginfo_t *ksi; sigset_t *mask; { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int onstack; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_usr_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct sigframe *)td->td_frame->tf_usr_sp; /* make room on the stack */ fp--; /* make the stack aligned */ fp = (struct sigframe *)STACKALIGN(fp); /* Populate the siginfo frame. */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); #ifdef VFP get_vfpcontext(td, &frame.sf_vfp); frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp); frame.sf_uc.uc_mcontext.mc_vfp_ptr = &fp->sf_vfp; #else frame.sf_uc.uc_mcontext.mc_vfp_size = 0; frame.sf_uc.uc_mcontext.mc_vfp_ptr = NULL; #endif frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. 
*/ tf->tf_r0 = sig; tf->tf_r1 = (register_t)&fp->sf_si; tf->tf_r2 = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_r5 = (register_t)&fp->sf_uc; tf->tf_pc = (register_t)catcher; tf->tf_usr_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_usr_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_usr_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); /* Set the mode to enter in the signal handler */ #if __ARM_ARCH >= 7 if ((register_t)catcher & 1) tf->tf_spsr |= PSR_T; else tf->tf_spsr &= ~PSR_T; #endif CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr, tf->tf_usr_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; int error; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* Restore register context. */ error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_regs.sf_r4 = tf->tf_r4; pcb->pcb_regs.sf_r5 = tf->tf_r5; pcb->pcb_regs.sf_r6 = tf->tf_r6; pcb->pcb_regs.sf_r7 = tf->tf_r7; pcb->pcb_regs.sf_r8 = tf->tf_r8; pcb->pcb_regs.sf_r9 = tf->tf_r9; pcb->pcb_regs.sf_r10 = tf->tf_r10; pcb->pcb_regs.sf_r11 = tf->tf_r11; pcb->pcb_regs.sf_r12 = tf->tf_r12; pcb->pcb_regs.sf_pc = tf->tf_pc; pcb->pcb_regs.sf_lr = tf->tf_usr_lr; pcb->pcb_regs.sf_sp = tf->tf_usr_sp; } void pcpu0_init(void) { #if __ARM_ARCH >= 6 set_curthread(&thread0); #endif pcpu_init(pcpup, 0, sizeof(struct pcpu)); PCPU_SET(curthread, &thread0); } /* * Initialize proc0 */ void init_proc0(vm_offset_t kstack) { proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + kstack_pages * PAGE_SIZE) - 1; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_vfpcpu = -1; thread0.td_pcb->pcb_vfpstate.fpscr = VFPSCR_DN; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } #if __ARM_ARCH >= 6 void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #else void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack.pv_va + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack.pv_va + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #endif #ifdef FDT #if __ARM_ARCH < 6 void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; struct pv_addr kernel_l1pt; struct pv_addr dpcpu; vm_offset_t dtbp, freemempos, l2_start, lastaddr; uint64_t memsize; uint32_t l2size; char *env; void *kmdp; u_int l1pagetable; int i, j, err_devmap, mem_regions_sz; lastaddr = parse_boot_param(abp); arm_physmem_kernaddr = abp->abp_physaddr; memsize = 
0; cpuinfo_init(); set_cpufuncs(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); else dtbp = (vm_offset_t)NULL; #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) panic("Cannot get physical memory regions"); arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. */ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* Calculate number of L2 tables needed for mapping vm_page_array */ l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page); l2size = (l2size >> L1_S_SHIFT) + 1; /* * Add one table for end of kernel map, one for stacks, msgbuf and * L1 and L2 tables map, one for vectors map and two for * l2 structures from pmap_bootstrap. */ l2size += 5; /* Make it divisible by 4 */ l2size = (l2size + 3) & ~3; freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_va, (np)); \ (var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); for (i = 0, j = 0; i < l2size; ++i) { if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); j = i; } else { kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va + L2_TABLE_SIZE_REAL * (i - j); kernel_pt_table[i].pv_pa = kernel_pt_table[i].pv_va - KERNVIRTADDR + abp->abp_physaddr; } } /* * Allocate a page for the system page mapped to 0x00000000 * or 0xffff0000. This page will just contain the system vectors * and can be shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); valloc_pages(kernelstack, kstack_pages * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. * This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_va; /* * Try to map as much as possible of kernel text and data using * 1MB section mapping and for the rest of initial kernel address * space use L2 coarse tables. 
* * Link L2 tables for mapping remainder of kernel (modulo 1MB) * and kernel structures */ l2_start = lastaddr & ~(L1_S_OFFSET); for (i = 0 ; i < l2size - 1; i++) pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE, &kernel_pt_table[i]); pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE; /* Map kernel code and data */ pmap_map_chunk(l1pagetable, KERNVIRTADDR, abp->abp_physaddr, (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map L1 directory and allocated L2 page tables */ pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va, kernel_pt_table[0].pv_pa, L2_TABLE_SIZE_REAL * l2size, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); /* Map allocated DPCPU, stacks and msgbuf */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, freemempos - dpcpu.pv_va, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Link and map the vector page */ pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, &kernel_pt_table[l2size - 1]); pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); devmap_bootstrap(l1pagetable, NULL); vm_max_kernel_address = platform_lastaddr(); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); pmap_pa = kernel_l1pt.pv_pa; cpu_setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); arm_print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in cpu_setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). 
*/ cpu_idcache_wbinv_all(); undefined_init(); init_proc0(kernelstack.pv_va); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); pmap_bootstrap(freemempos, &kernel_l1pt); msgbufp = (void *)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); /* * Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_exclude_region(abp->abp_physaddr, (virtual_avail - KERNVIRTADDR), EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); dbg_monitor_init(); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } #else /* __ARM_ARCH < 6 */ void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_paddr_t lastaddr; vm_offset_t dtbp, kernelstack, dpcpu; char *env; void *kmdp; int err_devmap, mem_regions_sz; #ifdef EFI struct efi_map_header *efihdr; #endif /* get last allocated physical address */ arm_physmem_kernaddr = abp->abp_physaddr; lastaddr = parse_boot_param(abp) - KERNVIRTADDR + arm_physmem_kernaddr; set_cpufuncs(); cpuinfo_init(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); #if defined(LINUX_BOOT_ABI) arm_parse_fdt_bootargs(); #endif #ifdef EFI efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) { arm_add_efi_map_entries(efihdr, mem_regions, &mem_regions_sz); } else #endif { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,NULL) != 0) panic("Cannot get physical memory regions"); } arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. */ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* * Set TEX remapping registers. * Setup kernel page tables and switch to kernel L1 page table. */ pmap_set_tex(); pmap_bootstrap_prepare(lastaddr); /* * If EARLY_PRINTF support is enabled, we need to re-establish the * mapping after pmap_bootstrap_prepare() switches to new page tables. * Note that we can only do the remapping if the VA is outside the * kernel, now that we have real virtual (not VA=PA) mappings in effect. * Early printf does not work between the time pmap_set_tex() does * cp15_prrr_set() and this code remaps the VA. */ #if defined(EARLY_PRINTF) && defined(SOCDEV_PA) && defined(SOCDEV_VA) && SOCDEV_VA < KERNBASE pmap_preboot_map_attr(SOCDEV_PA, SOCDEV_VA, 1024 * 1024, VM_PROT_READ | VM_PROT_WRITE, VM_MEMATTR_DEVICE); #endif /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. 
*/ cpu_setup(); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* * Allocate a page for the system page mapped to 0xffff0000 * This page will just contain the system vectors and can be * shared by all processes. */ systempage = pmap_preboot_get_pages(1); /* Map the vector page. */ pmap_preboot_map_pages(systempage, ARM_VECTORS_HIGH, 1); if (virtual_end >= ARM_VECTORS_HIGH) virtual_end = ARM_VECTORS_HIGH - 1; /* Allocate dynamic per-cpu area. */ dpcpu = pmap_preboot_get_vpages(DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate stacks for all modes */ irqstack = pmap_preboot_get_vpages(IRQ_STACK_SIZE * MAXCPU); abtstack = pmap_preboot_get_vpages(ABT_STACK_SIZE * MAXCPU); undstack = pmap_preboot_get_vpages(UND_STACK_SIZE * MAXCPU ); kernelstack = pmap_preboot_get_vpages(kstack_pages * MAXCPU); /* Allocate message buffer. */ msgbufp = (void *)pmap_preboot_get_vpages( round_page(msgbufsize) / PAGE_SIZE); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ set_stackptrs(0); mutex_init(); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); devmap_bootstrap(0, NULL); vm_max_kernel_address = platform_lastaddr(); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); /* * If we made a mapping for EARLY_PRINTF after pmap_bootstrap_prepare(), * undo it now that the normal console printf works. */ #if defined(EARLY_PRINTF) && defined(SOCDEV_PA) && defined(SOCDEV_VA) && SOCDEV_VA < KERNBASE pmap_kremove(SOCDEV_VA); #endif debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); debugf(" lastaddr1: 0x%08x\n", lastaddr); arm_print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in cpu_setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ /* Set stack for exception handlers */ undefined_init(); init_proc0(kernelstack); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); enable_interrupts(PSR_A); pmap_bootstrap(0); /* Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_exclude_region(abp->abp_physaddr, pmap_preboot_get_pages(0) - abp->abp_physaddr, EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); /* Init message buffer. 
*/ msgbufinit(msgbufp, msgbufsize); dbg_monitor_init(); kdb_init(); + /* Apply possible BP hardening. */ + cpuinfo_init_bp_hardening(); return ((void *)STACKALIGN(thread0.td_pcb)); } #endif /* __ARM_ARCH < 6 */ #endif /* FDT */ Index: head/sys/arm/arm/mp_machdep.c =================================================================== --- head/sys/arm/arm/mp_machdep.c (revision 328466) +++ head/sys/arm/arm/mp_machdep.c (revision 328467) @@ -1,540 +1,543 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_ddb.h" #include "opt_smp.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef CPU_MV_PJ4B #include #endif extern struct pcpu __pcpu[]; /* used to hold the AP's until we are ready to release them */ struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; /* # of Applications processors */ volatile int mp_naps; /* Set to 1 once we're ready to let the APs out of the pen. 
*/ volatile int aps_ready = 0; #ifndef INTRNG static int ipi_handler(void *arg); #endif void set_stackptrs(int cpu); /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; /* Determine if we running MP machine */ int cpu_mp_probe(void) { KASSERT(mp_ncpus != 0, ("cpu_mp_probe: mp_ncpus is unset")); CPU_SETOF(0, &all_cpus); return (mp_ncpus > 1); } /* Start Application Processor via platform specific function */ static int check_ap(void) { uint32_t ms; for (ms = 0; ms < 2000; ++ms) { if ((mp_naps + 1) == mp_ncpus) return (0); /* success */ else DELAY(1000); } return (-2); } extern unsigned char _end[]; /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { int error, i; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* Reserve memory for application processors */ for(i = 0; i < (mp_ncpus - 1); i++) dpcpu[i] = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); dcache_wbinv_poc_all(); /* Initialize boot code and start up processors */ platform_mp_start_ap(); /* Check if ap's started properly */ error = check_ap(); if (error) printf("WARNING: Some AP's failed to start\n"); else for (i = 1; i < mp_ncpus; i++) CPU_SET(i, &all_cpus); } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } extern vm_paddr_t pmap_pa; void init_secondary(int cpu) { struct pcpu *pc; uint32_t loop_counter; #ifndef INTRNG int start = 0, end = 0; #endif pmap_set_tex(); cpuinfo_reinit_mmu(pmap_kern_ttb); cpu_setup(); /* Provide stack pointers for other processor modes. */ set_stackptrs(cpu); enable_interrupts(PSR_A); pc = &__pcpu[cpu]; /* * pcpu_init() updates queue, so it should not be executed in parallel * on several cores */ while(mp_naps < (cpu - 1)) ; pcpu_init(pc, cpu, sizeof(struct pcpu)); dpcpu_init(dpcpu[cpu - 1], cpu); #if __ARM_ARCH >= 6 && defined(DDB) dbg_monitor_init_secondary(); #endif /* Signal our startup to BSP */ atomic_add_rel_32(&mp_naps, 1); /* Spin until the BSP releases the APs */ while (!atomic_load_acq_int(&aps_ready)) { #if __ARM_ARCH >= 7 __asm __volatile("wfe"); #endif } /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pc->pc_curthread = pc->pc_idlethread; pc->pc_curpcb = pc->pc_idlethread->td_pcb; set_curthread(pc->pc_idlethread); #ifdef VFP vfp_init(); #endif /* Configure the interrupt controller */ intr_pic_init_secondary(); + /* Apply possible BP hardening */ + cpuinfo_init_bp_hardening(); + mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); #ifndef INTRNG /* Enable ipi */ #ifdef IPI_IRQ_START start = IPI_IRQ_START; #ifdef IPI_IRQ_END end = IPI_IRQ_END; #else end = IPI_IRQ_START; #endif #endif for (int i = start; i <= end; i++) arm_unmask_irq(i); #endif /* INTRNG */ enable_interrupts(PSR_I); loop_counter = 0; while (smp_started == 0) { DELAY(100); loop_counter++; if (loop_counter == 1000) CTR0(KTR_SMP, "AP still wait for smp_started"); } /* Start per-CPU event timers. 
*/ cpu_initclocks_ap(); CTR0(KTR_SMP, "go into scheduler"); /* Enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } #ifdef INTRNG static void ipi_rendezvous(void *dummy __unused) { CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); } static void ipi_ast(void *dummy __unused) { CTR0(KTR_SMP, "IPI_AST"); } static void ipi_stop(void *dummy __unused) { u_int cpu; /* * IPI_STOP_HARD is mapped to IPI_STOP. */ CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD"); cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* * CPUs are stopped when entering the debugger and at * system shutdown, both events which can precede a * panic dump. For the dump to be correct, all caches * must be flushed and invalidated, but on ARM there's * no way to broadcast a wbinv_all to other cores. * Instead, we have each core do the local wbinv_all as * part of stopping the core. The core requesting the * stop will do the l2 cache flush after all other cores * have done their l1 flushes and stopped. */ dcache_wbinv_poc_all(); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); #ifdef DDB dbg_resume_dbreg(); #endif CTR0(KTR_SMP, "IPI_STOP (restart)"); } static void ipi_preempt(void *arg) { struct trapframe *oldframe; struct thread *td; critical_enter(); td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = (struct trapframe *)arg; CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(td); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; critical_exit(); } static void ipi_hardclock(void *arg) { struct trapframe *oldframe; struct thread *td; critical_enter(); td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = (struct trapframe *)arg; CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; critical_exit(); } #else static int ipi_handler(void *arg) { u_int cpu, ipi; cpu = PCPU_GET(cpuid); ipi = pic_ipi_read((int)arg); while ((ipi != 0x3ff)) { switch (ipi) { case IPI_RENDEZVOUS: CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); break; case IPI_AST: CTR0(KTR_SMP, "IPI_AST"); break; case IPI_STOP: /* * IPI_STOP_HARD is mapped to IPI_STOP so it is not * necessary to add it in the switch. */ CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD"); savectx(&stoppcbs[cpu]); /* * CPUs are stopped when entering the debugger and at * system shutdown, both events which can precede a * panic dump. For the dump to be correct, all caches * must be flushed and invalidated, but on ARM there's * no way to broadcast a wbinv_all to other cores. * Instead, we have each core do the local wbinv_all as * part of stopping the core. The core requesting the * stop will do the l2 cache flush after all other cores * have done their l1 flushes and stopped. 
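ipi_preempt() and ipi_hardclock() above share one pattern: inside a critical section, bump td_intr_nesting_level and publish the interrupted trap frame in td_intr_frame around the nested call, so code reached from sched_preempt() or hardclockintr() can inspect the interrupted context. A small model of that frame-swap discipline (the types are stand-ins, not the kernel's):

#include <stdio.h>

struct trapframe { int dummy; };
struct thread {
	int td_intr_nesting_level;
	struct trapframe *td_intr_frame;
};

static void
preempt_handler(struct thread *td)
{
	printf("nesting=%d frame=%p\n",
	    td->td_intr_nesting_level, (void *)td->td_intr_frame);
}

static void
with_intr_frame(struct thread *td, struct trapframe *tf,
    void (*handler)(struct thread *))
{
	struct trapframe *oldframe;

	/* critical_enter() in the kernel pins us to this CPU. */
	td->td_intr_nesting_level++;
	oldframe = td->td_intr_frame;
	td->td_intr_frame = tf;		/* publish interrupted frame */
	handler(td);
	td->td_intr_frame = oldframe;	/* restore on the way out */
	td->td_intr_nesting_level--;
	/* critical_exit() */
}

int
main(void)
{
	struct thread td = { 0, NULL };
	struct trapframe tf;

	with_intr_frame(&td, &tf, preempt_handler);
	return (0);
}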
*/ dcache_wbinv_poc_all(); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); #ifdef DDB dbg_resume_dbreg(); #endif CTR0(KTR_SMP, "IPI_STOP (restart)"); break; case IPI_PREEMPT: CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); break; case IPI_HARDCLOCK: CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); break; default: panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu); } pic_ipi_clear(ipi); ipi = pic_ipi_read(-1); } return (FILTER_HANDLED); } #endif static void release_aps(void *dummy __unused) { uint32_t loop_counter; #ifndef INTRNG int start = 0, end = 0; #endif if (mp_ncpus == 1) return; #ifdef INTRNG intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL); intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL); intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL); intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); #else #ifdef IPI_IRQ_START start = IPI_IRQ_START; #ifdef IPI_IRQ_END end = IPI_IRQ_END; #else end = IPI_IRQ_START; #endif #endif for (int i = start; i <= end; i++) { /* * IPI handler */ /* * Use 0xdeadbeef as the argument value for irq 0, * if we used 0, the intr code will give the trap frame * pointer instead. */ arm_setup_irqhandler("ipi", ipi_handler, NULL, (void *)i, i, INTR_TYPE_MISC | INTR_EXCL, NULL); /* Enable ipi */ arm_unmask_irq(i); } #endif atomic_store_rel_int(&aps_ready, 1); /* Wake the other threads up */ dsb(); sev(); printf("Release APs\n"); for (loop_counter = 0; loop_counter < 2000; loop_counter++) { if (smp_started) return; DELAY(1000); } printf("AP's not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); struct cpu_group * cpu_topo(void) { return (smp_topo_1level(CG_SHARE_L2, mp_ncpus, 0)); } void cpu_mp_setmaxid(void) { platform_mp_setmaxid(); } /* Sending IPI */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); #ifdef INTRNG intr_ipi_send(other_cpus, ipi); #else pic_ipi_send(other_cpus, ipi); #endif } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi); #ifdef INTRNG intr_ipi_send(cpus, ipi); #else pic_ipi_send(cpus, ipi); #endif } void ipi_selected(cpuset_t cpus, u_int ipi) { CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); #ifdef INTRNG intr_ipi_send(cpus, ipi); #else pic_ipi_send(cpus, ipi); #endif } Index: head/sys/arm/arm/swtch-v6.S =================================================================== --- head/sys/arm/arm/swtch-v6.S (revision 328466) +++ head/sys/arm/arm/swtch-v6.S (revision 328467) @@ -1,499 +1,508 @@ /* $NetBSD: cpuswitch.S,v 1.41 2003/11/15 08:44:18 scw Exp $ */ /*- * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * RiscBSD kernel project * * cpuswitch.S * * cpu switching functions * * Created : 15/10/94 * */ #include "assym.s" #include "opt_sched.h" #include #include #include #include #include __FBSDID("$FreeBSD$"); #if defined(SMP) #define GET_PCPU(tmp, tmp2) \ mrc CP15_MPIDR(tmp); \ and tmp, tmp, #0xf; \ ldr tmp2, .Lcurpcpu+4; \ mul tmp, tmp, tmp2; \ ldr tmp2, .Lcurpcpu; \ add tmp, tmp, tmp2; #else #define GET_PCPU(tmp, tmp2) \ ldr tmp, .Lcurpcpu #endif #ifdef VFP .fpu vfp /* allow VFP instructions */ #endif .Lcurpcpu: .word _C_LABEL(__pcpu) .word PCPU_SIZE .Lblocked_lock: .word _C_LABEL(blocked_lock) ENTRY(cpu_context_switch) DSB /* * We can directly switch between translation tables only when the * size of the mapping for any given virtual address is the same * in the old and new translation tables. * Thus, we must switch to kernel pmap translation table as * intermediate mapping because all sizes of these mappings are same * (or unmapped). The same is true for switch from kernel pmap * translation table to new pmap one. */ mov r2, #(CPU_ASID_KERNEL) ldr r1, =(_C_LABEL(pmap_kern_ttb)) ldr r1, [r1] mcr CP15_TTBR0(r1) /* switch to kernel TTB */ ISB mcr CP15_TLBIASID(r2) /* flush not global TLBs */ DSB mcr CP15_TTBR0(r0) /* switch to new TTB */ ISB /* * We must flush not global TLBs again because PT2MAP mapping * is different. */ mcr CP15_TLBIASID(r2) /* flush not global TLBs */ /* * Flush entire Branch Target Cache because of the branch predictor * is not architecturally invisible. See ARM Architecture Reference * Manual ARMv7-A and ARMv7-R edition, page B2-1264(65), Branch * predictors and Requirements for branch predictor maintenance * operations sections. */ - mcr CP15_BPIALL /* flush entire Branch Target Cache */ + /* + * Additionally, to mitigate mistrained branch predictor attack + * we must invalidate it on affected CPUs. Unfortunately, BPIALL + * is effectively NOP on Cortex-A15 so it needs special treatment. + */ + ldr r0, [r8, #PC_BP_HARDEN_KIND] + cmp r0, #PCPU_BP_HARDEN_KIND_ICIALLU + mcrne CP15_BPIALL /* Flush entire Branch Target Cache */ + mcreq CP15_ICIALLU /* This is the only way how to flush */ + /* Branch Target Cache on Cortex-A15. */ DSB mov pc, lr END(cpu_context_switch) /* * cpu_throw(oldtd, newtd) * * Remove current thread state, then select the next thread to run * and load its state. * r0 = oldtd * r1 = newtd */ ENTRY(cpu_throw) mov r10, r0 /* r10 = oldtd */ mov r11, r1 /* r11 = newtd */ #ifdef VFP /* This thread is dying, disable */ bl _C_LABEL(vfp_discard) /* VFP without preserving state. */ #endif GET_PCPU(r8, r9) /* r8 = current pcpu */ ldr r4, [r8, #PC_CPUID] /* r4 = current cpu id */ cmp r10, #0 /* old thread? */ beq 2f /* no, skip */ /* Remove this CPU from the active list. */ ldr r5, [r8, #PC_CURPMAP] mov r0, #(PM_ACTIVE) add r5, r0 /* r5 = old pm_active */ /* Compute position and mask. */ #if _NCPUWORDS > 1 lsr r0, r4, #3 bic r0, #3 add r5, r0 /* r5 = position in old pm_active */ mov r2, #1 and r0, r4, #31 lsl r2, r0 /* r2 = mask */ #else mov r2, #1 lsl r2, r4 /* r2 = mask */ #endif /* Clear cpu from old active list. */ #ifdef SMP 1: ldrex r0, [r5] bic r0, r2 strex r1, r0, [r5] teq r1, #0 bne 1b #else ldr r0, [r5] bic r0, r2 str r0, [r5] #endif 2: #ifdef INVARIANTS cmp r11, #0 /* new thread? */ beq badsw1 /* no, panic */ #endif ldr r7, [r11, #(TD_PCB)] /* r7 = new PCB */ /* * Registers at this point * r4 = current cpu id * r7 = new PCB * r8 = current pcpu * r11 = newtd */ /* MMU switch to new thread. 
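The new mcrne/mcreq pair above dispatches on pc_bp_harden_kind, read through r8, which holds the current pcpu pointer on this path. The same dispatch written as C with inline assembly for the two CP15 operations (BPIALL is c7,c5,6 and ICIALLU is c7,c5,0); this is a privileged, ARMv7-only sketch, not the committed code, and it relies on a later ISB or exception return for synchronization just as the assembler path does:

#define PCPU_BP_HARDEN_KIND_BPIALL	1
#define PCPU_BP_HARDEN_KIND_ICIALLU	2

static inline void
bp_harden_flush(int kind)
{
	if (kind == PCPU_BP_HARDEN_KIND_BPIALL)
		/* BPIALL: invalidate all branch predictors. */
		__asm __volatile("mcr p15, 0, %0, c7, c5, 6" : : "r" (0));
	else if (kind == PCPU_BP_HARDEN_KIND_ICIALLU)
		/* ICIALLU: I-cache invalidate; the only way to flush
		 * the Branch Target Cache on Cortex-A15. */
		__asm __volatile("mcr p15, 0, %0, c7, c5, 0" : : "r" (0));
	__asm __volatile("dsb" : : : "memory");
}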
*/ ldr r0, [r7, #(PCB_PAGEDIR)] #ifdef INVARIANTS cmp r0, #0 /* new thread? */ beq badsw4 /* no, panic */ #endif bl _C_LABEL(cpu_context_switch) /* * Set new PMAP as current one. * Insert cpu to new active list. */ ldr r6, [r11, #(TD_PROC)] /* newtd->proc */ ldr r6, [r6, #(P_VMSPACE)] /* newtd->proc->vmspace */ add r6, #VM_PMAP /* newtd->proc->vmspace->pmap */ str r6, [r8, #PC_CURPMAP] /* store to curpmap */ mov r0, #PM_ACTIVE add r6, r0 /* r6 = new pm_active */ /* compute position and mask */ #if _NCPUWORDS > 1 lsr r0, r4, #3 bic r0, #3 add r6, r0 /* r6 = position in new pm_active */ mov r2, #1 and r0, r4, #31 lsl r2, r0 /* r2 = mask */ #else mov r2, #1 lsl r2, r4 /* r2 = mask */ #endif /* Set cpu to new active list. */ #ifdef SMP 1: ldrex r0, [r6] orr r0, r2 strex r1, r0, [r6] teq r1, #0 bne 1b #else ldr r0, [r6] orr r0, r2 str r0, [r6] #endif /* * Registers at this point. * r7 = new PCB * r8 = current pcpu * r11 = newtd * They must match the ones in sw1 position !!! */ DMB b sw1 /* share new thread init with cpu_switch() */ END(cpu_throw) /* * cpu_switch(oldtd, newtd, lock) * * Save the current thread state, then select the next thread to run * and load its state. * r0 = oldtd * r1 = newtd * r2 = lock (new lock for old thread) */ ENTRY(cpu_switch) /* Interrupts are disabled. */ #ifdef INVARIANTS cmp r0, #0 /* old thread? */ beq badsw2 /* no, panic */ #endif /* Save all the registers in the old thread's pcb. */ ldr r3, [r0, #(TD_PCB)] add r3, #(PCB_R4) stmia r3, {r4-r12, sp, lr, pc} mrc CP15_TPIDRURW(r4) str r4, [r3, #(PCB_TPIDRURW - PCB_R4)] #ifdef INVARIANTS cmp r1, #0 /* new thread? */ beq badsw3 /* no, panic */ #endif /* * Save arguments. Note that we can now use r0-r14 until * it is time to restore them for the new thread. However, * some registers are not safe over function call. */ mov r9, r2 /* r9 = lock */ mov r10, r0 /* r10 = oldtd */ mov r11, r1 /* r11 = newtd */ GET_PCPU(r8, r3) /* r8 = current PCPU */ ldr r7, [r11, #(TD_PCB)] /* r7 = newtd->td_pcb */ #ifdef VFP ldr r3, [r10, #(TD_PCB)] fmrx r0, fpexc /* If the VFP is enabled */ tst r0, #(VFPEXC_EN) /* the current thread has */ movne r1, #1 /* used it, so go save */ addne r0, r3, #(PCB_VFPSTATE) /* the state into the PCB */ blne _C_LABEL(vfp_store) /* and disable the VFP. */ #endif /* * MMU switch. If we're switching to a thread with the same * address space as the outgoing one, we can skip the MMU switch. */ mrc CP15_TTBR0(r1) /* r1 = old TTB */ ldr r0, [r7, #(PCB_PAGEDIR)] /* r0 = new TTB */ cmp r0, r1 /* Switching to the TTB? */ beq sw0 /* same TTB, skip */ #ifdef INVARIANTS cmp r0, #0 /* new thread? */ beq badsw4 /* no, panic */ #endif bl cpu_context_switch /* new TTB as argument */ /* * Registers at this point * r7 = new PCB * r8 = current pcpu * r9 = lock * r10 = oldtd * r11 = newtd */ /* * Set new PMAP as current one. * Update active list on PMAPs. */ ldr r6, [r11, #TD_PROC] /* newtd->proc */ ldr r6, [r6, #P_VMSPACE] /* newtd->proc->vmspace */ add r6, #VM_PMAP /* newtd->proc->vmspace->pmap */ ldr r5, [r8, #PC_CURPMAP] /* get old curpmap */ str r6, [r8, #PC_CURPMAP] /* and save new one */ mov r0, #PM_ACTIVE add r5, r0 /* r5 = old pm_active */ add r6, r0 /* r6 = new pm_active */ /* Compute position and mask. */ ldr r4, [r8, #PC_CPUID] #if _NCPUWORDS > 1 lsr r0, r4, #3 bic r0, #3 add r5, r0 /* r5 = position in old pm_active */ add r6, r0 /* r6 = position in new pm_active */ mov r2, #1 and r0, r4, #31 lsl r2, r0 /* r2 = mask */ #else mov r2, #1 lsl r2, r4 /* r2 = mask */ #endif /* Clear cpu from old active list. 
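The ldrex/strex loops around this point maintain pm_active, one bit per CPU: the #if _NCPUWORDS > 1 path turns the CPU id into a word offset ((cpuid >> 3) & ~3 bytes) and a 32-bit mask (1 << (cpuid & 31)). The same operation in portable C, where the GCC __atomic builtins compile to exactly such ldrex/strex retry loops on ARMv6+:

#include <stdint.h>

#define _NCPUBITS	32	/* bits per pm_active word */

/* Set this CPU's bit in pm_active on switch-in. */
static void
pm_active_set(uint32_t *pm_active, int cpuid)
{
	__atomic_fetch_or(&pm_active[cpuid / _NCPUBITS],
	    1u << (cpuid % _NCPUBITS), __ATOMIC_RELAXED);
}

/* Clear it again when the CPU switches away from the pmap. */
static void
pm_active_clear(uint32_t *pm_active, int cpuid)
{
	__atomic_fetch_and(&pm_active[cpuid / _NCPUBITS],
	    ~(1u << (cpuid % _NCPUBITS)), __ATOMIC_RELAXED);
}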
*/ #ifdef SMP 1: ldrex r0, [r5] bic r0, r2 strex r1, r0, [r5] teq r1, #0 bne 1b #else ldr r0, [r5] bic r0, r2 str r0, [r5] #endif /* Set cpu to new active list. */ #ifdef SMP 1: ldrex r0, [r6] orr r0, r2 strex r1, r0, [r6] teq r1, #0 bne 1b #else ldr r0, [r6] orr r0, r2 str r0, [r6] #endif sw0: /* * Registers at this point * r7 = new PCB * r8 = current pcpu * r9 = lock * r10 = oldtd * r11 = newtd */ /* Change the old thread lock. */ add r5, r10, #TD_LOCK DMB 1: ldrex r0, [r5] strex r1, r9, [r5] teq r1, #0 bne 1b DMB sw1: clrex /* * Registers at this point * r7 = new PCB * r8 = current pcpu * r11 = newtd */ #if defined(SMP) && defined(SCHED_ULE) /* * 386 and amd64 do the blocked lock test only for SMP and SCHED_ULE * QQQ: What does it mean in reality and why is it done? */ ldr r6, =blocked_lock 1: ldr r3, [r11, #TD_LOCK] /* atomic write regular read */ cmp r3, r6 beq 1b #endif /* We have a new curthread now so make a note it */ str r11, [r8, #PC_CURTHREAD] mcr CP15_TPIDRPRW(r11) /* store pcb in per cpu structure */ str r7, [r8, #PC_CURPCB] /* * Restore all saved registers and return. Note that some saved * registers can be changed when either cpu_fork(), cpu_copy_thread(), * cpu_fork_kthread_handler(), or makectx() was called. * * The value of TPIDRURW is also written into TPIDRURO, as * userspace still uses TPIDRURO, modifying it through * sysarch(ARM_SET_TP, addr). */ ldr r3, [r7, #PCB_TPIDRURW] mcr CP15_TPIDRURW(r3) /* write tls thread reg 2 */ mcr CP15_TPIDRURO(r3) /* write tls thread reg 3 */ add r3, r7, #PCB_R4 ldmia r3, {r4-r12, sp, pc} #ifdef INVARIANTS badsw1: ldr r0, =sw1_panic_str bl _C_LABEL(panic) 1: nop b 1b badsw2: ldr r0, =sw2_panic_str bl _C_LABEL(panic) 1: nop b 1b badsw3: ldr r0, =sw3_panic_str bl _C_LABEL(panic) 1: nop b 1b badsw4: ldr r0, =sw4_panic_str bl _C_LABEL(panic) 1: nop b 1b sw1_panic_str: .asciz "cpu_throw: no newthread supplied.\n" sw2_panic_str: .asciz "cpu_switch: no curthread supplied.\n" sw3_panic_str: .asciz "cpu_switch: no newthread supplied.\n" sw4_panic_str: .asciz "cpu_switch: new pagedir is NULL.\n" #endif END(cpu_switch) Index: head/sys/arm/arm/trap-v6.c =================================================================== --- head/sys/arm/arm/trap-v6.c (revision 328466) +++ head/sys/arm/arm/trap-v6.c (revision 328467) @@ -1,647 +1,662 @@ /*- * Copyright 2014 Olivier Houchard * Copyright 2014 Svatopluk Kraus * Copyright 2014 Michal Meloun * Copyright 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
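Stepping back to the sw1 path above: under SMP with SCHED_ULE, cpu_switch() can see the incoming thread while its old CPU still owns it, which is marked by td_lock pointing at blocked_lock; the loop waits for the owner's final store before running the thread. A user-space model of that wait:

#include <stdatomic.h>

static char blocked_lock_tag;	/* stands in for the kernel's blocked_lock */
#define	blocked_lock ((void *)&blocked_lock_tag)

static void
wait_for_thread_lock(_Atomic(void *) *td_lock)
{
	/* The releasing CPU stores the real lock pointer last. */
	while (atomic_load_explicit(td_lock, memory_order_acquire) ==
	    blocked_lock)
		;	/* cpu_spinwait() on real hardware */
}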
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_ktrace.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #include #endif #ifdef KDTRACE_HOOKS #include #endif extern char cachebailout[]; #ifdef DEBUG int last_fault_code; /* For the benefit of pmap_fault_fixup() */ #endif struct ksig { int sig; u_long code; vm_offset_t addr; }; typedef int abort_func_t(struct trapframe *, u_int, u_int, u_int, u_int, struct thread *, struct ksig *); static abort_func_t abort_fatal; static abort_func_t abort_align; static abort_func_t abort_icache; struct abort { abort_func_t *func; const char *desc; }; /* * How are the aborts handled? * * Undefined Code: * - Always fatal as we do not know what does it mean. * Imprecise External Abort: * - Always fatal, but can be handled somehow in the future. * Now, due to PCIe buggy hardware, ignored. * Precise External Abort: * - Always fatal, but who knows in the future??? * Debug Event: * - Special handling. * External Translation Abort (L1 & L2) * - Always fatal as something is screwed up in page tables or hardware. * Domain Fault (L1 & L2): * - Always fatal as we do not play game with domains. * Alignment Fault: * - Everything should be aligned in kernel with exception of user to kernel * and vice versa data copying, so if pcb_onfault is not set, it's fatal. * We generate signal in case of abort from user mode. * Instruction cache maintenance: * - According to manual, this is translation fault during cache maintenance * operation. So, it could be really complex in SMP case and fuzzy too * for cache operations working on virtual addresses. For now, we will * consider this abort as fatal. In fact, no cache maintenance on * not mapped virtual addresses should be called. As cache maintenance * operation (except DMB, DSB, and Flush Prefetch Buffer) are priviledged, * the abort is fatal for user mode as well for now. (This is good place to * note that cache maintenance on virtual address fill TLB.) * Acces Bit (L1 & L2): * - Fast hardware emulation for kernel and user mode. * Translation Fault (L1 & L2): * - Standard fault mechanism is held including vm_fault(). * Permission Fault (L1 & L2): * - Fast hardware emulation of modify bits and in other cases, standard * fault mechanism is held including vm_fault(). 
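For reference while reading the 32-entry table that follows: the ARMv7 short-descriptor DFSR/IFSR keep the fault status in bits [3:0] plus FS[4] at bit 10. The decoding macro used below is FSR_TO_FAULT(); its definition is not part of this diff, so the following shape is an assumption, though it is the only one consistent with the table layout:

#include <stdint.h>

/* Likely shape of FSR_TO_FAULT(): status bits [3:0] plus FS[4]
 * from bit 10, giving an index 0..31 into aborts[]. */
static inline unsigned
fsr_to_fault(uint32_t fsr)
{
	return ((fsr & 0xF) | ((fsr >> 6) & 0x10));
}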
*/ static const struct abort aborts[] = { {abort_fatal, "Undefined Code (0x000)"}, {abort_align, "Alignment Fault"}, {abort_fatal, "Debug Event"}, {NULL, "Access Bit (L1)"}, {NULL, "Instruction cache maintenance"}, {NULL, "Translation Fault (L1)"}, {NULL, "Access Bit (L2)"}, {NULL, "Translation Fault (L2)"}, {abort_fatal, "External Abort"}, {abort_fatal, "Domain Fault (L1)"}, {abort_fatal, "Undefined Code (0x00A)"}, {abort_fatal, "Domain Fault (L2)"}, {abort_fatal, "External Translation Abort (L1)"}, {NULL, "Permission Fault (L1)"}, {abort_fatal, "External Translation Abort (L2)"}, {NULL, "Permission Fault (L2)"}, {abort_fatal, "TLB Conflict Abort"}, {abort_fatal, "Undefined Code (0x401)"}, {abort_fatal, "Undefined Code (0x402)"}, {abort_fatal, "Undefined Code (0x403)"}, {abort_fatal, "Undefined Code (0x404)"}, {abort_fatal, "Undefined Code (0x405)"}, {abort_fatal, "Asynchronous External Abort"}, {abort_fatal, "Undefined Code (0x407)"}, {abort_fatal, "Asynchronous Parity Error on Memory Access"}, {abort_fatal, "Parity Error on Memory Access"}, {abort_fatal, "Undefined Code (0x40A)"}, {abort_fatal, "Undefined Code (0x40B)"}, {abort_fatal, "Parity Error on Translation (L1)"}, {abort_fatal, "Undefined Code (0x40D)"}, {abort_fatal, "Parity Error on Translation (L2)"}, {abort_fatal, "Undefined Code (0x40F)"} }; static __inline void call_trapsignal(struct thread *td, int sig, int code, vm_offset_t addr) { ksiginfo_t ksi; CTR4(KTR_TRAP, "%s: addr: %#x, sig: %d, code: %d", __func__, addr, sig, code); /* * TODO: some info would be nice to know * if we are serving data or prefetch abort. */ ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = (void *)addr; trapsignal(td, &ksi); } /* * abort_imprecise() handles the following abort: * * FAULT_EA_IMPREC - Imprecise External Abort * * The imprecise means that we don't know where the abort happened, * thus FAR is undefined. The abort should not never fire, but hot * plugging or accidental hardware failure can be the cause of it. * If the abort happens, it can even be on different (thread) context. * Without any additional support, the abort is fatal, as we do not * know what really happened. * * QQQ: Some additional functionality, like pcb_onfault but global, * can be implemented. Imprecise handlers could be registered * which tell us if the abort is caused by something they know * about. They should return one of three codes like: * FAULT_IS_MINE, * FAULT_CAN_BE_MINE, * FAULT_IS_NOT_MINE. * The handlers should be called until some of them returns * FAULT_IS_MINE value or all was called. If all handlers return * FAULT_IS_NOT_MINE value, then the abort is fatal. */ static __inline void abort_imprecise(struct trapframe *tf, u_int fsr, u_int prefetch, bool usermode) { /* * XXX - We can got imprecise abort as result of access * to not-present PCI/PCIe configuration space. */ #if 0 goto out; #endif abort_fatal(tf, FAULT_EA_IMPREC, fsr, 0, prefetch, curthread, NULL); /* * Returning from this function means that we ignore * the abort for good reason. Note that imprecise abort * could fire any time even in user mode. */ #if 0 out: if (usermode) userret(curthread, tf); #endif } /* * abort_debug() handles the following abort: * * FAULT_DEBUG - Debug Event * */ static __inline void abort_debug(struct trapframe *tf, u_int fsr, u_int prefetch, bool usermode, u_int far) { if (usermode) { struct thread *td; td = curthread; call_trapsignal(td, SIGTRAP, TRAP_BRKPT, far); userret(td, tf); } else { #ifdef KDB kdb_trap((prefetch) ? 
T_BREAKPOINT : T_WATCHPOINT, 0, tf); #else printf("No debugger in kernel.\n"); #endif } } /* * Abort handler. * * FAR, FSR, and everything what can be lost after enabling * interrupts must be grabbed before the interrupts will be * enabled. Note that when interrupts will be enabled, we * could even migrate to another CPU ... * * TODO: move quick cases to ASM */ void abort_handler(struct trapframe *tf, int prefetch) { struct thread *td; vm_offset_t far, va; int idx, rv; uint32_t fsr; struct ksig ksig; struct proc *p; struct pcb *pcb; struct vm_map *map; struct vmspace *vm; vm_prot_t ftype; bool usermode; + int bp_harden; #ifdef INVARIANTS void *onfault; #endif VM_CNT_INC(v_trap); td = curthread; fsr = (prefetch) ? cp15_ifsr_get(): cp15_dfsr_get(); #if __ARM_ARCH >= 7 far = (prefetch) ? cp15_ifar_get() : cp15_dfar_get(); #else far = (prefetch) ? TRAPF_PC(tf) : cp15_dfar_get(); #endif idx = FSR_TO_FAULT(fsr); usermode = TRAPF_USERMODE(tf); /* Abort came from user mode? */ + + /* + * Apply BP hardening by flushing the branch prediction cache + * for prefaults on kernel addresses. + */ + if (__predict_false(prefetch && far > VM_MAXUSER_ADDRESS && + (idx == FAULT_TRAN_L2 || idx == FAULT_PERM_L2))) { + bp_harden = PCPU_GET(bp_harden_kind); + if (bp_harden == PCPU_BP_HARDEN_KIND_BPIALL) + _CP15_BPIALL(); + else if (bp_harden == PCPU_BP_HARDEN_KIND_ICIALLU) + _CP15_ICIALLU(); + } + if (usermode) td->td_frame = tf; CTR6(KTR_TRAP, "%s: fsr %#x (idx %u) far %#x prefetch %u usermode %d", __func__, fsr, idx, far, prefetch, usermode); /* * Firstly, handle aborts that are not directly related to mapping. */ if (__predict_false(idx == FAULT_EA_IMPREC)) { abort_imprecise(tf, fsr, prefetch, usermode); return; } if (__predict_false(idx == FAULT_DEBUG)) { abort_debug(tf, fsr, prefetch, usermode, far); return; } /* * ARM has a set of unprivileged load and store instructions * (LDRT/LDRBT/STRT/STRBT ...) which are supposed to be used in other * than user mode and OS should recognize their aborts and behave * appropriately. However, there is no way how to do that reasonably * in general unless we restrict the handling somehow. * * For now, these instructions are used only in copyin()/copyout() * like functions where usermode buffers are checked in advance that * they are not from KVA space. Thus, no action is needed here. */ /* * (1) Handle access and R/W hardware emulation aborts. * (2) Check that abort is not on pmap essential address ranges. * There is no way how to fix it, so we don't even try. */ rv = pmap_fault(PCPU_GET(curpmap), far, fsr, idx, usermode); if (rv == KERN_SUCCESS) return; #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif if (rv == KERN_INVALID_ADDRESS) goto nogo; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. 
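The new hunk above is the trap-path half of the Spectre-v2 mitigation: only a prefetch abort on a kernel address whose status is an L2 translation or permission fault, the signature of user code steering speculation at kernel text, pays for a predictor flush. The predicate in isolation; the VM_MAXUSER_ADDRESS value is an illustrative assumption, while the fault indices 7 and 0xF match the aborts[] table above:

#include <stdbool.h>
#include <stdint.h>

#define VM_MAXUSER_ADDRESS	0xbfffffffu	/* assumed platform value */
#define FAULT_TRAN_L2		0x07	/* "Translation Fault (L2)" */
#define FAULT_PERM_L2		0x0F	/* "Permission Fault (L2)" */

static bool
bp_harden_wanted(bool prefetch, uint32_t far, unsigned idx)
{
	/* Kernel-address prefetch aborts with L2 translation or
	 * permission status are the only case worth a flush. */
	return (prefetch && far > VM_MAXUSER_ADDRESS &&
	    (idx == FAULT_TRAN_L2 || idx == FAULT_PERM_L2));
}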
*/ if (td->td_md.md_spurflt_addr != far || (td->td_pflags & TDP_RESETSPUR) != 0) { td->td_md.md_spurflt_addr = far; td->td_pflags &= ~TDP_RESETSPUR; tlb_flush_local(far & ~PAGE_MASK); return; } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig); return; } } /* Re-enable interrupts if they were enabled previously. */ if (td->td_md.md_spinlock_count == 0) { if (__predict_true(tf->tf_spsr & PSR_I) == 0) enable_interrupts(PSR_I); if (__predict_true(tf->tf_spsr & PSR_F) == 0) enable_interrupts(PSR_F); } p = td->td_proc; if (usermode) { td->td_pticks = 0; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); } /* Invoke the appropriate handler, if necessary. */ if (__predict_false(aborts[idx].func != NULL)) { if ((aborts[idx].func)(tf, idx, fsr, far, prefetch, td, &ksig)) goto do_trapsignal; goto out; } /* * At this point, we're dealing with one of the following aborts: * * FAULT_ICACHE - I-cache maintenance * FAULT_TRAN_xx - Translation * FAULT_PERM_xx - Permission */ /* * Don't pass faulting cache operation to vm_fault(). We don't want * to handle all vm stuff at this moment. */ pcb = td->td_pcb; if (__predict_false(pcb->pcb_onfault == cachebailout)) { tf->tf_r0 = far; /* return failing address */ tf->tf_pc = (register_t)pcb->pcb_onfault; return; } /* Handle remaining I-cache aborts. */ if (idx == FAULT_ICACHE) { if (abort_icache(tf, idx, fsr, far, prefetch, td, &ksig)) goto do_trapsignal; goto out; } va = trunc_page(far); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) goto nogo; map = kernel_map; } else { /* * This is a fault on non-kernel virtual memory. If curproc * is NULL or curproc->p_vmspace is NULL the fault is fatal. */ vm = (p != NULL) ? p->p_vmspace : NULL; if (vm == NULL) goto nogo; map = &vm->vm_map; if (!usermode && (td->td_intr_nesting_level != 0 || pcb->pcb_onfault == NULL)) { abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig); return; } } ftype = (fsr & FSR_WNR) ? VM_PROT_WRITE : VM_PROT_READ; if (prefetch) ftype |= VM_PROT_EXECUTE; #ifdef DEBUG last_fault_code = fsr; #endif #ifdef INVARIANTS onfault = pcb->pcb_onfault; pcb->pcb_onfault = NULL; #endif /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); #ifdef INVARIANTS pcb->pcb_onfault = onfault; #endif if (__predict_true(rv == KERN_SUCCESS)) goto out; nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != NULL) { tf->tf_r0 = rv; tf->tf_pc = (int)pcb->pcb_onfault; return; } CTR2(KTR_TRAP, "%s: vm_fault() failed with %d", __func__, rv); abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig); return; } ksig.sig = SIGSEGV; ksig.code = (rv == KERN_PROTECTION_FAILURE) ? 
SEGV_ACCERR : SEGV_MAPERR; ksig.addr = far; do_trapsignal: call_trapsignal(td, ksig.sig, ksig.code, ksig.addr); out: if (usermode) userret(td, tf); } /* * abort_fatal() handles the following data aborts: * * FAULT_DEBUG - Debug Event * FAULT_ACCESS_xx - Acces Bit * FAULT_EA_PREC - Precise External Abort * FAULT_DOMAIN_xx - Domain Fault * FAULT_EA_TRAN_xx - External Translation Abort * FAULT_EA_IMPREC - Imprecise External Abort * + all undefined codes for ABORT * * We should never see these on a properly functioning system. * * This function is also called by the other handlers if they * detect a fatal problem. * * Note: If 'l' is NULL, we assume we're dealing with a prefetch abort. */ static int abort_fatal(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch, struct thread *td, struct ksig *ksig) { bool usermode; const char *mode; const char *rw_mode; usermode = TRAPF_USERMODE(tf); #ifdef KDTRACE_HOOKS if (!usermode) { if (dtrace_trap_func != NULL && (*dtrace_trap_func)(tf, far)) return (0); } #endif mode = usermode ? "user" : "kernel"; rw_mode = fsr & FSR_WNR ? "write" : "read"; disable_interrupts(PSR_I|PSR_F); if (td != NULL) { printf("Fatal %s mode data abort: '%s' on %s\n", mode, aborts[idx].desc, rw_mode); printf("trapframe: %p\nFSR=%08x, FAR=", tf, fsr); if (idx != FAULT_EA_IMPREC) printf("%08x, ", far); else printf("Invalid, "); printf("spsr=%08x\n", tf->tf_spsr); } else { printf("Fatal %s mode prefetch abort at 0x%08x\n", mode, tf->tf_pc); printf("trapframe: %p, spsr=%08x\n", tf, tf->tf_spsr); } printf("r0 =%08x, r1 =%08x, r2 =%08x, r3 =%08x\n", tf->tf_r0, tf->tf_r1, tf->tf_r2, tf->tf_r3); printf("r4 =%08x, r5 =%08x, r6 =%08x, r7 =%08x\n", tf->tf_r4, tf->tf_r5, tf->tf_r6, tf->tf_r7); printf("r8 =%08x, r9 =%08x, r10=%08x, r11=%08x\n", tf->tf_r8, tf->tf_r9, tf->tf_r10, tf->tf_r11); printf("r12=%08x, ", tf->tf_r12); if (usermode) printf("usp=%08x, ulr=%08x", tf->tf_usr_sp, tf->tf_usr_lr); else printf("ssp=%08x, slr=%08x", tf->tf_svc_sp, tf->tf_svc_lr); printf(", pc =%08x\n\n", tf->tf_pc); #ifdef KDB if (debugger_on_panic || kdb_active) kdb_trap(fsr, 0, tf); #endif panic("Fatal abort"); /*NOTREACHED*/ } /* * abort_align() handles the following data abort: * * FAULT_ALIGN - Alignment fault * * Everything should be aligned in kernel with exception of user to kernel * and vice versa data copying, so if pcb_onfault is not set, it's fatal. * We generate signal in case of abort from user mode. */ static int abort_align(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch, struct thread *td, struct ksig *ksig) { bool usermode; usermode = TRAPF_USERMODE(tf); if (!usermode) { if (td->td_intr_nesting_level == 0 && td != NULL && td->td_pcb->pcb_onfault != NULL) { tf->tf_r0 = EFAULT; tf->tf_pc = (int)td->td_pcb->pcb_onfault; return (0); } abort_fatal(tf, idx, fsr, far, prefetch, td, ksig); } /* Deliver a bus error signal to the process */ ksig->code = BUS_ADRALN; ksig->sig = SIGBUS; ksig->addr = far; return (1); } /* * abort_icache() handles the following data abort: * * FAULT_ICACHE - Instruction cache maintenance * * According to manual, FAULT_ICACHE is translation fault during cache * maintenance operation. In fact, no cache maintenance operation on * not mapped virtual addresses should be called. As cache maintenance * operation (except DMB, DSB, and Flush Prefetch Buffer) are priviledged, * the abort is concider as fatal for now. 
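abort_align() above and the nogo: path in abort_handler() use the same pcb_onfault convention: if a kernel-mode fault lands inside a copyin()/copyout()-style section, rewrite the frame so the faulting function returns an error from its registered landing pad instead of panicking. Modeled minimally:

#include <stdint.h>

struct trapframe { uint32_t tf_r0, tf_pc; };
struct pcb { uintptr_t pcb_onfault; };

/* Redirect a faulting kernel frame to its onfault landing pad;
 * returns 0 if no handler is registered and the abort is fatal. */
static int
recover_onfault(struct trapframe *tf, struct pcb *pcb, int error)
{
	if (pcb->pcb_onfault == 0)
		return (0);
	tf->tf_r0 = error;		/* e.g. EFAULT, the value the */
	tf->tf_pc = pcb->pcb_onfault;	/* interrupted copy routine */
	return (1);			/* will appear to return */
}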
However, all the matter with * cache maintenance operation on virtual addresses could be really complex * and fuzzy in SMP case, so maybe in future standard fault mechanism * should be held here including vm_fault() calling. */ static int abort_icache(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch, struct thread *td, struct ksig *ksig) { abort_fatal(tf, idx, fsr, far, prefetch, td, ksig); return(0); } Index: head/sys/arm/include/cpuinfo.h =================================================================== --- head/sys/arm/include/cpuinfo.h (revision 328466) +++ head/sys/arm/include/cpuinfo.h (revision 328467) @@ -1,130 +1,132 @@ /*- * Copyright 2014 Svatopluk Kraus * Copyright 2014 Michal Meloun * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_CPUINFO_H_ #define _MACHINE_CPUINFO_H_ #include #define CPU_IMPLEMENTER_ARM 0x41 #define CPU_IMPLEMENTER_QCOM 0x51 #define CPU_IMPLEMENTER_MRVL 0x56 /* ARM */ #define CPU_ARCH_ARM1176 0xB76 #define CPU_ARCH_CORTEX_A5 0xC05 #define CPU_ARCH_CORTEX_A7 0xC07 #define CPU_ARCH_CORTEX_A8 0xC08 #define CPU_ARCH_CORTEX_A9 0xC09 #define CPU_ARCH_CORTEX_A12 0xC0D #define CPU_ARCH_CORTEX_A15 0xC0F #define CPU_ARCH_CORTEX_A17 0xC11 #define CPU_ARCH_CORTEX_A53 0xD03 #define CPU_ARCH_CORTEX_A57 0xD07 #define CPU_ARCH_CORTEX_A72 0xD08 #define CPU_ARCH_CORTEX_A73 0xD09 +#define CPU_ARCH_CORTEX_A75 0xD0A /* QCOM */ #define CPU_ARCH_KRAIT_300 0x06F /* MRVL */ #define CPU_ARCH_SHEEVA_581 0x581 /* PJ4/PJ4B */ #define CPU_ARCH_SHEEVA_584 0x584 /* PJ4B-MP/PJ4C */ struct cpuinfo { /* raw id registers */ uint32_t midr; uint32_t ctr; uint32_t tcmtr; uint32_t tlbtr; uint32_t mpidr; uint32_t revidr; uint32_t id_pfr0; uint32_t id_pfr1; uint32_t id_dfr0; uint32_t id_afr0; uint32_t id_mmfr0; uint32_t id_mmfr1; uint32_t id_mmfr2; uint32_t id_mmfr3; uint32_t id_isar0; uint32_t id_isar1; uint32_t id_isar2; uint32_t id_isar3; uint32_t id_isar4; uint32_t id_isar5; uint32_t cbar; uint32_t ccsidr; uint32_t clidr; /* Parsed bits of above registers... 
*/ /* midr */ int implementer; int revision; int architecture; int part_number; int patch; /* id_mmfr0 */ int outermost_shareability; int shareability_levels; int auxiliary_registers; int innermost_shareability; /* id_mmfr1 */ int mem_barrier; /* id_mmfr3 */ int coherent_walk; int maintenance_broadcast; /* id_pfr1 */ int generic_timer_ext; int virtualization_ext; int security_ext; /* L1 cache info */ int dcache_line_size; int dcache_line_mask; int icache_line_size; int icache_line_mask; /* mpidr */ int mp_ext; }; extern struct cpuinfo cpuinfo; void cpuinfo_init(void); #if __ARM_ARCH >= 6 +void cpuinfo_init_bp_hardening(void); void cpuinfo_reinit_mmu(uint32_t ttb); #endif #endif /* _MACHINE_CPUINFO_H_ */ Index: head/sys/arm/include/pcpu.h =================================================================== --- head/sys/arm/include/pcpu.h (revision 328466) +++ head/sys/arm/include/pcpu.h (revision 328467) @@ -1,148 +1,154 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Luoqi Chen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
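In the PCPU_MD_FIELDS hunk below, the two new members (pc_bp_harden_kind and pc_original_actlr) are paid for by shrinking __pad from 155 to 147 bytes, keeping the size and layout of struct pcpu stable for the assembler offsets in swtch-v6.S. A hypothetical compile-time guard for that arithmetic:

#include <stdint.h>

/* The two members this change adds to the ARMv6+ PCPU_MD_FIELDS. */
struct md_new {
	int		pc_bp_harden_kind;
	uint32_t	pc_original_actlr;
};

/* Old pad was 155 bytes, new pad is 147: the sum must not move. */
_Static_assert(sizeof(struct md_new) + 147 == 155,
    "new pcpu MD fields must be offset by an equal __pad shrink");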
* * from: FreeBSD: src/sys/i386/include/globaldata.h,v 1.27 2001/04/27 * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #ifdef _KERNEL #include #include #define ALT_STACK_SIZE 128 struct vmspace; #endif /* _KERNEL */ #if __ARM_ARCH >= 6 +/* Branch predictor hardening method */ +#define PCPU_BP_HARDEN_KIND_NONE 0 +#define PCPU_BP_HARDEN_KIND_BPIALL 1 +#define PCPU_BP_HARDEN_KIND_ICIALLU 2 #define PCPU_MD_FIELDS \ unsigned int pc_vfpsid; \ unsigned int pc_vfpmvfr0; \ unsigned int pc_vfpmvfr1; \ struct pmap *pc_curpmap; \ struct mtx pc_cmap_lock; \ void *pc_cmap1_pte2p; \ void *pc_cmap2_pte2p; \ caddr_t pc_cmap1_addr; \ caddr_t pc_cmap2_addr; \ vm_offset_t pc_qmap_addr; \ void *pc_qmap_pte2p; \ unsigned int pc_dbreg[32]; \ int pc_dbreg_cmd; \ - char __pad[155] + int pc_bp_harden_kind; \ + uint32_t pc_original_actlr; \ + char __pad[147] #else #define PCPU_MD_FIELDS \ char __pad[93] #endif #ifdef _KERNEL #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 struct pcb; struct pcpu; extern struct pcpu *pcpup; #if __ARM_ARCH >= 6 #define CPU_MASK (0xf) #ifndef SMP #define get_pcpu() (pcpup) #else #define get_pcpu() __extension__ ({ \ int id; \ __asm __volatile("mrc p15, 0, %0, c0, c0, 5" : "=r" (id)); \ (pcpup + (id & CPU_MASK)); \ }) #endif static inline struct thread * get_curthread(void) { void *ret; __asm __volatile("mrc p15, 0, %0, c13, c0, 4" : "=r" (ret)); return (ret); } static inline void set_curthread(struct thread *td) { __asm __volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (td)); } static inline void * get_tls(void) { void *tls; /* TPIDRURW contains the authoritative value. */ __asm __volatile("mrc p15, 0, %0, c13, c0, 2" : "=r" (tls)); return (tls); } static inline void set_tls(void *tls) { /* * Update both TPIDRURW and TPIDRURO. TPIDRURW needs to be written * first to ensure that a context switch between the two writes will * still give the desired result of updating both. */ __asm __volatile( "mcr p15, 0, %0, c13, c0, 2\n" "mcr p15, 0, %0, c13, c0, 3\n" : : "r" (tls)); } #define curthread get_curthread() #else #define get_pcpu() pcpup #endif #define PCPU_GET(member) (get_pcpu()->pc_ ## member) #define PCPU_ADD(member, value) (get_pcpu()->pc_ ## member += (value)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&get_pcpu()->pc_ ## member) #define PCPU_SET(member,value) (get_pcpu()->pc_ ## member = (value)) void pcpu0_init(void); #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */
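Because pc_bp_harden_kind is an ordinary pcpu member, C code reaches it through the token-pasting accessors defined above; abort_handler() does exactly PCPU_GET(bp_harden_kind). A toy demonstration of the expansion, with a stub get_pcpu() and a hypothetical value:

#include <stdio.h>

struct pcpu { int pc_bp_harden_kind; };

static struct pcpu pcpu0 = { 2 };	/* say, PCPU_BP_HARDEN_KIND_ICIALLU */
#define	get_pcpu()	(&pcpu0)
#define	PCPU_GET(member) (get_pcpu()->pc_ ## member)

int
main(void)
{
	/* Expands to get_pcpu()->pc_bp_harden_kind. */
	printf("bp_harden_kind = %d\n", PCPU_GET(bp_harden_kind));
	return (0);
}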