diff --git a/sys/arm/arm/generic_timer.c b/sys/arm/arm/generic_timer.c
index 27c985c5fcbe..97976408c943 100644
--- a/sys/arm/arm/generic_timer.c
+++ b/sys/arm/arm/generic_timer.c
@@ -1,919 +1,922 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2011 The FreeBSD Foundation
* Copyright (c) 2013 Ruslan Bukin
* All rights reserved.
*
* Based on mpcore_timer.c developed by Ben Gray
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company nor the name of the author may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/**
* Cortex-A7, Cortex-A15, ARMv8 and later Generic Timer
*/
#include "opt_acpi.h"
#include "opt_platform.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if defined(__aarch64__)
#include
#include
#include
#endif
#ifdef FDT
#include
#include
#include
#endif
#ifdef DEV_ACPI
#include
#include
#endif
#define GT_PHYS_SECURE 0
#define GT_PHYS_NONSECURE 1
#define GT_VIRT 2
#define GT_HYP_PHYS 3
#define GT_HYP_VIRT 4
#define GT_IRQ_COUNT 5
#define GT_CTRL_ENABLE (1 << 0)
#define GT_CTRL_INT_MASK (1 << 1)
#define GT_CTRL_INT_STAT (1 << 2)
#define GT_REG_CTRL 0
#define GT_REG_TVAL 1
#define GT_CNTKCTL_PL0PTEN (1 << 9) /* PL0 Physical timer reg access */
#define GT_CNTKCTL_PL0VTEN (1 << 8) /* PL0 Virtual timer reg access */
#define GT_CNTKCTL_EVNTI (0xf << 4) /* Virtual counter event bits */
#define GT_CNTKCTL_EVNTDIR (1 << 3) /* Virtual counter event transition */
#define GT_CNTKCTL_EVNTEN (1 << 2) /* Enables virtual counter events */
#define GT_CNTKCTL_PL0VCTEN (1 << 1) /* PL0 CNTVCT and CNTFRQ access */
#define GT_CNTKCTL_PL0PCTEN (1 << 0) /* PL0 CNTPCT and CNTFRQ access */
#if defined(__aarch64__)
static bool __read_mostly enable_wfxt = false;
#endif
struct arm_tmr_softc;
struct arm_tmr_irq {
struct resource *res;
void *ihl;
int rid;
int idx;
};
struct arm_tmr_softc {
struct arm_tmr_irq irqs[GT_IRQ_COUNT];
uint64_t (*get_cntxct)(bool);
uint32_t clkfreq;
int irq_count;
struct eventtimer et;
bool physical_sys;
bool physical_user;
};
static struct arm_tmr_softc *arm_tmr_sc = NULL;
static const struct arm_tmr_irq_defs {
int idx;
const char *name;
int flags;
} arm_tmr_irq_defs[] = {
{
.idx = GT_PHYS_SECURE,
.name = "sec-phys",
.flags = RF_ACTIVE | RF_OPTIONAL,
},
{
.idx = GT_PHYS_NONSECURE,
.name = "phys",
.flags = RF_ACTIVE,
},
{
.idx = GT_VIRT,
.name = "virt",
.flags = RF_ACTIVE,
},
{
.idx = GT_HYP_PHYS,
.name = "hyp-phys",
.flags = RF_ACTIVE | RF_OPTIONAL,
},
{
.idx = GT_HYP_VIRT,
.name = "hyp-virt",
.flags = RF_ACTIVE | RF_OPTIONAL,
},
};
static int arm_tmr_attach(device_t);
static uint32_t arm_tmr_fill_vdso_timehands(struct vdso_timehands *vdso_th,
struct timecounter *tc);
static void arm_tmr_do_delay(int usec, void *);
static timecounter_get_t arm_tmr_get_timecount;
static struct timecounter arm_tmr_timecount = {
.tc_name = "ARM MPCore Timecounter",
.tc_get_timecount = arm_tmr_get_timecount,
.tc_poll_pps = NULL,
.tc_counter_mask = ~0u,
.tc_frequency = 0,
.tc_quality = 1000,
.tc_fill_vdso_timehands = arm_tmr_fill_vdso_timehands,
};
#ifdef __arm__
#define get_el0(x) cp15_## x ##_get()
#define get_el1(x) cp15_## x ##_get()
#define set_el0(x, val) cp15_## x ##_set(val)
#define set_el1(x, val) cp15_## x ##_set(val)
#define HAS_PHYS true
#define IN_VHE false
#else /* __aarch64__ */
#define get_el0(x) READ_SPECIALREG(x ##_el0)
#define get_el1(x) READ_SPECIALREG(x ##_el1)
#define set_el0(x, val) WRITE_SPECIALREG(x ##_el0, val)
#define set_el1(x, val) WRITE_SPECIALREG(x ##_el1, val)
#define HAS_PHYS has_hyp()
#define IN_VHE in_vhe()
#endif
static int
get_freq(void)
{
return (get_el0(cntfrq));
}
#ifdef FDT
static uint64_t
get_cntxct_a64_unstable(bool physical)
{
uint64_t val;
isb();
if (physical) {
do {
val = get_el0(cntpct);
}
while (((val + 1) & 0x7FF) <= 1);
}
else {
do {
val = get_el0(cntvct);
}
while (((val + 1) & 0x7FF) <= 1);
}
return (val);
}
#endif
static uint64_t
get_cntxct(bool physical)
{
uint64_t val;
isb();
if (physical)
val = get_el0(cntpct);
else
val = get_el0(cntvct);
return (val);
}
static int
set_ctrl(uint32_t val, bool physical)
{
if (physical)
set_el0(cntp_ctl, val);
else
set_el0(cntv_ctl, val);
isb();
return (0);
}
static int
set_tval(uint32_t val, bool physical)
{
if (physical)
set_el0(cntp_tval, val);
else
set_el0(cntv_tval, val);
isb();
return (0);
}
static int
get_ctrl(bool physical)
{
uint32_t val;
if (physical)
val = get_el0(cntp_ctl);
else
val = get_el0(cntv_ctl);
return (val);
}
static void
setup_user_access(void *arg __unused)
{
uint32_t cntkctl;
cntkctl = get_el1(cntkctl);
cntkctl &= ~(GT_CNTKCTL_PL0PTEN | GT_CNTKCTL_PL0VTEN |
GT_CNTKCTL_EVNTEN | GT_CNTKCTL_PL0PCTEN);
/* Always enable the virtual timer */
cntkctl |= GT_CNTKCTL_PL0VCTEN;
/* Enable the physical timer if supported */
if (arm_tmr_sc->physical_user) {
cntkctl |= GT_CNTKCTL_PL0PCTEN;
}
set_el1(cntkctl, cntkctl);
isb();
}
#ifdef __aarch64__
static bool
cntpct_handler(uint64_t esr, struct trapframe *frame)
{
uint64_t val;
int reg;
if (ESR_ELx_EXCEPTION(esr) != EXCP_MSR)
return (false);
if ((esr & ISS_MSR_DIR) == 0)
return (false);
if ((esr & ISS_MSR_REG_MASK) != CNTPCT_EL0_ISS)
return (false);
reg = ISS_MSR_Rt(esr);
val = READ_SPECIALREG(cntvct_el0);
if (reg < nitems(frame->tf_x)) {
frame->tf_x[reg] = val;
} else if (reg == 30) {
frame->tf_lr = val;
}
/*
* We will handle this instruction, move to the next so we
* don't trap here again.
*/
frame->tf_elr += INSN_SIZE;
return (true);
}
#endif
static void
tmr_setup_user_access(void *arg __unused)
{
#ifdef __aarch64__
int emulate;
#endif
if (arm_tmr_sc != NULL) {
smp_rendezvous(NULL, setup_user_access, NULL, NULL);
#ifdef __aarch64__
if (TUNABLE_INT_FETCH("hw.emulate_phys_counter", &emulate) &&
emulate != 0) {
install_sys_handler(cntpct_handler);
}
#endif
}
}
SYSINIT(tmr_ua, SI_SUB_SMP, SI_ORDER_ANY, tmr_setup_user_access, NULL);
static unsigned
arm_tmr_get_timecount(struct timecounter *tc)
{
return (arm_tmr_sc->get_cntxct(arm_tmr_sc->physical_sys));
}
static int
arm_tmr_start(struct eventtimer *et, sbintime_t first,
sbintime_t period __unused)
{
struct arm_tmr_softc *sc;
int counts, ctrl;
sc = (struct arm_tmr_softc *)et->et_priv;
if (first != 0) {
counts = ((uint32_t)et->et_frequency * first) >> 32;
ctrl = get_ctrl(sc->physical_sys);
ctrl &= ~GT_CTRL_INT_MASK;
ctrl |= GT_CTRL_ENABLE;
set_tval(counts, sc->physical_sys);
set_ctrl(ctrl, sc->physical_sys);
return (0);
}
return (EINVAL);
}
static void
arm_tmr_disable(bool physical)
{
int ctrl;
ctrl = get_ctrl(physical);
ctrl &= ~GT_CTRL_ENABLE;
set_ctrl(ctrl, physical);
}
static int
arm_tmr_stop(struct eventtimer *et)
{
struct arm_tmr_softc *sc;
sc = (struct arm_tmr_softc *)et->et_priv;
arm_tmr_disable(sc->physical_sys);
return (0);
}
static int
arm_tmr_intr(void *arg)
{
struct arm_tmr_softc *sc;
int ctrl;
sc = (struct arm_tmr_softc *)arg;
ctrl = get_ctrl(sc->physical_sys);
if (ctrl & GT_CTRL_INT_STAT) {
ctrl |= GT_CTRL_INT_MASK;
set_ctrl(ctrl, sc->physical_sys);
}
if (sc->et.et_active)
sc->et.et_event_cb(&sc->et, sc->et.et_arg);
return (FILTER_HANDLED);
}
static int
arm_tmr_attach_irq(device_t dev, struct arm_tmr_softc *sc,
const struct arm_tmr_irq_defs *irq_def, int rid, int flags)
{
struct arm_tmr_irq *irq;
irq = &sc->irqs[sc->irq_count];
irq->res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
&rid, flags);
if (irq->res == NULL) {
if (bootverbose || (flags & RF_OPTIONAL) == 0) {
device_printf(dev,
"could not allocate irq for %s interrupt '%s'\n",
(flags & RF_OPTIONAL) != 0 ? "optional" :
"required", irq_def->name);
}
if ((flags & RF_OPTIONAL) == 0)
return (ENXIO);
} else {
if (bootverbose)
device_printf(dev, "allocated irq for '%s'\n",
irq_def->name);
irq->rid = rid;
irq->idx = irq_def->idx;
sc->irq_count++;
}
return (0);
}
#ifdef FDT
static int
arm_tmr_fdt_probe(device_t dev)
{
if (!ofw_bus_status_okay(dev))
return (ENXIO);
if (ofw_bus_is_compatible(dev, "arm,armv8-timer")) {
device_set_desc(dev, "ARMv8 Generic Timer");
return (BUS_PROBE_DEFAULT);
} else if (ofw_bus_is_compatible(dev, "arm,armv7-timer")) {
device_set_desc(dev, "ARMv7 Generic Timer");
return (BUS_PROBE_DEFAULT);
}
return (ENXIO);
}
static int
arm_tmr_fdt_attach(device_t dev)
{
struct arm_tmr_softc *sc;
const struct arm_tmr_irq_defs *irq_def;
size_t i;
phandle_t node;
int error, rid;
bool has_names;
sc = device_get_softc(dev);
node = ofw_bus_get_node(dev);
has_names = OF_hasprop(node, "interrupt-names");
for (i = 0; i < nitems(arm_tmr_irq_defs); i++) {
int flags;
/*
* If we don't have names to go off of, we assume that they're
* in the "usual" order with sec-phys first and allocate by idx.
*/
irq_def = &arm_tmr_irq_defs[i];
rid = irq_def->idx;
flags = irq_def->flags;
if (has_names) {
error = ofw_bus_find_string_index(node,
"interrupt-names", irq_def->name, &rid);
/*
* If we have names, missing a name means we don't
* have it.
*/
if (error != 0) {
/*
* Could be noisy on a lot of platforms for no
* good cause.
*/
if (bootverbose || (flags & RF_OPTIONAL) == 0) {
device_printf(dev,
"could not find irq for %s interrupt '%s'\n",
(flags & RF_OPTIONAL) != 0 ?
"optional" : "required",
irq_def->name);
}
if ((flags & RF_OPTIONAL) == 0)
goto out;
continue;
}
/*
* Warn about failing to activate if we did actually
* have the name present.
*/
flags &= ~RF_OPTIONAL;
}
error = arm_tmr_attach_irq(dev, sc, irq_def, rid, flags);
if (error != 0)
goto out;
}
error = arm_tmr_attach(dev);
out:
if (error != 0) {
for (i = 0; i < sc->irq_count; i++) {
bus_release_resource(dev, SYS_RES_IRQ, sc->irqs[i].rid,
sc->irqs[i].res);
}
}
return (error);
}
#endif
#ifdef DEV_ACPI
static void
arm_tmr_acpi_add_irq(device_t parent, device_t dev, int rid, u_int irq)
{
BUS_SET_RESOURCE(parent, dev, SYS_RES_IRQ, rid, irq, 1);
}
static void
arm_tmr_acpi_identify(driver_t *driver, device_t parent)
{
ACPI_TABLE_GTDT *gtdt;
vm_paddr_t physaddr;
device_t dev;
physaddr = acpi_find_table(ACPI_SIG_GTDT);
if (physaddr == 0)
return;
gtdt = acpi_map_table(physaddr, ACPI_SIG_GTDT);
if (gtdt == NULL) {
device_printf(parent, "gic: Unable to map the GTDT\n");
return;
}
dev = BUS_ADD_CHILD(parent, BUS_PASS_TIMER + BUS_PASS_ORDER_MIDDLE,
"generic_timer", -1);
if (dev == NULL) {
device_printf(parent, "add gic child failed\n");
goto out;
}
arm_tmr_acpi_add_irq(parent, dev, GT_PHYS_SECURE,
gtdt->SecureEl1Interrupt);
arm_tmr_acpi_add_irq(parent, dev, GT_PHYS_NONSECURE,
gtdt->NonSecureEl1Interrupt);
arm_tmr_acpi_add_irq(parent, dev, GT_VIRT,
gtdt->VirtualTimerInterrupt);
arm_tmr_acpi_add_irq(parent, dev, GT_HYP_PHYS,
gtdt->NonSecureEl2Interrupt);
out:
acpi_unmap_table(gtdt);
}
static int
arm_tmr_acpi_probe(device_t dev)
{
device_set_desc(dev, "ARM Generic Timer");
return (BUS_PROBE_NOWILDCARD);
}
static int
arm_tmr_acpi_attach(device_t dev)
{
const struct arm_tmr_irq_defs *irq_def;
struct arm_tmr_softc *sc;
int error;
sc = device_get_softc(dev);
for (int i = 0; i < nitems(arm_tmr_irq_defs); i++) {
irq_def = &arm_tmr_irq_defs[i];
error = arm_tmr_attach_irq(dev, sc, irq_def, irq_def->idx,
irq_def->flags);
if (error != 0)
goto out;
}
error = arm_tmr_attach(dev);
out:
if (error != 0) {
for (int i = 0; i < sc->irq_count; i++) {
bus_release_resource(dev, SYS_RES_IRQ,
sc->irqs[i].rid, sc->irqs[i].res);
}
}
return (error);
}
#endif
static int
arm_tmr_attach(device_t dev)
{
struct arm_tmr_softc *sc;
#ifdef INVARIANTS
const struct arm_tmr_irq_defs *irq_def;
#endif
#ifdef FDT
phandle_t node;
pcell_t clock;
#endif
#ifdef __aarch64__
int user_phys;
#endif
int error;
int i, first_timer, last_timer;
sc = device_get_softc(dev);
if (arm_tmr_sc)
return (ENXIO);
sc->get_cntxct = &get_cntxct;
#ifdef FDT
/* Get the base clock frequency */
node = ofw_bus_get_node(dev);
if (node > 0) {
error = OF_getencprop(node, "clock-frequency", &clock,
sizeof(clock));
if (error > 0)
sc->clkfreq = clock;
if (OF_hasprop(node, "allwinner,sun50i-a64-unstable-timer")) {
sc->get_cntxct = &get_cntxct_a64_unstable;
if (bootverbose)
device_printf(dev,
"Enabling allwinner unstable timer workaround\n");
}
}
#endif
if (sc->clkfreq == 0) {
/* Try to get clock frequency from timer */
sc->clkfreq = get_freq();
}
if (sc->clkfreq == 0) {
device_printf(dev, "No clock frequency specified\n");
return (ENXIO);
}
#ifdef INVARIANTS
/* Confirm that non-optional irqs were allocated before coming in. */
for (i = 0; i < nitems(arm_tmr_irq_defs); i++) {
int j;
irq_def = &arm_tmr_irq_defs[i];
/* Skip optional interrupts */
if ((irq_def->flags & RF_OPTIONAL) != 0)
continue;
for (j = 0; j < sc->irq_count; j++) {
if (sc->irqs[j].idx == irq_def->idx)
break;
}
KASSERT(j < sc->irq_count, ("%s: Missing required interrupt %s",
__func__, irq_def->name));
}
#endif
#ifdef __aarch64__
if (IN_VHE) {
/*
* The kernel is running at EL2. The EL0 timer registers are
* re-mapped to the EL2 version. Because of this we need to
* use the EL2 interrupt.
*/
sc->physical_sys = true;
first_timer = GT_HYP_PHYS;
last_timer = GT_HYP_PHYS;
} else if (!HAS_PHYS) {
/*
* Use the virtual timer when we can't use the hypervisor.
* A hypervisor guest may change the virtual timer registers
* while executing so any use of the virtual timer interrupt
* needs to be coordinated with the virtual machine manager.
*/
sc->physical_sys = false;
first_timer = GT_VIRT;
last_timer = GT_VIRT;
} else
#endif
/* Otherwise set up the secure and non-secure physical timers. */
{
sc->physical_sys = true;
first_timer = GT_PHYS_SECURE;
last_timer = GT_PHYS_NONSECURE;
}
#ifdef __aarch64__
/*
* The virtual timer is always available on arm and arm64, tell
* userspace to use it.
*/
sc->physical_user = false;
/* Allow use of the physical counter in userspace when available */
if (TUNABLE_INT_FETCH("hw.userspace_allow_phys_counter", &user_phys) &&
user_phys != 0)
sc->physical_user = sc->physical_sys;
#else
/*
* The virtual timer depends on setting cntvoff from the hypervisor
* privilege level/el2, however this is only set on arm64.
*/
sc->physical_user = true;
#endif
arm_tmr_sc = sc;
/* Set up the secure, non-secure and virtual IRQ handlers */
for (i = 0; i < sc->irq_count; i++) {
/* Only enable IRQs on timers we expect to use */
if (sc->irqs[i].idx < first_timer ||
sc->irqs[i].idx > last_timer)
continue;
error = bus_setup_intr(dev, sc->irqs[i].res, INTR_TYPE_CLK,
arm_tmr_intr, NULL, sc, &sc->irqs[i].ihl);
if (error) {
device_printf(dev, "Unable to alloc int resource.\n");
for (int j = 0; j < i; j++)
bus_teardown_intr(dev, sc->irqs[j].res,
&sc->irqs[j].ihl);
return (ENXIO);
}
}
/* Disable the timers until we are ready */
arm_tmr_disable(false);
if (HAS_PHYS)
arm_tmr_disable(true);
arm_tmr_timecount.tc_frequency = sc->clkfreq;
tc_init(&arm_tmr_timecount);
sc->et.et_name = "ARM MPCore Eventtimer";
sc->et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU;
sc->et.et_quality = 1000;
sc->et.et_frequency = sc->clkfreq;
sc->et.et_min_period = (0x00000010LLU << 32) / sc->et.et_frequency;
sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency;
sc->et.et_start = arm_tmr_start;
sc->et.et_stop = arm_tmr_stop;
sc->et.et_priv = sc;
et_register(&sc->et);
#if defined(__arm__)
arm_set_delay(arm_tmr_do_delay, sc);
#endif
return (0);
}
#ifdef FDT
static device_method_t arm_tmr_fdt_methods[] = {
DEVMETHOD(device_probe, arm_tmr_fdt_probe),
DEVMETHOD(device_attach, arm_tmr_fdt_attach),
{ 0, 0 }
};
static DEFINE_CLASS_0(generic_timer, arm_tmr_fdt_driver, arm_tmr_fdt_methods,
sizeof(struct arm_tmr_softc));
EARLY_DRIVER_MODULE(timer, simplebus, arm_tmr_fdt_driver, 0, 0,
BUS_PASS_TIMER + BUS_PASS_ORDER_MIDDLE);
EARLY_DRIVER_MODULE(timer, ofwbus, arm_tmr_fdt_driver, 0, 0,
BUS_PASS_TIMER + BUS_PASS_ORDER_MIDDLE);
#endif
#ifdef DEV_ACPI
static device_method_t arm_tmr_acpi_methods[] = {
DEVMETHOD(device_identify, arm_tmr_acpi_identify),
DEVMETHOD(device_probe, arm_tmr_acpi_probe),
DEVMETHOD(device_attach, arm_tmr_acpi_attach),
{ 0, 0 }
};
static DEFINE_CLASS_0(generic_timer, arm_tmr_acpi_driver, arm_tmr_acpi_methods,
sizeof(struct arm_tmr_softc));
EARLY_DRIVER_MODULE(timer, acpi, arm_tmr_acpi_driver, 0, 0,
BUS_PASS_TIMER + BUS_PASS_ORDER_MIDDLE);
#endif
static int64_t
arm_tmr_get_counts(int usec)
{
int64_t counts, counts_per_usec;
/* Get the number of times to count */
counts_per_usec = ((arm_tmr_timecount.tc_frequency / 1000000) + 1);
/*
* Clamp the timeout at a maximum value (about 32 seconds with
* a 66MHz clock). *Nobody* should be delay()ing for anywhere
* near that length of time and if they are, they should be hung
* out to dry.
*/
if (usec >= (0x80000000U / counts_per_usec))
counts = (0x80000000U / counts_per_usec) - 1;
else
counts = usec * counts_per_usec;
return counts;
}
static void
arm_tmr_do_delay(int usec, void *arg)
{
struct arm_tmr_softc *sc = arg;
int64_t counts;
uint64_t first;
#if defined(__aarch64__)
int64_t end;
#endif
counts = arm_tmr_get_counts(usec);
first = sc->get_cntxct(sc->physical_sys);
#if defined(__aarch64__)
end = first + counts;
#endif
while ((sc->get_cntxct(sc->physical_sys) - first) < counts) {
#if defined(__aarch64__)
if (enable_wfxt)
wfet(end);
#endif
}
}
#if defined(__aarch64__)
void
DELAY(int usec)
{
int32_t counts;
TSENTER();
/*
* We have two options for a delay: using the timer, or using the wfet
* instruction. However, both of these depend on the timer being set
* up, and if it is not yet, just use a simple loop in the meantime.
*/
if (arm_tmr_sc != NULL) {
arm_tmr_do_delay(usec, arm_tmr_sc);
} else {
for (; usec > 0; usec--)
for (counts = 200; counts > 0; counts--)
/* Prevent the compiler from optimizing out the loop */
cpufunc_nullop();
}
TSEXIT();
}
-static bool
+static cpu_feat_en
wfxt_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64isar2;
if (!get_kernel_reg(ID_AA64ISAR2_EL1, &id_aa64isar2))
- return (false);
- return (ID_AA64ISAR2_WFxT_VAL(id_aa64isar2) != ID_AA64ISAR2_WFxT_NONE);
+ return (FEAT_ALWAYS_DISABLE);
+ if (ID_AA64ISAR2_WFxT_VAL(id_aa64isar2) >= ID_AA64ISAR2_WFxT_IMPL)
+ return (FEAT_DEFAULT_ENABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
wfxt_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
{
/* will be called unless wfxt_check returned FEAT_ALWAYS_DISABLE */
enable_wfxt = true;
return (true);
}
CPU_FEAT(feat_wfxt, "WFE and WFI instructions with timeout",
wfxt_check, NULL, wfxt_enable,
CPU_FEAT_AFTER_DEV | CPU_FEAT_SYSTEM);
#endif
static uint32_t
arm_tmr_fill_vdso_timehands(struct vdso_timehands *vdso_th,
struct timecounter *tc)
{
vdso_th->th_algo = VDSO_TH_ALGO_ARM_GENTIM;
vdso_th->th_physical = arm_tmr_sc->physical_user;
bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
return (1);
}
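A side note on the counter plumbing above: when the hw.emulate_phys_counter tunable is set, an EL0 read of CNTPCT_EL0 traps and cntpct_handler() serves it from CNTVCT_EL0, so userspace keeps issuing the same instruction whether the physical counter is real or emulated. A minimal sketch of such a read, assuming an aarch64 userland; the read_cntpct() helper is illustrative and not part of this diff:

#include <stdint.h>

/* Illustrative only: read the physical counter (real or emulated). */
static inline uint64_t
read_cntpct(void)
{
	uint64_t val;

	__asm__ __volatile__("isb; mrs %0, cntpct_el0" : "=r" (val));
	return (val);
}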
diff --git a/sys/arm64/arm64/cpu_feat.c b/sys/arm64/arm64/cpu_feat.c
index fd1b8429295f..986d5079e980 100644
--- a/sys/arm64/arm64/cpu_feat.c
+++ b/sys/arm64/arm64/cpu_feat.c
@@ -1,122 +1,142 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 Arm Ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
SYSCTL_NODE(_hw, OID_AUTO, feat, CTLFLAG_RD, 0, "CPU features/errata");
/* TODO: Make this a list if we ever grow a callback other than smccc_errata */
static cpu_feat_errata_check_fn cpu_feat_check_cb = NULL;
void
enable_cpu_feat(uint32_t stage)
{
+ char tunable[32];
struct cpu_feat **featp, *feat;
uint32_t midr;
u_int errata_count, *errata_list;
cpu_feat_errata errata_status;
+ cpu_feat_en check_status;
+ bool val;
MPASS((stage & ~CPU_FEAT_STAGE_MASK) == 0);
midr = get_midr();
SET_FOREACH(featp, cpu_feat_set) {
feat = *featp;
/* Run the enablement code at the correct stage of boot */
if ((feat->feat_flags & CPU_FEAT_STAGE_MASK) != stage)
continue;
/* If the feature is system wide run on a single CPU */
if ((feat->feat_flags & CPU_FEAT_SCOPE_MASK) == CPU_FEAT_SYSTEM &&
PCPU_GET(cpuid) != 0)
continue;
- if (feat->feat_check != NULL && !feat->feat_check(feat, midr))
- continue;
+ if (feat->feat_check != NULL)
+ check_status = feat->feat_check(feat, midr);
+ else
+ check_status = FEAT_DEFAULT_ENABLE;
+
+ /* Ignore features that are not present */
+ if (check_status == FEAT_ALWAYS_DISABLE)
+ continue;
+
+ snprintf(tunable, sizeof(tunable), "hw.feat.%s",
+ feat->feat_name);
+ if (TUNABLE_BOOL_FETCH(tunable, &val)) {
+ /* Is the feature disabled by the tunable? */
+ if (!val)
+ continue;
+ /* If enabled by the tunable then enable it */
+ } else if (check_status == FEAT_DEFAULT_DISABLE) {
+ /* No tunable set and disabled by default */
+ continue;
+ }
+
/*
* Check if the feature has any errata that may need a
* workaround applied (or it is to install the workaround for
* known errata).
*/
errata_status = ERRATA_NONE;
errata_list = NULL;
errata_count = 0;
if (feat->feat_has_errata != NULL) {
if (feat->feat_has_errata(feat, midr, &errata_list,
&errata_count)) {
/* Assume we are affected */
errata_status = ERRATA_AFFECTED;
}
}
if (errata_status == ERRATA_AFFECTED &&
cpu_feat_check_cb != NULL) {
for (int i = 0; i < errata_count; i++) {
cpu_feat_errata new_status;
/* Check if affected by this erratum */
new_status = cpu_feat_check_cb(feat,
errata_list[i]);
if (new_status != ERRATA_UNKNOWN) {
errata_status = new_status;
errata_list = &errata_list[i];
errata_count = 1;
break;
}
}
}
/* Shouldn't be possible */
MPASS(errata_status != ERRATA_UNKNOWN);
if (feat->feat_enable(feat, errata_status, errata_list,
errata_count))
feat->feat_enabled = true;
}
}
static void
enable_cpu_feat_after_dev(void *dummy __unused)
{
MPASS(PCPU_GET(cpuid) == 0);
enable_cpu_feat(CPU_FEAT_AFTER_DEV);
}
SYSINIT(enable_cpu_feat_after_dev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE,
enable_cpu_feat_after_dev, NULL);
void
cpu_feat_register_errata_check(cpu_feat_errata_check_fn cb)
{
MPASS(cpu_feat_check_cb == NULL);
cpu_feat_check_cb = cb;
}
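The net effect of the cpu_feat.c change above is that a feature whose check callback reports FEAT_DEFAULT_ENABLE or FEAT_DEFAULT_DISABLE can now be overridden from loader.conf through a hw.feat.<feat_name> boolean tunable, while FEAT_ALWAYS_DISABLE still skips the feature outright. A minimal sketch of a feature written against this API; the name, description and callbacks are hypothetical, only the CPU_FEAT() registration and the callback signatures mirror the wfxt code in generic_timer.c above:

/* Hypothetical opt-in feature; mirrors the wfxt callbacks above. */
static cpu_feat_en
feat_example_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
	/* Present, but stays off unless the tunable turns it on. */
	return (FEAT_DEFAULT_DISABLE);
}

static bool
feat_example_enable(const struct cpu_feat *feat __unused,
    cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
    u_int errata_count __unused)
{
	/* Reached only when the check and the tunable both allow it. */
	return (true);
}

CPU_FEAT(feat_example, "Example opt-in feature",
    feat_example_check, NULL, feat_example_enable,
    CPU_FEAT_AFTER_DEV | CPU_FEAT_SYSTEM);

Assuming feat_name is taken from the first CPU_FEAT() argument, as the snprintf() of "hw.feat.%s" implies, setting the hw.feat.feat_example tunable to a true value in loader.conf would enable this feature, while leaving it unset keeps the FEAT_DEFAULT_DISABLE behaviour.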
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index 6d70692fdf5d..f271891f423d 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -1,3423 +1,3423 @@
/*-
* Copyright (c) 2014 Andrew Turner
* Copyright (c) 2014 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by Semihalf
* under sponsorship of the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static MALLOC_DEFINE(M_IDENTCPU, "CPU ID", "arm64 CPU identification memory");
struct cpu_desc;
#ifdef INVARIANTS
static bool hwcaps_set = false;
#endif
static void print_cpu_midr(struct sbuf *sb, u_int cpu);
static void print_cpu_features(u_int cpu, struct cpu_desc *desc,
struct cpu_desc *prev_desc);
static void print_cpu_caches(struct sbuf *sb, struct cpu_desc *desc);
#ifdef COMPAT_FREEBSD32
static u_long parse_cpu_features_hwcap32(void);
#endif
const char machine[] = "arm64";
#ifdef SCTL_MASK32
extern int adaptive_machine_arch;
#endif
static SYSCTL_NODE(_machdep, OID_AUTO, cache, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"Cache management tuning");
static int allow_dic = 1;
SYSCTL_INT(_machdep_cache, OID_AUTO, allow_dic, CTLFLAG_RDTUN, &allow_dic, 0,
"Allow optimizations based on the DIC cache bit");
static int allow_idc = 1;
SYSCTL_INT(_machdep_cache, OID_AUTO, allow_idc, CTLFLAG_RDTUN, &allow_idc, 0,
"Allow optimizations based on the IDC cache bit");
static bool emulate_ctr = false;
static void check_cpu_regs(u_int cpu, struct cpu_desc *desc,
struct cpu_desc *prev_desc);
static uint64_t update_special_reg_field(uint64_t user_reg, u_int type,
uint64_t value, u_int width, u_int shift, bool sign);
/*
* The default implementation of I-cache sync assumes we have an
* aliasing cache until we know otherwise.
*/
void (*arm64_icache_sync_range)(void *, vm_size_t) =
&arm64_aliasing_icache_sync_range;
static int
sysctl_hw_machine(SYSCTL_HANDLER_ARGS)
{
#ifdef SCTL_MASK32
static const char machine32[] = "arm";
#endif
int error;
#ifdef SCTL_MASK32
if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch)
error = SYSCTL_OUT(req, machine32, sizeof(machine32));
else
#endif
error = SYSCTL_OUT(req, machine, sizeof(machine));
return (error);
}
SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD |
CTLFLAG_CAPRD | CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A",
"Machine class");
static char cpu_model[64];
SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_CAPRD,
cpu_model, sizeof(cpu_model), "Machine model");
#define MAX_CACHES 8 /* Maximum number of caches supported
architecturally. */
/*
* Per-CPU affinity as provided in MPIDR_EL1
* Indexed by CPU number in logical order selected by the system.
* Relevant fields can be extracted using CPU_AFFn macros,
* Aff3.Aff2.Aff1.Aff0 construct a unique CPU address in the system.
*
* Fields used by us:
* Aff1 - Cluster number
* Aff0 - CPU number in Aff1 cluster
*/
uint64_t __cpu_affinity[MAXCPU];
static u_int cpu_aff_levels;
struct cpu_desc {
uint64_t mpidr;
uint64_t id_aa64afr0;
uint64_t id_aa64afr1;
uint64_t id_aa64dfr0;
uint64_t id_aa64dfr1;
uint64_t id_aa64isar0;
uint64_t id_aa64isar1;
uint64_t id_aa64isar2;
uint64_t id_aa64mmfr0;
uint64_t id_aa64mmfr1;
uint64_t id_aa64mmfr2;
uint64_t id_aa64mmfr3;
uint64_t id_aa64mmfr4;
uint64_t id_aa64pfr0;
uint64_t id_aa64pfr1;
uint64_t id_aa64pfr2;
uint64_t id_aa64zfr0;
uint64_t ctr;
#ifdef COMPAT_FREEBSD32
uint64_t id_isar5;
uint64_t mvfr0;
uint64_t mvfr1;
#endif
uint64_t clidr;
uint32_t ccsidr[MAX_CACHES][2]; /* 2 possible types. */
bool have_sve;
};
static struct cpu_desc cpu_desc0;
static struct cpu_desc *cpu_desc;
static struct cpu_desc kern_cpu_desc;
static struct cpu_desc user_cpu_desc;
static struct cpu_desc l_user_cpu_desc;
static struct cpu_desc *
get_cpu_desc(u_int cpu)
{
/* The cpu_desc for CPU 0 is used before the allocator is ready. */
if (cpu == 0)
return (&cpu_desc0);
MPASS(cpu_desc != NULL);
return (&cpu_desc[cpu - 1]);
}
struct cpu_parts {
u_int part_id;
const char *part_name;
};
#define CPU_PART_NONE { 0, NULL }
struct cpu_implementers {
u_int impl_id;
const char *impl_name;
/*
* Part number is implementation defined
* so each vendor will have its own set of values and names.
*/
const struct cpu_parts *cpu_parts;
};
#define CPU_IMPLEMENTER_NONE { 0, NULL, NULL }
/*
* Per-implementer table of (PartNum, CPU Name) pairs.
*/
/* ARM Ltd. */
static const struct cpu_parts cpu_parts_arm[] = {
{ CPU_PART_AEM_V8, "AEMv8" },
{ CPU_PART_FOUNDATION, "Foundation-Model" },
{ CPU_PART_CORTEX_A34, "Cortex-A34" },
{ CPU_PART_CORTEX_A35, "Cortex-A35" },
{ CPU_PART_CORTEX_A53, "Cortex-A53" },
{ CPU_PART_CORTEX_A55, "Cortex-A55" },
{ CPU_PART_CORTEX_A57, "Cortex-A57" },
{ CPU_PART_CORTEX_A65, "Cortex-A65" },
{ CPU_PART_CORTEX_A65AE, "Cortex-A65AE" },
{ CPU_PART_CORTEX_A72, "Cortex-A72" },
{ CPU_PART_CORTEX_A73, "Cortex-A73" },
{ CPU_PART_CORTEX_A75, "Cortex-A75" },
{ CPU_PART_CORTEX_A76, "Cortex-A76" },
{ CPU_PART_CORTEX_A76AE, "Cortex-A76AE" },
{ CPU_PART_CORTEX_A77, "Cortex-A77" },
{ CPU_PART_CORTEX_A78, "Cortex-A78" },
{ CPU_PART_CORTEX_A78AE, "Cortex-A78AE" },
{ CPU_PART_CORTEX_A78C, "Cortex-A78C" },
{ CPU_PART_CORTEX_A510, "Cortex-A510" },
{ CPU_PART_CORTEX_A520, "Cortex-A520" },
{ CPU_PART_CORTEX_A710, "Cortex-A710" },
{ CPU_PART_CORTEX_A715, "Cortex-A715" },
{ CPU_PART_CORTEX_A720, "Cortex-A720" },
{ CPU_PART_CORTEX_A725, "Cortex-A725" },
{ CPU_PART_CORTEX_X925, "Cortex-X925" },
{ CPU_PART_CORTEX_X1, "Cortex-X1" },
{ CPU_PART_CORTEX_X1C, "Cortex-X1C" },
{ CPU_PART_CORTEX_X2, "Cortex-X2" },
{ CPU_PART_CORTEX_X3, "Cortex-X3" },
{ CPU_PART_CORTEX_X4, "Cortex-X4" },
{ CPU_PART_NEOVERSE_E1, "Neoverse-E1" },
{ CPU_PART_NEOVERSE_N1, "Neoverse-N1" },
{ CPU_PART_NEOVERSE_N2, "Neoverse-N2" },
{ CPU_PART_NEOVERSE_N3, "Neoverse-N3" },
{ CPU_PART_NEOVERSE_V1, "Neoverse-V1" },
{ CPU_PART_NEOVERSE_V2, "Neoverse-V2" },
{ CPU_PART_NEOVERSE_V3, "Neoverse-V3" },
{ CPU_PART_NEOVERSE_V3AE, "Neoverse-V3AE" },
CPU_PART_NONE,
};
/* Cavium */
static const struct cpu_parts cpu_parts_cavium[] = {
{ CPU_PART_THUNDERX, "ThunderX" },
{ CPU_PART_THUNDERX2, "ThunderX2" },
CPU_PART_NONE,
};
/* APM (now Ampere) */
static const struct cpu_parts cpu_parts_apm[] = {
{ CPU_PART_EMAG8180, "eMAG 8180" },
CPU_PART_NONE,
};
/* Ampere */
static const struct cpu_parts cpu_parts_ampere[] = {
{ CPU_PART_AMPERE1, "AmpereOne AC03" },
{ CPU_PART_AMPERE1A, "AmpereOne AC04" },
CPU_PART_NONE,
};
/* Microsoft */
static const struct cpu_parts cpu_parts_microsoft[] = {
{ CPU_PART_AZURE_COBALT_100, "Azure Cobalt 100" },
CPU_PART_NONE,
};
/* Qualcomm */
static const struct cpu_parts cpu_parts_qcom[] = {
{ CPU_PART_KRYO400_GOLD, "Kryo 400 Gold" },
{ CPU_PART_KRYO400_SILVER, "Kryo 400 Silver" },
CPU_PART_NONE,
};
/* Apple */
static const struct cpu_parts cpu_parts_apple[] = {
{ CPU_PART_M1_ICESTORM, "M1 Icestorm" },
{ CPU_PART_M1_FIRESTORM, "M1 Firestorm" },
{ CPU_PART_M1_ICESTORM_PRO, "M1 Pro Icestorm" },
{ CPU_PART_M1_FIRESTORM_PRO, "M1 Pro Firestorm" },
{ CPU_PART_M1_ICESTORM_MAX, "M1 Max Icestorm" },
{ CPU_PART_M1_FIRESTORM_MAX, "M1 Max Firestorm" },
{ CPU_PART_M2_BLIZZARD, "M2 Blizzard" },
{ CPU_PART_M2_AVALANCHE, "M2 Avalanche" },
{ CPU_PART_M2_BLIZZARD_PRO, "M2 Pro Blizzard" },
{ CPU_PART_M2_AVALANCHE_PRO, "M2 Pro Avalanche" },
{ CPU_PART_M2_BLIZZARD_MAX, "M2 Max Blizzard" },
{ CPU_PART_M2_AVALANCHE_MAX, "M2 Max Avalanche" },
CPU_PART_NONE,
};
/* Unknown */
static const struct cpu_parts cpu_parts_none[] = {
CPU_PART_NONE,
};
/*
* Implementers table.
*/
const struct cpu_implementers cpu_implementers[] = {
{ CPU_IMPL_AMPERE, "Ampere", cpu_parts_ampere },
{ CPU_IMPL_APPLE, "Apple", cpu_parts_apple },
{ CPU_IMPL_APM, "APM", cpu_parts_apm },
{ CPU_IMPL_ARM, "ARM", cpu_parts_arm },
{ CPU_IMPL_BROADCOM, "Broadcom", cpu_parts_none },
{ CPU_IMPL_CAVIUM, "Cavium", cpu_parts_cavium },
{ CPU_IMPL_DEC, "DEC", cpu_parts_none },
{ CPU_IMPL_FREESCALE, "Freescale", cpu_parts_none },
{ CPU_IMPL_FUJITSU, "Fujitsu", cpu_parts_none },
{ CPU_IMPL_HISILICON, "HiSilicon", cpu_parts_none },
{ CPU_IMPL_INFINEON, "IFX", cpu_parts_none },
{ CPU_IMPL_INTEL, "Intel", cpu_parts_none },
{ CPU_IMPL_MARVELL, "Marvell", cpu_parts_none },
{ CPU_IMPL_MICROSOFT, "Microsoft", cpu_parts_microsoft },
{ CPU_IMPL_NVIDIA, "NVIDIA", cpu_parts_none },
{ CPU_IMPL_QUALCOMM, "Qualcomm", cpu_parts_qcom },
CPU_IMPLEMENTER_NONE,
};
#define MRS_TYPE_MASK 0xf
#define MRS_INVALID 0
#define MRS_EXACT 1
#define MRS_EXACT_IF_DIFFERENT 2
#define MRS_LOWER 3
#define MRS_HIGHER_OR_ZERO 4
#define MRS_HIGHER 5
#define MRS_SAFE_SHIFT 4
#define MRS_SAFE_MASK (0xfu << MRS_SAFE_SHIFT)
#define MRS_SAFE(x) (((x) << MRS_SAFE_SHIFT) & MRS_SAFE_MASK)
#define MRS_SAFE_VAL(x) (((x) & MRS_SAFE_MASK) >> MRS_SAFE_SHIFT)
#define MRS_FREEBSD (1u << 8)
#define MRS_LINUX (1u << 9)
#define MRS_USERSPACE (MRS_FREEBSD | MRS_LINUX)
struct mrs_field_value {
uint64_t value;
const char *desc;
};
#define MRS_FIELD_VALUE(_value, _desc) \
{ \
.value = (_value), \
.desc = (_desc), \
}
#define MRS_FIELD_VALUE_NONE_IMPL(_reg, _field, _none, _impl) \
MRS_FIELD_VALUE(_reg ## _ ## _field ## _ ## _none, ""), \
MRS_FIELD_VALUE(_reg ## _ ## _field ## _ ## _impl, #_field)
#define MRS_FIELD_VALUE_COUNT(_reg, _field, _desc) \
MRS_FIELD_VALUE(0ul << _reg ## _ ## _field ## _SHIFT, "1 " _desc), \
MRS_FIELD_VALUE(1ul << _reg ## _ ## _field ## _SHIFT, "2 " _desc "s"), \
MRS_FIELD_VALUE(2ul << _reg ## _ ## _field ## _SHIFT, "3 " _desc "s"), \
MRS_FIELD_VALUE(3ul << _reg ## _ ## _field ## _SHIFT, "4 " _desc "s"), \
MRS_FIELD_VALUE(4ul << _reg ## _ ## _field ## _SHIFT, "5 " _desc "s"), \
MRS_FIELD_VALUE(5ul << _reg ## _ ## _field ## _SHIFT, "6 " _desc "s"), \
MRS_FIELD_VALUE(6ul << _reg ## _ ## _field ## _SHIFT, "7 " _desc "s"), \
MRS_FIELD_VALUE(7ul << _reg ## _ ## _field ## _SHIFT, "8 " _desc "s"), \
MRS_FIELD_VALUE(8ul << _reg ## _ ## _field ## _SHIFT, "9 " _desc "s"), \
MRS_FIELD_VALUE(9ul << _reg ## _ ## _field ## _SHIFT, "10 "_desc "s"), \
MRS_FIELD_VALUE(10ul<< _reg ## _ ## _field ## _SHIFT, "11 "_desc "s"), \
MRS_FIELD_VALUE(11ul<< _reg ## _ ## _field ## _SHIFT, "12 "_desc "s"), \
MRS_FIELD_VALUE(12ul<< _reg ## _ ## _field ## _SHIFT, "13 "_desc "s"), \
MRS_FIELD_VALUE(13ul<< _reg ## _ ## _field ## _SHIFT, "14 "_desc "s"), \
MRS_FIELD_VALUE(14ul<< _reg ## _ ## _field ## _SHIFT, "15 "_desc "s"), \
MRS_FIELD_VALUE(15ul<< _reg ## _ ## _field ## _SHIFT, "16 "_desc "s")
/*
* Used for printing I/D cache line sizes & CWG/ERG; as 0 is a special case,
* in some cases the decoded string needs to be passed in.
*/
#define MRS_FIELD_VALUE_CACHE(_reg, _field, _0desc, _desc) \
MRS_FIELD_VALUE(0ul << _reg ## _ ## _field ## _SHIFT, _0desc), \
MRS_FIELD_VALUE(1ul << _reg ## _ ## _field ## _SHIFT, "8 " _desc), \
MRS_FIELD_VALUE(2ul << _reg ## _ ## _field ## _SHIFT, "16 " _desc), \
MRS_FIELD_VALUE(3ul << _reg ## _ ## _field ## _SHIFT, "32 " _desc), \
MRS_FIELD_VALUE(4ul << _reg ## _ ## _field ## _SHIFT, "64 " _desc), \
MRS_FIELD_VALUE(5ul << _reg ## _ ## _field ## _SHIFT, "128 " _desc), \
MRS_FIELD_VALUE(6ul << _reg ## _ ## _field ## _SHIFT, "256 " _desc), \
MRS_FIELD_VALUE(7ul << _reg ## _ ## _field ## _SHIFT, "512 " _desc), \
MRS_FIELD_VALUE(8ul << _reg ## _ ## _field ## _SHIFT, "1k " _desc), \
MRS_FIELD_VALUE(9ul << _reg ## _ ## _field ## _SHIFT, "2k " _desc), \
MRS_FIELD_VALUE(10ul<< _reg ## _ ## _field ## _SHIFT, "4k " _desc), \
MRS_FIELD_VALUE(11ul<< _reg ## _ ## _field ## _SHIFT, "8k " _desc), \
MRS_FIELD_VALUE(12ul<< _reg ## _ ## _field ## _SHIFT, "16k " _desc), \
MRS_FIELD_VALUE(13ul<< _reg ## _ ## _field ## _SHIFT, "32k " _desc), \
MRS_FIELD_VALUE(14ul<< _reg ## _ ## _field ## _SHIFT, "64k " _desc), \
MRS_FIELD_VALUE(15ul<< _reg ## _ ## _field ## _SHIFT, "128k "_desc)
#define MRS_FIELD_VALUE_END { .desc = NULL }
struct mrs_field_hwcap {
uint64_t min;
u_long hwcap_val;
u_int hwcap_id;
};
#define MRS_HWCAP(_hwcap_id, _val, _min) \
{ \
.hwcap_id = (_hwcap_id), \
.hwcap_val = (_val), \
.min = (_min), \
}
#define MRS_HWCAP_END { .hwcap_id = 0 }
struct mrs_field {
const char *name;
const struct mrs_field_value *values;
const struct mrs_field_hwcap *hwcaps;
uint64_t mask;
bool sign;
u_int type;
u_int width;
u_int shift;
};
#define MRS_FIELD_RES1(_width, _shift) \
{ \
.sign = false, \
.type = MRS_EXACT | MRS_SAFE((1u << (_width)) - 1) | \
MRS_USERSPACE, \
.width = (_width), \
.shift = (_shift), \
}
#define MRS_FIELD_HWCAP(_register, _name, _sign, _type, _visibility, \
_values, _hwcap) \
{ \
.name = #_name, \
.sign = (_sign), \
.type = ((_type) | (_visibility)), \
.width = _register ## _ ## _name ## _WIDTH, \
.shift = _register ## _ ## _name ## _SHIFT, \
.mask = _register ## _ ## _name ## _MASK, \
.values = (_values), \
.hwcaps = (_hwcap), \
}
#define MRS_FIELD(_register, _name, _sign, _type, _visibility, _values) \
MRS_FIELD_HWCAP(_register, _name, _sign, _type, _visibility, \
_values, NULL)
#define MRS_FIELD_END { .type = MRS_INVALID, }
/* CTR_EL0 */
static const struct mrs_field_value ctr_dic[] = {
MRS_FIELD_VALUE_NONE_IMPL(CTR, DIC, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value ctr_idc[] = {
MRS_FIELD_VALUE_NONE_IMPL(CTR, IDC, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value ctr_cwg[] = {
MRS_FIELD_VALUE_CACHE(CTR, CWG, "Unknown CWG",
"byte CWG"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value ctr_erg[] = {
MRS_FIELD_VALUE_CACHE(CTR, ERG, "Unknown ERG",
"byte ERG"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value ctr_dline[] = {
MRS_FIELD_VALUE_CACHE(CTR, DLINE, "4 byte D-cacheline",
"byte D-cacheline"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value ctr_l1ip[] = {
MRS_FIELD_VALUE(CTR_L1IP_VIPT, "VIPT I-cache"),
MRS_FIELD_VALUE(CTR_L1IP_PIPT, "PIPT I-cache"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value ctr_iline[] = {
MRS_FIELD_VALUE_CACHE(CTR, ILINE, "4 byte I-cacheline",
"byte I-cacheline"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field ctr_fields[] = {
/* Bit 31 is RES1 */
MRS_FIELD_RES1(1, 31),
MRS_FIELD(CTR, DIC, false, MRS_LOWER, MRS_USERSPACE, ctr_dic),
MRS_FIELD(CTR, IDC, false, MRS_LOWER, MRS_USERSPACE, ctr_idc),
MRS_FIELD(CTR, CWG, false, MRS_HIGHER_OR_ZERO, MRS_USERSPACE, ctr_cwg),
MRS_FIELD(CTR, ERG, false, MRS_HIGHER_OR_ZERO, MRS_USERSPACE, ctr_erg),
MRS_FIELD(CTR, DLINE, false, MRS_LOWER, MRS_USERSPACE, ctr_dline),
/* If the ICache types are different report the safe option */
MRS_FIELD(CTR, L1IP, false, MRS_EXACT_IF_DIFFERENT |
MRS_SAFE(CTR_L1IP_VIPT >> CTR_L1IP_SHIFT), MRS_USERSPACE,
ctr_l1ip),
MRS_FIELD(CTR, ILINE, false, MRS_LOWER, MRS_USERSPACE, ctr_iline),
MRS_FIELD_END,
};
/* ID_AA64AFR0_EL1 */
static const struct mrs_field id_aa64afr0_fields[] = {
MRS_FIELD_END,
};
/* ID_AA64AFR1_EL1 */
static const struct mrs_field id_aa64afr1_fields[] = {
MRS_FIELD_END,
};
/* ID_AA64DFR0_EL1 */
static const struct mrs_field_value id_aa64dfr0_hpmn0[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64DFR0, HPMN0, CONSTR, DEFINED),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_brbe[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64DFR0, BRBE, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64DFR0_BRBE_EL3, "BRBE EL3"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_mtpmu[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64DFR0, MTPMU, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64DFR0_MTPMU_NONE_MT_RES0, "MTPMU res0"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_tracebuffer[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64DFR0, TraceBuffer, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_tracefilt[] = {
MRS_FIELD_VALUE(ID_AA64DFR0_TraceFilt_NONE, ""),
MRS_FIELD_VALUE(ID_AA64DFR0_TraceFilt_8_4, "Trace v8.4"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_doublelock[] = {
MRS_FIELD_VALUE(ID_AA64DFR0_DoubleLock_IMPL, "DoubleLock"),
MRS_FIELD_VALUE(ID_AA64DFR0_DoubleLock_NONE, ""),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_pmsver[] = {
MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_NONE, ""),
MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_SPE, "SPE"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_SPE_1_1, "SPEv1p1"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_SPE_1_2, "SPEv1p2"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_SPE_1_3, "SPEv1p3"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMSVer_SPE_1_4, "SPEv1p4"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_ctx_cmps[] = {
MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, CTX_CMPs, "CTX BKPT"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_wrps[] = {
MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, WRPs, "Watchpoint"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_pmss[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64DFR0, PMSS, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_brps[] = {
MRS_FIELD_VALUE_COUNT(ID_AA64DFR0, BRPs, "Breakpoint"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_pmuver[] = {
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_NONE, ""),
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3, "PMUv3"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3_1, "PMUv3p1"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3_4, "PMUv3p4"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3_5, "PMUv3p5"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3_7, "PMUv3p7"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3_8, "PMUv3p8"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_3_9, "PMUv3p9"),
MRS_FIELD_VALUE(ID_AA64DFR0_PMUVer_IMPL, "IMPL PMU"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_tracever[] = {
MRS_FIELD_VALUE(ID_AA64DFR0_TraceVer_NONE, ""),
MRS_FIELD_VALUE(ID_AA64DFR0_TraceVer_IMPL, "Trace"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr0_debugver[] = {
MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8, "Debugv8"),
MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_VHE, "Debugv8_VHE"),
MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_2, "Debugv8p2"),
MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_4, "Debugv8p4"),
MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_8, "Debugv8p8"),
MRS_FIELD_VALUE(ID_AA64DFR0_DebugVer_8_9, "Debugv8p9"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field id_aa64dfr0_fields[] = {
MRS_FIELD(ID_AA64DFR0, HPMN0, false, MRS_LOWER, 0, id_aa64dfr0_hpmn0),
MRS_FIELD(ID_AA64DFR0, BRBE, false, MRS_LOWER, 0, id_aa64dfr0_brbe),
MRS_FIELD(ID_AA64DFR0, MTPMU, true, MRS_LOWER, 0, id_aa64dfr0_mtpmu),
MRS_FIELD(ID_AA64DFR0, TraceBuffer, false, MRS_LOWER, 0,
id_aa64dfr0_tracebuffer),
MRS_FIELD(ID_AA64DFR0, TraceFilt, false, MRS_LOWER, 0,
id_aa64dfr0_tracefilt),
MRS_FIELD(ID_AA64DFR0, DoubleLock, true, MRS_LOWER, 0,
id_aa64dfr0_doublelock),
MRS_FIELD(ID_AA64DFR0, PMSVer, false, MRS_LOWER, 0, id_aa64dfr0_pmsver),
MRS_FIELD(ID_AA64DFR0, CTX_CMPs, false, MRS_LOWER, 0,
id_aa64dfr0_ctx_cmps),
MRS_FIELD(ID_AA64DFR0, WRPs, false, MRS_LOWER, MRS_USERSPACE,
id_aa64dfr0_wrps),
MRS_FIELD(ID_AA64DFR0, PMSS, false, MRS_LOWER, 0, id_aa64dfr0_pmss),
MRS_FIELD(ID_AA64DFR0, BRPs, false, MRS_LOWER, MRS_USERSPACE,
id_aa64dfr0_brps),
MRS_FIELD(ID_AA64DFR0, PMUVer, true, MRS_LOWER, 0, id_aa64dfr0_pmuver),
MRS_FIELD(ID_AA64DFR0, TraceVer, false, MRS_LOWER, 0,
id_aa64dfr0_tracever),
MRS_FIELD(ID_AA64DFR0, DebugVer, false, MRS_LOWER | MRS_SAFE(0x6), 0,
id_aa64dfr0_debugver),
MRS_FIELD_END,
};
/* ID_AA64DFR1_EL1 */
static const struct mrs_field_value id_aa64dfr1_dpfzs[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64DFR1, DPFZS, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr1_pmicntr[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64DFR1, PMICNTR, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64dfr1_spmu[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64DFR1, SPMU, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field id_aa64dfr1_fields[] = {
MRS_FIELD(ID_AA64DFR1, DPFZS, false, MRS_LOWER, 0, id_aa64dfr1_dpfzs),
MRS_FIELD(ID_AA64DFR1, PMICNTR, false, MRS_LOWER, 0, id_aa64dfr1_pmicntr),
MRS_FIELD(ID_AA64DFR1, SPMU, false, MRS_LOWER, 0, id_aa64dfr1_spmu),
MRS_FIELD_END,
};
/* ID_AA64ISAR0_EL1 */
static const struct mrs_field_value id_aa64isar0_rndr[] = {
MRS_FIELD_VALUE(ID_AA64ISAR0_RNDR_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR0_RNDR_IMPL, "RNG"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_rndr_caps[] = {
MRS_HWCAP(2, HWCAP2_RNG, ID_AA64ISAR0_RNDR_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_tlb[] = {
MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_TLBIOS, "TLBI-OS"),
MRS_FIELD_VALUE(ID_AA64ISAR0_TLB_TLBIOSR, "TLBI-OSR"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar0_ts[] = {
MRS_FIELD_VALUE(ID_AA64ISAR0_TS_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR0_TS_CondM_8_4, "CondM-8.4"),
MRS_FIELD_VALUE(ID_AA64ISAR0_TS_CondM_8_5, "CondM-8.5"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_ts_caps[] = {
MRS_HWCAP(1, HWCAP_FLAGM, ID_AA64ISAR0_TS_CondM_8_4),
MRS_HWCAP(2, HWCAP2_FLAGM2, ID_AA64ISAR0_TS_CondM_8_5),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_fhm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, FHM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_fhm_caps[] = {
MRS_HWCAP(1, HWCAP_ASIMDFHM, ID_AA64ISAR0_FHM_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_dp[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, DP, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_dp_caps[] = {
MRS_HWCAP(1, HWCAP_ASIMDDP, ID_AA64ISAR0_DP_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_sm4[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SM4, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_sm4_caps[] = {
MRS_HWCAP(1, HWCAP_SM4, ID_AA64ISAR0_SM4_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_sm3[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SM3, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_sm3_caps[] = {
MRS_HWCAP(1, HWCAP_SM3, ID_AA64ISAR0_SM3_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_sha3[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA3, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_sha3_caps[] = {
MRS_HWCAP(1, HWCAP_SHA3, ID_AA64ISAR0_SHA3_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_rdm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, RDM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_rdm_caps[] = {
MRS_HWCAP(1, HWCAP_ASIMDRDM, ID_AA64ISAR0_RDM_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_tme[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, TME, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar0_atomic[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, Atomic, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_atomic_caps[] = {
MRS_HWCAP(1, HWCAP_ATOMICS, ID_AA64ISAR0_Atomic_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_crc32[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, CRC32, NONE, BASE),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_crc32_caps[] = {
MRS_HWCAP(1, HWCAP_CRC32, ID_AA64ISAR0_CRC32_BASE),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_sha2[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA2, NONE, BASE),
MRS_FIELD_VALUE(ID_AA64ISAR0_SHA2_512, "SHA2+SHA512"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_sha2_caps[] = {
MRS_HWCAP(1, HWCAP_SHA2, ID_AA64ISAR0_SHA2_BASE),
MRS_HWCAP(1, HWCAP_SHA512, ID_AA64ISAR0_SHA2_512),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_sha1[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, SHA1, NONE, BASE),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_sha1_caps[] = {
MRS_HWCAP(1, HWCAP_SHA1, ID_AA64ISAR0_SHA1_BASE),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar0_aes[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR0, AES, NONE, BASE),
MRS_FIELD_VALUE(ID_AA64ISAR0_AES_PMULL, "AES+PMULL"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar0_aes_caps[] = {
MRS_HWCAP(1, HWCAP_AES, ID_AA64ISAR0_AES_BASE),
MRS_HWCAP(1, HWCAP_PMULL, ID_AA64ISAR0_AES_PMULL),
MRS_HWCAP_END
};
static const struct mrs_field id_aa64isar0_fields[] = {
MRS_FIELD_HWCAP(ID_AA64ISAR0, RNDR, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_rndr, id_aa64isar0_rndr_caps),
MRS_FIELD(ID_AA64ISAR0, TLB, false, MRS_LOWER, 0, id_aa64isar0_tlb),
MRS_FIELD_HWCAP(ID_AA64ISAR0, TS, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_ts, id_aa64isar0_ts_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, FHM, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_fhm, id_aa64isar0_fhm_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, DP, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_dp, id_aa64isar0_dp_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, SM4, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_sm4, id_aa64isar0_sm4_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, SM3, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_sm3, id_aa64isar0_sm3_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, SHA3, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_sha3, id_aa64isar0_sha3_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, RDM, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_rdm, id_aa64isar0_rdm_caps),
MRS_FIELD(ID_AA64ISAR0, TME, false, MRS_LOWER, 0, id_aa64isar0_tme),
MRS_FIELD_HWCAP(ID_AA64ISAR0, Atomic, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_atomic, id_aa64isar0_atomic_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, CRC32, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_crc32, id_aa64isar0_crc32_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, SHA2, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_sha2, id_aa64isar0_sha2_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, SHA1, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_sha1, id_aa64isar0_sha1_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR0, AES, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar0_aes, id_aa64isar0_aes_caps),
MRS_FIELD_END,
};
/* ID_AA64ISAR1_EL1 */
static const struct mrs_field_value id_aa64isar1_ls64[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, LS64, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64ISAR1_LS64_V, "LS64v"),
MRS_FIELD_VALUE(ID_AA64ISAR1_LS64_ACCDATA, "LS64+ACCDATA"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar1_xs[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, XS, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar1_i8mm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, I8MM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_i8mm_caps[] = {
MRS_HWCAP(2, HWCAP2_I8MM, ID_AA64ISAR1_I8MM_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_dgh[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, DGH, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_dgh_caps[] = {
MRS_HWCAP(2, HWCAP2_DGH, ID_AA64ISAR1_DGH_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_bf16[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, BF16, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64ISAR1_BF16_EBF, "EBF16"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_bf16_caps[] = {
MRS_HWCAP(2, HWCAP2_BF16, ID_AA64ISAR1_BF16_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_specres[] = {
MRS_FIELD_VALUE(ID_AA64ISAR1_SPECRES_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR1_SPECRES_8_5, "PredInv v8.5"),
MRS_FIELD_VALUE(ID_AA64ISAR1_SPECRES_8_9, "PredInv v8.9"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar1_sb[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, SB, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_sb_caps[] = {
MRS_HWCAP(1, HWCAP_SB, ID_AA64ISAR1_SB_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_frintts[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, FRINTTS, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_frintts_caps[] = {
MRS_HWCAP(2, HWCAP2_FRINT, ID_AA64ISAR1_FRINTTS_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_gpi[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, GPI, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_gpi_caps[] = {
MRS_HWCAP(1, HWCAP_PACG, ID_AA64ISAR1_GPI_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_gpa[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, GPA, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_gpa_caps[] = {
MRS_HWCAP(1, HWCAP_PACG, ID_AA64ISAR1_GPA_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_lrcpc[] = {
MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_RCPC_8_3, "RCPC-8.3"),
MRS_FIELD_VALUE(ID_AA64ISAR1_LRCPC_RCPC_8_4, "RCPC-8.4"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_lrcpc_caps[] = {
MRS_HWCAP(1, HWCAP_LRCPC, ID_AA64ISAR1_LRCPC_RCPC_8_3),
MRS_HWCAP(1, HWCAP_ILRCPC, ID_AA64ISAR1_LRCPC_RCPC_8_4),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_fcma[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, FCMA, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_fcma_caps[] = {
MRS_HWCAP(1, HWCAP_FCMA, ID_AA64ISAR1_FCMA_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_jscvt[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR1, JSCVT, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_jscvt_caps[] = {
MRS_HWCAP(1, HWCAP_JSCVT, ID_AA64ISAR1_JSCVT_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_api[] = {
MRS_FIELD_VALUE(ID_AA64ISAR1_API_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR1_API_PAC, "API PAC"),
MRS_FIELD_VALUE(ID_AA64ISAR1_API_EPAC, "API EPAC"),
MRS_FIELD_VALUE(ID_AA64ISAR1_API_EPAC2, "Impl PAuth+EPAC2"),
MRS_FIELD_VALUE(ID_AA64ISAR1_API_FPAC, "Impl PAuth+FPAC"),
MRS_FIELD_VALUE(ID_AA64ISAR1_API_FPAC_COMBINED,
"Impl PAuth+FPAC+Combined"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_api_caps[] = {
MRS_HWCAP(1, HWCAP_PACA, ID_AA64ISAR1_API_PAC),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_apa[] = {
MRS_FIELD_VALUE(ID_AA64ISAR1_APA_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR1_APA_PAC, "APA PAC"),
MRS_FIELD_VALUE(ID_AA64ISAR1_APA_EPAC, "APA EPAC"),
MRS_FIELD_VALUE(ID_AA64ISAR1_APA_EPAC2, "APA EPAC2"),
MRS_FIELD_VALUE(ID_AA64ISAR1_APA_FPAC, "APA FPAC"),
MRS_FIELD_VALUE(ID_AA64ISAR1_APA_FPAC_COMBINED,
"APA FPAC+Combined"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_apa_caps[] = {
MRS_HWCAP(1, HWCAP_PACA, ID_AA64ISAR1_APA_PAC),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar1_dpb[] = {
MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_DCCVAP, "DCPoP"),
MRS_FIELD_VALUE(ID_AA64ISAR1_DPB_DCCVADP, "DCCVADP"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar1_dpb_caps[] = {
MRS_HWCAP(1, HWCAP_DCPOP, ID_AA64ISAR1_DPB_DCCVAP),
MRS_HWCAP(2, HWCAP2_DCPODP, ID_AA64ISAR1_DPB_DCCVADP),
MRS_HWCAP_END
};
static const struct mrs_field id_aa64isar1_fields[] = {
MRS_FIELD(ID_AA64ISAR1, LS64, false, MRS_LOWER, 0, id_aa64isar1_ls64),
MRS_FIELD(ID_AA64ISAR1, XS, false, MRS_LOWER, 0, id_aa64isar1_xs),
MRS_FIELD_HWCAP(ID_AA64ISAR1, I8MM, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_i8mm, id_aa64isar1_i8mm_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, DGH, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_dgh, id_aa64isar1_dgh_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, BF16, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_bf16, id_aa64isar1_bf16_caps),
MRS_FIELD(ID_AA64ISAR1, SPECRES, false, MRS_LOWER, 0,
id_aa64isar1_specres),
MRS_FIELD_HWCAP(ID_AA64ISAR1, SB, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_sb, id_aa64isar1_sb_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, FRINTTS, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_frintts, id_aa64isar1_frintts_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, GPI, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_gpi, id_aa64isar1_gpi_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, GPA, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_gpa, id_aa64isar1_gpa_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, LRCPC, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_lrcpc, id_aa64isar1_lrcpc_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, FCMA, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_fcma, id_aa64isar1_fcma_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, JSCVT, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_jscvt, id_aa64isar1_jscvt_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, API, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_api, id_aa64isar1_api_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, APA, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_apa, id_aa64isar1_apa_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR1, DPB, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar1_dpb, id_aa64isar1_dpb_caps),
MRS_FIELD_END,
};
/* ID_AA64ISAR2_EL1 */
static const struct mrs_field_value id_aa64isar2_ats1a[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, ATS1A, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar2_cssc[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, CSSC, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar2_rprfm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, RPRFM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar2_prfmslc[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, PRFMSLC, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar2_clrbhb[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, CLRBHB, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar2_pac_frac[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, PAC_frac, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar2_bc[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, BC, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar2_mops[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, MOPS, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64isar2_apa3[] = {
MRS_FIELD_VALUE(ID_AA64ISAR2_APA3_NONE, ""),
MRS_FIELD_VALUE(ID_AA64ISAR2_APA3_PAC, "APA3 PAC"),
MRS_FIELD_VALUE(ID_AA64ISAR2_APA3_EPAC, "APA3 EPAC"),
MRS_FIELD_VALUE(ID_AA64ISAR2_APA3_EPAC2, "APA3 EPAC2"),
MRS_FIELD_VALUE(ID_AA64ISAR2_APA3_FPAC, "APA3 FPAC"),
MRS_FIELD_VALUE(ID_AA64ISAR2_APA3_FPAC_COMBINED,
"APA3 FPAC+Combined"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar2_apa3_caps[] = {
MRS_HWCAP(1, HWCAP_PACA, ID_AA64ISAR2_APA3_PAC),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar2_gpa3[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, GPA3, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar2_gpa3_caps[] = {
MRS_HWCAP(1, HWCAP_PACG, ID_AA64ISAR2_GPA3_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar2_rpres[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, RPRES, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar2_rpres_caps[] = {
MRS_HWCAP(2, HWCAP2_RPRES, ID_AA64ISAR2_RPRES_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64isar2_wfxt[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ISAR2, WFxT, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64isar2_wfxt_caps[] = {
MRS_HWCAP(2, HWCAP2_WFXT, ID_AA64ISAR2_WFxT_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field id_aa64isar2_fields[] = {
MRS_FIELD(ID_AA64ISAR2, ATS1A, false, MRS_LOWER, 0, id_aa64isar2_ats1a),
MRS_FIELD(ID_AA64ISAR2, CSSC, false, MRS_LOWER, 0, id_aa64isar2_cssc),
MRS_FIELD(ID_AA64ISAR2, RPRFM, false, MRS_LOWER, 0, id_aa64isar2_rprfm),
MRS_FIELD(ID_AA64ISAR2, PRFMSLC, false, MRS_LOWER, 0, id_aa64isar2_prfmslc),
MRS_FIELD(ID_AA64ISAR2, CLRBHB, false, MRS_LOWER, 0, id_aa64isar2_clrbhb),
MRS_FIELD(ID_AA64ISAR2, PAC_frac, false, MRS_LOWER, 0,
id_aa64isar2_pac_frac),
MRS_FIELD(ID_AA64ISAR2, BC, false, MRS_LOWER, 0, id_aa64isar2_bc),
MRS_FIELD(ID_AA64ISAR2, MOPS, false, MRS_LOWER, 0, id_aa64isar2_mops),
MRS_FIELD_HWCAP(ID_AA64ISAR2, APA3, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar2_apa3, id_aa64isar2_apa3_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR2, GPA3, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar2_gpa3, id_aa64isar2_gpa3_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR2, RPRES, false, MRS_LOWER, MRS_USERSPACE,
id_aa64isar2_rpres, id_aa64isar2_rpres_caps),
MRS_FIELD_HWCAP(ID_AA64ISAR2, WFxT, false, MRS_LOWER, 0,
id_aa64isar2_wfxt, id_aa64isar2_wfxt_caps),
MRS_FIELD_END,
};
/* ID_AA64MMFR0_EL1 */
static const struct mrs_field_value id_aa64mmfr0_ecv[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, ECV, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64MMFR0_ECV_POFF, "ECV POFF"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_fgt[] = {
MRS_FIELD_VALUE(ID_AA64MMFR0_FGT_NONE, ""),
MRS_FIELD_VALUE(ID_AA64MMFR0_FGT_8_6, "FGT v8.6"),
MRS_FIELD_VALUE(ID_AA64MMFR0_FGT_8_9, "FGT v8.9"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_exs[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, ExS, ALL, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_tgran4_2[] = {
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran4_2_TGran4, ""),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran4_2_NONE, "No S2 TGran4"),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran4_2_IMPL, "S2 TGran4"),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran4_2_LPA2, "S2 TGran4+LPA2"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_tgran64_2[] = {
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran64_2_TGran64, ""),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran64_2_NONE, "No S2 TGran64"),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran64_2_IMPL, "S2 TGran64"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_tgran16_2[] = {
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran16_2_TGran16, ""),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran16_2_NONE, "No S2 TGran16"),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran16_2_IMPL, "S2 TGran16"),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran16_2_LPA2, "S2 TGran16+LPA2"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_tgran4[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran4, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran4_LPA2, "TGran4+LPA2"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_tgran64[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran64, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_tgran16[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, TGran16, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64MMFR0_TGran16_LPA2, "TGran16+LPA2"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_bigendel0[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, BigEndEL0, FIXED, MIXED),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_snsmem[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, SNSMem, NONE, DISTINCT),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_bigend[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR0, BigEnd, FIXED, MIXED),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_asidbits[] = {
MRS_FIELD_VALUE(ID_AA64MMFR0_ASIDBits_8, "8bit ASID"),
MRS_FIELD_VALUE(ID_AA64MMFR0_ASIDBits_16, "16bit ASID"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr0_parange[] = {
MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4G, "4GB PA"),
MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_64G, "64GB PA"),
MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_1T, "1TB PA"),
MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4T, "4TB PA"),
MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_16T, "16TB PA"),
MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_256T, "256TB PA"),
MRS_FIELD_VALUE(ID_AA64MMFR0_PARange_4P, "4PB PA"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field id_aa64mmfr0_fields[] = {
MRS_FIELD(ID_AA64MMFR0, ECV, false, MRS_LOWER, 0, id_aa64mmfr0_ecv),
MRS_FIELD(ID_AA64MMFR0, FGT, false, MRS_LOWER, 0, id_aa64mmfr0_fgt),
MRS_FIELD(ID_AA64MMFR0, ExS, false, MRS_LOWER, 0, id_aa64mmfr0_exs),
MRS_FIELD(ID_AA64MMFR0, TGran4_2, false, MRS_LOWER, 0,
id_aa64mmfr0_tgran4_2),
MRS_FIELD(ID_AA64MMFR0, TGran64_2, false, MRS_LOWER, 0,
id_aa64mmfr0_tgran64_2),
MRS_FIELD(ID_AA64MMFR0, TGran16_2, false, MRS_LOWER, 0,
id_aa64mmfr0_tgran16_2),
MRS_FIELD(ID_AA64MMFR0, TGran4, true, MRS_LOWER, 0,
id_aa64mmfr0_tgran4),
MRS_FIELD(ID_AA64MMFR0, TGran64, true, MRS_LOWER, 0,
id_aa64mmfr0_tgran64),
MRS_FIELD(ID_AA64MMFR0, TGran16, false, MRS_LOWER, 0,
id_aa64mmfr0_tgran16),
MRS_FIELD(ID_AA64MMFR0, BigEndEL0, false, MRS_LOWER, 0,
id_aa64mmfr0_bigendel0),
MRS_FIELD(ID_AA64MMFR0, SNSMem, false, MRS_LOWER, 0,
id_aa64mmfr0_snsmem),
MRS_FIELD(ID_AA64MMFR0, BigEnd, false, MRS_LOWER, 0,
id_aa64mmfr0_bigend),
MRS_FIELD(ID_AA64MMFR0, ASIDBits, false, MRS_LOWER, 0,
id_aa64mmfr0_asidbits),
MRS_FIELD(ID_AA64MMFR0, PARange, false, MRS_LOWER, 0,
id_aa64mmfr0_parange),
MRS_FIELD_END,
};
/* ID_AA64MMFR1_EL1 */
static const struct mrs_field_value id_aa64mmfr1_ecbhb[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, ECBHB, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_cmovw[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, CMOVW, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_tidcp1[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, TIDCP1, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_ntlbpa[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, nTLBPA, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_afp[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, AFP, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64mmfr1_afp_caps[] = {
MRS_HWCAP(2, HWCAP2_AFP, ID_AA64MMFR1_AFP_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64mmfr1_hcx[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, HCX, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_ets[] = {
MRS_FIELD_VALUE(ID_AA64MMFR1_ETS_NONE, ""),
MRS_FIELD_VALUE(ID_AA64MMFR1_ETS_NONE2, ""),
MRS_FIELD_VALUE(ID_AA64MMFR1_ETS_IMPL, "ETS2"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_twed[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, TWED, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_xnx[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, XNX, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_specsei[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, SpecSEI, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_pan[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, PAN, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64MMFR1_PAN_ATS1E1, "PAN+ATS1E1"),
MRS_FIELD_VALUE(ID_AA64MMFR1_PAN_EPAN, "EPAN"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_lo[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, LO, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_hpds[] = {
MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_NONE, ""),
MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_HPD, "HPD"),
MRS_FIELD_VALUE(ID_AA64MMFR1_HPDS_TTPBHA, "HPD+TTPBHA"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_vh[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR1, VH, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_vmidbits[] = {
MRS_FIELD_VALUE(ID_AA64MMFR1_VMIDBits_8, "8bit VMID"),
MRS_FIELD_VALUE(ID_AA64MMFR1_VMIDBits_16, "16bit VMID"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr1_hafdbs[] = {
MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_NONE, ""),
MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_AF, "HAF"),
MRS_FIELD_VALUE(ID_AA64MMFR1_HAFDBS_AF_DBS, "HAF+DS"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field id_aa64mmfr1_fields[] = {
MRS_FIELD(ID_AA64MMFR1, ECBHB, false, MRS_LOWER, 0, id_aa64mmfr1_ecbhb),
MRS_FIELD(ID_AA64MMFR1, CMOVW, false, MRS_LOWER, 0, id_aa64mmfr1_cmovw),
MRS_FIELD(ID_AA64MMFR1, TIDCP1, false, MRS_LOWER, 0,
id_aa64mmfr1_tidcp1),
MRS_FIELD(ID_AA64MMFR1, nTLBPA, false, MRS_LOWER, 0,
id_aa64mmfr1_ntlbpa),
MRS_FIELD_HWCAP(ID_AA64MMFR1, AFP, false, MRS_LOWER, 0,
id_aa64mmfr1_afp, id_aa64mmfr1_afp_caps),
MRS_FIELD(ID_AA64MMFR1, HCX, false, MRS_LOWER, 0, id_aa64mmfr1_hcx),
MRS_FIELD(ID_AA64MMFR1, ETS, false, MRS_LOWER, 0, id_aa64mmfr1_ets),
MRS_FIELD(ID_AA64MMFR1, TWED, false, MRS_LOWER, 0, id_aa64mmfr1_twed),
MRS_FIELD(ID_AA64MMFR1, XNX, false, MRS_LOWER, 0, id_aa64mmfr1_xnx),
/*
* SpecSEI != 0 indicates the CPU might generate an external abort
* under speculation, while 0 indicates it can't happen. It's safer
* to incorrectly indicate it might happen when it can't rather than
* say it can't happen when it could. As such use the largest value
* found in the system.
*/
MRS_FIELD(ID_AA64MMFR1, SpecSEI, false, MRS_HIGHER, 0,
id_aa64mmfr1_specsei),
MRS_FIELD(ID_AA64MMFR1, PAN, false, MRS_LOWER, 0, id_aa64mmfr1_pan),
MRS_FIELD(ID_AA64MMFR1, LO, false, MRS_LOWER, 0, id_aa64mmfr1_lo),
MRS_FIELD(ID_AA64MMFR1, HPDS, false, MRS_LOWER, 0, id_aa64mmfr1_hpds),
MRS_FIELD(ID_AA64MMFR1, VH, false, MRS_LOWER, 0, id_aa64mmfr1_vh),
MRS_FIELD(ID_AA64MMFR1, VMIDBits, false, MRS_LOWER, 0,
id_aa64mmfr1_vmidbits),
MRS_FIELD(ID_AA64MMFR1, HAFDBS, false, MRS_LOWER, 0, id_aa64mmfr1_hafdbs),
MRS_FIELD_END,
};
/* ID_AA64MMFR2_EL1 */
static const struct mrs_field_value id_aa64mmfr2_e0pd[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, E0PD, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_evt[] = {
MRS_FIELD_VALUE(ID_AA64MMFR2_EVT_NONE, ""),
MRS_FIELD_VALUE(ID_AA64MMFR2_EVT_8_2, "EVT-8.2"),
MRS_FIELD_VALUE(ID_AA64MMFR2_EVT_8_5, "EVT-8.5"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_bbm[] = {
MRS_FIELD_VALUE(ID_AA64MMFR2_BBM_LEVEL0, ""),
MRS_FIELD_VALUE(ID_AA64MMFR2_BBM_LEVEL1, "BBM level 1"),
MRS_FIELD_VALUE(ID_AA64MMFR2_BBM_LEVEL2, "BBM level 2"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_ttl[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, TTL, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_fwb[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, FWB, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_ids[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, IDS, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_at[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, AT, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64mmfr2_at_caps[] = {
MRS_HWCAP(1, HWCAP_USCAT, ID_AA64MMFR2_AT_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64mmfr2_st[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, ST, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_nv[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, NV, NONE, 8_3),
MRS_FIELD_VALUE(ID_AA64MMFR2_NV_8_4, "NV v8.4"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_ccidx[] = {
MRS_FIELD_VALUE(ID_AA64MMFR2_CCIDX_32, "32bit CCIDX"),
MRS_FIELD_VALUE(ID_AA64MMFR2_CCIDX_64, "64bit CCIDX"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_varange[] = {
MRS_FIELD_VALUE(ID_AA64MMFR2_VARange_48, "48bit VA"),
MRS_FIELD_VALUE(ID_AA64MMFR2_VARange_52, "52bit VA"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_iesb[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, IESB, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_lsm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, LSM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_uao[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, UAO, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr2_cnp[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR2, CnP, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field id_aa64mmfr2_fields[] = {
MRS_FIELD(ID_AA64MMFR2, E0PD, false, MRS_LOWER, 0, id_aa64mmfr2_e0pd),
MRS_FIELD(ID_AA64MMFR2, EVT, false, MRS_LOWER, 0, id_aa64mmfr2_evt),
MRS_FIELD(ID_AA64MMFR2, BBM, false, MRS_LOWER, 0, id_aa64mmfr2_bbm),
MRS_FIELD(ID_AA64MMFR2, TTL, false, MRS_LOWER, 0, id_aa64mmfr2_ttl),
MRS_FIELD(ID_AA64MMFR2, FWB, false, MRS_LOWER, 0, id_aa64mmfr2_fwb),
MRS_FIELD(ID_AA64MMFR2, IDS, false, MRS_LOWER, 0, id_aa64mmfr2_ids),
MRS_FIELD_HWCAP(ID_AA64MMFR2, AT, false, MRS_LOWER, MRS_USERSPACE,
id_aa64mmfr2_at, id_aa64mmfr2_at_caps),
MRS_FIELD(ID_AA64MMFR2, ST, false, MRS_LOWER, 0, id_aa64mmfr2_st),
MRS_FIELD(ID_AA64MMFR2, NV, false, MRS_LOWER, 0, id_aa64mmfr2_nv),
MRS_FIELD(ID_AA64MMFR2, CCIDX, false, MRS_LOWER, 0, id_aa64mmfr2_ccidx),
MRS_FIELD(ID_AA64MMFR2, VARange, false, MRS_LOWER, 0,
id_aa64mmfr2_varange),
MRS_FIELD(ID_AA64MMFR2, IESB, false, MRS_LOWER, 0, id_aa64mmfr2_iesb),
MRS_FIELD(ID_AA64MMFR2, LSM, false, MRS_LOWER, 0, id_aa64mmfr2_lsm),
MRS_FIELD(ID_AA64MMFR2, UAO, false, MRS_LOWER, 0, id_aa64mmfr2_uao),
MRS_FIELD(ID_AA64MMFR2, CnP, false, MRS_LOWER, 0, id_aa64mmfr2_cnp),
MRS_FIELD_END,
};
/* ID_AA64MMFR3_EL1 */
static const struct mrs_field_value id_aa64mmfr3_spec_fpacc[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, Spec_FPACC, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_aderr[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, ADERR, NONE, SOME),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_sderr[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, SDERR, NONE, ALL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_anerr[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, ANERR, NONE, SOME),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_snerr[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, SNERR, NONE, ALL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_mec[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, MEC, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_aie[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, AIE, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_s2poe[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, S2POE, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_s1poe[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, S1POE, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_s2pie[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, S2PIE, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_s1pie[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, S1PIE, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_sctlrx[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, SCTLRX, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64mmfr3_tcrx[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64MMFR3, TCRX, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field id_aa64mmfr3_fields[] = {
MRS_FIELD(ID_AA64MMFR3, Spec_FPACC, false, MRS_LOWER, 0,
id_aa64mmfr3_spec_fpacc),
MRS_FIELD(ID_AA64MMFR3, ADERR, false, MRS_LOWER, 0, id_aa64mmfr3_aderr),
MRS_FIELD(ID_AA64MMFR3, SDERR, false, MRS_LOWER, 0, id_aa64mmfr3_sderr),
MRS_FIELD(ID_AA64MMFR3, ANERR, false, MRS_LOWER, 0, id_aa64mmfr3_anerr),
MRS_FIELD(ID_AA64MMFR3, SNERR, false, MRS_LOWER, 0, id_aa64mmfr3_snerr),
MRS_FIELD(ID_AA64MMFR3, MEC, false, MRS_LOWER, 0, id_aa64mmfr3_mec),
MRS_FIELD(ID_AA64MMFR3, AIE, false, MRS_LOWER, 0, id_aa64mmfr3_aie),
MRS_FIELD(ID_AA64MMFR3, S2POE, false, MRS_LOWER, 0, id_aa64mmfr3_s2poe),
MRS_FIELD(ID_AA64MMFR3, S1POE, false, MRS_LOWER, 0, id_aa64mmfr3_s1poe),
MRS_FIELD(ID_AA64MMFR3, S2PIE, false, MRS_LOWER, 0, id_aa64mmfr3_s2pie),
MRS_FIELD(ID_AA64MMFR3, S1PIE, false, MRS_LOWER, 0, id_aa64mmfr3_s1pie),
MRS_FIELD(ID_AA64MMFR3, SCTLRX, false, MRS_LOWER, 0,
id_aa64mmfr3_sctlrx),
MRS_FIELD(ID_AA64MMFR3, TCRX, false, MRS_LOWER, 0, id_aa64mmfr3_tcrx),
MRS_FIELD_END,
};
/* ID_AA64MMFR4_EL1 */
static const struct mrs_field id_aa64mmfr4_fields[] = {
MRS_FIELD_END,
};
/* ID_AA64PFR0_EL1 */
static const struct mrs_field_value id_aa64pfr0_csv3[] = {
MRS_FIELD_VALUE(ID_AA64PFR0_CSV3_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR0_CSV3_ISOLATED, "CSV3"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_csv2[] = {
MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_ISOLATED, "CSV2"),
MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_SCXTNUM, "CSV2_2"),
MRS_FIELD_VALUE(ID_AA64PFR0_CSV2_3, "CSV2_3"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_rme[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, RME, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_dit[] = {
MRS_FIELD_VALUE(ID_AA64PFR0_DIT_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR0_DIT_PSTATE, "PSTATE.DIT"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64pfr0_dit_caps[] = {
MRS_HWCAP(1, HWCAP_DIT, ID_AA64PFR0_DIT_PSTATE),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64pfr0_amu[] = {
MRS_FIELD_VALUE(ID_AA64PFR0_AMU_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR0_AMU_V1, "AMUv1"),
MRS_FIELD_VALUE(ID_AA64PFR0_AMU_V1_1, "AMUv1p1"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_mpam[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, MPAM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_sel2[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, SEL2, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_sve[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, SVE, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64pfr0_sve_caps[] = {
MRS_HWCAP(1, HWCAP_SVE, ID_AA64PFR0_SVE_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64pfr0_ras[] = {
MRS_FIELD_VALUE(ID_AA64PFR0_RAS_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR0_RAS_IMPL, "RAS"),
MRS_FIELD_VALUE(ID_AA64PFR0_RAS_8_4, "RAS v8.4"),
MRS_FIELD_VALUE(ID_AA64PFR0_RAS_8_9, "RAS v8.9"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_gic[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, GIC, CPUIF_NONE, CPUIF_EN),
MRS_FIELD_VALUE(ID_AA64PFR0_GIC_CPUIF_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR0_GIC_CPUIF_EN, "GIC"),
MRS_FIELD_VALUE(ID_AA64PFR0_GIC_CPUIF_4_1, "GIC 4.1"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_advsimd[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, AdvSIMD, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64PFR0_AdvSIMD_HP, "AdvSIMD+HP"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64pfr0_advsimd_caps[] = {
MRS_HWCAP(1, HWCAP_ASIMD, ID_AA64PFR0_AdvSIMD_IMPL),
MRS_HWCAP(1, HWCAP_ASIMDHP, ID_AA64PFR0_AdvSIMD_HP),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64pfr0_fp[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, FP, NONE, IMPL),
MRS_FIELD_VALUE(ID_AA64PFR0_FP_HP, "FP+HP"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64pfr0_fp_caps[] = {
MRS_HWCAP(1, HWCAP_FP, ID_AA64PFR0_FP_IMPL),
MRS_HWCAP(1, HWCAP_FPHP, ID_AA64PFR0_FP_HP),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64pfr0_el3[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, EL3, NONE, 64),
MRS_FIELD_VALUE(ID_AA64PFR0_EL3_64_32, "EL3 32"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_el2[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR0, EL2, NONE, 64),
MRS_FIELD_VALUE(ID_AA64PFR0_EL2_64_32, "EL2 32"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_el1[] = {
MRS_FIELD_VALUE(ID_AA64PFR0_EL1_64, "EL1"),
MRS_FIELD_VALUE(ID_AA64PFR0_EL1_64_32, "EL1 32"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr0_el0[] = {
MRS_FIELD_VALUE(ID_AA64PFR0_EL0_64, "EL0"),
MRS_FIELD_VALUE(ID_AA64PFR0_EL0_64_32, "EL0 32"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field id_aa64pfr0_fields[] = {
MRS_FIELD(ID_AA64PFR0, CSV3, false, MRS_LOWER, 0, id_aa64pfr0_csv3),
MRS_FIELD(ID_AA64PFR0, CSV2, false, MRS_LOWER, 0, id_aa64pfr0_csv2),
MRS_FIELD(ID_AA64PFR0, RME, false, MRS_LOWER, 0, id_aa64pfr0_rme),
MRS_FIELD_HWCAP(ID_AA64PFR0, DIT, false, MRS_LOWER, MRS_USERSPACE,
id_aa64pfr0_dit, id_aa64pfr0_dit_caps),
MRS_FIELD(ID_AA64PFR0, AMU, false, MRS_LOWER, 0, id_aa64pfr0_amu),
MRS_FIELD(ID_AA64PFR0, MPAM, false, MRS_LOWER, 0, id_aa64pfr0_mpam),
MRS_FIELD(ID_AA64PFR0, SEL2, false, MRS_LOWER, 0, id_aa64pfr0_sel2),
MRS_FIELD_HWCAP(ID_AA64PFR0, SVE, false, MRS_LOWER,
MRS_FREEBSD, id_aa64pfr0_sve, id_aa64pfr0_sve_caps),
MRS_FIELD(ID_AA64PFR0, RAS, false, MRS_LOWER, 0, id_aa64pfr0_ras),
MRS_FIELD(ID_AA64PFR0, GIC, false, MRS_LOWER, 0, id_aa64pfr0_gic),
MRS_FIELD_HWCAP(ID_AA64PFR0, AdvSIMD, true, MRS_LOWER, MRS_USERSPACE,
id_aa64pfr0_advsimd, id_aa64pfr0_advsimd_caps),
MRS_FIELD_HWCAP(ID_AA64PFR0, FP, true, MRS_LOWER, MRS_USERSPACE,
id_aa64pfr0_fp, id_aa64pfr0_fp_caps),
MRS_FIELD(ID_AA64PFR0, EL3, false, MRS_LOWER, 0, id_aa64pfr0_el3),
MRS_FIELD(ID_AA64PFR0, EL2, false, MRS_LOWER, 0, id_aa64pfr0_el2),
MRS_FIELD(ID_AA64PFR0, EL1, false, MRS_LOWER, MRS_USERSPACE,
id_aa64pfr0_el1),
MRS_FIELD(ID_AA64PFR0, EL0, false, MRS_LOWER, MRS_USERSPACE,
id_aa64pfr0_el0),
MRS_FIELD_END,
};
/* ID_AA64PFR1_EL1 */
static const struct mrs_field_value id_aa64pfr1_pfar[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR1, PFAR, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_df2[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR1, DF2, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_mtex[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR1, MTEX, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_the[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR1, THE, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_mtefrac[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR1, MTE_frac, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_nmi[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR1, NMI, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_csv2_frac[] = {
MRS_FIELD_VALUE(ID_AA64PFR1_CSV2_frac_p0, ""),
MRS_FIELD_VALUE(ID_AA64PFR1_CSV2_frac_p1, "CSV2 p1"),
MRS_FIELD_VALUE(ID_AA64PFR1_CSV2_frac_p2, "CSV2 p2"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_rndr_trap[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64PFR1, RNDR_trap, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_sme[] = {
MRS_FIELD_VALUE(ID_AA64PFR1_SME_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR1_SME_SME, "SME"),
MRS_FIELD_VALUE(ID_AA64PFR1_SME_SME2, "SME2"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_mpam_frac[] = {
MRS_FIELD_VALUE(ID_AA64PFR1_MPAM_frac_p0, ""),
MRS_FIELD_VALUE(ID_AA64PFR1_MPAM_frac_p1, "MPAM p1"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_ras_frac[] = {
MRS_FIELD_VALUE(ID_AA64PFR1_RAS_frac_p0, ""),
MRS_FIELD_VALUE(ID_AA64PFR1_RAS_frac_p1, "RAS p1"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_mte[] = {
MRS_FIELD_VALUE(ID_AA64PFR1_MTE_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR1_MTE_MTE, "MTE"),
MRS_FIELD_VALUE(ID_AA64PFR1_MTE_MTE2, "MTE2"),
MRS_FIELD_VALUE(ID_AA64PFR1_MTE_MTE3, "MTE3"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_aa64pfr1_ssbs[] = {
MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_PSTATE, "PSTATE.SSBS"),
MRS_FIELD_VALUE(ID_AA64PFR1_SSBS_PSTATE_MSR, "PSTATE.SSBS MSR"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64pfr1_ssbs_caps[] = {
MRS_HWCAP(1, HWCAP_SSBS, ID_AA64PFR1_SSBS_PSTATE),
MRS_HWCAP_END
};
static const struct mrs_field_value id_aa64pfr1_bt[] = {
MRS_FIELD_VALUE(ID_AA64PFR1_BT_NONE, ""),
MRS_FIELD_VALUE(ID_AA64PFR1_BT_IMPL, "BTI"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64pfr1_bt_caps[] = {
MRS_HWCAP(2, HWCAP2_BTI, ID_AA64PFR1_BT_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field id_aa64pfr1_fields[] = {
MRS_FIELD(ID_AA64PFR1, PFAR, false, MRS_LOWER, 0, id_aa64pfr1_pfar),
MRS_FIELD(ID_AA64PFR1, DF2, false, MRS_LOWER, 0, id_aa64pfr1_df2),
MRS_FIELD(ID_AA64PFR1, MTEX, false, MRS_LOWER, 0, id_aa64pfr1_mtex),
MRS_FIELD(ID_AA64PFR1, THE, false, MRS_LOWER, 0, id_aa64pfr1_the),
MRS_FIELD(ID_AA64PFR1, MTE_frac, true, MRS_LOWER, 0,
id_aa64pfr1_mtefrac),
MRS_FIELD(ID_AA64PFR1, NMI, false, MRS_LOWER, 0, id_aa64pfr1_nmi),
MRS_FIELD(ID_AA64PFR1, CSV2_frac, false, MRS_LOWER, 0,
id_aa64pfr1_csv2_frac),
MRS_FIELD(ID_AA64PFR1, RNDR_trap, false, MRS_LOWER, 0,
id_aa64pfr1_rndr_trap),
MRS_FIELD(ID_AA64PFR1, SME, false, MRS_LOWER, 0, id_aa64pfr1_sme),
MRS_FIELD(ID_AA64PFR1, MPAM_frac, false, MRS_LOWER, 0,
id_aa64pfr1_mpam_frac),
MRS_FIELD(ID_AA64PFR1, RAS_frac, false, MRS_LOWER, 0,
id_aa64pfr1_ras_frac),
MRS_FIELD(ID_AA64PFR1, MTE, false, MRS_LOWER, 0, id_aa64pfr1_mte),
MRS_FIELD_HWCAP(ID_AA64PFR1, SSBS, false, MRS_LOWER, MRS_USERSPACE,
id_aa64pfr1_ssbs, id_aa64pfr1_ssbs_caps),
MRS_FIELD_HWCAP(ID_AA64PFR1, BT, false, MRS_LOWER,
MRS_FREEBSD, id_aa64pfr1_bt, id_aa64pfr1_bt_caps),
MRS_FIELD_END,
};
/* ID_AA64PFR2_EL1 */
static const struct mrs_field id_aa64pfr2_fields[] = {
MRS_FIELD_END,
};
/* ID_AA64ZFR0_EL1 */
static const struct mrs_field_value id_aa64zfr0_f64mm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ZFR0, F64MM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_f64mm_caps[] = {
MRS_HWCAP(2, HWCAP2_SVEF64MM, ID_AA64ZFR0_F64MM_IMPL),
MRS_HWCAP_END,
};
static const struct mrs_field_value id_aa64zfr0_f32mm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ZFR0, F32MM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_f32mm_caps[] = {
MRS_HWCAP(2, HWCAP2_SVEF32MM, ID_AA64ZFR0_F32MM_IMPL),
MRS_HWCAP_END,
};
static const struct mrs_field_value id_aa64zfr0_i8mm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ZFR0, I8MM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_i8mm_caps[] = {
MRS_HWCAP(2, HWCAP2_SVEI8MM, ID_AA64ZFR0_I8MM_IMPL),
MRS_HWCAP_END,
};
static const struct mrs_field_value id_aa64zfr0_sm4[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ZFR0, SM4, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_sm4_caps[] = {
MRS_HWCAP(2, HWCAP2_SVESM4, ID_AA64ZFR0_SM4_IMPL),
MRS_HWCAP_END,
};
static const struct mrs_field_value id_aa64zfr0_sha3[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ZFR0, SHA3, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_sha3_caps[] = {
MRS_HWCAP(2, HWCAP2_SVESHA3, ID_AA64ZFR0_SHA3_IMPL),
MRS_HWCAP_END,
};
static const struct mrs_field_value id_aa64zfr0_bf16[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ZFR0, BF16, NONE, BASE),
MRS_FIELD_VALUE(ID_AA64ZFR0_BF16_EBF, "BF16+EBF"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_bf16_caps[] = {
MRS_HWCAP(2, HWCAP2_SVEBF16, ID_AA64ZFR0_BF16_BASE),
MRS_HWCAP(2, HWCAP2_SVE_EBF16, ID_AA64ZFR0_BF16_EBF),
MRS_HWCAP_END,
};
static const struct mrs_field_value id_aa64zfr0_bitperm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ZFR0, BitPerm, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_bitperm_caps[] = {
MRS_HWCAP(2, HWCAP2_SVEBITPERM, ID_AA64ZFR0_BitPerm_IMPL),
MRS_HWCAP_END,
};
static const struct mrs_field_value id_aa64zfr0_aes[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_AA64ZFR0, AES, NONE, BASE),
MRS_FIELD_VALUE(ID_AA64ZFR0_AES_PMULL, "AES+PMULL"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_aes_caps[] = {
MRS_HWCAP(2, HWCAP2_SVEAES, ID_AA64ZFR0_AES_BASE),
MRS_HWCAP(2, HWCAP2_SVEPMULL, ID_AA64ZFR0_AES_PMULL),
MRS_HWCAP_END,
};
static const struct mrs_field_value id_aa64zfr0_svever[] = {
MRS_FIELD_VALUE(ID_AA64ZFR0_SVEver_SVE1, "SVE1"),
MRS_FIELD_VALUE(ID_AA64ZFR0_SVEver_SVE2, "SVE2"),
MRS_FIELD_VALUE(ID_AA64ZFR0_SVEver_SVE2P1, "SVE2P1"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_aa64zfr0_svever_caps[] = {
MRS_HWCAP(2, HWCAP2_SVE2, ID_AA64ZFR0_SVEver_SVE2),
MRS_HWCAP(2, HWCAP2_SVE2P1, ID_AA64ZFR0_SVEver_SVE2P1),
MRS_HWCAP_END,
};
static const struct mrs_field id_aa64zfr0_fields[] = {
MRS_FIELD_HWCAP(ID_AA64ZFR0, F64MM, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_f64mm, id_aa64zfr0_f64mm_caps),
MRS_FIELD_HWCAP(ID_AA64ZFR0, F32MM, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_f32mm, id_aa64zfr0_f32mm_caps),
MRS_FIELD_HWCAP(ID_AA64ZFR0, I8MM, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_i8mm, id_aa64zfr0_i8mm_caps),
MRS_FIELD_HWCAP(ID_AA64ZFR0, SM4, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_sm4, id_aa64zfr0_sm4_caps),
MRS_FIELD_HWCAP(ID_AA64ZFR0, SHA3, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_sha3, id_aa64zfr0_sha3_caps),
MRS_FIELD_HWCAP(ID_AA64ZFR0, BF16, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_bf16, id_aa64zfr0_bf16_caps),
MRS_FIELD_HWCAP(ID_AA64ZFR0, BitPerm, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_bitperm, id_aa64zfr0_bitperm_caps),
MRS_FIELD_HWCAP(ID_AA64ZFR0, AES, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_aes, id_aa64zfr0_aes_caps),
MRS_FIELD_HWCAP(ID_AA64ZFR0, SVEver, false, MRS_LOWER, MRS_USERSPACE,
id_aa64zfr0_svever, id_aa64zfr0_svever_caps),
MRS_FIELD_END,
};
#ifdef COMPAT_FREEBSD32
/* ID_ISAR5_EL1 */
static const struct mrs_field_value id_isar5_vcma[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_ISAR5, VCMA, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_isar5_rdm[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_ISAR5, RDM, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value id_isar5_crc32[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_ISAR5, CRC32, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_isar5_crc32_caps[] = {
MRS_HWCAP(2, HWCAP32_2_CRC32, ID_ISAR5_CRC32_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_isar5_sha2[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_ISAR5, SHA2, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_isar5_sha2_caps[] = {
MRS_HWCAP(2, HWCAP32_2_SHA2, ID_ISAR5_SHA2_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_isar5_sha1[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_ISAR5, SHA1, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_isar5_sha1_caps[] = {
MRS_HWCAP(2, HWCAP32_2_SHA1, ID_ISAR5_SHA1_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_isar5_aes[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_ISAR5, AES, NONE, BASE),
MRS_FIELD_VALUE(ID_ISAR5_AES_VMULL, "AES+VMULL"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap id_isar5_aes_caps[] = {
MRS_HWCAP(2, HWCAP32_2_AES, ID_ISAR5_AES_BASE),
MRS_HWCAP(2, HWCAP32_2_PMULL, ID_ISAR5_AES_VMULL),
MRS_HWCAP_END
};
static const struct mrs_field_value id_isar5_sevl[] = {
MRS_FIELD_VALUE_NONE_IMPL(ID_ISAR5, SEVL, NOP, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field id_isar5_fields[] = {
MRS_FIELD(ID_ISAR5, VCMA, false, MRS_LOWER, MRS_USERSPACE,
id_isar5_vcma),
MRS_FIELD(ID_ISAR5, RDM, false, MRS_LOWER, MRS_USERSPACE, id_isar5_rdm),
MRS_FIELD_HWCAP(ID_ISAR5, CRC32, false, MRS_LOWER, MRS_USERSPACE,
id_isar5_crc32, id_isar5_crc32_caps),
MRS_FIELD_HWCAP(ID_ISAR5, SHA2, false, MRS_LOWER, MRS_USERSPACE,
id_isar5_sha2, id_isar5_sha2_caps),
MRS_FIELD_HWCAP(ID_ISAR5, SHA1, false, MRS_LOWER, MRS_USERSPACE,
id_isar5_sha1, id_isar5_sha1_caps),
MRS_FIELD_HWCAP(ID_ISAR5, AES, false, MRS_LOWER, MRS_USERSPACE,
id_isar5_aes, id_isar5_aes_caps),
MRS_FIELD(ID_ISAR5, SEVL, false, MRS_LOWER, MRS_USERSPACE,
id_isar5_sevl),
MRS_FIELD_END,
};
/* MVFR0 */
static const struct mrs_field_value mvfr0_fpround[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR0, FPRound, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr0_fpsqrt[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR0, FPSqrt, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr0_fpdivide[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR0, FPDivide, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr0_fptrap[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR0, FPTrap, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr0_fpdp[] = {
MRS_FIELD_VALUE(MVFR0_FPDP_NONE, ""),
MRS_FIELD_VALUE(MVFR0_FPDP_VFP_v2, "DP VFPv2"),
MRS_FIELD_VALUE(MVFR0_FPDP_VFP_v3_v4, "DP VFPv3+v4"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap mvfr0_fpdp_caps[] = {
MRS_HWCAP(1, HWCAP32_VFP, MVFR0_FPDP_VFP_v2),
MRS_HWCAP(1, HWCAP32_VFPv3, MVFR0_FPDP_VFP_v3_v4),
MRS_HWCAP_END
};
static const struct mrs_field_value mvfr0_fpsp[] = {
MRS_FIELD_VALUE(MVFR0_FPSP_NONE, ""),
MRS_FIELD_VALUE(MVFR0_FPSP_VFP_v2, "SP VFPv2"),
MRS_FIELD_VALUE(MVFR0_FPSP_VFP_v3_v4, "SP VFPv3+v4"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr0_simdreg[] = {
MRS_FIELD_VALUE(MVFR0_SIMDReg_NONE, ""),
MRS_FIELD_VALUE(MVFR0_SIMDReg_FP, "FP 16x64"),
MRS_FIELD_VALUE(MVFR0_SIMDReg_AdvSIMD, "AdvSIMD"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field mvfr0_fields[] = {
MRS_FIELD(MVFR0, FPRound, false, MRS_LOWER, MRS_USERSPACE,
mvfr0_fpround),
MRS_FIELD(MVFR0, FPSqrt, false, MRS_LOWER, MRS_USERSPACE,
mvfr0_fpsqrt),
MRS_FIELD(MVFR0, FPDivide, false, MRS_LOWER, MRS_USERSPACE,
mvfr0_fpdivide),
MRS_FIELD(MVFR0, FPTrap, false, MRS_LOWER, MRS_USERSPACE,
mvfr0_fptrap),
MRS_FIELD_HWCAP(MVFR0, FPDP, false, MRS_LOWER, MRS_USERSPACE,
mvfr0_fpdp, mvfr0_fpdp_caps),
MRS_FIELD(MVFR0, FPSP, false, MRS_LOWER, MRS_USERSPACE, mvfr0_fpsp),
MRS_FIELD(MVFR0, SIMDReg, false, MRS_LOWER, MRS_USERSPACE,
mvfr0_simdreg),
MRS_FIELD_END,
};
/* MVFR1 */
static const struct mrs_field_value mvfr1_simdfmac[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR1, SIMDFMAC, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap mvfr1_simdfmac_caps[] = {
MRS_HWCAP(1, HWCAP32_VFPv4, MVFR1_SIMDFMAC_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value mvfr1_fphp[] = {
MRS_FIELD_VALUE(MVFR1_FPHP_NONE, ""),
MRS_FIELD_VALUE(MVFR1_FPHP_CONV_SP, "FPHP SP Conv"),
MRS_FIELD_VALUE(MVFR1_FPHP_CONV_DP, "FPHP DP Conv"),
MRS_FIELD_VALUE(MVFR1_FPHP_ARITH, "FPHP Arith"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr1_simdhp[] = {
MRS_FIELD_VALUE(MVFR1_SIMDHP_NONE, ""),
MRS_FIELD_VALUE(MVFR1_SIMDHP_CONV_SP, "SIMDHP SP Conv"),
MRS_FIELD_VALUE(MVFR1_SIMDHP_ARITH, "SIMDHP Arith"),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr1_simdsp[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR1, SIMDSP, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr1_simdint[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR1, SIMDInt, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr1_simdls[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR1, SIMDLS, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_hwcap mvfr1_simdls_caps[] = {
MRS_HWCAP(1, HWCAP32_VFPv4, MVFR1_SIMDFMAC_IMPL),
MRS_HWCAP_END
};
static const struct mrs_field_value mvfr1_fpdnan[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR1, FPDNaN, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field_value mvfr1_fpftz[] = {
MRS_FIELD_VALUE_NONE_IMPL(MVFR1, FPFtZ, NONE, IMPL),
MRS_FIELD_VALUE_END,
};
static const struct mrs_field mvfr1_fields[] = {
MRS_FIELD_HWCAP(MVFR1, SIMDFMAC, false, MRS_LOWER, MRS_USERSPACE,
mvfr1_simdfmac, mvfr1_simdfmac_caps),
MRS_FIELD(MVFR1, FPHP, false, MRS_LOWER, MRS_USERSPACE, mvfr1_fphp),
MRS_FIELD(MVFR1, SIMDHP, false, MRS_LOWER, MRS_USERSPACE, mvfr1_simdhp),
MRS_FIELD(MVFR1, SIMDSP, false, MRS_LOWER, MRS_USERSPACE, mvfr1_simdsp),
MRS_FIELD(MVFR1, SIMDInt, false, MRS_LOWER, MRS_USERSPACE,
mvfr1_simdint),
MRS_FIELD_HWCAP(MVFR1, SIMDLS, false, MRS_LOWER, MRS_USERSPACE,
mvfr1_simdls, mvfr1_simdls_caps),
MRS_FIELD(MVFR1, FPDNaN, false, MRS_LOWER, MRS_USERSPACE,
mvfr1_fpdnan),
MRS_FIELD(MVFR1, FPFtZ, false, MRS_LOWER, MRS_USERSPACE,
mvfr1_fpftz),
MRS_FIELD_END,
};
#endif /* COMPAT_FREEBSD32 */
struct mrs_user_reg {
u_int iss;
bool is64bit;
size_t offset;
const struct mrs_field *fields;
};
#define USER_REG(name, field_name, _is64bit) \
{ \
.iss = name##_ISS, \
.offset = __offsetof(struct cpu_desc, field_name), \
.fields = field_name##_fields, \
.is64bit = _is64bit, \
}
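/*
 * The ID registers tracked per CPU. Each entry maps a trapped register (by
 * its ISS encoding) to its slot in struct cpu_desc and to the field
 * descriptions used to build the sanitized kernel and userspace views.
 */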
static const struct mrs_user_reg user_regs[] = {
USER_REG(ID_AA64AFR0_EL1, id_aa64afr0, true),
USER_REG(ID_AA64AFR1_EL1, id_aa64afr1, true),
USER_REG(ID_AA64DFR0_EL1, id_aa64dfr0, true),
USER_REG(ID_AA64DFR1_EL1, id_aa64dfr1, true),
USER_REG(ID_AA64ISAR0_EL1, id_aa64isar0, true),
USER_REG(ID_AA64ISAR1_EL1, id_aa64isar1, true),
USER_REG(ID_AA64ISAR2_EL1, id_aa64isar2, true),
USER_REG(ID_AA64MMFR0_EL1, id_aa64mmfr0, true),
USER_REG(ID_AA64MMFR1_EL1, id_aa64mmfr1, true),
USER_REG(ID_AA64MMFR2_EL1, id_aa64mmfr2, true),
USER_REG(ID_AA64MMFR3_EL1, id_aa64mmfr3, true),
USER_REG(ID_AA64MMFR4_EL1, id_aa64mmfr4, true),
USER_REG(ID_AA64PFR0_EL1, id_aa64pfr0, true),
USER_REG(ID_AA64PFR1_EL1, id_aa64pfr1, true),
USER_REG(ID_AA64PFR2_EL1, id_aa64pfr2, true),
USER_REG(ID_AA64ZFR0_EL1, id_aa64zfr0, true),
USER_REG(CTR_EL0, ctr, true),
#ifdef COMPAT_FREEBSD32
USER_REG(ID_ISAR5_EL1, id_isar5, false),
USER_REG(MVFR0_EL1, mvfr0, false),
USER_REG(MVFR1_EL1, mvfr1, false),
#endif /* COMPAT_FREEBSD32 */
};
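/* Access the uint64_t register value user_regs[idx] describes in a cpu_desc. */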
#define CPU_DESC_FIELD(desc, idx) \
*(uint64_t *)((char *)&(desc) + user_regs[(idx)].offset)
static bool
user_ctr_has_neoverse_n1_1542419(uint32_t midr, uint64_t ctr)
{
/* Skip non-Neoverse-N1 */
if (!CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM,
CPU_PART_NEOVERSE_N1, 0, 0))
return (false);
switch (CPU_VAR(midr)) {
default:
break;
case 4:
/* Fixed in r4p1 */
if (CPU_REV(midr) > 0)
break;
/* FALLTHROUGH */
case 3:
/* If DIC is enabled (coherent icache) then we are affected */
return (CTR_DIC_VAL(ctr) != 0);
}
return (false);
}
-static bool
-user_ctr_check(const struct cpu_feat *feat __unused, u_int midr __unused)
+static cpu_feat_en
+user_ctr_check(const struct cpu_feat *feat __unused, u_int midr)
{
if (emulate_ctr)
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
if (user_ctr_has_neoverse_n1_1542419(midr, READ_SPECIALREG(ctr_el0)))
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
- return (false);
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
user_ctr_has_errata(const struct cpu_feat *feat __unused, u_int midr,
u_int **errata_list, u_int *errata_count)
{
if (user_ctr_has_neoverse_n1_1542419(midr, READ_SPECIALREG(ctr_el0))) {
static u_int errata_id = 1542419;
*errata_list = &errata_id;
*errata_count = 1;
return (true);
}
return (false);
}
static bool
user_ctr_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status, u_int *errata_list, u_int errata_count)
{
MPASS(emulate_ctr || errata_status != ERRATA_NONE);
/*
* The Errata Management Firmware Interface may incorrectly mark
* this as firmware mitigated. We should ignore that as there is
* a kernel component to the mitigation.
*/
if (errata_status != ERRATA_NONE && PCPU_GET(cpuid) == 0 &&
cpu_feat_has_erratum(errata_list, errata_count, 1542419)) {
/* Clear fields we will change */
user_cpu_desc.ctr &= ~(CTR_DIC_MASK | CTR_ILINE_WIDTH);
/*
* Set DIC to none so userspace will execute an 'ic ivau'
* instruction that can be trapped by EL3.
*/
user_cpu_desc.ctr |= CTR_DIC_NONE;
/*
* Set the i-cache line size to be the page size to reduce the
* number of times userspace needs to execute the 'ic ivau'
* instruction. The ctr_el0.IminLine is log2 the number of
* 4-byte words the instruction covers. As PAGE_SHIFT is log2
* of the number of bytes in a page we need to subtract 2.
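*
* For example, with 4 KiB pages (PAGE_SHIFT == 12) this stores 10 in
* IminLine, i.e. a reported line of 2^10 4-byte words == 4096 bytes.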
*/
user_cpu_desc.ctr |= (PAGE_SHIFT - 2) << CTR_ILINE_SHIFT;
l_user_cpu_desc.ctr = user_cpu_desc.ctr;
}
WRITE_SPECIALREG(sctlr_el1,
READ_SPECIALREG(sctlr_el1) & ~SCTLR_UCT);
isb();
return (true);
}
CPU_FEAT(trap_ctr, "Trap CTR_EL0",
user_ctr_check, user_ctr_has_errata, user_ctr_enable,
CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
static bool
user_ctr_handler(uint64_t esr, struct trapframe *frame)
{
uint64_t value;
int reg;
if (ESR_ELx_EXCEPTION(esr) != EXCP_MSR)
return (false);
/* Only support reading from ctr_el0 */
if ((esr & ISS_MSR_DIR) == 0)
return (false);
/* Check if this is the ctr_el0 register */
if ((esr & ISS_MSR_REG_MASK) != CTR_EL0_ISS)
return (false);
if (SV_CURPROC_ABI() == SV_ABI_FREEBSD)
value = user_cpu_desc.ctr;
else
value = l_user_cpu_desc.ctr;
/*
* We will handle this instruction, move to the next so we
* don't trap here again.
*/
frame->tf_elr += INSN_SIZE;
reg = ISS_MSR_Rt(esr);
/* If reg is 31 then write to xzr, i.e. do nothing */
if (reg == 31)
return (true);
if (reg < nitems(frame->tf_x))
frame->tf_x[reg] = value;
else if (reg == 30)
frame->tf_lr = value;
return (true);
}
static bool
user_idreg_handler(uint64_t esr, struct trapframe *frame)
{
uint64_t value;
int reg;
if (ESR_ELx_EXCEPTION(esr) != EXCP_MSR)
return (false);
/* Only support reading from ID registers */
if ((esr & ISS_MSR_DIR) == 0)
return (false);
/*
* This only handles the ID register space and a few registers that
* are safe to pass through to userspace.
*
* These registers are all in the space op0 == 3, op1 == 0,
* CRn == 0. We support the following CRm:
* - CRm == 0: midr_el1, mpidr_el1, and revidr_el1.
* - CRm in {4-7}: sanitized ID registers.
*
* Registers in the ID register space (CRm in {4-7}) are all
* read-only and have either defined fields, or are read as
* zero (RAZ). For these we return 0 for any unknown register.
*/
if (ISS_MSR_OP0(esr) != 3 || ISS_MSR_OP1(esr) != 0 ||
ISS_MSR_CRn(esr) != 0)
return (false);
value = 0;
if (ISS_MSR_CRm(esr) >= 4 && ISS_MSR_CRm(esr) <= 7) {
for (int i = 0; i < nitems(user_regs); i++) {
if (user_regs[i].iss == (esr & ISS_MSR_REG_MASK)) {
if (SV_CURPROC_ABI() == SV_ABI_FREEBSD)
value = CPU_DESC_FIELD(user_cpu_desc, i);
else
value = CPU_DESC_FIELD(l_user_cpu_desc, i);
break;
}
}
} else if (ISS_MSR_CRm(esr) == 0) {
switch (ISS_MSR_OP2(esr)) {
case 0:
value = READ_SPECIALREG(midr_el1);
break;
case 5:
value = READ_SPECIALREG(mpidr_el1);
break;
case 6:
value = READ_SPECIALREG(revidr_el1);
break;
default:
return (false);
}
} else {
return (false);
}
/*
* We will handle this instruction, move to the next so we
* don't trap here again.
*/
frame->tf_elr += INSN_SIZE;
reg = ISS_MSR_Rt(esr);
/* If reg is 31 then write to xzr, i.e. do nothing */
if (reg == 31)
return (true);
if (reg < nitems(frame->tf_x))
frame->tf_x[reg] = value;
else if (reg == 30)
frame->tf_lr = value;
return (true);
}
/*
* Compares two field values that may be signed or unsigned.
* Returns:
* < 0 when a is less than b
* = 0 when a equals b
* > 0 when a is greater than b
*/
static int
mrs_field_cmp(uint64_t a, uint64_t b, u_int shift, int width, bool sign)
{
uint64_t mask;
KASSERT(width > 0 && width < 64, ("%s: Invalid width %d", __func__,
width));
mask = (1ul << width) - 1;
/* Move the field to the lower bits */
a = (a >> shift) & mask;
b = (b >> shift) & mask;
if (sign) {
/*
* The field is signed. Toggle the upper bit so the comparison
* works on unsigned values as this makes positive numbers,
* i.e. those with a 0 bit, larger than negative numbers,
* i.e. those with a 1 bit, in an unsigned comparison.
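*
* For example, with a 4-bit field +7 (0x7) becomes 0xf and -1 (0xf)
* becomes 0x7, so the unsigned comparison orders +7 above -1.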
*/
a ^= 1ul << (width - 1);
b ^= 1ul << (width - 1);
}
return (a - b);
}
bool
get_kernel_reg_iss(u_int iss, uint64_t *val)
{
int i;
for (i = 0; i < nitems(user_regs); i++) {
if (user_regs[i].iss == iss) {
*val = CPU_DESC_FIELD(kern_cpu_desc, i);
return (true);
}
}
return (false);
}
/*
* Fetch the specified register's value, ensuring that individual field values
* do not exceed those in the mask.
*/
bool
get_kernel_reg_iss_masked(u_int iss, uint64_t *valp, uint64_t mask)
{
const struct mrs_field *fields;
uint64_t val;
for (int i = 0; i < nitems(user_regs); i++) {
if (user_regs[i].iss == iss) {
val = CPU_DESC_FIELD(kern_cpu_desc, i);
fields = user_regs[i].fields;
for (int j = 0; fields[j].type != 0; j++) {
mask = update_special_reg_field(mask,
fields[j].type, val, fields[j].width,
fields[j].shift, fields[j].sign);
}
*valp = mask;
return (true);
}
}
return (false);
}
bool
get_user_reg_iss(u_int iss, uint64_t *val, bool fbsd)
{
int i;
for (i = 0; i < nitems(user_regs); i++) {
if (user_regs[i].iss == iss) {
if (fbsd)
*val = CPU_DESC_FIELD(user_cpu_desc, i);
else
*val = CPU_DESC_FIELD(l_user_cpu_desc, i);
return (true);
}
}
return (false);
}
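/*
 * Merge a new value into a single field of a register view. MRS_EXACT
 * forces the field to its safe value, MRS_EXACT_IF_DIFFERENT only does so
 * when the old and new values differ, MRS_LOWER keeps the smaller of the
 * two field values, MRS_HIGHER keeps the larger, and MRS_HIGHER_OR_ZERO
 * collapses to zero when either value is zero.
 */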
static uint64_t
update_special_reg_field(uint64_t user_reg, u_int type, uint64_t value,
u_int width, u_int shift, bool sign)
{
uint64_t cur, mask, new_val;
mask = ((1ul << width) - 1) << shift;
cur = user_reg & mask;
new_val = value & mask;
switch (type & MRS_TYPE_MASK) {
case MRS_EXACT_IF_DIFFERENT:
if (mrs_field_cmp(new_val, cur, shift, width, sign) == 0)
break;
/* FALLTHROUGH */
case MRS_EXACT:
cur = (uint64_t)MRS_SAFE_VAL(type) << shift;
break;
case MRS_LOWER:
if (mrs_field_cmp(new_val, cur, shift, width, sign) < 0)
cur = new_val;
break;
case MRS_HIGHER_OR_ZERO:
if (cur == 0 || new_val == 0) {
cur = 0;
break;
}
/* FALLTHROUGH */
case MRS_HIGHER:
if (mrs_field_cmp(new_val, cur, shift, width, sign) > 0)
cur = new_val;
break;
default:
panic("Invalid field type: %d", type);
}
user_reg &= ~mask;
user_reg |= cur;
return (user_reg);
}
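/*
 * Apply a clear/set update to the register at the given user_regs index in
 * all three views (kernel, FreeBSD userspace, and Linux userspace),
 * re-sanitizing each field with the rules for that view.
 */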
static void
clear_set_special_reg_idx(int idx, uint64_t clear, uint64_t set)
{
const struct mrs_field *fields;
uint64_t k_old, k_new;
uint64_t f_old, f_new;
uint64_t l_old, l_new;
MPASS(idx < nitems(user_regs));
k_old = CPU_DESC_FIELD(kern_cpu_desc, idx);
k_new = (k_old & ~clear) | set;
f_old = CPU_DESC_FIELD(user_cpu_desc, idx);
f_new = (f_old & ~clear) | set;
l_old = CPU_DESC_FIELD(l_user_cpu_desc, idx);
l_new = (l_old & ~clear) | set;
fields = user_regs[idx].fields;
for (int j = 0; fields[j].type != 0; j++) {
u_int type;
/* Update the FreeBSD userspace ID register view */
type = ((fields[j].type & MRS_FREEBSD) != 0) ?
fields[j].type :
(MRS_EXACT | (fields[j].type & MRS_SAFE_MASK));
f_new = update_special_reg_field(f_new,
type, f_old, fields[j].width, fields[j].shift,
fields[j].sign);
/* Update the Linux userspace ID register view */
type = ((fields[j].type & MRS_LINUX) != 0) ?
fields[j].type :
(MRS_EXACT | (fields[j].type & MRS_SAFE_MASK));
l_new = update_special_reg_field(l_new,
type, l_old, fields[j].width, fields[j].shift,
fields[j].sign);
/* Update the kernel ID register view */
k_new = update_special_reg_field(k_new,
fields[j].type, k_old, fields[j].width,
fields[j].shift, fields[j].sign);
}
CPU_DESC_FIELD(kern_cpu_desc, idx) = k_new;
CPU_DESC_FIELD(user_cpu_desc, idx) = f_new;
CPU_DESC_FIELD(l_user_cpu_desc, idx) = l_new;
}
void
update_special_regs(u_int cpu)
{
struct cpu_desc *desc;
uint64_t value;
int i;
if (cpu == 0) {
/* Create a user visible cpu description with safe values */
memset(&user_cpu_desc, 0, sizeof(user_cpu_desc));
/* Safe values for these registers */
user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_AdvSIMD_NONE |
ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 |
ID_AA64PFR0_EL0_64;
user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DebugVer_8;
/* Create the Linux user visible cpu description */
memcpy(&l_user_cpu_desc, &user_cpu_desc, sizeof(user_cpu_desc));
}
desc = get_cpu_desc(cpu);
for (i = 0; i < nitems(user_regs); i++) {
value = CPU_DESC_FIELD(*desc, i);
if (cpu == 0) {
CPU_DESC_FIELD(kern_cpu_desc, i) = value;
CPU_DESC_FIELD(user_cpu_desc, i) = value;
CPU_DESC_FIELD(l_user_cpu_desc, i) = value;
}
clear_set_special_reg_idx(i, UINT64_MAX, value);
}
}
/*
* Updates a special register in all views. This creates a copy of the
* register then clears it and sets new bits. It will then compare this
* with the old version as if it was the ID register for a new CPU.
*
* It is intended to let code that disables features, e.g. due to errata,
* clear the user visible field.
*
* This needs to be called before the HWCAPs are set. If called from a CPU
* feature handler it is safe to call from CPU_FEAT_EARLY_BOOT. It also needs
* to run before link_elf_late_ireloc is called. As that is called after the
* HWCAPs are set, the check for those is enough.
*/
void
update_special_reg_iss(u_int iss, uint64_t clear, uint64_t set)
{
MPASS(hwcaps_set == false);
/* There is no locking here, so we only support changing this on CPU0 */
/* TODO: Add said locking */
MPASS(PCPU_GET(cpuid) == 0);
for (int i = 0; i < nitems(user_regs); i++) {
if (user_regs[i].iss != iss)
continue;
clear_set_special_reg_idx(i, clear, set);
return;
}
}
void
cpu_desc_init(void)
{
if (mp_ncpus == 1)
return;
/*
* Allocate memory for the non-boot CPUs to store their registers.
* As this is indexed by CPU ID we need to allocate space for CPUs
* 1 to mp_maxid. Because of this mp_maxid is already the correct
* number of elements.
*/
cpu_desc = mallocarray(mp_maxid, sizeof(*cpu_desc), M_IDENTCPU,
M_ZERO | M_WAITOK);
}
/* HWCAP */
bool __read_frequently lse_supported = false;
bool __read_frequently icache_aliasing = false;
bool __read_frequently icache_vmid = false;
int64_t dcache_line_size; /* The minimum D cache line size */
int64_t icache_line_size; /* The minimum I cache line size */
int64_t idcache_line_size; /* The minimum cache line size */
/*
* Find the values to export to userspace as AT_HWCAP and AT_HWCAP2.
*/
static void
parse_cpu_features(bool is64bit, struct cpu_desc *cpu_desc, u_long *hwcap,
u_long *hwcap2)
{
const struct mrs_field_hwcap *hwcaps;
const struct mrs_field *fields;
uint64_t min, reg;
u_long *cur_hwcap;
int i, j, k;
for (i = 0; i < nitems(user_regs); i++) {
if (user_regs[i].is64bit != is64bit)
continue;
reg = CPU_DESC_FIELD(*cpu_desc, i);
fields = user_regs[i].fields;
for (j = 0; fields[j].type != 0; j++) {
hwcaps = fields[j].hwcaps;
if (hwcaps == NULL)
continue;
for (k = 0; hwcaps[k].hwcap_id != 0; k++) {
KASSERT(hwcaps[k].hwcap_id == 1 ||
hwcaps[k].hwcap_id == 2,
("%s: Invalid HWCAP ID %d", __func__,
hwcaps[k].hwcap_id));
cur_hwcap = hwcaps[k].hwcap_id == 1 ?
hwcap : hwcap2;
min = hwcaps[k].min;
/*
* If the field is greater than or equal to the minimum
* value we can set the hwcap.
*/
if (mrs_field_cmp(reg, min, fields[j].shift,
4, fields[j].sign) >= 0) {
*cur_hwcap |= hwcaps[k].hwcap_val;
}
}
}
}
}
static void
identify_cpu_sysinit(void *dummy __unused)
{
struct cpu_desc *desc, *prev_desc;
int cpu;
bool dic, idc;
dic = (allow_dic != 0);
idc = (allow_idc != 0);
prev_desc = NULL;
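/*
 * The DIC/IDC i-cache maintenance shortcuts are only used when every
 * CPU in the system reports them (and the tunables allow it).
 */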
CPU_FOREACH(cpu) {
desc = get_cpu_desc(cpu);
if (cpu != 0) {
check_cpu_regs(cpu, desc, prev_desc);
update_special_regs(cpu);
}
if (CTR_DIC_VAL(desc->ctr) == 0)
dic = false;
if (CTR_IDC_VAL(desc->ctr) == 0)
idc = false;
prev_desc = desc;
}
#ifdef INVARIANTS
/* Check we don't update the special registers after this point */
hwcaps_set = true;
#endif
/* Find the values to export to userspace as AT_HWCAP and AT_HWCAP2 */
parse_cpu_features(true, &user_cpu_desc, &elf_hwcap, &elf_hwcap2);
parse_cpu_features(true, &l_user_cpu_desc, &linux_elf_hwcap,
&linux_elf_hwcap2);
#ifdef COMPAT_FREEBSD32
parse_cpu_features(false, &user_cpu_desc, &elf32_hwcap, &elf32_hwcap2);
#endif
/* We export the CPUID registers */
elf_hwcap |= HWCAP_CPUID;
linux_elf_hwcap |= HWCAP_CPUID;
#ifdef COMPAT_FREEBSD32
/* Set the default caps and any that need to check multiple fields */
elf32_hwcap |= parse_cpu_features_hwcap32();
#endif
if (dic && idc) {
arm64_icache_sync_range = &arm64_dic_idc_icache_sync_range;
if (bootverbose)
printf("Enabling DIC & IDC ICache sync\n");
} else if (idc) {
arm64_icache_sync_range = &arm64_idc_aliasing_icache_sync_range;
if (bootverbose)
printf("Enabling IDC ICache sync\n");
}
if ((elf_hwcap & HWCAP_ATOMICS) != 0) {
lse_supported = true;
if (bootverbose)
printf("Enabling LSE atomics in the kernel\n");
}
#ifdef LSE_ATOMICS
if (!lse_supported)
panic("CPU does not support LSE atomic instructions");
#endif
install_sys_handler(user_ctr_handler);
install_sys_handler(user_idreg_handler);
}
/*
* This needs to be after the APs have started as they may have errata that
* mean we need to mask out ID registers & that could affect hwcaps, etc.
*/
SYSINIT(identify_cpu, SI_SUB_CONFIGURE, SI_ORDER_ANY, identify_cpu_sysinit,
NULL);
static void
cpu_features_sysinit(void *dummy __unused)
{
struct sbuf sb;
struct cpu_desc *desc, *prev_desc;
u_int cpu;
prev_desc = NULL;
CPU_FOREACH(cpu) {
desc = get_cpu_desc(cpu);
print_cpu_features(cpu, desc, prev_desc);
prev_desc = desc;
}
/* Fill in cpu_model for the hw.model sysctl */
sbuf_new(&sb, cpu_model, sizeof(cpu_model), SBUF_FIXEDLEN);
print_cpu_midr(&sb, 0);
sbuf_finish(&sb);
sbuf_delete(&sb);
free(cpu_desc, M_IDENTCPU);
}
/* Log features before APs are released and start printing to the dmesg. */
SYSINIT(cpu_features, SI_SUB_SMP - 1, SI_ORDER_ANY, cpu_features_sysinit, NULL);
static void
tcr_set_e0pd1(void *arg __unused)
{
uint64_t tcr;
tcr = READ_SPECIALREG(tcr_el1);
tcr |= TCR_E0PD1;
WRITE_SPECIALREG(tcr_el1, tcr);
isb();
}
/* Enable support for more recent architecture features */
static void
cpu_feat_support(void *arg __unused)
{
/*
* If FEAT_E0PD is supported use it to cause faults without a page
* table walk if userspace tries to access kernel memory.
*/
if (ID_AA64MMFR2_E0PD_VAL(kern_cpu_desc.id_aa64mmfr2) !=
ID_AA64MMFR2_E0PD_NONE)
smp_rendezvous(NULL, tcr_set_e0pd1, NULL, NULL);
}
SYSINIT(cpu_feat_support, SI_SUB_SMP, SI_ORDER_ANY, cpu_feat_support, NULL);
#ifdef COMPAT_FREEBSD32
static u_long
parse_cpu_features_hwcap32(void)
{
u_long hwcap = HWCAP32_DEFAULT;
if ((MVFR1_SIMDLS_VAL(user_cpu_desc.mvfr1) >=
MVFR1_SIMDLS_IMPL) &&
(MVFR1_SIMDInt_VAL(user_cpu_desc.mvfr1) >=
MVFR1_SIMDInt_IMPL) &&
(MVFR1_SIMDSP_VAL(user_cpu_desc.mvfr1) >=
MVFR1_SIMDSP_IMPL))
hwcap |= HWCAP32_NEON;
return (hwcap);
}
#endif /* COMPAT_FREEBSD32 */
static void
print_register(struct sbuf *sb, const char *reg_name, uint64_t reg,
void (*print_fields)(struct sbuf *, uint64_t, const void *),
const void *arg)
{
sbuf_printf(sb, "%29s = <", reg_name);
print_fields(sb, reg, arg);
sbuf_finish(sb);
printf("%s>\n", sbuf_data(sb));
sbuf_clear(sb);
}
static void
print_id_fields(struct sbuf *sb, uint64_t reg, const void *arg)
{
const struct mrs_field *fields = arg;
const struct mrs_field_value *fv;
int field, i, j, printed;
#define SEP_STR ((printed++) == 0) ? "" : ","
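/*
* SEP_STR prints nothing before the first value and a comma before
* each subsequent one.
*/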
printed = 0;
for (i = 0; fields[i].type != 0; i++) {
fv = fields[i].values;
if (fv == NULL)
goto next;
field = (reg & fields[i].mask) >> fields[i].shift;
for (j = 0; fv[j].desc != NULL; j++) {
if ((fv[j].value >> fields[i].shift) != field)
continue;
if (fv[j].desc[0] != '\0')
sbuf_printf(sb, "%s%s", SEP_STR, fv[j].desc);
break;
}
if (fv[j].desc == NULL)
sbuf_printf(sb, "%sUnknown %s(%x)", SEP_STR,
fields[i].name, field);
next:
reg &= ~(((1ul << fields[i].width) - 1) << fields[i].shift);
}
if (reg != 0)
sbuf_printf(sb, "%s%#lx", SEP_STR, reg);
#undef SEP_STR
}
static void
print_id_register(struct sbuf *sb, const char *reg_name, uint64_t reg,
const struct mrs_field *fields)
{
print_register(sb, reg_name, reg, print_id_fields, fields);
}
static void
print_cpu_midr(struct sbuf *sb, u_int cpu)
{
const struct cpu_parts *cpu_partsp;
const char *cpu_impl_name;
const char *cpu_part_name;
u_int midr;
u_int impl_id;
u_int part_id;
midr = pcpu_find(cpu)->pc_midr;
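/*
* MIDR_EL1 packs Implementer[31:24], Variant[23:20], Architecture[19:16],
* PartNum[15:4] and Revision[3:0]; the CPU_IMPL, CPU_VAR, CPU_PART and
* CPU_REV macros extract these fields below.
*/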
cpu_impl_name = NULL;
cpu_partsp = NULL;
impl_id = CPU_IMPL(midr);
for (int i = 0; cpu_implementers[i].impl_name != NULL; i++) {
if (impl_id == cpu_implementers[i].impl_id) {
cpu_impl_name = cpu_implementers[i].impl_name;
cpu_partsp = cpu_implementers[i].cpu_parts;
break;
}
}
/* Unknown implementer, so unknown part */
if (cpu_impl_name == NULL) {
sbuf_printf(sb, "Unknown Implementer (midr: %08x)", midr);
return;
}
KASSERT(cpu_partsp != NULL, ("%s: No parts table for implementer %s",
__func__, cpu_impl_name));
cpu_part_name = NULL;
part_id = CPU_PART(midr);
for (int i = 0; cpu_partsp[i].part_name != NULL; i++) {
if (part_id == cpu_partsp[i].part_id) {
cpu_part_name = cpu_partsp[i].part_name;
break;
}
}
/* Known Implementer, Unknown part */
if (cpu_part_name == NULL) {
sbuf_printf(sb, "%s Unknown CPU r%dp%d (midr: %08x)",
cpu_impl_name, CPU_VAR(midr), CPU_REV(midr), midr);
return;
}
sbuf_printf(sb, "%s %s r%dp%d", cpu_impl_name,
cpu_part_name, CPU_VAR(midr), CPU_REV(midr));
}
static void
print_cpu_cache(struct cpu_desc *desc, struct sbuf *sb, uint64_t ccs,
bool icache, bool unified)
{
size_t cache_size;
size_t line_size;
/* LineSize is Log2(line size in bytes) - 4. */
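/* e.g. a LineSize field of 2 encodes 1 << (2 + 4) = 64-byte lines. */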
line_size = 1 << ((ccs & CCSIDR_LineSize_MASK) + 4);
/*
* Calculate cache size (sets * ways * line size). There are different
* formats depending on the FEAT_CCIDX bit in ID_AA64MMFR2 feature
* register.
*/
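/* e.g. (127 + 1) sets * (3 + 1) ways * 64-byte lines = 32KB. */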
if ((desc->id_aa64mmfr2 & ID_AA64MMFR2_CCIDX_64))
cache_size = (CCSIDR_NSETS_64(ccs) + 1) *
(CCSIDR_ASSOC_64(ccs) + 1);
else
cache_size = (CCSIDR_NSETS(ccs) + 1) * (CCSIDR_ASSOC(ccs) + 1);
cache_size *= line_size;
sbuf_printf(sb, "%zuKB (%s)", cache_size / 1024,
icache ? "instruction" : unified ? "unified" : "data");
}
static void
print_cpu_caches(struct sbuf *sb, struct cpu_desc *desc)
{
/* Print out each cache combination */
uint64_t clidr;
int i = 1;
clidr = desc->clidr;
for (i = 0; (clidr & CLIDR_CTYPE_MASK) != 0; i++, clidr >>= 3) {
int j = 0;
int ctype_m = (clidr & CLIDR_CTYPE_MASK);
sbuf_printf(sb, " L%d cache: ", i + 1);
if ((clidr & CLIDR_CTYPE_IO)) {
print_cpu_cache(desc, sb, desc->ccsidr[i][j++], true,
false);
/* If there's more, add to the line. */
if ((ctype_m & ~CLIDR_CTYPE_IO) != 0)
sbuf_printf(sb, ", ");
}
if ((ctype_m & ~CLIDR_CTYPE_IO) != 0) {
print_cpu_cache(desc, sb, desc->ccsidr[i][j], false,
(clidr & CLIDR_CTYPE_UNIFIED));
}
sbuf_printf(sb, "\n");
}
sbuf_finish(sb);
printf("%s", sbuf_data(sb));
}
static void
print_cpu_features(u_int cpu, struct cpu_desc *desc,
struct cpu_desc *prev_desc)
{
struct sbuf *sb;
sb = sbuf_new_auto();
sbuf_printf(sb, "CPU%3u: ", cpu);
print_cpu_midr(sb, cpu);
sbuf_cat(sb, " affinity:");
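/*
* cpu_aff_levels is the number of MPIDR affinity levels needed to
* distinguish the CPUs; the fallthrough cases print from the highest
* used level down to Aff0.
*/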
switch(cpu_aff_levels) {
default:
case 4:
sbuf_printf(sb, " %2d", CPU_AFF3(desc->mpidr));
/* FALLTHROUGH */
case 3:
sbuf_printf(sb, " %2d", CPU_AFF2(desc->mpidr));
/* FALLTHROUGH */
case 2:
sbuf_printf(sb, " %2d", CPU_AFF1(desc->mpidr));
/* FALLTHROUGH */
case 1:
case 0: /* On UP this will be zero */
sbuf_printf(sb, " %2d", CPU_AFF0(desc->mpidr));
break;
}
sbuf_finish(sb);
printf("%s\n", sbuf_data(sb));
sbuf_clear(sb);
/*
* There is a hardware erratum where, if one CPU is performing a TLB
* invalidation while another is performing a store-exclusive, the
* store-exclusive may return the wrong status. A workaround seems
* to be to use an IPI to invalidate on each CPU; however, given the
* limited number of affected units (pass 1.1 is the evaluation
* hardware revision) and the lack of information from Cavium,
* this has not been implemented.
*
* At the time of writing, the only information is from:
* https://lkml.org/lkml/2016/8/4/722
*/
/*
* XXX: CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1 on its own also
* triggers on pass 2.0+.
*/
if (cpu == 0 && CPU_VAR(PCPU_GET(midr)) == 0 &&
CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1)
printf("WARNING: ThunderX Pass 1.1 detected.\nThis has known "
"hardware bugs that may cause the incorrect operation of "
"atomic operations.\n");
#define SHOULD_PRINT_REG(_reg) \
(prev_desc == NULL || desc->_reg != prev_desc->_reg)
/* Cache Type Register */
if (SHOULD_PRINT_REG(ctr))
print_id_register(sb, "Cache Type", desc->ctr, ctr_fields);
/* AArch64 Instruction Set Attribute Register 0 */
if (SHOULD_PRINT_REG(id_aa64isar0))
print_id_register(sb, "Instruction Set Attributes 0",
desc->id_aa64isar0, id_aa64isar0_fields);
/* AArch64 Instruction Set Attribute Register 1 */
if (SHOULD_PRINT_REG(id_aa64isar1))
print_id_register(sb, "Instruction Set Attributes 1",
desc->id_aa64isar1, id_aa64isar1_fields);
/* AArch64 Instruction Set Attribute Register 2 */
if (SHOULD_PRINT_REG(id_aa64isar2))
print_id_register(sb, "Instruction Set Attributes 2",
desc->id_aa64isar2, id_aa64isar2_fields);
/* AArch64 Processor Feature Register 0 */
if (SHOULD_PRINT_REG(id_aa64pfr0))
print_id_register(sb, "Processor Features 0",
desc->id_aa64pfr0, id_aa64pfr0_fields);
/* AArch64 Processor Feature Register 1 */
if (SHOULD_PRINT_REG(id_aa64pfr1))
print_id_register(sb, "Processor Features 1",
desc->id_aa64pfr1, id_aa64pfr1_fields);
/* AArch64 Processor Feature Register 2 */
if (SHOULD_PRINT_REG(id_aa64pfr2))
print_id_register(sb, "Processor Features 2",
desc->id_aa64pfr2, id_aa64pfr2_fields);
/* AArch64 Memory Model Feature Register 0 */
if (SHOULD_PRINT_REG(id_aa64mmfr0))
print_id_register(sb, "Memory Model Features 0",
desc->id_aa64mmfr0, id_aa64mmfr0_fields);
/* AArch64 Memory Model Feature Register 1 */
if (SHOULD_PRINT_REG(id_aa64mmfr1))
print_id_register(sb, "Memory Model Features 1",
desc->id_aa64mmfr1, id_aa64mmfr1_fields);
/* AArch64 Memory Model Feature Register 2 */
if (SHOULD_PRINT_REG(id_aa64mmfr2))
print_id_register(sb, "Memory Model Features 2",
desc->id_aa64mmfr2, id_aa64mmfr2_fields);
/* AArch64 Memory Model Feature Register 3 */
if (SHOULD_PRINT_REG(id_aa64mmfr3))
print_id_register(sb, "Memory Model Features 3",
desc->id_aa64mmfr3, id_aa64mmfr3_fields);
/* AArch64 Memory Model Feature Register 4 */
if (SHOULD_PRINT_REG(id_aa64mmfr4))
print_id_register(sb, "Memory Model Features 4",
desc->id_aa64mmfr4, id_aa64mmfr4_fields);
/* AArch64 Debug Feature Register 0 */
if (SHOULD_PRINT_REG(id_aa64dfr0))
print_id_register(sb, "Debug Features 0",
desc->id_aa64dfr0, id_aa64dfr0_fields);
/* AArch64 Debug Feature Register 1 */
if (SHOULD_PRINT_REG(id_aa64dfr1))
print_id_register(sb, "Debug Features 1",
desc->id_aa64dfr1, id_aa64dfr1_fields);
/* AArch64 Auxiliary Feature Register 0 */
if (SHOULD_PRINT_REG(id_aa64afr0))
print_id_register(sb, "Auxiliary Features 0",
desc->id_aa64afr0, id_aa64afr0_fields);
/* AArch64 Auxiliary Feature Register 1 */
if (SHOULD_PRINT_REG(id_aa64afr1))
print_id_register(sb, "Auxiliary Features 1",
desc->id_aa64afr1, id_aa64afr1_fields);
/* AArch64 SVE Feature Register 0 */
if (desc->have_sve) {
if (SHOULD_PRINT_REG(id_aa64zfr0) ||
!prev_desc->have_sve) {
print_id_register(sb, "SVE Features 0",
desc->id_aa64zfr0, id_aa64zfr0_fields);
}
}
#ifdef COMPAT_FREEBSD32
/* AArch32 Instruction Set Attribute Register 5 */
if (SHOULD_PRINT_REG(id_isar5))
print_id_register(sb, "AArch32 Instruction Set Attributes 5",
desc->id_isar5, id_isar5_fields);
/* AArch32 Media and VFP Feature Register 0 */
if (SHOULD_PRINT_REG(mvfr0))
print_id_register(sb, "AArch32 Media and VFP Features 0",
desc->mvfr0, mvfr0_fields);
/* AArch32 Media and VFP Feature Register 1 */
if (SHOULD_PRINT_REG(mvfr1))
print_id_register(sb, "AArch32 Media and VFP Features 1",
desc->mvfr1, mvfr1_fields);
#endif
if (bootverbose)
print_cpu_caches(sb, desc);
sbuf_delete(sb);
sb = NULL;
#undef SHOULD_PRINT_REG
#undef SEP_STR
}
void
identify_cache(uint64_t ctr)
{
/* Identify the L1 cache type */
switch (CTR_L1IP_VAL(ctr)) {
case CTR_L1IP_PIPT:
break;
default:
case CTR_L1IP_VIPT:
icache_aliasing = true;
break;
}
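/*
* CTR_EL0's DminLine and IminLine fields encode log2 of the smallest
* line size in 4-byte words; the CTR_DLINE_SIZE()/CTR_ILINE_SIZE()
* macros return the corresponding size in bytes.
*/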
if (dcache_line_size == 0) {
KASSERT(icache_line_size == 0, ("%s: i-cacheline size set: %ld",
__func__, icache_line_size));
/* Get the D cache line size */
dcache_line_size = CTR_DLINE_SIZE(ctr);
/* And the same for the I cache */
icache_line_size = CTR_ILINE_SIZE(ctr);
idcache_line_size = MIN(dcache_line_size, icache_line_size);
}
if (dcache_line_size != CTR_DLINE_SIZE(ctr)) {
printf("WARNING: D-cacheline size mismatch %ld != %d\n",
dcache_line_size, CTR_DLINE_SIZE(ctr));
}
if (icache_line_size != CTR_ILINE_SIZE(ctr)) {
printf("WARNING: I-cacheline size mismatch %ld != %d\n",
icache_line_size, CTR_ILINE_SIZE(ctr));
}
}
void
identify_cpu(u_int cpu)
{
struct cpu_desc *desc;
uint64_t clidr;
desc = get_cpu_desc(cpu);
/* Save affinity for current CPU */
desc->mpidr = get_mpidr();
CPU_AFFINITY(cpu) = desc->mpidr & CPU_AFF_MASK;
desc->ctr = READ_SPECIALREG(ctr_el0);
desc->id_aa64dfr0 = READ_SPECIALREG(ID_AA64DFR0_EL1_REG);
desc->id_aa64dfr1 = READ_SPECIALREG(ID_AA64DFR1_EL1_REG);
desc->id_aa64isar0 = READ_SPECIALREG(ID_AA64ISAR0_EL1_REG);
desc->id_aa64isar1 = READ_SPECIALREG(ID_AA64ISAR1_EL1_REG);
desc->id_aa64isar2 = READ_SPECIALREG(ID_AA64ISAR2_EL1_REG);
desc->id_aa64mmfr0 = READ_SPECIALREG(ID_AA64MMFR0_EL1_REG);
desc->id_aa64mmfr1 = READ_SPECIALREG(ID_AA64MMFR1_EL1_REG);
desc->id_aa64mmfr2 = READ_SPECIALREG(ID_AA64MMFR2_EL1_REG);
desc->id_aa64mmfr3 = READ_SPECIALREG(ID_AA64MMFR3_EL1_REG);
desc->id_aa64mmfr4 = READ_SPECIALREG(ID_AA64MMFR4_EL1_REG);
desc->id_aa64pfr0 = READ_SPECIALREG(ID_AA64PFR0_EL1_REG);
desc->id_aa64pfr1 = READ_SPECIALREG(ID_AA64PFR1_EL1_REG);
desc->id_aa64pfr2 = READ_SPECIALREG(ID_AA64PFR2_EL1_REG);
/*
* ID_AA64ZFR0_EL1 is only valid when at least one of:
* - ID_AA64PFR0_EL1.SVE is non-zero
* - ID_AA64PFR1_EL1.SME is non-zero
* In other cases it is zero, but still safe to read
*/
desc->have_sve =
(ID_AA64PFR0_SVE_VAL(desc->id_aa64pfr0) != 0);
desc->id_aa64zfr0 = READ_SPECIALREG(ID_AA64ZFR0_EL1_REG);
desc->clidr = READ_SPECIALREG(clidr_el1);
clidr = desc->clidr;
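/*
* Each 3-bit Ctype field in CLIDR_EL1 describes one cache level; walk
* the levels until a zero field is found. CSSELR_EL1 selects the cache
* whose geometry CCSIDR_EL1 reports: the level, with the InD bit set
* for the instruction cache.
*/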
for (int i = 0; (clidr & CLIDR_CTYPE_MASK) != 0; i++, clidr >>= 3) {
int j = 0;
if ((clidr & CLIDR_CTYPE_IO)) {
WRITE_SPECIALREG(csselr_el1,
CSSELR_Level(i) | CSSELR_InD);
desc->ccsidr[i][j++] =
READ_SPECIALREG(ccsidr_el1);
}
if ((clidr & ~CLIDR_CTYPE_IO) == 0)
continue;
WRITE_SPECIALREG(csselr_el1, CSSELR_Level(i));
desc->ccsidr[i][j] = READ_SPECIALREG(ccsidr_el1);
}
#ifdef COMPAT_FREEBSD32
/* Only read aarch32 SRs if EL0-32 is available */
if (ID_AA64PFR0_EL0_VAL(desc->id_aa64pfr0) == ID_AA64PFR0_EL0_64_32) {
desc->id_isar5 = READ_SPECIALREG(id_isar5_el1);
desc->mvfr0 = READ_SPECIALREG(mvfr0_el1);
desc->mvfr1 = READ_SPECIALREG(mvfr1_el1);
}
#endif
}
static void
check_cpu_regs(u_int cpu, struct cpu_desc *desc, struct cpu_desc *prev_desc)
{
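/*
* Record how many MPIDR affinity levels are needed to tell the CPUs
* apart: each level that differs from the previous CPU bumps
* cpu_aff_levels, and the fallthrough re-checks the higher levels.
*/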
switch (cpu_aff_levels) {
case 0:
if (CPU_AFF0(desc->mpidr) != CPU_AFF0(prev_desc->mpidr))
cpu_aff_levels = 1;
/* FALLTHROUGH */
case 1:
if (CPU_AFF1(desc->mpidr) != CPU_AFF1(prev_desc->mpidr))
cpu_aff_levels = 2;
/* FALLTHROUGH */
case 2:
if (CPU_AFF2(desc->mpidr) != CPU_AFF2(prev_desc->mpidr))
cpu_aff_levels = 3;
/* FALLTHROUGH */
case 3:
if (CPU_AFF3(desc->mpidr) != CPU_AFF3(prev_desc->mpidr))
cpu_aff_levels = 4;
break;
}
if (desc->ctr != prev_desc->ctr) {
/*
* If the cache is different on different cores we should
* emulate it for userspace to provide a uniform value.
*/
emulate_ctr = true;
/*
* If the cache type register is different we may
* have a different L1 cache type.
*/
identify_cache(desc->ctr);
}
}
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index c0aeae072570..627b02e82d34 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -1,1033 +1,1036 @@
/*-
* Copyright (c) 2014 Andrew Turner
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include "opt_acpi.h"
#include "opt_kstack_pages.h"
#include "opt_platform.h"
#include "opt_ddb.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef VFP
#include
#endif
#ifdef DEV_ACPI
#include
#include
#endif
#ifdef FDT
#include
#include
#endif
#include
_Static_assert(sizeof(struct pcb) == 1248, "struct pcb is incorrect size");
_Static_assert(offsetof(struct pcb, pcb_fpusaved) == 136,
"pcb_fpusaved changed offset");
_Static_assert(offsetof(struct pcb, pcb_fpustate) == 192,
"pcb_fpustate changed offset");
enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
/*
* XXX: The .bss is assumed to be in the boot CPU NUMA domain. If not, we
* could relocate this, but we will need to keep the same virtual address as
* it's referenced by the EARLY_COUNTER macro.
*/
struct pcpu pcpu0;
#if defined(PERTHREAD_SSP)
/*
* The boot SSP canary. Will be replaced with a per-thread canary when
* scheduling has started.
*/
uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
#endif
static struct trapframe proc0_tf;
int early_boot = 1;
int cold = 1;
static int boot_el;
struct kva_md_info kmi;
int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */
int has_pan;
#if defined(SOCDEV_PA)
/*
* This is the virtual address used to access SOCDEV_PA. As it's set before
* .bss is cleared, we need to ensure it's preserved. To do this, use
* __read_mostly, as it's only ever set once but read in the putc functions.
*/
uintptr_t socdev_va __read_mostly;
#endif
/*
* Physical address of the EFI System Table. Stashed from the metadata hints
* passed into the kernel and used by the EFI code to call runtime services.
*/
vm_paddr_t efi_systbl_phys;
static struct efi_map_header *efihdr;
/* pagezero_* implementations are provided in support.S */
void pagezero_simple(void *);
void pagezero_cache(void *);
/* pagezero_simple is default pagezero */
void (*pagezero)(void *p) = pagezero_simple;
int (*apei_nmi)(void);
#if defined(PERTHREAD_SSP_WARNING)
static void
print_ssp_warning(void *data __unused)
{
printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
}
SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
#endif
-static bool
+static cpu_feat_en
pan_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64mfr1;
id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- return (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE);
+ if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) == ID_AA64MMFR1_PAN_NONE)
+ return (FEAT_ALWAYS_DISABLE);
+
+ return (FEAT_DEFAULT_ENABLE);
}
static bool
pan_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
{
has_pan = 1;
/*
* This sets the PAN bit, stopping the kernel from accessing
* memory when userspace can also access it unless the kernel
* uses the userspace load/store instructions.
*/
WRITE_SPECIALREG(sctlr_el1,
READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
__asm __volatile(
".arch_extension pan \n"
"msr pan, #1 \n"
".arch_extension nopan \n");
return (true);
}
CPU_FEAT(feat_pan, "Privileged access never",
pan_check, NULL, pan_enable,
CPU_FEAT_EARLY_BOOT | CPU_FEAT_PER_CPU);
bool
has_hyp(void)
{
return (boot_el == CURRENTEL_EL_EL2);
}
bool
in_vhe(void)
{
/* If we are currently in EL2 then we must be in VHE */
return ((READ_SPECIALREG(CurrentEL) & CURRENTEL_EL_MASK) ==
CURRENTEL_EL_EL2);
}
static void
cpu_startup(void *dummy)
{
vm_paddr_t size;
int i;
printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)realmem),
ptoa((uintmax_t)realmem) / 1024 / 1024);
if (bootverbose) {
printf("Physical memory chunk(s):\n");
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
size = phys_avail[i + 1] - phys_avail[i];
printf("%#016jx - %#016jx, %ju bytes (%ju pages)\n",
(uintmax_t)phys_avail[i],
(uintmax_t)phys_avail[i + 1] - 1,
(uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
}
}
printf("avail memory = %ju (%ju MB)\n",
ptoa((uintmax_t)vm_free_count()),
ptoa((uintmax_t)vm_free_count()) / 1024 / 1024);
undef_init();
install_cpu_errata();
vm_ksubmap_init(&kmi);
bufinit();
vm_pager_bufferinit();
}
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
static void
late_ifunc_resolve(void *dummy __unused)
{
link_elf_late_ireloc();
}
/* Late enough for cpu_feat to have completed */
SYSINIT(late_ifunc_resolve, SI_SUB_CONFIGURE, SI_ORDER_ANY,
late_ifunc_resolve, NULL);
int
cpu_idle_wakeup(int cpu)
{
return (0);
}
void
cpu_idle(int busy)
{
spinlock_enter();
if (!busy)
cpu_idleclock();
if (!sched_runnable())
__asm __volatile(
"dsb sy \n"
"wfi \n");
if (!busy)
cpu_activeclock();
spinlock_exit();
}
void
cpu_halt(void)
{
/* We should have shut down by now; if not, enter a low-power sleep */
intr_disable();
while (1) {
__asm __volatile("wfi");
}
}
/*
* Flush the D-cache for non-DMA I/O so that the I-cache can
* be made coherent later.
*/
void
cpu_flush_dcache(void *ptr, size_t len)
{
/* ARM64TODO TBD */
}
/* Get current clock frequency for the given CPU ID. */
int
cpu_est_clockrate(int cpu_id, uint64_t *rate)
{
struct pcpu *pc;
pc = pcpu_find(cpu_id);
if (pc == NULL || rate == NULL)
return (EINVAL);
if (pc->pc_clock == 0)
return (EOPNOTSUPP);
*rate = pc->pc_clock;
return (0);
}
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{
pcpu->pc_acpi_id = 0xffffffff;
pcpu->pc_mpidr = UINT64_MAX;
}
void
spinlock_enter(void)
{
struct thread *td;
register_t daif;
td = curthread;
if (td->td_md.md_spinlock_count == 0) {
daif = intr_disable();
td->td_md.md_spinlock_count = 1;
td->td_md.md_saved_daif = daif;
critical_enter();
} else
td->td_md.md_spinlock_count++;
}
void
spinlock_exit(void)
{
struct thread *td;
register_t daif;
td = curthread;
daif = td->td_md.md_saved_daif;
td->td_md.md_spinlock_count--;
if (td->td_md.md_spinlock_count == 0) {
critical_exit();
intr_restore(daif);
}
}
/*
* Construct a PCB from a trapframe. This is called from kdb_trap() where
* we want to start a backtrace from the function that caused us to enter
* the debugger. We have the context in the trapframe, but base the trace
* on the PCB. The PCB doesn't have to be perfect, as long as it contains
* enough for a backtrace.
*/
void
makectx(struct trapframe *tf, struct pcb *pcb)
{
int i;
/* NB: pcb_x[PCB_LR] is the PC, see PC_REGS() in db_machdep.h */
for (i = 0; i < nitems(pcb->pcb_x); i++) {
if (i == PCB_LR)
pcb->pcb_x[i] = tf->tf_elr;
else
pcb->pcb_x[i] = tf->tf_x[i + PCB_X_START];
}
pcb->pcb_sp = tf->tf_sp;
}
static void
init_proc0(vm_offset_t kstack)
{
struct pcpu *pcpup;
pcpup = cpuid_to_pcpu[0];
MPASS(pcpup != NULL);
proc_linkup0(&proc0, &thread0);
thread0.td_kstack = kstack;
thread0.td_kstack_pages = KSTACK_PAGES;
#if defined(PERTHREAD_SSP)
thread0.td_md.md_canary = boot_canary;
#endif
thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
thread0.td_kstack_pages * PAGE_SIZE) - 1;
thread0.td_pcb->pcb_flags = 0;
thread0.td_pcb->pcb_fpflags = 0;
thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
thread0.td_frame = &proc0_tf;
ptrauth_thread0(&thread0);
pcpup->pc_curpcb = thread0.td_pcb;
/*
* Unmask SError exceptions. They are used to signal a RAS failure,
* or other hardware error.
*/
serror_enable();
}
/*
* Get an address to be used to write to kernel data that may be mapped
* read-only, e.g. to patch kernel code.
*/
bool
arm64_get_writable_addr(void *addr, void **out)
{
vm_paddr_t pa;
/* Check if the page is writable */
if (PAR_SUCCESS(arm64_address_translate_s1e1w((vm_offset_t)addr))) {
*out = addr;
return (true);
}
/*
* Find the physical address of the given page.
*/
if (!pmap_klookup((vm_offset_t)addr, &pa)) {
return (false);
}
/*
* If it is within the DMAP region and is writable use that.
*/
if (PHYS_IN_DMAP_RANGE(pa)) {
addr = (void *)PHYS_TO_DMAP(pa);
if (PAR_SUCCESS(arm64_address_translate_s1e1w(
(vm_offset_t)addr))) {
*out = addr;
return (true);
}
}
return (false);
}
/*
* Map the passed in VA in EFI space to a void * using the efi memory table to
* find the PA and return it in the DMAP, if it exists. We're used between the
* calls to pmap_bootstrap() and physmem_init_kernel_globals() to parse CFG
* tables. We assume that either the entry you are mapping fits within its page,
* or if it spills to the next page, that's contiguous in PA and in the DMAP.
* All observed tables obey the first part of this precondition.
*/
struct early_map_data
{
vm_offset_t va;
vm_offset_t pa;
};
static void
efi_early_map_entry(struct efi_md *p, void *argp)
{
struct early_map_data *emdp = argp;
vm_offset_t s, e;
if (emdp->pa != 0)
return;
if ((p->md_attr & EFI_MD_ATTR_RT) == 0)
return;
s = p->md_virt;
e = p->md_virt + p->md_pages * EFI_PAGE_SIZE;
if (emdp->va < s || emdp->va >= e)
return;
emdp->pa = p->md_phys + (emdp->va - p->md_virt);
}
static void *
efi_early_map(vm_offset_t va)
{
struct early_map_data emd = { .va = va };
efi_map_foreach_entry(efihdr, efi_early_map_entry, &emd);
if (emd.pa == 0)
return NULL;
return (void *)PHYS_TO_DMAP(emd.pa);
}
/*
* When booted via kexec from Linux, the prior kernel will pass in reserved
* memory areas in an EFI config table. We need to find that table and walk
* through it, excluding the memory ranges in it. Note that this is called too early
* for the printf to do anything (unless EARLY_PRINTF is defined) since msgbufp
* isn't initialized, let alone a console, but breakpoints in printf help
* diagnose rare failures.
*/
static void
exclude_efi_memreserve(vm_paddr_t efi_systbl_phys)
{
struct efi_systbl *systbl;
efi_guid_t efi_memreserve = LINUX_EFI_MEMRESERVE_TABLE;
systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys);
if (systbl == NULL) {
printf("can't map systbl\n");
return;
}
if (systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) {
printf("Bad signature for systbl %#lx\n", systbl->st_hdr.th_sig);
return;
}
/*
* We don't yet have the pmap system booted enough to create a pmap for
* the efi firmware's preferred address space from the GetMemoryMap()
* table. The st_cfgtbl is a VA in this space, so we need to do the
* mapping ourselves to a kernel VA with efi_early_map. We assume that
* the cfgtbl entries don't span a page. Other pointers are PAs, as
* noted below.
*/
if (systbl->st_cfgtbl == 0) /* Failsafe: st_entries should be 0 in this case */
return;
for (int i = 0; i < systbl->st_entries; i++) {
struct efi_cfgtbl *cfgtbl;
struct linux_efi_memreserve *mr;
cfgtbl = efi_early_map(systbl->st_cfgtbl + i * sizeof(*cfgtbl));
if (cfgtbl == NULL)
panic("Can't map the config table entry %d\n", i);
if (memcmp(&cfgtbl->ct_guid, &efi_memreserve, sizeof(efi_guid_t)) != 0)
continue;
/*
* cfgtbl points are either VA or PA, depending on the GUID of
* the table. memreserve GUID pointers are PA and not converted
* after a SetVirtualAddressMap(). The list's mr_next pointer
* is also a PA.
*/
mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(
(vm_offset_t)cfgtbl->ct_data);
while (true) {
for (int j = 0; j < mr->mr_count; j++) {
struct linux_efi_memreserve_entry *mre;
mre = &mr->mr_entry[j];
physmem_exclude_region(mre->mre_base, mre->mre_size,
EXFLAG_NODUMP | EXFLAG_NOALLOC);
}
if (mr->mr_next == 0)
break;
mr = (struct linux_efi_memreserve *)PHYS_TO_DMAP(mr->mr_next);
};
}
}
#ifdef FDT
static void
try_load_dtb(void)
{
vm_offset_t dtbp;
dtbp = MD_FETCH(preload_kmdp, MODINFOMD_DTBP, vm_offset_t);
#if defined(FDT_DTB_STATIC)
/*
* In case the device tree blob was not retrieved (from metadata) try
* to use the statically embedded one.
*/
if (dtbp == 0)
dtbp = (vm_offset_t)&fdt_static_dtb;
#endif
if (dtbp == (vm_offset_t)NULL) {
#ifndef TSLOG
printf("ERROR loading DTB\n");
#endif
return;
}
if (!OF_install(OFW_FDT, 0))
panic("Cannot install FDT");
if (OF_init((void *)dtbp) != 0)
panic("OF_init failed with the found device tree");
parse_fdt_bootargs();
}
#endif
static bool
bus_probe(void)
{
bool has_acpi, has_fdt;
char *order, *env;
has_acpi = has_fdt = false;
#ifdef FDT
has_fdt = (OF_peer(0) != 0);
#endif
#ifdef DEV_ACPI
has_acpi = (AcpiOsGetRootPointer() != 0);
#endif
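/*
* kern.cfg.order is a comma-separated preference list, e.g. "acpi,fdt";
* the first entry whose firmware tables were found is used.
*/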
env = kern_getenv("kern.cfg.order");
if (env != NULL) {
order = env;
while (order != NULL) {
if (has_acpi &&
strncmp(order, "acpi", 4) == 0 &&
(order[4] == ',' || order[4] == '\0')) {
arm64_bus_method = ARM64_BUS_ACPI;
break;
}
if (has_fdt &&
strncmp(order, "fdt", 3) == 0 &&
(order[3] == ',' || order[3] == '\0')) {
arm64_bus_method = ARM64_BUS_FDT;
break;
}
order = strchr(order, ',');
if (order != NULL)
order++; /* Skip comma */
}
freeenv(env);
/* If we set the bus method, it is valid */
if (arm64_bus_method != ARM64_BUS_NONE)
return (true);
}
/* If no order or an invalid order was set use the default */
if (arm64_bus_method == ARM64_BUS_NONE) {
if (has_acpi)
arm64_bus_method = ARM64_BUS_ACPI;
else if (has_fdt)
arm64_bus_method = ARM64_BUS_FDT;
}
/*
* If no option was set, the default is valid; otherwise we are
* setting one to get cninit() working, then calling panic to tell
* the user about the invalid bus setup.
*/
return (env == NULL);
}
static void
cache_setup(void)
{
int dczva_line_shift;
uint32_t dczid_el0;
identify_cache(READ_SPECIALREG(ctr_el0));
dczid_el0 = READ_SPECIALREG(dczid_el0);
/* Check if dc zva is not prohibited */
if (dczid_el0 & DCZID_DZP)
dczva_line_size = 0;
else {
/* Same form of calculation as for the cache line sizes above */
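/*
* DCZID_EL0.BS is log2 of the number of 4-byte words zeroed by DC ZVA,
* so the size in bytes is 4 << BS, e.g. BS = 4 gives 64-byte blocks.
*/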
dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
dczva_line_size = sizeof(int) << dczva_line_shift;
/* Change pagezero function */
pagezero = pagezero_cache;
}
}
int
memory_mapping_mode(vm_paddr_t pa)
{
struct efi_md *map, *p;
size_t efisz;
int ndesc, i;
if (efihdr == NULL)
return (VM_MEMATTR_WRITE_BACK);
/*
* Memory map data provided by UEFI via the GetMemoryMap
* Boot Services API.
*/
efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
map = (struct efi_md *)((uint8_t *)efihdr + efisz);
if (efihdr->descriptor_size == 0)
return (VM_MEMATTR_WRITE_BACK);
ndesc = efihdr->memory_size / efihdr->descriptor_size;
for (i = 0, p = map; i < ndesc; i++,
p = efi_next_descriptor(p, efihdr->descriptor_size)) {
if (pa < p->md_phys ||
pa >= p->md_phys + p->md_pages * EFI_PAGE_SIZE)
continue;
if (p->md_type == EFI_MD_TYPE_IOMEM ||
p->md_type == EFI_MD_TYPE_IOPORT)
return (VM_MEMATTR_DEVICE);
else if ((p->md_attr & EFI_MD_ATTR_WB) != 0 ||
p->md_type == EFI_MD_TYPE_RECLAIM)
return (VM_MEMATTR_WRITE_BACK);
else if ((p->md_attr & EFI_MD_ATTR_WT) != 0)
return (VM_MEMATTR_WRITE_THROUGH);
else if ((p->md_attr & EFI_MD_ATTR_WC) != 0)
return (VM_MEMATTR_WRITE_COMBINING);
break;
}
return (VM_MEMATTR_DEVICE);
}
#ifdef FDT
static void
fdt_physmem_hardware_region_cb(const struct mem_region *mr, void *arg __unused)
{
physmem_hardware_region(mr->mr_start, mr->mr_size);
}
static void
fdt_physmem_exclude_region_cb(const struct mem_region *mr, void *arg __unused)
{
physmem_exclude_region(mr->mr_start, mr->mr_size,
EXFLAG_NODUMP | EXFLAG_NOALLOC);
}
#endif
void
initarm(struct arm64_bootparams *abp)
{
struct efi_fb *efifb;
struct pcpu *pcpup;
char *env;
#ifdef FDT
phandle_t root;
char dts_version[255];
#endif
vm_offset_t lastaddr;
bool valid;
TSRAW(&thread0, TS_ENTER, __func__, NULL);
boot_el = abp->boot_el;
/* Parse loader or FDT boot parameters. Determine last used address. */
lastaddr = parse_boot_param(abp);
identify_cpu(0);
identify_hypervisor_smbios();
update_special_regs(0);
/* Set the pcpu data, this is needed by pmap_bootstrap */
pcpup = &pcpu0;
pcpu_init(pcpup, 0, sizeof(struct pcpu));
/*
* Set the pcpu pointer with a backup in tpidr_el1 to be
* loaded when entering the kernel from userland.
*/
__asm __volatile(
"mov x18, %0 \n"
"msr tpidr_el1, %0" :: "r"(pcpup));
/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
PCPU_SET(curthread, &thread0);
PCPU_SET(midr, get_midr());
link_elf_ireloc();
#ifdef FDT
try_load_dtb();
#endif
efi_systbl_phys = MD_FETCH(preload_kmdp, MODINFOMD_FW_HANDLE,
vm_paddr_t);
/* Load the physical memory ranges */
efihdr = (struct efi_map_header *)preload_search_info(preload_kmdp,
MODINFO_METADATA | MODINFOMD_EFI_MAP);
if (efihdr != NULL)
efi_map_add_entries(efihdr);
#ifdef FDT
else {
/* Grab physical memory regions information from device tree. */
if (fdt_foreach_mem_region(fdt_physmem_hardware_region_cb,
NULL) != 0)
panic("Cannot get physical memory regions");
}
fdt_foreach_reserved_mem(fdt_physmem_exclude_region_cb, NULL);
#endif
/* Exclude the EFI framebuffer from our view of physical memory. */
efifb = (struct efi_fb *)preload_search_info(preload_kmdp,
MODINFO_METADATA | MODINFOMD_EFI_FB);
if (efifb != NULL)
physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
EXFLAG_NOALLOC);
/* Do basic tuning, hz etc */
init_param1();
cache_setup();
/*
* Perform a staged bootstrap of virtual memory.
*
* - First we create the DMAP region. This allows it to be used in
* later bootstrapping.
* - Next exclude memory that is needed in the DMAP region, but must
* not be used by FreeBSD.
* - Lastly complete the bootstrapping. It may use the physical
* memory map so any excluded memory must be marked as such before
* pmap_bootstrap() is called.
*/
pmap_bootstrap_dmap(lastaddr - KERNBASE);
/*
* Exclude EFI entries needed in the DMAP, e.g. EFI_MD_TYPE_RECLAIM
* may contain the ACPI tables but shouldn't be used by the kernel
*/
if (efihdr != NULL)
efi_map_exclude_entries(efihdr);
/* Do the same for reserve entries in the EFI MEMRESERVE table */
if (efi_systbl_phys != 0)
exclude_efi_memreserve(efi_systbl_phys);
/* Continue bootstrapping pmap */
pmap_bootstrap();
/*
* We carefully bootstrap the sanitizer map after we've excluded
* absolutely everything else that could impact phys_avail. There's not
* always enough room for the initial shadow map after the kernel, so
* we'll end up searching for segments that we can safely use. Those
* segments also get excluded from phys_avail.
*/
#if defined(KASAN) || defined(KMSAN)
pmap_bootstrap_san();
#endif
physmem_init_kernel_globals();
valid = bus_probe();
cninit();
set_ttbr0(abp->kern_ttbr0);
cpu_tlb_flushID();
if (!valid)
panic("Invalid bus configuration: %s",
kern_getenv("kern.cfg.order"));
/* Detect early CPU feature support */
enable_cpu_feat(CPU_FEAT_EARLY_BOOT);
/*
* Dump the boot metadata. We have to wait for cninit() since console
* output is required. If it's grossly incorrect the kernel will never
* make it this far.
*/
if (getenv_is_true("debug.dump_modinfo_at_boot"))
preload_dump();
init_proc0(abp->kern_stack);
msgbufinit(msgbufp, msgbufsize);
mutex_init();
init_param2(physmem);
dbg_init();
kdb_init();
#ifdef KDB
if ((boothowto & RB_KDB) != 0)
kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
#endif
kcsan_cpu_init(0);
kasan_init();
kmsan_init();
env = kern_getenv("kernelname");
if (env != NULL)
strlcpy(kernelname, env, sizeof(kernelname));
#ifdef FDT
if (arm64_bus_method == ARM64_BUS_FDT) {
root = OF_finddevice("/");
if (OF_getprop(root, "freebsd,dts-version", dts_version, sizeof(dts_version)) > 0) {
if (strcmp(LINUX_DTS_VERSION, dts_version) != 0)
printf("WARNING: DTB version is %s while kernel expects %s, "
"please update the DTB in the ESP\n",
dts_version,
LINUX_DTS_VERSION);
} else {
printf("WARNING: Cannot find freebsd,dts-version property, "
"cannot check DTB compliance\n");
}
}
#endif
if (boothowto & RB_VERBOSE) {
if (efihdr != NULL)
efi_map_print_entries(efihdr);
physmem_print_tables();
}
early_boot = 0;
if (bootverbose && kstack_pages != KSTACK_PAGES)
printf("kern.kstack_pages = %d ignored for thread0\n",
kstack_pages);
TSEXIT();
}
void
dbg_init(void)
{
/* Clear OS lock */
WRITE_SPECIALREG(oslar_el1, 0);
/* This permits DDB to use debug registers for watchpoints. */
dbg_monitor_init();
/* TODO: Eventually will need to initialize debug registers here. */
}
#ifdef DDB
#include
DB_SHOW_COMMAND(specialregs, db_show_spregs)
{
#define PRINT_REG(reg) \
db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))
PRINT_REG(actlr_el1);
PRINT_REG(afsr0_el1);
PRINT_REG(afsr1_el1);
PRINT_REG(aidr_el1);
PRINT_REG(amair_el1);
PRINT_REG(ccsidr_el1);
PRINT_REG(clidr_el1);
PRINT_REG(contextidr_el1);
PRINT_REG(cpacr_el1);
PRINT_REG(csselr_el1);
PRINT_REG(ctr_el0);
PRINT_REG(currentel);
PRINT_REG(daif);
PRINT_REG(dczid_el0);
PRINT_REG(elr_el1);
PRINT_REG(esr_el1);
PRINT_REG(far_el1);
#if 0
/* ARM64TODO: Enable VFP before reading floating-point registers */
PRINT_REG(fpcr);
PRINT_REG(fpsr);
#endif
PRINT_REG(id_aa64afr0_el1);
PRINT_REG(id_aa64afr1_el1);
PRINT_REG(id_aa64dfr0_el1);
PRINT_REG(id_aa64dfr1_el1);
PRINT_REG(id_aa64isar0_el1);
PRINT_REG(id_aa64isar1_el1);
PRINT_REG(id_aa64pfr0_el1);
PRINT_REG(id_aa64pfr1_el1);
PRINT_REG(id_afr0_el1);
PRINT_REG(id_dfr0_el1);
PRINT_REG(id_isar0_el1);
PRINT_REG(id_isar1_el1);
PRINT_REG(id_isar2_el1);
PRINT_REG(id_isar3_el1);
PRINT_REG(id_isar4_el1);
PRINT_REG(id_isar5_el1);
PRINT_REG(id_mmfr0_el1);
PRINT_REG(id_mmfr1_el1);
PRINT_REG(id_mmfr2_el1);
PRINT_REG(id_mmfr3_el1);
#if 0
/* Missing from llvm */
PRINT_REG(id_mmfr4_el1);
#endif
PRINT_REG(id_pfr0_el1);
PRINT_REG(id_pfr1_el1);
PRINT_REG(isr_el1);
PRINT_REG(mair_el1);
PRINT_REG(midr_el1);
PRINT_REG(mpidr_el1);
PRINT_REG(mvfr0_el1);
PRINT_REG(mvfr1_el1);
PRINT_REG(mvfr2_el1);
PRINT_REG(revidr_el1);
PRINT_REG(sctlr_el1);
PRINT_REG(sp_el0);
PRINT_REG(spsel);
PRINT_REG(spsr_el1);
PRINT_REG(tcr_el1);
PRINT_REG(tpidr_el0);
PRINT_REG(tpidr_el1);
PRINT_REG(tpidrro_el0);
PRINT_REG(ttbr0_el1);
PRINT_REG(ttbr1_el1);
PRINT_REG(vbar_el1);
#undef PRINT_REG
}
DB_SHOW_COMMAND(vtop, db_show_vtop)
{
uint64_t phys;
if (have_addr) {
phys = arm64_address_translate_s1e1r(addr);
db_printf("EL1 physical address reg (read): 0x%016lx\n", phys);
phys = arm64_address_translate_s1e1w(addr);
db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
phys = arm64_address_translate_s1e0r(addr);
db_printf("EL0 physical address reg (read): 0x%016lx\n", phys);
phys = arm64_address_translate_s1e0w(addr);
db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
} else
db_printf("show vtop \n");
}
#endif
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 96bde42d2711..0ef23edbc0bf 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1,10123 +1,10126 @@
/*-
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
* Copyright (c) 1994 John S. Dyson
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
* Copyright (c) 2003 Peter Wemm
* All rights reserved.
* Copyright (c) 2005-2010 Alan L. Cox
* All rights reserved.
* Copyright (c) 2014 Andrew Turner
* All rights reserved.
* Copyright (c) 2014-2016 The FreeBSD Foundation
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and William Jolitz of UUNET Technologies Inc.
*
* This software was developed by Andrew Turner under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*-
* Copyright (c) 2003 Networks Associates Technology, Inc.
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jake Burkholder,
* Safeport Network Services, and Network Associates Laboratories, the
* Security Research Division of Network Associates, Inc. under
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
* CHATS research program.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include
/*
* Manages physical address maps.
*
* Since the information managed by this module is
* also stored by the logical address mapping module,
* this module may throw away valid virtual-to-physical
* mappings at almost any time. However, invalidations
* of virtual-to-physical mappings must be done as
* requested.
*
* In order to cope with hardware architectures which
* make virtual-to-physical map invalidates expensive,
* this module may delay invalidate or reduced protection
* operations until such time as they are actually
* necessary. This module is given full information as
* to which processors are currently using which maps,
* and to when physical maps must be made correct.
*/
#include "opt_vm.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef NUMA
#define PMAP_MEMDOM MAXMEMDOM
#else
#define PMAP_MEMDOM 1
#endif
#define PMAP_ASSERT_STAGE1(pmap) MPASS((pmap)->pm_stage == PM_STAGE1)
#define PMAP_ASSERT_STAGE2(pmap) MPASS((pmap)->pm_stage == PM_STAGE2)
#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t)))
#define NUL0E L0_ENTRIES
#define NUL1E (NUL0E * NL1PG)
#define NUL2E (NUL1E * NL2PG)
#ifdef PV_STATS
#define PV_STAT(x) do { x ; } while (0)
#define __pvused
#else
#define PV_STAT(x) do { } while (0)
#define __pvused __unused
#endif
#define pmap_l0_pindex(v) (NUL2E + NUL1E + ((v) >> L0_SHIFT))
#define pmap_l1_pindex(v) (NUL2E + ((v) >> L1_SHIFT))
#define pmap_l2_pindex(v) ((v) >> L2_SHIFT)
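/*
* These give each page table page a unique pindex: leaf (L3) page table
* pages use [0, NUL2E), L2 pages use [NUL2E, NUL2E + NUL1E) and L1 pages
* start at NUL2E + NUL1E, so a pindex identifies both the level and the
* VA range the page maps.
*/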
#ifdef __ARM_FEATURE_BTI_DEFAULT
pt_entry_t __read_mostly pmap_gp_attr;
#define ATTR_KERN_GP pmap_gp_attr
#else
#define ATTR_KERN_GP 0
#endif
#define PMAP_SAN_PTE_BITS (ATTR_AF | ATTR_S1_XN | pmap_sh_attr | \
ATTR_KERN_GP | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | ATTR_S1_AP(ATTR_S1_AP_RW))
struct pmap_large_md_page {
struct rwlock pv_lock;
struct md_page pv_page;
/* Pad to a power of 2, see pmap_init_pv_table(). */
int pv_pad[2];
};
__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
#define pv_dummy pv_dummy_large.pv_page
__read_mostly static struct pmap_large_md_page *pv_table;
static struct pmap_large_md_page *
_pa_to_pmdp(vm_paddr_t pa)
{
struct vm_phys_seg *seg;
if ((seg = vm_phys_paddr_to_seg(pa)) != NULL)
return ((struct pmap_large_md_page *)seg->md_first +
pmap_l2_pindex(pa) - pmap_l2_pindex(seg->start));
return (NULL);
}
static struct pmap_large_md_page *
pa_to_pmdp(vm_paddr_t pa)
{
struct pmap_large_md_page *pvd;
pvd = _pa_to_pmdp(pa);
if (pvd == NULL)
panic("pa 0x%jx not within vm_phys_segs", (uintmax_t)pa);
return (pvd);
}
static struct pmap_large_md_page *
page_to_pmdp(vm_page_t m)
{
struct vm_phys_seg *seg;
seg = &vm_phys_segs[m->segind];
return ((struct pmap_large_md_page *)seg->md_first +
pmap_l2_pindex(VM_PAGE_TO_PHYS(m)) - pmap_l2_pindex(seg->start));
}
#define pa_to_pvh(pa) (&(pa_to_pmdp(pa)->pv_page))
#define page_to_pvh(m) (&(page_to_pmdp(m)->pv_page))
#define PHYS_TO_PV_LIST_LOCK(pa) ({ \
struct pmap_large_md_page *_pvd; \
struct rwlock *_lock; \
_pvd = _pa_to_pmdp(pa); \
if (__predict_false(_pvd == NULL)) \
_lock = &pv_dummy_large.pv_lock; \
else \
_lock = &(_pvd->pv_lock); \
_lock; \
})
static struct rwlock *
VM_PAGE_TO_PV_LIST_LOCK(vm_page_t m)
{
if ((m->flags & PG_FICTITIOUS) == 0)
return (&page_to_pmdp(m)->pv_lock);
else
return (&pv_dummy_large.pv_lock);
}
#define CHANGE_PV_LIST_LOCK(lockp, new_lock) do { \
struct rwlock **_lockp = (lockp); \
struct rwlock *_new_lock = (new_lock); \
\
if (_new_lock != *_lockp) { \
if (*_lockp != NULL) \
rw_wunlock(*_lockp); \
*_lockp = _new_lock; \
rw_wlock(*_lockp); \
} \
} while (0)
#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) \
CHANGE_PV_LIST_LOCK(lockp, PHYS_TO_PV_LIST_LOCK(pa))
#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
CHANGE_PV_LIST_LOCK(lockp, VM_PAGE_TO_PV_LIST_LOCK(m))
#define RELEASE_PV_LIST_LOCK(lockp) do { \
struct rwlock **_lockp = (lockp); \
\
if (*_lockp != NULL) { \
rw_wunlock(*_lockp); \
*_lockp = NULL; \
} \
} while (0)
#define PTE_TO_VM_PAGE(pte) PHYS_TO_VM_PAGE(PTE_TO_PHYS(pte))
#define VM_PAGE_TO_PTE(m) PHYS_TO_PTE(VM_PAGE_TO_PHYS(m))
/*
* The presence of this flag indicates that the mapping is writeable.
* If the ATTR_S1_AP_RO bit is also set, then the mapping is clean, otherwise
* it is dirty. This flag may only be set on managed mappings.
*
* The DBM bit is reserved on ARMv8.0 but it seems we can safely treat it
* as a software managed bit.
*/
#define ATTR_SW_DBM ATTR_DBM
struct pmap kernel_pmap_store;
/* Used for mapping ACPI memory before VM is initialized */
#define PMAP_PREINIT_MAPPING_COUNT 32
#define PMAP_PREINIT_MAPPING_SIZE (PMAP_PREINIT_MAPPING_COUNT * L2_SIZE)
static vm_offset_t preinit_map_va; /* Start VA of pre-init mapping space */
static int vm_initialized = 0; /* No need to use pre-init maps when set */
/*
* Reserve a few L2 blocks starting from 'preinit_map_va' pointer.
* Always map entire L2 block for simplicity.
* VA of L2 block = preinit_map_va + i * L2_SIZE
*/
static struct pmap_preinit_mapping {
vm_paddr_t pa;
vm_offset_t va;
vm_size_t size;
} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;
/*
* Data for the pv entry allocation mechanism.
*/
#ifdef NUMA
static __inline int
pc_to_domain(struct pv_chunk *pc)
{
return (vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc)));
}
#else
static __inline int
pc_to_domain(struct pv_chunk *pc __unused)
{
return (0);
}
#endif
struct pv_chunks_list {
struct mtx pvc_lock;
TAILQ_HEAD(pch, pv_chunk) pvc_list;
int active_reclaims;
} __aligned(CACHE_LINE_SIZE);
struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
vm_paddr_t dmap_phys_base; /* The start of the dmap region */
vm_paddr_t dmap_phys_max; /* The limit of the dmap region */
vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */
extern pt_entry_t pagetable_l0_ttbr1[];
#define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1))
static vm_paddr_t physmap[PHYSMAP_SIZE];
static u_int physmap_idx;
static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"VM/pmap parameters");
static int pmap_growkernel_panic = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, growkernel_panic, CTLFLAG_RDTUN,
&pmap_growkernel_panic, 0,
"panic on failure to allocate kernel page table page");
bool pmap_lpa_enabled __read_mostly = false;
pt_entry_t pmap_sh_attr __read_mostly = ATTR_SH(ATTR_SH_IS);
#if PAGE_SIZE == PAGE_SIZE_4K
#define L1_BLOCKS_SUPPORTED 1
#else
#define L1_BLOCKS_SUPPORTED (pmap_lpa_enabled)
#endif
#define PMAP_ASSERT_L1_BLOCKS_SUPPORTED MPASS(L1_BLOCKS_SUPPORTED)
static bool pmap_l1_supported __read_mostly = false;
/*
* This ASID allocator uses a bit vector ("asid_set") to remember which ASIDs
* it has currently allocated to a pmap, a cursor ("asid_next") to
* optimize its search for a free ASID in the bit vector, and an epoch number
* ("asid_epoch") to indicate when it has reclaimed all previously allocated
* ASIDs that are not currently active on a processor.
*
* The current epoch number is always in the range [0, INT_MAX). Negative
* numbers and INT_MAX are reserved for special cases that are described
* below.
*/
struct asid_set {
int asid_bits;
bitstr_t *asid_set;
int asid_set_size;
int asid_next;
int asid_epoch;
struct mtx asid_set_mutex;
};
static struct asid_set asids;
static struct asid_set vmids;
static SYSCTL_NODE(_vm_pmap, OID_AUTO, asid, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"ASID allocator");
SYSCTL_INT(_vm_pmap_asid, OID_AUTO, bits, CTLFLAG_RD, &asids.asid_bits, 0,
"The number of bits in an ASID");
SYSCTL_INT(_vm_pmap_asid, OID_AUTO, next, CTLFLAG_RD, &asids.asid_next, 0,
"The last allocated ASID plus one");
SYSCTL_INT(_vm_pmap_asid, OID_AUTO, epoch, CTLFLAG_RD, &asids.asid_epoch, 0,
"The current epoch number");
static SYSCTL_NODE(_vm_pmap, OID_AUTO, vmid, CTLFLAG_RD, 0, "VMID allocator");
SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, bits, CTLFLAG_RD, &vmids.asid_bits, 0,
"The number of bits in an VMID");
SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, next, CTLFLAG_RD, &vmids.asid_next, 0,
"The last allocated VMID plus one");
SYSCTL_INT(_vm_pmap_vmid, OID_AUTO, epoch, CTLFLAG_RD, &vmids.asid_epoch, 0,
"The current epoch number");
void (*pmap_clean_stage2_tlbi)(void);
void (*pmap_stage2_invalidate_range)(uint64_t, vm_offset_t, vm_offset_t, bool);
void (*pmap_stage2_invalidate_all)(uint64_t);
/*
* A pmap's cookie encodes an ASID and epoch number. Cookies for reserved
* ASIDs have a negative epoch number, specifically, INT_MIN. Cookies for
* dynamically allocated ASIDs have a non-negative epoch number.
*
* An invalid ASID is represented by -1.
*
* There are two special-case cookie values: (1) COOKIE_FROM(-1, INT_MIN),
* which indicates that an ASID should never be allocated to the pmap, and
* (2) COOKIE_FROM(-1, INT_MAX), which indicates that an ASID should be
* allocated when the pmap is next activated.
*/
#define COOKIE_FROM(asid, epoch) ((long)((u_int)(asid) | \
((u_long)(epoch) << 32)))
#define COOKIE_TO_ASID(cookie) ((int)(cookie))
#define COOKIE_TO_EPOCH(cookie) ((int)((u_long)(cookie) >> 32))
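/*
* e.g. COOKIE_FROM(5, 2) stores ASID 5 in bits [31:0] and epoch 2 in
* bits [63:32]; COOKIE_TO_ASID() and COOKIE_TO_EPOCH() recover them.
*/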
#define TLBI_VA_SHIFT 12
#define TLBI_VA_MASK ((1ul << 44) - 1)
#define TLBI_VA(addr) (((addr) >> TLBI_VA_SHIFT) & TLBI_VA_MASK)
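/*
* TLBI by-VA operations take VA[55:12] in the low 44 bits of the
* register operand; TLBI_VA() drops the page offset and masks the
* result to that width.
*/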
static int __read_frequently superpages_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
"Are large page mappings enabled?");
/*
* True when Branch Target Identification should be used by userspace. This
* allows pmap to mark pages as guarded with ATTR_S1_GP.
*/
__read_mostly static bool pmap_bti_support = false;
/*
* Internal flags for pmap_enter()'s helper functions.
*/
#define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */
#define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */
TAILQ_HEAD(pv_chunklist, pv_chunk);
static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_chunk_batch(struct pv_chunklist *batch);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
vm_offset_t va);
static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
static bool pmap_activate_int(struct thread *td, pmap_t pmap);
static void pmap_alloc_asid(pmap_t pmap);
static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
vm_prot_t prot, int mode, bool skip_unmapped);
static bool pmap_copy_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
pt_entry_t l3e, vm_page_t ml3, struct rwlock **lockp);
static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
vm_offset_t va, struct rwlock **lockp);
static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
static bool pmap_demote_l2c(pmap_t pmap, pt_entry_t *l2p, vm_offset_t va);
static bool pmap_demote_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
u_int flags, vm_page_t m, struct rwlock **lockp);
static int pmap_enter_l3c(pmap_t pmap, vm_offset_t va, pt_entry_t l3e, u_int flags,
vm_page_t m, vm_page_t *ml3p, struct rwlock **lockp);
static bool pmap_every_pte_zero(vm_paddr_t pa);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
bool all_l3e_AF_set);
static pt_entry_t pmap_load_l3c(pt_entry_t *l3p);
static void pmap_mask_set_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
vm_offset_t *vap, vm_offset_t va_next, pt_entry_t mask, pt_entry_t nbits);
static bool pmap_pv_insert_l3c(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct rwlock **lockp);
static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
pd_entry_t l1e, bool demote_kl2e, struct spglist *free,
struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
pd_entry_t l2e, struct spglist *free, struct rwlock **lockp);
static bool pmap_remove_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
vm_offset_t *vap, vm_offset_t va_next, vm_page_t ml3, struct spglist *free,
struct rwlock **lockp);
static void pmap_reset_asid_set(pmap_t pmap);
static bool pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m, struct rwlock **lockp);
static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
struct rwlock **lockp);
static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct spglist *free);
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
static void pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
vm_offset_t va, vm_size_t size);
static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
static uma_zone_t pmap_bti_ranges_zone;
static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
pt_entry_t *pte);
static pt_entry_t pmap_pte_bti(pmap_t pmap, vm_offset_t va);
static void pmap_bti_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
static void *bti_dup_range(void *ctx, void *data);
static void bti_free_range(void *ctx, void *node);
static int pmap_bti_copy(pmap_t dst_pmap, pmap_t src_pmap);
static void pmap_bti_deassign_all(pmap_t pmap);
/*
* These load the old table data and store the new value.
* They need to be atomic as the System MMU may write to the table at
* the same time as the CPU.
*/
#define pmap_clear(table) atomic_store_64(table, 0)
#define pmap_clear_bits(table, bits) atomic_clear_64(table, bits)
#define pmap_load(table) (*table)
#define pmap_load_clear(table) atomic_swap_64(table, 0)
#define pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define pmap_set_bits(table, bits) atomic_set_64(table, bits)
#define pmap_store(table, entry) atomic_store_64(table, entry)
/********************/
/* Inline functions */
/********************/
static __inline void
pagecopy(void *s, void *d)
{
memcpy(d, s, PAGE_SIZE);
}
static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{
return (&pmap->pm_l0[pmap_l0_index(va)]);
}
static __inline pd_entry_t *
pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
{
pd_entry_t *l1;
l1 = (pd_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l0)));
return (&l1[pmap_l1_index(va)]);
}
static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
pd_entry_t *l0;
l0 = pmap_l0(pmap, va);
if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
return (NULL);
return (pmap_l0_to_l1(l0, va));
}
static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1p, vm_offset_t va)
{
pd_entry_t l1, *l2p;
l1 = pmap_load(l1p);
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
/*
* The valid bit may be clear if pmap_update_entry() is concurrently
* modifying the entry, so for KVA only the entry type may be checked.
*/
KASSERT(ADDR_IS_KERNEL(va) || (l1 & ATTR_DESCR_VALID) != 0,
("%s: L1 entry %#lx for %#lx is invalid", __func__, l1, va));
KASSERT((l1 & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_TABLE,
("%s: L1 entry %#lx for %#lx is a leaf", __func__, l1, va));
l2p = (pd_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(l1));
return (&l2p[pmap_l2_index(va)]);
}
static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
pd_entry_t *l1;
l1 = pmap_l1(pmap, va);
if (l1 == NULL || (pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
return (NULL);
return (pmap_l1_to_l2(l1, va));
}
static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2p, vm_offset_t va)
{
pd_entry_t l2;
pt_entry_t *l3p;
l2 = pmap_load(l2p);
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
/*
* The valid bit may be clear if pmap_update_entry() is concurrently
* modifying the entry, so for KVA only the entry type may be checked.
*/
KASSERT(ADDR_IS_KERNEL(va) || (l2 & ATTR_DESCR_VALID) != 0,
("%s: L2 entry %#lx for %#lx is invalid", __func__, l2, va));
KASSERT((l2 & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_TABLE,
("%s: L2 entry %#lx for %#lx is a leaf", __func__, l2, va));
l3p = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(l2));
return (&l3p[pmap_l3_index(va)]);
}
/*
* Returns the lowest valid pde for a given virtual address.
* The next level may or may not point to a valid page or block.
*/
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
{
pd_entry_t *l0, *l1, *l2, desc;
l0 = pmap_l0(pmap, va);
desc = pmap_load(l0) & ATTR_DESCR_MASK;
if (desc != L0_TABLE) {
*level = -1;
return (NULL);
}
l1 = pmap_l0_to_l1(l0, va);
desc = pmap_load(l1) & ATTR_DESCR_MASK;
if (desc != L1_TABLE) {
*level = 0;
return (l0);
}
l2 = pmap_l1_to_l2(l1, va);
desc = pmap_load(l2) & ATTR_DESCR_MASK;
if (desc != L2_TABLE) {
*level = 1;
return (l1);
}
*level = 2;
return (l2);
}
/*
* Returns the lowest valid pte block or table entry for a given virtual
* address. If there are no valid entries return NULL and set the level to
* the first invalid level.
*/
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
pd_entry_t *l1, *l2, desc;
pt_entry_t *l3;
l1 = pmap_l1(pmap, va);
if (l1 == NULL) {
*level = 0;
return (NULL);
}
desc = pmap_load(l1) & ATTR_DESCR_MASK;
if (desc == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
*level = 1;
return (l1);
}
if (desc != L1_TABLE) {
*level = 1;
return (NULL);
}
l2 = pmap_l1_to_l2(l1, va);
desc = pmap_load(l2) & ATTR_DESCR_MASK;
if (desc == L2_BLOCK) {
*level = 2;
return (l2);
}
if (desc != L2_TABLE) {
*level = 2;
return (NULL);
}
*level = 3;
l3 = pmap_l2_to_l3(l2, va);
if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
return (NULL);
return (l3);
}
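/*
 * A typical caller combines the returned entry and level to compute the
 * physical address, masking with the offset that matches the level; for
 * example, pmap_extract() below does
 *
 *     pte = pmap_pte(pmap, va, &lvl);
 *     if (pte != NULL && lvl == 3)
 *             pa = PTE_TO_PHYS(pmap_load(pte)) | (va & L3_OFFSET);
 *
 * and uses L1_OFFSET or L2_OFFSET instead for block mappings.
 */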
/*
* If the given pmap has an L{1,2}_BLOCK or L3_PAGE entry at the specified
* level that maps the specified virtual address, then a pointer to that entry
* is returned. Otherwise, NULL is returned, unless INVARIANTS are enabled
* and a diagnostic message is provided, in which case this function panics.
*/
static __always_inline pt_entry_t *
pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, const char *diag)
{
pd_entry_t *l0p, *l1p, *l2p;
pt_entry_t desc, *l3p;
int walk_level __diagused;
KASSERT(level >= 0 && level < 4,
("%s: %s passed an out-of-range level (%d)", __func__, diag,
level));
l0p = pmap_l0(pmap, va);
desc = pmap_load(l0p) & ATTR_DESCR_MASK;
if (desc == L0_TABLE && level > 0) {
l1p = pmap_l0_to_l1(l0p, va);
desc = pmap_load(l1p) & ATTR_DESCR_MASK;
if (desc == L1_BLOCK && level == 1) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
return (l1p);
}
if (desc == L1_TABLE && level > 1) {
l2p = pmap_l1_to_l2(l1p, va);
desc = pmap_load(l2p) & ATTR_DESCR_MASK;
if (desc == L2_BLOCK && level == 2)
return (l2p);
else if (desc == L2_TABLE && level > 2) {
l3p = pmap_l2_to_l3(l2p, va);
desc = pmap_load(l3p) & ATTR_DESCR_MASK;
if (desc == L3_PAGE && level == 3)
return (l3p);
else
walk_level = 3;
} else
walk_level = 2;
} else
walk_level = 1;
} else
walk_level = 0;
KASSERT(diag == NULL,
("%s: va %#lx not mapped at level %d, desc %ld at level %d",
diag, va, level, desc, walk_level));
return (NULL);
}
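/*
 * For example, pmap_kremove() below calls
 *
 *     pte = pmap_pte_exists(kernel_pmap, va, 3, __func__);
 *
 * to demand an existing L3 mapping (panicking under INVARIANTS when none is
 * found), while pmap_qremove() passes a NULL diagnostic string and simply
 * skips addresses that are not mapped.
 */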
bool
pmap_ps_enabled(pmap_t pmap)
{
/*
* Promotion requires a hypervisor call when the kernel is running
* in EL1. To stop this disable superpage support on non-stage 1
* pmaps for now.
*/
if (pmap->pm_stage != PM_STAGE1)
return (false);
#ifdef KMSAN
/*
* The break-before-make in pmap_update_entry() results in a situation
* where a CPU may call into the KMSAN runtime while the entry is
* invalid. If the entry is used to map the current thread structure,
* then the runtime will attempt to access unmapped memory. Avoid this
* by simply disabling superpage promotion for the kernel map.
*/
if (pmap == kernel_pmap)
return (false);
#endif
return (superpages_enabled != 0);
}
bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
pd_entry_t **l2, pt_entry_t **l3)
{
pd_entry_t *l0p, *l1p, *l2p;
if (pmap->pm_l0 == NULL)
return (false);
l0p = pmap_l0(pmap, va);
*l0 = l0p;
if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
return (false);
l1p = pmap_l0_to_l1(l0p, va);
*l1 = l1p;
if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
*l2 = NULL;
*l3 = NULL;
return (true);
}
if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
return (false);
l2p = pmap_l1_to_l2(l1p, va);
*l2 = l2p;
if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
*l3 = NULL;
return (true);
}
if ((pmap_load(l2p) & ATTR_DESCR_MASK) != L2_TABLE)
return (false);
*l3 = pmap_l2_to_l3(l2p, va);
return (true);
}
static __inline int
pmap_l3_valid(pt_entry_t l3)
{
return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}
CTASSERT(L1_BLOCK == L2_BLOCK);
static pt_entry_t
pmap_pte_memattr(pmap_t pmap, vm_memattr_t memattr)
{
pt_entry_t val;
if (pmap->pm_stage == PM_STAGE1) {
val = ATTR_S1_IDX(memattr);
if (memattr == VM_MEMATTR_DEVICE)
val |= ATTR_S1_XN;
return (val);
}
val = 0;
switch (memattr) {
case VM_MEMATTR_DEVICE:
return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_DEVICE_nGnRnE) |
ATTR_S2_XN(ATTR_S2_XN_ALL));
case VM_MEMATTR_UNCACHEABLE:
return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_NC));
case VM_MEMATTR_WRITE_BACK:
return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WB));
case VM_MEMATTR_WRITE_THROUGH:
return (ATTR_S2_MEMATTR(ATTR_S2_MEMATTR_WT));
default:
panic("%s: invalid memory attribute %x", __func__, memattr);
}
}
static pt_entry_t
pmap_pte_prot(pmap_t pmap, vm_prot_t prot)
{
pt_entry_t val;
val = 0;
if (pmap->pm_stage == PM_STAGE1) {
if ((prot & VM_PROT_EXECUTE) == 0)
val |= ATTR_S1_XN;
if ((prot & VM_PROT_WRITE) == 0)
val |= ATTR_S1_AP(ATTR_S1_AP_RO);
} else {
if ((prot & VM_PROT_WRITE) != 0)
val |= ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
if ((prot & VM_PROT_READ) != 0)
val |= ATTR_S2_S2AP(ATTR_S2_S2AP_READ);
if ((prot & VM_PROT_EXECUTE) == 0)
val |= ATTR_S2_XN(ATTR_S2_XN_ALL);
}
return (val);
}
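/*
 * As a concrete example of how these attribute bits are combined,
 * pmap_kenter() below builds its stage 1 leaf attributes as
 *
 *     ATTR_AF | pmap_sh_attr | ATTR_S1_AP(ATTR_S1_AP_RW) |
 *         ATTR_S1_XN | ATTR_KERN_GP | ATTR_S1_IDX(mode)
 *
 * i.e. an accessed, read-write, non-executable kernel mapping with the
 * caller-supplied memory attribute index.
 */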
/*
* Checks if the PTE is dirty.
*/
static inline int
pmap_pte_dirty(pmap_t pmap, pt_entry_t pte)
{
KASSERT((pte & ATTR_SW_MANAGED) != 0, ("pte %#lx is unmanaged", pte));
if (pmap->pm_stage == PM_STAGE1) {
KASSERT((pte & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) != 0,
("pte %#lx is writeable and missing ATTR_SW_DBM", pte));
return ((pte & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_SW_DBM));
}
return ((pte & ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)) ==
ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE));
}
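/*
 * In outline, a managed stage 1 entry is considered dirty when it is both
 * flagged for dirty-bit management (ATTR_SW_DBM) and currently writeable
 * (ATTR_S1_AP_RW); a clean entry is kept read-only and is switched to
 * read-write by hardware DBM, when enabled, or by the fault path on the
 * first store.
 */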
static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
pmap->pm_stats.resident_count += count;
}
static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(pmap->pm_stats.resident_count >= count,
("pmap %p resident count underflow %ld %d", pmap,
pmap->pm_stats.resident_count, count));
pmap->pm_stats.resident_count -= count;
}
static vm_paddr_t
pmap_early_vtophys(vm_offset_t va)
{
vm_paddr_t pa_page;
pa_page = arm64_address_translate_s1e1r(va) & PAR_PA_MASK;
return (pa_page | (va & PAR_LOW_MASK));
}
/* State of the bootstrapped DMAP page tables */
struct pmap_bootstrap_state {
pt_entry_t *l1;
pt_entry_t *l2;
pt_entry_t *l3;
vm_offset_t freemempos;
vm_offset_t va;
vm_paddr_t pa;
pt_entry_t table_attrs;
u_int l0_slot;
u_int l1_slot;
u_int l2_slot;
bool dmap_valid;
};
/* The bootstrap state */
static struct pmap_bootstrap_state bs_state = {
.l1 = NULL,
.l2 = NULL,
.l3 = NULL,
.table_attrs = TATTR_PXN_TABLE,
.l0_slot = L0_ENTRIES,
.l1_slot = Ln_ENTRIES,
.l2_slot = Ln_ENTRIES,
.dmap_valid = false,
};
static void
pmap_bootstrap_l0_table(struct pmap_bootstrap_state *state)
{
vm_paddr_t l1_pa;
pd_entry_t l0e;
u_int l0_slot;
/* Link the level 0 table to a level 1 table */
l0_slot = pmap_l0_index(state->va);
if (l0_slot != state->l0_slot) {
/*
* Make sure we move from a low address to high address
* before the DMAP region is ready. This ensures we never
* modify an existing mapping until we can map from a
* physical address to a virtual address.
*/
MPASS(state->l0_slot < l0_slot ||
state->l0_slot == L0_ENTRIES ||
state->dmap_valid);
/* Reset lower levels */
state->l2 = NULL;
state->l3 = NULL;
state->l1_slot = Ln_ENTRIES;
state->l2_slot = Ln_ENTRIES;
/* Check the existing L0 entry */
state->l0_slot = l0_slot;
if (state->dmap_valid) {
l0e = pagetable_l0_ttbr1[l0_slot];
if ((l0e & ATTR_DESCR_VALID) != 0) {
MPASS((l0e & ATTR_DESCR_MASK) == L0_TABLE);
l1_pa = PTE_TO_PHYS(l0e);
state->l1 = (pt_entry_t *)PHYS_TO_DMAP(l1_pa);
return;
}
}
/* Create a new L0 table entry */
state->l1 = (pt_entry_t *)state->freemempos;
memset(state->l1, 0, PAGE_SIZE);
state->freemempos += PAGE_SIZE;
l1_pa = pmap_early_vtophys((vm_offset_t)state->l1);
MPASS((l1_pa & Ln_TABLE_MASK) == 0);
MPASS(pagetable_l0_ttbr1[l0_slot] == 0);
pmap_store(&pagetable_l0_ttbr1[l0_slot], PHYS_TO_PTE(l1_pa) |
TATTR_UXN_TABLE | TATTR_AP_TABLE_NO_EL0 | L0_TABLE);
}
KASSERT(state->l1 != NULL, ("%s: NULL l1", __func__));
}
static void
pmap_bootstrap_l1_table(struct pmap_bootstrap_state *state)
{
vm_paddr_t l2_pa;
pd_entry_t l1e;
u_int l1_slot;
/* Make sure there is a valid L0 -> L1 table */
pmap_bootstrap_l0_table(state);
/* Link the level 1 table to a level 2 table */
l1_slot = pmap_l1_index(state->va);
if (l1_slot != state->l1_slot) {
/* See pmap_bootstrap_l0_table for a description */
MPASS(state->l1_slot < l1_slot ||
state->l1_slot == Ln_ENTRIES ||
state->dmap_valid);
/* Reset lower levels */
state->l3 = NULL;
state->l2_slot = Ln_ENTRIES;
/* Check the existing L1 entry */
state->l1_slot = l1_slot;
if (state->dmap_valid) {
l1e = state->l1[l1_slot];
if ((l1e & ATTR_DESCR_VALID) != 0) {
MPASS((l1e & ATTR_DESCR_MASK) == L1_TABLE);
l2_pa = PTE_TO_PHYS(l1e);
state->l2 = (pt_entry_t *)PHYS_TO_DMAP(l2_pa);
return;
}
}
/* Create a new L1 table entry */
state->l2 = (pt_entry_t *)state->freemempos;
memset(state->l2, 0, PAGE_SIZE);
state->freemempos += PAGE_SIZE;
l2_pa = pmap_early_vtophys((vm_offset_t)state->l2);
MPASS((l2_pa & Ln_TABLE_MASK) == 0);
MPASS(state->l1[l1_slot] == 0);
pmap_store(&state->l1[l1_slot], PHYS_TO_PTE(l2_pa) |
state->table_attrs | L1_TABLE);
}
KASSERT(state->l2 != NULL, ("%s: NULL l2", __func__));
}
static void
pmap_bootstrap_l2_table(struct pmap_bootstrap_state *state)
{
vm_paddr_t l3_pa;
pd_entry_t l2e;
u_int l2_slot;
/* Make sure there is a valid L1 -> L2 table */
pmap_bootstrap_l1_table(state);
/* Link the level 2 table to a level 3 table */
l2_slot = pmap_l2_index(state->va);
if (l2_slot != state->l2_slot) {
/* See pmap_bootstrap_l0_table for a description */
MPASS(state->l2_slot < l2_slot ||
state->l2_slot == Ln_ENTRIES ||
state->dmap_valid);
/* Check the existing L2 entry */
state->l2_slot = l2_slot;
if (state->dmap_valid) {
l2e = state->l2[l2_slot];
if ((l2e & ATTR_DESCR_VALID) != 0) {
MPASS((l2e & ATTR_DESCR_MASK) == L2_TABLE);
l3_pa = PTE_TO_PHYS(l2e);
state->l3 = (pt_entry_t *)PHYS_TO_DMAP(l3_pa);
return;
}
}
/* Create a new L2 table entry */
state->l3 = (pt_entry_t *)state->freemempos;
memset(state->l3, 0, PAGE_SIZE);
state->freemempos += PAGE_SIZE;
l3_pa = pmap_early_vtophys((vm_offset_t)state->l3);
MPASS((l3_pa & Ln_TABLE_MASK) == 0);
MPASS(state->l2[l2_slot] == 0);
pmap_store(&state->l2[l2_slot], PHYS_TO_PTE(l3_pa) |
state->table_attrs | L2_TABLE);
}
KASSERT(state->l3 != NULL, ("%s: NULL l3", __func__));
}
static void
pmap_bootstrap_l2_block(struct pmap_bootstrap_state *state, int i)
{
pt_entry_t contig;
u_int l2_slot;
bool first;
if ((physmap[i + 1] - state->pa) < L2_SIZE)
return;
/* Make sure there is a valid L1 table */
pmap_bootstrap_l1_table(state);
MPASS((state->va & L2_OFFSET) == 0);
for (first = true, contig = 0;
state->va < DMAP_MAX_ADDRESS &&
(physmap[i + 1] - state->pa) >= L2_SIZE;
state->va += L2_SIZE, state->pa += L2_SIZE) {
/*
* Stop if we are about to walk off the end of what the
* current L1 slot can address.
*/
if (!first && (state->pa & L1_OFFSET) == 0)
break;
/*
* If we have an aligned, contiguous chunk of L2C_ENTRIES
* L2 blocks, set the contiguous bit within each PTE so that
* the chunk can be cached using only one TLB entry.
*/
if ((state->pa & L2C_OFFSET) == 0) {
if (state->va + L2C_SIZE < DMAP_MAX_ADDRESS &&
physmap[i + 1] - state->pa >= L2C_SIZE) {
contig = ATTR_CONTIGUOUS;
} else {
contig = 0;
}
}
first = false;
l2_slot = pmap_l2_index(state->va);
MPASS((state->pa & L2_OFFSET) == 0);
MPASS(state->l2[l2_slot] == 0);
pmap_store(&state->l2[l2_slot], PHYS_TO_PTE(state->pa) |
ATTR_AF | pmap_sh_attr | ATTR_S1_XN | ATTR_KERN_GP |
ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | contig | L2_BLOCK);
}
MPASS(state->va == (state->pa - dmap_phys_base + DMAP_MIN_ADDRESS));
}
static void
pmap_bootstrap_l3_page(struct pmap_bootstrap_state *state, int i)
{
pt_entry_t contig;
u_int l3_slot;
bool first;
if (physmap[i + 1] - state->pa < L3_SIZE)
return;
/* Make sure there is a valid L2 table */
pmap_bootstrap_l2_table(state);
MPASS((state->va & L3_OFFSET) == 0);
for (first = true, contig = 0;
state->va < DMAP_MAX_ADDRESS &&
physmap[i + 1] - state->pa >= L3_SIZE;
state->va += L3_SIZE, state->pa += L3_SIZE) {
/*
* Stop if we are about to walk off the end of what the
* current L2 slot can address.
*/
if (!first && (state->pa & L2_OFFSET) == 0)
break;
/*
* If we have an aligned, contiguous chunk of L3C_ENTRIES
* L3 pages, set the contiguous bit within each PTE so that
* the chunk can be cached using only one TLB entry.
*/
if ((state->pa & L3C_OFFSET) == 0) {
if (state->va + L3C_SIZE < DMAP_MAX_ADDRESS &&
physmap[i + 1] - state->pa >= L3C_SIZE) {
contig = ATTR_CONTIGUOUS;
} else {
contig = 0;
}
}
first = false;
l3_slot = pmap_l3_index(state->va);
MPASS((state->pa & L3_OFFSET) == 0);
MPASS(state->l3[l3_slot] == 0);
pmap_store(&state->l3[l3_slot], PHYS_TO_PTE(state->pa) |
ATTR_AF | pmap_sh_attr | ATTR_S1_XN | ATTR_KERN_GP |
ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | contig | L3_PAGE);
}
MPASS(state->va == (state->pa - dmap_phys_base + DMAP_MIN_ADDRESS));
}
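/*
 * As a worked example with a 4K granule: an L3C chunk covers
 * L3C_ENTRIES * L3_SIZE = 16 * 4KB = 64KB and an L2C chunk covers
 * 16 * 2MB = 32MB, which are the first figures in the "64KB/2MB" and
 * "32MB/1GB" sysctl descriptions further down; the second figures apply
 * to a 16K granule.
 */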
void
pmap_bootstrap_dmap(vm_size_t kernlen)
{
vm_paddr_t start_pa, pa;
uint64_t tcr;
int i;
tcr = READ_SPECIALREG(tcr_el1);
/* Verify that the ASID is set through TTBR0. */
KASSERT((tcr & TCR_A1) == 0, ("pmap_bootstrap: TCR_EL1.A1 != 0"));
if ((tcr & TCR_DS) != 0)
pmap_lpa_enabled = true;
pmap_l1_supported = L1_BLOCKS_SUPPORTED;
start_pa = pmap_early_vtophys(KERNBASE);
bs_state.freemempos = KERNBASE + kernlen;
bs_state.freemempos = roundup2(bs_state.freemempos, PAGE_SIZE);
/* Fill in physmap array. */
physmap_idx = physmem_avail(physmap, nitems(physmap));
dmap_phys_base = physmap[0] & ~L1_OFFSET;
dmap_phys_max = 0;
dmap_max_addr = 0;
for (i = 0; i < physmap_idx; i += 2) {
bs_state.pa = physmap[i] & ~L3_OFFSET;
bs_state.va = bs_state.pa - dmap_phys_base + DMAP_MIN_ADDRESS;
/* Create L3 mappings at the start of the region */
if ((bs_state.pa & L2_OFFSET) != 0)
pmap_bootstrap_l3_page(&bs_state, i);
MPASS(bs_state.pa <= physmap[i + 1]);
if (L1_BLOCKS_SUPPORTED) {
/* Create L2 mappings at the start of the region */
if ((bs_state.pa & L1_OFFSET) != 0)
pmap_bootstrap_l2_block(&bs_state, i);
MPASS(bs_state.pa <= physmap[i + 1]);
/* Create the main L1 block mappings */
for (; bs_state.va < DMAP_MAX_ADDRESS &&
(physmap[i + 1] - bs_state.pa) >= L1_SIZE;
bs_state.va += L1_SIZE, bs_state.pa += L1_SIZE) {
/* Make sure there is a valid L1 table */
pmap_bootstrap_l0_table(&bs_state);
MPASS((bs_state.pa & L1_OFFSET) == 0);
pmap_store(
&bs_state.l1[pmap_l1_index(bs_state.va)],
PHYS_TO_PTE(bs_state.pa) | ATTR_AF |
pmap_sh_attr |
ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
ATTR_S1_XN | ATTR_KERN_GP | L1_BLOCK);
}
MPASS(bs_state.pa <= physmap[i + 1]);
/* Create L2 mappings at the end of the region */
pmap_bootstrap_l2_block(&bs_state, i);
} else {
while (bs_state.va < DMAP_MAX_ADDRESS &&
(physmap[i + 1] - bs_state.pa) >= L2_SIZE) {
pmap_bootstrap_l2_block(&bs_state, i);
}
}
MPASS(bs_state.pa <= physmap[i + 1]);
/* Create L3 mappings at the end of the region */
pmap_bootstrap_l3_page(&bs_state, i);
MPASS(bs_state.pa == physmap[i + 1]);
if (bs_state.pa > dmap_phys_max) {
dmap_phys_max = bs_state.pa;
dmap_max_addr = bs_state.va;
}
}
cpu_tlb_flushID();
bs_state.dmap_valid = true;
/* Exclude the kernel and DMAP region */
pa = pmap_early_vtophys(bs_state.freemempos);
physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC);
}
static void
pmap_bootstrap_l2(vm_offset_t va)
{
KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));
/* Leave bs_state.pa as it's only needed to bootstrap blocks and pages */
bs_state.va = va;
for (; bs_state.va < VM_MAX_KERNEL_ADDRESS; bs_state.va += L1_SIZE)
pmap_bootstrap_l1_table(&bs_state);
}
static void
pmap_bootstrap_l3(vm_offset_t va)
{
KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
/* Leave bs_state.pa as it's only needed to bootstrap blocks and pages */
bs_state.va = va;
for (; bs_state.va < VM_MAX_KERNEL_ADDRESS; bs_state.va += L2_SIZE)
pmap_bootstrap_l2_table(&bs_state);
}
/*
* Bootstrap the system enough to run with virtual memory.
*/
void
pmap_bootstrap(void)
{
vm_offset_t dpcpu, msgbufpv;
vm_paddr_t start_pa, pa;
size_t largest_phys_size;
/* Set this early so we can use the pagetable walking functions */
kernel_pmap_store.pm_l0 = pagetable_l0_ttbr1;
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_l0_paddr =
pmap_early_vtophys((vm_offset_t)kernel_pmap_store.pm_l0);
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
vm_radix_init(&kernel_pmap->pm_root);
kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN);
kernel_pmap->pm_stage = PM_STAGE1;
kernel_pmap->pm_levels = 4;
kernel_pmap->pm_ttbr = kernel_pmap->pm_l0_paddr;
kernel_pmap->pm_asid_set = &asids;
/* Reserve some VA space for early BIOS/ACPI mapping */
preinit_map_va = roundup2(bs_state.freemempos, L2_SIZE);
virtual_avail = preinit_map_va + PMAP_PREINIT_MAPPING_SIZE;
virtual_avail = roundup2(virtual_avail, L1_SIZE);
virtual_end = VM_MAX_KERNEL_ADDRESS - PMAP_MAPDEV_EARLY_SIZE;
kernel_vm_end = virtual_avail;
/*
* We only use PXN when we know nothing will be executed from it, e.g.
* the DMAP region.
*/
bs_state.table_attrs &= ~TATTR_PXN_TABLE;
/*
* Find the physical memory we could use. This needs to be after we
* exclude any memory that is mapped into the DMAP region but should
* not be used by the kernel, e.g. some UEFI memory types.
*/
physmap_idx = physmem_avail(physmap, nitems(physmap));
/*
* Find space for early allocations. We search for the largest
* region because the user may choose a large msgbuf. This could
* be smarter, e.g., allowing multiple regions to be used and
* switching to the next when one is full.
*/
largest_phys_size = 0;
for (int i = 0; i < physmap_idx; i += 2) {
if ((physmap[i + 1] - physmap[i]) > largest_phys_size) {
largest_phys_size = physmap[i + 1] - physmap[i];
bs_state.freemempos = PHYS_TO_DMAP(physmap[i]);
}
}
start_pa = pmap_early_vtophys(bs_state.freemempos);
/*
* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS. We assume that the
* loader allocated the first and only l2 page table page used to map
* the kernel, preloaded files and module metadata.
*/
pmap_bootstrap_l2(KERNBASE + L1_SIZE);
/* And the l3 tables for the early devmap */
pmap_bootstrap_l3(VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE));
cpu_tlb_flushID();
#define alloc_pages(var, np) \
(var) = bs_state.freemempos; \
bs_state.freemempos += ((np) * PAGE_SIZE); \
memset((char *)(var), 0, ((np) * PAGE_SIZE));
/* Allocate dynamic per-cpu area. */
alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
dpcpu_init((void *)dpcpu, 0);
/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
msgbufp = (void *)msgbufpv;
pa = pmap_early_vtophys(bs_state.freemempos);
physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC);
}
#if defined(KASAN) || defined(KMSAN)
static void
pmap_bootstrap_allocate_san_l2(vm_paddr_t start_pa, vm_paddr_t end_pa,
vm_offset_t *vap, vm_offset_t eva)
{
vm_paddr_t pa;
vm_offset_t va;
pd_entry_t *l2;
va = *vap;
pa = rounddown2(end_pa - L2_SIZE, L2_SIZE);
for (; pa >= start_pa && va < eva; va += L2_SIZE, pa -= L2_SIZE) {
l2 = pmap_l2(kernel_pmap, va);
/*
* KASAN stack checking results in us having already allocated
* part of our shadow map, so we can just skip those segments.
*/
if ((pmap_load(l2) & ATTR_DESCR_VALID) != 0) {
pa += L2_SIZE;
continue;
}
bzero((void *)PHYS_TO_DMAP(pa), L2_SIZE);
physmem_exclude_region(pa, L2_SIZE, EXFLAG_NOALLOC);
pmap_store(l2, PHYS_TO_PTE(pa) | PMAP_SAN_PTE_BITS | L2_BLOCK);
}
*vap = va;
}
/*
* Finish constructing the initial shadow map:
* - Count how many pages from KERNBASE to virtual_avail (scaled for
* shadow map)
* - Map that entire range using L2 superpages.
*/
static void
pmap_bootstrap_san1(vm_offset_t va, int scale)
{
vm_offset_t eva;
vm_paddr_t kernstart;
int i;
kernstart = pmap_early_vtophys(KERNBASE);
/*
* Rebuild physmap one more time, we may have excluded more regions from
* allocation since pmap_bootstrap().
*/
physmap_idx = physmem_avail(physmap, nitems(physmap));
eva = va + (virtual_avail - VM_MIN_KERNEL_ADDRESS) / scale;
/*
* Find a slot in the physmap large enough for what we needed. We try to put
* the shadow map as high up as we can to avoid depleting the lower 4GB in case
* it's needed for, e.g., an xhci controller that can only do 32-bit DMA.
*/
for (i = physmap_idx - 2; i >= 0; i -= 2) {
vm_paddr_t plow, phigh;
/* L2 mappings must be backed by memory that is L2-aligned */
plow = roundup2(physmap[i], L2_SIZE);
phigh = physmap[i + 1];
if (plow >= phigh)
continue;
if (kernstart >= plow && kernstart < phigh)
phigh = kernstart;
if (phigh - plow >= L2_SIZE) {
pmap_bootstrap_allocate_san_l2(plow, phigh, &va, eva);
if (va >= eva)
break;
}
}
if (i < 0)
panic("Could not find phys region for shadow map");
/*
* Done. We should now have a valid shadow address mapped for all KVA
* that has been mapped so far, i.e., KERNBASE to virtual_avail. Thus,
* shadow accesses by the sanitizer runtime will succeed for this range.
* When the kernel virtual address range is later expanded, as will
* happen in vm_mem_init(), the shadow map will be grown as well. This
* is handled by pmap_san_enter().
*/
}
void
pmap_bootstrap_san(void)
{
#ifdef KASAN
pmap_bootstrap_san1(KASAN_MIN_ADDRESS, KASAN_SHADOW_SCALE);
#else
static uint8_t kmsan_shad_ptp[PAGE_SIZE * 2] __aligned(PAGE_SIZE);
static uint8_t kmsan_orig_ptp[PAGE_SIZE * 2] __aligned(PAGE_SIZE);
pd_entry_t *l0, *l1;
if (virtual_avail - VM_MIN_KERNEL_ADDRESS > L1_SIZE)
panic("initial kernel map is too large");
l0 = pmap_l0(kernel_pmap, KMSAN_SHAD_MIN_ADDRESS);
pmap_store(l0, L0_TABLE | PHYS_TO_PTE(
pmap_early_vtophys((vm_offset_t)kmsan_shad_ptp)));
l1 = pmap_l0_to_l1(l0, KMSAN_SHAD_MIN_ADDRESS);
pmap_store(l1, L1_TABLE | PHYS_TO_PTE(
pmap_early_vtophys((vm_offset_t)kmsan_shad_ptp + PAGE_SIZE)));
pmap_bootstrap_san1(KMSAN_SHAD_MIN_ADDRESS, 1);
l0 = pmap_l0(kernel_pmap, KMSAN_ORIG_MIN_ADDRESS);
pmap_store(l0, L0_TABLE | PHYS_TO_PTE(
pmap_early_vtophys((vm_offset_t)kmsan_orig_ptp)));
l1 = pmap_l0_to_l1(l0, KMSAN_ORIG_MIN_ADDRESS);
pmap_store(l1, L1_TABLE | PHYS_TO_PTE(
pmap_early_vtophys((vm_offset_t)kmsan_orig_ptp + PAGE_SIZE)));
pmap_bootstrap_san1(KMSAN_ORIG_MIN_ADDRESS, 1);
#endif
}
#endif
/*
* Initialize a vm_page's machine-dependent fields.
*/
void
pmap_page_init(vm_page_t m)
{
TAILQ_INIT(&m->md.pv_list);
m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}
static void
pmap_init_asids(struct asid_set *set, int bits)
{
int i;
set->asid_bits = bits;
/*
* We may be too early in the overall initialization process to use
* bit_alloc().
*/
set->asid_set_size = 1 << set->asid_bits;
set->asid_set = kmem_malloc(bitstr_size(set->asid_set_size),
M_WAITOK | M_ZERO);
for (i = 0; i < ASID_FIRST_AVAILABLE; i++)
bit_set(set->asid_set, i);
set->asid_next = ASID_FIRST_AVAILABLE;
mtx_init(&set->asid_set_mutex, "asid set", NULL, MTX_SPIN);
}
static void
pmap_init_pv_table(void)
{
struct vm_phys_seg *seg, *next_seg;
struct pmap_large_md_page *pvd;
vm_size_t s;
int domain, i, j, pages;
/*
* We depend on the page size being evenly divisible by the entry
* size so that the pv_table array can be indexed directly while
* safely spanning multiple pages from different domains.
*/
CTASSERT(PAGE_SIZE % sizeof(*pvd) == 0);
/*
* Calculate the size of the array.
*/
s = 0;
for (i = 0; i < vm_phys_nsegs; i++) {
seg = &vm_phys_segs[i];
pages = pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
pmap_l2_pindex(seg->start);
s += round_page(pages * sizeof(*pvd));
}
pv_table = (struct pmap_large_md_page *)kva_alloc(s);
if (pv_table == NULL)
panic("%s: kva_alloc failed\n", __func__);
/*
* Iterate physical segments to allocate domain-local memory for PV
* list headers.
*/
pvd = pv_table;
for (i = 0; i < vm_phys_nsegs; i++) {
seg = &vm_phys_segs[i];
pages = pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
pmap_l2_pindex(seg->start);
domain = seg->domain;
s = round_page(pages * sizeof(*pvd));
for (j = 0; j < s; j += PAGE_SIZE) {
vm_page_t m = vm_page_alloc_noobj_domain(domain,
VM_ALLOC_ZERO);
if (m == NULL)
panic("failed to allocate PV table page");
pmap_qenter((vm_offset_t)pvd + j, &m, 1);
}
for (j = 0; j < s / sizeof(*pvd); j++) {
rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
TAILQ_INIT(&pvd->pv_page.pv_list);
pvd++;
}
}
pvd = &pv_dummy_large;
memset(pvd, 0, sizeof(*pvd));
rw_init_flags(&pvd->pv_lock, "pmap pv list dummy", RW_NEW);
TAILQ_INIT(&pvd->pv_page.pv_list);
/*
* Set pointers from vm_phys_segs to pv_table.
*/
for (i = 0, pvd = pv_table; i < vm_phys_nsegs; i++) {
seg = &vm_phys_segs[i];
seg->md_first = pvd;
pvd += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
pmap_l2_pindex(seg->start);
/*
* If there is a following segment, and the final
* superpage of this segment and the initial superpage
* of the next segment are the same then adjust the
* pv_table entry for that next segment down by one so
* that the pv_table entries will be shared.
*/
if (i + 1 < vm_phys_nsegs) {
next_seg = &vm_phys_segs[i + 1];
if (pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) - 1 ==
pmap_l2_pindex(next_seg->start)) {
pvd--;
}
}
}
}
-static bool
+static cpu_feat_en
pmap_dbm_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t id_aa64mmfr1;
id_aa64mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
- return (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >=
- ID_AA64MMFR1_HAFDBS_AF_DBS);
+ if (ID_AA64MMFR1_HAFDBS_VAL(id_aa64mmfr1) >=
+ ID_AA64MMFR1_HAFDBS_AF_DBS)
+ return (FEAT_DEFAULT_ENABLE);
+
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
pmap_dbm_has_errata(const struct cpu_feat *feat __unused, u_int midr,
u_int **errata_list, u_int *errata_count)
{
/* Disable on Cortex-A55 for erratum 1024718 - all revisions */
if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK, CPU_IMPL_ARM,
CPU_PART_CORTEX_A55, 0, 0)) {
static u_int errata_id = 1024718;
*errata_list = &errata_id;
*errata_count = 1;
return (true);
}
/* Disable on Cortex-A510 for erratum 2051678 - r0p0 to r0p2 */
if (CPU_MATCH(CPU_IMPL_MASK | CPU_PART_MASK | CPU_VAR_MASK,
CPU_IMPL_ARM, CPU_PART_CORTEX_A510, 0, 0)) {
if (CPU_REV(PCPU_GET(midr)) < 3) {
static u_int errata_id = 2051678;
*errata_list = &errata_id;
*errata_count = 1;
return (true);
}
}
return (false);
}
static bool
pmap_dbm_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status, u_int *errata_list __unused,
u_int errata_count)
{
uint64_t tcr;
/* Skip if there is an erratum affecting DBM */
if (errata_status != ERRATA_NONE)
return (false);
tcr = READ_SPECIALREG(tcr_el1) | TCR_HD;
WRITE_SPECIALREG(tcr_el1, tcr);
isb();
/* Flush the local TLB for the TCR_HD flag change */
dsb(nshst);
__asm __volatile("tlbi vmalle1");
dsb(nsh);
isb();
return (true);
}
CPU_FEAT(feat_hafdbs, "Hardware management of the Access flag and dirty state",
pmap_dbm_check, pmap_dbm_has_errata, pmap_dbm_enable,
CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
/*
* Initialize the pmap module.
*
* Called by vm_mem_init(), to initialize any structures that the pmap
* system needs to map virtual memory.
*/
void
pmap_init(void)
{
uint64_t mmfr1;
int i, vmid_bits;
/*
* Are large page mappings enabled?
*/
TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
if (superpages_enabled) {
KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
("pmap_init: can't assign to pagesizes[1]"));
pagesizes[1] = L3C_SIZE;
KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
("pmap_init: can't assign to pagesizes[2]"));
pagesizes[2] = L2_SIZE;
if (L1_BLOCKS_SUPPORTED) {
KASSERT(MAXPAGESIZES > 3 && pagesizes[3] == 0,
("pmap_init: can't assign to pagesizes[3]"));
pagesizes[3] = L1_SIZE;
}
}
/*
* Initialize the ASID allocator.
*/
pmap_init_asids(&asids,
(READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8);
if (has_hyp()) {
mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
vmid_bits = 8;
if (ID_AA64MMFR1_VMIDBits_VAL(mmfr1) ==
ID_AA64MMFR1_VMIDBits_16)
vmid_bits = 16;
pmap_init_asids(&vmids, vmid_bits);
}
/*
* Initialize pv chunk lists.
*/
for (i = 0; i < PMAP_MEMDOM; i++) {
mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL,
MTX_DEF);
TAILQ_INIT(&pv_chunks[i].pvc_list);
}
pmap_init_pv_table();
vm_initialized = 1;
}
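/*
 * With superpages enabled and a 4K granule, the assignments above leave
 * pagesizes[] as { 4KB, 64KB, 2MB, 1GB }, the last entry only when L1
 * blocks are supported; the counters below are grouped along the same
 * sizes.
 */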
static SYSCTL_NODE(_vm_pmap, OID_AUTO, l1, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"L1 (1GB/64GB) page mapping counters");
static COUNTER_U64_DEFINE_EARLY(pmap_l1_demotions);
SYSCTL_COUNTER_U64(_vm_pmap_l1, OID_AUTO, demotions, CTLFLAG_RD,
&pmap_l1_demotions, "L1 (1GB/64GB) page demotions");
SYSCTL_BOOL(_vm_pmap_l1, OID_AUTO, supported, CTLFLAG_RD, &pmap_l1_supported,
0, "L1 blocks are supported");
static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2c, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"L2C (32MB/1GB) page mapping counters");
static COUNTER_U64_DEFINE_EARLY(pmap_l2c_demotions);
SYSCTL_COUNTER_U64(_vm_pmap_l2c, OID_AUTO, demotions, CTLFLAG_RD,
&pmap_l2c_demotions, "L2C (32MB/1GB) page demotions");
static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"2MB page mapping counters");
static COUNTER_U64_DEFINE_EARLY(pmap_l2_demotions);
SYSCTL_COUNTER_U64(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
&pmap_l2_demotions, "L2 (2MB/32MB) page demotions");
static COUNTER_U64_DEFINE_EARLY(pmap_l2_mappings);
SYSCTL_COUNTER_U64(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD,
&pmap_l2_mappings, "L2 (2MB/32MB) page mappings");
static COUNTER_U64_DEFINE_EARLY(pmap_l2_p_failures);
SYSCTL_COUNTER_U64(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
&pmap_l2_p_failures, "L2 (2MB/32MB) page promotion failures");
static COUNTER_U64_DEFINE_EARLY(pmap_l2_promotions);
SYSCTL_COUNTER_U64(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
&pmap_l2_promotions, "L2 (2MB/32MB) page promotions");
static SYSCTL_NODE(_vm_pmap, OID_AUTO, l3c, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"L3C (64KB/2MB) page mapping counters");
static COUNTER_U64_DEFINE_EARLY(pmap_l3c_demotions);
SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, demotions, CTLFLAG_RD,
&pmap_l3c_demotions, "L3C (64KB/2MB) page demotions");
static COUNTER_U64_DEFINE_EARLY(pmap_l3c_mappings);
SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, mappings, CTLFLAG_RD,
&pmap_l3c_mappings, "L3C (64KB/2MB) page mappings");
static COUNTER_U64_DEFINE_EARLY(pmap_l3c_p_failures);
SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, p_failures, CTLFLAG_RD,
&pmap_l3c_p_failures, "L3C (64KB/2MB) page promotion failures");
static COUNTER_U64_DEFINE_EARLY(pmap_l3c_promotions);
SYSCTL_COUNTER_U64(_vm_pmap_l3c, OID_AUTO, promotions, CTLFLAG_RD,
&pmap_l3c_promotions, "L3C (64KB/2MB) page promotions");
/*
* If the given value for "final_only" is false, then any cached intermediate-
* level entries, i.e., L{0,1,2}_TABLE entries, are invalidated in addition to
* any cached final-level entry, i.e., either an L{1,2}_BLOCK or L3_PAGE entry.
* Otherwise, just the cached final-level entry is invalidated.
*/
static __inline void
pmap_s1_invalidate_kernel(uint64_t r, bool final_only)
{
if (final_only)
__asm __volatile("tlbi vaale1is, %0" : : "r" (r));
else
__asm __volatile("tlbi vaae1is, %0" : : "r" (r));
}
static __inline void
pmap_s1_invalidate_user(uint64_t r, bool final_only)
{
if (final_only)
__asm __volatile("tlbi vale1is, %0" : : "r" (r));
else
__asm __volatile("tlbi vae1is, %0" : : "r" (r));
}
/*
* Invalidates any cached final- and optionally intermediate-level TLB entries
* for the specified virtual address in the given virtual address space.
*/
static __inline void
pmap_s1_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only)
{
uint64_t r;
PMAP_ASSERT_STAGE1(pmap);
dsb(ishst);
r = TLBI_VA(va);
if (pmap == kernel_pmap) {
pmap_s1_invalidate_kernel(r, final_only);
} else {
r |= ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
pmap_s1_invalidate_user(r, final_only);
}
dsb(ish);
isb();
}
static __inline void
pmap_s2_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only)
{
PMAP_ASSERT_STAGE2(pmap);
MPASS(pmap_stage2_invalidate_range != NULL);
pmap_stage2_invalidate_range(pmap_to_ttbr0(pmap), va, va + PAGE_SIZE,
final_only);
}
static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va, bool final_only)
{
if (pmap->pm_stage == PM_STAGE1)
pmap_s1_invalidate_page(pmap, va, final_only);
else
pmap_s2_invalidate_page(pmap, va, final_only);
}
/*
* Use stride L{1,2}_SIZE when invalidating the TLB entries for L{1,2}_BLOCK
* mappings. Otherwise, use stride L3_SIZE.
*/
static __inline void
pmap_s1_invalidate_strided(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
vm_offset_t stride, bool final_only)
{
uint64_t end, r, start;
PMAP_ASSERT_STAGE1(pmap);
dsb(ishst);
if (pmap == kernel_pmap) {
start = TLBI_VA(sva);
end = TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_kernel(r, final_only);
} else {
start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
start |= TLBI_VA(sva);
end |= TLBI_VA(eva);
for (r = start; r < end; r += TLBI_VA(stride))
pmap_s1_invalidate_user(r, final_only);
}
dsb(ish);
isb();
}
/*
* Invalidates any cached final- and optionally intermediate-level TLB entries
* for the specified virtual address range in the given virtual address space.
*/
static __inline void
pmap_s1_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
bool final_only)
{
pmap_s1_invalidate_strided(pmap, sva, eva, L3_SIZE, final_only);
}
static __inline void
pmap_s2_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
bool final_only)
{
PMAP_ASSERT_STAGE2(pmap);
MPASS(pmap_stage2_invalidate_range != NULL);
pmap_stage2_invalidate_range(pmap_to_ttbr0(pmap), sva, eva, final_only);
}
static __inline void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
bool final_only)
{
if (pmap->pm_stage == PM_STAGE1)
pmap_s1_invalidate_range(pmap, sva, eva, final_only);
else
pmap_s2_invalidate_range(pmap, sva, eva, final_only);
}
/*
* Invalidates all cached intermediate- and final-level TLB entries for the
* given virtual address space.
*/
static __inline void
pmap_s1_invalidate_all(pmap_t pmap)
{
uint64_t r;
PMAP_ASSERT_STAGE1(pmap);
dsb(ishst);
if (pmap == kernel_pmap) {
__asm __volatile("tlbi vmalle1is");
} else {
r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
__asm __volatile("tlbi aside1is, %0" : : "r" (r));
}
dsb(ish);
isb();
}
static __inline void
pmap_s2_invalidate_all(pmap_t pmap)
{
PMAP_ASSERT_STAGE2(pmap);
MPASS(pmap_stage2_invalidate_all != NULL);
pmap_stage2_invalidate_all(pmap_to_ttbr0(pmap));
}
static __inline void
pmap_invalidate_all(pmap_t pmap)
{
if (pmap->pm_stage == PM_STAGE1)
pmap_s1_invalidate_all(pmap);
else
pmap_s2_invalidate_all(pmap);
}
/*
* Routine: pmap_extract
* Function:
* Extract the physical page address associated
* with the given map/virtual_address pair.
*/
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
pt_entry_t *pte, tpte;
vm_paddr_t pa;
int lvl;
pa = 0;
PMAP_LOCK(pmap);
/*
* Find the block or page map for this virtual address. pmap_pte
* will return either a valid block/page entry, or NULL.
*/
pte = pmap_pte(pmap, va, &lvl);
if (pte != NULL) {
tpte = pmap_load(pte);
pa = PTE_TO_PHYS(tpte);
switch (lvl) {
case 1:
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
("pmap_extract: Invalid L1 pte found: %lx",
tpte & ATTR_DESCR_MASK));
pa |= (va & L1_OFFSET);
break;
case 2:
KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
("pmap_extract: Invalid L2 pte found: %lx",
tpte & ATTR_DESCR_MASK));
pa |= (va & L2_OFFSET);
break;
case 3:
KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
("pmap_extract: Invalid L3 pte found: %lx",
tpte & ATTR_DESCR_MASK));
pa |= (va & L3_OFFSET);
break;
}
}
PMAP_UNLOCK(pmap);
return (pa);
}
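/*
 * A minimal usage sketch: for a mapped address,
 *
 *     vm_paddr_t pa = pmap_extract(kernel_pmap, va);
 *
 * returns the backing physical address, or 0 when no block or page mapping
 * exists. For kernel addresses pmap_kextract() below is the cheaper path,
 * preferring the DMAP or the hardware address-translation instruction over
 * a software page table walk.
 */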
/*
* Routine: pmap_extract_and_hold
* Function:
* Atomically extract and hold the physical page
* with the given pmap and virtual address pair
* if that mapping permits the given protection.
*/
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pt_entry_t *pte, tpte;
vm_offset_t off;
vm_page_t m;
int lvl;
bool use;
m = NULL;
PMAP_LOCK(pmap);
pte = pmap_pte(pmap, va, &lvl);
if (pte != NULL) {
tpte = pmap_load(pte);
KASSERT(lvl > 0 && lvl <= 3,
("pmap_extract_and_hold: Invalid level %d", lvl));
/*
* Check that the pte is either an L3 page, or an L1 or L2 block
* entry. We can assume L1_BLOCK == L2_BLOCK.
*/
KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
(lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
tpte & ATTR_DESCR_MASK));
use = false;
if ((prot & VM_PROT_WRITE) == 0)
use = true;
else if (pmap->pm_stage == PM_STAGE1 &&
(tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW))
use = true;
else if (pmap->pm_stage == PM_STAGE2 &&
((tpte & ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)) ==
ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE)))
use = true;
if (use) {
switch (lvl) {
case 1:
off = va & L1_OFFSET;
break;
case 2:
off = va & L2_OFFSET;
break;
case 3:
default:
off = 0;
}
m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(tpte) | off);
if (m != NULL && !vm_page_wire_mapped(m))
m = NULL;
}
}
PMAP_UNLOCK(pmap);
return (m);
}
/*
* Returns true if the entire kernel virtual address range [sva, sva + size)
* is mapped.
*/
static bool
pmap_kmapped_range(vm_offset_t sva, vm_size_t size)
{
pt_entry_t *pte, tpte;
vm_offset_t eva;
KASSERT(sva >= VM_MIN_KERNEL_ADDRESS,
("%s: Invalid virtual address: %lx", __func__, sva));
MPASS(size != 0);
eva = sva + size - 1;
KASSERT(eva > sva, ("%s: Size too large: sva %lx, size %lx", __func__,
sva, size));
while (sva <= eva) {
pte = pmap_l1(kernel_pmap, sva);
if (pte == NULL)
return (false);
tpte = pmap_load(pte);
if (tpte == 0)
return (false);
if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) {
sva = (sva & ~L1_OFFSET) + L1_SIZE;
continue;
}
pte = pmap_l1_to_l2(&tpte, sva);
tpte = pmap_load(pte);
if (tpte == 0)
return (false);
if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) {
sva = (sva & ~L2_OFFSET) + L2_SIZE;
continue;
}
pte = pmap_l2_to_l3(&tpte, sva);
tpte = pmap_load(pte);
if (tpte == 0)
return (false);
MPASS((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_PAGE);
if ((tpte & ATTR_CONTIGUOUS) == ATTR_CONTIGUOUS)
sva = (sva & ~L3C_OFFSET) + L3C_SIZE;
else
sva = (sva & ~L3_OFFSET) + L3_SIZE;
}
return (true);
}
/*
* Walks the page tables to translate a kernel virtual address to a
* physical address. Returns true if the kva is valid and stores the
* physical address in pa if it is not NULL.
*
* See the comment above data_abort() for the rationale for specifying
* NO_PERTHREAD_SSP here.
*/
bool NO_PERTHREAD_SSP
pmap_klookup(vm_offset_t va, vm_paddr_t *pa)
{
pt_entry_t *pte, tpte;
register_t intr;
uint64_t par;
/*
* Disable interrupts so we don't get interrupted between asking
* for address translation, and getting the result back.
*/
intr = intr_disable();
par = arm64_address_translate_s1e1r(va);
intr_restore(intr);
if (PAR_SUCCESS(par)) {
if (pa != NULL)
*pa = (par & PAR_PA_MASK) | (va & PAR_LOW_MASK);
return (true);
}
/*
* Fall back to walking the page table. The address translation
* instruction may fail when the page is in a break-before-make
* sequence. As we only clear the valid bit in said sequence we
* can walk the page table to find the physical address.
*/
pte = pmap_l1(kernel_pmap, va);
if (pte == NULL)
return (false);
/*
* A concurrent pmap_update_entry() will clear the entry's valid bit
* but leave the rest of the entry unchanged. Therefore, we treat a
* non-zero entry as being valid, and we ignore the valid bit when
* determining whether the entry maps a block, page, or table.
*/
tpte = pmap_load(pte);
if (tpte == 0)
return (false);
if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) {
if (pa != NULL)
*pa = PTE_TO_PHYS(tpte) | (va & L1_OFFSET);
return (true);
}
pte = pmap_l1_to_l2(&tpte, va);
tpte = pmap_load(pte);
if (tpte == 0)
return (false);
if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) {
if (pa != NULL)
*pa = PTE_TO_PHYS(tpte) | (va & L2_OFFSET);
return (true);
}
pte = pmap_l2_to_l3(&tpte, va);
tpte = pmap_load(pte);
if (tpte == 0)
return (false);
if (pa != NULL)
*pa = PTE_TO_PHYS(tpte) | (va & L3_OFFSET);
return (true);
}
/*
* Routine: pmap_kextract
* Function:
* Extract the physical page address associated with the given kernel
* virtual address.
*/
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
vm_paddr_t pa;
if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
return (DMAP_TO_PHYS(va));
if (pmap_klookup(va, &pa) == false)
return (0);
return (pa);
}
/***************************************************
* Low level mapping routines.....
***************************************************/
void
pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
{
pd_entry_t *pde;
pt_entry_t attr, old_l3e, *pte;
vm_offset_t va;
vm_page_t mpte;
int error, lvl;
KASSERT((pa & L3_OFFSET) == 0,
("pmap_kenter: Invalid physical address"));
KASSERT((sva & L3_OFFSET) == 0,
("pmap_kenter: Invalid virtual address"));
KASSERT((size & PAGE_MASK) == 0,
("pmap_kenter: Mapping is not page-sized"));
attr = ATTR_AF | pmap_sh_attr | ATTR_S1_AP(ATTR_S1_AP_RW) |
ATTR_S1_XN | ATTR_KERN_GP | ATTR_S1_IDX(mode);
old_l3e = 0;
va = sva;
while (size != 0) {
pde = pmap_pde(kernel_pmap, va, &lvl);
KASSERT(pde != NULL,
("pmap_kenter: Invalid page entry, va: 0x%lx", va));
KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));
/*
* If we have an aligned, contiguous chunk of L2_SIZE, try
* to create an L2_BLOCK mapping.
*/
if ((va & L2_OFFSET) == 0 && size >= L2_SIZE &&
(pa & L2_OFFSET) == 0 && vm_initialized) {
mpte = PTE_TO_VM_PAGE(pmap_load(pde));
KASSERT(pmap_every_pte_zero(VM_PAGE_TO_PHYS(mpte)),
("pmap_kenter: Unexpected mapping"));
PMAP_LOCK(kernel_pmap);
error = pmap_insert_pt_page(kernel_pmap, mpte, false,
false);
if (error == 0) {
attr &= ~ATTR_CONTIGUOUS;
/*
* Although the page table page "mpte" should
* be devoid of mappings, the TLB might hold
* intermediate entries that reference it, so
* we perform a single-page invalidation.
*/
pmap_update_entry(kernel_pmap, pde,
PHYS_TO_PTE(pa) | attr | L2_BLOCK, va,
PAGE_SIZE);
}
PMAP_UNLOCK(kernel_pmap);
if (error == 0) {
va += L2_SIZE;
pa += L2_SIZE;
size -= L2_SIZE;
continue;
}
}
/*
* If we have an aligned, contiguous chunk of L3C_ENTRIES
* L3 pages, set the contiguous bit within each PTE so that
* the chunk can be cached using only one TLB entry.
*/
if ((va & L3C_OFFSET) == 0 && (pa & L3C_OFFSET) == 0) {
if (size >= L3C_SIZE)
attr |= ATTR_CONTIGUOUS;
else
attr &= ~ATTR_CONTIGUOUS;
}
pte = pmap_l2_to_l3(pde, va);
old_l3e |= pmap_load_store(pte, PHYS_TO_PTE(pa) | attr |
L3_PAGE);
va += PAGE_SIZE;
pa += PAGE_SIZE;
size -= PAGE_SIZE;
}
if ((old_l3e & ATTR_DESCR_VALID) != 0)
pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
else {
/*
* Because the old entries were invalid and the new mappings
* are not executable, an isb is not required.
*/
dsb(ishst);
}
}
void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{
pmap_kenter(sva, size, pa, VM_MEMATTR_DEVICE);
}
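/*
 * A minimal usage sketch for a hypothetical device region ("va", "pa" and
 * "len" are placeholders, page aligned and a multiple of PAGE_SIZE):
 *
 *     pmap_kenter_device(va, len, pa);
 *     ...
 *     pmap_kremove_device(va, len);
 */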
/*
* Remove a page from the kernel pagetables.
*/
void
pmap_kremove(vm_offset_t va)
{
pt_entry_t *pte;
pte = pmap_pte_exists(kernel_pmap, va, 3, __func__);
KASSERT((pmap_load(pte) & ATTR_CONTIGUOUS) == 0,
("pmap_kremove: unexpected ATTR_CONTIGUOUS"));
pmap_clear(pte);
pmap_s1_invalidate_page(kernel_pmap, va, true);
}
/*
* Remove the specified range of mappings from the kernel address space.
*
* Should only be applied to mappings that were created by pmap_kenter() or
* pmap_kenter_device(). Nothing about this function is actually specific
* to device mappings.
*/
void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
pt_entry_t *ptep, *ptep_end;
vm_offset_t va;
int lvl;
KASSERT((sva & L3_OFFSET) == 0,
("pmap_kremove_device: Invalid virtual address"));
KASSERT((size & PAGE_MASK) == 0,
("pmap_kremove_device: Mapping is not page-sized"));
va = sva;
while (size != 0) {
ptep = pmap_pte(kernel_pmap, va, &lvl);
KASSERT(ptep != NULL, ("Invalid page table, va: 0x%lx", va));
switch (lvl) {
case 2:
KASSERT((va & L2_OFFSET) == 0,
("Unaligned virtual address"));
KASSERT(size >= L2_SIZE, ("Insufficient size"));
if (va != sva) {
pmap_s1_invalidate_range(kernel_pmap, sva, va,
true);
}
pmap_clear(ptep);
pmap_s1_invalidate_page(kernel_pmap, va, true);
PMAP_LOCK(kernel_pmap);
pmap_remove_kernel_l2(kernel_pmap, ptep, va);
PMAP_UNLOCK(kernel_pmap);
va += L2_SIZE;
sva = va;
size -= L2_SIZE;
break;
case 3:
if ((pmap_load(ptep) & ATTR_CONTIGUOUS) != 0) {
KASSERT((va & L3C_OFFSET) == 0,
("Unaligned L3C virtual address"));
KASSERT(size >= L3C_SIZE,
("Insufficient L3C size"));
ptep_end = ptep + L3C_ENTRIES;
for (; ptep < ptep_end; ptep++)
pmap_clear(ptep);
va += L3C_SIZE;
size -= L3C_SIZE;
break;
}
pmap_clear(ptep);
va += PAGE_SIZE;
size -= PAGE_SIZE;
break;
default:
__assert_unreachable();
break;
}
}
if (va != sva)
pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
}
/*
* Used to map a range of physical addresses into kernel
* virtual address space.
*
* The value passed in '*virt' is a suggested virtual address for
* the mapping. Architectures which can support a direct-mapped
* physical to virtual region can return the appropriate address
* within that region, leaving '*virt' unchanged. Other
* architectures should map the pages starting at '*virt' and
* update '*virt' with the first usable address after the mapped
* region.
*/
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
return PHYS_TO_DMAP(start);
}
/*
* Add a list of wired pages to the kva. This routine is only used for
* temporary kernel mappings that do not need to have page modification
* or references recorded. Note that old mappings are simply written
* over. The page *must* be wired.
* Note: SMP coherent. Uses broadcast TLB invalidation rather than a
* ranged shootdown IPI.
*/
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
pd_entry_t *pde;
pt_entry_t attr, old_l3e, *pte;
vm_offset_t va;
vm_page_t m;
int i, lvl;
old_l3e = 0;
va = sva;
for (i = 0; i < count; i++) {
pde = pmap_pde(kernel_pmap, va, &lvl);
KASSERT(pde != NULL,
("pmap_qenter: Invalid page entry, va: 0x%lx", va));
KASSERT(lvl == 2,
("pmap_qenter: Invalid level %d", lvl));
m = ma[i];
attr = ATTR_AF | pmap_sh_attr |
ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
ATTR_KERN_GP | ATTR_S1_IDX(m->md.pv_memattr) | L3_PAGE;
pte = pmap_l2_to_l3(pde, va);
old_l3e |= pmap_load_store(pte, VM_PAGE_TO_PTE(m) | attr);
va += L3_SIZE;
}
if ((old_l3e & ATTR_DESCR_VALID) != 0)
pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
else {
/*
* Because the old entries were invalid and the new mappings
* are not executable, an isb is not required.
*/
dsb(ishst);
}
}
/*
* This routine tears out page mappings from the
* kernel -- it is meant only for temporary mappings.
*/
void
pmap_qremove(vm_offset_t sva, int count)
{
pt_entry_t *pte;
vm_offset_t va;
KASSERT(ADDR_IS_CANONICAL(sva),
("%s: Address not in canonical form: %lx", __func__, sva));
KASSERT(ADDR_IS_KERNEL(sva), ("usermode va %lx", sva));
va = sva;
while (count-- > 0) {
pte = pmap_pte_exists(kernel_pmap, va, 3, NULL);
if (pte != NULL) {
pmap_clear(pte);
}
va += PAGE_SIZE;
}
pmap_s1_invalidate_range(kernel_pmap, sva, va, true);
}
/***************************************************
* Page table page management routines.....
***************************************************/
/*
* Schedule the specified unused page table page to be freed. Specifically,
* add the page to the specified list of pages that will be released to the
* physical memory manager after the TLB has been updated.
*/
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, bool set_PG_ZERO)
{
if (set_PG_ZERO)
m->flags |= PG_ZERO;
else
m->flags &= ~PG_ZERO;
SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}
/*
* Decrements a page table page's reference count, which is used to record the
* number of valid page table entries within the page. If the reference count
* drops to zero, then the page table page is unmapped. Returns true if the
* page table page was unmapped and false otherwise.
*/
static inline bool
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
--m->ref_count;
if (m->ref_count == 0) {
_pmap_unwire_l3(pmap, va, m, free);
return (true);
} else
return (false);
}
static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
* unmap the page table page
*/
if (m->pindex >= (NUL2E + NUL1E)) {
/* l1 page */
pd_entry_t *l0;
l0 = pmap_l0(pmap, va);
pmap_clear(l0);
} else if (m->pindex >= NUL2E) {
/* l2 page */
pd_entry_t *l1;
l1 = pmap_l1(pmap, va);
pmap_clear(l1);
} else {
/* l3 page */
pd_entry_t *l2;
l2 = pmap_l2(pmap, va);
pmap_clear(l2);
}
pmap_resident_count_dec(pmap, 1);
if (m->pindex < NUL2E) {
/* We just released an l3, unhold the matching l2 */
pd_entry_t *l1, tl1;
vm_page_t l2pg;
l1 = pmap_l1(pmap, va);
tl1 = pmap_load(l1);
l2pg = PTE_TO_VM_PAGE(tl1);
pmap_unwire_l3(pmap, va, l2pg, free);
} else if (m->pindex < (NUL2E + NUL1E)) {
/* We just released an l2, unhold the matching l1 */
pd_entry_t *l0, tl0;
vm_page_t l1pg;
l0 = pmap_l0(pmap, va);
tl0 = pmap_load(l0);
l1pg = PTE_TO_VM_PAGE(tl0);
pmap_unwire_l3(pmap, va, l1pg, free);
}
pmap_invalidate_page(pmap, va, false);
/*
* Put page on a list so that it is released after
* *ALL* TLB shootdown is done
*/
pmap_add_delayed_free_list(m, free, true);
}
/*
* After removing a page table entry, this routine is used to
* conditionally free the page, and manage the reference count.
*/
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
struct spglist *free)
{
vm_page_t mpte;
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
if (ADDR_IS_KERNEL(va))
return (0);
KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
mpte = PTE_TO_VM_PAGE(ptepde);
return (pmap_unwire_l3(pmap, va, mpte, free));
}
/*
* Release a page table page reference after a failed attempt to create a
* mapping.
*/
static void
pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
struct spglist free;
SLIST_INIT(&free);
if (pmap_unwire_l3(pmap, va, mpte, &free))
vm_page_free_pages_toq(&free, true);
}
void
pmap_pinit0(pmap_t pmap)
{
PMAP_LOCK_INIT(pmap);
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1);
pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN);
pmap->pm_stage = PM_STAGE1;
pmap->pm_levels = 4;
pmap->pm_ttbr = pmap->pm_l0_paddr;
pmap->pm_asid_set = &asids;
pmap->pm_bti = NULL;
PCPU_SET(curpmap, pmap);
}
int
pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage, int levels)
{
vm_page_t m;
/*
* allocate the l0 page
*/
m = vm_page_alloc_noobj(VM_ALLOC_WAITOK | VM_ALLOC_WIRED |
VM_ALLOC_ZERO);
pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(m);
pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
TAILQ_INIT(&pmap->pm_pvchunk);
vm_radix_init(&pmap->pm_root);
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
MPASS(levels == 3 || levels == 4);
pmap->pm_levels = levels;
pmap->pm_stage = stage;
pmap->pm_bti = NULL;
switch (stage) {
case PM_STAGE1:
pmap->pm_asid_set = &asids;
if (pmap_bti_support) {
pmap->pm_bti = malloc(sizeof(struct rangeset), M_DEVBUF,
M_ZERO | M_WAITOK);
rangeset_init(pmap->pm_bti, bti_dup_range,
bti_free_range, pmap, M_NOWAIT);
}
break;
case PM_STAGE2:
pmap->pm_asid_set = &vmids;
break;
default:
panic("%s: Invalid pmap type %d", __func__, stage);
break;
}
/* XXX Temporarily disable deferred ASID allocation. */
pmap_alloc_asid(pmap);
/*
* Allocate the level 1 entry to use as the root. This will increase
* the refcount on the level 1 page so it won't be removed until
* pmap_release() is called.
*/
if (pmap->pm_levels == 3) {
PMAP_LOCK(pmap);
m = _pmap_alloc_l3(pmap, NUL2E + NUL1E, NULL);
PMAP_UNLOCK(pmap);
}
pmap->pm_ttbr = VM_PAGE_TO_PHYS(m);
return (1);
}
int
pmap_pinit(pmap_t pmap)
{
return (pmap_pinit_stage(pmap, PM_STAGE1, 4));
}
/*
* This routine is called if the desired page table page does not exist.
*
* If page table page allocation fails, this routine may sleep before
* returning NULL. It sleeps only if a lock pointer was given.
*
* Note: If a page allocation fails at page table level two or three,
* one or two pages may be held during the wait, only to be released
* afterwards. This conservative approach is easily argued to avoid
* race conditions.
*/
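/*
 * The page table page index space used below is layered: indices less than
 * NUL2E name L3 page table pages, indices in [NUL2E, NUL2E + NUL1E) name L2
 * pages, and indices at or above NUL2E + NUL1E name L1 pages. For example,
 * the recursion allocates the parent of an L3 page at index "ptepindex" by
 * requesting index NUL2E + (ptepindex >> Ln_ENTRIES_SHIFT).
 */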
static vm_page_t
_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
vm_page_t m, l1pg, l2pg;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
* Allocate a page table page.
*/
if ((m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
if (lockp != NULL) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_UNLOCK(pmap);
vm_wait(NULL);
PMAP_LOCK(pmap);
}
/*
* Indicate the need to retry. While waiting, the page table
* page may have been allocated.
*/
return (NULL);
}
m->pindex = ptepindex;
/*
* Because of AArch64's weak memory consistency model, we must have a
* barrier here to ensure that the stores for zeroing "m", whether by
* pmap_zero_page() or an earlier function, are visible before adding
* "m" to the page table. Otherwise, a page table walk by another
* processor's MMU could see the mapping to "m" and a stale, non-zero
* PTE within "m".
*/
dmb(ishst);
/*
* Map the pagetable page into the process address space, if
* it isn't already there.
*/
if (ptepindex >= (NUL2E + NUL1E)) {
pd_entry_t *l0p, l0e;
vm_pindex_t l0index;
l0index = ptepindex - (NUL2E + NUL1E);
l0p = &pmap->pm_l0[l0index];
KASSERT((pmap_load(l0p) & ATTR_DESCR_VALID) == 0,
("%s: L0 entry %#lx is valid", __func__, pmap_load(l0p)));
l0e = VM_PAGE_TO_PTE(m) | L0_TABLE;
/*
* Mark all kernel memory as not accessible from userspace
* and userspace memory as not executable from the kernel.
* This has been done for the bootstrap L0 entries in
* locore.S.
*/
if (pmap == kernel_pmap)
l0e |= TATTR_UXN_TABLE | TATTR_AP_TABLE_NO_EL0;
else
l0e |= TATTR_PXN_TABLE;
pmap_store(l0p, l0e);
} else if (ptepindex >= NUL2E) {
vm_pindex_t l0index, l1index;
pd_entry_t *l0, *l1;
pd_entry_t tl0;
l1index = ptepindex - NUL2E;
l0index = l1index >> Ln_ENTRIES_SHIFT;
l0 = &pmap->pm_l0[l0index];
tl0 = pmap_load(l0);
if (tl0 == 0) {
/* recurse for allocating page dir */
if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
lockp) == NULL) {
vm_page_unwire_noq(m);
vm_page_free_zero(m);
return (NULL);
}
} else {
l1pg = PTE_TO_VM_PAGE(tl0);
l1pg->ref_count++;
}
l1 = (pd_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l0)));
l1 = &l1[ptepindex & Ln_ADDR_MASK];
KASSERT((pmap_load(l1) & ATTR_DESCR_VALID) == 0,
("%s: L1 entry %#lx is valid", __func__, pmap_load(l1)));
pmap_store(l1, VM_PAGE_TO_PTE(m) | L1_TABLE);
} else {
vm_pindex_t l0index, l1index;
pd_entry_t *l0, *l1, *l2;
pd_entry_t tl0, tl1;
l1index = ptepindex >> Ln_ENTRIES_SHIFT;
l0index = l1index >> Ln_ENTRIES_SHIFT;
l0 = &pmap->pm_l0[l0index];
tl0 = pmap_load(l0);
if (tl0 == 0) {
/* recurse for allocating page dir */
if (_pmap_alloc_l3(pmap, NUL2E + l1index,
lockp) == NULL) {
vm_page_unwire_noq(m);
vm_page_free_zero(m);
return (NULL);
}
tl0 = pmap_load(l0);
l1 = (pd_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(tl0));
l1 = &l1[l1index & Ln_ADDR_MASK];
} else {
l1 = (pd_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(tl0));
l1 = &l1[l1index & Ln_ADDR_MASK];
tl1 = pmap_load(l1);
if (tl1 == 0) {
/* recurse for allocating page dir */
if (_pmap_alloc_l3(pmap, NUL2E + l1index,
lockp) == NULL) {
vm_page_unwire_noq(m);
vm_page_free_zero(m);
return (NULL);
}
} else {
l2pg = PTE_TO_VM_PAGE(tl1);
l2pg->ref_count++;
}
}
l2 = (pd_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l1)));
l2 = &l2[ptepindex & Ln_ADDR_MASK];
KASSERT((pmap_load(l2) & ATTR_DESCR_VALID) == 0,
("%s: L2 entry %#lx is valid", __func__, pmap_load(l2)));
pmap_store(l2, VM_PAGE_TO_PTE(m) | L2_TABLE);
}
pmap_resident_count_inc(pmap, 1);
return (m);
}
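/*
 * Returns a pointer to the L2 entry for the given virtual address, allocating
 * the corresponding L2 page table page if necessary.  For user addresses the
 * L2 page is returned through "*l2pgp" with a reference held on it; for
 * kernel addresses the page table page must already exist and "*l2pgp" is set
 * to NULL.
 */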
static pd_entry_t *
pmap_alloc_l2(pmap_t pmap, vm_offset_t va, vm_page_t *l2pgp,
struct rwlock **lockp)
{
pd_entry_t *l1, *l2;
vm_page_t l2pg;
vm_pindex_t l2pindex;
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
retry:
l1 = pmap_l1(pmap, va);
if (l1 != NULL && (pmap_load(l1) & ATTR_DESCR_MASK) == L1_TABLE) {
l2 = pmap_l1_to_l2(l1, va);
if (ADDR_IS_USER(va)) {
/* Add a reference to the L2 page. */
l2pg = PTE_TO_VM_PAGE(pmap_load(l1));
l2pg->ref_count++;
} else
l2pg = NULL;
} else if (ADDR_IS_USER(va)) {
/* Allocate a L2 page. */
l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT;
l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp);
if (l2pg == NULL) {
if (lockp != NULL)
goto retry;
else
return (NULL);
}
l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg));
l2 = &l2[pmap_l2_index(va)];
} else
panic("pmap_alloc_l2: missing page table page for va %#lx",
va);
*l2pgp = l2pg;
return (l2);
}
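/*
 * Returns the page table page that will contain the L3 entries for the given
 * virtual address, allocating it if necessary.  If the page already exists,
 * its reference count is incremented.  May sleep and retry if a lock pointer
 * was given.
 */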
static vm_page_t
pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
vm_pindex_t ptepindex;
pd_entry_t *pde, tpde;
#ifdef INVARIANTS
pt_entry_t *pte;
#endif
vm_page_t m;
int lvl;
/*
* Calculate pagetable page index
*/
ptepindex = pmap_l2_pindex(va);
retry:
/*
* Get the page directory entry
*/
pde = pmap_pde(pmap, va, &lvl);
/*
* If the page table page is mapped, we just increment the hold count,
* and activate it. If we get a level 2 pde it will point to a level 3
* table.
*/
switch (lvl) {
case -1:
break;
case 0:
#ifdef INVARIANTS
pte = pmap_l0_to_l1(pde, va);
KASSERT(pmap_load(pte) == 0,
("pmap_alloc_l3: TODO: l0 superpages"));
#endif
break;
case 1:
#ifdef INVARIANTS
pte = pmap_l1_to_l2(pde, va);
KASSERT(pmap_load(pte) == 0,
("pmap_alloc_l3: TODO: l1 superpages"));
#endif
break;
case 2:
tpde = pmap_load(pde);
if (tpde != 0) {
m = PTE_TO_VM_PAGE(tpde);
m->ref_count++;
return (m);
}
break;
default:
panic("pmap_alloc_l3: Invalid level %d", lvl);
}
/*
* Here if the pte page isn't mapped, or if it has been deallocated.
*/
m = _pmap_alloc_l3(pmap, ptepindex, lockp);
if (m == NULL && lockp != NULL)
goto retry;
return (m);
}
/***************************************************
* Pmap allocation/deallocation routines.
***************************************************/
/*
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
* Should only be called if the map contains no valid mappings.
*/
void
pmap_release(pmap_t pmap)
{
bool rv __diagused;
struct spglist freelist;
struct asid_set *set;
vm_page_t m;
int asid;
if (pmap->pm_levels != 4) {
PMAP_ASSERT_STAGE2(pmap);
KASSERT(pmap->pm_stats.resident_count == 1,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
KASSERT((pmap->pm_l0[0] & ATTR_DESCR_VALID) == ATTR_DESCR_VALID,
("pmap_release: Invalid l0 entry: %lx", pmap->pm_l0[0]));
SLIST_INIT(&freelist);
m = PHYS_TO_VM_PAGE(pmap->pm_ttbr);
PMAP_LOCK(pmap);
rv = pmap_unwire_l3(pmap, 0, m, &freelist);
PMAP_UNLOCK(pmap);
MPASS(rv == true);
vm_page_free_pages_toq(&freelist, true);
}
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_release: pmap has reserved page table page(s)"));
set = pmap->pm_asid_set;
KASSERT(set != NULL, ("%s: NULL asid set", __func__));
/*
	 * Allow the ASID to be reused.  For stage 2 VMIDs we don't invalidate
	 * the TLB entries when removing them, so we rely on a later TLB
	 * invalidation; this will happen when the VMID generation is updated.
	 * Because of this we don't reuse VMIDs within a generation.
*/
if (pmap->pm_stage == PM_STAGE1) {
mtx_lock_spin(&set->asid_set_mutex);
if (COOKIE_TO_EPOCH(pmap->pm_cookie) == set->asid_epoch) {
asid = COOKIE_TO_ASID(pmap->pm_cookie);
KASSERT(asid >= ASID_FIRST_AVAILABLE &&
asid < set->asid_set_size,
("pmap_release: pmap cookie has out-of-range asid"));
bit_clear(set->asid_set, asid);
}
mtx_unlock_spin(&set->asid_set_mutex);
if (pmap->pm_bti != NULL) {
rangeset_fini(pmap->pm_bti);
free(pmap->pm_bti, M_DEVBUF);
}
}
m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
vm_page_unwire_noq(m);
vm_page_free_zero(m);
}
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
0, 0, kvm_size, "LU",
"Size of KVM");
static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
0, 0, kvm_free, "LU",
"Amount of KVM free");
/*
* grow the number of kernel page table entries, if needed
*/
static int
pmap_growkernel_nopanic(vm_offset_t addr)
{
vm_page_t nkpg;
pd_entry_t *l0, *l1, *l2;
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
addr = roundup2(addr, L2_SIZE);
if (addr - 1 >= vm_map_max(kernel_map))
addr = vm_map_max(kernel_map);
if (kernel_vm_end < addr) {
kasan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
kmsan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
}
while (kernel_vm_end < addr) {
l0 = pmap_l0(kernel_pmap, kernel_vm_end);
KASSERT(pmap_load(l0) != 0,
("pmap_growkernel: No level 0 kernel entry"));
l1 = pmap_l0_to_l1(l0, kernel_vm_end);
if (pmap_load(l1) == 0) {
/* We need a new PDP entry */
nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT |
VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (nkpg == NULL)
return (KERN_RESOURCE_SHORTAGE);
nkpg->pindex = pmap_l1_pindex(kernel_vm_end);
/* See the dmb() in _pmap_alloc_l3(). */
dmb(ishst);
pmap_store(l1, VM_PAGE_TO_PTE(nkpg) | L1_TABLE);
continue; /* try again */
}
l2 = pmap_l1_to_l2(l1, kernel_vm_end);
if (pmap_load(l2) != 0) {
kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
kernel_vm_end = vm_map_max(kernel_map);
break;
}
continue;
}
nkpg = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT |
VM_ALLOC_NOFREE | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (nkpg == NULL)
return (KERN_RESOURCE_SHORTAGE);
nkpg->pindex = pmap_l2_pindex(kernel_vm_end);
/* See the dmb() in _pmap_alloc_l3(). */
dmb(ishst);
pmap_store(l2, VM_PAGE_TO_PTE(nkpg) | L2_TABLE);
kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
kernel_vm_end = vm_map_max(kernel_map);
break;
}
}
return (KERN_SUCCESS);
}
int
pmap_growkernel(vm_offset_t addr)
{
int rv;
rv = pmap_growkernel_nopanic(addr);
if (rv != KERN_SUCCESS && pmap_growkernel_panic)
panic("pmap_growkernel: no memory to grow kernel");
return (rv);
}
/***************************************************
* page management routines.
***************************************************/
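/*
 * pc_freemask describes a completely free PV chunk: every 64-bit map word is
 * entirely free (PC_FREEN) except the last, which covers only the entries
 * that actually exist in the chunk (PC_FREEL).
 */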
static const uint64_t pc_freemask[_NPCM] = {
[0 ... _NPCM - 2] = PC_FREEN,
[_NPCM - 1] = PC_FREEL
};
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
"Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
"Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
"Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
"Number of times tried to get a chunk page but failed.");
static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
static int pv_entry_spare;
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
"Current number of pv entry allocs");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
"Current number of pv entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
"Current number of spare pv entries");
#endif
/*
* We are in a serious low memory condition. Resort to
* drastic measures to free some pages so we can allocate
* another pv entry chunk.
*
* Returns NULL if PV entries were reclaimed from the specified pmap.
*
* We do not, however, unmap 2mpages because subsequent accesses will
* allocate per-page pv entries until repromotion occurs, thereby
* exacerbating the shortage of free pv entries.
*/
static vm_page_t
reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
{
struct pv_chunks_list *pvc;
struct pv_chunk *pc, *pc_marker, *pc_marker_end;
struct pv_chunk_header pc_marker_b, pc_marker_end_b;
struct md_page *pvh;
pd_entry_t *pde;
pmap_t next_pmap, pmap;
pt_entry_t *pte, tpte;
pv_entry_t pv;
vm_offset_t va;
vm_page_t m, m_pc;
struct spglist free;
uint64_t inuse;
int bit, field, freed, lvl;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
pmap = NULL;
m_pc = NULL;
SLIST_INIT(&free);
bzero(&pc_marker_b, sizeof(pc_marker_b));
bzero(&pc_marker_end_b, sizeof(pc_marker_end_b));
pc_marker = (struct pv_chunk *)&pc_marker_b;
pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
pvc = &pv_chunks[domain];
mtx_lock(&pvc->pvc_lock);
pvc->active_reclaims++;
TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru);
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru);
while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
SLIST_EMPTY(&free)) {
next_pmap = pc->pc_pmap;
if (next_pmap == NULL) {
/*
* The next chunk is a marker. However, it is
* not our marker, so active_reclaims must be
* > 1. Consequently, the next_chunk code
* will not rotate the pv_chunks list.
*/
goto next_chunk;
}
mtx_unlock(&pvc->pvc_lock);
/*
* A pv_chunk can only be removed from the pc_lru list
* when both pvc->pvc_lock is owned and the
* corresponding pmap is locked.
*/
if (pmap != next_pmap) {
if (pmap != NULL && pmap != locked_pmap)
PMAP_UNLOCK(pmap);
pmap = next_pmap;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_LOCK(pmap);
mtx_lock(&pvc->pvc_lock);
continue;
} else if (pmap != locked_pmap) {
if (PMAP_TRYLOCK(pmap)) {
mtx_lock(&pvc->pvc_lock);
continue;
} else {
pmap = NULL; /* pmap is not locked */
mtx_lock(&pvc->pvc_lock);
pc = TAILQ_NEXT(pc_marker, pc_lru);
if (pc == NULL ||
pc->pc_pmap != next_pmap)
continue;
goto next_chunk;
}
}
}
/*
* Destroy every non-wired, 4 KB page mapping in the chunk.
*/
freed = 0;
for (field = 0; field < _NPCM; field++) {
for (inuse = ~pc->pc_map[field] & pc_freemask[field];
inuse != 0; inuse &= ~(1UL << bit)) {
bit = ffsl(inuse) - 1;
pv = &pc->pc_pventry[field * 64 + bit];
va = pv->pv_va;
pde = pmap_pde(pmap, va, &lvl);
if (lvl != 2)
continue;
pte = pmap_l2_to_l3(pde, va);
tpte = pmap_load(pte);
if ((tpte & ATTR_SW_WIRED) != 0)
continue;
if ((tpte & ATTR_CONTIGUOUS) != 0)
(void)pmap_demote_l3c(pmap, pte, va);
tpte = pmap_load_clear(pte);
m = PTE_TO_VM_PAGE(tpte);
if (pmap_pte_dirty(pmap, tpte))
vm_page_dirty(m);
if ((tpte & ATTR_AF) != 0) {
pmap_s1_invalidate_page(pmap, va, true);
vm_page_aflag_set(m, PGA_REFERENCED);
}
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
if (TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = page_to_pvh(m);
if (TAILQ_EMPTY(&pvh->pv_list)) {
vm_page_aflag_clear(m,
PGA_WRITEABLE);
}
}
pc->pc_map[field] |= 1UL << bit;
pmap_unuse_pt(pmap, va, pmap_load(pde), &free);
freed++;
}
}
if (freed == 0) {
mtx_lock(&pvc->pvc_lock);
goto next_chunk;
}
/* Every freed mapping is for a 4 KB page. */
pmap_resident_count_dec(pmap, freed);
PV_STAT(atomic_add_long(&pv_entry_frees, freed));
PV_STAT(atomic_add_int(&pv_entry_spare, freed));
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
if (pc_is_free(pc)) {
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
/* Entire chunk is free; return it. */
m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m_pc->phys_addr);
mtx_lock(&pvc->pvc_lock);
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
break;
}
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
mtx_lock(&pvc->pvc_lock);
/* One freed pv entry in locked_pmap is sufficient. */
if (pmap == locked_pmap)
break;
next_chunk:
TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru);
if (pvc->active_reclaims == 1 && pmap != NULL) {
/*
* Rotate the pv chunks list so that we do not
* scan the same pv chunks that could not be
* freed (because they contained a wired
* and/or superpage mapping) on every
* invocation of reclaim_pv_chunk().
*/
while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker){
MPASS(pc->pc_pmap != NULL);
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
}
}
}
TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru);
pvc->active_reclaims--;
mtx_unlock(&pvc->pvc_lock);
if (pmap != NULL && pmap != locked_pmap)
PMAP_UNLOCK(pmap);
if (m_pc == NULL && !SLIST_EMPTY(&free)) {
m_pc = SLIST_FIRST(&free);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
/* Recycle a freed page table page. */
m_pc->ref_count = 1;
}
vm_page_free_pages_toq(&free, true);
return (m_pc);
}
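/*
 * Reclaim a PV chunk, trying the current CPU's memory domain first and then
 * the remaining domains in round-robin order.
 */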
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{
vm_page_t m;
int i, domain;
domain = PCPU_GET(domain);
for (i = 0; i < vm_ndomains; i++) {
m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
if (m != NULL)
break;
domain = (domain + 1) % vm_ndomains;
}
return (m);
}
/*
* free the pv_entry back to the free list
*/
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
struct pv_chunk *pc;
int idx, field, bit;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_frees, 1));
PV_STAT(atomic_add_int(&pv_entry_spare, 1));
PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
pc = pv_to_chunk(pv);
idx = pv - &pc->pc_pventry[0];
field = idx / 64;
bit = idx % 64;
pc->pc_map[field] |= 1ul << bit;
if (!pc_is_free(pc)) {
/* 98% of the time, pc is already at the head of the list. */
if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
}
return;
}
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
free_pv_chunk(pc);
}
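/*
 * Free a PV chunk that has already been removed from its per-domain pv_chunks
 * list, returning the backing page to the VM system.
 */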
static void
free_pv_chunk_dequeued(struct pv_chunk *pc)
{
vm_page_t m;
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
/* entire chunk is free, return it */
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m->phys_addr);
vm_page_unwire_noq(m);
vm_page_free(m);
}
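/*
 * Remove the PV chunk from its per-domain pv_chunks list and free it.
 */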
static void
free_pv_chunk(struct pv_chunk *pc)
{
struct pv_chunks_list *pvc;
pvc = &pv_chunks[pc_to_domain(pc)];
mtx_lock(&pvc->pvc_lock);
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
mtx_unlock(&pvc->pvc_lock);
free_pv_chunk_dequeued(pc);
}
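/*
 * Free a batch of PV chunks.  The chunks are first dequeued from their
 * per-domain pv_chunks lists so that each list's lock is taken only once.
 */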
static void
free_pv_chunk_batch(struct pv_chunklist *batch)
{
struct pv_chunks_list *pvc;
struct pv_chunk *pc, *npc;
int i;
for (i = 0; i < vm_ndomains; i++) {
if (TAILQ_EMPTY(&batch[i]))
continue;
pvc = &pv_chunks[i];
mtx_lock(&pvc->pvc_lock);
TAILQ_FOREACH(pc, &batch[i], pc_list) {
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
}
mtx_unlock(&pvc->pvc_lock);
}
for (i = 0; i < vm_ndomains; i++) {
TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) {
free_pv_chunk_dequeued(pc);
}
}
}
/*
* Returns a new PV entry, allocating a new PV chunk from the system when
* needed. If this PV chunk allocation fails and a PV list lock pointer was
* given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is
* returned.
*
* The given PV list lock may be released.
*/
static pv_entry_t
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
struct pv_chunks_list *pvc;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
vm_page_t m;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
if (pc != NULL) {
for (field = 0; field < _NPCM; field++) {
if (pc->pc_map[field]) {
bit = ffsl(pc->pc_map[field]) - 1;
break;
}
}
if (field < _NPCM) {
pv = &pc->pc_pventry[field * 64 + bit];
pc->pc_map[field] &= ~(1ul << bit);
/* If this was the last item, move it to tail */
if (pc_is_full(pc)) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
pc_list);
}
PV_STAT(atomic_add_long(&pv_entry_count, 1));
PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
return (pv);
}
}
/* No free items, allocate another chunk */
m = vm_page_alloc_noobj(VM_ALLOC_WIRED);
if (m == NULL) {
if (lockp == NULL) {
PV_STAT(pc_chunk_tryfail++);
return (NULL);
}
m = reclaim_pv_chunk(pmap, lockp);
if (m == NULL)
goto retry;
}
PV_STAT(atomic_add_int(&pc_chunk_count, 1));
PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
dump_add_page(m->phys_addr);
pc = (void *)PHYS_TO_DMAP(m->phys_addr);
pc->pc_pmap = pmap;
memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
pc->pc_map[0] &= ~1ul; /* preallocated bit 0 */
pvc = &pv_chunks[vm_page_domain(m)];
mtx_lock(&pvc->pvc_lock);
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
mtx_unlock(&pvc->pvc_lock);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(atomic_add_long(&pv_entry_count, 1));
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
return (pv);
}
/*
* Ensure that the number of spare PV entries in the specified pmap meets or
* exceeds the given count, "needed".
*
* The given PV list lock may be released.
*/
static void
reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
{
struct pv_chunks_list *pvc;
struct pch new_tail[PMAP_MEMDOM];
struct pv_chunk *pc;
vm_page_t m;
int avail, free, i;
bool reclaimed;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
/*
* Newly allocated PV chunks must be stored in a private list until
* the required number of PV chunks have been allocated. Otherwise,
* reclaim_pv_chunk() could recycle one of these chunks. In
* contrast, these chunks must be added to the pmap upon allocation.
*/
for (i = 0; i < PMAP_MEMDOM; i++)
TAILQ_INIT(&new_tail[i]);
retry:
avail = 0;
TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
bit_count((bitstr_t *)pc->pc_map, 0,
sizeof(pc->pc_map) * NBBY, &free);
if (free == 0)
break;
avail += free;
if (avail >= needed)
break;
}
for (reclaimed = false; avail < needed; avail += _NPCPV) {
m = vm_page_alloc_noobj(VM_ALLOC_WIRED);
if (m == NULL) {
m = reclaim_pv_chunk(pmap, lockp);
if (m == NULL)
goto retry;
reclaimed = true;
}
PV_STAT(atomic_add_int(&pc_chunk_count, 1));
PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
dump_add_page(m->phys_addr);
pc = (void *)PHYS_TO_DMAP(m->phys_addr);
pc->pc_pmap = pmap;
memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&new_tail[vm_page_domain(m)], pc, pc_lru);
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
/*
* The reclaim might have freed a chunk from the current pmap.
* If that chunk contained available entries, we need to
* re-count the number of available entries.
*/
if (reclaimed)
goto retry;
}
for (i = 0; i < vm_ndomains; i++) {
if (TAILQ_EMPTY(&new_tail[i]))
continue;
pvc = &pv_chunks[i];
mtx_lock(&pvc->pvc_lock);
TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru);
mtx_unlock(&pvc->pvc_lock);
}
}
/*
* First find and then remove the pv entry for the specified pmap and virtual
* address from the specified pv list. Returns the pv entry if found and NULL
* otherwise. This operation can be performed on pv lists for either 4KB or
* 2MB page mappings.
*/
static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
pv_entry_t pv;
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
break;
}
}
return (pv);
}
/*
* After demotion from a 2MB page mapping to 512 4KB page mappings,
* destroy the pv entry for the 2MB page mapping and reinstantiate the pv
* entries for each of the 4KB page mappings.
*/
static void
pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
struct rwlock **lockp)
{
struct md_page *pvh;
struct pv_chunk *pc;
pv_entry_t pv;
vm_offset_t va_last;
vm_page_t m;
int bit, field;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((va & L2_OFFSET) == 0,
("pmap_pv_demote_l2: va is not 2mpage aligned"));
KASSERT((pa & L2_OFFSET) == 0,
("pmap_pv_demote_l2: pa is not 2mpage aligned"));
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the 2mpage's pv entry for this mapping to the first
* page's pv list. Once this transfer begins, the pv list lock
* must not be released until the last pv entry is reinstantiated.
*/
pvh = pa_to_pvh(pa);
pv = pmap_pvh_remove(pvh, pmap, va);
KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
m = PHYS_TO_VM_PAGE(pa);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
/* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */
PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1));
va_last = va + L2_SIZE - PAGE_SIZE;
for (;;) {
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
KASSERT(!pc_is_full(pc), ("pmap_pv_demote_l2: missing spare"));
for (field = 0; field < _NPCM; field++) {
while (pc->pc_map[field]) {
bit = ffsl(pc->pc_map[field]) - 1;
pc->pc_map[field] &= ~(1ul << bit);
pv = &pc->pc_pventry[field * 64 + bit];
va += PAGE_SIZE;
pv->pv_va = va;
m++;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_pv_demote_l2: page %p is not managed", m));
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
if (va == va_last)
goto out;
}
}
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
}
out:
if (pc_is_full(pc)) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
}
PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1));
PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1));
}
/*
* First find and then destroy the pv entry for the specified pmap and virtual
* address. This operation can be performed on pv lists for either 4KB or 2MB
* page mappings.
*/
static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
pv_entry_t pv;
pv = pmap_pvh_remove(pvh, pmap, va);
KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
free_pv_entry(pmap, pv);
}
/*
* Conditionally create the PV entry for a 4KB page mapping if the required
* memory can be allocated without resorting to reclamation.
*/
static bool
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct rwlock **lockp)
{
pv_entry_t pv;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
pv->pv_va = va;
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
return (true);
} else
return (false);
}
/*
* Create the PV entry for a 2MB page mapping. Always returns true unless the
* flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns
* false if the PV entry cannot be allocated without resorting to reclamation.
*/
static bool
pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags,
struct rwlock **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
vm_paddr_t pa;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ?
NULL : lockp)) == NULL)
return (false);
pv->pv_va = va;
pa = PTE_TO_PHYS(l2e);
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
pvh = pa_to_pvh(pa);
TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
return (true);
}
/*
 * Conditionally creates the PV entries for an L3C superpage mapping if
* the required memory can be allocated without resorting to reclamation.
*/
static bool
pmap_pv_insert_l3c(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct rwlock **lockp)
{
pv_entry_t pv;
vm_offset_t tva;
vm_paddr_t pa __diagused;
vm_page_t mt;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((va & L3C_OFFSET) == 0,
("pmap_pv_insert_l3c: va is not aligned"));
pa = VM_PAGE_TO_PHYS(m);
KASSERT((pa & L3C_OFFSET) == 0,
("pmap_pv_insert_l3c: pa is not aligned"));
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
for (mt = m, tva = va; mt < &m[L3C_ENTRIES]; mt++, tva += L3_SIZE) {
/* Pass NULL instead of lockp to disable reclamation. */
pv = get_pv_entry(pmap, NULL);
if (__predict_false(pv == NULL)) {
while (tva > va) {
mt--;
tva -= L3_SIZE;
pmap_pvh_free(&mt->md, pmap, tva);
}
return (false);
}
pv->pv_va = tva;
TAILQ_INSERT_TAIL(&mt->md.pv_list, pv, pv_next);
mt->md.pv_gen++;
}
return (true);
}
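/*
 * Replace the kernel L2 block mapping at "va" with an L2_TABLE entry that
 * points to the preserved L3 page table page for that range.  The caller must
 * have already invalidated the old mapping (the "break" in break-before-make).
 */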
static void
pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
{
pt_entry_t newl2, oldl2 __diagused;
vm_page_t ml3;
vm_paddr_t ml3pa;
KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va));
KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap));
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
ml3 = pmap_remove_pt_page(pmap, va);
KASSERT(ml3 != NULL, ("pmap_remove_kernel_l2: missing pt page"));
ml3pa = VM_PAGE_TO_PHYS(ml3);
newl2 = PHYS_TO_PTE(ml3pa) | L2_TABLE;
/*
* If this page table page was unmapped by a promotion, then it
* contains valid mappings. Zero it to invalidate those mappings.
*/
if (vm_page_any_valid(ml3))
pagezero((void *)PHYS_TO_DMAP(ml3pa));
/*
* Demote the mapping. The caller must have already invalidated the
* mapping (i.e., the "break" in break-before-make).
*/
oldl2 = pmap_load_store(l2, newl2);
KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx",
__func__, l2, oldl2));
}
/*
 * pmap_remove_l2: Unmap a level 2 superpage (2MB block) mapping.
*/
static int
pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pd_entry_t l1e,
bool demote_kl2e, struct spglist *free, struct rwlock **lockp)
{
struct md_page *pvh;
pt_entry_t old_l2;
vm_page_t m, ml3, mt;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned"));
old_l2 = pmap_load_clear(l2);
KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK,
("pmap_remove_l2: L2e %lx is not a block mapping", old_l2));
/*
* Since a promotion must break the 4KB page mappings before making
* the 2MB page mapping, a pmap_s1_invalidate_page() suffices.
*/
pmap_s1_invalidate_page(pmap, sva, true);
if (old_l2 & ATTR_SW_WIRED)
pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE;
pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE);
if (old_l2 & ATTR_SW_MANAGED) {
m = PTE_TO_VM_PAGE(old_l2);
pvh = page_to_pvh(m);
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
pmap_pvh_free(pvh, pmap, sva);
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) {
if (pmap_pte_dirty(pmap, old_l2))
vm_page_dirty(mt);
if (old_l2 & ATTR_AF)
vm_page_aflag_set(mt, PGA_REFERENCED);
if (TAILQ_EMPTY(&mt->md.pv_list) &&
TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
}
if (pmap != kernel_pmap) {
ml3 = pmap_remove_pt_page(pmap, sva);
if (ml3 != NULL) {
KASSERT(vm_page_any_valid(ml3),
("pmap_remove_l2: l3 page not promoted"));
pmap_resident_count_dec(pmap, 1);
KASSERT(ml3->ref_count == NL3PG,
("pmap_remove_l2: l3 page ref count error"));
ml3->ref_count = 0;
pmap_add_delayed_free_list(ml3, free, false);
}
} else if (demote_kl2e) {
pmap_remove_kernel_l2(pmap, l2, sva);
} else {
ml3 = vm_radix_lookup(&pmap->pm_root, pmap_l2_pindex(sva));
if (vm_page_any_valid(ml3)) {
ml3->valid = 0;
pmap_zero_page(ml3);
}
}
return (pmap_unuse_pt(pmap, sva, l1e, free));
}
/*
 * pmap_remove_l3: Unmap a single 4KB page mapping from the given pmap.
*/
static int
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
{
struct md_page *pvh;
pt_entry_t old_l3;
vm_page_t m;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
old_l3 = pmap_load(l3);
if ((old_l3 & ATTR_CONTIGUOUS) != 0)
(void)pmap_demote_l3c(pmap, l3, va);
old_l3 = pmap_load_clear(l3);
pmap_s1_invalidate_page(pmap, va, true);
if (old_l3 & ATTR_SW_WIRED)
pmap->pm_stats.wired_count -= 1;
pmap_resident_count_dec(pmap, 1);
if (old_l3 & ATTR_SW_MANAGED) {
m = PTE_TO_VM_PAGE(old_l3);
if (pmap_pte_dirty(pmap, old_l3))
vm_page_dirty(m);
if (old_l3 & ATTR_AF)
vm_page_aflag_set(m, PGA_REFERENCED);
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
pmap_pvh_free(&m->md, pmap, va);
if (TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = page_to_pvh(m);
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
}
return (pmap_unuse_pt(pmap, va, l2e, free));
}
/*
* Removes the specified L3C superpage mapping. Requests TLB invalidations
* to be performed by the caller through the returned "*vap". Returns true
* if the level 3 table "ml3" was unmapped and added to the spglist "free".
* Otherwise, returns false.
*/
static bool
pmap_remove_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, vm_offset_t *vap,
vm_offset_t va_next, vm_page_t ml3, struct spglist *free,
struct rwlock **lockp)
{
struct md_page *pvh;
struct rwlock *new_lock;
pt_entry_t first_l3e, l3e, *tl3p;
vm_offset_t tva;
vm_page_t m, mt;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(((uintptr_t)l3p & ((L3C_ENTRIES * sizeof(pt_entry_t)) - 1)) ==
0, ("pmap_remove_l3c: l3p is not aligned"));
KASSERT((va & L3C_OFFSET) == 0,
("pmap_remove_l3c: va is not aligned"));
/*
* Hardware accessed and dirty bit maintenance might only update a
* single L3 entry, so we must combine the accessed and dirty bits
* from this entire set of contiguous L3 entries.
*/
first_l3e = pmap_load_clear(l3p);
for (tl3p = l3p + 1; tl3p < &l3p[L3C_ENTRIES]; tl3p++) {
l3e = pmap_load_clear(tl3p);
KASSERT((l3e & ATTR_CONTIGUOUS) != 0,
("pmap_remove_l3c: l3e is missing ATTR_CONTIGUOUS"));
if ((l3e & (ATTR_SW_DBM | ATTR_S1_AP_RW_BIT)) ==
(ATTR_SW_DBM | ATTR_S1_AP(ATTR_S1_AP_RW)))
first_l3e &= ~ATTR_S1_AP_RW_BIT;
first_l3e |= l3e & ATTR_AF;
}
if ((first_l3e & ATTR_SW_WIRED) != 0)
pmap->pm_stats.wired_count -= L3C_ENTRIES;
pmap_resident_count_dec(pmap, L3C_ENTRIES);
if ((first_l3e & ATTR_SW_MANAGED) != 0) {
m = PTE_TO_VM_PAGE(first_l3e);
new_lock = VM_PAGE_TO_PV_LIST_LOCK(m);
if (new_lock != *lockp) {
if (*lockp != NULL) {
/*
* Pending TLB invalidations must be
* performed before the PV list lock is
* released. Otherwise, a concurrent
* pmap_remove_all() on a physical page
* could return while a stale TLB entry
* still provides access to that page.
*/
if (*vap != va_next) {
pmap_invalidate_range(pmap, *vap, va,
true);
*vap = va_next;
}
rw_wunlock(*lockp);
}
*lockp = new_lock;
rw_wlock(*lockp);
}
pvh = page_to_pvh(m);
for (mt = m, tva = va; mt < &m[L3C_ENTRIES]; mt++, tva +=
L3_SIZE) {
if (pmap_pte_dirty(pmap, first_l3e))
vm_page_dirty(mt);
if ((first_l3e & ATTR_AF) != 0)
vm_page_aflag_set(mt, PGA_REFERENCED);
pmap_pvh_free(&mt->md, pmap, tva);
if (TAILQ_EMPTY(&mt->md.pv_list) &&
TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
}
if (*vap == va_next)
*vap = va;
if (ml3 != NULL) {
ml3->ref_count -= L3C_ENTRIES;
if (ml3->ref_count == 0) {
_pmap_unwire_l3(pmap, va, ml3, free);
return (true);
}
}
return (false);
}
/*
* Remove the specified range of addresses from the L3 page table that is
* identified by the given L2 entry.
*/
static void
pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva,
vm_offset_t eva, struct spglist *free, struct rwlock **lockp)
{
struct md_page *pvh;
struct rwlock *new_lock;
pt_entry_t *l3, old_l3;
vm_offset_t va;
vm_page_t l3pg, m;
KASSERT(ADDR_IS_CANONICAL(sva),
("%s: Start address not in canonical form: %lx", __func__, sva));
KASSERT(ADDR_IS_CANONICAL(eva) || eva == VM_MAX_USER_ADDRESS,
("%s: End address not in canonical form: %lx", __func__, eva));
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(rounddown2(sva, L2_SIZE) + L2_SIZE == roundup2(eva, L2_SIZE),
("pmap_remove_l3_range: range crosses an L3 page table boundary"));
l3pg = ADDR_IS_USER(sva) ? PTE_TO_VM_PAGE(l2e) : NULL;
va = eva;
for (l3 = pmap_l2_to_l3(&l2e, sva); sva != eva; l3++, sva += L3_SIZE) {
old_l3 = pmap_load(l3);
if (!pmap_l3_valid(old_l3)) {
if (va != eva) {
pmap_invalidate_range(pmap, va, sva, true);
va = eva;
}
continue;
}
if ((old_l3 & ATTR_CONTIGUOUS) != 0) {
/*
* Is this entire set of contiguous L3 entries being
* removed? Handle the possibility that "eva" is zero
* because of address wraparound.
*/
if ((sva & L3C_OFFSET) == 0 &&
sva + L3C_OFFSET <= eva - 1) {
if (pmap_remove_l3c(pmap, l3, sva, &va, eva,
l3pg, free, lockp)) {
/* The L3 table was unmapped. */
sva += L3C_SIZE;
break;
}
l3 += L3C_ENTRIES - 1;
sva += L3C_SIZE - L3_SIZE;
continue;
}
(void)pmap_demote_l3c(pmap, l3, sva);
}
old_l3 = pmap_load_clear(l3);
if ((old_l3 & ATTR_SW_WIRED) != 0)
pmap->pm_stats.wired_count--;
pmap_resident_count_dec(pmap, 1);
if ((old_l3 & ATTR_SW_MANAGED) != 0) {
m = PTE_TO_VM_PAGE(old_l3);
if (pmap_pte_dirty(pmap, old_l3))
vm_page_dirty(m);
if ((old_l3 & ATTR_AF) != 0)
vm_page_aflag_set(m, PGA_REFERENCED);
new_lock = VM_PAGE_TO_PV_LIST_LOCK(m);
if (new_lock != *lockp) {
if (*lockp != NULL) {
/*
* Pending TLB invalidations must be
* performed before the PV list lock is
* released. Otherwise, a concurrent
* pmap_remove_all() on a physical page
* could return while a stale TLB entry
* still provides access to that page.
*/
if (va != eva) {
pmap_invalidate_range(pmap, va,
sva, true);
va = eva;
}
rw_wunlock(*lockp);
}
*lockp = new_lock;
rw_wlock(*lockp);
}
pmap_pvh_free(&m->md, pmap, sva);
if (TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = page_to_pvh(m);
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
}
if (l3pg != NULL && pmap_unwire_l3(pmap, sva, l3pg, free)) {
/*
* _pmap_unwire_l3() has already invalidated the TLB
* entries at all levels for "sva". So, we need not
* perform "sva += L3_SIZE;" here. Moreover, we need
* not perform "va = sva;" if "sva" is at the start
* of a new valid range consisting of a single page.
*/
break;
}
if (va == eva)
va = sva;
}
if (va != eva)
pmap_invalidate_range(pmap, va, sva, true);
}
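/*
 * Remove the given range of addresses from the specified map.  If
 * "map_delete" is true, the range is being removed as part of a logical unmap
 * and the pmap's BTI ranges covering it are released as well (see
 * pmap_bti_on_remove()).
 */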
static void
pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete)
{
struct rwlock *lock;
vm_offset_t va_next;
pd_entry_t *l0, *l1, *l2;
pt_entry_t l3_paddr;
struct spglist free;
/*
* Perform an unsynchronized read. This is, however, safe.
*/
if (pmap->pm_stats.resident_count == 0)
return;
SLIST_INIT(&free);
PMAP_LOCK(pmap);
if (map_delete)
pmap_bti_on_remove(pmap, sva, eva);
lock = NULL;
for (; sva < eva; sva = va_next) {
if (pmap->pm_stats.resident_count == 0)
break;
l0 = pmap_l0(pmap, sva);
if (pmap_load(l0) == 0) {
va_next = (sva + L0_SIZE) & ~L0_OFFSET;
if (va_next < sva)
va_next = eva;
continue;
}
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
if (va_next < sva)
va_next = eva;
l1 = pmap_l0_to_l1(l0, sva);
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= eva,
("partial update of non-transparent 1G page "
"l1 %#lx sva %#lx eva %#lx va_next %#lx",
pmap_load(l1), sva, eva, va_next));
MPASS(pmap != kernel_pmap);
MPASS((pmap_load(l1) & ATTR_SW_MANAGED) == 0);
pmap_clear(l1);
pmap_s1_invalidate_page(pmap, sva, true);
pmap_resident_count_dec(pmap, L1_SIZE / PAGE_SIZE);
pmap_unuse_pt(pmap, sva, pmap_load(l0), &free);
continue;
}
/*
* Calculate index for next page table.
*/
va_next = (sva + L2_SIZE) & ~L2_OFFSET;
if (va_next < sva)
va_next = eva;
l2 = pmap_l1_to_l2(l1, sva);
l3_paddr = pmap_load(l2);
if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) {
if (sva + L2_SIZE == va_next && eva >= va_next) {
pmap_remove_l2(pmap, l2, sva, pmap_load(l1),
true, &free, &lock);
continue;
} else if (pmap_demote_l2_locked(pmap, l2, sva,
&lock) == NULL)
continue;
l3_paddr = pmap_load(l2);
}
/*
* Weed out invalid mappings.
*/
if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
continue;
/*
* Limit our scan to either the end of the va represented
* by the current page table page, or to the end of the
* range being removed.
*/
if (va_next > eva)
va_next = eva;
pmap_remove_l3_range(pmap, l3_paddr, sva, va_next, &free,
&lock);
}
if (lock != NULL)
rw_wunlock(lock);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, true);
}
/*
* Remove the given range of addresses from the specified map.
*
* It is assumed that the start and end are properly
* rounded to the page size.
*/
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
pmap_remove1(pmap, sva, eva, false);
}
/*
* Remove the given range of addresses as part of a logical unmap
* operation. This has the effect of calling pmap_remove(), but
* also clears any metadata that should persist for the lifetime
* of a logical mapping.
*/
void
pmap_map_delete(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
pmap_remove1(pmap, sva, eva, true);
}
/*
* Routine: pmap_remove_all
* Function:
* Removes this physical page from
* all physical maps in which it resides.
* Reflects back modify bits to the pager.
*
* Notes:
* Original versions of this routine were very
* inefficient because they iteratively called
* pmap_remove (slow...)
*/
void
pmap_remove_all(vm_page_t m)
{
struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
struct rwlock *lock;
pd_entry_t *pde, tpde;
pt_entry_t *pte, tpte;
vm_offset_t va;
struct spglist free;
int lvl, pvh_gen, md_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
SLIST_INIT(&free);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : page_to_pvh(m);
rw_wlock(lock);
retry:
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
}
}
va = pv->pv_va;
pte = pmap_pte_exists(pmap, va, 2, __func__);
pmap_demote_l2_locked(pmap, pte, va, &lock);
PMAP_UNLOCK(pmap);
}
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
}
}
pmap_resident_count_dec(pmap, 1);
pde = pmap_pde(pmap, pv->pv_va, &lvl);
KASSERT(pde != NULL,
("pmap_remove_all: no page directory entry found"));
KASSERT(lvl == 2,
("pmap_remove_all: invalid pde level %d", lvl));
tpde = pmap_load(pde);
pte = pmap_l2_to_l3(pde, pv->pv_va);
tpte = pmap_load(pte);
if ((tpte & ATTR_CONTIGUOUS) != 0)
(void)pmap_demote_l3c(pmap, pte, pv->pv_va);
tpte = pmap_load_clear(pte);
if (tpte & ATTR_SW_WIRED)
pmap->pm_stats.wired_count--;
if ((tpte & ATTR_AF) != 0) {
pmap_invalidate_page(pmap, pv->pv_va, true);
vm_page_aflag_set(m, PGA_REFERENCED);
}
/*
* Update the vm_page_t clean and reference bits.
*/
if (pmap_pte_dirty(pmap, tpte))
vm_page_dirty(m);
pmap_unuse_pt(pmap, pv->pv_va, tpde, &free);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
free_pv_entry(pmap, pv);
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
rw_wunlock(lock);
vm_page_free_pages_toq(&free, true);
}
/*
 * Masks and sets bits in a level 2 page table entry in the specified pmap.
*/
static void
pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask,
pt_entry_t nbits)
{
pd_entry_t old_l2;
vm_page_t m, mt;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
KASSERT((sva & L2_OFFSET) == 0,
("pmap_protect_l2: sva is not 2mpage aligned"));
old_l2 = pmap_load(l2);
KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK,
("pmap_protect_l2: L2e %lx is not a block mapping", old_l2));
/*
* Return if the L2 entry already has the desired access restrictions
* in place.
*/
if ((old_l2 & mask) == nbits)
return;
while (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits))
cpu_spinwait();
/*
* When a dirty read/write superpage mapping is write protected,
* update the dirty field of each of the superpage's constituent 4KB
* pages.
*/
if ((old_l2 & ATTR_SW_MANAGED) != 0 &&
(nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 &&
pmap_pte_dirty(pmap, old_l2)) {
m = PTE_TO_VM_PAGE(old_l2);
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
vm_page_dirty(mt);
}
/*
* Since a promotion must break the 4KB page mappings before making
* the 2MB page mapping, a pmap_s1_invalidate_page() suffices.
*/
pmap_s1_invalidate_page(pmap, sva, true);
}
/*
* Masks and sets bits in the specified L3C superpage mapping.
*
* Requests TLB invalidations to be performed by the caller through the
* returned "*vap".
*/
static void
pmap_mask_set_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va,
vm_offset_t *vap, vm_offset_t va_next, pt_entry_t mask, pt_entry_t nbits)
{
pt_entry_t l3e, *tl3p;
vm_page_t m, mt;
bool dirty;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(((uintptr_t)l3p & ((L3C_ENTRIES * sizeof(pt_entry_t)) - 1)) ==
0, ("pmap_mask_set_l3c: l3p is not aligned"));
KASSERT((va & L3C_OFFSET) == 0,
("pmap_mask_set_l3c: va is not aligned"));
dirty = false;
for (tl3p = l3p; tl3p < &l3p[L3C_ENTRIES]; tl3p++) {
l3e = pmap_load(tl3p);
KASSERT((l3e & ATTR_CONTIGUOUS) != 0,
("pmap_mask_set_l3c: l3e is missing ATTR_CONTIGUOUS"));
while (!atomic_fcmpset_64(tl3p, &l3e, (l3e & ~mask) | nbits))
cpu_spinwait();
if ((l3e & (ATTR_SW_DBM | ATTR_S1_AP_RW_BIT)) ==
(ATTR_SW_DBM | ATTR_S1_AP(ATTR_S1_AP_RW)))
dirty = true;
}
/*
* When a dirty read/write superpage mapping is write protected,
* update the dirty field of each of the superpage's constituent 4KB
* pages.
*/
if ((l3e & ATTR_SW_MANAGED) != 0 &&
(nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 &&
dirty) {
m = PTE_TO_VM_PAGE(pmap_load(l3p));
for (mt = m; mt < &m[L3C_ENTRIES]; mt++)
vm_page_dirty(mt);
}
if (*vap == va_next)
*vap = va;
}
/*
 * Masks and sets bits in the last level page table entries in the specified
 * pmap and range.
*/
static void
pmap_mask_set_locked(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t mask,
pt_entry_t nbits, bool invalidate)
{
vm_offset_t va, va_next;
pd_entry_t *l0, *l1, *l2;
pt_entry_t *l3p, l3;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
for (; sva < eva; sva = va_next) {
l0 = pmap_l0(pmap, sva);
if (pmap_load(l0) == 0) {
va_next = (sva + L0_SIZE) & ~L0_OFFSET;
if (va_next < sva)
va_next = eva;
continue;
}
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
if (va_next < sva)
va_next = eva;
l1 = pmap_l0_to_l1(l0, sva);
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= eva,
("partial update of non-transparent 1G page "
"l1 %#lx sva %#lx eva %#lx va_next %#lx",
pmap_load(l1), sva, eva, va_next));
MPASS((pmap_load(l1) & ATTR_SW_MANAGED) == 0);
if ((pmap_load(l1) & mask) != nbits) {
pmap_store(l1, (pmap_load(l1) & ~mask) | nbits);
if (invalidate)
pmap_s1_invalidate_page(pmap, sva, true);
}
continue;
}
va_next = (sva + L2_SIZE) & ~L2_OFFSET;
if (va_next < sva)
va_next = eva;
l2 = pmap_l1_to_l2(l1, sva);
if (pmap_load(l2) == 0)
continue;
if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
if (sva + L2_SIZE == va_next && eva >= va_next) {
pmap_protect_l2(pmap, l2, sva, mask, nbits);
continue;
} else if ((pmap_load(l2) & mask) == nbits ||
pmap_demote_l2(pmap, l2, sva) == NULL)
continue;
}
KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
("pmap_protect: Invalid L2 entry after demotion"));
if (va_next > eva)
va_next = eva;
va = va_next;
for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
sva += L3_SIZE) {
l3 = pmap_load(l3p);
/*
* Go to the next L3 entry if the current one is
* invalid or already has the desired access
* restrictions in place. (The latter case occurs
* frequently. For example, in a "buildworld"
* workload, almost 1 out of 4 L3 entries already
* have the desired restrictions.)
*/
if (!pmap_l3_valid(l3) || (l3 & mask) == nbits) {
if (va != va_next) {
if (invalidate)
pmap_s1_invalidate_range(pmap,
va, sva, true);
va = va_next;
}
if ((l3 & ATTR_CONTIGUOUS) != 0) {
/*
* Does this L3C page extend beyond
* the requested range? Handle the
* possibility that "va_next" is zero.
*/
if ((sva | L3C_OFFSET) > va_next - 1)
break;
/*
* Skip ahead to the last L3_PAGE
* within this L3C page.
*/
l3p = (pt_entry_t *)((uintptr_t)l3p |
((L3C_ENTRIES - 1) *
sizeof(pt_entry_t)));
sva |= L3C_SIZE - L3_SIZE;
}
continue;
}
if ((l3 & ATTR_CONTIGUOUS) != 0) {
/*
* Is this entire set of contiguous L3 entries
* being protected? Handle the possibility
* that "va_next" is zero because of address
* wraparound.
*/
if ((sva & L3C_OFFSET) == 0 &&
sva + L3C_OFFSET <= va_next - 1) {
pmap_mask_set_l3c(pmap, l3p, sva, &va,
va_next, mask, nbits);
l3p += L3C_ENTRIES - 1;
sva += L3C_SIZE - L3_SIZE;
continue;
}
(void)pmap_demote_l3c(pmap, l3p, sva);
/*
* The L3 entry's accessed bit may have changed.
*/
l3 = pmap_load(l3p);
}
while (!atomic_fcmpset_64(l3p, &l3, (l3 & ~mask) |
nbits))
cpu_spinwait();
/*
* When a dirty read/write mapping is write protected,
* update the page's dirty field.
*/
if ((l3 & ATTR_SW_MANAGED) != 0 &&
(nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 &&
pmap_pte_dirty(pmap, l3))
vm_page_dirty(PTE_TO_VM_PAGE(l3));
if (va == va_next)
va = sva;
}
if (va != va_next && invalidate)
pmap_s1_invalidate_range(pmap, va, sva, true);
}
}
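/*
 * Lock the pmap and apply the mask/set operation to the given range.
 */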
static void
pmap_mask_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t mask,
pt_entry_t nbits, bool invalidate)
{
PMAP_LOCK(pmap);
pmap_mask_set_locked(pmap, sva, eva, mask, nbits, invalidate);
PMAP_UNLOCK(pmap);
}
/*
* Set the physical protection on the
* specified range of this map as requested.
*/
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
pt_entry_t mask, nbits;
PMAP_ASSERT_STAGE1(pmap);
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
if (prot == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
return;
}
mask = nbits = 0;
if ((prot & VM_PROT_WRITE) == 0) {
mask |= ATTR_S1_AP_RW_BIT | ATTR_SW_DBM;
nbits |= ATTR_S1_AP(ATTR_S1_AP_RO);
}
if ((prot & VM_PROT_EXECUTE) == 0) {
mask |= ATTR_S1_XN;
nbits |= ATTR_S1_XN;
}
if (pmap == kernel_pmap) {
mask |= ATTR_KERN_GP;
nbits |= ATTR_KERN_GP;
}
if (mask == 0)
return;
pmap_mask_set(pmap, sva, eva, mask, nbits, true);
}
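/*
 * Prevent the given range of kernel mappings from being promoted to
 * superpages by setting ATTR_SW_NO_PROMOTE on the existing mappings.
 */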
void
pmap_disable_promotion(vm_offset_t sva, vm_size_t size)
{
MPASS((sva & L3_OFFSET) == 0);
MPASS(((sva + size) & L3_OFFSET) == 0);
pmap_mask_set(kernel_pmap, sva, sva + size, ATTR_SW_NO_PROMOTE,
ATTR_SW_NO_PROMOTE, false);
}
/*
* Inserts the specified page table page into the specified pmap's collection
* of idle page table pages. Each of a pmap's page table pages is responsible
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*
* If "promoted" is false, then the page table page "mpte" must be zero filled;
* "mpte"'s valid field will be set to 0.
*
* If "promoted" is true and "all_l3e_AF_set" is false, then "mpte" must
* contain valid mappings with identical attributes except for ATTR_AF;
* "mpte"'s valid field will be set to 1.
*
* If "promoted" and "all_l3e_AF_set" are both true, then "mpte" must contain
* valid mappings with identical attributes including ATTR_AF; "mpte"'s valid
* field will be set to VM_PAGE_BITS_ALL.
*/
static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted,
bool all_l3e_AF_set)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(promoted || !all_l3e_AF_set,
("a zero-filled PTP can't have ATTR_AF set in every PTE"));
mpte->valid = promoted ? (all_l3e_AF_set ? VM_PAGE_BITS_ALL : 1) : 0;
return (vm_radix_insert(&pmap->pm_root, mpte));
}
/*
* Removes the page table page mapping the specified virtual address from the
* specified pmap's collection of idle page table pages, and returns it.
* Otherwise, returns NULL if there is no page table page corresponding to the
* specified virtual address.
*/
static __inline vm_page_t
pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
}
/*
* Performs a break-before-make update of a pmap entry. This is needed when
* either promoting or demoting pages to ensure the TLB doesn't get into an
* inconsistent state.
*/
static void
pmap_update_entry(pmap_t pmap, pd_entry_t *ptep, pd_entry_t newpte,
vm_offset_t va, vm_size_t size)
{
register_t intr;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((newpte & ATTR_SW_NO_PROMOTE) == 0,
("%s: Updating non-promote pte", __func__));
/*
* Ensure we don't get switched out with the page table in an
* inconsistent state. We also need to ensure no interrupts fire
* as they may make use of an address we are about to invalidate.
*/
intr = intr_disable();
/*
* Clear the old mapping's valid bit, but leave the rest of the entry
* unchanged, so that a lockless, concurrent pmap_kextract() can still
* lookup the physical address.
*/
pmap_clear_bits(ptep, ATTR_DESCR_VALID);
/*
* When promoting, the L{1,2}_TABLE entry that is being replaced might
* be cached, so we invalidate intermediate entries as well as final
* entries.
*/
pmap_s1_invalidate_range(pmap, va, va + size, false);
/* Create the new mapping */
pmap_store(ptep, newpte);
dsb(ishst);
intr_restore(intr);
}
/*
* Performs a break-before-make update of an ATTR_CONTIGUOUS mapping.
*/
static void __nosanitizecoverage
pmap_update_strided(pmap_t pmap, pd_entry_t *ptep, pd_entry_t *ptep_end,
pd_entry_t newpte, vm_offset_t va, vm_offset_t stride, vm_size_t size)
{
pd_entry_t *lip;
register_t intr;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((newpte & ATTR_SW_NO_PROMOTE) == 0,
("%s: Updating non-promote pte", __func__));
/*
* Ensure we don't get switched out with the page table in an
* inconsistent state. We also need to ensure no interrupts fire
* as they may make use of an address we are about to invalidate.
*/
intr = intr_disable();
/*
* Clear the old mapping's valid bits, but leave the rest of each
* entry unchanged, so that a lockless, concurrent pmap_kextract() can
* still lookup the physical address.
*/
for (lip = ptep; lip < ptep_end; lip++)
pmap_clear_bits(lip, ATTR_DESCR_VALID);
/* Only final entries are changing. */
pmap_s1_invalidate_strided(pmap, va, va + size, stride, true);
/* Create the new mapping. */
for (lip = ptep; lip < ptep_end; lip++) {
pmap_store(lip, newpte);
newpte += stride;
}
dsb(ishst);
intr_restore(intr);
}
#if VM_NRESERVLEVEL > 0
/*
* After promotion from 512 4KB page mappings to a single 2MB page mapping,
* replace the many pv entries for the 4KB page mappings by a single pv entry
* for the 2MB page mapping.
*/
static void
pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
struct rwlock **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
vm_offset_t va_last;
vm_page_t m;
KASSERT((pa & L2_OFFSET) == 0,
("pmap_pv_promote_l2: pa is not 2mpage aligned"));
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the first page's pv entry for this mapping to the 2mpage's
* pv list. Aside from avoiding the cost of a call to get_pv_entry(),
* a transfer avoids the possibility that get_pv_entry() calls
* reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
* mappings that is being promoted.
*/
m = PHYS_TO_VM_PAGE(pa);
va = va & ~L2_OFFSET;
pv = pmap_pvh_remove(&m->md, pmap, va);
KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found"));
pvh = page_to_pvh(m);
TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
	/* Free the remaining Ln_ENTRIES - 1 pv entries. */
va_last = va + L2_SIZE - PAGE_SIZE;
do {
m++;
va += PAGE_SIZE;
pmap_pvh_free(&m->md, pmap, va);
} while (va < va_last);
}
/*
* Tries to promote the 512, contiguous 4KB page mappings that are within a
* single level 2 table entry to a single 2MB page mapping. For promotion
* to occur, two conditions must be met: (1) the 4KB page mappings must map
* aligned, contiguous physical memory and (2) the 4KB page mappings must have
* identical characteristics.
*/
static bool
pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, vm_page_t mpte,
struct rwlock **lockp)
{
pt_entry_t all_l3e_AF, *firstl3, *l3, newl2, oldl3, pa;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
* Currently, this function only supports promotion on stage 1 pmaps
* because it tests stage 1 specific fields and performs a break-
* before-make sequence that is incorrect for stage 2 pmaps.
*/
if (pmap->pm_stage != PM_STAGE1 || !pmap_ps_enabled(pmap))
return (false);
/*
* Examine the first L3E in the specified PTP. Abort if this L3E is
* ineligible for promotion...
*/
firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2)));
newl2 = pmap_load(firstl3);
if ((newl2 & ATTR_SW_NO_PROMOTE) != 0)
return (false);
/* ... is not the first physical page within an L2 block */
if ((PTE_TO_PHYS(newl2) & L2_OFFSET) != 0 ||
((newl2 & ATTR_DESCR_MASK) != L3_PAGE)) { /* ... or is invalid */
counter_u64_add(pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
return (false);
}
/*
* Both here and in the below "for" loop, to allow for repromotion
* after MADV_FREE, conditionally write protect a clean L3E before
* possibly aborting the promotion due to other L3E attributes. Why?
* Suppose that MADV_FREE is applied to a part of a superpage, the
* address range [S, E). pmap_advise() will demote the superpage
* mapping, destroy the 4KB page mapping at the end of [S, E), and
* set AP_RO and clear AF in the L3Es for the rest of [S, E). Later,
* imagine that the memory in [S, E) is recycled, but the last 4KB
* page in [S, E) is not the last to be rewritten, or simply accessed.
* In other words, there is still a 4KB page in [S, E), call it P,
* that is writeable but AP_RO is set and AF is clear in P's L3E.
* Unless we write protect P before aborting the promotion, if and
* when P is finally rewritten, there won't be a page fault to trigger
* repromotion.
*/
setl2:
if ((newl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
/*
* When the mapping is clean, i.e., ATTR_S1_AP_RO is set,
* ATTR_SW_DBM can be cleared without a TLB invalidation.
*/
if (!atomic_fcmpset_64(firstl3, &newl2, newl2 & ~ATTR_SW_DBM))
goto setl2;
newl2 &= ~ATTR_SW_DBM;
CTR2(KTR_PMAP, "pmap_promote_l2: protect for va %#lx"
" in pmap %p", va & ~L2_OFFSET, pmap);
}
/*
* Examine each of the other L3Es in the specified PTP. Abort if this
* L3E maps an unexpected 4KB physical page or does not have identical
* characteristics to the first L3E. If ATTR_AF is not set in every
* PTE, then request that the PTP be refilled on demotion.
*/
all_l3e_AF = newl2 & ATTR_AF;
pa = (PTE_TO_PHYS(newl2) | (newl2 & ATTR_DESCR_MASK))
+ L2_SIZE - PAGE_SIZE;
for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
oldl3 = pmap_load(l3);
if ((PTE_TO_PHYS(oldl3) | (oldl3 & ATTR_DESCR_MASK)) != pa) {
counter_u64_add(pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
return (false);
}
setl3:
if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
/*
* When the mapping is clean, i.e., ATTR_S1_AP_RO is
* set, ATTR_SW_DBM can be cleared without a TLB
* invalidation.
*/
if (!atomic_fcmpset_64(l3, &oldl3, oldl3 &
~ATTR_SW_DBM))
goto setl3;
oldl3 &= ~ATTR_SW_DBM;
}
if ((oldl3 & ATTR_PROMOTE) != (newl2 & ATTR_PROMOTE)) {
counter_u64_add(pmap_l2_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
return (false);
}
all_l3e_AF &= oldl3;
pa -= PAGE_SIZE;
}
/*
* Unless all PTEs have ATTR_AF set, clear it from the superpage
* mapping, so that promotions triggered by speculative mappings,
* such as pmap_enter_quick(), don't automatically mark the
* underlying pages as referenced.
*/
newl2 &= ~(ATTR_CONTIGUOUS | ATTR_AF | ATTR_DESCR_MASK) | all_l3e_AF;
/*
* Save the page table page in its current state until the L2
* mapping the superpage is demoted by pmap_demote_l2() or
* destroyed by pmap_remove_l3().
*/
if (mpte == NULL)
mpte = PTE_TO_VM_PAGE(pmap_load(l2));
KASSERT(mpte >= vm_page_array &&
mpte < &vm_page_array[vm_page_array_size],
("pmap_promote_l2: page table page is out of range"));
KASSERT(mpte->pindex == pmap_l2_pindex(va),
("pmap_promote_l2: page table page's pindex is wrong"));
if (pmap_insert_pt_page(pmap, mpte, true, all_l3e_AF != 0)) {
counter_u64_add(pmap_l2_p_failures, 1);
CTR2(KTR_PMAP,
"pmap_promote_l2: failure for va %#lx in pmap %p", va,
pmap);
return (false);
}
if ((newl2 & ATTR_SW_MANAGED) != 0)
pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(newl2), lockp);
pmap_update_entry(pmap, l2, newl2 | L2_BLOCK, va & ~L2_OFFSET, L2_SIZE);
counter_u64_add(pmap_l2_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
pmap);
return (true);
}
/*
* Tries to promote an aligned, contiguous set of base page mappings to a
* single L3C page mapping. For promotion to occur, two conditions must be
* met: (1) the base page mappings must map aligned, contiguous physical
* memory and (2) the base page mappings must have identical characteristics
* except for the accessed flag.
*/
static bool
pmap_promote_l3c(pmap_t pmap, pd_entry_t *l3p, vm_offset_t va)
{
pd_entry_t all_l3e_AF, firstl3c, *l3, oldl3, pa;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
* Currently, this function only supports promotion on stage 1 pmaps
* because it tests stage 1 specific fields and performs a break-
* before-make sequence that is incorrect for stage 2 pmaps.
*/
if (pmap->pm_stage != PM_STAGE1 || !pmap_ps_enabled(pmap))
return (false);
/*
* Compute the address of the first L3 entry in the superpage
* candidate.
*/
l3p = (pt_entry_t *)((uintptr_t)l3p & ~((L3C_ENTRIES *
sizeof(pt_entry_t)) - 1));
firstl3c = pmap_load(l3p);
/*
* Examine the first L3 entry. Abort if this L3E is ineligible for
* promotion...
*/
if ((firstl3c & ATTR_SW_NO_PROMOTE) != 0)
return (false);
/* ...is not properly aligned... */
if ((PTE_TO_PHYS(firstl3c) & L3C_OFFSET) != 0 ||
(firstl3c & ATTR_DESCR_MASK) != L3_PAGE) { /* ...or is invalid. */
counter_u64_add(pmap_l3c_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l3c: failure for va %#lx"
" in pmap %p", va, pmap);
return (false);
}
/*
* If the first L3 entry is a clean read-write mapping, convert it
* to a read-only mapping. See pmap_promote_l2() for the rationale.
*/
set_first:
if ((firstl3c & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
/*
* When the mapping is clean, i.e., ATTR_S1_AP_RO is set,
* ATTR_SW_DBM can be cleared without a TLB invalidation.
*/
if (!atomic_fcmpset_64(l3p, &firstl3c, firstl3c & ~ATTR_SW_DBM))
goto set_first;
firstl3c &= ~ATTR_SW_DBM;
CTR2(KTR_PMAP, "pmap_promote_l3c: protect for va %#lx"
" in pmap %p", va & ~L3C_OFFSET, pmap);
}
/*
* Check that the rest of the L3 entries are compatible with the first,
* and convert clean read-write mappings to read-only mappings.
*/
all_l3e_AF = firstl3c & ATTR_AF;
pa = (PTE_TO_PHYS(firstl3c) | (firstl3c & ATTR_DESCR_MASK)) +
L3C_SIZE - PAGE_SIZE;
for (l3 = l3p + L3C_ENTRIES - 1; l3 > l3p; l3--) {
oldl3 = pmap_load(l3);
if ((PTE_TO_PHYS(oldl3) | (oldl3 & ATTR_DESCR_MASK)) != pa) {
counter_u64_add(pmap_l3c_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l3c: failure for va %#lx"
" in pmap %p", va, pmap);
return (false);
}
set_l3:
if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
/*
* When the mapping is clean, i.e., ATTR_S1_AP_RO is
* set, ATTR_SW_DBM can be cleared without a TLB
* invalidation.
*/
if (!atomic_fcmpset_64(l3, &oldl3, oldl3 &
~ATTR_SW_DBM))
goto set_l3;
oldl3 &= ~ATTR_SW_DBM;
CTR2(KTR_PMAP, "pmap_promote_l3c: protect for va %#lx"
" in pmap %p", (oldl3 & ~ATTR_MASK & L3C_OFFSET) |
(va & ~L3C_OFFSET), pmap);
}
if ((oldl3 & ATTR_PROMOTE) != (firstl3c & ATTR_PROMOTE)) {
counter_u64_add(pmap_l3c_p_failures, 1);
CTR2(KTR_PMAP, "pmap_promote_l3c: failure for va %#lx"
" in pmap %p", va, pmap);
return (false);
}
all_l3e_AF &= oldl3;
pa -= PAGE_SIZE;
}
/*
* Unless all PTEs have ATTR_AF set, clear it from the superpage
* mapping, so that promotions triggered by speculative mappings,
* such as pmap_enter_quick(), don't automatically mark the
* underlying pages as referenced.
*/
firstl3c &= ~ATTR_AF | all_l3e_AF;
/*
* Remake the mappings with the contiguous bit set.
*/
pmap_update_strided(pmap, l3p, l3p + L3C_ENTRIES, firstl3c |
ATTR_CONTIGUOUS, va & ~L3C_OFFSET, L3_SIZE, L3C_SIZE);
counter_u64_add(pmap_l3c_promotions, 1);
CTR2(KTR_PMAP, "pmap_promote_l3c: success for va %#lx in pmap %p", va,
pmap);
return (true);
}
#endif /* VM_NRESERVLEVEL > 0 */
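/*
 * Create a single large page mapping at "va": an L1 block (psind == 3), an
 * L2 block (psind == 2), or an L3C run of contiguous base pages (psind == 1).
 * Intermediate page table pages are allocated as needed, sleeping unless
 * PMAP_ENTER_NOSLEEP is specified. Returns KERN_SUCCESS,
 * KERN_PROTECTION_FAILURE if the BTI attributes are not uniform across the
 * range, or KERN_RESOURCE_SHORTAGE if a page table page cannot be allocated
 * without sleeping.
 */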
static int
pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t pte, int flags,
int psind)
{
pd_entry_t *l0p, *l1p, *l2p, *l3p, newpte, origpte, *tl3p;
vm_page_t mp;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(psind > 0 && psind < MAXPAGESIZES,
("psind %d unexpected", psind));
KASSERT((PTE_TO_PHYS(pte) & (pagesizes[psind] - 1)) == 0,
("unaligned phys address %#lx pte %#lx psind %d",
PTE_TO_PHYS(pte), pte, psind));
restart:
newpte = pte;
if (!pmap_bti_same(pmap, va, va + pagesizes[psind], &newpte))
return (KERN_PROTECTION_FAILURE);
if (psind == 3) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(pagesizes[psind] == L1_SIZE,
("pagesizes[%d] != L1_SIZE", psind));
l0p = pmap_l0(pmap, va);
if ((pmap_load(l0p) & ATTR_DESCR_VALID) == 0) {
mp = _pmap_alloc_l3(pmap, pmap_l0_pindex(va), NULL);
if (mp == NULL) {
if ((flags & PMAP_ENTER_NOSLEEP) != 0)
return (KERN_RESOURCE_SHORTAGE);
PMAP_UNLOCK(pmap);
vm_wait(NULL);
PMAP_LOCK(pmap);
goto restart;
}
l1p = pmap_l0_to_l1(l0p, va);
KASSERT(l1p != NULL, ("va %#lx lost l1 entry", va));
origpte = pmap_load(l1p);
} else {
l1p = pmap_l0_to_l1(l0p, va);
KASSERT(l1p != NULL, ("va %#lx lost l1 entry", va));
origpte = pmap_load(l1p);
if ((origpte & ATTR_DESCR_VALID) == 0) {
mp = PTE_TO_VM_PAGE(pmap_load(l0p));
mp->ref_count++;
}
}
KASSERT((PTE_TO_PHYS(origpte) == PTE_TO_PHYS(newpte) &&
(origpte & ATTR_DESCR_MASK) == L1_BLOCK) ||
(origpte & ATTR_DESCR_VALID) == 0,
("va %#lx changing 1G phys page l1 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(l1p, newpte);
} else if (psind == 2) {
KASSERT(pagesizes[psind] == L2_SIZE,
("pagesizes[%d] != L2_SIZE", psind));
l2p = pmap_l2(pmap, va);
if (l2p == NULL) {
mp = _pmap_alloc_l3(pmap, pmap_l1_pindex(va), NULL);
if (mp == NULL) {
if ((flags & PMAP_ENTER_NOSLEEP) != 0)
return (KERN_RESOURCE_SHORTAGE);
PMAP_UNLOCK(pmap);
vm_wait(NULL);
PMAP_LOCK(pmap);
goto restart;
}
l2p = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp));
l2p = &l2p[pmap_l2_index(va)];
origpte = pmap_load(l2p);
} else {
l1p = pmap_l1(pmap, va);
origpte = pmap_load(l2p);
if ((origpte & ATTR_DESCR_VALID) == 0) {
mp = PTE_TO_VM_PAGE(pmap_load(l1p));
mp->ref_count++;
}
}
KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
((origpte & ATTR_DESCR_MASK) == L2_BLOCK &&
PTE_TO_PHYS(origpte) == PTE_TO_PHYS(newpte)),
("va %#lx changing 2M phys page l2 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(l2p, newpte);
} else /* (psind == 1) */ {
KASSERT(pagesizes[psind] == L3C_SIZE,
("pagesizes[%d] != L3C_SIZE", psind));
l2p = pmap_l2(pmap, va);
if (l2p == NULL || (pmap_load(l2p) & ATTR_DESCR_VALID) == 0) {
mp = _pmap_alloc_l3(pmap, pmap_l2_pindex(va), NULL);
if (mp == NULL) {
if ((flags & PMAP_ENTER_NOSLEEP) != 0)
return (KERN_RESOURCE_SHORTAGE);
PMAP_UNLOCK(pmap);
vm_wait(NULL);
PMAP_LOCK(pmap);
goto restart;
}
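/*
 * The newly allocated PTP already carries one reference, so add only
 * L3C_ENTRIES - 1 more for the remaining constituent mappings.
 */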
mp->ref_count += L3C_ENTRIES - 1;
l3p = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp));
l3p = &l3p[pmap_l3_index(va)];
} else {
l3p = pmap_l2_to_l3(l2p, va);
if ((pmap_load(l3p) & ATTR_DESCR_VALID) == 0) {
mp = PTE_TO_VM_PAGE(pmap_load(l2p));
mp->ref_count += L3C_ENTRIES;
}
}
for (tl3p = l3p; tl3p < &l3p[L3C_ENTRIES]; tl3p++) {
origpte = pmap_load(tl3p);
KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
((origpte & ATTR_CONTIGUOUS) != 0 &&
PTE_TO_PHYS(origpte) == PTE_TO_PHYS(newpte)),
("va %#lx changing 64K phys page l3 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(tl3p, newpte);
newpte += L3_SIZE;
}
}
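/* Order the page table stores before any use of the new mapping. */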
dsb(ishst);
if ((origpte & ATTR_DESCR_VALID) == 0)
pmap_resident_count_inc(pmap, pagesizes[psind] / PAGE_SIZE);
if ((newpte & ATTR_SW_WIRED) != 0 && (origpte & ATTR_SW_WIRED) == 0)
pmap->pm_stats.wired_count += pagesizes[psind] / PAGE_SIZE;
else if ((newpte & ATTR_SW_WIRED) == 0 &&
(origpte & ATTR_SW_WIRED) != 0)
pmap->pm_stats.wired_count -= pagesizes[psind] / PAGE_SIZE;
return (KERN_SUCCESS);
}
/*
* Insert the given physical page (p) at
* the specified virtual address (v) in the
* target physical map with the protection requested.
*
* If specified, the page will be wired down, meaning
* that the related pte can not be reclaimed.
*
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
*/
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
u_int flags, int8_t psind)
{
struct rwlock *lock;
pd_entry_t *pde;
pt_entry_t new_l3, orig_l3;
pt_entry_t *l2, *l3;
pv_entry_t pv;
vm_paddr_t opa, pa;
vm_page_t mpte, om;
bool nosleep;
int full_lvl, lvl, rv;
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
va = trunc_page(va);
if ((m->oflags & VPO_UNMANAGED) == 0)
VM_PAGE_OBJECT_BUSY_ASSERT(m);
pa = VM_PAGE_TO_PHYS(m);
new_l3 = (pt_entry_t)(PHYS_TO_PTE(pa) | ATTR_AF | pmap_sh_attr |
L3_PAGE);
new_l3 |= pmap_pte_memattr(pmap, m->md.pv_memattr);
new_l3 |= pmap_pte_prot(pmap, prot);
if ((flags & PMAP_ENTER_WIRED) != 0)
new_l3 |= ATTR_SW_WIRED;
if (pmap->pm_stage == PM_STAGE1) {
if (ADDR_IS_USER(va))
new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
else
new_l3 |= ATTR_S1_UXN;
if (pmap != kernel_pmap)
new_l3 |= ATTR_S1_nG;
} else {
/*
* Clear the access flag on executable mappings, this will be
* set later when the page is accessed. The fault handler is
* required to invalidate the I-cache.
*
* TODO: Switch to the valid flag to allow hardware management
* of the access flag. Much of the pmap code assumes the
* valid flag is set and fails to destroy the old page tables
* correctly if it is clear.
*/
if (prot & VM_PROT_EXECUTE)
new_l3 &= ~ATTR_AF;
}
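/*
 * For managed pages, record write permission in ATTR_SW_DBM but enter the
 * mapping clean (read-only at stage 1, stage 2 write permission clear)
 * unless the triggering access is itself a write, so that the page is not
 * considered dirty until it is actually written.
 */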
if ((m->oflags & VPO_UNMANAGED) == 0) {
new_l3 |= ATTR_SW_MANAGED;
if ((prot & VM_PROT_WRITE) != 0) {
new_l3 |= ATTR_SW_DBM;
if ((flags & VM_PROT_WRITE) == 0) {
if (pmap->pm_stage == PM_STAGE1)
new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
else
new_l3 &=
~ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
}
}
}
CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
lock = NULL;
PMAP_LOCK(pmap);
if ((flags & PMAP_ENTER_LARGEPAGE) != 0) {
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
("managed largepage va %#lx flags %#x", va, flags));
if (psind == 3) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
new_l3 &= ~L3_PAGE;
new_l3 |= L1_BLOCK;
} else if (psind == 2) {
new_l3 &= ~L3_PAGE;
new_l3 |= L2_BLOCK;
} else /* (psind == 1) */
new_l3 |= ATTR_CONTIGUOUS;
rv = pmap_enter_largepage(pmap, va, new_l3, flags, psind);
goto out;
}
if (psind == 2) {
/* Assert the required virtual and physical alignment. */
KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned"));
KASSERT(m->psind > 1, ("pmap_enter: m->psind < psind"));
rv = pmap_enter_l2(pmap, va, (new_l3 & ~L3_PAGE) | L2_BLOCK,
flags, m, &lock);
goto out;
}
mpte = NULL;
if (psind == 1) {
KASSERT((va & L3C_OFFSET) == 0, ("pmap_enter: va unaligned"));
KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
rv = pmap_enter_l3c(pmap, va, new_l3 | ATTR_CONTIGUOUS, flags,
m, &mpte, &lock);
#if VM_NRESERVLEVEL > 0
/*
* Attempt L2 promotion, if both the PTP and a level 1
* reservation are fully populated.
*/
if (rv == KERN_SUCCESS &&
(mpte == NULL || mpte->ref_count == NL3PG) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 1) {
pde = pmap_l2(pmap, va);
(void)pmap_promote_l2(pmap, pde, va, mpte, &lock);
}
#endif
goto out;
}
/*
* In the case that a page table page is not
* resident, we are creating it here.
*/
retry:
pde = pmap_pde(pmap, va, &lvl);
if (pde != NULL && lvl == 2) {
l3 = pmap_l2_to_l3(pde, va);
if (ADDR_IS_USER(va) && mpte == NULL) {
mpte = PTE_TO_VM_PAGE(pmap_load(pde));
mpte->ref_count++;
}
goto havel3;
} else if (pde != NULL && lvl == 1) {
l2 = pmap_l1_to_l2(pde, va);
if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
(l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) {
l3 = &l3[pmap_l3_index(va)];
if (ADDR_IS_USER(va)) {
mpte = PTE_TO_VM_PAGE(pmap_load(l2));
mpte->ref_count++;
}
goto havel3;
}
/* We need to allocate an L3 table. */
}
if (ADDR_IS_USER(va)) {
nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
/*
* We use _pmap_alloc_l3() instead of pmap_alloc_l3() in order
* to handle the possibility that a superpage mapping for "va"
* was created while we slept.
*/
mpte = _pmap_alloc_l3(pmap, pmap_l2_pindex(va),
nosleep ? NULL : &lock);
if (mpte == NULL && nosleep) {
CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
rv = KERN_RESOURCE_SHORTAGE;
goto out;
}
goto retry;
} else
panic("pmap_enter: missing L3 table for kernel va %#lx", va);
havel3:
orig_l3 = pmap_load(l3);
opa = PTE_TO_PHYS(orig_l3);
pv = NULL;
new_l3 |= pmap_pte_bti(pmap, va);
/*
* Is the specified virtual address already mapped?
*/
if (pmap_l3_valid(orig_l3)) {
/*
* Wiring change, just update stats. We don't worry about
* wiring PT pages as they remain resident as long as there
* are valid mappings in them. Hence, if a user page is wired,
* the PT page will be also.
*/
if ((flags & PMAP_ENTER_WIRED) != 0 &&
(orig_l3 & ATTR_SW_WIRED) == 0)
pmap->pm_stats.wired_count++;
else if ((flags & PMAP_ENTER_WIRED) == 0 &&
(orig_l3 & ATTR_SW_WIRED) != 0)
pmap->pm_stats.wired_count--;
/*
* Remove the extra PT page reference.
*/
if (mpte != NULL) {
mpte->ref_count--;
KASSERT(mpte->ref_count > 0,
("pmap_enter: missing reference to page table page,"
" va: 0x%lx", va));
}
/*
* Has the physical page changed?
*/
if (opa == pa) {
/*
* No, might be a protection or wiring change.
*/
if ((orig_l3 & ATTR_SW_MANAGED) != 0 &&
(new_l3 & ATTR_SW_DBM) != 0)
vm_page_aflag_set(m, PGA_WRITEABLE);
goto validate;
}
/*
* The physical page has changed. Temporarily invalidate
* the mapping.
*/
if ((orig_l3 & ATTR_CONTIGUOUS) != 0)
(void)pmap_demote_l3c(pmap, l3, va);
orig_l3 = pmap_load_clear(l3);
KASSERT(PTE_TO_PHYS(orig_l3) == opa,
("pmap_enter: unexpected pa update for %#lx", va));
if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
om = PHYS_TO_VM_PAGE(opa);
/*
* The pmap lock is sufficient to synchronize with
* concurrent calls to pmap_page_test_mappings() and
* pmap_ts_referenced().
*/
if (pmap_pte_dirty(pmap, orig_l3))
vm_page_dirty(om);
if ((orig_l3 & ATTR_AF) != 0) {
pmap_invalidate_page(pmap, va, true);
vm_page_aflag_set(om, PGA_REFERENCED);
}
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, om);
pv = pmap_pvh_remove(&om->md, pmap, va);
if ((m->oflags & VPO_UNMANAGED) != 0)
free_pv_entry(pmap, pv);
if ((om->a.flags & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list) &&
((om->flags & PG_FICTITIOUS) != 0 ||
TAILQ_EMPTY(&page_to_pvh(om)->pv_list)))
vm_page_aflag_clear(om, PGA_WRITEABLE);
} else {
KASSERT((orig_l3 & ATTR_AF) != 0,
("pmap_enter: unmanaged mapping lacks ATTR_AF"));
pmap_invalidate_page(pmap, va, true);
}
orig_l3 = 0;
} else {
/*
* Increment the counters.
*/
if ((new_l3 & ATTR_SW_WIRED) != 0)
pmap->pm_stats.wired_count++;
pmap_resident_count_inc(pmap, 1);
}
/*
* Enter on the PV list if part of our managed memory.
*/
if ((m->oflags & VPO_UNMANAGED) == 0) {
if (pv == NULL) {
pv = get_pv_entry(pmap, &lock);
pv->pv_va = va;
}
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
if ((new_l3 & ATTR_SW_DBM) != 0)
vm_page_aflag_set(m, PGA_WRITEABLE);
}
validate:
if (pmap->pm_stage == PM_STAGE1) {
/*
* Sync icache if exec permission and attribute
* VM_MEMATTR_WRITE_BACK is set. Do it now, before the mapping
* is stored and made valid for hardware table walk. If done
* later, another CPU could access this page before the caches
* are properly synced. Don't do it for kernel memory, which is
* mapped with exec permission even if the memory isn't going
* to hold executable code. The only time an icache sync is
* needed there is after a kernel module is loaded and its
* relocation info is processed, which is done in
* elf_cpu_load_file().
*/
if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
m->md.pv_memattr == VM_MEMATTR_WRITE_BACK &&
(opa != pa || (orig_l3 & ATTR_S1_XN))) {
PMAP_ASSERT_STAGE1(pmap);
cpu_icache_sync_range((void *)PHYS_TO_DMAP(pa),
PAGE_SIZE);
}
} else {
cpu_dcache_wb_range((void *)PHYS_TO_DMAP(pa), PAGE_SIZE);
}
/*
* Update the L3 entry
*/
if (pmap_l3_valid(orig_l3)) {
KASSERT(opa == pa, ("pmap_enter: invalid update"));
if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) {
/* same PA, different attributes */
if ((orig_l3 & ATTR_CONTIGUOUS) != 0)
(void)pmap_demote_l3c(pmap, l3, va);
orig_l3 = pmap_load_store(l3, new_l3);
pmap_invalidate_page(pmap, va, true);
if ((orig_l3 & ATTR_SW_MANAGED) != 0 &&
pmap_pte_dirty(pmap, orig_l3))
vm_page_dirty(m);
} else {
/*
* orig_l3 == new_l3
* This can happen if multiple threads simultaneously
* access a not yet mapped page. This is bad for performance
* since it can cause a full demotion-NOP-promotion cycle.
* Other possible reasons are:
* - the VM and pmap memory layouts have diverged
* - a TLB flush is missing somewhere and the CPU doesn't
*   see the actual mapping.
*/
CTR4(KTR_PMAP, "%s: already mapped page - "
"pmap %p va 0x%#lx pte 0x%lx",
__func__, pmap, va, new_l3);
}
} else {
/* New mapping */
pmap_store(l3, new_l3);
dsb(ishst);
}
#if VM_NRESERVLEVEL > 0
/*
* First, attempt L3C promotion, if the virtual and physical addresses
* are aligned with each other and an underlying reservation has the
* neighboring L3 pages allocated. The first condition is simply an
* optimization that recognizes some eventual promotion failures early
* at a lower run-time cost. Then, if both a level 1 reservation and
* the PTP are fully populated, attempt L2 promotion.
*/
if ((va & L3C_OFFSET) == (pa & L3C_OFFSET) &&
(m->flags & PG_FICTITIOUS) == 0 &&
(full_lvl = vm_reserv_level_iffullpop(m)) >= 0 &&
pmap_promote_l3c(pmap, l3, va) &&
full_lvl == 1 && (mpte == NULL || mpte->ref_count == NL3PG))
(void)pmap_promote_l2(pmap, pde, va, mpte, &lock);
#endif
rv = KERN_SUCCESS;
out:
if (lock != NULL)
rw_wunlock(lock);
PMAP_UNLOCK(pmap);
return (rv);
}
/*
* Tries to create a read- and/or execute-only L2 page mapping. Returns
* KERN_SUCCESS if the mapping was created. Otherwise, returns an error
* value. See pmap_enter_l2() for the possible error values when "no sleep",
* "no replace", and "no reclaim" are specified.
*/
static int
pmap_enter_l2_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
struct rwlock **lockp)
{
pd_entry_t new_l2;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
new_l2 = (pd_entry_t)(VM_PAGE_TO_PTE(m) | pmap_sh_attr |
ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
L2_BLOCK);
if ((m->oflags & VPO_UNMANAGED) == 0)
new_l2 |= ATTR_SW_MANAGED;
else
new_l2 |= ATTR_AF;
if ((prot & VM_PROT_EXECUTE) == 0 ||
m->md.pv_memattr == VM_MEMATTR_DEVICE)
new_l2 |= ATTR_S1_XN;
if (ADDR_IS_USER(va))
new_l2 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
else
new_l2 |= ATTR_S1_UXN;
if (pmap != kernel_pmap)
new_l2 |= ATTR_S1_nG;
return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, m, lockp));
}
/*
* Returns true if every page table entry in the specified page table is
* zero.
*/
static bool
pmap_every_pte_zero(vm_paddr_t pa)
{
pt_entry_t *pt_end, *pte;
KASSERT((pa & PAGE_MASK) == 0, ("pa is misaligned"));
pte = (pt_entry_t *)PHYS_TO_DMAP(pa);
for (pt_end = pte + Ln_ENTRIES; pte < pt_end; pte++) {
if (*pte != 0)
return (false);
}
return (true);
}
/*
* Tries to create the specified L2 page mapping. Returns KERN_SUCCESS if
* the mapping was created, and one of KERN_FAILURE, KERN_NO_SPACE, or
* KERN_RESOURCE_SHORTAGE otherwise. Returns KERN_FAILURE if
* PMAP_ENTER_NOREPLACE was specified and a base page mapping already exists
* within the L2 virtual address range starting at the specified virtual
* address. Returns KERN_NO_SPACE if PMAP_ENTER_NOREPLACE was specified and
* an L2 page mapping already exists at the specified virtual address. Returns
* KERN_RESOURCE_SHORTAGE if either (1) PMAP_ENTER_NOSLEEP was specified and a
* page table page allocation failed or (2) PMAP_ENTER_NORECLAIM was specified
* and a PV entry allocation failed.
*/
static int
pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
vm_page_t m, struct rwlock **lockp)
{
struct spglist free;
pd_entry_t *l2, old_l2;
vm_page_t l2pg, mt;
vm_page_t uwptpg;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
KASSERT((flags & (PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM)) !=
PMAP_ENTER_NORECLAIM,
("pmap_enter_l2: flags is missing PMAP_ENTER_NOREPLACE"));
if ((l2 = pmap_alloc_l2(pmap, va, &l2pg, (flags &
PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) {
CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p",
va, pmap);
return (KERN_RESOURCE_SHORTAGE);
}
/*
* If bti is not the same for the whole l2 range, return failure
* and let vm_fault() cope. Check after l2 allocation, since
* it could sleep.
*/
if (!pmap_bti_same(pmap, va, va + L2_SIZE, &new_l2)) {
KASSERT(l2pg != NULL, ("pmap_enter_l2: missing L2 PTP"));
pmap_abort_ptp(pmap, va, l2pg);
return (KERN_PROTECTION_FAILURE);
}
/*
* If there are existing mappings, either abort or remove them.
*/
if ((old_l2 = pmap_load(l2)) != 0) {
KASSERT(l2pg == NULL || l2pg->ref_count > 1,
("pmap_enter_l2: l2pg's ref count is too low"));
if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
if (l2pg != NULL)
l2pg->ref_count--;
CTR2(KTR_PMAP,
"pmap_enter_l2: no space for va %#lx"
" in pmap %p", va, pmap);
return (KERN_NO_SPACE);
} else if (ADDR_IS_USER(va) ||
!pmap_every_pte_zero(PTE_TO_PHYS(old_l2))) {
if (l2pg != NULL)
l2pg->ref_count--;
CTR2(KTR_PMAP,
"pmap_enter_l2: failure for va %#lx"
" in pmap %p", va, pmap);
return (KERN_FAILURE);
}
}
SLIST_INIT(&free);
if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
(void)pmap_remove_l2(pmap, l2, va,
pmap_load(pmap_l1(pmap, va)), false, &free, lockp);
} else {
if (ADDR_IS_KERNEL(va)) {
/*
* Try to save the ptp in the trie
* before any changes to mappings are
* made. Abort on failure.
*/
mt = PTE_TO_VM_PAGE(old_l2);
if (pmap_insert_pt_page(pmap, mt, false,
false)) {
CTR1(KTR_PMAP,
"pmap_enter_l2: cannot ins kern ptp va %#lx",
va);
return (KERN_RESOURCE_SHORTAGE);
}
/*
* Both pmap_remove_l2() and
* pmap_remove_l3_range() will zero fill
* the L3 kernel page table page.
*/
}
pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE,
&free, lockp);
if (ADDR_IS_KERNEL(va)) {
/*
* The TLB could have an intermediate
* entry for the L3 kernel page table
* page, so request an invalidation at
* all levels after clearing the
* L2_TABLE entry.
*/
pmap_clear(l2);
pmap_s1_invalidate_page(pmap, va, false);
}
}
KASSERT(pmap_load(l2) == 0,
("pmap_enter_l2: non-zero L2 entry %p", l2));
if (ADDR_IS_USER(va)) {
vm_page_free_pages_toq(&free, true);
} else {
KASSERT(SLIST_EMPTY(&free),
("pmap_enter_l2: freed kernel page table page"));
}
}
/*
* Allocate leaf ptpage for wired userspace pages.
*/
uwptpg = NULL;
if ((new_l2 & ATTR_SW_WIRED) != 0 && pmap != kernel_pmap) {
uwptpg = vm_page_alloc_noobj(VM_ALLOC_WIRED);
if (uwptpg == NULL) {
pmap_abort_ptp(pmap, va, l2pg);
return (KERN_RESOURCE_SHORTAGE);
}
uwptpg->pindex = pmap_l2_pindex(va);
if (pmap_insert_pt_page(pmap, uwptpg, true, false)) {
vm_page_unwire_noq(uwptpg);
vm_page_free(uwptpg);
pmap_abort_ptp(pmap, va, l2pg);
return (KERN_RESOURCE_SHORTAGE);
}
pmap_resident_count_inc(pmap, 1);
uwptpg->ref_count = NL3PG;
}
if ((new_l2 & ATTR_SW_MANAGED) != 0) {
/*
* Abort this mapping if its PV entry could not be created.
*/
if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) {
if (l2pg != NULL)
pmap_abort_ptp(pmap, va, l2pg);
else {
KASSERT(ADDR_IS_KERNEL(va) &&
(pmap_load(l2) & ATTR_DESCR_MASK) ==
L2_TABLE,
("pmap_enter_l2: invalid kernel L2E"));
mt = pmap_remove_pt_page(pmap, va);
KASSERT(mt != NULL,
("pmap_enter_l2: missing kernel PTP"));
}
if (uwptpg != NULL) {
mt = pmap_remove_pt_page(pmap, va);
KASSERT(mt == uwptpg,
("removed pt page %p, expected %p", mt,
uwptpg));
pmap_resident_count_dec(pmap, 1);
uwptpg->ref_count = 1;
vm_page_unwire_noq(uwptpg);
vm_page_free(uwptpg);
}
CTR2(KTR_PMAP,
"pmap_enter_l2: failure for va %#lx in pmap %p",
va, pmap);
return (KERN_RESOURCE_SHORTAGE);
}
if ((new_l2 & ATTR_SW_DBM) != 0)
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
vm_page_aflag_set(mt, PGA_WRITEABLE);
}
/*
* Increment counters.
*/
if ((new_l2 & ATTR_SW_WIRED) != 0)
pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE;
pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE;
/*
* Conditionally sync the icache. See pmap_enter() for details.
*/
if ((new_l2 & ATTR_S1_XN) == 0 && (PTE_TO_PHYS(new_l2) !=
PTE_TO_PHYS(old_l2) || (old_l2 & ATTR_S1_XN) != 0) &&
pmap != kernel_pmap && m->md.pv_memattr == VM_MEMATTR_WRITE_BACK) {
cpu_icache_sync_range((void *)PHYS_TO_DMAP(PTE_TO_PHYS(new_l2)),
L2_SIZE);
}
/*
* Map the superpage.
*/
pmap_store(l2, new_l2);
dsb(ishst);
counter_u64_add(pmap_l2_mappings, 1);
CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p",
va, pmap);
return (KERN_SUCCESS);
}
/*
* Tries to create a read- and/or execute-only L3C page mapping. Returns
* KERN_SUCCESS if the mapping was created. Otherwise, returns an error
* value.
*/
static int
pmap_enter_l3c_rx(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t *ml3p,
vm_prot_t prot, struct rwlock **lockp)
{
pt_entry_t l3e;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
l3e = VM_PAGE_TO_PTE(m) | pmap_sh_attr |
ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
ATTR_CONTIGUOUS | L3_PAGE;
if ((m->oflags & VPO_UNMANAGED) == 0)
l3e |= ATTR_SW_MANAGED;
else
l3e |= ATTR_AF;
if ((prot & VM_PROT_EXECUTE) == 0 ||
m->md.pv_memattr == VM_MEMATTR_DEVICE)
l3e |= ATTR_S1_XN;
if (ADDR_IS_USER(va))
l3e |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
else
l3e |= ATTR_S1_UXN;
if (pmap != kernel_pmap)
l3e |= ATTR_S1_nG;
return (pmap_enter_l3c(pmap, va, l3e, PMAP_ENTER_NOSLEEP |
PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, m, ml3p, lockp));
}
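/*
 * Tries to create the specified L3C page mapping, i.e., a run of L3C_ENTRIES
 * base page mappings with ATTR_CONTIGUOUS set. Returns KERN_SUCCESS if the
 * mapping was created. Otherwise, returns KERN_FAILURE if an existing
 * mapping blocks the request or a page table page could not be allocated
 * without sleeping, KERN_PROTECTION_FAILURE if the BTI attributes are not
 * uniform across the range, or KERN_RESOURCE_SHORTAGE if a PV entry could
 * not be allocated.
 */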
static int
pmap_enter_l3c(pmap_t pmap, vm_offset_t va, pt_entry_t l3e, u_int flags,
vm_page_t m, vm_page_t *ml3p, struct rwlock **lockp)
{
pd_entry_t *l2p, *pde;
pt_entry_t *l3p, *tl3p;
vm_page_t mt;
vm_paddr_t pa;
vm_pindex_t l2pindex;
int lvl;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((va & L3C_OFFSET) == 0,
("pmap_enter_l3c: va is not aligned"));
KASSERT(!VA_IS_CLEANMAP(va) || (l3e & ATTR_SW_MANAGED) == 0,
("pmap_enter_l3c: managed mapping within the clean submap"));
KASSERT((l3e & ATTR_CONTIGUOUS) != 0,
("pmap_enter_l3c: l3e is missing ATTR_CONTIGUOUS"));
/*
* If the L3 PTP is not resident, we attempt to create it here.
*/
if (ADDR_IS_USER(va)) {
/*
* Were we given the correct L3 PTP? If so, we can simply
* increment its ref count.
*/
l2pindex = pmap_l2_pindex(va);
if (*ml3p != NULL && (*ml3p)->pindex == l2pindex) {
(*ml3p)->ref_count += L3C_ENTRIES;
} else {
retry:
/*
* Get the L2 entry.
*/
pde = pmap_pde(pmap, va, &lvl);
/*
* If the L2 entry is a superpage, we either abort or
* demote depending on the given flags.
*/
if (lvl == 1) {
l2p = pmap_l1_to_l2(pde, va);
if ((pmap_load(l2p) & ATTR_DESCR_MASK) ==
L2_BLOCK) {
if ((flags & PMAP_ENTER_NOREPLACE) != 0)
return (KERN_FAILURE);
l3p = pmap_demote_l2_locked(pmap, l2p,
va, lockp);
if (l3p != NULL) {
*ml3p = PTE_TO_VM_PAGE(
pmap_load(l2p));
(*ml3p)->ref_count +=
L3C_ENTRIES;
goto have_l3p;
}
}
/* We need to allocate an L3 PTP. */
}
/*
* If the L3 PTP is mapped, we just increment its ref
* count. Otherwise, we attempt to allocate it.
*/
if (lvl == 2 && pmap_load(pde) != 0) {
*ml3p = PTE_TO_VM_PAGE(pmap_load(pde));
(*ml3p)->ref_count += L3C_ENTRIES;
} else {
*ml3p = _pmap_alloc_l3(pmap, l2pindex, (flags &
PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp);
if (*ml3p == NULL) {
if ((flags & PMAP_ENTER_NOSLEEP) != 0)
return (KERN_FAILURE);
/*
* The page table may have changed
* while we slept.
*/
goto retry;
}
(*ml3p)->ref_count += L3C_ENTRIES - 1;
}
}
l3p = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(*ml3p));
} else {
*ml3p = NULL;
/*
* If the L2 entry is a superpage, we either abort or demote
* depending on the given flags.
*/
pde = pmap_pde(kernel_pmap, va, &lvl);
if (lvl == 1) {
l2p = pmap_l1_to_l2(pde, va);
KASSERT((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK,
("pmap_enter_l3c: missing L2 block"));
if ((flags & PMAP_ENTER_NOREPLACE) != 0)
return (KERN_FAILURE);
l3p = pmap_demote_l2_locked(pmap, l2p, va, lockp);
} else {
KASSERT(lvl == 2,
("pmap_enter_l3c: Invalid level %d", lvl));
l3p = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(
pmap_load(pde)));
}
}
have_l3p:
l3p = &l3p[pmap_l3_index(va)];
/*
* If bti is not the same for the whole L3C range, return failure
* and let vm_fault() cope. Check after L3 allocation, since
* it could sleep.
*/
if (!pmap_bti_same(pmap, va, va + L3C_SIZE, &l3e)) {
KASSERT(*ml3p != NULL, ("pmap_enter_l3c: missing L3 PTP"));
(*ml3p)->ref_count -= L3C_ENTRIES - 1;
pmap_abort_ptp(pmap, va, *ml3p);
*ml3p = NULL;
return (KERN_PROTECTION_FAILURE);
}
/*
* If there are existing mappings, either abort or remove them.
*/
if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
for (tl3p = l3p; tl3p < &l3p[L3C_ENTRIES]; tl3p++) {
if (pmap_load(tl3p) != 0) {
if (*ml3p != NULL)
(*ml3p)->ref_count -= L3C_ENTRIES;
return (KERN_FAILURE);
}
}
} else {
/*
* Because we increment the L3 page's reference count above,
* it is guaranteed not to be freed here and we can pass NULL
* instead of a valid free list.
*/
pmap_remove_l3_range(pmap, pmap_load(pmap_l2(pmap, va)), va,
va + L3C_SIZE, NULL, lockp);
}
/*
* Enter on the PV list if part of our managed memory.
*/
if ((l3e & ATTR_SW_MANAGED) != 0) {
if (!pmap_pv_insert_l3c(pmap, va, m, lockp)) {
if (*ml3p != NULL) {
(*ml3p)->ref_count -= L3C_ENTRIES - 1;
pmap_abort_ptp(pmap, va, *ml3p);
*ml3p = NULL;
}
return (KERN_RESOURCE_SHORTAGE);
}
if ((l3e & ATTR_SW_DBM) != 0)
for (mt = m; mt < &m[L3C_ENTRIES]; mt++)
vm_page_aflag_set(mt, PGA_WRITEABLE);
}
/*
* Increment counters.
*/
if ((l3e & ATTR_SW_WIRED) != 0)
pmap->pm_stats.wired_count += L3C_ENTRIES;
pmap_resident_count_inc(pmap, L3C_ENTRIES);
pa = VM_PAGE_TO_PHYS(m);
KASSERT((pa & L3C_OFFSET) == 0, ("pmap_enter_l3c: pa is not aligned"));
/*
* Sync the icache before the mapping is stored.
*/
if ((l3e & ATTR_S1_XN) == 0 && pmap != kernel_pmap &&
m->md.pv_memattr == VM_MEMATTR_WRITE_BACK)
cpu_icache_sync_range((void *)PHYS_TO_DMAP(pa), L3C_SIZE);
/*
* Map the superpage.
*/
for (tl3p = l3p; tl3p < &l3p[L3C_ENTRIES]; tl3p++) {
pmap_store(tl3p, l3e);
l3e += L3_SIZE;
}
dsb(ishst);
counter_u64_add(pmap_l3c_mappings, 1);
CTR2(KTR_PMAP, "pmap_enter_l3c: success for va %#lx in pmap %p",
va, pmap);
return (KERN_SUCCESS);
}
/*
* Maps a sequence of resident pages belonging to the same object.
* The sequence begins with the given page m_start. This page is
* mapped at the given virtual address start. Each subsequent page is
* mapped at a virtual address that is offset from start by the same
* amount as the page is offset from m_start within the object. The
* last page in the sequence is the page with the largest offset from
* m_start that can be mapped at a virtual address less than the given
* virtual address end. Not every virtual page between start and end
* is mapped; only those for which a resident page exists with the
* corresponding offset from m_start are mapped.
*/
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
struct pctrie_iter pages;
struct rwlock *lock;
vm_offset_t va;
vm_page_t m, mpte;
int rv;
VM_OBJECT_ASSERT_LOCKED(m_start->object);
mpte = NULL;
vm_page_iter_limit_init(&pages, m_start->object,
m_start->pindex + atop(end - start));
m = vm_radix_iter_lookup(&pages, m_start->pindex);
lock = NULL;
PMAP_LOCK(pmap);
while (m != NULL) {
va = start + ptoa(m->pindex - m_start->pindex);
if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end &&
m->psind == 2 && pmap_ps_enabled(pmap) &&
((rv = pmap_enter_l2_rx(pmap, va, m, prot, &lock)) ==
KERN_SUCCESS || rv == KERN_NO_SPACE)) {
m = vm_radix_iter_jump(&pages, L2_SIZE / PAGE_SIZE);
} else if ((va & L3C_OFFSET) == 0 && va + L3C_SIZE <= end &&
m->psind >= 1 && pmap_ps_enabled(pmap) &&
((rv = pmap_enter_l3c_rx(pmap, va, m, &mpte, prot,
&lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE)) {
m = vm_radix_iter_jump(&pages, L3C_ENTRIES);
} else {
/*
* In general, if a superpage mapping were possible,
* it would have been created above. That said, if
* start and end are not superpage aligned, then
* promotion might be possible at the ends of [start,
* end). However, in practice, those promotion
* attempts are so unlikely to succeed that they are
* not worth trying.
*/
mpte = pmap_enter_quick_locked(pmap, va, m, prot |
VM_PROT_NO_PROMOTE, mpte, &lock);
m = vm_radix_iter_step(&pages);
}
}
if (lock != NULL)
rw_wunlock(lock);
PMAP_UNLOCK(pmap);
}
/*
* this code makes some *MAJOR* assumptions:
* 1. The current pmap and the target pmap exist.
* 2. Not wired.
* 3. Read access.
* 4. No page table pages.
* but is *MUCH* faster than pmap_enter...
*/
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
struct rwlock *lock;
lock = NULL;
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
rw_wunlock(lock);
PMAP_UNLOCK(pmap);
}
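/*
 * Create a read-only base page mapping for "m" at "va" without sleeping. If
 * a mapping already exists or a needed resource cannot be allocated, the
 * mapping is simply not created. Returns the user-space L3 page table page
 * for use as the "mpte" hint on the next call, or NULL.
 */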
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
{
pt_entry_t *l1, *l2, *l3, l3_val;
vm_paddr_t pa;
int full_lvl, lvl;
KASSERT(!VA_IS_CLEANMAP(va) ||
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
l2 = NULL;
CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
/*
* In the case that a page table page is not
* resident, we are creating it here.
*/
if (ADDR_IS_USER(va)) {
vm_pindex_t l2pindex;
/*
* Calculate pagetable page index
*/
l2pindex = pmap_l2_pindex(va);
if (mpte && (mpte->pindex == l2pindex)) {
mpte->ref_count++;
} else {
/*
* If the page table page is mapped, we just increment
* the hold count, and activate it. Otherwise, we
* attempt to allocate a page table page, passing NULL
* instead of the PV list lock pointer because we don't
* intend to sleep. If this attempt fails, we don't
* retry. Instead, we give up.
*/
l1 = pmap_l1(pmap, va);
if (l1 != NULL && pmap_load(l1) != 0) {
if ((pmap_load(l1) & ATTR_DESCR_MASK) ==
L1_BLOCK)
return (NULL);
l2 = pmap_l1_to_l2(l1, va);
if (pmap_load(l2) != 0) {
if ((pmap_load(l2) & ATTR_DESCR_MASK) ==
L2_BLOCK)
return (NULL);
mpte = PTE_TO_VM_PAGE(pmap_load(l2));
mpte->ref_count++;
} else {
mpte = _pmap_alloc_l3(pmap, l2pindex,
NULL);
if (mpte == NULL)
return (mpte);
}
} else {
mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
if (mpte == NULL)
return (mpte);
}
}
l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
l3 = &l3[pmap_l3_index(va)];
} else {
mpte = NULL;
l2 = pmap_pde(kernel_pmap, va, &lvl);
KASSERT(l2 != NULL,
("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
va));
KASSERT(lvl == 2,
("pmap_enter_quick_locked: Invalid level %d", lvl));
l3 = pmap_l2_to_l3(l2, va);
}
/*
* Abort if a mapping already exists.
*/
if (pmap_load(l3) != 0) {
if (mpte != NULL)
mpte->ref_count--;
return (NULL);
}
/*
* Enter on the PV list if part of our managed memory.
*/
if ((m->oflags & VPO_UNMANAGED) == 0 &&
!pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
if (mpte != NULL)
pmap_abort_ptp(pmap, va, mpte);
return (NULL);
}
/*
* Increment counters
*/
pmap_resident_count_inc(pmap, 1);
pa = VM_PAGE_TO_PHYS(m);
l3_val = PHYS_TO_PTE(pa) | pmap_sh_attr |
ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) | L3_PAGE;
l3_val |= pmap_pte_bti(pmap, va);
if ((prot & VM_PROT_EXECUTE) == 0 ||
m->md.pv_memattr == VM_MEMATTR_DEVICE)
l3_val |= ATTR_S1_XN;
if (ADDR_IS_USER(va))
l3_val |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
else
l3_val |= ATTR_S1_UXN;
if (pmap != kernel_pmap)
l3_val |= ATTR_S1_nG;
/*
* Now validate mapping with RO protection
*/
if ((m->oflags & VPO_UNMANAGED) == 0)
l3_val |= ATTR_SW_MANAGED;
else
l3_val |= ATTR_AF;
/* Sync icache before the mapping is stored to PTE */
if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
m->md.pv_memattr == VM_MEMATTR_WRITE_BACK)
cpu_icache_sync_range((void *)PHYS_TO_DMAP(pa), PAGE_SIZE);
pmap_store(l3, l3_val);
dsb(ishst);
#if VM_NRESERVLEVEL > 0
/*
* First, attempt L3C promotion, if the virtual and physical addresses
* are aligned with each other and an underlying reservation has the
* neighboring L3 pages allocated. The first condition is simply an
* optimization that recognizes some eventual promotion failures early
* at a lower run-time cost. Then, attempt L2 promotion, if both a
* level 1 reservation and the PTP are fully populated.
*/
if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
(va & L3C_OFFSET) == (pa & L3C_OFFSET) &&
(m->flags & PG_FICTITIOUS) == 0 &&
(full_lvl = vm_reserv_level_iffullpop(m)) >= 0 &&
pmap_promote_l3c(pmap, l3, va) &&
full_lvl == 1 && (mpte == NULL || mpte->ref_count == NL3PG)) {
if (l2 == NULL)
l2 = pmap_l2(pmap, va);
/*
* If promotion succeeds, then the next call to this function
* should not be given the unmapped PTP as a hint.
*/
if (pmap_promote_l2(pmap, l2, va, mpte, lockp))
mpte = NULL;
}
#endif
return (mpte);
}
/*
* This code maps large physical mmap regions into the
* processor address space. Note that some shortcuts
* are taken, but the code works.
*/
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
vm_pindex_t pindex, vm_size_t size)
{
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
("pmap_object_init_pt: non-device object"));
}
/*
* Clear the wired attribute from the mappings for the specified range of
* addresses in the given pmap. Every valid mapping within that range
* must have the wired attribute set. In contrast, invalid mappings
* cannot have the wired attribute set, so they are ignored.
*
* The wired attribute of the page table entry is not a hardware feature,
* so there is no need to invalidate any TLB entries.
*/
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t va_next;
pd_entry_t *l0, *l1, *l2;
pt_entry_t *l3;
bool partial_l3c;
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
l0 = pmap_l0(pmap, sva);
if (pmap_load(l0) == 0) {
va_next = (sva + L0_SIZE) & ~L0_OFFSET;
if (va_next < sva)
va_next = eva;
continue;
}
l1 = pmap_l0_to_l1(l0, sva);
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
if (va_next < sva)
va_next = eva;
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= eva,
("partial update of non-transparent 1G page "
"l1 %#lx sva %#lx eva %#lx va_next %#lx",
pmap_load(l1), sva, eva, va_next));
MPASS(pmap != kernel_pmap);
MPASS((pmap_load(l1) & (ATTR_SW_MANAGED |
ATTR_SW_WIRED)) == ATTR_SW_WIRED);
pmap_clear_bits(l1, ATTR_SW_WIRED);
pmap->pm_stats.wired_count -= L1_SIZE / PAGE_SIZE;
continue;
}
va_next = (sva + L2_SIZE) & ~L2_OFFSET;
if (va_next < sva)
va_next = eva;
l2 = pmap_l1_to_l2(l1, sva);
if (pmap_load(l2) == 0)
continue;
if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
if ((pmap_load(l2) & ATTR_SW_WIRED) == 0)
panic("pmap_unwire: l2 %#jx is missing "
"ATTR_SW_WIRED", (uintmax_t)pmap_load(l2));
/*
* Are we unwiring the entire large page? If not,
* demote the mapping and fall through.
*/
if (sva + L2_SIZE == va_next && eva >= va_next) {
pmap_clear_bits(l2, ATTR_SW_WIRED);
pmap->pm_stats.wired_count -= L2_SIZE /
PAGE_SIZE;
continue;
} else if (pmap_demote_l2(pmap, l2, sva) == NULL)
panic("pmap_unwire: demotion failed");
}
KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
("pmap_unwire: Invalid l2 entry after demotion"));
if (va_next > eva)
va_next = eva;
for (partial_l3c = true, l3 = pmap_l2_to_l3(l2, sva);
sva != va_next; l3++, sva += L3_SIZE) {
if (pmap_load(l3) == 0)
continue;
if ((pmap_load(l3) & ATTR_CONTIGUOUS) != 0) {
/*
* Avoid demotion for whole-page unwiring.
*/
if ((sva & L3C_OFFSET) == 0) {
/*
* Handle the possibility that
* "va_next" is zero because of
* address wraparound.
*/
partial_l3c = sva + L3C_OFFSET >
va_next - 1;
}
if (partial_l3c)
(void)pmap_demote_l3c(pmap, l3, sva);
}
if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
panic("pmap_unwire: l3 %#jx is missing "
"ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
/*
* ATTR_SW_WIRED must be cleared atomically. Although
* the pmap lock synchronizes access to ATTR_SW_WIRED,
* the System MMU may write to the entry concurrently.
*/
pmap_clear_bits(l3, ATTR_SW_WIRED);
pmap->pm_stats.wired_count--;
}
}
PMAP_UNLOCK(pmap);
}
/*
* This function requires that the caller has already added one to ml3's
* ref_count in anticipation of creating a 4KB page mapping.
*/
static bool
pmap_copy_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, pt_entry_t l3e,
vm_page_t ml3, struct rwlock **lockp)
{
pt_entry_t *tl3p;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((va & L3C_OFFSET) == 0,
("pmap_copy_l3c: va is not aligned"));
KASSERT((l3e & ATTR_SW_MANAGED) != 0,
("pmap_copy_l3c: l3e is not managed"));
/*
* Abort if a mapping already exists.
*/
for (tl3p = l3p; tl3p < &l3p[L3C_ENTRIES]; tl3p++)
if (pmap_load(tl3p) != 0) {
if (ml3 != NULL)
ml3->ref_count--;
return (false);
}
if (!pmap_pv_insert_l3c(pmap, va, PTE_TO_VM_PAGE(l3e), lockp)) {
if (ml3 != NULL)
pmap_abort_ptp(pmap, va, ml3);
return (false);
}
ml3->ref_count += L3C_ENTRIES - 1;
/*
* Clear the wired and accessed bits. However, leave the dirty bit
* unchanged because read/write superpage mappings are required to be
* dirty.
*/
l3e &= ~(ATTR_SW_WIRED | ATTR_AF);
for (tl3p = l3p; tl3p < &l3p[L3C_ENTRIES]; tl3p++) {
pmap_store(tl3p, l3e);
l3e += L3_SIZE;
}
pmap_resident_count_inc(pmap, L3C_ENTRIES);
counter_u64_add(pmap_l3c_mappings, 1);
CTR2(KTR_PMAP, "pmap_copy_l3c: success for va %#lx in pmap %p",
va, pmap);
return (true);
}
/*
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
*
* This routine is only advisory and need not do anything.
*
* Because the executable mappings created by this routine are copied,
* it should not have to flush the instruction cache.
*/
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
struct rwlock *lock;
pd_entry_t *l0, *l1, *l2, srcptepaddr;
pt_entry_t *dst_pte, mask, nbits, ptetemp, *src_pte;
vm_offset_t addr, end_addr, va_next;
vm_page_t dst_m, dstmpte, srcmpte;
PMAP_ASSERT_STAGE1(dst_pmap);
PMAP_ASSERT_STAGE1(src_pmap);
if (dst_addr != src_addr)
return;
end_addr = src_addr + len;
lock = NULL;
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
} else {
PMAP_LOCK(src_pmap);
PMAP_LOCK(dst_pmap);
}
for (addr = src_addr; addr < end_addr; addr = va_next) {
l0 = pmap_l0(src_pmap, addr);
if (pmap_load(l0) == 0) {
va_next = (addr + L0_SIZE) & ~L0_OFFSET;
if (va_next < addr)
va_next = end_addr;
continue;
}
va_next = (addr + L1_SIZE) & ~L1_OFFSET;
if (va_next < addr)
va_next = end_addr;
l1 = pmap_l0_to_l1(l0, addr);
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= end_addr,
("partial update of non-transparent 1G page "
"l1 %#lx addr %#lx end_addr %#lx va_next %#lx",
pmap_load(l1), addr, end_addr, va_next));
srcptepaddr = pmap_load(l1);
l1 = pmap_l1(dst_pmap, addr);
if (l1 == NULL) {
if (_pmap_alloc_l3(dst_pmap,
pmap_l0_pindex(addr), NULL) == NULL)
break;
l1 = pmap_l1(dst_pmap, addr);
} else {
l0 = pmap_l0(dst_pmap, addr);
dst_m = PTE_TO_VM_PAGE(pmap_load(l0));
dst_m->ref_count++;
}
KASSERT(pmap_load(l1) == 0,
("1G mapping present in dst pmap "
"l1 %#lx addr %#lx end_addr %#lx va_next %#lx",
pmap_load(l1), addr, end_addr, va_next));
pmap_store(l1, srcptepaddr & ~ATTR_SW_WIRED);
pmap_resident_count_inc(dst_pmap, L1_SIZE / PAGE_SIZE);
continue;
}
va_next = (addr + L2_SIZE) & ~L2_OFFSET;
if (va_next < addr)
va_next = end_addr;
l2 = pmap_l1_to_l2(l1, addr);
srcptepaddr = pmap_load(l2);
if (srcptepaddr == 0)
continue;
if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) {
/*
* We can only virtual copy whole superpages.
*/
if ((addr & L2_OFFSET) != 0 ||
addr + L2_SIZE > end_addr)
continue;
l2 = pmap_alloc_l2(dst_pmap, addr, &dst_m, NULL);
if (l2 == NULL)
break;
if (pmap_load(l2) == 0 &&
((srcptepaddr & ATTR_SW_MANAGED) == 0 ||
pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr,
PMAP_ENTER_NORECLAIM, &lock))) {
/*
* We leave the dirty bit unchanged because
* managed read/write superpage mappings are
* required to be dirty. However, managed
* superpage mappings are not required to
* have their accessed bit set, so we clear
* it because we don't know if this mapping
* will be used.
*/
srcptepaddr &= ~ATTR_SW_WIRED;
if ((srcptepaddr & ATTR_SW_MANAGED) != 0)
srcptepaddr &= ~ATTR_AF;
pmap_store(l2, srcptepaddr);
pmap_resident_count_inc(dst_pmap, L2_SIZE /
PAGE_SIZE);
counter_u64_add(pmap_l2_mappings, 1);
} else
pmap_abort_ptp(dst_pmap, addr, dst_m);
continue;
}
KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE,
("pmap_copy: invalid L2 entry"));
srcmpte = PTE_TO_VM_PAGE(srcptepaddr);
KASSERT(srcmpte->ref_count > 0,
("pmap_copy: source page table page is unused"));
if (va_next > end_addr)
va_next = end_addr;
src_pte = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(srcptepaddr));
src_pte = &src_pte[pmap_l3_index(addr)];
dstmpte = NULL;
for (; addr < va_next; addr += PAGE_SIZE, src_pte++) {
ptetemp = pmap_load(src_pte);
/*
* We only virtual copy managed pages.
*/
if ((ptetemp & ATTR_SW_MANAGED) == 0)
continue;
if (dstmpte != NULL) {
KASSERT(dstmpte->pindex == pmap_l2_pindex(addr),
("dstmpte pindex/addr mismatch"));
dstmpte->ref_count++;
} else if ((dstmpte = pmap_alloc_l3(dst_pmap, addr,
NULL)) == NULL)
goto out;
dst_pte = (pt_entry_t *)
PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
dst_pte = &dst_pte[pmap_l3_index(addr)];
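/*
 * Copy a whole L3C superpage at once when the source PTE begins one
 * and the entire superpage fits below va_next. Comparing against
 * "va_next - 1" is the overflow-safe form of addr + L3C_SIZE <= va_next.
 */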
if ((ptetemp & ATTR_CONTIGUOUS) != 0 && (addr &
L3C_OFFSET) == 0 && addr + L3C_OFFSET <=
va_next - 1) {
if (!pmap_copy_l3c(dst_pmap, dst_pte, addr,
ptetemp, dstmpte, &lock))
goto out;
addr += L3C_SIZE - PAGE_SIZE;
src_pte += L3C_ENTRIES - 1;
} else if (pmap_load(dst_pte) == 0 &&
pmap_try_insert_pv_entry(dst_pmap, addr,
PTE_TO_VM_PAGE(ptetemp), &lock)) {
/*
* Clear the wired, contiguous, modified, and
* accessed bits from the destination PTE.
* The contiguous bit is cleared because we
* are not copying the entire L3C superpage.
*/
mask = ATTR_SW_WIRED | ATTR_CONTIGUOUS |
ATTR_AF;
nbits = 0;
if ((ptetemp & ATTR_SW_DBM) != 0)
nbits |= ATTR_S1_AP_RW_BIT;
pmap_store(dst_pte, (ptetemp & ~mask) | nbits);
pmap_resident_count_inc(dst_pmap, 1);
} else {
pmap_abort_ptp(dst_pmap, addr, dstmpte);
goto out;
}
/* Have we copied all of the valid mappings? */
if (dstmpte->ref_count >= srcmpte->ref_count)
break;
}
}
out:
/*
* XXX This barrier may not be needed because the destination pmap is
* not active.
*/
dsb(ishst);
if (lock != NULL)
rw_wunlock(lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
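/*
 * Copy the BTI configuration from "src_pmap" to "dst_pmap", retrying after
 * a vm_wait() if the copy fails for lack of memory.
 */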
int
pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap)
{
int error;
if (dst_pmap->pm_stage != src_pmap->pm_stage)
return (EINVAL);
if (dst_pmap->pm_stage != PM_STAGE1 || src_pmap->pm_bti == NULL)
return (0);
for (;;) {
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
} else {
PMAP_LOCK(src_pmap);
PMAP_LOCK(dst_pmap);
}
error = pmap_bti_copy(dst_pmap, src_pmap);
/* Clean up partial copy on failure due to no memory. */
if (error == ENOMEM)
pmap_bti_deassign_all(dst_pmap);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
if (error != ENOMEM)
break;
vm_wait(NULL);
}
return (error);
}
/*
* pmap_zero_page zeros the specified hardware page through its
* direct map address.
*/
void
pmap_zero_page(vm_page_t m)
{
vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
pagezero((void *)va);
}
/*
* pmap_zero_page_area zeros a region of the specified hardware page
* through its direct map address.
*
* off and size may not cover an area beyond a single hardware page.
*/
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
if (off == 0 && size == PAGE_SIZE)
pagezero((void *)va);
else
bzero((char *)va + off, size);
}
/*
* pmap_copy_page copies the specified (machine independent) page
* through the source and destination pages' direct map addresses.
*/
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
pagecopy((void *)src, (void *)dst);
}
int unmapped_buf_allowed = 1;
void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
vm_offset_t b_offset, int xfersize)
{
void *a_cp, *b_cp;
vm_page_t m_a, m_b;
vm_paddr_t p_a, p_b;
vm_offset_t a_pg_offset, b_pg_offset;
int cnt;
while (xfersize > 0) {
a_pg_offset = a_offset & PAGE_MASK;
m_a = ma[a_offset >> PAGE_SHIFT];
p_a = m_a->phys_addr;
b_pg_offset = b_offset & PAGE_MASK;
m_b = mb[b_offset >> PAGE_SHIFT];
p_b = m_b->phys_addr;
cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
cnt = min(cnt, PAGE_SIZE - b_pg_offset);
if (__predict_false(!PHYS_IN_DMAP(p_a))) {
panic("!DMAP a %lx", p_a);
} else {
a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
}
if (__predict_false(!PHYS_IN_DMAP(p_b))) {
panic("!DMAP b %lx", p_b);
} else {
b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
}
bcopy(a_cp, b_cp, cnt);
a_offset += cnt;
b_offset += cnt;
xfersize -= cnt;
}
}
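/*
 * With a full direct map, no transient kernel mapping is needed; simply
 * return the page's direct map address.
 */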
vm_offset_t
pmap_quick_enter_page(vm_page_t m)
{
return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
}
void
pmap_quick_remove_page(vm_offset_t addr)
{
}
/*
* Returns true if the pmap's pv is one of the first
* 16 pvs linked to from this page. This count may
* be changed upwards or downwards in the future; it
* is only necessary that true be returned for a small
* subset of pmaps for proper page aging.
*/
bool
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
struct md_page *pvh;
struct rwlock *lock;
pv_entry_t pv;
int loops = 0;
bool rv;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = false;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
if (PV_PMAP(pv) == pmap) {
rv = true;
break;
}
loops++;
if (loops >= 16)
break;
}
if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
pvh = page_to_pvh(m);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (PV_PMAP(pv) == pmap) {
rv = true;
break;
}
loops++;
if (loops >= 16)
break;
}
}
rw_runlock(lock);
return (rv);
}
/*
* pmap_page_wired_mappings:
*
* Return the number of managed mappings to the given physical page
* that are wired.
*/
int
pmap_page_wired_mappings(vm_page_t m)
{
struct rwlock *lock;
struct md_page *pvh;
pmap_t pmap;
pt_entry_t *pte;
pv_entry_t pv;
int count, md_gen, pvh_gen;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
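/*
 * If a pmap lock cannot be acquired without blocking while the PV list
 * lock is held, drop the PV list lock, block on the pmap lock, and then
 * reacquire the PV list lock. The saved generation counts detect
 * concurrent PV list changes, in which case the scan restarts.
 */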
restart:
count = 0;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
rw_runlock(lock);
PMAP_LOCK(pmap);
rw_rlock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
}
}
pte = pmap_pte_exists(pmap, pv->pv_va, 3, __func__);
if ((pmap_load(pte) & ATTR_SW_WIRED) != 0)
count++;
PMAP_UNLOCK(pmap);
}
if ((m->flags & PG_FICTITIOUS) == 0) {
pvh = page_to_pvh(m);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
rw_runlock(lock);
PMAP_LOCK(pmap);
rw_rlock(lock);
if (md_gen != m->md.pv_gen ||
pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
}
}
pte = pmap_pte_exists(pmap, pv->pv_va, 2, __func__);
if ((pmap_load(pte) & ATTR_SW_WIRED) != 0)
count++;
PMAP_UNLOCK(pmap);
}
}
rw_runlock(lock);
return (count);
}
/*
* Returns true if the given page is mapped individually or as part of
* a 2mpage. Otherwise, returns false.
*/
bool
pmap_page_is_mapped(vm_page_t m)
{
struct rwlock *lock;
bool rv;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (false);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&page_to_pvh(m)->pv_list));
rw_runlock(lock);
return (rv);
}
/*
* Destroy all managed, non-wired mappings in the given user-space
* pmap. This pmap cannot be active on any processor besides the
* caller.
*
* This function cannot be applied to the kernel pmap. Moreover, it
* is not intended for general use. It is only to be used during
* process termination. Consequently, it can be implemented in ways
* that make it faster than pmap_remove(). First, it can more quickly
* destroy mappings by iterating over the pmap's collection of PV
* entries, rather than searching the page table. Second, it doesn't
* have to test and clear the page table entries atomically, because
* no processor is currently accessing the user address space. In
* particular, a page table entry's dirty bit won't change state once
* this function starts.
*/
void
pmap_remove_pages(pmap_t pmap)
{
pd_entry_t *pde;
pt_entry_t *pte, tpte;
struct spglist free;
struct pv_chunklist free_chunks[PMAP_MEMDOM];
vm_page_t m, ml3, mt;
pv_entry_t pv;
struct md_page *pvh;
struct pv_chunk *pc, *npc;
struct rwlock *lock;
int64_t bit;
uint64_t inuse, bitmask;
int allfree, field, i, idx, lvl;
int freed __pvused;
vm_paddr_t pa;
lock = NULL;
for (i = 0; i < PMAP_MEMDOM; i++)
TAILQ_INIT(&free_chunks[i]);
SLIST_INIT(&free);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
freed = 0;
for (field = 0; field < _NPCM; field++) {
inuse = ~pc->pc_map[field] & pc_freemask[field];
while (inuse != 0) {
bit = ffsl(inuse) - 1;
bitmask = 1UL << bit;
idx = field * 64 + bit;
pv = &pc->pc_pventry[idx];
inuse &= ~bitmask;
pde = pmap_pde(pmap, pv->pv_va, &lvl);
KASSERT(pde != NULL,
("Attempting to remove an unmapped page"));
switch(lvl) {
case 1:
pte = pmap_l1_to_l2(pde, pv->pv_va);
tpte = pmap_load(pte);
KASSERT((tpte & ATTR_DESCR_MASK) ==
L2_BLOCK,
("Attempting to remove an invalid "
"block: %lx", tpte));
break;
case 2:
pte = pmap_l2_to_l3(pde, pv->pv_va);
tpte = pmap_load(pte);
KASSERT((tpte & ATTR_DESCR_MASK) ==
L3_PAGE,
("Attempting to remove an invalid "
"page: %lx", tpte));
break;
default:
panic(
"Invalid page directory level: %d",
lvl);
}
/*
* We cannot remove wired mappings at this time.
*
* For L3C superpages, all of the constituent PTEs
* should have the wired bit set, so we don't
* check for ATTR_CONTIGUOUS here.
*/
if (tpte & ATTR_SW_WIRED) {
allfree = 0;
continue;
}
/* Mark free */
pc->pc_map[field] |= bitmask;
/*
* Because this pmap is not active on other
* processors, the dirty bit cannot have
* changed state since we last loaded pte.
*/
pmap_clear(pte);
pa = PTE_TO_PHYS(tpte);
m = PHYS_TO_VM_PAGE(pa);
KASSERT(m->phys_addr == pa,
("vm_page_t %p phys_addr mismatch %016jx %016jx",
m, (uintmax_t)m->phys_addr,
(uintmax_t)tpte));
KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
m < &vm_page_array[vm_page_array_size],
("pmap_remove_pages: bad pte %#jx",
(uintmax_t)tpte));
/*
* Update the vm_page_t clean/reference bits.
*
* We don't check for ATTR_CONTIGUOUS here
* because writeable L3C superpages are expected
* to be dirty, i.e., every constituent PTE
* should be dirty.
*/
if (pmap_pte_dirty(pmap, tpte)) {
switch (lvl) {
case 1:
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
vm_page_dirty(mt);
break;
case 2:
vm_page_dirty(m);
break;
}
}
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
switch (lvl) {
case 1:
pmap_resident_count_dec(pmap,
L2_SIZE / PAGE_SIZE);
pvh = page_to_pvh(m);
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
if (TAILQ_EMPTY(&pvh->pv_list)) {
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
if ((mt->a.flags & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&mt->md.pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
ml3 = pmap_remove_pt_page(pmap,
pv->pv_va);
if (ml3 != NULL) {
KASSERT(vm_page_any_valid(ml3),
("pmap_remove_pages: l3 page not promoted"));
pmap_resident_count_dec(pmap,1);
KASSERT(ml3->ref_count == NL3PG,
("pmap_remove_pages: l3 page ref count error"));
ml3->ref_count = 0;
pmap_add_delayed_free_list(ml3,
&free, false);
}
break;
case 2:
pmap_resident_count_dec(pmap, 1);
TAILQ_REMOVE(&m->md.pv_list, pv,
pv_next);
m->md.pv_gen++;
if ((m->a.flags & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&m->md.pv_list) &&
(m->flags & PG_FICTITIOUS) == 0) {
pvh = page_to_pvh(m);
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m,
PGA_WRITEABLE);
}
break;
}
pmap_unuse_pt(pmap, pv->pv_va, pmap_load(pde),
&free);
freed++;
}
}
PV_STAT(atomic_add_long(&pv_entry_frees, freed));
PV_STAT(atomic_add_int(&pv_entry_spare, freed));
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
if (allfree) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc,
pc_list);
}
}
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
pmap_bti_deassign_all(pmap);
free_pv_chunk_batch(free_chunks);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, true);
}
/*
* This is used to check if a page has been accessed or modified.
*/
static bool
pmap_page_test_mappings(vm_page_t m, bool accessed, bool modified)
{
struct rwlock *lock;
pv_entry_t pv;
struct md_page *pvh;
pt_entry_t l3e, mask, *pte, value;
pmap_t pmap;
int md_gen, pvh_gen;
bool rv;
rv = false;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
PMAP_ASSERT_STAGE1(pmap);
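/*
 * Avoid a lock order reversal: try-lock the pmap while holding the
 * pv list lock.  If that fails, drop the pv list lock, take both
 * locks in order, and use the generation count to detect concurrent
 * pv list changes and restart the scan.
 */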
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
rw_runlock(lock);
PMAP_LOCK(pmap);
rw_rlock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
}
}
pte = pmap_pte_exists(pmap, pv->pv_va, 3, __func__);
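/* Build the mask and value that this L3 entry must match. */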
mask = 0;
value = 0;
if (modified) {
mask |= ATTR_S1_AP_RW_BIT;
value |= ATTR_S1_AP(ATTR_S1_AP_RW);
}
if (accessed) {
mask |= ATTR_AF | ATTR_DESCR_MASK;
value |= ATTR_AF | L3_PAGE;
}
l3e = pmap_load(pte);
if ((l3e & ATTR_CONTIGUOUS) != 0)
l3e = pmap_load_l3c(pte);
PMAP_UNLOCK(pmap);
rv = (l3e & mask) == value;
if (rv)
goto out;
}
if ((m->flags & PG_FICTITIOUS) == 0) {
pvh = page_to_pvh(m);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
pmap = PV_PMAP(pv);
PMAP_ASSERT_STAGE1(pmap);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
rw_runlock(lock);
PMAP_LOCK(pmap);
rw_rlock(lock);
if (md_gen != m->md.pv_gen ||
pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
}
}
pte = pmap_pte_exists(pmap, pv->pv_va, 2, __func__);
mask = 0;
value = 0;
if (modified) {
mask |= ATTR_S1_AP_RW_BIT;
value |= ATTR_S1_AP(ATTR_S1_AP_RW);
}
if (accessed) {
mask |= ATTR_AF | ATTR_DESCR_MASK;
value |= ATTR_AF | L2_BLOCK;
}
rv = (pmap_load(pte) & mask) == value;
PMAP_UNLOCK(pmap);
if (rv)
goto out;
}
}
out:
rw_runlock(lock);
return (rv);
}
/*
* pmap_is_modified:
*
* Return whether or not the specified physical page was modified
* in any physical maps.
*/
bool
pmap_is_modified(vm_page_t m)
{
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_modified: page %p is not managed", m));
/*
* If the page is not busied then this check is racy.
*/
if (!pmap_page_is_write_mapped(m))
return (false);
return (pmap_page_test_mappings(m, false, true));
}
/*
* pmap_is_prefaultable:
*
* Return whether or not the specified virtual address is eligible
* for prefault.
*/
bool
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
pd_entry_t *pde;
pt_entry_t *pte;
bool rv;
int lvl;
/*
* Return true if and only if the L3 entry for the specified virtual
* address is allocated but invalid.
*/
rv = false;
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, addr, &lvl);
if (pde != NULL && lvl == 2) {
pte = pmap_l2_to_l3(pde, addr);
rv = pmap_load(pte) == 0;
}
PMAP_UNLOCK(pmap);
return (rv);
}
/*
* pmap_is_referenced:
*
* Return whether or not the specified physical page was referenced
* in any physical maps.
*/
bool
pmap_is_referenced(vm_page_t m)
{
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_referenced: page %p is not managed", m));
return (pmap_page_test_mappings(m, true, false));
}
/*
* Clear the write and modified bits in each of the given page's mappings.
*/
void
pmap_remove_write(vm_page_t m)
{
struct md_page *pvh;
pmap_t pmap;
struct rwlock *lock;
pv_entry_t next_pv, pv;
pt_entry_t oldpte, *pte, set, clear, mask, val;
vm_offset_t va;
int md_gen, pvh_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_write: page %p is not managed", m));
vm_page_assert_busied(m);
if (!pmap_page_is_write_mapped(m))
return;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : page_to_pvh(m);
rw_wlock(lock);
retry:
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
PMAP_ASSERT_STAGE1(pmap);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
}
}
va = pv->pv_va;
pte = pmap_pte_exists(pmap, va, 2, __func__);
if ((pmap_load(pte) & ATTR_SW_DBM) != 0)
(void)pmap_demote_l2_locked(pmap, pte, va, &lock);
KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
("inconsistent pv lock %p %p for page %p",
lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
PMAP_UNLOCK(pmap);
}
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen ||
md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
}
}
pte = pmap_pte_exists(pmap, pv->pv_va, 3, __func__);
oldpte = pmap_load(pte);
if ((oldpte & ATTR_SW_DBM) != 0) {
if ((oldpte & ATTR_CONTIGUOUS) != 0) {
(void)pmap_demote_l3c(pmap, pte, pv->pv_va);
/*
* The L3 entry's accessed bit may have
* changed.
*/
oldpte = pmap_load(pte);
}
if (pmap->pm_stage == PM_STAGE1) {
set = ATTR_S1_AP_RW_BIT;
clear = 0;
mask = ATTR_S1_AP_RW_BIT;
val = ATTR_S1_AP(ATTR_S1_AP_RW);
} else {
set = 0;
clear = ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
mask = ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
val = ATTR_S2_S2AP(ATTR_S2_S2AP_WRITE);
}
clear |= ATTR_SW_DBM;
while (!atomic_fcmpset_64(pte, &oldpte,
(oldpte | set) & ~clear))
cpu_spinwait();
if ((oldpte & mask) == val)
vm_page_dirty(m);
pmap_invalidate_page(pmap, pv->pv_va, true);
}
PMAP_UNLOCK(pmap);
}
rw_wunlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
/*
* pmap_ts_referenced:
*
* Return a count of reference bits for a page, clearing those bits.
* It is not necessary for every reference bit to be cleared, but it
* is necessary that 0 only be returned when there are truly no
* reference bits set.
*
* As an optimization, update the page's dirty field if a modified bit is
* found while counting reference bits. This opportunistic update can be
* performed at low cost and can eliminate the need for some future calls
* to pmap_is_modified(). However, since this function stops after
* finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
* dirty pages. Those dirty pages will only be detected by a future call
* to pmap_is_modified().
*/
int
pmap_ts_referenced(vm_page_t m)
{
struct md_page *pvh;
pv_entry_t pv, pvf;
pmap_t pmap;
struct rwlock *lock;
pt_entry_t *pte, tpte;
vm_offset_t va;
vm_paddr_t pa;
int cleared, md_gen, not_cleared, pvh_gen;
struct spglist free;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
SLIST_INIT(&free);
cleared = 0;
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : page_to_pvh(m);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_wlock(lock);
retry:
not_cleared = 0;
if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
goto small_mappings;
pv = pvf;
do {
if (pvf == NULL)
pvf = pv;
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
}
}
va = pv->pv_va;
pte = pmap_pte_exists(pmap, va, 2, __func__);
tpte = pmap_load(pte);
if (pmap_pte_dirty(pmap, tpte)) {
/*
* Although "tpte" is mapping a 2MB page, because
* this function is called at a 4KB page granularity,
* we only update the 4KB page under test.
*/
vm_page_dirty(m);
}
if ((tpte & ATTR_AF) != 0) {
pa = VM_PAGE_TO_PHYS(m);
/*
* Since this reference bit is shared by 512 4KB pages,
* it should not be cleared every time it is tested.
* Apply a simple "hash" function on the physical page
* number, the virtual superpage number, and the pmap
* address to select one 4KB page out of the 512 on
* which testing the reference bit will result in
* clearing that reference bit. This function is
* designed to avoid the selection of the same 4KB page
* for every 2MB page mapping.
*
* On demotion, a mapping that hasn't been referenced
* is simply destroyed. To avoid the possibility of a
* subsequent page fault on a demoted wired mapping,
* always leave its reference bit set. Moreover,
* since the superpage is wired, the current state of
* its reference bit won't affect page replacement.
*/
if ((((pa >> PAGE_SHIFT) ^ (va >> L2_SHIFT) ^
(uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
(tpte & ATTR_SW_WIRED) == 0) {
pmap_clear_bits(pte, ATTR_AF);
pmap_invalidate_page(pmap, va, true);
cleared++;
} else
not_cleared++;
}
PMAP_UNLOCK(pmap);
/* Rotate the PV list if it has more than one entry. */
if (TAILQ_NEXT(pv, pv_next) != NULL) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
}
if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
goto out;
} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
small_mappings:
if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
goto out;
pv = pvf;
do {
if (pvf == NULL)
pvf = pv;
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
}
}
pte = pmap_pte_exists(pmap, pv->pv_va, 3, __func__);
tpte = pmap_load(pte);
if (pmap_pte_dirty(pmap, tpte))
vm_page_dirty(m);
if ((tpte & ATTR_AF) != 0) {
if ((tpte & ATTR_SW_WIRED) == 0) {
/*
* Clear the accessed bit in this L3 entry
* regardless of the contiguous bit.
*/
pmap_clear_bits(pte, ATTR_AF);
pmap_invalidate_page(pmap, pv->pv_va, true);
cleared++;
} else
not_cleared++;
} else if ((tpte & ATTR_CONTIGUOUS) != 0 &&
(pmap_load_l3c(pte) & ATTR_AF) != 0) {
/*
* An L3C superpage mapping is regarded as accessed
* until the accessed bit has been cleared in all
* of its constituent entries.
*/
not_cleared++;
}
PMAP_UNLOCK(pmap);
/* Rotate the PV list if it has more than one entry. */
if (TAILQ_NEXT(pv, pv_next) != NULL) {
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
}
} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
rw_wunlock(lock);
vm_page_free_pages_toq(&free, true);
return (cleared + not_cleared);
}
/*
* Apply the given advice to the specified range of addresses within the
* given pmap. Depending on the advice, clear the referenced and/or
* modified flags in each mapping and set the mapped page's dirty field.
*/
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
struct rwlock *lock;
vm_offset_t va, va_next, dva;
vm_page_t m;
pd_entry_t *l0, *l1, *l2, oldl2;
pt_entry_t *l3, *dl3, oldl3;
PMAP_ASSERT_STAGE1(pmap);
if (advice != MADV_DONTNEED && advice != MADV_FREE)
return;
PMAP_LOCK(pmap);
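/*
 * Walk the given range, advancing by whole L0, L1, or L2 regions
 * when the corresponding table entries are not present.
 */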
for (; sva < eva; sva = va_next) {
l0 = pmap_l0(pmap, sva);
if (pmap_load(l0) == 0) {
va_next = (sva + L0_SIZE) & ~L0_OFFSET;
if (va_next < sva)
va_next = eva;
continue;
}
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
if (va_next < sva)
va_next = eva;
l1 = pmap_l0_to_l1(l0, sva);
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
continue;
}
va_next = (sva + L2_SIZE) & ~L2_OFFSET;
if (va_next < sva)
va_next = eva;
l2 = pmap_l1_to_l2(l1, sva);
oldl2 = pmap_load(l2);
if (oldl2 == 0)
continue;
if ((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK) {
if ((oldl2 & ATTR_SW_MANAGED) == 0)
continue;
lock = NULL;
if (!pmap_demote_l2_locked(pmap, l2, sva, &lock)) {
if (lock != NULL)
rw_wunlock(lock);
/*
* The 2MB page mapping was destroyed.
*/
continue;
}
/*
* Unless the page mappings are wired, remove the
* mapping to a single page so that a subsequent
* access may repromote. Choosing the last page
* within the address range [sva, min(va_next, eva))
* generally results in more repromotions. Since the
* underlying page table page is fully populated, this
* removal never frees a page table page.
*/
if ((oldl2 & ATTR_SW_WIRED) == 0) {
va = eva;
if (va > va_next)
va = va_next;
va -= PAGE_SIZE;
KASSERT(va >= sva,
("pmap_advise: no address gap"));
l3 = pmap_l2_to_l3(l2, va);
KASSERT(pmap_load(l3) != 0,
("pmap_advise: invalid PTE"));
pmap_remove_l3(pmap, l3, va, pmap_load(l2),
NULL, &lock);
}
if (lock != NULL)
rw_wunlock(lock);
}
KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
("pmap_advise: invalid L2 entry after demotion"));
if (va_next > eva)
va_next = eva;
va = va_next;
for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
sva += L3_SIZE) {
oldl3 = pmap_load(l3);
if ((oldl3 & (ATTR_SW_MANAGED | ATTR_DESCR_MASK)) !=
(ATTR_SW_MANAGED | L3_PAGE))
goto maybe_invlrng;
else if (pmap_pte_dirty(pmap, oldl3)) {
if (advice == MADV_DONTNEED) {
/*
* Future calls to pmap_is_modified()
* can be avoided by making the page
* dirty now.
*/
m = PTE_TO_VM_PAGE(oldl3);
vm_page_dirty(m);
}
if ((oldl3 & ATTR_CONTIGUOUS) != 0) {
/*
* Unconditionally demote the L3C
* superpage because we do not allow
* writeable, clean superpages.
*/
(void)pmap_demote_l3c(pmap, l3, sva);
/*
* Destroy the final mapping before the
* next L3C boundary or va_next,
* whichever comes first, so that a
* subsequent access may act as a
* repromotion trigger.
*/
if ((oldl3 & ATTR_SW_WIRED) == 0) {
dva = MIN((sva & ~L3C_OFFSET) +
L3C_SIZE - PAGE_SIZE,
va_next - PAGE_SIZE);
dl3 = pmap_l2_to_l3(l2, dva);
KASSERT(pmap_load(dl3) != 0,
("pmap_advise: invalid PTE"));
lock = NULL;
pmap_remove_l3(pmap, dl3, dva,
pmap_load(l2), NULL, &lock);
if (lock != NULL)
rw_wunlock(lock);
}
/*
* The L3 entry's accessed bit may have
* changed.
*/
oldl3 = pmap_load(l3);
}
/*
* Check that we did not just destroy this entry, so that we
* avoid corrupting the page table.
*/
if (oldl3 != 0) {
while (!atomic_fcmpset_long(l3, &oldl3,
(oldl3 & ~ATTR_AF) |
ATTR_S1_AP(ATTR_S1_AP_RO)))
cpu_spinwait();
}
} else if ((oldl3 & ATTR_AF) != 0) {
/*
* Clear the accessed bit in this L3 entry
* regardless of the contiguous bit.
*/
pmap_clear_bits(l3, ATTR_AF);
} else
goto maybe_invlrng;
if (va == va_next)
va = sva;
continue;
maybe_invlrng:
if (va != va_next) {
pmap_s1_invalidate_range(pmap, va, sva, true);
va = va_next;
}
}
if (va != va_next)
pmap_s1_invalidate_range(pmap, va, sva, true);
}
PMAP_UNLOCK(pmap);
}
/*
* Clear the modify bits on the specified physical page.
*/
void
pmap_clear_modify(vm_page_t m)
{
struct md_page *pvh;
struct rwlock *lock;
pmap_t pmap;
pv_entry_t next_pv, pv;
pd_entry_t *l2, oldl2;
pt_entry_t *l3, oldl3;
vm_offset_t va;
int md_gen, pvh_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
vm_page_assert_busied(m);
if (!pmap_page_is_write_mapped(m))
return;
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : page_to_pvh(m);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_wlock(lock);
restart:
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
PMAP_ASSERT_STAGE1(pmap);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
}
}
va = pv->pv_va;
l2 = pmap_l2(pmap, va);
oldl2 = pmap_load(l2);
/* If oldl2 has ATTR_SW_DBM set, then it is also dirty. */
if ((oldl2 & ATTR_SW_DBM) != 0 &&
pmap_demote_l2_locked(pmap, l2, va, &lock) &&
(oldl2 & ATTR_SW_WIRED) == 0) {
/*
* Write protect the mapping to a single page so that
* a subsequent write access may repromote.
*/
va += VM_PAGE_TO_PHYS(m) - PTE_TO_PHYS(oldl2);
l3 = pmap_l2_to_l3(l2, va);
oldl3 = pmap_load(l3);
while (!atomic_fcmpset_long(l3, &oldl3,
(oldl3 & ~ATTR_SW_DBM) | ATTR_S1_AP(ATTR_S1_AP_RO)))
cpu_spinwait();
vm_page_dirty(m);
pmap_s1_invalidate_page(pmap, va, true);
}
PMAP_UNLOCK(pmap);
}
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
PMAP_ASSERT_STAGE1(pmap);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
}
}
l2 = pmap_l2(pmap, pv->pv_va);
l3 = pmap_l2_to_l3(l2, pv->pv_va);
oldl3 = pmap_load(l3);
KASSERT((oldl3 & ATTR_CONTIGUOUS) == 0 ||
(oldl3 & (ATTR_SW_DBM | ATTR_S1_AP_RW_BIT)) !=
(ATTR_SW_DBM | ATTR_S1_AP(ATTR_S1_AP_RO)),
("writeable L3C superpage not dirty"));
if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM) {
if ((oldl3 & ATTR_CONTIGUOUS) != 0)
(void)pmap_demote_l3c(pmap, l3, pv->pv_va);
pmap_set_bits(l3, ATTR_S1_AP(ATTR_S1_AP_RO));
pmap_s1_invalidate_page(pmap, pv->pv_va, true);
}
PMAP_UNLOCK(pmap);
}
rw_wunlock(lock);
}
void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{
struct pmap_preinit_mapping *ppim;
vm_offset_t va, offset;
pd_entry_t old_l2e, *pde;
pt_entry_t *l2;
int i, lvl, l2_blocks, free_l2_count, start_idx;
/* Use the DMAP region if we can */
if (PHYS_IN_DMAP(pa) && PHYS_IN_DMAP(pa + size - 1) &&
pmap_kmapped_range(PHYS_TO_DMAP(pa), size))
return ((void *)PHYS_TO_DMAP(pa));
if (!vm_initialized) {
/*
* No L3 ptables so map entire L2 blocks where start VA is:
* preinit_map_va + start_idx * L2_SIZE
* There may be duplicate mappings (multiple VA -> same PA) but
* ARM64 dcache is always PIPT so that's acceptable.
*/
if (size == 0)
return (NULL);
/* Calculate how many L2 blocks are needed for the mapping */
l2_blocks = (roundup2(pa + size, L2_SIZE) -
rounddown2(pa, L2_SIZE)) >> L2_SHIFT;
offset = pa & L2_OFFSET;
if (preinit_map_va == 0)
return (NULL);
/* Map 2MiB L2 blocks from reserved VA space */
free_l2_count = 0;
start_idx = -1;
/* Find enough free contiguous VA space */
for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
ppim = pmap_preinit_mapping + i;
if (free_l2_count > 0 && ppim->pa != 0) {
/* Not enough space here */
free_l2_count = 0;
start_idx = -1;
continue;
}
if (ppim->pa == 0) {
/* Free L2 block */
if (start_idx == -1)
start_idx = i;
free_l2_count++;
if (free_l2_count == l2_blocks)
break;
}
}
if (free_l2_count != l2_blocks)
panic("%s: too many preinit mappings", __func__);
va = preinit_map_va + (start_idx * L2_SIZE);
for (i = start_idx; i < start_idx + l2_blocks; i++) {
/* Mark entries as allocated */
ppim = pmap_preinit_mapping + i;
ppim->pa = pa;
ppim->va = va + offset;
ppim->size = size;
}
/* Map L2 blocks */
pa = rounddown2(pa, L2_SIZE);
old_l2e = 0;
for (i = 0; i < l2_blocks; i++) {
pde = pmap_pde(kernel_pmap, va, &lvl);
KASSERT(pde != NULL,
("pmap_mapbios: Invalid page entry, va: 0x%lx",
va));
KASSERT(lvl == 1,
("pmap_mapbios: Invalid level %d", lvl));
/* Insert L2_BLOCK */
l2 = pmap_l1_to_l2(pde, va);
old_l2e |= pmap_load_store(l2,
PHYS_TO_PTE(pa) | ATTR_AF | pmap_sh_attr |
ATTR_S1_XN | ATTR_KERN_GP |
ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L2_BLOCK);
va += L2_SIZE;
pa += L2_SIZE;
}
if ((old_l2e & ATTR_DESCR_VALID) != 0)
pmap_s1_invalidate_all(kernel_pmap);
else {
/*
* Because the old entries were invalid and the new
* mappings are not executable, an isb is not required.
*/
dsb(ishst);
}
va = preinit_map_va + (start_idx * L2_SIZE);
} else {
/* kva_alloc may be used to map the pages */
offset = pa & PAGE_MASK;
size = round_page(offset + size);
va = kva_alloc(size);
if (va == 0)
panic("%s: Couldn't allocate KVA", __func__);
pde = pmap_pde(kernel_pmap, va, &lvl);
KASSERT(lvl == 2, ("pmap_mapbios: Invalid level %d", lvl));
/* L3 table is linked */
va = trunc_page(va);
pa = trunc_page(pa);
pmap_kenter(va, size, pa, memory_mapping_mode(pa));
}
return ((void *)(va + offset));
}
void
pmap_unmapbios(void *p, vm_size_t size)
{
struct pmap_preinit_mapping *ppim;
vm_offset_t offset, va, va_trunc;
pd_entry_t *pde;
pt_entry_t *l2;
int error __diagused, i, lvl, l2_blocks, block;
bool preinit_map;
va = (vm_offset_t)p;
if (VIRT_IN_DMAP(va)) {
KASSERT(VIRT_IN_DMAP(va + size - 1),
("%s: End address not in DMAP region: %lx", __func__,
va + size - 1));
/* Ensure the attributes are as expected for the DMAP region */
PMAP_LOCK(kernel_pmap);
error = pmap_change_props_locked(va, size,
PROT_READ | PROT_WRITE, VM_MEMATTR_DEFAULT, false);
PMAP_UNLOCK(kernel_pmap);
KASSERT(error == 0, ("%s: Failed to reset DMAP attributes: %d",
__func__, error));
return;
}
l2_blocks =
(roundup2(va + size, L2_SIZE) - rounddown2(va, L2_SIZE)) >> L2_SHIFT;
KASSERT(l2_blocks > 0, ("pmap_unmapbios: invalid size %lx", size));
/* Remove preinit mapping */
preinit_map = false;
block = 0;
for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
ppim = pmap_preinit_mapping + i;
if (ppim->va == va) {
KASSERT(ppim->size == size,
("pmap_unmapbios: size mismatch"));
ppim->va = 0;
ppim->pa = 0;
ppim->size = 0;
preinit_map = true;
offset = block * L2_SIZE;
va_trunc = rounddown2(va, L2_SIZE) + offset;
/* Remove L2_BLOCK */
pde = pmap_pde(kernel_pmap, va_trunc, &lvl);
KASSERT(pde != NULL,
("pmap_unmapbios: Invalid page entry, va: 0x%lx",
va_trunc));
l2 = pmap_l1_to_l2(pde, va_trunc);
pmap_clear(l2);
if (block == (l2_blocks - 1))
break;
block++;
}
}
if (preinit_map) {
pmap_s1_invalidate_all(kernel_pmap);
return;
}
/* Unmap the pages reserved with kva_alloc. */
if (vm_initialized) {
offset = va & PAGE_MASK;
size = round_page(offset + size);
va = trunc_page(va);
/* Unmap and invalidate the pages */
pmap_kremove_device(va, size);
kva_free(va, size);
}
}
/*
* Sets the memory attribute for the specified page.
*/
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
if (m->md.pv_memattr == ma)
return;
m->md.pv_memattr = ma;
/*
* If "m" is a normal page, update its direct mapping. This update
* can be relied upon to perform any cache operations that are
* required for data coherence.
*/
if ((m->flags & PG_FICTITIOUS) == 0 &&
pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
m->md.pv_memattr) != 0)
panic("memory attribute change on the direct map failed");
}
/*
* Changes the specified virtual address range's memory type to that given by
* the parameter "mode". The specified virtual address range must be
* completely contained within either the direct map or the kernel map. If
* the virtual address range is contained within the kernel map, then the
* memory type for each of the corresponding ranges of the direct map is also
* changed. (The corresponding ranges of the direct map are those ranges that
* map the same physical pages as the specified virtual address range.) These
* changes to the direct map are necessary because Intel describes the
* behavior of their processors as "undefined" if two or more mappings to the
* same physical page have different memory types.
*
* Returns zero if the change completed successfully, and either EINVAL or
* ENOMEM if the change failed. Specifically, EINVAL is returned if some part
* of the virtual address range was not mapped, and ENOMEM is returned if
* there was insufficient memory available to complete the change. In the
* latter case, the memory type may have been changed on some part of the
* virtual address range or the direct map.
*/
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
int error;
PMAP_LOCK(kernel_pmap);
error = pmap_change_props_locked(va, size, PROT_NONE, mode, false);
PMAP_UNLOCK(kernel_pmap);
return (error);
}
/*
* Changes the specified virtual address range's protections to those
* specified by "prot". Like pmap_change_attr(), protections for aliases
* in the direct map are updated as well. Protections on aliasing mappings may
* be a subset of the requested protections; for example, mappings in the direct
* map are never executable.
*/
int
pmap_change_prot(vm_offset_t va, vm_size_t size, vm_prot_t prot)
{
int error;
/* Only supported within the kernel map. */
if (va < VM_MIN_KERNEL_ADDRESS)
return (EINVAL);
PMAP_LOCK(kernel_pmap);
error = pmap_change_props_locked(va, size, prot, -1, false);
PMAP_UNLOCK(kernel_pmap);
return (error);
}
static int
pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot,
int mode, bool skip_unmapped)
{
vm_offset_t base, offset, tmpva;
vm_size_t pte_size;
vm_paddr_t pa;
pt_entry_t pte, *ptep, *newpte;
pt_entry_t bits, mask;
int lvl, rv;
PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
base = trunc_page(va);
offset = va & PAGE_MASK;
size = round_page(offset + size);
if (!VIRT_IN_DMAP(base) &&
!(base >= VM_MIN_KERNEL_ADDRESS && base < VM_MAX_KERNEL_ADDRESS))
return (EINVAL);
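/*
 * Accumulate the PTE bits to set ("bits") and the fields being
 * changed ("mask") for the requested memory type and protection.
 */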
bits = 0;
mask = 0;
if (mode != -1) {
bits = ATTR_S1_IDX(mode);
mask = ATTR_S1_IDX_MASK;
if (mode == VM_MEMATTR_DEVICE) {
mask |= ATTR_S1_XN;
bits |= ATTR_S1_XN;
}
}
if (prot != VM_PROT_NONE) {
/* Don't mark the DMAP as executable. It never is on arm64. */
if (VIRT_IN_DMAP(base)) {
prot &= ~VM_PROT_EXECUTE;
/*
* XXX Mark the DMAP as writable for now. We rely
* on this in ddb & dtrace to insert breakpoint
* instructions.
*/
prot |= VM_PROT_WRITE;
}
if ((prot & VM_PROT_WRITE) == 0) {
bits |= ATTR_S1_AP(ATTR_S1_AP_RO);
}
if ((prot & VM_PROT_EXECUTE) == 0) {
bits |= ATTR_S1_PXN;
}
bits |= ATTR_S1_UXN;
mask |= ATTR_S1_AP_MASK | ATTR_S1_XN;
}
for (tmpva = base; tmpva < base + size; ) {
ptep = pmap_pte(kernel_pmap, tmpva, &lvl);
if (ptep == NULL && !skip_unmapped) {
return (EINVAL);
} else if ((ptep == NULL && skip_unmapped) ||
(pmap_load(ptep) & mask) == bits) {
/*
* We already have the correct attribute or there
* is no memory mapped at this address and we are
* skipping unmapped memory.
*/
switch (lvl) {
default:
panic("Invalid DMAP table level: %d\n", lvl);
case 1:
tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
break;
case 2:
tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
break;
case 3:
tmpva += PAGE_SIZE;
break;
}
} else {
/* We can't demote/promote this entry */
MPASS((pmap_load(ptep) & ATTR_SW_NO_PROMOTE) == 0);
/*
* Find the entry and demote it if the requested change
* only applies to part of the address range mapped by
* the entry.
*/
switch (lvl) {
default:
panic("Invalid DMAP table level: %d\n", lvl);
case 1:
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
if ((tmpva & L1_OFFSET) == 0 &&
(base + size - tmpva) >= L1_SIZE) {
pte_size = L1_SIZE;
break;
}
newpte = pmap_demote_l1(kernel_pmap, ptep,
tmpva & ~L1_OFFSET);
if (newpte == NULL)
return (EINVAL);
ptep = pmap_l1_to_l2(ptep, tmpva);
/* FALLTHROUGH */
case 2:
if ((pmap_load(ptep) & ATTR_CONTIGUOUS) != 0) {
if ((tmpva & L2C_OFFSET) == 0 &&
(base + size - tmpva) >= L2C_SIZE) {
pte_size = L2C_SIZE;
break;
}
if (!pmap_demote_l2c(kernel_pmap, ptep,
tmpva))
return (EINVAL);
}
if ((tmpva & L2_OFFSET) == 0 &&
(base + size - tmpva) >= L2_SIZE) {
pte_size = L2_SIZE;
break;
}
newpte = pmap_demote_l2(kernel_pmap, ptep,
tmpva);
if (newpte == NULL)
return (EINVAL);
ptep = pmap_l2_to_l3(ptep, tmpva);
/* FALLTHROUGH */
case 3:
if ((pmap_load(ptep) & ATTR_CONTIGUOUS) != 0) {
if ((tmpva & L3C_OFFSET) == 0 &&
(base + size - tmpva) >= L3C_SIZE) {
pte_size = L3C_SIZE;
break;
}
if (!pmap_demote_l3c(kernel_pmap, ptep,
tmpva))
return (EINVAL);
}
pte_size = PAGE_SIZE;
break;
}
/* Update the entry */
pte = pmap_load(ptep);
pte &= ~mask;
pte |= bits;
switch (pte_size) {
case L2C_SIZE:
pmap_update_strided(kernel_pmap, ptep, ptep +
L2C_ENTRIES, pte, tmpva, L2_SIZE, L2C_SIZE);
break;
case L3C_SIZE:
pmap_update_strided(kernel_pmap, ptep, ptep +
L3C_ENTRIES, pte, tmpva, L3_SIZE, L3C_SIZE);
break;
default:
/*
* We are updating a single block or page entry,
* so regardless of pte_size pass PAGE_SIZE in
* order that a single TLB invalidation is
* performed.
*/
pmap_update_entry(kernel_pmap, ptep, pte, tmpva,
PAGE_SIZE);
break;
}
pa = PTE_TO_PHYS(pte);
if (!VIRT_IN_DMAP(tmpva) && PHYS_IN_DMAP(pa)) {
/*
* Keep the DMAP memory in sync.
*/
rv = pmap_change_props_locked(
PHYS_TO_DMAP(pa), pte_size,
prot, mode, true);
if (rv != 0)
return (rv);
}
/*
* If moving to a non-cacheable entry flush
* the cache.
*/
if (mode == VM_MEMATTR_UNCACHEABLE)
cpu_dcache_wbinv_range((void *)tmpva, pte_size);
tmpva += pte_size;
}
}
return (0);
}
/*
* Create an L2 table to map all addresses within an L1 mapping.
*/
static pt_entry_t *
pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
{
pt_entry_t *l2, newl2, oldl1;
vm_offset_t tmpl1;
vm_paddr_t l2phys, phys;
vm_page_t ml2;
int i;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
oldl1 = pmap_load(l1);
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
("pmap_demote_l1: Demoting a non-block entry"));
KASSERT((va & L1_OFFSET) == 0,
("pmap_demote_l1: Invalid virtual address %#lx", va));
KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
("pmap_demote_l1: Level 1 table shouldn't be managed"));
KASSERT((oldl1 & ATTR_SW_NO_PROMOTE) == 0,
("pmap_demote_l1: Demoting entry with no-demote flag set"));
tmpl1 = 0;
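/*
 * If the L1 block being demoted maps the page that holds the L1
 * entry itself, reserve a temporary KVA page so that the entry
 * remains accessible while it is rewritten.
 */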
if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
tmpl1 = kva_alloc(PAGE_SIZE);
if (tmpl1 == 0)
return (NULL);
}
if ((ml2 = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED)) ==
NULL) {
CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
" in pmap %p", va, pmap);
l2 = NULL;
goto fail;
}
l2phys = VM_PAGE_TO_PHYS(ml2);
l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
/* Address the range points at */
phys = PTE_TO_PHYS(oldl1);
/* The attributes from the old l1 entry to be copied */
newl2 = oldl1 & ATTR_MASK;
/* Create the new entries */
newl2 |= ATTR_CONTIGUOUS;
for (i = 0; i < Ln_ENTRIES; i++) {
l2[i] = newl2 | phys;
phys += L2_SIZE;
}
KASSERT(l2[0] == (ATTR_CONTIGUOUS | (oldl1 & ~ATTR_DESCR_MASK) |
L2_BLOCK), ("Invalid l2 page (%lx != %lx)", l2[0],
ATTR_CONTIGUOUS | (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
if (tmpl1 != 0) {
pmap_kenter(tmpl1, PAGE_SIZE,
DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET,
VM_MEMATTR_WRITE_BACK);
l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
}
pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
counter_u64_add(pmap_l1_demotions, 1);
fail:
if (tmpl1 != 0) {
pmap_kremove(tmpl1);
kva_free(tmpl1, PAGE_SIZE);
}
return (l2);
}
static void
pmap_fill_l3(pt_entry_t *firstl3, pt_entry_t newl3)
{
pt_entry_t *l3;
for (l3 = firstl3; l3 - firstl3 < Ln_ENTRIES; l3++) {
*l3 = newl3;
newl3 += L3_SIZE;
}
}
static void
pmap_demote_l2_check(pt_entry_t *firstl3p __unused, pt_entry_t newl3e __unused)
{
#ifdef INVARIANTS
#ifdef DIAGNOSTIC
pt_entry_t *xl3p, *yl3p;
for (xl3p = firstl3p; xl3p < firstl3p + Ln_ENTRIES;
xl3p++, newl3e += PAGE_SIZE) {
if (PTE_TO_PHYS(pmap_load(xl3p)) != PTE_TO_PHYS(newl3e)) {
printf("pmap_demote_l2: xl3e %zd and newl3e map "
"different pages: found %#lx, expected %#lx\n",
xl3p - firstl3p, pmap_load(xl3p), newl3e);
printf("page table dump\n");
for (yl3p = firstl3p; yl3p < firstl3p + Ln_ENTRIES;
yl3p++) {
printf("%zd %#lx\n", yl3p - firstl3p,
pmap_load(yl3p));
}
panic("firstpte");
}
}
#else
KASSERT(PTE_TO_PHYS(pmap_load(firstl3p)) == PTE_TO_PHYS(newl3e),
("pmap_demote_l2: firstl3 and newl3e map different physical"
" addresses"));
#endif
#endif
}
static void
pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2,
struct rwlock **lockp)
{
struct spglist free;
SLIST_INIT(&free);
(void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), true,
&free, lockp);
vm_page_free_pages_toq(&free, true);
}
/*
* Create an L3 table to map all addresses within an L2 mapping.
*/
static pt_entry_t *
pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
struct rwlock **lockp)
{
pt_entry_t *l3, newl3, oldl2;
vm_offset_t tmpl2;
vm_paddr_t l3phys;
vm_page_t ml3;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
l3 = NULL;
oldl2 = pmap_load(l2);
KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
("pmap_demote_l2: Demoting a non-block entry"));
KASSERT((oldl2 & ATTR_SW_NO_PROMOTE) == 0,
("pmap_demote_l2: Demoting entry with no-demote flag set"));
va &= ~L2_OFFSET;
tmpl2 = 0;
if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
tmpl2 = kva_alloc(PAGE_SIZE);
if (tmpl2 == 0)
return (NULL);
}
/*
* Invalidate the 2MB page mapping and return "failure" if the
* mapping was never accessed and not wired.
*/
if ((oldl2 & ATTR_AF) == 0) {
if ((oldl2 & ATTR_SW_WIRED) == 0) {
pmap_demote_l2_abort(pmap, va, l2, lockp);
CTR2(KTR_PMAP,
"pmap_demote_l2: failure for va %#lx in pmap %p",
va, pmap);
goto fail;
}
ml3 = pmap_remove_pt_page(pmap, va);
/* Fill the PTP with L3Es that have ATTR_AF cleared. */
ml3->valid = 0;
} else if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
("pmap_demote_l2: page table page for a wired mapping"
" is missing"));
/*
* If the page table page is missing and the mapping
* is for a kernel address, the mapping must belong to
* either the direct map or the early kernel memory.
* Page table pages are preallocated for every other
* part of the kernel address space, so the direct map
* region and early kernel memory are the only parts of the
* kernel address space that must be handled here.
*/
KASSERT(ADDR_IS_USER(va) || VIRT_IN_DMAP(va) ||
(va >= VM_MIN_KERNEL_ADDRESS && va < kernel_vm_end),
("pmap_demote_l2: No saved mpte for va %#lx", va));
/*
* If the 2MB page mapping belongs to the direct map
* region of the kernel's address space, then the page
* allocation request specifies the highest possible
* priority (VM_ALLOC_INTERRUPT). Otherwise, the
* priority is normal.
*/
ml3 = vm_page_alloc_noobj(
(VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : 0) |
VM_ALLOC_WIRED);
/*
* If the allocation of the new page table page fails,
* invalidate the 2MB page mapping and return "failure".
*/
if (ml3 == NULL) {
pmap_demote_l2_abort(pmap, va, l2, lockp);
CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
" in pmap %p", va, pmap);
goto fail;
}
ml3->pindex = pmap_l2_pindex(va);
if (ADDR_IS_USER(va)) {
ml3->ref_count = NL3PG;
pmap_resident_count_inc(pmap, 1);
}
}
l3phys = VM_PAGE_TO_PHYS(ml3);
l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
newl3 = ATTR_CONTIGUOUS | (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE;
KASSERT((oldl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) !=
(ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM),
("pmap_demote_l2: L2 entry is writeable but not dirty"));
/*
* If the PTP is not leftover from an earlier promotion or it does not
* have ATTR_AF set in every L3E, then fill it. The new L3Es will all
* have ATTR_AF set, unless this is a wired mapping with ATTR_AF clear.
*
* When pmap_update_entry() clears the old L2 mapping, it (indirectly)
* performs a dsb(). That dsb() ensures that the stores for filling
* "l3" are visible before "l3" is added to the page table.
*/
if (!vm_page_all_valid(ml3))
pmap_fill_l3(l3, newl3);
pmap_demote_l2_check(l3, newl3);
/*
* If the mapping has changed attributes, update the L3Es.
*/
if ((pmap_load(l3) & ATTR_PROMOTE) != (newl3 & ATTR_PROMOTE))
pmap_fill_l3(l3, newl3);
/*
* Map the temporary page so we don't lose access to the l2 table.
*/
if (tmpl2 != 0) {
pmap_kenter(tmpl2, PAGE_SIZE,
DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET,
VM_MEMATTR_WRITE_BACK);
l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
}
/*
* The spare PV entries must be reserved prior to demoting the
* mapping, that is, prior to changing the PDE. Otherwise, the state
* of the L2 and the PV lists will be inconsistent, which can result
* in reclaim_pv_chunk() attempting to remove a PV entry from the
* wrong PV list and pmap_pv_demote_l2() failing to find the expected
* PV entry for the 2MB page mapping that is being demoted.
*/
if ((oldl2 & ATTR_SW_MANAGED) != 0)
reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
/*
* Pass PAGE_SIZE so that a single TLB invalidation is performed on
* the 2MB page mapping.
*/
pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);
/*
* Demote the PV entry.
*/
if ((oldl2 & ATTR_SW_MANAGED) != 0)
pmap_pv_demote_l2(pmap, va, PTE_TO_PHYS(oldl2), lockp);
counter_u64_add(pmap_l2_demotions, 1);
CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx"
" in pmap %p %lx", va, pmap, l3[0]);
fail:
if (tmpl2 != 0) {
pmap_kremove(tmpl2);
kva_free(tmpl2, PAGE_SIZE);
}
return (l3);
}
static pt_entry_t *
pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
{
struct rwlock *lock;
pt_entry_t *l3;
lock = NULL;
l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
if (lock != NULL)
rw_wunlock(lock);
return (l3);
}
/*
* Demote an L2C superpage mapping to L2C_ENTRIES L2 block mappings.
*/
static bool
pmap_demote_l2c(pmap_t pmap, pt_entry_t *l2p, vm_offset_t va)
{
pd_entry_t *l2c_end, *l2c_start, l2e, mask, nbits, *tl2p;
vm_offset_t tmpl3;
register_t intr;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
l2c_start = (pd_entry_t *)((uintptr_t)l2p & ~((L2C_ENTRIES *
sizeof(pd_entry_t)) - 1));
l2c_end = l2c_start + L2C_ENTRIES;
tmpl3 = 0;
if ((va & ~L2C_OFFSET) < (vm_offset_t)l2c_end &&
(vm_offset_t)l2c_start < (va & ~L2C_OFFSET) + L2C_SIZE) {
tmpl3 = kva_alloc(PAGE_SIZE);
if (tmpl3 == 0)
return (false);
pmap_kenter(tmpl3, PAGE_SIZE,
DMAP_TO_PHYS((vm_offset_t)l2c_start) & ~L3_OFFSET,
VM_MEMATTR_WRITE_BACK);
l2c_start = (pd_entry_t *)(tmpl3 +
((vm_offset_t)l2c_start & PAGE_MASK));
l2c_end = (pd_entry_t *)(tmpl3 +
((vm_offset_t)l2c_end & PAGE_MASK));
}
mask = 0;
nbits = ATTR_DESCR_VALID;
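/*
 * Disable interrupts so that this CPU is neither preempted nor
 * services an interrupt while the L2 entries below are temporarily
 * invalid.
 */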
intr = intr_disable();
/*
* Break the mappings.
*/
for (tl2p = l2c_start; tl2p < l2c_end; tl2p++) {
/*
* Clear the mapping's contiguous and valid bits, but leave
* the rest of the entry unchanged, so that a lockless,
* concurrent pmap_kextract() can still lookup the physical
* address.
*/
l2e = pmap_load(tl2p);
KASSERT((l2e & ATTR_CONTIGUOUS) != 0,
("pmap_demote_l2c: missing ATTR_CONTIGUOUS"));
KASSERT((l2e & (ATTR_SW_DBM | ATTR_S1_AP_RW_BIT)) !=
(ATTR_SW_DBM | ATTR_S1_AP(ATTR_S1_AP_RO)),
("pmap_demote_l2c: missing ATTR_S1_AP_RW"));
while (!atomic_fcmpset_64(tl2p, &l2e, l2e & ~(ATTR_CONTIGUOUS |
ATTR_DESCR_VALID)))
cpu_spinwait();
/*
* Hardware accessed and dirty bit maintenance might only
* update a single L2 entry, so we must combine the accessed
* and dirty bits from this entire set of contiguous L2
* entries.
*/
if ((l2e & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_SW_DBM))
mask = ATTR_S1_AP_RW_BIT;
nbits |= l2e & ATTR_AF;
}
if ((nbits & ATTR_AF) != 0) {
pmap_s1_invalidate_strided(pmap, va & ~L2C_OFFSET, (va +
L2C_SIZE) & ~L2C_OFFSET, L2_SIZE, true);
}
/*
* Remake the mappings, updating the accessed and dirty bits.
*/
l2e = (pmap_load(l2c_start) & ~mask) | nbits;
for (tl2p = l2c_start; tl2p < l2c_end; tl2p++) {
pmap_store(tl2p, l2e);
l2e += L2_SIZE;
}
dsb(ishst);
intr_restore(intr);
if (tmpl3 != 0) {
pmap_kremove(tmpl3);
kva_free(tmpl3, PAGE_SIZE);
}
counter_u64_add(pmap_l2c_demotions, 1);
CTR2(KTR_PMAP, "pmap_demote_l2c: success for va %#lx in pmap %p",
va, pmap);
return (true);
}
/*
* Demote an L3C superpage mapping to L3C_ENTRIES 4KB page mappings.
*/
static bool
pmap_demote_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va)
{
pt_entry_t *l3c_end, *l3c_start, l3e, mask, nbits, *tl3p;
vm_offset_t tmpl3;
register_t intr;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
l3c_start = (pt_entry_t *)((uintptr_t)l3p & ~((L3C_ENTRIES *
sizeof(pt_entry_t)) - 1));
l3c_end = l3c_start + L3C_ENTRIES;
tmpl3 = 0;
if ((va & ~L3C_OFFSET) < (vm_offset_t)l3c_end &&
(vm_offset_t)l3c_start < (va & ~L3C_OFFSET) + L3C_SIZE) {
tmpl3 = kva_alloc(PAGE_SIZE);
if (tmpl3 == 0)
return (false);
pmap_kenter(tmpl3, PAGE_SIZE,
DMAP_TO_PHYS((vm_offset_t)l3c_start) & ~L3_OFFSET,
VM_MEMATTR_WRITE_BACK);
l3c_start = (pt_entry_t *)(tmpl3 +
((vm_offset_t)l3c_start & PAGE_MASK));
l3c_end = (pt_entry_t *)(tmpl3 +
((vm_offset_t)l3c_end & PAGE_MASK));
}
mask = 0;
nbits = ATTR_DESCR_VALID;
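/*
 * Disable interrupts so that this CPU is neither preempted nor
 * services an interrupt while the L3 entries below are temporarily
 * invalid.
 */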
intr = intr_disable();
/*
* Break the mappings.
*/
for (tl3p = l3c_start; tl3p < l3c_end; tl3p++) {
/*
* Clear the mapping's contiguous and valid bits, but leave
* the rest of the entry unchanged, so that a lockless,
* concurrent pmap_kextract() can still lookup the physical
* address.
*/
l3e = pmap_load(tl3p);
KASSERT((l3e & ATTR_CONTIGUOUS) != 0,
("pmap_demote_l3c: missing ATTR_CONTIGUOUS"));
KASSERT((l3e & (ATTR_SW_DBM | ATTR_S1_AP_RW_BIT)) !=
(ATTR_SW_DBM | ATTR_S1_AP(ATTR_S1_AP_RO)),
("pmap_demote_l3c: missing ATTR_S1_AP_RW"));
while (!atomic_fcmpset_64(tl3p, &l3e, l3e & ~(ATTR_CONTIGUOUS |
ATTR_DESCR_VALID)))
cpu_spinwait();
/*
* Hardware accessed and dirty bit maintenance might only
* update a single L3 entry, so we must combine the accessed
* and dirty bits from this entire set of contiguous L3
* entries.
*/
if ((l3e & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_SW_DBM))
mask = ATTR_S1_AP_RW_BIT;
nbits |= l3e & ATTR_AF;
}
if ((nbits & ATTR_AF) != 0) {
pmap_invalidate_range(pmap, va & ~L3C_OFFSET, (va + L3C_SIZE) &
~L3C_OFFSET, true);
}
/*
* Remake the mappings, updating the accessed and dirty bits.
*/
l3e = (pmap_load(l3c_start) & ~mask) | nbits;
for (tl3p = l3c_start; tl3p < l3c_end; tl3p++) {
pmap_store(tl3p, l3e);
l3e += L3_SIZE;
}
dsb(ishst);
intr_restore(intr);
if (tmpl3 != 0) {
pmap_kremove(tmpl3);
kva_free(tmpl3, PAGE_SIZE);
}
counter_u64_add(pmap_l3c_demotions, 1);
CTR2(KTR_PMAP, "pmap_demote_l3c: success for va %#lx in pmap %p",
va, pmap);
return (true);
}
/*
* Accumulate the accessed and dirty bits within an L3C superpage and
* return the specified PTE with them applied correctly.
*/
static pt_entry_t
pmap_load_l3c(pt_entry_t *l3p)
{
pt_entry_t *l3c_end, *l3c_start, l3e, mask, nbits, *tl3p;
l3c_start = (pt_entry_t *)((uintptr_t)l3p & ~((L3C_ENTRIES *
sizeof(pt_entry_t)) - 1));
l3c_end = l3c_start + L3C_ENTRIES;
mask = 0;
nbits = 0;
/* Iterate over each mapping in the superpage. */
for (tl3p = l3c_start; tl3p < l3c_end; tl3p++) {
l3e = pmap_load(tl3p);
KASSERT((l3e & ATTR_CONTIGUOUS) != 0,
("pmap_load_l3c: missing ATTR_CONTIGUOUS"));
/* Update mask if the current page has its dirty bit set. */
if ((l3e & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
(ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_SW_DBM))
mask = ATTR_S1_AP_RW_BIT;
/* Update nbits if the accessed bit is set. */
nbits |= l3e & ATTR_AF;
}
return ((pmap_load(l3p) & ~mask) | nbits);
}
/*
* Perform the pmap work for mincore(2). If the page is not both referenced and
* modified by this pmap, returns its physical address so that the caller can
* find other mappings.
*/
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
{
pt_entry_t *pte, tpte;
vm_paddr_t mask, pa;
int lvl, psind, val;
bool managed;
PMAP_ASSERT_STAGE1(pmap);
PMAP_LOCK(pmap);
pte = pmap_pte(pmap, addr, &lvl);
if (pte != NULL) {
tpte = pmap_load(pte);
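/*
 * Derive the physical address mask and the reported superpage
 * size index (psind) from the level of the page table entry.
 */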
switch (lvl) {
case 3:
mask = L3_OFFSET;
psind = (tpte & ATTR_CONTIGUOUS) != 0 ? 1 : 0;
break;
case 2:
mask = L2_OFFSET;
psind = 2;
break;
case 1:
mask = L1_OFFSET;
psind = 3;
break;
default:
panic("pmap_mincore: invalid level %d", lvl);
}
managed = (tpte & ATTR_SW_MANAGED) != 0;
val = MINCORE_INCORE | MINCORE_PSIND(psind);
if ((managed && pmap_pte_dirty(pmap, tpte)) || (!managed &&
(tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)))
val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
if ((tpte & ATTR_AF) == ATTR_AF)
val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
pa = PTE_TO_PHYS(tpte) | (addr & mask);
} else {
managed = false;
val = 0;
}
if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
(MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
*pap = pa;
}
PMAP_UNLOCK(pmap);
return (val);
}
/*
* Garbage collect every ASID that is neither active on a processor nor
* reserved.
*/
static void
pmap_reset_asid_set(pmap_t pmap)
{
pmap_t curpmap;
int asid, cpuid, epoch;
struct asid_set *set;
enum pmap_stage stage;
set = pmap->pm_asid_set;
stage = pmap->pm_stage;
KASSERT(set != NULL, ("%s: NULL asid set", __func__));
mtx_assert(&set->asid_set_mutex, MA_OWNED);
/*
* Ensure that the store to asid_epoch is globally visible before the
* loads from pc_curpmap are performed.
*/
epoch = set->asid_epoch + 1;
if (epoch == INT_MAX)
epoch = 0;
set->asid_epoch = epoch;
dsb(ishst);
if (stage == PM_STAGE1) {
__asm __volatile("tlbi vmalle1is");
} else {
KASSERT(pmap_clean_stage2_tlbi != NULL,
("%s: Unset stage 2 tlb invalidation callback\n",
__func__));
pmap_clean_stage2_tlbi();
}
dsb(ish);
bit_nclear(set->asid_set, ASID_FIRST_AVAILABLE,
set->asid_set_size - 1);
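/*
 * Re-reserve the ASIDs that are in active use on other CPUs and
 * move those pmaps' cookies into the new epoch so that their
 * ASIDs are not reallocated.
 */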
CPU_FOREACH(cpuid) {
if (cpuid == curcpu)
continue;
if (stage == PM_STAGE1) {
curpmap = pcpu_find(cpuid)->pc_curpmap;
PMAP_ASSERT_STAGE1(pmap);
} else {
curpmap = pcpu_find(cpuid)->pc_curvmpmap;
if (curpmap == NULL)
continue;
PMAP_ASSERT_STAGE2(pmap);
}
KASSERT(curpmap->pm_asid_set == set, ("Incorrect set"));
asid = COOKIE_TO_ASID(curpmap->pm_cookie);
if (asid == -1)
continue;
bit_set(set->asid_set, asid);
curpmap->pm_cookie = COOKIE_FROM(asid, epoch);
}
}
/*
* Allocate a new ASID for the specified pmap.
*/
static void
pmap_alloc_asid(pmap_t pmap)
{
struct asid_set *set;
int new_asid;
set = pmap->pm_asid_set;
KASSERT(set != NULL, ("%s: NULL asid set", __func__));
mtx_lock_spin(&set->asid_set_mutex);
/*
* While this processor was waiting to acquire the asid set mutex,
* pmap_reset_asid_set() running on another processor might have
* updated this pmap's cookie to the current epoch. In which case, we
* don't need to allocate a new ASID.
*/
if (COOKIE_TO_EPOCH(pmap->pm_cookie) == set->asid_epoch)
goto out;
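/*
 * Find a free ASID, searching forward from the most recent
 * allocation and wrapping around if necessary.  If the set is
 * exhausted, reset it and search again.
 */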
bit_ffc_at(set->asid_set, set->asid_next, set->asid_set_size,
&new_asid);
if (new_asid == -1) {
bit_ffc_at(set->asid_set, ASID_FIRST_AVAILABLE,
set->asid_next, &new_asid);
if (new_asid == -1) {
pmap_reset_asid_set(pmap);
bit_ffc_at(set->asid_set, ASID_FIRST_AVAILABLE,
set->asid_set_size, &new_asid);
KASSERT(new_asid != -1, ("ASID allocation failure"));
}
}
bit_set(set->asid_set, new_asid);
set->asid_next = new_asid + 1;
pmap->pm_cookie = COOKIE_FROM(new_asid, set->asid_epoch);
out:
mtx_unlock_spin(&set->asid_set_mutex);
}
static uint64_t __read_mostly ttbr_flags;
/*
* Compute the value that should be stored in ttbr0 to activate the specified
* pmap. This value may change from time to time.
*/
uint64_t
pmap_to_ttbr0(pmap_t pmap)
{
uint64_t ttbr;
ttbr = pmap->pm_ttbr;
ttbr |= ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
ttbr |= ttbr_flags;
return (ttbr);
}
static void
pmap_set_cnp(void *arg)
{
uint64_t ttbr0, ttbr1;
u_int cpuid;
cpuid = *(u_int *)arg;
if (cpuid == curcpu) {
/*
* Set the flags while all CPUs are handling the
* smp_rendezvous so they will not call pmap_to_ttbr0. Any calls
* to pmap_to_ttbr0 after this will have the CnP flag set.
* The dsb after invalidating the TLB will act as a barrier
* to ensure all CPUs can observe this change.
*/
ttbr_flags |= TTBR_CnP;
}
ttbr0 = READ_SPECIALREG(ttbr0_el1);
ttbr0 |= TTBR_CnP;
ttbr1 = READ_SPECIALREG(ttbr1_el1);
ttbr1 |= TTBR_CnP;
/* Update ttbr{0,1}_el1 with the CnP flag */
WRITE_SPECIALREG(ttbr0_el1, ttbr0);
WRITE_SPECIALREG(ttbr1_el1, ttbr1);
isb();
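/*
 * Invalidate all stage 1, EL1 TLB entries so that translations
 * cached without the CnP attribute are discarded.
 */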
__asm __volatile("tlbi vmalle1is");
dsb(ish);
isb();
}
/*
* Defer enabling some features until we have read the ID registers to know
* if they are supported on all CPUs.
*/
static void
pmap_init_mp(void *dummy __unused)
{
uint64_t reg;
if (get_kernel_reg(ID_AA64PFR1_EL1, &reg)) {
if (ID_AA64PFR1_BT_VAL(reg) != ID_AA64PFR1_BT_NONE) {
if (bootverbose)
printf("Enabling BTI\n");
pmap_bti_support = true;
pmap_bti_ranges_zone = uma_zcreate("BTI ranges",
sizeof(struct rs_el), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
}
}
}
SYSINIT(pmap_init_mp, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_mp, NULL);
/*
* Defer enabling CnP until we have read the ID registers to know if it's
* supported on all CPUs.
*/
static void
pmap_init_cnp(void *dummy __unused)
{
uint64_t reg;
u_int cpuid;
if (!get_kernel_reg(ID_AA64MMFR2_EL1, &reg))
return;
if (ID_AA64MMFR2_CnP_VAL(reg) != ID_AA64MMFR2_CnP_NONE) {
if (bootverbose)
printf("Enabling CnP\n");
cpuid = curcpu;
smp_rendezvous(NULL, pmap_set_cnp, NULL, &cpuid);
}
}
SYSINIT(pmap_init_cnp, SI_SUB_SMP, SI_ORDER_ANY, pmap_init_cnp, NULL);
static bool
pmap_activate_int(struct thread *td, pmap_t pmap)
{
struct asid_set *set;
int epoch;
KASSERT(PCPU_GET(curpmap) != NULL, ("no active pmap"));
KASSERT(pmap != kernel_pmap, ("kernel pmap activation"));
if ((pmap->pm_stage == PM_STAGE1 && pmap == PCPU_GET(curpmap)) ||
(pmap->pm_stage == PM_STAGE2 && pmap == PCPU_GET(curvmpmap))) {
/*
* Handle the possibility that the old thread was preempted
* after an "ic" or "tlbi" instruction but before it performed
* a "dsb" instruction. If the old thread migrates to a new
* processor, its completion of a "dsb" instruction on that
* new processor does not guarantee that the "ic" or "tlbi"
* instructions performed on the old processor have completed.
*/
dsb(ish);
return (false);
}
set = pmap->pm_asid_set;
KASSERT(set != NULL, ("%s: NULL asid set", __func__));
/*
* Ensure that the store to curpmap is globally visible before the
* load from asid_epoch is performed.
*/
if (pmap->pm_stage == PM_STAGE1)
PCPU_SET(curpmap, pmap);
else
PCPU_SET(curvmpmap, pmap);
dsb(ish);
epoch = COOKIE_TO_EPOCH(pmap->pm_cookie);
if (epoch >= 0 && epoch != set->asid_epoch)
pmap_alloc_asid(pmap);
if (pmap->pm_stage == PM_STAGE1) {
uint64_t new_tcr, tcr;
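/*
 * Apply the per-process TCR_EL1 fields if they differ from the
 * values currently programmed.
 */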
new_tcr = td->td_proc->p_md.md_tcr;
tcr = READ_SPECIALREG(tcr_el1);
if ((tcr & MD_TCR_FIELDS) != new_tcr) {
tcr &= ~MD_TCR_FIELDS;
tcr |= new_tcr;
WRITE_SPECIALREG(tcr_el1, tcr);
}
set_ttbr0(pmap_to_ttbr0(pmap));
if (PCPU_GET(bcast_tlbi_workaround) != 0)
invalidate_local_icache();
}
return (true);
}
void
pmap_activate_vm(pmap_t pmap)
{
PMAP_ASSERT_STAGE2(pmap);
(void)pmap_activate_int(NULL, pmap);
}
void
pmap_activate(struct thread *td)
{
pmap_t pmap;
pmap = vmspace_pmap(td->td_proc->p_vmspace);
PMAP_ASSERT_STAGE1(pmap);
critical_enter();
(void)pmap_activate_int(td, pmap);
critical_exit();
}
/*
* Activate the thread we are switching to.
* To simplify the assembly in cpu_throw, return the new thread's pcb.
*/
struct pcb *
pmap_switch(struct thread *new)
{
pcpu_bp_harden bp_harden;
struct pcb *pcb;
/* Store the new curthread */
PCPU_SET(curthread, new);
/* And the new pcb */
pcb = new->td_pcb;
PCPU_SET(curpcb, pcb);
/*
* TODO: We may need to flush the cache here if switching
* to a user process.
*/
if (pmap_activate_int(new, vmspace_pmap(new->td_proc->p_vmspace))) {
/*
* Stop userspace from training the branch predictor against
* other processes. This will call into a CPU specific
* function that clears the branch predictor state.
*/
bp_harden = PCPU_GET(bp_harden);
if (bp_harden != NULL)
bp_harden();
}
return (pcb);
}
void
pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
{
PMAP_ASSERT_STAGE1(pmap);
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
if (ADDR_IS_KERNEL(va)) {
cpu_icache_sync_range((void *)va, sz);
} else {
u_int len, offset;
vm_paddr_t pa;
/* Find the length of data in this page to flush */
offset = va & PAGE_MASK;
len = imin(PAGE_SIZE - offset, sz);
while (sz != 0) {
/* Extract the physical address & find it in the DMAP */
pa = pmap_extract(pmap, va);
if (pa != 0)
cpu_icache_sync_range((void *)PHYS_TO_DMAP(pa),
len);
/* Move to the next page */
sz -= len;
va += len;
/* Set the length for the next iteration */
len = imin(PAGE_SIZE, sz);
}
}
}
static int
pmap_stage2_fault(pmap_t pmap, uint64_t esr, uint64_t far)
{
pd_entry_t *pdep;
pt_entry_t *ptep, pte;
int rv, lvl, dfsc;
PMAP_ASSERT_STAGE2(pmap);
rv = KERN_FAILURE;
/* Data and insn aborts use same encoding for FSC field. */
dfsc = esr & ISS_DATA_DFSC_MASK;
switch (dfsc) {
case ISS_DATA_DFSC_TF_L0:
case ISS_DATA_DFSC_TF_L1:
case ISS_DATA_DFSC_TF_L2:
case ISS_DATA_DFSC_TF_L3:
PMAP_LOCK(pmap);
pdep = pmap_pde(pmap, far, &lvl);
if (pdep == NULL || lvl != (dfsc - ISS_DATA_DFSC_TF_L1)) {
PMAP_UNLOCK(pmap);
break;
}
switch (lvl) {
case 0:
ptep = pmap_l0_to_l1(pdep, far);
break;
case 1:
ptep = pmap_l1_to_l2(pdep, far);
break;
case 2:
ptep = pmap_l2_to_l3(pdep, far);
break;
default:
panic("%s: Invalid pde level %d", __func__,lvl);
}
goto fault_exec;
case ISS_DATA_DFSC_AFF_L1:
case ISS_DATA_DFSC_AFF_L2:
case ISS_DATA_DFSC_AFF_L3:
PMAP_LOCK(pmap);
ptep = pmap_pte(pmap, far, &lvl);
fault_exec:
if (ptep != NULL && (pte = pmap_load(ptep)) != 0) {
/*
* If accessing an executable page, invalidate
* the I-cache so it will be valid when we
* continue execution in the guest. The D-cache
* is assumed to already be clean to the Point
* of Coherency.
*/
if ((pte & ATTR_S2_XN_MASK) !=
ATTR_S2_XN(ATTR_S2_XN_NONE)) {
invalidate_icache();
}
pmap_set_bits(ptep, ATTR_AF | ATTR_DESCR_VALID);
rv = KERN_SUCCESS;
}
PMAP_UNLOCK(pmap);
break;
}
return (rv);
}
int
pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
{
pt_entry_t pte, *ptep;
register_t intr;
uint64_t ec, par;
int lvl, rv;
rv = KERN_FAILURE;
ec = ESR_ELx_EXCEPTION(esr);
switch (ec) {
case EXCP_INSN_ABORT_L:
case EXCP_INSN_ABORT:
case EXCP_DATA_ABORT_L:
case EXCP_DATA_ABORT:
break;
default:
return (rv);
}
if (pmap->pm_stage == PM_STAGE2)
return (pmap_stage2_fault(pmap, esr, far));
/* Data and insn aborts use same encoding for FSC field. */
switch (esr & ISS_DATA_DFSC_MASK) {
case ISS_DATA_DFSC_AFF_L1:
case ISS_DATA_DFSC_AFF_L2:
case ISS_DATA_DFSC_AFF_L3:
PMAP_LOCK(pmap);
ptep = pmap_pte(pmap, far, &lvl);
if (ptep != NULL) {
pmap_set_bits(ptep, ATTR_AF);
rv = KERN_SUCCESS;
/*
* XXXMJ as an optimization we could mark the entry
* dirty if this is a write fault.
*/
}
PMAP_UNLOCK(pmap);
break;
case ISS_DATA_DFSC_PF_L1:
case ISS_DATA_DFSC_PF_L2:
case ISS_DATA_DFSC_PF_L3:
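/*
 * A permission fault on a write to a mapping with ATTR_SW_DBM set
 * is the software dirty-bit emulation case: make the mapping
 * writable and discard the stale, read-only TLB entry.
 */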
if ((ec != EXCP_DATA_ABORT_L && ec != EXCP_DATA_ABORT) ||
(esr & ISS_DATA_WnR) == 0)
return (rv);
PMAP_LOCK(pmap);
ptep = pmap_pte(pmap, far, &lvl);
if (ptep != NULL &&
((pte = pmap_load(ptep)) & ATTR_SW_DBM) != 0) {
if ((pte & ATTR_S1_AP_RW_BIT) ==
ATTR_S1_AP(ATTR_S1_AP_RO)) {
pmap_clear_bits(ptep, ATTR_S1_AP_RW_BIT);
pmap_s1_invalidate_page(pmap, far, true);
}
rv = KERN_SUCCESS;
}
PMAP_UNLOCK(pmap);
break;
case ISS_DATA_DFSC_TF_L0:
case ISS_DATA_DFSC_TF_L1:
case ISS_DATA_DFSC_TF_L2:
case ISS_DATA_DFSC_TF_L3:
/*
* Retry the translation. A break-before-make sequence can
* produce a transient fault.
*/
if (pmap == kernel_pmap) {
/*
* The translation fault may have occurred within a
* critical section. Therefore, we must check the
* address without acquiring the kernel pmap's lock.
*/
if (pmap_klookup(far, NULL))
rv = KERN_SUCCESS;
} else {
bool owned;
/*
* In the EFIRT driver we lock the pmap before
* calling into the runtime service. As the lock
* is already owned by the current thread skip
* locking it again.
*/
owned = PMAP_OWNED(pmap);
if (!owned)
PMAP_LOCK(pmap);
/* Ask the MMU to check the address. */
intr = intr_disable();
par = arm64_address_translate_s1e0r(far);
intr_restore(intr);
if (!owned)
PMAP_UNLOCK(pmap);
/*
* If the translation was successful, then we can
* return success to the trap handler.
*/
if (PAR_SUCCESS(par))
rv = KERN_SUCCESS;
}
break;
}
return (rv);
}
/*
* Increase the starting virtual address of the given mapping if a
* different alignment might result in more superpage mappings.
*/
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
vm_offset_t *addr, vm_size_t size)
{
vm_offset_t superpage_offset;
if (size < L3C_SIZE)
return;
if (object != NULL && (object->flags & OBJ_COLORED) != 0)
offset += ptoa(object->pg_color);
/*
* Considering the object's physical alignment, is the mapping large
* enough to encompass an L2 (2MB/32MB) superpage ...
*/
superpage_offset = offset & L2_OFFSET;
if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) >= L2_SIZE) {
/*
* If the virtual and physical alignments differ, then
* increase the virtual address so that the alignments match.
*/
if ((*addr & L2_OFFSET) < superpage_offset)
*addr = (*addr & ~L2_OFFSET) + superpage_offset;
else if ((*addr & L2_OFFSET) > superpage_offset)
*addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) +
superpage_offset;
return;
}
/* ... or an L3C (64KB/2MB) superpage? */
superpage_offset = offset & L3C_OFFSET;
if (size - ((L3C_SIZE - superpage_offset) & L3C_OFFSET) >= L3C_SIZE) {
if ((*addr & L3C_OFFSET) < superpage_offset)
*addr = (*addr & ~L3C_OFFSET) + superpage_offset;
else if ((*addr & L3C_OFFSET) > superpage_offset)
*addr = ((*addr + L3C_OFFSET) & ~L3C_OFFSET) +
superpage_offset;
}
}
/**
* Get the kernel virtual address of a set of physical pages. If there are
* physical addresses not covered by the DMAP, perform a transient mapping
* that will be removed when calling pmap_unmap_io_transient.
*
* \param page The pages for which the caller wishes to obtain
* kernel virtual addresses.
* \param vaddr On return contains the kernel virtual memory address
* of the pages passed in the page parameter.
* \param count Number of pages passed in.
* \param can_fault true if the thread using the mapped pages can take
* page faults, false otherwise.
*
* \returns true if the caller must call pmap_unmap_io_transient when
* finished or false otherwise.
*
*/
bool
pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
bool can_fault)
{
vm_paddr_t paddr;
bool needs_mapping;
int error __diagused, i;
/*
* Allocate any KVA space that we need, this is done in a separate
* loop to prevent calling vmem_alloc while pinned.
*/
needs_mapping = false;
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (__predict_false(!PHYS_IN_DMAP(paddr))) {
error = vmem_alloc(kernel_arena, PAGE_SIZE,
M_BESTFIT | M_WAITOK, &vaddr[i]);
KASSERT(error == 0, ("vmem_alloc failed: %d", error));
needs_mapping = true;
} else {
vaddr[i] = PHYS_TO_DMAP(paddr);
}
}
/* Exit early if everything is covered by the DMAP */
if (!needs_mapping)
return (false);
if (!can_fault)
sched_pin();
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (!PHYS_IN_DMAP(paddr)) {
panic(
"pmap_map_io_transient: TODO: Map out of DMAP data");
}
}
return (needs_mapping);
}
void
pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
bool can_fault)
{
vm_paddr_t paddr;
int i;
if (!can_fault)
sched_unpin();
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (!PHYS_IN_DMAP(paddr)) {
panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
}
}
}
bool
pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
{
return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_THROUGH);
}
static void *
bti_dup_range(void *ctx __unused, void *data)
{
struct rs_el *node, *new_node;
new_node = uma_zalloc(pmap_bti_ranges_zone, M_NOWAIT);
if (new_node == NULL)
return (NULL);
node = data;
memcpy(new_node, node, sizeof(*node));
return (new_node);
}
static void
bti_free_range(void *ctx __unused, void *node)
{
uma_zfree(pmap_bti_ranges_zone, node);
}
static int
pmap_bti_assign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
struct rs_el *rs;
int error;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
MPASS(pmap->pm_bti != NULL);
rs = uma_zalloc(pmap_bti_ranges_zone, M_NOWAIT);
if (rs == NULL)
return (ENOMEM);
error = rangeset_insert(pmap->pm_bti, sva, eva, rs);
if (error != 0)
uma_zfree(pmap_bti_ranges_zone, rs);
return (error);
}
static void
pmap_bti_deassign_all(pmap_t pmap)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if (pmap->pm_bti != NULL)
rangeset_remove_all(pmap->pm_bti);
}
/*
* Returns true if the BTI setting is the same across the specified address
* range, and false otherwise. When returning true, updates the referenced PTE
* to reflect the BTI setting.
*
* Only stage 1 pmaps support BTI. The kernel pmap is always a stage 1 pmap
* that has the same BTI setting implicitly across its entire address range.
*/
static bool
pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t *pte)
{
struct rs_el *rs;
vm_offset_t va;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(ADDR_IS_CANONICAL(sva),
("%s: Start address not in canonical form: %lx", __func__, sva));
KASSERT(ADDR_IS_CANONICAL(eva),
("%s: End address not in canonical form: %lx", __func__, eva));
KASSERT((*pte & ATTR_S1_GP) == 0,
("%s: pte %lx has ATTR_S1_GP preset", __func__, *pte));
if (pmap == kernel_pmap) {
*pte |= ATTR_KERN_GP;
return (true);
}
if (pmap->pm_bti == NULL)
return (true);
PMAP_ASSERT_STAGE1(pmap);
rs = rangeset_containing(pmap->pm_bti, sva);
if (rs == NULL)
return (rangeset_empty(pmap->pm_bti, sva, eva));
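/*
* sva lies within a BTI range. Walk successive ranges; any gap before
* eva means the BTI setting is not uniform across [sva, eva).
*/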
while ((va = rs->re_end) < eva) {
if ((rs = rangeset_beginning(pmap->pm_bti, va)) == NULL)
return (false);
}
*pte |= ATTR_S1_GP;
return (true);
}
static pt_entry_t
pmap_pte_bti(pmap_t pmap, vm_offset_t va)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
MPASS(ADDR_IS_CANONICAL(va));
if (pmap->pm_stage != PM_STAGE1)
return (0);
if (pmap == kernel_pmap)
return (ATTR_KERN_GP);
if (pmap->pm_bti != NULL &&
rangeset_containing(pmap->pm_bti, va) != NULL)
return (ATTR_S1_GP);
return (0);
}
static void
pmap_bti_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if (pmap->pm_bti != NULL)
rangeset_remove(pmap->pm_bti, sva, eva);
}
static int
pmap_bti_copy(pmap_t dst_pmap, pmap_t src_pmap)
{
PMAP_LOCK_ASSERT(dst_pmap, MA_OWNED);
PMAP_LOCK_ASSERT(src_pmap, MA_OWNED);
MPASS(src_pmap->pm_stage == dst_pmap->pm_stage);
MPASS(src_pmap->pm_bti != NULL);
MPASS(dst_pmap->pm_bti != NULL);
if (src_pmap->pm_bti->rs_data_ctx == NULL)
return (0);
return (rangeset_copy(dst_pmap->pm_bti, src_pmap->pm_bti));
}
static void
pmap_bti_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool set)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PMAP_ASSERT_STAGE1(pmap);
pmap_mask_set_locked(pmap, sva, eva, ATTR_S1_GP, set ? ATTR_S1_GP : 0,
true);
}
int
pmap_bti_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
int error;
if (pmap->pm_bti == NULL)
return (0);
if (!ADDR_IS_CANONICAL(sva) || !ADDR_IS_CANONICAL(eva))
return (EINVAL);
if (pmap->pm_stage != PM_STAGE1)
return (EINVAL);
if (eva <= sva || ADDR_IS_KERNEL(eva))
return (EFAULT);
sva = trunc_page(sva);
eva = round_page(eva);
for (;;) {
PMAP_LOCK(pmap);
error = pmap_bti_assign(pmap, sva, eva);
if (error == 0)
pmap_bti_update_range(pmap, sva, eva, true);
PMAP_UNLOCK(pmap);
if (error != ENOMEM)
break;
vm_wait(NULL);
}
return (error);
}
#if defined(KASAN) || defined(KMSAN)
static pd_entry_t *pmap_san_early_l2;
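/*
* Static bootstrap storage for the early shadow map: an L2-sized block of
* shadow backing and two pages for page-table pages.
*/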
#define SAN_BOOTSTRAP_L2_SIZE (1 * L2_SIZE)
#define SAN_BOOTSTRAP_SIZE (2 * PAGE_SIZE)
static vm_offset_t __nosanitizeaddress
pmap_san_enter_bootstrap_alloc_l2(void)
{
static uint8_t bootstrap_data[SAN_BOOTSTRAP_L2_SIZE] __aligned(L2_SIZE);
static size_t offset = 0;
vm_offset_t addr;
if (offset + L2_SIZE > sizeof(bootstrap_data)) {
panic("%s: out of memory for the bootstrap shadow map L2 entries",
__func__);
}
addr = (uintptr_t)&bootstrap_data[offset];
offset += L2_SIZE;
return (addr);
}
/*
* Bootstrap allocator for the SAN L1 and L2 page-table pages; L3 entries
* may be needed later.
*/
static vm_offset_t __nosanitizeaddress
pmap_san_enter_bootstrap_alloc_pages(int npages)
{
static uint8_t bootstrap_data[SAN_BOOTSTRAP_SIZE] __aligned(PAGE_SIZE);
static size_t offset = 0;
vm_offset_t addr;
if (offset + (npages * PAGE_SIZE) > sizeof(bootstrap_data)) {
panic("%s: out of memory for the bootstrap shadow map",
__func__);
}
addr = (uintptr_t)&bootstrap_data[offset];
offset += (npages * PAGE_SIZE);
return (addr);
}
static void __nosanitizeaddress
pmap_san_enter_bootstrap(void)
{
vm_offset_t freemempos;
/* L1, L2 */
freemempos = pmap_san_enter_bootstrap_alloc_pages(2);
bs_state.freemempos = freemempos;
bs_state.va = KASAN_MIN_ADDRESS;
pmap_bootstrap_l1_table(&bs_state);
pmap_san_early_l2 = bs_state.l2;
}
static vm_page_t
pmap_san_enter_alloc_l3(void)
{
vm_page_t m;
m = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED |
VM_ALLOC_ZERO);
if (m == NULL)
panic("%s: no memory to grow shadow map", __func__);
return (m);
}
static vm_page_t
pmap_san_enter_alloc_l2(void)
{
return (vm_page_alloc_noobj_contig(VM_ALLOC_WIRED | VM_ALLOC_ZERO,
Ln_ENTRIES, 0, ~0ul, L2_SIZE, 0, VM_MEMATTR_DEFAULT));
}
void __nosanitizeaddress __nosanitizememory
pmap_san_enter(vm_offset_t va)
{
pd_entry_t *l1, *l2;
pt_entry_t *l3;
vm_page_t m;
if (virtual_avail == 0) {
vm_offset_t block;
int slot;
bool first;
/* Temporary shadow map prior to pmap_bootstrap(). */
first = pmap_san_early_l2 == NULL;
if (first)
pmap_san_enter_bootstrap();
l2 = pmap_san_early_l2;
slot = pmap_l2_index(va);
if ((pmap_load(&l2[slot]) & ATTR_DESCR_VALID) == 0) {
MPASS(first);
block = pmap_san_enter_bootstrap_alloc_l2();
pmap_store(&l2[slot],
PHYS_TO_PTE(pmap_early_vtophys(block)) |
PMAP_SAN_PTE_BITS | L2_BLOCK);
dmb(ishst);
}
return;
}
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
l1 = pmap_l1(kernel_pmap, va);
MPASS(l1 != NULL);
if ((pmap_load(l1) & ATTR_DESCR_VALID) == 0) {
m = pmap_san_enter_alloc_l3();
pmap_store(l1, VM_PAGE_TO_PTE(m) | L1_TABLE);
}
l2 = pmap_l1_to_l2(l1, va);
if ((pmap_load(l2) & ATTR_DESCR_VALID) == 0) {
m = pmap_san_enter_alloc_l2();
if (m != NULL) {
pmap_store(l2, VM_PAGE_TO_PTE(m) |
PMAP_SAN_PTE_BITS | L2_BLOCK);
} else {
m = pmap_san_enter_alloc_l3();
pmap_store(l2, VM_PAGE_TO_PTE(m) | L2_TABLE);
}
dmb(ishst);
}
if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK)
return;
l3 = pmap_l2_to_l3(l2, va);
if ((pmap_load(l3) & ATTR_DESCR_VALID) != 0)
return;
m = pmap_san_enter_alloc_l3();
pmap_store(l3, VM_PAGE_TO_PTE(m) | PMAP_SAN_PTE_BITS | L3_PAGE);
dmb(ishst);
}
#endif /* KASAN || KMSAN */
/*
* Track a range of the kernel's virtual address space that is contiguous
* in various mapping attributes.
*/
struct pmap_kernel_map_range {
vm_offset_t sva;
pt_entry_t attrs;
int l3pages;
int l3contig;
int l2blocks;
int l2contig;
int l1blocks;
};
static void
sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
vm_offset_t eva)
{
const char *mode;
int index;
if (eva <= range->sva)
return;
index = range->attrs & ATTR_S1_IDX_MASK;
switch (index) {
case ATTR_S1_IDX(VM_MEMATTR_DEVICE_NP):
mode = "DEV-NP";
break;
case ATTR_S1_IDX(VM_MEMATTR_DEVICE):
mode = "DEV";
break;
case ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE):
mode = "UC";
break;
case ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK):
mode = "WB";
break;
case ATTR_S1_IDX(VM_MEMATTR_WRITE_THROUGH):
mode = "WT";
break;
default:
printf(
"%s: unknown memory type %x for range 0x%016lx-0x%016lx\n",
__func__, index, range->sva, eva);
mode = "??";
break;
}
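/*
* Columns: VA range, permissions (w = writable, x/X = kernel/user
* executable, u/s = user/supervisor, g = guarded), memory type, then
* counts of L1 blocks, L2 contiguous runs, L2 blocks, L3 contiguous
* runs, and L3 pages.
*/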
sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c%c %6s %d %d %d %d %d\n",
range->sva, eva,
(range->attrs & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP_RW ? 'w' : '-',
(range->attrs & ATTR_S1_PXN) != 0 ? '-' : 'x',
(range->attrs & ATTR_S1_UXN) != 0 ? '-' : 'X',
(range->attrs & ATTR_S1_AP(ATTR_S1_AP_USER)) != 0 ? 'u' : 's',
(range->attrs & ATTR_S1_GP) != 0 ? 'g' : '-',
mode, range->l1blocks, range->l2contig, range->l2blocks,
range->l3contig, range->l3pages);
/* Reset to sentinel value. */
range->sva = 0xfffffffffffffffful;
}
/*
* Determine whether the attributes specified by a page table entry match those
* being tracked by the current range.
*/
static bool
sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs)
{
return (range->attrs == attrs);
}
static void
sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
pt_entry_t attrs)
{
memset(range, 0, sizeof(*range));
range->sva = va;
range->attrs = attrs;
}
/* Get the block/page attributes that correspond to the table attributes */
static pt_entry_t
sysctl_kmaps_table_attrs(pd_entry_t table)
{
pt_entry_t attrs;
attrs = 0;
if ((table & TATTR_UXN_TABLE) != 0)
attrs |= ATTR_S1_UXN;
if ((table & TATTR_PXN_TABLE) != 0)
attrs |= ATTR_S1_PXN;
if ((table & TATTR_AP_TABLE_RO) != 0)
attrs |= ATTR_S1_AP(ATTR_S1_AP_RO);
return (attrs);
}
/* Read the block/page attributes we care about */
static pt_entry_t
sysctl_kmaps_block_attrs(pt_entry_t block)
{
return (block & (ATTR_S1_AP_MASK | ATTR_S1_XN | ATTR_S1_IDX_MASK |
ATTR_S1_GP));
}
/*
* Given a leaf PTE, derive the mapping's attributes. If they do not match
* those of the current run, dump the address range and its attributes, and
* begin a new run.
*/
static void
sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
vm_offset_t va, pd_entry_t l0e, pd_entry_t l1e, pd_entry_t l2e,
pt_entry_t l3e)
{
pt_entry_t attrs;
attrs = sysctl_kmaps_table_attrs(l0e);
if ((l1e & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) {
attrs |= sysctl_kmaps_block_attrs(l1e);
goto done;
}
attrs |= sysctl_kmaps_table_attrs(l1e);
if ((l2e & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK) {
attrs |= sysctl_kmaps_block_attrs(l2e);
goto done;
}
attrs |= sysctl_kmaps_table_attrs(l2e);
attrs |= sysctl_kmaps_block_attrs(l3e);
done:
if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
sysctl_kmaps_dump(sb, range, va);
sysctl_kmaps_reinit(range, va, attrs);
}
}
static int
sysctl_kmaps(SYSCTL_HANDLER_ARGS)
{
struct pmap_kernel_map_range range;
struct sbuf sbuf, *sb;
pd_entry_t l0e, *l1, l1e, *l2, l2e;
pt_entry_t *l3, l3e;
vm_offset_t sva;
vm_paddr_t pa;
int error, i, j, k, l;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
sb = &sbuf;
sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);
/* Sentinel value. */
range.sva = 0xfffffffffffffffful;
/*
* Iterate over the kernel page tables without holding the kernel pmap
* lock. Kernel page table pages are never freed, so at worst we will
* observe inconsistencies in the output.
*/
for (sva = 0xffff000000000000ul, i = pmap_l0_index(sva); i < Ln_ENTRIES;
i++) {
if (i == pmap_l0_index(DMAP_MIN_ADDRESS))
sbuf_printf(sb, "\nDirect map:\n");
else if (i == pmap_l0_index(VM_MIN_KERNEL_ADDRESS))
sbuf_printf(sb, "\nKernel map:\n");
#ifdef KASAN
else if (i == pmap_l0_index(KASAN_MIN_ADDRESS))
sbuf_printf(sb, "\nKASAN shadow map:\n");
#endif
#ifdef KMSAN
else if (i == pmap_l0_index(KMSAN_SHAD_MIN_ADDRESS))
sbuf_printf(sb, "\nKMSAN shadow map:\n");
else if (i == pmap_l0_index(KMSAN_ORIG_MIN_ADDRESS))
sbuf_printf(sb, "\nKMSAN origin map:\n");
#endif
l0e = kernel_pmap->pm_l0[i];
if ((l0e & ATTR_DESCR_VALID) == 0) {
sysctl_kmaps_dump(sb, &range, sva);
sva += L0_SIZE;
continue;
}
pa = PTE_TO_PHYS(l0e);
l1 = (pd_entry_t *)PHYS_TO_DMAP(pa);
for (j = pmap_l1_index(sva); j < Ln_ENTRIES; j++) {
l1e = l1[j];
if ((l1e & ATTR_DESCR_VALID) == 0) {
sysctl_kmaps_dump(sb, &range, sva);
sva += L1_SIZE;
continue;
}
if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
sysctl_kmaps_check(sb, &range, sva, l0e, l1e,
0, 0);
range.l1blocks++;
sva += L1_SIZE;
continue;
}
pa = PTE_TO_PHYS(l1e);
l2 = (pd_entry_t *)PHYS_TO_DMAP(pa);
for (k = pmap_l2_index(sva); k < Ln_ENTRIES; k++) {
l2e = l2[k];
if ((l2e & ATTR_DESCR_VALID) == 0) {
sysctl_kmaps_dump(sb, &range, sva);
sva += L2_SIZE;
continue;
}
if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) {
sysctl_kmaps_check(sb, &range, sva,
l0e, l1e, l2e, 0);
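/*
* Count an ATTR_CONTIGUOUS run only once, at its first
* (naturally aligned) entry.
*/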
if ((l2e & ATTR_CONTIGUOUS) != 0)
range.l2contig +=
k % L2C_ENTRIES == 0 ?
1 : 0;
else
range.l2blocks++;
sva += L2_SIZE;
continue;
}
pa = PTE_TO_PHYS(l2e);
l3 = (pt_entry_t *)PHYS_TO_DMAP(pa);
for (l = pmap_l3_index(sva); l < Ln_ENTRIES;
l++, sva += L3_SIZE) {
l3e = l3[l];
if ((l3e & ATTR_DESCR_VALID) == 0) {
sysctl_kmaps_dump(sb, &range,
sva);
continue;
}
sysctl_kmaps_check(sb, &range, sva,
l0e, l1e, l2e, l3e);
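/* As at L2, count each ATTR_CONTIGUOUS run only once. */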
if ((l3e & ATTR_CONTIGUOUS) != 0)
range.l3contig +=
l % L3C_ENTRIES == 0 ?
1 : 0;
else
range.l3pages++;
}
}
}
}
error = sbuf_finish(sb);
sbuf_delete(sb);
return (error);
}
SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
NULL, 0, sysctl_kmaps, "A",
"Dump kernel address layout");
diff --git a/sys/arm64/arm64/ptrauth.c b/sys/arm64/arm64/ptrauth.c
index 7f453dfa278d..fdab5414e24c 100644
--- a/sys/arm64/arm64/ptrauth.c
+++ b/sys/arm64/arm64/ptrauth.c
@@ -1,335 +1,335 @@
/*-
* Copyright (c) 2021 The FreeBSD Foundation
*
* This software was developed by Andrew Turner under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This manages pointer authentication. As it needs to enable the use of
* pointer authentication and change the keys, we must build this with
* pointer authentication disabled.
*/
#ifdef __ARM_FEATURE_PAC_DEFAULT
#error Must be built with pointer authentication disabled
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
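/* Enable all four keys: instruction A/B and data A/B. */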
#define SCTLR_PTRAUTH (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)
static bool __read_mostly enable_ptrauth = false;
/* Functions called from assembly. */
void ptrauth_start(void);
struct thread *ptrauth_switch(struct thread *);
void ptrauth_exit_el0(struct thread *);
void ptrauth_enter_el0(struct thread *);
static bool
ptrauth_disable(void)
{
const char *family, *maker, *product;
family = kern_getenv("smbios.system.family");
maker = kern_getenv("smbios.system.maker");
product = kern_getenv("smbios.system.product");
if (family == NULL || maker == NULL || product == NULL)
return (false);
/*
* The Dev Kit appears to be configured to trap upon access to PAC
* registers, but the kernel boots at EL1 and so we have no way to
* inspect or change this configuration. As a workaround, simply
* disable PAC on this platform.
*/
if (strcmp(maker, "Microsoft Corporation") == 0 &&
strcmp(family, "Surface") == 0 &&
strcmp(product, "Windows Dev Kit 2023") == 0)
return (true);
return (false);
}
-static bool
+static cpu_feat_en
ptrauth_check(const struct cpu_feat *feat __unused, u_int midr __unused)
{
uint64_t isar;
int pac_enable;
/*
* Allow the sysadmin to disable pointer authentication globally,
* e.g. on broken hardware.
*/
pac_enable = 1;
TUNABLE_INT_FETCH("hw.pac.enable", &pac_enable);
if (!pac_enable) {
if (boothowto & RB_VERBOSE)
printf("Pointer authentication is disabled\n");
goto out;
}
if (ptrauth_disable())
goto out;
/*
* This assumes if there is pointer authentication on the boot CPU
* it will also be available on any non-boot CPUs. If this is ever
* not the case we will have to add a quirk.
*/
/*
* The QARMA5 or implementation defined algorithms are reported in
* ID_AA64ISAR1_EL1.
*/
if (get_kernel_reg(ID_AA64ISAR1_EL1, &isar)) {
if (ID_AA64ISAR1_APA_VAL(isar) > 0 ||
ID_AA64ISAR1_API_VAL(isar) > 0) {
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
}
}
/* The QARMA3 algorithm is reported in ID_AA64ISAR2_EL1. */
if (get_kernel_reg(ID_AA64ISAR2_EL1, &isar)) {
if (ID_AA64ISAR2_APA3_VAL(isar) > 0) {
- return (true);
+ return (FEAT_DEFAULT_ENABLE);
}
}
out:
/*
* Pointer authentication may be disabled; mask out the ID fields we
* expose to userspace and the rest of the kernel so they don't try
* to use it.
*/
update_special_reg(ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_MASK |
ID_AA64ISAR1_APA_MASK | ID_AA64ISAR1_GPA_MASK |
ID_AA64ISAR1_GPI_MASK, 0);
update_special_reg(ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_MASK, 0);
- return (false);
+ return (FEAT_ALWAYS_DISABLE);
}
static bool
ptrauth_enable(const struct cpu_feat *feat __unused,
cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
u_int errata_count __unused)
{
enable_ptrauth = true;
elf64_addr_mask.code |= PAC_ADDR_MASK;
elf64_addr_mask.data |= PAC_ADDR_MASK;
#ifdef COMPAT_FREEBSD14
elf64_addr_mask_14.code |= PAC_ADDR_MASK_14;
elf64_addr_mask_14.data |= PAC_ADDR_MASK_14;
#endif
return (true);
}
CPU_FEAT(feat_pauth, "Pointer Authentication",
ptrauth_check, NULL, ptrauth_enable,
CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM);
/* Copy the keys when forking a new process */
void
ptrauth_fork(struct thread *new_td, struct thread *orig_td)
{
if (!enable_ptrauth)
return;
memcpy(&new_td->td_md.md_ptrauth_user, &orig_td->td_md.md_ptrauth_user,
sizeof(new_td->td_md.md_ptrauth_user));
}
/* Generate new userspace keys when executing a new process */
void
ptrauth_exec(struct thread *td)
{
if (!enable_ptrauth)
return;
arc4rand(&td->td_md.md_ptrauth_user, sizeof(td->td_md.md_ptrauth_user),
0);
}
/*
* Copy the user keys when creating a new userspace thread until it's clear
* how the ABI expects the various keys to be assigned.
*/
void
ptrauth_copy_thread(struct thread *new_td, struct thread *orig_td)
{
if (!enable_ptrauth)
return;
memcpy(&new_td->td_md.md_ptrauth_user, &orig_td->td_md.md_ptrauth_user,
sizeof(new_td->td_md.md_ptrauth_user));
}
/* Generate new kernel keys when executing a new kernel thread */
void
ptrauth_thread_alloc(struct thread *td)
{
if (!enable_ptrauth)
return;
arc4rand(&td->td_md.md_ptrauth_kern, sizeof(td->td_md.md_ptrauth_kern),
0);
}
/*
* Load a pointer authentication key pair (kernel or user). We can't use
* WRITE_SPECIALREG as we would need to set the architecture extension.
*/
#define LOAD_KEY(space, name, reg) \
__asm __volatile( \
"msr "__XSTRING(MRS_REG_ALT_NAME(reg ## KeyLo_EL1))", %0 \n" \
"msr "__XSTRING(MRS_REG_ALT_NAME(reg ## KeyHi_EL1))", %1 \n" \
:: "r"(td->td_md.md_ptrauth_##space.name.pa_key_lo), \
"r"(td->td_md.md_ptrauth_##space.name.pa_key_hi))
void
ptrauth_thread0(struct thread *td)
{
if (!enable_ptrauth)
return;
/* TODO: Generate a random number here */
memset(&td->td_md.md_ptrauth_kern, 0,
sizeof(td->td_md.md_ptrauth_kern));
LOAD_KEY(kern, apia, APIA);
/*
* No isb as this is called before ptrauth_start, so we can rely on
* the instruction barrier there.
*/
}
/*
* Enable pointer authentication. After this point userspace and the kernel
* can sign return addresses, etc. based on their keys
*
* This assumes either all or no CPUs have pointer authentication support,
* and, if supported, all CPUs have the same algorithm.
*/
void
ptrauth_start(void)
{
uint64_t sctlr;
if (!enable_ptrauth)
return;
/* Enable pointer authentication */
sctlr = READ_SPECIALREG(sctlr_el1);
sctlr |= SCTLR_PTRAUTH;
WRITE_SPECIALREG(sctlr_el1, sctlr);
isb();
}
#ifdef SMP
void
ptrauth_mp_start(uint64_t cpu)
{
struct ptrauth_key start_key;
uint64_t sctlr;
if (!enable_ptrauth)
return;
/*
* We need a key until we call sched_throw; however, we don't have
* a thread until then. Create a key just for use within
* init_secondary and whatever it calls. As init_secondary never
* returns, it is safe to do so from within it.
*
* As it's only used for a short length of time, just use the cpu
* as the key.
*/
start_key.pa_key_lo = cpu;
start_key.pa_key_hi = ~cpu;
__asm __volatile(
".arch_extension pauth \n"
"msr "__XSTRING(APIAKeyLo_EL1_REG)", %0 \n"
"msr "__XSTRING(APIAKeyHi_EL1_REG)", %1 \n"
".arch_extension nopauth \n"
:: "r"(start_key.pa_key_lo), "r"(start_key.pa_key_hi));
/* Enable pointer authentication */
sctlr = READ_SPECIALREG(sctlr_el1);
sctlr |= SCTLR_PTRAUTH;
WRITE_SPECIALREG(sctlr_el1, sctlr);
isb();
}
#endif
struct thread *
ptrauth_switch(struct thread *td)
{
if (enable_ptrauth) {
LOAD_KEY(kern, apia, APIA);
isb();
}
return (td);
}
/* Called when we are exiting userspace and entering the kernel */
void
ptrauth_exit_el0(struct thread *td)
{
if (!enable_ptrauth)
return;
LOAD_KEY(kern, apia, APIA);
isb();
}
/* Called when we are about to exit the kernel and enter userspace */
void
ptrauth_enter_el0(struct thread *td)
{
if (!enable_ptrauth)
return;
LOAD_KEY(user, apia, APIA);
LOAD_KEY(user, apib, APIB);
LOAD_KEY(user, apda, APDA);
LOAD_KEY(user, apdb, APDB);
LOAD_KEY(user, apga, APGA);
/*
* No isb as this is called from the exception handler, so we can rely
* on the eret instruction to be the needed context-synchronizing event.
*/
}
diff --git a/sys/arm64/include/cpu_feat.h b/sys/arm64/include/cpu_feat.h
index f62f3e334dc1..6a554b6baedf 100644
--- a/sys/arm64/include/cpu_feat.h
+++ b/sys/arm64/include/cpu_feat.h
@@ -1,105 +1,130 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 Arm Ltd
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _MACHINE_CPU_FEAT_H_
#define _MACHINE_CPU_FEAT_H_
#include
#include
typedef enum {
ERRATA_UNKNOWN, /* Unknown erratum */
ERRATA_NONE, /* No errata for this feature on this system. */
ERRATA_AFFECTED, /* There is errata on this system. */
ERRATA_FW_MITIGAION, /* There is errata, and a firmware */
/* mitigation. The mitigation may need a */
/* kernel component. */
} cpu_feat_errata;
+typedef enum {
+ /*
+ * Don't implement the feature or erratum workaround,
+ * e.g. the feature is not implemented or the erratum
+ * applies to another CPU.
+ */
+ FEAT_ALWAYS_DISABLE,
+
+ /*
+ * Disabled by default, but allow the user to enable,
+ * e.g. for a rare erratum with a workaround, Arm
+ * Category B (rare) or similar.
+ */
+ FEAT_DEFAULT_DISABLE,
+
+ /*
+ * Enabled by default, but allow the user to disable,
+ * e.g. for a common erratum with a workaround, Arm
+ * Category A or B or similar.
+ */
+ FEAT_DEFAULT_ENABLE,
+
+ /* We could add FEAT_ALWAYS_ENABLE if a need was found. */
+} cpu_feat_en;
+
#define CPU_FEAT_STAGE_MASK 0x00000001
#define CPU_FEAT_EARLY_BOOT 0x00000000
#define CPU_FEAT_AFTER_DEV 0x00000001
#define CPU_FEAT_SCOPE_MASK 0x00000010
#define CPU_FEAT_PER_CPU 0x00000000
#define CPU_FEAT_SYSTEM 0x00000010
struct cpu_feat;
-typedef bool (cpu_feat_check)(const struct cpu_feat *, u_int);
+typedef cpu_feat_en (cpu_feat_check)(const struct cpu_feat *, u_int);
typedef bool (cpu_feat_has_errata)(const struct cpu_feat *, u_int,
u_int **, u_int *);
typedef bool (cpu_feat_enable)(const struct cpu_feat *, cpu_feat_errata,
u_int *, u_int);
struct cpu_feat {
const char *feat_name;
cpu_feat_check *feat_check;
cpu_feat_has_errata *feat_has_errata;
cpu_feat_enable *feat_enable;
uint32_t feat_flags;
bool feat_enabled;
};
SET_DECLARE(cpu_feat_set, struct cpu_feat);
SYSCTL_DECL(_hw_feat);
#define CPU_FEAT(name, descr, check, has_errata, enable, flags) \
static struct cpu_feat name = { \
.feat_name = #name, \
.feat_check = check, \
.feat_has_errata = has_errata, \
.feat_enable = enable, \
.feat_flags = flags, \
.feat_enabled = false, \
}; \
DATA_SET(cpu_feat_set, name); \
SYSCTL_BOOL(_hw_feat, OID_AUTO, name, CTLFLAG_RD, &name.feat_enabled, \
0, descr)
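/*
* Illustrative sketch only (not part of this header): a feature is
* typically declared by pairing a cpu_feat_check that returns a
* cpu_feat_en with a cpu_feat_enable that returns bool, e.g.
*
*	CPU_FEAT(feat_example, "Example feature",
*	    example_check, NULL, example_enable,
*	    CPU_FEAT_EARLY_BOOT | CPU_FEAT_SYSTEM);
*
* where feat_example, example_check and example_enable are hypothetical
* names.
*/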
/*
* Allow drivers to mark an erratum as worked around, e.g. the Errata
* Management ABI may know the workaround isn't needed on a given system.
*/
typedef cpu_feat_errata (*cpu_feat_errata_check_fn)(const struct cpu_feat *,
u_int);
void cpu_feat_register_errata_check(cpu_feat_errata_check_fn);
void enable_cpu_feat(uint32_t);
/* Check if an erratum is in the list of errata */
static inline bool
cpu_feat_has_erratum(u_int *errata_list, u_int errata_count, u_int erratum)
{
for (u_int i = 0; i < errata_count; i++)
if (errata_list[i] == erratum)
return (true);
return (false);
}
#endif /* _MACHINE_CPU_FEAT_H_ */