Changeset View
Standalone View
sys/amd64/amd64/pmap.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 2,514 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
struct invpcid_descr d; | struct invpcid_descr d; | ||||
uint64_t kcr3, ucr3; | uint64_t kcr3, ucr3; | ||||
uint32_t pcid; | uint32_t pcid; | ||||
u_int cpuid, i; | u_int cpuid, i; | ||||
cpuid = PCPU_GET(cpuid); | cpuid = PCPU_GET(cpuid); | ||||
if (pmap == PCPU_GET(curpmap)) { | if (pmap == PCPU_GET(curpmap)) { | ||||
if (pmap->pm_ucr3 != PMAP_NO_CR3) { | if (pmap->pm_ucr3 != PMAP_NO_CR3 && | ||||
/* | /* | ||||
* If we context-switched right after | |||||
* PCPU_GET(ucr3_load_mask), we could read the | |||||
* ~CR3_PCID_SAVE mask, which causes us to skip | |||||
* the code below to invalidate user pages. This | |||||
* is handled in pmap_activate_sw_pcid_pti() by | |||||
* clearing pm_gen if ucr3_load_mask is ~CR3_PCID_SAVE. | |||||
*/ | |||||
alc: Can you please replace "trivial" by a phrase that describes the intended meaning of "trivial"? | |||||
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) { | |||||
/* | |||||
* Because pm_pcid is recalculated on a | * Because pm_pcid is recalculated on a | ||||
* context switch, we must disable switching. | * context switch, we must disable switching. | ||||
* Otherwise, we might use a stale value | * Otherwise, we might use a stale value | ||||
* below. | * below. | ||||
*/ | */ | ||||
critical_enter(); | critical_enter(); | ||||
pcid = pmap->pm_pcids[cpuid].pm_pcid; | pcid = pmap->pm_pcids[cpuid].pm_pcid; | ||||
if (invpcid_works1) { | if (invpcid_works1) { | ||||
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
struct invpcid_descr d; | struct invpcid_descr d; | ||||
uint64_t kcr3, ucr3; | uint64_t kcr3, ucr3; | ||||
uint32_t pcid; | uint32_t pcid; | ||||
u_int cpuid, i; | u_int cpuid, i; | ||||
cpuid = PCPU_GET(cpuid); | cpuid = PCPU_GET(cpuid); | ||||
if (pmap == PCPU_GET(curpmap)) { | if (pmap == PCPU_GET(curpmap)) { | ||||
if (pmap->pm_ucr3 != PMAP_NO_CR3) { | if (pmap->pm_ucr3 != PMAP_NO_CR3 && | ||||
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) { | |||||
critical_enter(); | critical_enter(); | ||||
pcid = pmap->pm_pcids[cpuid].pm_pcid; | pcid = pmap->pm_pcids[cpuid].pm_pcid; | ||||
if (invpcid_works1) { | if (invpcid_works1) { | ||||
d.pcid = pcid | PMAP_PCID_USER_PT; | d.pcid = pcid | PMAP_PCID_USER_PT; | ||||
d.pad = 0; | d.pad = 0; | ||||
d.addr = sva; | d.addr = sva; | ||||
for (; d.addr < eva; d.addr += PAGE_SIZE) | for (; d.addr < eva; d.addr += PAGE_SIZE) | ||||
invpcid(&d, INVPCID_ADDR); | invpcid(&d, INVPCID_ADDR); | ||||
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) | ||||
smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap, | smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap, | ||||
pmap_invalidate_range_curcpu_cb); | pmap_invalidate_range_curcpu_cb); | ||||
} | } | ||||
static inline void | static inline void | ||||
pmap_invalidate_all_pcid(pmap_t pmap, bool invpcid_works1) | pmap_invalidate_all_pcid(pmap_t pmap, bool invpcid_works1) | ||||
{ | { | ||||
struct invpcid_descr d; | struct invpcid_descr d; | ||||
uint64_t kcr3, ucr3; | uint64_t kcr3; | ||||
uint32_t pcid; | uint32_t pcid; | ||||
u_int cpuid, i; | u_int cpuid, i; | ||||
if (pmap == kernel_pmap) { | if (pmap == kernel_pmap) { | ||||
if (invpcid_works1) { | if (invpcid_works1) { | ||||
bzero(&d, sizeof(d)); | bzero(&d, sizeof(d)); | ||||
invpcid(&d, INVPCID_CTXGLOB); | invpcid(&d, INVPCID_CTXGLOB); | ||||
} else { | } else { | ||||
invltlb_glob(); | invltlb_glob(); | ||||
} | } | ||||
} else { | } else { | ||||
cpuid = PCPU_GET(cpuid); | cpuid = PCPU_GET(cpuid); | ||||
if (pmap == PCPU_GET(curpmap)) { | if (pmap == PCPU_GET(curpmap)) { | ||||
critical_enter(); | critical_enter(); | ||||
pcid = pmap->pm_pcids[cpuid].pm_pcid; | pcid = pmap->pm_pcids[cpuid].pm_pcid; | ||||
if (invpcid_works1) { | if (invpcid_works1) { | ||||
d.pcid = pcid; | d.pcid = pcid; | ||||
d.pad = 0; | d.pad = 0; | ||||
d.addr = 0; | d.addr = 0; | ||||
invpcid(&d, INVPCID_CTX); | invpcid(&d, INVPCID_CTX); | ||||
if (pmap->pm_ucr3 != PMAP_NO_CR3) { | |||||
d.pcid |= PMAP_PCID_USER_PT; | |||||
invpcid(&d, INVPCID_CTX); | |||||
} | |||||
} else { | } else { | ||||
Done Inline Actions: Can't this "if" statement be moved below, outside the enclosing "if" statement? That would eliminate the duplicate "if" statement in the "else" below. alc: Can't this "if" statement be moved below, outside the enclosing "if" statement? That would… | |||||
kcr3 = pmap->pm_cr3 | pcid; | kcr3 = pmap->pm_cr3 | pcid; | ||||
ucr3 = pmap->pm_ucr3; | |||||
if (ucr3 != PMAP_NO_CR3) { | |||||
ucr3 |= pcid | PMAP_PCID_USER_PT; | |||||
pmap_pti_pcid_invalidate(ucr3, kcr3); | |||||
} else { | |||||
load_cr3(kcr3); | load_cr3(kcr3); | ||||
} | } | ||||
} | if (pmap->pm_ucr3 != PMAP_NO_CR3) | ||||
PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE); | |||||
critical_exit(); | critical_exit(); | ||||
} else | } else | ||||
pmap->pm_pcids[cpuid].pm_gen = 0; | pmap->pm_pcids[cpuid].pm_gen = 0; | ||||
CPU_FOREACH(i) { | CPU_FOREACH(i) { | ||||
if (cpuid != i) | if (cpuid != i) | ||||
pmap->pm_pcids[i].pm_gen = 0; | pmap->pm_pcids[i].pm_gen = 0; | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 6,027 Lines • ▼ Show 20 Lines | |||||
static void | static void | ||||
pmap_activate_sw_pti_post(struct thread *td, pmap_t pmap) | pmap_activate_sw_pti_post(struct thread *td, pmap_t pmap) | ||||
{ | { | ||||
PCPU_GET(tssp)->tss_rsp0 = pmap->pm_ucr3 != PMAP_NO_CR3 ? | PCPU_GET(tssp)->tss_rsp0 = pmap->pm_ucr3 != PMAP_NO_CR3 ? | ||||
PCPU_GET(pti_rsp0) : (uintptr_t)td->td_md.md_stack_base; | PCPU_GET(pti_rsp0) : (uintptr_t)td->td_md.md_stack_base; | ||||
} | } | ||||
static void inline | static void | ||||
pmap_activate_sw_pcid_pti(pmap_t pmap, u_int cpuid, const bool invpcid_works1) | pmap_activate_sw_pcid_pti(struct thread *td, pmap_t pmap, u_int cpuid) | ||||
{ | { | ||||
struct invpcid_descr d; | pmap_t old_pmap; | ||||
uint64_t cached, cr3, kcr3, ucr3; | uint64_t cached, cr3, kcr3, ucr3; | ||||
KASSERT((read_rflags() & PSL_I) == 0, | |||||
("PCID needs interrupts disabled in pmap_activate_sw()")); | |||||
/* See the comment in pmap_invalidate_page_pcid(). */ | |||||
Done Inline Actions: Perhaps write, /* See the comment in pmap_invalidate_page_pcid(). */ markj: Perhaps write, /* See the comment in pmap_invalidate_page_pcid(). */ | |||||
if (PCPU_GET(ucr3_load_mask) != PMAP_UCR3_NOMASK) { | |||||
PCPU_SET(ucr3_load_mask, PMAP_UCR3_NOMASK); | |||||
old_pmap = PCPU_GET(curpmap); | |||||
MPASS(old_pmap->pm_ucr3 != PMAP_NO_CR3); | |||||
old_pmap->pm_pcids[cpuid].pm_gen = 0; | |||||
} | |||||
cached = pmap_pcid_alloc_checked(pmap, cpuid); | cached = pmap_pcid_alloc_checked(pmap, cpuid); | ||||
cr3 = rcr3(); | cr3 = rcr3(); | ||||
if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) | if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) | ||||
load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid); | load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid); | ||||
PCPU_SET(curpmap, pmap); | PCPU_SET(curpmap, pmap); | ||||
kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid; | kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid; | ||||
ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid | | ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid | | ||||
PMAP_PCID_USER_PT; | PMAP_PCID_USER_PT; | ||||
if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) { | if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) | ||||
/* | PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE); | ||||
* Explicitly invalidate translations cached from the | |||||
* user page table. They are not automatically | |||||
* flushed by reload of cr3 with the kernel page table | |||||
* pointer above. | |||||
* | |||||
* Note that the if() condition is resolved statically | |||||
* by using the function argument instead of | |||||
* runtime-evaluated invpcid_works value. | |||||
*/ | |||||
if (invpcid_works1) { | |||||
d.pcid = PMAP_PCID_USER_PT | | |||||
pmap->pm_pcids[cpuid].pm_pcid; | |||||
d.pad = 0; | |||||
d.addr = 0; | |||||
invpcid(&d, INVPCID_CTX); | |||||
} else { | |||||
pmap_pti_pcid_invalidate(ucr3, kcr3); | |||||
} | |||||
} | |||||
PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE); | PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE); | ||||
PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE); | PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE); | ||||
if (cached) | if (cached) | ||||
PCPU_INC(pm_save_cnt); | PCPU_INC(pm_save_cnt); | ||||
} | |||||
static void | |||||
pmap_activate_sw_pcid_invpcid_pti(struct thread *td, pmap_t pmap, u_int cpuid) | |||||
{ | |||||
pmap_activate_sw_pcid_pti(pmap, cpuid, true); | |||||
pmap_activate_sw_pti_post(td, pmap); | pmap_activate_sw_pti_post(td, pmap); | ||||
} | } | ||||
static void | static void | ||||
pmap_activate_sw_pcid_noinvpcid_pti(struct thread *td, pmap_t pmap, | |||||
u_int cpuid) | |||||
{ | |||||
register_t rflags; | |||||
/* | |||||
* If the INVPCID instruction is not available, | |||||
* invltlb_pcid_handler() is used to handle an invalidate_all | |||||
* IPI, which checks for curpmap == smp_tlb_pmap. The below | |||||
* sequence of operations has a window where %CR3 is loaded | |||||
* with the new pmap's PML4 address, but the curpmap value has | |||||
* not yet been updated. This causes the invltlb IPI handler, | |||||
* which is called between the updates, to execute as a NOP, | |||||
* which leaves stale TLB entries. | |||||
* | |||||
* Note that the most typical use of pmap_activate_sw(), from | |||||
* the context switch, is immune to this race, because | |||||
* interrupts are disabled (while the thread lock is owned), | |||||
* and the IPI happens after curpmap is updated. Protect | |||||
* other callers in a similar way, by disabling interrupts | |||||
* around the %cr3 register reload and curpmap assignment. | |||||
*/ | |||||
rflags = intr_disable(); | |||||
pmap_activate_sw_pcid_pti(pmap, cpuid, false); | |||||
intr_restore(rflags); | |||||
pmap_activate_sw_pti_post(td, pmap); | |||||
} | |||||
static void | |||||
pmap_activate_sw_pcid_nopti(struct thread *td __unused, pmap_t pmap, | pmap_activate_sw_pcid_nopti(struct thread *td __unused, pmap_t pmap, | ||||
u_int cpuid) | u_int cpuid) | ||||
{ | { | ||||
uint64_t cached, cr3; | uint64_t cached, cr3; | ||||
KASSERT((read_rflags() & PSL_I) == 0, | |||||
("PCID needs interrupts disabled in pmap_activate_sw()")); | |||||
cached = pmap_pcid_alloc_checked(pmap, cpuid); | cached = pmap_pcid_alloc_checked(pmap, cpuid); | ||||
cr3 = rcr3(); | cr3 = rcr3(); | ||||
if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) | if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) | ||||
load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid | | load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid | | ||||
cached); | cached); | ||||
PCPU_SET(curpmap, pmap); | PCPU_SET(curpmap, pmap); | ||||
if (cached) | if (cached) | ||||
PCPU_INC(pm_save_cnt); | PCPU_INC(pm_save_cnt); | ||||
} | } | ||||
static void | static void | ||||
pmap_activate_sw_pcid_noinvpcid_nopti(struct thread *td __unused, pmap_t pmap, | |||||
u_int cpuid) | |||||
{ | |||||
register_t rflags; | |||||
rflags = intr_disable(); | |||||
pmap_activate_sw_pcid_nopti(td, pmap, cpuid); | |||||
intr_restore(rflags); | |||||
} | |||||
static void | |||||
pmap_activate_sw_nopcid_nopti(struct thread *td __unused, pmap_t pmap, | pmap_activate_sw_nopcid_nopti(struct thread *td __unused, pmap_t pmap, | ||||
u_int cpuid __unused) | u_int cpuid __unused) | ||||
{ | { | ||||
load_cr3(pmap->pm_cr3); | load_cr3(pmap->pm_cr3); | ||||
PCPU_SET(curpmap, pmap); | PCPU_SET(curpmap, pmap); | ||||
} | } | ||||
static void | static void | ||||
pmap_activate_sw_nopcid_pti(struct thread *td, pmap_t pmap, | pmap_activate_sw_nopcid_pti(struct thread *td, pmap_t pmap, | ||||
u_int cpuid __unused) | u_int cpuid __unused) | ||||
{ | { | ||||
pmap_activate_sw_nopcid_nopti(td, pmap, cpuid); | pmap_activate_sw_nopcid_nopti(td, pmap, cpuid); | ||||
PCPU_SET(kcr3, pmap->pm_cr3); | PCPU_SET(kcr3, pmap->pm_cr3); | ||||
PCPU_SET(ucr3, pmap->pm_ucr3); | PCPU_SET(ucr3, pmap->pm_ucr3); | ||||
pmap_activate_sw_pti_post(td, pmap); | pmap_activate_sw_pti_post(td, pmap); | ||||
} | } | ||||
DEFINE_IFUNC(static, void, pmap_activate_sw_mode, (struct thread *, pmap_t, | DEFINE_IFUNC(static, void, pmap_activate_sw_mode, (struct thread *, pmap_t, | ||||
u_int)) | u_int)) | ||||
{ | { | ||||
if (pmap_pcid_enabled && pti && invpcid_works) | if (pmap_pcid_enabled && pti) | ||||
return (pmap_activate_sw_pcid_invpcid_pti); | return (pmap_activate_sw_pcid_pti); | ||||
else if (pmap_pcid_enabled && pti && !invpcid_works) | else if (pmap_pcid_enabled && !pti) | ||||
return (pmap_activate_sw_pcid_noinvpcid_pti); | |||||
else if (pmap_pcid_enabled && !pti && invpcid_works) | |||||
return (pmap_activate_sw_pcid_nopti); | return (pmap_activate_sw_pcid_nopti); | ||||
else if (pmap_pcid_enabled && !pti && !invpcid_works) | |||||
return (pmap_activate_sw_pcid_noinvpcid_nopti); | |||||
else if (!pmap_pcid_enabled && pti) | else if (!pmap_pcid_enabled && pti) | ||||
return (pmap_activate_sw_nopcid_pti); | return (pmap_activate_sw_nopcid_pti); | ||||
else /* if (!pmap_pcid_enabled && !pti) */ | else /* if (!pmap_pcid_enabled && !pti) */ | ||||
return (pmap_activate_sw_nopcid_nopti); | return (pmap_activate_sw_nopcid_nopti); | ||||
} | } | ||||
void | void | ||||
pmap_activate_sw(struct thread *td) | pmap_activate_sw(struct thread *td) | ||||
Show All 20 Lines | |||||
#else | #else | ||||
CPU_CLR(cpuid, &oldpmap->pm_active); | CPU_CLR(cpuid, &oldpmap->pm_active); | ||||
#endif | #endif | ||||
} | } | ||||
void | void | ||||
pmap_activate(struct thread *td) | pmap_activate(struct thread *td) | ||||
{ | { | ||||
/* | |||||
critical_enter(); | * invltlb_{invpcid,}_pcid_handler() is used to handle an | ||||
* invalidate_all IPI, which checks for curpmap == | |||||
* smp_tlb_pmap. The below sequence of operations has a | |||||
* window where %CR3 is loaded with the new pmap's PML4 | |||||
* address, but the curpmap value has not yet been updated. | |||||
* This causes the invltlb IPI handler, which is called | |||||
* between the updates, to execute as a NOP, which leaves | |||||
* stale TLB entries. | |||||
* | |||||
* Note that the most common use of pmap_activate_sw(), from | |||||
Done Inline Actions"most typical" -> "most common" alc: "most typical" -> "most common" | |||||
* a context switch, is immune to this race, because | |||||
Done Inline Actions-> "a context switch" alc: -> "a context switch" | |||||
* interrupts are disabled (while the thread lock is owned), | |||||
* so the IPI is delayed until after curpmap is updated. Protect | |||||
Done Inline Actions"so the IPI is delayed until after ..." alc: "so the IPI is delayed until after ..." | |||||
* other callers in a similar way, by disabling interrupts | |||||
* around the %cr3 register reload and curpmap assignment. | |||||
*/ | |||||
spinlock_enter(); | |||||
pmap_activate_sw(td); | pmap_activate_sw(td); | ||||
Not Done Inline ActionsAssert that interrupts are disabled in pmap_activate_sw()? markj: Assert that interrupts are disabled in pmap_activate_sw()? | |||||
critical_exit(); | spinlock_exit(); | ||||
Not Done Inline Actions: I do not really understand why spinlock_*() is used instead of the MD intr_disable/enable(). markj: I do not really understand why spinlock_*() are used instead of MD intr_disable/enable(). | |||||
Done Inline Actions: spinlock_enter() == interrupt disable + critical_enter(). Parts of the PCID recalculation on a context switch only need context switching to be disabled, so that we are not preempted in the middle of it; that is why the code asserts that it runs in a critical section. On the other hand, for the reason explained in the comment above, non-context-switch uses of pmap_activate() must use this trick to prevent IPIs from being handled while the PCPU/hardware state is inconsistent. In principle, just disabling interrupts would suffice, but the most important case, the context switch, has both interrupts disabled and a critical section around it, so I do not want to weaken the assertions in the code. I might add assertions that interrupts are disabled. kib: spinlock_enter == interrupt disable + critical enter. Parts of the PCID recalculations on the… | |||||
} | } | ||||
void | void | ||||
pmap_activate_boot(pmap_t pmap) | pmap_activate_boot(pmap_t pmap) | ||||
{ | { | ||||
uint64_t kcr3; | uint64_t kcr3; | ||||
u_int cpuid; | u_int cpuid; | ||||
▲ Show 20 Lines • Show All 1,718 Lines • Show Last 20 Lines |
Can you please replace "trivial" by a phrase that describes the intended meaning of "trivial"?