Changeset View
Changeset View
Standalone View
Standalone View
head/sys/amd64/amd64/mp_machdep.c
Show All 38 Lines | |||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/bus.h> | #include <sys/bus.h> | ||||
#include <sys/cpuset.h> | #include <sys/cpuset.h> | ||||
#include <sys/domainset.h> | #include <sys/domainset.h> | ||||
#ifdef GPROF | #ifdef GPROF | ||||
#include <sys/gmon.h> | #include <sys/gmon.h> | ||||
#endif | #endif | ||||
#include <sys/kdb.h> | |||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/memrange.h> | #include <sys/memrange.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/pcpu.h> | #include <sys/pcpu.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
int i; | int i; | ||||
/* Initialize the logical ID to APIC ID table. */ | /* Initialize the logical ID to APIC ID table. */ | ||||
for (i = 0; i < MAXCPU; i++) { | for (i = 0; i < MAXCPU; i++) { | ||||
cpu_apic_ids[i] = -1; | cpu_apic_ids[i] = -1; | ||||
} | } | ||||
/* Install an inter-CPU IPI for TLB invalidation */ | /* Install an inter-CPU IPI for cache and TLB invalidations. */ | ||||
if (pmap_pcid_enabled) { | setidt(IPI_INVLOP, pti ? IDTVEC(invlop_pti) : IDTVEC(invlop), | ||||
if (invpcid_works) { | |||||
setidt(IPI_INVLTLB, pti ? | |||||
IDTVEC(invltlb_invpcid_pti_pti) : | |||||
IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT, | |||||
SEL_KPL, 0); | |||||
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) : | |||||
IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) : | |||||
IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
} else { | |||||
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) : | |||||
IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) : | |||||
IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) : | |||||
IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
} | |||||
} else { | |||||
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb), | |||||
SDT_SYSIGT, SEL_KPL, 0); | SDT_SYSIGT, SEL_KPL, 0); | ||||
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg), | |||||
SDT_SYSIGT, SEL_KPL, 0); | |||||
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng), | |||||
SDT_SYSIGT, SEL_KPL, 0); | |||||
} | |||||
/* Install an inter-CPU IPI for cache invalidation. */ | |||||
setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache), | |||||
SDT_SYSIGT, SEL_KPL, 0); | |||||
/* Install an inter-CPU IPI for all-CPU rendezvous */ | /* Install an inter-CPU IPI for all-CPU rendezvous */ | ||||
setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) : | setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) : | ||||
IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); | IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); | ||||
/* Install generic inter-CPU IPI handler */ | /* Install generic inter-CPU IPI handler */ | ||||
setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) : | setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) : | ||||
IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); | IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | init_secondary(void) | ||||
pc->pc_tss = (struct system_segment_descriptor *)&gdt[GPROC0_SEL]; | pc->pc_tss = (struct system_segment_descriptor *)&gdt[GPROC0_SEL]; | ||||
pc->pc_fs32p = &gdt[GUFS32_SEL]; | pc->pc_fs32p = &gdt[GUFS32_SEL]; | ||||
pc->pc_gs32p = &gdt[GUGS32_SEL]; | pc->pc_gs32p = &gdt[GUGS32_SEL]; | ||||
pc->pc_ldt = (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]; | pc->pc_ldt = (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]; | ||||
/* See comment in pmap_bootstrap(). */ | /* See comment in pmap_bootstrap(). */ | ||||
pc->pc_pcid_next = PMAP_PCID_KERN + 2; | pc->pc_pcid_next = PMAP_PCID_KERN + 2; | ||||
pc->pc_pcid_gen = 1; | pc->pc_pcid_gen = 1; | ||||
pc->pc_smp_tlb_gen = 1; | |||||
/* Init tss */ | /* Init tss */ | ||||
pc->pc_common_tss = __pcpu[0].pc_common_tss; | pc->pc_common_tss = __pcpu[0].pc_common_tss; | ||||
pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + | pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + | ||||
IOPERM_BITMAP_SIZE; | IOPERM_BITMAP_SIZE; | ||||
pc->pc_common_tss.tss_rsp0 = 0; | pc->pc_common_tss.tss_rsp0 = 0; | ||||
/* The doublefault stack runs on IST1. */ | /* The doublefault stack runs on IST1. */ | ||||
np = ((struct nmi_pcpu *)&doublefault_stack[PAGE_SIZE]) - 1; | np = ((struct nmi_pcpu *)&doublefault_stack[PAGE_SIZE]) - 1; | ||||
▲ Show 20 Lines • Show All 212 Lines • ▼ Show 20 Lines | start_ap(int apic_id) | ||||
for (ms = 0; ms < 5000; ms++) { | for (ms = 0; ms < 5000; ms++) { | ||||
if (mp_naps > cpus) | if (mp_naps > cpus) | ||||
return 1; /* return SUCCESS */ | return 1; /* return SUCCESS */ | ||||
DELAY(1000); | DELAY(1000); | ||||
} | } | ||||
return 0; /* return FAILURE */ | return 0; /* return FAILURE */ | ||||
} | } | ||||
/*
 * Flush the TLB on other CPUs.
 */
/* | |||||
* Invalidation request. PCPU pc_smp_tlb_op uses u_int instead of the | |||||
* enum to avoid both namespace and ABI issues (with enums). | |||||
*/ | |||||
/* Codes start at 1, so a zeroed field never names a valid operation. */
enum invl_op_codes {
	INVL_OP_TLB = 1,		/* full flush, PCID not enabled */
	INVL_OP_TLB_INVPCID = 2,	/* full flush via INVPCID, non-PTI */
	INVL_OP_TLB_INVPCID_PTI = 3,	/* full flush via INVPCID, PTI */
	INVL_OP_TLB_PCID = 4,		/* full flush, PCID without INVPCID */
	INVL_OP_PGRNG = 5,		/* page-range flush, PCID not enabled */
	INVL_OP_PGRNG_INVPCID = 6,	/* page-range flush via INVPCID */
	INVL_OP_PGRNG_PCID = 7,		/* page-range flush, PCID w/o INVPCID */
	INVL_OP_PG = 8,			/* single-page flush, PCID not enabled */
	INVL_OP_PG_INVPCID = 9,		/* single-page flush via INVPCID */
	INVL_OP_PG_PCID = 10,		/* single-page flush, PCID w/o INVPCID */
	INVL_OP_CACHE = 11,		/* cache write-back and invalidate */
};
/*
 * These variables are initialized at startup to reflect how each of
 * the different kinds of invalidations should be performed on the
 * current machine and environment (selected by invl_scoreboard_init()
 * from pmap_pcid_enabled, invpcid_works and pti).
 */
static enum invl_op_codes invl_op_tlb;		/* full TLB flush op */
static enum invl_op_codes invl_op_pgrng;	/* page-range flush op */
static enum invl_op_codes invl_op_pg;		/* single-page flush op */
/*
 * Scoreboard of IPI completion notifications from target to IPI initiator.
 *
 * Each CPU can initiate a shootdown IPI independently from other CPUs.
 * The initiator enters a critical section, then fills its local PCPU
 * shootdown info (pc_smp_tlb_ vars), then clears the scoreboard
 * generation at location (cpu, my_cpuid) for each target cpu.  After
 * that the IPI is sent to all targets, which scan for zeroed
 * scoreboard generation words.  Upon finding such a word the shootdown
 * data is read from the corresponding CPU's pcpu, and the generation
 * is set.  Meanwhile the initiator loops waiting for all zeroed
 * generations in the scoreboard to update.
 */
static uint32_t *invl_scoreboard;
static void | |||||
invl_scoreboard_init(void *arg __unused) | |||||
{ | |||||
u_int i; | |||||
invl_scoreboard = malloc(sizeof(uint32_t) * (mp_maxid + 1) * | |||||
(mp_maxid + 1), M_DEVBUF, M_WAITOK); | |||||
for (i = 0; i < (mp_maxid + 1) * (mp_maxid + 1); i++) | |||||
invl_scoreboard[i] = 1; | |||||
if (pmap_pcid_enabled) { | |||||
if (invpcid_works) { | |||||
if (pti) | |||||
invl_op_tlb = INVL_OP_TLB_INVPCID_PTI; | |||||
else | |||||
invl_op_tlb = INVL_OP_TLB_INVPCID; | |||||
invl_op_pgrng = INVL_OP_PGRNG_INVPCID; | |||||
invl_op_pg = INVL_OP_PG_INVPCID; | |||||
} else { | |||||
invl_op_tlb = INVL_OP_TLB_PCID; | |||||
invl_op_pgrng = INVL_OP_PGRNG_PCID; | |||||
invl_op_pg = INVL_OP_PG_PCID; | |||||
} | |||||
} else { | |||||
invl_op_tlb = INVL_OP_TLB; | |||||
invl_op_pgrng = INVL_OP_PGRNG; | |||||
invl_op_pg = INVL_OP_PG; | |||||
} | |||||
} | |||||
SYSINIT(invl_ops, SI_SUB_SMP, SI_ORDER_FIRST, invl_scoreboard_init, NULL); | |||||
static uint32_t * | |||||
invl_scoreboard_getcpu(u_int cpu) | |||||
{ | |||||
return (invl_scoreboard + cpu * (mp_maxid + 1)); | |||||
} | |||||
static uint32_t * | |||||
invl_scoreboard_slot(u_int cpu) | |||||
{ | |||||
return (invl_scoreboard_getcpu(cpu) + PCPU_GET(cpuid)); | |||||
} | |||||
/* | |||||
* Used by pmap to request cache or TLB invalidation on local and | |||||
* remote processors. Mask provides the set of remote CPUs which are | |||||
* to be signalled with the invalidation IPI. As an optimization, the | |||||
* curcpu_cb callback is invoked on the calling CPU while waiting for | |||||
* remote CPUs to complete the operation. | |||||
* | |||||
* The callback function is called unconditionally on the caller's | |||||
* underlying processor, even when this processor is not set in the | |||||
* mask. So, the callback function must be prepared to handle such | |||||
* spurious invocations. | |||||
* | |||||
* Interrupts must be enabled when calling the function with smp | |||||
* started, to avoid deadlock with other IPIs that are protected with | |||||
* smp_ipi_mtx spinlock at the initiator side. | |||||
*/ | |||||
/*
 * Perform one cache/TLB invalidation request: run curcpu_cb locally
 * and, via the scoreboard protocol, op on every remote CPU in mask.
 * Returns only after all signalled CPUs have acknowledged.
 */
static void
smp_targeted_tlb_shootdown(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
    vm_offset_t addr2, smp_invl_cb_t curcpu_cb, enum invl_op_codes op)
{
	cpuset_t other_cpus, mask1;
	uint32_t generation, *p_cpudone;
	int cpu;

	/*
	 * It is not necessary to signal other CPUs while booting or
	 * when in the debugger.
	 */
	if (kdb_active || KERNEL_PANICKED() || !smp_started) {
		curcpu_cb(pmap, addr1, addr2);
		return;
	}

	sched_pin();

	/*
	 * Check for other cpus.  Return if none.
	 */
	if (CPU_ISFULLSET(&mask)) {
		if (mp_ncpus <= 1)
			goto nospinexit;
	} else {
		CPU_CLR(PCPU_GET(cpuid), &mask);
		if (CPU_EMPTY(&mask))
			goto nospinexit;
	}

	/*
	 * Initiator must have interrupts enabled, which prevents
	 * non-invalidation IPIs that take the smp_ipi_mtx spinlock
	 * from deadlocking with us.  On the other hand, preemption
	 * must be disabled to pin the initiator to this instance of
	 * the pcpu pc_smp_tlb data and scoreboard line.
	 */
	KASSERT((read_rflags() & PSL_I) != 0,
	    ("smp_targeted_tlb_shootdown: interrupts disabled"));
	critical_enter();

	/* Publish the request in our PCPU area for targets to read. */
	PCPU_SET(smp_tlb_addr1, addr1);
	PCPU_SET(smp_tlb_addr2, addr2);
	PCPU_SET(smp_tlb_pmap, pmap);
	/* Generation skips zero on wrap: zero marks a pending slot. */
	generation = PCPU_GET(smp_tlb_gen);
	if (++generation == 0)
		generation = 1;
	PCPU_SET(smp_tlb_gen, generation);
	PCPU_SET(smp_tlb_op, op);
	/* Fence between filling smp_tlb fields and clearing scoreboard. */
	atomic_thread_fence_rel();

	/*
	 * NOTE(review): when mask is the full set this loop visits
	 * every bit of the cpuset, including this CPU's own slot
	 * (which is never re-armed by a handler) and CPU ids above
	 * mp_maxid, while each scoreboard line only holds mp_maxid + 1
	 * slots -- confirm callers cannot hit these cases.
	 */
	mask1 = mask;
	while ((cpu = CPU_FFS(&mask1)) != 0) {
		cpu--;
		CPU_CLR(cpu, &mask1);
		KASSERT(*invl_scoreboard_slot(cpu) != 0,
		    ("IPI scoreboard is zero, initiator %d target %d",
		    PCPU_GET(cpuid), cpu));
		*invl_scoreboard_slot(cpu) = 0;
	}

	/*
	 * IPI acts as a fence between writing to the scoreboard above
	 * (zeroing slot) and reading from it below (wait for
	 * acknowledge).
	 */
	if (CPU_ISFULLSET(&mask)) {
		ipi_all_but_self(IPI_INVLOP);
		other_cpus = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
	} else {
		other_cpus = mask;
		while ((cpu = CPU_FFS(&mask)) != 0) {
			cpu--;
			CPU_CLR(cpu, &mask);
			CTR3(KTR_SMP, "%s: cpu: %d invl ipi op: %x", __func__,
			    cpu, op);
			ipi_send_cpu(cpu, IPI_INVLOP);
		}
	}
	/* Do the local part while remote CPUs process the IPI. */
	curcpu_cb(pmap, addr1, addr2);
	/* Wait for each signalled CPU to store our generation. */
	while ((cpu = CPU_FFS(&other_cpus)) != 0) {
		cpu--;
		CPU_CLR(cpu, &other_cpus);
		p_cpudone = invl_scoreboard_slot(cpu);
		while (atomic_load_int(p_cpudone) != generation)
			ia32_pause();
	}
	critical_exit();
	sched_unpin();
	return;

nospinexit:
	curcpu_cb(pmap, addr1, addr2);
	sched_unpin();
}
/*
 * Request a full TLB flush for pmap on the CPUs in mask; curcpu_cb
 * performs the local part of the invalidation.
 */
void
smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
{
	smp_targeted_tlb_shootdown(mask, pmap, 0, 0, curcpu_cb, invl_op_tlb);
#ifdef COUNT_XINVLTLB_HITS
	ipi_global++;
#endif
}
/*
 * Request invalidation of the single page at addr in pmap on the CPUs
 * in mask; curcpu_cb performs the local part of the invalidation.
 */
void
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
    smp_invl_cb_t curcpu_cb)
{
	smp_targeted_tlb_shootdown(mask, pmap, addr, 0, curcpu_cb, invl_op_pg);
#ifdef COUNT_XINVLTLB_HITS
	ipi_page++;
#endif
}
/*
 * Request invalidation of the page range [addr1, addr2) in pmap on the
 * CPUs in mask; curcpu_cb performs the local part of the invalidation.
 */
void
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
    pmap_t pmap, smp_invl_cb_t curcpu_cb)
{
	smp_targeted_tlb_shootdown(mask, pmap, addr1, addr2, curcpu_cb,
	    invl_op_pgrng);
#ifdef COUNT_XINVLTLB_HITS
	ipi_range++;
	ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
/*
 * Request a cache write-back and invalidation on all CPUs; curcpu_cb
 * performs the local part.
 */
void
smp_cache_flush(smp_invl_cb_t curcpu_cb)
{
	smp_targeted_tlb_shootdown(all_cpus, NULL, 0, 0, curcpu_cb,
	    INVL_OP_CACHE);
}
/* | |||||
* Handlers for TLB related IPIs | |||||
*/ | |||||
/*
 * Full TLB flush on this CPU, non-PCID configuration; a global flush
 * is used for requests against the kernel pmap.
 */
static void
invltlb_handler(pmap_t smp_tlb_pmap)
{
#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	if (smp_tlb_pmap == kernel_pmap)
		invltlb_glob();
	else
		invltlb();
}
static void | |||||
invltlb_invpcid_handler(pmap_t smp_tlb_pmap) | |||||
{ | |||||
struct invpcid_descr d; | struct invpcid_descr d; | ||||
uint32_t generation; | |||||
#ifdef COUNT_XINVLTLB_HITS | #ifdef COUNT_XINVLTLB_HITS | ||||
xhits_gbl[PCPU_GET(cpuid)]++; | xhits_gbl[PCPU_GET(cpuid)]++; | ||||
#endif /* COUNT_XINVLTLB_HITS */ | #endif /* COUNT_XINVLTLB_HITS */ | ||||
#ifdef COUNT_IPIS | #ifdef COUNT_IPIS | ||||
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++; | (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; | ||||
#endif /* COUNT_IPIS */ | #endif /* COUNT_IPIS */ | ||||
generation = smp_tlb_generation; | |||||
d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; | d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; | ||||
d.pad = 0; | d.pad = 0; | ||||
d.addr = 0; | d.addr = 0; | ||||
invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB : | invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB : | ||||
INVPCID_CTX); | INVPCID_CTX); | ||||
PCPU_SET(smp_tlb_done, generation); | |||||
} | } | ||||
/*
 * Full TLB flush on this CPU using INVPCID, PTI configuration: both
 * the kernel and the user page-table PCID contexts must be flushed.
 */
static void
invltlb_invpcid_pti_handler(pmap_t smp_tlb_pmap)
{
	struct invpcid_descr d;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	if (smp_tlb_pmap == kernel_pmap) {
		/*
		 * This invalidation actually needs to clear kernel
		 * mappings from the TLB in the current pmap, but
		 * since we were asked for the flush in the kernel
		 * pmap, achieve it by performing global flush.
		 */
		invpcid(&d, INVPCID_CTXGLOB);
	} else {
		/* Flush the kernel-side, then the user-side, context. */
		invpcid(&d, INVPCID_CTX);
		d.pcid |= PMAP_PCID_USER_PT;
		invpcid(&d, INVPCID_CTX);
	}
}
/*
 * Full TLB flush on this CPU, PCID-without-INVPCID configuration.
 * Flushes via %cr3 reload, which only works for the pmap that is
 * currently active on this CPU.
 */
static void
invltlb_pcid_handler(pmap_t smp_tlb_pmap)
{
	uint64_t kcr3, ucr3;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	if (smp_tlb_pmap == kernel_pmap) {
		invltlb_glob();
	} else {
		/*
		 * The current pmap might not be equal to
		 * smp_tlb_pmap.  The clearing of the pm_gen in
		 * pmap_invalidate_all() takes care of TLB
		 * invalidation when switching to the pmap on this
		 * CPU.
		 */
		if (PCPU_GET(curpmap) == smp_tlb_pmap) {
			pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
			kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
			ucr3 = smp_tlb_pmap->pm_ucr3;
			if (ucr3 != PMAP_NO_CR3) {
				ucr3 |= PMAP_PCID_USER_PT | pcid;
				pmap_pti_pcid_invalidate(ucr3, kcr3);
			} else
				load_cr3(kcr3);
		}
	}
}
/*
 * Invalidate the single page at smp_tlb_addr1 on this CPU, non-PCID
 * configuration.
 */
static void
invlpg_handler(vm_offset_t smp_tlb_addr1)
{
#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	invlpg(smp_tlb_addr1);
}
/*
 * Invalidate the single page at smp_tlb_addr1 on this CPU using
 * INVPCID; with PTI, the user page-table context is flushed as well.
 */
static void
invlpg_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
{
	struct invpcid_descr d;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	invlpg(smp_tlb_addr1);
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		invpcid(&d, INVPCID_ADDR);
	}
}
/*
 * Invalidate the single page at smp_tlb_addr1 on this CPU,
 * PCID-without-INVPCID configuration; the user page-table entry is
 * only flushed for the currently active pmap.
 */
static void
invlpg_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
{
	uint64_t kcr3, ucr3;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	invlpg(smp_tlb_addr1);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
	}
}
void | static void | ||||
invlrng_invpcid_handler(void) | invlrng_handler(vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2) | ||||
{ | { | ||||
vm_offset_t addr, addr2; | |||||
#ifdef COUNT_XINVLTLB_HITS | |||||
xhits_rng[PCPU_GET(cpuid)]++; | |||||
#endif /* COUNT_XINVLTLB_HITS */ | |||||
#ifdef COUNT_IPIS | |||||
(*ipi_invlrng_counts[PCPU_GET(cpuid)])++; | |||||
#endif /* COUNT_IPIS */ | |||||
addr = smp_tlb_addr1; | |||||
addr2 = smp_tlb_addr2; | |||||
do { | |||||
invlpg(addr); | |||||
addr += PAGE_SIZE; | |||||
} while (addr < addr2); | |||||
} | |||||
/*
 * Invalidate the page range [smp_tlb_addr1, smp_tlb_addr2) on this
 * CPU using INVPCID; with PTI, the same range is also flushed from
 * the user page-table context.
 */
static void
invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
    vm_offset_t smp_tlb_addr2)
{
	struct invpcid_descr d;
	vm_offset_t addr, addr2;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		do {
			invpcid(&d, INVPCID_ADDR);
			d.addr += PAGE_SIZE;
		} while (d.addr < addr2);
	}
}
/*
 * Invalidate the page range [smp_tlb_addr1, smp_tlb_addr2) on this
 * CPU, PCID-without-INVPCID configuration; the user page-table range
 * is only flushed for the currently active pmap.
 */
static void
invlrng_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
    vm_offset_t smp_tlb_addr2)
{
	vm_offset_t addr, addr2;
	uint64_t kcr3, ucr3;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
	}
}
/*
 * Service INVL_OP_CACHE: write back and invalidate this CPU's caches.
 */
static void
invlcache_handler(void)
{
#ifdef COUNT_IPIS
	(*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	wbinvd();
}
/*
 * Dispatch a single invalidation request to the handler matching its
 * operation code.
 */
static void
invlop_handler_one_req(enum invl_op_codes smp_tlb_op, pmap_t smp_tlb_pmap,
    vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2)
{
	switch (smp_tlb_op) {
	case INVL_OP_TLB:
		invltlb_handler(smp_tlb_pmap);
		break;
	case INVL_OP_TLB_INVPCID:
		invltlb_invpcid_handler(smp_tlb_pmap);
		break;
	case INVL_OP_TLB_INVPCID_PTI:
		invltlb_invpcid_pti_handler(smp_tlb_pmap);
		break;
	case INVL_OP_TLB_PCID:
		invltlb_pcid_handler(smp_tlb_pmap);
		break;
	case INVL_OP_PGRNG:
		invlrng_handler(smp_tlb_addr1, smp_tlb_addr2);
		break;
	case INVL_OP_PGRNG_INVPCID:
		invlrng_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1,
		    smp_tlb_addr2);
		break;
	case INVL_OP_PGRNG_PCID:
		invlrng_pcid_handler(smp_tlb_pmap, smp_tlb_addr1,
		    smp_tlb_addr2);
		break;
	case INVL_OP_PG:
		invlpg_handler(smp_tlb_addr1);
		break;
	case INVL_OP_PG_INVPCID:
		invlpg_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1);
		break;
	case INVL_OP_PG_PCID:
		invlpg_pcid_handler(smp_tlb_pmap, smp_tlb_addr1);
		break;
	case INVL_OP_CACHE:
		invlcache_handler();
		break;
	default:
		__assert_unreachable();
		break;
	}
}
/*
 * IPI_INVLOP handler.  Scans this CPU's scoreboard line for zeroed
 * (pending) slots, services the corresponding initiators' requests,
 * and rescans until no request remains, since several initiators can
 * signal this CPU concurrently.
 */
void
invlop_handler(void)
{
	struct pcpu *initiator_pc;
	pmap_t smp_tlb_pmap;
	vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
	u_int initiator_cpu_id;
	enum invl_op_codes smp_tlb_op;
	uint32_t *scoreboard, smp_tlb_gen;

	scoreboard = invl_scoreboard_getcpu(PCPU_GET(cpuid));
	for (;;) {
		/* Find an initiator with a pending (zeroed) slot. */
		for (initiator_cpu_id = 0; initiator_cpu_id <= mp_maxid;
		    initiator_cpu_id++) {
			if (scoreboard[initiator_cpu_id] == 0)
				break;
		}
		if (initiator_cpu_id > mp_maxid)
			break;
		initiator_pc = cpuid_to_pcpu[initiator_cpu_id];

		/*
		 * This acquire fence and its corresponding release
		 * fence in smp_targeted_tlb_shootdown() order the
		 * read of the zero scoreboard slot before the reads
		 * of the initiator's PCPU pc_smp_tlb values.
		 */
		atomic_thread_fence_acq();
		smp_tlb_pmap = initiator_pc->pc_smp_tlb_pmap;
		smp_tlb_addr1 = initiator_pc->pc_smp_tlb_addr1;
		smp_tlb_addr2 = initiator_pc->pc_smp_tlb_addr2;
		smp_tlb_op = initiator_pc->pc_smp_tlb_op;
		smp_tlb_gen = initiator_pc->pc_smp_tlb_gen;

		/*
		 * Ensure that we do not make our scoreboard
		 * notification visible to the initiator until the
		 * pc_smp_tlb values are read.  The corresponding
		 * fence is implicitly provided by the barrier in the
		 * IPI send operation before the APIC ICR register
		 * write.
		 *
		 * As an optimization, the request is acknowledged
		 * before the actual invalidation is performed.  It is
		 * safe because the target CPU cannot return to
		 * userspace before the handler finishes.  Only NMI
		 * can preempt the handler, but NMI would see the
		 * kernel handler frame and not touch the
		 * not-yet-invalidated user page table.
		 */
		atomic_thread_fence_acq();
		atomic_store_int(&scoreboard[initiator_cpu_id], smp_tlb_gen);

		invlop_handler_one_req(smp_tlb_op, smp_tlb_pmap, smp_tlb_addr1,
		    smp_tlb_addr2);
	}
}