Changeset View
Standalone View
sys/amd64/amd64/mp_machdep.c
Show All 38 Lines | |||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/bus.h> | #include <sys/bus.h> | ||||
#include <sys/cpuset.h> | #include <sys/cpuset.h> | ||||
#include <sys/domainset.h> | #include <sys/domainset.h> | ||||
#ifdef GPROF | #ifdef GPROF | ||||
#include <sys/gmon.h> | #include <sys/gmon.h> | ||||
#endif | #endif | ||||
#include <sys/kdb.h> | |||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/memrange.h> | #include <sys/memrange.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/pcpu.h> | #include <sys/pcpu.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
int i; | int i; | ||||
/* Initialize the logical ID to APIC ID table. */ | /* Initialize the logical ID to APIC ID table. */ | ||||
for (i = 0; i < MAXCPU; i++) { | for (i = 0; i < MAXCPU; i++) { | ||||
cpu_apic_ids[i] = -1; | cpu_apic_ids[i] = -1; | ||||
} | } | ||||
/* Install an inter-CPU IPI for TLB invalidation */ | /* Install an inter-CPU IPI for for cache and TLB invalidations. */ | ||||
if (pmap_pcid_enabled) { | setidt(IPI_INVLOP, pti ? IDTVEC(invlop_pti) : IDTVEC(invlop), | ||||
alc: "... for cache and TLB invalidations." | |||||
if (invpcid_works) { | |||||
setidt(IPI_INVLTLB, pti ? | |||||
IDTVEC(invltlb_invpcid_pti_pti) : | |||||
IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT, | |||||
SEL_KPL, 0); | |||||
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) : | |||||
IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) : | |||||
IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
} else { | |||||
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) : | |||||
IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) : | |||||
IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) : | |||||
IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0); | |||||
} | |||||
} else { | |||||
setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb), | |||||
SDT_SYSIGT, SEL_KPL, 0); | SDT_SYSIGT, SEL_KPL, 0); | ||||
setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg), | |||||
SDT_SYSIGT, SEL_KPL, 0); | |||||
setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng), | |||||
SDT_SYSIGT, SEL_KPL, 0); | |||||
} | |||||
/* Install an inter-CPU IPI for cache invalidation. */ | |||||
setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache), | |||||
SDT_SYSIGT, SEL_KPL, 0); | |||||
/* Install an inter-CPU IPI for all-CPU rendezvous */ | /* Install an inter-CPU IPI for all-CPU rendezvous */ | ||||
setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) : | setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) : | ||||
IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); | IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); | ||||
/* Install generic inter-CPU IPI handler */ | /* Install generic inter-CPU IPI handler */ | ||||
setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) : | setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) : | ||||
IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); | IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | init_secondary(void) | ||||
pc->pc_tss = (struct system_segment_descriptor *)&gdt[GPROC0_SEL]; | pc->pc_tss = (struct system_segment_descriptor *)&gdt[GPROC0_SEL]; | ||||
pc->pc_fs32p = &gdt[GUFS32_SEL]; | pc->pc_fs32p = &gdt[GUFS32_SEL]; | ||||
pc->pc_gs32p = &gdt[GUGS32_SEL]; | pc->pc_gs32p = &gdt[GUGS32_SEL]; | ||||
pc->pc_ldt = (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]; | pc->pc_ldt = (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]; | ||||
/* See comment in pmap_bootstrap(). */ | /* See comment in pmap_bootstrap(). */ | ||||
pc->pc_pcid_next = PMAP_PCID_KERN + 2; | pc->pc_pcid_next = PMAP_PCID_KERN + 2; | ||||
pc->pc_pcid_gen = 1; | pc->pc_pcid_gen = 1; | ||||
pc->pc_smp_tlb_gen = 1; | |||||
/* Init tss */ | /* Init tss */ | ||||
pc->pc_common_tss = __pcpu[0].pc_common_tss; | pc->pc_common_tss = __pcpu[0].pc_common_tss; | ||||
pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + | pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) + | ||||
IOPERM_BITMAP_SIZE; | IOPERM_BITMAP_SIZE; | ||||
pc->pc_common_tss.tss_rsp0 = 0; | pc->pc_common_tss.tss_rsp0 = 0; | ||||
/* The doublefault stack runs on IST1. */ | /* The doublefault stack runs on IST1. */ | ||||
np = ((struct nmi_pcpu *)&doublefault_stack[PAGE_SIZE]) - 1; | np = ((struct nmi_pcpu *)&doublefault_stack[PAGE_SIZE]) - 1; | ||||
▲ Show 20 Lines • Show All 212 Lines • ▼ Show 20 Lines | start_ap(int apic_id) | ||||
for (ms = 0; ms < 5000; ms++) { | for (ms = 0; ms < 5000; ms++) { | ||||
if (mp_naps > cpus) | if (mp_naps > cpus) | ||||
return 1; /* return SUCCESS */ | return 1; /* return SUCCESS */ | ||||
DELAY(1000); | DELAY(1000); | ||||
} | } | ||||
return 0; /* return FAILURE */ | return 0; /* return FAILURE */ | ||||
} | } | ||||
/*
 * Flush the TLB on other CPUs.
 */
/*
 * Invalidation request. PCPU pc_smp_tlb_op uses u_int instead of the
 * enum to avoid both namespace and ABI issues (with enums).
 */
enum invl_op_codes {
	/* Whole-TLB flush variants, dispatched to invltlb_*_handler(). */
	INVL_OP_TLB = 1,
	INVL_OP_TLB_INVPCID = 2,
	INVL_OP_TLB_INVPCID_PTI = 3,
	INVL_OP_TLB_PCID = 4,
	/* Page-range flush variants, dispatched to invlrng_*_handler(). */
	INVL_OP_PGRNG = 5,
	INVL_OP_PGRNG_INVPCID = 6,
	INVL_OP_PGRNG_PCID = 7,
	/* Single-page flush variants, dispatched to invlpg_*_handler(). */
	INVL_OP_PG = 8,
	INVL_OP_PG_INVPCID = 9,
	INVL_OP_PG_PCID = 10,
	/* Cache flush (wbinvd), dispatched to invlcache_handler(). */
	INVL_OP_CACHE = 11,
};
/*
 * These variables are initialized at startup to reflect how each of
 * the different kinds of invalidations should be performed on the
 * current machine and environment.
 */
Done Inline ActionsIt would be good to incorporate text from the summary that explains the scoreboard's operation. alc: It would be good to incorporate text from the summary that explains the scoreboard's operation. | |||||
/*
 * Selected at startup by invl_scoreboard_init() according to whether
 * PCID is enabled, INVPCID works, and PTI is active.
 */
static enum invl_op_codes invl_op_tlb;
static enum invl_op_codes invl_op_pgrng;
static enum invl_op_codes invl_op_pg;

/*
 * Scoreboard of IPI completion notifications from target to IPI initiator.
 *
 * Each CPU can initiate shootdown IPI independently from other CPUs.
 * Initiator enters critical section, then fills its local PCPU
 * shootdown info (pc_smp_tlb_ vars), then clears scoreboard generation
 * at location (cpu, my_cpuid) for each target cpu.  After that IPI is
 * sent to all targets which scan for zeroed scoreboard generation
 * words.  Upon finding such word the shootdown data is read from
 * the corresponding CPU's pcpu, and the generation is set.  Meantime
 * the initiator loops waiting for all zeroed generations in the
 * scoreboard to update.
 *
 * Logically this is a (mp_maxid + 1) x (mp_maxid + 1) matrix of
 * uint32_t generation words, indexed by (target, initiator).
 */
static uint32_t *invl_scoreboard;
static void | |||||
invl_scoreboard_init(void *arg __unused) | |||||
{ | |||||
u_int i; | |||||
invl_scoreboard = malloc(sizeof(uint32_t) * (mp_maxid + 1) * | |||||
(mp_maxid + 1), M_DEVBUF, M_WAITOK); | |||||
for (i = 0; i < (mp_maxid + 1) * (mp_maxid + 1); i++) | |||||
invl_scoreboard[i] = 1; | |||||
if (pmap_pcid_enabled) { | |||||
if (invpcid_works) { | |||||
if (pti) | |||||
invl_op_tlb = INVL_OP_TLB_INVPCID_PTI; | |||||
else | |||||
invl_op_tlb = INVL_OP_TLB_INVPCID; | |||||
invl_op_pgrng = INVL_OP_PGRNG_INVPCID; | |||||
invl_op_pg = INVL_OP_PG_INVPCID; | |||||
} else { | |||||
invl_op_tlb = INVL_OP_TLB_PCID; | |||||
invl_op_pgrng = INVL_OP_PGRNG_PCID; | |||||
invl_op_pg = INVL_OP_PG_PCID; | |||||
} | |||||
} else { | |||||
invl_op_tlb = INVL_OP_TLB; | |||||
invl_op_pgrng = INVL_OP_PGRNG; | |||||
invl_op_pg = INVL_OP_PG; | |||||
} | |||||
} | |||||
SYSINIT(invl_ops, SI_SUB_SMP, SI_ORDER_FIRST, invl_scoreboard_init, NULL); | |||||
Done Inline Actions"Used by the pmap to request cache or TLB invalidation on ..." alc: "Used by the pmap to request cache or TLB invalidation on ..." | |||||
static uint32_t * | |||||
Done Inline ActionsIt's not clear what "by vector" refers to. "As an optimization, the curcpu_cb callback ..." alc: It's not clear what "by vector" refers to.
"As an optimization, the curcpu_cb callback ..." | |||||
invl_scoreboard_getcpu(u_int cpu) | |||||
{ | |||||
return (invl_scoreboard + cpu * (mp_maxid + 1)); | |||||
} | |||||
static uint32_t * | |||||
invl_scoreboard_slot(u_int cpu) | |||||
{ | |||||
return (invl_scoreboard_getcpu(cpu) + PCPU_GET(cpuid)); | |||||
} | |||||
Done Inline Actions"which" -> "that" The use of "which" here implies that all IPIs are protected by spinlocks. In contrast, "that" says that you are talking about the subset of IPIs, which is what you intend. Here is a good rule of thumb: https://www.writersdigest.com/write-better-fiction/which-vs-that alc: "which" -> "that"
The use of "which" here implies that all IPIs are protected by spinlocks. | |||||
/*
 * Used by the pmap to request cache or TLB invalidation on local and
 * remote processors.  Mask provides the set of remote CPUs that are
 * to be signalled with the invalidation IPI.  As an optimization, the
 * curcpu_cb callback is invoked on the calling CPU while waiting for
 * remote CPUs to complete the operation.
 *
 * The callback function is called unconditionally on the caller's
 * underlying processor, even when this processor is not set in the
 * mask.  So, the callback function must be prepared to handle such
 * spurious invocations.
 *
 * Interrupts must be enabled when calling the function with SMP
 * started, to avoid deadlock with other IPIs that are protected with
 * the smp_ipi_mtx spinlock at the initiator side.
 */
static void
smp_targeted_tlb_shootdown(cpuset_t mask, pmap_t pmap, vm_offset_t addr1,
    vm_offset_t addr2, smp_invl_cb_t curcpu_cb, enum invl_op_codes op)
{
	cpuset_t other_cpus, mask1;
	uint32_t generation, *p_cpudone;
	int cpu;

	/*
	 * It is not necessary to signal other CPUs while booting or
	 * when in the debugger.
	 */
	if (kdb_active || KERNEL_PANICKED() || !smp_started) {
		curcpu_cb(pmap, addr1, addr2);
		return;
	}

	sched_pin();

	/*
	 * Check for other cpus.  Return if none.
	 */
	if (CPU_ISFULLSET(&mask)) {
		if (mp_ncpus <= 1)
			goto nospinexit;
	} else {
		CPU_CLR(PCPU_GET(cpuid), &mask);
		if (CPU_EMPTY(&mask))
			goto nospinexit;
	}

	/*
	 * The initiator must have interrupts enabled, which prevents
	 * non-invalidation IPIs that take the smp_ipi_mtx spinlock
	 * from deadlocking with us.  On the other hand, preemption
	 * must be disabled to pin the initiator to its instance of
	 * the pcpu pc_smp_tlb data and scoreboard line.
	 */
	KASSERT((read_rflags() & PSL_I) != 0,
	    ("smp_targeted_tlb_shootdown: interrupts disabled"));
	critical_enter();

	/* Publish the request in our PCPU area for targets to read. */
	PCPU_SET(smp_tlb_addr1, addr1);
	PCPU_SET(smp_tlb_addr2, addr2);
	PCPU_SET(smp_tlb_pmap, pmap);
	/* Generation 0 is reserved to mark a pending request. */
	generation = PCPU_GET(smp_tlb_gen);
	if (++generation == 0)
		generation = 1;
	PCPU_SET(smp_tlb_gen, generation);
	PCPU_SET(smp_tlb_op, op);
	/* Fence between filling smp_tlb fields and clearing scoreboard. */
	atomic_thread_fence_rel();

	mask1 = mask;
	while ((cpu = CPU_FFS(&mask1)) != 0) {
		cpu--;
		CPU_CLR(cpu, &mask1);
		KASSERT(*invl_scoreboard_slot(cpu) != 0,
		    ("IPI scoreboard is zero, initiator %d target %d",
		    PCPU_GET(cpuid), cpu));
		*invl_scoreboard_slot(cpu) = 0;
	}

	/*
	 * The IPI acts as a fence between writing to the scoreboard above
	 * (zeroing slot) and reading from it below (wait for
	 * acknowledge).
	 */
	if (CPU_ISFULLSET(&mask)) {
		ipi_all_but_self(IPI_INVLOP);
		other_cpus = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
	} else {
		other_cpus = mask;
		while ((cpu = CPU_FFS(&mask)) != 0) {
			cpu--;
			CPU_CLR(cpu, &mask);
			CTR3(KTR_SMP, "%s: cpu: %d invl ipi op: %x", __func__,
			    cpu, op);
			ipi_send_cpu(cpu, IPI_INVLOP);
		}
	}
	/* Perform the local invalidation while the targets work. */
	curcpu_cb(pmap, addr1, addr2);
	/* Wait for every target to store our generation into its slot. */
	while ((cpu = CPU_FFS(&other_cpus)) != 0) {
		cpu--;
		CPU_CLR(cpu, &other_cpus);
		p_cpudone = invl_scoreboard_slot(cpu);
		while (atomic_load_int(p_cpudone) != generation)
			ia32_pause();
	}
	critical_exit();
	sched_unpin();
	return;

nospinexit:
	curcpu_cb(pmap, addr1, addr2);
	sched_unpin();
}
/*
 * Request a whole-TLB flush on the CPUs in mask, running curcpu_cb
 * locally while the remote CPUs process the IPI.
 */
void
smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
{
	smp_targeted_tlb_shootdown(mask, pmap, 0, 0, curcpu_cb, invl_op_tlb);
#ifdef COUNT_XINVLTLB_HITS
	ipi_global++;
#endif
}
/*
 * Request invalidation of a single page mapping at addr on the CPUs
 * in mask, running curcpu_cb locally.
 */
void
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
    smp_invl_cb_t curcpu_cb)
{
	smp_targeted_tlb_shootdown(mask, pmap, addr, 0, curcpu_cb, invl_op_pg);
#ifdef COUNT_XINVLTLB_HITS
	ipi_page++;
#endif
}
/*
 * Request invalidation of the page mappings in [addr1, addr2) on the
 * CPUs in mask, running curcpu_cb locally.
 */
void
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
    pmap_t pmap, smp_invl_cb_t curcpu_cb)
{
	smp_targeted_tlb_shootdown(mask, pmap, addr1, addr2, curcpu_cb,
	    invl_op_pgrng);
#ifdef COUNT_XINVLTLB_HITS
	ipi_range++;
	ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
/*
 * Request a cache flush (INVL_OP_CACHE, i.e. wbinvd in the handler)
 * on all CPUs, running curcpu_cb locally.
 */
void
smp_cache_flush(smp_invl_cb_t curcpu_cb)
{
	smp_targeted_tlb_shootdown(all_cpus, NULL, 0, 0, curcpu_cb,
	    INVL_OP_CACHE);
}
Done Inline ActionsI speculate that ordering the parameters to smp_targeted_tlb_shootdown() consistently with smp_masked_invlpg_range() with the "op" being last to the latter will eliminate some move instructions. alc: I speculate that ordering the parameters to smp_targeted_tlb_shootdown() consistently with… | |||||
/*
 * Handlers for TLB related IPIs.
 *
 * Flush the whole TLB; use the global flush when the request is for
 * the kernel pmap so that global mappings are invalidated too.
 */
static void
invltlb_handler(pmap_t smp_tlb_pmap)
{
#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	if (smp_tlb_pmap == kernel_pmap)
		invltlb_glob();
	else
		invltlb();
}
/*
 * Whole-TLB flush using the INVPCID instruction: invalidate the
 * context for the pmap's PCID on this CPU, globally for the kernel
 * pmap.
 */
static void
invltlb_invpcid_handler(pmap_t smp_tlb_pmap)
{
	struct invpcid_descr d;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB :
	    INVPCID_CTX);
}
/*
 * Whole-TLB flush using INVPCID with PTI enabled: a user pmap has two
 * PCIDs (kernel and user page tables), both of which must be
 * invalidated.
 */
static void
invltlb_invpcid_pti_handler(pmap_t smp_tlb_pmap)
{
	struct invpcid_descr d;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
	d.pad = 0;
	d.addr = 0;
	if (smp_tlb_pmap == kernel_pmap) {
		/*
		 * This invalidation actually needs to clear kernel
		 * mappings from the TLB in the current pmap, but
		 * since we were asked for the flush in the kernel
		 * pmap, achieve it by performing global flush.
		 */
		invpcid(&d, INVPCID_CTXGLOB);
	} else {
		invpcid(&d, INVPCID_CTX);
		/* Also flush the user page-table PCID variant. */
		d.pcid |= PMAP_PCID_USER_PT;
		invpcid(&d, INVPCID_CTX);
	}
}
/*
 * Whole-TLB flush with PCID enabled but no INVPCID instruction:
 * flush by reloading CR3 without CR3_PCID_SAVE.
 */
static void
invltlb_pcid_handler(pmap_t smp_tlb_pmap)
{
	uint64_t kcr3, ucr3;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	if (smp_tlb_pmap == kernel_pmap) {
		invltlb_glob();
	} else {
		/*
		 * The current pmap might not be equal to
		 * smp_tlb_pmap.  The clearing of the pm_gen in
		 * pmap_invalidate_all() takes care of TLB
		 * invalidation when switching to the pmap on this
		 * CPU.
		 */
		if (PCPU_GET(curpmap) == smp_tlb_pmap) {
			pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
			kcr3 = smp_tlb_pmap->pm_cr3 | pcid;
			ucr3 = smp_tlb_pmap->pm_ucr3;
			if (ucr3 != PMAP_NO_CR3) {
				ucr3 |= PMAP_PCID_USER_PT | pcid;
				pmap_pti_pcid_invalidate(ucr3, kcr3);
			} else
				load_cr3(kcr3);
		}
	}
}
/*
 * Invalidate a single page mapping on this CPU (no PCID).
 */
static void
invlpg_handler(vm_offset_t smp_tlb_addr1)
{
#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	invlpg(smp_tlb_addr1);
}
/*
 * Invalidate a single page mapping using INVLPG, and additionally the
 * user page-table PCID mapping via INVPCID when the pmap has a user
 * CR3 (PTI).
 */
static void
invlpg_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
{
	struct invpcid_descr d;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	invlpg(smp_tlb_addr1);
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		invpcid(&d, INVPCID_ADDR);
	}
}
/*
 * Invalidate a single page mapping with PCID but no INVPCID: INVLPG
 * for the kernel page tables, plus a CR3-based flush of the user
 * page-table mapping when the pmap is current and has a user CR3.
 */
static void
invlpg_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1)
{
	uint64_t kcr3, ucr3;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_pg[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	invlpg(smp_tlb_addr1);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1);
	}
}
/*
 * Invalidate the page mappings in [smp_tlb_addr1, smp_tlb_addr2) on
 * this CPU, one INVLPG per page (no PCID).
 */
static void
invlrng_handler(vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2)
{
	vm_offset_t addr, addr2;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
}
/*
 * Invalidate a page range via INVLPG, and additionally the user
 * page-table PCID mappings via per-page INVPCID when the pmap has a
 * user CR3 (PTI).
 */
static void
invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
    vm_offset_t smp_tlb_addr2)
{
	struct invpcid_descr d;
	vm_offset_t addr, addr2;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) {
		d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid |
		    PMAP_PCID_USER_PT;
		d.pad = 0;
		d.addr = smp_tlb_addr1;
		do {
			invpcid(&d, INVPCID_ADDR);
			d.addr += PAGE_SIZE;
		} while (d.addr < addr2);
	}
}
/*
 * Invalidate a page range with PCID but no INVPCID: INVLPG per page
 * for the kernel page tables, plus a CR3-based range flush of the
 * user page-table mappings when the pmap is current and has a user
 * CR3.
 */
static void
invlrng_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1,
    vm_offset_t smp_tlb_addr2)
{
	vm_offset_t addr, addr2;
	uint64_t kcr3, ucr3;
	uint32_t pcid;

#ifdef COUNT_XINVLTLB_HITS
	xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
#ifdef COUNT_IPIS
	(*ipi_invlrng_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	addr = smp_tlb_addr1;
	addr2 = smp_tlb_addr2;
	do {
		invlpg(addr);
		addr += PAGE_SIZE;
	} while (addr < addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) {
		pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid;
		kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE;
		ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE;
		pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2);
	}
}
/*
 * Flush the processor caches on this CPU (WBINVD) in response to an
 * INVL_OP_CACHE request.
 */
static void
invlcache_handler(void)
{
#ifdef COUNT_IPIS
	(*ipi_invlcache_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
	wbinvd();
}
/*
 * Dispatch a single invalidation request, already read from the
 * initiator's PCPU area, to the handler matching its operation code.
 */
static void
invlop_handler_one_req(enum invl_op_codes smp_tlb_op, pmap_t smp_tlb_pmap,
    vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2)
{
	switch (smp_tlb_op) {
	case INVL_OP_TLB:
		invltlb_handler(smp_tlb_pmap);
		break;
	case INVL_OP_TLB_INVPCID:
		invltlb_invpcid_handler(smp_tlb_pmap);
		break;
	case INVL_OP_TLB_INVPCID_PTI:
		invltlb_invpcid_pti_handler(smp_tlb_pmap);
		break;
	case INVL_OP_TLB_PCID:
		invltlb_pcid_handler(smp_tlb_pmap);
		break;
	case INVL_OP_PGRNG:
		invlrng_handler(smp_tlb_addr1, smp_tlb_addr2);
		break;
	case INVL_OP_PGRNG_INVPCID:
		invlrng_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1,
		    smp_tlb_addr2);
		break;
	case INVL_OP_PGRNG_PCID:
		invlrng_pcid_handler(smp_tlb_pmap, smp_tlb_addr1,
		    smp_tlb_addr2);
		break;
	case INVL_OP_PG:
		invlpg_handler(smp_tlb_addr1);
		break;
	case INVL_OP_PG_INVPCID:
		invlpg_invpcid_handler(smp_tlb_pmap, smp_tlb_addr1);
		break;
	case INVL_OP_PG_PCID:
		invlpg_pcid_handler(smp_tlb_pmap, smp_tlb_addr1);
		break;
	case INVL_OP_CACHE:
		invlcache_handler();
		break;
	default:
		__assert_unreachable();
		break;
	}
}
/*
 * IPI_INVLOP interrupt handler, executed on target CPUs.  Scan this
 * CPU's scoreboard row for zeroed generation words, which mark
 * pending requests; for each one, read the request from the
 * initiator's PCPU area, acknowledge it, and perform it.  Loop until
 * no pending request remains, since several initiators may have
 * posted requests under a single IPI.
 */
void
invlop_handler(void)
{
	struct pcpu *initiator_pc;
	pmap_t smp_tlb_pmap;
	vm_offset_t smp_tlb_addr1, smp_tlb_addr2;
	u_int initiator_cpu_id;
	enum invl_op_codes smp_tlb_op;
	uint32_t *scoreboard, smp_tlb_gen;

	scoreboard = invl_scoreboard_getcpu(PCPU_GET(cpuid));
	for (;;) {
		/* Find a pending request (zeroed generation word). */
		for (initiator_cpu_id = 0; initiator_cpu_id <= mp_maxid;
		    initiator_cpu_id++) {
			if (scoreboard[initiator_cpu_id] == 0)
				break;
		}
		if (initiator_cpu_id > mp_maxid)
			break;
		initiator_pc = cpuid_to_pcpu[initiator_cpu_id];

		/*
		 * This acquire fence and its corresponding release
		 * fence in smp_targeted_tlb_shootdown() is between
		 * reading the zero scoreboard slot and accessing the
		 * PCPU of the initiator for the pc_smp_tlb values.
		 */
		atomic_thread_fence_acq();
		smp_tlb_pmap = initiator_pc->pc_smp_tlb_pmap;
		smp_tlb_addr1 = initiator_pc->pc_smp_tlb_addr1;
		smp_tlb_addr2 = initiator_pc->pc_smp_tlb_addr2;
		smp_tlb_op = initiator_pc->pc_smp_tlb_op;
		smp_tlb_gen = initiator_pc->pc_smp_tlb_gen;

		/*
		 * Ensure that we do not make our scoreboard
		 * notification visible to the initiator until the
		 * pc_smp_tlb values are read.  The corresponding
		 * fence is implicitly provided by the barrier in the
		 * IPI send operation before the APIC ICR register
		 * write.
		 *
		 * As an optimization, the request is acknowledged
		 * before the actual invalidation is performed.  It is
		 * safe because the target CPU cannot return to
		 * userspace before the handler finishes.  Only NMI
		 * can preempt the handler, but NMI would see the
		 * kernel handler frame and not touch the
		 * not-yet-invalidated user page table.
		 */
		atomic_thread_fence_acq();
		atomic_store_int(&scoreboard[initiator_cpu_id], smp_tlb_gen);

		invlop_handler_one_req(smp_tlb_op, smp_tlb_pmap, smp_tlb_addr1,
		    smp_tlb_addr2);
	}
}
"... for cache and TLB invalidations."