Changeset View
Changeset View
Standalone View
Standalone View
head/sys/i386/i386/mp_machdep.c
Show First 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | |||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/bus.h> | #include <sys/bus.h> | ||||
#include <sys/cons.h> /* cngetc() */ | #include <sys/cons.h> /* cngetc() */ | ||||
#include <sys/cpuset.h> | #include <sys/cpuset.h> | ||||
#ifdef GPROF | #ifdef GPROF | ||||
#include <sys/gmon.h> | #include <sys/gmon.h> | ||||
#endif | #endif | ||||
#include <sys/kdb.h> | |||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/memrange.h> | #include <sys/memrange.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/pcpu.h> | #include <sys/pcpu.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
▲ Show 20 Lines • Show All 396 Lines • ▼ Show 20 Lines | start_ap(int apic_id) | ||||
/* Wait up to 5 seconds for it to start. */ | /* Wait up to 5 seconds for it to start. */ | ||||
for (ms = 0; ms < 5000; ms++) { | for (ms = 0; ms < 5000; ms++) { | ||||
if (mp_naps > cpus) | if (mp_naps > cpus) | ||||
return 1; /* return SUCCESS */ | return 1; /* return SUCCESS */ | ||||
DELAY(1000); | DELAY(1000); | ||||
} | } | ||||
return 0; /* return FAILURE */ | return 0; /* return FAILURE */ | ||||
} | |||||
/*
 * Flush the TLB on other CPUs.
 */

/*
 * Variables needed for SMP tlb shootdown.  The initiator writes these
 * while holding smp_ipi_mtx (see smp_targeted_tlb_shootdown() below);
 * the IPI handlers on the target CPUs read them and then publish the
 * observed generation in their per-CPU pc_smp_tlb_done word, which is
 * how the initiator detects completion.
 */
vm_offset_t smp_tlb_addr1, smp_tlb_addr2;	/* range being invalidated */
pmap_t smp_tlb_pmap;				/* pmap the request targets */
volatile uint32_t smp_tlb_generation;		/* shootdown sequence number */
/*
 * Used by pmap to request cache or TLB invalidation on local and
 * remote processors.  Mask provides the set of remote CPUs which are
 * to be signalled with the invalidation IPI, specified by vector.  As
 * an optimization, the curcpu_cb callback is invoked on the calling
 * CPU while waiting for remote CPUs to complete the operation.
 *
 * The callback function is called unconditionally on the caller's
 * underlying processor, even when this processor is not set in the
 * mask.  So, the callback function must be prepared to handle such
 * spurious invocations.
 */
static void
smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
    vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
{
	cpuset_t other_cpus;
	volatile uint32_t *p_cpudone;
	uint32_t generation;
	int cpu;

	/*
	 * It is not necessary to signal other CPUs while booting or
	 * when in the debugger.  Run only the local callback.
	 */
	if (kdb_active || KERNEL_PANICKED() || !smp_started) {
		curcpu_cb(pmap, addr1, addr2);
		return;
	}

	/* Pin to this CPU so PCPU_GET(cpuid) stays valid throughout. */
	sched_pin();

	/*
	 * Check for other cpus.  Return if none.
	 */
	if (CPU_ISFULLSET(&mask)) {
		if (mp_ncpus <= 1)
			goto nospinexit;
	} else {
		/* Never IPI ourselves; the local work is done via the cb. */
		CPU_CLR(PCPU_GET(cpuid), &mask);
		if (CPU_EMPTY(&mask))
			goto nospinexit;
	}

	/*
	 * We spin below waiting for the remote CPUs to acknowledge,
	 * which requires that this CPU can still take interrupts.
	 */
	KASSERT((read_eflags() & PSL_I) != 0,
	    ("smp_targeted_tlb_shootdown: interrupts disabled"));

	/*
	 * smp_ipi_mtx serializes shootdown requests and protects the
	 * shared smp_tlb_* parameter variables below.
	 */
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_pmap = pmap;
	/* New generation: handlers echo this value when they finish. */
	generation = ++smp_tlb_generation;
	if (CPU_ISFULLSET(&mask)) {
		/* Broadcast is cheaper than per-CPU IPIs for a full set. */
		ipi_all_but_self(vector);
		other_cpus = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
	} else {
		other_cpus = mask;
		while ((cpu = CPU_FFS(&mask)) != 0) {
			cpu--;
			CPU_CLR(cpu, &mask);
			CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
			    cpu, vector);
			ipi_send_cpu(cpu, vector);
		}
	}
	/* Do the local work while the remote CPUs service the IPI. */
	curcpu_cb(pmap, addr1, addr2);
	/* Wait for every targeted CPU to post the new generation. */
	while ((cpu = CPU_FFS(&other_cpus)) != 0) {
		cpu--;
		CPU_CLR(cpu, &other_cpus);
		p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done;
		while (*p_cpudone != generation)
			ia32_pause();
	}
	mtx_unlock_spin(&smp_ipi_mtx);
	sched_unpin();
	return;

nospinexit:
	/* No remote CPUs to signal: just run the local callback. */
	curcpu_cb(pmap, addr1, addr2);
	sched_unpin();
}
/*
 * Invalidate the entire TLB on the CPUs in 'mask' (IPI_INVLTLB).
 * 'curcpu_cb' is invoked on the calling CPU while the remote CPUs
 * service the IPI.
 */
void
smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
{

	smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0, curcpu_cb);
#ifdef COUNT_XINVLTLB_HITS
	ipi_global++;
#endif
}
/*
 * Invalidate the single page mapping at 'addr' on the CPUs in 'mask'
 * (IPI_INVLPG).  'curcpu_cb' is invoked on the calling CPU while the
 * remote CPUs service the IPI.
 */
void
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
    smp_invl_cb_t curcpu_cb)
{

	smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0, curcpu_cb);
#ifdef COUNT_XINVLTLB_HITS
	ipi_page++;
#endif
}
/*
 * Invalidate the page mappings in [addr1, addr2) on the CPUs in
 * 'mask' (IPI_INVLRNG).  'curcpu_cb' is invoked on the calling CPU
 * while the remote CPUs service the IPI.
 */
void
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
    pmap_t pmap, smp_invl_cb_t curcpu_cb)
{

	smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2,
	    curcpu_cb);
#ifdef COUNT_XINVLTLB_HITS
	ipi_range++;
	ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
/*
 * Flush CPU caches on all CPUs (IPI_INVLCACHE; the remote handler
 * executes wbinvd).  'curcpu_cb' is invoked on the calling CPU while
 * the remote CPUs service the IPI.
 */
void
smp_cache_flush(smp_invl_cb_t curcpu_cb)
{

	smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0,
	    curcpu_cb);
}
/* | |||||
* Handlers for TLB related IPIs | |||||
*/ | |||||
void | |||||
invltlb_handler(void) | |||||
{ | |||||
uint32_t generation; | |||||
#ifdef COUNT_XINVLTLB_HITS | |||||
xhits_gbl[PCPU_GET(cpuid)]++; | |||||
#endif /* COUNT_XINVLTLB_HITS */ | |||||
#ifdef COUNT_IPIS | |||||
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++; | |||||
#endif /* COUNT_IPIS */ | |||||
/* | |||||
* Reading the generation here allows greater parallelism | |||||
* since invalidating the TLB is a serializing operation. | |||||
*/ | |||||
generation = smp_tlb_generation; | |||||
if (smp_tlb_pmap == kernel_pmap) | |||||
invltlb_glob(); | |||||
PCPU_SET(smp_tlb_done, generation); | |||||
} | |||||
void | |||||
invlpg_handler(void) | |||||
{ | |||||
uint32_t generation; | |||||
#ifdef COUNT_XINVLTLB_HITS | |||||
xhits_pg[PCPU_GET(cpuid)]++; | |||||
#endif /* COUNT_XINVLTLB_HITS */ | |||||
#ifdef COUNT_IPIS | |||||
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++; | |||||
#endif /* COUNT_IPIS */ | |||||
generation = smp_tlb_generation; /* Overlap with serialization */ | |||||
if (smp_tlb_pmap == kernel_pmap) | |||||
invlpg(smp_tlb_addr1); | |||||
PCPU_SET(smp_tlb_done, generation); | |||||
} | |||||
void | |||||
invlrng_handler(void) | |||||
{ | |||||
vm_offset_t addr, addr2; | |||||
uint32_t generation; | |||||
#ifdef COUNT_XINVLTLB_HITS | |||||
xhits_rng[PCPU_GET(cpuid)]++; | |||||
#endif /* COUNT_XINVLTLB_HITS */ | |||||
#ifdef COUNT_IPIS | |||||
(*ipi_invlrng_counts[PCPU_GET(cpuid)])++; | |||||
#endif /* COUNT_IPIS */ | |||||
addr = smp_tlb_addr1; | |||||
addr2 = smp_tlb_addr2; | |||||
generation = smp_tlb_generation; /* Overlap with serialization */ | |||||
if (smp_tlb_pmap == kernel_pmap) { | |||||
do { | |||||
invlpg(addr); | |||||
addr += PAGE_SIZE; | |||||
} while (addr < addr2); | |||||
} | |||||
PCPU_SET(smp_tlb_done, generation); | |||||
} | |||||
void | |||||
invlcache_handler(void) | |||||
{ | |||||
uint32_t generation; | |||||
#ifdef COUNT_IPIS | |||||
(*ipi_invlcache_counts[PCPU_GET(cpuid)])++; | |||||
#endif /* COUNT_IPIS */ | |||||
/* | |||||
* Reading the generation here allows greater parallelism | |||||
* since wbinvd is a serializing instruction. Without the | |||||
* temporary, we'd wait for wbinvd to complete, then the read | |||||
* would execute, then the dependent write, which must then | |||||
* complete before return from interrupt. | |||||
*/ | |||||
generation = smp_tlb_generation; | |||||
wbinvd(); | |||||
PCPU_SET(smp_tlb_done, generation); | |||||
} | } |