Index: head/sys/amd64/amd64/pmap.c
===================================================================
--- head/sys/amd64/amd64/pmap.c
+++ head/sys/amd64/amd64/pmap.c
@@ -125,6 +125,7 @@
 #include
 #include
 #include
+#include <sys/smr.h>
 #include
 #include
 #include
@@ -2647,7 +2648,7 @@
         * "all" host cpus and force any vcpu context to exit as a
         * side-effect.
         */
-        atomic_add_acq_long(&pmap->pm_eptgen, 1);
+        atomic_add_long(&pmap->pm_eptgen, 1);
         break;
 default:
         panic("pmap_update_pde_store: bad pm_type %d", pmap->pm_type);
@@ -2722,6 +2723,7 @@
 static __inline void
 pmap_invalidate_ept(pmap_t pmap)
 {
+        smr_seq_t goal;
         int ipinum;

         sched_pin();
@@ -2742,15 +2744,30 @@
         * Each vcpu keeps a cache of this counter and compares it
         * just before a vmresume. If the counter is out-of-date an
         * invept will be done to flush stale mappings from the TLB.
+        *
+        * To ensure that all vCPU threads have observed the new counter
+        * value before returning, we use SMR.  Ordering is important here:
+        * the VMM enters an SMR read section before loading the counter
+        * and after updating the pm_active bit set.  Thus, pm_active is
+        * a superset of active readers, and any reader that has observed
+        * the goal has observed the new counter value.
         */
-        atomic_add_acq_long(&pmap->pm_eptgen, 1);
+        atomic_add_long(&pmap->pm_eptgen, 1);
+        goal = smr_advance(pmap->pm_eptsmr);
+
         /*
         * Force the vcpu to exit and trap back into the hypervisor.
         */
         ipinum = pmap->pm_flags & PMAP_NESTED_IPIMASK;
         ipi_selected(pmap->pm_active, ipinum);
         sched_unpin();
+
+        /*
+        * Ensure that all active vCPUs will observe the new generation counter
+        * value before executing any more guest instructions.
+        */
+        smr_wait(pmap->pm_eptsmr, goal);
 }

 static cpuset_t
@@ -4086,7 +4103,8 @@
         * address space.
         * Install minimal kernel mappings in PTI case.
         */
-        if (pm_type == PT_X86) {
+        switch (pm_type) {
+        case PT_X86:
                 pmap->pm_cr3 = pmltop_phys;
                 if (pmap_is_la57(pmap))
                         pmap_pinit_pml5(pmltop_pg);
@@ -4107,6 +4125,11 @@
                         rangeset_init(&pmap->pm_pkru, pkru_dup_range,
                             pkru_free_range, pmap, M_NOWAIT);
                 }
+                break;
+        case PT_EPT:
+        case PT_RVI:
+                pmap->pm_eptsmr = smr_create("pmap", 0, 0);
+                break;
         }

         pmap->pm_root.rt_root = 0;

Index: head/sys/amd64/include/pmap.h
===================================================================
--- head/sys/amd64/include/pmap.h
+++ head/sys/amd64/include/pmap.h
@@ -270,6 +270,7 @@
 #include
 #include
 #include
+#include <sys/_smr.h>
 #include
@@ -371,6 +372,7 @@
         struct pmap_statistics  pm_stats;       /* pmap statistics */
         struct vm_radix         pm_root;        /* spare page table pages */
         long                    pm_eptgen;      /* EPT pmap generation id */
+        smr_t                   pm_eptsmr;
         int                     pm_flags;
         struct pmap_pcids       pm_pcids[MAXCPU];
         struct rangeset         pm_pkru;

Index: head/sys/amd64/vmm/amd/svm.c
===================================================================
--- head/sys/amd64/vmm/amd/svm.c
+++ head/sys/amd64/vmm/amd/svm.c
@@ -38,6 +38,7 @@
 #include
 #include
 #include
+#include <sys/smr.h>
 #include
 #include
@@ -1800,15 +1801,17 @@
 }

 static void
-check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
+svm_pmap_activate(struct svm_softc *sc, int vcpuid, pmap_t pmap)
 {
         struct svm_vcpu *vcpustate;
         struct vmcb_ctrl *ctrl;
         long eptgen;
+        int cpu;
         bool alloc_asid;

-        KASSERT(CPU_ISSET(thiscpu, &pmap->pm_active), ("%s: nested pmap not "
-            "active on cpu %u", __func__, thiscpu));
+        cpu = curcpu;
+        CPU_SET_ATOMIC(cpu, &pmap->pm_active);
+        smr_enter(pmap->pm_eptsmr);

         vcpustate = svm_get_vcpu(sc, vcpuid);
         ctrl = svm_get_vmcb_ctrl(sc, vcpuid);
@@ -1849,10 +1852,10 @@
         */

         alloc_asid = false;
-        eptgen = pmap->pm_eptgen;
+        eptgen = atomic_load_long(&pmap->pm_eptgen);
         ctrl->tlb_ctrl = VMCB_TLB_FLUSH_NOTHING;

-        if (vcpustate->asid.gen != asid[thiscpu].gen) {
+        if (vcpustate->asid.gen != asid[cpu].gen) {
                 alloc_asid = true;      /* (c) and (d) */
         } else if (vcpustate->eptgen != eptgen) {
                 if (flush_by_asid())
@@ -1869,10 +1872,10 @@
         }

         if (alloc_asid) {
-                if (++asid[thiscpu].num >= nasid) {
-                        asid[thiscpu].num = 1;
-                        if (++asid[thiscpu].gen == 0)
-                                asid[thiscpu].gen = 1;
+                if (++asid[cpu].num >= nasid) {
+                        asid[cpu].num = 1;
+                        if (++asid[cpu].gen == 0)
+                                asid[cpu].gen = 1;
                         /*
                          * If this cpu does not support "flush-by-asid"
                          * then flush the entire TLB on a generation
                          * change.
                          */
                         if (!flush_by_asid())
                                 ctrl->tlb_ctrl = VMCB_TLB_FLUSH_ALL;
                 }
-                vcpustate->asid.gen = asid[thiscpu].gen;
-                vcpustate->asid.num = asid[thiscpu].num;
+                vcpustate->asid.gen = asid[cpu].gen;
+                vcpustate->asid.num = asid[cpu].num;

                 ctrl->asid = vcpustate->asid.num;
                 svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID);
@@ -1902,6 +1905,13 @@
             ("ASID mismatch: %u/%u", ctrl->asid, vcpustate->asid.num));
 }

+static void
+svm_pmap_deactivate(pmap_t pmap)
+{
+        smr_exit(pmap->pm_eptsmr);
+        CPU_CLR_ATOMIC(curcpu, &pmap->pm_active);
+}
+
 static __inline void
 disable_gintr(void)
 {
@@ -2083,14 +2093,11 @@
                 svm_inj_interrupts(svm_sc, vcpu, vlapic);

-                /* Activate the nested pmap on 'curcpu' */
-                CPU_SET_ATOMIC_ACQ(curcpu, &pmap->pm_active);
-
                 /*
                  * Check the pmap generation and the ASID generation to
                  * ensure that the vcpu does not use stale TLB mappings.
                  */
-                check_asid(svm_sc, vcpu, pmap, curcpu);
+                svm_pmap_activate(svm_sc, vcpu, pmap);

                 ctrl->vmcb_clean = vmcb_clean & ~vcpustate->dirty;
                 vcpustate->dirty = 0;
@@ -2102,7 +2109,7 @@
                 svm_launch(vmcb_pa, gctx, get_pcpu());
                 svm_dr_leave_guest(gctx);

-                CPU_CLR_ATOMIC(curcpu, &pmap->pm_active);
+                svm_pmap_deactivate(pmap);

                 /*
                  * The host GDTR and IDTR is saved by VMRUN and restored

Index: head/sys/amd64/vmm/intel/vmx.c
===================================================================
--- head/sys/amd64/vmm/intel/vmx.c
+++ head/sys/amd64/vmm/intel/vmx.c
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include <sys/smr.h>
 #include
 #include
@@ -1273,7 +1274,7 @@
         * Note also that this will invalidate mappings tagged with 'vpid'
         * for "all" EP4TAs.
         */
-        if (pmap->pm_eptgen == vmx->eptgen[curcpu]) {
+        if (atomic_load_long(&pmap->pm_eptgen) == vmx->eptgen[curcpu]) {
                 invvpid_desc._res1 = 0;
                 invvpid_desc._res2 = 0;
                 invvpid_desc.vpid = vmxstate->vpid;
@@ -2948,6 +2949,7 @@
         cpu = curcpu;

         CPU_SET_ATOMIC(cpu, &pmap->pm_active);
+        smr_enter(pmap->pm_eptsmr);
         eptgen = atomic_load_long(&pmap->pm_eptgen);
         if (eptgen != vmx->eptgen[cpu]) {
                 vmx->eptgen[cpu] = eptgen;
@@ -2959,6 +2961,7 @@
 static __inline void
 vmx_pmap_deactivate(struct vmx *vmx, pmap_t pmap)
 {
+        smr_exit(pmap->pm_eptsmr);
         CPU_CLR_ATOMIC(curcpu, &pmap->pm_active);
 }
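
The correctness argument in the new pmap_invalidate_ept() comment rests on the smr(9) KPI, so a condensed view of the write-side sequence may help when reading the hunks above. The sketch below is not code from the tree: struct nested_pmap and nested_pmap_invalidate() are hypothetical stand-ins for the relevant pmap fields and for pmap_invalidate_ept(), and everything except the generation bump, smr_advance(), IPI and smr_wait() steps has been stripped out.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/smr.h>
#include <machine/atomic.h>
#include <machine/smp.h>

/* Hypothetical, reduced stand-in for the nested-pmap fields used here. */
struct nested_pmap {
        long            np_gen;         /* plays the role of pm_eptgen */
        smr_t           np_smr;         /* plays the role of pm_eptsmr */
        cpuset_t        np_active;      /* plays the role of pm_active */
        u_int           np_ipinum;      /* plays the role of PMAP_NESTED_IPIMASK */
};

/*
 * Write side (what pmap_invalidate_ept() now does): bump the generation,
 * then make sure every vCPU thread that may still hold the old value has
 * left its SMR read section before returning.
 */
static void
nested_pmap_invalidate(struct nested_pmap *np)
{
        smr_seq_t goal;

        /*
         * The plain atomic add (no _acq) is sufficient; the visibility
         * guarantee comes from the SMR hand-off below, not from the add.
         */
        atomic_add_long(&np->np_gen, 1);

        /* Advance the write sequence; readers must catch up to "goal". */
        goal = smr_advance(np->np_smr);

        /* Kick the CPUs currently in guest mode so they re-read the counter. */
        ipi_selected(np->np_active, np->np_ipinum);

        /*
         * Wait until every read section that began before smr_advance()
         * has ended.  After this, no vCPU can still be running on the
         * stale generation.
         */
        smr_wait(np->np_smr, goal);
}

The pmap.c hunks add exactly these two calls, smr_advance() before the IPI and smr_wait() after sched_unpin(), plus the SMR zone itself, created per nested pmap in pmap_pinit_type() via smr_create("pmap", 0, 0).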
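The matching read side is what svm_pmap_activate()/svm_pmap_deactivate() and vmx_pmap_activate()/vmx_pmap_deactivate() implement. Again a sketch using the same hypothetical struct as above, with run_guest() as a placeholder for the VMRUN/VMLAUNCH path; the part that matters is the order of operations around the generation check.

#include <sys/pcpu.h>           /* curcpu; other headers as in the sketch above */

static void run_guest(void);    /* placeholder for the VMRUN/VMLAUNCH path */

/*
 * Read side, on the CPU about to enter the guest:
 *   1. advertise the CPU in np_active,
 *   2. enter the SMR read section,
 *   3. load the generation and invalidate the TLB/ASID if it moved,
 * and undo steps 1 and 2 in reverse order after the guest exits.
 */
static void
nested_pmap_run(struct nested_pmap *np, long *cached_gen)
{
        long gen;

        CPU_SET_ATOMIC(curcpu, &np->np_active);         /* step 1 */
        smr_enter(np->np_smr);                          /* step 2 */

        gen = atomic_load_long(&np->np_gen);            /* step 3 */
        if (gen != *cached_gen) {
                *cached_gen = gen;
                /* invept / flush-by-ASID / new ASID allocation goes here */
        }

        run_guest();

        smr_exit(np->np_smr);
        CPU_CLR_ATOMIC(curcpu, &np->np_active);
}

Setting np_active before smr_enter(), and clearing it only after smr_exit(), is what makes pm_active a superset of the SMR readers: the writer's ipi_selected() therefore reaches every CPU that smr_wait() could end up waiting for while it is still executing guest code, which is the ordering invariant the new comment in pmap_invalidate_ept() spells out.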