D2350.id4960.diff

Index: sys/amd64/vmm/amd/svm.c
===================================================================
--- sys/amd64/vmm/amd/svm.c
+++ sys/amd64/vmm/amd/svm.c
@@ -82,6 +82,11 @@
#define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */
#define AMD_CPUID_SVM_AVIC BIT(13) /* AVIC present */
+/*
+ * Bitmap for all exceptions excluding unimplemented vectors 2 and 9.
+ */
+#define ALL_EXCEPTIONS_BITMAP 0xFFFFFDFB
+
#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \
VMCB_CACHE_IOPM | \
VMCB_CACHE_I | \
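
The new ALL_EXCEPTIONS_BITMAP constant folds the per-vector loop removed later in this diff into a single mask: all 32 exception vectors except 2 and 9, which that loop skipped as unimplemented. A standalone sketch of the arithmetic (not part of the patch), with a BIT() macro matching the one used in svm.c:

    #include <assert.h>

    #define BIT(n)  (1u << (n))

    /* Clearing bits 2 and 9 from an all-ones 32-bit mask yields 0xFFFFFDFB. */
    static_assert(~(BIT(2) | BIT(9)) == 0xFFFFFDFBu,
        "ALL_EXCEPTIONS_BITMAP excludes vectors 2 and 9");
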
@@ -126,6 +131,12 @@
static VMM_STAT_AMD(VCPU_EXITINTINFO, "VM exits during event delivery");
static VMM_STAT_AMD(VCPU_INTINFO_INJECTED, "Events pending at VM entry");
static VMM_STAT_AMD(VMEXIT_VINTR, "VM exits due to interrupt window");
+static VMM_STAT_AMD(VMEXIT_EXCEPTION_DURING_IRET, "VM exits due to exceptions "
+ "during iret");
+static VMM_STAT_AMD(NMI_SPECULATIVE_UNBLOCKING, "Number of times vNMI "
+ "unblocked speculatively");
+static VMM_STAT_AMD(NMI_PRECISE_UNBLOCKING, "Number of times vNMI "
+ "unblocked precisely");
static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val);
@@ -401,6 +412,22 @@
}
static void
+set_exception_bitmap(struct svm_softc *sc, int vcpu, uint32_t newval)
+{
+ struct vmcb_ctrl *ctrl;
+ uint32_t oldval;
+
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ oldval = ctrl->intercept[VMCB_EXC_INTCPT];
+ if (newval != oldval) {
+ ctrl->intercept[VMCB_EXC_INTCPT] = newval;
+ svm_set_dirty(sc, vcpu, VMCB_CACHE_I);
+ VCPU_CTR3(sc->vm, vcpu, "intercept[%d] modified "
+ "from %#x to %#x", VMCB_EXC_INTCPT, oldval, newval);
+ }
+}
+
+static void
vmcb_init(struct svm_softc *sc, int vcpu, uint64_t iopm_base_pa,
uint64_t msrpm_base_pa, uint64_t np_pml4)
{
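
The new set_exception_bitmap() helper touches the VMCB only when the intercept value actually changes, and then flags the intercept group via svm_set_dirty(..., VMCB_CACHE_I) so the hardware re-reads it on the next VMRUN (this corresponds to SVM's VMCB clean bits). A minimal sketch of that write-if-changed pattern, with hypothetical stand-ins for the VMCB field and the dirty mask:

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical stand-in for a cached VMCB control field. */
    struct cached_field {
        uint32_t value;
        bool     dirty;     /* plays the role of the VMCB_CACHE_I clean bit */
    };

    static void
    update_if_changed(struct cached_field *f, uint32_t newval)
    {
        /* Skip the write (and the cache invalidation) when nothing changed. */
        if (f->value != newval) {
            f->value = newval;
            f->dirty = true;
        }
    }
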
@@ -436,19 +463,11 @@
* Intercept everything when tracing guest exceptions otherwise
* just intercept machine check exception.
*/
- if (vcpu_trace_exceptions(sc->vm, vcpu)) {
- for (n = 0; n < 32; n++) {
- /*
- * Skip unimplemented vectors in the exception bitmap.
- */
- if (n == 2 || n == 9) {
- continue;
- }
- svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(n));
- }
- } else {
- svm_enable_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_MC));
- }
+ if (vcpu_trace_exceptions(sc->vm, vcpu))
+ mask = ALL_EXCEPTIONS_BITMAP;
+ else
+ mask = BIT(IDT_MC);
+ set_exception_bitmap(sc, vcpu, mask);
/* Intercept various events (for e.g. I/O, MSR and CPUID accesses) */
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IO);
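
The replaced loop and the new constant are equivalent. A small illustrative program (standalone, not part of the patch) that recomputes the loop's mask and checks it against ALL_EXCEPTIONS_BITMAP:

    #include <stdint.h>
    #include <stdio.h>

    #define BIT(n)  (1u << (n))

    int
    main(void)
    {
        uint32_t mask = 0;
        int n;

        for (n = 0; n < 32; n++) {
            if (n == 2 || n == 9)
                continue;       /* unimplemented vectors, as before */
            mask |= BIT(n);
        }
        printf("mask = %#x\n", mask);   /* prints mask = 0xfffffdfb */
        return (mask == 0xfffffdfbu ? 0 : 1);
    }
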
@@ -1027,48 +1046,37 @@
return (0);
}
-/*
- * Once an NMI is injected it blocks delivery of further NMIs until the handler
- * executes an IRET. The IRET intercept is enabled when an NMI is injected to
- * to track when the vcpu is done handling the NMI.
- */
-static int
-nmi_blocked(struct svm_softc *sc, int vcpu)
-{
- int blocked;
-
- blocked = svm_get_intercept(sc, vcpu, VMCB_CTRL1_INTCPT,
- VMCB_INTCPT_IRET);
- return (blocked);
-}
-
static void
-enable_nmi_blocking(struct svm_softc *sc, int vcpu)
+nmi_enable_iret_intercept(struct svm_softc *sc, int vcpu)
{
+ struct svm_vcpu *vcpustate;
- KASSERT(!nmi_blocked(sc, vcpu), ("vNMI already blocked"));
- VCPU_CTR0(sc->vm, vcpu, "vNMI blocking enabled");
+ vcpustate = svm_get_vcpu(sc, vcpu);
+ KASSERT(!vcpustate->nmi.blocking, ("invalid vNMI blocking state %d",
+ vcpustate->nmi.blocking));
+
+ vcpustate->nmi.blocking = NMI_IRET_INTERCEPT;
svm_enable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
+ VCPU_CTR0(sc->vm, vcpu, "vNMI iret intercept enabled");
}
static void
-clear_nmi_blocking(struct svm_softc *sc, int vcpu)
+nmi_enable_iret_tracing(struct svm_softc *sc, int vcpu)
{
+ struct svm_vcpu *vcpustate;
+ struct vmcb_state *state;
+ struct vmcb_ctrl *ctrl;
+ struct vmcb *vmcb;
int error;
- KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked"));
- VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared");
- /*
- * When the IRET intercept is cleared the vcpu will attempt to execute
- * the "iret" when it runs next. However, it is possible to inject
- * another NMI into the vcpu before the "iret" has actually executed.
- *
- * For e.g. if the "iret" encounters a #NPF when accessing the stack
- * it will trap back into the hypervisor. If an NMI is pending for
- * the vcpu it will be injected into the guest.
- *
- * XXX this needs to be fixed
- */
+ vcpustate = svm_get_vcpu(sc, vcpu);
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ vmcb = svm_get_vmcb(sc, vcpu);
+ state = &vmcb->state;
+
+ KASSERT(vcpustate->nmi.blocking == NMI_IRET_INTERCEPT,
+ ("invalid vNMI blocking state %d", vcpustate->nmi.blocking));
+
svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_IRET);
/*
@@ -1077,6 +1085,77 @@
*/
error = svm_modify_intr_shadow(sc, vcpu, 1);
KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error));
+
+ /*
+ * XXX
+ * Single stepping using the trap flag does not work across a task
+ * switch, so we unblock vNMIs right here. A vNMI can be prematurely
+ * injected into the vcpu if a #VMEXIT is triggered before the "iret"
+ * finishes execution (e.g. a #NPF).
+ */
+ if (state->rflags & PSL_NT) {
+ vcpustate->nmi.blocking = 0;
+ vmm_stat_incr(sc->vm, vcpu, NMI_SPECULATIVE_UNBLOCKING, 1);
+ VCPU_CTR0(sc->vm, vcpu, "vNMI unblocked speculatively");
+ return;
+ }
+
+ /*
+ * Single-step the "iret", which can trigger a #VMEXIT for the
+ * following reasons:
+ *
+ * 1. The "iret" executes successfully, in which case the single step
+ * will trigger a VMEXIT_EXCEPTION (IDT_DB).
+ * 2. The "iret" triggers an exception, which in turn will cause a
+ * VMEXIT_EXCEPTION (IDT_GP, IDT_PF, IDT_SS etc.).
+ * 3. A #VMEXIT is triggered for reasons unrelated to the "iret",
+ * e.g. a nested page fault, hardware interrupt or NMI.
+ *
+ * Per the section "Handling Multiple NMIs" in the Intel SDM,
+ * cases (1) and (2) unblock vNMIs.
+ */
+ vcpustate->nmi.blocking = NMI_IRET_TRACING;
+ vcpustate->nmi.rflags = state->rflags;
+ state->rflags |= PSL_RF | PSL_T;
+ vcpustate->nmi.exception_bitmap = ctrl->intercept[VMCB_EXC_INTCPT];
+ set_exception_bitmap(sc, vcpu, ALL_EXCEPTIONS_BITMAP);
+
+ VCPU_CTR4(sc->vm, vcpu, "vNMI iret tracing enabled: "
+ "rflags (%#lx/%#lx) exception_bitmap (%#08x/%#08x)",
+ vcpustate->nmi.rflags, state->rflags,
+ vcpustate->nmi.exception_bitmap, ALL_EXCEPTIONS_BITMAP);
+}
+
+static void
+nmi_unblock(struct svm_softc *sc, int vcpu, bool restore_rflags)
+{
+ struct svm_vcpu *vcpustate;
+ struct vmcb_state *state;
+ struct vmcb *vmcb;
+
+ vcpustate = svm_get_vcpu(sc, vcpu);
+ vmcb = svm_get_vmcb(sc, vcpu);
+ state = &vmcb->state;
+
+ KASSERT(vcpustate->nmi.blocking == NMI_IRET_TRACING,
+ ("invalid vNMI blocking state %d", vcpustate->nmi.blocking));
+
+ /*
+ * If the "iret" execution triggered an exception then restore the
+ * PSL_RF and PSL_T bits in %rflags before injecting the exception
+ * into the guest.
+ *
+ * If the "iret" instruction completes successfully then %rflags has
+ * already been restored from the NMI stack.
+ */
+ if (restore_rflags) {
+ state->rflags &= ~(PSL_RF | PSL_T);
+ state->rflags |= (vcpustate->nmi.rflags & (PSL_RF | PSL_T));
+ }
+ set_exception_bitmap(sc, vcpu, vcpustate->nmi.exception_bitmap);
+ vcpustate->nmi.blocking = 0;
+ vmm_stat_incr(sc->vm, vcpu, NMI_PRECISE_UNBLOCKING, 1);
+ VCPU_CTR0(sc->vm, vcpu, "vNMIs unblocked precisely");
}
static int
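
Together, nmi_enable_iret_intercept(), nmi_enable_iret_tracing() and nmi_unblock() form a three-state machine over the new nmi.blocking field (see the svm_softc.h hunk further down). A compilable model of the transitions, using the enum from this patch; the event-handler names are hypothetical:

    #include <assert.h>
    #include <stdbool.h>

    enum nmi_blocking {
        NMI_UNBLOCKED = 0,
        NMI_IRET_INTERCEPT,     /* iret intercept is enabled */
        NMI_IRET_TRACING,       /* iret tracing is enabled */
    };

    /* vNMI injected: start intercepting "iret" (nmi_enable_iret_intercept). */
    static enum nmi_blocking
    on_nmi_injected(enum nmi_blocking s)
    {
        assert(s == NMI_UNBLOCKED);
        return (NMI_IRET_INTERCEPT);
    }

    /* #VMEXIT on "iret": either unblock speculatively (PSL_NT set, since
     * trap-flag stepping does not survive a task switch) or single-step
     * the "iret" (nmi_enable_iret_tracing). */
    static enum nmi_blocking
    on_iret_vmexit(enum nmi_blocking s, bool nested_task)
    {
        assert(s == NMI_IRET_INTERCEPT);
        return (nested_task ? NMI_UNBLOCKED : NMI_IRET_TRACING);
    }

    /* #DB from the single step, or an exception raised by the "iret"
     * itself: unblock precisely (nmi_unblock). */
    static enum nmi_blocking
    on_exception_vmexit(enum nmi_blocking s)
    {
        assert(s == NMI_IRET_TRACING);
        return (NMI_UNBLOCKED);
    }

    int
    main(void)
    {
        enum nmi_blocking s = NMI_UNBLOCKED;

        s = on_nmi_injected(s);
        s = on_iret_vmexit(s, false);
        s = on_exception_vmexit(s);
        assert(s == NMI_UNBLOCKED);
        return (0);
    }
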
@@ -1206,6 +1285,7 @@
{
struct vmcb *vmcb;
struct vmcb_state *state;
+ struct svm_vcpu *vcpustate;
struct vmcb_ctrl *ctrl;
struct svm_regctx *ctx;
uint64_t code, info1, info2, val;
@@ -1214,6 +1294,7 @@
bool retu;
ctx = svm_get_guest_regctx(svm_sc, vcpu);
+ vcpustate = svm_get_vcpu(svm_sc, vcpu);
vmcb = svm_get_vmcb(svm_sc, vcpu);
state = &vmcb->state;
ctrl = &vmcb->ctrl;
@@ -1255,7 +1336,7 @@
* Restart execution at "iret" but with the intercept cleared.
*/
vmexit->inst_length = 0;
- clear_nmi_blocking(svm_sc, vcpu);
+ nmi_enable_iret_tracing(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_VINTR: /* interrupt window exiting */
@@ -1273,6 +1354,25 @@
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1);
reflect = 1;
idtvec = code - 0x40;
+ if (vcpustate->nmi.blocking == NMI_IRET_TRACING) {
+ if (idtvec == IDT_DB) {
+ /* Don't reflect #DB into the guest */
+ reflect = 0;
+
+ /*
+ * APMv2 Section 15.2.2 #DB (Debug):
+ * The value saved for DR6 and DR7 matches
+ * what would be visible to a #DB handler.
+ */
+ KASSERT((state->dr6 & (1 << 14)) != 0,
+ ("DR6.BS not set (%#lx)", state->dr6));
+ } else {
+ vmm_stat_incr(svm_sc->vm, vcpu,
+ VMEXIT_EXCEPTION_DURING_IRET, 1);
+ }
+ nmi_unblock(svm_sc, vcpu, idtvec != IDT_DB);
+ }
+
switch (idtvec) {
case IDT_MC:
/*
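
During iret tracing, the #DB raised by the completed single step is consumed by the hypervisor instead of being reflected, and the KASSERT above checks DR6.BS per APMv2 section 15.2.2: a trap-flag step leaves the single-step bit (bit 14) set in DR6. A small illustrative predicate mirroring that check (the DR6_BS name is a local stand-in, not from the patch):

    #include <stdbool.h>
    #include <stdint.h>

    #define DR6_BS  (1ul << 14)     /* DR6 single-step bit, set on a TF #DB */

    /* A #DB taken while tracing "iret" must come from the trap flag. */
    static bool
    db_from_single_step(uint64_t dr6)
    {
        return ((dr6 & DR6_BS) != 0);
    }

Any other vector in this window counts as an exception during "iret": it is reflected into the guest after nmi_unblock() restores the saved PSL_RF and PSL_T bits.
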
@@ -1511,13 +1611,14 @@
/* NMI event has priority over interrupts. */
if (vm_nmi_pending(sc->vm, vcpu)) {
- if (nmi_blocked(sc, vcpu)) {
+ if (vcpustate->nmi.blocking) {
/*
* Can't inject another NMI if the guest has not
* yet executed an "iret" after the last NMI.
*/
- VCPU_CTR0(sc->vm, vcpu, "Cannot inject NMI due "
- "to NMI-blocking");
+ VCPU_CTR1(sc->vm, vcpu, "Cannot inject NMI due to %s",
+ vcpustate->nmi.blocking == NMI_IRET_INTERCEPT ?
+ "iret intercept" : "iret tracing");
} else if (ctrl->intr_shadow) {
/*
* Can't inject an NMI if the vcpu is in an intr_shadow.
@@ -1553,7 +1654,7 @@
IDT_NMI, 0, false);
/* virtual NMI blocking is now in effect */
- enable_nmi_blocking(sc, vcpu);
+ nmi_enable_iret_intercept(sc, vcpu);
VCPU_CTR0(sc->vm, vcpu, "Injecting vNMI");
}
@@ -1688,6 +1789,18 @@
} else {
disable_intr_window_exiting(sc, vcpu);
}
+
+#ifdef INVARIANTS
+ if (vcpustate->nmi.blocking == NMI_IRET_TRACING) {
+ KASSERT((state->rflags & (PSL_RF | PSL_T)) == (PSL_RF | PSL_T),
+ ("invalid rflags value during iret tracing (%#lx)",
+ state->rflags));
+ KASSERT(ctrl->intr_shadow, ("vcpu must be in interrupt "
+ "shadow during iret tracing"));
+ KASSERT((ctrl->eventinj & VMCB_EVENTINJ_VALID) == 0,
+ ("event injection not expected during iret tracing"));
+ }
+#endif
}
static __inline void
Index: sys/amd64/vmm/amd/svm_softc.h
===================================================================
--- sys/amd64/vmm/amd/svm_softc.h
+++ sys/amd64/vmm/amd/svm_softc.h
@@ -37,6 +37,12 @@
uint32_t num; /* range is [1, nasid - 1] */
};
+enum nmi_blocking {
+ NMI_UNBLOCKED = 0,
+ NMI_IRET_INTERCEPT, /* iret intercept is enabled */
+ NMI_IRET_TRACING, /* iret tracing is enabled */
+};
+
/*
* XXX separate out 'struct vmcb' from 'svm_vcpu' to avoid wasting space
* due to VMCB alignment requirements.
@@ -50,6 +56,11 @@
uint32_t dirty; /* state cache bits that must be cleared */
long eptgen; /* pmap->pm_eptgen when the vcpu last ran */
struct asid asid;
+ struct {
+ enum nmi_blocking blocking;
+ uint32_t exception_bitmap;
+ uint64_t rflags;
+ } nmi;
} __aligned(PAGE_SIZE);
/*
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c
+++ sys/amd64/vmm/vmm.c
@@ -1539,6 +1539,9 @@
if (error == 0 && retu == false)
goto restart;
+ VCPU_CTR2(vm, vcpuid, "returning from vm_run with "
+ "error %d and exitcode %d", error, vme->exitcode);
+
/* copy the exit information */
bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
return (error);
