Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F157678425
D20389.id60908.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
31 KB
Referenced Files
None
Subscribers
None
D20389.id60908.diff
View Options
Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h
+++ sys/amd64/include/vmm.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -153,7 +154,7 @@
struct pmap;
struct vm_eventinfo {
- void *rptr; /* rendezvous cookie */
+ u_int *rptr; /* runblock cookie */
int *sptr; /* suspend cookie */
int *iptr; /* reqidle cookie */
};
@@ -268,38 +269,21 @@
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
-void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
#ifdef _SYS__CPUSET_H_
-/*
- * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
- * The rendezvous 'func(arg)' is not allowed to do anything that will
- * cause the thread to be put to sleep.
- *
- * If the rendezvous is being initiated from a vcpu context then the
- * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
- *
- * The caller cannot hold any locks when initiating the rendezvous.
- *
- * The implementation of this API may cause vcpus other than those specified
- * by 'dest' to be stalled. The caller should not rely on any vcpus making
- * forward progress when the rendezvous is in progress.
- */
-typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
-void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
- vm_rendezvous_func_t func, void *arg);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
#endif /* _SYS__CPUSET_H_ */
+/* Return non-zero when the runblock counter referenced by 'info->rptr' is non-zero. */
static __inline int
-vcpu_rendezvous_pending(struct vm_eventinfo *info)
+vcpu_runblocked(struct vm_eventinfo *info)
{
- return (*((uintptr_t *)(info->rptr)) != 0);
+ return (*info->rptr != 0);
}
static __inline int
@@ -338,6 +322,8 @@
int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
+void vcpu_block_run(struct vm *, int);
+void vcpu_unblock_run(struct vm *, int);
static int __inline
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
@@ -569,7 +555,7 @@
VM_EXITCODE_INST_EMUL,
VM_EXITCODE_SPINUP_AP,
VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */
- VM_EXITCODE_RENDEZVOUS,
+ VM_EXITCODE_RUNBLOCK,
VM_EXITCODE_IOAPIC_EOI,
VM_EXITCODE_SUSPENDED,
VM_EXITCODE_INOUT_STR,
Index: sys/amd64/vmm/amd/svm.c
===================================================================
--- sys/amd64/vmm/amd/svm.c
+++ sys/amd64/vmm/amd/svm.c
@@ -1573,6 +1573,8 @@
need_intr_window = 0;
+ vlapic_tmr_update(vlapic);
+
if (vcpustate->nextrip != state->rip) {
ctrl->intr_shadow = 0;
VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking "
@@ -1971,8 +1973,8 @@
* XXX
* Setting 'vcpustate->lastcpu' here is bit premature because
* we may return from this function without actually executing
- * the VMRUN instruction. This could happen if a rendezvous
- * or an AST is pending on the first time through the loop.
+ * the VMRUN instruction. This could happen if an AST or yield
+ * condition is pending on the first time through the loop.
*
* This works for now but any new side-effects of vcpu
* migration should take this case into account.
@@ -2002,9 +2004,9 @@
break;
}
- if (vcpu_rendezvous_pending(evinfo)) {
+ if (vcpu_runblocked(evinfo)) {
enable_gintr();
- vm_exit_rendezvous(vm, vcpu, state->rip);
+ vm_exit_runblock(vm, vcpu, state->rip);
break;
}
Index: sys/amd64/vmm/intel/vmx.c
===================================================================
--- sys/amd64/vmm/intel/vmx.c
+++ sys/amd64/vmm/intel/vmx.c
@@ -1324,6 +1324,8 @@
uint64_t rflags, entryinfo;
uint32_t gi, info;
+ vlapic_tmr_update(vlapic);
+
if (vmx->state[vcpu].nextrip != guestrip) {
gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
if (gi & HWINTR_BLOCKING) {
@@ -2904,9 +2906,9 @@
break;
}
- if (vcpu_rendezvous_pending(evinfo)) {
+ if (vcpu_runblocked(evinfo)) {
enable_intr();
- vm_exit_rendezvous(vmx->vm, vcpu, rip);
+ vm_exit_runblock(vmx->vm, vcpu, rip);
break;
}
@@ -3577,30 +3579,12 @@
}
+/*
+ * Load the four 64-bit EOI-exit bitmap VMCS fields from 'masks', which must
+ * hold eight 32-bit words (masks[0] is the low half of EOI_EXIT0, masks[7]
+ * the high half of EOI_EXIT3).
+ *
+ * NOTE(review): unlike the previous implementation this performs no
+ * VMPTRLD/VMCLEAR, so it presumably relies on the caller already having the
+ * vCPU's VMCS loaded -- confirm against the call site.
+ */
static void
-vmx_set_tmr(struct vlapic *vlapic, int vector, bool level)
+vmx_set_tmr(struct vlapic *vlapic, const uint32_t *masks)
{
- struct vlapic_vtx *vlapic_vtx;
- struct vmx *vmx;
- struct vmcs *vmcs;
- uint64_t mask, val;
-
- KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
- KASSERT(!vcpu_is_running(vlapic->vm, vlapic->vcpuid, NULL),
- ("vmx_set_tmr: vcpu cannot be running"));
-
- vlapic_vtx = (struct vlapic_vtx *)vlapic;
- vmx = vlapic_vtx->vmx;
- vmcs = &vmx->vmcs[vlapic->vcpuid];
- mask = 1UL << (vector % 64);
-
- VMPTRLD(vmcs);
- val = vmcs_read(VMCS_EOI_EXIT(vector));
- if (level)
- val |= mask;
- else
- val &= ~mask;
- vmcs_write(VMCS_EOI_EXIT(vector), val);
- VMCLEAR(vmcs);
+ vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)masks[1] << 32) | masks[0]);
+ vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)masks[3] << 32) | masks[2]);
+ vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)masks[5] << 32) | masks[4]);
+ vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)masks[7] << 32) | masks[6]);
}
static void
Index: sys/amd64/vmm/io/vioapic.c
===================================================================
--- sys/amd64/vmm/io/vioapic.c
+++ sys/amd64/vmm/io/vioapic.c
@@ -4,6 +4,7 @@
* Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -36,9 +37,11 @@
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/sx.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
+#include <sys/cpuset.h>
#include <x86/apicreg.h>
#include <machine/vmm.h>
@@ -57,6 +60,7 @@
struct vioapic {
struct vm *vm;
struct mtx mtx;
+ struct sx wlock;
uint32_t id;
uint32_t ioregsel;
struct {
@@ -69,6 +73,10 @@
#define VIOAPIC_UNLOCK(vioapic) mtx_unlock_spin(&((vioapic)->mtx))
#define VIOAPIC_LOCKED(vioapic) mtx_owned(&((vioapic)->mtx))
+#define VIOAPIC_WRITE_LOCK(vioapic) sx_xlock(&(vioapic)->wlock)
+#define VIOAPIC_WRITE_UNLOCK(vioapic) sx_xunlock(&(vioapic)->wlock)
+#define VIOAPIC_WRITE_LOCKED(vioapic) sx_xlocked(&(vioapic)->wlock)
+
static MALLOC_DEFINE(M_VIOAPIC, "vioapic", "bhyve virtual ioapic");
#define VIOAPIC_CTR1(vioapic, fmt, a1) \
@@ -223,48 +231,152 @@
return (vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE));
}
+#define REDIR_IS_PHYS(reg) (((reg) & IOART_DESTMOD) == IOART_DESTPHY)
+#define REDIR_IS_LOWPRIO(reg) (((reg) & IOART_DELMOD) == IOART_DELLOPRI)
+/* Level-triggered interrupts only valid in fixed and low-priority modes */
+#define REDIR_IS_LVLTRIG(reg) \
+ (((reg) & IOART_TRGRLVL) != 0 && \
+ (((reg) & IOART_DELMOD) == IOART_DELFIXED || REDIR_IS_LOWPRIO(reg)))
+#define REDIR_DEST(reg) ((reg) >> (32 + APIC_ID_SHIFT))
+#define REDIR_VECTOR(reg) ((reg) & IOART_INTVEC)
+
/*
- * Reset the vlapic's trigger-mode register to reflect the ioapic pin
- * configuration.
+ * Given a redirection entry, determine which vCPUs would be targeted.
*/
static void
-vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg)
+vioapic_calcdest(struct vioapic *vioapic, uint64_t redir_ent, cpuset_t *dmask)
{
- struct vioapic *vioapic;
- struct vlapic *vlapic;
- uint32_t low, high, dest;
- int delmode, pin, vector;
- bool level, phys;
-
- vlapic = vm_lapic(vm, vcpuid);
- vioapic = vm_ioapic(vm);
- VIOAPIC_LOCK(vioapic);
/*
- * Reset all vectors to be edge-triggered.
+ * When calculating interrupt destinations with vlapic_calcdest(), the
+ * legacy xAPIC format is assumed, since the system lacks interrupt
+ * redirection hardware.
+ * See vlapic_deliver_intr() for more details.
*/
- vlapic_reset_tmr(vlapic);
- for (pin = 0; pin < REDIR_ENTRIES; pin++) {
- low = vioapic->rtbl[pin].reg;
- high = vioapic->rtbl[pin].reg >> 32;
+ vlapic_calcdest(vioapic->vm, dmask, REDIR_DEST(redir_ent),
+ REDIR_IS_PHYS(redir_ent), REDIR_IS_LOWPRIO(redir_ent), false);
+}
+
+/*
+ * Across all redirection entries utilizing a specified vector, determine the
+ * set of vCPUs which would be targeted by a level-triggered interrupt.
+ */
+static void
+vioapic_tmr_active(struct vioapic *vioapic, uint8_t vec, cpuset_t *result)
+{
+ u_int i;
+
+ CPU_ZERO(result);
+ if (vec == 0) {
+ return;
+ }
- level = low & IOART_TRGRLVL ? true : false;
- if (!level)
+ for (i = 0; i < REDIR_ENTRIES; i++) {
+ cpuset_t dest;
+ const uint64_t val = vioapic->rtbl[i].reg;
+
+ if (!REDIR_IS_LVLTRIG(val) || REDIR_VECTOR(val) != vec) {
continue;
+ }
- /*
- * For a level-triggered 'pin' let the vlapic figure out if
- * an assertion on this 'pin' would result in an interrupt
- * being delivered to it. If yes, then it will modify the
- * TMR bit associated with this vector to level-triggered.
- */
- phys = ((low & IOART_DESTMOD) == IOART_DESTPHY);
- delmode = low & IOART_DELMOD;
- vector = low & IOART_INTVEC;
- dest = high >> APIC_ID_SHIFT;
- vlapic_set_tmr_level(vlapic, dest, phys, delmode, vector);
+ CPU_ZERO(&dest);
+ vioapic_calcdest(vioapic, val, &dest);
+ CPU_OR(result, &dest);
+ }
+}
+
+/*
+ * Update TMR state in vLAPICs after changes to vIOAPIC pin configuration
+ */
+static void
+vioapic_update_tmrs(struct vioapic *vioapic, int vcpuid, uint64_t oldval,
+ uint64_t newval)
+{
+ cpuset_t active, allset, newset, oldset;
+ struct vm *vm;
+ uint8_t newvec, oldvec;
+
+ KASSERT(VIOAPIC_LOCKED(vioapic),
+ ("vioapic_update_tmrs: vioapic is not locked"));
+ KASSERT(VIOAPIC_WRITE_LOCKED(vioapic),
+ ("vioapic_update_tmrs: vioapic write lock not held"));
+
+ vm = vioapic->vm;
+ CPU_ZERO(&allset);
+ CPU_ZERO(&newset);
+ CPU_ZERO(&oldset);
+ newvec = oldvec = 0;
+
+ if (REDIR_IS_LVLTRIG(oldval)) {
+ vioapic_calcdest(vioapic, oldval, &oldset);
+ CPU_OR(&allset, &oldset);
+ oldvec = REDIR_VECTOR(oldval);
}
+
+ if (REDIR_IS_LVLTRIG(newval)) {
+ vioapic_calcdest(vioapic, newval, &newset);
+ CPU_OR(&allset, &newset);
+ newvec = REDIR_VECTOR(newval);
+ }
+
+ if (CPU_EMPTY(&allset) ||
+ (CPU_CMP(&oldset, &newset) == 0 && oldvec == newvec)) {
+ return;
+ }
+
+ /*
+ * Since the write to the redirection table has already occurred, a
+ * scan of level-triggered entries referencing the old vector will find
+ * only entries which are now currently valid.
+ */
+ vioapic_tmr_active(vioapic, oldvec, &active);
+
+ /*
+ * Drop VIOAPIC_LOCK while updating TMRs in case any of the affected
+ * vCPUs require sleeping until they are in an appropriate state.
+ */
VIOAPIC_UNLOCK(vioapic);
+
+ while (!CPU_EMPTY(&allset)) {
+ struct vlapic *vlapic;
+ u_int i;
+
+ i = CPU_FFS(&allset) - 1;
+ CPU_CLR(i, &allset);
+
+ if (oldvec == newvec &&
+ CPU_ISSET(i, &oldset) && CPU_ISSET(i, &newset)) {
+ continue;
+ }
+
+ if (i != vcpuid) {
+ vcpu_block_run(vm, i);
+ }
+
+ vlapic = vm_lapic(vm, i);
+ if (CPU_ISSET(i, &oldset)) {
+ /*
+ * Perform the deassertion if no other level-triggered
+ * IOAPIC entries target this vCPU with the old vector
+ *
+ * Note: Sharing of vectors like that should be
+ * extremely rare in modern operating systems and was
+ * previously unsupported by the bhyve vIOAPIC.
+ */
+ if (!CPU_ISSET(i, &active)) {
+ vlapic_tmr_set(vlapic, oldvec, false);
+ }
+ }
+ if (CPU_ISSET(i, &newset)) {
+ vlapic_tmr_set(vlapic, newvec, true);
+ }
+
+ if (i != vcpuid) {
+ vcpu_unblock_run(vm, i);
+ }
+ }
+
+ VIOAPIC_LOCK(vioapic);
}
static uint32_t
@@ -308,7 +420,6 @@
uint64_t data64, mask64;
uint64_t last, changed;
int regnum, pin, lshift;
- cpuset_t allvcpus;
regnum = addr & 0xff;
switch (regnum) {
@@ -344,18 +455,15 @@
/*
* If any fields in the redirection table entry (except mask
- * or polarity) have changed then rendezvous all the vcpus
- * to update their vlapic trigger-mode registers.
+ * or polarity) have changed then update the trigger-mode
+ * registers on all the vlapics.
*/
changed = last ^ vioapic->rtbl[pin].reg;
if (changed & ~(IOART_INTMASK | IOART_INTPOL)) {
VIOAPIC_CTR1(vioapic, "ioapic pin%d: recalculate "
"vlapic trigger-mode register", pin);
- VIOAPIC_UNLOCK(vioapic);
- allvcpus = vm_active_cpus(vioapic->vm);
- vm_smp_rendezvous(vioapic->vm, vcpuid, allvcpus,
- vioapic_update_tmr, NULL);
- VIOAPIC_LOCK(vioapic);
+ vioapic_update_tmrs(vioapic, vcpuid, last,
+ vioapic->rtbl[pin].reg);
}
/*
@@ -392,6 +500,18 @@
return (0);
}
+ if (!doread) {
+ /*
+ * When writing the vioapic registers which result in TMR
+ * updates, an unbounded sleep is possible while waiting for
+ * certain vCPUs to reach acceptable states. Since the
+ * VIOAPIC_LOCK mutex cannot be held during such a sleep, an
+ * additional synchronization mechanism is needed to prevent
+ * conflicting writes.
+ */
+ VIOAPIC_WRITE_LOCK(vioapic);
+ }
+
VIOAPIC_LOCK(vioapic);
if (offset == IOREGSEL) {
if (doread)
@@ -409,6 +529,10 @@
}
VIOAPIC_UNLOCK(vioapic);
+ if (!doread) {
+ VIOAPIC_WRITE_UNLOCK(vioapic);
+ }
+
return (0);
}
@@ -478,6 +602,7 @@
vioapic->vm = vm;
mtx_init(&vioapic->mtx, "vioapic lock", NULL, MTX_SPIN);
+ sx_init(&vioapic->wlock, "vioapic write lock");
/* Initialize all redirection entries to mask all interrupts */
for (i = 0; i < REDIR_ENTRIES; i++)
@@ -490,6 +615,7 @@
vioapic_cleanup(struct vioapic *vioapic)
{
+ sx_destroy(&vioapic->wlock);
free(vioapic, M_VIOAPIC);
}
Index: sys/amd64/vmm/io/vlapic.h
===================================================================
--- sys/amd64/vmm/io/vlapic.h
+++ sys/amd64/vmm/io/vlapic.h
@@ -83,16 +83,11 @@
void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
int delmode, int vec);
-/* Reset the trigger-mode bits for all vectors to be edge-triggered */
-void vlapic_reset_tmr(struct vlapic *vlapic);
+void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
+ bool lowprio, bool x2apic_dest);
-/*
- * Set the trigger-mode bit associated with 'vector' to level-triggered if
- * the (dest,phys,delmode) tuple resolves to an interrupt being delivered to
- * this 'vlapic'.
- */
-void vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
- int delmode, int vector);
+void vlapic_tmr_update(struct vlapic *vlapic);
+void vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active);
void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val);
uint64_t vlapic_get_cr8(struct vlapic *vlapic);
Index: sys/amd64/vmm/io/vlapic.c
===================================================================
--- sys/amd64/vmm/io/vlapic.c
+++ sys/amd64/vmm/io/vlapic.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -78,6 +79,8 @@
*/
#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)
+static void vlapic_tmr_reset(struct vlapic *);
+
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{
@@ -809,11 +812,11 @@
/*
* This function populates 'dmask' with the set of vcpus that match the
* addressing specified by the (dest, phys, lowprio) tuple.
- *
+ *
* 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
* or xAPIC (8-bit) destination field.
*/
-static void
+void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
bool lowprio, bool x2apic_dest)
{
@@ -1433,7 +1436,7 @@
lapic->dfr = 0xffffffff;
lapic->svr = APIC_SVR_VECTOR;
vlapic_mask_lvts(vlapic);
- vlapic_reset_tmr(vlapic);
+ vlapic_tmr_reset(vlapic);
lapic->dcr_timer = 0;
vlapic_dcr_write_handler(vlapic);
@@ -1601,60 +1604,77 @@
}
static void
-vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
+vlapic_tmr_reset(struct vlapic *vlapic)
{
struct LAPIC *lapic;
- uint32_t *tmrptr, mask;
- int idx;
lapic = vlapic->apic_page;
- tmrptr = &lapic->tmr0;
- idx = (vector / 32) * 4;
- mask = 1 << (vector % 32);
- if (level)
- tmrptr[idx] |= mask;
- else
- tmrptr[idx] &= ~mask;
-
- if (vlapic->ops.set_tmr != NULL)
- (*vlapic->ops.set_tmr)(vlapic, vector, level);
+ lapic->tmr0 = lapic->tmr1 = lapic->tmr2 = lapic->tmr3 = 0;
+ lapic->tmr4 = lapic->tmr5 = lapic->tmr6 = lapic->tmr7 = 0;
+ vlapic->tmr_pending = 1;
}
+/*
+ * Synchronize TMR designations into the LAPIC state.
+ * The vCPU must be in the VCPU_RUNNING state.
+ */
void
-vlapic_reset_tmr(struct vlapic *vlapic)
+vlapic_tmr_update(struct vlapic *vlapic)
{
- int vector;
+ struct LAPIC *lapic;
+ uint32_t *tmrptr;
+ uint32_t result[VLAPIC_TMR_CNT];
+ u_int i, tmr_idx;
- VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");
+ if (vlapic->tmr_pending == 0) {
+ return;
+ }
+
+ lapic = vlapic->apic_page;
+ tmrptr = &lapic->tmr0;
- for (vector = 0; vector <= 255; vector++)
- vlapic_set_tmr(vlapic, vector, false);
+ VLAPIC_CTR0(vlapic, "synchronizing TMR");
+ for (i = 0; i < VLAPIC_TMR_CNT; i++) {
+ tmr_idx = i * 4;
+
+ tmrptr[tmr_idx] &= ~vlapic->tmr_vec_deassert[i];
+ tmrptr[tmr_idx] |= vlapic->tmr_vec_assert[i];
+ vlapic->tmr_vec_deassert[i] = 0;
+ vlapic->tmr_vec_assert[i] = 0;
+ result[i] = tmrptr[tmr_idx];
+ }
+ vlapic->tmr_pending = 0;
+
+ if (vlapic->ops.set_tmr != NULL) {
+ (*vlapic->ops.set_tmr)(vlapic, result);
+ }
}
+/*
+ * Designate the TMR state for a given interrupt vector.
+ * The caller must hold the vIOAPIC write lock and prevent the vCPU
+ * corresponding to this vLAPIC instance from being in or entering the
+ * VCPU_RUNNING state.  The change is only recorded here (in the pending
+ * assert/deassert masks); it is applied by a later vlapic_tmr_update().
+ */
void
-vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
- int delmode, int vector)
+vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active)
{
- cpuset_t dmask;
- bool lowprio;
-
- KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
+ const uint32_t idx = vector / 32;
+ /* Unsigned literal: shifting a signed 1 into bit 31 is undefined. */
+ const uint32_t mask = 1U << (vector % 32);
+
+ VLAPIC_CTR2(vlapic, "TMR for vector %u %sasserted", vector,
+ active ? "" : "de");
+ if (active) {
+ vlapic->tmr_vec_assert[idx] |= mask;
+ vlapic->tmr_vec_deassert[idx] &= ~mask;
+ } else {
+ vlapic->tmr_vec_deassert[idx] |= mask;
+ vlapic->tmr_vec_assert[idx] &= ~mask;
+ }
/*
- * A level trigger is valid only for fixed and lowprio delivery modes.
+ * Track the number of TMR changes between calls to vlapic_tmr_update.
+ * While a simple boolean would suffice, this count may be useful when
+ * tracing or debugging, and is cheap to calculate.
*/
- if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
- VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
- "delivery-mode %d", delmode);
- return;
- }
-
- lowprio = (delmode == APIC_DELMODE_LOWPRIO);
- vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);
-
- if (!CPU_ISSET(vlapic->vcpuid, &dmask))
- return;
-
- VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
- vlapic_set_tmr(vlapic, vector, true);
+ vlapic->tmr_pending = MIN(UINT32_MAX - 1, vlapic->tmr_pending) + 1;
}
Index: sys/amd64/vmm/io/vlapic_priv.h
===================================================================
--- sys/amd64/vmm/io/vlapic_priv.h
+++ sys/amd64/vmm/io/vlapic_priv.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -138,6 +139,8 @@
#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI
+#define VLAPIC_TMR_CNT 8
+
struct vlapic;
struct vlapic_ops {
@@ -145,7 +148,7 @@
int (*pending_intr)(struct vlapic *vlapic, int *vecptr);
void (*intr_accepted)(struct vlapic *vlapic, int vector);
void (*post_intr)(struct vlapic *vlapic, int hostcpu);
- void (*set_tmr)(struct vlapic *vlapic, int vector, bool level);
+ void (*set_tmr)(struct vlapic *vlapic, const uint32_t *result);
void (*enable_x2apic_mode)(struct vlapic *vlapic);
};
@@ -157,6 +160,7 @@
uint32_t esr_pending;
int esr_firing;
+ uint32_t tmr_pending;
struct callout callout; /* vlapic timer */
struct bintime timer_fire_bt; /* callout expiry time */
@@ -184,6 +188,19 @@
*/
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
+
+ /*
+ * Store intended modifications to the trigger-mode register state.
+ * Along with the tmr_pending counter above, these are protected by the
+ * vIOAPIC lock and can only be modified under specific conditions:
+ *
+ * 1. When holding the vIOAPIC lock, and the vCPU to which the vLAPIC
+ * belongs is prevented from entering the VCPU_RUNNING state.
+ * 2. When the owning vCPU is in the VCPU_RUNNING state, and is
+ * applying the TMR modifications prior to interrupt injection.
+ */
+ uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT];
+ uint32_t tmr_vec_assert[VLAPIC_TMR_CNT];
};
void vlapic_init(struct vlapic *vlapic);
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c
+++ sys/amd64/vmm/vmm.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -96,6 +97,7 @@
struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */
enum vcpu_state state; /* (o) vcpu state */
int hostcpu; /* (o) vcpu's host cpu */
+ u_int runblock; /* (i) block vcpu from run state */
int reqidle; /* (i) request vcpu to idle */
struct vlapic *vlapic; /* (i) APIC device model */
enum x2apic_state x2apic_state; /* (i) APIC mode */
@@ -156,11 +158,6 @@
int suspend; /* (i) stop VM execution */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
- cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */
- cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */
- void *rendezvous_arg; /* (x) rendezvous func/arg */
- vm_rendezvous_func_t rendezvous_func;
- struct mtx rendezvous_mtx; /* (o) rendezvous lock */
struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
struct vmspace *vmspace; /* (o) guest's address space */
@@ -293,6 +290,7 @@
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
+ vcpu->runblock = 0;
vcpu->reqidle = 0;
vcpu->exitintinfo = 0;
vcpu->nmi_pending = 0;
@@ -461,7 +459,6 @@
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
vm->vmspace = vmspace;
- mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
vm->sockets = 1;
vm->cores = cores_per_package; /* XXX backwards compatibility */
@@ -1201,6 +1198,12 @@
break;
}
+ if (newstate == VCPU_RUNNING) {
+ while (vcpu->runblock != 0) {
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
+ }
+ }
+
if (error)
return (EBUSY);
@@ -1213,8 +1216,10 @@
else
vcpu->hostcpu = NOCPU;
- if (newstate == VCPU_IDLE)
+ if (newstate == VCPU_IDLE ||
+ (newstate == VCPU_FROZEN && vcpu->runblock != 0)) {
wakeup(&vcpu->state);
+ }
return (0);
}
@@ -1237,63 +1242,6 @@
panic("Error %d setting state to %d", error, newstate);
}
-static void
-vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
-{
-
- KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
-
- /*
- * Update 'rendezvous_func' and execute a write memory barrier to
- * ensure that it is visible across all host cpus. This is not needed
- * for correctness but it does ensure that all the vcpus will notice
- * that the rendezvous is requested immediately.
- */
- vm->rendezvous_func = func;
- wmb();
-}
-
-#define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \
- do { \
- if (vcpuid >= 0) \
- VCPU_CTR0(vm, vcpuid, fmt); \
- else \
- VM_CTR0(vm, fmt); \
- } while (0)
-
-static void
-vm_handle_rendezvous(struct vm *vm, int vcpuid)
-{
-
- KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus),
- ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
-
- mtx_lock(&vm->rendezvous_mtx);
- while (vm->rendezvous_func != NULL) {
- /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
- CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
-
- if (vcpuid != -1 &&
- CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
- !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
- VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
- (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
- CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
- }
- if (CPU_CMP(&vm->rendezvous_req_cpus,
- &vm->rendezvous_done_cpus) == 0) {
- VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
- vm_set_rendezvous_func(vm, NULL);
- wakeup(&vm->rendezvous_func);
- break;
- }
- RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
- mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
- "vmrndv", 0);
- }
- mtx_unlock(&vm->rendezvous_mtx);
-}
-
/*
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
*/
@@ -1321,7 +1269,7 @@
* vcpu returned from VMRUN() and before it acquired the
* vcpu lock above.
*/
- if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle)
+ if (vm->suspend || vcpu->reqidle)
break;
if (vm_nmi_pending(vm, vcpuid))
break;
@@ -1515,10 +1463,6 @@
/*
* Wait until all 'active_cpus' have suspended themselves.
- *
- * Since a VM may be suspended at any time including when one or
- * more vcpus are doing a rendezvous we need to call the rendezvous
- * handler while we are waiting to prevent a deadlock.
*/
vcpu_lock(vcpu);
while (1) {
@@ -1527,17 +1471,10 @@
break;
}
- if (vm->rendezvous_func == NULL) {
- VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
- vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
- msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
- vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
- } else {
- VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
- vcpu_unlock(vcpu);
- vm_handle_rendezvous(vm, vcpuid);
- vcpu_lock(vcpu);
- }
+ VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
+ vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
+ msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
+ vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
}
vcpu_unlock(vcpu);
@@ -1621,17 +1558,15 @@
}
void
-vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
+vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip)
{
struct vm_exit *vmexit;
- KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress"));
-
vmexit = vm_exitinfo(vm, vcpuid);
vmexit->rip = rip;
vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
- vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1);
+ vmexit->exitcode = VM_EXITCODE_RUNBLOCK;
+ vmm_stat_incr(vm, vcpuid, VMEXIT_RUNBLOCK, 1);
}
void
@@ -1684,7 +1619,7 @@
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
- evinfo.rptr = &vm->rendezvous_func;
+ evinfo.rptr = &vcpu->runblock;
evinfo.sptr = &vm->suspend;
evinfo.iptr = &vcpu->reqidle;
restart:
@@ -1724,9 +1659,7 @@
vioapic_process_eoi(vm, vcpuid,
vme->u.ioapic_eoi.vector);
break;
- case VM_EXITCODE_RENDEZVOUS:
- vm_handle_rendezvous(vm, vcpuid);
- error = 0;
+ case VM_EXITCODE_RUNBLOCK:
break;
case VM_EXITCODE_HLT:
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
@@ -2321,6 +2254,46 @@
return (state);
}
+/*
+ * Take a run-block hold on 'vcpuid' and wait until that vcpu is no longer in
+ * the VCPU_RUNNING state.  Holds are counted in 'runblock', so calls may nest;
+ * each must be paired with a later vcpu_unblock_run().
+ */
+void
+vcpu_block_run(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ vcpu->runblock++;
+ /* Only the first hold needs to notify a vcpu that is currently running. */
+ if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) {
+ vcpu_notify_event_locked(vcpu, false);
+ }
+ /* Sleep on the state channel until the vcpu has left VCPU_RUNNING. */
+ while (vcpu->state == VCPU_RUNNING) {
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
+ }
+ vcpu_unlock(vcpu);
+}
+
+/*
+ * Release one run-block hold previously taken with vcpu_block_run() on
+ * 'vcpuid'.  When the last hold is dropped, wake any thread sleeping on the
+ * vcpu state channel.  Panics on an out-of-range 'vcpuid'.
+ */
+void
+vcpu_unblock_run(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vcpu_unblock_run: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ KASSERT(vcpu->runblock != 0, ("expected non-zero runblock"));
+ vcpu->runblock--;
+ if (vcpu->runblock == 0) {
+ wakeup(&vcpu->state);
+ }
+ vcpu_unlock(vcpu);
+}
+
int
vm_activate_cpu(struct vm *vm, int vcpuid)
{
@@ -2504,54 +2477,6 @@
return (apicid);
}
-void
-vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
- vm_rendezvous_func_t func, void *arg)
-{
- int i;
-
- /*
- * Enforce that this function is called without any locks
- */
- WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
- KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus),
- ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
-
-restart:
- mtx_lock(&vm->rendezvous_mtx);
- if (vm->rendezvous_func != NULL) {
- /*
- * If a rendezvous is already in progress then we need to
- * call the rendezvous handler in case this 'vcpuid' is one
- * of the targets of the rendezvous.
- */
- RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
- mtx_unlock(&vm->rendezvous_mtx);
- vm_handle_rendezvous(vm, vcpuid);
- goto restart;
- }
- KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
- "rendezvous is still in progress"));
-
- RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
- vm->rendezvous_req_cpus = dest;
- CPU_ZERO(&vm->rendezvous_done_cpus);
- vm->rendezvous_arg = arg;
- vm_set_rendezvous_func(vm, func);
- mtx_unlock(&vm->rendezvous_mtx);
-
- /*
- * Wake up any sleeping vcpus and trigger a VM-exit in any running
- * vcpus so they handle the rendezvous as soon as possible.
- */
- for (i = 0; i < vm->maxcpus; i++) {
- if (CPU_ISSET(i, &dest))
- vcpu_notify_event(vm, i, false);
- }
-
- vm_handle_rendezvous(vm, vcpuid);
-}
-
struct vatpic *
vm_atpic(struct vm *vm)
{
Index: sys/amd64/vmm/vmm_stat.h
===================================================================
--- sys/amd64/vmm/vmm_stat.h
+++ sys/amd64/vmm/vmm_stat.h
@@ -157,7 +157,7 @@
VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
VMM_STAT_DECLARE(VMEXIT_USERSPACE);
-VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
+VMM_STAT_DECLARE(VMEXIT_RUNBLOCK);
VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
VMM_STAT_DECLARE(VMEXIT_REQIDLE);
#endif
Index: sys/amd64/vmm/vmm_stat.c
===================================================================
--- sys/amd64/vmm/vmm_stat.c
+++ sys/amd64/vmm/vmm_stat.c
@@ -168,5 +168,5 @@
VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
-VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
+VMM_STAT(VMEXIT_RUNBLOCK, "number of times runblock at exit");
VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, May 25, 12:22 AM (4 h, 59 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33491564
Default Alt Text
D20389.id60908.diff (31 KB)
Attached To
Mode
D20389: bhyve vioapic writes can deadlock instance
Attached
Detach File
Event Timeline
Log In to Comment