Page MenuHomeFreeBSD

D20389.id60908.diff
No OneTemporary

D20389.id60908.diff

Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h
+++ sys/amd64/include/vmm.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -153,7 +154,7 @@
struct pmap;
struct vm_eventinfo {
- void *rptr; /* rendezvous cookie */
+ u_int *rptr; /* runblock cookie */
int *sptr; /* suspend cookie */
int *iptr; /* reqidle cookie */
};
@@ -268,38 +269,21 @@
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
-void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
#ifdef _SYS__CPUSET_H_
-/*
- * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
- * The rendezvous 'func(arg)' is not allowed to do anything that will
- * cause the thread to be put to sleep.
- *
- * If the rendezvous is being initiated from a vcpu context then the
- * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
- *
- * The caller cannot hold any locks when initiating the rendezvous.
- *
- * The implementation of this API may cause vcpus other than those specified
- * by 'dest' to be stalled. The caller should not rely on any vcpus making
- * forward progress when the rendezvous is in progress.
- */
-typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
-void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
- vm_rendezvous_func_t func, void *arg);
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
#endif /* _SYS__CPUSET_H_ */
static __inline int
-vcpu_rendezvous_pending(struct vm_eventinfo *info)
+vcpu_runblocked(struct vm_eventinfo *info)
{
- return (*((uintptr_t *)(info->rptr)) != 0);
+ return (*info->rptr != 0);
}
static __inline int
@@ -338,6 +322,8 @@
int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
+void vcpu_block_run(struct vm *, int);
+void vcpu_unblock_run(struct vm *, int);
static int __inline
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
@@ -569,7 +555,7 @@
VM_EXITCODE_INST_EMUL,
VM_EXITCODE_SPINUP_AP,
VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */
- VM_EXITCODE_RENDEZVOUS,
+ VM_EXITCODE_RUNBLOCK,
VM_EXITCODE_IOAPIC_EOI,
VM_EXITCODE_SUSPENDED,
VM_EXITCODE_INOUT_STR,
Index: sys/amd64/vmm/amd/svm.c
===================================================================
--- sys/amd64/vmm/amd/svm.c
+++ sys/amd64/vmm/amd/svm.c
@@ -1573,6 +1573,8 @@
need_intr_window = 0;
+ vlapic_tmr_update(vlapic);
+
if (vcpustate->nextrip != state->rip) {
ctrl->intr_shadow = 0;
VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking "
@@ -1971,8 +1973,8 @@
* XXX
* Setting 'vcpustate->lastcpu' here is bit premature because
* we may return from this function without actually executing
- * the VMRUN instruction. This could happen if a rendezvous
- * or an AST is pending on the first time through the loop.
+ * the VMRUN instruction. This could happen if an AST or yield
+ * condition is pending on the first time through the loop.
*
* This works for now but any new side-effects of vcpu
* migration should take this case into account.
@@ -2002,9 +2004,9 @@
break;
}
- if (vcpu_rendezvous_pending(evinfo)) {
+ if (vcpu_runblocked(evinfo)) {
enable_gintr();
- vm_exit_rendezvous(vm, vcpu, state->rip);
+ vm_exit_runblock(vm, vcpu, state->rip);
break;
}
Index: sys/amd64/vmm/intel/vmx.c
===================================================================
--- sys/amd64/vmm/intel/vmx.c
+++ sys/amd64/vmm/intel/vmx.c
@@ -1324,6 +1324,8 @@
uint64_t rflags, entryinfo;
uint32_t gi, info;
+ vlapic_tmr_update(vlapic);
+
if (vmx->state[vcpu].nextrip != guestrip) {
gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
if (gi & HWINTR_BLOCKING) {
@@ -2904,9 +2906,9 @@
break;
}
- if (vcpu_rendezvous_pending(evinfo)) {
+ if (vcpu_runblocked(evinfo)) {
enable_intr();
- vm_exit_rendezvous(vmx->vm, vcpu, rip);
+ vm_exit_runblock(vmx->vm, vcpu, rip);
break;
}
@@ -3577,30 +3579,12 @@
}
static void
-vmx_set_tmr(struct vlapic *vlapic, int vector, bool level)
+vmx_set_tmr(struct vlapic *vlapic, const uint32_t *masks)
{
- struct vlapic_vtx *vlapic_vtx;
- struct vmx *vmx;
- struct vmcs *vmcs;
- uint64_t mask, val;
-
- KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
- KASSERT(!vcpu_is_running(vlapic->vm, vlapic->vcpuid, NULL),
- ("vmx_set_tmr: vcpu cannot be running"));
-
- vlapic_vtx = (struct vlapic_vtx *)vlapic;
- vmx = vlapic_vtx->vmx;
- vmcs = &vmx->vmcs[vlapic->vcpuid];
- mask = 1UL << (vector % 64);
-
- VMPTRLD(vmcs);
- val = vmcs_read(VMCS_EOI_EXIT(vector));
- if (level)
- val |= mask;
- else
- val &= ~mask;
- vmcs_write(VMCS_EOI_EXIT(vector), val);
- VMCLEAR(vmcs);
+ vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)masks[1] << 32) | masks[0]);
+ vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)masks[3] << 32) | masks[2]);
+ vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)masks[5] << 32) | masks[4]);
+ vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)masks[7] << 32) | masks[6]);
}
static void
Index: sys/amd64/vmm/io/vioapic.c
===================================================================
--- sys/amd64/vmm/io/vioapic.c
+++ sys/amd64/vmm/io/vioapic.c
@@ -4,6 +4,7 @@
* Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -36,9 +37,11 @@
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/sx.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
+#include <sys/cpuset.h>
#include <x86/apicreg.h>
#include <machine/vmm.h>
@@ -57,6 +60,7 @@
struct vioapic {
struct vm *vm;
struct mtx mtx;
+ struct sx wlock;
uint32_t id;
uint32_t ioregsel;
struct {
@@ -69,6 +73,10 @@
#define VIOAPIC_UNLOCK(vioapic) mtx_unlock_spin(&((vioapic)->mtx))
#define VIOAPIC_LOCKED(vioapic) mtx_owned(&((vioapic)->mtx))
+#define VIOAPIC_WRITE_LOCK(vioapic) sx_xlock(&(vioapic)->wlock)
+#define VIOAPIC_WRITE_UNLOCK(vioapic) sx_xunlock(&(vioapic)->wlock)
+#define VIOAPIC_WRITE_LOCKED(vioapic) sx_xlocked(&(vioapic)->wlock)
+
static MALLOC_DEFINE(M_VIOAPIC, "vioapic", "bhyve virtual ioapic");
#define VIOAPIC_CTR1(vioapic, fmt, a1) \
@@ -223,48 +231,152 @@
return (vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE));
}
+#define REDIR_IS_PHYS(reg) (((reg) & IOART_DESTMOD) == IOART_DESTPHY)
+#define REDIR_IS_LOWPRIO(reg) (((reg) & IOART_DELMOD) == IOART_DELLOPRI)
+/* Level-triggered interrupts only valid in fixed and low-priority modes */
+#define REDIR_IS_LVLTRIG(reg) \
+ (((reg) & IOART_TRGRLVL) != 0 && \
+ (((reg) & IOART_DELMOD) == IOART_DELFIXED || REDIR_IS_LOWPRIO(reg)))
+#define REDIR_DEST(reg) ((reg) >> (32 + APIC_ID_SHIFT))
+#define REDIR_VECTOR(reg) ((reg) & IOART_INTVEC)
+
/*
- * Reset the vlapic's trigger-mode register to reflect the ioapic pin
- * configuration.
+ * Given a redirection entry, determine which vCPUs would be targeted.
*/
static void
-vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg)
+vioapic_calcdest(struct vioapic *vioapic, uint64_t redir_ent, cpuset_t *dmask)
{
- struct vioapic *vioapic;
- struct vlapic *vlapic;
- uint32_t low, high, dest;
- int delmode, pin, vector;
- bool level, phys;
-
- vlapic = vm_lapic(vm, vcpuid);
- vioapic = vm_ioapic(vm);
- VIOAPIC_LOCK(vioapic);
/*
- * Reset all vectors to be edge-triggered.
+ * When calculating interrupt destinations with vlapic_calcdest(), the
+ * legacy xAPIC format is assumed, since the system lacks interrupt
+ * redirection hardware.
+ * See vlapic_deliver_intr() for more details.
*/
- vlapic_reset_tmr(vlapic);
- for (pin = 0; pin < REDIR_ENTRIES; pin++) {
- low = vioapic->rtbl[pin].reg;
- high = vioapic->rtbl[pin].reg >> 32;
+ vlapic_calcdest(vioapic->vm, dmask, REDIR_DEST(redir_ent),
+ REDIR_IS_PHYS(redir_ent), REDIR_IS_LOWPRIO(redir_ent), false);
+}
+
+/*
+ * Across all redirection entries utilizing a specified vector, determine the
+ * set of vCPUs which would be targeted by a level-triggered interrupt.
+ */
+static void
+vioapic_tmr_active(struct vioapic *vioapic, uint8_t vec, cpuset_t *result)
+{
+ u_int i;
+
+ CPU_ZERO(result);
+ if (vec == 0) {
+ return;
+ }
- level = low & IOART_TRGRLVL ? true : false;
- if (!level)
+ for (i = 0; i < REDIR_ENTRIES; i++) {
+ cpuset_t dest;
+ const uint64_t val = vioapic->rtbl[i].reg;
+
+ if (!REDIR_IS_LVLTRIG(val) || REDIR_VECTOR(val) != vec) {
continue;
+ }
- /*
- * For a level-triggered 'pin' let the vlapic figure out if
- * an assertion on this 'pin' would result in an interrupt
- * being delivered to it. If yes, then it will modify the
- * TMR bit associated with this vector to level-triggered.
- */
- phys = ((low & IOART_DESTMOD) == IOART_DESTPHY);
- delmode = low & IOART_DELMOD;
- vector = low & IOART_INTVEC;
- dest = high >> APIC_ID_SHIFT;
- vlapic_set_tmr_level(vlapic, dest, phys, delmode, vector);
+ CPU_ZERO(&dest);
+ vioapic_calcdest(vioapic, val, &dest);
+ CPU_OR(result, &dest);
+ }
+}
+
+/*
+ * Update TMR state in vLAPICs after changes to vIOAPIC pin configuration
+ */
+static void
+vioapic_update_tmrs(struct vioapic *vioapic, int vcpuid, uint64_t oldval,
+ uint64_t newval)
+{
+ cpuset_t active, allset, newset, oldset;
+ struct vm *vm;
+ uint8_t newvec, oldvec;
+
+ KASSERT(VIOAPIC_LOCKED(vioapic),
+ ("vioapic_update_tmrs: vioapic is not locked"));
+ KASSERT(VIOAPIC_WRITE_LOCKED(vioapic),
+ ("vioapic_update_tmrs: vioapic write lock not held"));
+
+ vm = vioapic->vm;
+ CPU_ZERO(&allset);
+ CPU_ZERO(&newset);
+ CPU_ZERO(&oldset);
+ newvec = oldvec = 0;
+
+ if (REDIR_IS_LVLTRIG(oldval)) {
+ vioapic_calcdest(vioapic, oldval, &oldset);
+ CPU_OR(&allset, &oldset);
+ oldvec = REDIR_VECTOR(oldval);
}
+
+ if (REDIR_IS_LVLTRIG(newval)) {
+ vioapic_calcdest(vioapic, newval, &newset);
+ CPU_OR(&allset, &newset);
+ newvec = REDIR_VECTOR(newval);
+ }
+
+ if (CPU_EMPTY(&allset) ||
+ (CPU_CMP(&oldset, &newset) == 0 && oldvec == newvec)) {
+ return;
+ }
+
+ /*
+ * Since the write to the redirection table has already occurred, a
+ * scan of level-triggered entries referencing the old vector will find
+ * only entries which are now currently valid.
+ */
+ vioapic_tmr_active(vioapic, oldvec, &active);
+
+ /*
+ * Drop VIOAPIC_LOCK while updating TMRs in case any of the affected
+ * vCPUs require sleeping until they are in an appropriate state.
+ */
VIOAPIC_UNLOCK(vioapic);
+
+ while (!CPU_EMPTY(&allset)) {
+ struct vlapic *vlapic;
+ u_int i;
+
+ i = CPU_FFS(&allset) - 1;
+ CPU_CLR(i, &allset);
+
+ if (oldvec == newvec &&
+ CPU_ISSET(i, &oldset) && CPU_ISSET(i, &newset)) {
+ continue;
+ }
+
+ if (i != vcpuid) {
+ vcpu_block_run(vm, i);
+ }
+
+ vlapic = vm_lapic(vm, i);
+ if (CPU_ISSET(i, &oldset)) {
+ /*
+ * Perform the deassertion if no other level-triggered
+ * IOAPIC entries target this vCPU with the old vector
+ *
+ * Note: Sharing of vectors like that should be
+ * extremely rare in modern operating systems and was
+ * previously unsupported by the bhyve vIOAPIC.
+ */
+ if (!CPU_ISSET(i, &active)) {
+ vlapic_tmr_set(vlapic, oldvec, false);
+ }
+ }
+ if (CPU_ISSET(i, &newset)) {
+ vlapic_tmr_set(vlapic, newvec, true);
+ }
+
+ if (i != vcpuid) {
+ vcpu_unblock_run(vm, i);
+ }
+ }
+
+ VIOAPIC_LOCK(vioapic);
}
static uint32_t
@@ -308,7 +420,6 @@
uint64_t data64, mask64;
uint64_t last, changed;
int regnum, pin, lshift;
- cpuset_t allvcpus;
regnum = addr & 0xff;
switch (regnum) {
@@ -344,18 +455,15 @@
/*
* If any fields in the redirection table entry (except mask
- * or polarity) have changed then rendezvous all the vcpus
- * to update their vlapic trigger-mode registers.
+ * or polarity) have changed then update the trigger-mode
+ * registers on all the vlapics.
*/
changed = last ^ vioapic->rtbl[pin].reg;
if (changed & ~(IOART_INTMASK | IOART_INTPOL)) {
VIOAPIC_CTR1(vioapic, "ioapic pin%d: recalculate "
"vlapic trigger-mode register", pin);
- VIOAPIC_UNLOCK(vioapic);
- allvcpus = vm_active_cpus(vioapic->vm);
- vm_smp_rendezvous(vioapic->vm, vcpuid, allvcpus,
- vioapic_update_tmr, NULL);
- VIOAPIC_LOCK(vioapic);
+ vioapic_update_tmrs(vioapic, vcpuid, last,
+ vioapic->rtbl[pin].reg);
}
/*
@@ -392,6 +500,18 @@
return (0);
}
+ if (!doread) {
+ /*
+ * When writing the vioapic registers which result in TMR
+ * updates, an unbounded sleep is possible while waiting for
+ * certain vCPUs to reach acceptable states. Since the
+ * VIOAPIC_LOCK mutex cannot be held during such a sleep, an
+ * additional synchronization mechanism is needed to prevent
+ * conflicting writes.
+ */
+ VIOAPIC_WRITE_LOCK(vioapic);
+ }
+
VIOAPIC_LOCK(vioapic);
if (offset == IOREGSEL) {
if (doread)
@@ -409,6 +529,10 @@
}
VIOAPIC_UNLOCK(vioapic);
+ if (!doread) {
+ VIOAPIC_WRITE_UNLOCK(vioapic);
+ }
+
return (0);
}
@@ -478,6 +602,7 @@
vioapic->vm = vm;
mtx_init(&vioapic->mtx, "vioapic lock", NULL, MTX_SPIN);
+ sx_init(&vioapic->wlock, "vioapic write lock");
/* Initialize all redirection entries to mask all interrupts */
for (i = 0; i < REDIR_ENTRIES; i++)
@@ -490,6 +615,7 @@
vioapic_cleanup(struct vioapic *vioapic)
{
+ sx_destroy(&vioapic->wlock);
free(vioapic, M_VIOAPIC);
}
Index: sys/amd64/vmm/io/vlapic.h
===================================================================
--- sys/amd64/vmm/io/vlapic.h
+++ sys/amd64/vmm/io/vlapic.h
@@ -83,16 +83,11 @@
void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
int delmode, int vec);
-/* Reset the trigger-mode bits for all vectors to be edge-triggered */
-void vlapic_reset_tmr(struct vlapic *vlapic);
+void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
+ bool lowprio, bool x2apic_dest);
-/*
- * Set the trigger-mode bit associated with 'vector' to level-triggered if
- * the (dest,phys,delmode) tuple resolves to an interrupt being delivered to
- * this 'vlapic'.
- */
-void vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
- int delmode, int vector);
+void vlapic_tmr_update(struct vlapic *vlapic);
+void vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active);
void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val);
uint64_t vlapic_get_cr8(struct vlapic *vlapic);
Index: sys/amd64/vmm/io/vlapic.c
===================================================================
--- sys/amd64/vmm/io/vlapic.c
+++ sys/amd64/vmm/io/vlapic.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -78,6 +79,8 @@
*/
#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)
+static void vlapic_tmr_reset(struct vlapic *);
+
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
{
@@ -809,11 +812,11 @@
/*
* This function populates 'dmask' with the set of vcpus that match the
* addressing specified by the (dest, phys, lowprio) tuple.
- *
+ *
* 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
* or xAPIC (8-bit) destination field.
*/
-static void
+void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
bool lowprio, bool x2apic_dest)
{
@@ -1433,7 +1436,7 @@
lapic->dfr = 0xffffffff;
lapic->svr = APIC_SVR_VECTOR;
vlapic_mask_lvts(vlapic);
- vlapic_reset_tmr(vlapic);
+ vlapic_tmr_reset(vlapic);
lapic->dcr_timer = 0;
vlapic_dcr_write_handler(vlapic);
@@ -1601,60 +1604,77 @@
}
static void
-vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
+vlapic_tmr_reset(struct vlapic *vlapic)
{
struct LAPIC *lapic;
- uint32_t *tmrptr, mask;
- int idx;
lapic = vlapic->apic_page;
- tmrptr = &lapic->tmr0;
- idx = (vector / 32) * 4;
- mask = 1 << (vector % 32);
- if (level)
- tmrptr[idx] |= mask;
- else
- tmrptr[idx] &= ~mask;
-
- if (vlapic->ops.set_tmr != NULL)
- (*vlapic->ops.set_tmr)(vlapic, vector, level);
+ lapic->tmr0 = lapic->tmr1 = lapic->tmr2 = lapic->tmr3 = 0;
+ lapic->tmr4 = lapic->tmr5 = lapic->tmr6 = lapic->tmr7 = 0;
+ vlapic->tmr_pending = 1;
}
+/*
+ * Synchronize TMR designations into the LAPIC state.
+ * The vCPU must be in the VCPU_RUNNING state.
+ */
void
-vlapic_reset_tmr(struct vlapic *vlapic)
+vlapic_tmr_update(struct vlapic *vlapic)
{
- int vector;
+ struct LAPIC *lapic;
+ uint32_t *tmrptr;
+ uint32_t result[VLAPIC_TMR_CNT];
+ u_int i, tmr_idx;
- VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");
+ if (vlapic->tmr_pending == 0) {
+ return;
+ }
+
+ lapic = vlapic->apic_page;
+ tmrptr = &lapic->tmr0;
- for (vector = 0; vector <= 255; vector++)
- vlapic_set_tmr(vlapic, vector, false);
+ VLAPIC_CTR0(vlapic, "synchronizing TMR");
+ for (i = 0; i < VLAPIC_TMR_CNT; i++) {
+ tmr_idx = i * 4;
+
+ tmrptr[tmr_idx] &= ~vlapic->tmr_vec_deassert[i];
+ tmrptr[tmr_idx] |= vlapic->tmr_vec_assert[i];
+ vlapic->tmr_vec_deassert[i] = 0;
+ vlapic->tmr_vec_assert[i] = 0;
+ result[i] = tmrptr[tmr_idx];
+ }
+ vlapic->tmr_pending = 0;
+
+ if (vlapic->ops.set_tmr != NULL) {
+ (*vlapic->ops.set_tmr)(vlapic, result);
+ }
}
+/*
+ * Designate the TMR state for a given interrupt vector.
+ * The caller must hold the vIOAPIC lock and prevent the vCPU corresponding to
+ * this vLAPIC instance from being in or entering the VCPU_RUNNING state.
+ *
+ * The change is only recorded in the pending assert/deassert masks here; it
+ * is folded into the TMR registers by the next vlapic_tmr_update().
+ */
void
-vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
- int delmode, int vector)
+vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active)
{
- cpuset_t dmask;
- bool lowprio;
-
- KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
+ const uint32_t idx = vector / 32;
+ /* Unsigned constant: shifting a signed 1 into bit 31 is undefined. */
+ const uint32_t mask = 1U << (vector % 32);
+
+ VLAPIC_CTR2(vlapic, "TMR for vector %u %sasserted", vector,
+ active ? "" : "de");
+ if (active) {
+ vlapic->tmr_vec_assert[idx] |= mask;
+ vlapic->tmr_vec_deassert[idx] &= ~mask;
+ } else {
+ vlapic->tmr_vec_deassert[idx] |= mask;
+ vlapic->tmr_vec_assert[idx] &= ~mask;
+ }
/*
- * A level trigger is valid only for fixed and lowprio delivery modes.
+ * Track the number of TMR changes between calls to vlapic_tmr_update.
+ * While a simple boolean would suffice, this count may be useful when
+ * tracing or debugging, and is cheap to calculate.
*/
- if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
- VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
- "delivery-mode %d", delmode);
- return;
- }
-
- lowprio = (delmode == APIC_DELMODE_LOWPRIO);
- vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);
-
- if (!CPU_ISSET(vlapic->vcpuid, &dmask))
- return;
-
- VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
- vlapic_set_tmr(vlapic, vector, true);
+ vlapic->tmr_pending = MIN(UINT32_MAX - 1, vlapic->tmr_pending) + 1;
}
Index: sys/amd64/vmm/io/vlapic_priv.h
===================================================================
--- sys/amd64/vmm/io/vlapic_priv.h
+++ sys/amd64/vmm/io/vlapic_priv.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -138,6 +139,8 @@
#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI
+#define VLAPIC_TMR_CNT 8
+
struct vlapic;
struct vlapic_ops {
@@ -145,7 +148,7 @@
int (*pending_intr)(struct vlapic *vlapic, int *vecptr);
void (*intr_accepted)(struct vlapic *vlapic, int vector);
void (*post_intr)(struct vlapic *vlapic, int hostcpu);
- void (*set_tmr)(struct vlapic *vlapic, int vector, bool level);
+ void (*set_tmr)(struct vlapic *vlapic, const uint32_t *result);
void (*enable_x2apic_mode)(struct vlapic *vlapic);
};
@@ -157,6 +160,7 @@
uint32_t esr_pending;
int esr_firing;
+ uint32_t tmr_pending;
struct callout callout; /* vlapic timer */
struct bintime timer_fire_bt; /* callout expiry time */
@@ -184,6 +188,19 @@
*/
uint32_t svr_last;
uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1];
+
+ /*
+ * Store intended modifications to the trigger-mode register state.
+ * Along with the tmr_pending counter above, these are protected by the
+ * vIOAPIC lock and can only be modified under specific conditions:
+ *
+ * 1. When holding the vIOAPIC lock, and the vCPU to which the vLAPIC
+ * belongs is prevented from entering the VCPU_RUNNING state.
+ * 2. When the owning vCPU is in the VCPU_RUNNING state, and is
+ * applying the TMR modifications prior to interrupt injection.
+ */
+ uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT];
+ uint32_t tmr_vec_assert[VLAPIC_TMR_CNT];
};
void vlapic_init(struct vlapic *vlapic);
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c
+++ sys/amd64/vmm/vmm.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
+ * Copyright (c) 2019 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -96,6 +97,7 @@
struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */
enum vcpu_state state; /* (o) vcpu state */
int hostcpu; /* (o) vcpu's host cpu */
+ u_int runblock; /* (i) block vcpu from run state */
int reqidle; /* (i) request vcpu to idle */
struct vlapic *vlapic; /* (i) APIC device model */
enum x2apic_state x2apic_state; /* (i) APIC mode */
@@ -156,11 +158,6 @@
int suspend; /* (i) stop VM execution */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
- cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */
- cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */
- void *rendezvous_arg; /* (x) rendezvous func/arg */
- vm_rendezvous_func_t rendezvous_func;
- struct mtx rendezvous_mtx; /* (o) rendezvous lock */
struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
struct vmspace *vmspace; /* (o) guest's address space */
@@ -293,6 +290,7 @@
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
+ vcpu->runblock = 0;
vcpu->reqidle = 0;
vcpu->exitintinfo = 0;
vcpu->nmi_pending = 0;
@@ -461,7 +459,6 @@
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
vm->vmspace = vmspace;
- mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
vm->sockets = 1;
vm->cores = cores_per_package; /* XXX backwards compatibility */
@@ -1201,6 +1198,12 @@
break;
}
+ if (newstate == VCPU_RUNNING) {
+ while (vcpu->runblock != 0) {
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
+ }
+ }
+
if (error)
return (EBUSY);
@@ -1213,8 +1216,10 @@
else
vcpu->hostcpu = NOCPU;
- if (newstate == VCPU_IDLE)
+ if (newstate == VCPU_IDLE ||
+ (newstate == VCPU_FROZEN && vcpu->runblock != 0)) {
wakeup(&vcpu->state);
+ }
return (0);
}
@@ -1237,63 +1242,6 @@
panic("Error %d setting state to %d", error, newstate);
}
-static void
-vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
-{
-
- KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
-
- /*
- * Update 'rendezvous_func' and execute a write memory barrier to
- * ensure that it is visible across all host cpus. This is not needed
- * for correctness but it does ensure that all the vcpus will notice
- * that the rendezvous is requested immediately.
- */
- vm->rendezvous_func = func;
- wmb();
-}
-
-#define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \
- do { \
- if (vcpuid >= 0) \
- VCPU_CTR0(vm, vcpuid, fmt); \
- else \
- VM_CTR0(vm, fmt); \
- } while (0)
-
-static void
-vm_handle_rendezvous(struct vm *vm, int vcpuid)
-{
-
- KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus),
- ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
-
- mtx_lock(&vm->rendezvous_mtx);
- while (vm->rendezvous_func != NULL) {
- /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
- CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
-
- if (vcpuid != -1 &&
- CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
- !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
- VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
- (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
- CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
- }
- if (CPU_CMP(&vm->rendezvous_req_cpus,
- &vm->rendezvous_done_cpus) == 0) {
- VCPU_CTR0(vm, vcpuid, "Rendezvous completed");
- vm_set_rendezvous_func(vm, NULL);
- wakeup(&vm->rendezvous_func);
- break;
- }
- RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
- mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
- "vmrndv", 0);
- }
- mtx_unlock(&vm->rendezvous_mtx);
-}
-
/*
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
*/
@@ -1321,7 +1269,7 @@
* vcpu returned from VMRUN() and before it acquired the
* vcpu lock above.
*/
- if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle)
+ if (vm->suspend || vcpu->reqidle)
break;
if (vm_nmi_pending(vm, vcpuid))
break;
@@ -1515,10 +1463,6 @@
/*
* Wait until all 'active_cpus' have suspended themselves.
- *
- * Since a VM may be suspended at any time including when one or
- * more vcpus are doing a rendezvous we need to call the rendezvous
- * handler while we are waiting to prevent a deadlock.
*/
vcpu_lock(vcpu);
while (1) {
@@ -1527,17 +1471,10 @@
break;
}
- if (vm->rendezvous_func == NULL) {
- VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
- vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
- msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
- vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
- } else {
- VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
- vcpu_unlock(vcpu);
- vm_handle_rendezvous(vm, vcpuid);
- vcpu_lock(vcpu);
- }
+ VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
+ vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
+ msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
+ vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
}
vcpu_unlock(vcpu);
@@ -1621,17 +1558,15 @@
}
void
-vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
+vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip)
{
struct vm_exit *vmexit;
- KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress"));
-
vmexit = vm_exitinfo(vm, vcpuid);
vmexit->rip = rip;
vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_RENDEZVOUS;
- vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1);
+ vmexit->exitcode = VM_EXITCODE_RUNBLOCK;
+ vmm_stat_incr(vm, vcpuid, VMEXIT_RUNBLOCK, 1);
}
void
@@ -1684,7 +1619,7 @@
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
- evinfo.rptr = &vm->rendezvous_func;
+ evinfo.rptr = &vcpu->runblock;
evinfo.sptr = &vm->suspend;
evinfo.iptr = &vcpu->reqidle;
restart:
@@ -1724,9 +1659,7 @@
vioapic_process_eoi(vm, vcpuid,
vme->u.ioapic_eoi.vector);
break;
- case VM_EXITCODE_RENDEZVOUS:
- vm_handle_rendezvous(vm, vcpuid);
- error = 0;
+ case VM_EXITCODE_RUNBLOCK:
break;
case VM_EXITCODE_HLT:
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
@@ -2321,6 +2254,46 @@
return (state);
}
+void
+vcpu_block_run(struct vm *vm, int vcpuid)
+{ /* Add one run-block to 'vcpuid'; return once it is not VCPU_RUNNING. */
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ vcpu->runblock++; /* a count, not a flag: blocks may nest */
+ if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) { /* first blocker only */
+ vcpu_notify_event_locked(vcpu, false); /* presumably kicks the vcpu out of guest mode -- confirm */
+ }
+ while (vcpu->state == VCPU_RUNNING) { /* wait for the vcpu to leave the run state */
+ msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0); /* ended by wakeup(&vcpu->state) */
+ }
+ vcpu_unlock(vcpu);
+}
+
+/*
+ * Release one run-block previously placed on 'vcpuid' by vcpu_block_run().
+ * When the block count drops to zero, wake any thread sleeping on the vcpu
+ * state so that a pending transition to VCPU_RUNNING can proceed.
+ * Panics if 'vcpuid' is out of range.
+ */
+void
+vcpu_unblock_run(struct vm *vm, int vcpuid)
+{
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ panic("vcpu_unblock_run: invalid vcpuid %d", vcpuid);
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ /* Every unblock must pair with an earlier vcpu_block_run(). */
+ KASSERT(vcpu->runblock != 0, ("expected non-zero runblock"));
+ vcpu->runblock--;
+ if (vcpu->runblock == 0) {
+ wakeup(&vcpu->state);
+ }
+ vcpu_unlock(vcpu);
+}
+
int
vm_activate_cpu(struct vm *vm, int vcpuid)
{
@@ -2504,54 +2477,6 @@
return (apicid);
}
-void
-vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
- vm_rendezvous_func_t func, void *arg)
-{
- int i;
-
- /*
- * Enforce that this function is called without any locks
- */
- WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
- KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus),
- ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
-
-restart:
- mtx_lock(&vm->rendezvous_mtx);
- if (vm->rendezvous_func != NULL) {
- /*
- * If a rendezvous is already in progress then we need to
- * call the rendezvous handler in case this 'vcpuid' is one
- * of the targets of the rendezvous.
- */
- RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
- mtx_unlock(&vm->rendezvous_mtx);
- vm_handle_rendezvous(vm, vcpuid);
- goto restart;
- }
- KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
- "rendezvous is still in progress"));
-
- RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
- vm->rendezvous_req_cpus = dest;
- CPU_ZERO(&vm->rendezvous_done_cpus);
- vm->rendezvous_arg = arg;
- vm_set_rendezvous_func(vm, func);
- mtx_unlock(&vm->rendezvous_mtx);
-
- /*
- * Wake up any sleeping vcpus and trigger a VM-exit in any running
- * vcpus so they handle the rendezvous as soon as possible.
- */
- for (i = 0; i < vm->maxcpus; i++) {
- if (CPU_ISSET(i, &dest))
- vcpu_notify_event(vm, i, false);
- }
-
- vm_handle_rendezvous(vm, vcpuid);
-}
-
struct vatpic *
vm_atpic(struct vm *vm)
{
Index: sys/amd64/vmm/vmm_stat.h
===================================================================
--- sys/amd64/vmm/vmm_stat.h
+++ sys/amd64/vmm/vmm_stat.h
@@ -157,7 +157,7 @@
VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
VMM_STAT_DECLARE(VMEXIT_USERSPACE);
-VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
+VMM_STAT_DECLARE(VMEXIT_RUNBLOCK);
VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
VMM_STAT_DECLARE(VMEXIT_REQIDLE);
#endif
Index: sys/amd64/vmm/vmm_stat.c
===================================================================
--- sys/amd64/vmm/vmm_stat.c
+++ sys/amd64/vmm/vmm_stat.c
@@ -168,5 +168,5 @@
VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
-VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
+VMM_STAT(VMEXIT_RUNBLOCK, "number of times runblock at exit");
VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");

File Metadata

Mime Type
text/plain
Expires
Mon, May 25, 12:22 AM (4 h, 59 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33491564
Default Alt Text
D20389.id60908.diff (31 KB)

Event Timeline