Index: sys/amd64/include/vmm.h =================================================================== --- sys/amd64/include/vmm.h +++ sys/amd64/include/vmm.h @@ -3,6 +3,7 @@ * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -132,7 +133,7 @@ struct pmap; struct vm_eventinfo { - void *rptr; /* rendezvous cookie */ + u_int *rptr; /* runblock cookie */ int *sptr; /* suspend cookie */ int *iptr; /* reqidle cookie */ }; @@ -247,38 +248,21 @@ struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip); -void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); +void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip); #ifdef _SYS__CPUSET_H_ -/* - * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. - * The rendezvous 'func(arg)' is not allowed to do anything that will - * cause the thread to be put to sleep. - * - * If the rendezvous is being initiated from a vcpu context then the - * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1. - * - * The caller cannot hold any locks when initiating the rendezvous. - * - * The implementation of this API may cause vcpus other than those specified - * by 'dest' to be stalled. The caller should not rely on any vcpus making - * forward progress when the rendezvous is in progress. - */ -typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); -void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, - vm_rendezvous_func_t func, void *arg); cpuset_t vm_active_cpus(struct vm *vm); cpuset_t vm_debug_cpus(struct vm *vm); cpuset_t vm_suspended_cpus(struct vm *vm); #endif /* _SYS__CPUSET_H_ */ static __inline int -vcpu_rendezvous_pending(struct vm_eventinfo *info) +vcpu_runblocked(struct vm_eventinfo *info) { - return (*((uintptr_t *)(info->rptr)) != 0); + return (*info->rptr != 0); } static __inline int @@ -317,6 +301,8 @@ int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state, bool from_idle); enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu); +void vcpu_block_run(struct vm *, int); +void vcpu_unblock_run(struct vm *, int); static int __inline vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu) @@ -548,7 +534,7 @@ VM_EXITCODE_INST_EMUL, VM_EXITCODE_SPINUP_AP, VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */ - VM_EXITCODE_RENDEZVOUS, + VM_EXITCODE_RUNBLOCK, VM_EXITCODE_IOAPIC_EOI, VM_EXITCODE_SUSPENDED, VM_EXITCODE_INOUT_STR, Index: sys/amd64/vmm/amd/svm.c =================================================================== --- sys/amd64/vmm/amd/svm.c +++ sys/amd64/vmm/amd/svm.c @@ -1573,6 +1573,8 @@ need_intr_window = 0; + vlapic_tmr_update(vlapic); + if (vcpustate->nextrip != state->rip) { ctrl->intr_shadow = 0; VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking " @@ -1971,8 +1973,8 @@ * XXX * Setting 'vcpustate->lastcpu' here is bit premature because * we may return from this function without actually executing - * the VMRUN instruction. This could happen if a rendezvous - * or an AST is pending on the first time through the loop. + * the VMRUN instruction. 
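For review purposes, the new event-info plumbing can be exercised in a standalone userland sketch. The struct layout and vcpu_runblocked() below are copied from this patch; the main() driver, the spelled-out "unsigned int", and the printed message are illustrative only.

#include <stdio.h>

/* Mirrors the updated struct vm_eventinfo (u_int spelled out for userland). */
struct vm_eventinfo {
	unsigned int *rptr;	/* runblock cookie */
	int *sptr;		/* suspend cookie */
	int *iptr;		/* reqidle cookie */
};

/* Same logic as the new vcpu_runblocked() inline. */
static int
vcpu_runblocked(struct vm_eventinfo *info)
{
	return (*info->rptr != 0);
}

int
main(void)
{
	unsigned int runblock = 0;
	int suspend = 0, reqidle = 0;
	struct vm_eventinfo evinfo = { &runblock, &suspend, &reqidle };

	/*
	 * vm_run() points rptr at the per-vcpu 'runblock' counter; when
	 * vcpu_block_run() raises it, the entry path bails out with
	 * VM_EXITCODE_RUNBLOCK instead of entering the guest.
	 */
	runblock = 1;
	printf("runblocked: %d\n", vcpu_runblocked(&evinfo));
	return (0);
}

The switch from dereferencing the rendezvous function pointer to reading a plain counter is what allows vm_run() to aim rptr at the per-vcpu runblock field later in this change.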
This could happen if an AST or yield + * condition is pending on the first time through the loop. * * This works for now but any new side-effects of vcpu * migration should take this case into account. @@ -2002,9 +2004,9 @@ break; } - if (vcpu_rendezvous_pending(evinfo)) { + if (vcpu_runblocked(evinfo)) { enable_gintr(); - vm_exit_rendezvous(vm, vcpu, state->rip); + vm_exit_runblock(vm, vcpu, state->rip); break; } Index: sys/amd64/vmm/intel/vmx.c =================================================================== --- sys/amd64/vmm/intel/vmx.c +++ sys/amd64/vmm/intel/vmx.c @@ -1324,6 +1324,8 @@ uint64_t rflags, entryinfo; uint32_t gi, info; + vlapic_tmr_update(vlapic); + if (vmx->state[vcpu].nextrip != guestrip) { gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); if (gi & HWINTR_BLOCKING) { @@ -2904,9 +2906,9 @@ break; } - if (vcpu_rendezvous_pending(evinfo)) { + if (vcpu_runblocked(evinfo)) { enable_intr(); - vm_exit_rendezvous(vmx->vm, vcpu, rip); + vm_exit_runblock(vmx->vm, vcpu, rip); break; } @@ -3577,30 +3579,12 @@ } static void -vmx_set_tmr(struct vlapic *vlapic, int vector, bool level) +vmx_set_tmr(struct vlapic *vlapic, const uint32_t *masks) { - struct vlapic_vtx *vlapic_vtx; - struct vmx *vmx; - struct vmcs *vmcs; - uint64_t mask, val; - - KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); - KASSERT(!vcpu_is_running(vlapic->vm, vlapic->vcpuid, NULL), - ("vmx_set_tmr: vcpu cannot be running")); - - vlapic_vtx = (struct vlapic_vtx *)vlapic; - vmx = vlapic_vtx->vmx; - vmcs = &vmx->vmcs[vlapic->vcpuid]; - mask = 1UL << (vector % 64); - - VMPTRLD(vmcs); - val = vmcs_read(VMCS_EOI_EXIT(vector)); - if (level) - val |= mask; - else - val &= ~mask; - vmcs_write(VMCS_EOI_EXIT(vector), val); - VMCLEAR(vmcs); + vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)masks[1] << 32) | masks[0]); + vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)masks[3] << 32) | masks[2]); + vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)masks[5] << 32) | masks[4]); + vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)masks[7] << 32) | masks[6]); } static void Index: sys/amd64/vmm/io/vioapic.c =================================================================== --- sys/amd64/vmm/io/vioapic.c +++ sys/amd64/vmm/io/vioapic.c @@ -4,6 +4,7 @@ * Copyright (c) 2013 Tycho Nightingale * Copyright (c) 2013 Neel Natu * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -39,6 +40,7 @@ #include #include #include +#include #include #include @@ -223,48 +225,139 @@ return (vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE)); } +#define REDIR_IS_PHYS(reg) (((reg) & IOART_DESTMOD) == IOART_DESTPHY) +#define REDIR_IS_LOWPRIO(reg) (((reg) & IOART_DELMOD) == IOART_DELLOPRI) +/* Level-triggered interrupts only valid in fixed and low-priority modes */ +#define REDIR_IS_LVLTRIG(reg) \ + (((reg) & IOART_TRGRLVL) != 0 && \ + (((reg) & IOART_DELMOD) == IOART_DELFIXED || REDIR_IS_LOWPRIO(reg))) +#define REDIR_DEST(reg) ((reg) >> (32 + APIC_ID_SHIFT)) +#define REDIR_VECTOR(reg) ((reg) & IOART_INTVEC) + /* - * Reset the vlapic's trigger-mode register to reflect the ioapic pin - * configuration. + * Given a redirection entry, determine which vCPUs would be targeted. 
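The REDIR_* helpers can be checked in isolation. In this sketch the IOART_* constants and APIC_ID_SHIFT are reproduced from the standard I/O APIC redirection-entry layout (as in x86/apicreg.h) rather than pulled in via kernel headers, and the sample entry in main() is made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Constants mirroring the standard I/O APIC redirection-entry layout. */
#define	APIC_ID_SHIFT		24
#define	IOART_INTVEC		0x000000ff
#define	IOART_DELMOD		0x00000700
#define	IOART_DELFIXED		0x00000000
#define	IOART_DELLOPRI		0x00000100
#define	IOART_DESTMOD		0x00000800
#define	IOART_DESTPHY		0x00000000
#define	IOART_TRGRLVL		0x00008000

/* The decode macros added to vioapic.c. */
#define	REDIR_IS_PHYS(reg)	(((reg) & IOART_DESTMOD) == IOART_DESTPHY)
#define	REDIR_IS_LOWPRIO(reg)	(((reg) & IOART_DELMOD) == IOART_DELLOPRI)
#define	REDIR_IS_LVLTRIG(reg)						\
	(((reg) & IOART_TRGRLVL) != 0 &&				\
	(((reg) & IOART_DELMOD) == IOART_DELFIXED || REDIR_IS_LOWPRIO(reg)))
#define	REDIR_DEST(reg)		((reg) >> (32 + APIC_ID_SHIFT))
#define	REDIR_VECTOR(reg)	((reg) & IOART_INTVEC)

int
main(void)
{
	/* Level-triggered, fixed delivery, physical APIC ID 2, vector 0x51. */
	uint64_t ent = ((uint64_t)2 << (32 + APIC_ID_SHIFT)) |
	    IOART_TRGRLVL | IOART_DELFIXED | 0x51;

	printf("level=%d phys=%d dest=%u vector=%#x\n",
	    REDIR_IS_LVLTRIG(ent) ? 1 : 0, REDIR_IS_PHYS(ent) ? 1 : 0,
	    (unsigned)REDIR_DEST(ent), (unsigned)REDIR_VECTOR(ent));
	return (0);
}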
*/ static void -vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg) +vioapic_calcdest(struct vioapic *vioapic, uint64_t redir_ent, cpuset_t *dmask) { - struct vioapic *vioapic; - struct vlapic *vlapic; - uint32_t low, high, dest; - int delmode, pin, vector; - bool level, phys; - vlapic = vm_lapic(vm, vcpuid); - vioapic = vm_ioapic(vm); + /* + * When calculating interrupt destinations with vlapic_calcdest(), the + * legacy xAPIC format is assumed, since the system lacks interrupt + * redirection hardware. + * See vlapic_deliver_intr() for more details. + */ + vlapic_calcdest(vioapic->vm, dmask, REDIR_DEST(redir_ent), + REDIR_IS_PHYS(redir_ent), REDIR_IS_LOWPRIO(redir_ent), false); +} + +/* + * Across all redirection entries utilizing a specified vector, determine the + * set of vCPUs which would be targeted by a level-triggered interrupt. + */ +static void +vioapic_tmr_active(struct vioapic *vioapic, uint8_t vec, cpuset_t *result) +{ + u_int i; + + CPU_ZERO(result); + if (vec == 0) { + return; + } + + for (i = 0; i < REDIR_ENTRIES; i++) { + cpuset_t dest; + const uint64_t val = vioapic->rtbl[i].reg; + + if (!REDIR_IS_LVLTRIG(val) || REDIR_VECTOR(val) != vec) { + continue; + } + + CPU_ZERO(&dest); + vioapic_calcdest(vioapic, val, &dest); + CPU_OR(result, &dest); + } +} + +/* + * Update TMR state in vLAPICs after changes to vIOAPIC pin configuration + */ +static void +vioapic_update_tmrs(struct vioapic *vioapic, int vcpuid, uint64_t oldval, + uint64_t newval) +{ + cpuset_t active, allset, newset, oldset; + struct vm *vm; + uint8_t newvec, oldvec; + + vm = vioapic->vm; + CPU_ZERO(&allset); + CPU_ZERO(&newset); + CPU_ZERO(&oldset); + newvec = oldvec = 0; + + if (REDIR_IS_LVLTRIG(oldval)) { + vioapic_calcdest(vioapic, oldval, &oldset); + CPU_OR(&allset, &oldset); + oldvec = REDIR_VECTOR(oldval); + } + + if (REDIR_IS_LVLTRIG(newval)) { + vioapic_calcdest(vioapic, newval, &newset); + CPU_OR(&allset, &newset); + newvec = REDIR_VECTOR(newval); + } + + if (CPU_EMPTY(&allset) || + (CPU_CMP(&oldset, &newset) == 0 && oldvec == newvec)) { + return; + } - VIOAPIC_LOCK(vioapic); /* - * Reset all vectors to be edge-triggered. + * Since the write to the redirection table has already occurred, a + * scan of level-triggered entries referencing the old vector will find + * only entries which are now currently valid. */ - vlapic_reset_tmr(vlapic); - for (pin = 0; pin < REDIR_ENTRIES; pin++) { - low = vioapic->rtbl[pin].reg; - high = vioapic->rtbl[pin].reg >> 32; + vioapic_tmr_active(vioapic, oldvec, &active); - level = low & IOART_TRGRLVL ? true : false; - if (!level) + while (!CPU_EMPTY(&allset)) { + struct vlapic *vlapic; + u_int i; + + i = CPU_FFS(&allset) - 1; + CPU_CLR(i, &allset); + + if (oldvec == newvec && + CPU_ISSET(i, &oldset) && CPU_ISSET(i, &newset)) { continue; + } - /* - * For a level-triggered 'pin' let the vlapic figure out if - * an assertion on this 'pin' would result in an interrupt - * being delivered to it. If yes, then it will modify the - * TMR bit associated with this vector to level-triggered. 
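A simplified model of the reconciliation performed by vioapic_update_tmrs(), using plain uint32_t bitmasks in place of cpuset_t and omitting the early-return checks and the vcpu_block_run()/vcpu_unblock_run() handshake; the scenario in main() is hypothetical.

#include <stdint.h>
#include <stdio.h>

static void
reconcile(uint32_t oldset, uint32_t newset, uint32_t active,
    uint8_t oldvec, uint8_t newvec)
{
	uint32_t allset = oldset | newset;
	int i;

	for (i = 0; i < 32; i++) {
		if ((allset & (1u << i)) == 0)
			continue;
		/* Nothing to do if this vCPU keeps the same vector. */
		if (oldvec == newvec && (oldset & newset & (1u << i)) != 0)
			continue;
		/* Deassert only if no other entry still uses the old vector. */
		if ((oldset & (1u << i)) != 0 && (active & (1u << i)) == 0)
			printf("vcpu %d: deassert TMR vector %#x\n", i, oldvec);
		if ((newset & (1u << i)) != 0)
			printf("vcpu %d: assert TMR vector %#x\n", i, newvec);
	}
}

int
main(void)
{
	/* Entry retargeted from vCPU0 to vCPU1, vector 0x20 -> 0x21. */
	reconcile(0x1, 0x2, 0x0, 0x20, 0x21);
	return (0);
}

The 'active' set is what prevents a deassertion when some other level-triggered entry still routes the old vector to that vCPU.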
- */ - phys = ((low & IOART_DESTMOD) == IOART_DESTPHY); - delmode = low & IOART_DELMOD; - vector = low & IOART_INTVEC; - dest = high >> APIC_ID_SHIFT; - vlapic_set_tmr_level(vlapic, dest, phys, delmode, vector); + if (i != vcpuid) { + vcpu_block_run(vm, i); + } + + vlapic = vm_lapic(vm, i); + if (CPU_ISSET(i, &oldset)) { + /* + * Perform the deassertion if no other level-triggered + * IOAPIC entries target this vCPU with the old vector + * + * Note: Sharing of vectors like that should be + * extremely rare in modern operating systems and was + * previously unsupported by the bhyve vIOAPIC. + */ + if (!CPU_ISSET(i, &active)) { + vlapic_tmr_set(vlapic, oldvec, false); + } + } + if (CPU_ISSET(i, &newset)) { + vlapic_tmr_set(vlapic, newvec, true); + } + + if (i != vcpuid) { + vcpu_unblock_run(vm, i); + } } - VIOAPIC_UNLOCK(vioapic); } static uint32_t @@ -308,7 +401,6 @@ uint64_t data64, mask64; uint64_t last, changed; int regnum, pin, lshift; - cpuset_t allvcpus; regnum = addr & 0xff; switch (regnum) { @@ -344,18 +436,15 @@ /* * If any fields in the redirection table entry (except mask - * or polarity) have changed then rendezvous all the vcpus - * to update their vlapic trigger-mode registers. + * or polarity) have changed then update the trigger-mode + * registers on all the vlapics. */ changed = last ^ vioapic->rtbl[pin].reg; if (changed & ~(IOART_INTMASK | IOART_INTPOL)) { VIOAPIC_CTR1(vioapic, "ioapic pin%d: recalculate " "vlapic trigger-mode register", pin); - VIOAPIC_UNLOCK(vioapic); - allvcpus = vm_active_cpus(vioapic->vm); - vm_smp_rendezvous(vioapic->vm, vcpuid, allvcpus, - vioapic_update_tmr, NULL); - VIOAPIC_LOCK(vioapic); + vioapic_update_tmrs(vioapic, vcpuid, last, + vioapic->rtbl[pin].reg); } /* Index: sys/amd64/vmm/io/vlapic.h =================================================================== --- sys/amd64/vmm/io/vlapic.h +++ sys/amd64/vmm/io/vlapic.h @@ -83,16 +83,11 @@ void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, int delmode, int vec); -/* Reset the trigger-mode bits for all vectors to be edge-triggered */ -void vlapic_reset_tmr(struct vlapic *vlapic); +void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, + bool lowprio, bool x2apic_dest); -/* - * Set the trigger-mode bit associated with 'vector' to level-triggered if - * the (dest,phys,delmode) tuple resolves to an interrupt being delivered to - * this 'vlapic'. - */ -void vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, - int delmode, int vector); +void vlapic_tmr_update(struct vlapic *vlapic); +void vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active); void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val); uint64_t vlapic_get_cr8(struct vlapic *vlapic); Index: sys/amd64/vmm/io/vlapic.c =================================================================== --- sys/amd64/vmm/io/vlapic.c +++ sys/amd64/vmm/io/vlapic.c @@ -3,6 +3,7 @@ * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -78,6 +79,8 @@ */ #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) +static void vlapic_tmr_reset(struct vlapic *); + static __inline uint32_t vlapic_get_id(struct vlapic *vlapic) { @@ -809,11 +812,11 @@ /* * This function populates 'dmask' with the set of vcpus that match the * addressing specified by the (dest, phys, lowprio) tuple. 
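The write path above only recomputes TMR state when a meaningful field changes. A small check of that filter, with the IOART_* bit positions reproduced from the standard redirection-entry layout and hypothetical sample values:

#include <stdint.h>
#include <stdio.h>

#define	IOART_INTPOL	0x00002000	/* polarity */
#define	IOART_TRGRLVL	0x00008000	/* trigger mode */
#define	IOART_INTMASK	0x00010000	/* mask bit */

/* Returns non-zero when a register write must recalculate vlapic TMR state. */
static int
tmr_update_needed(uint64_t last, uint64_t new)
{
	uint64_t changed = last ^ new;

	return ((changed & ~(uint64_t)(IOART_INTMASK | IOART_INTPOL)) != 0);
}

int
main(void)
{
	uint64_t ent = 0x51;	/* edge-triggered, vector 0x51 */

	/* Masking or polarity flips alone do not touch the TMR. */
	printf("mask toggle:    %d\n", tmr_update_needed(ent, ent ^ IOART_INTMASK));
	/* Switching to level trigger does. */
	printf("trigger toggle: %d\n", tmr_update_needed(ent, ent ^ IOART_TRGRLVL));
	return (0);
}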
- * + * * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) * or xAPIC (8-bit) destination field. */ -static void +void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, bool lowprio, bool x2apic_dest) { @@ -1432,7 +1435,7 @@ lapic->dfr = 0xffffffff; lapic->svr = APIC_SVR_VECTOR; vlapic_mask_lvts(vlapic); - vlapic_reset_tmr(vlapic); + vlapic_tmr_reset(vlapic); lapic->dcr_timer = 0; vlapic_dcr_write_handler(vlapic); @@ -1600,60 +1603,77 @@ } static void -vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) +vlapic_tmr_reset(struct vlapic *vlapic) { struct LAPIC *lapic; - uint32_t *tmrptr, mask; - int idx; lapic = vlapic->apic_page; - tmrptr = &lapic->tmr0; - idx = (vector / 32) * 4; - mask = 1 << (vector % 32); - if (level) - tmrptr[idx] |= mask; - else - tmrptr[idx] &= ~mask; - - if (vlapic->ops.set_tmr != NULL) - (*vlapic->ops.set_tmr)(vlapic, vector, level); + lapic->tmr0 = lapic->tmr1 = lapic->tmr2 = lapic->tmr3 = 0; + lapic->tmr4 = lapic->tmr5 = lapic->tmr6 = lapic->tmr7 = 0; + vlapic->tmr_pending = 1; } +/* + * Synchronize TMR designations into the LAPIC state. + * The vCPU must be in the VCPU_RUNNING state. + */ void -vlapic_reset_tmr(struct vlapic *vlapic) +vlapic_tmr_update(struct vlapic *vlapic) { - int vector; + struct LAPIC *lapic; + uint32_t *tmrptr; + uint32_t result[VLAPIC_TMR_CNT]; + u_int i, tmr_idx; - VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); + if (vlapic->tmr_pending == 0) { + return; + } + + lapic = vlapic->apic_page; + tmrptr = &lapic->tmr0; - for (vector = 0; vector <= 255; vector++) - vlapic_set_tmr(vlapic, vector, false); + VLAPIC_CTR0(vlapic, "synchronizing TMR"); + for (i = 0; i < VLAPIC_TMR_CNT; i++) { + tmr_idx = i * 4; + + tmrptr[tmr_idx] &= ~vlapic->tmr_vec_deassert[i]; + tmrptr[tmr_idx] |= vlapic->tmr_vec_assert[i]; + vlapic->tmr_vec_deassert[i] = 0; + vlapic->tmr_vec_assert[i] = 0; + result[i] = tmrptr[tmr_idx]; + } + vlapic->tmr_pending = 0; + + if (vlapic->ops.set_tmr != NULL) { + (*vlapic->ops.set_tmr)(vlapic, result); + } } +/* + * Designate the TMR state for a given interrupt vector. + * The caller must hold the vIOAPIC lock and prevent the vCPU corresponding to + * this vLAPIC instance from being-in or entering the VCPU_RUNNING state. + */ void -vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, - int delmode, int vector) +vlapic_tmr_set(struct vlapic *vlapic, uint8_t vector, bool active) { - cpuset_t dmask; - bool lowprio; - - KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); + const uint32_t idx = vector / 32; + const uint32_t mask = 1 << (vector % 32); + + VLAPIC_CTR2(vlapic, "TMR for vector %u %sasserted", vector, + active ? "" : "de"); + if (active) { + vlapic->tmr_vec_assert[idx] |= mask; + vlapic->tmr_vec_deassert[idx] &= ~mask; + } else { + vlapic->tmr_vec_deassert[idx] |= mask; + vlapic->tmr_vec_assert[idx] &= ~mask; + } /* - * A level trigger is valid only for fixed and lowprio delivery modes. + * Track the number of TMR changes between calls to vlapic_tmr_update. + * While a simple boolean would suffice, this count may be useful when + * tracing or debugging, and is cheap to calculate. 
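A distilled userland restatement of the vlapic_tmr_set() bookkeeping; struct tmr_shadow is a stand-in for the relevant vlapic fields and the vector used in main() is arbitrary.

#include <stdint.h>
#include <stdio.h>
#include <sys/param.h>	/* MIN() */

#define	VLAPIC_TMR_CNT	8

struct tmr_shadow {
	uint32_t tmr_pending;
	uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT];
	uint32_t tmr_vec_assert[VLAPIC_TMR_CNT];
};

/* Same bookkeeping as vlapic_tmr_set(): record the intent, bump the counter. */
static void
tmr_set(struct tmr_shadow *s, uint8_t vector, int active)
{
	const uint32_t idx = vector / 32;
	const uint32_t mask = 1u << (vector % 32);

	if (active) {
		s->tmr_vec_assert[idx] |= mask;
		s->tmr_vec_deassert[idx] &= ~mask;
	} else {
		s->tmr_vec_deassert[idx] |= mask;
		s->tmr_vec_assert[idx] &= ~mask;
	}
	/* Saturating increment: never wraps back to 0 ("nothing pending"). */
	s->tmr_pending = MIN(UINT32_MAX - 1, s->tmr_pending) + 1;
}

int
main(void)
{
	struct tmr_shadow s = { 0 };

	tmr_set(&s, 0x51, 1);
	tmr_set(&s, 0x51, 0);	/* later deassertion supersedes the assert */
	printf("pending=%u assert[2]=%#x deassert[2]=%#x\n",
	    s.tmr_pending, s.tmr_vec_assert[2], s.tmr_vec_deassert[2]);
	return (0);
}

A later call for the same vector always supersedes the earlier one, so at most one of the assert/deassert bits can be set for a vector by the time vlapic_tmr_update() runs.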
*/ - if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { - VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " - "delivery-mode %d", delmode); - return; - } - - lowprio = (delmode == APIC_DELMODE_LOWPRIO); - vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); - - if (!CPU_ISSET(vlapic->vcpuid, &dmask)) - return; - - VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); - vlapic_set_tmr(vlapic, vector, true); + vlapic->tmr_pending = MIN(UINT32_MAX - 1, vlapic->tmr_pending) + 1; } Index: sys/amd64/vmm/io/vlapic_priv.h =================================================================== --- sys/amd64/vmm/io/vlapic_priv.h +++ sys/amd64/vmm/io/vlapic_priv.h @@ -3,6 +3,7 @@ * * Copyright (c) 2013 Neel Natu * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -138,6 +139,8 @@ #define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI +#define VLAPIC_TMR_CNT 8 + struct vlapic; struct vlapic_ops { @@ -145,7 +148,7 @@ int (*pending_intr)(struct vlapic *vlapic, int *vecptr); void (*intr_accepted)(struct vlapic *vlapic, int vector); void (*post_intr)(struct vlapic *vlapic, int hostcpu); - void (*set_tmr)(struct vlapic *vlapic, int vector, bool level); + void (*set_tmr)(struct vlapic *vlapic, const uint32_t *result); void (*enable_x2apic_mode)(struct vlapic *vlapic); }; @@ -157,6 +160,7 @@ uint32_t esr_pending; int esr_firing; + uint32_t tmr_pending; struct callout callout; /* vlapic timer */ struct bintime timer_fire_bt; /* callout expiry time */ @@ -184,6 +188,19 @@ */ uint32_t svr_last; uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1]; + + /* + * Store intended modifications to the trigger-mode register state. + * Along with the tmr_pending counter above, these are protected by the + * vIOAPIC lock and can only be modified under specific conditions: + * + * 1. When holding the vIOAPIC lock, and the vCPU to which the vLAPIC + * belongs is prevented from entering the VCPU_RUNNING state. + * 2. When the owning vCPU is in the VCPU_RUNNING state, and is + * applying the TMR modifications prior to interrupt injection. + */ + uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT]; + uint32_t tmr_vec_assert[VLAPIC_TMR_CNT]; }; void vlapic_init(struct vlapic *vlapic); Index: sys/amd64/vmm/vmm.c =================================================================== --- sys/amd64/vmm/vmm.c +++ sys/amd64/vmm/vmm.c @@ -3,6 +3,7 @@ * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. + * Copyright (c) 2019 Joyent, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -96,6 +97,7 @@ struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ enum vcpu_state state; /* (o) vcpu state */ int hostcpu; /* (o) vcpu's host cpu */ + u_int runblock; /* (i) block vcpu from run state */ int reqidle; /* (i) request vcpu to idle */ struct vlapic *vlapic; /* (i) APIC device model */ enum x2apic_state x2apic_state; /* (i) APIC mode */ @@ -156,11 +158,6 @@ int suspend; /* (i) stop VM execution */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ - cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ - cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ - void *rendezvous_arg; /* (x) rendezvous func/arg */ - vm_rendezvous_func_t rendezvous_func; - struct mtx rendezvous_mtx; /* (o) rendezvous lock */ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ @@ -293,6 +290,7 @@ vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); + vcpu->runblock = 0; vcpu->reqidle = 0; vcpu->exitintinfo = 0; vcpu->nmi_pending = 0; @@ -461,7 +459,6 @@ vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); vm->vmspace = vmspace; - mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); vm->sockets = 1; vm->cores = cores_per_package; /* XXX backwards compatibility */ @@ -1201,6 +1198,12 @@ break; } + if (newstate == VCPU_RUNNING) { + while (vcpu->runblock != 0) { + msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0); + } + } + if (error) return (EBUSY); @@ -1213,8 +1216,10 @@ else vcpu->hostcpu = NOCPU; - if (newstate == VCPU_IDLE) + if (newstate == VCPU_IDLE || + (newstate == VCPU_FROZEN && vcpu->runblock != 0)) { wakeup(&vcpu->state); + } return (0); } @@ -1237,63 +1242,6 @@ panic("Error %d setting state to %d", error, newstate); } -static void -vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func) -{ - - KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked")); - - /* - * Update 'rendezvous_func' and execute a write memory barrier to - * ensure that it is visible across all host cpus. This is not needed - * for correctness but it does ensure that all the vcpus will notice - * that the rendezvous is requested immediately. 
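The gating added to vcpu_set_state_locked() boils down to two predicates, restated here in a standalone sketch (the enum is a local stand-in for the kernel's vcpu_state; the values and driver output are illustrative only):

#include <stdio.h>

enum vcpu_state { VCPU_IDLE, VCPU_FROZEN, VCPU_RUNNING, VCPU_SLEEPING };

/*
 * A transition to VCPU_RUNNING must wait until 'runblock' drains, and
 * waiters on the state word are woken when the vcpu goes idle or leaves
 * the running state while a runblock is pending.
 */
static int
must_wait_for_runblock(enum vcpu_state newstate, unsigned int runblock)
{
	return (newstate == VCPU_RUNNING && runblock != 0);
}

static int
wakeup_needed(enum vcpu_state newstate, unsigned int runblock)
{
	return (newstate == VCPU_IDLE ||
	    (newstate == VCPU_FROZEN && runblock != 0));
}

int
main(void)
{
	printf("RUNNING with runblock=1: wait=%d\n",
	    must_wait_for_runblock(VCPU_RUNNING, 1));
	printf("FROZEN with runblock=1: wakeup=%d\n",
	    wakeup_needed(VCPU_FROZEN, 1));
	printf("FROZEN with runblock=0: wakeup=%d\n",
	    wakeup_needed(VCPU_FROZEN, 0));
	return (0);
}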
- */ - vm->rendezvous_func = func; - wmb(); -} - -#define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ - do { \ - if (vcpuid >= 0) \ - VCPU_CTR0(vm, vcpuid, fmt); \ - else \ - VM_CTR0(vm, fmt); \ - } while (0) - -static void -vm_handle_rendezvous(struct vm *vm, int vcpuid) -{ - - KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus), - ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); - - mtx_lock(&vm->rendezvous_mtx); - while (vm->rendezvous_func != NULL) { - /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ - CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus); - - if (vcpuid != -1 && - CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && - !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { - VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); - (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); - CPU_SET(vcpuid, &vm->rendezvous_done_cpus); - } - if (CPU_CMP(&vm->rendezvous_req_cpus, - &vm->rendezvous_done_cpus) == 0) { - VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); - vm_set_rendezvous_func(vm, NULL); - wakeup(&vm->rendezvous_func); - break; - } - RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); - mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, - "vmrndv", 0); - } - mtx_unlock(&vm->rendezvous_mtx); -} - /* * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. */ @@ -1321,7 +1269,7 @@ * vcpu returned from VMRUN() and before it acquired the * vcpu lock above. */ - if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle) + if (vm->suspend || vcpu->reqidle) break; if (vm_nmi_pending(vm, vcpuid)) break; @@ -1515,10 +1463,6 @@ /* * Wait until all 'active_cpus' have suspended themselves. - * - * Since a VM may be suspended at any time including when one or - * more vcpus are doing a rendezvous we need to call the rendezvous - * handler while we are waiting to prevent a deadlock. 
*/ vcpu_lock(vcpu); while (1) { @@ -1527,17 +1471,10 @@ break; } - if (vm->rendezvous_func == NULL) { - VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); - vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); - msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); - vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); - } else { - VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); - vcpu_unlock(vcpu); - vm_handle_rendezvous(vm, vcpuid); - vcpu_lock(vcpu); - } + VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); + vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); + msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); + vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); } vcpu_unlock(vcpu); @@ -1621,17 +1558,15 @@ } void -vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) +vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip) { struct vm_exit *vmexit; - KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); - vmexit = vm_exitinfo(vm, vcpuid); vmexit->rip = rip; vmexit->inst_length = 0; - vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; - vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); + vmexit->exitcode = VM_EXITCODE_RUNBLOCK; + vmm_stat_incr(vm, vcpuid, VMEXIT_RUNBLOCK, 1); } void @@ -1684,7 +1619,7 @@ pmap = vmspace_pmap(vm->vmspace); vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; - evinfo.rptr = &vm->rendezvous_func; + evinfo.rptr = &vcpu->runblock; evinfo.sptr = &vm->suspend; evinfo.iptr = &vcpu->reqidle; restart: @@ -1724,9 +1659,7 @@ vioapic_process_eoi(vm, vcpuid, vme->u.ioapic_eoi.vector); break; - case VM_EXITCODE_RENDEZVOUS: - vm_handle_rendezvous(vm, vcpuid); - error = 0; + case VM_EXITCODE_RUNBLOCK: break; case VM_EXITCODE_HLT: intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); @@ -2321,6 +2254,46 @@ return (state); } +void +vcpu_block_run(struct vm *vm, int vcpuid) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + panic("vcpu_block_run: invalid vcpuid %d", vcpuid); + + vcpu = &vm->vcpu[vcpuid]; + + vcpu_lock(vcpu); + vcpu->runblock++; + if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) { + vcpu_notify_event_locked(vcpu, false); + } + while (vcpu->state == VCPU_RUNNING) { + msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0); + } + vcpu_unlock(vcpu); +} + +void +vcpu_unblock_run(struct vm *vm, int vcpuid) +{ + struct vcpu *vcpu; + + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) + panic("vcpu_block_run: invalid vcpuid %d", vcpuid); + + vcpu = &vm->vcpu[vcpuid]; + + vcpu_lock(vcpu); + KASSERT(vcpu->runblock != 0, ("expected non-zero runblock")); + vcpu->runblock--; + if (vcpu->runblock == 0) { + wakeup(&vcpu->state); + } + vcpu_unlock(vcpu); +} + int vm_activate_cpu(struct vm *vm, int vcpuid) { @@ -2504,54 +2477,6 @@ return (apicid); } -void -vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, - vm_rendezvous_func_t func, void *arg) -{ - int i; - - /* - * Enforce that this function is called without any locks - */ - WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); - KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < vm->maxcpus), - ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); - -restart: - mtx_lock(&vm->rendezvous_mtx); - if (vm->rendezvous_func != NULL) { - /* - * If a rendezvous is already in progress then we need to - * call the rendezvous handler in case this 'vcpuid' is one - * of the targets of the rendezvous. 
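The block/unblock handshake can be modeled in userland with a mutex and condition variable standing in for the vcpu spinlock, msleep_spin(), and wakeup(); everything below is a simplified analogue, not the kernel code, and the thread driver is hypothetical.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Userland stand-ins for the per-vcpu lock, state, and runblock counter. */
static pthread_mutex_t vcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t vcpu_cv = PTHREAD_COND_INITIALIZER;
static int vcpu_running;
static unsigned int runblock;

/* Modeled on vcpu_block_run(): force the vcpu out of "running" and hold it. */
static void
block_run(void)
{
	pthread_mutex_lock(&vcpu_mtx);
	runblock++;
	/* vcpu_notify_event_locked() would kick the guest out here. */
	while (vcpu_running)
		pthread_cond_wait(&vcpu_cv, &vcpu_mtx);
	pthread_mutex_unlock(&vcpu_mtx);
}

/* Modeled on vcpu_unblock_run(). */
static void
unblock_run(void)
{
	pthread_mutex_lock(&vcpu_mtx);
	runblock--;
	if (runblock == 0)
		pthread_cond_broadcast(&vcpu_cv);
	pthread_mutex_unlock(&vcpu_mtx);
}

/* The vcpu loop: cannot (re)enter "running" while a runblock is pending. */
static void *
vcpu_thread(void *arg)
{
	int i;

	for (i = 0; i < 3; i++) {
		pthread_mutex_lock(&vcpu_mtx);
		while (runblock != 0)
			pthread_cond_wait(&vcpu_cv, &vcpu_mtx);
		vcpu_running = 1;
		pthread_mutex_unlock(&vcpu_mtx);

		usleep(1000);			/* "guest executes" */

		pthread_mutex_lock(&vcpu_mtx);
		vcpu_running = 0;
		pthread_cond_broadcast(&vcpu_cv);	/* wake blockers */
		pthread_mutex_unlock(&vcpu_mtx);
	}
	return (NULL);
}

int
main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, vcpu_thread, NULL);
	block_run();
	printf("vcpu held outside the running state; safe to touch TMR\n");
	unblock_run();
	pthread_join(tid, NULL);
	return (0);
}

As in the kernel version, a single wait channel serves both directions: blockers wait for the vcpu to leave the running state, and the vcpu waits for the runblock count to drain.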
- */ - RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); - mtx_unlock(&vm->rendezvous_mtx); - vm_handle_rendezvous(vm, vcpuid); - goto restart; - } - KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " - "rendezvous is still in progress")); - - RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); - vm->rendezvous_req_cpus = dest; - CPU_ZERO(&vm->rendezvous_done_cpus); - vm->rendezvous_arg = arg; - vm_set_rendezvous_func(vm, func); - mtx_unlock(&vm->rendezvous_mtx); - - /* - * Wake up any sleeping vcpus and trigger a VM-exit in any running - * vcpus so they handle the rendezvous as soon as possible. - */ - for (i = 0; i < vm->maxcpus; i++) { - if (CPU_ISSET(i, &dest)) - vcpu_notify_event(vm, i, false); - } - - vm_handle_rendezvous(vm, vcpuid); -} - struct vatpic * vm_atpic(struct vm *vm) { Index: sys/amd64/vmm/vmm_stat.h =================================================================== --- sys/amd64/vmm/vmm_stat.h +++ sys/amd64/vmm/vmm_stat.h @@ -157,7 +157,7 @@ VMM_STAT_DECLARE(VMEXIT_UNKNOWN); VMM_STAT_DECLARE(VMEXIT_ASTPENDING); VMM_STAT_DECLARE(VMEXIT_USERSPACE); -VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS); +VMM_STAT_DECLARE(VMEXIT_RUNBLOCK); VMM_STAT_DECLARE(VMEXIT_EXCEPTION); VMM_STAT_DECLARE(VMEXIT_REQIDLE); #endif Index: sys/amd64/vmm/vmm_stat.c =================================================================== --- sys/amd64/vmm/vmm_stat.c +++ sys/amd64/vmm/vmm_stat.c @@ -168,5 +168,5 @@ VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit"); VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace"); -VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit"); +VMM_STAT(VMEXIT_RUNBLOCK, "number of times runblock at exit"); VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
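Putting the pieces together, this end-to-end userland sketch follows a vector from vlapic_tmr_set() through a vlapic_tmr_update()-style sync into the packed words that vmx_set_tmr() writes to the EOI-exit bitmap. The mini_vlapic structure is a stand-in (the real APIC page spaces tmr0..tmr7 at 16-byte intervals, which is why vlapic_tmr_update() steps through the page with a stride of four uint32_ts), and the saturating tmr_pending increment and per-backend ops hook are omitted for brevity.

#include <stdint.h>
#include <stdio.h>

#define	VLAPIC_TMR_CNT	8

/* Stand-in for the vlapic fields involved in TMR tracking. */
struct mini_vlapic {
	uint32_t tmr_pending;
	uint32_t tmr[VLAPIC_TMR_CNT];		/* lapic->tmr0 .. tmr7 */
	uint32_t tmr_vec_deassert[VLAPIC_TMR_CNT];
	uint32_t tmr_vec_assert[VLAPIC_TMR_CNT];
};

/* vIOAPIC side: record the desired trigger-mode state for a vector. */
static void
tmr_set(struct mini_vlapic *v, uint8_t vec, int active)
{
	const uint32_t idx = vec / 32, mask = 1u << (vec % 32);

	if (active) {
		v->tmr_vec_assert[idx] |= mask;
		v->tmr_vec_deassert[idx] &= ~mask;
	} else {
		v->tmr_vec_deassert[idx] |= mask;
		v->tmr_vec_assert[idx] &= ~mask;
	}
	v->tmr_pending++;
}

/* vCPU side: fold the pending changes in, then hand the result to the CPU. */
static void
tmr_update(struct mini_vlapic *v)
{
	uint64_t eoi_exit[4];
	unsigned int i;

	if (v->tmr_pending == 0)
		return;
	for (i = 0; i < VLAPIC_TMR_CNT; i++) {
		v->tmr[i] &= ~v->tmr_vec_deassert[i];
		v->tmr[i] |= v->tmr_vec_assert[i];
		v->tmr_vec_deassert[i] = v->tmr_vec_assert[i] = 0;
	}
	v->tmr_pending = 0;
	/* Equivalent of the four VMCS_EOI_EXITn writes in vmx_set_tmr(). */
	for (i = 0; i < 4; i++)
		eoi_exit[i] = ((uint64_t)v->tmr[i * 2 + 1] << 32) | v->tmr[i * 2];
	printf("EOI_EXIT1 = %#llx\n", (unsigned long long)eoi_exit[1]);
}

int
main(void)
{
	struct mini_vlapic v = { 0 };

	tmr_set(&v, 0x51, 1);	/* vIOAPIC marks vector 0x51 level-triggered */
	tmr_update(&v);		/* vCPU syncs the state on its next entry */
	return (0);
}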