diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -485,6 +485,9 @@ VM_CAP_BPT_EXIT, VM_CAP_RDPID, VM_CAP_RDTSCP, + VM_CAP_DB_EXIT, + VM_CAP_RFLAGS_SSTEP, + VM_CAP_DR_MOV_EXIT, VM_CAP_MAX }; @@ -632,6 +635,7 @@ VM_EXITCODE_DEBUG, VM_EXITCODE_VMINSN, VM_EXITCODE_BPT, + VM_EXITCODE_DB, VM_EXITCODE_MAX }; @@ -721,6 +725,15 @@ struct { int inst_length; } bpt; + struct { + int trace_trap; + int drx_access; + int gpr; + int watchpoints; /* bitmask */ + int pushf_intercept; + int tf_shadow_val; + struct vm_guest_paging paging; + } dbg; struct { uint32_t code; /* ecx value */ uint64_t wval; diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -134,6 +134,7 @@ static int svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val); +static int svm_getreg(void *arg, int vcpu, int ident, uint64_t *val); static __inline int flush_by_asid(void) @@ -1319,6 +1320,99 @@ } } +static __inline int +mov_dr_gpr_num_to_reg(int gpr) +{ + switch (gpr) { + case 0 ... 3: + return VM_REG_GUEST_RAX + gpr; + case 4: + return VM_REG_GUEST_RDI; + case 5: + return VM_REG_GUEST_RSI; + case 6: + return VM_REG_GUEST_RBP; + case 7: + return VM_REG_GUEST_RSP; + case 8 ... 15: + return VM_REG_GUEST_R8 + (gpr - 8); + default: + break; + }; + + return -1; +} + +static int +emulate_mov_dr(struct svm_softc *svm_sc, struct vm_exit *vmexit, int vcpu, + uint64_t code, uint64_t info1) +{ + int write, error; + int src, dst; + int dbreg_num, dbreg; + int gpr = mov_dr_gpr_num_to_reg(VMCB_DR_INTCTP_GPR_NUM(info1)); + uint64_t new_dst_val; + + KASSERT(gpr >= 0, ("%s: invalid GPR num %d\r\n", __func__, gpr)); + + if (code >= 0x20 && code <= 0x27) { + dbreg_num = code - 0x20; + write = 0; + } else if (code >= 0x30 && code <= 0x37) { + dbreg_num = code - 0x30; + write = 1; + } else { + // should not happen + return -1; + } + + /* + * Bounce exit to userland - allow the + * gdb stub to adjust its watchpoint metadata + */ + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 0; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.drx_access = dbreg_num; + vmexit->u.dbg.gpr = -1; + + /* + * Emulate MOV DR. + * No checks are needed since all other + * exceptions take precedence over the intercept. + * (AMD APM v2, page 498) + */ + if (dbreg_num == 7) { + dbreg = VM_REG_GUEST_DR7; + } else { + dbreg = VM_REG_GUEST_DR0 + dbreg_num; + } + + if (write) { + src = gpr; + dst = dbreg; + } else { + vmexit->u.dbg.gpr = gpr; + + src = dbreg; + dst = gpr; + } + + error = svm_getreg(svm_sc, vcpu, src, &new_dst_val); + KASSERT(error == 0, + ("%s: error %d fetching reg %d\r\n", __func__, error, src)); + + if (write && dbreg_num == 7) { + vmexit->u.dbg.watchpoints = (int)new_dst_val; + } + + error = svm_setreg(svm_sc, vcpu, dst, new_dst_val); + KASSERT(error == 0, + ("%s: error %d updating reg %d\r\n", __func__, error, dst)); + + return error; +} + static int svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) { @@ -1387,9 +1481,19 @@ case VMCB_EXIT_NMI: /* external NMI */ handled = 1; break; + case 0x20 ... 0x23: /* DR{0-3,7} read */ + case 0x27: + case 0x30 ... 0x33: /* DR{0-3,7} write */ + case 0x37: + error = emulate_mov_dr(svm_sc, vmexit, vcpu, code, info1); + KASSERT(error == 0, + ("%s: error %d emulating MOV DR", __func__, error)); + handled = 0; + break; case 0x40 ... 
0x5F: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1); reflect = 1; + handled = 1; idtvec = code - 0x40; switch (idtvec) { case IDT_MC: @@ -1400,6 +1504,7 @@ reflect = 0; VCPU_CTR0(svm_sc->vm, vcpu, "Vectoring to MCE handler"); __asm __volatile("int $18"); + handled = 1; break; case IDT_PF: error = svm_setreg(svm_sc, vcpu, VM_REG_GUEST_CR2, @@ -1420,7 +1525,105 @@ info1 = 0; break; + case IDT_DB: { + /* + * Check if we are being stepped (RFLAGS.TF) + * or if a gdb-related watchpoint has been triggered + * and bounce vmexit to userland. + */ + + struct svm_vcpu *s_vcpu = svm_get_vcpu(svm_sc, vcpu); + uint64_t dr6 = 0; + bool stepped = 0; + uint64_t watch_mask = 0; + + errcode_valid = 0; + info1 = 0; + + vmcb_read(svm_sc, vcpu, VM_REG_GUEST_DR6, &dr6); + stepped = !!(dr6 & DBREG_DR6_BS); + watch_mask = (dr6 & DBREG_DR6_BMASK); + + if (stepped && + (s_vcpu->caps & (1 << VM_CAP_RFLAGS_SSTEP))) { + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 1; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.drx_access = -1; + vmexit->u.dbg.gpr = -1; + vmexit->u.dbg.watchpoints = 0; + + if (s_vcpu->db_info.popf_next) { + /* DB exit was caused by stepping over + * popf */ + uint64_t rflags; + + s_vcpu->db_info.popf_next = 0; + /* + * Update shadowed TF bit so the next + * setcap(..., RFLAGS_SSTEP, 0) restores + * the correct value + */ + vmcb_read(svm_sc, vcpu, + VM_REG_GUEST_RFLAGS, &rflags); + s_vcpu->db_info.shadow_rflags_tf = + rflags & PSL_T; + } else if (s_vcpu->db_info.pushf_next) { + /* DB exit was caused by stepping over + * pushf */ + + /* + * Adjusting the pushed rflags after a + * restarted pushf instruction must be + * handled outside of svm.c due to the + * critical_enter() lock being held. + */ + vmexit->u.dbg.pushf_intercept = 1; + vmexit->u.dbg.tf_shadow_val = + s_vcpu->db_info.shadow_rflags_tf; + svm_paging_info( + svm_get_vmcb(svm_sc, vcpu), + &vmexit->u.dbg.paging); + + s_vcpu->db_info.pushf_next = 0; + } + reflect = 0; + handled = 0; + } else if ((watch_mask != 0) && + (s_vcpu->caps & (1 << VM_CAP_DB_EXIT))) { + /* A hw watchpoint was triggered - bounce to + * userland */ + + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 0; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.drx_access = -1; + vmexit->u.dbg.gpr = -1; + vmexit->u.dbg.watchpoints = (int)watch_mask; + + dr6 &= ~DBREG_DR6_BS; + error = vmcb_write( + svm_sc, vcpu, VM_REG_GUEST_DR6, dr6); + KASSERT(error == 0, + ("%s: error %d updating DR6\r\n", __func__, + error)); + + reflect = 0; + handled = 0; + } + break; + } case IDT_BP: + if (svm_get_intercept(svm_sc, vcpu, VMCB_EXC_INTCPT, + BIT(IDT_BP)) == 1) { + vmexit->exitcode = VM_EXITCODE_BPT; + vmexit->u.bpt.inst_length = vmexit->inst_length; + vmexit->inst_length = 0; + + reflect = 0; + handled = 0; + break; + } case IDT_OF: case IDT_BR: /* @@ -1442,11 +1645,13 @@ info1 = 0; break; } - KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) " - "when reflecting exception %d into guest", - vmexit->inst_length, idtvec)); if (reflect) { + KASSERT(vmexit->inst_length == 0, + ("invalid inst_length (%d) " + "when reflecting exception %d into guest", + vmexit->inst_length, idtvec)); + /* Reflect the exception back into the guest */ VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception " "%d/%#x into the guest", idtvec, (int)info1); @@ -1455,13 +1660,12 @@ KASSERT(error == 0, ("%s: vm_inject_exception error %d", __func__, error)); } - handled = 1; break; case VMCB_EXIT_MSR: /* MSR access. 
*/ eax = state->rax; ecx = ctx->sctx_rcx; edx = ctx->sctx_rdx; - retu = false; + retu = false; if (info1) { vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); @@ -1538,6 +1742,42 @@ case VMCB_EXIT_MWAIT: vmexit->exitcode = VM_EXITCODE_MWAIT; break; + case VMCB_EXIT_PUSHF: { + uint64_t rflags; + struct svm_vcpu *s_vcpu = svm_get_vcpu(svm_sc, vcpu); + svm_getreg(svm_sc, vcpu, VM_REG_GUEST_RFLAGS, &rflags); + /* Update shadow TF to guard against unrelated intercepts */ + s_vcpu->db_info.shadow_rflags_tf = rflags & PSL_T; + + /* Restart this instruction */ + vmexit->rip -= vmexit->inst_length; + /* Disable PUSHF intercepts - avoid a loop*/ + svm_set_intercept( + svm_sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PUSHF, 0); + /* Trace restarted instruction */ + vmcb_write(svm_sc, vcpu, VM_REG_GUEST_RFLAGS, (rflags | PSL_T)); + + s_vcpu->db_info.pushf_next = 1; + handled = 1; + break; + } + case VMCB_EXIT_POPF: { + uint64_t rflags; + svm_getreg(svm_sc, vcpu, VM_REG_GUEST_RFLAGS, &rflags); + + /* Restart this instruction */ + vmexit->rip -= vmexit->inst_length; + /* Disable POPF intercepts - avoid a loop*/ + svm_set_intercept( + svm_sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_POPF, 0); + /* Trace restarted instruction */ + vmcb_write(svm_sc, vcpu, VM_REG_GUEST_RFLAGS, (rflags | PSL_T)); + + svm_get_vcpu(svm_sc, vcpu)->db_info.popf_next = 1; + + handled = 1; + break; + } case VMCB_EXIT_SHUTDOWN: case VMCB_EXIT_VMRUN: case VMCB_EXIT_VMMCALL: @@ -2325,6 +2565,114 @@ if (val == 0) error = EINVAL; break; + case VM_CAP_BPT_EXIT: + svm_set_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_BP), val); + break; + case VM_CAP_RFLAGS_SSTEP: { + uint64_t rflags; + int db_inctpt_val = val; + struct svm_vcpu *s_vcpu; + if (svm_getreg(sc, vcpu, VM_REG_GUEST_RFLAGS, &rflags)) { + error = (EINVAL); + break; + } + + s_vcpu = svm_get_vcpu(sc, vcpu); + + if (val) { + /* Save current TF bit */ + s_vcpu->db_info.shadow_rflags_tf = rflags & PSL_T; + + /* Trace next instruction */ + if (vmcb_write(sc, vcpu, VM_REG_GUEST_RFLAGS, + (rflags | PSL_T))) { + error = (EINVAL); + break; + } + + s_vcpu->caps |= (1 << VM_CAP_RFLAGS_SSTEP); + } else { + /* + * Restore shadowed RFLAGS.TF only if vCPU was being + * stepped + */ + if (s_vcpu->caps & (1 << VM_CAP_RFLAGS_SSTEP)) { + rflags |= s_vcpu->db_info.shadow_rflags_tf; + s_vcpu->db_info.shadow_rflags_tf = 0; + + if (vmcb_write(sc, vcpu, VM_REG_GUEST_RFLAGS, + rflags)) { + error = (EINVAL); + break; + } + s_vcpu->caps &= ~(1 << VM_CAP_RFLAGS_SSTEP); + } + /* Dont disable intercept if VM_CAP_DB_EXIT is active */ + db_inctpt_val = (s_vcpu->caps & (1 << VM_CAP_DB_EXIT)); + } + + svm_set_intercept( + sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_DB), db_inctpt_val); + svm_set_intercept( + sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_POPF, val); + svm_set_intercept( + sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PUSHF, val); + + break; + } + case VM_CAP_DB_EXIT: { + struct svm_vcpu *s_vcpu = svm_get_vcpu(sc, vcpu); + if (val) { + /* Require decode assist support for now */ + if (!decode_assist()) { + error = (ENOTSUP); + break; + } + s_vcpu->caps |= (1 << VM_CAP_DB_EXIT); + } else { + s_vcpu->caps &= ~(1 << VM_CAP_DB_EXIT); + /* Dont disable intercept if VM_CAP_RFLAGS_SSTEP is + * active */ + val = (s_vcpu->caps & (1 << VM_CAP_RFLAGS_SSTEP)); + } + + svm_set_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_DB), val); + + break; + } + case VM_CAP_DR_MOV_EXIT: { + struct svm_vcpu *s_vcpu = svm_get_vcpu(sc, vcpu); + if (val) { + s_vcpu->caps |= (1 << VM_CAP_DR_MOV_EXIT); + } else { + s_vcpu->caps &= ~(1 << 
VM_CAP_DR_MOV_EXIT); + } + /* Intercept DR0-3,7 writes */ + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(0), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(1), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(2), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(3), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(7), val); + + /* Intercept DR0-3,7 reads */ + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(0), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(1), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(2), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(3), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(7), val); + + break; + } default: error = ENOENT; break; @@ -2353,6 +2701,22 @@ case VM_CAP_UNRESTRICTED_GUEST: *retval = 1; /* unrestricted guest is always enabled */ break; + case VM_CAP_DB_EXIT: + *retval = !!( + svm_get_vcpu(sc, vcpu)->caps & (1 << VM_CAP_DB_EXIT)); + break; + case VM_CAP_BPT_EXIT: + *retval = svm_get_intercept( + sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_BP)); + break; + case VM_CAP_RFLAGS_SSTEP: + *retval = !!( + svm_get_vcpu(sc, vcpu)->caps & (1 << VM_CAP_RFLAGS_SSTEP)); + break; + case VM_CAP_DR_MOV_EXIT: + *retval = !!( + svm_get_vcpu(sc, vcpu)->caps & (1 << VM_CAP_DR_MOV_EXIT)); + break; default: error = ENOENT; break; diff --git a/sys/amd64/vmm/amd/svm_softc.h b/sys/amd64/vmm/amd/svm_softc.h --- a/sys/amd64/vmm/amd/svm_softc.h +++ b/sys/amd64/vmm/amd/svm_softc.h @@ -41,6 +41,13 @@ uint32_t num; /* range is [1, nasid - 1] */ }; +struct svm_vcpu_debug_info { + bool popf_next; /* flag for handling single-stepping over popf */ + bool pushf_next; + int shadow_rflags_tf; /* shadowed tf bit value; used for + single-stepping */ +}; + /* * XXX separate out 'struct vmcb' from 'svm_vcpu' to avoid wasting space * due to VMCB alignment requirements. @@ -50,10 +57,12 @@ struct svm_regctx swctx; /* software saved vcpu context */ uint64_t vmcb_pa; /* VMCB physical address */ uint64_t nextrip; /* next instruction to be executed by guest */ - int lastcpu; /* host cpu that the vcpu last ran on */ + int lastcpu; /* host cpu that the vcpu last ran on */ uint32_t dirty; /* state cache bits that must be cleared */ long eptgen; /* pmap->pm_eptgen when the vcpu last ran */ struct asid asid; + int caps; /* optional vm capabilities */ + struct svm_vcpu_debug_info db_info; } __aligned(PAGE_SIZE); /* diff --git a/sys/amd64/vmm/amd/vmcb.h b/sys/amd64/vmm/amd/vmcb.h --- a/sys/amd64/vmm/amd/vmcb.h +++ b/sys/amd64/vmm/amd/vmcb.h @@ -45,6 +45,10 @@ #define VMCB_CTRL1_INTCPT 3 #define VMCB_CTRL2_INTCPT 4 +/* DR intercept helper macros */ +#define VMCB_INTCPT_DR_READ(n) (BIT((n))) +#define VMCB_INTCPT_DR_WRITE(n) (BIT(((n) + 16))) + /* intercept[VMCB_CTRL1_INTCPT] fields */ #define VMCB_INTCPT_INTR BIT(0) #define VMCB_INTCPT_NMI BIT(1) @@ -154,6 +158,12 @@ #define VMCB_EXIT_NPF 0x400 #define VMCB_EXIT_INVALID -1 +/* + * Helper macros to decode MOV DRx EXITINFO1. + * Section 15.8.1, MOV CRx/DRx Intercepts. + */ +#define VMCB_DR_INTCTP_GPR_NUM(x) ((x)&0xF) + /* * Nested page fault. * Bit definitions to decode EXITINFO1. 
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h --- a/sys/amd64/vmm/intel/vmcs.h +++ b/sys/amd64/vmm/intel/vmcs.h @@ -104,6 +104,9 @@ #define vmcs_gla() vmcs_read(VMCS_GUEST_LINEAR_ADDRESS) #define vmcs_idt_vectoring_info() vmcs_read(VMCS_IDT_VECTORING_INFO) #define vmcs_idt_vectoring_err() vmcs_read(VMCS_IDT_VECTORING_ERROR) +/* XXX: mask? */ +#define vmcs_pending_dbg_exceptions() \ + vmcs_read(VMCS_GUEST_PENDING_DBG_EXCEPTIONS) #endif /* _KERNEL */ @@ -393,6 +396,24 @@ #define VMCS_INTERRUPTIBILITY_SMI_BLOCKING (1 << 2) #define VMCS_INTERRUPTIBILITY_NMI_BLOCKING (1 << 3) +/* + * Exit qualification for debug exception + */ +#define EXIT_QUAL_DBG_B0 (1U << 0) +#define EXIT_QUAL_DBG_B1 (1U << 1) +#define EXIT_QUAL_DBG_B2 (1U << 2) +#define EXIT_QUAL_DBG_B3 (1U << 3) +#define EXIT_QUAL_DBG_B_MASK (0xf) +#define EXIT_QUAL_DBG_BD (1U << 13) +#define EXIT_QUAL_DBG_BS (1U << 14) + +/* + * Exit qualification for MOV DR + */ +#define EXIT_QUAL_MOV_DR_REG(n) ((n)&0x7) +#define EXIT_QUAL_MOV_DR_RW(n) (!!((n)&0x10)) +#define EXIT_QUAL_MOV_DR_GPR(n) (((n)&0xf00) >> 8) + /* * Exit qualification for EXIT_REASON_INVAL_VMCS */ diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -306,6 +306,7 @@ static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); +static int vmx_setreg(void *arg, int vcpu, int reg, uint64_t val); static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); static void vmx_inject_pir(struct vlapic *vlapic); #ifdef BHYVE_SNAPSHOT @@ -2324,6 +2325,117 @@ return (error); } +static __inline int +mov_dr_gpr_num_to_reg(int gpr) +{ + switch (gpr) { + case 0: + return VM_REG_GUEST_RAX; + case 1: + return VM_REG_GUEST_RCX; + case 2: + return VM_REG_GUEST_RDX; + case 3: + return VM_REG_GUEST_RBX; + case 4: + return VM_REG_GUEST_RSP; + case 5: + return VM_REG_GUEST_RBP; + case 6: + return VM_REG_GUEST_RSI; + case 7: + return VM_REG_GUEST_RDI; + case 8 ... 15: + return VM_REG_GUEST_R8 + (gpr - 8); + default: + break; + }; + + return -1; +} + +/* + * Emulates MOV DR according to Intel SDM Vol. 2B 4-43. + */ +static int +emulate_mov_dr(struct vmx *vmx, struct vm_exit *vmexit, int vcpu, uint64_t qual) +{ + int error; + int cpl, src, dst; + int dbreg; + uint64_t regval; + + int dbreg_num = EXIT_QUAL_MOV_DR_REG(qual); + int gpr = mov_dr_gpr_num_to_reg(EXIT_QUAL_MOV_DR_GPR(qual)); + int write = (EXIT_QUAL_MOV_DR_RW(qual) == 0); + + cpl = vmx_cpl(); + + if (cpl != 0) { + vm_inject_gp(vmx->vm, vcpu); + return 1; + } + + error = vmx_getreg(vmx, vcpu, VM_REG_GUEST_CR4, ®val); + KASSERT( + error == 0, ("%s: error %d fetching GPR %d", __func__, error, gpr)); + + if ((regval & CR4_DE) && (dbreg_num == 4 || dbreg_num == 5)) { + vm_inject_ud(vmx->vm, vcpu); + return 1; + } + + switch (dbreg_num) { + /* TODO: figure out how to handle DR{4,5} */ + case 0 ... 
3: + dbreg = VM_REG_GUEST_DR0 + dbreg_num; + break; + case 6: + dbreg = VM_REG_GUEST_DR6; + break; + case 7: + dbreg = VM_REG_GUEST_DR7; + break; + default: + return -1; + break; + } + + /* + * Bounce exit to userland - allow the + * gdb stub to adjust its watchpoint metadata + */ + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 0; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.drx_access = dbreg_num; + vmexit->u.dbg.gpr = -1; + + if (write) { + dst = dbreg; + src = gpr; + } else { + dst = gpr; + src = dbreg; + + vmexit->u.dbg.gpr = gpr; + } + + error = vmx_getreg(vmx, vcpu, src, ®val); + KASSERT(error == 0, + ("%s: error %d fetching register %d", __func__, error, src)); + + if (write && dbreg_num == 7) { + vmexit->u.dbg.watchpoints = (int)(regval); + } + + error = vmx_setreg(vmx, vcpu, dst, regval); + KASSERT(error == 0, + ("%s: error %d updating register %d", __func__, error, dst)); + + return error; +} + static int vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { @@ -2472,6 +2584,20 @@ break; } break; + + case EXIT_REASON_DR_ACCESS: + handled = 0; + + error = emulate_mov_dr(vmx, vmexit, vcpu, qual); + KASSERT( + error >= 0, ("%s: emulate_mov_dr returned -1", __func__)); + + if (error == 1) { + /* Fault was injected into guest */ + vmexit->exitcode = VM_EXITCODE_BOGUS; + handled = 1; + } + break; case EXIT_REASON_RDMSR: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1); retu = false; @@ -2658,6 +2784,78 @@ vmexit->inst_length = 0; break; } + if (intr_type == VMCS_INTR_T_HWEXCEPTION && + intr_vec == IDT_DB && + (vmx->cap[vcpu].set & (1 << VM_CAP_DB_EXIT))) { + + int reflect = 0; + /* + * A debug exception VMEXIT does not update the DR{6,7} + * registers (SDM Vol. 3C 27-1). It is therefore + * necessary to emulate these writes here. + * + * We reflect everything except watchpoint hits. Since + * it is up to the userland to reinject a debug + * exception when a guest watchpoint is hit, the + * register must be updated here so that the guest may + * properly register the watchpoint hit. + */ + int trace_trap = !!(qual & EXIT_QUAL_DBG_BS); + int debug_detect = !!(qual & EXIT_QUAL_DBG_BD); + int watch_mask = qual & EXIT_QUAL_DBG_B_MASK; + + uint64_t dr6; + error = vmx_getreg(vmx, vcpu, VM_REG_GUEST_DR6, &dr6); + KASSERT(error == 0, + ("%s: error %d fetching DR6", __func__, error)); + + uint64_t regval; + error = vmx_getreg( + vmx, vcpu, VM_REG_GUEST_RFLAGS, ®val); + KASSERT(error == 0, + ("%s: error %d fetching DR6", __func__, error)); + + dr6 &= DBREG_DR6_RESERVED1; + /* + * Clear the RTM flag (0 indicates a hit, + * Intel SDM Vol. 3B 17-3 ). + */ + dr6 |= (1 << 16); + + if (watch_mask) { + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.trace_trap = 0; + vmexit->u.dbg.drx_access = -1; + vmexit->u.dbg.watchpoints = watch_mask; + vmexit->u.dbg.drx_access = -1; + vmexit->u.dbg.watchpoints = watch_mask; + + dr6 |= watch_mask; + + /* Bounce to userland */ + reflect = 0; + } else { + dr6 |= debug_detect ? DBREG_DR6_BD : 0; + dr6 |= (trace_trap) ? 
DBREG_DR6_BS : 0; + regval &= ~(PSL_T); + + /* Reflect back into guest */ + reflect = 1; + } + error = vmx_setreg(vmx, vcpu, VM_REG_GUEST_DR6, dr6); + KASSERT(error == 0, + ("%s: error %d updating DR6", __func__, error)); + + error = vmx_setreg( + vmx, vcpu, VM_REG_GUEST_RFLAGS, regval); + KASSERT(error == 0, + ("%s: error %d fetching DR6", __func__, error)); + + if (!reflect) { + break; + } + } if (intr_vec == IDT_PF) { error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual); @@ -3488,6 +3686,8 @@ ret = 0; break; case VM_CAP_BPT_EXIT: + case VM_CAP_DB_EXIT: + case VM_CAP_DR_MOV_EXIT: ret = 0; break; default: @@ -3583,6 +3783,25 @@ reg = VMCS_EXCEPTION_BITMAP; } break; + case VM_CAP_DB_EXIT: + retval = 0; + + /* Don't change the bitmap if we are tracing all exceptions. */ + if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { + pptr = &vmx->cap[vcpu].exc_bitmap; + baseval = *pptr; + flag = (1 << IDT_DB); + reg = VMCS_EXCEPTION_BITMAP; + } + break; + case VM_CAP_DR_MOV_EXIT: + retval = 0; + + pptr = &vmx->cap[vcpu].proc_ctls; + baseval = *pptr; + flag = PROCBASED_MOV_DR_EXITING; + reg = VMCS_PRI_PROC_BASED_CTLS; + break; default: break; } diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1527,8 +1527,10 @@ */ vme->inst_length = vie->num_processed; vcpu->nextrip += vie->num_processed; - VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction " - "decoding", vcpu->nextrip); + VCPU_CTR1(vm, vcpuid, + "nextrip updated to %#lx after instruction " + "decoding", + vcpu->nextrip); /* return to userland unless this is an in-kernel emulated device */ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { @@ -1623,6 +1625,44 @@ return (0); } +static int +vm_handle_db(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu) +{ + int error, fault; + uint64_t rsp; + uint64_t rflags; + struct vm_copyinfo copyinfo; + + *retu = true; + if (!vme->u.dbg.pushf_intercept) { + return 0; + } + printf("%s: writing back rflags after pushf\r\n", __func__); + + vm_get_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); + + error = vm_copy_setup(vm, vcpuid, &vme->u.dbg.paging, rsp, + sizeof(uint64_t), VM_PROT_WRITE, ©info, 1, &fault); + if (error || fault) { + *retu = false; + return (EINVAL); + } + + /* Read pushed rflags value */ + vm_copyin(vm, vcpuid, ©info, &rflags, sizeof(uint64_t)); + printf("%s: rflags: 0x%8lx\r\n", __func__, rflags); + /* Set TF bit to shadowed value*/ + rflags &= ~(PSL_T); + rflags |= vme->u.dbg.tf_shadow_val; + printf("%s: updated rflags: 0x%8lx\r\n", __func__, rflags); + /* Write updated value back to memory*/ + vm_copyout(vm, vcpuid, &rflags, ©info, sizeof(uint64_t)); + + vm_copy_teardown(vm, vcpuid, ©info, 1); + + return (0); +} + int vm_suspend(struct vm *vm, enum vm_suspend_how how) { @@ -1797,6 +1837,9 @@ case VM_EXITCODE_INOUT_STR: error = vm_handle_inout(vm, vcpuid, vme, &retu); break; + case VM_EXITCODE_DB: + error = vm_handle_db(vm, vcpuid, vme, &retu); + break; case VM_EXITCODE_MONITOR: case VM_EXITCODE_MWAIT: case VM_EXITCODE_VMINSN: diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -927,6 +927,20 @@ return (VMEXIT_CONTINUE); } +static int +vmexit_db(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + +#ifdef BHYVE_SNAPSHOT + checkpoint_cpu_suspend(*pvcpu); +#endif + gdb_cpu_debug(*pvcpu, vmexit); +#ifdef BHYVE_SNAPSHOT + checkpoint_cpu_resume(*pvcpu); +#endif + return (VMEXIT_CONTINUE); +} + static int 
vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -951,6 +965,7 @@ [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, [VM_EXITCODE_DEBUG] = vmexit_debug, [VM_EXITCODE_BPT] = vmexit_breakpoint, + [VM_EXITCODE_DB] = vmexit_db, }; static void diff --git a/usr.sbin/bhyve/gdb.h b/usr.sbin/bhyve/gdb.h --- a/usr.sbin/bhyve/gdb.h +++ b/usr.sbin/bhyve/gdb.h @@ -34,6 +34,7 @@ void gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit); void gdb_cpu_mtrap(int vcpu); void gdb_cpu_suspend(int vcpu); +void gdb_cpu_debug(int vcpu, struct vm_exit *vmexit); void init_gdb(struct vmctx *ctx); #endif /* !__GDB_H__ */ diff --git a/usr.sbin/bhyve/gdb.c b/usr.sbin/bhyve/gdb.c --- a/usr.sbin/bhyve/gdb.c +++ b/usr.sbin/bhyve/gdb.c @@ -37,10 +37,14 @@ #include #include #include + #include +#include #include #include + #include + #include #ifndef WITHOUT_CAPSICUM #include @@ -69,7 +73,25 @@ * GDB_SIGNAL_* numbers are part of the GDB remote protocol. Most stops * use SIGTRAP. */ -#define GDB_SIGNAL_TRAP 5 +#define GDB_SIGNAL_TRAP 5 + +#define GDB_SOFTWARE_BPT 0 +#define GDB_WATCHPOINT_TYPE_WRITE 2 +#define GDB_WATCHPOINT_TYPE_READ 3 +#define GDB_WATCHPOINT_TYPE_ACCESS 4 + +#define GDB_WATCHPOINT_MAX 4 +#define GDB_WATCHPOINT_MASK ((1 << GDB_WATCHPOINT_MAX) - 1) +#define GDB_WATCHPOINT_CLEAR_NOSKIP -1 + +#define GDB_WATCHPOINT_INIT() \ + watch_stats.avail_dbregs = (-1 & GDB_WATCHPOINT_MASK) +#define GDB_FIND_WATCHPOINT() (__builtin_ffs(watch_stats.avail_dbregs) - 1) +#define GDB_HAS_AVAIL_WATCHPOINT() (watch_stats.avail_dbregs != 0) +#define GDB_ALLOC_WATCHPOINT(num) \ + watch_stats.avail_dbregs &= ~(1 << (num & GDB_WATCHPOINT_MASK)) +#define GDB_FREE_WATCHPOINT(num) \ + watch_stats.avail_dbregs |= (1 << (num & GDB_WATCHPOINT_MASK)) static void gdb_resume_vcpus(void); static void check_command(int fd); @@ -101,6 +123,22 @@ TAILQ_ENTRY(breakpoint) link; }; +struct watchpoint_stats { + int no_active; + int no_evicted; + int avail_dbregs; /* Tracks DR regs used by the guest */ + + struct watchpoint { + enum watchpoint_state { + WATCH_INACTIVE = 0, + WATCH_ACTIVE, + WATCH_EVICTED, + } state; + uint64_t gva; + int type; + int bytes; + } watchpoints[GDB_WATCHPOINT_MAX]; +}; /* * When a vCPU stops to due to an event that should be reported to the * debugger, information about the event is stored in this structure. @@ -119,11 +157,16 @@ * * When a vCPU hits a breakpoint set by the debug server, * 'hit_swbreak' is set to true. + * + * When a vCPU hits a watchpoint set by the debug server, + * 'hit_watch' is set to point to the corresponding watchpoint. */ struct vcpu_state { bool stepping; bool stepped; bool hit_swbreak; + + struct watchpoint *hit_watch; }; static struct io_buffer cur_comm, cur_resp; @@ -131,6 +174,7 @@ static struct vmctx *ctx; static int cur_fd = -1; static TAILQ_HEAD(, breakpoint) breakpoints; +static struct watchpoint_stats watch_stats; static struct vcpu_state *vcpu_state; static int cur_vcpu, stopped_vcpu; static bool gdb_active = false; @@ -221,6 +265,7 @@ #endif static void remove_all_sw_breakpoints(void); +static void remove_all_hw_watchpoints(void); static int guest_paging_info(int vcpu, struct vm_guest_paging *paging) @@ -391,6 +436,7 @@ io_buffer_reset(&cur_resp); cur_fd = -1; + remove_all_hw_watchpoints(); remove_all_sw_breakpoints(); /* Clear any pending events. 
*/ @@ -401,6 +447,22 @@ pthread_mutex_unlock(&gdb_lock); } +static const char * +gdb_watch_type_str(struct watchpoint *wp) +{ + switch (wp->type) { + case GDB_WATCHPOINT_TYPE_ACCESS: + return "awatch"; + case GDB_WATCHPOINT_TYPE_READ: + return "rwatch"; + case GDB_WATCHPOINT_TYPE_WRITE: + return "watch"; + default: + // TODO: assert? + return ""; + } +} + static uint8_t hex_digit(uint8_t nibble) { @@ -683,10 +745,18 @@ debug("$vCPU %d reporting swbreak\n", stopped_vcpu); if (swbreak_enabled) append_string("swbreak:;"); - } else if (vs->stepped) + } else if (vs->stepped) { debug("$vCPU %d reporting step\n", stopped_vcpu); - else + } else if (vs->hit_watch) { + debug("$vCPU %d reporting watchpoint\n", stopped_vcpu); + append_string(gdb_watch_type_str(vs->hit_watch)); + append_char(':'); + append_unsigned_be( + vs->hit_watch->gva, sizeof(vs->hit_watch->gva)); + append_char(';'); + } else { debug("$vCPU %d reporting ???\n", stopped_vcpu); + } } finish_packet(); report_next_stop = false; @@ -704,6 +774,7 @@ if (stopped_vcpu != -1) { vs = &vcpu_state[stopped_vcpu]; vs->hit_swbreak = false; + vs->hit_watch = NULL; vs->stepped = false; stopped_vcpu = -1; } @@ -743,6 +814,105 @@ debug("$vCPU %d resuming\n", vcpu); } +static void +gdb_suspend_vcpus(void) +{ + + assert(pthread_mutex_isowned_np(&gdb_lock)); + debug("suspending all CPUs\n"); + vcpus_suspended = vcpus_active; + vm_suspend_cpu(ctx, -1); + if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) + gdb_finish_suspend_vcpus(); +} + +/* + * Requests vCPU single-stepping using a + * VMEXIT suitable for the host platform. + */ +static int +_gdb_set_step(int vcpu, int val) +{ + /* If the MTRAP cap fails, we are running on an AMD host. + * In that case, we request DB exits caused by RFLAGS.TF + * stepping. + */ + int error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, val); + if (error) { + error = vm_set_capability(ctx, vcpu, VM_CAP_RFLAGS_SSTEP, val); + } + + return error; +} + +static int +_gdb_check_step(int vcpu) +{ + int error, val; + + error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); + if (error < 0) { + /* Check whether AMD rflags.tf stepping is supported */ + if (vm_get_capability(ctx, vcpu, VM_CAP_RFLAGS_SSTEP, &val) < 0) + return -1; + } + + return 0; +} +/* + * Invoked by vCPU before resuming execution. This enables stepping + * if the vCPU is marked as stepping. + */ +static void +gdb_cpu_resume(int vcpu) +{ + struct vcpu_state *vs; + int error; + + vs = &vcpu_state[vcpu]; + + /* + * Any pending event should already be reported before + * resuming. + */ + assert(vs->hit_swbreak == false); + assert(vs->hit_watch == NULL); + assert(vs->stepped == false); + if (vs->stepping) { + error = _gdb_set_step(vcpu, 1); + assert(error == 0); + } +} + +/* + * Invoked each time a vmexit handler needs to step a vCPU. + */ +static void +_gdb_cpu_step(int vcpu) +{ + struct vcpu_state *vs; + + debug("$vCPU %d stepped\n", vcpu); + pthread_mutex_lock(&gdb_lock); + vs = &vcpu_state[vcpu]; + if (vs->stepping) { + vs->stepping = false; + vs->stepped = true; + _gdb_set_step(vcpu, 0); + + while (vs->stepped) { + if (stopped_vcpu == -1) { + debug("$vCPU %d reporting step\n", vcpu); + stopped_vcpu = vcpu; + gdb_suspend_vcpus(); + } + _gdb_cpu_suspend(vcpu, true); + } + gdb_cpu_resume(vcpu); + } + pthread_mutex_unlock(&gdb_lock); +} + /* * Invoked at the start of a vCPU thread's execution to inform the * debug server about the new thread. 
@@ -774,96 +944,501 @@ pthread_mutex_unlock(&gdb_lock); } +static bool +set_dbexit_caps(bool enable) +{ + cpuset_t mask; + int vcpu; + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (vm_set_capability( + ctx, vcpu, VM_CAP_DB_EXIT, enable ? 1 : 0) < 0) + return (false); + debug("$vCPU %d %sabled debug exits\n", vcpu, + enable ? "en" : "dis"); + } + return (true); +} + +static bool +set_dbreg_exit_caps(bool enable) +{ + cpuset_t mask; + int vcpu; + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (vm_set_capability( + ctx, vcpu, VM_CAP_DR_MOV_EXIT, enable ? 1 : 0) < 0) + return (false); + debug("$vCPU %d %sabled debug register access exits\n", vcpu, + enable ? "en" : "dis"); + } + return (true); +} + /* - * Invoked by vCPU before resuming execution. This enables stepping - * if the vCPU is marked as stepping. + * A helper routine for setting watchpoints. + * Each watchpoint is "global" and is placed into the corresponding DR* + * registers on all vCPUs. */ -static void -gdb_cpu_resume(int vcpu) + +static int +set_watchpoint(uint64_t gva, int type, int bytes, int watchnum) { - struct vcpu_state *vs; - int error; + int access, len; + struct watchpoint *wp; - vs = &vcpu_state[vcpu]; + cpuset_t mask; + int vcpu; + uint64_t dr7; + int dbreg = VM_REG_GUEST_DR0 + watchnum; + + switch (type) { + case GDB_WATCHPOINT_TYPE_WRITE: + access = DBREG_DR7_WRONLY; + break; + case GDB_WATCHPOINT_TYPE_ACCESS: + case GDB_WATCHPOINT_TYPE_READ: + access = DBREG_DR7_RDWR; + break; + default: + return (EINVAL); + } + + switch (bytes) { + case 1: + len = DBREG_DR7_LEN_1; + break; + case 2: + len = DBREG_DR7_LEN_2; + break; + case 4: + len = DBREG_DR7_LEN_4; + break; + case 8: + len = DBREG_DR7_LEN_8; + break; + default: + return (EINVAL); + } + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + + /* Write gva to debug reg */ + vm_set_register(ctx, vcpu, dbreg, gva); + /* Enable watchpoint in DR7 */ + vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); + dr7 &= ~DBREG_DR7_MASK(watchnum); + dr7 |= DBREG_DR7_SET( + watchnum, len, access, DBREG_DR7_GLOBAL_ENABLE); + vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); + } + wp = &watch_stats.watchpoints[watchnum]; /* - * Any pending event should already be reported before - * resuming. + * An already active watchpoint can be passed - don't + * increment overall active watchpoints. */ - assert(vs->hit_swbreak == false); - assert(vs->stepped == false); - if (vs->stepping) { - error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1); - assert(error == 0); + if (wp->state != WATCH_ACTIVE) { + watch_stats.no_active++; } + wp->state = WATCH_ACTIVE; + wp->gva = gva; + wp->type = type; + wp->bytes = bytes; + + GDB_ALLOC_WATCHPOINT(watchnum); + + return 0; } /* - * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest - * has been suspended due to an event on different vCPU or in response - * to a guest-wide suspend such as Ctrl-C or the stop on attach. + * Clears watchpoint metadata and disables it on all guest vCPUs. + * + * The 'skip_vcpu' arg may be passed to prevent this routine from modifying the + * DR7 register on a specific vCPU (used when handling VMEXITS caused by DR7 + * write to avoid thrashing the new value). + * + * The 'clear_dbreg' arg controls whether the underlying debug register is + * zeroed. 
*/ -void -gdb_cpu_suspend(int vcpu) +static int +clear_watchpoint(int watchnum, int skip_vcpu, bool clear_dbreg) { + cpuset_t mask; + int vcpu; + uint64_t dr7; - if (!gdb_active) - return; - pthread_mutex_lock(&gdb_lock); - _gdb_cpu_suspend(vcpu, true); - gdb_cpu_resume(vcpu); - pthread_mutex_unlock(&gdb_lock); + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (clear_dbreg) { + vm_set_register( + ctx, vcpu, VM_REG_GUEST_DR0 + watchnum, 0); + } + if (vcpu == skip_vcpu) { + continue; + } + + /* Disable watchpoint in DR7 */ + vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); + dr7 &= ~DBREG_DR7_MASK(watchnum); + vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); + } + + watch_stats.watchpoints[watchnum].state = WATCH_INACTIVE; + /* Refrain from clearing other fields - this avoids unnecessary copies + * if migrate_watchpoint is called afterward */ + watch_stats.no_active--; + + GDB_FREE_WATCHPOINT(watchnum); + + return 0; } -static void -gdb_suspend_vcpus(void) +static struct watchpoint * +find_watchpoint(uint64_t gla) { + struct watchpoint *wp; - assert(pthread_mutex_isowned_np(&gdb_lock)); - debug("suspending all CPUs\n"); - vcpus_suspended = vcpus_active; - vm_suspend_cpu(ctx, -1); - if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) - gdb_finish_suspend_vcpus(); + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + wp = &watch_stats.watchpoints[i]; + if (wp->state == WATCH_ACTIVE && (wp->gva == gla)) { + return wp; + } + } + + return (NULL); } /* - * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via - * the VT-x-specific MTRAP exit. + * Tries to reactivate a previously evicted watchpoint. */ -void -gdb_cpu_mtrap(int vcpu) +static int +migrate_watchpoint(struct watchpoint *wp) +{ + int error; + + if (!GDB_HAS_AVAIL_WATCHPOINT()) { + return -1; + } + + if (watch_stats.no_active == 0 && watch_stats.no_evicted == 0) { + if (!set_dbexit_caps(true) || !set_dbreg_exit_caps(true)) { + return -1; + } + } + + int watchnum = GDB_FIND_WATCHPOINT(); + assert(watchnum >= 0); + + error = set_watchpoint(wp->gva, wp->type, wp->bytes, watchnum); + if (error == 0) { + watch_stats.no_evicted--; + /* check if the watchpoint was migrated to the same slot */ + if (wp->state != WATCH_ACTIVE) + wp->state = WATCH_INACTIVE; + } + return error; +} + +static void +init_watchpoint_metadata(void) +{ + cpuset_t mask; + int vcpu; + uint64_t dr7; + + GDB_WATCHPOINT_INIT(); + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + int vcpu_used_dbreg_mask = 0; + + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + + /* Construct bitmask of active dbregs */ + vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); + vcpu_used_dbreg_mask = (DBREG_DR7_ENABLED(dr7, 0) | + (DBREG_DR7_ENABLED(dr7, 1) << 1) | + (DBREG_DR7_ENABLED(dr7, 2) << 2) | + (DBREG_DR7_ENABLED(dr7, 3) << 3)); + + /* Mark any currently enabled dbreg as + * unavailable */ + watch_stats.avail_dbregs &= ~vcpu_used_dbreg_mask; + } +} + +static void +rebuild_avail_watchpoints(void) { + init_watchpoint_metadata(); + + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + if (watch_stats.watchpoints[i].state == WATCH_ACTIVE) { + GDB_ALLOC_WATCHPOINT(i); + } + } +} + +static void +handle_watchpoint_hit(int vcpu, int watch_mask) +{ + int watchnum = __builtin_ffs(watch_mask) - 1; + int dbreg = VM_REG_GUEST_DR0 + watchnum; struct vcpu_state *vs; + struct watchpoint *watch; + + uint64_t gla; + uint64_t dr6; + + assert(watchnum >= 0); - if (!gdb_active) - return; - debug("$vCPU %d MTRAP\n", vcpu); 
pthread_mutex_lock(&gdb_lock); - vs = &vcpu_state[vcpu]; - if (vs->stepping) { - vs->stepping = false; - vs->stepped = true; - vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0); - while (vs->stepped) { + + if (!watch_stats.no_active) { + vm_inject_exception(ctx, vcpu, IDT_DB, 0, 0, 0); + pthread_mutex_unlock(&gdb_lock); + return; + } + + vm_get_register(ctx, vcpu, dbreg, &gla); + + watch = find_watchpoint(gla); + if (watch) { + vs = &vcpu_state[vcpu]; + + assert(vs->stepping == false); + assert(vs->stepped == false); + assert(vs->hit_swbreak == false); + assert(vs->hit_watch == false); + + vs->hit_watch = watch; + for (;;) { if (stopped_vcpu == -1) { - debug("$vCPU %d reporting step\n", vcpu); stopped_vcpu = vcpu; gdb_suspend_vcpus(); } _gdb_cpu_suspend(vcpu, true); + if (!vs->hit_watch) { + /* Watchpoint reported. */ + break; + } + if (watch->state == WATCH_INACTIVE) { + /* Watchpoint removed. */ + break; + } } + + vm_get_register(ctx, vcpu, VM_REG_GUEST_DR6, &dr6); + dr6 &= DBREG_DR6_RESERVED1; + vm_set_register(ctx, vcpu, VM_REG_GUEST_DR6, dr6); + gdb_cpu_resume(vcpu); + } else { + /* Reflect the DB exception back into the guest */ + vm_inject_exception(ctx, vcpu, IDT_DB, 0, 0, 0); + } + + pthread_mutex_unlock(&gdb_lock); +} + +static void +handle_drx_read(int vcpu, struct vm_exit *vmexit) +{ + struct watchpoint *wp; + int dbreg_num = vmexit->u.dbg.drx_access; + uint64_t gpr_val; + int gpr = vmexit->u.dbg.gpr; + + if (dbreg_num >= 4 && dbreg_num <= 6) { + return; + } + + pthread_mutex_lock(&gdb_lock); + wp = &watch_stats.watchpoints[dbreg_num]; + + if (dbreg_num == 7) { + vm_get_register(ctx, vcpu, gpr, &gpr_val); + + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + /* Clear newly read dr7 mask for gdbstub watchpoints */ + if (watch_stats.watchpoints[i].state == WATCH_ACTIVE) { + gpr_val &= ~DBREG_DR7_MASK(i); + } + } + + vm_set_register(ctx, vcpu, gpr, gpr_val); + } + /* If the guest attempts to read from a gdbstub-active dbreg, set the + * gpr register to 0 */ + if (wp->state == WATCH_ACTIVE) { + vm_set_register(ctx, vcpu, vmexit->u.dbg.gpr, 0); + } + + pthread_mutex_unlock(&gdb_lock); +} + +static void +handle_drx_write(int vcpu, struct vm_exit *vmexit) +{ + int error; + struct watchpoint *wp; + uint64_t dbreg_val; + int dbreg_num = vmexit->u.dbg.drx_access; + + if (dbreg_num >= 4 && dbreg_num <= 6) { + return; + } + + pthread_mutex_lock(&gdb_lock); + wp = &watch_stats.watchpoints[dbreg_num]; + + if (dbreg_num == 7) { + /* A new DR7 was loaded, update watchpoint metadata */ + int dr7 = vmexit->u.dbg.watchpoints; + + /* Clear any watchpoints the guest started using */ + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + wp = &watch_stats.watchpoints[i]; + bool dbreg_enabled = DBREG_DR7_ENABLED(dr7, i); + bool watchpoint_active = wp->state == WATCH_ACTIVE; + + if (dbreg_enabled && watchpoint_active) { + /* Evict active watchpoint */ + debug( + "%s: dr7 write: evicting active watchpoint %d\n", + __func__, i); + clear_watchpoint(i, vcpu, true); + wp->state = WATCH_EVICTED; + watch_stats.no_evicted++; + } else if (!dbreg_enabled && watchpoint_active) { + debug( + "%s: dr7 write: reactivating active watchpoint %d\n", + __func__, i); + set_watchpoint(wp->gva, wp->type, wp->bytes, i); + } + } + rebuild_avail_watchpoints(); + + } else if (wp->state == WATCH_ACTIVE) { + vm_get_register( + ctx, vcpu, VM_REG_GUEST_DR0 + dbreg_num, &dbreg_val); + /* Guest started using an occupied DB reg, + * remove watchpoint */ + if (dbreg_val != 0) { + + debug("%s: evicting active watchpoint %d\n", __func__, + 
dbreg_num); + clear_watchpoint( + dbreg_num, GDB_WATCHPOINT_CLEAR_NOSKIP, false); + wp->state = WATCH_EVICTED; + watch_stats.no_evicted++; + /* Mark watchpoint as in-use */ + GDB_ALLOC_WATCHPOINT(dbreg_num); + } else { + debug( + "%s: dr7 write: reactivating active watchpoint %d\n", + __func__, dbreg_num); + set_watchpoint(wp->gva, wp->type, wp->bytes, dbreg_num); + } + // TODO: figure out how to notify remote gdb if a + // watchpoint cannot be migrated + } else { + vm_get_register( + ctx, vcpu, VM_REG_GUEST_DR0 + dbreg_num, &dbreg_val); + if (dbreg_val != 0) { + /* Mark watchpoint as in-use */ + GDB_ALLOC_WATCHPOINT(dbreg_num); + } + } + /* Try to migrate any evicted watchpoints */ + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + if (watch_stats.watchpoints[i].state == WATCH_EVICTED) { + error = migrate_watchpoint(&watch_stats.watchpoints[i]); + debug("%s: %s migrating watchpoint %d\n", __func__, + (error != -1 ? "succeeded" : "failed"), i); + if (error) { + break; + } + } + } + + pthread_mutex_unlock(&gdb_lock); +}; + +/* + * A general handler for VM_EXITCODE_DB. + * Handles RFLAGS.TF exits on AMD hosts and HW watchpoints. + */ +void +gdb_cpu_debug(int vcpu, struct vm_exit *vmexit) +{ + if (!gdb_active) + return; + + /* RFLAGS.TF exit? */ + if (vmexit->u.dbg.trace_trap) { + _gdb_cpu_step(vcpu); + } else if (vmexit->u.dbg.drx_access != -1) { + if (vmexit->u.dbg.gpr != -1) { + handle_drx_read(vcpu, vmexit); + } else { + handle_drx_write(vcpu, vmexit); + } + } else if (vmexit->u.dbg.watchpoints) { + /* A watchpoint was triggered */ + handle_watchpoint_hit(vcpu, vmexit->u.dbg.watchpoints); } +} + +/* + * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest + * has been suspended due to an event on different vCPU or in response + * to a guest-wide suspend such as Ctrl-C or the stop on attach. + */ +void +gdb_cpu_suspend(int vcpu) +{ + pthread_mutex_lock(&gdb_lock); + _gdb_cpu_suspend(vcpu, true); + gdb_cpu_resume(vcpu); pthread_mutex_unlock(&gdb_lock); } +/* + * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via + * the VT-x-specific MTRAP exit. 
+ */ +void +gdb_cpu_mtrap(int vcpu) +{ + if (!gdb_active) + return; + + _gdb_cpu_step(vcpu); +} + static struct breakpoint * find_breakpoint(uint64_t gpa) { struct breakpoint *bp; - TAILQ_FOREACH(bp, &breakpoints, link) { + TAILQ_FOREACH (bp, &breakpoints, link) { if (bp->gpa == gpa) return (bp); } @@ -891,12 +1466,14 @@ assert(vs->stepping == false); assert(vs->stepped == false); assert(vs->hit_swbreak == false); + assert(vs->hit_watch == false); vs->hit_swbreak = true; vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip); for (;;) { if (stopped_vcpu == -1) { - debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu, - vmexit->rip); + debug( + "$vCPU %d reporting breakpoint at rip %#lx\n", + vcpu, vmexit->rip); stopped_vcpu = vcpu; gdb_suspend_vcpus(); } @@ -928,12 +1505,13 @@ static bool gdb_step_vcpu(int vcpu) { - int error, val; + int error; debug("$vCPU %d step\n", vcpu); - error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); - if (error < 0) + error = _gdb_check_step(vcpu); + if (error < 0) { return (false); + } discard_stop(); vcpu_state[vcpu].stepping = true; @@ -1231,6 +1809,94 @@ set_breakpoint_caps(false); } +static void +remove_all_hw_watchpoints(void) +{ + + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + if (watch_stats.watchpoints[i].state == WATCH_ACTIVE) { + clear_watchpoint(i, GDB_WATCHPOINT_CLEAR_NOSKIP, true); + } + } + + set_dbexit_caps(false); + set_dbreg_exit_caps(false); +} + +static void +update_watchpoint(uint64_t gva, int type, int bytes, int insert) +{ + struct watchpoint *wp; + int error; + + if (!insert && watch_stats.no_active == 0) { + send_error(EINVAL); + return; + } + + if (insert) { + + /* + * No watchpoints are active - fetch and update + * watchpoint stats, enable dbreg and db exits. + */ + if (watch_stats.no_active == 0 && watch_stats.no_evicted == 0) { + /* Activate debug exception vmexits */ + if (!set_dbexit_caps(true) || + !set_dbreg_exit_caps(true)) { + send_error(EINVAL); + return; + } + + init_watchpoint_metadata(); + } + + if (watch_stats.no_active == GDB_WATCHPOINT_MAX || + !GDB_HAS_AVAIL_WATCHPOINT()) { + error = (ENOSPC); + goto err; + } + + wp = find_watchpoint(gva); + if (!wp) { + int dbreg_num = GDB_FIND_WATCHPOINT(); + assert(dbreg_num >= 0); + + debug("Allocated watchpoint %d\n", dbreg_num); + error = set_watchpoint(gva, type, bytes, dbreg_num); + if (error) { + goto err; + } + } + } else { + wp = find_watchpoint(gva); + if (wp) { + int watchnum = wp - &watch_stats.watchpoints[0]; + debug("Removing watchpoint %d\n", watchnum); + clear_watchpoint( + watchnum, GDB_WATCHPOINT_CLEAR_NOSKIP, true); + /* If the last watchpoint was removed and none are + * evicted, disable db and dbreg vmexits */ + if (watch_stats.no_active == 0 && + watch_stats.no_evicted == 0) { + set_dbexit_caps(false); + set_dbreg_exit_caps(false); + } + } + } + + send_ok(); + return; + +err: + if (watch_stats.no_active == 0 && watch_stats.no_evicted == 0) { + set_dbexit_caps(false); + set_dbreg_exit_caps(false); + } + send_error(error); + return; +} + static void update_sw_breakpoint(uint64_t gva, int kind, bool insert) { @@ -1351,9 +2017,14 @@ len = 0; switch (type) { - case 0: + case GDB_SOFTWARE_BPT: update_sw_breakpoint(gva, kind, insert); break; + case GDB_WATCHPOINT_TYPE_WRITE: + case GDB_WATCHPOINT_TYPE_READ: + case GDB_WATCHPOINT_TYPE_ACCESS: + update_watchpoint(gva, type, kind, insert); + break; default: send_empty_response(); break; @@ -1890,6 +2561,8 @@ stopped_vcpu = 0; } + memset(&watch_stats, 0, sizeof(watch_stats)); + flags = fcntl(s, 
F_GETFL); if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) err(1, "Failed to mark gdb socket non-blocking");
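
Usage sketch (not part of the patch): a minimal illustration of how a libvmmapi consumer such as the bhyve gdb stub might enable the capabilities introduced above. It only uses vm_set_capability() as already called in gdb.c; the MTRAP-first, RFLAGS.TF-fallback ordering mirrors _gdb_set_step(), and the error handling shown here is assumed for illustration, not prescribed by the patch.

#include <sys/types.h>
#include <err.h>
#include <machine/vmm.h>	/* VM_CAP_MTRAP_EXIT, VM_CAP_RFLAGS_SSTEP, ... */
#include <vmmapi.h>

/* Sketch: enable single-step and hardware-watchpoint exits on one vCPU. */
static void
enable_debug_exits(struct vmctx *ctx, int vcpu)
{
	/*
	 * Prefer the VT-x MTRAP single-step exit; on AMD hosts fall back
	 * to the RFLAGS.TF-based stepping added by this patch.
	 */
	if (vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1) != 0 &&
	    vm_set_capability(ctx, vcpu, VM_CAP_RFLAGS_SSTEP, 1) != 0)
		errx(1, "vCPU %d: no single-step support", vcpu);

	/*
	 * Bounce #DB exceptions and guest MOV DRx accesses to userland so
	 * the gdb stub can multiplex DR0-3/DR7 for its own watchpoints.
	 */
	if (vm_set_capability(ctx, vcpu, VM_CAP_DB_EXIT, 1) != 0 ||
	    vm_set_capability(ctx, vcpu, VM_CAP_DR_MOV_EXIT, 1) != 0)
		errx(1, "vCPU %d: debug-register exits unsupported", vcpu);
}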