diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -485,6 +485,9 @@ VM_CAP_BPT_EXIT, VM_CAP_RDPID, VM_CAP_RDTSCP, + VM_CAP_DB_EXIT, + VM_CAP_RFLAGS_SSTEP, + VM_CAP_DR_MOV_EXIT, VM_CAP_MAX }; @@ -632,6 +635,7 @@ VM_EXITCODE_DEBUG, VM_EXITCODE_VMINSN, VM_EXITCODE_BPT, + VM_EXITCODE_DB, VM_EXITCODE_MAX }; @@ -721,6 +725,15 @@ struct { int inst_length; } bpt; + struct { + int trace_trap; + int drx_access; + int gpr; + int watchpoints; /* bitmask */ + int pushf_intercept; + int tf_shadow_val; + struct vm_guest_paging paging; + } dbg; struct { uint32_t code; /* ecx value */ uint64_t wval; diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -134,6 +134,7 @@ static int svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val); +static int svm_getreg(void *arg, int vcpu, int ident, uint64_t *val); static __inline int flush_by_asid(void) @@ -1319,6 +1320,99 @@ } } +static __inline int +mov_dr_gpr_num_to_reg(int gpr) +{ + switch (gpr) { + case 0 ... 3: + return VM_REG_GUEST_RAX + gpr; + case 4: + return VM_REG_GUEST_RDI; + case 5: + return VM_REG_GUEST_RSI; + case 6: + return VM_REG_GUEST_RBP; + case 7: + return VM_REG_GUEST_RSP; + case 8 ... 15: + return VM_REG_GUEST_R8 + (gpr - 8); + default: + break; + }; + + return -1; +} + +static int +emulate_mov_dr(struct svm_softc *svm_sc, struct vm_exit *vmexit, int vcpu, + uint64_t code, uint64_t info1) +{ + int write, error; + int src, dst; + int dbreg_num, dbreg; + int gpr = mov_dr_gpr_num_to_reg(VMCB_DR_INTCTP_GPR_NUM(info1)); + uint64_t new_dst_val; + + KASSERT(gpr >= 0, ("%s: invalid GPR num %d\r\n", __func__, gpr)); + + if (code >= 0x20 && code <= 0x27) { + dbreg_num = code - 0x20; + write = 0; + } else if (code >= 0x30 && code <= 0x37) { + dbreg_num = code - 0x30; + write = 1; + } else { + // should not happen + return -1; + } + + /* + * Bounce exit to userland - allow the + * gdb stub to adjust its watchpoint metadata + */ + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 0; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.drx_access = dbreg_num; + vmexit->u.dbg.gpr = -1; + + /* + * Emulate MOV DR. + * No checks are needed since all other + * exceptions take precedence over the intercept. + * (AMD APM v2, page 498) + */ + if (dbreg_num == 7) { + dbreg = VM_REG_GUEST_DR7; + } else { + dbreg = VM_REG_GUEST_DR0 + dbreg_num; + } + + if (write) { + src = gpr; + dst = dbreg; + } else { + vmexit->u.dbg.gpr = gpr; + + src = dbreg; + dst = gpr; + } + + error = svm_getreg(svm_sc, vcpu, src, &new_dst_val); + KASSERT(error == 0, + ("%s: error %d fetching reg %d\r\n", __func__, error, src)); + + if (write && dbreg_num == 7) { + vmexit->u.dbg.watchpoints = (int)new_dst_val; + } + + error = svm_setreg(svm_sc, vcpu, dst, new_dst_val); + KASSERT(error == 0, + ("%s: error %d updating reg %d\r\n", __func__, error, dst)); + + return error; +} + static int svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) { @@ -1387,9 +1481,19 @@ case VMCB_EXIT_NMI: /* external NMI */ handled = 1; break; + case 0x20 ... 0x23: /* DR{0-3,7} read */ + case 0x27: + case 0x30 ... 0x33: /* DR{0-3,7} write */ + case 0x37: + error = emulate_mov_dr(svm_sc, vmexit, vcpu, code, info1); + KASSERT(error == 0, + ("%s: error %d emulating MOV DR", __func__, error)); + handled = 0; + break; case 0x40 ... 
0x5F: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1); reflect = 1; + handled = 1; idtvec = code - 0x40; switch (idtvec) { case IDT_MC: @@ -1400,6 +1504,7 @@ reflect = 0; VCPU_CTR0(svm_sc->vm, vcpu, "Vectoring to MCE handler"); __asm __volatile("int $18"); + handled = 1; break; case IDT_PF: error = svm_setreg(svm_sc, vcpu, VM_REG_GUEST_CR2, @@ -1420,7 +1525,105 @@ info1 = 0; break; + case IDT_DB: { + /* + * Check if we are being stepped (RFLAGS.TF) + * or if a gdb-related watchpoint has been triggered + * and bounce vmexit to userland. + */ + + struct svm_vcpu *s_vcpu = svm_get_vcpu(svm_sc, vcpu); + uint64_t dr6 = 0; + bool stepped = 0; + uint64_t watch_mask = 0; + + errcode_valid = 0; + info1 = 0; + + vmcb_read(svm_sc, vcpu, VM_REG_GUEST_DR6, &dr6); + stepped = !!(dr6 & DBREG_DR6_BS); + watch_mask = (dr6 & DBREG_DR6_BMASK); + + if (stepped && + (s_vcpu->caps & (1 << VM_CAP_RFLAGS_SSTEP))) { + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 1; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.drx_access = -1; + vmexit->u.dbg.gpr = -1; + vmexit->u.dbg.watchpoints = 0; + + if (s_vcpu->db_info.popf_next) { + /* DB exit was caused by stepping over + * popf */ + uint64_t rflags; + + s_vcpu->db_info.popf_next = 0; + /* + * Update shadowed TF bit so the next + * setcap(..., RFLAGS_SSTEP, 0) restores + * the correct value + */ + vmcb_read(svm_sc, vcpu, + VM_REG_GUEST_RFLAGS, &rflags); + s_vcpu->db_info.shadow_rflags_tf = + rflags & PSL_T; + } else if (s_vcpu->db_info.pushf_next) { + /* DB exit was caused by stepping over + * pushf */ + + /* + * Adjusting the pushed rflags after a + * restarted pushf instruction must be + * handled outside of svm.c due to the + * critical_enter() lock being held. + */ + vmexit->u.dbg.pushf_intercept = 1; + vmexit->u.dbg.tf_shadow_val = + s_vcpu->db_info.shadow_rflags_tf; + svm_paging_info( + svm_get_vmcb(svm_sc, vcpu), + &vmexit->u.dbg.paging); + + s_vcpu->db_info.pushf_next = 0; + } + reflect = 0; + handled = 0; + } else if ((watch_mask != 0) && + (s_vcpu->caps & (1 << VM_CAP_DB_EXIT))) { + /* A hw watchpoint was triggered - bounce to + * userland */ + + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 0; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.drx_access = -1; + vmexit->u.dbg.gpr = -1; + vmexit->u.dbg.watchpoints = (int)watch_mask; + + dr6 &= ~DBREG_DR6_BS; + error = vmcb_write( + svm_sc, vcpu, VM_REG_GUEST_DR6, dr6); + KASSERT(error == 0, + ("%s: error %d updating DR6\r\n", __func__, + error)); + + reflect = 0; + handled = 0; + } + break; + } case IDT_BP: + if (svm_get_intercept(svm_sc, vcpu, VMCB_EXC_INTCPT, + BIT(IDT_BP)) == 1) { + vmexit->exitcode = VM_EXITCODE_BPT; + vmexit->u.bpt.inst_length = vmexit->inst_length; + vmexit->inst_length = 0; + + reflect = 0; + handled = 0; + break; + } case IDT_OF: case IDT_BR: /* @@ -1442,11 +1645,13 @@ info1 = 0; break; } - KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) " - "when reflecting exception %d into guest", - vmexit->inst_length, idtvec)); if (reflect) { + KASSERT(vmexit->inst_length == 0, + ("invalid inst_length (%d) " + "when reflecting exception %d into guest", + vmexit->inst_length, idtvec)); + /* Reflect the exception back into the guest */ VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception " "%d/%#x into the guest", idtvec, (int)info1); @@ -1455,13 +1660,12 @@ KASSERT(error == 0, ("%s: vm_inject_exception error %d", __func__, error)); } - handled = 1; break; case VMCB_EXIT_MSR: /* MSR access. 
*/ eax = state->rax; ecx = ctx->sctx_rcx; edx = ctx->sctx_rdx; - retu = false; + retu = false; if (info1) { vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); @@ -1538,6 +1742,42 @@ case VMCB_EXIT_MWAIT: vmexit->exitcode = VM_EXITCODE_MWAIT; break; + case VMCB_EXIT_PUSHF: { + uint64_t rflags; + struct svm_vcpu *s_vcpu = svm_get_vcpu(svm_sc, vcpu); + svm_getreg(svm_sc, vcpu, VM_REG_GUEST_RFLAGS, &rflags); + /* Update shadow TF to guard against unrelated intercepts */ + s_vcpu->db_info.shadow_rflags_tf = rflags & PSL_T; + + /* Restart this instruction */ + vmexit->rip -= vmexit->inst_length; + /* Disable PUSHF intercepts - avoid a loop*/ + svm_set_intercept( + svm_sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PUSHF, 0); + /* Trace restarted instruction */ + vmcb_write(svm_sc, vcpu, VM_REG_GUEST_RFLAGS, (rflags | PSL_T)); + + s_vcpu->db_info.pushf_next = 1; + handled = 1; + break; + } + case VMCB_EXIT_POPF: { + uint64_t rflags; + svm_getreg(svm_sc, vcpu, VM_REG_GUEST_RFLAGS, &rflags); + + /* Restart this instruction */ + vmexit->rip -= vmexit->inst_length; + /* Disable POPF intercepts - avoid a loop*/ + svm_set_intercept( + svm_sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_POPF, 0); + /* Trace restarted instruction */ + vmcb_write(svm_sc, vcpu, VM_REG_GUEST_RFLAGS, (rflags | PSL_T)); + + svm_get_vcpu(svm_sc, vcpu)->db_info.popf_next = 1; + + handled = 1; + break; + } case VMCB_EXIT_SHUTDOWN: case VMCB_EXIT_VMRUN: case VMCB_EXIT_VMMCALL: @@ -2325,6 +2565,114 @@ if (val == 0) error = EINVAL; break; + case VM_CAP_BPT_EXIT: + svm_set_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_BP), val); + break; + case VM_CAP_RFLAGS_SSTEP: { + uint64_t rflags; + int db_inctpt_val = val; + struct svm_vcpu *s_vcpu; + if (svm_getreg(sc, vcpu, VM_REG_GUEST_RFLAGS, &rflags)) { + error = (EINVAL); + break; + } + + s_vcpu = svm_get_vcpu(sc, vcpu); + + if (val) { + /* Save current TF bit */ + s_vcpu->db_info.shadow_rflags_tf = rflags & PSL_T; + + /* Trace next instruction */ + if (vmcb_write(sc, vcpu, VM_REG_GUEST_RFLAGS, + (rflags | PSL_T))) { + error = (EINVAL); + break; + } + + s_vcpu->caps |= (1 << VM_CAP_RFLAGS_SSTEP); + } else { + /* + * Restore shadowed RFLAGS.TF only if vCPU was being + * stepped + */ + if (s_vcpu->caps & (1 << VM_CAP_RFLAGS_SSTEP)) { + rflags |= s_vcpu->db_info.shadow_rflags_tf; + s_vcpu->db_info.shadow_rflags_tf = 0; + + if (vmcb_write(sc, vcpu, VM_REG_GUEST_RFLAGS, + rflags)) { + error = (EINVAL); + break; + } + s_vcpu->caps &= ~(1 << VM_CAP_RFLAGS_SSTEP); + } + /* Dont disable intercept if VM_CAP_DB_EXIT is active */ + db_inctpt_val = (s_vcpu->caps & (1 << VM_CAP_DB_EXIT)); + } + + svm_set_intercept( + sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_DB), db_inctpt_val); + svm_set_intercept( + sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_POPF, val); + svm_set_intercept( + sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_PUSHF, val); + + break; + } + case VM_CAP_DB_EXIT: { + struct svm_vcpu *s_vcpu = svm_get_vcpu(sc, vcpu); + if (val) { + /* Require decode assist support for now */ + if (!decode_assist()) { + error = (ENOTSUP); + break; + } + s_vcpu->caps |= (1 << VM_CAP_DB_EXIT); + } else { + s_vcpu->caps &= ~(1 << VM_CAP_DB_EXIT); + /* Dont disable intercept if VM_CAP_RFLAGS_SSTEP is + * active */ + val = (s_vcpu->caps & (1 << VM_CAP_RFLAGS_SSTEP)); + } + + svm_set_intercept(sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_DB), val); + + break; + } + case VM_CAP_DR_MOV_EXIT: { + struct svm_vcpu *s_vcpu = svm_get_vcpu(sc, vcpu); + if (val) { + s_vcpu->caps |= (1 << VM_CAP_DR_MOV_EXIT); + } else { + s_vcpu->caps &= ~(1 << 
VM_CAP_DR_MOV_EXIT); + } + /* Intercept DR0-3,7 writes */ + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(0), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(1), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(2), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(3), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_WRITE(7), val); + + /* Intercept DR0-3,7 reads */ + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(0), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(1), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(2), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(3), val); + svm_set_intercept( + sc, vcpu, VMCB_DR_INTCPT, VMCB_INTCPT_DR_READ(7), val); + + break; + } default: error = ENOENT; break; @@ -2353,6 +2701,22 @@ case VM_CAP_UNRESTRICTED_GUEST: *retval = 1; /* unrestricted guest is always enabled */ break; + case VM_CAP_DB_EXIT: + *retval = !!( + svm_get_vcpu(sc, vcpu)->caps & (1 << VM_CAP_DB_EXIT)); + break; + case VM_CAP_BPT_EXIT: + *retval = svm_get_intercept( + sc, vcpu, VMCB_EXC_INTCPT, BIT(IDT_BP)); + break; + case VM_CAP_RFLAGS_SSTEP: + *retval = !!( + svm_get_vcpu(sc, vcpu)->caps & (1 << VM_CAP_RFLAGS_SSTEP)); + break; + case VM_CAP_DR_MOV_EXIT: + *retval = !!( + svm_get_vcpu(sc, vcpu)->caps & (1 << VM_CAP_DR_MOV_EXIT)); + break; default: error = ENOENT; break; diff --git a/sys/amd64/vmm/amd/svm_softc.h b/sys/amd64/vmm/amd/svm_softc.h --- a/sys/amd64/vmm/amd/svm_softc.h +++ b/sys/amd64/vmm/amd/svm_softc.h @@ -41,6 +41,13 @@ uint32_t num; /* range is [1, nasid - 1] */ }; +struct svm_vcpu_debug_info { + bool popf_next; /* flag for handling single-stepping over popf */ + bool pushf_next; + int shadow_rflags_tf; /* shadowed tf bit value; used for + single-stepping */ +}; + /* * XXX separate out 'struct vmcb' from 'svm_vcpu' to avoid wasting space * due to VMCB alignment requirements. @@ -50,10 +57,12 @@ struct svm_regctx swctx; /* software saved vcpu context */ uint64_t vmcb_pa; /* VMCB physical address */ uint64_t nextrip; /* next instruction to be executed by guest */ - int lastcpu; /* host cpu that the vcpu last ran on */ + int lastcpu; /* host cpu that the vcpu last ran on */ uint32_t dirty; /* state cache bits that must be cleared */ long eptgen; /* pmap->pm_eptgen when the vcpu last ran */ struct asid asid; + int caps; /* optional vm capabilities */ + struct svm_vcpu_debug_info db_info; } __aligned(PAGE_SIZE); /* diff --git a/sys/amd64/vmm/amd/vmcb.h b/sys/amd64/vmm/amd/vmcb.h --- a/sys/amd64/vmm/amd/vmcb.h +++ b/sys/amd64/vmm/amd/vmcb.h @@ -45,6 +45,10 @@ #define VMCB_CTRL1_INTCPT 3 #define VMCB_CTRL2_INTCPT 4 +/* DR intercept helper macros */ +#define VMCB_INTCPT_DR_READ(n) (BIT((n))) +#define VMCB_INTCPT_DR_WRITE(n) (BIT(((n) + 16))) + /* intercept[VMCB_CTRL1_INTCPT] fields */ #define VMCB_INTCPT_INTR BIT(0) #define VMCB_INTCPT_NMI BIT(1) @@ -154,6 +158,12 @@ #define VMCB_EXIT_NPF 0x400 #define VMCB_EXIT_INVALID -1 +/* + * Helper macros to decode MOV DRx EXITINFO1. + * Section 15.8.1, MOV CRx/DRx Intercepts. + */ +#define VMCB_DR_INTCTP_GPR_NUM(x) ((x)&0xF) + /* * Nested page fault. * Bit definitions to decode EXITINFO1. 
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h --- a/sys/amd64/vmm/intel/vmcs.h +++ b/sys/amd64/vmm/intel/vmcs.h @@ -104,6 +104,9 @@ #define vmcs_gla() vmcs_read(VMCS_GUEST_LINEAR_ADDRESS) #define vmcs_idt_vectoring_info() vmcs_read(VMCS_IDT_VECTORING_INFO) #define vmcs_idt_vectoring_err() vmcs_read(VMCS_IDT_VECTORING_ERROR) +/* XXX: mask? */ +#define vmcs_pending_dbg_exceptions() \ + vmcs_read(VMCS_GUEST_PENDING_DBG_EXCEPTIONS) #endif /* _KERNEL */ @@ -393,6 +396,24 @@ #define VMCS_INTERRUPTIBILITY_SMI_BLOCKING (1 << 2) #define VMCS_INTERRUPTIBILITY_NMI_BLOCKING (1 << 3) +/* + * Exit qualification for debug exception + */ +#define EXIT_QUAL_DBG_B0 (1U << 0) +#define EXIT_QUAL_DBG_B1 (1U << 1) +#define EXIT_QUAL_DBG_B2 (1U << 2) +#define EXIT_QUAL_DBG_B3 (1U << 3) +#define EXIT_QUAL_DBG_B_MASK (0xf) +#define EXIT_QUAL_DBG_BD (1U << 13) +#define EXIT_QUAL_DBG_BS (1U << 14) + +/* + * Exit qualification for MOV DR + */ +#define EXIT_QUAL_MOV_DR_REG(n) ((n)&0x7) +#define EXIT_QUAL_MOV_DR_RW(n) (!!((n)&0x10)) +#define EXIT_QUAL_MOV_DR_GPR(n) (((n)&0xf00) >> 8) + /* * Exit qualification for EXIT_REASON_INVAL_VMCS */ diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -306,6 +306,7 @@ static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); +static int vmx_setreg(void *arg, int vcpu, int reg, uint64_t val); static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); static void vmx_inject_pir(struct vlapic *vlapic); #ifdef BHYVE_SNAPSHOT @@ -2324,6 +2325,117 @@ return (error); } +static __inline int +mov_dr_gpr_num_to_reg(int gpr) +{ + switch (gpr) { + case 0: + return VM_REG_GUEST_RAX; + case 1: + return VM_REG_GUEST_RCX; + case 2: + return VM_REG_GUEST_RDX; + case 3: + return VM_REG_GUEST_RBX; + case 4: + return VM_REG_GUEST_RSP; + case 5: + return VM_REG_GUEST_RBP; + case 6: + return VM_REG_GUEST_RSI; + case 7: + return VM_REG_GUEST_RDI; + case 8 ... 15: + return VM_REG_GUEST_R8 + (gpr - 8); + default: + break; + }; + + return -1; +} + +/* + * Emulates MOV DR according to Intel SDM Vol. 2B 4-43. + */ +static int +emulate_mov_dr(struct vmx *vmx, struct vm_exit *vmexit, int vcpu, uint64_t qual) +{ + int error; + int cpl, src, dst; + int dbreg; + uint64_t regval; + + int dbreg_num = EXIT_QUAL_MOV_DR_REG(qual); + int gpr = mov_dr_gpr_num_to_reg(EXIT_QUAL_MOV_DR_GPR(qual)); + int write = (EXIT_QUAL_MOV_DR_RW(qual) == 0); + + cpl = vmx_cpl(); + + if (cpl != 0) { + vm_inject_gp(vmx->vm, vcpu); + return 1; + } + + error = vmx_getreg(vmx, vcpu, VM_REG_GUEST_CR4, ®val); + KASSERT( + error == 0, ("%s: error %d fetching GPR %d", __func__, error, gpr)); + + if ((regval & CR4_DE) && (dbreg_num == 4 || dbreg_num == 5)) { + vm_inject_ud(vmx->vm, vcpu); + return 1; + } + + switch (dbreg_num) { + /* TODO: figure out how to handle DR{4,5} */ + case 0 ... 
3: + dbreg = VM_REG_GUEST_DR0 + dbreg_num; + break; + case 6: + dbreg = VM_REG_GUEST_DR6; + break; + case 7: + dbreg = VM_REG_GUEST_DR7; + break; + default: + return -1; + break; + } + + /* + * Bounce exit to userland - allow the + * gdb stub to adjust its watchpoint metadata + */ + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.trace_trap = 0; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.drx_access = dbreg_num; + vmexit->u.dbg.gpr = -1; + + if (write) { + dst = dbreg; + src = gpr; + } else { + dst = gpr; + src = dbreg; + + vmexit->u.dbg.gpr = gpr; + } + + error = vmx_getreg(vmx, vcpu, src, ®val); + KASSERT(error == 0, + ("%s: error %d fetching register %d", __func__, error, src)); + + if (write && dbreg_num == 7) { + vmexit->u.dbg.watchpoints = (int)(regval); + } + + error = vmx_setreg(vmx, vcpu, dst, regval); + KASSERT(error == 0, + ("%s: error %d updating register %d", __func__, error, dst)); + + return error; +} + static int vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { @@ -2472,6 +2584,20 @@ break; } break; + + case EXIT_REASON_DR_ACCESS: + handled = 0; + + error = emulate_mov_dr(vmx, vmexit, vcpu, qual); + KASSERT( + error >= 0, ("%s: emulate_mov_dr returned -1", __func__)); + + if (error == 1) { + /* Fault was injected into guest */ + vmexit->exitcode = VM_EXITCODE_BOGUS; + handled = 1; + } + break; case EXIT_REASON_RDMSR: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1); retu = false; @@ -2658,6 +2784,78 @@ vmexit->inst_length = 0; break; } + if (intr_type == VMCS_INTR_T_HWEXCEPTION && + intr_vec == IDT_DB && + (vmx->cap[vcpu].set & (1 << VM_CAP_DB_EXIT))) { + + int reflect = 0; + /* + * A debug exception VMEXIT does not update the DR{6,7} + * registers (SDM Vol. 3C 27-1). It is therefore + * necessary to emulate these writes here. + * + * We reflect everything except watchpoint hits. Since + * it is up to the userland to reinject a debug + * exception when a guest watchpoint is hit, the + * register must be updated here so that the guest may + * properly register the watchpoint hit. + */ + int trace_trap = !!(qual & EXIT_QUAL_DBG_BS); + int debug_detect = !!(qual & EXIT_QUAL_DBG_BD); + int watch_mask = qual & EXIT_QUAL_DBG_B_MASK; + + uint64_t dr6; + error = vmx_getreg(vmx, vcpu, VM_REG_GUEST_DR6, &dr6); + KASSERT(error == 0, + ("%s: error %d fetching DR6", __func__, error)); + + uint64_t regval; + error = vmx_getreg( + vmx, vcpu, VM_REG_GUEST_RFLAGS, ®val); + KASSERT(error == 0, + ("%s: error %d fetching DR6", __func__, error)); + + dr6 &= DBREG_DR6_RESERVED1; + /* + * Clear the RTM flag (0 indicates a hit, + * Intel SDM Vol. 3B 17-3 ). + */ + dr6 |= (1 << 16); + + if (watch_mask) { + vmexit->exitcode = VM_EXITCODE_DB; + vmexit->u.dbg.pushf_intercept = 0; + vmexit->u.dbg.trace_trap = 0; + vmexit->u.dbg.drx_access = -1; + vmexit->u.dbg.watchpoints = watch_mask; + vmexit->u.dbg.drx_access = -1; + vmexit->u.dbg.watchpoints = watch_mask; + + dr6 |= watch_mask; + + /* Bounce to userland */ + reflect = 0; + } else { + dr6 |= debug_detect ? DBREG_DR6_BD : 0; + dr6 |= (trace_trap) ? 
DBREG_DR6_BS : 0; + regval &= ~(PSL_T); + + /* Reflect back into guest */ + reflect = 1; + } + error = vmx_setreg(vmx, vcpu, VM_REG_GUEST_DR6, dr6); + KASSERT(error == 0, + ("%s: error %d updating DR6", __func__, error)); + + error = vmx_setreg( + vmx, vcpu, VM_REG_GUEST_RFLAGS, regval); + KASSERT(error == 0, + ("%s: error %d fetching DR6", __func__, error)); + + if (!reflect) { + break; + } + } if (intr_vec == IDT_PF) { error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual); @@ -3488,6 +3686,8 @@ ret = 0; break; case VM_CAP_BPT_EXIT: + case VM_CAP_DB_EXIT: + case VM_CAP_DR_MOV_EXIT: ret = 0; break; default: @@ -3583,6 +3783,25 @@ reg = VMCS_EXCEPTION_BITMAP; } break; + case VM_CAP_DB_EXIT: + retval = 0; + + /* Don't change the bitmap if we are tracing all exceptions. */ + if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { + pptr = &vmx->cap[vcpu].exc_bitmap; + baseval = *pptr; + flag = (1 << IDT_DB); + reg = VMCS_EXCEPTION_BITMAP; + } + break; + case VM_CAP_DR_MOV_EXIT: + retval = 0; + + pptr = &vmx->cap[vcpu].proc_ctls; + baseval = *pptr; + flag = PROCBASED_MOV_DR_EXITING; + reg = VMCS_PRI_PROC_BASED_CTLS; + break; default: break; } diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1527,8 +1527,10 @@ */ vme->inst_length = vie->num_processed; vcpu->nextrip += vie->num_processed; - VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction " - "decoding", vcpu->nextrip); + VCPU_CTR1(vm, vcpuid, + "nextrip updated to %#lx after instruction " + "decoding", + vcpu->nextrip); /* return to userland unless this is an in-kernel emulated device */ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { @@ -1623,6 +1625,44 @@ return (0); } +static int +vm_handle_db(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu) +{ + int error, fault; + uint64_t rsp; + uint64_t rflags; + struct vm_copyinfo copyinfo; + + *retu = true; + if (!vme->u.dbg.pushf_intercept) { + return 0; + } + printf("%s: writing back rflags after pushf\r\n", __func__); + + vm_get_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); + + error = vm_copy_setup(vm, vcpuid, &vme->u.dbg.paging, rsp, + sizeof(uint64_t), VM_PROT_WRITE, ©info, 1, &fault); + if (error || fault) { + *retu = false; + return (EINVAL); + } + + /* Read pushed rflags value */ + vm_copyin(vm, vcpuid, ©info, &rflags, sizeof(uint64_t)); + printf("%s: rflags: 0x%8lx\r\n", __func__, rflags); + /* Set TF bit to shadowed value*/ + rflags &= ~(PSL_T); + rflags |= vme->u.dbg.tf_shadow_val; + printf("%s: updated rflags: 0x%8lx\r\n", __func__, rflags); + /* Write updated value back to memory*/ + vm_copyout(vm, vcpuid, &rflags, ©info, sizeof(uint64_t)); + + vm_copy_teardown(vm, vcpuid, ©info, 1); + + return (0); +} + int vm_suspend(struct vm *vm, enum vm_suspend_how how) { @@ -1797,6 +1837,9 @@ case VM_EXITCODE_INOUT_STR: error = vm_handle_inout(vm, vcpuid, vme, &retu); break; + case VM_EXITCODE_DB: + error = vm_handle_db(vm, vcpuid, vme, &retu); + break; case VM_EXITCODE_MONITOR: case VM_EXITCODE_MWAIT: case VM_EXITCODE_VMINSN: diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -927,6 +927,20 @@ return (VMEXIT_CONTINUE); } +static int +vmexit_db(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + +#ifdef BHYVE_SNAPSHOT + checkpoint_cpu_suspend(*pvcpu); +#endif + gdb_cpu_debug(*pvcpu, vmexit); +#ifdef BHYVE_SNAPSHOT + checkpoint_cpu_resume(*pvcpu); +#endif + return (VMEXIT_CONTINUE); +} + static int 
vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -951,6 +965,7 @@ [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, [VM_EXITCODE_DEBUG] = vmexit_debug, [VM_EXITCODE_BPT] = vmexit_breakpoint, + [VM_EXITCODE_DB] = vmexit_db, }; static void diff --git a/usr.sbin/bhyve/gdb.h b/usr.sbin/bhyve/gdb.h --- a/usr.sbin/bhyve/gdb.h +++ b/usr.sbin/bhyve/gdb.h @@ -34,6 +34,7 @@ void gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit); void gdb_cpu_mtrap(int vcpu); void gdb_cpu_suspend(int vcpu); +void gdb_cpu_debug(int vcpu, struct vm_exit *vmexit); void init_gdb(struct vmctx *ctx); #endif /* !__GDB_H__ */ diff --git a/usr.sbin/bhyve/gdb.c b/usr.sbin/bhyve/gdb.c --- a/usr.sbin/bhyve/gdb.c +++ b/usr.sbin/bhyve/gdb.c @@ -37,10 +37,14 @@ #include #include #include + #include +#include #include #include + #include + #include #ifndef WITHOUT_CAPSICUM #include @@ -69,7 +73,25 @@ * GDB_SIGNAL_* numbers are part of the GDB remote protocol. Most stops * use SIGTRAP. */ -#define GDB_SIGNAL_TRAP 5 +#define GDB_SIGNAL_TRAP 5 + +#define GDB_SOFTWARE_BPT 0 +#define GDB_WATCHPOINT_TYPE_WRITE 2 +#define GDB_WATCHPOINT_TYPE_READ 3 +#define GDB_WATCHPOINT_TYPE_ACCESS 4 + +#define GDB_WATCHPOINT_MAX 4 +#define GDB_WATCHPOINT_MASK ((1 << GDB_WATCHPOINT_MAX) - 1) +#define GDB_WATCHPOINT_CLEAR_NOSKIP -1 + +#define GDB_WATCHPOINT_INIT() \ + watch_stats.avail_dbregs = (-1 & GDB_WATCHPOINT_MASK) +#define GDB_FIND_WATCHPOINT() (__builtin_ffs(watch_stats.avail_dbregs) - 1) +#define GDB_HAS_AVAIL_WATCHPOINT() (watch_stats.avail_dbregs != 0) +#define GDB_ALLOC_WATCHPOINT(num) \ + watch_stats.avail_dbregs &= ~(1 << (num & GDB_WATCHPOINT_MASK)) +#define GDB_FREE_WATCHPOINT(num) \ + watch_stats.avail_dbregs |= (1 << (num & GDB_WATCHPOINT_MASK)) static void gdb_resume_vcpus(void); static void check_command(int fd); @@ -101,6 +123,22 @@ TAILQ_ENTRY(breakpoint) link; }; +struct watchpoint_stats { + int no_active; + int no_evicted; + int avail_dbregs; /* Tracks DR regs used by the guest */ + + struct watchpoint { + enum watchpoint_state { + WATCH_INACTIVE = 0, + WATCH_ACTIVE, + WATCH_EVICTED, + } state; + uint64_t gva; + int type; + int bytes; + } watchpoints[GDB_WATCHPOINT_MAX]; +}; /* * When a vCPU stops to due to an event that should be reported to the * debugger, information about the event is stored in this structure. @@ -119,11 +157,16 @@ * * When a vCPU hits a breakpoint set by the debug server, * 'hit_swbreak' is set to true. + * + * When a vCPU hits a watchpoint set by the debug server, + * 'hit_watch' is set to point to the corresponding watchpoint. */ struct vcpu_state { bool stepping; bool stepped; bool hit_swbreak; + + struct watchpoint *hit_watch; }; static struct io_buffer cur_comm, cur_resp; @@ -131,6 +174,7 @@ static struct vmctx *ctx; static int cur_fd = -1; static TAILQ_HEAD(, breakpoint) breakpoints; +static struct watchpoint_stats watch_stats; static struct vcpu_state *vcpu_state; static int cur_vcpu, stopped_vcpu; static bool gdb_active = false; @@ -221,6 +265,7 @@ #endif static void remove_all_sw_breakpoints(void); +static void remove_all_hw_watchpoints(void); static int guest_paging_info(int vcpu, struct vm_guest_paging *paging) @@ -391,6 +436,7 @@ io_buffer_reset(&cur_resp); cur_fd = -1; + remove_all_hw_watchpoints(); remove_all_sw_breakpoints(); /* Clear any pending events. 
*/ @@ -401,6 +447,22 @@ pthread_mutex_unlock(&gdb_lock); } +static const char * +gdb_watch_type_str(struct watchpoint *wp) +{ + switch (wp->type) { + case GDB_WATCHPOINT_TYPE_ACCESS: + return "awatch"; + case GDB_WATCHPOINT_TYPE_READ: + return "rwatch"; + case GDB_WATCHPOINT_TYPE_WRITE: + return "watch"; + default: + // TODO: assert? + return ""; + } +} + static uint8_t hex_digit(uint8_t nibble) { @@ -683,10 +745,18 @@ debug("$vCPU %d reporting swbreak\n", stopped_vcpu); if (swbreak_enabled) append_string("swbreak:;"); - } else if (vs->stepped) + } else if (vs->stepped) { debug("$vCPU %d reporting step\n", stopped_vcpu); - else + } else if (vs->hit_watch) { + debug("$vCPU %d reporting watchpoint\n", stopped_vcpu); + append_string(gdb_watch_type_str(vs->hit_watch)); + append_char(':'); + append_unsigned_be( + vs->hit_watch->gva, sizeof(vs->hit_watch->gva)); + append_char(';'); + } else { debug("$vCPU %d reporting ???\n", stopped_vcpu); + } } finish_packet(); report_next_stop = false; @@ -704,6 +774,7 @@ if (stopped_vcpu != -1) { vs = &vcpu_state[stopped_vcpu]; vs->hit_swbreak = false; + vs->hit_watch = NULL; vs->stepped = false; stopped_vcpu = -1; } @@ -743,6 +814,105 @@ debug("$vCPU %d resuming\n", vcpu); } +static void +gdb_suspend_vcpus(void) +{ + + assert(pthread_mutex_isowned_np(&gdb_lock)); + debug("suspending all CPUs\n"); + vcpus_suspended = vcpus_active; + vm_suspend_cpu(ctx, -1); + if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) + gdb_finish_suspend_vcpus(); +} + +/* + * Requests vCPU single-stepping using a + * VMEXIT suitable for the host platform. + */ +static int +_gdb_set_step(int vcpu, int val) +{ + /* If the MTRAP cap fails, we are running on an AMD host. + * In that case, we request DB exits caused by RFLAGS.TF + * stepping. + */ + int error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, val); + if (error) { + error = vm_set_capability(ctx, vcpu, VM_CAP_RFLAGS_SSTEP, val); + } + + return error; +} + +static int +_gdb_check_step(int vcpu) +{ + int error, val; + + error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); + if (error < 0) { + /* Check whether AMD rflags.tf stepping is supported */ + if (vm_get_capability(ctx, vcpu, VM_CAP_RFLAGS_SSTEP, &val) < 0) + return -1; + } + + return 0; +} +/* + * Invoked by vCPU before resuming execution. This enables stepping + * if the vCPU is marked as stepping. + */ +static void +gdb_cpu_resume(int vcpu) +{ + struct vcpu_state *vs; + int error; + + vs = &vcpu_state[vcpu]; + + /* + * Any pending event should already be reported before + * resuming. + */ + assert(vs->hit_swbreak == false); + assert(vs->hit_watch == NULL); + assert(vs->stepped == false); + if (vs->stepping) { + error = _gdb_set_step(vcpu, 1); + assert(error == 0); + } +} + +/* + * Invoked each time a vmexit handler needs to step a vCPU. + */ +static void +_gdb_cpu_step(int vcpu) +{ + struct vcpu_state *vs; + + debug("$vCPU %d stepped\n", vcpu); + pthread_mutex_lock(&gdb_lock); + vs = &vcpu_state[vcpu]; + if (vs->stepping) { + vs->stepping = false; + vs->stepped = true; + _gdb_set_step(vcpu, 0); + + while (vs->stepped) { + if (stopped_vcpu == -1) { + debug("$vCPU %d reporting step\n", vcpu); + stopped_vcpu = vcpu; + gdb_suspend_vcpus(); + } + _gdb_cpu_suspend(vcpu, true); + } + gdb_cpu_resume(vcpu); + } + pthread_mutex_unlock(&gdb_lock); +} + /* * Invoked at the start of a vCPU thread's execution to inform the * debug server about the new thread. 
@@ -774,96 +944,501 @@ pthread_mutex_unlock(&gdb_lock); } +static bool +set_dbexit_caps(bool enable) +{ + cpuset_t mask; + int vcpu; + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (vm_set_capability( + ctx, vcpu, VM_CAP_DB_EXIT, enable ? 1 : 0) < 0) + return (false); + debug("$vCPU %d %sabled debug exits\n", vcpu, + enable ? "en" : "dis"); + } + return (true); +} + +static bool +set_dbreg_exit_caps(bool enable) +{ + cpuset_t mask; + int vcpu; + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (vm_set_capability( + ctx, vcpu, VM_CAP_DR_MOV_EXIT, enable ? 1 : 0) < 0) + return (false); + debug("$vCPU %d %sabled debug register access exits\n", vcpu, + enable ? "en" : "dis"); + } + return (true); +} + /* - * Invoked by vCPU before resuming execution. This enables stepping - * if the vCPU is marked as stepping. + * A helper routine for setting watchpoints. + * Each watchpoint is "global" and is placed into the corresponding DR* + * registers on all vCPUs. */ -static void -gdb_cpu_resume(int vcpu) + +static int +set_watchpoint(uint64_t gva, int type, int bytes, int watchnum) { - struct vcpu_state *vs; - int error; + int access, len; + struct watchpoint *wp; - vs = &vcpu_state[vcpu]; + cpuset_t mask; + int vcpu; + uint64_t dr7; + int dbreg = VM_REG_GUEST_DR0 + watchnum; + + switch (type) { + case GDB_WATCHPOINT_TYPE_WRITE: + access = DBREG_DR7_WRONLY; + break; + case GDB_WATCHPOINT_TYPE_ACCESS: + case GDB_WATCHPOINT_TYPE_READ: + access = DBREG_DR7_RDWR; + break; + default: + return (EINVAL); + } + + switch (bytes) { + case 1: + len = DBREG_DR7_LEN_1; + break; + case 2: + len = DBREG_DR7_LEN_2; + break; + case 4: + len = DBREG_DR7_LEN_4; + break; + case 8: + len = DBREG_DR7_LEN_8; + break; + default: + return (EINVAL); + } + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + + /* Write gva to debug reg */ + vm_set_register(ctx, vcpu, dbreg, gva); + /* Enable watchpoint in DR7 */ + vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); + dr7 &= ~DBREG_DR7_MASK(watchnum); + dr7 |= DBREG_DR7_SET( + watchnum, len, access, DBREG_DR7_GLOBAL_ENABLE); + vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); + } + wp = &watch_stats.watchpoints[watchnum]; /* - * Any pending event should already be reported before - * resuming. + * An already active watchpoint can be passed - don't + * increment overall active watchpoints. */ - assert(vs->hit_swbreak == false); - assert(vs->stepped == false); - if (vs->stepping) { - error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1); - assert(error == 0); + if (wp->state != WATCH_ACTIVE) { + watch_stats.no_active++; } + wp->state = WATCH_ACTIVE; + wp->gva = gva; + wp->type = type; + wp->bytes = bytes; + + GDB_ALLOC_WATCHPOINT(watchnum); + + return 0; } /* - * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest - * has been suspended due to an event on different vCPU or in response - * to a guest-wide suspend such as Ctrl-C or the stop on attach. + * Clears watchpoint metadata and disables it on all guest vCPUs. + * + * The 'skip_vcpu' arg may be passed to prevent this routine from modifying the + * DR7 register on a specific vCPU (used when handling VMEXITS caused by DR7 + * write to avoid thrashing the new value). + * + * The 'clear_dbreg' arg controls whether the underlying debug register is + * zeroed. 
*/ -void -gdb_cpu_suspend(int vcpu) +static int +clear_watchpoint(int watchnum, int skip_vcpu, bool clear_dbreg) { + cpuset_t mask; + int vcpu; + uint64_t dr7; - if (!gdb_active) - return; - pthread_mutex_lock(&gdb_lock); - _gdb_cpu_suspend(vcpu, true); - gdb_cpu_resume(vcpu); - pthread_mutex_unlock(&gdb_lock); + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (clear_dbreg) { + vm_set_register( + ctx, vcpu, VM_REG_GUEST_DR0 + watchnum, 0); + } + if (vcpu == skip_vcpu) { + continue; + } + + /* Disable watchpoint in DR7 */ + vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); + dr7 &= ~DBREG_DR7_MASK(watchnum); + vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); + } + + watch_stats.watchpoints[watchnum].state = WATCH_INACTIVE; + /* Refrain from clearing other fields - this avoids unnecessary copies + * if migrate_watchpoint is called afterward */ + watch_stats.no_active--; + + GDB_FREE_WATCHPOINT(watchnum); + + return 0; } -static void -gdb_suspend_vcpus(void) +static struct watchpoint * +find_watchpoint(uint64_t gla) { + struct watchpoint *wp; - assert(pthread_mutex_isowned_np(&gdb_lock)); - debug("suspending all CPUs\n"); - vcpus_suspended = vcpus_active; - vm_suspend_cpu(ctx, -1); - if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) - gdb_finish_suspend_vcpus(); + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + wp = &watch_stats.watchpoints[i]; + if (wp->state == WATCH_ACTIVE && (wp->gva == gla)) { + return wp; + } + } + + return (NULL); } /* - * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via - * the VT-x-specific MTRAP exit. + * Tries to reactivate a previously evicted watchpoint. */ -void -gdb_cpu_mtrap(int vcpu) +static int +migrate_watchpoint(struct watchpoint *wp) +{ + int error; + + if (!GDB_HAS_AVAIL_WATCHPOINT()) { + return -1; + } + + if (watch_stats.no_active == 0 && watch_stats.no_evicted == 0) { + if (!set_dbexit_caps(true) || !set_dbreg_exit_caps(true)) { + return -1; + } + } + + int watchnum = GDB_FIND_WATCHPOINT(); + assert(watchnum >= 0); + + error = set_watchpoint(wp->gva, wp->type, wp->bytes, watchnum); + if (error == 0) { + watch_stats.no_evicted--; + /* check if the watchpoint was migrated to the same slot */ + if (wp->state != WATCH_ACTIVE) + wp->state = WATCH_INACTIVE; + } + return error; +} + +static void +init_watchpoint_metadata(void) +{ + cpuset_t mask; + int vcpu; + uint64_t dr7; + + GDB_WATCHPOINT_INIT(); + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + int vcpu_used_dbreg_mask = 0; + + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + + /* Construct bitmask of active dbregs */ + vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); + vcpu_used_dbreg_mask = (DBREG_DR7_ENABLED(dr7, 0) | + (DBREG_DR7_ENABLED(dr7, 1) << 1) | + (DBREG_DR7_ENABLED(dr7, 2) << 2) | + (DBREG_DR7_ENABLED(dr7, 3) << 3)); + + /* Mark any currently enabled dbreg as + * unavailable */ + watch_stats.avail_dbregs &= ~vcpu_used_dbreg_mask; + } +} + +static void +rebuild_avail_watchpoints(void) { + init_watchpoint_metadata(); + + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + if (watch_stats.watchpoints[i].state == WATCH_ACTIVE) { + GDB_ALLOC_WATCHPOINT(i); + } + } +} + +static void +handle_watchpoint_hit(int vcpu, int watch_mask) +{ + int watchnum = __builtin_ffs(watch_mask) - 1; + int dbreg = VM_REG_GUEST_DR0 + watchnum; struct vcpu_state *vs; + struct watchpoint *watch; + + uint64_t gla; + uint64_t dr6; + + assert(watchnum >= 0); - if (!gdb_active) - return; - debug("$vCPU %d MTRAP\n", vcpu); 
pthread_mutex_lock(&gdb_lock); - vs = &vcpu_state[vcpu]; - if (vs->stepping) { - vs->stepping = false; - vs->stepped = true; - vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0); - while (vs->stepped) { + + if (!watch_stats.no_active) { + vm_inject_exception(ctx, vcpu, IDT_DB, 0, 0, 0); + pthread_mutex_unlock(&gdb_lock); + return; + } + + vm_get_register(ctx, vcpu, dbreg, &gla); + + watch = find_watchpoint(gla); + if (watch) { + vs = &vcpu_state[vcpu]; + + assert(vs->stepping == false); + assert(vs->stepped == false); + assert(vs->hit_swbreak == false); + assert(vs->hit_watch == false); + + vs->hit_watch = watch; + for (;;) { if (stopped_vcpu == -1) { - debug("$vCPU %d reporting step\n", vcpu); stopped_vcpu = vcpu; gdb_suspend_vcpus(); } _gdb_cpu_suspend(vcpu, true); + if (!vs->hit_watch) { + /* Watchpoint reported. */ + break; + } + if (watch->state == WATCH_INACTIVE) { + /* Watchpoint removed. */ + break; + } } + + vm_get_register(ctx, vcpu, VM_REG_GUEST_DR6, &dr6); + dr6 &= DBREG_DR6_RESERVED1; + vm_set_register(ctx, vcpu, VM_REG_GUEST_DR6, dr6); + gdb_cpu_resume(vcpu); + } else { + /* Reflect the DB exception back into the guest */ + vm_inject_exception(ctx, vcpu, IDT_DB, 0, 0, 0); + } + + pthread_mutex_unlock(&gdb_lock); +} + +static void +handle_drx_read(int vcpu, struct vm_exit *vmexit) +{ + struct watchpoint *wp; + int dbreg_num = vmexit->u.dbg.drx_access; + uint64_t gpr_val; + int gpr = vmexit->u.dbg.gpr; + + if (dbreg_num >= 4 && dbreg_num <= 6) { + return; + } + + pthread_mutex_lock(&gdb_lock); + wp = &watch_stats.watchpoints[dbreg_num]; + + if (dbreg_num == 7) { + vm_get_register(ctx, vcpu, gpr, &gpr_val); + + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + /* Clear newly read dr7 mask for gdbstub watchpoints */ + if (watch_stats.watchpoints[i].state == WATCH_ACTIVE) { + gpr_val &= ~DBREG_DR7_MASK(i); + } + } + + vm_set_register(ctx, vcpu, gpr, gpr_val); + } + /* If the guest attempts to read from a gdbstub-active dbreg, set the + * gpr register to 0 */ + if (wp->state == WATCH_ACTIVE) { + vm_set_register(ctx, vcpu, vmexit->u.dbg.gpr, 0); + } + + pthread_mutex_unlock(&gdb_lock); +} + +static void +handle_drx_write(int vcpu, struct vm_exit *vmexit) +{ + int error; + struct watchpoint *wp; + uint64_t dbreg_val; + int dbreg_num = vmexit->u.dbg.drx_access; + + if (dbreg_num >= 4 && dbreg_num <= 6) { + return; + } + + pthread_mutex_lock(&gdb_lock); + wp = &watch_stats.watchpoints[dbreg_num]; + + if (dbreg_num == 7) { + /* A new DR7 was loaded, update watchpoint metadata */ + int dr7 = vmexit->u.dbg.watchpoints; + + /* Clear any watchpoints the guest started using */ + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + wp = &watch_stats.watchpoints[i]; + bool dbreg_enabled = DBREG_DR7_ENABLED(dr7, i); + bool watchpoint_active = wp->state == WATCH_ACTIVE; + + if (dbreg_enabled && watchpoint_active) { + /* Evict active watchpoint */ + debug( + "%s: dr7 write: evicting active watchpoint %d\n", + __func__, i); + clear_watchpoint(i, vcpu, true); + wp->state = WATCH_EVICTED; + watch_stats.no_evicted++; + } else if (!dbreg_enabled && watchpoint_active) { + debug( + "%s: dr7 write: reactivating active watchpoint %d\n", + __func__, i); + set_watchpoint(wp->gva, wp->type, wp->bytes, i); + } + } + rebuild_avail_watchpoints(); + + } else if (wp->state == WATCH_ACTIVE) { + vm_get_register( + ctx, vcpu, VM_REG_GUEST_DR0 + dbreg_num, &dbreg_val); + /* Guest started using an occupied DB reg, + * remove watchpoint */ + if (dbreg_val != 0) { + + debug("%s: evicting active watchpoint %d\n", __func__, + 
dbreg_num); + clear_watchpoint( + dbreg_num, GDB_WATCHPOINT_CLEAR_NOSKIP, false); + wp->state = WATCH_EVICTED; + watch_stats.no_evicted++; + /* Mark watchpoint as in-use */ + GDB_ALLOC_WATCHPOINT(dbreg_num); + } else { + debug( + "%s: dr7 write: reactivating active watchpoint %d\n", + __func__, dbreg_num); + set_watchpoint(wp->gva, wp->type, wp->bytes, dbreg_num); + } + // TODO: figure out how to notify remote gdb if a + // watchpoint cannot be migrated + } else { + vm_get_register( + ctx, vcpu, VM_REG_GUEST_DR0 + dbreg_num, &dbreg_val); + if (dbreg_val != 0) { + /* Mark watchpoint as in-use */ + GDB_ALLOC_WATCHPOINT(dbreg_num); + } + } + /* Try to migrate any evicted watchpoints */ + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + if (watch_stats.watchpoints[i].state == WATCH_EVICTED) { + error = migrate_watchpoint(&watch_stats.watchpoints[i]); + debug("%s: %s migrating watchpoint %d\n", __func__, + (error != -1 ? "succeeded" : "failed"), i); + if (error) { + break; + } + } + } + + pthread_mutex_unlock(&gdb_lock); +}; + +/* + * A general handler for VM_EXITCODE_DB. + * Handles RFLAGS.TF exits on AMD hosts and HW watchpoints. + */ +void +gdb_cpu_debug(int vcpu, struct vm_exit *vmexit) +{ + if (!gdb_active) + return; + + /* RFLAGS.TF exit? */ + if (vmexit->u.dbg.trace_trap) { + _gdb_cpu_step(vcpu); + } else if (vmexit->u.dbg.drx_access != -1) { + if (vmexit->u.dbg.gpr != -1) { + handle_drx_read(vcpu, vmexit); + } else { + handle_drx_write(vcpu, vmexit); + } + } else if (vmexit->u.dbg.watchpoints) { + /* A watchpoint was triggered */ + handle_watchpoint_hit(vcpu, vmexit->u.dbg.watchpoints); } +} + +/* + * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest + * has been suspended due to an event on different vCPU or in response + * to a guest-wide suspend such as Ctrl-C or the stop on attach. + */ +void +gdb_cpu_suspend(int vcpu) +{ + pthread_mutex_lock(&gdb_lock); + _gdb_cpu_suspend(vcpu, true); + gdb_cpu_resume(vcpu); pthread_mutex_unlock(&gdb_lock); } +/* + * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via + * the VT-x-specific MTRAP exit. 
+ */ +void +gdb_cpu_mtrap(int vcpu) +{ + if (!gdb_active) + return; + + _gdb_cpu_step(vcpu); +} + static struct breakpoint * find_breakpoint(uint64_t gpa) { struct breakpoint *bp; - TAILQ_FOREACH(bp, &breakpoints, link) { + TAILQ_FOREACH (bp, &breakpoints, link) { if (bp->gpa == gpa) return (bp); } @@ -891,12 +1466,14 @@ assert(vs->stepping == false); assert(vs->stepped == false); assert(vs->hit_swbreak == false); + assert(vs->hit_watch == false); vs->hit_swbreak = true; vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip); for (;;) { if (stopped_vcpu == -1) { - debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu, - vmexit->rip); + debug( + "$vCPU %d reporting breakpoint at rip %#lx\n", + vcpu, vmexit->rip); stopped_vcpu = vcpu; gdb_suspend_vcpus(); } @@ -928,12 +1505,13 @@ static bool gdb_step_vcpu(int vcpu) { - int error, val; + int error; debug("$vCPU %d step\n", vcpu); - error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); - if (error < 0) + error = _gdb_check_step(vcpu); + if (error < 0) { return (false); + } discard_stop(); vcpu_state[vcpu].stepping = true; @@ -1231,6 +1809,94 @@ set_breakpoint_caps(false); } +static void +remove_all_hw_watchpoints(void) +{ + + for (int i = 0; i < GDB_WATCHPOINT_MAX; i++) { + if (watch_stats.watchpoints[i].state == WATCH_ACTIVE) { + clear_watchpoint(i, GDB_WATCHPOINT_CLEAR_NOSKIP, true); + } + } + + set_dbexit_caps(false); + set_dbreg_exit_caps(false); +} + +static void +update_watchpoint(uint64_t gva, int type, int bytes, int insert) +{ + struct watchpoint *wp; + int error; + + if (!insert && watch_stats.no_active == 0) { + send_error(EINVAL); + return; + } + + if (insert) { + + /* + * No watchpoints are active - fetch and update + * watchpoint stats, enable dbreg and db exits. + */ + if (watch_stats.no_active == 0 && watch_stats.no_evicted == 0) { + /* Activate debug exception vmexits */ + if (!set_dbexit_caps(true) || + !set_dbreg_exit_caps(true)) { + send_error(EINVAL); + return; + } + + init_watchpoint_metadata(); + } + + if (watch_stats.no_active == GDB_WATCHPOINT_MAX || + !GDB_HAS_AVAIL_WATCHPOINT()) { + error = (ENOSPC); + goto err; + } + + wp = find_watchpoint(gva); + if (!wp) { + int dbreg_num = GDB_FIND_WATCHPOINT(); + assert(dbreg_num >= 0); + + debug("Allocated watchpoint %d\n", dbreg_num); + error = set_watchpoint(gva, type, bytes, dbreg_num); + if (error) { + goto err; + } + } + } else { + wp = find_watchpoint(gva); + if (wp) { + int watchnum = wp - &watch_stats.watchpoints[0]; + debug("Removing watchpoint %d\n", watchnum); + clear_watchpoint( + watchnum, GDB_WATCHPOINT_CLEAR_NOSKIP, true); + /* If the last watchpoint was removed and none are + * evicted, disable db and dbreg vmexits */ + if (watch_stats.no_active == 0 && + watch_stats.no_evicted == 0) { + set_dbexit_caps(false); + set_dbreg_exit_caps(false); + } + } + } + + send_ok(); + return; + +err: + if (watch_stats.no_active == 0 && watch_stats.no_evicted == 0) { + set_dbexit_caps(false); + set_dbreg_exit_caps(false); + } + send_error(error); + return; +} + static void update_sw_breakpoint(uint64_t gva, int kind, bool insert) { @@ -1351,9 +2017,14 @@ len = 0; switch (type) { - case 0: + case GDB_SOFTWARE_BPT: update_sw_breakpoint(gva, kind, insert); break; + case GDB_WATCHPOINT_TYPE_WRITE: + case GDB_WATCHPOINT_TYPE_READ: + case GDB_WATCHPOINT_TYPE_ACCESS: + update_watchpoint(gva, type, kind, insert); + break; default: send_empty_response(); break; @@ -1890,6 +2561,8 @@ stopped_vcpu = 0; } + memset(&watch_stats, 0, sizeof(watch_stats)); + flags = fcntl(s, 
F_GETFL); if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) err(1, "Failed to mark gdb socket non-blocking");
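
Usage sketch (not part of the patch): a minimal illustration of how a libvmmapi consumer such as the bhyve gdb stub might enable the capabilities introduced above. It only uses vm_set_capability() as already called in gdb.c; the MTRAP-first, RFLAGS.TF-fallback ordering mirrors _gdb_set_step(), and the error handling shown here is assumed for illustration, not prescribed by the patch.

#include <sys/types.h>
#include <err.h>
#include <machine/vmm.h>	/* VM_CAP_MTRAP_EXIT, VM_CAP_RFLAGS_SSTEP, ... */
#include <vmmapi.h>

/* Sketch: enable single-step and hardware-watchpoint exits on one vCPU. */
static void
enable_debug_exits(struct vmctx *ctx, int vcpu)
{
	/*
	 * Prefer the VT-x MTRAP single-step exit; on AMD hosts fall back
	 * to the RFLAGS.TF-based stepping added by this patch.
	 */
	if (vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1) != 0 &&
	    vm_set_capability(ctx, vcpu, VM_CAP_RFLAGS_SSTEP, 1) != 0)
		errx(1, "vCPU %d: no single-step support", vcpu);

	/*
	 * Bounce #DB exceptions and guest MOV DRx accesses to userland so
	 * the gdb stub can multiplex DR0-3/DR7 for its own watchpoints.
	 */
	if (vm_set_capability(ctx, vcpu, VM_CAP_DB_EXIT, 1) != 0 ||
	    vm_set_capability(ctx, vcpu, VM_CAP_DR_MOV_EXIT, 1) != 0)
		errx(1, "vCPU %d: debug-register exits unsupported", vcpu);
}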