Index: sys/amd64/vmm/amd/svm.c =================================================================== --- sys/amd64/vmm/amd/svm.c +++ sys/amd64/vmm/amd/svm.c @@ -948,7 +948,9 @@ KASSERT(!ctrl->v_ign_tpr, ("%s: invalid v_ign_tpr", __func__)); VCPU_CTR2(vm, vcpu, "v_intr_vector %d %s", ctrl->v_intr_vector, pending ? "pending" : "accepted"); - if (!pending) + if (pending) + vlapic_intr_eject(vlapic, ctrl->v_intr_vector); + else vlapic_intr_accepted(vlapic, ctrl->v_intr_vector); } } @@ -1667,10 +1669,11 @@ * hypervisor. The higher priority vector will be injected on * the next VMRUN. */ - if (vlapic_pending_intr(vlapic, &vector)) { + if (vlapic_pending_vintr(vlapic, &vector)) { KASSERT(vector >= 16 && vector <= 255, ("invalid vector %d from local APIC", vector)); pending_apic_vector = vector; + vlapic_intr_inject(vlapic, vector); } goto done; } Index: sys/amd64/vmm/io/vlapic.h =================================================================== --- sys/amd64/vmm/io/vlapic.h +++ sys/amd64/vmm/io/vlapic.h @@ -52,10 +52,33 @@ int vlapic_pending_intr(struct vlapic *vlapic, int *vecptr); /* - * Transition 'vector' from IRR to ISR. This function is called with the - * vector returned by 'vlapic_pending_intr()' when the guest is able to - * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that - * block interrupt delivery). + * Like vlapic_pending_intr() above but ignores TPR for the purpose of picking + * an eligible vector. Only vectors in ISR determine the eligibility. + * This is useful when TPR / CR8 is virtualized and can be modified by the guest + * without an intercept, as is the case for AMD SVM. + * In that case we can simply inject a virtual interrupt and let the hardware + * (and the guest) decide whether and when it can be delivered. + */ +int vlapic_pending_vintr(struct vlapic *vlapic, int *vecptr); + +/* + * Transition 'vector' from IRR to an "injected" state or back. 
+ * This is used with SVM virtual interrupt injection where we do + * not have precise knowledge of when the injected interrupt starts + * being serviced. So, an intermediate state is used for the interrupt + * in limbo. We will know the state as soon as the next #VMEXIT happens + * and then we either "eject" the interrupt to IRR or mark it as accepted. + */ +void vlapic_intr_inject(struct vlapic *vlapic, int vector); +void vlapic_intr_eject(struct vlapic *vlapic, int vector); + +/* + * Transition 'vector' from IRR (or injected state) to ISR. + * With SVM virtual interrupt injection this function is called as soon as + * we learn that the injected interrupt has been delivered to the guest. + * Otherwise, this function is called with the vector returned by + * 'vlapic_pending_intr()' when the guest is able to accept this interrupt + * (i.e. RFLAGS.IF = 1 and no conditions exist that block interrupt delivery). */ void vlapic_intr_accepted(struct vlapic *vlapic, int vector); Index: sys/amd64/vmm/io/vlapic.c =================================================================== --- sys/amd64/vmm/io/vlapic.c +++ sys/amd64/vmm/io/vlapic.c @@ -263,7 +263,7 @@ { struct LAPIC *lapic; uint32_t *irrptr, *tmrptr, mask; - int idx; + int idx, old; KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); @@ -286,9 +286,10 @@ idx = (vector / 32) * 4; mask = 1 << (vector % 32); - irrptr = &lapic->irr0; - atomic_set_int(&irrptr[idx], mask); + old = atomic_testandset_int(&irrptr[idx], vector % 32); + if (old) + VLAPIC_CTR1(vlapic, "vector %d collapsed in IRR", vector); /* * Verify that the trigger-mode of the interrupt matches with @@ -556,7 +557,7 @@ { struct LAPIC *lapic = vlapic->apic_page; uint32_t *isrptr, *tmrptr; - int i, idx, bitpos, vector; + int i, idx, old, bitpos, vector; isrptr = &lapic->isr0; tmrptr = &lapic->tmr0; @@ -569,7 +570,8 @@ panic("invalid vlapic isrvec_stk_top %d", vlapic->isrvec_stk_top); } - isrptr[idx] &= ~(1 << bitpos); + old = 
atomic_testandclear_int(&isrptr[idx], bitpos); + KASSERT(old, ("eoi of vector that's not in ISR")); vector = i * 32 + bitpos; VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "EOI vector %d", vector); @@ -1084,58 +1086,132 @@ VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec); } -int -vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) +static int +vlapic_pending_intr_impl(struct vlapic *vlapic, int *vecptr, bool ign_tpr) { struct LAPIC *lapic = vlapic->apic_page; - int idx, i, bitpos, vector; + int idx, i, bitpos, prio, vector; uint32_t *irrptr, val; if (vlapic->ops.pending_intr) return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); - irrptr = &lapic->irr0; + KASSERT(vlapic->injected == 0, + ("query for pending interrupt while interrupt is injected")); + if (!ign_tpr) + prio = PRIO(lapic->ppr); + else + prio = PRIO(vlapic->isrvec_stk[vlapic->isrvec_stk_top]); + + irrptr = &lapic->irr0; for (i = 7; i >= 0; i--) { idx = i * 4; val = atomic_load_acq_int(&irrptr[idx]); bitpos = fls(val); if (bitpos != 0) { vector = i * 32 + (bitpos - 1); - if (PRIO(vector) > PRIO(lapic->ppr)) { + if (PRIO(vector) > prio) { VLAPIC_CTR1(vlapic, "pending intr %d", vector); if (vecptr != NULL) *vecptr = vector; return (1); - } else + } else break; } } return (0); } +int +vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) +{ + return (vlapic_pending_intr_impl(vlapic, vecptr, false)); +} + +int +vlapic_pending_vintr(struct vlapic *vlapic, int *vecptr) +{ + return (vlapic_pending_intr_impl(vlapic, vecptr, true)); +} + void +vlapic_intr_inject(struct vlapic *vlapic, int vector) +{ + struct LAPIC *lapic = vlapic->apic_page; + uint32_t *irrptr; + int idx, old; + + /* + * Clear the ready bit for vector being injected in irr + * and set the vector as injected. 
+ */ + idx = (vector / 32) * 4; + irrptr = &lapic->irr0; + old = atomic_testandclear_int(&irrptr[idx], vector % 32); + KASSERT(old, ("injecting vector not in irr")); + + KASSERT(vlapic->injected == 0, + ("injecting interrupt while interrupt is injected")); + vlapic->injected = vector; + VLAPIC_CTR_IRR(vlapic, "vlapic_intr_inject"); +} + +void +vlapic_intr_eject(struct vlapic *vlapic, int vector) +{ + struct LAPIC *lapic = vlapic->apic_page; + uint32_t *irrptr; + int idx, old; + + /* + * Inverse of vlapic_intr_inject. + */ + KASSERT(vlapic->injected != 0, + ("ejecting interrupt while no interrupt is injected")); + KASSERT(vlapic->injected == vector, ("ejecting mismatching interrupt")); + vlapic->injected = 0; + + idx = (vector / 32) * 4; + irrptr = &lapic->irr0; + old = atomic_testandset_int(&irrptr[idx], vector % 32); + if (old) + VLAPIC_CTR1(vlapic, "vector %d collapsed in IRR", vector); + VLAPIC_CTR_IRR(vlapic, "vlapic_intr_eject"); +} + +void vlapic_intr_accepted(struct vlapic *vlapic, int vector) { struct LAPIC *lapic = vlapic->apic_page; uint32_t *irrptr, *isrptr; - int idx, stk_top; + int idx, old, stk_top; if (vlapic->ops.intr_accepted) return ((*vlapic->ops.intr_accepted)(vlapic, vector)); /* - * clear the ready bit for vector being accepted in irr + * Clear the vector being accepted from IRR or the injected state * and set the vector as in service in isr. */ idx = (vector / 32) * 4; - irrptr = &lapic->irr0; - atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); - VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); - isrptr = &lapic->isr0; - isrptr[idx] |= 1 << (vector % 32); + if (vlapic->injected == 0) { + /* + * VMM does not use vlapic_intr_inject / vlapic_intr_eject, + * so the vector must be in IRR. 
+ */ + old = atomic_testandclear_int(&irrptr[idx], vector % 32); + KASSERT(old, ("accepting vector that's not in IRR")); + } else { + KASSERT(vlapic->injected == vector, + ("accepting mismatching interrupt")); + vlapic->injected = 0; + } + + old = atomic_testandset_int(&isrptr[idx], vector % 32); + KASSERT(!old, ("accepting accepted vector")); VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); /* @@ -1259,6 +1335,8 @@ *data = *(reg + i); break; case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: + KASSERT(vlapic->injected == 0, + ("IRR read while interrupt is injected")); i = (offset - APIC_OFFSET_IRR0) >> 2; reg = &lapic->irr0; *data = atomic_load_acq_int(reg + i); Index: sys/amd64/vmm/io/vlapic_priv.h =================================================================== --- sys/amd64/vmm/io/vlapic_priv.h +++ sys/amd64/vmm/io/vlapic_priv.h @@ -173,6 +173,13 @@ uint8_t isrvec_stk[ISRVEC_STK_SIZE]; int isrvec_stk_top; + /* + * The interrupt currently in limbo between being injected into a guest + * and being delivered (accepted). + * Used for virtual interrupt injection; zero means none is injected. + */ + uint8_t injected; + uint64_t msr_apicbase; enum boot_state boot_state;