Index: sys/amd64/amd64/bhyve_hypercall.S =================================================================== --- /dev/null +++ sys/amd64/amd64/bhyve_hypercall.S @@ -0,0 +1,63 @@ +/*- + * Copyright (c) 2016 Domagoj Stolfa + + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + + +#include +#include + +#define VENTER push %rbp ; mov %rsp, %rbp +#define VLEAVE pop %rbp + +.globl __start_set_hypercall +.globl __stop_set_hypercall + +/* + * The hypercalls implemented here are only the ones + * that are called from the kernel context. 
The + * hypercalls that are intended to be called from + * userland are expected to be implemented by each + * program individually or within a library, as + * opposed to here. + */ + +ENTRY(hypercall_prototype) + VENTER + movq $HYPERCALL_PROTOTYPE, %rax +hc_prototype: + .byte 0x0f,0x01,0xc1 /* vmcall; byte 2 is patched to 0xd9 (vmmcall) on AMD at boot */ + VLEAVE + ret +END(hypercall_prototype) + +.type __set_hypercall_prototype, @object +.section set_hypercall, "a", @progbits +.align 8 +__set_hypercall_prototype: + .quad hc_prototype + .size __set_hypercall_prototype, 8 /* one .quad entry */ Index: sys/amd64/include/bhyve_hypercall.h =================================================================== --- /dev/null +++ sys/amd64/include/bhyve_hypercall.h @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 2016 Domagoj Stolfa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_HYPERCALL_H_ +#define _MACHINE_HYPERCALL_H_ + +#define HYPERCALL_PROTOTYPE 0 +#define HYPERCALL_DTRACE_PROBE_CREATE 1 +#define HYPERCALL_DTRACE_PROBE 2 +#define HYPERCALL_DTRACE_RESERVED1 3 +#define HYPERCALL_DTRACE_RESERVED2 4 +#define HYPERCALL_DTRACE_RESERVED3 5 +#define HYPERCALL_DTRACE_RESERVED4 6 +#define HYPERCALL_INDEX_MAX 7 + +#define HYPERCALL_RET_SUCCESS 0 +#define HYPERCALL_RET_ERROR 1 +#define HYPERCALL_RET_NOT_IMPL (-1) /* parenthesized: safe inside expressions */ + +#ifndef __asm__ + +/* + * Arguments are only specified in this header file. + * Do not move the arguments around in the assembly + * file as the convention used is the SystemV ABI + * calling convention. 
+ */ +int hypercall_prototype(void /* args */); + +#endif + +#endif Index: sys/amd64/include/vmm.h =================================================================== --- sys/amd64/include/vmm.h +++ sys/amd64/include/vmm.h @@ -105,6 +105,7 @@ #ifdef _KERNEL #define VM_MAX_NAMELEN 32 +#define HV_MAX_NAMELEN 32 struct vm; struct vm_exception; @@ -170,6 +171,12 @@ extern struct vmm_ops vmm_ops_intel; extern struct vmm_ops vmm_ops_amd; +#define BHYVE_MODE 0 +#define VMM_MAX_MODES 1 + +extern int hypervisor_mode; +extern int hypercalls_enabled; + int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); @@ -533,6 +540,7 @@ VM_EXITCODE_MWAIT, VM_EXITCODE_SVM, VM_EXITCODE_REQIDLE, + VM_EXITCODE_HYPERCALL, VM_EXITCODE_MAX }; @@ -573,6 +581,10 @@ struct vm_guest_paging paging; }; +struct vm_hypercall { + struct vm_guest_paging paging; +}; + struct vm_exit { enum vm_exitcode exitcode; int inst_length; /* 0 means unknown */ @@ -636,7 +648,8 @@ struct { enum vm_suspend_how how; } suspended; - struct vm_task_switch task_switch; + struct vm_task_switch task_switch; + struct vm_hypercall hypercall; } u; }; Index: sys/amd64/vmm/amd/svm.c =================================================================== --- sys/amd64/vmm/amd/svm.c +++ sys/amd64/vmm/amd/svm.c @@ -469,6 +469,9 @@ svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MONITOR); svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_MWAIT); + /* Intercept VMMCALL so that guest hypercalls exit to the hypervisor. */ + svm_enable_intercept(sc, vcpu, VMCB_CTRL2_INTCPT, VMCB_INTCPT_VMMCALL); + /* * From section "Canonicalization and Consistency Checks" in APMv2 * the VMRUN intercept bit must be set to pass the consistency check. 
@@ -850,6 +853,29 @@ vie_init(&vmexit->u.inst_emul.vie, inst_bytes, inst_len); } +static void +svm_handle_hypercall(struct svm_softc *svm_sc, int vcpu, struct vmcb *vmcb, struct vm_exit *vmexit) +{ + struct vm_guest_paging *paging; + struct vmcb_segment seg; + uint64_t rsp; + int error; + + paging = &vmexit->u.hypercall.paging; + vmexit->exitcode = VM_EXITCODE_HYPERCALL; + + error = vmcb_read(svm_sc, vcpu, VM_REG_GUEST_RSP, + &rsp); + KASSERT(error == 0, ("%s: error %d getting RSP", + __func__, error)); + + error = vmcb_seg(vmcb, VM_REG_GUEST_SS, &seg); + KASSERT(error == 0, ("%s: error %d getting segment SS", + __func__, error)); + + svm_paging_info(vmcb, paging); +} + #ifdef KTR static const char * intrtype_to_str(int intr_type) @@ -1244,6 +1270,12 @@ return ("monitor"); case VMCB_EXIT_MWAIT: return ("mwait"); + case VMCB_EXIT_VMMCALL: + return ("vmmcall"); + case VMCB_EXIT_VMLOAD: + return ("vmload"); + case VMCB_EXIT_VMSAVE: + return ("vmsave"); default: snprintf(reasonbuf, sizeof(reasonbuf), "%#lx", reason); return (reasonbuf); @@ -1345,6 +1377,14 @@ case VMCB_EXIT_NMI: /* external NMI */ handled = 1; break; + case VMCB_EXIT_VMMCALL: + if (hypercalls_enabled == 0) { + vm_inject_ud(svm_sc->vm, vcpu); + handled = 1; + } else { + svm_handle_hypercall(svm_sc, vcpu, vmcb, vmexit); + } + break; case 0x40 ... 
0x5F: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXCEPTION, 1); reflect = 1; Index: sys/amd64/vmm/amd/vmcb.h =================================================================== --- sys/amd64/vmm/amd/vmcb.h +++ sys/amd64/vmm/amd/vmcb.h @@ -139,6 +139,8 @@ #define VMCB_EXIT_IO 0x7B #define VMCB_EXIT_MSR 0x7C #define VMCB_EXIT_SHUTDOWN 0x7F +#define VMCB_EXIT_VMMCALL 0x81 +#define VMCB_EXIT_VMLOAD 0x82 #define VMCB_EXIT_VMSAVE 0x83 #define VMCB_EXIT_MONITOR 0x8A #define VMCB_EXIT_MWAIT 0x8B Index: sys/amd64/vmm/intel/vmx.c =================================================================== --- sys/amd64/vmm/intel/vmx.c +++ sys/amd64/vmm/intel/vmx.c @@ -2474,6 +2474,15 @@ case EXIT_REASON_MWAIT: vmexit->exitcode = VM_EXITCODE_MWAIT; break; + case EXIT_REASON_VMCALL: + if (hypercalls_enabled == 0) { + vm_inject_ud(vmx->vm, vcpu); + handled = HANDLED; + } else { + vmexit->exitcode = VM_EXITCODE_HYPERCALL; + vmx_paging_info(&vmexit->u.hypercall.paging); + } + break; default: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); break; Index: sys/amd64/vmm/vmm.c =================================================================== --- sys/amd64/vmm/vmm.c +++ sys/amd64/vmm/vmm.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -224,10 +225,115 @@ &trace_guest_exceptions, 0, "Trap into hypervisor on all guest exceptions and reflect them back"); +int hypercalls_enabled = 0; +SYSCTL_INT(_hw_vmm, OID_AUTO, hypercalls_enabled, CTLFLAG_RWTUN, + &hypercalls_enabled, 0, + "Enable hypercalls on all guests"); + +/* + * The maximum number of arguments currently supported + * through the hypercall functionality in the VMM. + * Arguments beyond HYPERCALL_MAX_ARGS are not passed + * to the hypercall handlers. + */ +#define HYPERCALL_MAX_ARGS 6 + +typedef int (*hc_handler_t)(uint64_t, struct vm *, int, + struct vm_exit *, bool *); +typedef int64_t (*hc_dispatcher_t)(struct vm *, int, + uint64_t *, struct vm_guest_paging *); + +/* + * The default hypervisor mode used is BHYVE_MODE. 
+ */ +int hypervisor_mode = BHYVE_MODE; + +static int bhyve_handle_hypercall(uint64_t hcid, struct vm *vm, + int vcpuid, struct vm_exit *vmexit, bool *retu); + +/* + * Hypercall handlers based on the hypervisor mode. + * The naming convention should include a prefix of + * the mode that the corresponding handler is bound + * to. This should be kept in sync with the global + * variable hc_dispatcher (see below). + */ +hc_handler_t hc_handler[VMM_MAX_MODES] = { + [BHYVE_MODE] = bhyve_handle_hypercall +}; + +static int64_t hc_handle_prototype(struct vm *, int, + uint64_t *, struct vm_guest_paging *); + +/* + * Each hypercall mode implements different hypercalls + * with differently mapped hypercall numbers. If the + * hypercall is not implemented it should be kept as + * NULL. It is not necessary to add an entry to this + * table, as the hypercall will automatically be + * assigned as NULL. This will return the error to + * the guest without exception. Keep in sync with + * hc_handler (see above) and ring_plevel (see below). + */ +hc_dispatcher_t hc_dispatcher[VMM_MAX_MODES][HYPERCALL_INDEX_MAX] = { + [BHYVE_MODE] = { + [HYPERCALL_PROTOTYPE] = hc_handle_prototype + } +}; + +/* + * Each of the hypercalls can only be called from well + * defined protection rings. Each hypercall should be + * assigned a minimal possible ring that is required + * for correct operation of the hypercall. This should + * be kept in sync with hc_dispatcher (see above). 
+ */ +static int8_t ring_plevel[VMM_MAX_MODES][HYPERCALL_INDEX_MAX] = { + [BHYVE_MODE] = { + [HYPERCALL_PROTOTYPE] = 0, + [HYPERCALL_DTRACE_PROBE_CREATE] = 0, + [HYPERCALL_DTRACE_PROBE] = 0, + [HYPERCALL_DTRACE_RESERVED1] = 0, + [HYPERCALL_DTRACE_RESERVED2] = 0, + [HYPERCALL_DTRACE_RESERVED3] = 0, + [HYPERCALL_DTRACE_RESERVED4] = 0 + } +}; + static void vm_free_memmap(struct vm *vm, int ident); static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); +static int +sysctl_vmm_hypervisor_mode(SYSCTL_HANDLER_ARGS) +{ + int error; + char buf[HV_MAX_NAMELEN]; + + if (hypervisor_mode == BHYVE_MODE) { + strlcpy(buf, "bhyve", sizeof(buf)); + } else { + strlcpy(buf, "undefined", sizeof(buf)); + } + + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return (error); + + if (strcmp(buf, "bhyve") == 0) { + hypervisor_mode = BHYVE_MODE; + } else { + /* + * Unknown mode names fall back to the bhyve mode. + */ + hypervisor_mode = BHYVE_MODE; + } + + return (0); +} +SYSCTL_PROC(_hw_vmm, OID_AUTO, hv_mode, CTLTYPE_STRING | CTLFLAG_RDTUN, + NULL, 0, sysctl_vmm_hypervisor_mode, "A", NULL); + #ifdef KTR static const char * vcpu_state2str(enum vcpu_state state) @@ -538,8 +644,9 @@ if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) return (ENOMEM); - else + else { return (0); + } } int @@ -1507,6 +1614,145 @@ return (0); } +static __inline int64_t +hypercall_dispatch(uint64_t hcid, struct vm *vm, int vcpuid, + uint64_t *args, struct vm_guest_paging *paging) +{ + /* + * Do not allow hypercalls that aren't implemented. 
+ */ + if (hc_dispatcher[hypervisor_mode][hcid] == NULL) { + return (HYPERCALL_RET_NOT_IMPL); + } + return (hc_dispatcher[hypervisor_mode][hcid](vm, vcpuid, args, paging)); +} + +static __inline int +hypercall_handle(uint64_t hcid, struct vm *vm, int vcpuid, + struct vm_exit *vmexit, bool *retu) +{ + return (hc_handler[hypervisor_mode](hcid, vm, vcpuid, vmexit, retu)); +} + +/* + * The hypercall_copy_arg function assumes that appropriate + * checks have been made before calling the function. + * Returns 0 only if arg_len bytes were copied from the guest into dst. + */ +static int +hypercall_copy_arg(struct vm *vm, int vcpuid, uint64_t ds_base, + uintptr_t arg, uint64_t arg_len, struct vm_guest_paging *paging, void *dst) +{ + struct vm_copyinfo copyinfo[2]; + uint64_t gla; + int error, fault; + + if (arg == 0) { + return (HYPERCALL_RET_ERROR); + } + + gla = ds_base + arg; + error = vm_copy_setup(vm, vcpuid, paging, gla, arg_len, + PROT_READ, copyinfo, nitems(copyinfo), &fault); + if (error || fault) { + return (error != 0 ? error : HYPERCALL_RET_ERROR); /* a fault alone must not look like success */ + } + + vm_copyin(vm, vcpuid, copyinfo, dst, arg_len); + vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + + return (0); +} + +static int +bhyve_handle_hypercall(uint64_t hcid, struct vm *vm, int vcpuid, + struct vm_exit *vmexit, bool *retu) +{ + struct vm_guest_paging *paging; + uint64_t args[HYPERCALL_MAX_ARGS] = { 0 }; + int64_t retval; + int error, i; + + /* + * The SystemV ABI specifies a calling convention that + * uses the registers %rdi, %rsi, %rdx, %rcx, %r8 and %r9 + * for INTEGER and POINTER class parameter passing. 
+ */ + int arg_regs[HYPERCALL_MAX_ARGS] = { + [0] = VM_REG_GUEST_RDI, + [1] = VM_REG_GUEST_RSI, + [2] = VM_REG_GUEST_RDX, + [3] = VM_REG_GUEST_RCX, + [4] = VM_REG_GUEST_R8, + [5] = VM_REG_GUEST_R9 + }; + + paging = &vmexit->u.hypercall.paging; + + for (i = 0; i < HYPERCALL_MAX_ARGS; i++) { + error = vm_get_register(vm, vcpuid, arg_regs[i], &args[i]); + KASSERT(error == 0, ("%s: error %d getting argument %d", + __func__, error, i)); + } + + /* + * From this point on, all the arguments passed in from the + * guest are contained in the args array. + */ + retval = hypercall_dispatch(hcid, vm, vcpuid, args, paging); + error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, retval); + KASSERT(error == 0, ("%s: error %d setting RAX", + __func__, error)); + return (0); +} + +static int +vm_handle_hypercall(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) +{ + struct seg_desc cs_desc; + uint64_t hcid; + int error; + + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &hcid); + KASSERT(error == 0, ("%s: error %d getting RAX", + __func__, error)); + + /* + * Ensure that the hypercall number passed by the guest never + * exceeds the maximum number of hypercalls defined. + */ + if (hcid >= HYPERCALL_INDEX_MAX) { + error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, HYPERCALL_RET_ERROR); + KASSERT(error == 0, ("%s: error %d setting RAX", + __func__, error)); + return (0); + } + + error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &cs_desc); + KASSERT(error == 0, ("%s: error %d getting CS descriptor", + __func__, error)); + + /* + * The check ensures that each of the hypercalls that is called + * from the guest is called from the correct protection ring. 
+ */ + if (SEG_DESC_DPL(cs_desc.access) != ring_plevel[hypervisor_mode][hcid]) { + error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, HYPERCALL_RET_ERROR); + KASSERT(error == 0, ("%s: error %d setting RAX", + __func__, error)); + return (0); + } + + return (hypercall_handle(hcid, vm, vcpuid, vmexit, retu)); +} + +static __inline int64_t +hc_handle_prototype(struct vm *vm, int vcpuid, + uint64_t *args, struct vm_guest_paging *paging) +{ + return (HYPERCALL_RET_SUCCESS); +} + int vm_suspend(struct vm *vm, enum vm_suspend_how how) { @@ -1675,6 +1921,9 @@ case VM_EXITCODE_MWAIT: vm_inject_ud(vm, vcpuid); break; + case VM_EXITCODE_HYPERCALL: + error = vm_handle_hypercall(vm, vcpuid, vme, &retu); + break; default: retu = true; /* handled in userland */ break; @@ -1855,6 +2104,7 @@ /* Handle exceptions serially */ *retinfo = info2; } + return (1); } Index: sys/amd64/vmm/x86.h =================================================================== --- sys/amd64/vmm/x86.h +++ sys/amd64/vmm/x86.h @@ -29,16 +29,16 @@ #ifndef _X86_H_ #define _X86_H_ -#define CPUID_0000_0000 (0x0) +#define CPUID_0000_0000 (0x0) #define CPUID_0000_0001 (0x1) -#define CPUID_0000_0002 (0x2) -#define CPUID_0000_0003 (0x3) -#define CPUID_0000_0004 (0x4) -#define CPUID_0000_0006 (0x6) -#define CPUID_0000_0007 (0x7) -#define CPUID_0000_000A (0xA) -#define CPUID_0000_000B (0xB) -#define CPUID_0000_000D (0xD) +#define CPUID_0000_0002 (0x2) +#define CPUID_0000_0003 (0x3) +#define CPUID_0000_0004 (0x4) +#define CPUID_0000_0006 (0x6) +#define CPUID_0000_0007 (0x7) +#define CPUID_0000_000A (0xA) +#define CPUID_0000_000B (0xB) +#define CPUID_0000_000D (0xD) #define CPUID_8000_0000 (0x80000000) #define CPUID_8000_0001 (0x80000001) #define CPUID_8000_0002 (0x80000002) @@ -47,7 +47,8 @@ #define CPUID_8000_0006 (0x80000006) #define CPUID_8000_0007 (0x80000007) #define CPUID_8000_0008 (0x80000008) - +#define CPUID_4000_0000 (0x40000000) +#define CPUID_4000_0001 (0x40000001) /* * CPUID instruction Fn0000_0001: */ 
Index: sys/amd64/vmm/x86.c =================================================================== --- sys/amd64/vmm/x86.c +++ sys/amd64/vmm/x86.c @@ -50,9 +50,26 @@ SYSCTL_DECL(_hw_vmm); static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL); -#define CPUID_VM_HIGH 0x40000000 +#define CPUID_VM_HIGH 0x40000001 +#define CPUID_HV_SPECIFIC_HIGH (CPUID_VM_HIGH & 0x000000FF) +#define CPUID_HV_SPECIFIC_NUM (CPUID_HV_SPECIFIC_HIGH + 1) -static const char bhyve_id[12] = "bhyve bhyve "; +/* + * Maps the specified hypervisor specific CPUID to an + * index used to index the cpuid_dispatcher jumptable. + * The reserved CPUIDs for a hypervisor as seen in + * Intel and AMD manuals are 0x40000000-0x400000FF. + */ +#define HV_CPUID_ID(id) ((id) & 0x000000FF) + +/* + * Advertises the appropriate hypervisor identifier based + * on the hypervisor operation mode. This should be kept + * in sync with the possible hypervisor modes. + */ +static const char hypervisor_id[VMM_MAX_MODES][12] = { + [BHYVE_MODE] = "bhyve bhyve " +}; static uint64_t bhyve_xcpuids; SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0, @@ -73,6 +90,28 @@ SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN, &cpuid_leaf_b, 0, NULL); + +typedef void (*cpuid_dispatcher_t)(unsigned int regs[4]); + +static void cpuid_advertise_hw_vendor(unsigned int regs[4]); +static void cpuid_bhyve_hypercall_enabled(unsigned int regs[4]); + +/* + * Dispatches the appropriate CPUID handler based on + * the computed index using the HV_CPUID_ID macro. + * This should be kept in sync with allowed hypervisor + * modes. Keep this jumptable as generic as possible + * and in case of a specific CPUID for each hypervisor + * mode, the naming convention for the jumptable entry + * is cpuid_<mode>_functionality. 
+ */ +cpuid_dispatcher_t cpuid_dispatcher[VMM_MAX_MODES][CPUID_HV_SPECIFIC_NUM] = { + [BHYVE_MODE] = { + [0] = cpuid_advertise_hw_vendor, + [1] = cpuid_bhyve_hypercall_enabled + } +}; + /* * Round up to the next power of two, if necessary, and then take log2. * Returns -1 if argument is zero. @@ -84,6 +123,29 @@ return (fls(x << (1 - powerof2(x))) - 1); } +static __inline void +cpuid_dispatch(unsigned int func, unsigned int regs[4]) +{ + cpuid_dispatcher[hypervisor_mode][HV_CPUID_ID(func)](regs); +} + +static void +cpuid_advertise_hw_vendor(unsigned int regs[4]) +{ + regs[0] = CPUID_VM_HIGH; + bcopy(hypervisor_id[hypervisor_mode], &regs[1], 4); + bcopy(hypervisor_id[hypervisor_mode]+ 4, &regs[2], 4); + bcopy(hypervisor_id[hypervisor_mode]+ 8, &regs[3], 4); +} + +static void +cpuid_bhyve_hypercall_enabled(unsigned int regs[4]) +{ + regs[0] = hypercalls_enabled; + /* Define every output register, not just regs[0]. */ + regs[1] = regs[2] = regs[3] = 0; +} + int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) @@ -463,11 +525,13 @@ } break; - case 0x40000000: - regs[0] = CPUID_VM_HIGH; - bcopy(bhyve_id, &regs[1], 4); - bcopy(bhyve_id + 4, &regs[2], 4); - bcopy(bhyve_id + 8, &regs[3], 4); + case CPUID_4000_0000: + case CPUID_4000_0001: + /* + * Each of the hypervisor specific CPUIDs should + * be handled with the dispatcher. No exceptions. 
+ */ + cpuid_dispatch(func, regs); break; default: Index: sys/conf/files.amd64 =================================================================== --- sys/conf/files.amd64 +++ sys/conf/files.amd64 @@ -103,6 +103,10 @@ compile-with "uudecode < $S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ no-implicit-rule # +amd64/amd64/bhyve_hypercall.S standard \ + compile-with "${CC} -x assembler-with-cpp ${CFLAGS} -D__asm__ -c ${.IMPSRC}" \ + clean "bhyve_hypercall.o" +# amd64/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \ Index: sys/kern/init_main.c =================================================================== --- sys/kern/init_main.c +++ sys/kern/init_main.c @@ -79,6 +79,12 @@ #include #include +#ifdef __amd64__ +#include +#include + +#include +#endif #include #include @@ -137,6 +143,12 @@ struct sysinit **newsysinit, **newsysinit_end; /* + * This ensures that the hypercall linker set is declared so that it can + * be used to patch all the hypercall instructions in the kernel. + */ +SET_DECLARE(hypercall, uint32_t); + +/* * Merge a new sysinit set into the current set, reallocating it if * necessary. This can only be called after malloc is running. 
*/ @@ -207,6 +219,7 @@ struct sysinit **sipp; /* system initialization*/ struct sysinit **xipp; /* interior loop of sort*/ struct sysinit *save; /* bubble*/ + uint32_t **hypercall_instr __unused = NULL; /* only used on amd64 */ #if defined(VERBOSE_SYSINIT) int last; @@ -216,6 +229,18 @@ if (boothowto & RB_VERBOSE) bootverbose++; +#ifdef __amd64__ + /* As a bhyve guest on an AMD CPU, turn vmcall (0f 01 c1) into vmmcall (0f 01 d9). */ + if (cpu_vendor_id == CPU_VENDOR_AMD && vm_guest == VM_GUEST_BHYVE) { + SET_FOREACH(hypercall_instr, hypercall) { + if (*hypercall_instr == NULL) + continue; + /* Flip the third opcode byte: 0xc1 ^ 0x18 == 0xd9. */ + (**hypercall_instr) ^= (0x18 << 16); + } + } +#endif + if (sysinit == NULL) { sysinit = SET_BEGIN(sysinit_set); sysinit_end = SET_LIMIT(sysinit_set); Index: sys/kern/subr_param.c =================================================================== --- sys/kern/subr_param.c +++ sys/kern/subr_param.c @@ -149,6 +149,7 @@ "hv", "vmware", "kvm", + "bhyve", NULL }; CTASSERT(nitems(vm_guest_sysctl_names) - 1 == VM_LAST); Index: sys/sys/systm.h =================================================================== --- sys/sys/systm.h +++ sys/sys/systm.h @@ -76,7 +76,7 @@ * Keep in sync with vm_guest_sysctl_names[]. */ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV, - VM_GUEST_VMWARE, VM_GUEST_KVM, VM_LAST }; + VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_LAST }; #if defined(WITNESS) || defined(INVARIANT_SUPPORT) void kassert_panic(const char *fmt, ...) __printflike(1, 2); Index: sys/x86/x86/identcpu.c =================================================================== --- sys/x86/x86/identcpu.c +++ sys/x86/x86/identcpu.c @@ -1282,6 +1282,8 @@ vm_guest = VM_GUEST_HV; else if (strcmp(hv_vendor, "KVMKVMKVM") == 0) vm_guest = VM_GUEST_KVM; + else if (strcmp(hv_vendor, "bhyve bhyve ") == 0) + vm_guest = VM_GUEST_BHYVE; } return; }