diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -102,13 +102,16 @@
 extern u_int mptramp_la57;
 extern u_int mptramp_nx;
-
+smp_targeted_tlb_shootdown_t smp_targeted_tlb_shootdown = &smp_targeted_tlb_shootdown_native;
 /*
  * Local data and functions.
  */
 static int start_ap(int apic_id, vm_paddr_t boot_address);
+void
+smp_targeted_tlb_shootdown_legacy(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
+    smp_invl_cb_t curcpu_cb, enum invl_op_codes op);
 /*
  * Initialize the IPI handlers and start up the AP's.
  */
@@ -497,24 +500,6 @@
  * Flush the TLB on other CPU's
  */
-/*
- * Invalidation request.  PCPU pc_smp_tlb_op uses u_int instead of the
- * enum to avoid both namespace and ABI issues (with enums).
- */
-enum invl_op_codes {
-    INVL_OP_TLB = 1,
-    INVL_OP_TLB_INVPCID = 2,
-    INVL_OP_TLB_INVPCID_PTI = 3,
-    INVL_OP_TLB_PCID = 4,
-    INVL_OP_PGRNG = 5,
-    INVL_OP_PGRNG_INVPCID = 6,
-    INVL_OP_PGRNG_PCID = 7,
-    INVL_OP_PG = 8,
-    INVL_OP_PG_INVPCID = 9,
-    INVL_OP_PG_PCID = 10,
-    INVL_OP_CACHE = 11,
-};
-
 /*
  * These variables are initialized at startup to reflect how each of
  * the different kinds of invalidations should be performed on the
@@ -600,8 +585,8 @@
  * Function must be called with the thread pinned, and it unpins on
  * completion.
  */
-static void
-smp_targeted_tlb_shootdown(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
+void
+smp_targeted_tlb_shootdown_native(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
     smp_invl_cb_t curcpu_cb, enum invl_op_codes op)
 {
     cpuset_t mask;
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -145,6 +145,7 @@
 dev/hyperv/vmbus/vmbus_if.m    optional    hyperv
 dev/hyperv/vmbus/vmbus_res.c   optional    hyperv
 dev/hyperv/vmbus/vmbus_xact.c  optional    hyperv
+dev/hyperv/vmbus/hyperv_mmu.c  optional    hyperv
 dev/ichwd/ichwd.c              optional    ichwd
 dev/imcsmb/imcsmb.c            optional    imcsmb
 dev/imcsmb/imcsmb_pci.c        optional    imcsmb pci
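The hyperv.c changes that follow add Hyper-V's "rep" (repeated) hypercall plumbing: the call code, variable header size, rep count, and resume index are packed into one 64-bit control word, and the returned status carries the result code in bits 15:0 plus a "reps completed" count in bits 43:32. Below is a standalone userland sketch of that packing (illustrative only, not part of the patch; the mask values mirror the HV_HYPERCALL_* definitions added to vmbus_var.h further down):

#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) (((~0ULL) >> (64 - (h) - 1)) & ((~0ULL) << (l)))
#define HV_HYPERCALL_RESULT_MASK      GENMASK_ULL(15, 0)
#define HV_HYPERCALL_REP_COMP_MASK    GENMASK_ULL(43, 32)
#define HV_HYPERCALL_REP_COMP_OFFSET  32
#define HV_HYPERCALL_REP_START_MASK   GENMASK_ULL(59, 48)
#define HV_HYPERCALL_REP_START_OFFSET 48

int
main(void)
{
    uint16_t code = 0x0003;    /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST */
    uint16_t rep_count = 10;
    uint64_t control = code | ((uint64_t)rep_count << HV_HYPERCALL_REP_COMP_OFFSET);

    /* Pretend the hypervisor succeeded but completed only 4 of 10 reps. */
    uint64_t status = 4ULL << HV_HYPERCALL_REP_COMP_OFFSET;
    uint64_t result = status & HV_HYPERCALL_RESULT_MASK;
    uint64_t rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >>
        HV_HYPERCALL_REP_COMP_OFFSET;

    /* Resume from the first uncompleted rep, as hv_do_rep_hypercall() does. */
    control &= ~HV_HYPERCALL_REP_START_MASK;
    control |= rep_comp << HV_HYPERCALL_REP_START_OFFSET;

    printf("result=%llu reps_done=%llu next control=%#llx\n",
        (unsigned long long)result, (unsigned long long)rep_comp,
        (unsigned long long)control);
    return (0);
}

If the hypervisor completes only part of the request, the do/while loop in hv_do_rep_hypercall() below re-issues the call with REP_START advanced to the count already done, so no list element is processed twice.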
diff --git a/sys/dev/hyperv/vmbus/hyperv.c b/sys/dev/hyperv/vmbus/hyperv.c
--- a/sys/dev/hyperv/vmbus/hyperv.c
+++ b/sys/dev/hyperv/vmbus/hyperv.c
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -50,6 +51,7 @@
 #include
 #include
 #endif
+#include
 #include
 #include
@@ -72,10 +74,12 @@
     MSR_HV_GUESTID_OSID_FREEBSD |    \
     MSR_HV_GUESTID_OSTYPE_FREEBSD)
+
 static bool        hyperv_identify(void);
 static void        hypercall_memfree(void);
 static struct hypercall_ctx    hypercall_context;
+
 uint64_t
 hypercall_post_message(bus_addr_t msg_paddr)
 {
@@ -90,6 +94,65 @@
         HYPERCALL_SIGNAL_EVENT, monprm_paddr, 0);
 }
+static inline int hv_result(uint64_t status)
+{
+    return status & HV_HYPERCALL_RESULT_MASK;
+}
+
+static inline bool hv_result_success(uint64_t status)
+{
+    return hv_result(status) == HV_STATUS_SUCCESS;
+}
+
+static inline unsigned int hv_repcomp(uint64_t status)
+{
+    /* Bits [43:32] of status have 'Reps completed' data. */
+    return (status & HV_HYPERCALL_REP_COMP_MASK) >>
+        HV_HYPERCALL_REP_COMP_OFFSET;
+}
+
+/*
+ * Rep hypercalls.  Callers of this function are supposed to ensure that
+ * rep_count and varhead_size comply with the Hyper-V hypercall definition.
+ */
+uint64_t
+hv_do_rep_hypercall(uint16_t code, uint16_t rep_count, uint16_t varhead_size,
+    uint64_t input, uint64_t output)
+{
+    uint64_t control = code;
+    uint64_t status;
+    uint16_t rep_comp;
+
+    control |= (uint64_t)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
+    control |= (uint64_t)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
+
+    do {
+        status = hypercall_do_md(control, input, output);
+        if (!hv_result_success(status))
+            return status;
+
+        rep_comp = hv_repcomp(status);
+
+        control &= ~HV_HYPERCALL_REP_START_MASK;
+        control |= (uint64_t)rep_comp << HV_HYPERCALL_REP_START_OFFSET;
+    } while (rep_comp < rep_count);
+    if (hv_result_success(status))
+        return HV_STATUS_SUCCESS;
+
+    return status;
+}
+
+uint64_t
+hypercall_do_md(uint64_t input_val, uint64_t input_addr, uint64_t out_addr)
+{
+    uint64_t phys_inaddr, phys_outaddr;
+
+    phys_inaddr = input_addr ? vtophys(input_addr) : 0;
+    phys_outaddr = out_addr ? vtophys(out_addr) : 0;
+    return hypercall_md(hypercall_context.hc_addr,
+        input_val, phys_inaddr, phys_outaddr);
+}
+
 int
 hyperv_guid2str(const struct hyperv_guid *guid, char *buf, size_t sz)
 {
@@ -171,3 +234,4 @@
 }
 SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy,
     NULL);
+
diff --git a/sys/dev/hyperv/vmbus/hyperv_var.h b/sys/dev/hyperv/vmbus/hyperv_mmu.h
copy from sys/dev/hyperv/vmbus/hyperv_var.h
copy to sys/dev/hyperv/vmbus/hyperv_mmu.h
--- a/sys/dev/hyperv/vmbus/hyperv_var.h
+++ b/sys/dev/hyperv/vmbus/hyperv_mmu.h
@@ -1,5 +1,7 @@
 /*-
- * Copyright (c) 2016 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,16 +26,32 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-#ifndef _HYPERV_VAR_H_
-#define _HYPERV_VAR_H_
+#ifndef _HYPERV_MMU_H_
+#define _HYPERV_MMU_H_
-extern u_int hyperv_recommends;
+#include "vmbus_var.h"
-struct hypercall_ctx {
-    void *hc_addr;
-    vm_paddr_t hc_paddr;
-};
-uint64_t hypercall_post_message(bus_addr_t msg_paddr);
-uint64_t hypercall_signal_event(bus_addr_t monprm_paddr);
+#define HV_VCPUS_PER_SPARSE_BANK    (64)
+#define HV_MAX_SPARSE_VCPU_BANKS    (64)
+
+struct hyperv_tlb_flush {
+    uint64_t address_space;
+    uint64_t flags;
+    uint64_t processor_mask;
+    uint64_t gva_list[];
+} __packed;
+
+struct hv_vpset {
+    uint64_t format;
+    uint64_t valid_bank_mask;
+    uint64_t bank_contents[];
+} __packed;
+
+struct hv_tlb_flush_ex {
+    uint64_t address_space;
+    uint64_t flags;
+    struct hv_vpset hv_vp_set;
+} __packed;
+
-#endif /* !_HYPERV_VAR_H_ */
+#endif /* !_HYPERV_MMU_H_ */
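The hv_vpset added to hyperv_mmu.h describes flush targets as a sparse bank array: each group of 64 VP indices forms one 64-bit bank, and valid_bank_mask records which banks are present (at most HV_MAX_SPARSE_VCPU_BANKS). A runnable illustration of the bank arithmetic that hv_cpumask_to_vpset() in the new hyperv_mmu.c performs (the VP indices here are made up; not part of the patch):

#include <stdio.h>
#include <stdint.h>

#define HV_VCPUS_PER_SPARSE_BANK 64
#define GENMASK_ULL(h, l) (((~0ULL) >> (64 - (h) - 1)) & ((~0ULL) << (l)))

int
main(void)
{
    int vcpus[] = { 1, 6, 70 };    /* hypothetical VP indices */
    uint64_t bank_contents[2] = { 0, 0 };
    int i, nr_bank = 1;

    for (i = 0; i < 3; i++) {
        int bank = vcpus[i] / HV_VCPUS_PER_SPARSE_BANK;   /* 70 -> bank 1 */
        int offset = vcpus[i] % HV_VCPUS_PER_SPARSE_BANK; /* 70 -> bit 6 */

        bank_contents[bank] |= 1ULL << offset;
        if (bank >= nr_bank)
            nr_bank = bank + 1;
    }
    /* Two banks in use, so valid_bank_mask = GENMASK_ULL(1, 0) = 0x3. */
    printf("valid_bank_mask=%#llx bank0=%#llx bank1=%#llx\n",
        (unsigned long long)GENMASK_ULL(nr_bank - 1, 0),
        (unsigned long long)bank_contents[0],
        (unsigned long long)bank_contents[1]);
    return (0);
}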
diff --git a/sys/dev/hyperv/vmbus/hyperv_mmu.c b/sys/dev/hyperv/vmbus/hyperv_mmu.c
new file mode 100644
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/hyperv_mmu.c
@@ -0,0 +1,307 @@
+/*-
+ * Copyright (c) 2009-2012,2016-2017 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "hyperv_mmu.h"
+
+static inline int fill_gva_list(uint64_t gva_list[],
+    unsigned long start, unsigned long end)
+{
+    int gva_n = 0;
+    unsigned long cur = start, diff;
+
+    do {
+        diff = end > cur ? end - cur : 0;
+
+        gva_list[gva_n] = cur;
+        /*
+         * Lower 12 bits encode the number of additional
+         * pages to flush (in addition to the 'cur' page).
+         */
+        if (diff >= HV_TLB_FLUSH_UNIT) {
+            gva_list[gva_n] |= PAGE_MASK;
+            cur += HV_TLB_FLUSH_UNIT;
+        } else if (diff) {
+            gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+            cur = end;
+        }
+
+        gva_n++;
+    } while (cur < end);
+
+    return gva_n;
+}
+
+inline int hv_cpumask_to_vpset(struct hv_vpset *vpset,
+    const cpuset_t *cpus, struct vmbus_softc *sc)
+{
+    int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+    int max_vcpu_bank = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK;
+
+    /*
+     * vpset.valid_bank_mask can represent up to
+     * HV_MAX_SPARSE_VCPU_BANKS banks.
+     */
+    if (max_vcpu_bank >= HV_MAX_SPARSE_VCPU_BANKS)
+        return 0;
+
+    /*
+     * Clear all banks up to the maximum possible bank: hv_tlb_flush_ex
+     * structs are not cleared between calls, so we would otherwise risk
+     * flushing unneeded vCPUs.
+     */
+    for (vcpu_bank = 0; vcpu_bank <= max_vcpu_bank; vcpu_bank++)
+        vpset->bank_contents[vcpu_bank] = 0;
+
+    /*
+     * Some banks may end up being empty but this is acceptable.
+     */
+    CPU_FOREACH_ISSET(cpu, cpus) {
+        vcpu = VMBUS_PCPU_GET(sc, vcpuid, cpu);
+        if (vcpu == -1)
+            return -1;
+        vcpu_bank = vcpu / HV_VCPUS_PER_SPARSE_BANK;
+        vcpu_offset = vcpu % HV_VCPUS_PER_SPARSE_BANK;
+        set_bit(vcpu_offset, (unsigned long *)
+            &vpset->bank_contents[vcpu_bank]);
+        if (vcpu_bank >= nr_bank)
+            nr_bank = vcpu_bank + 1;
+    }
+    vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+    return nr_bank;
+}
+
+void
+hv_vm_tlb_flush(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
+    enum invl_op_codes op, struct vmbus_softc *sc,
+    smp_invl_local_cb_t curcpu_cb)
+{
+    cpuset_t tmp_mask, mask;
+    struct hyperv_tlb_flush *flush;
+    int cpu, vcpu;
+    int max_gvas, gva_n;
+    uint64_t status = 0;
+    uint64_t cr3;
+
+    /*
+     * Only plain TLB, page-range and single-page invalidations are
+     * handled via the flush hypercalls; the PCID/PTI variants fall back
+     * to the native shootdown (KPTI should be disabled in Hyper-V).
+     */
+    if (op != INVL_OP_TLB && op != INVL_OP_PGRNG && op != INVL_OP_PG)
+        return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
+            curcpu_cb, op);
+
+    flush = *DPCPU_PTR(hv_pcpu_mem);
+    if (flush == NULL)
+        return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
+            curcpu_cb, op);
+    /*
+     * It is not necessary to signal other CPUs while booting or
+     * when in the debugger.
+     */
+    if (__predict_false(kdb_active || KERNEL_PANICKED() || !smp_started))
+        goto local_cb;
+
+    KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
+
+    /*
+     * Make a stable copy of the set of CPUs on which the pmap is active.
+     * See if we have to interrupt other CPUs.
+     */
+    CPU_COPY(pmap_invalidate_cpu_mask(pmap), &tmp_mask);
+    CPU_COPY(pmap_invalidate_cpu_mask(pmap), &mask);
+    CPU_CLR(curcpu, &tmp_mask);
+    if (CPU_EMPTY(&tmp_mask))
+        goto local_cb;
+
+    /*
+     * Initiator must have interrupts enabled, which prevents
+     * non-invalidation IPIs that take smp_ipi_mtx spinlock,
+     * from deadlocking with us.  On the other hand, preemption
+     * must be disabled to pin initiator to the instance of the
+     * pcpu pc_smp_tlb data and scoreboard line.
+     */
+    KASSERT((read_rflags() & PSL_I) != 0,
+        ("hv_tlb_flush: interrupts disabled"));
+    critical_enter();
+    flush->processor_mask = 0;
+    cr3 = pmap->pm_cr3;
+
+    if (op == INVL_OP_TLB) {
+        flush->address_space = 0;
+        flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+    } else {
+        flush->address_space = cr3;
+        flush->address_space &= ~CR3_PCID_MASK;
+        flush->flags = 0;
+    }
+    if (CPU_CMP(&mask, &all_cpus) == 0) {
+        flush->flags |= HV_FLUSH_ALL_PROCESSORS;
+    } else {
+        if (CPU_FLS(&mask) < mp_ncpus && CPU_FLS(&mask) >= 64)
+            goto do_ex_hypercall;
+
+        CPU_FOREACH_ISSET(cpu, &mask) {
+            vcpu = VMBUS_PCPU_GET(sc, vcpuid, cpu);
+            if (vcpu >= 64)
+                goto do_ex_hypercall;
+
+            set_bit(vcpu, &flush->processor_mask);
+        }
+        if (!flush->processor_mask)
+            goto native;
+    }
+    max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
+    if (addr2 == 0) {
+        flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+        status = hypercall_do_md(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+            (uint64_t)flush, (uint64_t)NULL);
+    } else if ((addr2 - addr1) / HV_TLB_FLUSH_UNIT > max_gvas) {
+        status = hypercall_do_md(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
+            (uint64_t)flush, (uint64_t)NULL);
+    } else {
+        gva_n = fill_gva_list(flush->gva_list, addr1, addr2);
+        status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
+            gva_n, 0, (uint64_t)flush, (uint64_t)NULL);
+    }
+    if (status)
+        goto native;
+    sched_unpin();
+    critical_exit();
+    return;
+
+local_cb:
+    critical_enter();
+    curcpu_cb(pmap, addr1, addr2);
+    sched_unpin();
+    critical_exit();
+    return;
+
+do_ex_hypercall:
+    status = hv_flush_tlb_others_ex(pmap, addr1, addr2, mask, op, sc);
+    if (status)
+        goto native;
+    sched_unpin();
+    critical_exit();
+    return;
+
+native:
+    sched_unpin();
+    critical_exit();
+    return smp_targeted_tlb_shootdown_native(pmap, addr1, addr2,
+        curcpu_cb, op);
+}
+
+uint64_t
+hv_flush_tlb_others_ex(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2,
+    const cpuset_t mask, enum invl_op_codes op, struct vmbus_softc *sc)
+{
+    int nr_bank = 0, max_gvas, gva_n;
+    struct hv_tlb_flush_ex *flush;
+    uint64_t status = 0;
+    uint64_t cr3;
+
+    if (*DPCPU_PTR(hv_pcpu_mem) == NULL)
+        return EINVAL;
+    flush = *DPCPU_PTR(hv_pcpu_mem);
+
+    if (!(hyperv_recommends & HYPERV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+        return EINVAL;
+
+    cr3 = pmap->pm_cr3;
+    if (op == INVL_OP_TLB) {
+        flush->address_space = 0;
+        flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+    } else {
+        flush->address_space = cr3;
+        flush->address_space &= ~CR3_PCID_MASK;
+        flush->flags = 0;
+    }
+
+    flush->hv_vp_set.valid_bank_mask = 0;
+    flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+    nr_bank = hv_cpumask_to_vpset(&flush->hv_vp_set, &mask, sc);
+    if (nr_bank < 0)
+        return EINVAL;
+
+    /*
+     * We can flush not more than max_gvas with one hypercall.  Flush the
+     * whole address space if we were asked to do more.
+     */
+    max_gvas = (PAGE_SIZE - sizeof(*flush) - nr_bank *
+        sizeof(flush->hv_vp_set.bank_contents[0])) /
+        sizeof(flush->hv_vp_set.bank_contents[0]);
+
+    if (addr2 == 0) {
+        flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
+        status = hv_do_rep_hypercall(
+            HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+            0, nr_bank, (uint64_t)flush, (uint64_t)NULL);
+    } else if (addr2 &&
+        (addr2 - addr1) / HV_TLB_FLUSH_UNIT > max_gvas) {
+        status = hv_do_rep_hypercall(
+            HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+            0, nr_bank, (uint64_t)flush, (uint64_t)NULL);
+    } else {
+        gva_n = fill_gva_list(&flush->hv_vp_set.bank_contents[nr_bank],
+            addr1, addr2);
+        status = hv_do_rep_hypercall(
+            HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
+            gva_n, nr_bank, (uint64_t)flush, (uint64_t)NULL);
+    }
+    return status;
+}
+
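Two size calculations in the file above are worth making concrete. Each gva_list entry names a starting VA and, in its low 12 bits, up to 4095 additional pages, so one entry covers at most HV_TLB_FLUSH_UNIT (16 MB); and because the hypercall input is a single 4 KB per-CPU page, max_gvas for the simple form works out to (4096 - 24) / 8 = 509 entries, roughly 8 GB per call, beyond which the whole address space is flushed instead. A runnable restatement of that arithmetic (constants mirror the patch; the code is illustrative, not part of it):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)    /* 16 MB per list entry */

int
main(void)
{
    /* struct hyperv_tlb_flush has three uint64_t fields before gva_list. */
    unsigned long hdr = 3 * sizeof(uint64_t);
    unsigned long max_gvas = (PAGE_SIZE - hdr) / sizeof(uint64_t);

    printf("max_gvas=%lu, max range per call=%lu MB\n",
        max_gvas, max_gvas * (HV_TLB_FLUSH_UNIT >> 20));

    /* Encode a 5-page flush at 0x200000: low bits hold 4 extra pages. */
    unsigned long start = 0x200000, end = start + 5 * PAGE_SIZE;
    uint64_t entry = start | (((end - start) - 1) >> PAGE_SHIFT);

    printf("gva_list entry=%#llx\n", (unsigned long long)entry);
    return (0);
}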
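In hv_vm_tlb_flush() above, the simple hypercall's processor_mask is a single 64-bit field, so it can only name VP indices 0 through 63; as soon as any target vCPU has a larger index, the code jumps to do_ex_hypercall and uses the sparse-set form instead. A minimal sketch of that decision (hypothetical VP indices; not part of the patch):

#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
    int vcpus[] = { 2, 17, 70 };    /* hypothetical targets */
    uint64_t processor_mask = 0;
    bool need_ex = false;
    int i;

    for (i = 0; i < 3; i++) {
        if (vcpus[i] >= 64) {
            need_ex = true;    /* mirrors "goto do_ex_hypercall" */
            break;
        }
        processor_mask |= 1ULL << vcpus[i];
    }
    if (need_ex)
        printf("VP index >= 64 present; use the _EX hypercall\n");
    else
        printf("simple form, processor_mask=%#llx\n",
            (unsigned long long)processor_mask);
    return (0);
}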
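vmbus.c wires the override in two steps: vmbus_synic_setup() gives every CPU one page of hypercall input memory through DPCPU, and attach time repoints smp_targeted_tlb_shootdown, whose boot default is smp_targeted_tlb_shootdown_native (see the mp_machdep.c hunk at the top). Reduced to a runnable toy with simplified types (none of these names are the kernel's), the function-pointer pattern looks like this:

#include <stdio.h>

typedef void (*shootdown_t)(int op);

static void
shootdown_native(int op)
{
    printf("native IPI shootdown, op %d\n", op);
}

static void
shootdown_hypercall(int op)
{
    printf("Hyper-V flush hypercall, op %d\n", op);
}

/* Boot-time default, as in mp_machdep.c. */
static shootdown_t targeted_shootdown = shootdown_native;

int
main(void)
{
    targeted_shootdown(1);                    /* before vmbus attaches */
    targeted_shootdown = shootdown_hypercall; /* vmbus attach */
    targeted_shootdown(1);
    return (0);
}

Registering the hook only after the SynIC and the per-CPU pages are set up means the pmap layer can call through the pointer at any time without checking whether Hyper-V is present.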
diff --git a/sys/dev/hyperv/vmbus/vmbus_var.h b/sys/dev/hyperv/vmbus/vmbus_var.h
--- a/sys/dev/hyperv/vmbus/vmbus_var.h
+++ b/sys/dev/hyperv/vmbus/vmbus_var.h
@@ -32,6 +32,11 @@
 #include
 #include
 
+#include
+#include
+#include
+#include
+
 #include
 #include
 
@@ -137,6 +142,40 @@
 #define VMBUS_PCPU_GET(sc, field, cpu)    (sc)->vmbus_pcpu[(cpu)].field
 #define VMBUS_PCPU_PTR(sc, field, cpu)    &(sc)->vmbus_pcpu[(cpu)].field
 
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE    0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HV_FLUSH_ALL_PROCESSORS               BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES   BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY     BIT(2)
+#define HV_TLB_FLUSH_UNIT    (4096 * PAGE_SIZE)
+
+#define BIT(n)           (1ULL << (n))
+#define BITS_PER_LONG    (sizeof(long) * NBBY)
+#define BIT_MASK(nr)     (1UL << ((nr) & (BITS_PER_LONG - 1)))
+#define BIT_WORD(nr)     ((nr) / BITS_PER_LONG)
+#define set_bit(i, a) \
+    atomic_set_long(&((volatile unsigned long *)(a))[BIT_WORD(i)], BIT_MASK(i))
+
+#define GENMASK_ULL(h, l)    (((~0ULL) >> (64 - (h) - 1)) & ((~0ULL) << (l)))
+
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST     0x0003
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX  0x0014
+#define HYPERV_X64_EX_PROCESSOR_MASKS_RECOMMENDED    BIT(11)
+#define HV_HYPERCALL_RESULT_MASK     GENMASK_ULL(15, 0)
+#define HV_STATUS_SUCCESS            0
+#define HV_HYPERCALL_REP_COMP_MASK   GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+
+#define HV_HYPERCALL_VARHEAD_OFFSET  17
+
+#define HV_HYPERCALL_REP_START_MASK  GENMASK_ULL(59, 48)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+
+enum HV_GENERIC_SET_FORMAT {
+    HV_GENERIC_SET_SPARSE_4K,
+    HV_GENERIC_SET_ALL,
+};
+
 struct vmbus_channel;
 struct trapframe;
@@ -176,4 +215,20 @@
 void    vmbus_synic_teardown1(void);
 int     vmbus_setup_intr1(struct vmbus_softc *sc);
 void    vmbus_intr_teardown1(struct vmbus_softc *sc);
+
+DPCPU_DECLARE(void *, hv_pcpu_mem);
+
+extern uint32_t hv_max_vp_index;
+
+#if defined(__x86_64__)
+void hyperv_vm_tlb_flush(pmap_t, vm_offset_t, vm_offset_t,
+    smp_invl_local_cb_t, enum invl_op_codes);
+uint64_t hv_flush_tlb_others_ex(pmap_t, vm_offset_t, vm_offset_t,
+    cpuset_t, enum invl_op_codes, struct vmbus_softc *);
+void hv_vm_tlb_flush(pmap_t, vm_offset_t, vm_offset_t,
+    enum invl_op_codes, struct vmbus_softc *, smp_invl_local_cb_t);
+#endif /* __x86_64__ */
 #endif /* !_VMBUS_VAR_H_ */
diff --git a/sys/modules/hyperv/vmbus/Makefile b/sys/modules/hyperv/vmbus/Makefile
--- a/sys/modules/hyperv/vmbus/Makefile
+++ b/sys/modules/hyperv/vmbus/Makefile
@@ -18,7 +18,7 @@
 SRCS+=	vmbus_vector.S
 .endif
 .if ${MACHINE_CPUARCH} != "aarch64"
-SRCS+=	vmbus_et.c hyperv_x86.c vmbus_x86.c
+SRCS+=	vmbus_et.c hyperv_x86.c vmbus_x86.c hyperv_mmu.c
 .else
 SRC+=	hyperv_aarch64.c vmbus_aarch64.c
 .endif
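The Linux-style bit helpers defined in the vmbus_var.h hunk above are used throughout the new code; note that the patch's set_bit() is atomic (atomic_set_long), unlike the plain OR in this illustrative check of what the macros expand to (standalone, not part of the patch):

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
#define BIT_MASK(nr) (1UL << ((nr) & (BITS_PER_LONG - 1)))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
#define GENMASK_ULL(h, l) (((~0ULL) >> (64 - (h) - 1)) & ((~0ULL) << (l)))

int
main(void)
{
    unsigned long a[2] = { 0, 0 };

    /* GENMASK_ULL(43, 32) is the 'reps completed' field: 0xfff00000000. */
    printf("REP_COMP mask=%#llx\n", (unsigned long long)GENMASK_ULL(43, 32));

    /* set_bit(70, a): word 1, bit 6 on LP64. */
    a[BIT_WORD(70)] |= BIT_MASK(70);
    printf("a[0]=%#lx a[1]=%#lx\n", a[0], a[1]);
    return (0);
}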
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
--- a/sys/vm/pmap.h
+++ b/sys/vm/pmap.h
@@ -86,7 +86,6 @@
  *	void pmap_page_set_memattr(vm_page_t, vm_memattr_t);
  */
 #include
-
 #ifdef _KERNEL
 #include
 struct thread;
@@ -167,6 +166,33 @@
 void	pmap_zero_page(vm_page_t);
 void	pmap_zero_page_area(vm_page_t, int off, int size);
 
+/*
+ * Invalidation request.  PCPU pc_smp_tlb_op uses u_int instead of the
+ * enum to avoid both namespace and ABI issues (with enums).
+ */
+enum invl_op_codes {
+    INVL_OP_TLB = 1,
+    INVL_OP_TLB_INVPCID = 2,
+    INVL_OP_TLB_INVPCID_PTI = 3,
+    INVL_OP_TLB_PCID = 4,
+    INVL_OP_PGRNG = 5,
+    INVL_OP_PGRNG_INVPCID = 6,
+    INVL_OP_PGRNG_PCID = 7,
+    INVL_OP_PG = 8,
+    INVL_OP_PG_INVPCID = 9,
+    INVL_OP_PG_PCID = 10,
+    INVL_OP_CACHE = 11,
+};
+
+typedef void (*smp_invl_local_cb_t)(struct pmap *, vm_offset_t addr1,
+    vm_offset_t addr2);
+typedef void (*smp_targeted_tlb_shootdown_t)(pmap_t, vm_offset_t, vm_offset_t,
+    smp_invl_local_cb_t, enum invl_op_codes);
+
+extern void
+smp_targeted_tlb_shootdown_native(pmap_t, vm_offset_t, vm_offset_t,
+    smp_invl_local_cb_t, enum invl_op_codes);
+extern smp_targeted_tlb_shootdown_t smp_targeted_tlb_shootdown;
+
 #define	pmap_resident_count(pm)	((pm)->pm_stats.resident_count)
 #define	pmap_wired_count(pm)	((pm)->pm_stats.wired_count)