Index: head/sys/amd64/amd64/xen-locore.S =================================================================== --- head/sys/amd64/amd64/xen-locore.S (revision 336473) +++ head/sys/amd64/amd64/xen-locore.S (revision 336474) @@ -1,234 +1,234 @@ /*- * Copyright (c) 2003 Peter Wemm * Copyright (c) 2011-2012 Spectra Logic Corporation * Copyright (c) 2013 Roger Pau Monne * All rights reserved. * * This software was developed by Cherry G. Mathew * under sponsorship from Spectra Logic Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #define __ASSEMBLY__ #include #include "assym.inc" #define VTOP(x) ((x) - KERNBASE) #define ENTRY_SIZE 8 /* sizeof(uint64_t) */ #define GDT_CODE 0x08 #define GDT_DATA 0x10 .section __xen_guest ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "FreeBSD") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, __XSTRING(__FreeBSD_version)) ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, KERNBASE) ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0) ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, xen_start) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .quad, HYPERVISOR_VIRT_START) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V) ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 0) ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB, .asciz, "yes") /* For PVHv2 support. */ ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, VTOP(xen_start32)) .text .p2align PAGE_SHIFT, 0x90 /* Hypercall_page needs to be PAGE aligned */ NON_GPROF_ENTRY(hypercall_page) .skip 0x1000, 0x90 /* Fill with "nop"s */ /* Legacy PVH entry point, to be removed. */ NON_GPROF_ENTRY(xen_start) /* Don't trust what the loader gives for rflags. */ pushq $PSL_KERNEL popfq /* Parameters for the xen init function */ movq %rsi, %rdi /* shared_info (arg 1) */ movq %rsp, %rsi /* xenstack (arg 2) */ /* Use our own stack */ movq $bootstack,%rsp xorl %ebp, %ebp /* u_int64_t hammer_time_xen(start_info_t *si, u_int64_t xenstack); */ - call hammer_time_xen + call hammer_time_xen_legacy movq %rax, %rsp /* set up kstack for mi_startup() */ call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ 0: hlt jmp 0b /* PVH entry point. 
*/ .code32 NON_GPROF_ENTRY(xen_start32) /* Load flat GDT */ movl $VTOP(gdtdesc32), %eax lgdt (%eax) jmp $GDT_CODE, $VTOP(reload_cs) reload_cs: movw $GDT_DATA, %ax movw %ax, %ds movw %ax, %es movw %ax, %ss movl $VTOP(bootstack), %esp /* Don't trust what the loader gives for eflags. */ pushl $PSL_KERNEL popfl /* * Create the page tables. * The first 1GB is mapped using 2MB entries. */ movl $0, %eax pgbuild: cmp $(PAGE_SIZE/ENTRY_SIZE), %eax jae pgbuild_done /* PT4[i] = VTOP(&PT3[0]) | PG_V | PG_RW | PG_U */ movl $VTOP(PT4), %ecx movl $VTOP(PT3), %edx orl $(PG_V | PG_RW | PG_U), %edx movl %edx, (%ecx,%eax,ENTRY_SIZE) /* PT3[i] = VTOP(&PT2[0]) | PG_V | PG_RW | PG_U */ movl $VTOP(PT3), %ecx movl $VTOP(PT2), %edx orl $(PG_V | PG_RW | PG_U), %edx movl %edx, (%ecx,%eax,ENTRY_SIZE) /* PT2[i] = i * 2MiB | PG_V | PG_RW | PG_PS | PG_U */ movl $VTOP(PT2), %ecx movl %eax, %edx shll $PDRSHIFT, %edx orl $(PG_V | PG_RW | PG_PS | PG_U), %edx movl %edx, (%ecx,%eax,ENTRY_SIZE) inc %eax jmp pgbuild pgbuild_done: /* Turn on EFER.LME */ movl $MSR_EFER, %ecx rdmsr orl $EFER_LME, %eax wrmsr /* Turn on PAE */ movl %cr4, %eax orl $CR4_PAE, %eax movl %eax, %cr4 /* Set %cr3 for PT4 */ movl $VTOP(PT4), %eax movl %eax, %cr3 /* Turn on paging (implicitly sets EFER.LMA) */ movl %cr0, %eax orl $CR0_PG, %eax movl %eax, %cr0 /* Now we're in compatibility mode. Set %cs for long mode */ movl $VTOP(gdtdesc), %eax lgdt (%eax) ljmp $GDT_CODE, $VTOP(longmode) .code64 longmode: /* We're still running V=P, jump to entry point */ movq $bootstack, %rsp movq $start_kernel, %rax pushq %rax ret start_kernel: /* * Pass %ebx as the argument to hammer_time_xen, it contains * the startup info. 
*/ movq %rbx, %rdi call hammer_time_xen movq %rax, %rsp call mi_startup /* NOTREACHED */ 0: hlt jmp 0b /* Space for initial page tables */ .data .p2align 12,0x40 PT4: .space 0x1000 PT3: .space 0x1000 PT2: .space 0x1000 /* 64bit GDT */ gdtdesc: .word gdtend - gdt .long VTOP(gdt) # low .long 0 # high gdt: .long 0 # null descriptor .long 0 .long 0x00000000 # %cs .long 0x00209800 .long 0x00000000 # %ds .long 0x00008000 gdtend: /* 32bit GDT */ gdtdesc32: .word gdt32end - gdt32 .long VTOP(gdt32) .long 0 gdt32: .long 0 # null descriptor .long 0 .long 0x0000ffff # %cs .long 0x00cf9a00 .long 0x0000ffff # %ds, %es, %ss .long 0x00cf9200 gdt32end: Index: head/sys/x86/xen/hvm.c =================================================================== --- head/sys/x86/xen/hvm.c (revision 336473) +++ head/sys/x86/xen/hvm.c (revision 336474) @@ -1,481 +1,487 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008, 2013 Citrix Systems, Inc. * Copyright (c) 2012 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*--------------------------- Forward Declarations ---------------------------*/ static void xen_hvm_cpu_init(void); /*-------------------------------- Global Data -------------------------------*/ enum xen_domain_type xen_domain_type = XEN_NATIVE; #ifdef SMP struct cpu_ops xen_hvm_cpu_ops = { .cpu_init = xen_hvm_cpu_init, .cpu_resume = xen_hvm_cpu_init }; #endif static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); /** * If non-zero, the hypervisor has been configured to use a direct * IDT event callback for interrupt injection. */ int xen_vector_callback_enabled; +/** + * Start info flags. ATM this is only used to store the initial domain flag for + * PVHv2, and it's always empty for HVM guests. 
+ */ +uint32_t hvm_start_flags; + /*------------------------------- Per-CPU Data -------------------------------*/ DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); DPCPU_DEFINE(struct vcpu_info *, vcpu_info); /*------------------ Hypervisor Access Shared Memory Regions -----------------*/ shared_info_t *HYPERVISOR_shared_info; /*------------------------------ Sysctl tunables -----------------------------*/ int xen_disable_pv_disks = 0; int xen_disable_pv_nics = 0; TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks); TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics); /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ static uint32_t cpuid_base; static uint32_t xen_hvm_cpuid_base(void) { uint32_t base, regs[4]; for (base = 0x40000000; base < 0x40010000; base += 0x100) { do_cpuid(base, regs); if (!memcmp("XenVMMXenVMM", &regs[1], 12) && (regs[0] - base) >= 2) return (base); } return (0); } static void hypervisor_quirks(unsigned int major, unsigned int minor) { #ifdef SMP if (((major < 4) || (major == 4 && minor <= 5)) && msix_disable_migration == -1) { /* * Xen hypervisors prior to 4.6.0 do not properly * handle updates to enabled MSI-X table entries, * so disable MSI-X interrupt migration in that * case. */ if (bootverbose) printf( "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n" "Set machdep.msix_disable_migration=0 to forcefully enable it.\n"); msix_disable_migration = 1; } #endif } static void hypervisor_version(void) { uint32_t regs[4]; int major, minor; do_cpuid(cpuid_base + 1, regs); major = regs[0] >> 16; minor = regs[0] & 0xffff; printf("XEN: Hypervisor version %d.%d detected.\n", major, minor); hypervisor_quirks(major, minor); } /* * Allocate and fill in the hypcall page. 
*/ int xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type init_type) { uint32_t regs[4]; if (xen_domain() && init_type == XEN_HVM_INIT_LATE) { /* * If the domain type is already set we can assume that the * hypercall page has been populated too, so just print the * version (and apply any quirks) and exit. */ hypervisor_version(); return 0; } cpuid_base = xen_hvm_cpuid_base(); if (cpuid_base == 0) return (ENXIO); if (init_type == XEN_HVM_INIT_LATE) hypervisor_version(); /* * Find the hypercall pages. */ do_cpuid(cpuid_base + 2, regs); if (regs[0] != 1) return (EINVAL); wrmsr(regs[1], (init_type == XEN_HVM_INIT_EARLY) ? ((vm_paddr_t)&hypercall_page - KERNBASE) : vtophys(&hypercall_page)); return (0); } static void xen_hvm_init_shared_info_page(void) { struct xen_add_to_physmap xatp; if (xen_pv_domain()) { /* * Already setup in the PV case, shared_info is passed inside * of the start_info struct at start of day. */ return; } if (HYPERVISOR_shared_info == NULL) { HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT); if (HYPERVISOR_shared_info == NULL) panic("Unable to allocate Xen shared info page"); } xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed"); } /* * Tell the hypervisor how to contact us for event channel callbacks. */ void xen_hvm_set_callback(device_t dev) { struct xen_hvm_param xhp; int irq; if (xen_vector_callback_enabled) return; xhp.domid = DOMID_SELF; xhp.index = HVM_PARAM_CALLBACK_IRQ; if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { int error; xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); if (error == 0) { xen_vector_callback_enabled = 1; return; } printf("Xen HVM callback vector registration failed (%d). 
" "Falling back to emulated device interrupt\n", error); } xen_vector_callback_enabled = 0; if (dev == NULL) { /* * Called from early boot or resume. * xenpci will invoke us again later. */ return; } irq = pci_get_irq(dev); if (irq < 16) { xhp.value = HVM_CALLBACK_GSI(irq); } else { u_int slot; u_int pin; slot = pci_get_slot(dev); pin = pci_get_intpin(dev) - 1; xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); } if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) panic("Can't set evtchn callback"); } #define XEN_MAGIC_IOPORT 0x10 enum { XMI_MAGIC = 0x49d2, XMI_UNPLUG_IDE_DISKS = 0x01, XMI_UNPLUG_NICS = 0x02, XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 }; static void xen_hvm_disable_emulated_devices(void) { u_short disable_devs = 0; if (xen_pv_domain()) { /* * No emulated devices in the PV case, so no need to unplug * anything. */ if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0) printf("PV devices cannot be disabled in PV guests\n"); return; } if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) return; if (xen_disable_pv_disks == 0) { if (bootverbose) printf("XEN: disabling emulated disks\n"); disable_devs |= XMI_UNPLUG_IDE_DISKS; } if (xen_disable_pv_nics == 0) { if (bootverbose) printf("XEN: disabling emulated nics\n"); disable_devs |= XMI_UNPLUG_NICS; } if (disable_devs != 0) outw(XEN_MAGIC_IOPORT, disable_devs); } static void xen_hvm_init(enum xen_hvm_init_type init_type) { int error; int i; if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) return; error = xen_hvm_init_hypercall_stubs(init_type); switch (init_type) { case XEN_HVM_INIT_LATE: if (error != 0) return; /* * If xen_domain_type is not set at this point * it means we are inside a (PV)HVM guest, because * for PVH the guest type is set much earlier * (see hammer_time_xen). 
*/ if (!xen_domain()) { xen_domain_type = XEN_HVM_DOMAIN; vm_guest = VM_GUEST_XEN; } setup_xen_features(); #ifdef SMP cpu_ops = xen_hvm_cpu_ops; #endif break; case XEN_HVM_INIT_RESUME: if (error != 0) panic("Unable to init Xen hypercall stubs on resume"); /* Clear stale vcpu_info. */ CPU_FOREACH(i) DPCPU_ID_SET(i, vcpu_info, NULL); break; default: panic("Unsupported HVM initialization type"); } xen_vector_callback_enabled = 0; xen_hvm_set_callback(NULL); /* * On (PV)HVM domains we need to request the hypervisor to * fill the shared info page, for PVH guest the shared_info page * is passed inside the start_info struct and is already set, so this * functions are no-ops. */ xen_hvm_init_shared_info_page(); xen_hvm_disable_emulated_devices(); } void xen_hvm_suspend(void) { } void xen_hvm_resume(bool suspend_cancelled) { xen_hvm_init(suspend_cancelled ? XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); /* Register vcpu_info area for CPU#0. */ xen_hvm_cpu_init(); } static void xen_hvm_sysinit(void *arg __unused) { xen_hvm_init(XEN_HVM_INIT_LATE); } SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); static void xen_hvm_cpu_init(void) { struct vcpu_register_vcpu_info info; struct vcpu_info *vcpu_info; uint32_t regs[4]; int cpu, rc; if (!xen_domain()) return; if (DPCPU_GET(vcpu_info) != NULL) { /* * vcpu_info is already set. We're resuming * from a failed migration and our pre-suspend * configuration is still valid. */ return; } /* * Set vCPU ID. If available fetch the ID from CPUID, if not just use * the ACPI ID. */ KASSERT(cpuid_base != 0, ("Invalid base Xen CPUID leaf")); cpuid_count(cpuid_base + 4, 0, regs); PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ? regs[1] : PCPU_GET(acpi_id)); /* * Set the vCPU info. * * NB: the vCPU info for vCPUs < 32 can be fetched from the shared info * page, but in order to make sure the mapping code is correct always * attempt to map the vCPU info at a custom place. 
*/ vcpu_info = DPCPU_PTR(vcpu_local_info); cpu = PCPU_GET(vcpu_id); info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); if (rc != 0) DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]); else DPCPU_SET(vcpu_info, vcpu_info); } SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); /* HVM/PVH start_info accessors */ static vm_paddr_t hvm_get_xenstore_mfn(void) { return (hvm_get_parameter(HVM_PARAM_STORE_PFN)); } static evtchn_port_t hvm_get_xenstore_evtchn(void) { return (hvm_get_parameter(HVM_PARAM_STORE_EVTCHN)); } static vm_paddr_t hvm_get_console_mfn(void) { return (hvm_get_parameter(HVM_PARAM_CONSOLE_PFN)); } static evtchn_port_t hvm_get_console_evtchn(void) { return (hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN)); } static uint32_t hvm_get_start_flags(void) { - return (0); + return (hvm_start_flags); } struct hypervisor_info hypervisor_info = { .get_xenstore_mfn = hvm_get_xenstore_mfn, .get_xenstore_evtchn = hvm_get_xenstore_evtchn, .get_console_mfn = hvm_get_console_mfn, .get_console_evtchn = hvm_get_console_evtchn, .get_start_flags = hvm_get_start_flags, }; Index: head/sys/x86/xen/pv.c =================================================================== --- head/sys/x86/xen/pv.c (revision 336473) +++ head/sys/x86/xen/pv.c (revision 336474) @@ -1,467 +1,636 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2013 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include +#include #include #include #include #include +#include #include #include #ifdef DDB #include #endif /* Native initial function */ extern u_int64_t hammer_time(u_int64_t, u_int64_t); /* Xen initial function */ -uint64_t hammer_time_xen(start_info_t *, uint64_t); +uint64_t hammer_time_xen_legacy(start_info_t *, uint64_t); +uint64_t hammer_time_xen(vm_paddr_t); #define MAX_E820_ENTRIES 128 /*--------------------------- Forward Declarations ---------------------------*/ -static caddr_t xen_pv_parse_preload_data(u_int64_t); -static void xen_pv_parse_memmap(caddr_t, vm_paddr_t *, int *); +static caddr_t xen_legacy_pvh_parse_preload_data(uint64_t); +static caddr_t 
xen_pvh_parse_preload_data(uint64_t); +static void xen_pvh_parse_memmap(caddr_t, vm_paddr_t *, int *); #ifdef SMP static int xen_pv_start_all_aps(void); #endif /*---------------------------- Extern Declarations ---------------------------*/ #ifdef SMP /* Variables used by amd64 mp_machdep to start APs */ extern char *doublefault_stack; extern char *mce_stack; extern char *nmi_stack; extern char *dbg_stack; #endif /* * Placed by the linker at the end of the bss section, which is the last * section loaded by Xen before loading the symtab and strtab. */ extern uint32_t end; /*-------------------------------- Global Data -------------------------------*/ /* Xen init_ops implementation. */ -struct init_ops xen_init_ops = { - .parse_preload_data = xen_pv_parse_preload_data, +struct init_ops xen_legacy_init_ops = { + .parse_preload_data = xen_legacy_pvh_parse_preload_data, .early_clock_source_init = xen_clock_init, .early_delay = xen_delay, - .parse_memmap = xen_pv_parse_memmap, + .parse_memmap = xen_pvh_parse_memmap, #ifdef SMP .start_all_aps = xen_pv_start_all_aps, #endif .msi_init = xen_msi_init, }; +struct init_ops xen_pvh_init_ops = { + .parse_preload_data = xen_pvh_parse_preload_data, + .early_clock_source_init = xen_clock_init, + .early_delay = xen_delay, + .parse_memmap = xen_pvh_parse_memmap, +#ifdef SMP + .mp_bootaddress = mp_bootaddress, + .start_all_aps = native_start_all_aps, +#endif + .msi_init = msi_init, +}; + static struct bios_smap xen_smap[MAX_E820_ENTRIES]; static start_info_t *legacy_start_info; +static struct hvm_start_info *start_info; /*----------------------- Legacy PVH start_info accessors --------------------*/ static vm_paddr_t legacy_get_xenstore_mfn(void) { return (legacy_start_info->store_mfn); } static evtchn_port_t legacy_get_xenstore_evtchn(void) { return (legacy_start_info->store_evtchn); } static vm_paddr_t legacy_get_console_mfn(void) { return (legacy_start_info->console.domU.mfn); } static evtchn_port_t legacy_get_console_evtchn(void) 
{ return (legacy_start_info->console.domU.evtchn); } static uint32_t legacy_get_start_flags(void) { return (legacy_start_info->flags); } struct hypervisor_info legacy_info = { .get_xenstore_mfn = legacy_get_xenstore_mfn, .get_xenstore_evtchn = legacy_get_xenstore_evtchn, .get_console_mfn = legacy_get_console_mfn, .get_console_evtchn = legacy_get_console_evtchn, .get_start_flags = legacy_get_start_flags, }; /*-------------------------------- Xen PV init -------------------------------*/ /* * First function called by the Xen legacy PVH boot sequence. * * Set some Xen global variables and prepare the environment so it is * as similar as possible to what native FreeBSD init function expects. */ uint64_t -hammer_time_xen(start_info_t *si, uint64_t xenstack) +hammer_time_xen_legacy(start_info_t *si, uint64_t xenstack) { uint64_t physfree; uint64_t *PT4 = (u_int64_t *)xenstack; uint64_t *PT3 = (u_int64_t *)(xenstack + PAGE_SIZE); uint64_t *PT2 = (u_int64_t *)(xenstack + 2 * PAGE_SIZE); int i; xen_domain_type = XEN_PV_DOMAIN; vm_guest = VM_GUEST_XEN; if ((si == NULL) || (xenstack == 0)) { xc_printf("ERROR: invalid start_info or xen stack, halting\n"); HYPERVISOR_shutdown(SHUTDOWN_crash); } xc_printf("FreeBSD PVH running on %s\n", si->magic); /* We use 3 pages of xen stack for the boot pagetables */ physfree = xenstack + 3 * PAGE_SIZE - KERNBASE; /* Setup Xen global variables */ legacy_start_info = si; HYPERVISOR_shared_info = (shared_info_t *)(si->shared_info + KERNBASE); /* * Use the stack Xen gives us to build the page tables * as native FreeBSD expects to find them (created * by the boot trampoline). 
*/ for (i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); i++) { /* * Each slot of the level 4 pages points * to the same level 3 page */ PT4[i] = ((uint64_t)&PT3[0]) - KERNBASE; PT4[i] |= PG_V | PG_RW | PG_U; /* * Each slot of the level 3 pages points * to the same level 2 page */ PT3[i] = ((uint64_t)&PT2[0]) - KERNBASE; PT3[i] |= PG_V | PG_RW | PG_U; /* * The level 2 page slots are mapped with * 2MB pages for 1GB. */ PT2[i] = i * (2 * 1024 * 1024); PT2[i] |= PG_V | PG_RW | PG_PS | PG_U; } load_cr3(((uint64_t)&PT4[0]) - KERNBASE); /* Set the hooks for early functions that diverge from bare metal */ - init_ops = xen_init_ops; + init_ops = xen_legacy_init_ops; apic_ops = xen_apic_ops; hypervisor_info = legacy_info; /* Now we can jump into the native init function */ return (hammer_time(0, physfree)); } +uint64_t +hammer_time_xen(vm_paddr_t start_info_paddr) +{ + struct hvm_modlist_entry *mod; + struct xen_add_to_physmap xatp; + uint64_t physfree; + char *kenv; + int rc; + + xen_domain_type = XEN_HVM_DOMAIN; + vm_guest = VM_GUEST_XEN; + + rc = xen_hvm_init_hypercall_stubs(XEN_HVM_INIT_EARLY); + if (rc) { + xc_printf("ERROR: failed to initialize hypercall page: %d\n", + rc); + HYPERVISOR_shutdown(SHUTDOWN_crash); + } + + start_info = (struct hvm_start_info *)(start_info_paddr + KERNBASE); + if (start_info->magic != XEN_HVM_START_MAGIC_VALUE) { + xc_printf("Unknown magic value in start_info struct: %#x\n", + start_info->magic); + HYPERVISOR_shutdown(SHUTDOWN_crash); + } + + /* + * The hvm_start_into structure is always appended after loading + * the kernel and modules. 
+ */ + physfree = roundup2(start_info_paddr + PAGE_SIZE, PAGE_SIZE); + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = atop(physfree); + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) { + xc_printf("ERROR: failed to setup shared_info page\n"); + HYPERVISOR_shutdown(SHUTDOWN_crash); + } + HYPERVISOR_shared_info = (shared_info_t *)(physfree + KERNBASE); + physfree += PAGE_SIZE; + + /* + * Init a static kenv using a free page. The contents will be filled + * from the parse_preload_data hook. + */ + kenv = (void *)(physfree + KERNBASE); + physfree += PAGE_SIZE; + bzero(kenv, PAGE_SIZE); + init_static_kenv(kenv, PAGE_SIZE); + + if (start_info->modlist_paddr != 0) { + if (start_info->modlist_paddr >= physfree) { + xc_printf( + "ERROR: unexpected module list memory address\n"); + HYPERVISOR_shutdown(SHUTDOWN_crash); + } + if (start_info->nr_modules == 0) { + xc_printf( + "ERROR: modlist_paddr != 0 but nr_modules == 0\n"); + HYPERVISOR_shutdown(SHUTDOWN_crash); + } + mod = (struct hvm_modlist_entry *) + (vm_paddr_t)start_info->modlist_paddr + KERNBASE; + if (mod[0].paddr >= physfree) { + xc_printf("ERROR: unexpected module memory address\n"); + HYPERVISOR_shutdown(SHUTDOWN_crash); + } + } + + /* Set the hooks for early functions that diverge from bare metal */ + init_ops = xen_pvh_init_ops; + hvm_start_flags = start_info->flags; + + /* Now we can jump into the native init function */ + return (hammer_time(0, physfree)); +} + /*-------------------------------- PV specific -------------------------------*/ #ifdef SMP static bool start_xen_ap(int cpu) { struct vcpu_guest_context *ctxt; int ms, cpus = mp_naps; const size_t stacksize = kstack_pages * PAGE_SIZE; /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, stacksize, M_WAITOK | M_ZERO); doublefault_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); mce_stack = (char *)kmem_malloc(kernel_arena, 
PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dbg_stack = (void *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 8; bootAP = cpu; ctxt = malloc(sizeof(*ctxt), M_TEMP, M_WAITOK | M_ZERO); ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.rip = (unsigned long) init_secondary; ctxt->user_regs.rsp = (unsigned long) bootSTK; /* Set the AP to use the same page tables */ ctxt->ctrlreg[3] = KPML4phys; if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) panic("unable to initialize AP#%d", cpu); free(ctxt, M_TEMP); /* Launch the vCPU */ if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) panic("unable to start AP#%d", cpu); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return (true); DELAY(1000); } return (false); } static int xen_pv_start_all_aps(void) { int cpu; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); for (cpu = 1; cpu < mp_ncpus; cpu++) { /* attempt to start the Application Processor */ if (!start_xen_ap(cpu)) panic("AP #%d failed to start!", cpu); CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } return (mp_naps); } #endif /* SMP */ /* - * Functions to convert the "extra" parameters passed by Xen - * into FreeBSD boot options. + * When booted as a PVH guest FreeBSD needs to avoid using the RSDP address + * hint provided by the loader because it points to the native set of ACPI + * tables instead of the ones crafted by Xen. The acpi.rsdp env variable is + * removed from kenv if present, and a new acpi.rsdp is added to kenv that + * points to the address of the Xen crafted RSDP. 
*/ +static bool +reject_option(const char *option) +{ + static const char *reject[] = { + "acpi.rsdp", + }; + unsigned int i; + + for (i = 0; i < nitems(reject); i++) + if (strncmp(option, reject[i], strlen(reject[i])) == 0) + return (true); + + return (false); +} + +/* + * Add every "name=value" string found in env to the kernel environment. + * env is walked as a sequence of NUL-terminated strings that ends at an + * empty string; entries for which the optional filter returns true are + * skipped, and so are entries that carry no '=' separator. + */ static void -xen_pv_set_env(void) +xen_pvh_set_env(char *env, bool (*filter)(const char *)) { - char *cmd_line_next, *cmd_line; - size_t env_size; + char *option; - cmd_line = legacy_start_info->cmd_line; - env_size = sizeof(legacy_start_info->cmd_line); + if (env == NULL) + return; - /* Skip leading spaces */ - for (; isspace(*cmd_line) && (env_size != 0); cmd_line++) - env_size--; + option = env; + while (*option != 0) { + char *value; - /* Replace ',' with '\0' */ - for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;) - ; + if (filter != NULL && filter(option)) { + option += strlen(option) + 1; + continue; + } - init_static_kenv(cmd_line, 0); + value = option; + option = strsep(&value, "="); + if (value == NULL) { + /* strsep() found no '=': there is no value to set. */ + xc_printf("ignoring malformed kenv entry %s\n", option); + option += strlen(option) + 1; + continue; + } + if (kern_setenv(option, value) != 0) + xc_printf("unable to add kenv %s=%s\n", option, value); + option = value + strlen(value) + 1; + } } #ifdef DDB /* * The way Xen loads the symtab is different from the native boot loader, * because it's tailored for NetBSD. So we have to adapt and use the same * method as NetBSD. Portions of the code below have been picked from NetBSD: * sys/kern/kern_ksyms.c CVS Revision 1.71. */ static void -xen_pv_parse_symtab(void) +xen_pvh_parse_symtab(void) { Elf_Ehdr *ehdr; Elf_Shdr *shdr; - vm_offset_t sym_end; uint32_t size; int i, j; size = end; - sym_end = legacy_start_info->mod_start != 0 ? - legacy_start_info->mod_start : legacy_start_info->mfn_list; - /* - * Make sure the size is right headed, sym_end is just a - * high boundary, but at least allows us to fail earlier. 
- */ - if ((vm_offset_t)&end + size > sym_end) { - xc_printf("Unable to load ELF symtab: size mismatch\n"); - return; - } - ehdr = (Elf_Ehdr *)(&end + 1); if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || ehdr->e_version > 1) { xc_printf("Unable to load ELF symtab: invalid symbol table\n"); return; } shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); /* Find the symbol table and the corresponding string table. */ for (i = 1; i < ehdr->e_shnum; i++) { if (shdr[i].sh_type != SHT_SYMTAB) continue; if (shdr[i].sh_offset == 0) continue; ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset); ksymtab_size = shdr[i].sh_size; j = shdr[i].sh_link; if (shdr[j].sh_offset == 0) continue; /* Can this happen? */ kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset); break; } - if (ksymtab == 0 || kstrtab == 0) { + if (ksymtab == 0 || kstrtab == 0) xc_printf( "Unable to load ELF symtab: could not find symtab or strtab\n"); - return; - } } #endif static caddr_t -xen_pv_parse_preload_data(u_int64_t modulep) +xen_legacy_pvh_parse_preload_data(uint64_t modulep) { caddr_t kmdp; vm_ooffset_t off; vm_paddr_t metadata; char *envp; if (legacy_start_info->mod_start != 0) { preload_metadata = (caddr_t)legacy_start_info->mod_start; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); KASSERT(kmdp != NULL, ("unable to find kernel")); /* * Xen has relocated the metadata and the modules, * so we need to recalculate it's position. This is * done by saving the original modulep address and * then calculating the offset with mod_start, * which contains the relocated modulep address. 
*/ metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t); off = legacy_start_info->mod_start - metadata; preload_bootstrap_relocate(off); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); if (envp != NULL) envp += off; - init_static_kenv(envp, 0); + xen_pvh_set_env(envp, NULL); } else { /* Parse the extra boot information given by Xen */ - xen_pv_set_env(); - boothowto |= boot_env_to_howto(); + boot_parse_cmdline_delim(legacy_start_info->cmd_line, ","); kmdp = NULL; } + boothowto |= boot_env_to_howto(); + #ifdef DDB - xen_pv_parse_symtab(); + xen_pvh_parse_symtab(); #endif return (kmdp); } +static caddr_t +xen_pvh_parse_preload_data(uint64_t modulep) +{ + caddr_t kmdp; + vm_ooffset_t off; + vm_paddr_t metadata; + char *envp; + char acpi_rsdp[19]; + + if (start_info->modlist_paddr != 0) { + struct hvm_modlist_entry *mod; + + mod = (struct hvm_modlist_entry *) + (start_info->modlist_paddr + KERNBASE); + preload_metadata = (caddr_t)(mod[0].paddr + KERNBASE); + + kmdp = preload_search_by_type("elf kernel"); + if (kmdp == NULL) + kmdp = preload_search_by_type("elf64 kernel"); + KASSERT(kmdp != NULL, ("unable to find kernel")); + + /* + * Xen has relocated the metadata and the modules, + * so we need to recalculate it's position. This is + * done by saving the original modulep address and + * then calculating the offset with mod_start, + * which contains the relocated modulep address. 
+ */ + metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t); + off = mod[0].paddr + KERNBASE - metadata; + + preload_bootstrap_relocate(off); + + boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); + envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + if (envp != NULL) + envp += off; + xen_pvh_set_env(envp, reject_option); + } else { + /* Parse the extra boot information given by Xen */ + if (start_info->cmdline_paddr != 0) + boot_parse_cmdline_delim( + (char *)(start_info->cmdline_paddr + KERNBASE), + ","); + kmdp = NULL; + } + + boothowto |= boot_env_to_howto(); + + snprintf(acpi_rsdp, sizeof(acpi_rsdp), "%#" PRIx64, + start_info->rsdp_paddr); + kern_setenv("acpi.rsdp", acpi_rsdp); + +#ifdef DDB + xen_pvh_parse_symtab(); +#endif + return (kmdp); +} + static void -xen_pv_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) +xen_pvh_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) { struct xen_memory_map memmap; u_int32_t size; int rc; /* Fetch the E820 map from Xen */ memmap.nr_entries = MAX_E820_ENTRIES; set_xen_guest_handle(memmap.buffer, xen_smap); rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); - if (rc) - panic("unable to fetch Xen E820 memory map"); + if (rc) { + xc_printf("ERROR: unable to fetch Xen E820 memory map: %d\n", + rc); + HYPERVISOR_shutdown(SHUTDOWN_crash); + } + size = memmap.nr_entries * sizeof(xen_smap[0]); bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); } Index: head/sys/xen/hvm.h =================================================================== --- head/sys/xen/hvm.h (revision 336473) +++ head/sys/xen/hvm.h (revision 336474) @@ -1,105 +1,108 @@ /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to 
permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef __XEN_HVM_H__ #define __XEN_HVM_H__ #include #include #include /** * \brief Wrapper function to obtain a HVM parameter value. * * \param index HVM parameter index; see . * * \returns 0 on failure; the value of the parameter otherwise. */ static inline unsigned long hvm_get_parameter(int index) { struct xen_hvm_param xhv; int error; xhv.domid = DOMID_SELF; xhv.index = index; error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); if (error) { printf("%s: error %d trying to get %d\n", __func__, error, index); return (0); } return (xhv.value); } /** The callback method types for Hypervisor event delivery to our domain. */ enum { HVM_CB_TYPE_GSI, HVM_CB_TYPE_PCI_INTX, HVM_CB_TYPE_VECTOR, HVM_CB_TYPE_MASK = 0xFF, HVM_CB_TYPE_SHIFT = 56 }; /** Format for specifying a GSI type callback. */ enum { HVM_CB_GSI_GSI_MASK = 0xFFFFFFFF, HVM_CB_GSI_GSI_SHIFT = 0 }; #define HVM_CALLBACK_GSI(gsi) \ (((uint64_t)HVM_CB_TYPE_GSI << HVM_CB_TYPE_SHIFT) \ | ((gsi) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT) /** Format for specifying a virtual PCI interrupt line GSI style callback. 
*/ enum { HVM_CB_PCI_INTX_INTPIN_MASK = 0x3, HVM_CB_PCI_INTX_INTPIN_SHIFT = 0, HVM_CB_PCI_INTX_SLOT_MASK = 0x1F, HVM_CB_PCI_INTX_SLOT_SHIFT = 11, }; #define HVM_CALLBACK_PCI_INTX(slot, pin) \ (((uint64_t)HVM_CB_TYPE_PCI_INTX << HVM_CB_TYPE_SHIFT) \ | (((slot) & HVM_CB_PCI_INTX_SLOT_MASK) << HVM_CB_PCI_INTX_SLOT_SHIFT) \ | (((pin) & HVM_CB_PCI_INTX_INTPIN_MASK) << HVM_CB_PCI_INTX_INTPIN_SHIFT)) /** Format for specifying a direct IDT vector injection style callback. */ enum { HVM_CB_VECTOR_VECTOR_MASK = 0xFFFFFFFF, HVM_CB_VECTOR_VECTOR_SHIFT = 0 }; #define HVM_CALLBACK_VECTOR(vector) \ (((uint64_t)HVM_CB_TYPE_VECTOR << HVM_CB_TYPE_SHIFT) \ | (((vector) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT)) enum xen_hvm_init_type { XEN_HVM_INIT_EARLY, XEN_HVM_INIT_LATE, XEN_HVM_INIT_CANCELLED_SUSPEND, XEN_HVM_INIT_RESUME, }; int xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type); void xen_hvm_set_callback(device_t); void xen_hvm_suspend(void); void xen_hvm_resume(bool suspend_cancelled); + +extern uint32_t hvm_start_flags; + #endif /* __XEN_HVM_H__ */