Changeset View
Standalone View
sys/x86/xen/pv.c
Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | |||||
#include <machine/cpu.h> | #include <machine/cpu.h> | ||||
#include <xen/xen-os.h> | #include <xen/xen-os.h> | ||||
#include <xen/hvm.h> | #include <xen/hvm.h> | ||||
#include <xen/hypervisor.h> | #include <xen/hypervisor.h> | ||||
#include <xen/xenstore/xenstorevar.h> | #include <xen/xenstore/xenstorevar.h> | ||||
#include <xen/xen_pv.h> | #include <xen/xen_pv.h> | ||||
#include <contrib/xen/arch-x86/cpuid.h> | |||||
#include <contrib/xen/arch-x86/hvm/start_info.h> | #include <contrib/xen/arch-x86/hvm/start_info.h> | ||||
#include <contrib/xen/vcpu.h> | #include <contrib/xen/vcpu.h> | ||||
#include <dev/xen/timer/timer.h> | #include <dev/xen/timer/timer.h> | ||||
#ifdef DDB | #ifdef DDB | ||||
#include <ddb/ddb.h> | #include <ddb/ddb.h> | ||||
#endif | #endif | ||||
Show All 25 Lines | |||||
}; | }; | ||||
static struct bios_smap xen_smap[MAX_E820_ENTRIES]; | static struct bios_smap xen_smap[MAX_E820_ENTRIES]; | ||||
static struct hvm_start_info *start_info; | static struct hvm_start_info *start_info; | ||||
/*-------------------------------- Xen PV init -------------------------------*/ | /*-------------------------------- Xen PV init -------------------------------*/ | ||||
static int | |||||
royger: nit: bool would be better here, or the usage of the function below would have to use 'isxen() != 0', because the coding style states: "Do not use ! for tests unless it is a boolean, ...". | |||||
isxen(void) | |||||
{ | |||||
static int xen = -1; | |||||
Not Done Inline ActionsI think we might want to somehow cache whether we are running on Xen or not, as a CPUID instruction will trigger a vmexit and that's costly. Using a static local variable would be fine IMO. royger: I think we might want to somehow cache whether we are running on Xen or not, as a CPUID… | |||||
Done Inline ActionsI don't know that we call this enough times (assuming we're not crashing, between 2 and 5 times) to matter, but I've added the optimization. cperciva: I don't know that we call this enough times (assuming we're not crashing, between 2 and 5… | |||||
uint32_t base; | |||||
u_int regs[4]; | |||||
if (xen != -1) | |||||
return (xen); | |||||
/* | |||||
* The full code for identifying which hypervisor we're running under | |||||
Not Done Inline ActionsYou need to check CPUID range 0x40000000 to 0x40010000, as when Xen is exposing Viridian (HyperV) extensions the signature at 0x40000000 will be the HyperV one. See xen_hvm_cpuid_base() in hvm.c royger: You need to check CPUID range 0x40000000 to 0x40010000, as when Xen is exposing Viridian… | |||||
Done Inline ActionsUgh, ok. I didn't realize that Xen could PVH boot a guest while pretending to be something other than Xen. cperciva: Ugh, ok. I didn't realize that Xen could PVH boot a guest while pretending to be something other… | |||||
* is in sys/x86/x86/identcpu.c and runs later in the boot process; | |||||
* this is sufficient to distinguish Xen PVH booting from non-Xen PVH | |||||
* and skip some very early Xen-specific code in the non-Xen case. | |||||
*/ | |||||
xen = 0; | |||||
for (base = 0x40000000; base < 0x40010000; base += 0x100) { | |||||
do_cpuid(base, regs); | |||||
if (regs[1] == XEN_CPUID_SIGNATURE_EBX && | |||||
regs[2] == XEN_CPUID_SIGNATURE_ECX && | |||||
regs[3] == XEN_CPUID_SIGNATURE_EDX) { | |||||
xen = 1; | |||||
break; | |||||
} | |||||
} | |||||
Not Done Inline ActionsI guess there's no way to print any kind of message to the console in the firecracker case here? royger: I guess there's no way to print any kind of message to the console in the firecracker case here? | |||||
Done Inline ActionsCorrect, unless we want to assume there's a UART and do a bunch of outb blindly. cperciva: Correct, unless we want to assume there's a UART and do a bunch of `outb` blindly. | |||||
return (xen); | |||||
} | |||||
#define CRASH(...) do { \ | |||||
if (isxen()) { \ | |||||
xc_printf(__VA_ARGS__); \ | |||||
HYPERVISOR_shutdown(SHUTDOWN_crash); \ | |||||
} else { \ | |||||
halt(); \ | |||||
} \ | |||||
} while (0) | |||||
uint64_t | uint64_t | ||||
hammer_time_xen(vm_paddr_t start_info_paddr) | hammer_time_xen(vm_paddr_t start_info_paddr) | ||||
Not Done Inline ActionsYou might want to rename to hammer_time_pvh() for clarity (since it's no longer Xen-specific) royger: You might want to rename to hammer_time_pvh() for clarity (since it's no longer Xen-specific) | |||||
Done Inline ActionsMy plan is to get things working first, then rename functions / move stuff between files / make it compile without options XENHVM / etc. later. cperciva: My plan is to get things working first, then rename functions / move stuff between files / make… | |||||
{ | { | ||||
struct hvm_modlist_entry *mod; | struct hvm_modlist_entry *mod; | ||||
struct xen_add_to_physmap xatp; | struct xen_add_to_physmap xatp; | ||||
uint64_t physfree; | uint64_t physfree; | ||||
char *kenv; | char *kenv; | ||||
int rc; | int rc; | ||||
if (isxen()) { | |||||
xen_domain_type = XEN_HVM_DOMAIN; | xen_domain_type = XEN_HVM_DOMAIN; | ||||
vm_guest = VM_GUEST_XEN; | vm_guest = VM_GUEST_XEN; | ||||
rc = xen_hvm_init_hypercall_stubs(XEN_HVM_INIT_EARLY); | rc = xen_hvm_init_hypercall_stubs(XEN_HVM_INIT_EARLY); | ||||
if (rc) { | if (rc) { | ||||
xc_printf("ERROR: failed to initialize hypercall page: %d\n", | xc_printf("ERROR: failed to initialize hypercall page: %d\n", | ||||
rc); | rc); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | HYPERVISOR_shutdown(SHUTDOWN_crash); | ||||
} | } | ||||
} | |||||
start_info = (struct hvm_start_info *)(start_info_paddr + KERNBASE); | start_info = (struct hvm_start_info *)(start_info_paddr + KERNBASE); | ||||
if (start_info->magic != XEN_HVM_START_MAGIC_VALUE) { | if (start_info->magic != XEN_HVM_START_MAGIC_VALUE) { | ||||
xc_printf("Unknown magic value in start_info struct: %#x\n", | CRASH("Unknown magic value in start_info struct: %#x\n", | ||||
start_info->magic); | start_info->magic); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
/* | /* | ||||
* Select the higher address to use as physfree: either after | * Select the higher address to use as physfree: either after | ||||
* start_info, after the kernel, after the memory map or after any of | * start_info, after the kernel, after the memory map or after any of | ||||
* the modules. We assume enough memory to be available after the | * the modules. We assume enough memory to be available after the | ||||
* selected address for the needs of very early memory allocations. | * selected address for the needs of very early memory allocations. | ||||
*/ | */ | ||||
physfree = roundup2(start_info_paddr + sizeof(struct hvm_start_info), | physfree = roundup2(start_info_paddr + sizeof(struct hvm_start_info), | ||||
PAGE_SIZE); | PAGE_SIZE); | ||||
physfree = MAX(roundup2((vm_paddr_t)_end - KERNBASE, PAGE_SIZE), | physfree = MAX(roundup2((vm_paddr_t)_end - KERNBASE, PAGE_SIZE), | ||||
physfree); | physfree); | ||||
if (start_info->memmap_paddr != 0) | if (start_info->memmap_paddr != 0) | ||||
physfree = MAX(roundup2(start_info->memmap_paddr + | physfree = MAX(roundup2(start_info->memmap_paddr + | ||||
start_info->memmap_entries * | start_info->memmap_entries * | ||||
sizeof(struct hvm_memmap_table_entry), PAGE_SIZE), | sizeof(struct hvm_memmap_table_entry), PAGE_SIZE), | ||||
physfree); | physfree); | ||||
if (start_info->modlist_paddr != 0) { | if (start_info->modlist_paddr != 0) { | ||||
unsigned int i; | unsigned int i; | ||||
if (start_info->nr_modules == 0) { | if (start_info->nr_modules == 0) { | ||||
xc_printf( | CRASH( | ||||
"ERROR: modlist_paddr != 0 but nr_modules == 0\n"); | "ERROR: modlist_paddr != 0 but nr_modules == 0\n"); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
mod = (struct hvm_modlist_entry *) | mod = (struct hvm_modlist_entry *) | ||||
(start_info->modlist_paddr + KERNBASE); | (start_info->modlist_paddr + KERNBASE); | ||||
for (i = 0; i < start_info->nr_modules; i++) | for (i = 0; i < start_info->nr_modules; i++) | ||||
physfree = MAX(roundup2(mod[i].paddr + mod[i].size, | physfree = MAX(roundup2(mod[i].paddr + mod[i].size, | ||||
PAGE_SIZE), physfree); | PAGE_SIZE), physfree); | ||||
} | } | ||||
if (isxen()) { | |||||
xatp.domid = DOMID_SELF; | xatp.domid = DOMID_SELF; | ||||
xatp.idx = 0; | xatp.idx = 0; | ||||
xatp.space = XENMAPSPACE_shared_info; | xatp.space = XENMAPSPACE_shared_info; | ||||
xatp.gpfn = atop(physfree); | xatp.gpfn = atop(physfree); | ||||
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) { | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) { | ||||
xc_printf("ERROR: failed to setup shared_info page\n"); | xc_printf("ERROR: failed to setup shared_info page\n"); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | HYPERVISOR_shutdown(SHUTDOWN_crash); | ||||
} | } | ||||
HYPERVISOR_shared_info = (shared_info_t *)(physfree + KERNBASE); | HYPERVISOR_shared_info = (shared_info_t *)(physfree + KERNBASE); | ||||
physfree += PAGE_SIZE; | physfree += PAGE_SIZE; | ||||
} | |||||
/* | /* | ||||
* Init a static kenv using a free page. The contents will be filled | * Init a static kenv using a free page. The contents will be filled | ||||
* from the parse_preload_data hook. | * from the parse_preload_data hook. | ||||
*/ | */ | ||||
kenv = (void *)(physfree + KERNBASE); | kenv = (void *)(physfree + KERNBASE); | ||||
physfree += PAGE_SIZE; | physfree += PAGE_SIZE; | ||||
bzero_early(kenv, PAGE_SIZE); | bzero_early(kenv, PAGE_SIZE); | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | while (*option != 0) { | ||||
if (filter != NULL && filter(option)) { | if (filter != NULL && filter(option)) { | ||||
option += strlen(option) + 1; | option += strlen(option) + 1; | ||||
continue; | continue; | ||||
} | } | ||||
value = option; | value = option; | ||||
option = strsep(&value, "="); | option = strsep(&value, "="); | ||||
if (kern_setenv(option, value) != 0) | if (kern_setenv(option, value) != 0 && isxen()) | ||||
xc_printf("unable to add kenv %s=%s\n", option, value); | xc_printf("unable to add kenv %s=%s\n", option, value); | ||||
option = value + strlen(value) + 1; | option = value + strlen(value) + 1; | ||||
} | } | ||||
} | } | ||||
#ifdef DDB | #ifdef DDB | ||||
/* | /* | ||||
* The way Xen loads the symtab is different from the native boot loader, | * The way Xen loads the symtab is different from the native boot loader, | ||||
* because it's tailored for NetBSD. So we have to adapt and use the same | * because it's tailored for NetBSD. So we have to adapt and use the same | ||||
* method as NetBSD. Portions of the code below have been picked from NetBSD: | * method as NetBSD. Portions of the code below have been picked from NetBSD: | ||||
* sys/kern/kern_ksyms.c CVS Revision 1.71. | * sys/kern/kern_ksyms.c CVS Revision 1.71. | ||||
*/ | */ | ||||
static void | static void | ||||
xen_pvh_parse_symtab(void) | xen_pvh_parse_symtab(void) | ||||
Not Done Inline ActionsThis is very specific to how Xen loads the symtab for BSDs, which is unlikely to be useful for firecracker (as I guess the loader there doesn't load the symtab at all?) royger: This is very specific to how Xen loads the symtab for BSDs, which is unlikely to be useful for… | |||||
Done Inline ActionsAt present I don't think the symbols get loaded at all. Theoretically a future Firecracker might load the entire kernel file including symbols? But yes, Firecracker currently hits the "Unable to load ELF symtab" and returns quickly. cperciva: At present I don't think the symbols get loaded at all. Theoretically a future Firecracker… | |||||
{ | { | ||||
Elf_Ehdr *ehdr; | Elf_Ehdr *ehdr; | ||||
Elf_Shdr *shdr; | Elf_Shdr *shdr; | ||||
int i, j; | int i, j; | ||||
ehdr = (Elf_Ehdr *)(&end + 1); | ehdr = (Elf_Ehdr *)(&end + 1); | ||||
if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || | if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || | ||||
ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || | ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || | ||||
ehdr->e_version > 1) { | ehdr->e_version > 1) { | ||||
if (isxen()) | |||||
xc_printf("Unable to load ELF symtab: invalid symbol table\n"); | xc_printf("Unable to load ELF symtab: invalid symbol table\n"); | ||||
return; | return; | ||||
} | } | ||||
shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); | shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); | ||||
/* Find the symbol table and the corresponding string table. */ | /* Find the symbol table and the corresponding string table. */ | ||||
for (i = 1; i < ehdr->e_shnum; i++) { | for (i = 1; i < ehdr->e_shnum; i++) { | ||||
if (shdr[i].sh_type != SHT_SYMTAB) | if (shdr[i].sh_type != SHT_SYMTAB) | ||||
continue; | continue; | ||||
if (shdr[i].sh_offset == 0) | if (shdr[i].sh_offset == 0) | ||||
continue; | continue; | ||||
ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset); | ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset); | ||||
ksymtab_size = shdr[i].sh_size; | ksymtab_size = shdr[i].sh_size; | ||||
j = shdr[i].sh_link; | j = shdr[i].sh_link; | ||||
if (shdr[j].sh_offset == 0) | if (shdr[j].sh_offset == 0) | ||||
continue; /* Can this happen? */ | continue; /* Can this happen? */ | ||||
kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset); | kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset); | ||||
break; | break; | ||||
} | } | ||||
if (ksymtab == 0 || kstrtab == 0) | if ((ksymtab == 0 || kstrtab == 0) && isxen()) | ||||
xc_printf( | xc_printf( | ||||
Not Done Inline ActionsYou can join with the previous condition to avoid adding more indentation to the line. royger: You can join with the previous condition to avoid adding more indentation to the line. | |||||
Done Inline ActionsFixed. cperciva: Fixed. | |||||
"Unable to load ELF symtab: could not find symtab or strtab\n"); | "Unable to load ELF symtab: could not find symtab or strtab\n"); | ||||
} | } | ||||
#endif | #endif | ||||
static caddr_t | static caddr_t | ||||
xen_pvh_parse_preload_data(uint64_t modulep) | xen_pvh_parse_preload_data(uint64_t modulep) | ||||
Not Done Inline ActionsAre you planning to use this function for firecracker? If so it would benefit from renaming also. Otherwise I would leave the xc_printf() calls as-is. royger: Are you planning to use this function for firecracker? If so it would benefit from renaming… | |||||
Done Inline ActionsI'm not sure. I'm using a two year old branch of Firecracker which added PVH support and I'm not 100% confident that they implemented everything. Right now we have modlist_paddr == 0 but I'm not sure that will always be the case after PVH support is merged into Firecracker's main branch. (In any case, renaming will come later.) cperciva: I'm not sure. I'm using a two year old branch of Firecracker which added PVH support and I'm… | |||||
{ | { | ||||
caddr_t kmdp; | caddr_t kmdp; | ||||
vm_ooffset_t off; | vm_ooffset_t off; | ||||
vm_paddr_t metadata; | vm_paddr_t metadata; | ||||
char *envp; | char *envp; | ||||
char acpi_rsdp[19]; | char acpi_rsdp[19]; | ||||
if (start_info->modlist_paddr != 0) { | if (start_info->modlist_paddr != 0) { | ||||
▲ Show 20 Lines • Show All 117 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
static void | static void | ||||
xen_pvh_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) | xen_pvh_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) | ||||
{ | { | ||||
struct xen_memory_map memmap; | struct xen_memory_map memmap; | ||||
u_int32_t size; | u_int32_t size; | ||||
int rc; | int rc; | ||||
/* We should only reach here if we're running under Xen. */ | |||||
KASSERT(isxen(), ("xen_pvh_parse_memmap reached when !Xen")); | |||||
/* Fetch the E820 map from Xen */ | /* Fetch the E820 map from Xen */ | ||||
memmap.nr_entries = MAX_E820_ENTRIES; | memmap.nr_entries = MAX_E820_ENTRIES; | ||||
set_xen_guest_handle(memmap.buffer, xen_smap); | set_xen_guest_handle(memmap.buffer, xen_smap); | ||||
rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); | rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); | ||||
if (rc) { | if (rc) { | ||||
xc_printf("ERROR: unable to fetch Xen E820 memory map: %d\n", | xc_printf("ERROR: unable to fetch Xen E820 memory map: %d\n", | ||||
Not Done Inline ActionsThere's no need to replace this one, as the previous call is already a Xen-specific hypercall, so if you get here for other hypervisors it's likely you won't be able to reach the CRASH() anyway due to jumping into an uninitialized hypercall page. royger: There's no need to replace this one, as the previous call is already a Xen-specific hypercall, so… | |||||
Done Inline ActionsQuite right. Sorry, that was a search-and-replace which went too far. cperciva: Quite right. Sorry, that was a search-and-replace which went too far. | |||||
rc); | rc); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | HYPERVISOR_shutdown(SHUTDOWN_crash); | ||||
} | } | ||||
size = memmap.nr_entries * sizeof(xen_smap[0]); | size = memmap.nr_entries * sizeof(xen_smap[0]); | ||||
bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); | bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); | ||||
} | } | ||||
Show All 15 Lines |
nit: bool would be better here, or the usage of the function below would have to use 'isxen() != 0' because coding style states: "Do not use ! for tests unless it is a boolean, ...".