Changeset View
Standalone View
sys/x86/xen/pv.c
Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | |||||
#include <machine/cpu.h> | #include <machine/cpu.h> | ||||
#include <xen/xen-os.h> | #include <xen/xen-os.h> | ||||
#include <xen/hvm.h> | #include <xen/hvm.h> | ||||
#include <xen/hypervisor.h> | #include <xen/hypervisor.h> | ||||
#include <xen/xenstore/xenstorevar.h> | #include <xen/xenstore/xenstorevar.h> | ||||
#include <xen/xen_pv.h> | #include <xen/xen_pv.h> | ||||
#include <contrib/xen/arch-x86/cpuid.h> | |||||
#include <contrib/xen/arch-x86/hvm/start_info.h> | #include <contrib/xen/arch-x86/hvm/start_info.h> | ||||
#include <contrib/xen/vcpu.h> | #include <contrib/xen/vcpu.h> | ||||
#include <dev/xen/timer/timer.h> | #include <dev/xen/timer/timer.h> | ||||
#ifdef DDB | #ifdef DDB | ||||
#include <ddb/ddb.h> | #include <ddb/ddb.h> | ||||
#endif | #endif | ||||
Show All 25 Lines | |||||
}; | }; | ||||
static struct bios_smap xen_smap[MAX_E820_ENTRIES]; | static struct bios_smap xen_smap[MAX_E820_ENTRIES]; | ||||
static struct hvm_start_info *start_info; | static struct hvm_start_info *start_info; | ||||
/*-------------------------------- Xen PV init -------------------------------*/ | /*-------------------------------- Xen PV init -------------------------------*/ | ||||
static int | |||||
royger: nit: bool would be better here, or the usage of the function below would have to use 'isxen() != 0' because coding style states: "Do not use ! for tests unless it is a boolean, ...". | |||||
isxen(void) | |||||
{ | |||||
u_int regs[4]; | |||||
roygerUnsubmitted Not Done Inline ActionsI think we might want to somehow cache whether we are running on Xen or not, as a CPUID instruction will trigger a vmexit and that's costly. Using a static local variable would be fine IMO. royger: I think we might want to somehow cache whether we are running on Xen or not, as a CPUID… | |||||
cpercivaAuthorUnsubmitted Done Inline ActionsI don't know that we call this enough times (assuming we're not crashing, between 2 and 5 times) to matter, but I've added the optimization. cperciva: I don't know that we call this enough times (assuming we're not crashing, between 2 and 5… | |||||
/* | |||||
* The full code for identifying which hypervisor we're running under | |||||
* is in sys/x86/x86/identcpu.c and runs later in the boot process; | |||||
* this is sufficient to distinguish Xen PVH booting from non-Xen PVH | |||||
* and skip some very early Xen-specific code in the non-Xen case. | |||||
*/ | |||||
do_cpuid(0x40000000, regs); | |||||
roygerUnsubmitted Not Done Inline ActionsYou need to check CPUID range 0x40000000 to 0x40010000, as when Xen is exposing Viridian (HyperV) extensions the signature at 0x40000000 will be the HyperV one. See xen_hvm_cpuid_base() in hvm.c royger: You need to check CPUID range 0x40000000 to 0x40010000, as when Xen is exposing Viridian… | |||||
cpercivaAuthorUnsubmitted Done Inline ActionsUgh, ok. I didn't realize that Xen could PVH boot it while pretending to be something other than Xen. cperciva: Ugh, ok. I didn't realize that Xen could PVH boot it while pretending to be something other… | |||||
if (regs[1] == XEN_CPUID_SIGNATURE_EBX && | |||||
regs[2] == XEN_CPUID_SIGNATURE_ECX && | |||||
regs[3] == XEN_CPUID_SIGNATURE_EDX) | |||||
return (1); | |||||
else | |||||
return (0); | |||||
} | |||||
#define CRASH(...) do { \ | |||||
if (isxen()) { \ | |||||
xc_printf(__VA_ARGS__); \ | |||||
HYPERVISOR_shutdown(SHUTDOWN_crash); \ | |||||
} else { \ | |||||
halt(); \ | |||||
roygerUnsubmitted Not Done Inline ActionsI guess there's no way to print any kind of message to the console in the firecracker case here? royger: I guess there's no way to print any kind of message to the console in the firecracker case here? | |||||
cpercivaAuthorUnsubmitted Done Inline ActionsCorrect, unless we want to assume there's a UART and do a bunch of outb blindly. cperciva: Correct, unless we want to assume there's a UART and do a bunch of `outb` blindly. | |||||
} \ | |||||
} while (0) | |||||
uint64_t | uint64_t | ||||
hammer_time_xen(vm_paddr_t start_info_paddr) | hammer_time_xen(vm_paddr_t start_info_paddr) | ||||
roygerUnsubmitted Not Done Inline ActionsYou might want to rename to hammer_time_pvh() for clarity (since it's no longer Xen-specific) royger: You might want to rename to hammer_time_pvh() for clarity (since it's no longer Xen-specific) | |||||
cpercivaAuthorUnsubmitted Done Inline ActionsMy plan is to get things working first, then rename functions / move stuff between files / make it compile without options XENHVM / etc. later. cperciva: My plan is to get things working first, then rename functions / move stuff between files / make… | |||||
{ | { | ||||
struct hvm_modlist_entry *mod; | struct hvm_modlist_entry *mod; | ||||
struct xen_add_to_physmap xatp; | struct xen_add_to_physmap xatp; | ||||
uint64_t physfree; | uint64_t physfree; | ||||
char *kenv; | char *kenv; | ||||
int rc; | int rc; | ||||
if (isxen()) { | |||||
xen_domain_type = XEN_HVM_DOMAIN; | xen_domain_type = XEN_HVM_DOMAIN; | ||||
vm_guest = VM_GUEST_XEN; | vm_guest = VM_GUEST_XEN; | ||||
rc = xen_hvm_init_hypercall_stubs(XEN_HVM_INIT_EARLY); | rc = xen_hvm_init_hypercall_stubs(XEN_HVM_INIT_EARLY); | ||||
if (rc) { | if (rc) { | ||||
xc_printf("ERROR: failed to initialize hypercall page: %d\n", | CRASH("ERROR: failed to initialize hypercall page: %d\n", | ||||
rc); | rc); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
} | |||||
start_info = (struct hvm_start_info *)(start_info_paddr + KERNBASE); | start_info = (struct hvm_start_info *)(start_info_paddr + KERNBASE); | ||||
if (start_info->magic != XEN_HVM_START_MAGIC_VALUE) { | if (start_info->magic != XEN_HVM_START_MAGIC_VALUE) { | ||||
xc_printf("Unknown magic value in start_info struct: %#x\n", | CRASH("Unknown magic value in start_info struct: %#x\n", | ||||
start_info->magic); | start_info->magic); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
/* | /* | ||||
* Select the higher address to use as physfree: either after | * Select the higher address to use as physfree: either after | ||||
* start_info, after the kernel, after the memory map or after any of | * start_info, after the kernel, after the memory map or after any of | ||||
* the modules. We assume enough memory to be available after the | * the modules. We assume enough memory to be available after the | ||||
* selected address for the needs of very early memory allocations. | * selected address for the needs of very early memory allocations. | ||||
*/ | */ | ||||
physfree = roundup2(start_info_paddr + sizeof(struct hvm_start_info), | physfree = roundup2(start_info_paddr + sizeof(struct hvm_start_info), | ||||
PAGE_SIZE); | PAGE_SIZE); | ||||
physfree = MAX(roundup2((vm_paddr_t)_end - KERNBASE, PAGE_SIZE), | physfree = MAX(roundup2((vm_paddr_t)_end - KERNBASE, PAGE_SIZE), | ||||
physfree); | physfree); | ||||
if (start_info->memmap_paddr != 0) | if (start_info->memmap_paddr != 0) | ||||
physfree = MAX(roundup2(start_info->memmap_paddr + | physfree = MAX(roundup2(start_info->memmap_paddr + | ||||
start_info->memmap_entries * | start_info->memmap_entries * | ||||
sizeof(struct hvm_memmap_table_entry), PAGE_SIZE), | sizeof(struct hvm_memmap_table_entry), PAGE_SIZE), | ||||
physfree); | physfree); | ||||
if (start_info->modlist_paddr != 0) { | if (start_info->modlist_paddr != 0) { | ||||
unsigned int i; | unsigned int i; | ||||
if (start_info->nr_modules == 0) { | if (start_info->nr_modules == 0) { | ||||
xc_printf( | CRASH( | ||||
"ERROR: modlist_paddr != 0 but nr_modules == 0\n"); | "ERROR: modlist_paddr != 0 but nr_modules == 0\n"); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
mod = (struct hvm_modlist_entry *) | mod = (struct hvm_modlist_entry *) | ||||
(start_info->modlist_paddr + KERNBASE); | (start_info->modlist_paddr + KERNBASE); | ||||
for (i = 0; i < start_info->nr_modules; i++) | for (i = 0; i < start_info->nr_modules; i++) | ||||
physfree = MAX(roundup2(mod[i].paddr + mod[i].size, | physfree = MAX(roundup2(mod[i].paddr + mod[i].size, | ||||
PAGE_SIZE), physfree); | PAGE_SIZE), physfree); | ||||
} | } | ||||
if (isxen()) { | |||||
xatp.domid = DOMID_SELF; | xatp.domid = DOMID_SELF; | ||||
xatp.idx = 0; | xatp.idx = 0; | ||||
xatp.space = XENMAPSPACE_shared_info; | xatp.space = XENMAPSPACE_shared_info; | ||||
xatp.gpfn = atop(physfree); | xatp.gpfn = atop(physfree); | ||||
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) { | if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) | ||||
xc_printf("ERROR: failed to setup shared_info page\n"); | CRASH("ERROR: failed to setup shared_info page\n"); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | |||||
HYPERVISOR_shared_info = (shared_info_t *)(physfree + KERNBASE); | HYPERVISOR_shared_info = (shared_info_t *)(physfree + KERNBASE); | ||||
physfree += PAGE_SIZE; | physfree += PAGE_SIZE; | ||||
} | |||||
/* | /* | ||||
* Init a static kenv using a free page. The contents will be filled | * Init a static kenv using a free page. The contents will be filled | ||||
* from the parse_preload_data hook. | * from the parse_preload_data hook. | ||||
*/ | */ | ||||
kenv = (void *)(physfree + KERNBASE); | kenv = (void *)(physfree + KERNBASE); | ||||
physfree += PAGE_SIZE; | physfree += PAGE_SIZE; | ||||
bzero_early(kenv, PAGE_SIZE); | bzero_early(kenv, PAGE_SIZE); | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | while (*option != 0) { | ||||
if (filter != NULL && filter(option)) { | if (filter != NULL && filter(option)) { | ||||
option += strlen(option) + 1; | option += strlen(option) + 1; | ||||
continue; | continue; | ||||
} | } | ||||
value = option; | value = option; | ||||
option = strsep(&value, "="); | option = strsep(&value, "="); | ||||
if (kern_setenv(option, value) != 0) | if (kern_setenv(option, value) != 0 && isxen()) | ||||
xc_printf("unable to add kenv %s=%s\n", option, value); | xc_printf("unable to add kenv %s=%s\n", option, value); | ||||
option = value + strlen(value) + 1; | option = value + strlen(value) + 1; | ||||
} | } | ||||
} | } | ||||
#ifdef DDB | #ifdef DDB | ||||
/* | /* | ||||
* The way Xen loads the symtab is different from the native boot loader, | * The way Xen loads the symtab is different from the native boot loader, | ||||
* because it's tailored for NetBSD. So we have to adapt and use the same | * because it's tailored for NetBSD. So we have to adapt and use the same | ||||
* method as NetBSD. Portions of the code below have been picked from NetBSD: | * method as NetBSD. Portions of the code below have been picked from NetBSD: | ||||
* sys/kern/kern_ksyms.c CVS Revision 1.71. | * sys/kern/kern_ksyms.c CVS Revision 1.71. | ||||
*/ | */ | ||||
static void | static void | ||||
xen_pvh_parse_symtab(void) | xen_pvh_parse_symtab(void) | ||||
roygerUnsubmitted Not Done Inline ActionsThis is very specific to how Xen loads the symtab for BSDs, which is unlikely to be useful for firecracker (as I guess the loader there doesn't load the symtab at all?) royger: This is very specific to how Xen loads the symtab for BSDs, which is unlikely to be useful for… | |||||
cpercivaAuthorUnsubmitted Done Inline ActionsAt present I don't think the symbols get loaded at all. Theoretically a future Firecracker might load the entire kernel file including symbols? But yes, Firecracker currently hits the "Unable to load ELF symtab" and returns quickly. cperciva: At present I don't think the symbols get loaded at all. Theoretically a future Firecracker… | |||||
{ | { | ||||
Elf_Ehdr *ehdr; | Elf_Ehdr *ehdr; | ||||
Elf_Shdr *shdr; | Elf_Shdr *shdr; | ||||
int i, j; | int i, j; | ||||
ehdr = (Elf_Ehdr *)(&end + 1); | ehdr = (Elf_Ehdr *)(&end + 1); | ||||
if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || | if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || | ||||
ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || | ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || | ||||
ehdr->e_version > 1) { | ehdr->e_version > 1) { | ||||
if (isxen()) | |||||
xc_printf("Unable to load ELF symtab: invalid symbol table\n"); | xc_printf("Unable to load ELF symtab: invalid symbol table\n"); | ||||
return; | return; | ||||
} | } | ||||
shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); | shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); | ||||
/* Find the symbol table and the corresponding string table. */ | /* Find the symbol table and the corresponding string table. */ | ||||
for (i = 1; i < ehdr->e_shnum; i++) { | for (i = 1; i < ehdr->e_shnum; i++) { | ||||
if (shdr[i].sh_type != SHT_SYMTAB) | if (shdr[i].sh_type != SHT_SYMTAB) | ||||
continue; | continue; | ||||
if (shdr[i].sh_offset == 0) | if (shdr[i].sh_offset == 0) | ||||
continue; | continue; | ||||
ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset); | ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset); | ||||
ksymtab_size = shdr[i].sh_size; | ksymtab_size = shdr[i].sh_size; | ||||
j = shdr[i].sh_link; | j = shdr[i].sh_link; | ||||
if (shdr[j].sh_offset == 0) | if (shdr[j].sh_offset == 0) | ||||
continue; /* Can this happen? */ | continue; /* Can this happen? */ | ||||
kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset); | kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset); | ||||
break; | break; | ||||
} | } | ||||
if (ksymtab == 0 || kstrtab == 0) | if (ksymtab == 0 || kstrtab == 0) | ||||
if (isxen()) | |||||
roygerUnsubmitted Not Done Inline ActionsYou can join with the previous condition to avoid adding more indentation to the line. royger: You can join with the previous condition to avoid adding more indentation to the line. | |||||
cpercivaAuthorUnsubmitted Done Inline ActionsFixed. cperciva: Fixed. | |||||
xc_printf( | xc_printf( | ||||
"Unable to load ELF symtab: could not find symtab or strtab\n"); | "Unable to load ELF symtab: could not find symtab or strtab\n"); | ||||
} | } | ||||
#endif | #endif | ||||
static caddr_t | static caddr_t | ||||
xen_pvh_parse_preload_data(uint64_t modulep) | xen_pvh_parse_preload_data(uint64_t modulep) | ||||
roygerUnsubmitted Not Done Inline ActionsAre you planning to use this function for firecracker? If so it would benefit from renaming also. Otherwise I would leave the xc_printf() calls as-is. royger: Are you planning to use this function for firecracker? If so it would benefit from renaming… | |||||
cpercivaAuthorUnsubmitted Done Inline ActionsI'm not sure. I'm using a two year old branch of Firecracker which added PVH support and I'm not 100% confident that they implemented everything. Right now we have modlist_paddr == 0 but I'm not sure that will always be the case after PVH support is merged into Firecracker's main branch. (In any case, renaming will come later.) cperciva: I'm not sure. I'm using a two year old branch of Firecracker which added PVH support and I'm… | |||||
{ | { | ||||
caddr_t kmdp; | caddr_t kmdp; | ||||
vm_ooffset_t off; | vm_ooffset_t off; | ||||
vm_paddr_t metadata; | vm_paddr_t metadata; | ||||
char *envp; | char *envp; | ||||
char acpi_rsdp[19]; | char acpi_rsdp[19]; | ||||
if (start_info->modlist_paddr != 0) { | if (start_info->modlist_paddr != 0) { | ||||
struct hvm_modlist_entry *mod; | struct hvm_modlist_entry *mod; | ||||
const char *cmdline; | const char *cmdline; | ||||
mod = (struct hvm_modlist_entry *) | mod = (struct hvm_modlist_entry *) | ||||
(start_info->modlist_paddr + KERNBASE); | (start_info->modlist_paddr + KERNBASE); | ||||
cmdline = mod[0].cmdline_paddr ? | cmdline = mod[0].cmdline_paddr ? | ||||
(const char *)(mod[0].cmdline_paddr + KERNBASE) : NULL; | (const char *)(mod[0].cmdline_paddr + KERNBASE) : NULL; | ||||
if (strcmp(cmdline, "header") == 0) { | if (strcmp(cmdline, "header") == 0) { | ||||
struct xen_header *header; | struct xen_header *header; | ||||
header = (struct xen_header *)(mod[0].paddr + KERNBASE); | header = (struct xen_header *)(mod[0].paddr + KERNBASE); | ||||
if ((header->flags & XENHEADER_HAS_MODULEP_OFFSET) != | if ((header->flags & XENHEADER_HAS_MODULEP_OFFSET) != | ||||
XENHEADER_HAS_MODULEP_OFFSET) { | XENHEADER_HAS_MODULEP_OFFSET) { | ||||
xc_printf("Unable to load module metadata\n"); | CRASH("Unable to load module metadata\n"); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
preload_metadata = (caddr_t)(mod[0].paddr + | preload_metadata = (caddr_t)(mod[0].paddr + | ||||
header->modulep_offset + KERNBASE); | header->modulep_offset + KERNBASE); | ||||
kmdp = preload_search_by_type("elf kernel"); | kmdp = preload_search_by_type("elf kernel"); | ||||
if (kmdp == NULL) | if (kmdp == NULL) | ||||
kmdp = preload_search_by_type("elf64 kernel"); | kmdp = preload_search_by_type("elf64 kernel"); | ||||
if (kmdp == NULL) { | if (kmdp == NULL) { | ||||
xc_printf("Unable to find kernel\n"); | CRASH("Unable to find kernel\n"); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
/* | /* | ||||
* Xen has relocated the metadata and the modules, so | * Xen has relocated the metadata and the modules, so | ||||
* we need to recalculate its position. This is done | * we need to recalculate its position. This is done | ||||
* by saving the original modulep address and then | * by saving the original modulep address and then | ||||
* calculating the offset from the real modulep | * calculating the offset from the real modulep | ||||
* position. | * position. | ||||
*/ | */ | ||||
metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, | metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, | ||||
vm_paddr_t); | vm_paddr_t); | ||||
off = mod[0].paddr + header->modulep_offset - metadata + | off = mod[0].paddr + header->modulep_offset - metadata + | ||||
KERNBASE; | KERNBASE; | ||||
} else { | } else { | ||||
preload_metadata = (caddr_t)(mod[0].paddr + KERNBASE); | preload_metadata = (caddr_t)(mod[0].paddr + KERNBASE); | ||||
kmdp = preload_search_by_type("elf kernel"); | kmdp = preload_search_by_type("elf kernel"); | ||||
if (kmdp == NULL) | if (kmdp == NULL) | ||||
kmdp = preload_search_by_type("elf64 kernel"); | kmdp = preload_search_by_type("elf64 kernel"); | ||||
if (kmdp == NULL) { | if (kmdp == NULL) { | ||||
xc_printf("Unable to find kernel\n"); | CRASH("Unable to find kernel\n"); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t); | metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t); | ||||
off = mod[0].paddr + KERNBASE - metadata; | off = mod[0].paddr + KERNBASE - metadata; | ||||
} | } | ||||
preload_bootstrap_relocate(off); | preload_bootstrap_relocate(off); | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | xen_pvh_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) | ||||
u_int32_t size; | u_int32_t size; | ||||
int rc; | int rc; | ||||
/* Fetch the E820 map from Xen */ | /* Fetch the E820 map from Xen */ | ||||
memmap.nr_entries = MAX_E820_ENTRIES; | memmap.nr_entries = MAX_E820_ENTRIES; | ||||
set_xen_guest_handle(memmap.buffer, xen_smap); | set_xen_guest_handle(memmap.buffer, xen_smap); | ||||
rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); | rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); | ||||
if (rc) { | if (rc) { | ||||
xc_printf("ERROR: unable to fetch Xen E820 memory map: %d\n", | CRASH("ERROR: unable to fetch Xen E820 memory map: %d\n", | ||||
roygerUnsubmitted Not Done Inline ActionsThere's no need to replace this one, as the previous call is already a Xen-specific hypercall, so if you get here for other hypervisors it's likely you won't be able to reach the CRASH() anyway due to jumping into an uninitialized hypercall page. royger: There's no need to replace this one, as the previous call is already a Xen-specific hypercall, so… | |||||
cpercivaAuthorUnsubmitted Done Inline ActionsQuite right. Sorry, that was a search-and-replace which went too far. cperciva: Quite right. Sorry, that was a search-and-replace which went too far. | |||||
rc); | rc); | ||||
HYPERVISOR_shutdown(SHUTDOWN_crash); | |||||
} | } | ||||
size = memmap.nr_entries * sizeof(xen_smap[0]); | size = memmap.nr_entries * sizeof(xen_smap[0]); | ||||
bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); | bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); | ||||
} | } | ||||
static void | static void | ||||
Show All 13 Lines |
nit: bool would be better here, or the usage of the function below would have to use 'isxen() != 0' because coding style states: "Do not use ! for tests unless it is a boolean, ...".