diff --git a/sys/amd64/amd64/xen-locore.S b/sys/amd64/amd64/xen-locore.S
--- a/sys/amd64/amd64/xen-locore.S
+++ b/sys/amd64/amd64/xen-locore.S
@@ -61,7 +61,7 @@
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 0)
 	ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB, .asciz, "yes")
 	/* For PVHv2 support. */
-	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, VTOP(xen_start32))
+	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, xen_start32)
 
 	.text
 .p2align PAGE_SHIFT, 0x90	/* Hypercall_page needs to be PAGE aligned */
@@ -71,12 +71,13 @@
 
 /* PVH entry point. */
 	.code32
+	.pushsection .text.init32
 ENTRY(xen_start32)
 
 	/* Load flat GDT */
-	movl	$VTOP(gdtdesc32), %eax
+	movl	$gdt32desc, %eax
 	lgdt	(%eax)
-	jmp	$GDT_CODE, $VTOP(reload_cs)
+	jmp	$GDT_CODE, $reload_cs
 
 reload_cs:
 	movw	$GDT_DATA, %ax
@@ -90,38 +91,6 @@
 	pushl	$PSL_KERNEL
 	popfl
 
-	/*
-	 * Create the page tables.
-	 * The first 1GB is mapped using 2MB entries.
-	 */
-	movl	$0, %eax
-pgbuild:
-	cmp	$(PAGE_SIZE/ENTRY_SIZE), %eax
-	jae	pgbuild_done
-
-	/* PT4[i] = VTOP(&PT3[0]) | PG_V | PG_RW | PG_U */
-	movl	$VTOP(PT4), %ecx
-	movl	$VTOP(PT3), %edx
-	orl	$(PG_V | PG_RW | PG_U), %edx
-	movl	%edx, (%ecx,%eax,ENTRY_SIZE)
-
-	/* PT3[i] = VTOP(&PT2[0]) | PG_V | PG_RW | PG_U */
-	movl	$VTOP(PT3), %ecx
-	movl	$VTOP(PT2), %edx
-	orl	$(PG_V | PG_RW | PG_U), %edx
-	movl	%edx, (%ecx,%eax,ENTRY_SIZE)
-
-	/* PT2[i] = i * 2MiB | PG_V | PG_RW | PG_PS | PG_U */
-	movl	$VTOP(PT2), %ecx
-	movl	%eax, %edx
-	shll	$PDRSHIFT, %edx
-	orl	$(PG_V | PG_RW | PG_PS | PG_U), %edx
-	movl	%edx, (%ecx,%eax,ENTRY_SIZE)
-
-	inc	%eax
-	jmp	pgbuild
-
-pgbuild_done:
 	/* Turn on EFER.LME */
 	movl	$MSR_EFER, %ecx
 	rdmsr
@@ -134,7 +103,7 @@
 	movl	%eax, %cr4
 
 	/* Set %cr3 for PT4 */
-	movl	$VTOP(PT4), %eax
+	call	xen_start_pgtable
 	movl	%eax, %cr3
 
 	/* Turn on paging (implicitly sets EFER.LMA) */
@@ -143,9 +112,9 @@
 	movl	%eax, %cr0
 
 	/* Now we're in compatibility mode. Set %cs for long mode */
-	movl	$VTOP(gdtdesc), %eax
+	movl	$gdt64desc, %eax
 	lgdt	(%eax)
-	ljmp	$GDT_CODE, $VTOP(longmode)
+	ljmp	$GDT_CODE, $longmode
 
 	.code64
 longmode:
@@ -155,6 +124,7 @@
 	pushq	%rax
 	ret
 
+	.popsection
 start_kernel:
 	/*
 	 * Pass %ebx as the argument to hammer_time_xen, it contains
@@ -168,41 +138,3 @@
 	/* NOTREACHED */
 0:	hlt
 	jmp	0b
-
-/* Space for initial page tables */
-	.data
-	.p2align 12,0x40
-PT4:
-	.space	0x1000
-PT3:
-	.space	0x1000
-PT2:
-	.space	0x1000
-
-/* 64bit GDT */
-gdtdesc:
-	.word	gdtend - gdt - 1
-	.long	VTOP(gdt)		# low
-	.long	0			# high
-gdt:
-	.long	0			# null descriptor
-	.long	0
-	.long	0x00000000		# %cs
-	.long	0x00209800
-	.long	0x00000000		# %ds
-	.long	0x00008000
-gdtend:
-
-/* 32bit GDT */
-gdtdesc32:
-	.word	gdt32end - gdt32 - 1
-	.long	VTOP(gdt32)
-	.long	0
-gdt32:
-	.long	0			# null descriptor
-	.long	0
-	.long	0x0000ffff		# %cs
-	.long	0x00cf9a00
-	.long	0x0000ffff		# %ds, %es, %ss
-	.long	0x00cf9200
-gdt32end:
diff --git a/sys/amd64/amd64/xen-start.c b/sys/amd64/amd64/xen-start.c
new file mode 100644
--- /dev/null
+++ b/sys/amd64/amd64/xen-start.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+#include	/* NOTE(review): header names are missing from all four #include lines in this copy; restore them */
+#include
+
+#include
+
+#include
+
+/*
+ * This code is compiled in 32bit mode, so it can be called from the PVH entry
+ * point as part of the bootstrap sequence to transition to long mode. Care
+ * should be taken to avoid calling any external functions, as that will not
+ * work. Also only the .text, .data, .rodata and .bss sections are handled
+ * specially so that VMA == LMA.
+ */
+
+/* GDT descriptors for the PVH entry point trampoline. */
+static const struct segment_descriptor gdt64[] = {
+	{ }, /* NULL descriptor */
+	{
+		.sd_type = SDT_MEMERA,
+		.sd_p = 1,
+		.sd_xx = 0x2 /* Longmode bit. */
+	}, /* %cs */
+	{ .sd_p = 1 }, /* %ds */
+};
+/* lgdt operand describing gdt64; referenced from xen-locore.S. */
+const struct {
+	uint64_t limit:16;
+	const struct segment_descriptor *base;
+	uint32_t pad;
+} __packed gdt64desc = {
+	.limit = sizeof(gdt64) - 1,
+	.base = gdt64,
+};
+
+static const struct segment_descriptor gdt32[] = {
+	{ }, /* NULL descriptor */
+	{
+		.sd_lolimit = 0xffff,
+		.sd_type = SDT_MEMERA,
+		.sd_p = 1,
+		.sd_hilimit = 0xf,
+		.sd_def32 = 1,
+		.sd_gran = 1,
+	}, /* %cs */
+	{
+		.sd_lolimit = 0xffff,
+		.sd_type = SDT_MEMRWA,
+		.sd_p = 1,
+		.sd_hilimit = 0xf,
+		.sd_def32 = 1,
+		.sd_gran = 1,
+	}, /* %ds, %es, %ss */
+};
+/* lgdt operand describing gdt32; referenced from xen-locore.S. */
+const struct {
+	unsigned int limit:16;
+	const struct segment_descriptor *base;
+} __packed gdt32desc = {
+	.limit = sizeof(gdt32) - 1,
+	.base = gdt32,
+};
+
+/*
+ * Fill the bootstrap page tables (first 1GB mapped with 2MB pages) and return
+ * the address of the level 4 table, which the caller loads into %cr3.
+ * Called from assembly only.
+ */
+uint32_t xen_start_pgtable(void);
+
+#define PG_V	0x001
+#define PG_RW	0x002
+#define PG_PS	0x080
+
+uint32_t xen_start_pgtable(void)
+{
+	static uint64_t PT4[PAGE_SIZE/sizeof(uint64_t)] __aligned(PAGE_SIZE);
+	static uint64_t PT3[PAGE_SIZE/sizeof(uint64_t)] __aligned(PAGE_SIZE);
+	static uint64_t PT2[PAGE_SIZE/sizeof(uint64_t)] __aligned(PAGE_SIZE);
+	unsigned int i;
+
+	/*
+	 * This file is built with -m32, so pointers are narrower than the 64bit
+	 * table entries: convert through uintptr_t so the widening is unsigned.
+	 */
+	for (i = 0; i < nitems(PT4); i++) {
+		/* Each slot of the level 4 pages points to the same level 3 page */
+		PT4[i] = (uintptr_t)&PT3[0];
+		PT4[i] |= PG_V | PG_RW;
+
+		/* Each slot of the level 3 pages points to the same level 2 page */
+		PT3[i] = (uintptr_t)&PT2[0];
+		PT3[i] |= PG_V | PG_RW;
+
+		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
+		PT2[i] = i * (2 * 1024 * 1024);
+		PT2[i] |= PG_V | PG_RW | PG_PS;
+	}
+
+	return ((uint32_t)(uintptr_t)&PT4[0]);
+}
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -78,6 +78,8 @@
 amd64/amd64/initcpu.c		standard
 amd64/amd64/io.c		optional	io
 amd64/amd64/locore.S		standard	no-obj
+amd64/amd64/xen-start.c		optional	xenhvm \
+	compile-with "${NORMAL_C} -m32 -g0 && ${OBJCOPY} -I elf32-i386 -O elf64-x86-64 --rename-section .text=.text.init32 --rename-section .data=.data.init32 --rename-section .rodata=.rodata.init32 --rename-section .bss=.bss.init32 ${.TARGET} ${.TARGET}"
 amd64/amd64/xen-locore.S	optional	xenhvm \
 	compile-with "${NORMAL_S} -g0" \
 	no-ctfconvert
diff --git a/sys/conf/ldscript.amd64 b/sys/conf/ldscript.amd64
--- a/sys/conf/ldscript.amd64
+++ b/sys/conf/ldscript.amd64
@@ -4,14 +4,26 @@
 SEARCH_DIR("/usr/lib");
 SECTIONS
 {
+  /* PVH 32bit protected mode init code.
+   *
+   * Force VMA == LMA, as that code runs with paging disabled or with identity
+   * page tables.
+   */
+  . = kernload + SIZEOF_HEADERS;
+  /*
+   * Merge all sections into a single .text section: we either run with paging
+   * disabled and flat segments, or with identity page tables where all entries
+   * have AWX permissions, so keeping the sections separated is useless.
+   */
+  .text.init32 : AT (.) { *(.text.init32 .data.init32 .rodata.init32 .bss.init32) }
   /* Read-only sections, merged into text segment: */
-  . = kernbase + kernload + SIZEOF_HEADERS;
+  . = . + kernbase;
   /*
    * Use the AT keyword in order to set the right LMA that contains
    * the physical address where the section should be loaded. This is
    * needed for the Xen loader which honours the LMA.
    */
-  .interp         : AT (kernload + SIZEOF_HEADERS) { *(.interp) }
+  .interp         : AT (. - kernbase) { *(.interp) }
   .hash           : { *(.hash) }
   .gnu.hash       : { *(.gnu.hash) }
   .dynsym         : { *(.dynsym) }