diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -48,6 +48,8 @@
 	.set	dmapbase,DMAP_MIN_ADDRESS
 	.set	dmapend,DMAP_MAX_ADDRESS
 
+#define	BOOTSTACK_SIZE	4096
+
 	.text
 /**********************************************************************
  *
@@ -66,14 +68,22 @@
 	pushq	$PSL_KERNEL
 	popfq
 
-	/* Find the metadata pointers before we lose them */
+	/* Get onto a stack that we can trust - there is no going back now. */
 	movq	%rsp, %rbp
+	movq	$bootstack,%rsp
+
+#ifdef KASAN
+	/* Bootstrap a shadow map for the boot stack. */
+	movq	$bootstack, %rdi
+	subq	$BOOTSTACK_SIZE, %rdi
+	movq	$BOOTSTACK_SIZE, %rsi
+	call	kasan_init_early
+#endif
+
+	/* Grab metadata pointers from the loader. */
 	movl	4(%rbp),%edi		/* modulep (arg 1) */
 	movl	8(%rbp),%esi		/* kernend (arg 2) */
-
-	/* Get onto a stack that we can trust - there is no going back now. */
-	movq	$bootstack,%rsp
-	xorl	%ebp, %ebp
+	xorq	%rbp, %rbp
 
 	call	hammer_time		/* set up cpu for unix operation */
 	movq	%rax,%rsp		/* set up kstack for mi_startup() */
@@ -140,5 +150,5 @@
 	.bss
 	ALIGN_DATA			/* just to be sure */
 	.globl	bootstack
-	.space	0x1000			/* space for bootstack - temporary stack */
+	.space	BOOTSTACK_SIZE		/* space for bootstack - temporary stack */
 bootstack:
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1260,16 +1260,43 @@
 	tssp->tss_ist4 = (long)np;
 }
 
+/*
+ * Calculate the kernel load address by inspecting page table created by loader.
+ * The assumptions:
+ * - kernel is mapped at KERNBASE, backed by contiguous phys memory
+ *   aligned at 2M, below 4G (the latter is important for AP startup)
+ * - there is a 2M hole at KERNBASE (KERNSTART = KERNBASE + 2M)
+ * - kernel is mapped with 2M superpages
+ * - all participating memory, i.e. kernel, modules, metadata,
+ *   page table is accessible by pre-created 1:1 mapping
+ *   (right now loader creates 1:1 mapping for lower 4G, and all
+ *   memory is from there)
+ * - there is a usable memory block right after the end of the
+ *   mapped kernel and all modules/metadata, pointed to by
+ *   physfree, for early allocations
+ */
+vm_paddr_t __nosanitizeaddress __nosanitizememory
+amd64_loadaddr(void)
+{
+	pml4_entry_t *pml4e;
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde;
+	uint64_t cr3;
+
+	cr3 = rcr3();
+	pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(KERNSTART);
+	pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(KERNSTART);
+	pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(KERNSTART);
+	return (*pde & PG_FRAME);
+}
+
 u_int64_t
 hammer_time(u_int64_t modulep, u_int64_t physfree)
 {
 	caddr_t kmdp;
 	int gsel_tss, x;
 	struct pcpu *pc;
-	uint64_t cr3, rsp0;
-	pml4_entry_t *pml4e;
-	pdp_entry_t *pdpe;
-	pd_entry_t *pde;
+	uint64_t rsp0;
 	char *env;
 	struct user_segment_descriptor *gdt;
 	struct region_descriptor r_gdt;
@@ -1278,34 +1305,9 @@
 
 	TSRAW(&thread0, TS_ENTER, __func__, NULL);
 
-	/*
-	 * Calculate kernphys by inspecting page table created by loader.
-	 * The assumptions:
-	 * - kernel is mapped at KERNBASE, backed by contiguous phys memory
-	 *   aligned at 2M, below 4G (the latter is important for AP startup)
-	 * - there is a 2M hole at KERNBASE
-	 * - kernel is mapped with 2M superpages
-	 * - all participating memory, i.e. kernel, modules, metadata,
-	 *   page table is accessible by pre-created 1:1 mapping
-	 *   (right now loader creates 1:1 mapping for lower 4G, and all
-	 *   memory is from there)
-	 * - there is a usable memory block right after the end of the
-	 *   mapped kernel and all modules/metadata, pointed to by
-	 *   physfree, for early allocations
-	 */
-	cr3 = rcr3();
-	pml4e = (pml4_entry_t *)(cr3 & ~PAGE_MASK) + pmap_pml4e_index(
-	    (vm_offset_t)hammer_time);
-	pdpe = (pdp_entry_t *)(*pml4e & ~PAGE_MASK) + pmap_pdpe_index(
-	    (vm_offset_t)hammer_time);
-	pde = (pd_entry_t *)(*pdpe & ~PAGE_MASK) + pmap_pde_index(
-	    (vm_offset_t)hammer_time);
-	kernphys = (vm_paddr_t)(*pde & ~PDRMASK) -
-	    (vm_paddr_t)(((vm_offset_t)hammer_time - KERNBASE) & ~PDRMASK);
-
-	/* Fix-up for 2M hole */
+	kernphys = amd64_loadaddr();
+
 	physfree += kernphys;
-	kernphys += NBPDR;
 
 	kmdp = init_ops.parse_preload_data(modulep);
 
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -11429,6 +11429,107 @@
 }
 
 #if defined(KASAN) || defined(KMSAN)
+
+/*
+ * Reserve enough memory to:
+ * 1) allocate PDP pages for the shadow map(s),
+ * 2) shadow one page of memory, so one PD page, one PT page, and one shadow
+ *    page per shadow map.
+ */
+#ifdef KASAN
+#define	SAN_EARLY_PAGES	(NKASANPML4E + 3)
+#else
+#define	SAN_EARLY_PAGES	(NKMSANSHADPML4E + NKMSANORIGPML4E + 2 * 3)
+#endif
+
+static uint64_t __nosanitizeaddress __nosanitizememory
+pmap_san_enter_early_alloc_4k(uint64_t pabase)
+{
+	static uint8_t data[PAGE_SIZE * SAN_EARLY_PAGES] __aligned(PAGE_SIZE);
+	static size_t offset = 0;
+	uint64_t pa;
+
+	if (offset == sizeof(data)) {
+		panic("%s: ran out of memory for the bootstrap shadow map",
+		    __func__);
+	}
+
+	pa = pabase + ((vm_offset_t)&data[offset] - KERNSTART);
+	offset += PAGE_SIZE;
+	return (pa);
+}
+
+/*
+ * Map a shadow page, before the kernel has bootstrapped its page tables.  This
+ * is currently only used to shadow the temporary boot stack set up by locore.
+ */
+static void __nosanitizeaddress __nosanitizememory
+pmap_san_enter_early(vm_offset_t va)
+{
+	static bool first = true;
+	pml4_entry_t *pml4e;
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde;
+	pt_entry_t *pte;
+	uint64_t cr3, pa, base;
+	int i;
+
+	base = amd64_loadaddr();
+	cr3 = rcr3();
+
+	if (first) {
+		/*
+		 * If this is the first call, we need to allocate new PML4Es for
+		 * the bootstrap shadow map(s).  We don't know how the PML4 page
+		 * was initialized by the boot loader, so we can't simply test
+		 * whether the shadow map's PML4Es are zero.
+		 */
+		first = false;
+#ifdef KASAN
+		for (i = 0; i < NKASANPML4E; i++) {
+			pa = pmap_san_enter_early_alloc_4k(base);
+
+			pml4e = (pml4_entry_t *)cr3 +
+			    pmap_pml4e_index(KASAN_MIN_ADDRESS + i * NBPML4);
+			*pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V);
+		}
+#else
+		for (i = 0; i < NKMSANORIGPML4E; i++) {
+			pa = pmap_san_enter_early_alloc_4k(base);
+
+			pml4e = (pml4_entry_t *)cr3 +
+			    pmap_pml4e_index(KMSAN_ORIG_MIN_ADDRESS +
+			    i * NBPML4);
+			*pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V);
+		}
+		for (i = 0; i < NKMSANSHADPML4E; i++) {
+			pa = pmap_san_enter_early_alloc_4k(base);
+
+			pml4e = (pml4_entry_t *)cr3 +
+			    pmap_pml4e_index(KMSAN_SHAD_MIN_ADDRESS +
+			    i * NBPML4);
+			*pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V);
+		}
+#endif
+	}
+	pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(va);
+	pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(va);
+	if (*pdpe == 0) {
+		pa = pmap_san_enter_early_alloc_4k(base);
+		*pdpe = (pdp_entry_t)(pa | X86_PG_RW | X86_PG_V);
+	}
+	pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(va);
+	if (*pde == 0) {
+		pa = pmap_san_enter_early_alloc_4k(base);
+		*pde = (pd_entry_t)(pa | X86_PG_RW | X86_PG_V);
+	}
+	pte = (pt_entry_t *)(*pde & PG_FRAME) + pmap_pte_index(va);
+	if (*pte != 0)
+		panic("%s: PTE for %#lx is already initialized", __func__, va);
+	pa = pmap_san_enter_early_alloc_4k(base);
+	*pte = (pt_entry_t)(pa | X86_PG_A | X86_PG_M | X86_PG_RW | X86_PG_V);
+}
+
 static vm_page_t
 pmap_san_enter_alloc_4k(void)
 {
@@ -11452,7 +11553,7 @@
  * Grow a shadow map by at least one 4KB page at the specified address. Use 2MB
  * pages when possible.
  */
-void
+void __nosanitizeaddress __nosanitizememory
 pmap_san_enter(vm_offset_t va)
 {
 	pdp_entry_t *pdpe;
@@ -11460,6 +11561,14 @@
 	pt_entry_t *pte;
 	vm_page_t m;
 
+	if (kernphys == 0) {
+		/*
+		 * We're creating a temporary shadow map for the boot stack.
+		 */
+		pmap_san_enter_early(va);
+		return;
+	}
+
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 
 	pdpe = pmap_pdpe(kernel_pmap, va);
diff --git a/sys/amd64/include/asan.h b/sys/amd64/include/asan.h
--- a/sys/amd64/include/asan.h
+++ b/sys/amd64/include/asan.h
@@ -66,6 +66,12 @@
 {
 }
 
+static inline void
+kasan_md_init_early(vm_offset_t bootstack, size_t size)
+{
+	kasan_shadow_map(bootstack, size);
+}
+
 #endif /* KASAN */
 
 #endif /* !_MACHINE_ASAN_H_ */
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -64,6 +64,7 @@
 
 void	amd64_conf_fast_syscall(void);
 void	amd64_db_resume_dbreg(void);
+vm_paddr_t amd64_loadaddr(void);
 void	amd64_lower_shared_page(struct sysentvec *);
 void	amd64_bsp_pcpu_init1(struct pcpu *pc);
 void	amd64_bsp_pcpu_init2(uint64_t rsp0);
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -533,6 +533,7 @@
 vm_page_t pmap_page_alloc_below_4g(bool zeroed);
 
 #if defined(KASAN) || defined(KMSAN)
+void	pmap_san_bootstrap(void);
 void	pmap_san_enter(vm_offset_t);
 #endif
 
diff --git a/sys/kern/subr_asan.c b/sys/kern/subr_asan.c
--- a/sys/kern/subr_asan.c
+++ b/sys/kern/subr_asan.c
@@ -139,6 +139,12 @@
 	kasan_enabled = true;
 }
 
+void
+kasan_init_early(vm_offset_t stack, size_t size)
+{
+	kasan_md_init_early(stack, size);
+}
+
 static inline const char *
 kasan_code_name(uint8_t code)
 {
diff --git a/sys/sys/asan.h b/sys/sys/asan.h
--- a/sys/sys/asan.h
+++ b/sys/sys/asan.h
@@ -56,11 +56,10 @@
 #define	KASAN_EXEC_ARGS_FREED	0xFF
 
 void kasan_init(void);
+void kasan_init_early(vm_offset_t, size_t);
 void kasan_shadow_map(vm_offset_t, size_t);
-
 void kasan_mark(const void *, size_t, size_t, uint8_t);
 #else /* KASAN */
-#define kasan_early_init(u)
 #define	kasan_init()
 #define	kasan_shadow_map(a, s)
 #define	kasan_mark(p, s, l, c)
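Note (not part of the patch): both amd64_loadaddr() and pmap_san_enter_early() above walk the loader-built page tables by hand, relying on the standard x86-64 split of a kernel virtual address into four 9-bit table indices. The stand-alone C sketch below only illustrates that index arithmetic, which the pmap_pml4e_index()/pmap_pdpe_index()/pmap_pde_index()/pmap_pte_index() macros encapsulate in the kernel; the locally defined constants mirror the architectural shift values, and the example address standing in for KERNSTART (KERNBASE + 2M) is an assumption for demonstration only.

/*
 * Illustrative sketch only: decode a kernel virtual address into the
 * PML4/PDP/PD/PT indices used when walking a 4-level x86-64 page table.
 * Not FreeBSD code; constants are the architectural shifts (9 index bits
 * per level, 4K pages, 2M PD superpages).
 */
#include <stdint.h>
#include <stdio.h>

#define	IDX_BITS	9			/* index bits per level */
#define	IDX_MASK	((1ULL << IDX_BITS) - 1)
#define	PML4_SHIFT	39
#define	PDP_SHIFT	30
#define	PD_SHIFT	21
#define	PT_SHIFT	12

int
main(void)
{
	/* Example address: KERNBASE + 2M, i.e. a KERNSTART-like value. */
	uint64_t va = 0xffffffff80200000ULL;

	printf("pml4 index: %llu\n", (unsigned long long)((va >> PML4_SHIFT) & IDX_MASK));
	printf("pdp  index: %llu\n", (unsigned long long)((va >> PDP_SHIFT) & IDX_MASK));
	printf("pd   index: %llu\n", (unsigned long long)((va >> PD_SHIFT) & IDX_MASK));
	printf("pt   index: %llu\n", (unsigned long long)((va >> PT_SHIFT) & IDX_MASK));
	return (0);
}

For the example address this prints indices 511, 510, 1 and 0, which is why amd64_loadaddr() can recover the physical load address simply by following three table levels from CR3 and masking the resulting 2M PDE with PG_FRAME.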