Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -122,6 +122,7 @@
 #include
 #include
 #include
+#include <sys/msan.h>
 #include
 #include
 #include
@@ -161,8 +162,8 @@
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
 #ifdef SMP
@@ -430,6 +431,17 @@
 #ifdef KASAN
 static uint64_t KASANPDPphys;
 #endif
+#ifdef KMSAN
+static uint64_t KMSANSHADPDPphys;
+static uint64_t KMSANORIGPDPphys;
+
+/*
+ * To support systems with large amounts of memory, it is necessary to extend
+ * the maximum size of the direct map.  This could eat into the space reserved
+ * for the shadow map.
+ */
+_Static_assert(DMPML4I + NDMPML4E <= KMSANSHADPML4I, "direct map overflow");
+#endif
 
 static pml4_entry_t *kernel_pml4;
 static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
@@ -1679,13 +1691,21 @@
 	DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g);
 	dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
 
-	/* Allocate pages */
+	/* Allocate pages. */
 	KPML4phys = allocpages(firstaddr, 1);
 	KPDPphys = allocpages(firstaddr, NKPML4E);
 #ifdef KASAN
 	KASANPDPphys = allocpages(firstaddr, NKASANPML4E);
 	KASANPDphys = allocpages(firstaddr, 1);
 #endif
+#ifdef KMSAN
+	/*
+	 * The KMSAN shadow maps are initially left unpopulated, since there is
+	 * no need to shadow memory above KERNBASE.
+	 */
+	KMSANSHADPDPphys = allocpages(firstaddr, NKMSANSHADPML4E);
+	KMSANORIGPDPphys = allocpages(firstaddr, NKMSANORIGPML4E);
+#endif
 
 	/*
 	 * Allocate the initial number of kernel page table pages required to
@@ -1816,6 +1836,20 @@
 	}
 #endif
 
+#ifdef KMSAN
+	/* Connect the KMSAN shadow map slots up to the PML4. */
+	for (i = 0; i < NKMSANSHADPML4E; i++) {
+		p4_p[KMSANSHADPML4I + i] = KMSANSHADPDPphys + ptoa(i);
+		p4_p[KMSANSHADPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+	}
+
+	/* Connect the KMSAN origin map slots up to the PML4. */
+	for (i = 0; i < NKMSANORIGPML4E; i++) {
+		p4_p[KMSANORIGPML4I + i] = KMSANORIGPDPphys + ptoa(i);
+		p4_p[KMSANORIGPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
+	}
+#endif
+
 	/* Connect the Direct Map slots up to the PML4. */
 	for (i = 0; i < ndmpdpphys; i++) {
 		p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
@@ -2486,6 +2520,14 @@
 	TUNABLE_INT_FETCH("vm.pmap.large_map_pml4_entries", &lm_ents);
 	if (lm_ents > LMEPML4I - LMSPML4I + 1)
 		lm_ents = LMEPML4I - LMSPML4I + 1;
+#ifdef KMSAN
+	if (lm_ents > KMSANORIGPML4I - LMSPML4I) {
+		printf(
+	    "pmap: shrinking large map for KMSAN (%d slots to %ld slots)\n",
+		    lm_ents, KMSANORIGPML4I - LMSPML4I);
+		lm_ents = KMSANORIGPML4I - LMSPML4I;
+	}
+#endif
 	if (bootverbose)
 		printf("pmap: large map %u PML4 slots (%lu GB)\n",
 		    lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024));
@@ -4174,6 +4216,16 @@
 		pm_pml4[KASANPML4I + i] = (KASANPDPphys + ptoa(i)) |
 		    X86_PG_RW | X86_PG_V | pg_nx;
 	}
+#endif
+#ifdef KMSAN
+	for (i = 0; i < NKMSANSHADPML4E; i++) {
+		pm_pml4[KMSANSHADPML4I + i] = (KMSANSHADPDPphys + ptoa(i)) |
+		    X86_PG_RW | X86_PG_V | pg_nx;
+	}
+	for (i = 0; i < NKMSANORIGPML4E; i++) {
+		pm_pml4[KMSANORIGPML4I + i] = (KMSANORIGPDPphys + ptoa(i)) |
+		    X86_PG_RW | X86_PG_V | pg_nx;
+	}
 #endif
 	for (i = 0; i < ndmpdpphys; i++) {
 		pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW |
@@ -4760,6 +4812,12 @@
 #ifdef KASAN
 	for (i = 0; i < NKASANPML4E; i++)	/* KASAN shadow map */
 		pmap->pm_pmltop[KASANPML4I + i] = 0;
+#endif
+#ifdef KMSAN
+	for (i = 0; i < NKMSANSHADPML4E; i++)	/* KMSAN shadow map */
+		pmap->pm_pmltop[KMSANSHADPML4I + i] = 0;
+	for (i = 0; i < NKMSANORIGPML4E; i++)	/* KMSAN origin map */
+		pmap->pm_pmltop[KMSANORIGPML4I + i] = 0;
 #endif
 	for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
 		pmap->pm_pmltop[DMPML4I + i] = 0;
@@ -4802,6 +4860,60 @@
     0, 0, kvm_free, "LU", "Amount of KVM free");
 
+#ifdef KMSAN
+static void
+pmap_kmsan_shadow_map_page_array(vm_paddr_t pdppa, vm_size_t size)
+{
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde;
+	pt_entry_t *pte;
+	vm_paddr_t dummypa, dummypd, dummypt;
+	int i, npde, npdpg;
+
+	npdpg = howmany(size, NBPDP);
+	npde = size / NBPDR;
+
+	dummypa = vm_phys_early_alloc(-1, PAGE_SIZE);
+	pagezero((void *)PHYS_TO_DMAP(dummypa));
+
+	dummypt = vm_phys_early_alloc(-1, PAGE_SIZE);
+	pagezero((void *)PHYS_TO_DMAP(dummypt));
+	dummypd = vm_phys_early_alloc(-1, PAGE_SIZE * npdpg);
+	for (i = 0; i < npdpg; i++)
+		pagezero((void *)PHYS_TO_DMAP(dummypd + ptoa(i)));
+
+	pte = (pt_entry_t *)PHYS_TO_DMAP(dummypt);
+	for (i = 0; i < NPTEPG; i++)
+		pte[i] = (pt_entry_t)(dummypa | X86_PG_V | X86_PG_RW |
+		    X86_PG_A | X86_PG_M | pg_nx);
+
+	pde = (pd_entry_t *)PHYS_TO_DMAP(dummypd);
+	for (i = 0; i < npde; i++)
+		pde[i] = (pd_entry_t)(dummypt | X86_PG_V | X86_PG_RW | pg_nx);
+
+	pdpe = (pdp_entry_t *)PHYS_TO_DMAP(pdppa);
+	for (i = 0; i < npdpg; i++)
+		pdpe[i] = (pdp_entry_t)(dummypd + ptoa(i) | X86_PG_V |
+		    X86_PG_RW | pg_nx);
+}
+
+static void
+pmap_kmsan_page_array_startup(vm_offset_t start, vm_offset_t end)
+{
+	vm_size_t size;
+
+	KASSERT(start % NBPDP == 0, ("unaligned page array start address"));
+
+	/*
+	 * The end of the page array's KVA region is 2MB aligned, see
+	 * kmem_init().
+	 */
+	size = round_2mpage(end) - start;
+	pmap_kmsan_shadow_map_page_array(KMSANSHADPDPphys, size);
+	pmap_kmsan_shadow_map_page_array(KMSANORIGPDPphys, size);
+}
+#endif
+
 /*
  * Allocate physical memory for the vm_page array and map it into KVA,
  * attempting to back the vm_pages with domain-local memory.
@@ -4842,6 +4954,10 @@
 		pde_store(pde, newpdir);
 	}
 	vm_page_array = (vm_page_t)start;
+
+#ifdef KMSAN
+	pmap_kmsan_page_array_startup(start, end);
+#endif
 }
 
 /*
@@ -4880,6 +4996,8 @@
 	addr = vm_map_max(kernel_map);
 	if (kernel_vm_end < addr)
 		kasan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
+	if (kernel_vm_end < addr)
+		kmsan_shadow_map(kernel_vm_end, addr - kernel_vm_end);
 	while (kernel_vm_end < addr) {
 		pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
 		if ((*pdpe & X86_PG_V) == 0) {
@@ -11307,7 +11425,7 @@
 	m = pmap_kasan_enter_alloc_2m();
 	if (m != NULL) {
 		*pde = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW |
-		    X86_PG_PS | X86_PG_V | pg_nx);
+		    X86_PG_PS | X86_PG_V | X86_PG_A | X86_PG_M | pg_nx);
 	} else {
 		m = pmap_kasan_enter_alloc_4k();
 		*pde = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW |
@@ -11319,14 +11437,82 @@
 	pte = pmap_pde_to_pte(pde, va);
 	if ((*pte & X86_PG_V) != 0)
 		return;
-	KASSERT((*pte & X86_PG_V) == 0,
-	    ("%s: shadow address %#lx is already mapped", __func__, va));
 	m = pmap_kasan_enter_alloc_4k();
 	*pte = (pt_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW | X86_PG_V |
 	    X86_PG_M | X86_PG_A | pg_nx);
 }
 #endif
 
+#ifdef KMSAN
+static vm_page_t
+pmap_kmsan_enter_alloc_4k(void)
+{
+	vm_page_t m;
+
+	m = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
+	    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+	if (m == NULL)
+		panic("%s: no memory to grow shadow map", __func__);
+	if ((m->flags & PG_ZERO) == 0)
+		pmap_zero_page(m);
+	return (m);
+}
+
+static vm_page_t
+pmap_kmsan_enter_alloc_2m(void)
+{
+	vm_page_t m;
+
+	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+	    VM_ALLOC_WIRED, NPTEPG, 0, ~0ul, NBPDR, 0, VM_MEMATTR_DEFAULT);
+	if (m != NULL)
+		memset((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), 0, NBPDR);
+	return (m);
+}
+
+/*
+ * Grow the shadow or origin maps by at least one 4KB page at the specified
+ * address.  Use 2MB pages when possible.
+ */
+void
+pmap_kmsan_enter(vm_offset_t va)
+{
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde;
+	pt_entry_t *pte;
+	vm_page_t m;
+
+	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
+
+	pdpe = pmap_pdpe(kernel_pmap, va);
+	if ((*pdpe & X86_PG_V) == 0) {
+		m = pmap_kmsan_enter_alloc_4k();
+		*pdpe = (pdp_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW |
+		    X86_PG_V | pg_nx);
+	}
+	pde = pmap_pdpe_to_pde(pdpe, va);
+	if ((*pde & X86_PG_V) == 0) {
+		m = pmap_kmsan_enter_alloc_2m();
+		if (m != NULL) {
+			*pde = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW |
+			    X86_PG_PS | X86_PG_V | X86_PG_A | X86_PG_M | pg_nx);
+		} else {
+			m = pmap_kmsan_enter_alloc_4k();
+			*pde = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW |
+			    X86_PG_V | pg_nx);
+		}
+	}
+	if ((*pde & X86_PG_PS) != 0)
+		return;
+	pte = pmap_pde_to_pte(pde, va);
+	if ((*pte & X86_PG_V) != 0)
+		return;
+	m = pmap_kmsan_enter_alloc_4k();
+	*pte = (pt_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW | X86_PG_V |
+	    X86_PG_M | X86_PG_A | pg_nx);
+}
+#endif
+
 /*
  * Track a range of the kernel's virtual address space that is contiguous
 * in various mapping attributes.
@@ -11508,6 +11694,14 @@
 		case KASANPML4I:
 			sbuf_printf(sb, "\nKASAN shadow map:\n");
 			break;
+#endif
+#ifdef KMSAN
+		case KMSANSHADPML4I:
+			sbuf_printf(sb, "\nKMSAN shadow map:\n");
+			break;
+		case KMSANORIGPML4I:
+			sbuf_printf(sb, "\nKMSAN origin map:\n");
+			break;
 #endif
 		case KPML4BASE:
 			sbuf_printf(sb, "\nKernel map:\n");
Index: sys/amd64/include/param.h
===================================================================
--- sys/amd64/include/param.h
+++ sys/amd64/include/param.h
@@ -134,7 +134,7 @@
 #define	IOPERM_BITMAP_SIZE	(IOPAGES * PAGE_SIZE + 1)
 
 #ifndef	KSTACK_PAGES
-#ifdef KASAN
+#if defined(KASAN) || defined(KMSAN)
 #define	KSTACK_PAGES	6
 #else
 #define	KSTACK_PAGES	4	/* pages of kstack (with pcb) */
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -201,6 +201,13 @@
  */
 #define	NKASANPML4E	((NKPML4E + 7) / 8)
 
+/*
+ * Number of PML4 slots for the KMSAN shadow and origin maps.  These are
+ * one-to-one with the kernel map.
+ */
+#define	NKMSANSHADPML4E	NKPML4E
+#define	NKMSANORIGPML4E	NKPML4E
+
 /*
  * We use the same numbering of the page table pages for 5-level and
  * 4-level paging structures.
@@ -251,6 +258,9 @@
 #define	KASANPML4I	(DMPML4I - NKASANPML4E) /* Below the direct map */
+#define	KMSANSHADPML4I	(KPML4BASE - NKMSANSHADPML4E)
+#define	KMSANORIGPML4I	(DMPML4I - NKMSANORIGPML4E)
+
 /* Large map: index of the first and max last pml4 entry */
 #define	LMSPML4I	(PML4PML4I + 1)
 #define	LMEPML4I	(KASANPML4I - 1)
@@ -514,6 +524,9 @@
 #ifdef KASAN
 void	pmap_kasan_enter(vm_offset_t);
 #endif
+#ifdef KMSAN
+void	pmap_kmsan_enter(vm_offset_t);
+#endif
 
 #endif /* _KERNEL */
Index: sys/amd64/include/vmparam.h
===================================================================
--- sys/amd64/include/vmparam.h
+++ sys/amd64/include/vmparam.h
@@ -75,7 +75,7 @@
  * of the direct mapped segment.  This uses 2MB pages for reduced
 * TLB pressure.
  */
-#ifndef KASAN
+#if !defined(KASAN) && !defined(KMSAN)
 #define	UMA_MD_SMALL_ALLOC
 #endif
 
@@ -168,9 +168,10 @@
  * 0xffff804020100fff - 0xffff807fffffffff   unused
  * 0xffff808000000000 - 0xffff847fffffffff   large map (can be tuned up)
  * 0xffff848000000000 - 0xfffff77fffffffff   unused (large map extends there)
- * 0xfffff78000000000 - 0xfffff7ffffffffff   512GB KASAN shadow map
+ * 0xfffff60000000000 - 0xfffff7ffffffffff   2TB KMSAN origin map, optional
+ * 0xfffff78000000000 - 0xfffff7bfffffffff   512GB KASAN shadow map, optional
  * 0xfffff80000000000 - 0xfffffbffffffffff   4TB direct map
- * 0xfffffc0000000000 - 0xfffffdffffffffff   unused
+ * 0xfffffc0000000000 - 0xfffffdffffffffff   2TB KMSAN shadow map, optional
  * 0xfffffe0000000000 - 0xffffffffffffffff   2TB kernel map
  *
  * Within the kernel map:
@@ -189,6 +190,14 @@
 #define	KASAN_MIN_ADDRESS	KV4ADDR(KASANPML4I, 0, 0, 0)
 #define	KASAN_MAX_ADDRESS	KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0)
+#define	KMSAN_SHAD_MIN_ADDRESS	KV4ADDR(KMSANSHADPML4I, 0, 0, 0)
+#define	KMSAN_SHAD_MAX_ADDRESS	KV4ADDR(KMSANSHADPML4I + NKMSANSHADPML4E, \
+					0, 0, 0)
+
+#define	KMSAN_ORIG_MIN_ADDRESS	KV4ADDR(KMSANORIGPML4I, 0, 0, 0)
+#define	KMSAN_ORIG_MAX_ADDRESS	KV4ADDR(KMSANORIGPML4I + NKMSANORIGPML4E, \
+					0, 0, 0)
+
 #define	LARGEMAP_MIN_ADDRESS	KV4ADDR(LMSPML4I, 0, 0, 0)
 #define	LARGEMAP_MAX_ADDRESS	KV4ADDR(LMEPML4I + 1, 0, 0, 0)
Index: sys/kern/kern_malloc.c
===================================================================
--- sys/kern/kern_malloc.c
+++ sys/kern/kern_malloc.c
@@ -1168,13 +1168,15 @@
 	vm_kmem_size = round_page(vm_kmem_size);
 
-#ifdef KASAN
 	/*
-	 * With KASAN enabled, dynamically allocated kernel memory is shadowed.
-	 * Account for this when setting the UMA limit.
+	 * With KASAN or KMSAN enabled, dynamically allocated kernel memory is
+	 * shadowed.  Account for this when setting the UMA limit.
 	 */
+#if defined(KASAN)
 	vm_kmem_size = (vm_kmem_size * KASAN_SHADOW_SCALE) /
 	    (KASAN_SHADOW_SCALE + 1);
+#elif defined(KMSAN)
+	vm_kmem_size /= 3;
 #endif
 
 #ifdef DEBUG_MEMGUARD
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -1044,13 +1044,22 @@
 	int tuned_nbuf;
 	long maxbuf, maxbuf_sz, buf_sz, biotmap_sz;
 
-#ifdef KASAN
 	/*
-	 * With KASAN enabled, the kernel map is shadowed. Account for this
-	 * when sizing maps based on the amount of physical memory available.
+	 * With KASAN or KMSAN enabled, the kernel map is shadowed.  Account for
+	 * this when sizing maps based on the amount of physical memory
+	 * available.
 	 */
+#if defined(KASAN)
 	physmem_est = (physmem_est * KASAN_SHADOW_SCALE) /
 	    (KASAN_SHADOW_SCALE + 1);
+#elif defined(KMSAN)
+	physmem_est /= 3;
+
+	/*
+	 * KMSAN cannot reliably determine whether buffer data is initialized
+	 * unless it is updated through a KVA mapping.
+	 */
+	unmapped_buf_allowed = 0;
 #endif
 
 	/*
Index: sys/vm/vm_pager.c
===================================================================
--- sys/vm/vm_pager.c
+++ sys/vm/vm_pager.c
@@ -217,6 +217,15 @@
 	zone = uma_zsecond_create(name, pbuf_ctor, pbuf_dtor, NULL, NULL,
 	    pbuf_zone);
+
+#ifdef KMSAN
+	/*
+	 * Shrink the size of the pbuf pools if KMSAN is enabled, otherwise the
+	 * shadows of the large KVA allocations eat up too much memory.
+	 */
+	max /= 3;
+#endif
+
 	/*
 	 * uma_prealloc() rounds up to items per slab.  If we would prealloc
 	 * immediately on every pbuf_zsecond_create(), we may accumulate too
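
Note (editor's addition, not part of the patch): NKMSANSHADPML4E and NKMSANORIGPML4E are defined equal to NKPML4E because every byte of the kernel map gets exactly one shadow byte (initialization state) and one origin byte (allocation provenance), so both KMSAN maps mirror the kernel map one-to-one. A minimal sketch of the address translation that layout implies is shown below; the helper names are hypothetical, and the real lookups live in the machine-dependent KMSAN runtime, which is not included in this diff.

	/*
	 * Illustrative sketch only, assuming the one-to-one layout above: a
	 * kernel map address keeps its offset from VM_MIN_KERNEL_ADDRESS when
	 * translated into the shadow and origin maps.  Valid only for
	 * addresses inside the kernel map.
	 */
	static inline vm_offset_t
	kmsan_shad_addr_sketch(vm_offset_t va)
	{
		return (va - VM_MIN_KERNEL_ADDRESS + KMSAN_SHAD_MIN_ADDRESS);
	}

	static inline vm_offset_t
	kmsan_orig_addr_sketch(vm_offset_t va)
	{
		return (va - VM_MIN_KERNEL_ADDRESS + KMSAN_ORIG_MIN_ADDRESS);
	}

The same one-to-one relationship is what motivates the MI sizing changes above: with a shadow byte and an origin byte per byte of kernel memory, KMSAN roughly triples memory consumption, hence the divisions by three in kern_malloc.c, vfs_bio.c, and vm_pager.c.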