Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2261,6 +2261,8 @@
 		}
 	}
 	PMAP_UNLOCK(pmap);
+	if (pa == VM_PAGE_TO_PHYS(zero_page))
+		return (0);
 	return (pa);
 }
 
@@ -2304,7 +2306,10 @@
 				    &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
-				vm_page_hold(m);
+				if (m == zero_page)
+					m = NULL;
+				else
+					vm_page_hold(m);
 			}
 		}
 	}
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -1583,6 +1583,8 @@
 		}
 	}
 	PMAP_UNLOCK(pmap);
+	if (rtval == VM_PAGE_TO_PHYS(zero_page))
+		return (0);
 	return (rtval);
 }
 
@@ -1626,7 +1628,10 @@
 				    &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
-				vm_page_hold(m);
+				if (m == zero_page)
+					m = NULL;
+				else
+					vm_page_hold(m);
 			}
 		}
 	}
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -270,6 +270,7 @@
     int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold)
 {
 	vm_page_t m, m_map;
+	vm_object_t obj;
 #if defined(__amd64__) && VM_NRESERVLEVEL > 0
 	vm_page_t m_super;
 	int flags;
@@ -277,11 +278,42 @@
 	int psind, rv;
 
 	MPASS(fs->vp == NULL);
-	m = vm_page_lookup(fs->first_object, fs->first_pindex);
+	obj = fs->first_object;
+	m = vm_page_lookup(obj, fs->first_pindex);
+	if (m == NULL) {
+		if (obj->type != OBJT_DEFAULT || (obj->flags &
+		    (OBJ_ONEMAPPING | OBJ_NOSPLIT)) != OBJ_ONEMAPPING ||
+		    obj->shadow_count != 0 || obj->backing_object != NULL ||
+		    fault_type != VM_PROT_READ || wired || m_hold != NULL)
+			return (KERN_FAILURE);
+
+		/*
+		 * This is a lone nameless default object not
+		 * participating in the shadow chains, and the fault
+		 * is for read.  Install a transient zero page mapping
+		 * to avoid allocating real physical memory until some
+		 * user content is written there.
+		 *
+		 * Write faults allocate and install the real page.
+		 * Pmaps must not return zero_page from pmap_extract()
+		 * and pmap_extract_and_hold() for this optimization
+		 * to work.
+		 */
+#ifdef DIAGNOSTIC
+		vm_page_check_zero(zero_page);
+#endif
+		rv = pmap_enter(fs->map->pmap, vaddr, zero_page, VM_PROT_READ,
+		    fault_type | PMAP_ENTER_NOSLEEP, 0);
+		if (rv == KERN_SUCCESS)
+			goto done;
+		return (rv);
+	}
+
 	/* A busy page can be mapped for read|execute access. */
-	if (m == NULL || ((prot & VM_PROT_WRITE) != 0 &&
-	    vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL)
+	if (((prot & VM_PROT_WRITE) != 0 && vm_page_busied(m)) ||
+	    m->valid != VM_PAGE_BITS_ALL)
 		return (KERN_FAILURE);
+
 	m_map = m;
 	psind = 0;
 #if defined(__amd64__) && VM_NRESERVLEVEL > 0
@@ -301,7 +333,7 @@
 			 * the page that was faulted on).
 			 */
 			flags |= PS_NONE_BUSY;
-			if ((fs->first_object->flags & OBJ_UNMANAGED) == 0)
+			if ((obj->flags & OBJ_UNMANAGED) == 0)
 				flags |= PS_ALL_DIRTY;
 		}
 		if (vm_page_ps_test(m_super, flags, m)) {
@@ -322,7 +354,8 @@
 	vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false);
 	if (psind == 0 && !wired)
 		vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true);
-	VM_OBJECT_RUNLOCK(fs->first_object);
+done:
+	VM_OBJECT_RUNLOCK(obj);
 	vm_map_lookup_done(fs->map, fs->entry);
 	curthread->td_ru.ru_minflt++;
 	return (KERN_SUCCESS);
@@ -730,6 +763,7 @@
 		 * page.  (Default objects are zero-fill, so there is no real
 		 * pager for them.)
 		 */
+
 		if (fs.object->type != OBJT_DEFAULT ||
 		    fs.object == fs.first_object) {
 			if (fs.pindex >= fs.object->size) {
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -610,6 +610,8 @@
 	vm_map_unlock(map);
 }
 
+vm_page_t zero_page;
+
 void
 kmem_init_zero_region(void)
 {
@@ -622,7 +624,7 @@
 	 * zeros, while not using much more physical resources.
 	 */
 	addr = kva_alloc(ZERO_REGION_SIZE);
-	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
+	zero_page = m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -424,6 +424,7 @@
 extern vm_page_t vm_page_array;		/* First resident page in table */
 extern long vm_page_array_size;		/* number of vm_page_t's */
 extern long first_page;			/* first physical page number */
+extern vm_page_t zero_page;
 
 #define VM_PAGE_TO_PHYS(entry)	((entry)->phys_addr)
 
@@ -532,6 +533,7 @@
 vm_page_t vm_page_alloc_freelist_domain(int, int, int);
 bool vm_page_blacklist_add(vm_paddr_t pa, bool verbose);
 void vm_page_change_lock(vm_page_t m, struct mtx **mtx);
+void vm_page_check_zero(vm_page_t m);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -3406,6 +3406,29 @@
 		vm_page_enqueue(m, PQ_ACTIVE);
 }
 
+#if defined(DIAGNOSTIC)
+#if defined(PHYS_TO_DMAP)
+void
+vm_page_check_zero(vm_page_t m)
+{
+	uint64_t *p;
+	int i;
+
+	if (!PMAP_HAS_DMAP)
+		return;
+	p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
+	for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++)
+		KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx",
+		    m, i, (uintmax_t)*p));
+}
+#else /* !PHYS_TO_DMAP */
+void
+vm_page_check_zero(vm_page_t m)
+{
+}
+#endif /* PHYS_TO_DMAP */
+#endif /* DIAGNOSTIC */
+
 /*
  * vm_page_free_prep:
  *
@@ -3420,15 +3443,9 @@
 vm_page_free_prep(vm_page_t m)
 {
 
-#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP)
-	if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) {
-		uint64_t *p;
-		int i;
-		p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
-		for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++)
-			KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx",
-			    m, i, (uintmax_t)*p));
-	}
+#ifdef DIAGNOSTIC
+	if ((m->flags & PG_ZERO) != 0)
+		vm_page_check_zero(m);
 #endif
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		vm_page_lock_assert(m, MA_OWNED);
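
For illustration only, and not part of the patch: a minimal userspace sketch of how the new behavior could be observed, assuming the change works as the vm_fault.c comment describes. The program below, its output format, and the use of ru_maxrss as a rough proxy for resident memory are my own choices; on a patched kernel the read-only touches should resolve to the shared zero_page, so resident memory is expected to grow noticeably only after the pages are written. On an unpatched kernel both numbers should be roughly equal, since read faults on a default object already allocate zero-filled pages.

/*
 * Hypothetical test program, not part of the patch.  Only standard
 * libc/POSIX interfaces are used; the expectation about ru_maxrss is
 * an assumption about the patched kernel's behavior.
 */
#include <sys/mman.h>
#include <sys/resource.h>

#include <err.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct rusage ru;
	volatile char c;
	char *p;
	size_t i, len, pgsz;

	pgsz = (size_t)getpagesize();
	len = 1024 * pgsz;
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	/*
	 * Touch every page read-only; with the patch these soft faults
	 * are expected to map the shared zero page instead of
	 * allocating real memory.
	 */
	for (i = 0; i < len; i += pgsz)
		c = p[i];
	(void)c;
	if (getrusage(RUSAGE_SELF, &ru) != 0)
		err(1, "getrusage");
	printf("after reads:  maxrss %ld KB\n", ru.ru_maxrss);

	/* Write every page; real pages are allocated only now. */
	memset(p, 1, len);
	if (getrusage(RUSAGE_SELF, &ru) != 0)
		err(1, "getrusage");
	printf("after writes: maxrss %ld KB\n", ru.ru_maxrss);

	return (0);
}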