Index: head/sys/vm/device_pager.c
===================================================================
--- head/sys/vm/device_pager.c	(revision 309709)
+++ head/sys/vm/device_pager.c	(revision 309710)
@@ -1,437 +1,458 @@
 /*-
  * Copyright (c) 1990 University of Utah.
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)device_pager.c	8.1 (Berkeley) 6/11/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/mutex.h>
 #include <sys/mman.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 #include <vm/uma.h>
 
 static void dev_pager_init(void);
 static vm_object_t dev_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
     vm_ooffset_t, struct ucred *);
 static void dev_pager_dealloc(vm_object_t);
 static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *);
 static void dev_pager_putpages(vm_object_t, vm_page_t *, int, int, int *);
 static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
 static void dev_pager_free_page(vm_object_t object, vm_page_t m);
+static int dev_pager_populate(vm_object_t object, vm_pindex_t pidx,
+    int fault_type, vm_prot_t, vm_pindex_t *first, vm_pindex_t *last);
 
 /* list of device pager objects */
 static struct pagerlst dev_pager_object_list;
 /* protect list manipulation */
 static struct mtx dev_pager_mtx;
 
+/*
+ * Pager method table that routes the init, alloc, dealloc, getpages,
+ * putpages and haspage methods to the dev_pager_*() functions in this file.
+ */
 struct pagerops devicepagerops = {
 	.pgo_init =	dev_pager_init,
 	.pgo_alloc =	dev_pager_alloc,
 	.pgo_dealloc =	dev_pager_dealloc,
 	.pgo_getpages =	dev_pager_getpages,
 	.pgo_putpages =	dev_pager_putpages,
 	.pgo_haspage =	dev_pager_haspage,
 };
 
+/*
+ * Pager method table that reuses the dev_pager_*() methods and additionally
+ * provides a populate method.  No pgo_init is listed here; dev_pager_init()
+ * is referenced only by devicepagerops.
+ */
 struct pagerops mgtdevicepagerops = {
 	.pgo_alloc =	dev_pager_alloc,
 	.pgo_dealloc =	dev_pager_dealloc,
 	.pgo_getpages =	dev_pager_getpages,
 	.pgo_putpages =	dev_pager_putpages,
 	.pgo_haspage =	dev_pager_haspage,
+	.pgo_populate =	dev_pager_populate,
 };
 
 static int old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
     vm_ooffset_t foff, struct ucred *cred, u_short *color);
 static void old_dev_pager_dtor(void *handle);
 static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
     int prot, vm_page_t *mres);
 
+/*
+ * cdev pager callbacks passed by dev_pager_alloc().  The constructor and
+ * the fault handler are both implemented on top of the character device's
+ * d_mmap method; no populate callback is provided.
+ */
 static struct cdev_pager_ops old_dev_pager_ops = {
 	.cdev_pg_ctor =	old_dev_pager_ctor,
 	.cdev_pg_dtor =	old_dev_pager_dtor,
 	.cdev_pg_fault = old_dev_pager_fault
 };
 
+/*
+ * Pager init method: start with an empty list of device pager objects and
+ * initialize the mutex that protects manipulation of that list.
+ */
 static void
 dev_pager_init(void)
 {
 
 	TAILQ_INIT(&dev_pager_object_list);
 	mtx_init(&dev_pager_mtx, "dev_pager list", NULL, MTX_DEF);
 }
 
+/*
+ * Search the device pager object list for the object registered under
+ * "handle".  The search runs with the list mutex held and the result of
+ * vm_pager_object_lookup() is returned unchanged (cdev_pager_allocate()
+ * treats a NULL result as "not found").
+ */
 vm_object_t
 cdev_pager_lookup(void *handle)
 {
 	vm_object_t object;
 
 	mtx_lock(&dev_pager_mtx);
 	object = vm_pager_object_lookup(&dev_pager_object_list, handle);
 	mtx_unlock(&dev_pager_mtx);
 	return (object);
 }
 
+/*
+ * Find or create the device pager object registered under "handle".
+ *
+ * "tp" must be OBJT_DEVICE or OBJT_MGTDEVICE and "foff" must be page
+ * aligned, otherwise NULL is returned.  NULL is also returned when the
+ * constructor callback in "ops" fails.  The object size, in pages, covers
+ * foff + round_page(size); an object that already exists is only grown,
+ * never shrunk.  A populate callback is accepted only for OBJT_MGTDEVICE
+ * and causes a newly created object to be flagged OBJ_POPULATE.
+ */
 vm_object_t
 cdev_pager_allocate(void *handle, enum obj_type tp, struct cdev_pager_ops *ops,
     vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred)
 {
 	vm_object_t object, object1;
 	vm_pindex_t pindex;
 	u_short color;
 
 	if (tp != OBJT_DEVICE && tp != OBJT_MGTDEVICE)
 		return (NULL);
+	KASSERT(tp == OBJT_MGTDEVICE || ops->cdev_pg_populate == NULL,
+	    ("populate on unmanaged device pager"));
 
 	/*
 	 * Offset should be page aligned.
 	 */
 	if (foff & PAGE_MASK)
 		return (NULL);
 
 	size = round_page(size);
 	pindex = OFF_TO_IDX(foff + size);
 
+	/* The constructor runs before the list is consulted. */
 	if (ops->cdev_pg_ctor(handle, size, prot, foff, cred, &color) != 0)
 		return (NULL);
 	mtx_lock(&dev_pager_mtx);
 
 	/*
 	 * Look up pager, creating as necessary.
 	 */
 	object1 = NULL;
 	object = vm_pager_object_lookup(&dev_pager_object_list, handle);
 	if (object == NULL) {
 		/*
 		 * Allocate object and associate it with the pager.  Initialize
 		 * the object's pg_color based upon the physical address of the
 		 * device's memory.
 		 */
+		/*
+		 * The list mutex is dropped around the allocation, so the
+		 * lookup has to be repeated once it is held again.
+		 */
 		mtx_unlock(&dev_pager_mtx);
 		object1 = vm_object_allocate(tp, pindex);
 		object1->flags |= OBJ_COLORED;
 		object1->pg_color = color;
 		object1->handle = handle;
 		object1->un_pager.devp.ops = ops;
 		object1->un_pager.devp.dev = handle;
 		TAILQ_INIT(&object1->un_pager.devp.devp_pglist);
 		mtx_lock(&dev_pager_mtx);
 		object = vm_pager_object_lookup(&dev_pager_object_list, handle);
 		if (object != NULL) {
 			/*
 			 * We raced with other thread while allocating object.
 			 */
 			if (pindex > object->size)
 				object->size = pindex;
 			KASSERT(object->type == tp,
 			    ("Inconsistent device pager type %p %d",
 			    object, tp));
 			KASSERT(object->un_pager.devp.ops == ops,
 			    ("Inconsistent devops %p %p", object, ops));
 		} else {
 			object = object1;
 			object1 = NULL;
 			object->handle = handle;
 			TAILQ_INSERT_TAIL(&dev_pager_object_list, object,
 			    pager_object_list);
+			if (ops->cdev_pg_populate != NULL)
+				vm_object_set_flag(object, OBJ_POPULATE);
 		}
 	} else {
+		/*
+		 * NOTE(review): the constructor has already run for this
+		 * call, but no destructor call is visible on this path.
+		 * Confirm whether constructor side effects (for example the
+		 * dev_ref() taken by old_dev_pager_ctor()) must be undone
+		 * when an existing object is reused.
+		 */
 		if (pindex > object->size)
 			object->size = pindex;
 		KASSERT(object->type == tp,
 		    ("Inconsistent device pager type %p %d", object, tp));
 	}
 	mtx_unlock(&dev_pager_mtx);
 	if (object1 != NULL) {
+		/*
+		 * The spare object lost the race.  It is re-keyed by its own
+		 * address and put on the list before being dropped;
+		 * dev_pager_dealloc() removes its object from the list
+		 * unconditionally.
+		 */
 		object1->handle = object1;
 		mtx_lock(&dev_pager_mtx);
 		TAILQ_INSERT_TAIL(&dev_pager_object_list, object1,
 		    pager_object_list);
 		mtx_unlock(&dev_pager_mtx);
 		vm_object_deallocate(object1);
 	}
 	return (object);
 }
 
+/*
+ * Pager alloc method: request an OBJT_DEVICE object from
+ * cdev_pager_allocate() using the d_mmap based old_dev_pager_ops callbacks.
+ */
 static vm_object_t
 dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
     vm_ooffset_t foff, struct ucred *cred)
 {
 
 	return (cdev_pager_allocate(handle, OBJT_DEVICE, &old_dev_pager_ops,
 	    size, prot, foff, cred));
 }
 
+/*
+ * Release page "m" of a device pager object.  The object must be write
+ * locked.  For OBJT_MGTDEVICE the page must be managed; pmap_remove_all()
+ * is applied and vm_page_remove() is called under the page lock.  For
+ * OBJT_DEVICE the page is passed to dev_pager_free_page().  Objects of any
+ * other type are left untouched.
+ */
 void
 cdev_pager_free_page(vm_object_t object, vm_page_t m)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (object->type == OBJT_MGTDEVICE) {
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m));
 		pmap_remove_all(m);
 		vm_page_lock(m);
 		vm_page_remove(m);
 		vm_page_unlock(m);
 	} else if (object->type == OBJT_DEVICE)
 		dev_pager_free_page(object, m);
 }
 
+/*
+ * Free an unmanaged page of a write-locked OBJT_DEVICE object: unlink it
+ * from the object's devp_pglist and hand it to vm_page_putfake().
+ */
 static void
 dev_pager_free_page(vm_object_t object, vm_page_t m)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT((object->type == OBJT_DEVICE &&
 	    (m->oflags & VPO_UNMANAGED) != 0),
 	    ("Managed device or page obj %p m %p", object, m));
 	TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, plinks.q);
 	vm_page_putfake(m);
 }
 
+/*
+ * Pager dealloc method.  The object write lock is released while the
+ * destructor callback runs and while the object is taken off the device
+ * pager list, then re-acquired.  For OBJT_DEVICE every page still on
+ * devp_pglist is freed.  Finally the handle is cleared and the object is
+ * marked OBJT_DEAD; the function returns with the object write locked.
+ */
 static void
 dev_pager_dealloc(vm_object_t object)
 {
 	vm_page_t m;
 
 	VM_OBJECT_WUNLOCK(object);
 	object->un_pager.devp.ops->cdev_pg_dtor(object->un_pager.devp.dev);
 
 	mtx_lock(&dev_pager_mtx);
 	TAILQ_REMOVE(&dev_pager_object_list, object, pager_object_list);
 	mtx_unlock(&dev_pager_mtx);
 	VM_OBJECT_WLOCK(object);
 
 	if (object->type == OBJT_DEVICE) {
 		/*
 		 * Free up our fake pages.
 		 */
 		while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist))
 		    != NULL)
 			dev_pager_free_page(object, m);
 	}
 	object->handle = NULL;
 	object->type = OBJT_DEAD;
 }
 
+/*
+ * Pager getpages method.  Exactly one page is requested per call; the
+ * request is forwarded to the cdev_pg_fault callback with PROT_READ.
+ * VM_PAGER_FAIL is returned when no fault callback is installed, otherwise
+ * the callback's result is returned.  On VM_PAGER_OK the page of an
+ * OBJT_DEVICE object is appended to devp_pglist and the optional
+ * read-behind/read-ahead counts are set to zero.  The object must be write
+ * locked on entry and is asserted to be write locked after the callback.
+ */
 static int
 dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
     int *rahead)
 {
 	int error;
 
 	/* Since our haspage reports zero after/before, the count is 1. */
 	KASSERT(count == 1, ("%s: count %d", __func__, count));
 	VM_OBJECT_ASSERT_WLOCKED(object);
+	if (object->un_pager.devp.ops->cdev_pg_fault == NULL)
+		return (VM_PAGER_FAIL);
 	error = object->un_pager.devp.ops->cdev_pg_fault(object,
 	    IDX_TO_OFF(ma[0]->pindex), PROT_READ, &ma[0]);
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	if (error == VM_PAGER_OK) {
+		/* The callback may have replaced ma[0]; check the result. */
 		KASSERT((object->type == OBJT_DEVICE &&
 		     (ma[0]->oflags & VPO_UNMANAGED) != 0) ||
 		    (object->type == OBJT_MGTDEVICE &&
 		     (ma[0]->oflags & VPO_UNMANAGED) == 0),
 		    ("Wrong page type %p %p", ma[0], object));
 		if (object->type == OBJT_DEVICE) {
 			TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist,
 			    ma[0], plinks.q);
 		}
 		if (rbehind)
 			*rbehind = 0;
 		if (rahead)
 			*rahead = 0;
 	}
 
 	return (error);
+}
+
+/*
+ * Pager populate method: forward the request to the cdev_pg_populate
+ * callback of the object's cdev pager ops.  The object must be write
+ * locked.  VM_PAGER_FAIL is returned when no callback is installed,
+ * otherwise the callback's result is returned unchanged.
+ */
+static int
+dev_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type,
+    vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
+{
+	struct cdev_pager_ops *ops;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	ops = object->un_pager.devp.ops;
+	if (ops->cdev_pg_populate != NULL)
+		return (ops->cdev_pg_populate(object, pidx, fault_type,
+		    max_prot, first, last));
+	return (VM_PAGER_FAIL);
 }
 
 static int
 old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
     vm_page_t *mres)
 {
 	vm_paddr_t paddr;
 	vm_page_t m_paddr, page;
 	struct cdev *dev;
 	struct cdevsw *csw;
 	struct file *fpop;
 	struct thread *td;
 	vm_memattr_t memattr, memattr1;
 	int ref, ret;
 
 	memattr = object->memattr;
 
 	VM_OBJECT_WUNLOCK(object);
 
 	dev = object->handle;
 	csw = dev_refthread(dev, &ref);
 	if (csw == NULL) {
 		VM_OBJECT_WLOCK(object);
 		return (VM_PAGER_FAIL);
 	}
 	td = curthread;
 	fpop = td->td_fpop;
 	td->td_fpop = NULL;
 	ret = csw->d_mmap(dev, offset, &paddr, prot, &memattr);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	if (ret != 0) {
 		printf(
 	    "WARNING: dev_pager_getpage: map function returns error %d", ret);
 		VM_OBJECT_WLOCK(object);
 		return (VM_PAGER_FAIL);
 	}
 
 	/* If "paddr" is a real page, perform a sanity check on "memattr". */
 	if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL &&
 	    (memattr1 = pmap_page_get_memattr(m_paddr)) != memattr) {
 		/*
 		 * For the /dev/mem d_mmap routine to return the
 		 * correct memattr, pmap_page_get_memattr() needs to
 		 * be called, which we do there.
 		 */
 		if ((csw->d_flags & D_MEM) == 0) {
 			printf("WARNING: Device driver %s has set "
 			    "\"memattr\" inconsistently (drv %u pmap %u).\n",
 			    csw->d_name, memattr, memattr1);
 		}
 		memattr = memattr1;
 	}
 	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
 		/*
 		 * If the passed in result page is a fake page, update it with
 		 * the new physical address.
 		 */
 		page = *mres;
 		VM_OBJECT_WLOCK(object);
 		vm_page_updatefake(page, paddr, memattr);
 	} else {
 		/*
 		 * Replace the passed in reqpage page with our own fake page and
 		 * free up the all of the original pages.
 		 */
 		page = vm_page_getfake(paddr, memattr);
 		VM_OBJECT_WLOCK(object);
 		vm_page_replace_checked(page, object, (*mres)->pindex, *mres);
 		vm_page_lock(*mres);
 		vm_page_free(*mres);
 		vm_page_unlock(*mres);
 		*mres = page;
 	}
 	page->valid = VM_PAGE_BITS_ALL;
 	return (VM_PAGER_OK);
 }
 
+/*
+ * Pager putpages method.  Any call is treated as a fatal error: the
+ * function panics unconditionally.
+ */
 static void
 dev_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags,
     int *rtvals)
 {
 
 	panic("dev_pager_putpage called");
 }
 
+/*
+ * Pager haspage method: always answers TRUE and reports zero pages before
+ * and after "pindex" through the optional "before" and "after" pointers.
+ */
 static boolean_t
 dev_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
     int *after)
 {
 
 	if (before != NULL)
 		*before = 0;
 	if (after != NULL)
 		*after = 0;
 	return (TRUE);
 }
 
+/*
+ * Constructor callback of old_dev_pager_ops.  "handle" is the struct cdev.
+ *
+ * Every page in [foff, foff + size) is probed with the device's d_mmap
+ * method using protection "prot".  Returns ENXIO when the device cannot be
+ * referenced and EINVAL when d_mmap rejects any page.  On success a device
+ * reference is taken with dev_ref() (old_dev_pager_dtor() releases it with
+ * dev_rel()), "*color" is derived from the physical address of the last
+ * probed page, and 0 is returned.
+ */
 static int
 old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
     vm_ooffset_t foff, struct ucred *cred, u_short *color)
 {
 	struct cdev *dev;
 	struct cdevsw *csw;
 	vm_memattr_t dummy;
 	vm_ooffset_t off;
 	vm_paddr_t paddr;
 	unsigned int npages;
 	int ref;
 
 	/*
 	 * Make sure this device can be mapped.
 	 */
 	dev = handle;
 	csw = dev_refthread(dev, &ref);
 	if (csw == NULL)
 		return (ENXIO);
 
 	/*
 	 * Check that the specified range of the device allows the desired
 	 * protection.
 	 *
 	 * XXX assumes VM_PROT_* == PROT_*
 	 */
 	npages = OFF_TO_IDX(size);
 	paddr = 0; /* Make paddr initialized for the case of size == 0. */
 	for (off = foff; npages--; off += PAGE_SIZE) {
 		if (csw->d_mmap(dev, off, &paddr, (int)prot, &dummy) != 0) {
 			dev_relthread(dev, ref);
 			return (EINVAL);
 		}
 	}
 
 	dev_ref(dev);
 	dev_relthread(dev, ref);
+	/* "off" is one page past the last probed offset at this point. */
 	*color = atop(paddr) - OFF_TO_IDX(off - PAGE_SIZE);
 	return (0);
 }
 
+/*
+ * Destructor callback of old_dev_pager_ops: release the device reference
+ * with dev_rel(); old_dev_pager_ctor() takes one with dev_ref() on success.
+ */
 static void
 old_dev_pager_dtor(void *handle)
 {
 
 	dev_rel(handle);
 }
Index: head/sys/vm/vm_fault.c
===================================================================
--- head/sys/vm/vm_fault.c	(revision 309709)
+++ head/sys/vm/vm_fault.c	(revision 309710)
@@ -1,1542 +1,1679 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  *
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_fault.c	8.4 (Berkeley) 1/12/94
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /*
  *	Page fault handling module.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mman.h>
 #include <sys/proc.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_reserv.h>
 
 #define PFBAK 4
 #define PFFOR 4
 
 #define	VM_FAULT_READ_DEFAULT	(1 + VM_FAULT_READ_AHEAD_INIT)
 #define	VM_FAULT_READ_MAX	(1 + VM_FAULT_READ_AHEAD_MAX)
 
 #define	VM_FAULT_DONTNEED_MIN	1048576
 
+/*
+ * State of one page fault, shared between vm_fault_hold() and its helpers.
+ *
+ *	m, object, pindex	page, object and page index currently being
+ *				examined
+ *	first_m, first_object,
+ *	first_pindex		the same for the top-level object returned
+ *				by vm_map_lookup()
+ *	map, entry		map and map entry found by vm_map_lookup()
+ *	map_generation		map timestamp sampled after the lookup
+ *	lookup_still_valid	true while the map lookup has not been
+ *				released; see unlock_map()
+ *	vp			vnode released with vput() by unlock_vp(),
+ *				or NULL
+ */
 struct faultstate {
 	vm_page_t m;
 	vm_object_t object;
 	vm_pindex_t pindex;
 	vm_page_t first_m;
 	vm_object_t	first_object;
 	vm_pindex_t first_pindex;
 	vm_map_t map;
 	vm_map_entry_t entry;
 	int map_generation;
 	bool lookup_still_valid;
 	struct vnode *vp;
 };
 
 static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr,
 	    int ahead);
 static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
 	    int backward, int forward);
 
+/*
+ * Give up fs->m: drop its exclusive busy state, deactivate it under the
+ * page lock and forget the pointer.
+ */
 static inline void
 release_page(struct faultstate *fs)
 {
 
 	vm_page_xunbusy(fs->m);
 	vm_page_lock(fs->m);
 	vm_page_deactivate(fs->m);
 	vm_page_unlock(fs->m);
 	fs->m = NULL;
 }
 
+/*
+ * Finish the map lookup with vm_map_lookup_done() if it is still marked
+ * valid, and record that it no longer is.  Calling it again is a no-op.
+ */
 static inline void
 unlock_map(struct faultstate *fs)
 {
 
 	if (fs->lookup_still_valid) {
 		vm_map_lookup_done(fs->map, fs->entry);
 		fs->lookup_still_valid = false;
 	}
 }
 
+/*
+ * Release the vnode recorded in fs->vp with vput(), if there is one, and
+ * clear the pointer.
+ */
 static void
 unlock_vp(struct faultstate *fs)
 {
 
 	if (fs->vp != NULL) {
 		vput(fs->vp);
 		fs->vp = NULL;
 	}
 }
 
+/*
+ * Tear down the fault state.  Entered with fs->object write locked: its
+ * paging-in-progress count is dropped and the lock released.  When the
+ * current object is not the top-level object, fs->first_m is freed and the
+ * top-level object's paging-in-progress count is dropped as well.  Finally
+ * the reference on the top-level object, the map lookup and the vnode are
+ * released.
+ */
 static void
 unlock_and_deallocate(struct faultstate *fs)
 {
 
 	vm_object_pip_wakeup(fs->object);
 	VM_OBJECT_WUNLOCK(fs->object);
 	if (fs->object != fs->first_object) {
 		VM_OBJECT_WLOCK(fs->first_object);
 		vm_page_lock(fs->first_m);
 		vm_page_free(fs->first_m);
 		vm_page_unlock(fs->first_m);
 		vm_object_pip_wakeup(fs->first_object);
 		VM_OBJECT_WUNLOCK(fs->first_object);
 		fs->first_m = NULL;
 	}
 	vm_object_deallocate(fs->first_object);
 	unlock_map(fs);
 	unlock_vp(fs);
 }
 
+/*
+ * Update the dirty and VPO_NOSYNC state of page "m" for a fault on "entry".
+ *
+ * Nothing is done for unmanaged pages, or when the mapping is not writable
+ * ("prot" lacks VM_PROT_WRITE) and VM_FAULT_DIRTY is not requested.  The
+ * page is dirtied when VM_FAULT_DIRTY is set, or when the fault is a write
+ * that is not a wiring fault.  With "set_wd" true the object is marked
+ * writeable-dirty and the page lock is not taken; with "set_wd" false the
+ * page lock is held around the VPO_NOSYNC and dirty updates instead.  The
+ * page's object must be locked.
+ */
 static void
 vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot,
     vm_prot_t fault_type, int fault_flags, bool set_wd)
 {
 	bool need_dirty;
 
 	if (((prot & VM_PROT_WRITE) == 0 &&
 	    (fault_flags & VM_FAULT_DIRTY) == 0) ||
 	    (m->oflags & VPO_UNMANAGED) != 0)
 		return;
 
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 
 	need_dirty = ((fault_type & VM_PROT_WRITE) != 0 &&
 	    (fault_flags & VM_FAULT_WIRE) == 0) ||
 	    (fault_flags & VM_FAULT_DIRTY) != 0;
 
 	if (set_wd)
 		vm_object_set_writeable_dirty(m->object);
 	else
 		/*
 		 * If two callers of vm_fault_dirty() with set_wd ==
 		 * FALSE, one for the map entry with MAP_ENTRY_NOSYNC
 		 * flag set, other with flag clear, race, it is
 		 * possible for the no-NOSYNC thread to see m->dirty
 		 * != 0 and not clear VPO_NOSYNC.  Take vm_page lock
 		 * around manipulation of VPO_NOSYNC and
 		 * vm_page_dirty() call, to avoid the race and keep
 		 * m->oflags consistent.
 		 */
 		vm_page_lock(m);
 
 	/*
 	 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
 	 * if the page is already dirty to prevent data written with
 	 * the expectation of being synced from not being synced.
 	 * Likewise if this entry does not request NOSYNC then make
 	 * sure the page isn't marked NOSYNC.  Applications sharing
 	 * data should use the same flags to avoid ping ponging.
 	 */
 	if ((entry->eflags & MAP_ENTRY_NOSYNC) != 0) {
 		if (m->dirty == 0) {
 			m->oflags |= VPO_NOSYNC;
 		}
 	} else {
 		m->oflags &= ~VPO_NOSYNC;
 	}
 
 	/*
 	 * If the fault is a write, we know that this page is being
 	 * written NOW so dirty it explicitly to save on
 	 * pmap_is_modified() calls later.
 	 *
 	 * Also tell the backing pager, if any, that it should remove
 	 * any swap backing since the page is now dirty.
 	 */
 	if (need_dirty)
 		vm_page_dirty(m);
 	if (!set_wd)
 		vm_page_unlock(m);
 	if (need_dirty)
 		vm_pager_page_unswapped(m);
 }
 
+/*
+ * If the caller asked for the faulted page ("m_hold" is not NULL), store
+ * "m" there and hold the page, taking the page lock around vm_page_hold().
+ */
 static void
 vm_fault_fill_hold(vm_page_t *m_hold, vm_page_t m)
 {
 
 	if (m_hold != NULL) {
 		*m_hold = m;
 		vm_page_lock(m);
 		vm_page_hold(m);
 		vm_page_unlock(m);
 	}
 }
 
 /*
+ * Fast path for a fault that only needs to map a page which is already
+ * resident in the top-level object.  The caller read-locks
+ * fs.first_object.
+ *
+ * KERN_FAILURE is returned, with nothing changed, when the page is not
+ * resident, is not fully valid, or is busy while write access is wanted.
+ * An error from the non-sleeping pmap_enter() is returned as is.
+ * Otherwise the page is mapped, optionally held, its dirty state is
+ * updated, neighbouring pages are prefaulted unless the mapping is wired,
+ * and the fault is accounted as a minor fault.
+ *
  * Unlocks fs.first_object and fs.map on success.
  */
 static int
 vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot,
     int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold)
 {
 	vm_page_t m;
 	int rv;
 
 	MPASS(fs->vp == NULL);
 	m = vm_page_lookup(fs->first_object, fs->first_pindex);
 	/* A busy page can be mapped for read|execute access. */
 	if (m == NULL || ((prot & VM_PROT_WRITE) != 0 &&
 	    vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL)
 		return (KERN_FAILURE);
 	rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type |
 	    PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), 0);
 	if (rv != KERN_SUCCESS)
 		return (rv);
 	vm_fault_fill_hold(m_hold, m);
 	vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false);
 	VM_OBJECT_RUNLOCK(fs->first_object);
 	if (!wired)
 		vm_fault_prefault(fs, vaddr, PFBAK, PFFOR);
 	vm_map_lookup_done(fs->map, fs->entry);
 	curthread->td_ru.ru_minflt++;
 	return (KERN_SUCCESS);
 }
 
+/*
+ * Re-acquire the read lock on fs->map after it has been dropped, and mark
+ * the lookup as valid again so that unlock_map() will release it.
+ *
+ * The top-level object must be write locked with a paging-in-progress
+ * reference held.  The map lock is first tried without blocking; if that
+ * fails the object lock is released while waiting for the map lock and
+ * taken again afterwards, so the thread never blocks on the map lock with
+ * the object lock held.  The caller must compare the map timestamp with
+ * fs->map_generation before trusting fs->entry again.
+ */
+static void
+vm_fault_restore_map_lock(struct faultstate *fs)
+{
+
+	VM_OBJECT_ASSERT_WLOCKED(fs->first_object);
+	MPASS(fs->first_object->paging_in_progress > 0);
+
+	if (!vm_map_trylock_read(fs->map)) {
+		VM_OBJECT_WUNLOCK(fs->first_object);
+		vm_map_lock_read(fs->map);
+		VM_OBJECT_WLOCK(fs->first_object);
+	}
+	fs->lookup_still_valid = true;
+}
+
+
+/*
+ * Release pages which the pager's populate method instantiated but which
+ * this fault is not going to map: every resident page with an index in
+ * [first, last] of the write-locked object is deactivated and its
+ * exclusive busy state is dropped.
+ */
+static void
+vm_fault_populate_cleanup(vm_object_t object, vm_pindex_t first,
+    vm_pindex_t last)
+{
+	vm_page_t m;
+	vm_pindex_t pidx;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	MPASS(first <= last);
+	for (pidx = first, m = vm_page_lookup(object, pidx);
+	    pidx <= last; pidx++, m = vm_page_next(m)) {
+		MPASS(m != NULL);
+		MPASS(vm_page_xbusied(m));
+		vm_page_lock(m);
+		vm_page_deactivate(m);
+		vm_page_unlock(m);
+		vm_page_xunbusy(m);
+	}
+}
+
+/*
+ * Resolve a fault on the top-level object through the pager's populate
+ * method and map every page of the returned run that lies inside the
+ * faulting map entry.
+ *
+ * Entered with fs->first_object write locked and busy for paging, and with
+ * the map lookup still valid.  The map lock (and the vnode, if any) is
+ * dropped around the pager call.
+ *
+ * Returns:
+ *	KERN_SUCCESS		the pages were mapped, wired or activated,
+ *				and unbusied
+ *	KERN_NOT_RECEIVER	the pager answered VM_PAGER_BAD to request
+ *				normal fault handling; the map lock has been
+ *				restored
+ *	KERN_RESOURCE_SHORTAGE	the map changed while it was unlocked (see
+ *				the RetryFault remarks below)
+ *	KERN_FAILURE		any other pager error; the map lock is not
+ *				restored
+ */
+static int
+vm_fault_populate(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot,
+    int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold)
+{
+	vm_page_t m;
+	vm_pindex_t f_first, f_last, map_first, map_last, pidx;
+	int rv;
+
+	MPASS(fs->object == fs->first_object);
+	VM_OBJECT_ASSERT_WLOCKED(fs->first_object);
+	MPASS(fs->first_object->paging_in_progress > 0);
+	MPASS(fs->first_object->backing_object == NULL);
+	MPASS(fs->lookup_still_valid);
+
+	/* Offer the page range of the whole map entry to the pager. */
+	f_first = OFF_TO_IDX(fs->entry->offset);
+	f_last = OFF_TO_IDX(fs->entry->offset + fs->entry->end -
+	    fs->entry->start) - 1;
+	unlock_map(fs);
+	unlock_vp(fs);
+
+	/*
+	 * Call the pager (driver) populate() method.
+	 *
+	 * There is no guarantee that the method will be called again
+	 * if the current fault is for read, and a future fault is
+	 * for write.  Report the entry's maximum allowed protection
+	 * to the driver.
+	 */
+	rv = vm_pager_populate(fs->first_object, fs->first_pindex,
+	    fault_type, fs->entry->max_protection, &f_first, &f_last);
+
+	VM_OBJECT_ASSERT_WLOCKED(fs->first_object);
+	if (rv == VM_PAGER_BAD) {
+		/*
+		 * VM_PAGER_BAD is the backdoor for a pager to request
+		 * normal fault handling.
+		 */
+		vm_fault_restore_map_lock(fs);
+		if (fs->map->timestamp != fs->map_generation)
+			return (KERN_RESOURCE_SHORTAGE); /* RetryFault */
+		return (KERN_NOT_RECEIVER);
+	}
+	if (rv != VM_PAGER_OK)
+		return (KERN_FAILURE); /* AKA SIGSEGV */
+
+	/* Ensure that the driver is obeying the interface. */
+	MPASS(f_first <= f_last);
+	MPASS(fs->first_pindex <= f_last);
+	MPASS(fs->first_pindex >= f_first);
+	MPASS(f_last < fs->first_object->size);
+
+	vm_fault_restore_map_lock(fs);
+	if (fs->map->timestamp != fs->map_generation) {
+		/*
+		 * The map changed while it was unlocked.  The pager left
+		 * its pages exclusively busied; release them before the
+		 * fault is retried, otherwise they stay busy forever.
+		 */
+		vm_fault_populate_cleanup(fs->first_object, f_first, f_last);
+		return (KERN_RESOURCE_SHORTAGE); /* RetryFault */
+	}
+
+	/*
+	 * Clip pager response to fit into the vm_map_entry.  The last
+	 * page index of the entry is computed exactly as it was for the
+	 * hint above (note the "- 1").  Pages instantiated by the pager
+	 * outside of the entry are not mapped and must be unbusied here.
+	 */
+	map_first = OFF_TO_IDX(fs->entry->offset);
+	if (map_first > f_first) {
+		vm_fault_populate_cleanup(fs->first_object, f_first,
+		    map_first - 1);
+		f_first = map_first;
+	}
+	map_last = OFF_TO_IDX(fs->entry->offset + fs->entry->end -
+	    fs->entry->start) - 1;
+	if (map_last < f_last) {
+		vm_fault_populate_cleanup(fs->first_object, map_last + 1,
+		    f_last);
+		f_last = map_last;
+	}
+
+	pidx = f_first;
+	for (m = vm_page_lookup(fs->first_object, pidx); pidx <= f_last;
+	    pidx++, m = vm_page_next(m)) {
+		/*
+		 * Check each page to ensure that the driver is
+		 * obeying the interface: the page must be installed
+		 * in the object, fully valid, and exclusively busied.
+		 */
+		MPASS(m != NULL);
+		MPASS(vm_page_xbusied(m));
+		MPASS(m->valid == VM_PAGE_BITS_ALL);
+		MPASS(m->object == fs->first_object);
+		MPASS(m->pindex == pidx);
+
+		vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags,
+		    true);
+		/* The object lock is dropped around pmap_enter(). */
+		VM_OBJECT_WUNLOCK(fs->first_object);
+		pmap_enter(fs->map->pmap, fs->entry->start + IDX_TO_OFF(pidx) -
+		    fs->entry->offset, m, prot, fault_type | (wired ?
+		    PMAP_ENTER_WIRED : 0), 0);
+		VM_OBJECT_WLOCK(fs->first_object);
+		/* Only the page that actually faulted is handed back. */
+		if (pidx == fs->first_pindex)
+			vm_fault_fill_hold(m_hold, m);
+		vm_page_lock(m);
+		if ((fault_flags & VM_FAULT_WIRE) != 0) {
+			KASSERT(wired, ("VM_FAULT_WIRE && !wired"));
+			vm_page_wire(m);
+		} else {
+			vm_page_activate(m);
+		}
+		vm_page_unlock(m);
+		vm_page_xunbusy(m);
+	}
+	curthread->td_ru.ru_majflt++;
+	return (KERN_SUCCESS);
+}
+
 /*
  *	vm_fault:
  *
  *	Handle a page fault occurring at the given address,
  *	requiring the given permissions, in the map specified.
  *	If successful, the page is inserted into the
  *	associated physical map.
  *
  *	NOTE: the given address should be truncated to the
  *	proper page address.
  *
  *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
  *	a standard error specifying why the fault is fatal is returned.
  *
+ *	KERN_PROTECTION_FAILURE is returned immediately, without consulting
+ *	the map, when the current thread has TDP_NOFAULTING set.
+ *
+ *	The work is done by vm_fault_hold(), called with the page-truncated
+ *	address and no page hold request.  With KTRACE, faults outside of
+ *	kernel_map are traced before and after that call.
+ *
  *	The map in question must be referenced, and remains so.
  *	Caller may hold no locks.
  */
 int
 vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags)
 {
 	struct thread *td;
 	int result;
 
 	td = curthread;
 	if ((td->td_pflags & TDP_NOFAULTING) != 0)
 		return (KERN_PROTECTION_FAILURE);
 #ifdef KTRACE
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
 		ktrfault(vaddr, fault_type);
 #endif
 	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
 	    NULL);
 #ifdef KTRACE
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
 		ktrfaultend(result);
 #endif
 	return (result);
 }
 
 int
 vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	struct faultstate fs;
 	struct vnode *vp;
 	vm_object_t next_object, retry_object;
 	vm_offset_t e_end, e_start;
 	vm_pindex_t retry_pindex;
 	vm_prot_t prot, retry_prot;
 	int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
 	int locked, nera, result, rv;
 	u_char behavior;
 	boolean_t wired;	/* Passed by reference. */
 	bool dead, growstack, hardfault, is_first_object_locked;
 
 	PCPU_INC(cnt.v_vm_faults);
 	fs.vp = NULL;
 	faultcount = 0;
 	nera = -1;
 	growstack = true;
 	hardfault = false;
 
 RetryFault:;
 
 	/*
 	 * Find the backing store object and offset into it to begin the
 	 * search.
 	 */
 	fs.map = map;
 	result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
 	    &fs.first_object, &fs.first_pindex, &prot, &wired);
 	if (result != KERN_SUCCESS) {
 		if (growstack && result == KERN_INVALID_ADDRESS &&
 		    map != kernel_map) {
 			result = vm_map_growstack(curproc, vaddr);
 			if (result != KERN_SUCCESS)
 				return (KERN_FAILURE);
 			growstack = false;
 			goto RetryFault;
 		}
 		unlock_vp(&fs);
 		return (result);
 	}
 
 	fs.map_generation = fs.map->timestamp;
 
 	if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
 		panic("vm_fault: fault on nofault entry, addr: %lx",
 		    (u_long)vaddr);
 	}
 
 	if (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION &&
 	    fs.entry->wiring_thread != curthread) {
 		vm_map_unlock_read(fs.map);
 		vm_map_lock(fs.map);
 		if (vm_map_lookup_entry(fs.map, vaddr, &fs.entry) &&
 		    (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) {
 			unlock_vp(&fs);
 			fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 			vm_map_unlock_and_wait(fs.map, 0);
 		} else
 			vm_map_unlock(fs.map);
 		goto RetryFault;
 	}
 
 	if (wired)
 		fault_type = prot | (fault_type & VM_PROT_COPY);
 	else
 		KASSERT((fault_flags & VM_FAULT_WIRE) == 0,
 		    ("!wired && VM_FAULT_WIRE"));
 
 	/*
 	 * Try to avoid lock contention on the top-level object through
 	 * special-case handling of some types of page faults, specifically,
 	 * those that are both (1) mapping an existing page from the top-
 	 * level object and (2) not having to mark that object as containing
 	 * dirty pages.  Under these conditions, a read lock on the top-level
 	 * object suffices, allowing multiple page faults of a similar type to
 	 * run in parallel on the same top-level object.
 	 */
 	if (fs.vp == NULL /* avoid locked vnode leak */ &&
 	    (fault_flags & (VM_FAULT_WIRE | VM_FAULT_DIRTY)) == 0 &&
 	    /* avoid calling vm_object_set_writeable_dirty() */
 	    ((prot & VM_PROT_WRITE) == 0 ||
 	    (fs.first_object->type != OBJT_VNODE &&
 	    (fs.first_object->flags & OBJ_TMPFS_NODE) == 0) ||
 	    (fs.first_object->flags & OBJ_MIGHTBEDIRTY) != 0)) {
 		VM_OBJECT_RLOCK(fs.first_object);
 		if ((prot & VM_PROT_WRITE) == 0 ||
 		    (fs.first_object->type != OBJT_VNODE &&
 		    (fs.first_object->flags & OBJ_TMPFS_NODE) == 0) ||
 		    (fs.first_object->flags & OBJ_MIGHTBEDIRTY) != 0) {
 			rv = vm_fault_soft_fast(&fs, vaddr, prot, fault_type,
 			    fault_flags, wired, m_hold);
 			if (rv == KERN_SUCCESS)
 				return (rv);
 		}
 		if (!VM_OBJECT_TRYUPGRADE(fs.first_object)) {
 			VM_OBJECT_RUNLOCK(fs.first_object);
 			VM_OBJECT_WLOCK(fs.first_object);
 		}
 	} else {
 		VM_OBJECT_WLOCK(fs.first_object);
 	}
 
 	/*
 	 * Make a reference to this object to prevent its disposal while we
 	 * are messing with it.  Once we have the reference, the map is free
 	 * to be diddled.  Since objects reference their shadows (and copies),
 	 * they will stay around as well.
 	 *
 	 * Bump the paging-in-progress count to prevent size changes (e.g. 
 	 * truncation operations) during I/O.
 	 */
 	vm_object_reference_locked(fs.first_object);
 	vm_object_pip_add(fs.first_object, 1);
 
 	fs.lookup_still_valid = true;
 
 	fs.first_m = NULL;
 
 	/*
 	 * Search for the page at object/offset.
 	 */
 	fs.object = fs.first_object;
 	fs.pindex = fs.first_pindex;
 	while (TRUE) {
 		/*
 		 * If the object is marked for imminent termination,
 		 * we retry here, since the collapse pass has raced
 		 * with us.  Otherwise, if we see terminally dead
 		 * object, return fail.
 		 */
 		if ((fs.object->flags & OBJ_DEAD) != 0) {
 			dead = fs.object->type == OBJT_DEAD;
 			unlock_and_deallocate(&fs);
 			if (dead)
 				return (KERN_PROTECTION_FAILURE);
 			pause("vmf_de", 1);
 			goto RetryFault;
 		}
 
 		/*
 		 * See if page is resident
 		 */
 		fs.m = vm_page_lookup(fs.object, fs.pindex);
 		if (fs.m != NULL) {
 			/*
 			 * Wait/Retry if the page is busy.  We have to do this
 			 * if the page is either exclusive or shared busy
 			 * because the vm_pager may be using read busy for
 			 * pageouts (and even pageins if it is the vnode
 			 * pager), and we could end up trying to pagein and
 			 * pageout the same page simultaneously.
 			 *
 			 * We can theoretically allow the busy case on a read
 			 * fault if the page is marked valid, but since such
 			 * pages are typically already pmap'd, putting that
 			 * special case in might be more effort than it is
 			 * worth.  We cannot under any circumstances mess
 			 * around with a shared busied page except, perhaps,
 			 * to pmap it.
 			 */
 			if (vm_page_busied(fs.m)) {
 				/*
 				 * Reference the page before unlocking and
 				 * sleeping so that the page daemon is less
 				 * likely to reclaim it. 
 				 */
 				vm_page_aflag_set(fs.m, PGA_REFERENCED);
 				if (fs.object != fs.first_object) {
 					if (!VM_OBJECT_TRYWLOCK(
 					    fs.first_object)) {
 						VM_OBJECT_WUNLOCK(fs.object);
 						VM_OBJECT_WLOCK(fs.first_object);
 						VM_OBJECT_WLOCK(fs.object);
 					}
 					vm_page_lock(fs.first_m);
 					vm_page_free(fs.first_m);
 					vm_page_unlock(fs.first_m);
 					vm_object_pip_wakeup(fs.first_object);
 					VM_OBJECT_WUNLOCK(fs.first_object);
 					fs.first_m = NULL;
 				}
 				unlock_map(&fs);
 				if (fs.m == vm_page_lookup(fs.object,
 				    fs.pindex)) {
 					vm_page_sleep_if_busy(fs.m, "vmpfw");
 				}
 				vm_object_pip_wakeup(fs.object);
 				VM_OBJECT_WUNLOCK(fs.object);
 				PCPU_INC(cnt.v_intrans);
 				vm_object_deallocate(fs.first_object);
 				goto RetryFault;
 			}
 			vm_page_lock(fs.m);
 			vm_page_remque(fs.m);
 			vm_page_unlock(fs.m);
 
 			/*
 			 * Mark page busy for other processes, and the 
 			 * pagedaemon.  If it still isn't completely valid
 			 * (readable), jump to readrest, else break-out ( we
 			 * found the page ).
 			 */
 			vm_page_xbusy(fs.m);
 			if (fs.m->valid != VM_PAGE_BITS_ALL)
 				goto readrest;
 			break;
 		}
 		KASSERT(fs.m == NULL, ("fs.m should be NULL, not %p", fs.m));
 
 		/*
 		 * Page is not resident.  If the pager might contain the page
 		 * or this is the beginning of the search, allocate a new
 		 * page.  (Default objects are zero-fill, so there is no real
 		 * pager for them.)
 		 */
 		if (fs.object->type != OBJT_DEFAULT ||
 		    fs.object == fs.first_object) {
 			if (fs.pindex >= fs.object->size) {
 				unlock_and_deallocate(&fs);
 				return (KERN_PROTECTION_FAILURE);
+			}
+
+			if (fs.object == fs.first_object &&
+			    (fs.first_object->flags & OBJ_POPULATE) != 0 &&
+			    fs.first_object->shadow_count == 0) {
+				rv = vm_fault_populate(&fs, vaddr, prot,
+				    fault_type, fault_flags, wired, m_hold);
+				switch (rv) {
+				case KERN_SUCCESS:
+				case KERN_FAILURE:
+					unlock_and_deallocate(&fs);
+					return (rv);
+				case KERN_RESOURCE_SHORTAGE:
+					unlock_and_deallocate(&fs);
+					goto RetryFault;
+				case KERN_NOT_RECEIVER:
+					/*
+					 * Pager's populate() method
+					 * returned VM_PAGER_BAD.
+					 */
+					break;
+				default:
+					panic("inconsistent return codes");
+				}
 			}
 
 			/*
 			 * Allocate a new page for this object/offset pair.
 			 *
 			 * Unlocked read of the p_flag is harmless. At
 			 * worst, the P_KILLED might be not observed
 			 * there, and allocation can fail, causing
 			 * restart and new reading of the p_flag.
 			 */
 			if (!vm_page_count_severe() || P_KILLED(curproc)) {
 #if VM_NRESERVLEVEL > 0
 				vm_object_color(fs.object, atop(vaddr) -
 				    fs.pindex);
 #endif
 				alloc_req = P_KILLED(curproc) ?
 				    VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL;
 				if (fs.object->type != OBJT_VNODE &&
 				    fs.object->backing_object == NULL)
 					alloc_req |= VM_ALLOC_ZERO;
 				fs.m = vm_page_alloc(fs.object, fs.pindex,
 				    alloc_req);
 			}
 			if (fs.m == NULL) {
 				unlock_and_deallocate(&fs);
 				VM_WAITPFAULT;
 				goto RetryFault;
 			}
 		}
 
 readrest:
 		/*
 		 * At this point, we have either allocated a new page or found
 		 * an existing page that is only partially valid.
 		 *
 		 * We hold a reference on the current object and the page is
 		 * exclusive busied.
 		 */
 
 		/*
 		 * If the pager for the current object might have the page,
 		 * then determine the number of additional pages to read and
 		 * potentially reprioritize previously read pages for earlier
 		 * reclamation.  These operations should only be performed
 		 * once per page fault.  Even if the current pager doesn't
 		 * have the page, the number of additional pages to read will
 		 * apply to subsequent objects in the shadow chain.
 		 */
 		if (fs.object->type != OBJT_DEFAULT && nera == -1 &&
 		    !P_KILLED(curproc)) {
 			KASSERT(fs.lookup_still_valid, ("map unlocked"));
 			era = fs.entry->read_ahead;
 			behavior = vm_map_entry_behavior(fs.entry);
 			if (behavior == MAP_ENTRY_BEHAV_RANDOM) {
 				nera = 0;
 			} else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) {
 				nera = VM_FAULT_READ_AHEAD_MAX;
 				if (vaddr == fs.entry->next_read)
 					vm_fault_dontneed(&fs, vaddr, nera);
 			} else if (vaddr == fs.entry->next_read) {
 				/*
 				 * This is a sequential fault.  Arithmetically
 				 * increase the requested number of pages in
 				 * the read-ahead window.  The requested
 				 * number of pages is "# of sequential faults
 				 * x (read ahead min + 1) + read ahead min"
 				 */
 				nera = VM_FAULT_READ_AHEAD_MIN;
 				if (era > 0) {
 					nera += era + 1;
 					if (nera > VM_FAULT_READ_AHEAD_MAX)
 						nera = VM_FAULT_READ_AHEAD_MAX;
 				}
 				if (era == VM_FAULT_READ_AHEAD_MAX)
 					vm_fault_dontneed(&fs, vaddr, nera);
 			} else {
 				/*
 				 * This is a non-sequential fault.
 				 */
 				nera = 0;
 			}
 			if (era != nera) {
 				/*
 				 * A read lock on the map suffices to update
 				 * the read ahead count safely.
 				 */
 				fs.entry->read_ahead = nera;
 			}
 
 			/*
 			 * Prepare for unlocking the map.  Save the map
 			 * entry's start and end addresses, which are used to
 			 * optimize the size of the pager operation below.
 			 * Even if the map entry's addresses change after
 			 * unlocking the map, using the saved addresses is
 			 * safe.
 			 */
 			e_start = fs.entry->start;
 			e_end = fs.entry->end;
 		}
 
 		/*
 		 * Call the pager to retrieve the page if there is a chance
 		 * that the pager has it, and potentially retrieve additional
 		 * pages at the same time.
 		 */
 		if (fs.object->type != OBJT_DEFAULT) {
 			/*
 			 * Release the map lock before locking the vnode or
 			 * sleeping in the pager.  (If the current object has
 			 * a shadow, then an earlier iteration of this loop
 			 * may have already unlocked the map.)
 			 */
 			unlock_map(&fs);
 
 			if (fs.object->type == OBJT_VNODE &&
 			    (vp = fs.object->handle) != fs.vp) {
 				/*
 				 * Perform an unlock in case the desired vnode
 				 * changed while the map was unlocked during a
 				 * retry.
 				 */
 				unlock_vp(&fs);
 
 				locked = VOP_ISLOCKED(vp);
 				if (locked != LK_EXCLUSIVE)
 					locked = LK_SHARED;
 
 				/*
 				 * We must not sleep acquiring the vnode lock
 				 * while we have the page exclusive busied or
 				 * the object's paging-in-progress count
 				 * incremented.  Otherwise, we could deadlock.
 				 */
 				error = vget(vp, locked | LK_CANRECURSE |
 				    LK_NOWAIT, curthread);
 				if (error != 0) {
 					vhold(vp);
 					release_page(&fs);
 					unlock_and_deallocate(&fs);
 					error = vget(vp, locked | LK_RETRY |
 					    LK_CANRECURSE, curthread);
 					vdrop(vp);
 					fs.vp = vp;
 					KASSERT(error == 0,
 					    ("vm_fault: vget failed"));
 					goto RetryFault;
 				}
 				fs.vp = vp;
 			}
 			KASSERT(fs.vp == NULL || !fs.map->system_map,
 			    ("vm_fault: vnode-backed object mapped by system map"));
 
 			/*
 			 * Page in the requested page and hint the pager,
 			 * that it may bring up surrounding pages.
 			 */
 			if (nera == -1 || behavior == MAP_ENTRY_BEHAV_RANDOM ||
 			    P_KILLED(curproc)) {
 				behind = 0;
 				ahead = 0;
 			} else {
 				/* Is this a sequential fault? */
 				if (nera > 0) {
 					behind = 0;
 					ahead = nera;
 				} else {
 					/*
 					 * Request a cluster of pages that is
 					 * aligned to a VM_FAULT_READ_DEFAULT
 					 * page offset boundary within the
 					 * object.  Alignment to a page offset
 					 * boundary is more likely to coincide
 					 * with the underlying file system
 					 * block than alignment to a virtual
 					 * address boundary.
 					 */
 					cluster_offset = fs.pindex %
 					    VM_FAULT_READ_DEFAULT;
 					behind = ulmin(cluster_offset,
 					    atop(vaddr - e_start));
 					ahead = VM_FAULT_READ_DEFAULT - 1 -
 					    cluster_offset;
 				}
 				ahead = ulmin(ahead, atop(e_end - vaddr) - 1);
 			}
 			rv = vm_pager_get_pages(fs.object, &fs.m, 1,
 			    &behind, &ahead);
 			if (rv == VM_PAGER_OK) {
 				faultcount = behind + 1 + ahead;
 				hardfault = true;
 				break; /* break to PAGE HAS BEEN FOUND */
 			}
 			if (rv == VM_PAGER_ERROR)
 				printf("vm_fault: pager read error, pid %d (%s)\n",
 				    curproc->p_pid, curproc->p_comm);
 
 			/*
 			 * If an I/O error occurred or the requested page was
 			 * outside the range of the pager, clean up and return
 			 * an error.
 			 */
 			if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
 				vm_page_lock(fs.m);
 				if (fs.m->wire_count == 0)
 					vm_page_free(fs.m);
 				else
 					vm_page_xunbusy_maybelocked(fs.m);
 				vm_page_unlock(fs.m);
 				fs.m = NULL;
 				unlock_and_deallocate(&fs);
 				return (rv == VM_PAGER_ERROR ? KERN_FAILURE :
 				    KERN_PROTECTION_FAILURE);
 			}
 
 			/*
 			 * The requested page does not exist at this object/
 			 * offset.  Remove the invalid page from the object,
 			 * waking up anyone waiting for it, and continue on to
 			 * the next object.  However, if this is the top-level
 			 * object, we must leave the busy page in place to
 			 * prevent another process from rushing past us, and
 			 * inserting the page in that object at the same time
 			 * that we are.
 			 */
 			if (fs.object != fs.first_object) {
 				vm_page_lock(fs.m);
 				if (fs.m->wire_count == 0)
 					vm_page_free(fs.m);
 				else
 					vm_page_xunbusy_maybelocked(fs.m);
 				vm_page_unlock(fs.m);
 				fs.m = NULL;
 			}
 		}
 
 		/*
 		 * We get here if the object has default pager (or unwiring) 
 		 * or the pager doesn't have the page.
 		 */
 		if (fs.object == fs.first_object)
 			fs.first_m = fs.m;
 
 		/*
 		 * Move on to the next object.  Lock the next object before
 		 * unlocking the current one.
 		 */
 		next_object = fs.object->backing_object;
 		if (next_object == NULL) {
 			/*
 			 * If there's no object left, fill the page in the top
 			 * object with zeros.
 			 */
 			if (fs.object != fs.first_object) {
 				vm_object_pip_wakeup(fs.object);
 				VM_OBJECT_WUNLOCK(fs.object);
 
 				fs.object = fs.first_object;
 				fs.pindex = fs.first_pindex;
 				fs.m = fs.first_m;
 				VM_OBJECT_WLOCK(fs.object);
 			}
 			fs.first_m = NULL;
 
 			/*
 			 * Zero the page if necessary and mark it valid.
 			 */
 			if ((fs.m->flags & PG_ZERO) == 0) {
 				pmap_zero_page(fs.m);
 			} else {
 				PCPU_INC(cnt.v_ozfod);
 			}
 			PCPU_INC(cnt.v_zfod);
 			fs.m->valid = VM_PAGE_BITS_ALL;
 			/* Don't try to prefault neighboring pages. */
 			faultcount = 1;
 			break;	/* break to PAGE HAS BEEN FOUND */
 		} else {
 			KASSERT(fs.object != next_object,
 			    ("object loop %p", next_object));
 			VM_OBJECT_WLOCK(next_object);
 			vm_object_pip_add(next_object, 1);
 			if (fs.object != fs.first_object)
 				vm_object_pip_wakeup(fs.object);
 			fs.pindex +=
 			    OFF_TO_IDX(fs.object->backing_object_offset);
 			VM_OBJECT_WUNLOCK(fs.object);
 			fs.object = next_object;
 		}
 	}
 
 	vm_page_assert_xbusied(fs.m);
 
 	/*
 	 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
 	 * is held.]
 	 */
 
 	/*
 	 * If the page is being written, but isn't already owned by the
 	 * top-level object, we have to copy it into a new page owned by the
 	 * top-level object.
 	 */
 	if (fs.object != fs.first_object) {
 		/*
 		 * We only really need to copy if we want to write it.
 		 */
 		if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
 			/*
 			 * This allows pages to be virtually copied from a 
 			 * backing_object into the first_object, where the 
 			 * backing object has no other refs to it, and cannot
 			 * gain any more refs.  Instead of a bcopy, we just 
 			 * move the page from the backing object to the 
 			 * first object.  Note that we must mark the page 
 			 * dirty in the first object so that it will go out 
 			 * to swap when needed.
 			 */
 			is_first_object_locked = false;
 			if (
 				/*
 				 * Only one shadow object
 				 */
 				(fs.object->shadow_count == 1) &&
 				/*
 				 * No COW refs, except us
 				 */
 				(fs.object->ref_count == 1) &&
 				/*
 				 * No one else can look this object up
 				 */
 				(fs.object->handle == NULL) &&
 				/*
 				 * No other ways to look the object up
 				 */
 				((fs.object->type == OBJT_DEFAULT) ||
 				 (fs.object->type == OBJT_SWAP)) &&
 			    (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) &&
 				/*
 				 * We don't chase down the shadow chain
 				 */
 			    fs.object == fs.first_object->backing_object) {
 				vm_page_lock(fs.m);
 				vm_page_remove(fs.m);
 				vm_page_unlock(fs.m);
 				vm_page_lock(fs.first_m);
 				vm_page_replace_checked(fs.m, fs.first_object,
 				    fs.first_pindex, fs.first_m);
 				vm_page_free(fs.first_m);
 				vm_page_unlock(fs.first_m);
 				vm_page_dirty(fs.m);
 #if VM_NRESERVLEVEL > 0
 				/*
 				 * Rename the reservation.
 				 */
 				vm_reserv_rename(fs.m, fs.first_object,
 				    fs.object, OFF_TO_IDX(
 				    fs.first_object->backing_object_offset));
 #endif
 				/*
 				 * Removing the page from the backing object
 				 * unbusied it.
 				 */
 				vm_page_xbusy(fs.m);
 				fs.first_m = fs.m;
 				fs.m = NULL;
 				PCPU_INC(cnt.v_cow_optim);
 			} else {
 				/*
 				 * Oh, well, lets copy it.
 				 */
 				pmap_copy_page(fs.m, fs.first_m);
 				fs.first_m->valid = VM_PAGE_BITS_ALL;
 				if (wired && (fault_flags &
 				    VM_FAULT_WIRE) == 0) {
 					vm_page_lock(fs.first_m);
 					vm_page_wire(fs.first_m);
 					vm_page_unlock(fs.first_m);
 					
 					vm_page_lock(fs.m);
 					vm_page_unwire(fs.m, PQ_INACTIVE);
 					vm_page_unlock(fs.m);
 				}
 				/*
 				 * We no longer need the old page or object.
 				 */
 				release_page(&fs);
 			}
 			/*
 			 * fs.object != fs.first_object due to above 
 			 * conditional
 			 */
 			vm_object_pip_wakeup(fs.object);
 			VM_OBJECT_WUNLOCK(fs.object);
 			/*
 			 * Only use the new page below...
 			 */
 			fs.object = fs.first_object;
 			fs.pindex = fs.first_pindex;
 			fs.m = fs.first_m;
 			if (!is_first_object_locked)
 				VM_OBJECT_WLOCK(fs.object);
 			PCPU_INC(cnt.v_cow_faults);
 			curthread->td_cow++;
 		} else {
 			prot &= ~VM_PROT_WRITE;
 		}
 	}
 
 	/*
 	 * We must verify that the maps have not changed since our last
 	 * lookup.
 	 */
 	if (!fs.lookup_still_valid) {
 		if (!vm_map_trylock_read(fs.map)) {
 			release_page(&fs);
 			unlock_and_deallocate(&fs);
 			goto RetryFault;
 		}
 		fs.lookup_still_valid = true;
 		if (fs.map->timestamp != fs.map_generation) {
 			result = vm_map_lookup_locked(&fs.map, vaddr, fault_type,
 			    &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);
 
 			/*
 			 * If we don't need the page any longer, put it on the inactive
 			 * list (the easiest thing to do here).  If no one needs it,
 			 * pageout will grab it eventually.
 			 */
 			if (result != KERN_SUCCESS) {
 				release_page(&fs);
 				unlock_and_deallocate(&fs);
 
 				/*
 				 * If retry of map lookup would have blocked then
 				 * retry fault from start.
 				 */
 				if (result == KERN_FAILURE)
 					goto RetryFault;
 				return (result);
 			}
 			if ((retry_object != fs.first_object) ||
 			    (retry_pindex != fs.first_pindex)) {
 				release_page(&fs);
 				unlock_and_deallocate(&fs);
 				goto RetryFault;
 			}
 
 			/*
 			 * Check whether the protection has changed or the object has
 			 * been copied while we left the map unlocked. Changing from
 			 * read to write permission is OK - we leave the page
 			 * write-protected, and catch the write fault. Changing from
 			 * write to read permission means that we can't mark the page
 			 * write-enabled after all.
 			 */
 			prot &= retry_prot;
 		}
 	}
 
 	/*
 	 * If the page was filled by a pager, save the virtual address that
 	 * should be faulted on next under a sequential access pattern to the
 	 * map entry.  A read lock on the map suffices to update this address
 	 * safely.
 	 */
 	if (hardfault)
 		fs.entry->next_read = vaddr + ptoa(ahead) + PAGE_SIZE;
 
 	vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, true);
 	vm_page_assert_xbusied(fs.m);
 
 	/*
 	 * Page must be completely valid or it is not fit to
 	 * map into user space.  vm_pager_get_pages() ensures this.
 	 */
 	KASSERT(fs.m->valid == VM_PAGE_BITS_ALL,
 	    ("vm_fault: page %p partially invalid", fs.m));
 	VM_OBJECT_WUNLOCK(fs.object);
 
 	/*
 	 * Put this page into the physical map.  We had to do the unlock above
 	 * because pmap_enter() may sleep.  We don't put the page
 	 * back on the active queue until later so that the pageout daemon
 	 * won't find it (yet).
 	 */
 	pmap_enter(fs.map->pmap, vaddr, fs.m, prot,
 	    fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0);
 	if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE) == 0 &&
 	    wired == 0)
 		vm_fault_prefault(&fs, vaddr,
 		    faultcount > 0 ? behind : PFBAK,
 		    faultcount > 0 ? ahead : PFFOR);
 	VM_OBJECT_WLOCK(fs.object);
 	vm_page_lock(fs.m);
 
 	/*
 	 * If the page is not wired down, then put it where the pageout daemon
 	 * can find it.
 	 */
 	if ((fault_flags & VM_FAULT_WIRE) != 0) {
 		KASSERT(wired, ("VM_FAULT_WIRE && !wired"));
 		vm_page_wire(fs.m);
 	} else
 		vm_page_activate(fs.m);
 	if (m_hold != NULL) {
 		*m_hold = fs.m;
 		vm_page_hold(fs.m);
 	}
 	vm_page_unlock(fs.m);
 	vm_page_xunbusy(fs.m);
 
 	/*
 	 * Unlock everything, and return
 	 */
 	unlock_and_deallocate(&fs);
 	if (hardfault) {
 		PCPU_INC(cnt.v_io_faults);
 		curthread->td_ru.ru_majflt++;
 #ifdef RACCT
 		if (racct_enable && fs.object->type == OBJT_VNODE) {
 			PROC_LOCK(curproc);
 			if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
 				racct_add_force(curproc, RACCT_WRITEBPS,
 				    PAGE_SIZE + behind * PAGE_SIZE);
 				racct_add_force(curproc, RACCT_WRITEIOPS, 1);
 			} else {
 				racct_add_force(curproc, RACCT_READBPS,
 				    PAGE_SIZE + ahead * PAGE_SIZE);
 				racct_add_force(curproc, RACCT_READIOPS, 1);
 			}
 			PROC_UNLOCK(curproc);
 		}
 #endif
 	} else 
 		curthread->td_ru.ru_minflt++;
 
 	return (KERN_SUCCESS);
 }
 
 /*
  * Speed up the reclamation of pages that precede the faulting pindex within
  * the first object of the shadow chain.  Essentially, perform the equivalent
  * to madvise(..., MADV_DONTNEED) on a large cluster of pages that precedes
  * the faulting pindex by the cluster size when the pages read by vm_fault()
  * cross a cluster-size boundary.  The cluster size is the greater of the
  * smallest superpage size and VM_FAULT_DONTNEED_MIN.
  *
  * When "fs->first_object" is a shadow object, the pages in the backing object
  * that precede the faulting pindex are deactivated by vm_fault().  So, this
  * function must only be concerned with pages in the first object.
  *
  * Parameters:
  *	fs	fault state; "fs->object" must be write-locked by the caller
  *		(asserted below) and is still write-locked on return.
  *	vaddr	the faulting virtual address.
  *	ahead	the number of pages of read-ahead accompanying this fault.
  *
  * If "fs->first_object" differs from "fs->object", it is write-locked for
  * the duration of this function and unlocked again before returning.
  */
 static void
 vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead)
 {
 	vm_map_entry_t entry;
 	vm_object_t first_object, object;
 	vm_offset_t end, start;
 	vm_page_t m, m_next;
 	vm_pindex_t pend, pstart;
 	vm_size_t size;
 
 	object = fs->object;
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	first_object = fs->first_object;
 	if (first_object != object) {
 		/*
 		 * Try to take the first object's lock without blocking.  If
 		 * that fails, release the current object's lock and acquire
 		 * both locks in first_object, object order (presumably the
 		 * order required to avoid deadlock -- confirm against the
 		 * object locking rules).
 		 */
 		if (!VM_OBJECT_TRYWLOCK(first_object)) {
 			VM_OBJECT_WUNLOCK(object);
 			VM_OBJECT_WLOCK(first_object);
 			VM_OBJECT_WLOCK(object);
 		}
 	}
 	/* Neither fictitious nor unmanaged pages can be reclaimed. */
 	if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) {
 		/* Cluster size: max(VM_FAULT_DONTNEED_MIN, pagesizes[1]). */
 		size = VM_FAULT_DONTNEED_MIN;
 		if (MAXPAGESIZES > 1 && size < pagesizes[1])
 			size = pagesizes[1];
 		/* "end" is the start of the cluster that contains vaddr. */
 		end = rounddown2(vaddr, size);
 		/*
 		 * Proceed only if the faulting page plus the read-ahead
 		 * window reaches the end of the current cluster, and the map
 		 * entry begins before that cluster.  Note that "entry" is
 		 * assigned inside the condition.
 		 */
 		if (vaddr - end >= size - PAGE_SIZE - ptoa(ahead) &&
 		    (entry = fs->entry)->start < end) {
 			/*
 			 * Target the preceding cluster, [end - size, end),
 			 * clipped to the start of the map entry.
 			 */
 			if (end - entry->start < size)
 				start = entry->start;
 			else
 				start = end - size;
 			pmap_advise(fs->map->pmap, start, end, MADV_DONTNEED);
 			/*
 			 * Translate [start, end) into a range of page indices
 			 * within the first object.
 			 */
 			pstart = OFF_TO_IDX(entry->offset) + atop(start -
 			    entry->start);
 			m_next = vm_page_find_least(first_object, pstart);
 			pend = OFF_TO_IDX(entry->offset) + atop(end -
 			    entry->start);
 			while ((m = m_next) != NULL && m->pindex < pend) {
 				m_next = TAILQ_NEXT(m, listq);
 				/* Skip pages that are not fully valid or busy. */
 				if (m->valid != VM_PAGE_BITS_ALL ||
 				    vm_page_busied(m))
 					continue;
 
 				/*
 				 * Don't clear PGA_REFERENCED, since it would
 				 * likely represent a reference by a different
 				 * process.
 				 *
 				 * Typically, at this point, prefetched pages
 				 * are still in the inactive queue.  Only
 				 * pages that triggered page faults are in the
 				 * active queue.
 				 */
 				vm_page_lock(m);
 				vm_page_deactivate(m);
 				vm_page_unlock(m);
 			}
 		}
 	}
 	/* Drop only the lock taken here; "object" stays locked. */
 	if (first_object != object)
 		VM_OBJECT_WUNLOCK(first_object);
 }
 
 /*
  * vm_fault_prefault provides a quick way of clustering page faults into a
  * process's address space.  It is a "cousin" of vm_map_pmap_enter, except
  * that it runs at page fault time instead of mmap time.
  *
  * Up to "backward" pages before and "forward" pages after the faulting
  * address "addra" are considered, all within the map entry "fs->entry".
  * Nothing is done unless "fs->map" uses the current process's pmap.
  */
 static void
 vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
     int backward, int forward)
 {
 	vm_map_entry_t ent;
 	vm_object_t cur_obj, parent_obj;
 	vm_offset_t cand, lowest;
 	vm_page_t pg;
 	vm_pindex_t idx;
 	pmap_t pm;
 	int nsteps, step;
 
 	/* Only prefault into the address space of the current process. */
 	pm = fs->map->pmap;
 	if (pm != vmspace_pmap(curthread->td_proc->p_vmspace))
 		return;
 
 	ent = fs->entry;
 
 	/*
 	 * Compute the lowest candidate address: clamp it to the start of
 	 * the map entry, or use zero if the subtraction wrapped around.
 	 */
 	lowest = addra - backward * PAGE_SIZE;
 	if (lowest >= ent->start) {
 		if (lowest > addra)
 			lowest = 0;
 	} else {
 		lowest = ent->start;
 	}
 
 	/*
 	 * Walk the candidate virtual addresses in an outward spiral around
 	 * the faulting address: "addra - PAGE_SIZE", "addra + PAGE_SIZE",
 	 * "addra - 2 * PAGE_SIZE", "addra + 2 * PAGE_SIZE", and so on.  The
 	 * walk stops as soon as a candidate has no backing physical page.
 	 */
 	nsteps = 2 * imax(backward, forward);
 	for (step = 0; step < nsteps; step++) {
 		cand = addra + ((step >> 1) + 1) * ((step & 1) == 0 ? -PAGE_SIZE :
 		    PAGE_SIZE);
 		if (cand > addra + forward * PAGE_SIZE)
 			cand = 0;
 
 		/* Skip out-of-range and non-prefaultable addresses. */
 		if (cand < lowest || cand >= ent->end ||
 		    !pmap_is_prefaultable(pm, cand))
 			continue;
 
 		/*
 		 * Look for the page, descending through the shadow chain
 		 * while the current object is a default object that has a
 		 * backing object.  The next object is read-locked before the
 		 * current one is released.
 		 */
 		idx = ((cand - ent->start) + ent->offset) >> PAGE_SHIFT;
 		cur_obj = ent->object.vm_object;
 		VM_OBJECT_RLOCK(cur_obj);
 		for (;;) {
 			pg = vm_page_lookup(cur_obj, idx);
 			if (pg != NULL || cur_obj->type != OBJT_DEFAULT)
 				break;
 			parent_obj = cur_obj->backing_object;
 			if (parent_obj == NULL)
 				break;
 			KASSERT((cur_obj->backing_object_offset & PAGE_MASK) ==
 			    0, ("vm_fault_prefault: unaligned object offset"));
 			idx += cur_obj->backing_object_offset >> PAGE_SHIFT;
 			VM_OBJECT_RLOCK(parent_obj);
 			VM_OBJECT_RUNLOCK(cur_obj);
 			cur_obj = parent_obj;
 		}
 
 		/* Map only fully valid, non-fictitious pages. */
 		if (pg != NULL && pg->valid == VM_PAGE_BITS_ALL &&
 		    (pg->flags & PG_FICTITIOUS) == 0)
 			pmap_enter_quick(pm, cand, pg, ent->protection);
 		VM_OBJECT_RUNLOCK(cur_obj);
 
 		/* No backing page at this address: stop prefaulting. */
 		if (pg == NULL)
 			break;
 	}
 }
 
 /*
  * Hold every physical page mapped by the virtual address range
  * ["addr", "addr" + "len"), provided that each mapping is valid and permits
  * the access types in "prot".  On success, the number of held pages is
  * returned and the pages themselves are stored in the array "ma".  If any
  * page cannot be held, all pages held so far are released and -1 is
  * returned.  A zero "len" yields 0.  The function panics if the range spans
  * more than "max_count" pages.
  */
 int
 vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count)
 {
 	vm_offset_t cur, end;
 	vm_page_t pg;
 	vm_page_t *slot;
 	boolean_t need_fault;
 	int npages;
 
 	if (len == 0)
 		return (0);
 
 	/* Expand the range outward to page boundaries. */
 	end = round_page(addr + len);
 	addr = trunc_page(addr);
 
 	/*
 	 * Reject ranges that wrapped around or that lie outside of the map.
 	 */
 	if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map))
 		return (-1);
 
 	if (atop(end - addr) > max_count)
 		panic("vm_fault_quick_hold_pages: count > max_count");
 	npages = atop(end - addr);
 
 	/*
 	 * First pass: the pages are most likely resident in the pmap, so
 	 * try the cheaper pmap_extract_and_hold() on each of them.
 	 */
 	need_fault = FALSE;
 	slot = ma;
 	cur = addr;
 	while (cur < end) {
 		pg = pmap_extract_and_hold(map->pmap, cur, prot);
 		*slot = pg;
 		if (pg == NULL) {
 			need_fault = TRUE;
 		} else if ((prot & VM_PROT_WRITE) != 0 &&
 		    pg->dirty != VM_PAGE_BITS_ALL) {
 			/*
 			 * Dirty the page explicitly.  The caller may modify
 			 * it through an unmanaged mapping or by DMA, and
 			 * such changes would otherwise go unnoticed.
 			 *
 			 * The object lock is not held here.
 			 * See vm_page_clear_dirty_mask().
 			 */
 			vm_page_dirty(pg);
 		}
 		slot++;
 		cur += PAGE_SIZE;
 	}
 	if (!need_fault)
 		return (npages);
 
 	/*
 	 * Second pass: at least one address had no mapping or a mapping with
 	 * insufficient permissions.  Fault in and hold each missing page.
 	 */
 	slot = ma;
 	cur = addr;
 	while (cur < end) {
 		if (*slot == NULL && vm_fault_hold(map, cur, prot,
 		    VM_FAULT_NORMAL, slot) != KERN_SUCCESS) {
 			/* Failure: release every page held so far. */
 			for (slot = ma; slot < ma + npages; slot++) {
 				if (*slot == NULL)
 					continue;
 				vm_page_lock(*slot);
 				vm_page_unhold(*slot);
 				vm_page_unlock(*slot);
 			}
 			return (-1);
 		}
 		slot++;
 		cur += PAGE_SIZE;
 	}
 	return (npages);
 }
 
 /*
  *	Routine:
  *		vm_fault_copy_entry
  *	Function:
  *		Create new shadow object backing dst_entry with private copy of
  *		all underlying pages. When src_entry is equal to dst_entry,
  *		function implements COW for wired-down map entry. Otherwise,
  *		it forks wired entry into dst_map.
  *
  *	In/out conditions:
  *		The source and destination maps must be locked for write.
  *		The source map entry must be wired down (or be a sharing map
  *		entry corresponding to a main map entry that is wired down).
  *
  *	Parameters:
  *		dst_map		map whose pmap receives the new mappings.
  *		src_map		source map; not otherwise used by this
  *				function (referenced only under "lint").
  *		dst_entry	destination map entry.
  *		src_entry	source map entry; equal to dst_entry for the
  *				in-place ("upgrade") case.
  *		fork_charge	if not NULL, the destination object is charged
  *				to the current thread's credentials and the
  *				object's charge is added to *fork_charge.
  */
 void
 vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
     vm_map_entry_t dst_entry, vm_map_entry_t src_entry,
     vm_ooffset_t *fork_charge)
 {
 	vm_object_t backing_object, dst_object, object, src_object;
 	vm_pindex_t dst_pindex, pindex, src_pindex;
 	vm_prot_t access, prot;
 	vm_offset_t vaddr;
 	vm_page_t dst_m;
 	vm_page_t src_m;
 	boolean_t upgrade;
 
 #ifdef	lint
 	src_map++;
 #endif	/* lint */
 
 	/* An "upgrade" operates on a single entry in place. */
 	upgrade = src_entry == dst_entry;
 	access = prot = dst_entry->protection;
 
 	src_object = src_entry->object.vm_object;
 	src_pindex = OFF_TO_IDX(src_entry->offset);
 
 	if (upgrade && (dst_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
 		/*
 		 * The entry needs no copy: reuse its existing object, taking
 		 * an extra reference on it.
 		 */
 		dst_object = src_object;
 		vm_object_reference(dst_object);
 	} else {
 		/*
 		 * Create the top-level object for the destination entry. (Doesn't
 		 * actually shadow anything - we copy the pages directly.)
 		 */
 		dst_object = vm_object_allocate(OBJT_DEFAULT,
 		    OFF_TO_IDX(dst_entry->end - dst_entry->start));
 #if VM_NRESERVLEVEL > 0
 		dst_object->flags |= OBJ_COLORED;
 		dst_object->pg_color = atop(dst_entry->start);
 #endif
 	}
 
 	VM_OBJECT_WLOCK(dst_object);
 	KASSERT(upgrade || dst_entry->object.vm_object == NULL,
 	    ("vm_fault_copy_entry: vm_object not NULL"));
 	if (src_object != dst_object) {
 		/* Install the new object at offset zero in the entry. */
 		dst_entry->object.vm_object = dst_object;
 		dst_entry->offset = 0;
 		dst_object->charge = dst_entry->end - dst_entry->start;
 	}
 	if (fork_charge != NULL) {
 		KASSERT(dst_entry->cred == NULL,
 		    ("vm_fault_copy_entry: leaked swp charge"));
 		dst_object->cred = curthread->td_ucred;
 		crhold(dst_object->cred);
 		*fork_charge += dst_object->charge;
 	} else if (dst_object->cred == NULL) {
 		/* Move the entry's credential reference to the object. */
 		KASSERT(dst_entry->cred != NULL, ("no cred for entry %p",
 		    dst_entry));
 		dst_object->cred = dst_entry->cred;
 		dst_entry->cred = NULL;
 	}
 
 	/*
 	 * If not an upgrade, then enter the mappings in the pmap as
 	 * read and/or execute accesses.  Otherwise, enter them as
 	 * write accesses.
 	 *
 	 * A writeable large page mapping is only created if all of
 	 * the constituent small page mappings are modified. Marking
 	 * PTEs as modified on inception allows promotion to happen
 	 * without taking potentially large number of soft faults.
 	 */
 	if (!upgrade)
 		access &= ~VM_PROT_WRITE;
 
 	/*
 	 * Loop through all of the virtual pages within the entry's
 	 * range, copying each page from the source object to the
 	 * destination object.  Since the source is wired, those pages
 	 * must exist.  In contrast, the destination is pageable.
 	 * Since the destination object does not share any backing
 	 * storage with the source object, all of its pages must be
 	 * dirtied, regardless of whether they can be written.
 	 */
 	for (vaddr = dst_entry->start, dst_pindex = 0;
 	    vaddr < dst_entry->end;
 	    vaddr += PAGE_SIZE, dst_pindex++) {
 		/*
 		 * The current page is retried from here after sleeping for
 		 * free memory or for a busy page.  The destination object is
 		 * write-locked whenever control reaches this label.
 		 */
 again:
 		/*
 		 * Find the page in the source object, and copy it in.
 		 * Because the source is wired down, the page will be
 		 * in memory.
 		 */
 		if (src_object != dst_object)
 			VM_OBJECT_RLOCK(src_object);
 		object = src_object;
 		pindex = src_pindex + dst_pindex;
 		while ((src_m = vm_page_lookup(object, pindex)) == NULL &&
 		    (backing_object = object->backing_object) != NULL) {
 			/*
 			 * Unless the source mapping is read-only or
 			 * it is presently being upgraded from
 			 * read-only, the first object in the shadow
 			 * chain should provide all of the pages.  In
 			 * other words, this loop body should never be
 			 * executed when the source mapping is already
 			 * read/write.
 			 */
 			KASSERT((src_entry->protection & VM_PROT_WRITE) == 0 ||
 			    upgrade,
 			    ("vm_fault_copy_entry: main object missing page"));
 
 			/*
 			 * Lock the backing object before releasing the
 			 * current one.  The destination object's write lock
 			 * is never released here.
 			 */
 			VM_OBJECT_RLOCK(backing_object);
 			pindex += OFF_TO_IDX(object->backing_object_offset);
 			if (object != dst_object)
 				VM_OBJECT_RUNLOCK(object);
 			object = backing_object;
 		}
 		KASSERT(src_m != NULL, ("vm_fault_copy_entry: page missing"));
 
 		if (object != dst_object) {
 			/*
 			 * Allocate a page in the destination object.
 			 * When the destination is the source object itself,
 			 * the page index includes the entry's offset;
 			 * otherwise the destination object starts at zero.
 			 */
 			dst_m = vm_page_alloc(dst_object, (src_object ==
 			    dst_object ? src_pindex : 0) + dst_pindex,
 			    VM_ALLOC_NORMAL);
 			if (dst_m == NULL) {
 				/*
 				 * No free page: drop both locks, wait for
 				 * memory, and retry this page.
 				 */
 				VM_OBJECT_WUNLOCK(dst_object);
 				VM_OBJECT_RUNLOCK(object);
 				VM_WAIT;
 				VM_OBJECT_WLOCK(dst_object);
 				goto again;
 			}
 			pmap_copy_page(src_m, dst_m);
 			VM_OBJECT_RUNLOCK(object);
 			dst_m->valid = VM_PAGE_BITS_ALL;
 			dst_m->dirty = VM_PAGE_BITS_ALL;
 		} else {
 			/*
 			 * The page already resides in the destination
 			 * object: busy it in place instead of copying.
 			 */
 			dst_m = src_m;
 			if (vm_page_sleep_if_busy(dst_m, "fltupg"))
 				goto again;
 			vm_page_xbusy(dst_m);
 			KASSERT(dst_m->valid == VM_PAGE_BITS_ALL,
 			    ("invalid dst page %p", dst_m));
 		}
 		VM_OBJECT_WUNLOCK(dst_object);
 
 		/*
 		 * Enter it in the pmap. If a wired, copy-on-write
 		 * mapping is being replaced by a write-enabled
 		 * mapping, then wire that new mapping.
 		 */
 		pmap_enter(dst_map->pmap, vaddr, dst_m, prot,
 		    access | (upgrade ? PMAP_ENTER_WIRED : 0), 0);
 
 		/*
 		 * Mark it no longer busy, and put it on the active list.
 		 */
 		VM_OBJECT_WLOCK(dst_object);
 		
 		if (upgrade) {
 			if (src_m != dst_m) {
 				/*
 				 * Transfer the wiring from the old page to
 				 * its private copy.
 				 */
 				vm_page_lock(src_m);
 				vm_page_unwire(src_m, PQ_INACTIVE);
 				vm_page_unlock(src_m);
 				vm_page_lock(dst_m);
 				vm_page_wire(dst_m);
 				vm_page_unlock(dst_m);
 			} else {
 				KASSERT(dst_m->wire_count > 0,
 				    ("dst_m %p is not wired", dst_m));
 			}
 		} else {
 			vm_page_lock(dst_m);
 			vm_page_activate(dst_m);
 			vm_page_unlock(dst_m);
 		}
 		vm_page_xunbusy(dst_m);
 	}
 	VM_OBJECT_WUNLOCK(dst_object);
 	if (upgrade) {
 		/*
 		 * The entry is no longer copy-on-write.  Drop one reference
 		 * on the source object (presumably either the extra reference
 		 * taken above or the one the entry held before its object was
 		 * replaced -- confirm).
 		 */
 		dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY);
 		vm_object_deallocate(src_object);
 	}
 }
 
 /*
  * Keep the calling thread out of the machine-independent page fault
  * handler: later calls to vm_fault() by this thread will return
  * KERN_PROTECTION_FAILURE.  Machine-dependent handling of spurious page
  * faults is enabled as well.
  *
  * Returns the value produced by curthread_pflags_set(), which the caller
  * is expected to hand back to vm_fault_enable_pagefaults().
  */
 int
 vm_fault_disable_pagefaults(void)
 {
 	int saved_flags;
 
 	saved_flags = curthread_pflags_set(TDP_NOFAULTING | TDP_RESETSPUR);
 	return (saved_flags);
 }
 
 /*
  * Restore the calling thread's private flags from "save" by passing it to
  * curthread_pflags_restore().  "save" is presumably the value previously
  * returned by vm_fault_disable_pagefaults() -- confirm against callers.
  */
 void
 vm_fault_enable_pagefaults(int save)
 {
 
 	curthread_pflags_restore(save);
 }
Index: head/sys/vm/vm_object.h
===================================================================
--- head/sys/vm/vm_object.h	(revision 309709)
+++ head/sys/vm/vm_object.h	(revision 309710)
@@ -1,326 +1,327 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_object.h	8.3 (Berkeley) 1/12/94
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  * $FreeBSD$
  */
 
 /*
  *	Virtual memory object module definitions.
  */
 
 #ifndef	_VM_OBJECT_
 #define	_VM_OBJECT_
 
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_rwlock.h>
 
 #include <vm/_vm_radix.h>
 
 /*
  *	Types defined:
  *
  *	vm_object_t		Virtual memory object.
  *
  *	The root of cached pages pool is protected by both the per-object lock
  *	and the free pages queue mutex.
  *	On insert in the cache radix trie, the per-object lock is expected
  *	to be already held and the free pages queue mutex will be
  *	acquired during the operation too.
  *	On remove and lookup from the cache radix trie, only the free
  *	pages queue mutex is expected to be locked.
  *	These rules allow for reliably checking for the presence of cached
  *	pages with only the per-object lock held, thereby reducing contention
  *	for the free pages queue mutex.
  *
  * List of locks
  *	(c)	const until freed
  *	(o)	per-object lock 
  *	(f)	free pages queue mutex
  *
  */
 
 struct vm_object {
 	struct rwlock lock;
 	TAILQ_ENTRY(vm_object) object_list; /* list of all objects */
 	LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */
 	LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */
 	TAILQ_HEAD(respgs, vm_page) memq; /* list of resident pages */
 	struct vm_radix rtree;		/* root of the resident page radix trie*/
 	vm_pindex_t size;		/* Object size */
 	int generation;			/* generation ID */
 	int ref_count;			/* How many refs?? */
 	int shadow_count;		/* how many objects that this is a shadow for */
 	vm_memattr_t memattr;		/* default memory attribute for pages */
 	objtype_t type;			/* type of pager */
 	u_short flags;			/* see below */
 	u_short pg_color;		/* (c) color of first page in obj */
 	u_int paging_in_progress;	/* Paging (in or out) so don't collapse or destroy */
 	int resident_page_count;	/* number of resident pages */
 	struct vm_object *backing_object; /* object that I'm a shadow of */
 	vm_ooffset_t backing_object_offset;/* Offset in backing object */
 	TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
 	LIST_HEAD(, vm_reserv) rvq;	/* list of reservations */
 	void *handle;
 	union {
 		/*
 		 * VNode pager
 		 *
 		 *	vnp_size - current size of file
 		 */
 		struct {
 			off_t vnp_size;
 			vm_ooffset_t writemappings;
 		} vnp;
 
 		/*
 		 * Device pager
 		 *
 		 *	devp_pglist - list of allocated pages
 		 */
 		struct {
 			TAILQ_HEAD(, vm_page) devp_pglist;
 			struct cdev_pager_ops *ops;
 			struct cdev *dev;
 		} devp;
 
 		/*
 		 * SG pager
 		 *
 		 *	sgp_pglist - list of allocated pages
 		 */
 		struct {
 			TAILQ_HEAD(, vm_page) sgp_pglist;
 		} sgp;
 
 		/*
 		 * Swap pager
 		 *
 		 *	swp_tmpfs - back-pointer to the tmpfs vnode,
 		 *		     if any, which uses the vm object
 		 *		     as backing store.  The handle
 		 *		     cannot be reused for linking,
 		 *		     because the vnode can be
 		 *		     reclaimed and recreated, making
 		 *		     the handle changed and hash-chain
 		 *		     invalid.
 		 *
 		 *	swp_bcount - number of swap 'swblock' metablocks, each
 		 *		     contains up to 16 swapblk assignments.
 		 *		     see vm/swap_pager.h
 		 */
 		struct {
 			void *swp_tmpfs;
 			int swp_bcount;
 		} swp;
 	} un_pager;
 	struct ucred *cred;
 	vm_ooffset_t charge;
 	void *umtx_data;
 };
 
 /*
  * Flags
  */
 #define	OBJ_FICTITIOUS	0x0001		/* (c) contains fictitious pages */
 #define	OBJ_UNMANAGED	0x0002		/* (c) contains unmanaged pages */
+#define	OBJ_POPULATE	0x0004		/* pager implements populate() */
 #define OBJ_DEAD	0x0008		/* dead objects (during rundown) */
 #define	OBJ_NOSPLIT	0x0010		/* dont split this object */
 #define	OBJ_UMTXDEAD	0x0020		/* umtx pshared was terminated */
 #define OBJ_PIPWNT	0x0040		/* paging in progress wanted */
 #define OBJ_MIGHTBEDIRTY 0x0100		/* object might be dirty, only for vnode */
 #define	OBJ_TMPFS_NODE	0x0200		/* object belongs to tmpfs VREG node */
 #define	OBJ_TMPFS_DIRTY	0x0400		/* dirty tmpfs obj */
 #define	OBJ_COLORED	0x1000		/* pg_color is defined */
 #define	OBJ_ONEMAPPING	0x2000		/* One USE (a single, non-forked) mapping flag */
 #define	OBJ_DISCONNECTWNT 0x4000	/* disconnect from vnode wanted */
 #define	OBJ_TMPFS	0x8000		/* has tmpfs vnode allocated */
 
 #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT)
 #define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT))
 
 #ifdef	_KERNEL
 
 #define OBJPC_SYNC	0x1			/* sync I/O */
 #define OBJPC_INVAL	0x2			/* invalidate */
 #define OBJPC_NOSYNC	0x4			/* skip if VPO_NOSYNC */
 
 /*
  * The following options are supported by vm_object_page_remove().
  */
 #define	OBJPR_CLEANONLY	0x1		/* Don't remove dirty pages. */
 #define	OBJPR_NOTMAPPED	0x2		/* Don't unmap pages. */
 
 TAILQ_HEAD(object_q, vm_object);
 
 extern struct object_q vm_object_list;	/* list of allocated objects */
 extern struct mtx vm_object_list_mtx;	/* lock for object list and count */
 
 extern struct vm_object kernel_object_store;
 extern struct vm_object kmem_object_store;
 
 #define	kernel_object	(&kernel_object_store)
 #define	kmem_object	(&kmem_object_store)
 
 #define	VM_OBJECT_ASSERT_LOCKED(object)					\
 	rw_assert(&(object)->lock, RA_LOCKED)
 #define	VM_OBJECT_ASSERT_RLOCKED(object)				\
 	rw_assert(&(object)->lock, RA_RLOCKED)
 #define	VM_OBJECT_ASSERT_WLOCKED(object)				\
 	rw_assert(&(object)->lock, RA_WLOCKED)
 #define	VM_OBJECT_ASSERT_UNLOCKED(object)				\
 	rw_assert(&(object)->lock, RA_UNLOCKED)
 #define	VM_OBJECT_LOCK_DOWNGRADE(object)				\
 	rw_downgrade(&(object)->lock)
 #define	VM_OBJECT_RLOCK(object)						\
 	rw_rlock(&(object)->lock)
 #define	VM_OBJECT_RUNLOCK(object)					\
 	rw_runlock(&(object)->lock)
 #define	VM_OBJECT_SLEEP(object, wchan, pri, wmesg, timo)		\
 	rw_sleep((wchan), &(object)->lock, (pri), (wmesg), (timo))
 #define	VM_OBJECT_TRYRLOCK(object)					\
 	rw_try_rlock(&(object)->lock)
 #define	VM_OBJECT_TRYWLOCK(object)					\
 	rw_try_wlock(&(object)->lock)
 #define	VM_OBJECT_TRYUPGRADE(object)					\
 	rw_try_upgrade(&(object)->lock)
 #define	VM_OBJECT_WLOCK(object)						\
 	rw_wlock(&(object)->lock)
 #define	VM_OBJECT_WOWNED(object)					\
 	rw_wowned(&(object)->lock)
 #define	VM_OBJECT_WUNLOCK(object)					\
 	rw_wunlock(&(object)->lock)
 
 /*
  *	The object must be locked or thread private.
  */
 static __inline void
 vm_object_set_flag(vm_object_t object, u_short bits)
 {
 
 	object->flags |= bits;
 }
 
 /*
  *	Conditionally set the object's color, which (1) enables the allocation
  *	of physical memory reservations for anonymous objects and larger-than-
  *	superpage-sized named objects and (2) determines the first page offset
  *	within the object at which a reservation may be allocated.  In other
  *	words, the color determines the alignment of the object with respect
  *	to the largest superpage boundary.  When mapping named objects, like
  *	files or POSIX shared memory objects, the color should be set to zero
  *	before a virtual address is selected for the mapping.  In contrast,
  *	for anonymous objects, the color may be set after the virtual address
  *	is selected.
  *
  *	The object must be locked.
  */
 static __inline void
 vm_object_color(vm_object_t object, u_short color)
 {
 
 	if ((object->flags & OBJ_COLORED) == 0) {
 		object->pg_color = color;
 		object->flags |= OBJ_COLORED;
 	}
 }
 
 void vm_object_clear_flag(vm_object_t object, u_short bits);
 void vm_object_pip_add(vm_object_t object, short i);
 void vm_object_pip_subtract(vm_object_t object, short i);
 void vm_object_pip_wakeup(vm_object_t object);
 void vm_object_pip_wakeupn(vm_object_t object, short i);
 void vm_object_pip_wait(vm_object_t object, char *waitid);
 
 void umtx_shm_object_init(vm_object_t object);
 void umtx_shm_object_terminated(vm_object_t object);
 extern int umtx_shm_vnobj_persistent;
 
 vm_object_t vm_object_allocate (objtype_t, vm_pindex_t);
 boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t,
    boolean_t);
 void vm_object_collapse (vm_object_t);
 void vm_object_deallocate (vm_object_t);
 void vm_object_destroy (vm_object_t);
 void vm_object_terminate (vm_object_t);
 void vm_object_set_writeable_dirty (vm_object_t);
 void vm_object_init (void);
 void vm_object_madvise(vm_object_t, vm_pindex_t, vm_pindex_t, int);
 boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start,
     vm_ooffset_t end, int flags);
 void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start,
     vm_pindex_t end);
 void vm_object_page_remove(vm_object_t object, vm_pindex_t start,
     vm_pindex_t end, int options);
 boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t);
 void vm_object_print(long addr, boolean_t have_addr, long count, char *modif);
 void vm_object_reference (vm_object_t);
 void vm_object_reference_locked(vm_object_t);
 int  vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr);
 void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t);
 void vm_object_split(vm_map_entry_t);
 boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t,
     boolean_t);
 void vm_object_unwire(vm_object_t object, vm_ooffset_t offset,
     vm_size_t length, uint8_t queue);
 struct vnode *vm_object_vnode(vm_object_t object);
 #endif				/* _KERNEL */
 
 #endif				/* _VM_OBJECT_ */
Index: head/sys/vm/vm_pager.h
===================================================================
--- head/sys/vm/vm_pager.h	(revision 309709)
+++ head/sys/vm/vm_pager.h	(revision 309710)
@@ -1,192 +1,211 @@
 /*-
  * Copyright (c) 1990 University of Utah.
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vm_pager.h	8.4 (Berkeley) 1/12/94
  * $FreeBSD$
  */
 
 /*
  * Pager routine interface definition.
  */
 
 #ifndef	_VM_PAGER_
 #define	_VM_PAGER_
 
 #include <sys/queue.h>
 
 TAILQ_HEAD(pagerlst, vm_object);
 
 typedef void pgo_init_t(void);
 typedef vm_object_t pgo_alloc_t(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t,
     struct ucred *);
 typedef void pgo_dealloc_t(vm_object_t);
 typedef int pgo_getpages_t(vm_object_t, vm_page_t *, int, int *, int *);
 typedef void pgo_getpages_iodone_t(void *, vm_page_t *, int, int);
 typedef int pgo_getpages_async_t(vm_object_t, vm_page_t *, int, int *, int *,
     pgo_getpages_iodone_t, void *);
 typedef void pgo_putpages_t(vm_object_t, vm_page_t *, int, int, int *);
 typedef boolean_t pgo_haspage_t(vm_object_t, vm_pindex_t, int *, int *);
+typedef int pgo_populate_t(vm_object_t, vm_pindex_t, int, vm_prot_t,
+    vm_pindex_t *, vm_pindex_t *);
 typedef void pgo_pageunswapped_t(vm_page_t);
 
 struct pagerops {
 	pgo_init_t		*pgo_init;		/* Initialize pager. */
 	pgo_alloc_t		*pgo_alloc;		/* Allocate pager. */
 	pgo_dealloc_t		*pgo_dealloc;		/* Disassociate. */
 	pgo_getpages_t		*pgo_getpages;		/* Get (read) page. */
 	pgo_getpages_async_t	*pgo_getpages_async;	/* Get page asyncly. */
 	pgo_putpages_t		*pgo_putpages;		/* Put (write) page. */
 	pgo_haspage_t		*pgo_haspage;		/* Query page. */
+	pgo_populate_t		*pgo_populate;		/* Bulk spec pagein. */
 	pgo_pageunswapped_t	*pgo_pageunswapped;
 };
 
 extern struct pagerops defaultpagerops;
 extern struct pagerops swappagerops;
 extern struct pagerops vnodepagerops;
 extern struct pagerops devicepagerops;
 extern struct pagerops physpagerops;
 extern struct pagerops sgpagerops;
 extern struct pagerops mgtdevicepagerops;
 
 /*
  * get/put return values
  * OK	 operation was successful
  * BAD	 specified data was out of the accepted range
  * FAIL	 specified data was in range, but doesn't exist
  * PEND	 operations was initiated but not completed
  * ERROR error while accessing data that is in range and exists
  * AGAIN temporary resource shortage prevented operation from happening
  */
 #define	VM_PAGER_OK	0
 #define	VM_PAGER_BAD	1
 #define	VM_PAGER_FAIL	2
 #define	VM_PAGER_PEND	3
 #define	VM_PAGER_ERROR	4
 #define VM_PAGER_AGAIN	5
 
 #define	VM_PAGER_PUT_SYNC		0x0001
 #define	VM_PAGER_PUT_INVAL		0x0002
 #define	VM_PAGER_PUT_NOREUSE		0x0004
 #define VM_PAGER_CLUSTER_OK		0x0008
 
 #ifdef _KERNEL
 
 extern struct pagerops *pagertab[];
 extern struct mtx_padalign pbuf_mtx;
 
 vm_object_t vm_pager_allocate(objtype_t, void *, vm_ooffset_t, vm_prot_t,
     vm_ooffset_t, struct ucred *);
 void vm_pager_bufferinit(void);
 void vm_pager_deallocate(vm_object_t);
 int vm_pager_get_pages(vm_object_t, vm_page_t *, int, int *, int *);
 int vm_pager_get_pages_async(vm_object_t, vm_page_t *, int, int *, int *,
     pgo_getpages_iodone_t, void *);
 void vm_pager_init(void);
 vm_object_t vm_pager_object_lookup(struct pagerlst *, void *);
 
 static __inline void
 vm_pager_put_pages(
 	vm_object_t object,
 	vm_page_t *m,
 	int count,
 	int flags,
 	int *rtvals
 ) {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	(*pagertab[object->type]->pgo_putpages)
 	    (object, m, count, flags, rtvals);
 }
 
 /*
  *	vm_pager_haspage
  *
  *	Check to see if an object's pager has the requested page.  The
  *	object's pager will also set before and after to give the caller
  *	some idea of the number of pages before and after the requested
  *	page can be I/O'd efficiently.
  *
  *	The object must be locked.
  */
 static __inline boolean_t
 vm_pager_has_page(
 	vm_object_t object,
 	vm_pindex_t offset, 
 	int *before,
 	int *after
 ) {
 	boolean_t ret;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	ret = (*pagertab[object->type]->pgo_haspage)
 	    (object, offset, before, after);
 	return (ret);
 } 
 
+/* Dispatch to the pgo_populate method of the pager for the object's type. */
+static __inline int
+vm_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type,
+    vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last)
+{
+
+	MPASS((object->flags & OBJ_POPULATE) != 0);
+	MPASS(pidx < object->size);
+	MPASS(object->paging_in_progress > 0);
+	return ((*pagertab[object->type]->pgo_populate)(object, pidx,
+	    fault_type, max_prot, first, last));
+}
+
 /* 
  *      vm_pager_page_unswapped
  * 
  *	Destroy swap associated with the page.
  * 
  *	The object containing the page must be locked.
  *      This function may not block.
  *
  *	XXX: A much better name would be "vm_pager_page_dirtied()"
  *	XXX: It is not obvious if this could be profitably used by any
  *	XXX: pagers besides the swap_pager or if it should even be a
  *	XXX: generic pager_op in the first place.
  */
 static __inline void
 vm_pager_page_unswapped(vm_page_t m)
 {
 
 	VM_OBJECT_ASSERT_LOCKED(m->object);
 	if (pagertab[m->object->type]->pgo_pageunswapped)
 		(*pagertab[m->object->type]->pgo_pageunswapped)(m);
 }
 
 struct cdev_pager_ops {
 	int (*cdev_pg_fault)(vm_object_t vm_obj, vm_ooffset_t offset,
 	    int prot, vm_page_t *mres);
+	int (*cdev_pg_populate)(vm_object_t vm_obj, vm_pindex_t pidx,
+	    int fault_type, vm_prot_t max_prot, vm_pindex_t *first,
+	    vm_pindex_t *last);
 	int (*cdev_pg_ctor)(void *handle, vm_ooffset_t size, vm_prot_t prot,
 	    vm_ooffset_t foff, struct ucred *cred, u_short *color);
 	void (*cdev_pg_dtor)(void *handle);
 };
 
 vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp,
     struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot,
     vm_ooffset_t foff, struct ucred *cred);
 vm_object_t cdev_pager_lookup(void *handle);
 void cdev_pager_free_page(vm_object_t object, vm_page_t m);
 
 #endif				/* _KERNEL */
 #endif				/* _VM_PAGER_ */