Index: head/sys/amd64/amd64/pmap.c =================================================================== --- head/sys/amd64/amd64/pmap.c (revision 17333) +++ head/sys/amd64/amd64/pmap.c (revision 17334) @@ -1,2549 +1,2669 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.111 1996/07/28 20:31:27 dyson Exp $ + * $Id: pmap.c,v 1.113 1996/07/29 14:22:46 dyson Exp $ */ /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PMAP_KEEP_PDIRS #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif -#if !defined(SMALL_KERNEL) -#define PMAP_INLINE __inline -#else -#define PMAP_INLINE -#endif - static void init_pv_entries __P((int)); /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) static int protection_codes[8]; static struct pmap kernel_pmap_store; pmap_t kernel_pmap; vm_offset_t avail_start; /* PA of first available physical page */ vm_offset_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ static vm_offset_t vm_first_phys; static int nkpt; static vm_page_t nkpg; vm_offset_t kernel_vm_end; extern vm_offset_t clean_sva, clean_eva; extern int cpu_class; #define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) /* * Data for the pv entry allocation mechanism */ static int pv_freelistcnt; -TAILQ_HEAD (,pv_entry) pv_freelist; +static pv_entry_t pv_freelist; static vm_offset_t pvva; static int npvvapg; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1; static pt_entry_t *CMAP2, *ptmmap; +static pv_entry_t *pv_table; caddr_t CADDR1, ptvmmap; static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp; pt_entry_t *PMAP1; unsigned *PADDR1; static void free_pv_entry __P((pv_entry_t pv)); -static unsigned * get_ptbase __P((pmap_t pmap)); +static __inline unsigned * get_ptbase __P((pmap_t pmap)); static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static void pmap_alloc_pv_entry __P((void)); +static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); static int pmap_is_managed __P((vm_offset_t pa)); -static int pmap_remove_all __P((vm_offset_t pa)); -static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, - vm_offset_t pa, vm_page_t mpte)); +static void pmap_remove_all __P((vm_offset_t pa)); +static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, + vm_offset_t pa)); static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); -static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv, +static __inline int pmap_remove_entry __P((struct pmap *pmap, pv_entry_t *pv, vm_offset_t va)); -static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, +static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); +static __inline void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_offset_t pa)); -static vm_page_t 
pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); +static __inline vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); -static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); +static __inline int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); static vm_page_t _pmap_allocpte __P((pmap_t pmap, int ptepindex)); -unsigned * __pure pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); -int pmap_tcbit __P((vm_offset_t pa, int bit)); -static vm_page_t pmap_page_alloc __P((vm_object_t object, vm_pindex_t pindex)); -#define PDSTACKMAX 6 +#define VATRACK 4 +#define PDSTACKMAX 16 static vm_offset_t pdstack[PDSTACKMAX]; static int pdstackptr; /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(firstaddr, loadaddr) vm_offset_t firstaddr; vm_offset_t loadaddr; { vm_offset_t va; pt_entry_t *pte; avail_start = firstaddr; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize protection array. */ i386_protection_init(); /* * The kernel's pmap is statically allocated so we don't have to use * pmap_create, which is unlikely to work correctly at this part of * the boot sequence (XXX and which no longer exists). 
*/ kernel_pmap = &kernel_pmap_store; kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD); kernel_pmap->pm_count = 1; - TAILQ_INIT(&kernel_pmap->pm_pvlist.pv_list); nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP2, CADDR2, 1) /* * ptmmap is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, ptmmap, ptvmmap, 1) /* * msgbufmap is used to map the system message buffer. */ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1) /* * ptemap is used for pmap_pte_quick */ SYSMAP(unsigned *, PMAP1, PADDR1, 1); virtual_avail = va; *(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0; pmap_update(); } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_offset_t phys_start, phys_end; { vm_offset_t addr; vm_size_t npg, s; int i; /* * calculate the number of pv_entries needed */ vm_first_phys = phys_avail[0]; for (i = 0; phys_avail[i + 1]; i += 2); npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ - s = (vm_size_t) (sizeof(pv_table_t) * npg); + s = (vm_size_t) (sizeof(struct pv_entry *) * npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); - pv_table = (pv_table_t *) addr; - for(i=0;i= clean_eva)) return 1; else return 0; } /* * The below are finer grained pmap_update routines. These eliminate * the gratuitious tlb flushes on non-i386 architectures. 
*/ -static PMAP_INLINE void +static __inline void pmap_update_1pg( vm_offset_t va) { #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) pmap_update(); else #endif __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va)); } -static PMAP_INLINE void +static __inline void pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) { pmap_update(); } else #endif { __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va1)); __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va2)); } } static __pure unsigned * get_ptbase(pmap) pmap_t pmap; { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; /* are we current address space or kernel? */ if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { return (unsigned *) PTmap; } /* otherwise, we are alternate address space */ if (frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); pmap_update(); } return (unsigned *) APTmap; } /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. */ -unsigned * __pure +__inline unsigned * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { if (pmap && *pmap_pde(pmap, va)) { return get_ptbase(pmap) + i386_btop(va); } return (0); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * pmap_update calls. */ -unsigned * __pure +__inline unsigned * __pure pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { - unsigned pde, newpf; + unsigned pde; if (pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; /* are we current address space or kernel? 
*/ if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { return (unsigned *) PTmap + i386_btop(va); } - newpf = pde & PG_FRAME; - if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { - * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; - pmap_update_1pg((vm_offset_t) PADDR1); - } + * (int *) PMAP1 = (pde & PG_FRAME) | PG_V | PG_RW; + pmap_update_1pg((vm_offset_t) PADDR1); return PADDR1 + ((unsigned) i386_btop(va) & (NPTEPG - 1)); } return (0); } + /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_offset_t __pure pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { if (pmap && *pmap_pde(pmap, va)) { unsigned *pte; pte = get_ptbase(pmap) + i386_btop(va); return ((*pte & PG_FRAME) | (va & PAGE_MASK)); } return 0; } /* * determine if a page is managed (memory vs. device) */ -static PMAP_INLINE __pure int +static __inline __pure int pmap_is_managed(pa) vm_offset_t pa; { int i; if (!pmap_initialized) return 0; for (i = 0; phys_avail[i + 1]; i += 2) { if (pa < phys_avail[i + 1] && pa >= phys_avail[i]) return 1; } return 0; } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. */ void pmap_qenter(va, m, count) vm_offset_t va; vm_page_t *m; int count; { int i; register unsigned *pte; for (i = 0; i < count; i++) { vm_offset_t tva = va + i * PAGE_SIZE; unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V; unsigned opte; pte = (unsigned *)vtopte(tva); opte = *pte; *pte = npte; if (opte) pmap_update_1pg(tva); } } /* * this routine jerks page mappings from the * kernel -- it is meant only for temporary mappings. 
*/ void pmap_qremove(va, count) vm_offset_t va; int count; { int i; register unsigned *pte; for (i = 0; i < count; i++) { pte = (unsigned *)vtopte(va); *pte = 0; pmap_update_1pg(va); va += PAGE_SIZE; } } /* * add a wired page to the kva * note that in order for the mapping to take effect -- you * should do a pmap_update after doing the pmap_kenter... */ -PMAP_INLINE void +__inline void pmap_kenter(va, pa) vm_offset_t va; register vm_offset_t pa; { register unsigned *pte; unsigned npte, opte; npte = pa | PG_RW | PG_V; pte = (unsigned *)vtopte(va); opte = *pte; *pte = npte; if (opte) pmap_update_1pg(va); } /* * remove a page from the kernel pagetables */ -PMAP_INLINE void +__inline void pmap_kremove(va) vm_offset_t va; { register unsigned *pte; pte = (unsigned *)vtopte(va); *pte = 0; pmap_update_1pg(va); } -static vm_page_t -pmap_page_alloc(object, pindex) - vm_object_t object; - vm_pindex_t pindex; + +/*************************************************** + * Page table page management routines..... + ***************************************************/ + +/* + * This routine unholds page table pages, and if the hold count + * drops to zero, then it decrements the wire count. + */ +static __inline int +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { + vm_page_unhold(m); + if (m->hold_count == 0) { + vm_offset_t pteva; + /* + * unmap the page table page + */ + pmap->pm_pdir[m->pindex] = 0; + --pmap->pm_stats.resident_count; + /* + * Do a pmap_update to make the invalidated mapping + * take effect immediately. + */ + pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); + pmap_update_1pg(pteva); + /* + * If the page is finally unwired, simply free it. + */ + --m->wire_count; + if (m->wire_count == 0) { + vm_page_free_zero(m); + --cnt.v_wire_count; + } + return 1; + } + return 0; +} + +/* + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the hold/wire counts. 
+ */ +int +pmap_unuse_pt(pmap, va, mpte) + pmap_t pmap; + vm_offset_t va; + vm_page_t mpte; { - vm_page_t m; - m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO); - if (m == NULL) { - VM_WAIT; + if (va >= UPT_MIN_ADDRESS) + return 0; + + if (mpte == NULL) { + vm_offset_t ptepa; + ptepa = ((vm_offset_t) *pmap_pde(pmap, va)); +#if defined(PMAP_DIAGNOSTIC) + if (!ptepa) + panic("pmap_unuse_pt: pagetable page missing, va: 0x%x", va); +#endif + if (!ptepa) + return 0; + mpte = PHYS_TO_VM_PAGE(ptepa); } - return m; + +#if defined(PMAP_DIAGNOSTIC) + if (mpte->pindex != (va >> PDRSHIFT)) + panic("pmap_unuse_pt: pindex(0x%x) != va(0x%x)", + mpte->pindex, (va >> PDRSHIFT)); + + if (mpte->hold_count == 0) { + panic("pmap_unuse_pt: hold count < 0, va: 0x%x", va); + } +#endif + + return pmap_unwire_pte_hold(pmap, mpte); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg; /* * No need to allocate page table space yet but we do need a valid * page directory table. 
*/ if (pdstackptr > 0) { --pdstackptr; pmap->pm_pdir = (pd_entry_t *)pdstack[pdstackptr]; } else { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); } /* * allocate object for the ptes */ pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1); /* * allocate the page directory page */ retry: - ptdpg = pmap_page_alloc( pmap->pm_pteobj, PTDPTDI); - if (ptdpg == NULL) + ptdpg = vm_page_alloc( pmap->pm_pteobj, PTDPTDI, VM_ALLOC_ZERO); + if (ptdpg == NULL) { + VM_WAIT; goto retry; - - ptdpg->wire_count = 1; - ++cnt.v_wire_count; + } + vm_page_wire(ptdpg); ptdpg->flags &= ~(PG_MAPPED|PG_BUSY); /* not mapped normally */ ptdpg->valid = VM_PAGE_BITS_ALL; pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); if ((ptdpg->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir, PAGE_SIZE); /* wire in kernel global address entries */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); /* install self-referential address mapping entry */ *(unsigned *) (pmap->pm_pdir + PTDPTDI) = VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW; pmap->pm_count = 1; - TAILQ_INIT(&pmap->pm_pvlist.pv_list); } static int pmap_release_free_page(pmap, p) struct pmap *pmap; vm_page_t p; { int s; unsigned *pde = (unsigned *) pmap->pm_pdir; /* * This code optimizes the case of freeing non-busy * page-table pages. Those pages are zero now, and * might as well be placed directly into the zero queue. */ s = splvm(); if (p->flags & PG_BUSY) { p->flags |= PG_WANTED; tsleep(p, PVM, "pmaprl", 0); splx(s); return 0; } /* * Remove the page table page from the processes address space. 
*/ pde[p->pindex] = 0; --pmap->pm_stats.resident_count; if (p->hold_count) { + int *kvap; + int i; +#if defined(PMAP_DIAGNOSTIC) panic("pmap_release: freeing held page table page"); +#else + printf("pmap_release: freeing held page table page:\n"); +#endif + kvap = (int *)vm_pager_map_page(p); + for(i=0;ipindex == PTDPTDI) { bzero(pde + KPTDI, nkpt * PTESIZE); pde[APTDPTDI] = 0; pmap_kremove((vm_offset_t) pmap->pm_pdir); } vm_page_free_zero(p); splx(s); return 1; } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; int ptepindex; { - vm_offset_t ptepa; + vm_offset_t pteva, ptepa; vm_page_t m; /* * Find or fabricate a new pagetable page */ retry: m = vm_page_lookup(pmap->pm_pteobj, ptepindex); if (m == NULL) { - m = pmap_page_alloc(pmap->pm_pteobj, ptepindex); - if (m == NULL) + m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO); + if (m == NULL) { + VM_WAIT; goto retry; + } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(VM_PAGE_TO_PHYS(m)); m->flags &= ~(PG_ZERO|PG_BUSY); m->valid = VM_PAGE_BITS_ALL; } else { if ((m->flags & PG_BUSY) || m->busy) { m->flags |= PG_WANTED; tsleep(m, PVM, "ptewai", 0); goto retry; } } + /* + * mark the object writeable + */ + pmap->pm_pteobj->flags |= OBJ_WRITEABLE; + if (m->queue != PQ_NONE) { int s = splvm(); - vm_page_unqueue(m,1); + vm_page_unqueue(m); splx(s); } - if (m->wire_count == 0) - ++cnt.v_wire_count; - ++m->wire_count; - + if (m->hold_count == 0) { + if (m->wire_count == 0) + ++cnt.v_wire_count; + ++m->wire_count; + } /* * Increment the hold count for the page table page * (denoting a new mapping.) */ ++m->hold_count; /* * Map the pagetable page into the process address space, if * it isn't already there. 
*/ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V); + pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex); + pmap_update_1pg(pteva); m->flags |= PG_MAPPED; return m; } -PMAP_INLINE static vm_page_t +static __inline vm_page_t pmap_allocpte(pmap, va) pmap_t pmap; vm_offset_t va; { int ptepindex; vm_offset_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { - m = vm_page_lookup( pmap->pm_pteobj, ptepindex); + m = PHYS_TO_VM_PAGE(ptepa); ++m->hold_count; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap) register struct pmap *pmap; { vm_page_t p,n,ptdpg; vm_object_t object = pmap->pm_pteobj; if (object->ref_count != 1) panic("pmap_release: pteobj reference count != 1"); ptdpg = NULL; retry: for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { n = TAILQ_NEXT(p, listq); if (p->pindex == PTDPTDI) { ptdpg = p; continue; } if (!pmap_release_free_page(pmap, p)) goto retry; } if (ptdpg == NULL) panic("pmap_release: missing page table directory page"); if (!pmap_release_free_page(pmap, ptdpg)) goto retry; vm_object_deallocate(object); if (pdstackptr < PDSTACKMAX) { pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir; ++pdstackptr; } else { kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); } } /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct proc *p; struct pmap *pmap; int s; s = splhigh(); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); ++nkpt; } } addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } ++nkpt; if (!nkpg) { nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); vm_page_wire(nkpg); vm_page_remove(nkpg); pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); } pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW); nkpg = NULL; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_vmspace) { pmap = &p->p_vmspace->vm_pmap; *pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); } } *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } /* * Retire the 
given physical map from service. * Should only be called if the map contains * no valid mappings. */ void pmap_destroy(pmap) register pmap_t pmap; { int count; if (pmap == NULL) return; count = --pmap->pm_count; if (count == 0) { pmap_release(pmap); free((caddr_t) pmap, M_VMPMAP); } } /* * Add a reference to the specified pmap. */ void pmap_reference(pmap) pmap_t pmap; { if (pmap != NULL) { pmap->pm_count++; } } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ -static PMAP_INLINE void +static __inline void free_pv_entry(pv) pv_entry_t pv; { ++pv_freelistcnt; - TAILQ_INSERT_HEAD(&pv_freelist, pv, pv_list); + pv->pv_next = pv_freelist; + pv_freelist = pv; } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ -static PMAP_INLINE pv_entry_t +static __inline pv_entry_t get_pv_entry() { pv_entry_t tmp; /* * get more pv_entry pages if needed */ - if (pv_freelistcnt < PV_FREELIST_MIN) { + if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) { pmap_alloc_pv_entry(); } - /* * get a pv_entry off of the free list */ --pv_freelistcnt; - tmp = TAILQ_FIRST(&pv_freelist); - TAILQ_REMOVE(&pv_freelist, tmp, pv_list); + tmp = pv_freelist; + pv_freelist = tmp->pv_next; return tmp; } /* * This *strange* allocation routine eliminates the possibility of a malloc * failure (*FATAL*) for a pv_entry_t data structure. * also -- this code is MUCH MUCH faster than the malloc equiv... * We really need to do the slab allocator thingie here. */ static void pmap_alloc_pv_entry() { /* * do we have any pre-allocated map-pages left? 
*/ if (npvvapg) { vm_page_t m; /* * allocate a physical page out of the vm system */ m = vm_page_alloc(kernel_object, OFF_TO_IDX(pvva - vm_map_min(kernel_map)), VM_ALLOC_INTERRUPT); if (m) { int newentries; int i; pv_entry_t entry; newentries = (PAGE_SIZE / sizeof(struct pv_entry)); /* * wire the page */ vm_page_wire(m); m->flags &= ~PG_BUSY; /* * let the kernel see it */ pmap_kenter(pvva, VM_PAGE_TO_PHYS(m)); entry = (pv_entry_t) pvva; /* * update the allocation pointers */ pvva += PAGE_SIZE; --npvvapg; /* * free the entries into the free list */ for (i = 0; i < newentries; i++) { free_pv_entry(entry); entry++; } } } - if (TAILQ_FIRST(&pv_freelist) == NULL) + if (!pv_freelist) panic("get_pv_entry: cannot get a pv_entry_t"); } /* * init the pv_entry allocation system */ #define PVSPERPAGE 64 void init_pv_entries(npg) int npg; { /* * allocate enough kvm space for PVSPERPAGE entries per page (lots) * kvm space is fairly cheap, be generous!!! (the system can panic if * this is too small.) */ npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + PAGE_SIZE - 1) / PAGE_SIZE; pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE); /* * get the first batch of entries */ pmap_alloc_pv_entry(); } /* - * This routine unholds page table pages, and if the hold count - * drops to zero, then it decrements the wire count. + * If it is the first entry on the list, it is actually + * in the header and we must copy the following entry up + * to the header. Otherwise we must search the list for + * the entry. In either case we free the now unused entry. */ -static int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { - vm_page_unhold(m); - if (m->hold_count == 0) { - vm_offset_t pteva; - /* - * unmap the page table page - */ - pmap->pm_pdir[m->pindex] = 0; - --pmap->pm_stats.resident_count; - if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == - (((unsigned) PTDpde) & PG_FRAME)) { - /* - * Do a pmap_update to make the invalidated mapping - * take effect immediately. 
- */ - pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); - pmap_update_1pg(pteva); +static __inline int +pmap_remove_entry(pmap, ppv, va) + struct pmap *pmap; + pv_entry_t *ppv; + vm_offset_t va; +{ + pv_entry_t npv; + int s; + + s = splvm(); + for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) { + if (pmap == npv->pv_pmap && va == npv->pv_va) { + int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem); + *ppv = npv->pv_next; + free_pv_entry(npv); + splx(s); + return rtval; } - /* - * If the page is finally unwired, simply free it. - */ - --m->wire_count; - if (m->wire_count == 0) { - vm_page_free_zero(m); - --cnt.v_wire_count; - } - return 1; } + splx(s); return 0; } /* - * After removing a page table entry, this routine is used to - * conditionally free the page, and manage the hold/wire counts. + * Create a pv entry for page at pa for + * (pmap, va). */ -PMAP_INLINE int -pmap_unuse_pt(pmap, va, mpte) +static __inline void +pmap_insert_entry(pmap, va, mpte, pa) pmap_t pmap; vm_offset_t va; vm_page_t mpte; + vm_offset_t pa; { - int ptepindex; - if (va >= UPT_MIN_ADDRESS) - return 0; - if (mpte == NULL) { - ptepindex = (va >> PDRSHIFT); - mpte = vm_page_lookup( pmap->pm_pteobj, ptepindex); - } - - return pmap_unwire_pte_hold(pmap, mpte); -} - -/* - * If it is the first entry on the list, it is actually - * in the header and we must copy the following entry up - * to the header. Otherwise we must search the list for - * the entry. In either case we free the now unused entry. 
- */ -static int -pmap_remove_entry(pmap, ppv, va) - struct pmap *pmap; - pv_table_t *ppv; - vm_offset_t va; -{ - pv_entry_t pv; - int rtval; int s; + pv_entry_t *ppv, pv; s = splvm(); - if (ppv->pv_list_count < pmap->pm_stats.resident_count) { - for (pv = TAILQ_FIRST(&ppv->pv_list); - pv; - pv = TAILQ_NEXT(pv, pv_list)) { - if (pmap == pv->pv_pmap && va == pv->pv_va) - break; - } - } else { - for (pv = TAILQ_FIRST(&pmap->pm_pvlist.pv_list); - pv; - pv = TAILQ_NEXT(pv, pv_plist)) { - if (va == pv->pv_va) - break; - } - } + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; - rtval = 0; - if (pv) { - rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); - TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); - --ppv->pv_list_count; - TAILQ_REMOVE(&pmap->pm_pvlist.pv_list, pv, pv_plist); - free_pv_entry(pv); - } - + ppv = pa_to_pvh(pa); + if (*ppv) + pv->pv_next = *ppv; + else + pv->pv_next = NULL; + *ppv = pv; splx(s); - return rtval; } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap, ptq, va) struct pmap *pmap; unsigned *ptq; vm_offset_t va; { unsigned oldpte; + pv_entry_t *ppv; oldpte = *ptq; *ptq = 0; if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) oldpte)) { printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte); } #endif if (pmap_track_modified(va)) PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL; } - return pmap_remove_entry(pmap, pa_to_pvh(oldpte), va); + ppv = pa_to_pvh(oldpte); + return pmap_remove_entry(pmap, ppv, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap, va) struct pmap *pmap; register vm_offset_t va; { register unsigned *ptq; /* * if there is no pte for this address, just skip it!!! 
*/ if (*pmap_pde(pmap, va) == 0) { return; } /* * get a local va for mappings for this pmap. */ ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { (void) pmap_remove_pte(pmap, ptq, va); pmap_update_1pg(va); } return; } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap, sva, eva) struct pmap *pmap; register vm_offset_t sva; register vm_offset_t eva; { register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; int anyvalid; + vm_offset_t vachanged[VATRACK]; if (pmap == NULL) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE) == eva) { pmap_remove_page(pmap, sva); return; } anyvalid = 0; /* * Get a local virtual address for the mappings that are being * worked with. */ ptbase = get_ptbase(pmap); sindex = i386_btop(sva); eindex = i386_btop(eva); - mpte = NULL; for (; sindex < eindex; sindex = pdnxt) { /* * Calculate index for next page table. */ pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); ptpaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sindex)); /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; + if (sindex < i386_btop(UPT_MIN_ADDRESS)) { /* + * get the vm_page_t for the page table page + */ + mpte = PHYS_TO_VM_PAGE(ptpaddr); + + /* + * if the pte isn't wired, just skip it. + */ + if (mpte->wire_count == 0) + continue; + } + + /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. 
*/ if (pdnxt > eindex) { pdnxt = eindex; } for ( ;sindex != pdnxt; sindex++) { vm_offset_t va; if (ptbase[sindex] == 0) { continue; } va = i386_ptob(sindex); + if (anyvalid < VATRACK) + vachanged[anyvalid] = va; anyvalid++; if (pmap_remove_pte(pmap, ptbase + sindex, va)) break; } } if (anyvalid) { - pmap_update(); + if (anyvalid <= VATRACK) { + int i; + for(i=0;ipv_list); - pv; - pv = npv) { + for (pv = *ppv; pv; pv=pv->pv_next) { pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (tpte = *pte) { pv->pv_pmap->pm_stats.resident_count--; *pte = 0; if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; /* * Update the vm_page_t clean and reference bits. */ - if ((tpte & (PG_M|PG_MANAGED)) == (PG_M|PG_MANAGED)) { + if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", pv->pv_va, tpte); } #endif - if (pmap_track_modified(pv->pv_va)) - nmodify += 1; + if (pmap_track_modified(pv->pv_va)) { + if (m == NULL) + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } } } - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist.pv_list, pv, pv_plist); + } - npv = TAILQ_NEXT(pv, pv_list); - TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); - --ppv->pv_list_count; + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } - + *ppv = NULL; splx(s); - return nmodify; } /* * Set the physical protection on the * specified range of this map as requested. 
*/ void pmap_protect(pmap, sva, eva, prot) register pmap_t pmap; vm_offset_t sva, eva; vm_prot_t prot; { register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; int anyvalid; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; anyvalid = 0; ptbase = get_ptbase(pmap); sindex = i386_btop(sva); eindex = i386_btop(eva); - mpte = NULL; for (; sindex < eindex; sindex = pdnxt) { pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); ptpaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sindex)); /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; + /* + * Skip page ranges, where the page table page isn't wired. + * If the page table page is not wired, there are no page mappings + * there. + */ + if (sindex < i386_btop(UPT_MIN_ADDRESS)) { + mpte = PHYS_TO_VM_PAGE(ptpaddr); + + if (mpte->wire_count == 0) + continue; + } + if (pdnxt > eindex) { pdnxt = eindex; } for (; sindex != pdnxt; sindex++) { unsigned pbits = ptbase[sindex]; - if ((pbits & (PG_RW|PG_V)) == (PG_RW|PG_V)) { - if ((pbits & (PG_M|PG_MANAGED)) == (PG_M|PG_MANAGED)) { + if (pbits & PG_RW) { + if (pbits & PG_M) { vm_offset_t sva = i386_ptob(sindex); if (pmap_track_modified(sva)) { vm_page_t m = PHYS_TO_VM_PAGE(pbits); m->dirty = VM_PAGE_BITS_ALL; } } ptbase[sindex] = pbits & ~(PG_M|PG_RW); anyvalid = 1; } } } if (anyvalid) pmap_update(); } /* - * Create a pv entry for page at pa for - * (pmap, va). 
- */ -static void -pmap_insert_entry(pmap, va, mpte, pa) - pmap_t pmap; - vm_offset_t va; - vm_page_t mpte; - vm_offset_t pa; -{ - - int s; - pv_entry_t pv; - pv_table_t *ppv; - - s = splvm(); - pv = get_pv_entry(); - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_ptem = mpte; - - TAILQ_INSERT_TAIL(&pmap->pm_pvlist.pv_list, pv, pv_plist); - - ppv = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); - ++ppv->pv_list_count; - - splx(s); -} - -/* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap, va, pa, prot, wired) register pmap_t pmap; vm_offset_t va; register vm_offset_t pa; vm_prot_t prot; boolean_t wired; { register unsigned *pte; vm_offset_t opa; vm_offset_t origpte, newpte; vm_page_t mpte; if (pmap == NULL) return; va &= PG_FRAME; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < UPT_MIN_ADDRESS) mpte = pmap_allocpte(pmap, va); - pte = pmap_pte(pmap, va); + pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory, pdir=%p, va=0x%lx\n", pmap->pm_pdir[PTDPTDI], va); } origpte = *(vm_offset_t *)pte; pa &= PG_FRAME; opa = origpte & PG_FRAME; /* * Mapping has not changed, must be protection or wiring change. */ - if (origpte && (opa == pa)) { + if (opa == pa) { /* * Wiring change, just update stats. 
We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) origpte)) { printf("pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, origpte); } #endif /* * We might be turning off write access to the page, * so we go ahead and sense modify status. */ if (origpte & PG_MANAGED) { - if ((origpte & PG_M) && pmap_track_modified(va)) { - vm_page_t m; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; + vm_page_t m; + if (origpte & PG_M) { + if (pmap_track_modified(va)) { + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } } pa |= PG_MANAGED; } if (mpte) --mpte->hold_count; goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ - if (origpte) { + if (opa) { int err; err = pmap_remove_pte(pmap, pte, va); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_is_managed(pa)) { pmap_insert_entry(pmap, va, mpte, pa); pa |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; if (va < UPT_MIN_ADDRESS) newpte |= PG_U; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte; if (origpte) pmap_update_1pg(va); } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. 
Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ -static vm_page_t -pmap_enter_quick(pmap, va, pa, mpte) +static void +pmap_enter_quick(pmap, va, pa) register pmap_t pmap; vm_offset_t va; register vm_offset_t pa; - vm_page_t mpte; { register unsigned *pte; + vm_page_t mpte; + mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ - if (va < UPT_MIN_ADDRESS) { - int ptepindex; - vm_offset_t ptepa; + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); - /* - * Calculate pagetable page index - */ - ptepindex = va >> PDRSHIFT; - if (mpte && (mpte->pindex == ptepindex)) { - ++mpte->hold_count; - } else { - /* - * Get the page directory entry - */ - ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; - - /* - * If the page table page is mapped, we just increment - * the hold count, and activate it. - */ - if (ptepa) { - mpte = vm_page_lookup( pmap->pm_pteobj, ptepindex); - ++mpte->hold_count; - } else { - mpte = _pmap_allocpte(pmap, ptepindex); - } - } - } else { - mpte = NULL; - } - /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = (unsigned *)vtopte(va); if (*pte) { if (mpte) pmap_unwire_pte_hold(pmap, mpte); - return NULL; + return; } /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ pmap_insert_entry(pmap, va, mpte, pa); /* * Increment counters */ pmap->pm_stats.resident_count++; /* * Now validate mapping with RO protection */ *pte = pa | PG_V | PG_U | PG_MANAGED; - return mpte; + return; } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. 
This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap, addr, object, pindex, size, limit) pmap_t pmap; vm_offset_t addr; vm_object_t object; vm_pindex_t pindex; vm_size_t size; int limit; { vm_offset_t tmpidx; int psize; - vm_page_t p, mpte; + vm_page_t p; int objpgs; psize = i386_btop(size); if (!pmap || (object->type != OBJT_VNODE) || (limit && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) psize = object->size - pindex; - mpte = NULL; /* * if we are processing a major portion of the object, then scan the * entire thing. */ if (psize > (object->size >> 2)) { objpgs = psize; for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { continue; } tmpidx -= pindex; if (tmpidx >= psize) { continue; } if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (p->queue == PQ_CACHE) vm_page_deactivate(p); p->flags |= PG_BUSY; - mpte = pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), - VM_PAGE_TO_PHYS(p), mpte); + VM_PAGE_TO_PHYS(p)); p->flags |= PG_MAPPED; PAGE_WAKEUP(p); } objpgs -= 1; } } else { /* * else lookup the pages one-by-one. */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (p->queue == PQ_CACHE) vm_page_deactivate(p); p->flags |= PG_BUSY; - mpte = pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), - VM_PAGE_TO_PHYS(p), mpte); + VM_PAGE_TO_PHYS(p)); p->flags |= PG_MAPPED; PAGE_WAKEUP(p); } } } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a processes address space. 
It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time. */ #define PFBAK 2 #define PFFOR 2 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry, object) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; vm_object_t object; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; - vm_page_t m, mpte; + vm_page_t m; if (entry->object.vm_object != object) return; if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) return; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } - mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; unsigned *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr < starta || addr >= entry->end) continue; if ((*pmap_pde(pmap, addr)) == NULL) continue; pte = (unsigned *) vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give-up when a page is not in memory */ if (m == NULL) break; if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->busy == 0) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (m->queue == PQ_CACHE) { vm_page_deactivate(m); } m->flags |= PG_BUSY; - mpte = pmap_enter_quick(pmap, addr, - VM_PAGE_TO_PHYS(m), mpte); + pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); m->flags |= PG_MAPPED; PAGE_WAKEUP(m); } } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. 
* In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register unsigned *pte; if (pmap == NULL) return; pte = pmap_pte(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } + + /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) pmap_t dst_pmap, src_pmap; vm_offset_t dst_addr; vm_size_t len; vm_offset_t src_addr; { vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; unsigned src_frame, dst_frame; if (dst_addr != src_addr) return; src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) return; dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); pmap_update(); } for(addr = src_addr; addr < end_addr; addr = pdnxt) { unsigned *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; vm_offset_t srcptepaddr; - int ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); - pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); - ptepindex = addr >> PDRSHIFT; - - srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; - if (srcptepaddr == 0) + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT]; + if (srcptepaddr == 0) { continue; + } - srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); if (srcmpte->hold_count == 0) continue; if (pdnxt > end_addr) pdnxt = end_addr; src_pte = 
(unsigned *) vtopte(addr); dst_pte = (unsigned *) avtopte(addr); while (addr < pdnxt) { unsigned ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { /* * We have to check after allocpte for the * pte still being around... allocpte can * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* - * Clear the modified and - * accessed (referenced) bits - * during the copy. + * Simply clear the modified and accessed (referenced) + * bits. */ *dst_pte = ptetemp & ~(PG_M|PG_A); dst_pmap->pm_stats.resident_count++; - pmap_insert_entry(dst_pmap, addr, - dstmpte, + pmap_insert_entry(dst_pmap, addr, dstmpte, (ptetemp & PG_FRAME)); } else { pmap_unwire_pte_hold(dst_pmap, dstmpte); } if (dstmpte->hold_count >= srcmpte->hold_count) break; } addr += PAGE_SIZE; ++src_pte; ++dst_pte; } } } /* * Routine: pmap_kernel * Function: * Returns the physical map handle for the kernel. */ pmap_t pmap_kernel() { return (kernel_pmap); } /* * pmap_zero_page zeros the specified (machine independent) * page by mapping the page into virtual memory and using * bzero to clear its contents, one machine dependent page * at a time. */ void pmap_zero_page(phys) vm_offset_t phys; { if (*(int *) CMAP2) panic("pmap_zero_page: CMAP busy"); *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME); bzero(CADDR2, PAGE_SIZE); *(int *) CMAP2 = 0; pmap_update_1pg((vm_offset_t) CADDR2); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
*/ void pmap_copy_page(src, dst) vm_offset_t src; vm_offset_t dst; { if (*(int *) CMAP1 || *(int *) CMAP2) panic("pmap_copy_page: CMAP busy"); *(int *) CMAP1 = PG_V | PG_RW | (src & PG_FRAME); *(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME); bcopy(CADDR1, CADDR2, PAGE_SIZE); *(int *) CMAP1 = 0; *(int *) CMAP2 = 0; pmap_update_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2); } /* * Routine: pmap_pageable * Function: * Make the specified pages (by pmap, offset) * pageable (or not) as requested. * * A page which is not pageable may not take * a fault; therefore, its page table entry * must remain valid for the duration. * * This routine is merely advisory; pmap_enter * will specify that these pages are to be wired * down (or not) as appropriate. */ void pmap_pageable(pmap, sva, eva, pageable) pmap_t pmap; vm_offset_t sva, eva; boolean_t pageable; { } /* * this routine returns true if a physical page resides * in the given pmap. */ boolean_t pmap_page_exists(pmap, pa) pmap_t pmap; vm_offset_t pa; { - register pv_entry_t pv; - pv_table_t *ppv; + register pv_entry_t *ppv, pv; int s; if (!pmap_is_managed(pa)) return FALSE; s = splvm(); ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - for (pv = TAILQ_FIRST(&ppv->pv_list); - pv; - pv = TAILQ_NEXT(pv, pv_list)) { + for (pv = *ppv; pv; pv = pv->pv_next) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } } splx(s); return (FALSE); } -#define PMAP_REMOVE_PAGES_CURPROC_ONLY /* - * Remove all pages from specified address space - * this aids process exit speeds. Also, this code - * is special cased for current process only. + * pmap_testbit tests bits in pte's + * note that the testbit/changebit routines are inline, + * and a lot of things compile-time evaluate. 
*/ -void -pmap_remove_pages(pmap, sva, eva) - pmap_t pmap; - vm_offset_t sva, eva; +static __inline boolean_t +pmap_testbit(pa, bit) + register vm_offset_t pa; + int bit; { - unsigned *pte, tpte; - pv_table_t *ppv; - pv_entry_t pv, npv; + register pv_entry_t *ppv, pv; + unsigned *pte; int s; -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) { - printf("warning: pmap_remove_pages called with non-current pmap\n"); - return; - } -#endif + if (!pmap_is_managed(pa)) + return FALSE; + ppv = pa_to_pvh(pa); + if (*ppv == NULL) + return FALSE; + s = splvm(); + /* + * Not found, check current mappings returning immediately if found. + */ + for (pv = *ppv ;pv; pv = pv->pv_next) { - for(pv = TAILQ_FIRST(&pmap->pm_pvlist.pv_list); - pv; - pv = npv) { + /* + * if the bit being tested is the modified bit, then + * mark clean_map and ptes as never + * modified. + */ + if (bit & (PG_A|PG_M)) { + if (!pmap_track_modified(pv->pv_va)) + continue; + } - if (pv->pv_va >= eva || pv->pv_va < sva) { - npv = TAILQ_NEXT(pv, pv_plist); + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); +#endif continue; } - -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - pte = (unsigned *)vtopte(pv->pv_va); -#else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); -#endif - tpte = *pte; - *pte = 0; - - if (tpte) { - pv->pv_pmap->pm_stats.resident_count--; - if (tpte & PG_W) - pv->pv_pmap->pm_stats.wired_count--; - /* - * Update the vm_page_t clean and reference bits. 
- */ - if (tpte & PG_M) { - PHYS_TO_VM_PAGE(tpte)->dirty = VM_PAGE_BITS_ALL; - } + if (pte == NULL) + continue; + if (*pte & bit) { + splx(s); + return TRUE; } + } + splx(s); + return (FALSE); +} - npv = TAILQ_NEXT(pv, pv_plist); - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist.pv_list, pv, pv_plist); +/* + * this routine is used to modify bits in ptes + */ +static __inline void +pmap_changebit(pa, bit, setem) + vm_offset_t pa; + int bit; + boolean_t setem; +{ + register pv_entry_t pv, *ppv; + register unsigned *pte; + vm_offset_t va; + int changed; + int s; - ppv = pa_to_pvh(tpte); - --ppv->pv_list_count; - TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); + if (!pmap_is_managed(pa)) + return; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); - free_pv_entry(pv); + s = splvm(); + changed = 0; + ppv = pa_to_pvh(pa); + /* + * Loop over all current mappings setting/clearing as appropos If + * setting RO do we need to clear the VAC? + */ + for ( pv = *ppv; pv; pv = pv->pv_next) { + va = pv->pv_va; + + /* + * don't write protect pager mappings + */ + if (!setem && (bit == PG_RW)) { + if (va >= clean_sva && va < clean_eva) + continue; + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (cb) at va: 0x%lx\n", va); +#endif + continue; + } + + pte = pmap_pte_quick(pv->pv_pmap, va); + if (pte == NULL) + continue; + if (setem) { + *(int *)pte |= bit; + changed = 1; + } else { + vm_offset_t pbits = *(vm_offset_t *)pte; + if (pbits & bit) + changed = 1; + if (bit == PG_RW) { + if (pbits & PG_M) { + vm_page_t m; + vm_offset_t pa = pbits & PG_FRAME; + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } + *(int *)pte = pbits & ~(PG_M|PG_RW); + } else { + *(int *)pte = pbits & ~bit; + } + } } - pmap_update(); splx(s); + if (changed) + pmap_update(); } +/* + * pmap_page_protect: + * + * Lower the permission for all mappings to a given page. 
+ */ +void +pmap_page_protect(phys, prot) + vm_offset_t phys; + vm_prot_t prot; +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { + pmap_changebit(phys, PG_RW, FALSE); + } else { + pmap_remove_all(phys); + pmap_update(); + } + } +} + vm_offset_t pmap_phys_address(ppn) int ppn; { return (i386_ptob(ppn)); } /* - * pmap_tcbit: + * pmap_is_referenced: * - * Return the count of bits for a page, clearing all of them. + * Return whether or not the specified physical page was referenced + * by any physical maps. + */ +boolean_t +pmap_is_referenced(vm_offset_t pa) +{ + register pv_entry_t *ppv, pv, lpv; + unsigned *pte; + int s; + + if (!pmap_is_managed(pa)) + return FALSE; + + ppv = pa_to_pvh(pa); + + s = splvm(); + /* + * Not found, check current mappings returning immediately if found. + */ + for (lpv = NULL, pv = *ppv ;pv; lpv = pv, pv = pv->pv_next) { + /* + * if the bit being tested is the modified bit, then + * mark clean_map and ptes as never + * modified. + */ + if (!pmap_track_modified(pv->pv_va)) + continue; + if (!pv->pv_pmap) { + continue; + } + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + if (pte == NULL) + continue; + if ((int) *pte & PG_A) { + if (lpv) { + lpv->pv_next = pv->pv_next; + pv->pv_next = *ppv; + *ppv = pv; + } + splx(s); + return TRUE; + } + } + splx(s); + return (FALSE); +} + +/* + * pmap_ts_referenced: + * + * Return the count of reference bits for a page, clearing all of them. * */ int -pmap_tcbit(vm_offset_t pa, int bit) +pmap_ts_referenced(vm_offset_t pa) { - register pv_entry_t pv, npv; - pv_table_t *ppv; + register pv_entry_t *ppv, pv; unsigned *pte; int s; int rtval = 0; + vm_offset_t vachanged[VATRACK]; + if (!pmap_is_managed(pa)) + return FALSE; + s = splvm(); ppv = pa_to_pvh(pa); + + if (*ppv == NULL) { + splx(s); + return 0; + } + /* * Not found, check current mappings returning immediately if found. 
*/ - for (pv = TAILQ_FIRST(&ppv->pv_list); - pv; - pv = npv) { - npv = TAILQ_NEXT(pv, pv_list); + for (pv = *ppv ;pv; pv = pv->pv_next) { /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ - if (((bit & PG_M) != 0) - && !pmap_track_modified(pv->pv_va)) + if (!pmap_track_modified(pv->pv_va)) continue; + if (!pv->pv_pmap) { + continue; + } pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte == NULL) continue; - - if ((rtval == 0) && (*pte & bit)) { - rtval = 1; + if (*pte & PG_A) { + if (rtval < VATRACK) + vachanged[rtval] = pv->pv_va; + rtval++; + *pte &= ~PG_A; } - *pte &= ~bit; } splx(s); - if (rtval) - pmap_update(); + if (rtval) { + if (rtval <= VATRACK) { + int i; + for(i=0;idirty = VM_PAGE_BITS_ALL; - - return rtval; + return pmap_testbit((pa), PG_M); } /* - * pmap_tc_referenced: - * - * Return the count of referenced bits for a page, clearing all of them. - * + * Clear the modify bits on the specified physical page. */ -int -pmap_tc_referenced(vm_offset_t pa) +void +pmap_clear_modify(vm_offset_t pa) { - if (!pmap_is_managed(pa)) - return 0; - return pmap_tcbit(pa, PG_A); + pmap_changebit((pa), PG_M, FALSE); } /* - * pmap_page_protect: + * pmap_clear_reference: * - * Lower the permission for all mappings to a given page. + * Clear the reference bit on the specified physical page. 
*/ void -pmap_page_protect(m, prot) - vm_page_t m; - vm_prot_t prot; +pmap_clear_reference(vm_offset_t pa) { - if ((prot & VM_PROT_WRITE) == 0) { - if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { - if ((m->flags & PG_FICTITIOUS) == 0) - pmap_tcbit(VM_PAGE_TO_PHYS(m), PG_RW); - } else { - if (pmap_remove_all(VM_PAGE_TO_PHYS(m))) { - m->dirty = VM_PAGE_BITS_ALL; - } - pmap_update(); - } - } + pmap_changebit((pa), PG_A, FALSE); } /* * Miscellaneous support routines follow */ static void i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: /* * Read access is also 0. There isn't any execute bit, * so just make it readable. */ case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = 0; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. The non-cacheable bits are set on each * mapped page. 
*/ void * pmap_mapdev(pa, size) vm_offset_t pa; vm_size_t size; { vm_offset_t va, tmpva; unsigned *pte; size = roundup(size, PAGE_SIZE); va = kmem_alloc_pageable(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { pte = (unsigned *)vtopte(tmpva); *pte = pa | PG_RW | PG_V | PG_N; size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } pmap_update(); return ((void *) va); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { unsigned *ptep, pte; int val = 0; - ptep = pmap_pte_quick(pmap, addr); + ptep = pmap_pte(pmap, addr); if (ptep == 0) { return 0; } if (pte = *ptep) { vm_offset_t pa; val = MINCORE_INCORE; pa = pte & PG_FRAME; /* * Modified by us */ if (pte & PG_M) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; /* * Modified by someone */ else if (PHYS_TO_VM_PAGE(pa)->dirty || - pmap_tcbit(pa, PG_M)) { + pmap_is_modified(pa)) val |= MINCORE_MODIFIED_OTHER; - PHYS_TO_VM_PAGE(pa)->dirty = VM_PAGE_BITS_ALL; - } /* * Referenced by us */ if (pte & PG_U) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; /* * Referenced by someone */ else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) || - pmap_tcbit(pa, PG_A)) + pmap_is_referenced(pa)) val |= MINCORE_REFERENCED_OTHER; } return val; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = &p->p_vmspace->vm_pmap; for(i=0;i<1024;i++) { pd_entry_t *pde; unsigned *pte; unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for(j=0;j<1024;j++) { unsigned va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } return npte; } - pte = pmap_pte_quick( pmap, va); + pte = pmap_pte( pmap, va); if (pte && 
pmap_pte_v(pte)) { vm_offset_t pa; vm_page_t m; pa = *(int *)pte; m = PHYS_TO_VM_PAGE((pa & PG_FRAME)); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } return npte; } #endif #if defined(DEBUG) static void pads __P((pmap_t pm)); static void pmap_pvdump __P((vm_offset_t pa)); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { unsigned va, i, j; unsigned *ptep; if (pm == kernel_pmap) return; for (i = 0; i < 1024; i++) if (pm->pm_pdir[i]) for (j = 0; j < 1024; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; - ptep = pmap_pte_quick(pm, va); + ptep = pmap_pte(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *) ptep); }; } static void pmap_pvdump(pa) vm_offset_t pa; { register pv_entry_t pv; printf("pa %x", pa); - for (pv = TAILQ_FIRST(pa_to_pvh(pa)); - pv; - pv = TAILQ_NEXT(pv, pv_list)) { + for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) { #ifdef used_to_be printf(" -> pmap %x, va %x, flags %x", pv->pv_pmap, pv->pv_va, pv->pv_flags); #endif printf(" -> pmap %x, va %x", pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif Index: head/sys/amd64/include/pmap.h =================================================================== --- head/sys/amd64/include/pmap.h (revision 17333) +++ head/sys/amd64/include/pmap.h (revision 17334) @@ -1,237 +1,224 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. 
* * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.40 1996/06/08 11:21:19 bde Exp $ + * $Id: pmap.h,v 1.41 1996/07/27 03:23:32 dyson Exp $ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ - /* * Page-directory and page-table entires follow this format, with a few * of the fields not present here and there, depending on a lot of things. */ /* ---- Intel Nomenclature ---- */ #define PG_V 0x001 /* P Valid */ #define PG_RW 0x002 /* R/W Read/Write */ #define PG_U 0x004 /* U/S User/Supervisor */ #define PG_NC_PWT 0x008 /* PWT Write through */ #define PG_NC_PCD 0x010 /* PCD Cache disable */ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ #define PG_MANAGED PG_AVAIL2 #define PG_FRAME (~PAGE_MASK) #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* * Page Protection Exception bits */ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ /* * Pte related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< - typedef unsigned int *pd_entry_t; typedef unsigned int *pt_entry_t; #define PDESIZE sizeof(pd_entry_t) /* for assembly files */ #define PTESIZE sizeof(pt_entry_t) /* for assembly files */ /* * Address of current and alternate address space page table maps * and directories. 
*/ #ifdef KERNEL extern pt_entry_t PTmap[], APTmap[], Upte; extern pd_entry_t PTD[], APTD[], PTDpde, APTDpde, Upde; extern int IdlePTD; /* physical address of "Idle" state directory */ #endif /* * virtual address to page table entry and * to physical address. Likewise for alternate address space. * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. */ #define vtopte(va) (PTmap + i386_btop(va)) #define vtophys(va) (((int) (*vtopte(va))&PG_FRAME) | ((int)(va) & PAGE_MASK)) #define avtopte(va) (APTmap + i386_btop(va)) #define avtophys(va) (((int) (*avtopte(va))&PG_FRAME) | ((int)(va) & PAGE_MASK)) #ifdef KERNEL /* * Routine: pmap_kextract * Function: * Extract the physical page address associated * kernel virtual address. */ static __inline vm_offset_t pmap_kextract(vm_offset_t va) { vm_offset_t pa = *(int *)vtopte(va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); return pa; } #endif -struct vm_page; - /* * Pmap stuff */ -struct pv_entry; -typedef struct { - int pv_list_count; - TAILQ_HEAD(,pv_entry) pv_list; -} pv_table_t; struct pmap { pd_entry_t *pm_pdir; /* KVA of page directory */ vm_object_t pm_pteobj; /* Container for pte's */ - pv_table_t pm_pvlist; /* list of mappings in pmap */ - int pm_count; /* reference count */ + short pm_dref; /* page directory ref count */ + short pm_count; /* pmap reference count */ struct pmap_statistics pm_stats; /* pmap statistics */ - struct vm_page *pm_ptphint; /* pmap ptp hint */ + struct vm_map *pm_map; /* map that owns this pmap */ }; typedef struct pmap *pmap_t; #ifdef KERNEL extern pmap_t kernel_pmap; #endif - /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_table. 
*/ typedef struct pv_entry { + struct pv_entry *pv_next; /* next pv_entry */ pmap_t pv_pmap; /* pmap where mapping lies */ vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; - TAILQ_ENTRY(pv_entry) pv_plist; vm_page_t pv_ptem; /* VM page for pte */ } *pv_entry_t; #define PV_ENTRY_NULL ((pv_entry_t) 0) #define PV_CI 0x01 /* all entries must be cache inhibited */ #define PV_PTPAGE 0x02 /* entry maps a page table page */ #ifdef KERNEL extern caddr_t CADDR1; extern pt_entry_t *CMAP1; extern vm_offset_t avail_end; extern vm_offset_t avail_start; extern vm_offset_t phys_avail[]; -pv_table_t *pv_table; +extern pv_entry_t *pv_table; /* array of entries, one per page */ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; #define pa_index(pa) atop(pa - vm_first_phys) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) struct pcb; void pmap_bootstrap __P(( vm_offset_t, vm_offset_t)); pmap_t pmap_kernel __P((void)); void *pmap_mapdev __P((vm_offset_t, vm_size_t)); unsigned * __pure pmap_pte __P((pmap_t, vm_offset_t)) __pure2; int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t)); #endif /* KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/i386/i386/pmap.c =================================================================== --- head/sys/i386/i386/pmap.c (revision 17333) +++ head/sys/i386/i386/pmap.c (revision 17334) @@ -1,2549 +1,2669 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.111 1996/07/28 20:31:27 dyson Exp $ + * $Id: pmap.c,v 1.113 1996/07/29 14:22:46 dyson Exp $ */ /* * Manages physical address maps. 
* * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PMAP_KEEP_PDIRS #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif -#if !defined(SMALL_KERNEL) -#define PMAP_INLINE __inline -#else -#define PMAP_INLINE -#endif - static void init_pv_entries __P((int)); /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine 
independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) static int protection_codes[8]; static struct pmap kernel_pmap_store; pmap_t kernel_pmap; vm_offset_t avail_start; /* PA of first available physical page */ vm_offset_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ static vm_offset_t vm_first_phys; static int nkpt; static vm_page_t nkpg; vm_offset_t kernel_vm_end; extern vm_offset_t clean_sva, clean_eva; extern int cpu_class; #define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) /* * Data for the pv entry allocation mechanism */ static int pv_freelistcnt; -TAILQ_HEAD (,pv_entry) pv_freelist; +static pv_entry_t pv_freelist; static vm_offset_t pvva; static int npvvapg; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1; static pt_entry_t *CMAP2, *ptmmap; +static pv_entry_t *pv_table; caddr_t CADDR1, ptvmmap; static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp; pt_entry_t *PMAP1; unsigned *PADDR1; static void free_pv_entry __P((pv_entry_t pv)); -static unsigned * get_ptbase __P((pmap_t pmap)); +static __inline unsigned * get_ptbase __P((pmap_t pmap)); static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static void pmap_alloc_pv_entry __P((void)); +static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); static int pmap_is_managed __P((vm_offset_t pa)); -static int pmap_remove_all __P((vm_offset_t pa)); -static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, - vm_offset_t pa, vm_page_t mpte)); +static void pmap_remove_all __P((vm_offset_t pa)); +static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, + vm_offset_t pa)); static int pmap_remove_pte 
__P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); -static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv, +static __inline int pmap_remove_entry __P((struct pmap *pmap, pv_entry_t *pv, vm_offset_t va)); -static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, +static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); +static __inline void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_offset_t pa)); -static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); +static __inline vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); -static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); +static __inline int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); static vm_page_t _pmap_allocpte __P((pmap_t pmap, int ptepindex)); -unsigned * __pure pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); -int pmap_tcbit __P((vm_offset_t pa, int bit)); -static vm_page_t pmap_page_alloc __P((vm_object_t object, vm_pindex_t pindex)); -#define PDSTACKMAX 6 +#define VATRACK 4 +#define PDSTACKMAX 16 static vm_offset_t pdstack[PDSTACKMAX]; static int pdstackptr; /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(firstaddr, loadaddr) vm_offset_t firstaddr; vm_offset_t loadaddr; { vm_offset_t va; pt_entry_t *pte; avail_start = firstaddr; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. 
It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize protection array. */ i386_protection_init(); /* * The kernel's pmap is statically allocated so we don't have to use * pmap_create, which is unlikely to work correctly at this part of * the boot sequence (XXX and which no longer exists). */ kernel_pmap = &kernel_pmap_store; kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD); kernel_pmap->pm_count = 1; - TAILQ_INIT(&kernel_pmap->pm_pvlist.pv_list); nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP2, CADDR2, 1) /* * ptmmap is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, ptmmap, ptvmmap, 1) /* * msgbufmap is used to map the system message buffer. */ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1) /* * ptemap is used for pmap_pte_quick */ SYSMAP(unsigned *, PMAP1, PADDR1, 1); virtual_avail = va; *(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0; pmap_update(); } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. 
*/ void pmap_init(phys_start, phys_end) vm_offset_t phys_start, phys_end; { vm_offset_t addr; vm_size_t npg, s; int i; /* * calculate the number of pv_entries needed */ vm_first_phys = phys_avail[0]; for (i = 0; phys_avail[i + 1]; i += 2); npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ - s = (vm_size_t) (sizeof(pv_table_t) * npg); + s = (vm_size_t) (sizeof(struct pv_entry *) * npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); - pv_table = (pv_table_t *) addr; - for(i=0;i= clean_eva)) return 1; else return 0; } /* * The below are finer grained pmap_update routines. These eliminate * the gratuitious tlb flushes on non-i386 architectures. */ -static PMAP_INLINE void +static __inline void pmap_update_1pg( vm_offset_t va) { #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) pmap_update(); else #endif __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va)); } -static PMAP_INLINE void +static __inline void pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) { pmap_update(); } else #endif { __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va1)); __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va2)); } } static __pure unsigned * get_ptbase(pmap) pmap_t pmap; { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; /* are we current address space or kernel? */ if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { return (unsigned *) PTmap; } /* otherwise, we are alternate address space */ if (frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); pmap_update(); } return (unsigned *) APTmap; } /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. 
*/ -unsigned * __pure +__inline unsigned * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { if (pmap && *pmap_pde(pmap, va)) { return get_ptbase(pmap) + i386_btop(va); } return (0); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * pmap_update calls. */ -unsigned * __pure +__inline unsigned * __pure pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { - unsigned pde, newpf; + unsigned pde; if (pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; /* are we current address space or kernel? */ if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { return (unsigned *) PTmap + i386_btop(va); } - newpf = pde & PG_FRAME; - if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { - * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; - pmap_update_1pg((vm_offset_t) PADDR1); - } + * (int *) PMAP1 = (pde & PG_FRAME) | PG_V | PG_RW; + pmap_update_1pg((vm_offset_t) PADDR1); return PADDR1 + ((unsigned) i386_btop(va) & (NPTEPG - 1)); } return (0); } + /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_offset_t __pure pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { if (pmap && *pmap_pde(pmap, va)) { unsigned *pte; pte = get_ptbase(pmap) + i386_btop(va); return ((*pte & PG_FRAME) | (va & PAGE_MASK)); } return 0; } /* * determine if a page is managed (memory vs. device) */ -static PMAP_INLINE __pure int +static __inline __pure int pmap_is_managed(pa) vm_offset_t pa; { int i; if (!pmap_initialized) return 0; for (i = 0; phys_avail[i + 1]; i += 2) { if (pa < phys_avail[i + 1] && pa >= phys_avail[i]) return 1; } return 0; } /*************************************************** * Low level mapping routines..... 
***************************************************/ /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. */ void pmap_qenter(va, m, count) vm_offset_t va; vm_page_t *m; int count; { int i; register unsigned *pte; for (i = 0; i < count; i++) { vm_offset_t tva = va + i * PAGE_SIZE; unsigned npte = VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V; unsigned opte; pte = (unsigned *)vtopte(tva); opte = *pte; *pte = npte; if (opte) pmap_update_1pg(tva); } } /* * this routine jerks page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(va, count) vm_offset_t va; int count; { int i; register unsigned *pte; for (i = 0; i < count; i++) { pte = (unsigned *)vtopte(va); *pte = 0; pmap_update_1pg(va); va += PAGE_SIZE; } } /* * add a wired page to the kva * note that in order for the mapping to take effect -- you * should do a pmap_update after doing the pmap_kenter... */ -PMAP_INLINE void +__inline void pmap_kenter(va, pa) vm_offset_t va; register vm_offset_t pa; { register unsigned *pte; unsigned npte, opte; npte = pa | PG_RW | PG_V; pte = (unsigned *)vtopte(va); opte = *pte; *pte = npte; if (opte) pmap_update_1pg(va); } /* * remove a page from the kernel pagetables */ -PMAP_INLINE void +__inline void pmap_kremove(va) vm_offset_t va; { register unsigned *pte; pte = (unsigned *)vtopte(va); *pte = 0; pmap_update_1pg(va); } -static vm_page_t -pmap_page_alloc(object, pindex) - vm_object_t object; - vm_pindex_t pindex; + +/*************************************************** + * Page table page management routines..... + ***************************************************/ + +/* + * This routine unholds page table pages, and if the hold count + * drops to zero, then it decrements the wire count. 
+ */ +static __inline int +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { + vm_page_unhold(m); + if (m->hold_count == 0) { + vm_offset_t pteva; + /* + * unmap the page table page + */ + pmap->pm_pdir[m->pindex] = 0; + --pmap->pm_stats.resident_count; + /* + * Do a pmap_update to make the invalidated mapping + * take effect immediately. + */ + pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); + pmap_update_1pg(pteva); + /* + * If the page is finally unwired, simply free it. + */ + --m->wire_count; + if (m->wire_count == 0) { + vm_page_free_zero(m); + --cnt.v_wire_count; + } + return 1; + } + return 0; +} + +/* + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the hold/wire counts. + */ +int +pmap_unuse_pt(pmap, va, mpte) + pmap_t pmap; + vm_offset_t va; + vm_page_t mpte; { - vm_page_t m; - m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO); - if (m == NULL) { - VM_WAIT; + if (va >= UPT_MIN_ADDRESS) + return 0; + + if (mpte == NULL) { + vm_offset_t ptepa; + ptepa = ((vm_offset_t) *pmap_pde(pmap, va)); +#if defined(PMAP_DIAGNOSTIC) + if (!ptepa) + panic("pmap_unuse_pt: pagetable page missing, va: 0x%x", va); +#endif + if (!ptepa) + return 0; + mpte = PHYS_TO_VM_PAGE(ptepa); } - return m; + +#if defined(PMAP_DIAGNOSTIC) + if (mpte->pindex != (va >> PDRSHIFT)) + panic("pmap_unuse_pt: pindex(0x%x) != va(0x%x)", + mpte->pindex, (va >> PDRSHIFT)); + + if (mpte->hold_count == 0) { + panic("pmap_unuse_pt: hold count < 0, va: 0x%x", va); + } +#endif + + return pmap_unwire_pte_hold(pmap, mpte); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg; /* * No need to allocate page table space yet but we do need a valid * page directory table. 
*/ if (pdstackptr > 0) { --pdstackptr; pmap->pm_pdir = (pd_entry_t *)pdstack[pdstackptr]; } else { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); } /* * allocate object for the ptes */ pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1); /* * allocate the page directory page */ retry: - ptdpg = pmap_page_alloc( pmap->pm_pteobj, PTDPTDI); - if (ptdpg == NULL) + ptdpg = vm_page_alloc( pmap->pm_pteobj, PTDPTDI, VM_ALLOC_ZERO); + if (ptdpg == NULL) { + VM_WAIT; goto retry; - - ptdpg->wire_count = 1; - ++cnt.v_wire_count; + } + vm_page_wire(ptdpg); ptdpg->flags &= ~(PG_MAPPED|PG_BUSY); /* not mapped normally */ ptdpg->valid = VM_PAGE_BITS_ALL; pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); if ((ptdpg->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir, PAGE_SIZE); /* wire in kernel global address entries */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); /* install self-referential address mapping entry */ *(unsigned *) (pmap->pm_pdir + PTDPTDI) = VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW; pmap->pm_count = 1; - TAILQ_INIT(&pmap->pm_pvlist.pv_list); } static int pmap_release_free_page(pmap, p) struct pmap *pmap; vm_page_t p; { int s; unsigned *pde = (unsigned *) pmap->pm_pdir; /* * This code optimizes the case of freeing non-busy * page-table pages. Those pages are zero now, and * might as well be placed directly into the zero queue. */ s = splvm(); if (p->flags & PG_BUSY) { p->flags |= PG_WANTED; tsleep(p, PVM, "pmaprl", 0); splx(s); return 0; } /* * Remove the page table page from the processes address space. 
*/ pde[p->pindex] = 0; --pmap->pm_stats.resident_count; if (p->hold_count) { + int *kvap; + int i; +#if defined(PMAP_DIAGNOSTIC) panic("pmap_release: freeing held page table page"); +#else + printf("pmap_release: freeing held page table page:\n"); +#endif + kvap = (int *)vm_pager_map_page(p); + for(i=0;ipindex == PTDPTDI) { bzero(pde + KPTDI, nkpt * PTESIZE); pde[APTDPTDI] = 0; pmap_kremove((vm_offset_t) pmap->pm_pdir); } vm_page_free_zero(p); splx(s); return 1; } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; int ptepindex; { - vm_offset_t ptepa; + vm_offset_t pteva, ptepa; vm_page_t m; /* * Find or fabricate a new pagetable page */ retry: m = vm_page_lookup(pmap->pm_pteobj, ptepindex); if (m == NULL) { - m = pmap_page_alloc(pmap->pm_pteobj, ptepindex); - if (m == NULL) + m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO); + if (m == NULL) { + VM_WAIT; goto retry; + } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(VM_PAGE_TO_PHYS(m)); m->flags &= ~(PG_ZERO|PG_BUSY); m->valid = VM_PAGE_BITS_ALL; } else { if ((m->flags & PG_BUSY) || m->busy) { m->flags |= PG_WANTED; tsleep(m, PVM, "ptewai", 0); goto retry; } } + /* + * mark the object writeable + */ + pmap->pm_pteobj->flags |= OBJ_WRITEABLE; + if (m->queue != PQ_NONE) { int s = splvm(); - vm_page_unqueue(m,1); + vm_page_unqueue(m); splx(s); } - if (m->wire_count == 0) - ++cnt.v_wire_count; - ++m->wire_count; - + if (m->hold_count == 0) { + if (m->wire_count == 0) + ++cnt.v_wire_count; + ++m->wire_count; + } /* * Increment the hold count for the page table page * (denoting a new mapping.) */ ++m->hold_count; /* * Map the pagetable page into the process address space, if * it isn't already there. 
*/ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V); + pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex); + pmap_update_1pg(pteva); m->flags |= PG_MAPPED; return m; } -PMAP_INLINE static vm_page_t +static __inline vm_page_t pmap_allocpte(pmap, va) pmap_t pmap; vm_offset_t va; { int ptepindex; vm_offset_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { - m = vm_page_lookup( pmap->pm_pteobj, ptepindex); + m = PHYS_TO_VM_PAGE(ptepa); ++m->hold_count; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap) register struct pmap *pmap; { vm_page_t p,n,ptdpg; vm_object_t object = pmap->pm_pteobj; if (object->ref_count != 1) panic("pmap_release: pteobj reference count != 1"); ptdpg = NULL; retry: for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { n = TAILQ_NEXT(p, listq); if (p->pindex == PTDPTDI) { ptdpg = p; continue; } if (!pmap_release_free_page(pmap, p)) goto retry; } if (ptdpg == NULL) panic("pmap_release: missing page table directory page"); if (!pmap_release_free_page(pmap, ptdpg)) goto retry; vm_object_deallocate(object); if (pdstackptr < PDSTACKMAX) { pdstack[pdstackptr] = (vm_offset_t) pmap->pm_pdir; ++pdstackptr; } else { kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); } } /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct proc *p; struct pmap *pmap; int s; s = splhigh(); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); ++nkpt; } } addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } ++nkpt; if (!nkpg) { nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); vm_page_wire(nkpg); vm_page_remove(nkpg); pmap_zero_page(VM_PAGE_TO_PHYS(nkpg)); } pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_RW); nkpg = NULL; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_vmspace) { pmap = &p->p_vmspace->vm_pmap; *pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); } } *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } /* * Retire the 
given physical map from service. * Should only be called if the map contains * no valid mappings. */ void pmap_destroy(pmap) register pmap_t pmap; { int count; if (pmap == NULL) return; count = --pmap->pm_count; if (count == 0) { pmap_release(pmap); free((caddr_t) pmap, M_VMPMAP); } } /* * Add a reference to the specified pmap. */ void pmap_reference(pmap) pmap_t pmap; { if (pmap != NULL) { pmap->pm_count++; } } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ -static PMAP_INLINE void +static __inline void free_pv_entry(pv) pv_entry_t pv; { ++pv_freelistcnt; - TAILQ_INSERT_HEAD(&pv_freelist, pv, pv_list); + pv->pv_next = pv_freelist; + pv_freelist = pv; } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ -static PMAP_INLINE pv_entry_t +static __inline pv_entry_t get_pv_entry() { pv_entry_t tmp; /* * get more pv_entry pages if needed */ - if (pv_freelistcnt < PV_FREELIST_MIN) { + if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) { pmap_alloc_pv_entry(); } - /* * get a pv_entry off of the free list */ --pv_freelistcnt; - tmp = TAILQ_FIRST(&pv_freelist); - TAILQ_REMOVE(&pv_freelist, tmp, pv_list); + tmp = pv_freelist; + pv_freelist = tmp->pv_next; return tmp; } /* * This *strange* allocation routine eliminates the possibility of a malloc * failure (*FATAL*) for a pv_entry_t data structure. * also -- this code is MUCH MUCH faster than the malloc equiv... * We really need to do the slab allocator thingie here. */ static void pmap_alloc_pv_entry() { /* * do we have any pre-allocated map-pages left? 
*/ if (npvvapg) { vm_page_t m; /* * allocate a physical page out of the vm system */ m = vm_page_alloc(kernel_object, OFF_TO_IDX(pvva - vm_map_min(kernel_map)), VM_ALLOC_INTERRUPT); if (m) { int newentries; int i; pv_entry_t entry; newentries = (PAGE_SIZE / sizeof(struct pv_entry)); /* * wire the page */ vm_page_wire(m); m->flags &= ~PG_BUSY; /* * let the kernel see it */ pmap_kenter(pvva, VM_PAGE_TO_PHYS(m)); entry = (pv_entry_t) pvva; /* * update the allocation pointers */ pvva += PAGE_SIZE; --npvvapg; /* * free the entries into the free list */ for (i = 0; i < newentries; i++) { free_pv_entry(entry); entry++; } } } - if (TAILQ_FIRST(&pv_freelist) == NULL) + if (!pv_freelist) panic("get_pv_entry: cannot get a pv_entry_t"); } /* * init the pv_entry allocation system */ #define PVSPERPAGE 64 void init_pv_entries(npg) int npg; { /* * allocate enough kvm space for PVSPERPAGE entries per page (lots) * kvm space is fairly cheap, be generous!!! (the system can panic if * this is too small.) */ npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + PAGE_SIZE - 1) / PAGE_SIZE; pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE); /* * get the first batch of entries */ pmap_alloc_pv_entry(); } /* - * This routine unholds page table pages, and if the hold count - * drops to zero, then it decrements the wire count. + * If it is the first entry on the list, it is actually + * in the header and we must copy the following entry up + * to the header. Otherwise we must search the list for + * the entry. In either case we free the now unused entry. */ -static int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { - vm_page_unhold(m); - if (m->hold_count == 0) { - vm_offset_t pteva; - /* - * unmap the page table page - */ - pmap->pm_pdir[m->pindex] = 0; - --pmap->pm_stats.resident_count; - if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == - (((unsigned) PTDpde) & PG_FRAME)) { - /* - * Do a pmap_update to make the invalidated mapping - * take effect immediately. 
- */ - pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); - pmap_update_1pg(pteva); +static __inline int +pmap_remove_entry(pmap, ppv, va) + struct pmap *pmap; + pv_entry_t *ppv; + vm_offset_t va; +{ + pv_entry_t npv; + int s; + + s = splvm(); + for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) { + if (pmap == npv->pv_pmap && va == npv->pv_va) { + int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem); + *ppv = npv->pv_next; + free_pv_entry(npv); + splx(s); + return rtval; } - /* - * If the page is finally unwired, simply free it. - */ - --m->wire_count; - if (m->wire_count == 0) { - vm_page_free_zero(m); - --cnt.v_wire_count; - } - return 1; } + splx(s); return 0; } /* - * After removing a page table entry, this routine is used to - * conditionally free the page, and manage the hold/wire counts. + * Create a pv entry for page at pa for + * (pmap, va). */ -PMAP_INLINE int -pmap_unuse_pt(pmap, va, mpte) +static __inline void +pmap_insert_entry(pmap, va, mpte, pa) pmap_t pmap; vm_offset_t va; vm_page_t mpte; + vm_offset_t pa; { - int ptepindex; - if (va >= UPT_MIN_ADDRESS) - return 0; - if (mpte == NULL) { - ptepindex = (va >> PDRSHIFT); - mpte = vm_page_lookup( pmap->pm_pteobj, ptepindex); - } - - return pmap_unwire_pte_hold(pmap, mpte); -} - -/* - * If it is the first entry on the list, it is actually - * in the header and we must copy the following entry up - * to the header. Otherwise we must search the list for - * the entry. In either case we free the now unused entry. 
- */ -static int -pmap_remove_entry(pmap, ppv, va) - struct pmap *pmap; - pv_table_t *ppv; - vm_offset_t va; -{ - pv_entry_t pv; - int rtval; int s; + pv_entry_t *ppv, pv; s = splvm(); - if (ppv->pv_list_count < pmap->pm_stats.resident_count) { - for (pv = TAILQ_FIRST(&ppv->pv_list); - pv; - pv = TAILQ_NEXT(pv, pv_list)) { - if (pmap == pv->pv_pmap && va == pv->pv_va) - break; - } - } else { - for (pv = TAILQ_FIRST(&pmap->pm_pvlist.pv_list); - pv; - pv = TAILQ_NEXT(pv, pv_plist)) { - if (va == pv->pv_va) - break; - } - } + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; - rtval = 0; - if (pv) { - rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); - TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); - --ppv->pv_list_count; - TAILQ_REMOVE(&pmap->pm_pvlist.pv_list, pv, pv_plist); - free_pv_entry(pv); - } - + ppv = pa_to_pvh(pa); + if (*ppv) + pv->pv_next = *ppv; + else + pv->pv_next = NULL; + *ppv = pv; splx(s); - return rtval; } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap, ptq, va) struct pmap *pmap; unsigned *ptq; vm_offset_t va; { unsigned oldpte; + pv_entry_t *ppv; oldpte = *ptq; *ptq = 0; if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) oldpte)) { printf("pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, (int) oldpte); } #endif if (pmap_track_modified(va)) PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL; } - return pmap_remove_entry(pmap, pa_to_pvh(oldpte), va); + ppv = pa_to_pvh(oldpte); + return pmap_remove_entry(pmap, ppv, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap, va) struct pmap *pmap; register vm_offset_t va; { register unsigned *ptq; /* * if there is no pte for this address, just skip it!!! 
*/ if (*pmap_pde(pmap, va) == 0) { return; } /* * get a local va for mappings for this pmap. */ ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { (void) pmap_remove_pte(pmap, ptq, va); pmap_update_1pg(va); } return; } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap, sva, eva) struct pmap *pmap; register vm_offset_t sva; register vm_offset_t eva; { register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; int anyvalid; + vm_offset_t vachanged[VATRACK]; if (pmap == NULL) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE) == eva) { pmap_remove_page(pmap, sva); return; } anyvalid = 0; /* * Get a local virtual address for the mappings that are being * worked with. */ ptbase = get_ptbase(pmap); sindex = i386_btop(sva); eindex = i386_btop(eva); - mpte = NULL; for (; sindex < eindex; sindex = pdnxt) { /* * Calculate index for next page table. */ pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); ptpaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sindex)); /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; + if (sindex < i386_btop(UPT_MIN_ADDRESS)) { /* + * get the vm_page_t for the page table page + */ + mpte = PHYS_TO_VM_PAGE(ptpaddr); + + /* + * if the pte isn't wired, just skip it. + */ + if (mpte->wire_count == 0) + continue; + } + + /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. 
*/ if (pdnxt > eindex) { pdnxt = eindex; } for ( ;sindex != pdnxt; sindex++) { vm_offset_t va; if (ptbase[sindex] == 0) { continue; } va = i386_ptob(sindex); + if (anyvalid < VATRACK) + vachanged[anyvalid] = va; anyvalid++; if (pmap_remove_pte(pmap, ptbase + sindex, va)) break; } } if (anyvalid) { - pmap_update(); + if (anyvalid <= VATRACK) { + int i; + for(i=0;ipv_list); - pv; - pv = npv) { + for (pv = *ppv; pv; pv=pv->pv_next) { pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (tpte = *pte) { pv->pv_pmap->pm_stats.resident_count--; *pte = 0; if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; /* * Update the vm_page_t clean and reference bits. */ - if ((tpte & (PG_M|PG_MANAGED)) == (PG_M|PG_MANAGED)) { + if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", pv->pv_va, tpte); } #endif - if (pmap_track_modified(pv->pv_va)) - nmodify += 1; + if (pmap_track_modified(pv->pv_va)) { + if (m == NULL) + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } } } - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist.pv_list, pv, pv_plist); + } - npv = TAILQ_NEXT(pv, pv_list); - TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); - --ppv->pv_list_count; + for (pv = *ppv; pv; pv = npv) { + npv = pv->pv_next; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } - + *ppv = NULL; splx(s); - return nmodify; } /* * Set the physical protection on the * specified range of this map as requested. 
*/ void pmap_protect(pmap, sva, eva, prot) register pmap_t pmap; vm_offset_t sva, eva; vm_prot_t prot; { register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; vm_page_t mpte; int anyvalid; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; anyvalid = 0; ptbase = get_ptbase(pmap); sindex = i386_btop(sva); eindex = i386_btop(eva); - mpte = NULL; for (; sindex < eindex; sindex = pdnxt) { pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); ptpaddr = (vm_offset_t) *pmap_pde(pmap, i386_ptob(sindex)); /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; + /* + * Skip page ranges, where the page table page isn't wired. + * If the page table page is not wired, there are no page mappings + * there. + */ + if (sindex < i386_btop(UPT_MIN_ADDRESS)) { + mpte = PHYS_TO_VM_PAGE(ptpaddr); + + if (mpte->wire_count == 0) + continue; + } + if (pdnxt > eindex) { pdnxt = eindex; } for (; sindex != pdnxt; sindex++) { unsigned pbits = ptbase[sindex]; - if ((pbits & (PG_RW|PG_V)) == (PG_RW|PG_V)) { - if ((pbits & (PG_M|PG_MANAGED)) == (PG_M|PG_MANAGED)) { + if (pbits & PG_RW) { + if (pbits & PG_M) { vm_offset_t sva = i386_ptob(sindex); if (pmap_track_modified(sva)) { vm_page_t m = PHYS_TO_VM_PAGE(pbits); m->dirty = VM_PAGE_BITS_ALL; } } ptbase[sindex] = pbits & ~(PG_M|PG_RW); anyvalid = 1; } } } if (anyvalid) pmap_update(); } /* - * Create a pv entry for page at pa for - * (pmap, va). 
- */ -static void -pmap_insert_entry(pmap, va, mpte, pa) - pmap_t pmap; - vm_offset_t va; - vm_page_t mpte; - vm_offset_t pa; -{ - - int s; - pv_entry_t pv; - pv_table_t *ppv; - - s = splvm(); - pv = get_pv_entry(); - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_ptem = mpte; - - TAILQ_INSERT_TAIL(&pmap->pm_pvlist.pv_list, pv, pv_plist); - - ppv = pa_to_pvh(pa); - TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); - ++ppv->pv_list_count; - - splx(s); -} - -/* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap, va, pa, prot, wired) register pmap_t pmap; vm_offset_t va; register vm_offset_t pa; vm_prot_t prot; boolean_t wired; { register unsigned *pte; vm_offset_t opa; vm_offset_t origpte, newpte; vm_page_t mpte; if (pmap == NULL) return; va &= PG_FRAME; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < UPT_MIN_ADDRESS) mpte = pmap_allocpte(pmap, va); - pte = pmap_pte(pmap, va); + pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory, pdir=%p, va=0x%lx\n", pmap->pm_pdir[PTDPTDI], va); } origpte = *(vm_offset_t *)pte; pa &= PG_FRAME; opa = origpte & PG_FRAME; /* * Mapping has not changed, must be protection or wiring change. */ - if (origpte && (opa == pa)) { + if (opa == pa) { /* * Wiring change, just update stats. 
We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) origpte)) { printf("pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", va, origpte); } #endif /* * We might be turning off write access to the page, * so we go ahead and sense modify status. */ if (origpte & PG_MANAGED) { - if ((origpte & PG_M) && pmap_track_modified(va)) { - vm_page_t m; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; + vm_page_t m; + if (origpte & PG_M) { + if (pmap_track_modified(va)) { + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } } pa |= PG_MANAGED; } if (mpte) --mpte->hold_count; goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ - if (origpte) { + if (opa) { int err; err = pmap_remove_pte(pmap, pte, va); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_is_managed(pa)) { pmap_insert_entry(pmap, va, mpte, pa); pa |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; if (va < UPT_MIN_ADDRESS) newpte |= PG_U; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte; if (origpte) pmap_update_1pg(va); } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. 
Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ -static vm_page_t -pmap_enter_quick(pmap, va, pa, mpte) +static void +pmap_enter_quick(pmap, va, pa) register pmap_t pmap; vm_offset_t va; register vm_offset_t pa; - vm_page_t mpte; { register unsigned *pte; + vm_page_t mpte; + mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ - if (va < UPT_MIN_ADDRESS) { - int ptepindex; - vm_offset_t ptepa; + if (va < UPT_MIN_ADDRESS) + mpte = pmap_allocpte(pmap, va); - /* - * Calculate pagetable page index - */ - ptepindex = va >> PDRSHIFT; - if (mpte && (mpte->pindex == ptepindex)) { - ++mpte->hold_count; - } else { - /* - * Get the page directory entry - */ - ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; - - /* - * If the page table page is mapped, we just increment - * the hold count, and activate it. - */ - if (ptepa) { - mpte = vm_page_lookup( pmap->pm_pteobj, ptepindex); - ++mpte->hold_count; - } else { - mpte = _pmap_allocpte(pmap, ptepindex); - } - } - } else { - mpte = NULL; - } - /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = (unsigned *)vtopte(va); if (*pte) { if (mpte) pmap_unwire_pte_hold(pmap, mpte); - return NULL; + return; } /* * Enter on the PV list if part of our managed memory Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ pmap_insert_entry(pmap, va, mpte, pa); /* * Increment counters */ pmap->pm_stats.resident_count++; /* * Now validate mapping with RO protection */ *pte = pa | PG_V | PG_U | PG_MANAGED; - return mpte; + return; } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. 
This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap, addr, object, pindex, size, limit) pmap_t pmap; vm_offset_t addr; vm_object_t object; vm_pindex_t pindex; vm_size_t size; int limit; { vm_offset_t tmpidx; int psize; - vm_page_t p, mpte; + vm_page_t p; int objpgs; psize = i386_btop(size); if (!pmap || (object->type != OBJT_VNODE) || (limit && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) psize = object->size - pindex; - mpte = NULL; /* * if we are processing a major portion of the object, then scan the * entire thing. */ if (psize > (object->size >> 2)) { objpgs = psize; for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { continue; } tmpidx -= pindex; if (tmpidx >= psize) { continue; } if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (p->queue == PQ_CACHE) vm_page_deactivate(p); p->flags |= PG_BUSY; - mpte = pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), - VM_PAGE_TO_PHYS(p), mpte); + VM_PAGE_TO_PHYS(p)); p->flags |= PG_MAPPED; PAGE_WAKEUP(p); } objpgs -= 1; } } else { /* * else lookup the pages one-by-one. */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (p->queue == PQ_CACHE) vm_page_deactivate(p); p->flags |= PG_BUSY; - mpte = pmap_enter_quick(pmap, + pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), - VM_PAGE_TO_PHYS(p), mpte); + VM_PAGE_TO_PHYS(p)); p->flags |= PG_MAPPED; PAGE_WAKEUP(p); } } } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a processes address space. 
It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time. */ #define PFBAK 2 #define PFFOR 2 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry, object) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; vm_object_t object; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; - vm_page_t m, mpte; + vm_page_t m; if (entry->object.vm_object != object) return; if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) return; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } - mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; unsigned *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr < starta || addr >= entry->end) continue; if ((*pmap_pde(pmap, addr)) == NULL) continue; pte = (unsigned *) vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give-up when a page is not in memory */ if (m == NULL) break; if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->busy == 0) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if (m->queue == PQ_CACHE) { vm_page_deactivate(m); } m->flags |= PG_BUSY; - mpte = pmap_enter_quick(pmap, addr, - VM_PAGE_TO_PHYS(m), mpte); + pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); m->flags |= PG_MAPPED; PAGE_WAKEUP(m); } } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. 
* In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register unsigned *pte; if (pmap == NULL) return; pte = pmap_pte(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } + + /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) pmap_t dst_pmap, src_pmap; vm_offset_t dst_addr; vm_size_t len; vm_offset_t src_addr; { vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; unsigned src_frame, dst_frame; if (dst_addr != src_addr) return; src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) return; dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); pmap_update(); } for(addr = src_addr; addr < end_addr; addr = pdnxt) { unsigned *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; vm_offset_t srcptepaddr; - int ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); - pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); - ptepindex = addr >> PDRSHIFT; - - srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; - if (srcptepaddr == 0) + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT]; + if (srcptepaddr == 0) { continue; + } - srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); + srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); if (srcmpte->hold_count == 0) continue; if (pdnxt > end_addr) pdnxt = end_addr; src_pte = 
(unsigned *) vtopte(addr); dst_pte = (unsigned *) avtopte(addr); while (addr < pdnxt) { unsigned ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { /* * We have to check after allocpte for the * pte still being around... allocpte can * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* - * Clear the modified and - * accessed (referenced) bits - * during the copy. + * Simply clear the modified and accessed (referenced) + * bits. */ *dst_pte = ptetemp & ~(PG_M|PG_A); dst_pmap->pm_stats.resident_count++; - pmap_insert_entry(dst_pmap, addr, - dstmpte, + pmap_insert_entry(dst_pmap, addr, dstmpte, (ptetemp & PG_FRAME)); } else { pmap_unwire_pte_hold(dst_pmap, dstmpte); } if (dstmpte->hold_count >= srcmpte->hold_count) break; } addr += PAGE_SIZE; ++src_pte; ++dst_pte; } } } /* * Routine: pmap_kernel * Function: * Returns the physical map handle for the kernel. */ pmap_t pmap_kernel() { return (kernel_pmap); } /* * pmap_zero_page zeros the specified (machine independent) * page by mapping the page into virtual memory and using * bzero to clear its contents, one machine dependent page * at a time. */ void pmap_zero_page(phys) vm_offset_t phys; { if (*(int *) CMAP2) panic("pmap_zero_page: CMAP busy"); *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME); bzero(CADDR2, PAGE_SIZE); *(int *) CMAP2 = 0; pmap_update_1pg((vm_offset_t) CADDR2); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
*/ void pmap_copy_page(src, dst) vm_offset_t src; vm_offset_t dst; { if (*(int *) CMAP1 || *(int *) CMAP2) panic("pmap_copy_page: CMAP busy"); *(int *) CMAP1 = PG_V | PG_RW | (src & PG_FRAME); *(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME); bcopy(CADDR1, CADDR2, PAGE_SIZE); *(int *) CMAP1 = 0; *(int *) CMAP2 = 0; pmap_update_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2); } /* * Routine: pmap_pageable * Function: * Make the specified pages (by pmap, offset) * pageable (or not) as requested. * * A page which is not pageable may not take * a fault; therefore, its page table entry * must remain valid for the duration. * * This routine is merely advisory; pmap_enter * will specify that these pages are to be wired * down (or not) as appropriate. */ void pmap_pageable(pmap, sva, eva, pageable) pmap_t pmap; vm_offset_t sva, eva; boolean_t pageable; { } /* * this routine returns true if a physical page resides * in the given pmap. */ boolean_t pmap_page_exists(pmap, pa) pmap_t pmap; vm_offset_t pa; { - register pv_entry_t pv; - pv_table_t *ppv; + register pv_entry_t *ppv, pv; int s; if (!pmap_is_managed(pa)) return FALSE; s = splvm(); ppv = pa_to_pvh(pa); /* * Not found, check current mappings returning immediately if found. */ - for (pv = TAILQ_FIRST(&ppv->pv_list); - pv; - pv = TAILQ_NEXT(pv, pv_list)) { + for (pv = *ppv; pv; pv = pv->pv_next) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } } splx(s); return (FALSE); } -#define PMAP_REMOVE_PAGES_CURPROC_ONLY /* - * Remove all pages from specified address space - * this aids process exit speeds. Also, this code - * is special cased for current process only. + * pmap_testbit tests bits in pte's + * note that the testbit/changebit routines are inline, + * and a lot of things compile-time evaluate. 
*/ -void -pmap_remove_pages(pmap, sva, eva) - pmap_t pmap; - vm_offset_t sva, eva; +static __inline boolean_t +pmap_testbit(pa, bit) + register vm_offset_t pa; + int bit; { - unsigned *pte, tpte; - pv_table_t *ppv; - pv_entry_t pv, npv; + register pv_entry_t *ppv, pv; + unsigned *pte; int s; -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) { - printf("warning: pmap_remove_pages called with non-current pmap\n"); - return; - } -#endif + if (!pmap_is_managed(pa)) + return FALSE; + ppv = pa_to_pvh(pa); + if (*ppv == NULL) + return FALSE; + s = splvm(); + /* + * Not found, check current mappings returning immediately if found. + */ + for (pv = *ppv ;pv; pv = pv->pv_next) { - for(pv = TAILQ_FIRST(&pmap->pm_pvlist.pv_list); - pv; - pv = npv) { + /* + * if the bit being tested is the modified bit, then + * mark clean_map and ptes as never + * modified. + */ + if (bit & (PG_A|PG_M)) { + if (!pmap_track_modified(pv->pv_va)) + continue; + } - if (pv->pv_va >= eva || pv->pv_va < sva) { - npv = TAILQ_NEXT(pv, pv_plist); + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); +#endif continue; } - -#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY - pte = (unsigned *)vtopte(pv->pv_va); -#else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); -#endif - tpte = *pte; - *pte = 0; - - if (tpte) { - pv->pv_pmap->pm_stats.resident_count--; - if (tpte & PG_W) - pv->pv_pmap->pm_stats.wired_count--; - /* - * Update the vm_page_t clean and reference bits. 
- */ - if (tpte & PG_M) { - PHYS_TO_VM_PAGE(tpte)->dirty = VM_PAGE_BITS_ALL; - } + if (pte == NULL) + continue; + if (*pte & bit) { + splx(s); + return TRUE; } + } + splx(s); + return (FALSE); +} - npv = TAILQ_NEXT(pv, pv_plist); - TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist.pv_list, pv, pv_plist); +/* + * this routine is used to modify bits in ptes + */ +static __inline void +pmap_changebit(pa, bit, setem) + vm_offset_t pa; + int bit; + boolean_t setem; +{ + register pv_entry_t pv, *ppv; + register unsigned *pte; + vm_offset_t va; + int changed; + int s; - ppv = pa_to_pvh(tpte); - --ppv->pv_list_count; - TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); + if (!pmap_is_managed(pa)) + return; - pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); - free_pv_entry(pv); + s = splvm(); + changed = 0; + ppv = pa_to_pvh(pa); + /* + * Loop over all current mappings setting/clearing as appropos If + * setting RO do we need to clear the VAC? + */ + for ( pv = *ppv; pv; pv = pv->pv_next) { + va = pv->pv_va; + + /* + * don't write protect pager mappings + */ + if (!setem && (bit == PG_RW)) { + if (va >= clean_sva && va < clean_eva) + continue; + } + if (!pv->pv_pmap) { +#if defined(PMAP_DIAGNOSTIC) + printf("Null pmap (cb) at va: 0x%lx\n", va); +#endif + continue; + } + + pte = pmap_pte_quick(pv->pv_pmap, va); + if (pte == NULL) + continue; + if (setem) { + *(int *)pte |= bit; + changed = 1; + } else { + vm_offset_t pbits = *(vm_offset_t *)pte; + if (pbits & bit) + changed = 1; + if (bit == PG_RW) { + if (pbits & PG_M) { + vm_page_t m; + vm_offset_t pa = pbits & PG_FRAME; + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; + } + *(int *)pte = pbits & ~(PG_M|PG_RW); + } else { + *(int *)pte = pbits & ~bit; + } + } } - pmap_update(); splx(s); + if (changed) + pmap_update(); } +/* + * pmap_page_protect: + * + * Lower the permission for all mappings to a given page. 
+ */ +void +pmap_page_protect(phys, prot) + vm_offset_t phys; + vm_prot_t prot; +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { + pmap_changebit(phys, PG_RW, FALSE); + } else { + pmap_remove_all(phys); + pmap_update(); + } + } +} + vm_offset_t pmap_phys_address(ppn) int ppn; { return (i386_ptob(ppn)); } /* - * pmap_tcbit: + * pmap_is_referenced: * - * Return the count of bits for a page, clearing all of them. + * Return whether or not the specified physical page was referenced + * by any physical maps. + */ +boolean_t +pmap_is_referenced(vm_offset_t pa) +{ + register pv_entry_t *ppv, pv, lpv; + unsigned *pte; + int s; + + if (!pmap_is_managed(pa)) + return FALSE; + + ppv = pa_to_pvh(pa); + + s = splvm(); + /* + * Not found, check current mappings returning immediately if found. + */ + for (lpv = NULL, pv = *ppv ;pv; lpv = pv, pv = pv->pv_next) { + /* + * if the bit being tested is the modified bit, then + * mark clean_map and ptes as never + * modified. + */ + if (!pmap_track_modified(pv->pv_va)) + continue; + if (!pv->pv_pmap) { + continue; + } + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + if (pte == NULL) + continue; + if ((int) *pte & PG_A) { + if (lpv) { + lpv->pv_next = pv->pv_next; + pv->pv_next = *ppv; + *ppv = pv; + } + splx(s); + return TRUE; + } + } + splx(s); + return (FALSE); +} + +/* + * pmap_ts_referenced: + * + * Return the count of reference bits for a page, clearing all of them. * */ int -pmap_tcbit(vm_offset_t pa, int bit) +pmap_ts_referenced(vm_offset_t pa) { - register pv_entry_t pv, npv; - pv_table_t *ppv; + register pv_entry_t *ppv, pv; unsigned *pte; int s; int rtval = 0; + vm_offset_t vachanged[VATRACK]; + if (!pmap_is_managed(pa)) + return FALSE; + s = splvm(); ppv = pa_to_pvh(pa); + + if (*ppv == NULL) { + splx(s); + return 0; + } + /* * Not found, check current mappings returning immediately if found. 
*/ - for (pv = TAILQ_FIRST(&ppv->pv_list); - pv; - pv = npv) { - npv = TAILQ_NEXT(pv, pv_list); + for (pv = *ppv ;pv; pv = pv->pv_next) { /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ - if (((bit & PG_M) != 0) - && !pmap_track_modified(pv->pv_va)) + if (!pmap_track_modified(pv->pv_va)) continue; + if (!pv->pv_pmap) { + continue; + } pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte == NULL) continue; - - if ((rtval == 0) && (*pte & bit)) { - rtval = 1; + if (*pte & PG_A) { + if (rtval < VATRACK) + vachanged[rtval] = pv->pv_va; + rtval++; + *pte &= ~PG_A; } - *pte &= ~bit; } splx(s); - if (rtval) - pmap_update(); + if (rtval) { + if (rtval <= VATRACK) { + int i; + for(i=0;idirty = VM_PAGE_BITS_ALL; - - return rtval; + return pmap_testbit((pa), PG_M); } /* - * pmap_tc_referenced: - * - * Return the count of referenced bits for a page, clearing all of them. - * + * Clear the modify bits on the specified physical page. */ -int -pmap_tc_referenced(vm_offset_t pa) +void +pmap_clear_modify(vm_offset_t pa) { - if (!pmap_is_managed(pa)) - return 0; - return pmap_tcbit(pa, PG_A); + pmap_changebit((pa), PG_M, FALSE); } /* - * pmap_page_protect: + * pmap_clear_reference: * - * Lower the permission for all mappings to a given page. + * Clear the reference bit on the specified physical page. 
*/ void -pmap_page_protect(m, prot) - vm_page_t m; - vm_prot_t prot; +pmap_clear_reference(vm_offset_t pa) { - if ((prot & VM_PROT_WRITE) == 0) { - if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { - if ((m->flags & PG_FICTITIOUS) == 0) - pmap_tcbit(VM_PAGE_TO_PHYS(m), PG_RW); - } else { - if (pmap_remove_all(VM_PAGE_TO_PHYS(m))) { - m->dirty = VM_PAGE_BITS_ALL; - } - pmap_update(); - } - } + pmap_changebit((pa), PG_A, FALSE); } /* * Miscellaneous support routines follow */ static void i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: /* * Read access is also 0. There isn't any execute bit, * so just make it readable. */ case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = 0; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. The non-cacheable bits are set on each * mapped page. 
*/ void * pmap_mapdev(pa, size) vm_offset_t pa; vm_size_t size; { vm_offset_t va, tmpva; unsigned *pte; size = roundup(size, PAGE_SIZE); va = kmem_alloc_pageable(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { pte = (unsigned *)vtopte(tmpva); *pte = pa | PG_RW | PG_V | PG_N; size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } pmap_update(); return ((void *) va); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { unsigned *ptep, pte; int val = 0; - ptep = pmap_pte_quick(pmap, addr); + ptep = pmap_pte(pmap, addr); if (ptep == 0) { return 0; } if (pte = *ptep) { vm_offset_t pa; val = MINCORE_INCORE; pa = pte & PG_FRAME; /* * Modified by us */ if (pte & PG_M) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; /* * Modified by someone */ else if (PHYS_TO_VM_PAGE(pa)->dirty || - pmap_tcbit(pa, PG_M)) { + pmap_is_modified(pa)) val |= MINCORE_MODIFIED_OTHER; - PHYS_TO_VM_PAGE(pa)->dirty = VM_PAGE_BITS_ALL; - } /* * Referenced by us */ if (pte & PG_U) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; /* * Referenced by someone */ else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) || - pmap_tcbit(pa, PG_A)) + pmap_is_referenced(pa)) val |= MINCORE_REFERENCED_OTHER; } return val; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; for (p = allproc.lh_first; p != NULL; p = p->p_list.le_next) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = &p->p_vmspace->vm_pmap; for(i=0;i<1024;i++) { pd_entry_t *pde; unsigned *pte; unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for(j=0;j<1024;j++) { unsigned va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } return npte; } - pte = pmap_pte_quick( pmap, va); + pte = pmap_pte( pmap, va); if (pte && 
pmap_pte_v(pte)) { vm_offset_t pa; vm_page_t m; pa = *(int *)pte; m = PHYS_TO_VM_PAGE((pa & PG_FRAME)); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } return npte; } #endif #if defined(DEBUG) static void pads __P((pmap_t pm)); static void pmap_pvdump __P((vm_offset_t pa)); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { unsigned va, i, j; unsigned *ptep; if (pm == kernel_pmap) return; for (i = 0; i < 1024; i++) if (pm->pm_pdir[i]) for (j = 0; j < 1024; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; - ptep = pmap_pte_quick(pm, va); + ptep = pmap_pte(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *) ptep); }; } static void pmap_pvdump(pa) vm_offset_t pa; { register pv_entry_t pv; printf("pa %x", pa); - for (pv = TAILQ_FIRST(pa_to_pvh(pa)); - pv; - pv = TAILQ_NEXT(pv, pv_list)) { + for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) { #ifdef used_to_be printf(" -> pmap %x, va %x, flags %x", pv->pv_pmap, pv->pv_va, pv->pv_flags); #endif printf(" -> pmap %x, va %x", pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif Index: head/sys/i386/include/pmap.h =================================================================== --- head/sys/i386/include/pmap.h (revision 17333) +++ head/sys/i386/include/pmap.h (revision 17334) @@ -1,237 +1,224 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. 
* * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.40 1996/06/08 11:21:19 bde Exp $ + * $Id: pmap.h,v 1.41 1996/07/27 03:23:32 dyson Exp $ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ - /* * Page-directory and page-table entires follow this format, with a few * of the fields not present here and there, depending on a lot of things. */ /* ---- Intel Nomenclature ---- */ #define PG_V 0x001 /* P Valid */ #define PG_RW 0x002 /* R/W Read/Write */ #define PG_U 0x004 /* U/S User/Supervisor */ #define PG_NC_PWT 0x008 /* PWT Write through */ #define PG_NC_PCD 0x010 /* PCD Cache disable */ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ #define PG_MANAGED PG_AVAIL2 #define PG_FRAME (~PAGE_MASK) #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* * Page Protection Exception bits */ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ /* * Pte related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< - typedef unsigned int *pd_entry_t; typedef unsigned int *pt_entry_t; #define PDESIZE sizeof(pd_entry_t) /* for assembly files */ #define PTESIZE sizeof(pt_entry_t) /* for assembly files */ /* * Address of current and alternate address space page table maps * and directories. 
*/ #ifdef KERNEL extern pt_entry_t PTmap[], APTmap[], Upte; extern pd_entry_t PTD[], APTD[], PTDpde, APTDpde, Upde; extern int IdlePTD; /* physical address of "Idle" state directory */ #endif /* * virtual address to page table entry and * to physical address. Likewise for alternate address space. * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. */ #define vtopte(va) (PTmap + i386_btop(va)) #define vtophys(va) (((int) (*vtopte(va))&PG_FRAME) | ((int)(va) & PAGE_MASK)) #define avtopte(va) (APTmap + i386_btop(va)) #define avtophys(va) (((int) (*avtopte(va))&PG_FRAME) | ((int)(va) & PAGE_MASK)) #ifdef KERNEL /* * Routine: pmap_kextract * Function: * Extract the physical page address associated * kernel virtual address. */ static __inline vm_offset_t pmap_kextract(vm_offset_t va) { vm_offset_t pa = *(int *)vtopte(va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); return pa; } #endif -struct vm_page; - /* * Pmap stuff */ -struct pv_entry; -typedef struct { - int pv_list_count; - TAILQ_HEAD(,pv_entry) pv_list; -} pv_table_t; struct pmap { pd_entry_t *pm_pdir; /* KVA of page directory */ vm_object_t pm_pteobj; /* Container for pte's */ - pv_table_t pm_pvlist; /* list of mappings in pmap */ - int pm_count; /* reference count */ + short pm_dref; /* page directory ref count */ + short pm_count; /* pmap reference count */ struct pmap_statistics pm_stats; /* pmap statistics */ - struct vm_page *pm_ptphint; /* pmap ptp hint */ + struct vm_map *pm_map; /* map that owns this pmap */ }; typedef struct pmap *pmap_t; #ifdef KERNEL extern pmap_t kernel_pmap; #endif - /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_table. 
*/ typedef struct pv_entry { + struct pv_entry *pv_next; /* next pv_entry */ pmap_t pv_pmap; /* pmap where mapping lies */ vm_offset_t pv_va; /* virtual address for mapping */ - TAILQ_ENTRY(pv_entry) pv_list; - TAILQ_ENTRY(pv_entry) pv_plist; vm_page_t pv_ptem; /* VM page for pte */ } *pv_entry_t; #define PV_ENTRY_NULL ((pv_entry_t) 0) #define PV_CI 0x01 /* all entries must be cache inhibited */ #define PV_PTPAGE 0x02 /* entry maps a page table page */ #ifdef KERNEL extern caddr_t CADDR1; extern pt_entry_t *CMAP1; extern vm_offset_t avail_end; extern vm_offset_t avail_start; extern vm_offset_t phys_avail[]; -pv_table_t *pv_table; +extern pv_entry_t *pv_table; /* array of entries, one per page */ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; #define pa_index(pa) atop(pa - vm_first_phys) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define pmap_resident_count(pmap) ((pmap)->pm_stats.resident_count) struct pcb; void pmap_bootstrap __P(( vm_offset_t, vm_offset_t)); pmap_t pmap_kernel __P((void)); void *pmap_mapdev __P((vm_offset_t, vm_size_t)); unsigned * __pure pmap_pte __P((pmap_t, vm_offset_t)) __pure2; int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t)); #endif /* KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/kern/kern_exec.c =================================================================== --- head/sys/kern/kern_exec.c (revision 17333) +++ head/sys/kern/kern_exec.c (revision 17334) @@ -1,616 +1,616 @@ /* * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: kern_exec.c,v 1.44 1996/07/12 04:11:37 bde Exp $ + * $Id: kern_exec.c,v 1.45 1996/07/27 03:23:41 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int *exec_copyout_strings __P((struct image_params *)); static int exec_check_permissions(struct image_params *); /* * XXX trouble here if sizeof(caddr_t) != sizeof(int), other parts * of the sysctl code also assumes this, and sizeof(int) == sizeof(long). */ static struct ps_strings *ps_strings = PS_STRINGS; SYSCTL_INT(_kern, KERN_PS_STRINGS, ps_strings, 0, &ps_strings, 0, ""); static caddr_t usrstack = (caddr_t)USRSTACK; SYSCTL_INT(_kern, KERN_USRSTACK, usrstack, 0, &usrstack, 0, ""); /* * execsw_set is constructed for us by the linker. 
Each of the items * is a pointer to a `const struct execsw', hence the double pointer here. */ static const struct execsw **execsw = (const struct execsw **)&execsw_set.ls_items[0]; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif /* * execve() system call. */ int execve(p, uap, retval) struct proc *p; register struct execve_args *uap; int *retval; { struct nameidata nd, *ndp; int *stack_base; int error, len, i; struct image_params image_params, *imgp; struct vattr attr; imgp = &image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->uap = uap; imgp->attr = &attr; imgp->image_header = NULL; imgp->argc = imgp->envc = 0; imgp->entry_addr = 0; imgp->vmspace_destroyed = 0; imgp->interpreted = 0; imgp->interpreter_name[0] = '\0'; imgp->auxargs = NULL; /* * Allocate temporary demand zeroed space for argument and * environment strings */ imgp->stringbase = (char *)kmem_alloc_wait(exec_map, ARG_MAX); if (imgp->stringbase == NULL) { error = ENOMEM; goto exec_fail; } imgp->stringp = imgp->stringbase; imgp->stringspace = ARG_MAX; /* * Translate the file name. namei() returns a vnode pointer * in ni_vp amoung other things. */ ndp = &nd; NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_USERSPACE, uap->fname, p); interpret: error = namei(ndp); if (error) { kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); goto exec_fail; } imgp->vp = ndp->ni_vp; if (imgp->vp == NULL) { error = ENOEXEC; goto exec_fail_dealloc; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); /* * Lose the lock on the vnode. It's no longer needed, and must not * exist for the pagefault paging to work below. 
*/ VOP_UNLOCK(imgp->vp); if (error) goto exec_fail_dealloc; /* * Map the image header (first page) of the file into * kernel address space */ error = vm_mmap(exech_map, /* map */ (vm_offset_t *)&imgp->image_header, /* address */ PAGE_SIZE, /* size */ VM_PROT_READ, /* protection */ VM_PROT_READ, /* max protection */ 0, /* flags */ (caddr_t)imgp->vp, /* vnode */ 0); /* offset */ if (error) { uprintf("mmap failed: %d\n",error); goto exec_fail_dealloc; } /* * Loop through list of image activators, calling each one. * If there is no match, the activator returns -1. If there * is a match, but there was an error during the activation, * the error is returned. Otherwise 0 means success. If the * image is interpreted, loop back up and try activating * the interpreter. */ for (i = 0; execsw[i]; ++i) { if (execsw[i]->ex_imgact) error = (*execsw[i]->ex_imgact)(imgp); else continue; if (error == -1) continue; if (error) goto exec_fail_dealloc; if (imgp->interpreted) { /* free old vnode and name buffer */ vrele(ndp->ni_vp); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (1)"); /* set new name to that of the interpreter */ NDINIT(ndp, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, p); goto interpret; } break; } /* If we made it through all the activators and none matched, exit. */ if (error == -1) { error = ENOEXEC; goto exec_fail_dealloc; } /* * Copy out strings (args and env) and initialize stack base */ stack_base = exec_copyout_strings(imgp); p->p_vmspace->vm_minsaddr = (char *)stack_base; /* * If custom stack fixup routine present for this process * let it do the stack setup. 
* Else stuff argument count as first item on stack */ if (p->p_sysent->sv_fixup) (*p->p_sysent->sv_fixup)(&stack_base, imgp); else suword(--stack_base, imgp->argc); /* close files on exec */ fdcloseexec(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ len = min(ndp->ni_cnd.cn_namelen,MAXCOMLEN); bcopy(ndp->ni_cnd.cn_nameptr, p->p_comm, len); p->p_comm[len] = 0; /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has it's own resources back */ p->p_flag |= P_EXEC; if (p->p_pptr && (p->p_flag & P_PPWAIT)) { p->p_flag &= ~P_PPWAIT; wakeup((caddr_t)p->p_pptr); } /* * Implement image setuid/setgid. Disallow if the process is * being traced. */ if ((attr.va_mode & (VSUID | VSGID)) && (p->p_flag & P_TRACED) == 0) { /* * Turn off syscall tracing for set-id programs, except for * root. */ if (p->p_tracep && suser(p->p_ucred, &p->p_acflag)) { p->p_traceflag = 0; vrele(p->p_tracep); p->p_tracep = NULL; } /* * Set the new credentials. */ p->p_ucred = crcopy(p->p_ucred); if (attr.va_mode & VSUID) p->p_ucred->cr_uid = attr.va_uid; if (attr.va_mode & VSGID) p->p_ucred->cr_groups[0] = attr.va_gid; p->p_flag |= P_SUGID; } else { p->p_flag &= ~P_SUGID; } /* * Implement correct POSIX saved-id behavior. */ p->p_cred->p_svuid = p->p_ucred->cr_uid; p->p_cred->p_svgid = p->p_ucred->cr_gid; /* * Store the vp for use in procfs */ if (p->p_textvp) /* release old reference */ vrele(p->p_textvp); VREF(ndp->ni_vp); p->p_textvp = ndp->ni_vp; /* * If tracing the process, trap to debugger so breakpoints * can be set before the program executes. 
*/ if (p->p_flag & P_TRACED) psignal(p, SIGTRAP); /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* Set entry address */ setregs(p, imgp->entry_addr, (u_long)stack_base); /* * free various allocated resources */ kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (2)"); vrele(ndp->ni_vp); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); return (0); exec_fail_dealloc: if (imgp->stringbase != NULL) kmem_free_wakeup(exec_map, (vm_offset_t)imgp->stringbase, ARG_MAX); if (imgp->image_header && imgp->image_header != (char *)-1) if (vm_map_remove(exech_map, (vm_offset_t)imgp->image_header, (vm_offset_t)imgp->image_header + PAGE_SIZE)) panic("execve: header dealloc failed (3)"); if (ndp->ni_vp) vrele(ndp->ni_vp); FREE(ndp->ni_cnd.cn_pnbuf, M_NAMEI); exec_fail: if (imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exit1(p, W_EXITCODE(0, SIGABRT)); /* NOT REACHED */ return(0); } else { return(error); } } /* * Destroy old address space, and allocate a new stack * The new stack is only SGROWSIZ large because it is grown * automatically in trap.c. 
*/ int exec_new_vmspace(imgp) struct image_params *imgp; { int error; struct vmspace *vmspace = imgp->proc->p_vmspace; caddr_t stack_addr = (caddr_t) (USRSTACK - SGROWSIZ); imgp->vmspace_destroyed = 1; /* Blow away entire process VM */ if (vmspace->vm_shm) shmexit(imgp->proc); - vm_map_remove_userspace(&vmspace->vm_map); + vm_map_remove(&vmspace->vm_map, 0, USRSTACK); /* Allocate a new stack */ error = vm_map_find(&vmspace->vm_map, NULL, 0, (vm_offset_t *)&stack_addr, SGROWSIZ, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) return(error); vmspace->vm_ssize = SGROWSIZ >> PAGE_SHIFT; /* Initialize maximum stack address */ vmspace->vm_maxsaddr = (char *)USRSTACK - MAXSSIZ; return(0); } /* * Copy out argument and environment strings from the old process * address space into the temporary string buffer. */ int exec_extract_strings(imgp) struct image_params *imgp; { char **argv, **envv; char *argp, *envp; int error, length; /* * extract arguments first */ argv = imgp->uap->argv; if (argv) { while ((argp = (caddr_t) fuword(argv++))) { if (argp == (caddr_t) -1) return (EFAULT); if ((error = copyinstr(argp, imgp->stringp, imgp->stringspace, &length))) { if (error == ENAMETOOLONG) return(E2BIG); return (error); } imgp->stringspace -= length; imgp->stringp += length; imgp->argc++; } } /* * extract environment strings */ envv = imgp->uap->envv; if (envv) { while ((envp = (caddr_t) fuword(envv++))) { if (envp == (caddr_t) -1) return (EFAULT); if ((error = copyinstr(envp, imgp->stringp, imgp->stringspace, &length))) { if (error == ENAMETOOLONG) return(E2BIG); return (error); } imgp->stringspace -= length; imgp->stringp += length; imgp->envc++; } } return (0); } /* * Copy strings out to the new process address space, constructing * new arg and env vector tables. Return a pointer to the base * so that it can be used as the initial stack pointer. 
*/ int * exec_copyout_strings(imgp) struct image_params *imgp; { int argc, envc; char **vectp; char *stringp, *destp; int *stack_base; struct ps_strings *arginfo; int szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ arginfo = PS_STRINGS; szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE - roundup((ARG_MAX - imgp->stringspace), sizeof(char *)); /* * install sigcode */ if (szsigcode) copyout(imgp->proc->p_sysent->sv_sigcode, ((caddr_t)arginfo - szsigcode), szsigcode); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) /* * The '+ 2' is for the null pointers at the end of each of the * arg and env vector sets, and 'AT_COUNT*2' is room for the * ELF Auxargs data. */ vectp = (char **)(destp - (imgp->argc + imgp->envc + 2 + AT_COUNT*2) * sizeof(char*)); else /* * The '+ 2' is for the null pointers at the end of each of the * arg and env vector sets */ vectp = (char **) (destp - (imgp->argc + imgp->envc + 2) * sizeof(char*)); /* * vectp also becomes our initial stack base */ stack_base = (int *)vectp; stringp = imgp->stringbase; argc = imgp->argc; envc = imgp->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (int)vectp); suword(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (int)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer seperates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (int)vectp); suword(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. 
*/ for (; envc > 0; --envc) { suword(vectp++, (int)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Check permissions of file to execute. * Return 0 for success or error code on failure. */ static int exec_check_permissions(imgp) struct image_params *imgp; { struct proc *p = imgp->proc; struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; int error; /* * Check number of open-for-writes on the file and deny execution * if there are any. */ if (vp->v_writecount) { return (ETXTBSY); } /* Get file attributes */ error = VOP_GETATTR(vp, attr, p->p_ucred, p); if (error) return (error); /* * 1) Check if file execution is disabled for the filesystem that this * file resides on. * 2) Insure that at least one execute bit is on - otherwise root * will always succeed, and we don't want to happen unless the * file really is executable. * 3) Insure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr->va_mode & 0111) == 0) || (attr->va_type != VREG)) { return (EACCES); } /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Disable setuid/setgid if the filesystem prohibits it or if * the process is being traced. */ if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED)) attr->va_mode &= ~(VSUID | VSGID); /* * Check for execute permission to file based on current credentials. * Then call filesystem specific open routine (which does nothing * in the general case). 
*/ error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); if (error) return (error); error = VOP_OPEN(vp, FREAD, p->p_ucred, p); if (error) return (error); return (0); } Index: head/sys/kern/kern_exit.c =================================================================== --- head/sys/kern/kern_exit.c (revision 17333) +++ head/sys/kern/kern_exit.c (revision 17334) @@ -1,474 +1,475 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 - * $Id: kern_exit.c,v 1.33 1996/06/12 05:07:28 gpalmer Exp $ + * $Id: kern_exit.c,v 1.34 1996/07/27 03:23:42 dyson Exp $ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for acct_process() function prototype */ #include #include #include #ifdef COMPAT_43 #include #include #endif #include #include #include #include #include #include #include static int wait1 __P((struct proc *, struct wait_args *, int [], int)); /* * exit -- * Death of process. */ __dead void exit(p, uap, retval) struct proc *p; struct rexit_args /* { int rval; } */ *uap; int *retval; { exit1(p, W_EXITCODE(uap->rval, 0)); /* NOTREACHED */ } /* * Exit: deallocate address space and other resources, change proc state * to zombie, and unlink proc from allproc and parent's lists. Save exit * status and rusage for wait(). Check for child processes and orphan them. 
*/ __dead void exit1(p, rv) register struct proc *p; int rv; { register struct proc *q, *nq; register struct vmspace *vm; if (p->p_pid == 1) { printf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } #ifdef PGINPROF vmsizmon(); #endif if (p->p_flag & P_PROFIL) stopprofclock(p); MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage), M_ZOMBIE, M_WAITOK); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ p->p_flag &= ~(P_TRACED | P_PPWAIT); p->p_flag |= P_WEXIT; p->p_sigignore = ~0; p->p_siglist = 0; untimeout(realitexpire, (caddr_t)p); /* * Close open files and release open-file table. * This may block! */ fdfree(p); /* * XXX Shutdown SYSV semaphores */ semexit(p); /* The next two chunks should probably be moved to vmspace_exit. */ vm = p->p_vmspace; if (vm->vm_shm) shmexit(p); /* * Release user portion of address space. * This releases references to vnodes, * which could cause I/O if the file has been unlinked. * Need to do this early enough that we can still sleep. * Can't free the entire vmspace as the kernel stack * may be mapped within that space also. */ if (vm->vm_refcnt == 1) - vm_map_remove_userspace(&vm->vm_map); + (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS, + VM_MAXUSER_ADDRESS); if (SESS_LEADER(p)) { register struct session *sp = p->p_session; if (sp->s_ttyvp) { /* * Controlling process. * Signal foreground pgrp, * drain controlling terminal * and revoke access to controlling terminal. */ if (sp->s_ttyp->t_session == sp) { if (sp->s_ttyp->t_pgrp) pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1); (void) ttywait(sp->s_ttyp); /* * The tty could have been revoked * if we blocked. */ if (sp->s_ttyvp) vgoneall(sp->s_ttyvp); } if (sp->s_ttyvp) vrele(sp->s_ttyvp); sp->s_ttyvp = NULL; /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. 
* (for logging and informational purposes) */ } sp->s_leader = NULL; } fixjobc(p, p->p_pgrp, 0); p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; (void)acct_process(p); #ifdef KTRACE /* * release trace file */ p->p_traceflag = 0; /* don't trace the vrele() */ if (p->p_tracep) vrele(p->p_tracep); #endif /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); p->p_stat = SZOMB; LIST_REMOVE(p, p_hash); q = p->p_children.lh_first; if (q) /* only need this if any child is S_ZOMB */ wakeup((caddr_t) initproc); for (; q != 0; q = nq) { nq = q->p_sibling.le_next; LIST_REMOVE(q, p_sibling); LIST_INSERT_HEAD(&initproc->p_children, q, p_sibling); q->p_pptr = initproc; /* * Traced processes are killed * since their existence means someone is screwing up. */ if (q->p_flag & P_TRACED) { q->p_flag &= ~P_TRACED; psignal(q, SIGKILL); } } /* * Save exit status and final rusage info, adding in child rusage * info and self times. */ p->p_xstat = rv; *p->p_ru = p->p_stats->p_ru; calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL); ruadd(p->p_ru, &p->p_stats->p_cru); /* * Notify parent that we're gone. */ psignal(p->p_pptr, SIGCHLD); wakeup((caddr_t)p->p_pptr); #if defined(tahoe) /* move this to cpu_exit */ p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL; #endif /* * Clear curproc after we've done all operations * that could block, and before tearing down the rest * of the process state that might be used from clock, etc. * Also, can't clear curproc while we're still runnable, * as we're not on a run queue (we are current, just not * a proper proc any longer!). * * Other substructures are freed from wait(). */ curproc = NULL; if (--p->p_limit->p_refcnt == 0) { FREE(p->p_limit, M_SUBPROC); p->p_limit = NULL; } /* * Finally, call machine-dependent code to release the remaining * resources including address space, the kernel stack and pcb. 
* The address space is released by "vmspace_free(p->p_vmspace)"; * This is machine-dependent, as we may have to change stacks * or ensure that the current one isn't reallocated before we * finish. cpu_exit will end with a call to cpu_switch(), finishing * our execution (pun intended). */ cpu_exit(p); } #ifdef COMPAT_43 #if defined(hp300) || defined(luna68k) #include #define GETPS(rp) ((struct frame *)(rp))->f_sr #else #define GETPS(rp) (rp)[PS] #endif int owait(p, uap, retval) struct proc *p; register struct owait_args /* { int dummy; } */ *uap; int *retval; { struct wait_args w; #ifdef PSL_ALLCC if ((GETPS(p->p_md.md_regs) & PSL_ALLCC) != PSL_ALLCC) { w.options = 0; w.rusage = NULL; } else { w.options = p->p_md.md_regs[R0]; w.rusage = (struct rusage *)p->p_md.md_regs[R1]; } #else w.options = 0; w.rusage = NULL; #endif w.pid = WAIT_ANY; w.status = NULL; return (wait1(p, &w, retval, 1)); } #endif /* COMPAT_43 */ int wait4(p, uap, retval) struct proc *p; struct wait_args *uap; int *retval; { return (wait1(p, uap, retval, 0)); } static int wait1(q, uap, retval, compat) register struct proc *q; register struct wait_args /* { int pid; int *status; int options; struct rusage *rusage; } */ *uap; int retval[]; int compat; { register int nfound; register struct proc *p, *t; int status, error; if (uap->pid == 0) uap->pid = -q->p_pgid; #ifdef notyet if (uap->options &~ (WUNTRACED|WNOHANG)) return (EINVAL); #endif loop: nfound = 0; for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) { if (uap->pid != WAIT_ANY && p->p_pid != uap->pid && p->p_pgid != -uap->pid) continue; nfound++; if (p->p_stat == SZOMB) { /* charge childs scheduling cpu usage to parent */ if (curproc->p_pid != 1) { curproc->p_estcpu = min(curproc->p_estcpu + p->p_estcpu, UCHAR_MAX); } retval[0] = p->p_pid; #ifdef COMPAT_43 if (compat) retval[1] = p->p_xstat; else #endif if (uap->status) { status = p->p_xstat; /* convert to int */ if ((error = copyout((caddr_t)&status, (caddr_t)uap->status, 
sizeof(status)))) return (error); } if (uap->rusage && (error = copyout((caddr_t)p->p_ru, (caddr_t)uap->rusage, sizeof (struct rusage)))) return (error); /* * If we got the child via a ptrace 'attach', * we need to give it back to the old parent. */ if (p->p_oppid && (t = pfind(p->p_oppid))) { p->p_oppid = 0; proc_reparent(p, t); psignal(t, SIGCHLD); wakeup((caddr_t)t); return (0); } p->p_xstat = 0; ruadd(&q->p_stats->p_cru, p->p_ru); FREE(p->p_ru, M_ZOMBIE); p->p_ru = NULL; /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(p->p_cred->p_ruid, -1); /* * Release reference to text vnode */ if (p->p_textvp) vrele(p->p_textvp); /* * Free up credentials. */ if (--p->p_cred->p_refcnt == 0) { crfree(p->p_cred->pc_ucred); FREE(p->p_cred, M_SUBPROC); p->p_cred = NULL; } /* * Finally finished with old proc entry. * Unlink it from its process group and free it. */ leavepgrp(p); LIST_REMOVE(p, p_list); /* off zombproc */ LIST_REMOVE(p, p_sibling); /* * Give machine-dependent layer a chance * to free anything that cpu_exit couldn't * release while still running in process context. */ cpu_wait(p); FREE(p, M_PROC); nprocs--; return (0); } if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || uap->options & WUNTRACED)) { p->p_flag |= P_WAITED; retval[0] = p->p_pid; #ifdef COMPAT_43 if (compat) { retval[1] = W_STOPCODE(p->p_xstat); error = 0; } else #endif if (uap->status) { status = W_STOPCODE(p->p_xstat); error = copyout((caddr_t)&status, (caddr_t)uap->status, sizeof(status)); } else error = 0; return (error); } } if (nfound == 0) return (ECHILD); if (uap->options & WNOHANG) { retval[0] = 0; return (0); } if ((error = tsleep((caddr_t)q, PWAIT | PCATCH, "wait", 0))) return (error); goto loop; } /* * make process 'parent' the new parent of process 'child'. 
 */
void
proc_reparent(child, parent)
	register struct proc *child;
	register struct proc *parent;
{
	/* Already the parent: nothing to do. */
	if (child->p_pptr == parent)
		return;

	/* Move the child from its current sibling list to the new parent's. */
	LIST_REMOVE(child, p_sibling);
	LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
	child->p_pptr = parent;
}
Index: head/sys/vm/pmap.h
===================================================================
--- head/sys/vm/pmap.h	(revision 17333)
+++ head/sys/vm/pmap.h	(revision 17334)
@@ -1,131 +1,131 @@
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.h 8.1 (Berkeley) 6/11/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Author: Avadis Tevanian, Jr. * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: pmap.h,v 1.11 1996/06/17 03:35:34 dyson Exp $ + * $Id: pmap.h,v 1.13 1996/07/27 04:22:12 dyson Exp $ */ /* * Machine address mapping definitions -- machine-independent * section. [For machine-dependent section, see "machine/pmap.h".] */ #ifndef _PMAP_VM_ #define _PMAP_VM_ /* * Each machine dependent implementation is expected to * keep certain statistics. 
They may do this anyway they * so choose, but are expected to return the statistics * in the following structure. */ struct pmap_statistics { long resident_count; /* # of pages mapped (total) */ long wired_count; /* # of pages wired */ }; typedef struct pmap_statistics *pmap_statistics_t; #include #ifdef KERNEL void pmap_change_wiring __P((pmap_t, vm_offset_t, boolean_t)); void pmap_clear_modify __P((vm_offset_t pa)); void pmap_clear_reference __P((vm_offset_t pa)); void pmap_copy __P((pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t)); void pmap_copy_page __P((vm_offset_t, vm_offset_t)); void pmap_destroy __P((pmap_t)); void pmap_enter __P((pmap_t, vm_offset_t, vm_offset_t, vm_prot_t, boolean_t)); vm_offset_t pmap_extract __P((pmap_t, vm_offset_t)); void pmap_growkernel __P((vm_offset_t)); void pmap_init __P((vm_offset_t, vm_offset_t)); -int pmap_tc_modified __P((vm_page_t m)); -int pmap_tc_referenced __P((vm_offset_t pa)); +boolean_t pmap_is_modified __P((vm_offset_t pa)); +boolean_t pmap_is_referenced __P((vm_offset_t pa)); +boolean_t pmap_ts_referenced __P((vm_offset_t pa)); void pmap_kenter __P((vm_offset_t, vm_offset_t)); void pmap_kremove __P((vm_offset_t)); vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int)); void pmap_object_init_pt __P((pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_offset_t size, int pagelimit)); boolean_t pmap_page_exists __P((pmap_t, vm_offset_t)); -void pmap_page_protect __P((vm_page_t, vm_prot_t)); +void pmap_page_protect __P((vm_offset_t, vm_prot_t)); void pmap_pageable __P((pmap_t, vm_offset_t, vm_offset_t, boolean_t)); vm_offset_t pmap_phys_address __P((int)); void pmap_pinit __P((pmap_t)); void pmap_protect __P((pmap_t, vm_offset_t, vm_offset_t, vm_prot_t)); void pmap_qenter __P((vm_offset_t, vm_page_t *, int)); void pmap_qremove __P((vm_offset_t, int)); void pmap_reference __P((pmap_t)); void pmap_release __P((pmap_t)); void pmap_remove __P((pmap_t, vm_offset_t, vm_offset_t)); 
-void pmap_remove_pages __P((pmap_t, vm_offset_t, vm_offset_t)); void pmap_zero_page __P((vm_offset_t)); void pmap_prefault __P((pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry, vm_object_t object)); int pmap_mincore __P((pmap_t pmap, vm_offset_t addr)); #endif /* KERNEL */ #endif /* _PMAP_VM_ */ Index: head/sys/vm/swap_pager.c =================================================================== --- head/sys/vm/swap_pager.c (revision 17333) +++ head/sys/vm/swap_pager.c (revision 17334) @@ -1,1670 +1,1663 @@ /* * Copyright (c) 1994 John S. Dyson * Copyright (c) 1990 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 - * $Id: swap_pager.c,v 1.68 1996/06/10 04:58:48 dyson Exp $ + * $Id: swap_pager.c,v 1.69 1996/07/27 03:23:51 dyson Exp $ */ /* * Quick hack to page to dedicated partition(s). 
* TODO: * Add multiprocessor locks * Deal with async writes in a better fashion */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NPENDINGIO #define NPENDINGIO 10 #endif static int nswiodone; int swap_pager_full; extern int vm_swap_size; static int no_swap_space = 1; struct rlisthdr swaplist; #define MAX_PAGEOUT_CLUSTER 16 TAILQ_HEAD(swpclean, swpagerclean); typedef struct swpagerclean *swp_clean_t; static struct swpagerclean { TAILQ_ENTRY(swpagerclean) spc_list; int spc_flags; struct buf *spc_bp; vm_object_t spc_object; vm_offset_t spc_kva; int spc_count; vm_page_t spc_m[MAX_PAGEOUT_CLUSTER]; } swcleanlist[NPENDINGIO]; /* spc_flags values */ #define SPC_ERROR 0x01 #define SWB_EMPTY (-1) /* list of completed page cleans */ static struct swpclean swap_pager_done; /* list of pending page cleans */ static struct swpclean swap_pager_inuse; /* list of free pager clean structs */ static struct swpclean swap_pager_free; int swap_pager_free_count; /* list of "named" anon region objects */ static struct pagerlst swap_pager_object_list; /* list of "unnamed" anon region objects */ struct pagerlst swap_pager_un_object_list; #define SWAP_FREE_NEEDED 0x1 /* need a swap block */ #define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2 static int swap_pager_needflags; static struct pagerlst *swp_qs[] = { &swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0 }; /* * pagerops for OBJT_SWAP - "swap pager". 
*/ static vm_object_t swap_pager_alloc __P((void *handle, vm_size_t size, vm_prot_t prot, vm_ooffset_t offset)); static void swap_pager_dealloc __P((vm_object_t object)); static boolean_t swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex, int *before, int *after)); static int swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int)); static void swap_pager_init __P((void)); static void swap_pager_sync __P((void)); struct pagerops swappagerops = { swap_pager_init, swap_pager_alloc, swap_pager_dealloc, swap_pager_getpages, swap_pager_putpages, swap_pager_haspage, swap_pager_sync }; static int npendingio = NPENDINGIO; static int dmmin; int dmmax; static __pure int swap_pager_block_index __P((vm_pindex_t pindex)) __pure2; static __pure int swap_pager_block_offset __P((vm_pindex_t pindex)) __pure2; static daddr_t *swap_pager_diskaddr __P((vm_object_t object, vm_pindex_t pindex, int *valid)); static void swap_pager_finish __P((swp_clean_t spc)); static void swap_pager_freepage __P((vm_page_t m)); static void swap_pager_free_swap __P((vm_object_t object)); static void swap_pager_freeswapspace __P((vm_object_t object, unsigned int from, unsigned int to)); static int swap_pager_getswapspace __P((vm_object_t object, unsigned int amount, daddr_t *rtval)); static void swap_pager_iodone __P((struct buf *)); static void swap_pager_iodone1 __P((struct buf *bp)); static void swap_pager_reclaim __P((void)); static void swap_pager_ridpages __P((vm_page_t *m, int count, int reqpage)); static void swap_pager_setvalid __P((vm_object_t object, vm_offset_t offset, int valid)); static void swapsizecheck __P((void)); #define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE))) static inline void swapsizecheck() { if (vm_swap_size < 128 * btodb(PAGE_SIZE)) { if (swap_pager_full == 0) printf("swap_pager: out of swap space\n"); swap_pager_full = 1; } else if (vm_swap_size > 192 * btodb(PAGE_SIZE)) swap_pager_full = 0; } static void swap_pager_init() { 
TAILQ_INIT(&swap_pager_object_list); TAILQ_INIT(&swap_pager_un_object_list); /* * Initialize clean lists */ TAILQ_INIT(&swap_pager_inuse); TAILQ_INIT(&swap_pager_done); TAILQ_INIT(&swap_pager_free); swap_pager_free_count = 0; /* * Calculate the swap allocation constants. */ dmmin = PAGE_SIZE / DEV_BSIZE; dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2; } void swap_pager_swap_init() { swp_clean_t spc; struct buf *bp; int i; /* * kva's are allocated here so that we dont need to keep doing * kmem_alloc pageables at runtime */ for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) { spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER); if (!spc->spc_kva) { break; } spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL); if (!spc->spc_bp) { kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE); break; } spc->spc_flags = 0; TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); swap_pager_free_count++; } } int swap_pager_swp_alloc(object, wait) vm_object_t object; int wait; { sw_blk_t swb; int nblocks; int i, j; nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES; swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait); if (swb == NULL) return 1; for (i = 0; i < nblocks; i++) { swb[i].swb_valid = 0; swb[i].swb_locked = 0; for (j = 0; j < SWB_NPAGES; j++) swb[i].swb_block[j] = SWB_EMPTY; } object->un_pager.swp.swp_nblocks = nblocks; object->un_pager.swp.swp_allocsize = 0; object->un_pager.swp.swp_blocks = swb; object->un_pager.swp.swp_poip = 0; if (object->handle != NULL) { TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list); } else { TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list); } return 0; } /* * Allocate an object and associated resources. * Note that if we are called from the pageout daemon (handle == NULL) * we should not wait for memory as it could resulting in deadlock. 
 */
static vm_object_t
swap_pager_alloc(handle, size, prot, offset)
	void *handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_ooffset_t offset;
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here.  Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a disaster.
			 * Probably quite rare, but is yet another reason to just
			 * rip support of "named anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
				OFF_TO_IDX(offset + PAGE_MASK) + size);
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		object = vm_object_allocate(OBJT_SWAP,
			OFF_TO_IDX(offset + PAGE_MASK) + size);
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}
	return (object);
}

/*
 * returns disk block associated with pager and offset
 * additionally, as a side effect returns a flag indicating
 * if the block has been written
 */
inline static daddr_t *
swap_pager_diskaddr(object, pindex, valid)
	vm_object_t object;
	vm_pindex_t pindex;
	int *valid;
{
	register sw_blk_t swb;
	int ix;

	if (valid)
		*valid = 0;
	/* Which sw_blk_t covers this page index. */
	ix = pindex / SWB_NPAGES;
	if ((ix >= object->un_pager.swp.swp_nblocks) ||
	    (pindex >= object->size)) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (valid)
		*valid = swb->swb_valid & (1 << ix);
	return &swb->swb_block[ix];
}

/*
 * Utility routine to set the valid (written) bit for
 * a block associated with a pager and offset
 */
static void
swap_pager_setvalid(object, offset, valid)
	vm_object_t object;
	vm_offset_t offset;
	int valid;
{
	register sw_blk_t swb;
	int ix;

	ix = offset / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks)
		return;
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = offset %
SWB_NPAGES; if (valid) swb->swb_valid |= (1 << ix); else swb->swb_valid &= ~(1 << ix); return; } /* * this routine allocates swap space with a fragmentation * minimization policy. */ static int swap_pager_getswapspace(object, amount, rtval) vm_object_t object; unsigned int amount; daddr_t *rtval; { unsigned location; vm_swap_size -= amount; if (!rlist_alloc(&swaplist, amount, &location)) { vm_swap_size += amount; return 0; } else { swapsizecheck(); object->un_pager.swp.swp_allocsize += amount; *rtval = location; return 1; } } /* * this routine frees swap space with a fragmentation * minimization policy. */ static void swap_pager_freeswapspace(object, from, to) vm_object_t object; unsigned int from; unsigned int to; { rlist_free(&swaplist, from, to); vm_swap_size += (to - from) + 1; object->un_pager.swp.swp_allocsize -= (to - from) + 1; swapsizecheck(); } /* * this routine frees swap blocks from a specified pager */ void swap_pager_freespace(object, start, size) vm_object_t object; vm_pindex_t start; vm_size_t size; { vm_pindex_t i; int s; s = splbio(); for (i = start; i < start + size; i += 1) { int valid; daddr_t *addr = swap_pager_diskaddr(object, i, &valid); if (addr && *addr != SWB_EMPTY) { swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1); if (valid) { swap_pager_setvalid(object, i, 0); } *addr = SWB_EMPTY; } } splx(s); } /* * same as freespace, but don't free, just force a DMZ next time */ void swap_pager_dmzspace(object, start, size) vm_object_t object; vm_pindex_t start; vm_size_t size; { vm_pindex_t i; int s; s = splbio(); for (i = start; i < start + size; i += 1) { int valid; daddr_t *addr = swap_pager_diskaddr(object, i, &valid); if (addr && *addr != SWB_EMPTY) { if (valid) { swap_pager_setvalid(object, i, 0); } } } splx(s); } static void swap_pager_free_swap(object) vm_object_t object; { register int i, j; register sw_blk_t swb; int first_block=0, block_count=0; int s; /* * Free left over swap blocks */ s = splbio(); for (i = 0, swb 
= object->un_pager.swp.swp_blocks; i < object->un_pager.swp.swp_nblocks; i++, swb++) { for (j = 0; j < SWB_NPAGES; j++) { if (swb->swb_block[j] != SWB_EMPTY) { /* * initially the length of the run is zero */ if (block_count == 0) { first_block = swb->swb_block[j]; block_count = btodb(PAGE_SIZE); swb->swb_block[j] = SWB_EMPTY; /* * if the new block can be included into the current run */ } else if (swb->swb_block[j] == first_block + block_count) { block_count += btodb(PAGE_SIZE); swb->swb_block[j] = SWB_EMPTY; /* * terminate the previous run, and start a new one */ } else { swap_pager_freeswapspace(object, first_block, (unsigned) first_block + block_count - 1); first_block = swb->swb_block[j]; block_count = btodb(PAGE_SIZE); swb->swb_block[j] = SWB_EMPTY; } } } } if (block_count) { swap_pager_freeswapspace(object, first_block, (unsigned) first_block + block_count - 1); } splx(s); } /* * swap_pager_reclaim frees up over-allocated space from all pagers * this eliminates internal fragmentation due to allocation of space * for segments that are never swapped to. It has been written so that * it does not block until the rlist_free operation occurs; it keeps * the queues consistant. 
*/ /* * Maximum number of blocks (pages) to reclaim per pass */ #define MAXRECLAIM 128 static void swap_pager_reclaim() { vm_object_t object; int i, j, k; int s; int reclaimcount; static struct { int address; vm_object_t object; } reclaims[MAXRECLAIM]; static int in_reclaim; /* * allow only one process to be in the swap_pager_reclaim subroutine */ s = splbio(); if (in_reclaim) { tsleep(&in_reclaim, PSWP, "swrclm", 0); splx(s); return; } in_reclaim = 1; reclaimcount = 0; /* for each pager queue */ for (k = 0; swp_qs[k]; k++) { object = TAILQ_FIRST(swp_qs[k]); while (object && (reclaimcount < MAXRECLAIM)) { /* * see if any blocks associated with a pager has been * allocated but not used (written) */ if (object->paging_in_progress == 0) { for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) { sw_blk_t swb = &object->un_pager.swp.swp_blocks[i]; if (swb->swb_locked) continue; for (j = 0; j < SWB_NPAGES; j++) { if (swb->swb_block[j] != SWB_EMPTY && (swb->swb_valid & (1 << j)) == 0) { reclaims[reclaimcount].address = swb->swb_block[j]; reclaims[reclaimcount++].object = object; swb->swb_block[j] = SWB_EMPTY; if (reclaimcount >= MAXRECLAIM) goto rfinished; } } } } object = TAILQ_NEXT(object, pager_object_list); } } rfinished: /* * free the blocks that have been added to the reclaim list */ for (i = 0; i < reclaimcount; i++) { swap_pager_freeswapspace(reclaims[i].object, reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1); } splx(s); in_reclaim = 0; wakeup(&in_reclaim); } /* * swap_pager_copy copies blocks from one pager to another and * destroys the source pager */ void swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset) vm_object_t srcobject; vm_pindex_t srcoffset; vm_object_t dstobject; vm_pindex_t dstoffset; vm_pindex_t offset; { vm_pindex_t i; int origsize; int s; if (vm_swap_size) no_swap_space = 0; origsize = srcobject->un_pager.swp.swp_allocsize; /* * remove the source object from the swap_pager internal queue */ if (srcobject->handle 
== NULL) { TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list); } else { TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list); } s = splbio(); while (srcobject->un_pager.swp.swp_poip) { tsleep(srcobject, PVM, "spgout", 0); } splx(s); /* * clean all of the pages that are currently active and finished */ swap_pager_sync(); s = splbio(); /* * transfer source to destination */ for (i = 0; i < dstobject->size; i += 1) { int srcvalid, dstvalid; daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset, &srcvalid); daddr_t *dstaddrp; /* * see if the source has space allocated */ if (srcaddrp && *srcaddrp != SWB_EMPTY) { /* * if the source is valid and the dest has no space, * then copy the allocation from the srouce to the * dest. */ if (srcvalid) { dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset, &dstvalid); /* * if the dest already has a valid block, * deallocate the source block without * copying. */ if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) { swap_pager_freeswapspace(dstobject, *dstaddrp, *dstaddrp + btodb(PAGE_SIZE) - 1); *dstaddrp = SWB_EMPTY; } if (dstaddrp && *dstaddrp == SWB_EMPTY) { *dstaddrp = *srcaddrp; *srcaddrp = SWB_EMPTY; dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE); srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE); swap_pager_setvalid(dstobject, i + dstoffset, 1); } } /* * if the source is not empty at this point, then * deallocate the space. 
*/ if (*srcaddrp != SWB_EMPTY) { swap_pager_freeswapspace(srcobject, *srcaddrp, *srcaddrp + btodb(PAGE_SIZE) - 1); *srcaddrp = SWB_EMPTY; } } } splx(s); /* * Free left over swap blocks */ swap_pager_free_swap(srcobject); if (srcobject->un_pager.swp.swp_allocsize) { printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n", srcobject->un_pager.swp.swp_allocsize, origsize); } free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA); srcobject->un_pager.swp.swp_blocks = NULL; return; } static void swap_pager_dealloc(object) vm_object_t object; { int s; /* * Remove from list right away so lookups will fail if we block for * pageout completion. */ if (object->handle == NULL) { TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list); } else { TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list); } /* * Wait for all pageouts to finish and remove all entries from * cleaning list. */ s = splbio(); while (object->un_pager.swp.swp_poip) { tsleep(object, PVM, "swpout", 0); } splx(s); swap_pager_sync(); /* * Free left over swap blocks */ swap_pager_free_swap(object); if (object->un_pager.swp.swp_allocsize) { printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n", object->un_pager.swp.swp_allocsize); } /* * Free swap management resources */ free(object->un_pager.swp.swp_blocks, M_VMPGDATA); object->un_pager.swp.swp_blocks = NULL; } static inline __pure int swap_pager_block_index(pindex) vm_pindex_t pindex; { return (pindex / SWB_NPAGES); } static inline __pure int swap_pager_block_offset(pindex) vm_pindex_t pindex; { return (pindex % SWB_NPAGES); } /* * swap_pager_haspage returns TRUE if the pager has data that has * been written out. 
 */
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	/* Which sw_blk_t covers this page index. */
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;
	if (swb->swb_block[ix] != SWB_EMPTY) {
		if (swb->swb_valid & (1 << ix)) {
			int tix;
			if (before) {
				/*
				 * Count how many immediately preceding pages
				 * are valid and disk-contiguous with this one.
				 */
				for(tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] +
					    (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*before)++;
				}
			}
			if (after) {
				/*
				 * Count how many immediately following pages
				 * are valid and disk-contiguous with this one.
				 */
				for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					if ((swb->swb_block[tix] -
					    (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
					    swb->swb_block[ix])
						break;
					(*after)++;
				}
			}
			return TRUE;
		}
	}
	return (FALSE);
}

/*
 * swap_pager_freepage is a convenience routine that clears the busy
 * bit and deallocates a page.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * swap_pager_ridpages is a convenience routine that deallocates all
 * but the required page.  this is usually used in error returns that
 * need to invalidate the "extra" readahead pages.
*/ static void swap_pager_ridpages(m, count, reqpage) vm_page_t *m; int count; int reqpage; { int i; for (i = 0; i < count; i++) if (i != reqpage) swap_pager_freepage(m[i]); } /* * swap_pager_iodone1 is the completion routine for both reads and async writes */ static void swap_pager_iodone1(bp) struct buf *bp; { bp->b_flags |= B_DONE; bp->b_flags &= ~B_ASYNC; wakeup(bp); } static int swap_pager_getpages(object, m, count, reqpage) vm_object_t object; vm_page_t *m; int count, reqpage; { register struct buf *bp; sw_blk_t swb[count]; register int s; int i; boolean_t rv; vm_offset_t kva, off[count]; swp_clean_t spc; vm_pindex_t paging_offset; int reqaddr[count]; int sequential; int first, last; int failed; int reqdskregion; object = m[reqpage]->object; paging_offset = OFF_TO_IDX(object->paging_offset); sequential = (m[reqpage]->pindex == (object->last_read + 1)); for (i = 0; i < count; i++) { vm_pindex_t fidx = m[i]->pindex + paging_offset; int ix = swap_pager_block_index(fidx); if (ix >= object->un_pager.swp.swp_nblocks) { int j; if (i <= reqpage) { swap_pager_ridpages(m, count, reqpage); return (VM_PAGER_FAIL); } for (j = i; j < count; j++) { swap_pager_freepage(m[j]); } count = i; break; } swb[i] = &object->un_pager.swp.swp_blocks[ix]; off[i] = swap_pager_block_offset(fidx); reqaddr[i] = swb[i]->swb_block[off[i]]; } /* make sure that our required input request is existant */ if (reqaddr[reqpage] == SWB_EMPTY || (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) { swap_pager_ridpages(m, count, reqpage); return (VM_PAGER_FAIL); } reqdskregion = reqaddr[reqpage] / dmmax; /* * search backwards for the first contiguous page to transfer */ failed = 0; first = 0; for (i = reqpage - 1; i >= 0; --i) { if (sequential || failed || (reqaddr[i] == SWB_EMPTY) || (swb[i]->swb_valid & (1 << off[i])) == 0 || (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || ((reqaddr[i] / dmmax) != reqdskregion)) { failed = 1; swap_pager_freepage(m[i]); if (first == 0) first 
= i + 1; } } /* * search forwards for the last contiguous page to transfer */ failed = 0; last = count; for (i = reqpage + 1; i < count; i++) { if (failed || (reqaddr[i] == SWB_EMPTY) || (swb[i]->swb_valid & (1 << off[i])) == 0 || (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) || ((reqaddr[i] / dmmax) != reqdskregion)) { failed = 1; swap_pager_freepage(m[i]); if (last == count) last = i; } } count = last; if (first != 0) { for (i = first; i < count; i++) { m[i - first] = m[i]; reqaddr[i - first] = reqaddr[i]; off[i - first] = off[i]; } count -= first; reqpage -= first; } ++swb[reqpage]->swb_locked; /* * at this point: "m" is a pointer to the array of vm_page_t for * paging I/O "count" is the number of vm_page_t entries represented * by "m" "object" is the vm_object_t for I/O "reqpage" is the index * into "m" for the page actually faulted */ spc = NULL; if ((count == 1) && ((spc = TAILQ_FIRST(&swap_pager_free)) != NULL)) { TAILQ_REMOVE(&swap_pager_free, spc, spc_list); swap_pager_free_count--; kva = spc->spc_kva; bp = spc->spc_bp; bzero(bp, sizeof *bp); bp->b_spc = spc; bp->b_vnbufs.le_next = NOLIST; } else { /* * Get a swap buffer header to perform the IO */ bp = getpbuf(); kva = (vm_offset_t) bp->b_data; } /* * map our page(s) into kva for input */ pmap_qenter(kva, m, count); bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING; bp->b_iodone = swap_pager_iodone1; bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; crhold(bp->b_rcred); crhold(bp->b_wcred); bp->b_un.b_addr = (caddr_t) kva; bp->b_blkno = reqaddr[0]; bp->b_bcount = PAGE_SIZE * count; bp->b_bufsize = PAGE_SIZE * count; pbgetvp(swapdev_vp, bp); cnt.v_swapin++; cnt.v_swappgsin += count; /* * perform the I/O */ VOP_STRATEGY(bp); /* * wait for the sync I/O to complete */ s = splbio(); while ((bp->b_flags & B_DONE) == 0) { if (tsleep(bp, PVM, "swread", hz*20)) { printf("swap_pager: indefinite wait buffer: device: %d, blkno: 
%d, size: %d\n", bp->b_dev, bp->b_blkno, bp->b_bcount); } } if (bp->b_flags & B_ERROR) { printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n", bp->b_blkno, bp->b_bcount, bp->b_error); rv = VM_PAGER_ERROR; } else { rv = VM_PAGER_OK; } /* * relpbuf does this, but we maintain our own buffer list also... */ if (bp->b_vp) pbrelvp(bp); splx(s); swb[reqpage]->swb_locked--; /* * remove the mapping for kernel virtual */ pmap_qremove(kva, count); if (spc) { m[reqpage]->object->last_read = m[reqpage]->pindex; if (bp->b_flags & B_WANTED) wakeup(bp); /* * if we have used an spc, we need to free it. */ if (bp->b_rcred != NOCRED) crfree(bp->b_rcred); if (bp->b_wcred != NOCRED) crfree(bp->b_wcred); TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); swap_pager_free_count++; if (swap_pager_needflags & SWAP_FREE_NEEDED) { wakeup(&swap_pager_free); } if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) pagedaemon_wakeup(); swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT); if (rv == VM_PAGER_OK) { - pmap_tc_modified(m[reqpage]); + pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage])); m[reqpage]->valid = VM_PAGE_BITS_ALL; m[reqpage]->dirty = 0; } } else { /* * release the physical I/O buffer */ relpbuf(bp); /* * finish up input if everything is ok */ if (rv == VM_PAGER_OK) { for (i = 0; i < count; i++) { - pmap_tc_modified(m[i]); + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); m[i]->dirty = 0; m[i]->flags &= ~PG_ZERO; if (i != reqpage) { /* * whether or not to leave the page * activated is up in the air, but we * should put the page on a page queue * somewhere. (it already is in the * object). After some emperical * results, it is best to deactivate * the readahead pages. 
*/ vm_page_deactivate(m[i]); /* * just in case someone was asking for * this page we now tell them that it * is ok to use */ m[i]->valid = VM_PAGE_BITS_ALL; PAGE_WAKEUP(m[i]); } } m[reqpage]->object->last_read = m[count-1]->pindex; /* * If we're out of swap space, then attempt to free * some whenever multiple pages are brought in. We * must set the dirty bits so that the page contents * will be preserved. */ if (SWAPLOW) { for (i = 0; i < count; i++) { m[i]->dirty = VM_PAGE_BITS_ALL; } swap_pager_freespace(object, m[0]->pindex + paging_offset, count); } } else { swap_pager_ridpages(m, count, reqpage); } } return (rv); } int swap_pager_putpages(object, m, count, sync, rtvals) vm_object_t object; vm_page_t *m; int count; boolean_t sync; int *rtvals; { register struct buf *bp; sw_blk_t swb[count]; register int s; int i, j, ix; boolean_t rv; vm_offset_t kva, off, fidx; swp_clean_t spc; vm_pindex_t paging_pindex; int reqaddr[count]; int failed; if (vm_swap_size) no_swap_space = 0; if (no_swap_space) { for (i = 0; i < count; i++) rtvals[i] = VM_PAGER_FAIL; return VM_PAGER_FAIL; } spc = NULL; object = m[0]->object; paging_pindex = OFF_TO_IDX(object->paging_offset); failed = 0; for (j = 0; j < count; j++) { fidx = m[j]->pindex + paging_pindex; ix = swap_pager_block_index(fidx); swb[j] = 0; if (ix >= object->un_pager.swp.swp_nblocks) { rtvals[j] = VM_PAGER_FAIL; failed = 1; continue; } else { rtvals[j] = VM_PAGER_OK; } swb[j] = &object->un_pager.swp.swp_blocks[ix]; swb[j]->swb_locked++; if (failed) { rtvals[j] = VM_PAGER_FAIL; continue; } off = swap_pager_block_offset(fidx); reqaddr[j] = swb[j]->swb_block[off]; if (reqaddr[j] == SWB_EMPTY) { daddr_t blk; int tries; int ntoget; tries = 0; s = splbio(); /* * if any other pages have been allocated in this * block, we only try to get one page. */ for (i = 0; i < SWB_NPAGES; i++) { if (swb[j]->swb_block[i] != SWB_EMPTY) break; } ntoget = (i == SWB_NPAGES) ? 
SWB_NPAGES : 1; /* * this code is alittle conservative, but works (the * intent of this code is to allocate small chunks for * small objects) */ if ((off == 0) && ((fidx + ntoget) > object->size)) { ntoget = object->size - fidx; } retrygetspace: if (!swap_pager_full && ntoget > 1 && swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE), &blk)) { for (i = 0; i < ntoget; i++) { swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i; swb[j]->swb_valid = 0; } reqaddr[j] = swb[j]->swb_block[off]; } else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE), &swb[j]->swb_block[off])) { /* * if the allocation has failed, we try to * reclaim space and retry. */ if (++tries == 1) { swap_pager_reclaim(); goto retrygetspace; } rtvals[j] = VM_PAGER_AGAIN; failed = 1; swap_pager_full = 1; } else { reqaddr[j] = swb[j]->swb_block[off]; swb[j]->swb_valid &= ~(1 << off); } splx(s); } } /* * search forwards for the last contiguous page to transfer */ failed = 0; for (i = 0; i < count; i++) { if (failed || (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) || ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) || (rtvals[i] != VM_PAGER_OK)) { failed = 1; if (rtvals[i] == VM_PAGER_OK) rtvals[i] = VM_PAGER_AGAIN; } } for (i = 0; i < count; i++) { if (rtvals[i] != VM_PAGER_OK) { if (swb[i]) --swb[i]->swb_locked; } } for (i = 0; i < count; i++) if (rtvals[i] != VM_PAGER_OK) break; if (i == 0) { return VM_PAGER_AGAIN; } count = i; for (i = 0; i < count; i++) { if (reqaddr[i] == SWB_EMPTY) { printf("I/O to empty block???? -- pindex: %d, i: %d\n", m[i]->pindex, i); } } /* * For synchronous writes, we clean up all completed async pageouts. 
*/ if (sync == TRUE) { swap_pager_sync(); } kva = 0; /* * get a swap pager clean data structure, block until we get it */ if (swap_pager_free_count <= 3) { s = splbio(); if (curproc == pageproc) { retryfree: /* * pageout daemon needs a swap control block */ swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT|SWAP_FREE_NEEDED; /* * if it does not get one within a short time, then * there is a potential deadlock, so we go-on trying * to free pages. It is important to block here as opposed * to returning, thereby allowing the pageout daemon to continue. * It is likely that pageout daemon will start suboptimally * reclaiming vnode backed pages if we don't block. Since the * I/O subsystem is probably already fully utilized, might as * well wait. */ if (tsleep(&swap_pager_free, PVM, "swpfre", hz/5)) { swap_pager_sync(); if (swap_pager_free_count <= 3) { splx(s); return VM_PAGER_AGAIN; } } else { /* * we make sure that pageouts aren't taking up all of * the free swap control blocks. */ swap_pager_sync(); if (swap_pager_free_count <= 3) { goto retryfree; } } } else { pagedaemon_wakeup(); while (swap_pager_free_count <= 3) { swap_pager_needflags |= SWAP_FREE_NEEDED; tsleep(&swap_pager_free, PVM, "swpfre", 0); pagedaemon_wakeup(); } } splx(s); } spc = TAILQ_FIRST(&swap_pager_free); if (spc == NULL) panic("swap_pager_putpages: free queue is empty, %d expected\n", swap_pager_free_count); TAILQ_REMOVE(&swap_pager_free, spc, spc_list); swap_pager_free_count--; kva = spc->spc_kva; /* * map our page(s) into kva for I/O */ pmap_qenter(kva, m, count); /* * get the base I/O offset into the swap file */ for (i = 0; i < count; i++) { fidx = m[i]->pindex + paging_pindex; off = swap_pager_block_offset(fidx); /* * set the valid bit */ swb[i]->swb_valid |= (1 << off); /* * and unlock the data structure */ swb[i]->swb_locked--; } /* * Get a swap buffer header and perform the IO */ bp = spc->spc_bp; bzero(bp, sizeof *bp); bp->b_spc = spc; bp->b_vnbufs.le_next = NOLIST; bp->b_flags = B_BUSY | 
B_PAGING; bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_data = (caddr_t) kva; bp->b_blkno = reqaddr[0]; pbgetvp(swapdev_vp, bp); bp->b_bcount = PAGE_SIZE * count; bp->b_bufsize = PAGE_SIZE * count; swapdev_vp->v_numoutput++; /* * If this is an async write we set up additional buffer fields and * place a "cleaning" entry on the inuse queue. */ s = splbio(); if (sync == FALSE) { spc->spc_flags = 0; spc->spc_object = object; for (i = 0; i < count; i++) spc->spc_m[i] = m[i]; spc->spc_count = count; /* * the completion routine for async writes */ bp->b_flags |= B_CALL; bp->b_iodone = swap_pager_iodone; bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bcount; object->un_pager.swp.swp_poip++; TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); } else { object->un_pager.swp.swp_poip++; bp->b_flags |= B_CALL; bp->b_iodone = swap_pager_iodone1; } cnt.v_swapout++; cnt.v_swappgsout += count; /* * perform the I/O */ VOP_STRATEGY(bp); if (sync == FALSE) { if ((bp->b_flags & B_DONE) == B_DONE) { swap_pager_sync(); } splx(s); for (i = 0; i < count; i++) { rtvals[i] = VM_PAGER_PEND; } return VM_PAGER_PEND; } /* * wait for the sync I/O to complete */ while ((bp->b_flags & B_DONE) == 0) { tsleep(bp, PVM, "swwrt", 0); } if (bp->b_flags & B_ERROR) { printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n", bp->b_blkno, bp->b_bcount, bp->b_error); rv = VM_PAGER_ERROR; } else { rv = VM_PAGER_OK; } object->un_pager.swp.swp_poip--; if (object->un_pager.swp.swp_poip == 0) wakeup(object); if (bp->b_vp) pbrelvp(bp); if (bp->b_flags & B_WANTED) wakeup(bp); splx(s); /* * remove the mapping for kernel virtual */ pmap_qremove(kva, count); /* * if we have written the page, then indicate that the page is clean. 
*/ if (rv == VM_PAGER_OK) { for (i = 0; i < count; i++) { if (rtvals[i] == VM_PAGER_OK) { - pmap_tc_modified(m[i]); + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); m[i]->dirty = 0; /* * optimization, if a page has been read * during the pageout process, we activate it. */ if ((m[i]->queue != PQ_ACTIVE) && ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) || - pmap_tc_referenced(VM_PAGE_TO_PHYS(m[i])))) { + pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) { vm_page_activate(m[i]); } } } } else { for (i = 0; i < count; i++) { rtvals[i] = rv; } } if (bp->b_rcred != NOCRED) crfree(bp->b_rcred); if (bp->b_wcred != NOCRED) crfree(bp->b_wcred); TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); swap_pager_free_count++; if (swap_pager_needflags & SWAP_FREE_NEEDED) { wakeup(&swap_pager_free); } if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) pagedaemon_wakeup(); swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT); return (rv); } static void swap_pager_sync() { register swp_clean_t spc, tspc; register int s; tspc = NULL; if (TAILQ_FIRST(&swap_pager_done) == NULL) return; for (;;) { s = splbio(); /* * Look up and removal from done list must be done at splbio() * to avoid conflicts with swap_pager_iodone. */ while ((spc = TAILQ_FIRST(&swap_pager_done)) != 0) { pmap_qremove(spc->spc_kva, spc->spc_count); swap_pager_finish(spc); TAILQ_REMOVE(&swap_pager_done, spc, spc_list); goto doclean; } /* * No operations done, thats all we can do for now. */ splx(s); break; /* * The desired page was found to be busy earlier in the scan * but has since completed. 
*/ doclean: if (tspc && tspc == spc) { tspc = NULL; } spc->spc_flags = 0; TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); swap_pager_free_count++; if (swap_pager_needflags & SWAP_FREE_NEEDED) { wakeup(&swap_pager_free); } if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) pagedaemon_wakeup(); swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT); splx(s); } return; } void swap_pager_finish(spc) register swp_clean_t spc; { vm_object_t object = spc->spc_m[0]->object; int i; object->paging_in_progress -= spc->spc_count; if ((object->paging_in_progress == 0) && (object->flags & OBJ_PIPWNT)) { object->flags &= ~OBJ_PIPWNT; wakeup(object); } /* * If no error, mark as clean and inform the pmap system. If error, * mark as dirty so we will try again. (XXX could get stuck doing * this, should give up after awhile) */ if (spc->spc_flags & SPC_ERROR) { for (i = 0; i < spc->spc_count; i++) { printf("swap_pager_finish: I/O error, clean of page %lx failed\n", (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i])); } } else { - int pagewanted = 0; for (i = 0; i < spc->spc_count; i++) { - if (spc->spc_m[i]->flags & (PG_WANTED | PG_REFERENCED)) { - pagewanted = 1; - break; - } - } - for (i = 0; i < spc->spc_count; i++) { - pmap_tc_modified(spc->spc_m[i]); + pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); spc->spc_m[i]->dirty = 0; - if (pagewanted) { - if (spc->spc_m[i]->queue != PQ_ACTIVE) - vm_page_activate(spc->spc_m[i]); - spc->spc_m[i]->flags |= PG_REFERENCED; - } + if ((spc->spc_m[i]->queue != PQ_ACTIVE) && + ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i])))) + vm_page_activate(spc->spc_m[i]); } } for (i = 0; i < spc->spc_count; i++) { /* * we wakeup any processes that are waiting on these pages. 
*/ PAGE_WAKEUP(spc->spc_m[i]); } nswiodone -= spc->spc_count; return; } /* * swap_pager_iodone */ static void swap_pager_iodone(bp) register struct buf *bp; { register swp_clean_t spc; int s; s = splbio(); spc = (swp_clean_t) bp->b_spc; TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list); if (bp->b_flags & B_ERROR) { spc->spc_flags |= SPC_ERROR; printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n", (bp->b_flags & B_READ) ? "pagein" : "pageout", (u_long) bp->b_blkno, bp->b_bcount, bp->b_error); } if (bp->b_vp) pbrelvp(bp); +/* if (bp->b_flags & B_WANTED) +*/ wakeup(bp); if (bp->b_rcred != NOCRED) crfree(bp->b_rcred); if (bp->b_wcred != NOCRED) crfree(bp->b_wcred); nswiodone += spc->spc_count; if (--spc->spc_object->un_pager.swp.swp_poip == 0) { wakeup(spc->spc_object); } if ((swap_pager_needflags & SWAP_FREE_NEEDED) || TAILQ_FIRST(&swap_pager_inuse) == 0) { swap_pager_needflags &= ~SWAP_FREE_NEEDED; wakeup(&swap_pager_free); } if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) { swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT; pagedaemon_wakeup(); } if (vm_pageout_pages_needed) { wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) || ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min && nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) { pagedaemon_wakeup(); } splx(s); } Index: head/sys/vm/vm_fault.c =================================================================== --- head/sys/vm/vm_fault.c (revision 17333) +++ head/sys/vm/vm_fault.c (revision 17334) @@ -1,1139 +1,1146 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. 
* * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_fault.c,v 1.54 1996/07/27 03:23:52 dyson Exp $ + * $Id: vm_fault.c,v 1.55 1996/07/28 01:14:01 dyson Exp $ */ /* * Page fault handling module. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *)); #define VM_FAULT_READ_AHEAD 4 #define VM_FAULT_READ_BEHIND 3 #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) +int vm_fault_free_1; +int vm_fault_copy_save_1; +int vm_fault_copy_save_2; + /* * vm_fault: * * Handle a page fault occuring at the given address, * requiring the given permissions, in the map specified. * If successful, the page is inserted into the * associated physical map. * * NOTE: the given address should be truncated to the * proper page address. * * KERN_SUCCESS is returned if the page fault is handled; otherwise, * a standard error specifying why the fault is fatal is returned. 
* * * The map in question must be referenced, and remains so. * Caller may hold no locks. */ int vm_fault(map, vaddr, fault_type, change_wiring) vm_map_t map; vm_offset_t vaddr; vm_prot_t fault_type; boolean_t change_wiring; { vm_object_t first_object; vm_pindex_t first_pindex; vm_map_entry_t entry; register vm_object_t object; register vm_pindex_t pindex; vm_page_t m; vm_page_t first_m; vm_prot_t prot; int result; boolean_t wired; boolean_t su; boolean_t lookup_still_valid; vm_page_t old_m; vm_object_t next_object; vm_page_t marray[VM_FAULT_READ]; int hardfault = 0; struct vnode *vp = NULL; cnt.v_vm_faults++; /* needs lock XXX */ /* * Recovery actions */ #define FREE_PAGE(m) { \ PAGE_WAKEUP(m); \ vm_page_free(m); \ } #define RELEASE_PAGE(m) { \ PAGE_WAKEUP(m); \ if (m->queue != PQ_ACTIVE) vm_page_activate(m); \ } #define UNLOCK_MAP { \ if (lookup_still_valid) { \ vm_map_lookup_done(map, entry); \ lookup_still_valid = FALSE; \ } \ } #define UNLOCK_THINGS { \ vm_object_pip_wakeup(object); \ if (object != first_object) { \ FREE_PAGE(first_m); \ vm_object_pip_wakeup(first_object); \ } \ UNLOCK_MAP; \ if (vp != NULL) VOP_UNLOCK(vp); \ } #define UNLOCK_AND_DEALLOCATE { \ UNLOCK_THINGS; \ vm_object_deallocate(first_object); \ } RetryFault:; /* * Find the backing store object and offset into it to begin the * search. */ if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry, &first_object, &first_pindex, &prot, &wired, &su)) != KERN_SUCCESS) { return (result); } vp = vnode_pager_lock(first_object); lookup_still_valid = TRUE; if (wired) fault_type = prot; first_m = NULL; /* * Make a reference to this object to prevent its disposal while we * are messing with it. Once we have the reference, the map is free * to be diddled. Since objects reference their shadows (and copies), * they will stay around as well. 
*/ first_object->ref_count++; first_object->paging_in_progress++; /* * INVARIANTS (through entire routine): * * 1) At all times, we must either have the object lock or a busy * page in some object to prevent some other process from trying to * bring in the same page. * * Note that we cannot hold any locks during the pager access or when * waiting for memory, so we use a busy page then. * * Note also that we aren't as concerned about more than one thead * attempting to pager_data_unlock the same page at once, so we don't * hold the page as busy then, but do record the highest unlock value * so far. [Unlock requests may also be delivered out of order.] * * 2) Once we have a busy page, we must remove it from the pageout * queues, so that the pageout daemon will not grab it away. * * 3) To prevent another process from racing us down the shadow chain * and entering a new page in the top object before we do, we must * keep a busy page in the top object while following the shadow * chain. * * 4) We must increment paging_in_progress on any object for which * we have a busy page, to prevent vm_object_collapse from removing * the busy page without our noticing. */ /* * Search for the page at object/offset. */ object = first_object; pindex = first_pindex; /* * See whether this page is resident */ while (TRUE) { m = vm_page_lookup(object, pindex); if (m != NULL) { int queue; /* * If the page is being brought in, wait for it and * then retry. */ if ((m->flags & PG_BUSY) || m->busy) { int s; UNLOCK_THINGS; s = splvm(); if (((m->flags & PG_BUSY) || m->busy)) { m->flags |= PG_WANTED | PG_REFERENCED; cnt.v_intrans++; tsleep(m, PSWP, "vmpfw", 0); } splx(s); vm_object_deallocate(first_object); goto RetryFault; } queue = m->queue; - vm_page_unqueue(m,0); + vm_page_unqueue_nowakeup(m); /* * Mark page busy for other processes, and the pagedaemon. 
*/ if ((queue == PQ_CACHE) && (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { vm_page_activate(m); UNLOCK_AND_DEALLOCATE; VM_WAIT; goto RetryFault; } m->flags |= PG_BUSY; if (m->valid && ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && m->object != kernel_object && m->object != kmem_object) { goto readrest; } break; } if (((object->type != OBJT_DEFAULT) && (!change_wiring || wired)) || (object == first_object)) { if (pindex >= object->size) { UNLOCK_AND_DEALLOCATE; return (KERN_PROTECTION_FAILURE); } /* * Allocate a new page for this object/offset pair. */ m = vm_page_alloc(object, pindex, (vp || object->backing_object)?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); if (m == NULL) { UNLOCK_AND_DEALLOCATE; VM_WAIT; goto RetryFault; } } readrest: if (object->type != OBJT_DEFAULT && (!change_wiring || wired)) { int rv; int faultcount; int reqpage; int ahead, behind; ahead = VM_FAULT_READ_AHEAD; behind = VM_FAULT_READ_BEHIND; if (first_object->behavior == OBJ_RANDOM) { ahead = 0; behind = 0; } if ((first_object->type != OBJT_DEVICE) && (first_object->behavior == OBJ_SEQUENTIAL)) { vm_pindex_t firstpindex, tmppindex; if (first_pindex < 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1)) firstpindex = 0; else firstpindex = first_pindex - 2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1); for(tmppindex = first_pindex - 1; tmppindex >= first_pindex; --tmppindex) { vm_page_t mt; mt = vm_page_lookup( first_object, tmppindex); if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL)) break; if (mt->busy || (mt->flags & (PG_BUSY|PG_FICTITIOUS)) || mt->hold_count || mt->wire_count) continue; if (mt->dirty == 0) vm_page_test_dirty(mt); if (mt->dirty) { vm_page_protect(mt, VM_PROT_NONE); vm_page_deactivate(mt); } else { vm_page_cache(mt); } } ahead += behind; behind = 0; } /* * now we find out if any other pages should be paged * in at this time this routine checks to see if the * pages surrounding this fault reside in the same * object as the page for this fault. 
If they do, * then they are faulted in also into the object. The * array "marray" returned contains an array of * vm_page_t structs where one of them is the * vm_page_t passed to the routine. The reqpage * return value is the index into the marray for the * vm_page_t passed to the routine. */ faultcount = vm_fault_additional_pages( m, behind, ahead, marray, &reqpage); /* * Call the pager to retrieve the data, if any, after * releasing the lock on the map. */ UNLOCK_MAP; rv = faultcount ? vm_pager_get_pages(object, marray, faultcount, reqpage) : VM_PAGER_FAIL; if (rv == VM_PAGER_OK) { /* * Found the page. Leave it busy while we play * with it. */ /* * Relookup in case pager changed page. Pager * is responsible for disposition of old page * if moved. */ m = vm_page_lookup(object, pindex); if( !m) { UNLOCK_AND_DEALLOCATE; goto RetryFault; } hardfault++; break; } /* * Remove the bogus page (which does not exist at this * object/offset); before doing so, we must get back * our object lock to preserve our invariant. * * Also wake up any other process that may want to bring * in this page. * * If this is the top-level object, we must leave the * busy page to prevent another process from rushing * past us, and inserting the page in that object at * the same time that we are. */ if (rv == VM_PAGER_ERROR) printf("vm_fault: pager input (probably hardware) error, PID %d failure\n", curproc->p_pid); /* * Data outside the range of the pager or an I/O error */ /* * XXX - the check for kernel_map is a kludge to work * around having the machine panic on a kernel space * fault w/ I/O error. */ if (((map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { FREE_PAGE(m); UNLOCK_AND_DEALLOCATE; return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (object != first_object) { FREE_PAGE(m); /* * XXX - we cannot just fall out at this * point, m has been freed and is invalid! 
*/ } } /* * We get here if the object has default pager (or unwiring) or the * pager doesn't have the page. */ if (object == first_object) first_m = m; /* * Move on to the next object. Lock the next object before * unlocking the current one. */ pindex += OFF_TO_IDX(object->backing_object_offset); next_object = object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ if (object != first_object) { vm_object_pip_wakeup(object); object = first_object; pindex = first_pindex; m = first_m; } first_m = NULL; if ((m->flags & PG_ZERO) == 0) vm_page_zero_fill(m); cnt.v_zfod++; break; } else { if (object != first_object) { vm_object_pip_wakeup(object); } object = next_object; object->paging_in_progress++; } } if ((m->flags & PG_BUSY) == 0) panic("vm_fault: not busy after main loop"); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] */ old_m = m; /* save page that would be copied */ /* * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ if (object != first_object) { /* * We only really need to copy if we want to write it. */ if (fault_type & VM_PROT_WRITE) { /* * This allows pages to be virtually copied from a backing_object * into the first_object, where the backing object has no other * refs to it, and cannot gain any more refs. Instead of a * bcopy, we just move the page from the backing object to the * first object. Note that we must mark the page dirty in the * first object so that it will go out to swap when needed. 
*/ if (lookup_still_valid && /* * Only one shadow object */ (object->shadow_count == 1) && /* * No COW refs, except us */ (object->ref_count == 1) && /* * Noone else can look this object up */ (object->handle == NULL) && /* * No other ways to look the object up */ ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP)) && /* * We don't chase down the shadow chain */ (object == first_object->backing_object)) { /* * get rid of the unnecessary page */ vm_page_protect(first_m, VM_PROT_NONE); PAGE_WAKEUP(first_m); vm_page_free(first_m); /* * grab the page and put it into the process'es object */ vm_page_rename(m, first_object, first_pindex); first_m = m; m->dirty = VM_PAGE_BITS_ALL; m = NULL; + ++vm_fault_copy_save_1; } else { /* * Oh, well, lets copy it. */ vm_page_copy(m, first_m); } /* * This code handles the case where there are two references to the * backing object, and one reference is getting a copy of the * page. If the other reference is the only other object that * points to the backing object, then perform a virtual copy * from the backing object to the other object after the * page is copied to the current first_object. If the other * object already has the page, we destroy it in the backing object * performing an optimized collapse-type operation. We don't * bother removing the page from the backing object's swap space. */ if (lookup_still_valid && /* * make sure that we have two shadow objs */ (object->shadow_count == 2) && /* * And no COW refs -- note that there are sometimes * temp refs to objs, but ignore that case -- we just * punt. 
*/ (object->ref_count == 2) && /* * Noone else can look us up */ (object->handle == NULL) && /* * Not something that can be referenced elsewhere */ ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP)) && /* * We don't bother chasing down object chain */ (object == first_object->backing_object)) { vm_object_t other_object; vm_pindex_t other_pindex, other_pindex_offset; vm_page_t tm; other_object = TAILQ_FIRST(&object->shadow_head); if (other_object == first_object) other_object = TAILQ_NEXT(other_object, shadow_list); if (!other_object) panic("vm_fault: other object missing"); if (other_object && (other_object->type == OBJT_DEFAULT) && (other_object->paging_in_progress == 0)) { other_pindex_offset = OFF_TO_IDX(other_object->backing_object_offset); if (pindex >= other_pindex_offset) { other_pindex = pindex - other_pindex_offset; /* * If the other object has the page, just free it. */ if ((tm = vm_page_lookup(other_object, other_pindex))) { if ((tm->flags & PG_BUSY) == 0 && tm->busy == 0 && tm->valid == VM_PAGE_BITS_ALL) { /* * get rid of the unnecessary page */ vm_page_protect(m, VM_PROT_NONE); PAGE_WAKEUP(m); vm_page_free(m); m = NULL; + ++vm_fault_free_1; tm->dirty = VM_PAGE_BITS_ALL; first_m->dirty = VM_PAGE_BITS_ALL; } } else { /* * If the other object doesn't have the page, * then we move it there. */ vm_page_rename(m, other_object, other_pindex); m->dirty = VM_PAGE_BITS_ALL; m->valid = VM_PAGE_BITS_ALL; + ++vm_fault_copy_save_2; } } } } if (m) { if (m->queue != PQ_ACTIVE) vm_page_activate(m); - /* - * We no longer need the old page or object. - */ + /* + * We no longer need the old page or object. + */ PAGE_WAKEUP(m); } vm_object_pip_wakeup(object); /* * Only use the new page below... */ cnt.v_cow_faults++; m = first_m; object = first_object; pindex = first_pindex; /* * Now that we've gotten the copy out of the way, * let's try to collapse the top object. * * But we have to play ugly games with * paging_in_progress to do that... 
*/ vm_object_pip_wakeup(object); vm_object_collapse(object); object->paging_in_progress++; } else { prot &= ~VM_PROT_WRITE; } } /* * We must verify that the maps have not changed since our last * lookup. */ if (!lookup_still_valid) { vm_object_t retry_object; vm_pindex_t retry_pindex; vm_prot_t retry_prot; /* * Since map entries may be pageable, make sure we can take a * page fault on them. */ /* * To avoid trying to write_lock the map while another process * has it read_locked (in vm_map_pageable), we do not try for * write permission. If the page is still writable, we will * get write permission. If it is not, or has been marked * needs_copy, we enter the mapping without write permission, * and will merely take another fault. */ result = vm_map_lookup(&map, vaddr, fault_type & ~VM_PROT_WRITE, &entry, &retry_object, &retry_pindex, &retry_prot, &wired, &su); /* * If we don't need the page any longer, put it on the active * list (the easiest thing to do here). If no one needs it, * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { RELEASE_PAGE(m); UNLOCK_AND_DEALLOCATE; return (result); } lookup_still_valid = TRUE; if ((retry_object != first_object) || (retry_pindex != first_pindex)) { RELEASE_PAGE(m); UNLOCK_AND_DEALLOCATE; goto RetryFault; } /* * Check whether the protection has changed or the object has * been copied while we left the map unlocked. Changing from * read to write permission is OK - we leave the page * write-protected, and catch the write fault. Changing from * write to read permission means that we can't mark the page * write-enabled after all. */ prot &= retry_prot; } /* * Put this page into the physical map. We had to do the unlock above * because pmap_enter may cause other faults. We don't put the page * back on the active queue until later so that the page-out daemon * won't find us (yet). 
*/ if (prot & VM_PROT_WRITE) { m->flags |= PG_WRITEABLE; m->object->flags |= OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY; /* * If the fault is a write, we know that this page is being * written NOW. This will save on the pmap_is_modified() calls * later. */ if (fault_type & VM_PROT_WRITE) { m->dirty = VM_PAGE_BITS_ALL; } } UNLOCK_THINGS; m->valid = VM_PAGE_BITS_ALL; m->flags &= ~PG_ZERO; pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired); if ((change_wiring == 0) && (wired == 0)) pmap_prefault(map->pmap, vaddr, entry, first_object); m->flags |= PG_MAPPED|PG_REFERENCED; /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ if (change_wiring) { if (wired) vm_page_wire(m); else vm_page_unwire(m); } else { if (m->queue != PQ_ACTIVE) vm_page_activate(m); } if (curproc && (curproc->p_flag & P_INMEM) && curproc->p_stats) { if (hardfault) { curproc->p_stats->p_ru.ru_majflt++; } else { curproc->p_stats->p_ru.ru_minflt++; } } /* * Unlock everything, and return */ PAGE_WAKEUP(m); vm_object_deallocate(first_object); return (KERN_SUCCESS); } /* * vm_fault_wire: * * Wire down a range of virtual addresses in a map. */ int vm_fault_wire(map, start, end) vm_map_t map; vm_offset_t start, end; { register vm_offset_t va; register pmap_t pmap; int rv; pmap = vm_map_pmap(map); /* * Inform the physical mapping system that the range of addresses may * not fault, so that page tables and such can be locked down as well. */ pmap_pageable(pmap, start, end, FALSE); /* * We simulate a fault to get the page and enter it in the physical * map. */ for (va = start; va < end; va += PAGE_SIZE) { rv = vm_fault(map, va, VM_PROT_READ|VM_PROT_WRITE, TRUE); if (rv) { if (va != start) vm_fault_unwire(map, start, va); return (rv); } } return (KERN_SUCCESS); } /* * vm_fault_unwire: * * Unwire a range of virtual addresses in a map. 
*/ void vm_fault_unwire(map, start, end) vm_map_t map; vm_offset_t start, end; { register vm_offset_t va, pa; register pmap_t pmap; pmap = vm_map_pmap(map); /* * Since the pages are wired down, we must be able to get their * mappings from the physical map system. */ for (va = start; va < end; va += PAGE_SIZE) { pa = pmap_extract(pmap, va); if (pa != (vm_offset_t) 0) { pmap_change_wiring(pmap, va, FALSE); vm_page_unwire(PHYS_TO_VM_PAGE(pa)); } } /* * Inform the physical mapping system that the range of addresses may * fault, so that page tables and such may be unwired themselves. */ pmap_pageable(pmap, start, end, TRUE); } /* * Routine: * vm_fault_copy_entry * Function: * Copy all of the pages from a wired-down map entry to another. * * In/out conditions: * The source and destination maps must be locked for write. * The source map entry must be wired down (or be a sharing map * entry corresponding to a main map entry that is wired down). */ void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry) vm_map_t dst_map; vm_map_t src_map; vm_map_entry_t dst_entry; vm_map_entry_t src_entry; { vm_object_t dst_object; vm_object_t src_object; vm_ooffset_t dst_offset; vm_ooffset_t src_offset; vm_prot_t prot; vm_offset_t vaddr; vm_page_t dst_m; vm_page_t src_m; #ifdef lint src_map++; #endif /* lint */ src_object = src_entry->object.vm_object; src_offset = src_entry->offset; /* * Create the top-level object for the destination entry. (Doesn't * actually shadow anything - we copy the pages directly.) */ dst_object = vm_object_allocate(OBJT_DEFAULT, (vm_size_t) OFF_TO_IDX(dst_entry->end - dst_entry->start)); dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; prot = dst_entry->max_protection; /* * Loop through all of the pages in the entry's range, copying each * one from the source object (it should be there) to the destination * object. 
*/ for (vaddr = dst_entry->start, dst_offset = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) { /* * Allocate a page in the destination object */ do { dst_m = vm_page_alloc(dst_object, OFF_TO_IDX(dst_offset), VM_ALLOC_NORMAL); if (dst_m == NULL) { VM_WAIT; } } while (dst_m == NULL); /* * Find the page in the source object, and copy it in. * (Because the source is wired down, the page will be in * memory.) */ src_m = vm_page_lookup(src_object, OFF_TO_IDX(dst_offset + src_offset)); if (src_m == NULL) panic("vm_fault_copy_wired: page missing"); vm_page_copy(src_m, dst_m); /* * Enter it in the pmap... */ dst_m->flags &= ~PG_ZERO; pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m), prot, FALSE); dst_m->flags |= PG_WRITEABLE|PG_MAPPED; /* * Mark it no longer busy, and put it on the active list. */ vm_page_activate(dst_m); PAGE_WAKEUP(dst_m); } } /* * This routine checks around the requested page for other pages that * might be able to be faulted in. This routine brackets the viable * pages for the pages to be paged in. 
* * Inputs: * m, rbehind, rahead * * Outputs: * marray (array of vm_page_t), reqpage (index of requested page) * * Return value: * number of pages in marray */ int vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) vm_page_t m; int rbehind; int rahead; vm_page_t *marray; int *reqpage; { int i; vm_object_t object; vm_pindex_t pindex, startpindex, endpindex, tpindex; vm_offset_t size; vm_page_t rtm; int treqpage; int cbehind, cahead; object = m->object; pindex = m->pindex; /* * we don't fault-ahead for device pager */ if (object->type == OBJT_DEVICE) { *reqpage = 0; marray[0] = m; return 1; } /* * if the requested page is not available, then give up now */ if (!vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead)) return 0; if ((cbehind == 0) && (cahead == 0)) { *reqpage = 0; marray[0] = m; return 1; } if (rahead > cahead) { rahead = cahead; } if (rbehind > cbehind) { rbehind = cbehind; } /* * try to do any readahead that we might have free pages for. 
*/ if ((rahead + rbehind) > ((cnt.v_free_count + cnt.v_cache_count) - cnt.v_free_reserved)) { pagedaemon_wakeup(); *reqpage = 0; marray[0] = m; return 1; } /* * scan backward for the read behind pages -- in memory or on disk not * in same object */ tpindex = pindex - 1; if (tpindex < pindex) { if (rbehind > pindex) rbehind = pindex; startpindex = pindex - rbehind; while (tpindex >= startpindex) { if (vm_page_lookup( object, tpindex)) { startpindex = tpindex + 1; break; } if (tpindex == 0) break; tpindex -= 1; } } else { startpindex = pindex; } /* * scan forward for the read ahead pages -- in memory or on disk not * in same object */ tpindex = pindex + 1; endpindex = pindex + (rahead + 1); if (endpindex > object->size) endpindex = object->size; - while (tpindex < endpindex) { + while (tpindex < endpindex) { if ( vm_page_lookup(object, tpindex)) { break; } tpindex += 1; } endpindex = tpindex; /* calculate number of bytes of pages */ size = endpindex - startpindex; /* calculate the page offset of the required page */ treqpage = pindex - startpindex; /* see if we have space (again) */ if ((cnt.v_free_count + cnt.v_cache_count) > (cnt.v_free_reserved + size)) { /* * get our pages and don't block for them */ for (i = 0; i < size; i++) { if (i != treqpage) { rtm = vm_page_alloc(object, startpindex + i, VM_ALLOC_NORMAL); if (rtm == NULL) { if (i < treqpage) { int j; for (j = 0; j < i; j++) { FREE_PAGE(marray[j]); } *reqpage = 0; marray[0] = m; return 1; } else { size = i; *reqpage = treqpage; return size; } } marray[i] = rtm; } else { marray[i] = m; } } *reqpage = treqpage; return size; } *reqpage = 0; marray[0] = m; return 1; } Index: head/sys/vm/vm_map.c =================================================================== --- head/sys/vm/vm_map.c (revision 17333) +++ head/sys/vm/vm_map.c (revision 17334) @@ -1,2449 +1,2440 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.c,v 1.52 1996/07/07 03:27:41 davidg Exp $ + * $Id: vm_map.c,v 1.53 1996/07/27 03:23:56 dyson Exp $ */ /* * Virtual memory mapping module. */ #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Virtual memory maps provide for the mapping, protection, * and sharing of virtual memory objects. In addition, * this module provides for an efficient virtual copy of * memory from one map to another. * * Synchronization is required prior to most operations. * * Maps consist of an ordered doubly-linked list of simple * entries; a single hint is used to speed up lookups. * * In order to properly represent the sharing of virtual * memory regions among maps, the map structure is bi-level. * Top-level ("address") maps refer to regions of sharable * virtual memory. These regions are implemented as * ("sharing") maps, which then refer to the actual virtual * memory objects. 
When two address maps "share" memory, * their top-level maps both have references to the same * sharing map. When memory is virtual-copied from one * address map to another, the references in the sharing * maps are actually copied -- no copying occurs at the * virtual memory object level. * * Since portions of maps are specified by start/end addresses, * which may not align with existing map entries, all * routines merely "clip" entries to these start/end values. * [That is, an entry is split into two, bordering at a * start or end value.] Note that these clippings may not * always be necessary (as the two resulting entries are then * not changed); however, the clipping is done for convenience. * No attempt is currently made to "glue back together" two * abutting entries. * * As mentioned above, virtual copy operations are performed * by copying VM object references from one sharing map to * another, and then marking both regions as copy-on-write. * It is important to note that only one writeable reference * to a VM object region exists in any map -- this means that * shadow object creation can be delayed until a write operation * occurs. */ /* * vm_map_startup: * * Initialize the vm_map module. Must be called before * any other vm_map routines. * * Map and entry structures are allocated from the general * purpose memory pool with some exceptions: * * - The kernel map and kmem submap are allocated statically. * - Kernel map entries are allocated out of a static pool. * * These restrictions are necessary since malloc() uses the * maps and requires map entries.
*/ vm_offset_t kentry_data; vm_size_t kentry_data_size; static vm_map_entry_t kentry_free; static vm_map_t kmap_free; extern char kstack[]; static int kentry_count; static vm_offset_t mapvm_start, mapvm, mapvmmax; static int mapvmpgcnt; static struct vm_map_entry *mappool; static int mappoolcnt; #define KENTRY_LOW_WATER 128 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); static vm_map_entry_t vm_map_entry_create __P((vm_map_t)); static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t)); static __inline void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t)); static void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t)); -static __pure int vm_map_simplify_okay __P((vm_map_entry_t entry1, - vm_map_entry_t entry2)); void vm_map_startup() { register int i; register vm_map_entry_t mep; vm_map_t mp; /* * Static map structures for allocation before initialization of * kernel map or kmem map. vm_map_create knows how to deal with them. */ kmap_free = mp = (vm_map_t) kentry_data; i = MAX_KMAP; while (--i > 0) { mp->header.next = (vm_map_entry_t) (mp + 1); mp++; } mp++->header.next = NULL; /* * Form a free list of statically allocated kernel map entries with * the rest. */ kentry_free = mep = (vm_map_entry_t) mp; kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep; while (--i > 0) { mep->next = mep + 1; mep++; } mep->next = NULL; } /* * Allocate a vmspace structure, including a vm_map and pmap, * and initialize those structures. The refcnt is set to 1. * The remaining fields must be initialized by the caller. 
*/ struct vmspace * vmspace_alloc(min, max, pageable) vm_offset_t min, max; int pageable; { register struct vmspace *vm; if (mapvmpgcnt == 0 && mapvm == 0) { mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE; mapvm_start = mapvm = kmem_alloc_pageable(kernel_map, mapvmpgcnt * PAGE_SIZE); mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE; if (!mapvm) mapvmpgcnt = 0; } MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK); bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm); vm_map_init(&vm->vm_map, min, max, pageable); pmap_pinit(&vm->vm_pmap); vm->vm_map.pmap = &vm->vm_pmap; /* XXX */ + vm->vm_pmap.pm_map = &vm->vm_map; vm->vm_refcnt = 1; return (vm); } void vmspace_free(vm) register struct vmspace *vm; { if (vm->vm_refcnt == 0) panic("vmspace_free: attempt to free already freed vmspace"); if (--vm->vm_refcnt == 0) { /* * Lock the map, to wait out all other references to it. * Delete all of the mappings and pages they hold, then call * the pmap module to reclaim anything left. */ vm_map_lock(&vm->vm_map); (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset); vm_map_unlock(&vm->vm_map); while( vm->vm_map.ref_count != 1) tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0); --vm->vm_map.ref_count; vm_object_pmap_remove(vm->vm_upages_obj, 0, vm->vm_upages_obj->size); vm_object_deallocate(vm->vm_upages_obj); pmap_release(&vm->vm_pmap); FREE(vm, M_VMMAP); } else { wakeup(&vm->vm_map.ref_count); } } /* * vm_map_create: * * Creates and returns a new empty VM map with * the given physical map structure, and having * the given lower and upper address bounds. 
*/ vm_map_t vm_map_create(pmap, min, max, pageable) pmap_t pmap; vm_offset_t min, max; boolean_t pageable; { register vm_map_t result; if (kmem_map == NULL) { result = kmap_free; kmap_free = (vm_map_t) result->header.next; if (result == NULL) panic("vm_map_create: out of maps"); } else MALLOC(result, vm_map_t, sizeof(struct vm_map), M_VMMAP, M_WAITOK); vm_map_init(result, min, max, pageable); result->pmap = pmap; return (result); } /* * Initialize an existing vm_map structure * such as that in the vmspace structure. * The pmap is set elsewhere. */ void vm_map_init(map, min, max, pageable) register struct vm_map *map; vm_offset_t min, max; boolean_t pageable; { map->header.next = map->header.prev = &map->header; map->nentries = 0; map->size = 0; map->ref_count = 1; map->is_main_map = TRUE; map->min_offset = min; map->max_offset = max; map->entries_pageable = pageable; map->first_free = &map->header; map->hint = &map->header; map->timestamp = 0; lock_init(&map->lock, TRUE); } /* * vm_map_entry_dispose: [ internal use only ] * * Inverse of vm_map_entry_create. */ static __inline void vm_map_entry_dispose(map, entry) vm_map_t map; vm_map_entry_t entry; { int s; if (kentry_count < KENTRY_LOW_WATER) { s = splvm(); entry->next = kentry_free; kentry_free = entry; ++kentry_count; splx(s); } else { entry->next = mappool; mappool = entry; ++mappoolcnt; } } /* * vm_map_entry_create: [ internal use only ] * * Allocates a VM map entry for insertion. * No entry fields are filled in. This routine is */ static vm_map_entry_t vm_map_entry_create(map) vm_map_t map; { vm_map_entry_t entry; int i; int s; /* * This is a *very* nasty (and sort of incomplete) hack!!!! */ if (kentry_count < KENTRY_LOW_WATER) { s = splvm(); if (mapvmpgcnt && mapvm) { vm_page_t m; m = vm_page_alloc(kernel_object, OFF_TO_IDX(mapvm - VM_MIN_KERNEL_ADDRESS), (map == kmem_map || map == mb_map) ? 
VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL); if (m) { int newentries; newentries = (PAGE_SIZE / sizeof(struct vm_map_entry)); vm_page_wire(m); PAGE_WAKEUP(m); m->valid = VM_PAGE_BITS_ALL; pmap_kenter(mapvm, VM_PAGE_TO_PHYS(m)); m->flags |= PG_WRITEABLE; entry = (vm_map_entry_t) mapvm; mapvm += PAGE_SIZE; --mapvmpgcnt; for (i = 0; i < newentries; i++) { vm_map_entry_dispose(kernel_map, entry); entry++; } } } splx(s); } if (map == kernel_map || map == kmem_map || map == mb_map || map == pager_map) { s = splvm(); entry = kentry_free; if (entry) { kentry_free = entry->next; --kentry_count; } else { panic("vm_map_entry_create: out of map entries for kernel"); } splx(s); } else { entry = mappool; if (entry) { mappool = entry->next; --mappoolcnt; } else { MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry), M_VMMAPENT, M_WAITOK); } } return (entry); } /* * vm_map_entry_{un,}link: * * Insert/remove entries from maps. */ #define vm_map_entry_link(map, after_where, entry) \ { \ (map)->nentries++; \ (entry)->prev = (after_where); \ (entry)->next = (after_where)->next; \ (entry)->prev->next = (entry); \ (entry)->next->prev = (entry); \ } #define vm_map_entry_unlink(map, entry) \ { \ (map)->nentries--; \ (entry)->next->prev = (entry)->prev; \ (entry)->prev->next = (entry)->next; \ } /* * vm_map_reference: * * Creates another valid reference to the given map. * */ void vm_map_reference(map) register vm_map_t map; { if (map == NULL) return; map->ref_count++; } /* * vm_map_deallocate: * * Removes a reference from the specified map, * destroying it if no references remain. * The map should not be locked. */ void vm_map_deallocate(map) register vm_map_t map; { register int c; if (map == NULL) return; c = map->ref_count; if (c == 0) panic("vm_map_deallocate: deallocating already freed map"); if (c != 1) { --map->ref_count; wakeup(&map->ref_count); return; } /* * Lock the map, to wait out all other references to it. 
*/ vm_map_lock(map); (void) vm_map_delete(map, map->min_offset, map->max_offset); --map->ref_count; if( map->ref_count != 0) { vm_map_unlock(map); return; } pmap_destroy(map->pmap); FREE(map, M_VMMAP); } /* * SAVE_HINT: * * Saves the specified entry as the hint for * future lookups. */ #define SAVE_HINT(map,value) \ (map)->hint = (value); /* * vm_map_lookup_entry: [ internal use only ] * * Finds the map entry containing (or * immediately preceding) the specified address * in the given map; the entry is returned * in the "entry" parameter. The boolean * result indicates whether the address is * actually contained in the map. */ boolean_t vm_map_lookup_entry(map, address, entry) register vm_map_t map; register vm_offset_t address; vm_map_entry_t *entry; /* OUT */ { register vm_map_entry_t cur; register vm_map_entry_t last; /* * Start looking either from the head of the list, or from the hint. */ cur = map->hint; if (cur == &map->header) cur = cur->next; if (address >= cur->start) { /* * Go from hint to end of list. * * But first, make a quick check to see if we are already looking * at the entry we want (which is usually the case). Note also * that we don't need to save the hint here... it is the same * hint (unless we are at the header, in which case the hint * didn't buy us anything anyway). */ last = &map->header; if ((cur != last) && (cur->end > address)) { *entry = cur; return (TRUE); } } else { /* * Go from start to hint, *inclusively* */ last = cur->next; cur = map->header.next; } /* * Search linearly */ while (cur != last) { if (cur->end > address) { if (address >= cur->start) { /* * Save this lookup for future hints, and * return */ *entry = cur; SAVE_HINT(map, cur); return (TRUE); } break; } cur = cur->next; } *entry = cur->prev; SAVE_HINT(map, *entry); return (FALSE); } /* * vm_map_insert: * * Inserts the given whole VM object into the target * map at the specified address range. The object's * size should match that of the address range. 
* * Requires that the map be locked, and leaves it so. */ int vm_map_insert(map, object, offset, start, end, prot, max, cow) vm_map_t map; vm_object_t object; vm_ooffset_t offset; vm_offset_t start; vm_offset_t end; vm_prot_t prot, max; int cow; { register vm_map_entry_t new_entry; register vm_map_entry_t prev_entry; vm_map_entry_t temp_entry; vm_object_t prev_object; /* * Check that the start and end points are not bogus. */ if ((start < map->min_offset) || (end > map->max_offset) || (start >= end)) return (KERN_INVALID_ADDRESS); /* * Find the entry prior to the proposed starting address; if it's part * of an existing entry, this range is bogus. */ if (vm_map_lookup_entry(map, start, &temp_entry)) return (KERN_NO_SPACE); prev_entry = temp_entry; /* * Assert that the next entry doesn't overlap the end point. */ if ((prev_entry->next != &map->header) && (prev_entry->next->start < end)) return (KERN_NO_SPACE); if ((prev_entry != &map->header) && - (object == NULL) && (prev_entry->end == start) && + ((object == NULL) || (prev_entry->object.vm_object == object)) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && (prev_entry->inheritance == VM_INHERIT_DEFAULT) && (prev_entry->protection == prot) && (prev_entry->max_protection == max) && (prev_entry->wired_count == 0)) { /* * See if we can avoid creating a new entry by extending one of our * neighbors. */ - if (vm_object_coalesce(prev_entry->object.vm_object, - OFF_TO_IDX(prev_entry->offset), - (vm_size_t) (prev_entry->end - - prev_entry->start), - (vm_size_t) (end - prev_entry->end))) { + if (object == NULL) { + if (vm_object_coalesce(prev_entry->object.vm_object, + OFF_TO_IDX(prev_entry->offset), + (vm_size_t) (prev_entry->end + - prev_entry->start), + (vm_size_t) (end - prev_entry->end))) { - /* - * Coalesced the two objects - can extend the - * previous map entry to include the new - * range. 
- */ - map->size += (end - prev_entry->end); - prev_entry->end = end; - prev_object = prev_entry->object.vm_object; - default_pager_convert_to_swapq(prev_object); - return (KERN_SUCCESS); + /* + * Coalesced the two objects - can extend the + * previous map entry to include the new + * range. + */ + map->size += (end - prev_entry->end); + prev_entry->end = end; + prev_object = prev_entry->object.vm_object; + default_pager_convert_to_swapq(prev_object); + return (KERN_SUCCESS); + } } } /* * Create a new entry */ new_entry = vm_map_entry_create(map); new_entry->start = start; new_entry->end = end; new_entry->is_a_map = FALSE; new_entry->is_sub_map = FALSE; new_entry->object.vm_object = object; new_entry->offset = offset; if (cow & MAP_COPY_NEEDED) new_entry->needs_copy = TRUE; else new_entry->needs_copy = FALSE; if (cow & MAP_COPY_ON_WRITE) new_entry->copy_on_write = TRUE; else new_entry->copy_on_write = FALSE; if (map->is_main_map) { new_entry->inheritance = VM_INHERIT_DEFAULT; new_entry->protection = prot; new_entry->max_protection = max; new_entry->wired_count = 0; } /* * Insert the new entry into the list */ vm_map_entry_link(map, prev_entry, new_entry); map->size += new_entry->end - new_entry->start; /* * Update the free space hint */ - if (map->first_free == prev_entry) { - if (prev_entry->end == new_entry->start) - map->first_free = new_entry; - } + if ((map->first_free == prev_entry) && + (prev_entry->end >= new_entry->start)) + map->first_free = new_entry; default_pager_convert_to_swapq(object); return (KERN_SUCCESS); } /* * Find sufficient space for `length' bytes in the given map, starting at * `start'. The map must be locked. Returns 0 on success, 1 on no space. 
*/ int vm_map_findspace(map, start, length, addr) register vm_map_t map; register vm_offset_t start; vm_size_t length; vm_offset_t *addr; { register vm_map_entry_t entry, next; register vm_offset_t end; if (start < map->min_offset) start = map->min_offset; if (start > map->max_offset) return (1); /* * Look for the first possible address; if there's already something * at this address, we have to start after it. */ if (start == map->min_offset) { - if ((entry = map->first_free) != &map->header) { + if ((entry = map->first_free) != &map->header) start = entry->end; - } } else { vm_map_entry_t tmp; if (vm_map_lookup_entry(map, start, &tmp)) start = tmp->end; entry = tmp; } /* * Look through the rest of the map, trying to fit a new region in the * gap between existing regions, or after the very last region. */ for (;; start = (entry = next)->end) { /* * Find the end of the proposed new region. Be sure we didn't * go beyond the end of the map, or wrap around the address; * if so, we lose. Otherwise, if this is the last entry, or * if the proposed new region fits before the next entry, we * win. */ end = start + length; if (end > map->max_offset || end < start) return (1); next = entry->next; if (next == &map->header || next->start >= end) break; } SAVE_HINT(map, entry); *addr = start; if (map == kernel_map && round_page(start + length) > kernel_vm_end) pmap_growkernel(round_page(start + length)); return (0); } /* * vm_map_find finds an unallocated region in the target address * map with the given length. The search is defined to be * first-fit from the specified address; the region found is * returned in the same parameter. 
* */ int vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow) vm_map_t map; vm_object_t object; vm_ooffset_t offset; vm_offset_t *addr; /* IN/OUT */ vm_size_t length; boolean_t find_space; vm_prot_t prot, max; int cow; { register vm_offset_t start; int result, s = 0; start = *addr; if (map == kmem_map || map == mb_map) s = splvm(); vm_map_lock(map); if (find_space) { if (vm_map_findspace(map, start, length, addr)) { vm_map_unlock(map); if (map == kmem_map || map == mb_map) splx(s); return (KERN_NO_SPACE); } start = *addr; } result = vm_map_insert(map, object, offset, start, start + length, prot, max, cow); vm_map_unlock(map); if (map == kmem_map || map == mb_map) splx(s); return (result); } -static __pure int -vm_map_simplify_okay(entry1, entry2) - vm_map_entry_t entry1, entry2; -{ - if ((entry1->end != entry2->start) || - (entry1->object.vm_object != entry2->object.vm_object)) - return 0; - if (entry1->object.vm_object) { - if (entry1->object.vm_object->behavior != - entry2->object.vm_object->behavior) - return 0; - if (entry1->offset + (entry1->end - entry1->start) != - entry2->offset) - return 0; - } - if ((entry1->needs_copy != entry2->needs_copy) || - (entry1->copy_on_write != entry2->copy_on_write) || - (entry1->protection != entry2->protection) || - (entry1->max_protection != entry2->max_protection) || - (entry1->inheritance != entry2->inheritance) || - (entry1->is_sub_map != FALSE) || - (entry1->is_a_map != FALSE) || - (entry1->wired_count != 0) || - (entry2->is_sub_map != FALSE) || - (entry2->is_a_map != FALSE) || - (entry2->wired_count != 0)) - return 0; - - return 1; -} - /* * vm_map_simplify_entry: [ internal use only ] + * + * Simplify the given map entry by: + * removing extra sharing maps + * [XXX maybe later] merging with a neighbor */ static void vm_map_simplify_entry(map, entry) vm_map_t map; vm_map_entry_t entry; { vm_map_entry_t next, prev; + vm_size_t nextsize, prevsize, esize; - if (entry->is_a_map || entry->is_sub_map || 
entry->wired_count) + /* + * If this entry corresponds to a sharing map, then see if we can + * remove the level of indirection. If it's not a sharing map, then it + * points to a VM object, so see if we can merge with either of our + * neighbors. + */ + + if (entry->is_sub_map || entry->is_a_map || entry->wired_count) return; prev = entry->prev; if (prev != &map->header) { - if ( vm_map_simplify_okay(prev, entry)) { + prevsize = prev->end - prev->start; + if ( (prev->end == entry->start) && + (prev->object.vm_object == entry->object.vm_object) && + (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) && + (!prev->object.vm_object || + (prev->offset + prevsize == entry->offset)) && + (prev->needs_copy == entry->needs_copy) && + (prev->copy_on_write == entry->copy_on_write) && + (prev->protection == entry->protection) && + (prev->max_protection == entry->max_protection) && + (prev->inheritance == entry->inheritance) && + (prev->is_a_map == FALSE) && + (prev->is_sub_map == FALSE) && + (prev->wired_count == 0)) { if (map->first_free == prev) map->first_free = entry; if (map->hint == prev) map->hint = entry; vm_map_entry_unlink(map, prev); entry->start = prev->start; entry->offset = prev->offset; if (prev->object.vm_object) vm_object_deallocate(prev->object.vm_object); vm_map_entry_dispose(map, prev); } } next = entry->next; if (next != &map->header) { - if ( vm_map_simplify_okay(entry, next)) { + nextsize = next->end - next->start; + esize = entry->end - entry->start; + if ((entry->end == next->start) && + (next->object.vm_object == entry->object.vm_object) && + (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) && + (!entry->object.vm_object || + (entry->offset + esize == next->offset)) && + (next->needs_copy == entry->needs_copy) && + (next->copy_on_write == entry->copy_on_write) && + (next->protection == entry->protection) && + (next->max_protection == 
entry->max_protection) && + (next->inheritance == entry->inheritance) && + (next->is_a_map == FALSE) && + (next->is_sub_map == FALSE) && + (next->wired_count == 0)) { if (map->first_free == next) map->first_free = entry; if (map->hint == next) map->hint = entry; vm_map_entry_unlink(map, next); entry->end = next->end; if (next->object.vm_object) vm_object_deallocate(next->object.vm_object); vm_map_entry_dispose(map, next); } } } - /* * vm_map_clip_start: [ internal use only ] * * Asserts that the given entry begins at or after * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_start(map, entry, startaddr) \ { \ if (startaddr > entry->start) \ _vm_map_clip_start(map, entry, startaddr); \ } /* * This routine is called only when it is known that * the entry must be split. */ static void _vm_map_clip_start(map, entry, start) register vm_map_t map; register vm_map_entry_t entry; register vm_offset_t start; { register vm_map_entry_t new_entry; /* * Split off the front portion -- note that we must insert the new * entry BEFORE this one, so that this entry has the specified * starting address. */ vm_map_simplify_entry(map, entry); new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->end = start; entry->offset += (start - entry->start); entry->start = start; vm_map_entry_link(map, entry->prev, new_entry); if (entry->is_a_map || entry->is_sub_map) vm_map_reference(new_entry->object.share_map); else vm_object_reference(new_entry->object.vm_object); } /* * vm_map_clip_end: [ internal use only ] * * Asserts that the given entry ends at or before * the specified address; if necessary, * it splits the entry into two. */ #define vm_map_clip_end(map, entry, endaddr) \ { \ if (endaddr < entry->end) \ _vm_map_clip_end(map, entry, endaddr); \ } /* * This routine is called only when it is known that * the entry must be split. 
*/ static void _vm_map_clip_end(map, entry, end) register vm_map_t map; register vm_map_entry_t entry; register vm_offset_t end; { register vm_map_entry_t new_entry; /* * Create a new entry and insert it AFTER the specified entry */ new_entry = vm_map_entry_create(map); *new_entry = *entry; new_entry->start = entry->end = end; new_entry->offset += (end - entry->start); vm_map_entry_link(map, entry, new_entry); if (entry->is_a_map || entry->is_sub_map) vm_map_reference(new_entry->object.share_map); else vm_object_reference(new_entry->object.vm_object); } /* * VM_MAP_RANGE_CHECK: [ internal use only ] * * Asserts that the starting and ending region * addresses fall within the valid range of the map. */ #define VM_MAP_RANGE_CHECK(map, start, end) \ { \ if (start < vm_map_min(map)) \ start = vm_map_min(map); \ if (end > vm_map_max(map)) \ end = vm_map_max(map); \ if (start > end) \ start = end; \ } /* * vm_map_submap: [ kernel use only ] * * Mark the given range as handled by a subordinate map. * * This range must have been created with vm_map_find, * and no other operations may have been performed on this * range prior to calling vm_map_submap. * * Only a limited number of operations can be performed * within this range after calling vm_map_submap: * vm_fault * [Don't try vm_map_copy!] * * To remove a submapping, one must first remove the * range from the superior map, and then destroy the * submap (if desired). [Better yet, don't try it.]
*/ int vm_map_submap(map, start, end, submap) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; vm_map_t submap; { vm_map_entry_t entry; register int result = KERN_INVALID_ARGUMENT; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else entry = entry->next; vm_map_clip_end(map, entry, end); if ((entry->start == start) && (entry->end == end) && (!entry->is_a_map) && (entry->object.vm_object == NULL) && (!entry->copy_on_write)) { entry->is_a_map = FALSE; entry->is_sub_map = TRUE; vm_map_reference(entry->object.sub_map = submap); result = KERN_SUCCESS; } vm_map_unlock(map); return (result); } /* * vm_map_protect: * * Sets the protection of the specified address * region in the target map. If "set_max" is * specified, the maximum protection is to be set; * otherwise, only the current protection is affected. */ int vm_map_protect(map, start, end, new_prot, set_max) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register vm_prot_t new_prot; register boolean_t set_max; { register vm_map_entry_t current; vm_map_entry_t entry; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else entry = entry->next; /* * Make a first pass to check for protection violations. */ current = entry; while ((current != &map->header) && (current->start < end)) { if (current->is_sub_map) { vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } if ((new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); return (KERN_PROTECTION_FAILURE); } current = current->next; } /* * Go back and fix up protections. [Note that clipping is not * necessary the second time.] 
*/ current = entry; while ((current != &map->header) && (current->start < end)) { vm_prot_t old_prot; vm_map_clip_end(map, current, end); old_prot = current->protection; if (set_max) current->protection = (current->max_protection = new_prot) & old_prot; else current->protection = new_prot; /* * Update physical map if necessary. Worry about copy-on-write * here -- CHECK THIS XXX */ if (current->protection != old_prot) { #define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \ VM_PROT_ALL) #define max(a,b) ((a) > (b) ? (a) : (b)) if (current->is_a_map) { vm_map_entry_t share_entry; vm_offset_t share_end; vm_map_lock(current->object.share_map); (void) vm_map_lookup_entry( current->object.share_map, current->offset, &share_entry); share_end = current->offset + (current->end - current->start); while ((share_entry != ¤t->object.share_map->header) && (share_entry->start < share_end)) { pmap_protect(map->pmap, (max(share_entry->start, current->offset) - current->offset + current->start), min(share_entry->end, share_end) - current->offset + current->start, current->protection & MASK(share_entry)); share_entry = share_entry->next; } vm_map_unlock(current->object.share_map); } else pmap_protect(map->pmap, current->start, current->end, current->protection & MASK(entry)); #undef max #undef MASK } current = current->next; } vm_map_simplify_entry(map, entry); vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_madvise: * * This routine traverses a processes map handling the madvise * system call. 
*/ void vm_map_madvise(map, pmap, start, end, advise) vm_map_t map; pmap_t pmap; vm_offset_t start, end; int advise; { register vm_map_entry_t current; vm_map_entry_t entry; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &entry)) { vm_map_clip_start(map, entry, start); } else entry = entry->next; for(current = entry; (current != &map->header) && (current->start < end); current = current->next) { if (current->is_a_map || current->is_sub_map) { continue; } vm_map_clip_end(map, current, end); switch (advise) { case MADV_NORMAL: current->object.vm_object->behavior = OBJ_NORMAL; break; case MADV_SEQUENTIAL: current->object.vm_object->behavior = OBJ_SEQUENTIAL; break; case MADV_RANDOM: current->object.vm_object->behavior = OBJ_RANDOM; break; /* * Right now, we could handle DONTNEED and WILLNEED with common code. * They are mostly the same, except for the potential async reads (NYI). */ case MADV_FREE: case MADV_DONTNEED: { vm_pindex_t pindex; int count; vm_size_t size = entry->end - entry->start; pindex = OFF_TO_IDX(entry->offset); count = OFF_TO_IDX(size); /* * MADV_DONTNEED removes the page from all * pmaps, so pmap_remove is not necessary. */ vm_object_madvise(current->object.vm_object, pindex, count, advise); } break; case MADV_WILLNEED: { vm_pindex_t pindex; int count; vm_size_t size = entry->end - entry->start; pindex = OFF_TO_IDX(entry->offset); count = OFF_TO_IDX(size); vm_object_madvise(current->object.vm_object, pindex, count, advise); pmap_object_init_pt(pmap, current->start, current->object.vm_object, pindex, (count << PAGE_SHIFT), 0); } break; default: break; } } vm_map_simplify_entry(map, entry); vm_map_unlock(map); return; } /* * vm_map_inherit: * * Sets the inheritance of the specified address * range in the target map. Inheritance * affects how the map will be shared with * child maps at the time of vm_map_fork. 
*/ int vm_map_inherit(map, start, end, new_inheritance) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register vm_inherit_t new_inheritance; { register vm_map_entry_t entry; vm_map_entry_t temp_entry; switch (new_inheritance) { case VM_INHERIT_NONE: case VM_INHERIT_COPY: case VM_INHERIT_SHARE: break; default: return (KERN_INVALID_ARGUMENT); } vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (vm_map_lookup_entry(map, start, &temp_entry)) { entry = temp_entry; vm_map_clip_start(map, entry, start); } else entry = temp_entry->next; while ((entry != &map->header) && (entry->start < end)) { vm_map_clip_end(map, entry, end); entry->inheritance = new_inheritance; entry = entry->next; } vm_map_simplify_entry(map, temp_entry); vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_pageable: * * Sets the pageability of the specified address * range in the target map. Regions specified * as not pageable require locked-down physical * memory and physical page maps. * * The map must not be locked, but a reference * must remain to the map throughout the call. */ int vm_map_pageable(map, start, end, new_pageable) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register boolean_t new_pageable; { register vm_map_entry_t entry; vm_map_entry_t start_entry; register vm_offset_t failed = 0; int rv; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); /* * Only one pageability change may take place at one time, since * vm_fault assumes it will be called only once for each * wiring/unwiring. Therefore, we have to make sure we're actually * changing the pageability for the entire region. We do so before * making any changes. */ if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) { vm_map_unlock(map); return (KERN_INVALID_ADDRESS); } entry = start_entry; /* * Actions are rather different for wiring and unwiring, so we have * two separate cases. 
*/ if (new_pageable) { vm_map_clip_start(map, entry, start); /* * Unwiring. First ensure that the range to be unwired is * really wired down and that there are no holes. */ while ((entry != &map->header) && (entry->start < end)) { if (entry->wired_count == 0 || (entry->end < end && (entry->next == &map->header || entry->next->start > entry->end))) { vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } entry = entry->next; } /* * Now decrement the wiring count for each region. If a region * becomes completely unwired, unwire its physical pages and * mappings. */ lock_set_recursive(&map->lock); entry = start_entry; while ((entry != &map->header) && (entry->start < end)) { vm_map_clip_end(map, entry, end); entry->wired_count--; if (entry->wired_count == 0) vm_fault_unwire(map, entry->start, entry->end); entry = entry->next; } vm_map_simplify_entry(map, start_entry); lock_clear_recursive(&map->lock); } else { /* * Wiring. We must do this in two passes: * * 1. Holding the write lock, we create any shadow or zero-fill * objects that need to be created. Then we clip each map * entry to the region to be wired and increment its wiring * count. We create objects before clipping the map entries * to avoid object proliferation. * * 2. We downgrade to a read lock, and call vm_fault_wire to * fault in the pages for any newly wired area (wired_count is * 1). * * Downgrading to a read lock for vm_fault_wire avoids a possible * deadlock with another process that may have faulted on one * of the pages to be wired (it would mark the page busy, * blocking us, then in turn block on the map lock that we * hold). Because of problems in the recursive lock package, * we cannot upgrade to a write lock in vm_map_lookup. Thus, * any actions that require the write lock must be done * beforehand. Because we keep the read lock on the map, the * copy-on-write status of the entries we modify here cannot * change. */ /* * Pass 1. 
*/ while ((entry != &map->header) && (entry->start < end)) { if (entry->wired_count == 0) { /* * Perform actions of vm_map_lookup that need * the write lock on the map: create a shadow * object for a copy-on-write region, or an * object for a zero-fill region. * * We don't have to do this for entries that * point to sharing maps, because we won't * hold the lock on the sharing map. */ if (!entry->is_a_map && !entry->is_sub_map) { int copyflag = entry->needs_copy; if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) { vm_object_shadow(&entry->object.vm_object, &entry->offset, OFF_TO_IDX(entry->end - entry->start)); entry->needs_copy = FALSE; } else if (entry->object.vm_object == NULL) { entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(entry->end - entry->start)); entry->offset = (vm_offset_t) 0; } default_pager_convert_to_swapq(entry->object.vm_object); } } vm_map_clip_start(map, entry, start); vm_map_clip_end(map, entry, end); entry->wired_count++; /* * Check for holes */ if (entry->end < end && (entry->next == &map->header || entry->next->start > entry->end)) { /* * Found one. Object creation actions do not * need to be undone, but the wired counts * need to be restored. */ while (entry != &map->header && entry->end > start) { entry->wired_count--; entry = entry->prev; } vm_map_unlock(map); return (KERN_INVALID_ARGUMENT); } entry = entry->next; } /* * Pass 2. */ /* * HACK HACK HACK HACK * * If we are wiring in the kernel map or a submap of it, * unlock the map to avoid deadlocks. We trust that the * kernel is well-behaved, and therefore will not do * anything destructive to this region of the map while * we have it unlocked. We cannot trust user processes * to do the same. * * HACK HACK HACK HACK */ if (vm_map_pmap(map) == kernel_pmap) { vm_map_unlock(map); /* trust me ... 
*/ } else { lock_set_recursive(&map->lock); lock_write_to_read(&map->lock); } rv = 0; entry = start_entry; while (entry != &map->header && entry->start < end) { /* * If vm_fault_wire fails for any page we need to undo * what has been done. We decrement the wiring count * for those pages which have not yet been wired (now) * and unwire those that have (later). * * XXX this violates the locking protocol on the map, * needs to be fixed. */ if (rv) entry->wired_count--; else if (entry->wired_count == 1) { rv = vm_fault_wire(map, entry->start, entry->end); if (rv) { failed = entry->start; entry->wired_count--; } } entry = entry->next; } if (vm_map_pmap(map) == kernel_pmap) { vm_map_lock(map); } else { lock_clear_recursive(&map->lock); } if (rv) { vm_map_unlock(map); (void) vm_map_pageable(map, start, failed, TRUE); return (rv); } } vm_map_unlock(map); return (KERN_SUCCESS); } /* * vm_map_clean * * Push any dirty cached pages in the address range to their pager. * If syncio is TRUE, dirty pages are written synchronously. * If invalidate is TRUE, any cached pages are freed as well. * * Returns an error if any part of the specified range is not mapped. */ int vm_map_clean(map, start, end, syncio, invalidate) vm_map_t map; vm_offset_t start; vm_offset_t end; boolean_t syncio; boolean_t invalidate; { register vm_map_entry_t current; vm_map_entry_t entry; vm_size_t size; vm_object_t object; vm_ooffset_t offset; vm_map_lock_read(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &entry)) { vm_map_unlock_read(map); return (KERN_INVALID_ADDRESS); } /* * Make a first pass to check for holes. 
*/ for (current = entry; current->start < end; current = current->next) { if (current->is_sub_map) { vm_map_unlock_read(map); return (KERN_INVALID_ARGUMENT); } if (end > current->end && (current->next == &map->header || current->end != current->next->start)) { vm_map_unlock_read(map); return (KERN_INVALID_ADDRESS); } } /* * Make a second pass, cleaning/uncaching pages from the indicated * objects as we go. */ for (current = entry; current->start < end; current = current->next) { offset = current->offset + (start - current->start); size = (end <= current->end ? end : current->end) - start; if (current->is_a_map || current->is_sub_map) { register vm_map_t smap; vm_map_entry_t tentry; vm_size_t tsize; smap = current->object.share_map; vm_map_lock_read(smap); (void) vm_map_lookup_entry(smap, offset, &tentry); tsize = tentry->end - offset; if (tsize < size) size = tsize; object = tentry->object.vm_object; offset = tentry->offset + (offset - tentry->start); vm_map_unlock_read(smap); } else { object = current->object.vm_object; } /* * Note that there is absolutely no sense in writing out * anonymous objects, so we track down the vnode object * to write out. * We invalidate (remove) all pages from the address space * anyway, for semantic correctness. */ while (object->backing_object) { object = object->backing_object; offset += object->backing_object_offset; if (object->size < OFF_TO_IDX( offset + size)) size = IDX_TO_OFF(object->size) - offset; } if (invalidate) pmap_remove(vm_map_pmap(map), current->start, current->start + size); if (object && (object->type == OBJT_VNODE)) { /* * Flush pages if writing is allowed. XXX should we continue * on an error? * * XXX Doing async I/O and then removing all the pages from * the object before it completes is probably a very bad * idea. 
*/ if (current->protection & VM_PROT_WRITE) { vm_object_page_clean(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + size), (syncio||invalidate)?1:0, TRUE); if (invalidate) vm_object_page_remove(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + size), FALSE); } } start += size; } vm_map_unlock_read(map); return (KERN_SUCCESS); } /* * vm_map_entry_unwire: [ internal use only ] * * Make the region specified by this entry pageable. * * The map in question should be locked. * [This is the reason for this routine's existence.] */ static __inline void vm_map_entry_unwire(map, entry) vm_map_t map; register vm_map_entry_t entry; { vm_fault_unwire(map, entry->start, entry->end); entry->wired_count = 0; } /* * vm_map_entry_delete: [ internal use only ] * * Deallocate the given entry from the target map. */ static __inline void vm_map_entry_delete(map, entry) register vm_map_t map; register vm_map_entry_t entry; { vm_map_entry_unlink(map, entry); map->size -= entry->end - entry->start; if (entry->is_a_map || entry->is_sub_map) { vm_map_deallocate(entry->object.share_map); } else { vm_object_deallocate(entry->object.vm_object); } vm_map_entry_dispose(map, entry); } /* * vm_map_delete: [ internal use only ] * * Deallocates the given address range from the target * map. * * When called with a sharing map, removes pages from * that region from all physical maps. */ int vm_map_delete(map, start, end) register vm_map_t map; vm_offset_t start; register vm_offset_t end; { register vm_map_entry_t entry; vm_map_entry_t first_entry; /* * Find the start of the region, and clip it */ if (!vm_map_lookup_entry(map, start, &first_entry)) entry = first_entry->next; else { entry = first_entry; vm_map_clip_start(map, entry, start); /* * Fix the lookup hint now, rather than each time though the * loop. 
*/ SAVE_HINT(map, entry->prev); } /* * Save the free space hint */ if (entry == &map->header) { map->first_free = &map->header; } else if (map->first_free->start >= start) map->first_free = entry->prev; /* * Step through all entries in this region */ while ((entry != &map->header) && (entry->start < end)) { vm_map_entry_t next; vm_offset_t s, e; vm_object_t object; vm_ooffset_t offset; vm_map_clip_end(map, entry, end); next = entry->next; s = entry->start; e = entry->end; offset = entry->offset; /* * Unwire before removing addresses from the pmap; otherwise, * unwiring will put the entries back in the pmap. */ object = entry->object.vm_object; if (entry->wired_count != 0) vm_map_entry_unwire(map, entry); /* * If this is a sharing map, we must remove *all* references * to this data, since we can't find all of the physical maps * which are sharing it. */ if (object == kernel_object || object == kmem_object) { vm_object_page_remove(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + (e - s)), FALSE); } else if (!map->is_main_map) { vm_object_pmap_remove(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + (e - s))); } else { pmap_remove(map->pmap, s, e); } /* * Delete the entry (which may delete the object) only after * removing all pmap entries pointing to its pages. * (Otherwise, its page frames may be reallocated, and any * modify bits will be set in the wrong object!) */ vm_map_entry_delete(map, entry); entry = next; } return (KERN_SUCCESS); } /* * vm_map_remove: * * Remove the given address range from the target map. * This is the exported form of vm_map_delete. 
*/ int vm_map_remove(map, start, end) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; { register int result, s = 0; if (map == kmem_map || map == mb_map) s = splvm(); vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); result = vm_map_delete(map, start, end); vm_map_unlock(map); if (map == kmem_map || map == mb_map) splx(s); return (result); } /* - * vm_map_remove_userspace: - * Removes the user portion of the address space. - */ -void -vm_map_remove_userspace(map) - register vm_map_t map; -{ - vm_map_lock(map); - pmap_remove_pages(map->pmap, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); - vm_map_delete(map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); - vm_map_unlock(map); - return; -} - -/* * vm_map_check_protection: * * Assert that the target map allows the specified * privilege on the entire address region given. * The entire region must be allocated. */ boolean_t vm_map_check_protection(map, start, end, protection) register vm_map_t map; register vm_offset_t start; register vm_offset_t end; register vm_prot_t protection; { register vm_map_entry_t entry; vm_map_entry_t tmp_entry; if (!vm_map_lookup_entry(map, start, &tmp_entry)) { return (FALSE); } entry = tmp_entry; while (start < end) { if (entry == &map->header) { return (FALSE); } /* * No holes allowed! */ if (start < entry->start) { return (FALSE); } /* * Check protection associated with entry. */ if ((entry->protection & protection) != protection) { return (FALSE); } /* go to next entry */ start = entry->end; entry = entry->next; } return (TRUE); } /* * vm_map_copy_entry: * * Copies the contents of the source entry to the destination * entry. The entries *must* be aligned properly. 
*/ static void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) vm_map_t src_map, dst_map; register vm_map_entry_t src_entry, dst_entry; { if (src_entry->is_sub_map || dst_entry->is_sub_map) return; if (src_entry->wired_count == 0) { /* * If the source entry is marked needs_copy, it is already * write-protected. */ if (!src_entry->needs_copy) { boolean_t su; /* * If the source entry has only one mapping, we can * just protect the virtual address range. */ if (!(su = src_map->is_main_map)) { su = (src_map->ref_count == 1); } if (su) { pmap_protect(src_map->pmap, src_entry->start, src_entry->end, src_entry->protection & ~VM_PROT_WRITE); } else { vm_object_pmap_copy(src_entry->object.vm_object, OFF_TO_IDX(src_entry->offset), OFF_TO_IDX(src_entry->offset + (src_entry->end - src_entry->start))); } } /* * Make a copy of the object. */ if (src_entry->object.vm_object) { if ((src_entry->object.vm_object->handle == NULL) && (src_entry->object.vm_object->type == OBJT_DEFAULT || src_entry->object.vm_object->type == OBJT_SWAP)) vm_object_collapse(src_entry->object.vm_object); ++src_entry->object.vm_object->ref_count; src_entry->copy_on_write = TRUE; src_entry->needs_copy = TRUE; dst_entry->needs_copy = TRUE; dst_entry->copy_on_write = TRUE; dst_entry->object.vm_object = src_entry->object.vm_object; dst_entry->offset = src_entry->offset; } else { dst_entry->object.vm_object = NULL; dst_entry->offset = 0; } pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, dst_entry->end - dst_entry->start, src_entry->start); } else { /* * Of course, wired down pages can't be set copy-on-write. * Cause wired pages to be copied into the new map by * simulating faults (the new pages are pageable) */ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); } } /* * vmspace_fork: * Create a new process vmspace structure and vm_map * based on those of an existing process. The new map * is based on the old map, according to the inheritance * values on the regions in that map. 
* * The source map must not be locked. */ struct vmspace * vmspace_fork(vm1) register struct vmspace *vm1; { register struct vmspace *vm2; vm_map_t old_map = &vm1->vm_map; vm_map_t new_map; vm_map_entry_t old_entry; vm_map_entry_t new_entry; pmap_t new_pmap; vm_object_t object; vm_map_lock(old_map); vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset, old_map->entries_pageable); bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); new_pmap = &vm2->vm_pmap; /* XXX */ new_map = &vm2->vm_map; /* XXX */ old_entry = old_map->header.next; while (old_entry != &old_map->header) { if (old_entry->is_sub_map) panic("vm_map_fork: encountered a submap"); switch (old_entry->inheritance) { case VM_INHERIT_NONE: break; case VM_INHERIT_SHARE: /* * Clone the entry, referencing the sharing map. */ new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; new_entry->wired_count = 0; object = new_entry->object.vm_object; ++object->ref_count; /* * Insert the entry into the new map -- we know we're * inserting at the end of the new map. */ vm_map_entry_link(new_map, new_map->header.prev, new_entry); /* * Update the physical map */ pmap_copy(new_map->pmap, old_map->pmap, new_entry->start, (old_entry->end - old_entry->start), old_entry->start); break; case VM_INHERIT_COPY: /* * Clone the entry and link into the map. */ new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; new_entry->wired_count = 0; new_entry->object.vm_object = NULL; new_entry->is_a_map = FALSE; vm_map_entry_link(new_map, new_map->header.prev, new_entry); vm_map_copy_entry(old_map, new_map, old_entry, new_entry); break; } old_entry = old_entry->next; } new_map->size = old_map->size; vm_map_unlock(old_map); return (vm2); } /* * vm_map_lookup: * * Finds the VM object, offset, and * protection for a given virtual address in the * specified map, assuming a page fault of the * type specified. 
* * Leaves the map in question locked for read; return * values are guaranteed until a vm_map_lookup_done * call is performed. Note that the map argument * is in/out; the returned map must be used in * the call to vm_map_lookup_done. * * A handle (out_entry) is returned for use in * vm_map_lookup_done, to make that fast. * * If a lookup is requested with "write protection" * specified, the map may be changed to perform virtual * copying operations, although the data referenced will * remain the same. */ int vm_map_lookup(var_map, vaddr, fault_type, out_entry, object, pindex, out_prot, wired, single_use) vm_map_t *var_map; /* IN/OUT */ register vm_offset_t vaddr; register vm_prot_t fault_type; vm_map_entry_t *out_entry; /* OUT */ vm_object_t *object; /* OUT */ vm_pindex_t *pindex; /* OUT */ vm_prot_t *out_prot; /* OUT */ boolean_t *wired; /* OUT */ boolean_t *single_use; /* OUT */ { vm_map_t share_map; vm_offset_t share_offset; register vm_map_entry_t entry; register vm_map_t map = *var_map; register vm_prot_t prot; register boolean_t su; RetryLookup:; /* * Lookup the faulting address. */ vm_map_lock_read(map); #define RETURN(why) \ { \ vm_map_unlock_read(map); \ return(why); \ } /* * If the map has an interesting hint, try it before calling full * blown lookup routine. */ entry = map->hint; *out_entry = entry; if ((entry == &map->header) || (vaddr < entry->start) || (vaddr >= entry->end)) { vm_map_entry_t tmp_entry; /* * Entry was either not a valid hint, or the vaddr was not * contained in the entry, so do a full lookup. */ if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) RETURN(KERN_INVALID_ADDRESS); entry = tmp_entry; *out_entry = entry; } /* * Handle submaps. */ if (entry->is_sub_map) { vm_map_t old_map = map; *var_map = map = entry->object.sub_map; vm_map_unlock_read(old_map); goto RetryLookup; } /* * Check whether this task is allowed to have this page. 
*/ prot = entry->protection; if ((fault_type & (prot)) != fault_type) RETURN(KERN_PROTECTION_FAILURE); /* * If this page is not pageable, we have to get it for all possible * accesses. */ *wired = (entry->wired_count != 0); if (*wired) prot = fault_type = entry->protection; /* * If we don't already have a VM object, track it down. */ su = !entry->is_a_map; if (su) { share_map = map; share_offset = vaddr; } else { vm_map_entry_t share_entry; /* * Compute the sharing map, and offset into it. */ share_map = entry->object.share_map; share_offset = (vaddr - entry->start) + entry->offset; /* * Look for the backing store object and offset */ vm_map_lock_read(share_map); if (!vm_map_lookup_entry(share_map, share_offset, &share_entry)) { vm_map_unlock_read(share_map); RETURN(KERN_INVALID_ADDRESS); } entry = share_entry; } /* * If the entry was copy-on-write, we either ... */ if (entry->needs_copy) { /* * If we want to write the page, we may as well handle that * now since we've got the sharing map locked. * * If we don't need to write the page, we just demote the * permissions allowed. */ if (fault_type & VM_PROT_WRITE) { /* * Make a new object, and place it in the object * chain. Note that no new references have appeared * -- one just moved from the share map to the new * object. */ if (lock_read_to_write(&share_map->lock)) { if (share_map != map) vm_map_unlock_read(map); goto RetryLookup; } vm_object_shadow( &entry->object.vm_object, &entry->offset, OFF_TO_IDX(entry->end - entry->start)); entry->needs_copy = FALSE; lock_write_to_read(&share_map->lock); } else { /* * We're attempting to read a copy-on-write page -- * don't allow writes. */ prot &= (~VM_PROT_WRITE); } } /* * Create an object if necessary. 
*/ if (entry->object.vm_object == NULL) { if (lock_read_to_write(&share_map->lock)) { if (share_map != map) vm_map_unlock_read(map); goto RetryLookup; } entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(entry->end - entry->start)); entry->offset = 0; lock_write_to_read(&share_map->lock); } - default_pager_convert_to_swapq(entry->object.vm_object); - + if (entry->object.vm_object != NULL) + default_pager_convert_to_swapq(entry->object.vm_object); /* * Return the object/offset from this entry. If the entry was * copy-on-write or empty, it has been fixed up. */ *pindex = OFF_TO_IDX((share_offset - entry->start) + entry->offset); *object = entry->object.vm_object; /* * Return whether this is the only map sharing this data. */ if (!su) { su = (share_map->ref_count == 1); } *out_prot = prot; *single_use = su; return (KERN_SUCCESS); #undef RETURN } /* * vm_map_lookup_done: * * Releases locks acquired by a vm_map_lookup * (according to the handle returned by that lookup). */ void vm_map_lookup_done(map, entry) register vm_map_t map; vm_map_entry_t entry; { /* * If this entry references a map, unlock it first. */ if (entry->is_a_map) vm_map_unlock_read(entry->object.share_map); /* * Unlock the main-level map */ vm_map_unlock_read(map); } /* * Routine: vm_map_simplify * Purpose: * Attempt to simplify the map representation in * the vicinity of the given starting address. * Note: * This routine is intended primarily to keep the * kernel maps more compact -- they generally don't * benefit from the "expand a map entry" technology * at allocation time because the adjacent entry * is often wired down. 
*/ void vm_map_simplify(map, start) vm_map_t map; vm_offset_t start; { vm_map_entry_t this_entry; vm_map_entry_t prev_entry; vm_map_lock(map); if ((vm_map_lookup_entry(map, start, &this_entry)) && ((prev_entry = this_entry->prev) != &map->header) && (prev_entry->end == start) && (prev_entry->object.vm_object == this_entry->object.vm_object) && ((prev_entry->offset + (prev_entry->end - prev_entry->start)) == this_entry->offset) && (map->is_main_map) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && (this_entry->is_a_map == FALSE) && (this_entry->is_sub_map == FALSE) && (prev_entry->inheritance == this_entry->inheritance) && (prev_entry->protection == this_entry->protection) && (prev_entry->max_protection == this_entry->max_protection) && (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->copy_on_write == this_entry->copy_on_write) && (prev_entry->needs_copy == this_entry->needs_copy)) { if (map->first_free == this_entry) map->first_free = prev_entry; if (map->hint == this_entry) SAVE_HINT(map, prev_entry); vm_map_entry_unlink(map, this_entry); prev_entry->end = this_entry->end; if (this_entry->object.vm_object) vm_object_deallocate(this_entry->object.vm_object); vm_map_entry_dispose(map, this_entry); } vm_map_unlock(map); } #ifdef DDB /* * vm_map_print: [ debug ] */ void vm_map_print(imap, full, dummy3, dummy4) /* db_expr_t */ int imap; boolean_t full; /* db_expr_t */ int dummy3; char *dummy4; { register vm_map_entry_t entry; register vm_map_t map = (vm_map_t)imap; /* XXX */ iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n", (map->is_main_map ? 
"Task" : "Share"), (int) map, (int) (map->pmap), map->ref_count, map->nentries, map->timestamp); if (!full && indent) return; indent += 2; for (entry = map->header.next; entry != &map->header; entry = entry->next) { iprintf("map entry 0x%x: start=0x%x, end=0x%x, ", (int) entry, (int) entry->start, (int) entry->end); if (map->is_main_map) { static char *inheritance_name[4] = {"share", "copy", "none", "donate_copy"}; printf("prot=%x/%x/%s, ", entry->protection, entry->max_protection, inheritance_name[entry->inheritance]); if (entry->wired_count != 0) printf("wired, "); } if (entry->is_a_map || entry->is_sub_map) { printf("share=0x%x, offset=0x%x\n", (int) entry->object.share_map, (int) entry->offset); if ((entry->prev == &map->header) || (!entry->prev->is_a_map) || (entry->prev->object.share_map != entry->object.share_map)) { indent += 2; vm_map_print((int)entry->object.share_map, full, 0, (char *)0); indent -= 2; } } else { printf("object=0x%x, offset=0x%x", (int) entry->object.vm_object, (int) entry->offset); if (entry->copy_on_write) printf(", copy (%s)", entry->needs_copy ? "needed" : "done"); printf("\n"); if ((entry->prev == &map->header) || (entry->prev->is_a_map) || (entry->prev->object.vm_object != entry->object.vm_object)) { indent += 2; vm_object_print((int)entry->object.vm_object, full, 0, (char *)0); indent -= 2; } } } indent -= 2; } #endif Index: head/sys/vm/vm_map.h =================================================================== --- head/sys/vm/vm_map.h (revision 17333) +++ head/sys/vm/vm_map.h (revision 17334) @@ -1,243 +1,242 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_map.h 8.3 (Berkeley) 3/15/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.h,v 1.13 1996/05/19 07:36:48 dyson Exp $ + * $Id: vm_map.h,v 1.14 1996/07/27 03:23:59 dyson Exp $ */ /* * Virtual memory map module definitions. */ #ifndef _VM_MAP_ #define _VM_MAP_ /* * Types defined: * * vm_map_t the high-level address map data structure. * vm_map_entry_t an entry in an address map. * vm_map_version_t a timestamp of a map, for use with vm_map_lookup */ /* * Objects which live in maps may be either VM objects, or * another map (called a "sharing map") which denotes read-write * sharing with other maps. */ union vm_map_object { struct vm_object *vm_object; /* object object */ struct vm_map *share_map; /* share map */ struct vm_map *sub_map; /* belongs to another map */ }; /* * Address map entries consist of start and end addresses, * a VM object (or sharing map) and offset into that object, * and user-exported inheritance and protection information. * Also included is control information for virtual copy operations. 
*/ struct vm_map_entry { struct vm_map_entry *prev; /* previous entry */ struct vm_map_entry *next; /* next entry */ vm_offset_t start; /* start address */ vm_offset_t end; /* end address */ union vm_map_object object; /* object I point to */ vm_ooffset_t offset; /* offset into object */ boolean_t is_a_map:1, /* Is "object" a map? */ is_sub_map:1, /* Is "object" a submap? */ /* Only in sharing maps: */ copy_on_write:1, /* is data copy-on-write */ needs_copy:1; /* does object need to be copied */ /* Only in task maps: */ vm_prot_t protection; /* protection code */ vm_prot_t max_protection; /* maximum protection */ vm_inherit_t inheritance; /* inheritance */ int wired_count; /* can be paged if = 0 */ }; /* * Maps are doubly-linked lists of map entries, kept sorted * by address. A single hint is provided to start * searches again from the last successful search, * insertion, or removal. */ struct vm_map { struct pmap *pmap; /* Physical map */ lock_data_t lock; /* Lock for map data */ struct vm_map_entry header; /* List of entries */ int nentries; /* Number of entries */ vm_size_t size; /* virtual size */ boolean_t is_main_map; /* Am I a main map? */ int ref_count; /* Reference count */ vm_map_entry_t hint; /* hint for quick lookups */ vm_map_entry_t first_free; /* First free space hint */ boolean_t entries_pageable; /* map entries pageable?? */ unsigned int timestamp; /* Version number */ #define min_offset header.start #define max_offset header.end }; /* * Shareable process virtual address space. * May eventually be merged with vm_map. * Several fields are temporary (text, data stuff). 
*/ struct vmspace { struct vm_map vm_map; /* VM address map */ struct pmap vm_pmap; /* private physical map */ int vm_refcnt; /* number of references */ caddr_t vm_shm; /* SYS5 shared memory private data XXX */ vm_object_t vm_upages_obj; /* UPAGES object */ /* we copy from vm_startcopy to the end of the structure on fork */ #define vm_startcopy vm_rssize segsz_t vm_rssize; /* current resident set size in pages */ segsz_t vm_swrss; /* resident set size before last swap */ segsz_t vm_tsize; /* text size (pages) XXX */ segsz_t vm_dsize; /* data size (pages) XXX */ segsz_t vm_ssize; /* stack size (pages) */ caddr_t vm_taddr; /* user virtual address of text XXX */ caddr_t vm_daddr; /* user virtual address of data XXX */ caddr_t vm_maxsaddr; /* user VA at max stack growth */ caddr_t vm_minsaddr; /* user VA at max stack growth */ }; /* * Map versions are used to validate a previous lookup attempt. * * Since lookup operations may involve both a main map and * a sharing map, it is necessary to have a timestamp from each. * [If the main map timestamp has changed, the share_map and * associated timestamp are no longer valid; the map version * does not include a reference for the embedded share_map.] */ typedef struct { int main_timestamp; vm_map_t share_map; int share_timestamp; } vm_map_version_t; /* * Macros: vm_map_lock, etc. * Function: * Perform locking on the data portion of a map. 
*/ #define vm_map_lock(map) { \ lock_write(&(map)->lock); \ (map)->timestamp++; \ } #define vm_map_unlock(map) lock_write_done(&(map)->lock) #define vm_map_lock_read(map) lock_read(&(map)->lock) #define vm_map_unlock_read(map) lock_read_done(&(map)->lock) /* * Functions implemented as macros */ #define vm_map_min(map) ((map)->min_offset) #define vm_map_max(map) ((map)->max_offset) #define vm_map_pmap(map) ((map)->pmap) /* XXX: number of kernel maps and entries to statically allocate */ #define MAX_KMAP 10 #define MAX_KMAPENT 128 /* * Copy-on-write flags for vm_map operations */ #define MAP_COPY_NEEDED 0x1 #define MAP_COPY_ON_WRITE 0x2 #ifdef KERNEL extern vm_offset_t kentry_data; extern vm_size_t kentry_data_size; boolean_t vm_map_check_protection __P((vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t)); int vm_map_copy __P((vm_map_t, vm_map_t, vm_offset_t, vm_size_t, vm_offset_t, boolean_t, boolean_t)); struct pmap; vm_map_t vm_map_create __P((struct pmap *, vm_offset_t, vm_offset_t, boolean_t)); void vm_map_deallocate __P((vm_map_t)); int vm_map_delete __P((vm_map_t, vm_offset_t, vm_offset_t)); int vm_map_find __P((vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, boolean_t, vm_prot_t, vm_prot_t, int)); int vm_map_findspace __P((vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *)); int vm_map_inherit __P((vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t)); void vm_map_init __P((struct vm_map *, vm_offset_t, vm_offset_t, boolean_t)); int vm_map_insert __P((vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t, vm_prot_t, vm_prot_t, int)); int vm_map_lookup __P((vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *, vm_pindex_t *, vm_prot_t *, boolean_t *, boolean_t *)); void vm_map_lookup_done __P((vm_map_t, vm_map_entry_t)); boolean_t vm_map_lookup_entry __P((vm_map_t, vm_offset_t, vm_map_entry_t *)); int vm_map_pageable __P((vm_map_t, vm_offset_t, vm_offset_t, boolean_t)); int vm_map_clean __P((vm_map_t, vm_offset_t, vm_offset_t, 
boolean_t, boolean_t)); int vm_map_protect __P((vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t, boolean_t)); void vm_map_reference __P((vm_map_t)); int vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t)); -void vm_map_remove_userspace __P((vm_map_t)); void vm_map_simplify __P((vm_map_t, vm_offset_t)); void vm_map_startup __P((void)); int vm_map_submap __P((vm_map_t, vm_offset_t, vm_offset_t, vm_map_t)); void vm_map_madvise __P((vm_map_t, pmap_t, vm_offset_t, vm_offset_t, int)); #endif #endif /* _VM_MAP_ */ Index: head/sys/vm/vm_mmap.c =================================================================== --- head/sys/vm/vm_mmap.c (revision 17333) +++ head/sys/vm/vm_mmap.c (revision 17334) @@ -1,1150 +1,987 @@ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.47 1996/07/27 17:21:41 dyson Exp $ + * $Id: vm_mmap.c,v 1.48 1996/07/28 02:54:09 davidg Exp $ */ /* * Mapped file (mmap) interface to VM */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { int incr; }; #endif /* ARGSUSED */ int sbrk(p, uap, retval) struct proc *p; struct sbrk_args *uap; int *retval; { /* Not yet implemented */ return (EOPNOTSUPP); } #ifndef _SYS_SYSPROTO_H_ struct sstk_args { int incr; }; #endif /* ARGSUSED */ int sstk(p, uap, retval) struct proc *p; struct sstk_args *uap; int *retval; { /* Not yet implemented */ return (EOPNOTSUPP); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #ifndef _SYS_SYSPROTO_H_ struct getpagesize_args { int dummy; }; #endif /* ARGSUSED */ int ogetpagesize(p, uap, retval) struct proc *p; struct getpagesize_args *uap; int *retval; { *retval = PAGE_SIZE; return (0); } #endif /* COMPAT_43 || COMPAT_SUNOS */ #ifndef _SYS_SYSPROTO_H_ struct 
mmap_args { caddr_t addr; size_t len; int prot; int flags; int fd; long pad; off_t pos; }; #endif int mmap(p, uap, retval) struct proc *p; register struct mmap_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vnode *vp; vm_offset_t addr; vm_size_t size, pageoff; vm_prot_t prot, maxprot; caddr_t handle; int flags, error; prot = uap->prot & VM_PROT_ALL; flags = uap->flags; /* * Address (if FIXED) must be page aligned. Size is implicitly rounded * to a page boundary. */ addr = (vm_offset_t) uap->addr; if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) || (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1)) return (EINVAL); /* * Round page if not already disallowed by above test * XXX: Is there any point in the MAP_FIXED align requirement above? */ size = uap->len; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) return (EINVAL); #ifndef i386 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) return (EINVAL); #endif if (addr + size < addr) return (EINVAL); } /* * XXX if no hint provided for a non-fixed mapping place it after the * end of the largest possible heap. * * There should really be a pmap call to determine a reasonable location. */ if (addr == 0 && (flags & MAP_FIXED) == 0) addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ); if (flags & MAP_ANON) { /* * Mapping blank space is trivial. */ handle = NULL; maxprot = VM_PROT_ALL; } else { /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. 
*/ if (((unsigned) uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EINVAL); vp = (struct vnode *) fp->f_data; if (vp->v_type != VREG && vp->v_type != VCHR) return (EINVAL); /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { handle = NULL; maxprot = VM_PROT_ALL; flags |= MAP_ANON; } else { /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? */ if (fp->f_flag & FREAD) maxprot |= VM_PROT_READ; else if (prot & PROT_READ) return (EACCES); if (flags & MAP_SHARED) { if (fp->f_flag & FWRITE) maxprot |= VM_PROT_WRITE; else if (prot & PROT_WRITE) return (EACCES); } else maxprot |= VM_PROT_WRITE; handle = (caddr_t) vp; } } error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, flags, handle, uap->pos); if (error == 0) *retval = (int) addr; return (error); } #ifdef COMPAT_43 #ifndef _SYS_SYSPROTO_H_ struct ommap_args { caddr_t addr; int len; int prot; int flags; int fd; long pos; }; #endif int ommap(p, uap, retval) struct proc *p; register struct ommap_args *uap; int *retval; { struct mmap_args nargs; static const char cvtbsdprot[8] = { 0, PROT_EXEC, PROT_WRITE, PROT_EXEC | PROT_WRITE, PROT_READ, PROT_EXEC | PROT_READ, PROT_WRITE | PROT_READ, PROT_EXEC | PROT_WRITE | PROT_READ, }; #define OMAP_ANON 0x0002 #define OMAP_COPY 0x0020 #define OMAP_SHARED 0x0010 #define OMAP_FIXED 0x0100 #define OMAP_INHERIT 0x0800 nargs.addr = uap->addr; nargs.len = uap->len; nargs.prot = cvtbsdprot[uap->prot & 0x7]; nargs.flags = 0; if (uap->flags & OMAP_ANON) nargs.flags |= MAP_ANON; if (uap->flags & OMAP_COPY) nargs.flags |= MAP_COPY; 
if (uap->flags & OMAP_SHARED) nargs.flags |= MAP_SHARED; else nargs.flags |= MAP_PRIVATE; if (uap->flags & OMAP_FIXED) nargs.flags |= MAP_FIXED; if (uap->flags & OMAP_INHERIT) nargs.flags |= MAP_INHERIT; nargs.fd = uap->fd; nargs.pos = uap->pos; return (mmap(p, &nargs, retval)); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct msync_args { caddr_t addr; int len; int flags; }; #endif int msync(p, uap, retval) struct proc *p; struct msync_args *uap; int *retval; { vm_offset_t addr; vm_size_t size, pageoff; int flags; vm_map_t map; int rv; addr = (vm_offset_t) uap->addr; size = uap->len; flags = uap->flags; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return(EINVAL); if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) return (EINVAL); map = &p->p_vmspace->vm_map; /* * XXX Gak! If size is zero we are supposed to sync "all modified * pages with the region containing addr". Unfortunately, we don't * really keep track of individual mmaps so we approximate by flushing * the range of the map entry containing addr. This can be incorrect * if the region splits or is coalesced with a neighbor. */ if (size == 0) { vm_map_entry_t entry; vm_map_lock_read(map); rv = vm_map_lookup_entry(map, addr, &entry); vm_map_unlock_read(map); if (rv == FALSE) return (EINVAL); addr = entry->start; size = entry->end - entry->start; } /* * Clean the pages and interpret the return value. */ rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0); switch (rv) { case KERN_SUCCESS: break; case KERN_INVALID_ADDRESS: return (EINVAL); /* Sun returns ENOMEM? 
*/
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	caddr_t addr;
	size_t len;
};
#endif

/*
 * munmap system call: remove the mappings covering [addr, addr + len).
 * The range is rounded out to whole pages and must lie entirely within
 * the user address space; the entire range must currently be mapped.
 */
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	/* Round the request out to whole pages. */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses. Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	/* NOTE(review): wrap-around was already rejected above; this
	 * second test is redundant. */
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

/*
 * Called when a mapped file descriptor is closed; only clears the
 * UF_MAPPED flag -- the regions themselves are left mapped (see XXX).
 */
void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	caddr_t addr;
	size_t len;
	int prot;
};
#endif

/*
 * mprotect system call: change the protection of [addr, addr + len)
 * to "prot" (masked to VM_PROT_ALL).  The range is page-rounded.
 */
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	/* Round the request out to whole pages. */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	/* Map the vm_map_protect() status onto errno values. */
	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
		FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	caddr_t addr;
size_t len; int inherit; }; #endif int minherit(p, uap, retval) struct proc *p; struct minherit_args *uap; int *retval; { vm_offset_t addr; vm_size_t size, pageoff; register vm_inherit_t inherit; addr = (vm_offset_t)uap->addr; size = uap->len; inherit = uap->inherit; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return(EINVAL); switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, inherit)) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); } return (EINVAL); } #ifndef _SYS_SYSPROTO_H_ struct madvise_args { caddr_t addr; size_t len; int behav; }; #endif /* ARGSUSED */ int madvise(p, uap, retval) struct proc *p; struct madvise_args *uap; int *retval; { vm_map_t map; pmap_t pmap; vm_offset_t start, end; /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (VM_MAXUSER_ADDRESS > 0 && ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS) return (EINVAL); #ifndef i386 if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS) return (EINVAL); #endif if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) return (EINVAL); /* * Since this routine is only advisory, we default to conservative * behavior. 
*/ start = round_page((vm_offset_t) uap->addr); end = trunc_page((vm_offset_t) uap->addr + uap->len); map = &p->p_vmspace->vm_map; pmap = &p->p_vmspace->vm_pmap; vm_map_madvise(map, pmap, start, end, uap->behav); /* Not yet implemented */ return (0); } #ifndef _SYS_SYSPROTO_H_ struct mincore_args { caddr_t addr; size_t len; char *vec; }; #endif /* ARGSUSED */ int mincore(p, uap, retval) struct proc *p; struct mincore_args *uap; int *retval; { vm_offset_t addr, first_addr; vm_offset_t end, cend; pmap_t pmap; vm_map_t map; char *vec; int error; int vecindex, lastvecindex; register vm_map_entry_t current; vm_map_entry_t entry; int mincoreinfo; /* * Make sure that the addresses presented are valid for user * mode. */ first_addr = addr = trunc_page((vm_offset_t) uap->addr); end = addr + (vm_size_t)round_page(uap->len); if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS) return (EINVAL); if (end < addr) return (EINVAL); /* * Address of byte vector */ vec = uap->vec; map = &p->p_vmspace->vm_map; pmap = &p->p_vmspace->vm_pmap; vm_map_lock(map); /* * Not needed here */ #if 0 VM_MAP_RANGE_CHECK(map, addr, end); #endif if (!vm_map_lookup_entry(map, addr, &entry)) entry = entry->next; /* * Do this on a map entry basis so that if the pages are not * in the current processes address space, we can easily look * up the pages elsewhere. */ lastvecindex = -1; for(current = entry; (current != &map->header) && (current->start < end); current = current->next) { /* * ignore submaps (for now) or null objects */ if (current->is_a_map || current->is_sub_map || current->object.vm_object == NULL) continue; /* * limit this scan to the current map entry and the * limits for the mincore call */ if (addr < current->start) addr = current->start; cend = current->end; if (cend > end) cend = end; /* * scan this entry one page at a time */ while(addr < cend) { /* * Check pmap first, it is likely faster, also * it can provide info as to whether we are the * one referencing or modifying the page. 
*/ mincoreinfo = pmap_mincore(pmap, addr); if (!mincoreinfo) { vm_pindex_t pindex; vm_ooffset_t offset; vm_page_t m; /* * calculate the page index into the object */ offset = current->offset + (addr - current->start); pindex = OFF_TO_IDX(offset); m = vm_page_lookup(current->object.vm_object, pindex); /* * if the page is resident, then gather information about * it. */ if (m) { mincoreinfo = MINCORE_INCORE; if (m->dirty || - pmap_tc_modified(m)) + pmap_is_modified(VM_PAGE_TO_PHYS(m))) mincoreinfo |= MINCORE_MODIFIED_OTHER; if ((m->flags & PG_REFERENCED) || - pmap_tc_referenced(VM_PAGE_TO_PHYS(m))) + pmap_is_referenced(VM_PAGE_TO_PHYS(m))) mincoreinfo |= MINCORE_REFERENCED_OTHER; } } /* * calculate index into user supplied byte vector */ vecindex = OFF_TO_IDX(addr - first_addr); /* * If we have skipped map entries, we need to make sure that * the byte vector is zeroed for those skipped entries. */ while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { vm_map_unlock(map); return (EFAULT); } ++lastvecindex; } /* * Pass the page information to the user */ error = subyte( vec + vecindex, mincoreinfo); if (error) { vm_map_unlock(map); return (EFAULT); } lastvecindex = vecindex; addr += PAGE_SIZE; } } /* * Zero the last entries in the byte vector. 
*/
	vecindex = OFF_TO_IDX(end - first_addr);
	while((lastvecindex + 1) < vecindex) {
		error = subyte( vec + lastvecindex, 0);
		if (error) {
			vm_map_unlock(map);
			return (EFAULT);
		}
		++lastvecindex;
	}

	vm_map_unlock(map);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	caddr_t addr;
	size_t len;
};
#endif

/*
 * mlock system call: wire the pages of [addr, addr + len) into physical
 * memory.  The request is page-rounded, checked against the global wired
 * page limit and, where the pmap supports wired-count accounting, against
 * the per-process RLIMIT_MEMLOCK limit; otherwise it is superuser-only.
 */
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	/* Round the request out to whole pages. */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	/* Refuse to exceed the system-wide wired-page limit. */
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

#ifdef pmap_wired_count
	/* Enforce the per-process locked-memory resource limit. */
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	/* No wired-count accounting available: restrict to superuser. */
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	caddr_t addr;
	size_t len;
};
#endif

/*
 * munlock system call: unwire the pages of [addr, addr + len), making
 * them pageable again.  Superuser-only when the pmap cannot account for
 * wired pages (mirrors the mlock() policy above).
 */
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	/* Round the request out to whole pages. */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
*/ int vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) register vm_map_t map; register vm_offset_t *addr; register vm_size_t size; vm_prot_t prot, maxprot; register int flags; caddr_t handle; /* XXX should be vp */ vm_ooffset_t foff; { boolean_t fitit; - vm_object_t object; + vm_object_t object, object2; struct vnode *vp = NULL; objtype_t type; int rv = KERN_SUCCESS; vm_ooffset_t objsize; int docow; struct proc *p = curproc; if (size == 0) return (0); objsize = size = round_page(size); /* * We currently can only deal with page aligned file offsets. * The check is here rather than in the syscall because the * kernel calls this function internally for other mmaping * operations (such as in exec) and non-aligned offsets will * cause pmap inconsistencies...so we want to be sure to * disallow this in all cases. */ if (foff & PAGE_MASK) return (EINVAL); if ((flags & MAP_FIXED) == 0) { fitit = TRUE; *addr = round_page(*addr); } else { if (*addr != trunc_page(*addr)) return (EINVAL); fitit = FALSE; (void) vm_map_remove(map, *addr, *addr + size); } /* * Lookup/allocate object. */ if (flags & MAP_ANON) { + type = OBJT_SWAP; /* * Unnamed anonymous regions always start at 0. */ - if (handle == 0) { + if (handle == 0) foff = 0; - type = OBJT_DEFAULT; - } else { - type = OBJT_SWAP; - } } else { vp = (struct vnode *) handle; if (vp->v_type == VCHR) { type = OBJT_DEVICE; handle = (caddr_t) vp->v_rdev; } else { struct vattr vat; int error; error = VOP_GETATTR(vp, &vat, p->p_ucred, p); if (error) return (error); objsize = round_page(vat.va_size); type = OBJT_VNODE; } } + object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff); + if (object == NULL) + return (type == OBJT_DEVICE ? EINVAL : ENOMEM); - if (type != OBJT_DEFAULT) { - object = vm_pager_allocate(type, handle, - OFF_TO_IDX(objsize), prot, foff); - if (object == NULL) - return (type == OBJT_DEVICE ? EINVAL : ENOMEM); - } else { - object = NULL; - } - /* * Force device mappings to be shared. 
*/ if (type == OBJT_DEVICE) { flags &= ~(MAP_PRIVATE|MAP_COPY); flags |= MAP_SHARED; } + object2 = NULL; docow = 0; if ((flags & (MAP_ANON|MAP_SHARED)) == 0) { - docow = MAP_COPY_ON_WRITE|MAP_COPY_NEEDED; + docow = MAP_COPY_ON_WRITE; + if (objsize < size) { + object2 = vm_object_allocate( OBJT_DEFAULT, + OFF_TO_IDX(size - (foff & ~PAGE_MASK))); + object2->backing_object = object; + object2->backing_object_offset = foff; + TAILQ_INSERT_TAIL(&object->shadow_head, + object2, shadow_list); + ++object->shadow_count; + } else { + docow |= MAP_COPY_NEEDED; + } } - rv = vm_map_find(map, object, foff, addr, size, fitit, - prot, maxprot, docow); + if (object2) + rv = vm_map_find(map, object2, 0, addr, size, fitit, + prot, maxprot, docow); + else + rv = vm_map_find(map, object, foff, addr, size, fitit, + prot, maxprot, docow); + if (rv != KERN_SUCCESS) { /* * Lose the object reference. Will destroy the * object if it's an unnamed anonymous mapping * or named anonymous without other references. */ - vm_object_deallocate(object); + if (object2) + vm_object_deallocate(object2); + else + vm_object_deallocate(object); goto out; } /* * "Pre-fault" resident pages. */ if ((type == OBJT_VNODE) && (map->pmap != NULL)) { pmap_object_init_pt(map->pmap, *addr, object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1); } /* * Shared memory is also shared with children. 
*/ if (flags & (MAP_SHARED|MAP_INHERIT)) { rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE); if (rv != KERN_SUCCESS) { (void) vm_map_remove(map, *addr, *addr + size); goto out; } } out: switch (rv) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: return (ENOMEM); case KERN_PROTECTION_FAILURE: return (EACCES); default: return (EINVAL); } } - -#ifdef notyet -/* - * Efficient mapping of a .text+.data+.bss object - */ -int -vm_mapaout(map, baseaddr, vp, foff, textsize, datasize, bsssize, addr) - vm_map_t map; - vm_offset_t baseaddr; - struct vnode *vp; - vm_ooffset_t foff; - register vm_size_t textsize, datasize, bsssize; - vm_offset_t *addr; -{ - vm_object_t object; - int rv; - vm_pindex_t objpsize; - struct proc *p = curproc; - - vm_size_t totalsize; - vm_size_t textend; - struct vattr vat; - int error; - - textsize = round_page(textsize); - datasize = round_page(datasize); - bsssize = round_page(bsssize); - totalsize = textsize + datasize + bsssize; - - vm_map_lock(map); - /* - * If baseaddr == -1, then we need to search for space. Otherwise, - * we need to be loaded into a certain spot. 
- */ - if (baseaddr != (vm_offset_t) -1) { - if (vm_map_findspace(map, baseaddr, totalsize, addr)) { - goto outnomem; - } - - if(*addr != baseaddr) { - goto outnomem; - } - } else { - baseaddr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ); - if (vm_map_findspace(map, baseaddr, totalsize, addr)) { - goto outnomem; - } - } - - if (foff & PAGE_MASK) { - vm_map_unlock(map); - return EINVAL; - } - - if ((vp->v_object != 0) && - ((((vm_object_t)vp->v_object)->flags & OBJ_DEAD) == 0)) { - object = vp->v_object; - vm_object_reference(object); - } else { - /* - * get the object size to allocate - */ - error = VOP_GETATTR(vp, &vat, p->p_ucred, p); - if (error) { - vm_map_unlock(map); - return error; - } - objpsize = OFF_TO_IDX(round_page(vat.va_size)); - /* - * Alloc/reference the object - */ - object = vm_pager_allocate(OBJT_VNODE, vp, - objpsize, VM_PROT_ALL, foff); - if (object == NULL) { - goto outnomem; - } - } - - /* - * Insert .text into the map - */ - textend = *addr + textsize; - rv = vm_map_insert(map, object, foff, - *addr, textend, - VM_PROT_READ|VM_PROT_EXECUTE, VM_PROT_ALL, - MAP_COPY_ON_WRITE|MAP_COPY_NEEDED); - if (rv != KERN_SUCCESS) { - vm_object_deallocate(object); - goto out; - } - - /* - * Insert .data into the map, if there is any to map. - */ - if (datasize != 0) { - object->ref_count++; - rv = vm_map_insert(map, object, foff + textsize, - textend, textend + datasize, - VM_PROT_ALL, VM_PROT_ALL, - MAP_COPY_ON_WRITE|MAP_COPY_NEEDED); - if (rv != KERN_SUCCESS) { - --object->ref_count; - vm_map_delete(map, *addr, textend); - goto out; - } - } - - /* - * Preload the page tables - */ - pmap_object_init_pt(map->pmap, *addr, - object, (vm_pindex_t) OFF_TO_IDX(foff), - textsize + datasize, 1); - - /* - * Get the space for bss. 
- */ - if (bsssize != 0) { - rv = vm_map_insert(map, NULL, 0, - textend + datasize, - *addr + totalsize, - VM_PROT_ALL, VM_PROT_ALL, 0); - } - if (rv != KERN_SUCCESS) { - vm_map_delete(map, *addr, textend + datasize + bsssize); - } - -out: - vm_map_unlock(map); - switch (rv) { - case KERN_SUCCESS: - return 0; - case KERN_INVALID_ADDRESS: - case KERN_NO_SPACE: - return ENOMEM; - case KERN_PROTECTION_FAILURE: - return EACCES; - default: - return EINVAL; - } -outnomem: - vm_map_unlock(map); - return ENOMEM; -} - - -int -mapaout(struct proc *p, struct mapaout_args *uap, int *retval) -{ - - register struct filedesc *fdp = p->p_fd; - struct file *fp; - struct vnode *vp; - int rtval; - - if (((unsigned) uap->fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_VNODE) - return (EINVAL); - - vp = (struct vnode *) fp->f_data; - if ((vp->v_type != VREG) && (vp->v_type != VCHR)) - return (EINVAL); - - rtval = vm_mapaout( &p->p_vmspace->vm_map, - uap->addr, vp, uap->offset, - uap->textsize, uap->datasize, uap->bsssize, - (vm_offset_t *)retval); - - return rtval; -} -#endif Index: head/sys/vm/vm_object.c =================================================================== --- head/sys/vm/vm_object.c (revision 17333) +++ head/sys/vm/vm_object.c (revision 17334) @@ -1,1482 +1,1481 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.c,v 1.76 1996/06/16 20:37:30 dyson Exp $ + * $Id: vm_object.c,v 1.77 1996/07/27 03:24:03 dyson Exp $ */ /* * Virtual memory object module. */ #include "opt_ddb.h" #include #include #include #include /* for curproc, pageproc */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB static void DDB_vm_object_check __P((void)); #endif static void _vm_object_allocate __P((objtype_t, vm_size_t, vm_object_t)); #ifdef DDB static int _vm_object_in_map __P((vm_map_t map, vm_object_t object, vm_map_entry_t entry)); static int vm_object_in_map __P((vm_object_t object)); #endif static void vm_object_qcollapse __P((vm_object_t object)); #ifdef not_used static void vm_object_deactivate_pages __P((vm_object_t)); #endif static void vm_object_terminate __P((vm_object_t)); static void vm_object_cache_trim __P((void)); /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given * page of memory exists within exactly one object. * * An object is only deallocated when all "references" * are given up. Only one "reference" to a given * region of an object should be writeable. * * Associated with each object is a list of all resident * memory pages belonging to that object; this list is * maintained by the "vm_page" module, and locked by the object's * lock. 
* * Each object also records a "pager" routine which is * used to retrieve (and store) pages to the proper backing * storage. In addition, objects may be backed by other * objects from which they were virtual-copied. * * The only items within the object structure which are * modified after time of creation are: * reference count locked by object's lock * pager routine locked by object's lock * */ int vm_object_cache_max; struct object_q vm_object_cached_list; static int vm_object_cached; struct object_q vm_object_list; static long vm_object_count; vm_object_t kernel_object; vm_object_t kmem_object; static struct vm_object kernel_object_store; static struct vm_object kmem_object_store; extern int vm_pageout_page_count; static long object_collapses; static long object_bypasses; static void _vm_object_allocate(type, size, object) objtype_t type; vm_size_t size; register vm_object_t object; { TAILQ_INIT(&object->memq); TAILQ_INIT(&object->shadow_head); object->type = type; object->size = size; object->ref_count = 1; object->flags = 0; object->behavior = OBJ_NORMAL; object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; object->handle = NULL; object->paging_offset = (vm_ooffset_t) 0; object->backing_object = NULL; object->backing_object_offset = (vm_ooffset_t) 0; object->last_read = 0; TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); vm_object_count++; } /* * vm_object_init: * * Initialize the VM objects module. 
*/ void vm_object_init() { TAILQ_INIT(&vm_object_cached_list); TAILQ_INIT(&vm_object_list); vm_object_count = 0; vm_object_cache_max = 84; if (cnt.v_page_count > 1000) vm_object_cache_max += (cnt.v_page_count - 1000) / 4; kernel_object = &kernel_object_store; _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kernel_object); kmem_object = &kmem_object_store; _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kmem_object); } /* * vm_object_allocate: * * Returns a new object with the given size. */ vm_object_t vm_object_allocate(type, size) objtype_t type; vm_size_t size; { register vm_object_t result; result = (vm_object_t) malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK); + _vm_object_allocate(type, size, result); return (result); } /* * vm_object_reference: * * Gets another reference to the given object. */ -void +inline void vm_object_reference(object) register vm_object_t object; { if (object == NULL) return; if (object->ref_count == 0) { if ((object->flags & OBJ_CANPERSIST) == 0) panic("vm_object_reference: non-persistent object with 0 ref_count"); TAILQ_REMOVE(&vm_object_cached_list, object, cached_list); vm_object_cached--; } object->ref_count++; } /* * vm_object_deallocate: * * Release a reference to the specified object, * gained either through a vm_object_allocate * or a vm_object_reference call. When all references * are gone, storage associated with this object * may be relinquished. * * No object may be locked. 
*/ void vm_object_deallocate(object) vm_object_t object; { vm_object_t temp; while (object != NULL) { if (object->ref_count == 0) panic("vm_object_deallocate: object deallocated too many times"); /* * Lose the reference */ object->ref_count--; if (object->ref_count != 0) { if ((object->ref_count == 1) && (object->handle == NULL) && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_t robject; robject = TAILQ_FIRST(&object->shadow_head); if ((robject != NULL) && (robject->handle == NULL) && (robject->type == OBJT_DEFAULT || robject->type == OBJT_SWAP)) { int s; robject->ref_count += 2; object->ref_count += 2; do { s = splvm(); while (robject->paging_in_progress) { robject->flags |= OBJ_PIPWNT; tsleep(robject, PVM, "objde1", 0); } while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; tsleep(object, PVM, "objde2", 0); } splx(s); } while( object->paging_in_progress || robject->paging_in_progress); object->ref_count -= 2; robject->ref_count -= 2; if( robject->ref_count == 0) { robject->ref_count += 1; object = robject; continue; } vm_object_collapse(robject); return; } } /* * If there are still references, then we are done. */ return; } if (object->type == OBJT_VNODE) { struct vnode *vp = object->handle; vp->v_flag &= ~VTEXT; } /* * See if this object can persist and has some resident * pages. If so, enter it in the cache. */ if (object->flags & OBJ_CANPERSIST) { if (object->resident_page_count != 0) { vm_object_page_clean(object, 0, 0 ,TRUE, TRUE); TAILQ_INSERT_TAIL(&vm_object_cached_list, object, cached_list); vm_object_cached++; vm_object_cache_trim(); return; } else { object->flags &= ~OBJ_CANPERSIST; } } /* * Make sure no one uses us. 
*/ object->flags |= OBJ_DEAD; temp = object->backing_object; if (temp) { TAILQ_REMOVE(&temp->shadow_head, object, shadow_list); --temp->shadow_count; } vm_object_terminate(object); /* unlocks and deallocates object */ object = temp; } } /* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * * The object must be locked. */ static void vm_object_terminate(object) register vm_object_t object; { register vm_page_t p; int s; /* * wait for the pageout daemon to be done with the object */ s = splvm(); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; tsleep(object, PVM, "objtrm", 0); } splx(s); if (object->paging_in_progress != 0) panic("vm_object_deallocate: pageout in progress"); /* * Clean and free the pages, as appropriate. All references to the * object are gone, so we don't need to lock it. */ if (object->type == OBJT_VNODE) { struct vnode *vp = object->handle; VOP_LOCK(vp); vm_object_page_clean(object, 0, 0, TRUE, FALSE); vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); VOP_UNLOCK(vp); } /* * Now free the pages. For internal objects, this also removes them * from paging queues. */ while ((p = TAILQ_FIRST(&object->memq)) != NULL) { -#if defined(DIAGNOSTIC) if (p->flags & PG_BUSY) printf("vm_object_terminate: freeing busy page\n"); -#endif PAGE_WAKEUP(p); vm_page_free(p); cnt.v_pfree++; } /* * Let the pager know object is dead. */ vm_pager_deallocate(object); TAILQ_REMOVE(&vm_object_list, object, object_list); vm_object_count--; wakeup(object); /* * Free the space for the object. */ free((caddr_t) object, M_VMOBJ); } /* * vm_object_page_clean * * Clean all dirty pages in the specified range of object. * Leaves page on whatever queue it is currently on. * * Odd semantics: if start == end, we clean everything. * * The object must be locked. 
*/ void vm_object_page_clean(object, start, end, syncio, lockflag) vm_object_t object; vm_pindex_t start; vm_pindex_t end; boolean_t syncio; boolean_t lockflag; { register vm_page_t p, np, tp; register vm_offset_t tstart, tend; vm_pindex_t pi; int s; struct vnode *vp; int runlen; int maxf; int chkb; int maxb; int i; vm_page_t maf[vm_pageout_page_count]; vm_page_t mab[vm_pageout_page_count]; vm_page_t ma[vm_pageout_page_count]; if (object->type != OBJT_VNODE || (object->flags & OBJ_MIGHTBEDIRTY) == 0) return; vp = object->handle; if (lockflag) VOP_LOCK(vp); object->flags |= OBJ_CLEANING; tstart = start; if (end == 0) { tend = object->size; } else { tend = end; } if ((tstart == 0) && (tend == object->size)) { object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); } for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) p->flags |= PG_CLEANCHK; rescan: for(p = TAILQ_FIRST(&object->memq); p; p = np) { np = TAILQ_NEXT(p, listq); pi = p->pindex; if (((p->flags & PG_CLEANCHK) == 0) || (pi < tstart) || (pi >= tend) || (p->valid == 0) || (p->queue == PQ_CACHE)) { p->flags &= ~PG_CLEANCHK; continue; } vm_page_test_dirty(p); if ((p->dirty & p->valid) == 0) { p->flags &= ~PG_CLEANCHK; continue; } s = splvm(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED|PG_REFERENCED; tsleep(p, PVM, "vpcwai", 0); splx(s); goto rescan; } splx(s); s = splvm(); maxf = 0; for(i=1;iflags & PG_BUSY) || (tp->flags & PG_CLEANCHK) == 0) break; if (tp->queue == PQ_CACHE) { tp->flags &= ~PG_CLEANCHK; break; } vm_page_test_dirty(tp); if ((tp->dirty & tp->valid) == 0) { tp->flags &= ~PG_CLEANCHK; break; } maf[ i - 1 ] = tp; maxf++; continue; } break; } maxb = 0; chkb = vm_pageout_page_count - maxf; if (chkb) { for(i = 1; i < chkb;i++) { if (tp = vm_page_lookup(object, pi - i)) { if ((tp->flags & PG_BUSY) || (tp->flags & PG_CLEANCHK) == 0) break; if (tp->queue == PQ_CACHE) { tp->flags &= ~PG_CLEANCHK; break; } vm_page_test_dirty(tp); if ((tp->dirty & tp->valid) == 0) { tp->flags &= 
~PG_CLEANCHK; break; } mab[ i - 1 ] = tp; maxb++; continue; } break; } } for(i=0;iflags |= PG_BUSY; ma[index]->flags &= ~PG_CLEANCHK; vm_page_protect(ma[index], VM_PROT_READ); } vm_page_protect(p, VM_PROT_READ); p->flags |= PG_BUSY; p->flags &= ~PG_CLEANCHK; ma[maxb] = p; for(i=0;iflags |= PG_BUSY; ma[index]->flags &= ~PG_CLEANCHK; vm_page_protect(ma[index], VM_PROT_READ); } runlen = maxb + maxf + 1; splx(s); vm_pageout_flush(ma, runlen, 0); goto rescan; } VOP_FSYNC(vp, NULL, syncio, curproc); if (lockflag) VOP_UNLOCK(vp); object->flags &= ~OBJ_CLEANING; return; } #ifdef not_used /* XXX I cannot tell if this should be an exported symbol */ /* * vm_object_deactivate_pages * * Deactivate all pages in the specified object. (Keep its pages * in memory even though it is no longer referenced.) * * The object must be locked. */ static void vm_object_deactivate_pages(object) register vm_object_t object; { register vm_page_t p, next; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); vm_page_deactivate(p); } } #endif /* * Trim the object cache to size. */ static void vm_object_cache_trim() { register vm_object_t object; while (vm_object_cached > vm_object_cache_max) { object = TAILQ_FIRST(&vm_object_cached_list); vm_object_reference(object); pager_cache(object, FALSE); } } /* * vm_object_pmap_copy: * * Makes all physical pages in the specified * object range copy-on-write. No writeable * references to these pages should remain. * * The object must *not* be locked. 
*/ void vm_object_pmap_copy(object, start, end) register vm_object_t object; register vm_pindex_t start; register vm_pindex_t end; { register vm_page_t p; if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) return; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { vm_page_protect(p, VM_PROT_READ); } object->flags &= ~OBJ_WRITEABLE; } /* * vm_object_pmap_remove: * * Removes all physical pages in the specified * object range from all physical maps. * * The object must *not* be locked. */ void vm_object_pmap_remove(object, start, end) register vm_object_t object; register vm_pindex_t start; register vm_pindex_t end; { register vm_page_t p; if (object == NULL) return; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { if (p->pindex >= start && p->pindex < end) vm_page_protect(p, VM_PROT_NONE); } } /* * vm_object_madvise: * * Implements the madvise function at the object/page level. */ void vm_object_madvise(object, pindex, count, advise) vm_object_t object; vm_pindex_t pindex; int count; int advise; { vm_pindex_t end; vm_page_t m; if (object == NULL) return; end = pindex + count; for (; pindex < end; pindex += 1) { m = vm_page_lookup(object, pindex); /* * If the page is busy or not in a normal active state, * we skip it. Things can break if we mess with pages * in any of the below states. 
*/ if (m == NULL || m->busy || (m->flags & PG_BUSY) || m->hold_count || m->wire_count || m->valid != VM_PAGE_BITS_ALL) continue; if (advise == MADV_WILLNEED) { if (m->queue != PQ_ACTIVE) vm_page_activate(m); } else if ((advise == MADV_DONTNEED) || ((advise == MADV_FREE) && ((object->type != OBJT_DEFAULT) && (object->type != OBJT_SWAP)))) { vm_page_deactivate(m); } else if (advise == MADV_FREE) { /* * Force a demand-zero on next ref */ if (object->type == OBJT_SWAP) swap_pager_dmzspace(object, m->pindex, 1); vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); } } } /* * vm_object_shadow: * * Create a new object which is backed by the * specified existing object range. The source * object reference is deallocated. * * The new object and offset into that object * are returned in the source parameters. */ void vm_object_shadow(object, offset, length) vm_object_t *object; /* IN/OUT */ vm_ooffset_t *offset; /* IN/OUT */ vm_size_t length; { register vm_object_t source; register vm_object_t result; source = *object; /* * Allocate a new object with the given length */ if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL) panic("vm_object_shadow: no object for shadowing"); /* * The new object shadows the source object, adding a reference to it. * Our caller changes his reference to point to the new object, * removing a reference to the source object. Net result: no change * of reference count. */ result->backing_object = source; if (source) { TAILQ_INSERT_TAIL(&source->shadow_head, result, shadow_list); ++source->shadow_count; } /* * Store the offset into the source object, and fix up the offset into * the new object. */ result->backing_object_offset = *offset; /* * Return the new things */ *offset = 0; *object = result; } /* * this version of collapse allows the operation to occur earlier and * when paging_in_progress is true for an object... This is not a complete * operation, but should plug 99.9% of the rest of the leaks. 
*/ static void vm_object_qcollapse(object) register vm_object_t object; { register vm_object_t backing_object; register vm_pindex_t backing_offset_index, paging_offset_index; vm_pindex_t backing_object_paging_offset_index; vm_pindex_t new_pindex; register vm_page_t p, pp; register vm_size_t size; backing_object = object->backing_object; if (backing_object->ref_count != 1) return; backing_object->ref_count += 2; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset); paging_offset_index = OFF_TO_IDX(object->paging_offset); size = object->size; p = TAILQ_FIRST(&backing_object->memq); while (p) { vm_page_t next; next = TAILQ_NEXT(p, listq); if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) || (p->queue == PQ_CACHE) || !p->valid || p->hold_count || p->wire_count || p->busy) { p = next; continue; } new_pindex = p->pindex - backing_offset_index; if (p->pindex < backing_offset_index || new_pindex >= size) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, backing_object_paging_offset_index+p->pindex, 1); vm_page_protect(p, VM_PROT_NONE); vm_page_free(p); } else { pp = vm_page_lookup(object, new_pindex); if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, paging_offset_index + new_pindex, NULL, NULL))) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, backing_object_paging_offset_index + p->pindex, 1); vm_page_protect(p, VM_PROT_NONE); vm_page_free(p); } else { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, backing_object_paging_offset_index + p->pindex, 1); vm_page_rename(p, object, new_pindex); p->dirty = VM_PAGE_BITS_ALL; } } p = next; } backing_object->ref_count -= 2; } /* * vm_object_collapse: * * Collapse an object with the object backing it. * Pages in the backing object are moved into the * parent, and the backing object is deallocated. 
*/ void vm_object_collapse(object) vm_object_t object; { vm_object_t backing_object; vm_ooffset_t backing_offset; vm_size_t size; vm_pindex_t new_pindex, backing_offset_index; vm_page_t p, pp; while (TRUE) { /* * Verify that the conditions are right for collapse: * * The object exists and no pages in it are currently being paged * out. */ if (object == NULL) return; /* * Make sure there is a backing object. */ if ((backing_object = object->backing_object) == NULL) return; /* * we check the backing object first, because it is most likely * not collapsable. */ if (backing_object->handle != NULL || (backing_object->type != OBJT_DEFAULT && backing_object->type != OBJT_SWAP) || (backing_object->flags & OBJ_DEAD) || object->handle != NULL || (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP) || (object->flags & OBJ_DEAD)) { return; } if (object->paging_in_progress != 0 || backing_object->paging_in_progress != 0) { vm_object_qcollapse(object); return; } /* * We know that we can either collapse the backing object (if * the parent is the only reference to it) or (perhaps) remove * the parent's reference to it. */ backing_offset = object->backing_object_offset; backing_offset_index = OFF_TO_IDX(backing_offset); size = object->size; /* * If there is exactly one reference to the backing object, we * can collapse it into the parent. */ if (backing_object->ref_count == 1) { backing_object->flags |= OBJ_DEAD; /* * We can collapse the backing object. * * Move all in-memory pages from backing_object to the * parent. Pages that have been paged out will be * overwritten by any of the parent's pages that * shadow them. */ while ((p = TAILQ_FIRST(&backing_object->memq)) != 0) { new_pindex = p->pindex - backing_offset_index; /* * If the parent has a page here, or if this * page falls outside the parent, dispose of * it. * * Otherwise, move it as planned. 
*/ if (p->pindex < backing_offset_index || new_pindex >= size) { vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); } else { pp = vm_page_lookup(object, new_pindex); if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL))) { vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); } else { vm_page_rename(p, object, new_pindex); } } } /* * Move the pager from backing_object to object. */ if (backing_object->type == OBJT_SWAP) { backing_object->paging_in_progress++; if (object->type == OBJT_SWAP) { object->paging_in_progress++; /* * copy shadow object pages into ours * and destroy unneeded pages in * shadow object. */ swap_pager_copy( backing_object, OFF_TO_IDX(backing_object->paging_offset), object, OFF_TO_IDX(object->paging_offset), OFF_TO_IDX(object->backing_object_offset)); vm_object_pip_wakeup(object); } else { object->paging_in_progress++; /* * move the shadow backing_object's pager data to * "object" and convert "object" type to OBJT_SWAP. */ object->type = OBJT_SWAP; object->un_pager.swp.swp_nblocks = backing_object->un_pager.swp.swp_nblocks; object->un_pager.swp.swp_allocsize = backing_object->un_pager.swp.swp_allocsize; object->un_pager.swp.swp_blocks = backing_object->un_pager.swp.swp_blocks; object->un_pager.swp.swp_poip = /* XXX */ backing_object->un_pager.swp.swp_poip; object->paging_offset = backing_object->paging_offset + backing_offset; TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list); /* * Convert backing object from OBJT_SWAP to * OBJT_DEFAULT. XXX - only the TAILQ_REMOVE is * actually necessary. 
*/ backing_object->type = OBJT_DEFAULT; TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list); /* * free unnecessary blocks */ swap_pager_freespace(object, 0, OFF_TO_IDX(object->paging_offset)); vm_object_pip_wakeup(object); } vm_object_pip_wakeup(backing_object); } /* * Object now shadows whatever backing_object did. * Note that the reference to backing_object->backing_object * moves from within backing_object to within object. */ TAILQ_REMOVE(&object->backing_object->shadow_head, object, shadow_list); --object->backing_object->shadow_count; if (backing_object->backing_object) { TAILQ_REMOVE(&backing_object->backing_object->shadow_head, backing_object, shadow_list); --backing_object->backing_object->shadow_count; } object->backing_object = backing_object->backing_object; if (object->backing_object) { TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, object, shadow_list); ++object->backing_object->shadow_count; } object->backing_object_offset += backing_object->backing_object_offset; /* * Discard backing_object. * * Since the backing object has no pages, no pager left, * and no object references within it, all that is * necessary is to dispose of it. */ TAILQ_REMOVE(&vm_object_list, backing_object, object_list); vm_object_count--; free((caddr_t) backing_object, M_VMOBJ); object_collapses++; } else { /* * If all of the pages in the backing object are * shadowed by the parent object, the parent object no * longer has to shadow the backing object; it can * shadow the next one in the chain. * * The backing object must not be paged out - we'd have * to check all of the paged-out pages, as well. */ if (backing_object->type != OBJT_DEFAULT) { return; } /* * Should have a check for a 'small' number of pages * here. */ for (p = TAILQ_FIRST(&backing_object->memq); p; p = TAILQ_NEXT(p, listq)) { new_pindex = p->pindex - backing_offset_index; /* * If the parent has a page here, or if this * page falls outside the parent, keep going. 
* * Otherwise, the backing_object must be left in * the chain. */ if (p->pindex >= backing_offset_index && new_pindex <= size) { pp = vm_page_lookup(object, new_pindex); if ((pp == NULL || pp->valid == 0) && !vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL)) { /* * Page still needed. Can't go any * further. */ return; } } } /* * Make the parent shadow the next object in the * chain. Deallocating backing_object will not remove * it, since its reference count is at least 2. */ TAILQ_REMOVE(&object->backing_object->shadow_head, object, shadow_list); --object->backing_object->shadow_count; vm_object_reference(object->backing_object = backing_object->backing_object); if (object->backing_object) { TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, object, shadow_list); ++object->backing_object->shadow_count; } object->backing_object_offset += backing_object->backing_object_offset; /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish; * so we don't need to call vm_object_deallocate. */ if (backing_object->ref_count == 1) printf("should have called obj deallocate\n"); backing_object->ref_count--; object_bypasses++; } /* * Try again with this object's new backing object. */ } } /* * vm_object_page_remove: [internal] * * Removes all physical pages in the specified * object range from the object's list of pages. * * The object must be locked. 
*/ void vm_object_page_remove(object, start, end, clean_only) register vm_object_t object; register vm_pindex_t start; register vm_pindex_t end; boolean_t clean_only; { register vm_page_t p, next; unsigned int size; int s; if (object == NULL) return; object->paging_in_progress++; again: size = end - start; if (size > 4 || size >= object->size / 4) { for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); if ((start <= p->pindex) && (p->pindex < end)) { if (p->wire_count != 0) { vm_page_protect(p, VM_PROT_NONE); p->valid = 0; continue; } /* * The busy flags are only cleared at * interrupt -- minimize the spl transitions */ if ((p->flags & PG_BUSY) || p->busy) { s = splvm(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep(p, PVM, "vmopar", 0); splx(s); goto again; } splx(s); } if (clean_only) { vm_page_test_dirty(p); if (p->valid & p->dirty) continue; } vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); } } } else { while (size > 0) { if ((p = vm_page_lookup(object, start)) != 0) { if (p->wire_count != 0) { p->valid = 0; vm_page_protect(p, VM_PROT_NONE); start += 1; size -= 1; continue; } /* * The busy flags are only cleared at * interrupt -- minimize the spl transitions */ if ((p->flags & PG_BUSY) || p->busy) { s = splvm(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep(p, PVM, "vmopar", 0); splx(s); goto again; } splx(s); } if (clean_only) { vm_page_test_dirty(p); if (p->valid & p->dirty) { start += 1; size -= 1; continue; } } vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); } start += 1; size -= 1; } } vm_object_pip_wakeup(object); } /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining * regions of memory into a single object. * * returns TRUE if objects were combined. * * NOTE: Only works at the moment if the second object is NULL - * if it's not, which object do we lock first? 
* * Parameters: * prev_object First object to coalesce * prev_offset Offset into prev_object * next_object Second object into coalesce * next_offset Offset into next_object * * prev_size Size of reference to prev_object * next_size Size of reference to next_object * * Conditions: * The object must *not* be locked. */ boolean_t vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size) register vm_object_t prev_object; vm_pindex_t prev_pindex; vm_size_t prev_size, next_size; { vm_size_t newsize; if (prev_object == NULL) { return (TRUE); } if (prev_object->type != OBJT_DEFAULT) { return (FALSE); } /* * Try to collapse the object first */ vm_object_collapse(prev_object); /* * Can't coalesce if: . more than one reference . paged out . shadows * another object . has a copy elsewhere (any of which mean that the * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->ref_count > 1 || prev_object->backing_object != NULL) { return (FALSE); } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; /* * Remove any pages that may still be in the object from a previous * deallocation. */ vm_object_page_remove(prev_object, prev_pindex + prev_size, prev_pindex + prev_size + next_size, FALSE); /* * Extend the object if necessary. 
*/ newsize = prev_pindex + prev_size + next_size; if (newsize > prev_object->size) prev_object->size = newsize; return (TRUE); } #ifdef DDB static int _vm_object_in_map(map, object, entry) vm_map_t map; vm_object_t object; vm_map_entry_t entry; { vm_map_t tmpm; vm_map_entry_t tmpe; vm_object_t obj; int entcount; if (map == 0) return 0; if (entry == 0) { tmpe = map->header.next; entcount = map->nentries; while (entcount-- && (tmpe != &map->header)) { if( _vm_object_in_map(map, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if (entry->is_sub_map || entry->is_a_map) { tmpm = entry->object.share_map; tmpe = tmpm->header.next; entcount = tmpm->nentries; while (entcount-- && tmpe != &tmpm->header) { if( _vm_object_in_map(tmpm, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if (obj = entry->object.vm_object) { for(; obj; obj=obj->backing_object) if( obj == object) { return 1; } } return 0; } static int vm_object_in_map( object) vm_object_t object; { struct proc *p; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) continue; if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) return 1; } if( _vm_object_in_map( kernel_map, object, 0)) return 1; if( _vm_object_in_map( kmem_map, object, 0)) return 1; if( _vm_object_in_map( pager_map, object, 0)) return 1; if( _vm_object_in_map( buffer_map, object, 0)) return 1; if( _vm_object_in_map( io_map, object, 0)) return 1; if( _vm_object_in_map( phys_map, object, 0)) return 1; if( _vm_object_in_map( mb_map, object, 0)) return 1; if( _vm_object_in_map( u_map, object, 0)) return 1; return 0; } #ifdef DDB static void DDB_vm_object_check() { vm_object_t object; /* * make sure that internal objs are in a map somewhere * and none have zero ref counts. 
*/ for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object, object_list)) { if (object->handle == NULL && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { if (object->ref_count == 0) { printf("vmochk: internal obj has zero ref count: %d\n", object->size); } if (!vm_object_in_map(object)) { printf("vmochk: internal obj is not in a map: " "ref: %d, size: %d: 0x%x, backing_object: 0x%x\n", object->ref_count, object->size, object->size, object->backing_object); } } } } #endif /* DDB */ /* * vm_object_print: [ debug ] */ void vm_object_print(iobject, full, dummy3, dummy4) /* db_expr_t */ int iobject; boolean_t full; /* db_expr_t */ int dummy3; char *dummy4; { vm_object_t object = (vm_object_t)iobject; /* XXX */ register vm_page_t p; register int count; if (object == NULL) return; iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ", (int) object, (int) object->size, object->resident_page_count, object->ref_count); printf("offset=0x%x, backing_object=(0x%x)+0x%x\n", (int) object->paging_offset, (int) object->backing_object, (int) object->backing_object_offset); printf("cache: next=%p, prev=%p\n", TAILQ_NEXT(object, cached_list), TAILQ_PREV(object, cached_list)); if (!full) return; indent += 2; count = 0; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { if (count == 0) iprintf("memory:="); else if (count == 6) { printf("\n"); iprintf(" ..."); count = 0; } else printf(","); count++; printf("(off=0x%lx,page=0x%lx)", (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p)); } if (count != 0) printf("\n"); indent -= 2; } #endif /* DDB */ Index: head/sys/vm/vm_page.c =================================================================== --- head/sys/vm/vm_page.c (revision 17333) +++ head/sys/vm/vm_page.c (revision 17334) @@ -1,1187 +1,1204 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. 
* * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_page.c,v 1.60 1996/06/26 05:39:25 dyson Exp $ + * $Id: vm_page.c,v 1.61 1996/07/27 03:24:05 dyson Exp $ */ /* * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Resident memory management module. */ #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB extern void DDB_print_page_info __P((void)); #endif /* * Associated with page of user-allocatable memory is a * page structure. */ static struct pglist *vm_page_buckets; /* Array of buckets */ static int vm_page_bucket_count; /* How big is array? 
*/ static int vm_page_hash_mask; /* Mask for hash function */ struct pglist vm_page_queue_free; struct pglist vm_page_queue_zero; struct pglist vm_page_queue_active; struct pglist vm_page_queue_inactive; struct pglist vm_page_queue_cache; int no_queue; struct { struct pglist *pl; int *cnt; } vm_page_queues[PQ_CACHE+1] = { {NULL, &no_queue}, { &vm_page_queue_free, &cnt.v_free_count}, { &vm_page_queue_zero, &cnt.v_free_count}, { &vm_page_queue_inactive, &cnt.v_inactive_count}, { &vm_page_queue_active, &cnt.v_active_count}, { &vm_page_queue_cache, &cnt.v_cache_count} }; vm_page_t vm_page_array; static int vm_page_array_size; long first_page; static long last_page; static vm_size_t page_mask; static int page_shift; int vm_page_zero_count; /* * map of contiguous valid DEV_BSIZE chunks in a page * (this list is valid for page sizes upto 16*DEV_BSIZE) */ static u_short vm_page_dev_bsize_chunks[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff }; static inline __pure int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex)) __pure2; static int vm_page_freechk_and_unqueue __P((vm_page_t m)); static void vm_page_free_wakeup __P((void)); /* * vm_set_page_size: * * Sets the page size, perhaps based upon the memory * size. Must be called before any use of page-size * dependent functions. * * Sets page_shift and page_mask from cnt.v_page_size. */ void vm_set_page_size() { if (cnt.v_page_size == 0) cnt.v_page_size = DEFAULT_PAGE_SIZE; page_mask = cnt.v_page_size - 1; if ((page_mask & cnt.v_page_size) != 0) panic("vm_set_page_size: page size not a power of two"); for (page_shift = 0;; page_shift++) if ((1 << page_shift) == cnt.v_page_size) break; } /* * vm_page_startup: * * Initializes the resident memory module. * * Allocates memory for the page cells, and * for the object/offset-to-page hash table headers. * Each page cell is initialized and placed on the free list. 
*/ vm_offset_t vm_page_startup(starta, enda, vaddr) register vm_offset_t starta; vm_offset_t enda; register vm_offset_t vaddr; { register vm_offset_t mapped; register vm_page_t m; register struct pglist *bucket; vm_size_t npages, page_range; register vm_offset_t new_start; int i; vm_offset_t pa; int nblocks; vm_offset_t first_managed_page; /* the biggest memory array is the second group of pages */ vm_offset_t start; vm_offset_t biggestone, biggestsize; vm_offset_t total; total = 0; biggestsize = 0; biggestone = 0; nblocks = 0; vaddr = round_page(vaddr); for (i = 0; phys_avail[i + 1]; i += 2) { phys_avail[i] = round_page(phys_avail[i]); phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); } for (i = 0; phys_avail[i + 1]; i += 2) { int size = phys_avail[i + 1] - phys_avail[i]; if (size > biggestsize) { biggestone = i; biggestsize = size; } ++nblocks; total += size; } start = phys_avail[biggestone]; /* * Initialize the queue headers for the free queue, the active queue * and the inactive queue. */ TAILQ_INIT(&vm_page_queue_free); TAILQ_INIT(&vm_page_queue_zero); TAILQ_INIT(&vm_page_queue_active); TAILQ_INIT(&vm_page_queue_inactive); TAILQ_INIT(&vm_page_queue_cache); /* * Allocate (and initialize) the hash table buckets. * * The number of buckets MUST BE a power of 2, and the actual value is * the next power of 2 greater than the number of physical pages in * the system. * * Note: This computation can be tweaked if desired. */ vm_page_buckets = (struct pglist *) vaddr; bucket = vm_page_buckets; if (vm_page_bucket_count == 0) { vm_page_bucket_count = 1; while (vm_page_bucket_count < atop(total)) vm_page_bucket_count <<= 1; } vm_page_hash_mask = vm_page_bucket_count - 1; /* * Validate these addresses. 
*/ new_start = start + vm_page_bucket_count * sizeof(struct pglist); new_start = round_page(new_start); mapped = vaddr; vaddr = pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE); start = new_start; bzero((caddr_t) mapped, vaddr - mapped); mapped = vaddr; for (i = 0; i < vm_page_bucket_count; i++) { TAILQ_INIT(bucket); bucket++; } /* * round (or truncate) the addresses to our page size. */ /* * Pre-allocate maps and map entries that cannot be dynamically * allocated via malloc(). The maps include the kernel_map and * kmem_map which must be initialized before malloc() will work * (obviously). Also could include pager maps which would be * allocated before kmeminit. * * Allow some kernel map entries... this should be plenty since people * shouldn't be cluttering up the kernel map (they should use their * own maps). */ kentry_data_size = MAX_KMAP * sizeof(struct vm_map) + MAX_KMAPENT * sizeof(struct vm_map_entry); kentry_data_size = round_page(kentry_data_size); kentry_data = (vm_offset_t) vaddr; vaddr += kentry_data_size; /* * Validate these zone addresses. */ new_start = start + (vaddr - mapped); pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE); bzero((caddr_t) mapped, (vaddr - mapped)); start = round_page(new_start); /* * Compute the number of pages of memory that will be available for * use (taking into account the overhead of a page structure per * page). */ first_page = phys_avail[0] / PAGE_SIZE; last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE; page_range = last_page - (phys_avail[0] / PAGE_SIZE); npages = (total - (page_range * sizeof(struct vm_page)) - (start - phys_avail[biggestone])) / PAGE_SIZE; /* * Initialize the mem entry structures now, and put them in the free * queue. */ vm_page_array = (vm_page_t) vaddr; mapped = vaddr; /* * Validate these addresses. 
*/ new_start = round_page(start + page_range * sizeof(struct vm_page)); mapped = pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE); start = new_start; first_managed_page = start / PAGE_SIZE; /* * Clear all of the page structures */ bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); vm_page_array_size = page_range; cnt.v_page_count = 0; cnt.v_free_count = 0; for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) { if (i == biggestone) pa = ptoa(first_managed_page); else pa = phys_avail[i]; while (pa < phys_avail[i + 1] && npages-- > 0) { ++cnt.v_page_count; ++cnt.v_free_count; m = PHYS_TO_VM_PAGE(pa); m->queue = PQ_FREE; m->flags = 0; m->phys_addr = pa; TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); pa += PAGE_SIZE; } } return (mapped); } /* * vm_page_hash: * * Distributes the object/offset key pair among hash buckets. * * NOTE: This macro depends on vm_page_bucket_count being a power of 2. */ static inline __pure int vm_page_hash(object, pindex) vm_object_t object; vm_pindex_t pindex; { return ((((unsigned) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask; } /* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object/object-page * table and object list. * * The object and page must be locked, and must be splhigh. */ -void +__inline void vm_page_insert(m, object, pindex) register vm_page_t m; register vm_object_t object; register vm_pindex_t pindex; { register struct pglist *bucket; if (m->flags & PG_TABLED) panic("vm_page_insert: already inserted"); /* * Record the object/offset pair in this page */ m->object = object; m->pindex = pindex; /* * Insert it into the object_object/offset hash table */ bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; TAILQ_INSERT_TAIL(bucket, m, hashq); /* * Now link into the object's list of backed pages. */ TAILQ_INSERT_TAIL(&object->memq, m, listq); m->flags |= PG_TABLED; /* * And show that the object has one more resident page. 
*/ object->resident_page_count++; } /* * vm_page_remove: [ internal use only ] * NOTE: used by device pager as well -wfj * * Removes the given mem entry from the object/offset-page * table and the object page list. * * The object and page must be locked, and at splhigh. */ -void +__inline void vm_page_remove(m) register vm_page_t m; { register struct pglist *bucket; if (!(m->flags & PG_TABLED)) return; /* * Remove from the object_object/offset hash table */ bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)]; TAILQ_REMOVE(bucket, m, hashq); /* * Now remove from the object's list of backed pages. */ TAILQ_REMOVE(&m->object->memq, m, listq); /* * And show that the object has one fewer resident page. */ m->object->resident_page_count--; m->flags &= ~PG_TABLED; } /* * vm_page_lookup: * * Returns the page associated with the object/offset * pair specified; if none is found, NULL is returned. * * The object must be locked. No side effects. */ vm_page_t vm_page_lookup(object, pindex) register vm_object_t object; register vm_pindex_t pindex; { register vm_page_t m; register struct pglist *bucket; int s; /* * Search the hash table for this object/offset pair */ bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; s = splvm(); for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) { if ((m->object == object) && (m->pindex == pindex)) { splx(s); return (m); } } splx(s); return (NULL); } /* * vm_page_rename: * * Move the given memory entry from its * current object to the specified target object/offset. * * The object must be locked. 
*/ void vm_page_rename(m, new_object, new_pindex) register vm_page_t m; register vm_object_t new_object; vm_pindex_t new_pindex; { int s; s = splvm(); vm_page_remove(m); vm_page_insert(m, new_object, new_pindex); splx(s); } /* + * vm_page_unqueue without any wakeup + */ +__inline void +vm_page_unqueue_nowakeup(m) + vm_page_t m; +{ + int queue = m->queue; + if (queue != PQ_NONE) { + m->queue = PQ_NONE; + TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq); + --(*vm_page_queues[queue].cnt); + } +} + + +/* * vm_page_unqueue must be called at splhigh(); */ __inline void -vm_page_unqueue(m, wakeup) +vm_page_unqueue(m) vm_page_t m; - int wakeup; { int queue = m->queue; if (queue != PQ_NONE) { m->queue = PQ_NONE; TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq); --(*vm_page_queues[queue].cnt); - if ((queue == PQ_CACHE) && wakeup) { + if (queue == PQ_CACHE) { if ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_free_reserved + cnt.v_cache_min)) pagedaemon_wakeup(); } } } /* * vm_page_alloc: * * Allocate and return a memory cell associated * with this VM object/offset pair. * * page_req classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * VM_ALLOC_ZERO zero page * * Object must be locked. 
*/ vm_page_t vm_page_alloc(object, pindex, page_req) vm_object_t object; vm_pindex_t pindex; int page_req; { register vm_page_t m; int queue; int s; #ifdef DIAGNOSTIC m = vm_page_lookup(object, pindex); if (m) panic("vm_page_alloc: page already allocated"); #endif if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) { page_req = VM_ALLOC_SYSTEM; }; s = splvm(); switch (page_req) { case VM_ALLOC_NORMAL: if (cnt.v_free_count >= cnt.v_free_reserved) { m = TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; m = TAILQ_FIRST(&vm_page_queue_zero); } } else { m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); #if defined(DIAGNOSTIC) if (cnt.v_cache_count > 0) printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count); #endif pagedaemon_wakeup(); return (NULL); } } break; case VM_ALLOC_ZERO: if (cnt.v_free_count >= cnt.v_free_reserved) { m = TAILQ_FIRST(&vm_page_queue_zero); if (m) { --vm_page_zero_count; } else { m = TAILQ_FIRST(&vm_page_queue_free); } } else { m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); #if defined(DIAGNOSTIC) if (cnt.v_cache_count > 0) printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count); #endif pagedaemon_wakeup(); return (NULL); } } break; case VM_ALLOC_SYSTEM: if ((cnt.v_free_count >= cnt.v_free_reserved) || ((cnt.v_cache_count == 0) && (cnt.v_free_count >= cnt.v_interrupt_free_min))) { m = TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; m = TAILQ_FIRST(&vm_page_queue_zero); } } else { m = TAILQ_FIRST(&vm_page_queue_cache); if (m == NULL) { splx(s); #if defined(DIAGNOSTIC) if (cnt.v_cache_count > 0) printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count); #endif pagedaemon_wakeup(); return (NULL); } } break; case VM_ALLOC_INTERRUPT: if (cnt.v_free_count > 0) { m = TAILQ_FIRST(&vm_page_queue_free); if (m == NULL) { --vm_page_zero_count; m = TAILQ_FIRST(&vm_page_queue_zero); 
} } else { splx(s); pagedaemon_wakeup(); return (NULL); } break; default: panic("vm_page_alloc: invalid allocation class"); } queue = m->queue; TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq); --(*vm_page_queues[queue].cnt); if (queue == PQ_ZERO) { m->flags = PG_ZERO|PG_BUSY; } else if (queue == PQ_CACHE) { vm_page_remove(m); m->flags = PG_BUSY; } else { m->flags = PG_BUSY; } m->wire_count = 0; m->hold_count = 0; m->act_count = 0; m->busy = 0; m->valid = 0; m->dirty = 0; m->queue = PQ_NONE; /* XXX before splx until vm_page_insert is safe */ vm_page_insert(m, object, pindex); splx(s); /* * Don't wakeup too often - wakeup the pageout daemon when * we would be nearly out of memory. */ if (((cnt.v_free_count + cnt.v_cache_count) < (cnt.v_free_reserved + cnt.v_cache_min)) || (cnt.v_free_count < cnt.v_pageout_free_min)) pagedaemon_wakeup(); return (m); } /* * vm_page_activate: * * Put the specified page on the active list (if appropriate). * * The page queues must be locked. */ void vm_page_activate(m) register vm_page_t m; { int s; s = splvm(); if (m->queue == PQ_ACTIVE) panic("vm_page_activate: already active"); if (m->queue == PQ_CACHE) cnt.v_reactivated++; - vm_page_unqueue(m, 1); + vm_page_unqueue(m); if (m->wire_count == 0) { TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); m->queue = PQ_ACTIVE; if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; cnt.v_active_count++; } splx(s); } /* * helper routine for vm_page_free and vm_page_free_zero */ -__inline static int +static int vm_page_freechk_and_unqueue(m) vm_page_t m; { if (m->busy || (m->flags & PG_BUSY) || (m->queue == PQ_FREE) || (m->hold_count != 0)) { printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d), hold(%d)\n", m->pindex, m->busy, (m->flags & PG_BUSY) ? 
1 : 0, m->hold_count); if (m->queue == PQ_FREE) panic("vm_page_free: freeing free page"); else panic("vm_page_free: freeing busy page"); } vm_page_remove(m); - vm_page_unqueue(m,0); + vm_page_unqueue_nowakeup(m); if ((m->flags & PG_FICTITIOUS) != 0) { return 0; } if (m->wire_count != 0) { if (m->wire_count > 1) { panic("vm_page_free: invalid wire count (%d), pindex: 0x%x", m->wire_count, m->pindex); } m->wire_count = 0; cnt.v_wire_count--; } return 1; } /* * helper routine for vm_page_free and vm_page_free_zero */ -__inline static void +static __inline void vm_page_free_wakeup() { /* * if pageout daemon needs pages, then tell it that there are * some free. */ if (vm_pageout_pages_needed) { wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } /* * wakeup processes that are waiting on memory if we hit a * high water mark. And wakeup scheduler process if we have * lots of memory. this process will swapin processes. */ if (vm_pages_needed && ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) { wakeup(&cnt.v_free_count); vm_pages_needed = 0; } } /* * vm_page_free: * * Returns the given page to the free list, * disassociating it with any VM object. * * Object and page must be locked prior to entry. */ void vm_page_free(m) register vm_page_t m; { int s; s = splvm(); cnt.v_tfree++; if (!vm_page_freechk_and_unqueue(m)) { splx(s); return; } m->queue = PQ_FREE; /* * If the pageout process is grabbing the page, it is likely * that the page is NOT in the cache. It is more likely that * the page will be partially in the cache if it is being * explicitly freed. 
*/ if (curproc == pageproc) { TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); } else { TAILQ_INSERT_HEAD(&vm_page_queue_free, m, pageq); } cnt.v_free_count++; vm_page_free_wakeup(); splx(s); } void vm_page_free_zero(m) register vm_page_t m; { int s; s = splvm(); cnt.v_tfree++; if (!vm_page_freechk_and_unqueue(m)) { splx(s); return; } m->queue = PQ_ZERO; TAILQ_INSERT_HEAD(&vm_page_queue_zero, m, pageq); ++vm_page_zero_count; cnt.v_free_count++; vm_page_free_wakeup(); splx(s); } /* * vm_page_wire: * * Mark this page as wired down by yet * another map, removing it from paging queues * as necessary. * * The page queues must be locked. */ void vm_page_wire(m) register vm_page_t m; { int s; if (m->wire_count == 0) { s = splvm(); - vm_page_unqueue(m,1); + vm_page_unqueue(m); splx(s); cnt.v_wire_count++; } m->wire_count++; m->flags |= PG_MAPPED; } /* * vm_page_unwire: * * Release one wiring of this page, potentially * enabling it to be paged again. * * The page queues must be locked. */ void vm_page_unwire(m) register vm_page_t m; { int s; s = splvm(); if (m->wire_count > 0) m->wire_count--; if (m->wire_count == 0) { cnt.v_wire_count--; TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); m->queue = PQ_ACTIVE; cnt.v_active_count++; } splx(s); } /* * vm_page_deactivate: * * Returns the given page to the inactive list, * indicating that no physical maps have access * to this page. [Used by the physical mapping system.] * * The page queues must be locked. */ void vm_page_deactivate(m) register vm_page_t m; { int s; /* * Only move active pages -- ignore locked or already inactive ones. * * XXX: sometimes we get pages which aren't wired down or on any queue - * we need to put them on the inactive queue also, otherwise we lose * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93. 
*/ if (m->queue == PQ_INACTIVE) return; s = splvm(); if (m->wire_count == 0 && m->hold_count == 0) { if (m->queue == PQ_CACHE) cnt.v_reactivated++; - vm_page_unqueue(m,1); + vm_page_unqueue(m); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); m->queue = PQ_INACTIVE; cnt.v_inactive_count++; } splx(s); } /* * vm_page_cache * * Put the specified page onto the page cache queue (if appropriate). */ void vm_page_cache(m) register vm_page_t m; { int s; if ((m->flags & PG_BUSY) || m->busy || m->wire_count) { printf("vm_page_cache: attempting to cache busy page\n"); return; } if (m->queue == PQ_CACHE) return; vm_page_protect(m, VM_PROT_NONE); if (m->dirty != 0) { panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex); } s = splvm(); - vm_page_unqueue(m,0); + vm_page_unqueue_nowakeup(m); TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq); m->queue = PQ_CACHE; cnt.v_cache_count++; vm_page_free_wakeup(); splx(s); } /* * mapping function for valid bits or for dirty bits in * a page */ inline int vm_page_bits(int base, int size) { u_short chunk; if ((base == 0) && (size >= PAGE_SIZE)) return VM_PAGE_BITS_ALL; size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); base = (base % PAGE_SIZE) / DEV_BSIZE; chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE]; return (chunk << base) & VM_PAGE_BITS_ALL; } /* * set a page valid and clean */ void vm_page_set_validclean(m, base, size) vm_page_t m; int base; int size; { int pagebits = vm_page_bits(base, size); m->valid |= pagebits; m->dirty &= ~pagebits; if( base == 0 && size == PAGE_SIZE) - pmap_tc_modified(m); + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); } /* * set a page (partially) invalid */ void vm_page_set_invalid(m, base, size) vm_page_t m; int base; int size; { int bits; m->valid &= ~(bits = vm_page_bits(base, size)); if (m->valid == 0) m->dirty &= ~bits; } /* * is (partial) page valid? 
*/ int vm_page_is_valid(m, base, size) vm_page_t m; int base; int size; { int bits = vm_page_bits(base, size); if (m->valid && ((m->valid & bits) == bits)) return 1; else return 0; } void vm_page_test_dirty(m) vm_page_t m; { - if (m->dirty != VM_PAGE_BITS_ALL) - pmap_tc_modified(m); + if ((m->dirty != VM_PAGE_BITS_ALL) && + pmap_is_modified(VM_PAGE_TO_PHYS(m))) { + m->dirty = VM_PAGE_BITS_ALL; + } } /* * This interface is for merging with malloc() someday. * Even if we never implement compaction so that contiguous allocation * works after initialization time, malloc()'s data structures are good * for statistics and for allocations of less than a page. */ void * contigmalloc(size, type, flags, low, high, alignment, boundary) unsigned long size; /* should be size_t here and for malloc() */ int type; int flags; unsigned long low; unsigned long high; unsigned long alignment; unsigned long boundary; { int i, s, start; vm_offset_t addr, phys, tmp_addr; vm_page_t pga = vm_page_array; size = round_page(size); if (size == 0) panic("vm_page_alloc_contig: size must not be 0"); if ((alignment & (alignment - 1)) != 0) panic("vm_page_alloc_contig: alignment must be a power of 2"); if ((boundary & (boundary - 1)) != 0) panic("vm_page_alloc_contig: boundary must be a power of 2"); start = 0; s = splvm(); again: /* * Find first page in array that is free, within range, aligned, and * such that the boundary won't be crossed. */ for (i = start; i < cnt.v_page_count; i++) { phys = VM_PAGE_TO_PHYS(&pga[i]); if ((pga[i].queue == PQ_FREE) && (phys >= low) && (phys < high) && ((phys & (alignment - 1)) == 0) && (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)) break; } /* * If the above failed or we will exceed the upper bound, fail. */ if ((i == cnt.v_page_count) || ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { splx(s); return (NULL); } start = i; /* * Check successive pages for contiguous and free. 
*/ for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { if ((VM_PAGE_TO_PHYS(&pga[i]) != (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || (pga[i].queue != PQ_FREE)) { start++; goto again; } } /* * We've found a contiguous chunk that meets are requirements. * Allocate kernel VM, unfree and assign the physical pages to it and * return kernel VM pointer. */ tmp_addr = addr = kmem_alloc_pageable(kernel_map, size); if (addr == 0) { splx(s); return (NULL); } for (i = start; i < (start + size / PAGE_SIZE); i++) { vm_page_t m = &pga[i]; TAILQ_REMOVE(&vm_page_queue_free, m, pageq); cnt.v_free_count--; m->valid = VM_PAGE_BITS_ALL; m->flags = 0; m->dirty = 0; m->wire_count = 0; m->busy = 0; m->queue = PQ_NONE; vm_page_insert(m, kernel_object, OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); vm_page_wire(m); pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m)); tmp_addr += PAGE_SIZE; } splx(s); return ((void *)addr); } vm_offset_t vm_page_alloc_contig(size, low, high, alignment) vm_offset_t size; vm_offset_t low; vm_offset_t high; vm_offset_t alignment; { return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high, alignment, 0ul)); } #ifdef DDB void DDB_print_page_info(void) { printf("cnt.v_free_count: %d\n", cnt.v_free_count); printf("cnt.v_cache_count: %d\n", cnt.v_cache_count); printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count); printf("cnt.v_active_count: %d\n", cnt.v_active_count); printf("cnt.v_wire_count: %d\n", cnt.v_wire_count); printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved); printf("cnt.v_free_min: %d\n", cnt.v_free_min); printf("cnt.v_free_target: %d\n", cnt.v_free_target); printf("cnt.v_cache_min: %d\n", cnt.v_cache_min); printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target); } #endif Index: head/sys/vm/vm_page.h =================================================================== --- head/sys/vm/vm_page.h (revision 17333) +++ head/sys/vm/vm_page.h (revision 17334) @@ -1,334 +1,334 @@ /* * Copyright (c) 1991, 1993 * The Regents of the 
University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_page.h,v 1.29 1996/06/26 05:39:25 dyson Exp $ + * $Id: vm_page.h,v 1.30 1996/07/27 03:24:06 dyson Exp $ */ /* * Resident memory system definitions. */ #ifndef _VM_PAGE_ #define _VM_PAGE_ #include /* * Management of resident (logical) pages. * * A small structure is kept for each resident * page, indexed by page number. Each structure * is an element of several lists: * * A hash table bucket used to quickly * perform object/offset lookups * * A list of all pages for a given object, * so they can be quickly deactivated at * time of deallocation. * * An ordered list of pages due for pageout. * * In addition, the structure contains the object * and offset to which this page belongs (for pageout), * and sundry status bits. * * Fields in this structure are locked either by the lock on the * object that the page belongs to (O) or by the lock on the page * queues (P). 
*/ TAILQ_HEAD(pglist, vm_page); struct vm_page { TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (P) */ TAILQ_ENTRY(vm_page) hashq; /* hash table links (O) */ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ vm_object_t object; /* which object am I in (O,P) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_offset_t phys_addr; /* physical address of page */ u_short queue:4, /* page queue index */ flags:12; /* see below */ u_short wire_count; /* wired down maps refs (P) */ short hold_count; /* page hold count */ u_char act_count; /* page usage count */ u_char busy; /* page busy count */ /* NOTE that these must support one bit per DEV_BSIZE in a page!!! */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ u_char valid; /* map of valid DEV_BSIZE chunks */ u_char dirty; /* map of dirty DEV_BSIZE chunks */ }; #define PQ_NONE 0 #define PQ_FREE 1 #define PQ_ZERO 2 #define PQ_INACTIVE 3 #define PQ_ACTIVE 4 #define PQ_CACHE 5 /* * These are the flags defined for vm_page. * * Note: PG_FILLED and PG_DIRTY are added for the filesystems. */ #define PG_BUSY 0x01 /* page is in transit (O) */ #define PG_WANTED 0x02 /* someone is waiting for page (O) */ #define PG_TABLED 0x04 /* page is in VP table (O) */ #define PG_FICTITIOUS 0x08 /* physical page doesn't exist (O) */ #define PG_WRITEABLE 0x10 /* page is mapped writeable */ #define PG_MAPPED 0x20 /* page is mapped */ #define PG_ZERO 0x40 /* page is zeroed */ #define PG_REFERENCED 0x80 /* page has been referenced */ #define PG_CLEANCHK 0x100 /* page has been checked for cleaning */ /* * Misc constants. */ #define ACT_DECLINE 1 #define ACT_ADVANCE 3 #define ACT_INIT 5 #define ACT_MAX 32 #define PFCLUSTER_BEHIND 3 #define PFCLUSTER_AHEAD 3 #ifdef KERNEL /* * Each pageable resident page falls into one of four lists: * * free * Available for allocation now. * * The following are all LRU sorted: * * cache * Almost available for allocation. 
Still in an * object, but clean and immediately freeable at * non-interrupt times. * * inactive * Low activity, candidates for reclamation. * This is the list of pages that should be * paged out next. * * active * Pages that are "active" i.e. they have been * recently referenced. * * zero * Pages that are really free and have been pre-zeroed * */ extern struct pglist vm_page_queue_free; /* memory free queue */ extern struct pglist vm_page_queue_zero; /* zeroed memory free queue */ extern struct pglist vm_page_queue_active; /* active memory queue */ extern struct pglist vm_page_queue_inactive; /* inactive memory queue */ extern struct pglist vm_page_queue_cache; /* cache memory queue */ extern int vm_page_zero_count; extern vm_page_t vm_page_array; /* First resident page in table */ extern long first_page; /* first physical page number */ /* ... represented in vm_page_array */ extern long last_page; /* last physical page number */ /* ... represented in vm_page_array */ /* [INCLUSIVE] */ extern vm_offset_t first_phys_addr; /* physical address for first_page */ extern vm_offset_t last_phys_addr; /* physical address for last_page */ #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) #define IS_VM_PHYSADDR(pa) \ ((pa) >= first_phys_addr && (pa) <= last_phys_addr) #define PHYS_TO_VM_PAGE(pa) \ (&vm_page_array[atop(pa) - first_page ]) /* * Functions implemented as macros */ #define PAGE_ASSERT_WAIT(m, interruptible) { \ (m)->flags |= PG_WANTED; \ assert_wait((int) (m), (interruptible)); \ } #define PAGE_WAKEUP(m) { \ (m)->flags &= ~PG_BUSY; \ if ((m)->flags & PG_WANTED) { \ (m)->flags &= ~PG_WANTED; \ - (m)->flags |= PG_REFERENCED; \ wakeup((caddr_t) (m)); \ } \ } #if PAGE_SIZE == 4096 #define VM_PAGE_BITS_ALL 0xff #endif #if PAGE_SIZE == 8192 #define VM_PAGE_BITS_ALL 0xffff #endif #define VM_ALLOC_NORMAL 0 #define VM_ALLOC_INTERRUPT 1 #define VM_ALLOC_SYSTEM 2 #define VM_ALLOC_ZERO 3 void vm_page_activate __P((vm_page_t)); vm_page_t vm_page_alloc __P((vm_object_t, 
vm_pindex_t, int)); void vm_page_cache __P((register vm_page_t)); static __inline void vm_page_copy __P((vm_page_t, vm_page_t)); void vm_page_deactivate __P((vm_page_t)); void vm_page_free __P((vm_page_t)); void vm_page_free_zero __P((vm_page_t)); void vm_page_insert __P((vm_page_t, vm_object_t, vm_pindex_t)); vm_page_t vm_page_lookup __P((vm_object_t, vm_pindex_t)); void vm_page_remove __P((vm_page_t)); void vm_page_rename __P((vm_page_t, vm_object_t, vm_pindex_t)); vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t)); void vm_page_unwire __P((vm_page_t)); void vm_page_wire __P((vm_page_t)); -void vm_page_unqueue __P((vm_page_t, int)); +void vm_page_unqueue __P((vm_page_t)); +void vm_page_unqueue_nowakeup __P((vm_page_t)); void vm_page_set_validclean __P((vm_page_t, int, int)); void vm_page_set_invalid __P((vm_page_t, int, int)); static __inline boolean_t vm_page_zero_fill __P((vm_page_t)); int vm_page_is_valid __P((vm_page_t, int, int)); void vm_page_test_dirty __P((vm_page_t)); int vm_page_bits __P((int, int)); /* * Keep page from being freed by the page daemon * much of the same effect as wiring, except much lower * overhead and should be used only for *very* temporary * holding ("wiring"). 
*/ static __inline void vm_page_hold(vm_page_t mem) { mem->hold_count++; } #ifdef DIAGNOSTIC #include /* make GCC shut up */ #endif static __inline void vm_page_unhold(vm_page_t mem) { #ifdef DIAGNOSTIC if (--mem->hold_count < 0) panic("vm_page_unhold: hold count < 0!!!"); #else --mem->hold_count; #endif } static __inline void vm_page_protect(vm_page_t mem, int prot) { if (prot == VM_PROT_NONE) { if (mem->flags & (PG_WRITEABLE|PG_MAPPED)) { - pmap_page_protect(mem, prot); + pmap_page_protect(VM_PAGE_TO_PHYS(mem), prot); mem->flags &= ~(PG_WRITEABLE|PG_MAPPED); } } else if ((prot == VM_PROT_READ) && (mem->flags & PG_WRITEABLE)) { - pmap_page_protect(mem, prot); + pmap_page_protect(VM_PAGE_TO_PHYS(mem), prot); mem->flags &= ~PG_WRITEABLE; } } /* * vm_page_zero_fill: * * Zero-fill the specified page. * Written as a standard pagein routine, to * be used by the zero-fill object. */ static __inline boolean_t vm_page_zero_fill(m) vm_page_t m; { pmap_zero_page(VM_PAGE_TO_PHYS(m)); return (TRUE); } /* * vm_page_copy: * * Copy one page to another */ static __inline void vm_page_copy(src_m, dest_m) vm_page_t src_m; vm_page_t dest_m; { pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m)); dest_m->valid = VM_PAGE_BITS_ALL; } #endif /* KERNEL */ #endif /* !_VM_PAGE_ */ Index: head/sys/vm/vm_pageout.c =================================================================== --- head/sys/vm/vm_pageout.c (revision 17333) +++ head/sys/vm/vm_pageout.c (revision 17334) @@ -1,1100 +1,1102 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. 
* * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pageout.c,v 1.81 1996/07/08 02:25:53 dyson Exp $ + * $Id: vm_pageout.c,v 1.83 1996/07/27 03:24:08 dyson Exp $ */ /* * The proverbial page-out daemon. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System initialization */ /* the kernel process "vm_pageout"*/ static void vm_pageout __P((void)); static int vm_pageout_clean __P((vm_page_t, int)); static int vm_pageout_scan __P((void)); static int vm_pageout_free_page_calc __P((vm_size_t count)); struct proc *pageproc; static struct kproc_desc page_kp = { "pagedaemon", vm_pageout, &pageproc }; SYSINIT_KT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp) #if !defined(NO_SWAPPING) /* the kernel process "vm_daemon"*/ static void vm_daemon __P((void)); static struct proc *vmproc; static struct kproc_desc vm_kp = { "vmdaemon", vm_daemon, &vmproc }; SYSINIT_KT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp) #endif int vm_pages_needed; /* Event on which pageout daemon sleeps */ int vm_pageout_pages_needed; /* flag saying that the pageout daemon needs pages */ extern int npendingio; #if !defined(NO_SWAPPING) static int vm_pageout_req_swapout; /* XXX */ static int vm_daemon_needed; #endif extern int nswiodone; extern int vm_swap_size; extern int vfs_update_wakeup; int vm_pageout_algorithm_lru=0; #if defined(NO_SWAPPING) int vm_swapping_enabled=0; #else int vm_swapping_enabled=1; #endif SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm, CTLFLAG_RW, &vm_pageout_algorithm_lru, 0, ""); #if defined(NO_SWAPPING) SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swapping_enabled, CTLFLAG_RD, &vm_swapping_enabled, 0, ""); #else SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swapping_enabled, CTLFLAG_RW, &vm_swapping_enabled, 0, ""); #endif #define MAXLAUNDER (cnt.v_page_count > 1800 ? 
32 : 16) #define VM_PAGEOUT_PAGE_COUNT 16 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; int vm_page_max_wired; /* XXX max # of wired pages system-wide */ #if !defined(NO_SWAPPING) typedef void freeer_fcn_t __P((vm_map_t, vm_object_t, vm_pindex_t, int)); static void vm_pageout_map_deactivate_pages __P((vm_map_t, vm_pindex_t)); static freeer_fcn_t vm_pageout_object_deactivate_pages; static void vm_req_vmdaemon __P((void)); #endif /* * vm_pageout_clean: * * Clean the page and remove it from the laundry. * * We set the busy bit to cause potential page faults on this page to * block. * * And we set pageout-in-progress to keep the object from disappearing * during pageout. This guarantees that the page won't move from the * inactive queue. (However, any other page on the inactive queue may * move!) */ static int vm_pageout_clean(m, sync) vm_page_t m; int sync; { register vm_object_t object; vm_page_t mc[2*vm_pageout_page_count]; int pageout_count; int i, forward_okay, backward_okay, page_base; vm_pindex_t pindex = m->pindex; object = m->object; /* * If not OBJT_SWAP, additional memory may be needed to do the pageout. * Try to avoid the deadlock. */ if ((sync != VM_PAGEOUT_FORCE) && (object->type == OBJT_DEFAULT) && ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)) return 0; /* * Don't mess with the page if it's busy. */ if ((!sync && m->hold_count != 0) || ((m->busy != 0) || (m->flags & PG_BUSY))) return 0; /* * Try collapsing before it's too late. */ if (!sync && object->backing_object) { vm_object_collapse(object); } mc[vm_pageout_page_count] = m; pageout_count = 1; page_base = vm_pageout_page_count; forward_okay = TRUE; if (pindex != 0) backward_okay = TRUE; else backward_okay = FALSE; /* * Scan object for clusterable pages. * * We can cluster ONLY if: ->> the page is NOT * clean, wired, busy, held, or mapped into a * buffer, and one of the following: * 1) The page is inactive, or a seldom used * active page. * -or- * 2) we force the issue. 
*/ for (i = 1; (i < vm_pageout_page_count) && (forward_okay || backward_okay); i++) { vm_page_t p; /* * See if forward page is clusterable. */ if (forward_okay) { /* * Stop forward scan at end of object. */ if ((pindex + i) > object->size) { forward_okay = FALSE; goto do_backward; } p = vm_page_lookup(object, pindex + i); if (p) { if ((p->queue == PQ_CACHE) || (p->flags & PG_BUSY) || p->busy) { forward_okay = FALSE; goto do_backward; } vm_page_test_dirty(p); if ((p->dirty & p->valid) != 0 && ((p->queue == PQ_INACTIVE) || (sync == VM_PAGEOUT_FORCE)) && (p->wire_count == 0) && (p->hold_count == 0)) { mc[vm_pageout_page_count + i] = p; pageout_count++; if (pageout_count == vm_pageout_page_count) break; } else { forward_okay = FALSE; } } else { forward_okay = FALSE; } } do_backward: /* * See if backward page is clusterable. */ if (backward_okay) { /* * Stop backward scan at beginning of object. */ if ((pindex - i) == 0) { backward_okay = FALSE; } p = vm_page_lookup(object, pindex - i); if (p) { if ((p->queue == PQ_CACHE) || (p->flags & PG_BUSY) || p->busy) { backward_okay = FALSE; continue; } vm_page_test_dirty(p); if ((p->dirty & p->valid) != 0 && ((p->queue == PQ_INACTIVE) || (sync == VM_PAGEOUT_FORCE)) && (p->wire_count == 0) && (p->hold_count == 0)) { mc[vm_pageout_page_count - i] = p; pageout_count++; page_base--; if (pageout_count == vm_pageout_page_count) break; } else { backward_okay = FALSE; } } else { backward_okay = FALSE; } } } + /* + * we allow reads during pageouts... 
+ */ for (i = page_base; i < (page_base + pageout_count); i++) { mc[i]->flags |= PG_BUSY; - vm_page_protect(mc[i], VM_PROT_NONE); + vm_page_protect(mc[i], VM_PROT_READ); } return vm_pageout_flush(&mc[page_base], pageout_count, sync); } int vm_pageout_flush(mc, count, sync) vm_page_t *mc; int count; int sync; { register vm_object_t object; int pageout_status[count]; int anyok = 0; int i; object = mc[0]->object; object->paging_in_progress += count; vm_pager_put_pages(object, mc, count, ((sync || (object == kernel_object)) ? TRUE : FALSE), pageout_status); for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; switch (pageout_status[i]) { case VM_PAGER_OK: ++anyok; break; case VM_PAGER_PEND: ++anyok; break; case VM_PAGER_BAD: /* * Page outside of range of object. Right now we * essentially lose the changes by pretending it * worked. */ - pmap_tc_modified(mt); + pmap_clear_modify(VM_PAGE_TO_PHYS(mt)); mt->dirty = 0; break; case VM_PAGER_ERROR: case VM_PAGER_FAIL: /* * If page couldn't be paged out, then reactivate the * page so it doesn't clog the inactive list. (We * will try paging out it again later). */ if (mt->queue == PQ_INACTIVE) vm_page_activate(mt); break; case VM_PAGER_AGAIN: break; } /* * If the operation is still going, leave the page busy to * block all other accesses. Also, leave the paging in * progress indicator set so that we don't attempt an object * collapse. */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); PAGE_WAKEUP(mt); } } return anyok; } #if !defined(NO_SWAPPING) /* * vm_pageout_object_deactivate_pages * * deactivate enough pages to satisfy the inactive target * requirements or if vm_page_proc_limit is set, then * deactivate all of the pages in the object and its * backing_objects. * * The object and map must be locked. 
*/ static void vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only) vm_map_t map; vm_object_t object; vm_pindex_t desired; int map_remove_only; { register vm_page_t p, next; int rcount; int remove_mode; int s; if (object->type == OBJT_DEVICE) return; while (object) { if (vm_map_pmap(map)->pm_stats.resident_count <= desired) return; if (object->paging_in_progress) return; remove_mode = map_remove_only; if (object->shadow_count > 1) remove_mode = 1; /* * scan the objects entire memory queue */ rcount = object->resident_page_count; p = TAILQ_FIRST(&object->memq); while (p && (rcount-- > 0)) { int refcount; if (vm_map_pmap(map)->pm_stats.resident_count <= desired) return; next = TAILQ_NEXT(p, listq); cnt.v_pdpages++; if (p->wire_count != 0 || p->hold_count != 0 || p->busy != 0 || (p->flags & PG_BUSY) || !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) { p = next; continue; } - refcount = pmap_tc_referenced(VM_PAGE_TO_PHYS(p)); + refcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(p)); if (refcount) { p->flags |= PG_REFERENCED; } else if (p->flags & PG_REFERENCED) { refcount = 1; } if ((p->queue != PQ_ACTIVE) && (p->flags & PG_REFERENCED)) { vm_page_activate(p); p->act_count += refcount; p->flags &= ~PG_REFERENCED; } else if (p->queue == PQ_ACTIVE) { if ((p->flags & PG_REFERENCED) == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && (vm_pageout_algorithm_lru || (p->act_count == 0))) { vm_page_protect(p, VM_PROT_NONE); vm_page_deactivate(p); } else { s = splvm(); TAILQ_REMOVE(&vm_page_queue_active, p, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq); splx(s); } } else { p->flags &= ~PG_REFERENCED; if (p->act_count < (ACT_MAX - ACT_ADVANCE)) p->act_count += ACT_ADVANCE; s = splvm(); TAILQ_REMOVE(&vm_page_queue_active, p, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq); splx(s); } } else if (p->queue == PQ_INACTIVE) { vm_page_protect(p, VM_PROT_NONE); } p = next; } object = object->backing_object; } return; 
} /* * deactivate some number of pages in a map, try to do it fairly, but * that is really hard to do. */ static void vm_pageout_map_deactivate_pages(map, desired) vm_map_t map; vm_pindex_t desired; { vm_map_entry_t tmpe; vm_object_t obj, bigobj; vm_map_reference(map); if (!lock_try_write(&map->lock)) { vm_map_deallocate(map); return; } bigobj = NULL; /* * first, search out the biggest object, and try to free pages from * that. */ tmpe = map->header.next; while (tmpe != &map->header) { if ((tmpe->is_sub_map == 0) && (tmpe->is_a_map == 0)) { obj = tmpe->object.vm_object; if ((obj != NULL) && (obj->shadow_count <= 1) && ((bigobj == NULL) || (bigobj->resident_page_count < obj->resident_page_count))) { bigobj = obj; } } tmpe = tmpe->next; } if (bigobj) vm_pageout_object_deactivate_pages(map, bigobj, desired, 0); /* * Next, hunt around for other pages to deactivate. We actually * do this search sort of wrong -- .text first is not the best idea. */ tmpe = map->header.next; while (tmpe != &map->header) { if (vm_map_pmap(map)->pm_stats.resident_count <= desired) break; if ((tmpe->is_sub_map == 0) && (tmpe->is_a_map == 0)) { obj = tmpe->object.vm_object; if (obj) vm_pageout_object_deactivate_pages(map, obj, desired, 0); } tmpe = tmpe->next; }; /* * Remove all mappings if a process is swapped out, this will free page * table pages. */ if (desired == 0) pmap_remove(vm_map_pmap(map), VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); vm_map_unlock(map); vm_map_deallocate(map); return; } #endif /* * vm_pageout_scan does the dirty work for the pageout daemon. */ static int vm_pageout_scan() { vm_page_t m, next; int page_shortage, addl_page_shortage, maxscan, maxlaunder, pcount; int pages_freed; struct proc *p, *bigproc; vm_offset_t size, bigsize; vm_object_t object; int force_wakeup = 0; int vnodes_skipped = 0; int s; /* * Start scanning the inactive queue for pages we can free. We keep * scanning until we have enough free pages or we have scanned through * the entire queue. 
If we encounter dirty pages, we start cleaning * them. */ pages_freed = 0; addl_page_shortage = 0; maxlaunder = (cnt.v_inactive_target > MAXLAUNDER) ? MAXLAUNDER : cnt.v_inactive_target; - +rescan0: maxscan = cnt.v_inactive_count; for( m = TAILQ_FIRST(&vm_page_queue_inactive); (m != NULL) && (maxscan-- > 0) && ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_cache_min + cnt.v_free_target)); m = next) { cnt.v_pdpages++; if (m->queue != PQ_INACTIVE) { - break; + goto rescan0; } next = TAILQ_NEXT(m, pageq); if (m->hold_count) { s = splvm(); TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); splx(s); addl_page_shortage++; continue; } /* * Dont mess with busy pages, keep in the front of the * queue, most likely are being paged out. */ if (m->busy || (m->flags & PG_BUSY)) { addl_page_shortage++; continue; } - if (m->valid != 0) { - if (m->object->ref_count == 0) { - m->flags &= ~PG_REFERENCED; - pmap_tc_referenced(VM_PAGE_TO_PHYS(m)); - } else if (((m->flags & PG_REFERENCED) == 0) && - pmap_tc_referenced(VM_PAGE_TO_PHYS(m))) { - vm_page_activate(m); - continue; - } + if (m->object->ref_count == 0) { + m->flags &= ~PG_REFERENCED; + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + } else if (((m->flags & PG_REFERENCED) == 0) && + pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) { + vm_page_activate(m); + continue; + } - if ((m->flags & PG_REFERENCED) != 0) { - m->flags &= ~PG_REFERENCED; - pmap_tc_referenced(VM_PAGE_TO_PHYS(m)); - vm_page_activate(m); - continue; - } - if (m->dirty == 0) { - vm_page_test_dirty(m); - } else if (m->dirty != 0) { - m->dirty = VM_PAGE_BITS_ALL; - } - } + if ((m->flags & PG_REFERENCED) != 0) { + m->flags &= ~PG_REFERENCED; + pmap_clear_reference(VM_PAGE_TO_PHYS(m)); + vm_page_activate(m); + continue; + } + if (m->dirty == 0) { + vm_page_test_dirty(m); + } else if (m->dirty != 0) { + m->dirty = VM_PAGE_BITS_ALL; + } + if (m->valid == 0) { vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); - ++cnt.v_dfree; + 
cnt.v_dfree++; ++pages_freed; } else if (m->dirty == 0) { vm_page_cache(m); ++pages_freed; } else if (maxlaunder > 0) { int written; struct vnode *vp = NULL; object = m->object; if (object->flags & OBJ_DEAD) { s = splvm(); TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); splx(s); continue; } if (object->type == OBJT_VNODE) { vp = object->handle; if (VOP_ISLOCKED(vp) || vget(vp, 1)) { if ((m->queue == PQ_INACTIVE) && (m->hold_count == 0) && (m->busy == 0) && (m->flags & PG_BUSY) == 0) { s = splvm(); TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); splx(s); } if (object->flags & OBJ_MIGHTBEDIRTY) ++vnodes_skipped; continue; } /* * The page might have been moved to another queue * during potential blocking in vget() above. */ if (m->queue != PQ_INACTIVE) { if (object->flags & OBJ_MIGHTBEDIRTY) ++vnodes_skipped; vput(vp); continue; } /* * The page may have been busied during the blocking in * vput(); We don't move the page back onto the end of * the queue so that statistics are more correct if we don't. */ if (m->busy || (m->flags & PG_BUSY)) { vput(vp); continue; } /* * If the page has become held, then skip it */ if (m->hold_count) { s = splvm(); TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); splx(s); if (object->flags & OBJ_MIGHTBEDIRTY) ++vnodes_skipped; vput(vp); continue; } } /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the * laundry. If it is still in the laundry, then we * start the cleaning operation. */ written = vm_pageout_clean(m, 0); if (vp) vput(vp); maxlaunder -= written; } } /* * Compute the page shortage. If we are still very low on memory be * sure that we will move a minimal amount of pages from active to * inactive. 
*/ page_shortage = (cnt.v_inactive_target + cnt.v_cache_min) - (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count); if (page_shortage <= 0) { if (pages_freed == 0) { page_shortage = cnt.v_free_min - cnt.v_free_count; } else { page_shortage = 1; } } if (addl_page_shortage) { if (page_shortage < 0) page_shortage = 0; page_shortage += addl_page_shortage; } pcount = cnt.v_active_count; m = TAILQ_FIRST(&vm_page_queue_active); while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { int refcount; if (m->queue != PQ_ACTIVE) { break; } next = TAILQ_NEXT(m, pageq); /* * Don't deactivate pages that are busy. */ if ((m->busy != 0) || (m->flags & PG_BUSY) || (m->hold_count != 0)) { s = splvm(); TAILQ_REMOVE(&vm_page_queue_active, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); splx(s); m = next; continue; } /* * The count for pagedaemon pages is done after checking the * page for eligbility... */ cnt.v_pdpages++; refcount = 0; if (m->object->ref_count != 0) { if (m->flags & PG_REFERENCED) { refcount += 1; } - refcount += pmap_tc_referenced(VM_PAGE_TO_PHYS(m)); + refcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m)); if (refcount) { m->act_count += ACT_ADVANCE + refcount; if (m->act_count > ACT_MAX) m->act_count = ACT_MAX; } } m->flags &= ~PG_REFERENCED; if (refcount && (m->object->ref_count != 0)) { s = splvm(); TAILQ_REMOVE(&vm_page_queue_active, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); splx(s); } else { m->act_count -= min(m->act_count, ACT_DECLINE); if (vm_pageout_algorithm_lru || (m->object->ref_count == 0) || (m->act_count == 0)) { --page_shortage; vm_page_protect(m, VM_PROT_NONE); if ((m->dirty == 0) && (m->object->ref_count == 0)) { vm_page_cache(m); } else { vm_page_deactivate(m); } } else { s = splvm(); TAILQ_REMOVE(&vm_page_queue_active, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); splx(s); } } m = next; } s = splvm(); /* * We try to maintain some *really* free pages, this allows interrupt * code to 
be guaranteed space. */ while (cnt.v_free_count < cnt.v_free_reserved) { m = TAILQ_FIRST(&vm_page_queue_cache); if (!m) break; vm_page_free(m); cnt.v_dfree++; } splx(s); /* * If we didn't get enough free pages, and we have skipped a vnode * in a writeable object, wakeup the sync daemon. And kick swapout * if we did not get enough free pages. */ if ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_free_target + cnt.v_cache_min) ) { if (vnodes_skipped && (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) { if (!vfs_update_wakeup) { vfs_update_wakeup = 1; wakeup(&vfs_update_wakeup); } } #if !defined(NO_SWAPPING) if (vm_swapping_enabled && (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_target)) { vm_req_vmdaemon(); vm_pageout_req_swapout = 1; } #endif } /* * make sure that we have swap space -- if we are low on memory and * swap -- then kill the biggest process. */ if ((vm_swap_size == 0 || swap_pager_full) && ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) { bigproc = NULL; bigsize = 0; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { /* * if this is a system process, skip it */ if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) || ((p->p_pid < 48) && (vm_swap_size != 0))) { continue; } /* * if the process is in a non-running type state, * don't touch it. */ if (p->p_stat != SRUN && p->p_stat != SSLEEP) { continue; } /* * get the process size */ size = p->p_vmspace->vm_pmap.pm_stats.resident_count; /* * if the this process is bigger than the biggest one * remember it. 
*/ if (size > bigsize) { bigproc = p; bigsize = size; } } if (bigproc != NULL) { killproc(bigproc, "out of swap space"); bigproc->p_estcpu = 0; bigproc->p_nice = PRIO_MIN; resetpriority(bigproc); wakeup(&cnt.v_free_count); } } return force_wakeup; } static int vm_pageout_free_page_calc(count) vm_size_t count; { if (count < cnt.v_page_count) return 0; /* * free_reserved needs to include enough for the largest swap pager * structures plus enough for any pv_entry structs when paging. */ if (cnt.v_page_count > 1024) cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200; else cnt.v_free_min = 4; cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + cnt.v_interrupt_free_min; cnt.v_free_reserved = vm_pageout_page_count + cnt.v_pageout_free_min + (count / 768); cnt.v_free_min += cnt.v_free_reserved; return 1; } #ifdef unused int vm_pageout_free_pages(object, add) vm_object_t object; int add; { return vm_pageout_free_page_calc(object->size); } #endif /* * vm_pageout is the high level pageout daemon. */ static void vm_pageout() { (void) spl0(); /* * Initialize some paging parameters. */ cnt.v_interrupt_free_min = 2; if (cnt.v_page_count < 2000) vm_pageout_page_count = 8; vm_pageout_free_page_calc(cnt.v_page_count); /* * free_reserved needs to include enough for the largest swap pager * structures plus enough for any pv_entry structs when paging. */ cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved; if (cnt.v_free_count > 1024) { cnt.v_cache_max = (cnt.v_free_count - 1024) / 2; cnt.v_cache_min = (cnt.v_free_count - 1024) / 8; cnt.v_inactive_target = 2*cnt.v_cache_min + 192; } else { cnt.v_cache_min = 0; cnt.v_cache_max = 0; cnt.v_inactive_target = cnt.v_free_count / 4; } /* XXX does not really belong here */ if (vm_page_max_wired == 0) vm_page_max_wired = cnt.v_free_count / 3; swap_pager_swap_init(); /* * The pageout daemon is never done, so loop forever. 
*/ while (TRUE) { int inactive_target; int s = splvm(); if (!vm_pages_needed || ((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_min)) { vm_pages_needed = 0; tsleep(&vm_pages_needed, PVM, "psleep", 0); } else if (!vm_pages_needed) { tsleep(&vm_pages_needed, PVM, "psleep", hz/10); } inactive_target = (cnt.v_page_count - cnt.v_wire_count) / 4; if (inactive_target < 2*cnt.v_free_min) inactive_target = 2*cnt.v_free_min; cnt.v_inactive_target = inactive_target; if (vm_pages_needed) cnt.v_pdwakeups++; vm_pages_needed = 0; splx(s); vm_pager_sync(); vm_pageout_scan(); vm_pager_sync(); wakeup(&cnt.v_free_count); } } #if !defined(NO_SWAPPING) static void vm_req_vmdaemon() { static int lastrun = 0; if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } } static void vm_daemon() { vm_object_t object; struct proc *p; (void) spl0(); while (TRUE) { tsleep(&vm_daemon_needed, PUSER, "psleep", 0); if (vm_pageout_req_swapout) { swapout_procs(); vm_pageout_req_swapout = 0; } /* * scan the processes for exceeding their rlimits or if * process is swapped out -- deactivate pages */ for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { quad_t limit; vm_offset_t size; /* * if this is a system process or if we have already * looked at this process, skip it. */ if (p->p_flag & (P_SYSTEM | P_WEXIT)) { continue; } /* * if the process is in a non-running type state, * don't touch it. */ if (p->p_stat != SRUN && p->p_stat != SSLEEP) { continue; } /* * get a limit */ limit = qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur, p->p_rlimit[RLIMIT_RSS].rlim_max); /* * let processes that are swapped out really be * swapped out set the limit to nothing (will force a * swap-out.) 
*/ if ((p->p_flag & P_INMEM) == 0) limit = 0; /* XXX */ size = p->p_vmspace->vm_pmap.pm_stats.resident_count * PAGE_SIZE; if (limit >= 0 && size >= limit) { vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, (vm_pindex_t)(limit >> PAGE_SHIFT) ); } } /* * we remove cached objects that have no RSS... */ restart: object = TAILQ_FIRST(&vm_object_cached_list); while (object) { /* * if there are no resident pages -- get rid of the object */ if (object->resident_page_count == 0) { vm_object_reference(object); pager_cache(object, FALSE); goto restart; } object = TAILQ_NEXT(object, cached_list); } } } #endif Index: head/sys/vm/vnode_pager.c =================================================================== --- head/sys/vm/vnode_pager.c (revision 17333) +++ head/sys/vm/vnode_pager.c (revision 17334) @@ -1,964 +1,964 @@ /* * Copyright (c) 1990 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * Copyright (c) 1993, 1994 John S. Dyson * Copyright (c) 1995, David Greenman * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 - * $Id: vnode_pager.c,v 1.60 1996/05/03 21:01:54 phk Exp $ + * $Id: vnode_pager.c,v 1.61 1996/07/27 03:24:10 dyson Exp $ */ /* * Page to/from files (vnodes). */ /* * TODO: * Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will * greatly re-simplify the vnode_pager. 
 */

/*
 * NOTE(review): the header names of these #include directives were lost
 * when this text was extracted (angle-bracketed names stripped); the
 * bare tokens are preserved as found.  TODO: restore from the original
 * revision of sys/vm/vnode_pager.c.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

static vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address,
					 int *run));
static void vnode_pager_iodone __P((struct buf *bp));
static int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m));
static int vnode_pager_input_old __P((vm_object_t object, vm_page_t m));
static void vnode_pager_dealloc __P((vm_object_t));
static int vnode_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static int vnode_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
static boolean_t vnode_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *));

/* pager operations vector for vnode-backed objects */
struct pagerops vnodepagerops = {
	NULL,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpages,
	vnode_pager_putpages,
	vnode_pager_haspage,
	NULL
};

static int vnode_pager_leaf_getpages __P((vm_object_t object, vm_page_t *m,
					  int count, int reqpage));
static int vnode_pager_leaf_putpages __P((vm_object_t object, vm_page_t *m,
					  int count, boolean_t sync,
					  int *rtvals));

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_object_t
vnode_pager_alloc(handle, size, prot, offset)
	void *handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_ooffset_t offset;
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * Prevent race condition when allocating the object. This
	 * can happen with NFS vnodes since the nfsnode isn't locked.
	 */
	while (vp->v_flag & VOLOCK) {
		vp->v_flag |= VOWANT;
		tsleep(vp, PVM, "vnpobj", 0);
	}
	vp->v_flag |= VOLOCK;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
	while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) {
		tsleep(object, PVM, "vadead", 0);
	}

	if (object == NULL) {
		/*
		 * And an object of the appropriate size
		 */
		object = vm_object_allocate(OBJT_VNODE, size);
		/* only regular files may persist in the object cache */
		if (vp->v_type == VREG)
			object->flags = OBJ_CANPERSIST;
		else
			object->flags = 0;

		/*
		 * Hold a reference to the vnode and initialize object data.
		 */
		VREF(vp);
		object->un_pager.vnp.vnp_size = (vm_ooffset_t) size * PAGE_SIZE;

		object->handle = handle;
		vp->v_object = object;
	} else {
		/*
		 * vm_object_reference() will remove the object from the cache if
		 * found and gain a reference to the object.
		 */
		vm_object_reference(object);
	}
	if (vp->v_type == VREG)
		vp->v_flag |= VVMIO;
	/* release the allocation interlock and wake any waiters */
	vp->v_flag &= ~VOLOCK;
	if (vp->v_flag & VOWANT) {
		vp->v_flag &= ~VOWANT;
		wakeup(vp);
	}
	return (object);
}

/*
 * Tear down the pager association: wait for pending paging I/O to
 * drain, then detach the object from its vnode and drop the vnode ref.
 */
static void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	register struct vnode *vp = object->handle;

	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	if (object->paging_in_progress) {
		int s = splbio();
		while (object->paging_in_progress) {
			object->flags |= OBJ_PIPWNT;
			tsleep(object, PVM, "vnpdea", 0);
		}
		splx(s);
	}

	object->handle = NULL;

	vp->v_object = NULL;
	vp->v_flag &= ~(VTEXT | VVMIO);
	/* mark the vnode for early reuse */
	vp->v_flag |= VAGE;
	vrele(vp);
}

/*
 * Query whether the backing store has the page at pindex, and how many
 * contiguous pages exist before/after it (reported via *before/*after).
 */
static boolean_t
vnode_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
 */
	if ((vp->v_mount == NULL) ||
	    (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size))
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	/* convert the page index into a filesystem block number for bmap */
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
		after, before);
	if (err)
		return TRUE;
	if ( bn == -1)
		return FALSE;
	if (pagesperblock > 0) {
		/* scale block-granular before/after runs to page granularity */
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			int numafter;
			*after *= pagesperblock;
			numafter = pagesperblock - (poff + 1);
			/* clamp the after-run so it does not extend past EOF */
			if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) {
				numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex)));
			}
			*after += numafter;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}
		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}

/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	vm_ooffset_t nsize;
{
	vm_object_t object = vp->v_object;

	if (object == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	if (nsize == object->un_pager.vnp.vnp_size)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < object->un_pager.vnp.vnp_size) {
		vm_ooffset_t nsizerounded;
		nsizerounded = IDX_TO_OFF(OFF_TO_IDX(nsize + PAGE_MASK));
		if (nsizerounded < object->un_pager.vnp.vnp_size) {
			vm_object_page_remove(object,
				OFF_TO_IDX(nsize + PAGE_MASK),
				OFF_TO_IDX(object->un_pager.vnp.vnp_size),
				FALSE);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, OFF_TO_IDX(nsize));
			if (m) {
				kva = vm_pager_map_page(m);
				/* zero from new EOF to the end of the page */
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    (int) (round_page(nsize) - nsize));
				vm_pager_unmap_page(kva);
			}
		}
	}
	object->un_pager.vnp.vnp_size = nsize;
	object->size = OFF_TO_IDX(nsize + PAGE_MASK);
}

/*
 * Uncache the objects of every vnode on a mount point, e.g. at unmount.
 */
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	struct vnode *vp, *nvp;

loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * Vnode can be reclaimed by getnewvnode() while we
		 * traverse the list.
		 */
		if (vp->v_mount != mp)
			goto loop;

		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render vnode invalid
		 */
		nvp = vp->v_mntvnodes.le_next;
		if (vp->v_object != NULL) {
			VOP_LOCK(vp);
			vnode_pager_uncache(vp);
			VOP_UNLOCK(vp);
		}
	}
}

/*
 * Remove vnode associated object from the object cache.
 * This routine must be called with the vnode locked.
 *
 * XXX unlock the vnode.
 * We must do this since uncaching the object may result in its
 * destruction which may initiate paging activity which may necessitate
 * re-locking the vnode.
*/ void vnode_pager_uncache(vp) struct vnode *vp; { vm_object_t object; /* * Not a mapped vnode */ object = vp->v_object; if (object == NULL) return; vm_object_reference(object); VOP_UNLOCK(vp); pager_cache(object, FALSE); VOP_LOCK(vp); return; } void vnode_pager_freepage(m) vm_page_t m; { PAGE_WAKEUP(m); vm_page_free(m); } /* * calculate the linear (byte) disk address of specified virtual * file address */ static vm_offset_t vnode_pager_addr(vp, address, run) struct vnode *vp; vm_ooffset_t address; int *run; { int rtaddress; int bsize; daddr_t block; struct vnode *rtvp; int err; daddr_t vblock; int voffset; if ((int) address < 0) return -1; if (vp->v_mount == NULL) return -1; bsize = vp->v_mount->mnt_stat.f_iosize; vblock = address / bsize; voffset = address % bsize; err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL); if (err || (block == -1)) rtaddress = -1; else { rtaddress = block + voffset / DEV_BSIZE; if( run) { *run += 1; *run *= bsize/PAGE_SIZE; *run -= voffset/PAGE_SIZE; } } return rtaddress; } /* * interrupt routine for I/O completion */ static void vnode_pager_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; wakeup(bp); } /* * small block file system vnode pager input */ static int vnode_pager_input_smlfs(object, m) vm_object_t object; vm_page_t m; { int i; int s; struct vnode *dp, *vp; struct buf *bp; vm_offset_t kva; int fileaddr; vm_offset_t bsize; int error = 0; vp = object->handle; if (vp->v_mount == NULL) return VM_PAGER_BAD; bsize = vp->v_mount->mnt_stat.f_iosize; VOP_BMAP(vp, 0, &dp, 0, NULL, NULL); kva = vm_pager_map_page(m); for (i = 0; i < PAGE_SIZE / bsize; i++) { if ((vm_page_bits(IDX_TO_OFF(m->pindex) + i * bsize, bsize) & m->valid)) continue; fileaddr = vnode_pager_addr(vp, IDX_TO_OFF(m->pindex) + i * bsize, (int *)0); if (fileaddr != -1) { bp = getpbuf(); /* build a minimal buffer header */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = vnode_pager_iodone; bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 
if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_un.b_addr = (caddr_t) kva + i * bsize; bp->b_blkno = fileaddr; pbgetvp(dp, bp); bp->b_bcount = bsize; bp->b_bufsize = bsize; /* do the input */ VOP_STRATEGY(bp); /* we definitely need to be at splbio here */ s = splbio(); while ((bp->b_flags & B_DONE) == 0) { tsleep(bp, PVM, "vnsrd", 0); } splx(s); if ((bp->b_flags & B_ERROR) != 0) error = EIO; /* * free the buffer header back to the swap buffer pool */ relpbuf(bp); if (error) break; vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize); } else { vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize); bzero((caddr_t) kva + i * bsize, bsize); } } vm_pager_unmap_page(kva); - pmap_tc_modified(m); + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); m->flags &= ~PG_ZERO; if (error) { return VM_PAGER_ERROR; } return VM_PAGER_OK; } /* * old style vnode pager output routine */ static int vnode_pager_input_old(object, m) vm_object_t object; vm_page_t m; { struct uio auio; struct iovec aiov; int error; int size; vm_offset_t kva; error = 0; /* * Return failure if beyond current EOF */ if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) { return VM_PAGER_BAD; } else { size = PAGE_SIZE; if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size) size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex); /* * Allocate a kernel virtual address and initialize so that * we can use VOP_READ/WRITE routines. 
*/ kva = vm_pager_map_page(m); aiov.iov_base = (caddr_t) kva; aiov.iov_len = size; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = IDX_TO_OFF(m->pindex); auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_resid = size; auio.uio_procp = (struct proc *) 0; error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred); if (!error) { register int count = size - auio.uio_resid; if (count == 0) error = EINVAL; else if (count != PAGE_SIZE) bzero((caddr_t) kva + count, PAGE_SIZE - count); } vm_pager_unmap_page(kva); } - pmap_tc_modified(m); + pmap_clear_modify(VM_PAGE_TO_PHYS(m)); m->dirty = 0; m->flags &= ~PG_ZERO; return error ? VM_PAGER_ERROR : VM_PAGER_OK; } /* * generic vnode pager input routine */ static int vnode_pager_getpages(object, m, count, reqpage) vm_object_t object; vm_page_t *m; int count; int reqpage; { int rtval; struct vnode *vp; vp = object->handle; rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0); if (rtval == EOPNOTSUPP) return vnode_pager_leaf_getpages(object, m, count, reqpage); else return rtval; } static int vnode_pager_leaf_getpages(object, m, count, reqpage) vm_object_t object; vm_page_t *m; int count; int reqpage; { vm_offset_t kva; off_t foff; int i, size, bsize, first, firstaddr; struct vnode *dp, *vp; int runpg; int runend; struct buf *bp; int s; int error = 0; vp = object->handle; if (vp->v_mount == NULL) return VM_PAGER_BAD; bsize = vp->v_mount->mnt_stat.f_iosize; /* get the UNDERLYING device for the file with VOP_BMAP() */ /* * originally, we did not check for an error return value -- assuming * an fs always has a bmap entry point -- that assumption is wrong!!! 
*/ foff = IDX_TO_OFF(m[reqpage]->pindex); /* * if we can't bmap, use old VOP code */ if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) { for (i = 0; i < count; i++) { if (i != reqpage) { vnode_pager_freepage(m[i]); } } cnt.v_vnodein++; cnt.v_vnodepgsin++; return vnode_pager_input_old(object, m[reqpage]); /* * if the blocksize is smaller than a page size, then use * special small filesystem code. NFS sometimes has a small * blocksize, but it can handle large reads itself. */ } else if ((PAGE_SIZE / bsize) > 1 && (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { for (i = 0; i < count; i++) { if (i != reqpage) { vnode_pager_freepage(m[i]); } } cnt.v_vnodein++; cnt.v_vnodepgsin++; return vnode_pager_input_smlfs(object, m[reqpage]); } /* * if ANY DEV_BSIZE blocks are valid on a large filesystem block * then, the entire page is valid -- */ if (m[reqpage]->valid) { m[reqpage]->valid = VM_PAGE_BITS_ALL; for (i = 0; i < count; i++) { if (i != reqpage) vnode_pager_freepage(m[i]); } return VM_PAGER_OK; } /* * here on direct device I/O */ firstaddr = -1; /* * calculate the run that includes the required page */ for(first = 0, i = 0; i < count; i = runend) { firstaddr = vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &runpg); if (firstaddr == -1) { if (i == reqpage && foff < object->un_pager.vnp.vnp_size) { panic("vnode_pager_putpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d", firstaddr, foff, object->un_pager.vnp.vnp_size); } vnode_pager_freepage(m[i]); runend = i + 1; first = runend; continue; } runend = i + runpg; if (runend <= reqpage) { int j; for (j = i; j < runend; j++) { vnode_pager_freepage(m[j]); } } else { if (runpg < (count - first)) { for (i = first + runpg; i < count; i++) vnode_pager_freepage(m[i]); count = first + runpg; } break; } first = runend; } /* * the first and last page have been calculated now, move input pages * to be zero based... 
*/ if (first != 0) { for (i = first; i < count; i++) { m[i - first] = m[i]; } count -= first; reqpage -= first; } /* * calculate the file virtual address for the transfer */ foff = IDX_TO_OFF(m[0]->pindex); /* * calculate the size of the transfer */ size = count * PAGE_SIZE; if ((foff + size) > object->un_pager.vnp.vnp_size) size = object->un_pager.vnp.vnp_size - foff; /* * round up physical size for real devices */ if (dp->v_type == VBLK || dp->v_type == VCHR) size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); bp = getpbuf(); kva = (vm_offset_t) bp->b_data; /* * and map the pages to be read into the kva */ pmap_qenter(kva, m, count); /* build a minimal buffer header */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = vnode_pager_iodone; /* B_PHYS is not set, but it is nice to fill this in */ bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = firstaddr; pbgetvp(dp, bp); bp->b_bcount = size; bp->b_bufsize = size; cnt.v_vnodein++; cnt.v_vnodepgsin += count; /* do the input */ VOP_STRATEGY(bp); s = splbio(); /* we definitely need to be at splbio here */ while ((bp->b_flags & B_DONE) == 0) { tsleep(bp, PVM, "vnread", 0); } splx(s); if ((bp->b_flags & B_ERROR) != 0) error = EIO; if (!error) { if (size != count * PAGE_SIZE) bzero((caddr_t) kva + size, PAGE_SIZE * count - size); } pmap_qremove(kva, count); /* * free the buffer header back to the swap buffer pool */ relpbuf(bp); for (i = 0; i < count; i++) { - pmap_tc_modified(m[i]); + pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); m[i]->dirty = 0; m[i]->valid = VM_PAGE_BITS_ALL; m[i]->flags &= ~PG_ZERO; if (i != reqpage) { /* * whether or not to leave the page activated is up in * the air, but we should put the page on a page queue * somewhere. (it already is in the object). Result: * It appears that emperical results show that * deactivating pages is best. 
*/ /* * just in case someone was asking for this page we * now tell them that it is ok to use */ if (!error) { vm_page_deactivate(m[i]); PAGE_WAKEUP(m[i]); } else { vnode_pager_freepage(m[i]); } } } if (error) { printf("vnode_pager_getpages: I/O read error\n"); } return (error ? VM_PAGER_ERROR : VM_PAGER_OK); } static int vnode_pager_putpages(object, m, count, sync, rtvals) vm_object_t object; vm_page_t *m; int count; boolean_t sync; int *rtvals; { int rtval; struct vnode *vp; vp = object->handle; rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0); if (rtval == EOPNOTSUPP) return vnode_pager_leaf_putpages(object, m, count, sync, rtvals); else return rtval; } /* * generic vnode pager output routine */ static int vnode_pager_leaf_putpages(object, m, count, sync, rtvals) vm_object_t object; vm_page_t *m; int count; boolean_t sync; int *rtvals; { int i; struct vnode *vp; int maxsize, ncount; vm_ooffset_t poffset; struct uio auio; struct iovec aiov; int error; vp = object->handle;; for (i = 0; i < count; i++) rtvals[i] = VM_PAGER_AGAIN; if ((int) m[0]->pindex < 0) { printf("vnode_pager_putpages: attempt to write meta-data!!! 
-- 0x%x(%x)\n", m[0]->pindex, m[0]->dirty); rtvals[0] = VM_PAGER_BAD; return VM_PAGER_BAD; } maxsize = count * PAGE_SIZE; ncount = count; poffset = IDX_TO_OFF(m[0]->pindex); if (maxsize + poffset > object->un_pager.vnp.vnp_size) { if (object->un_pager.vnp.vnp_size > poffset) maxsize = object->un_pager.vnp.vnp_size - poffset; else maxsize = 0; ncount = btoc(maxsize); if (ncount < count) { for (i = ncount; i < count; i++) { rtvals[i] = VM_PAGER_BAD; } #ifdef BOGUS if (ncount == 0) { printf("vnode_pager_putpages: write past end of file: %d, %lu\n", poffset, (unsigned long) object->un_pager.vnp.vnp_size); return rtvals[0]; } #endif } } for (i = 0; i < count; i++) { m[i]->busy++; m[i]->flags &= ~PG_BUSY; } aiov.iov_base = (caddr_t) 0; aiov.iov_len = maxsize; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = poffset; auio.uio_segflg = UIO_NOCOPY; auio.uio_rw = UIO_WRITE; auio.uio_resid = maxsize; auio.uio_procp = (struct proc *) 0; error = VOP_WRITE(vp, &auio, IO_VMIO|(sync?IO_SYNC:0), curproc->p_ucred); cnt.v_vnodeout++; cnt.v_vnodepgsout += ncount; if (error) { printf("vnode_pager_putpages: I/O error %d\n", error); } if (auio.uio_resid) { printf("vnode_pager_putpages: residual I/O %d at %ld\n", auio.uio_resid, m[0]->pindex); } for (i = 0; i < count; i++) { m[i]->busy--; if (i < ncount) { rtvals[i] = VM_PAGER_OK; } if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED)) wakeup(m[i]); } return rtvals[0]; } struct vnode * vnode_pager_lock(object) vm_object_t object; { for (; object != NULL; object = object->backing_object) { if (object->type != OBJT_VNODE) continue; VOP_LOCK(object->handle); return object->handle; } return NULL; }