Index: head/sys/amd64/amd64/pmap.c
===================================================================
--- head/sys/amd64/amd64/pmap.c	(revision 6806)
+++ head/sys/amd64/amd64/pmap.c	(revision 6807)
@@ -1,2121 +1,2124 @@
 /*
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
- *	$Id: pmap.c,v 1.49 1995/02/15 04:36:31 davidg Exp $
+ *	$Id: pmap.c,v 1.50 1995/02/26 05:14:16 bde Exp $
  */
 
 /*
  * Derived from hp300 version by Mike Hibler, this version by William
  * Jolitz uses a recursive map [a pde points to the page directory] to
  * map the page tables using the pagetables themselves. This is done to
  * reduce the impact on kernel virtual memory for lots of sparse address
  * space, and to reduce the cost of memory to each process.
  *
  *	Derived from: hp300/@(#)pmap.c	7.1 (Berkeley) 12/5/90
  */
 /*
  * Major modifications by John S. Dyson primarily to support
  * pageable page tables, eliminating pmap_attributes,
  * discontiguous memory pages, and using more efficient string
  * instructions. Jan 13, 1994.  Further modifications on Mar 2, 1994,
  * general clean-up and efficiency mods.
  */
 
 /*
  *	Manages physical address maps.
  *
  *	In addition to hardware address maps, this
  *	module is called upon to provide software-use-only
  *	maps which may or may not be stored in the same
  *	form as hardware maps.  These pseudo-maps are
  *	used to store intermediate results from copy
  *	operations to and from address spaces.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 
 #include <machine/cputypes.h>
 
 #include <i386/isa/isa.h>
 
 /*
  * Allocate various and sundry SYSMAPs used in the days of old VM
  * and not yet converted.  XXX.
  */
 #define BSDVM_COMPAT	1
 
 /*
  * Get PDEs and PTEs for user/kernel address space.
  *
  * pmap_pde(m, v) yields a pointer to the page directory entry of pmap m
  * that covers virtual address v; pdir_pde(m, v) yields the entry itself
  * (usable as an lvalue) from the page directory array m.
  */
 #define	pmap_pde(m, v)	(&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023]))
 #define pdir_pde(m, v) ((m)[((vm_offset_t)(v) >> PD_SHIFT)&1023])
 
 /* Frame bits (PG_FRAME) of the entry that pte points to. */
 #define pmap_pte_pa(pte)	(*(int *)(pte) & PG_FRAME)
 
 /*
  * Flag tests on the entry that pte points to.  The pointer argument is
  * parenthesized everywhere so that expressions such as "base + n" are
  * dereferenced as a whole instead of being split by operator precedence.
  */
 #define pmap_pde_v(pte)		((*(int *)(pte) & PG_V) != 0)
 #define pmap_pte_w(pte)		((*(int *)(pte) & PG_W) != 0)
 #define pmap_pte_m(pte)		((*(int *)(pte) & PG_M) != 0)
 #define pmap_pte_u(pte)		((*(int *)(pte) & PG_U) != 0)
 #define pmap_pte_v(pte)		((*(int *)(pte) & PG_V) != 0)
 
 /* Set or clear PG_W, and replace the PG_PROT bits, in the entry at pte. */
 #define pmap_pte_set_w(pte, v)		((v)?(*(int *)(pte) |= PG_W):(*(int *)(pte) &= ~PG_W))
 #define pmap_pte_set_prot(pte, v)	((*(int *)(pte) &= ~PG_PROT), (*(int *)(pte) |= (v)))
 
 /*
  * Given a map and a machine independent protection code,
  * convert to an i386 protection code.  The map argument is
  * not used; the result is read from protection_codes[],
  * indexed by the machine independent protection value.
  */
 #define pte_prot(m, p)	(protection_codes[p])
 int protection_codes[8];
 
 struct pmap kernel_pmap_store;
 pmap_t kernel_pmap;
 
 vm_offset_t phys_avail[6];	/* 2 entries + 1 null */
 vm_offset_t avail_start;	/* PA of first available physical page */
 vm_offset_t avail_end;		/* PA of last available physical page */
 vm_size_t mem_size;		/* memory size in bytes */
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 int i386pagesperpage;		/* PAGE_SIZE / I386_PAGE_SIZE */
 boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
 vm_offset_t vm_first_phys, vm_last_phys;
 
 static inline int pmap_is_managed();
 static inline void *vm_get_pmap();
 static inline void vm_put_pmap();
 static void i386_protection_init();
 static void pmap_alloc_pv_entry();
 static inline pv_entry_t get_pv_entry();
 int nkpt;
 
 
 extern vm_offset_t clean_sva, clean_eva;
 extern int cpu_class;
 
 #if BSDVM_COMPAT
 #include <sys/msgbuf.h>
 
 /*
  * All those kernel PT submaps that BSD is so fond of
  */
 pt_entry_t *CMAP1, *CMAP2, *ptmmap;
 caddr_t CADDR1, CADDR2, ptvmmap;
 pt_entry_t *msgbufmap;
 struct msgbuf *msgbufp;
 
 #endif
 
 void 
 init_pv_entries(int);
 
 /*
  *	Routine:	pmap_pte
  *	Function:
  *		Return a pointer to the page table entry for va in
  *		the given pmap, or 0 when the pmap is null or the
  *		page directory entry covering va is empty.
  * [ what about induced faults -wfj]
  */
 
 inline pt_entry_t * const
 pmap_pte(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t pdframe;
 
 	/* nothing to look up without a pmap or a page directory entry */
 	if (!pmap || !*pmap_pde(pmap, va))
 		return (0);
 
 	pdframe = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 
 	/* alternate address space: go through the APTD window */
 	if ((pmap != kernel_pmap) && (pdframe != ((int) PTDpde & PG_FRAME))) {
 		/* point the alternate recursive slot at this pmap if needed */
 		if (pdframe != ((int) APTDpde & PG_FRAME)) {
 			APTDpde = pmap->pm_pdir[PTDPTDI];
 			pmap_update();
 		}
 		return ((pt_entry_t *) avtopte(va));
 	}
 
 	/* kernel pmap or the current address space */
 	return ((pt_entry_t *) vtopte(va));
 }
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Return the physical address that va maps to in the
  *		given pmap (frame bits of the pte combined with the
  *		offset bits of va), or 0 when the pmap is null or the
  *		page directory entry covering va is empty.
  */
 
 vm_offset_t
 pmap_extract(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t entry;
 	vm_offset_t pdframe;
 
 	if (!pmap || !*pmap_pde(pmap, va))
 		return 0;
 
 	pdframe = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 
 	if ((pmap != kernel_pmap)
 	    && (pdframe != ((int) PTDpde & PG_FRAME))) {
 		/* alternate address space: load the APTD window first */
 		if (pdframe != ((int) APTDpde & PG_FRAME)) {
 			APTDpde = pmap->pm_pdir[PTDPTDI];
 			pmap_update();
 		}
 		entry = *(int *) avtopte(va);
 	} else {
 		/* kernel pmap or the current address space */
 		entry = *(int *) vtopte(va);
 	}
 
 	return ((entry & PG_FRAME) | (va & ~PG_FRAME));
 }
 
 /*
  * Determine if a page is managed (memory vs. device): returns 1 when
  * pa lies inside one of the [start, end) pairs listed in phys_avail[],
  * and 0 otherwise or while pmap_initialized is still false.
  */
 static inline int
 pmap_is_managed(pa)
 	vm_offset_t pa;
 {
 	vm_offset_t *range;
 
 	if (!pmap_initialized)
 		return 0;
 
 	/* the list of pairs ends at the first pair whose end address is 0 */
 	for (range = phys_avail; range[1]; range += 2) {
 		if (range[0] <= pa && pa < range[1])
 			return 1;
 	}
 	return 0;
 }
 
 /*
  * Find the vm_page_t of the page table page that holds a pte, given
  * the virtual address of the pte (pt) and the pmap it belongs to.
  */
 __inline vm_page_t
 pmap_pte_vm_page(pmap, pt)
 	pmap_t pmap;
 	vm_offset_t pt;
 {
 	vm_offset_t ptva, pdindex, ptpa;
 
 	/* page of the pte -> index of its page directory entry */
 	ptva = i386_trunc_page(pt);
 	pdindex = (ptva - UPT_MIN_ADDRESS) / NBPG;
 
 	/* the directory entry supplies the physical frame of that page */
 	ptpa = ((vm_offset_t) pmap->pm_pdir[pdindex]) & PG_FRAME;
 	return PHYS_TO_VM_PAGE(ptpa);
 }
 
 /*
  * Hold a page table page: take a hold reference (vm_page_hold) on the
  * page table page that contains the pte for va.
  *
  * Nothing is done for addresses at or above UPT_MIN_ADDRESS, or while
  * pmap_initialized is still false.
  */
 __inline void
 pmap_use_pt(pmap, va)
 	pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t pt;
 
 	if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized)
 		return;
 
 	/* address of the pte for va; pmap_pte_vm_page turns it into a page */
 	pt = (vm_offset_t) vtopte(va);
 	vm_page_hold(pmap_pte_vm_page(pmap, pt));
 }
 
 /*
  * Drop the hold reference (vm_page_unhold) on the page table page that
  * contains the pte for va.  For a non-kernel pmap, once the page has
  * neither hold nor wire references left and va is below
  * KPT_MIN_ADDRESS, every mapping of the page table page is removed and
  * the page is freed.
  *
  * Nothing is done for addresses at or above UPT_MIN_ADDRESS, or while
  * pmap_initialized is still false.
  */
 inline void
 pmap_unuse_pt(pmap, va)
 	pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t pt;
 	vm_page_t m;
 
 	if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized)
 		return;
 
 	pt = (vm_offset_t) vtopte(va);
 	m = pmap_pte_vm_page(pmap, pt);
 	vm_page_unhold(m);
 	/* last reference gone: unmap the page table page and release it */
 	if (pmap != kernel_pmap &&
 	    (m->hold_count == 0) &&
 	    (m->wire_count == 0) &&
 	    (va < KPT_MIN_ADDRESS)) {
 		pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
 		vm_page_free(m);
 	}
 }
 
 /*
  * Function form of the PMAP_ACTIVATE macro: passes the pmap and the
  * pcb pointer straight through.  What activation does is defined by
  * that macro, which is not part of this file.
  */
 /* [ macro again?, should I force kstack into user map here? -wfj ] */
 void
 pmap_activate(pmap, pcbp)
 	register pmap_t pmap;
 	struct pcb *pcbp;
 {
 	PMAP_ACTIVATE(pmap, pcbp);
 }
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  *	Map the kernel's code and data, and allocate the system page table.
  *
  *	On the I386 this is called after mapping has already been enabled
  *	and just syncs the pmap module with what has already been done.
  *	[We can't call it easily with mapping off since the kernel is not
  *	mapped with PA == VA, hence we would have to relocate every address
  *	from the linked base (virtual) address "KERNBASE" to the actual
  *	(physical) address starting relative to 0]
  *
  *	firstaddr is the physical address at which the DMAPAGES-page
  *	bounce buffer is placed; available physical memory (avail_start)
  *	begins right after it.  loadaddr is not referenced in this function.
  */
 
 /* number of pages reserved at firstaddr for the ISA DMA bounce buffer */
 #define DMAPAGES 8
 void
 pmap_bootstrap(firstaddr, loadaddr)
 	vm_offset_t firstaddr;
 	vm_offset_t loadaddr;
 {
 #if BSDVM_COMPAT
 	vm_offset_t va;
 	pt_entry_t *pte;
 
 #endif
 
 	/* physical memory handed out later starts past the bounce buffer */
 	avail_start = firstaddr + DMAPAGES * NBPG;
 
 	virtual_avail = (vm_offset_t) KERNBASE + avail_start;
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 	i386pagesperpage = PAGE_SIZE / NBPG;
 
 	/*
 	 * Initialize protection array.
 	 */
 	i386_protection_init();
 
 	/*
 	 * The kernel's pmap is statically allocated so we don't have to use
 	 * pmap_create, which is unlikely to work correctly at this part of
 	 * the boot sequence.
 	 */
 	kernel_pmap = &kernel_pmap_store;
 
 	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);
 
 	simple_lock_init(&kernel_pmap->pm_lock);
 	kernel_pmap->pm_count = 1;
 	nkpt = NKPT;
 
 #if BSDVM_COMPAT
 	/*
 	 * Allocate all the submaps we need
 	 */
 	/*
 	 * SYSMAP(c, p, v, n): reserve n pages of kernel VA; v receives the
 	 * virtual address (cast to type c) and p the address of the first
 	 * pte of the range.  Advances the running va and pte cursors.
 	 */
 #define	SYSMAP(c, p, v, n)	\
 	v = (c)va; va += ((n)*NBPG); p = pte; pte += (n);
 
 	va = virtual_avail;
 	pte = pmap_pte(kernel_pmap, va);
 
 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 	    SYSMAP(caddr_t, CMAP2, CADDR2, 1)
 	    SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
 	    SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1)
 	    virtual_avail = va;
 #endif
 	/*
 	 * Reserve special hunk of memory for use by bus dma as a bounce
 	 * buffer (contiguous virtual *and* physical memory).
 	 */
 	{
 		extern vm_offset_t isaphysmem;
 
 		/* NOTE(review): va is only declared when BSDVM_COMPAT is set */
 		isaphysmem = va;
 
 		virtual_avail = pmap_map(va, firstaddr,
 		    firstaddr + DMAPAGES * NBPG, VM_PROT_ALL);
 	}
 
 	/* clear the two copy-window ptes and the first page directory entry */
 	*(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0;
 	pmap_update();
 
 }
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  *	pmap_init has been enhanced to support, in a fairly consistent
  *	way, discontiguous physical memory.
  *
  *	The phys_start and phys_end arguments are not referenced in the
  *	body; the physical range is taken from phys_avail[] instead.
  */
 void
 pmap_init(phys_start, phys_end)
 	vm_offset_t phys_start, phys_end;
 {
 	vm_offset_t addr;
 	vm_size_t npg, s;
 	int i;
 
 	/*
 	 * Now that kernel map has been allocated, we can mark as unavailable
 	 * regions which we have mapped in locore.
 	 */
 	addr = atdevbase;
 	(void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
 	    &addr, (0x100000 - 0xa0000), FALSE);
 
 	addr = (vm_offset_t) KERNBASE + IdlePTD;
 	vm_object_reference(kernel_object);
 	(void) vm_map_find(kernel_map, kernel_object, addr,
 	    &addr, (4 + NKPDE) * NBPG, FALSE);
 
 	/*
 	 * calculate the number of pv_entries needed
 	 */
 	vm_first_phys = phys_avail[0];
 	/* empty loop body: only advances i to the terminating pair */
 	for (i = 0; phys_avail[i + 1]; i += 2);
 	/* pages between the first start and the end of the last real pair */
 	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG;
 
 	/*
 	 * Allocate memory for random pmap data structures.  Includes the
 	 * pv_head_table.
 	 */
 	s = (vm_size_t) (sizeof(struct pv_entry) * npg);
 	s = i386_round_page(s);
 	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
 	pv_table = (pv_entry_t) addr;
 
 	/*
 	 * init the pv free list
 	 */
 	init_pv_entries(npg);
 	/*
 	 * Now it is safe to enable pv_table recording.
 	 */
 	pmap_initialized = TRUE;
 }
 
 /*
  *	Map the physical range [start, end) into kernel virtual
  *	address space beginning at virt, one PAGE_SIZE step at a time.
  *	Returns the first virtual address past the new mappings.
  *
  *	For now, VM is already on, we only need to map the
  *	specified memory.
  */
 vm_offset_t
 pmap_map(virt, start, end, prot)
 	vm_offset_t virt;
 	vm_offset_t start;
 	vm_offset_t end;
 	int prot;
 {
 	vm_offset_t pa;
 
 	for (pa = start; pa < end; pa += PAGE_SIZE, virt += PAGE_SIZE)
 		pmap_enter(kernel_pmap, virt, pa, prot, FALSE);
 
 	return (virt);
 }
 
 /*
  *	Create and return a physical map.
  *
  *	A size of zero asks for an actual physical map that may be
  *	referenced by the hardware; one is allocated, zeroed and
  *	initialized with pmap_pinit.
  *
  *	A non-zero size asks for a software-only map bounded by that
  *	size; no pmap is needed for it and NULL is returned.
  *
  * [ just allocate a ptd and mark it uninitialize -- should we track
  *   with a table which process has which ptd? -wfj ]
  */
 
 pmap_t
 pmap_create(size)
 	vm_size_t size;
 {
 	register pmap_t pmap;
 
 	if (size == 0) {
 		pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK);
 		bzero(pmap, sizeof(*pmap));
 		pmap_pinit(pmap);
 	} else {
 		/* software use map does not need a pmap */
 		pmap = NULL;
 	}
 	return (pmap);
 }
 
 
 /*
  * Type under which page directory pages are passed to vm_get_pmap()
  * and vm_put_pmap().  The next link is not followed anywhere in the
  * code visible here.
  */
 struct pmaplist {
 	struct pmaplist *next;
 };
 
 /*
  * Allocate ctob(1) bytes from kernel_map for use as a page directory
  * and return them zero-filled.
  */
 static inline void *
 vm_get_pmap()
 {
 	void *pdpage;
 
 	pdpage = (void *) kmem_alloc(kernel_map, ctob(1));
 	bzero(pdpage, ctob(1));
 	return pdpage;
 }
 
 /*
  * Give the ctob(1) bytes at up (obtained from vm_get_pmap) back to
  * kernel_map.
  */
 static inline void
 vm_put_pmap(up)
 	struct pmaplist *up;
 {
 	kmem_free(kernel_map, (vm_offset_t) up, ctob(1));
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
  *
  * Allocates the page directory, copies the nkpt kernel page directory
  * entries starting at KPTDI from PTD, installs the self-referential
  * entry at PTDPTDI, and sets the reference count to 1.
  */
 void
 pmap_pinit(pmap)
 	register struct pmap *pmap;
 {
 	/*
 	 * No need to allocate page table space yet but we do need a valid
 	 * page directory table.
 	 */
 	pmap->pm_pdir = (pd_entry_t *) vm_get_pmap();
 
 	/* wire in kernel global address entries */
 	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
 
 	/* install self-referential address mapping entry */
 	/* (physical address of the page directory itself, valid, PG_KW) */
 	*(int *) (pmap->pm_pdir + PTDPTDI) =
 	    ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_KW;
 
 	pmap->pm_count = 1;
 	simple_lock_init(&pmap->pm_lock);
 }
 
 /*
  * grow the number of kernel page table entries, if needed
  *
  * Makes sure a page table page is present for every NBPG * NPTEPG
  * sized region of kernel virtual address space below addr (rounded up
  * to the next such boundary).  Each new page directory entry is
  * installed in PTD, in the pmap of every process on allproc that has
  * a vmspace, and in kernel_pmap.  Runs at splhigh and panics if no
  * page can be allocated.
  */
 
 /* page allocated for the next kernel page table, if one is pending */
 vm_page_t nkpg;
 /* end of kernel VA covered by page tables; 0 until the first call */
 vm_offset_t kernel_vm_end;
 
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	struct proc *p;
 	struct pmap *pmap;
 	int s;
 
 	s = splhigh();
 	/* first call: count the page directory entries already present */
 	if (kernel_vm_end == 0) {
 		kernel_vm_end = KERNBASE;
 		nkpt = 0;
 		while (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
 			++nkpt;
 		}
 	}
 	/* advance addr to the next page-table-sized boundary */
 	addr = (addr + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
 	while (kernel_vm_end < addr) {
 		/* region already has a page table: just step over it */
 		if (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
 			continue;
 		}
 		++nkpt;
 		if (!nkpg) {
 			nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM);
 			if (!nkpg)
 				panic("pmap_growkernel: no memory to grow kernel");
 			vm_page_wire(nkpg);
 			vm_page_remove(nkpg);
 			pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
 		}
 		pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW);
 		/* the page is consumed; the next region needs a fresh one */
 		nkpg = NULL;
 
 		/* copy the new entry into every process page directory */
 		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
 			if (p->p_vmspace) {
 				pmap = &p->p_vmspace->vm_pmap;
 				*pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
 			}
 		}
 		*pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
 		kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
 	}
 	splx(s);
 }
 
 /*
  *	Drop one reference to the given physical map; when the last
  *	reference goes away, release its resources and free the
  *	structure.  A null pmap is ignored.
  *	Should only be called if the map contains
  *	no valid mappings.
  */
 void
 pmap_destroy(pmap)
 	register pmap_t pmap;
 {
 	int remaining;
 
 	if (pmap == NULL)
 		return;
 
 	/* the reference count is only touched under the pmap lock */
 	simple_lock(&pmap->pm_lock);
 	remaining = --pmap->pm_count;
 	simple_unlock(&pmap->pm_lock);
 
 	if (remaining != 0)
 		return;
 
 	pmap_release(pmap);
 	free((caddr_t) pmap, M_VMPMAP);
 }
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  *
  * The only resource released here is the page directory, which is
  * handed back through vm_put_pmap.
  */
 void
 pmap_release(pmap)
 	register struct pmap *pmap;
 {
 	vm_put_pmap((struct pmaplist *) pmap->pm_pdir);
 }
 
 /*
  *	Add a reference to the specified pmap.  A null pmap is ignored.
  */
 void
 pmap_reference(pmap)
 	pmap_t pmap;
 {
 	if (pmap == NULL)
 		return;
 
 	/* bump the count under the pmap lock */
 	simple_lock(&pmap->pm_lock);
 	pmap->pm_count++;
 	simple_unlock(&pmap->pm_lock);
 }
 
 #define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2)
 
 /*
  * Data for the pv entry allocation mechanism
  */
 int pv_freelistcnt;
 pv_entry_t pv_freelist;
 vm_offset_t pvva;
 int npvvapg;
 
 /*
  * Push a pv_entry onto the head of the free list and count it.
  * A null pointer is ignored.
  */
 inline static void
 free_pv_entry(pv)
 	pv_entry_t pv;
 {
 	if (pv) {
 		pv->pv_next = pv_freelist;
 		pv_freelist = pv;
 		++pv_freelistcnt;
 	}
 }
 
 /*
  * Take a pv_entry from the free list, asking pmap_alloc_pv_entry for
  * another block first when the list is empty or has dropped below
  * PV_FREELIST_MIN entries.
  * the memory allocation is performed bypassing the malloc code
  * because of the possibility of allocations at interrupt time.
  */
 static inline pv_entry_t
 get_pv_entry()
 {
 	pv_entry_t head;
 
 	/* top the list up before it runs dry */
 	if (pv_freelist == 0 || pv_freelistcnt < PV_FREELIST_MIN)
 		pmap_alloc_pv_entry();
 
 	/* unlink the first free entry */
 	head = pv_freelist;
 	pv_freelist = head->pv_next;
 	--pv_freelistcnt;
 	return head;
 }
 
 /*
  * this *strange* allocation routine *statistically* eliminates the
  * *possibility* of a malloc failure (*FATAL*) for a pv_entry_t data structure.
  * also -- this code is MUCH MUCH faster than the malloc equiv...
  *
  * While pre-reserved virtual pages remain (npvvapg), one physical page
  * is allocated, wired, mapped at pvva with pmap_kenter, and carved into
  * NBPG / sizeof(struct pv_entry) entries that are pushed onto the free
  * list.  Panics if the free list is still empty afterwards.
  */
 static void
 pmap_alloc_pv_entry()
 {
 	/*
 	 * do we have any pre-allocated map-pages left?
 	 */
 	if (npvvapg) {
 		vm_page_t m;
 
 		/*
 		 * we do this to keep recursion away
 		 */
 		/* (inflated count is undone again at the end of this block) */
 		pv_freelistcnt += PV_FREELIST_MIN;
 		/*
 		 * allocate a physical page out of the vm system
 		 */
 		m = vm_page_alloc(kernel_object,
 		    pvva - vm_map_min(kernel_map), VM_ALLOC_INTERRUPT);
 		if (m) {
 			int newentries;
 			int i;
 			pv_entry_t entry;
 
 			newentries = (NBPG / sizeof(struct pv_entry));
 			/*
 			 * wire the page
 			 */
 			vm_page_wire(m);
 			m->flags &= ~PG_BUSY;
 			/*
 			 * let the kernel see it
 			 */
 			pmap_kenter(pvva, VM_PAGE_TO_PHYS(m));
 
 			entry = (pv_entry_t) pvva;
 			/*
 			 * update the allocation pointers
 			 */
 			pvva += NBPG;
 			--npvvapg;
 
 			/*
 			 * free the entries into the free list
 			 */
 			for (i = 0; i < newentries; i++) {
 				free_pv_entry(entry);
 				entry++;
 			}
 		}
 		pv_freelistcnt -= PV_FREELIST_MIN;
 	}
 	/* nothing could be added and nothing was left: fatal */
 	if (!pv_freelist)
 		panic("get_pv_entry: cannot get a pv_entry_t");
 }
 
 
 
 /*
  * init the pv_entry allocation system
  *
  * npg is the number of physical pages computed by pmap_init.  Reserves
  * pageable kernel virtual space for the pv_entry pool (pvva, npvvapg
  * pages) and primes the free list.
  */
 #define PVSPERPAGE 64
 void
 init_pv_entries(npg)
 	int npg;
 {
 	/*
 	 * allocate enough kvm space for PVSPERPAGE entries per page (lots)
 	 * kvm space is fairly cheap, be generous!!!  (the system can panic if
 	 * this is too small.)
 	 */
 	npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1) / NBPG;
 	pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG);
 	/*
 	 * get the first batch of entries
 	 */
 	/* taking one entry forces an allocation; it goes straight back */
 	free_pv_entry(get_pv_entry());
 }
 
 /*
  * Return the base of the page table window through which the ptes of
  * the given pmap can be reached: PTmap for the kernel pmap or the
  * current address space, APTmap (after loading APTDpde if necessary)
  * for any other pmap.
  */
 static pt_entry_t *
 get_pt_entry(pmap)
 	pmap_t pmap;
 {
 	vm_offset_t pdframe = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 
 	if (pmap != kernel_pmap && pdframe != ((int) PTDpde & PG_FRAME)) {
 		/* alternate address space: make the APTD slot point at it */
 		if (pdframe != ((int) APTDpde & PG_FRAME)) {
 			APTDpde = pmap->pm_pdir[PTDPTDI];
 			pmap_update();
 		}
 		return APTmap;
 	}
 
 	/* kernel pmap or the current address space */
 	return PTmap;
 }
 
 /*
  * Remove the (pmap, va) entry from the pv list headed by pv.
  *
  * The first entry lives in the header itself: when it is the match,
  * the following entry is copied up into the header (or the header is
  * marked empty when there is none).  Otherwise the chain is searched
  * and the matching entry unlinked.  The entry that becomes unused is
  * returned to the free list.  Runs at splhigh.
  */
 void
 pmap_remove_entry(pmap, pv, va)
 	struct pmap *pmap;
 	pv_entry_t pv;
 	vm_offset_t va;
 {
 	pv_entry_t prev, cur;
 	int ipl;
 
 	ipl = splhigh();
 	if (pv->pv_pmap != pmap || pv->pv_va != va) {
 		/* not the header: look for the entry further down the chain */
 		prev = pv;
 		cur = pv->pv_next;
 		while (cur && (cur->pv_pmap != pmap || cur->pv_va != va)) {
 			prev = cur;
 			cur = cur->pv_next;
 		}
 		if (cur) {
 			prev->pv_next = cur->pv_next;
 			free_pv_entry(cur);
 		}
 	} else {
 		/* the header matches */
 		cur = pv->pv_next;
 		if (cur == NULL) {
 			pv->pv_pmap = NULL;
 		} else {
 			*pv = *cur;
 			free_pv_entry(cur);
 		}
 	}
 	splx(ipl);
 }
 
 /*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  *
  *	For every non-zero pte in [sva, eva) the resident (and, if set,
  *	wired) statistics are decremented, the pte is cleared, the hold
  *	on its page table page is dropped, and for managed pages the
  *	modified bit is propagated to the vm_page and the pv entry is
  *	removed.  A null pmap is ignored.
  */
 void
 pmap_remove(pmap, sva, eva)
 	struct pmap *pmap;
 	register vm_offset_t sva;
 	register vm_offset_t eva;
 {
 	register pt_entry_t *ptp, *ptq;
 	vm_offset_t pa;
 	register pv_entry_t pv;
 	vm_offset_t va;
 	vm_page_t m;		/* not referenced in this function */
 	pt_entry_t oldpte;
 
 	if (pmap == NULL)
 		return;
 
 	/* base of the pte array through which this pmap is reachable */
 	ptp = get_pt_entry(pmap);
 
 	/*
 	 * special handling of removing one page.  a very
 	 * common operation and easy to short circuit some
 	 * code.
 	 */
 	if ((sva + NBPG) == eva) {
 
 		if (*pmap_pde(pmap, sva) == 0)
 			return;
 
 		ptq = ptp + i386_btop(sva);
 
 		if (!*ptq)
 			return;
 		/*
 		 * Update statistics
 		 */
 		if (pmap_pte_w(ptq))
 			pmap->pm_stats.wired_count--;
 		pmap->pm_stats.resident_count--;
 
 		pa = pmap_pte_pa(ptq);
 		oldpte = *ptq;
 		*ptq = 0;
 
 		if (pmap_is_managed(pa)) {
 			/*
 			 * A modified pte marks the page dirty, except for
 			 * addresses in [USRSTACK + UPAGES * NBPG, KERNBASE)
 			 * and in the clean_sva..clean_eva range.
 			 */
 			if ((int) oldpte & PG_M) {
 				if ((sva < USRSTACK || sva >= KERNBASE) ||
 				    (sva >= USRSTACK && sva < USRSTACK + (UPAGES * NBPG))) {
 					if (sva < clean_sva || sva >= clean_eva) {
 						PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
 					}
 				}
 			}
 			pv = pa_to_pvh(pa);
 			pmap_remove_entry(pmap, pv, sva);
 		}
 		pmap_unuse_pt(pmap, sva);
 		pmap_update();
 		return;
 	}
 	/* from here on sva and eva are page numbers, not byte addresses */
 	sva = i386_btop(sva);
 	eva = i386_btop(eva);
 
 	while (sva < eva) {
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 
 		if (*pmap_pde(pmap, i386_ptob(sva)) == 0) {
 			/* We can race ahead here, straight to next pde.. */
 			sva = ((sva + NPTEPG) & ~(NPTEPG - 1));
 			continue;
 		}
 		ptq = ptp + sva;
 
 		/*
 		 * search for page table entries, use string operations that
 		 * are much faster than explicitly scanning when page tables
 		 * are not fully populated.
 		 */
 		if (*ptq == 0) {
 			vm_offset_t pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1));
 			vm_offset_t nscan = pdnxt - sva;
 			int found = 0;
 
 			/* never scan past the end of the requested range */
 			if ((nscan + sva) > eva)
 				nscan = eva - sva;
 
 			/*
 			 * repe scasl compares up to nscan 32-bit entries at
 			 * ptq against eax (zero) and stops after the first
 			 * one that differs; found becomes 1 in that case and
 			 * ptq is left pointing one entry past it.
 			 */
 			asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" :
 			    "=D"(ptq), "=a"(found) : "c"(nscan), "0"(ptq) : "cx");
 
 			if (!found) {
 				sva = pdnxt;
 				continue;
 			}
 			/* step back onto the non-zero entry */
 			ptq -= 1;
 
 			sva = ptq - ptp;
 		}
 		/*
 		 * Update statistics
 		 */
 		oldpte = *ptq;
 		if (((int) oldpte) & PG_W)
 			pmap->pm_stats.wired_count--;
 		pmap->pm_stats.resident_count--;
 
 		/*
 		 * Invalidate the PTEs. XXX: should cluster them up and
 		 * invalidate as many as possible at once.
 		 */
 		*ptq = 0;
 
 		va = i386_ptob(sva);
 
 		/*
 		 * Remove from the PV table (raise IPL since we may be called
 		 * at interrupt time).
 		 */
 		pa = ((int) oldpte) & PG_FRAME;
 		/* unmanaged pages have no pv entry and no dirty tracking */
 		if (!pmap_is_managed(pa)) {
 			pmap_unuse_pt(pmap, va);
 			++sva;
 			continue;
 		}
 		/* same dirty-propagation rule as in the single-page case */
 		if ((int) oldpte & PG_M) {
 			if ((va < USRSTACK || va >= KERNBASE) ||
 			    (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) {
 				if (va < clean_sva || va >= clean_eva) {
 					PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
 				}
 			}
 		}
 		pv = pa_to_pvh(pa);
 		pmap_remove_entry(pmap, pv, va);
 		pmap_unuse_pt(pmap, va);
 		++sva;
 	}
 	pmap_update();
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  *
  *		Unmanaged pages are ignored.  The pv list of the
  *		page is consumed from the header at splhigh until
  *		it is empty.
  */
 void
 pmap_remove_all(pa)
 	vm_offset_t pa;
 {
 	register pv_entry_t pv, npv;
 	register pt_entry_t *pte, *ptp;
 	vm_offset_t va;
 	struct pmap *pmap;
 	vm_page_t m;
 	int s;
 	int anyvalid = 0;
 
 	/*
 	 * Not one of ours
 	 */
 	/*
 	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
 	 * pages!
 	 */
 	if (!pmap_is_managed(pa))
 		return;
 
 	pa = i386_trunc_page(pa);
 	pv = pa_to_pvh(pa);
 	/* m is assigned here but not read again in this function */
 	m = PHYS_TO_VM_PAGE(pa);
 
 	s = splhigh();
 	/* each pass handles the mapping currently stored in the header */
 	while (pv->pv_pmap != NULL) {
 		pmap = pv->pv_pmap;
 		ptp = get_pt_entry(pmap);
 		va = pv->pv_va;
 		pte = ptp + i386_btop(va);
 		if (pmap_pte_w(pte))
 			pmap->pm_stats.wired_count--;
 		if (*pte) {
 			pmap->pm_stats.resident_count--;
 			anyvalid++;
 
 			/*
 			 * Update the vm_page_t clean and reference bits.
 			 */
 			if ((int) *pte & PG_M) {
 				if ((va < USRSTACK || va >= KERNBASE) ||
 				    (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) {
 					if (va < clean_sva || va >= clean_eva) {
 						PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
 					}
 				}
 			}
 			*pte = 0;
 			pmap_unuse_pt(pmap, va);
 		}
 		/* pull the next entry up into the header, or mark it empty */
 		npv = pv->pv_next;
 		if (npv) {
 			*pv = *npv;
 			free_pv_entry(npv);
 		} else {
 			pv->pv_pmap = NULL;
 		}
 	}
 	splx(s);
 	/* only flush when at least one pte was actually cleared */
 	if (anyvalid)
 		pmap_update();
 }
 
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  *
  *	A protection without VM_PROT_READ removes the range entirely
  *	(pmap_remove).  A protection that includes VM_PROT_WRITE is a
  *	no-op here.  Otherwise the PG_PROT bits of every non-zero pte
  *	in [sva, eva) are rewritten.  A null pmap is ignored.
  */
 void
 pmap_protect(pmap, sva, eva, prot)
 	register pmap_t pmap;
 	vm_offset_t sva, eva;
 	vm_prot_t prot;
 {
 	register pt_entry_t *pte;
 	register vm_offset_t va;
 	int i386prot;
 	register pt_entry_t *ptp;
 	int evap = i386_btop(eva);	/* end of the range as a page number */
 	int anyvalid = 0;;
 
 	if (pmap == NULL)
 		return;
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 	if (prot & VM_PROT_WRITE)
 		return;
 
 	ptp = get_pt_entry(pmap);
 
 	va = sva;
 	while (va < eva) {
 		int found = 0;
 		int svap;
 		vm_offset_t nscan;
 
 		/*
 		 * Page table page is not allocated. Skip it, we don't want to
 		 * force allocation of unnecessary PTE pages just to set the
 		 * protection.
 		 */
 		if (!*pmap_pde(pmap, va)) {
 			/* XXX: avoid address wrap around */
 			/* also entered by goto when a scan finds nothing */
 	nextpde:
 			if (va >= i386_trunc_pdr((vm_offset_t) - 1))
 				break;
 			va = i386_round_pdr(va + PAGE_SIZE);
 			continue;
 		}
 		pte = ptp + i386_btop(va);
 
 		if (*pte == 0) {
 			/*
 			 * scan for a non-empty pte
 			 */
 			svap = pte - ptp;
 			nscan = ((svap + NPTEPG) & ~(NPTEPG - 1)) - svap;
 
 			if (nscan + svap > evap)
 				nscan = evap - svap;
 
 			found = 0;
 			/*
 			 * repe scasl compares up to nscan 32-bit entries at
 			 * pte against eax (zero) and stops after the first
 			 * one that differs; found becomes 1 in that case and
 			 * pte is left pointing one entry past it.
 			 */
 			if (nscan)
 				asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" :
 				    "=D"(pte), "=a"(found) : "c"(nscan), "0"(pte) : "cx");
 
 			if (!found)
 				goto nextpde;
 
 			/* step back onto the non-zero entry */
 			pte -= 1;
 			svap = pte - ptp;
 
 			va = i386_ptob(svap);
 		}
 		anyvalid++;
 
 		i386prot = pte_prot(pmap, prot);
 		/* below UPT_MAX_ADDRESS add PG_u; page table range also PG_RW */
 		if (va < UPT_MAX_ADDRESS) {
 			i386prot |= PG_u;
 			if (va >= UPT_MIN_ADDRESS)
 				i386prot |= PG_RW;
 		}
 		pmap_pte_set_prot(pte, i386prot);
 		va += PAGE_SIZE;
 	}
 	if (anyvalid)
 		pmap_update();
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  *
  *	A null pmap is ignored.  Panics when va is above
  *	VM_MAX_KERNEL_ADDRESS or when the page directory entry
  *	covering va is empty.
  */
 void
 pmap_enter(pmap, va, pa, prot, wired)
 	register pmap_t pmap;
 	vm_offset_t va;
 	register vm_offset_t pa;
 	vm_prot_t prot;
 	boolean_t wired;
 {
 	register pt_entry_t *pte;
 	register pt_entry_t npte;
 	vm_offset_t opa;
 	int ptevalid = 0;
 
 	if (pmap == NULL)
 		return;
 
 	va = i386_trunc_page(va);
 	pa = i386_trunc_page(pa);
 	if (va > VM_MAX_KERNEL_ADDRESS)
 		panic("pmap_enter: toobig");
 
 	/*
 	 * Page Directory table entry not valid, we need a new PT page
 	 */
 	/* (no page table page is allocated here: this case panics) */
 	if (*pmap_pde(pmap, va) == 0) {
 		printf("kernel page directory invalid pdir=0x%x, va=0x%x\n", pmap->pm_pdir[PTDPTDI], va);
 		panic("invalid kernel page directory");
 	}
 	pte = pmap_pte(pmap, va);
 	/* physical page currently mapped at va (0 if none) */
 	opa = pmap_pte_pa(pte);
 
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (opa == pa) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if (wired && !pmap_pte_w(pte))
 			pmap->pm_stats.wired_count++;
 		else if (!wired && pmap_pte_w(pte))
 			pmap->pm_stats.wired_count--;
 
 		goto validate;
 	}
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		pmap_remove(pmap, va, va + PAGE_SIZE);
 	}
 	/*
 	 * Enter on the PV list if part of our managed memory Note that we
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
 	if (pmap_is_managed(pa)) {
 		register pv_entry_t pv, npv;
 		int s;
 
 		pv = pa_to_pvh(pa);
 		s = splhigh();
 		/*
 		 * No entries yet, use header as the first entry
 		 */
 		if (pv->pv_pmap == NULL) {
 			pv->pv_va = va;
 			pv->pv_pmap = pmap;
 			pv->pv_next = NULL;
 		}
 		/*
 		 * There is at least one other VA mapping this page. Place
 		 * this entry after the header.
 		 */
 		else {
 			npv = get_pv_entry();
 			npv->pv_va = va;
 			npv->pv_pmap = pmap;
 			npv->pv_next = pv->pv_next;
 			pv->pv_next = npv;
 		}
 		splx(s);
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 	if (wired)
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	npte = (pt_entry_t) ((int) (pa | pte_prot(pmap, prot) | PG_V));
 
 	/*
 	 * When forking (copy-on-write, etc): A process will turn off write
 	 * permissions for any of its writable pages.  If the data (object) is
 	 * only referred to by one process, the processes map is modified
 	 * directly as opposed to using the object manipulation routine.  When
 	 * using pmap_protect, the modified bits are not kept in the vm_page_t
 	 * data structure.  Therefore, when using pmap_enter in vm_fault to
 	 * bring back writability of a page, there has been no memory of the
 	 * modified or referenced bits except at the pte level.  this clause
 	 * supports the carryover of the modified and used (referenced) bits.
 	 */
 	/* NOTE(review): "(int) npte |=" is a cast used as an lvalue */
 	if (pa == opa)
 		(int) npte |= (int) *pte & (PG_M | PG_U);
 
 
 	if (wired)
 		(int) npte |= PG_W;
 	/* below the page table range: PG_u; inside it: PG_u and PG_RW */
 	if (va < UPT_MIN_ADDRESS)
 		(int) npte |= PG_u;
 	else if (va < UPT_MAX_ADDRESS)
 		(int) npte |= PG_u | PG_RW;
 
 	if (*pte != npte) {
 		/* a non-zero old pte is being replaced: flush afterwards */
 		if (*pte)
 			ptevalid++;
 		*pte = npte;
 	}
 	/* otherwise account for the pte on its page table page */
 	if (ptevalid) {
 		pmap_update();
 	} else {
 		pmap_use_pt(pmap, va);
 	}
 }
 
 /*
  * pmap_qenter:
  *
  *	Map "count" pages taken from the array "m" into consecutive
  *	NBPG-sized slots of kernel virtual address space starting at
  *	"va".  Every pte is written as valid, writable and wired
  *	(PG_V | PG_RW | PG_W); whatever was in the slot before is simply
  *	overwritten.  Meant for temporary kernel mappings that need no
  *	modified/referenced tracking; the pages *must* be wired.
  *
  *	The TLB is flushed only if at least one overwritten pte was
  *	non-zero.
  */
 void
 pmap_qenter(va, m, count)
 	vm_offset_t va;
 	vm_page_t *m;
 	int count;
 {
 	register pt_entry_t *ptep;
 	int replaced = 0;
 	int n;
 
 	for (n = 0; n < count; n++) {
 		ptep = vtopte(va + n * NBPG);
 		/* remember that a live translation is being replaced */
 		if (*ptep)
 			replaced = 1;
 		*ptep = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[n]) | PG_RW | PG_V | PG_W));
 	}
 	if (replaced)
 		pmap_update();
 }
 /*
  * pmap_qremove:
  *
  *	Tear down "count" consecutive kernel mappings starting at "va"
  *	by zeroing their ptes, then flush the TLB unconditionally.
  *	Counterpart of pmap_qenter; only meant for temporary mappings.
  */
 void
 pmap_qremove(va, count)
 	vm_offset_t va;
 	int count;
 {
 	register pt_entry_t *ptep;
 	int n;
 
 	for (n = 0; n < count; n++) {
 		ptep = vtopte(va + n * NBPG);
 		*ptep = 0;
 	}
 	pmap_update();
 }
 
 /*
  * pmap_kenter:
  *
  *	Enter a single wired, writable kernel mapping va -> pa.
  *	The TLB is flushed here only when a non-zero pte is being
  *	overwritten; when the slot was empty the caller should do a
  *	pmap_update itself if it needs the mapping to take effect
  *	immediately.
  */
 void
 pmap_kenter(va, pa)
 	vm_offset_t va;
 	register vm_offset_t pa;
 {
 	register pt_entry_t *ptep;
 	int had_entry;
 
 	ptep = vtopte(va);
 	had_entry = (*ptep) ? 1 : 0;
 
 	*ptep = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_W));
 
 	if (had_entry)
 		pmap_update();
 }
 
 /*
  * pmap_kremove:
  *
  *	Remove the kernel mapping for "va" by clearing its pte, then
  *	flush the TLB.
  */
 void
 pmap_kremove(va)
 	vm_offset_t va;
 {
 	register pt_entry_t *ptep;
 
 	ptep = vtopte(va);
 	*ptep = (pt_entry_t) 0;
 
 	pmap_update();
 }
 
 /*
  * pmap_enter_quick:
  *
  *	Fast path for entering a read-only, unwired user mapping
  *	va -> pa.  The pte is written as PG_V | PG_u only, the pv
  *	list of the page is updated, resident_count is bumped and the
  *	page table page is held via pmap_use_pt().
  *
  * this code makes some *MAJOR* assumptions:
  * 1. Current pmap & pmap exists.
  * 2. Not wired.
  * 3. Read access.
  * 4. No page table pages.
  * 5. Tlbflush is deferred to calling procedure.
  * 6. Page IS managed.
  * but is *MUCH* faster than pmap_enter...
  */
 
 static inline void
 pmap_enter_quick(pmap, va, pa)
 	register pmap_t pmap;
 	vm_offset_t va;
 	register vm_offset_t pa;
 {
 	register pt_entry_t *pte;
 	register pv_entry_t pv, npv;
 	int s;
 
 	/*
 	 * Locate the pte through the current address space's page table
 	 * window (assumption 1 above); pmap_pte() is deliberately not used.
 	 */
 
 	pte = vtopte(va);
 
 	/* a fault on the page table might occur here */
 	/* an existing mapping at va is removed before the new one goes in */
 	if (*pte) {
 		pmap_remove(pmap, va, va + PAGE_SIZE);
 	}
 	/*
 	 * Enter on the PV list.  The page is assumed to be managed, so
 	 * there is no pmap_is_managed() check here.  IPL is raised while
 	 * the pv_table is manipulated.
 	 */
 	pv = pa_to_pvh(pa);
 	s = splhigh();
 	/*
 	 * No entries yet, use header as the first entry
 	 */
 	if (pv->pv_pmap == NULL) {
 		pv->pv_pmap = pmap;
 		pv->pv_va = va;
 		pv->pv_next = NULL;
 	}
 	/*
 	 * There is at least one other VA mapping this page. Place this entry
 	 * after the header.
 	 */
 	else {
 		npv = get_pv_entry();
 		npv->pv_va = va;
 		npv->pv_pmap = pmap;
 		npv->pv_next = pv->pv_next;
 		pv->pv_next = npv;
 	}
 	splx(s);
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 
 	/*
 	 * Now validate the mapping: valid and user accessible, but neither
 	 * writable nor wired.  No TLB flush is done here.
 	 */
 	*pte = (pt_entry_t) ((int) (pa | PG_V | PG_u));
 
 	/* account for the new pte in its page table page */
 	pmap_use_pt(pmap, va);
 
 	return;
 }
 
 /* size threshold, in bytes, used by pmap_object_init_pt (2MB) */
 #define MAX_INIT_PT (1024*2048)
 /*
  * pmap_object_init_pt preloads the ptes for a given object
  * into the specified pmap.  This eliminates the blast of soft
  * faults on process startup and immediately after an mmap.
  *
  * The window [offset, offset + size) of "object" is mapped at
  * [addr, addr + size) in "pmap".
  *
  * Nothing is done when pmap is NULL, when size exceeds MAX_INIT_PT
  * and the object has more than MAX_INIT_PT / NBPG resident pages,
  * or when vm_object_lock_try() fails.
  *
  * A page is entered only if it has PG_ACTIVE or PG_INACTIVE set, is
  * fully valid, has bmapped == 0 and busy == 0, and has none of
  * PG_BUSY, PG_FICTITIOUS or PG_CACHE set.  Each such page is flagged
  * PG_MAPPED and held around the call to pmap_enter_quick().
  */
 void
 pmap_object_init_pt(pmap, addr, object, offset, size)
 	pmap_t pmap;
 	vm_offset_t addr;
 	vm_object_t object;
 	vm_offset_t offset;
 	vm_offset_t size;
 {
 	vm_offset_t tmpoff;
 	vm_page_t p;
 	int bits;		/* not referenced in this function */
 	int objbytes;
 
 	if (!pmap || ((size > MAX_INIT_PT) &&
 		(object->resident_page_count > (MAX_INIT_PT / NBPG)))) {
 		return;
 	}
 	/* do not block: give up if the object lock is not available */
 	if (!vm_object_lock_try(object))
 		return;
 
 	/*
 	 * if we are processing a major portion of the object, then scan the
 	 * entire thing.
 	 */
 	if (size > (object->size >> 2)) {
 		/*
 		 * objbytes is a budget: it starts at size and drops by one
 		 * page for every resident page found inside the window, so
 		 * the scan stops once that many in-window pages were seen.
 		 */
 		objbytes = size;
 
 		for (p = object->memq.tqh_first;
 		    ((objbytes > 0) && (p != NULL));
 		    p = p->listq.tqe_next) {
 
 			/* skip pages that lie outside the requested window */
 			tmpoff = p->offset;
 			if (tmpoff < offset) {
 				continue;
 			}
 			tmpoff -= offset;
 			if (tmpoff >= size) {
 				continue;
 			}
 			if (((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) &&
 			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 			    (p->bmapped == 0) &&
 				(p->busy == 0) &&
 			    (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) {
 				vm_page_hold(p);
+				p->flags |= PG_MAPPED;
 				pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p));
 				vm_page_unhold(p);
 			}
 			objbytes -= NBPG;
 		}
 	} else {
 		/*
 		 * else lookup the pages one-by-one.
 		 */
 		for (tmpoff = 0; tmpoff < size; tmpoff += NBPG) {
 			p = vm_page_lookup(object, tmpoff + offset);
 			if (p && ((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) &&
 			    (p->bmapped == 0) && (p->busy == 0) &&
 			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 			    (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) {
 				vm_page_hold(p);
+				p->flags |= PG_MAPPED;
 				pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p));
 				vm_page_unhold(p);
 			}
 		}
 	}
 	vm_object_unlock(object);
 }
 
 #if 0
 /*
  * pmap_prefault provides a quick way of clustering
  * pagefaults into a process's address space.  It is a "cousin"
  * of pmap_object_init_pt, except it runs at page fault time instead
  * of mmap time.
  *
  * Up to PFBAK pages before and PFFOR pages after the faulting
  * address "addra" are considered, clipped to the map entry.  Only
  * the current process's pmap and the entry's own top-level object
  * are handled; anything else returns immediately.
  *
  * NOTE: this whole section is compiled out by the enclosing #if 0.
  */
 #define PFBAK 2
 #define PFFOR 2
 #define PAGEORDER_SIZE (PFBAK+PFFOR)
 
 /* byte offsets from the faulting address, nearest neighbours first */
 static int pmap_prefault_pageorder[] = {
 	-NBPG, NBPG, -2 * NBPG, 2 * NBPG
 };
 
 void
 pmap_prefault(pmap, addra, entry, object)
 	pmap_t pmap;
 	vm_offset_t addra;
 	vm_map_entry_t entry;
 	vm_object_t object;
 {
 	int i;
 	vm_offset_t starta, enda;
 	vm_offset_t offset, addr;
 	vm_page_t m;
 	int pageorder_index;	/* not referenced in this function */
 
 	if (entry->object.vm_object != object)
 		return;
 
 	if (pmap != &curproc->p_vmspace->vm_pmap)
 		return;
 
 	/*
 	 * Compute the window [starta, enda).  starta > addra means the
 	 * unsigned subtraction wrapped around, so clamp to 0.
 	 */
 	starta = addra - PFBAK * NBPG;
 	if (starta < entry->start) {
 		starta = entry->start;
 	} else if (starta > addra)
 		starta = 0;
 
 	enda = addra + PFFOR * NBPG;
 	if (enda > entry->end)
 		enda = entry->end;
 
 	for (i = 0; i < PAGEORDER_SIZE; i++) {
 		vm_object_t lobject;
 		pt_entry_t *pte;
 
 		addr = addra + pmap_prefault_pageorder[i];
 		if (addr < starta || addr >= enda)
 			continue;
 
 		/* leave addresses that already have a pte alone */
 		pte = vtopte(addr);
 		if (*pte)
 			continue;
 
 		/*
 		 * Look the page up in the object; while it is missing, walk
 		 * down the shadow chain as long as the current object has a
 		 * shadow and no pager.
 		 */
 		offset = (addr - entry->start) + entry->offset;
 		lobject = object;
 		for (m = vm_page_lookup(lobject, offset);
 		    (!m && lobject->shadow && !lobject->pager);
 		    lobject = lobject->shadow) {
 
 			offset += lobject->shadow_offset;
 			m = vm_page_lookup(lobject->shadow, offset);
 		}
 
 		/*
 		 * give-up when a page is not in memory
 		 */
 		if (m == NULL)
 			break;
 
 		if (((m->flags & (PG_CACHE | PG_ACTIVE | PG_INACTIVE)) != 0) &&
 		    ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 		    (m->busy == 0) &&
 			(m->bmapped == 0) &&
 		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 			/*
 			 * test results show that the system is faster when
 			 * pages are activated.  (PG_CACHE pages are
 			 * deactivated rather than activated.)
 			 */
 			if ((m->flags & PG_ACTIVE) == 0) {
 				if( m->flags & PG_CACHE)
 					vm_page_deactivate(m);
 				else
 					vm_page_activate(m);
 			}
 			vm_page_hold(m);
+			m->flags |= PG_MAPPED;
 			pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m));
 			vm_page_unhold(m);
 		}
 	}
 }
 #endif
 
 /*
  *	Routine:	pmap_change_wiring
  *	Function:	Change the wiring attribute for a map/virtual-address
  *			pair, keeping pm_stats.wired_count in step.
  *	In/out conditions:
  *			The mapping must already exist in the pmap; the
  *			pte returned by pmap_pte() is dereferenced without
  *			a check.  A NULL pmap is silently ignored.
  */
 void
 pmap_change_wiring(pmap, va, wired)
 	register pmap_t pmap;
 	vm_offset_t va;
 	boolean_t wired;
 {
 	register pt_entry_t *pte;
 
 	if (pmap == NULL)
 		return;
 
 	pte = pmap_pte(pmap, va);
 
 	/* only a real transition changes the wired page count */
 	if (wired && !pmap_pte_w(pte))
 		pmap->pm_stats.wired_count++;
 	else if (!wired && pmap_pte_w(pte))
 		pmap->pm_stats.wired_count--;
 
 	/*
 	 * Wiring is not a hardware characteristic so there is no need to
 	 * invalidate TLB.
 	 */
 	pmap_pte_set_w(pte, wired);
 	/*
 	 * When unwiring, set the modified bit in the pte -- could have been
 	 * changed by the kernel.  The pte is accessed through an int
 	 * pointer, as the pmap_pte_* macros do, instead of assigning to
 	 * a cast expression (which is not valid C).
 	 */
 	if (!wired)
 		*(int *) pte |= PG_M;
 }
 
 
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything;
  *	this implementation is an empty function.
  */
 void
 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
 	pmap_t dst_pmap, src_pmap;
 	vm_offset_t dst_addr;
 	vm_size_t len;
 	vm_offset_t src_addr;
 {
 }
 
 /*
  *	Routine:	pmap_kernel
  *	Function:
  *		Returns the physical map handle for the kernel,
  *		i.e. the current value of the global kernel_pmap.
  */
 pmap_t
 pmap_kernel()
 {
 	return (kernel_pmap);
 }
 
 /*
  *	pmap_zero_page zeros the physical page containing "phys".
  *	The page is mapped temporarily by writing the pte at CMAP2,
  *	NBPG bytes are cleared through CADDR2 with bzero, and the
  *	temporary mapping is removed again.
  *
  *	Panics if the CMAP2 pte is already in use.
  */
 void
 pmap_zero_page(phys)
 	vm_offset_t phys;
 {
 	/* a non-zero pte means somebody else is using the window */
 	if (*(int *) CMAP2)
 		panic("pmap_zero_page: CMAP busy");
 
 	*(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(phys);
 	bzero(CADDR2, NBPG);
 
 	/* release the window and flush so the stale translation is gone */
 	*(int *) CMAP2 = 0;
 	pmap_update();
 }
 
 /*
  *	pmap_copy_page copies the physical page containing "src" to
  *	the physical page containing "dst".  The source is mapped
  *	through the CMAP1 pte and the destination through CMAP2;
  *	NBPG bytes are then copied from CADDR1 to CADDR2 and both
  *	temporary mappings are removed.
  *
  *	Panics if either CMAP1 or CMAP2 is already in use.
  */
 void
 pmap_copy_page(src, dst)
 	vm_offset_t src;
 	vm_offset_t dst;
 {
 	if (*(int *) CMAP1 || *(int *) CMAP2)
 		panic("pmap_copy_page: CMAP busy");
 
 	*(int *) CMAP1 = PG_V | PG_KW | i386_trunc_page(src);
 	*(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(dst);
 
 	/* note the argument order: memcpy(dst, src) vs. bcopy(src, dst) */
 #if __GNUC__ > 1
 	memcpy(CADDR2, CADDR1, NBPG);
 #else
 	bcopy(CADDR1, CADDR2, NBPG);
 #endif
 	/* release both windows and flush the stale translations */
 	*(int *) CMAP1 = 0;
 	*(int *) CMAP2 = 0;
 	pmap_update();
 }
 
 
 /*
  *	Routine:	pmap_pageable
  *	Function:
  *		Make the specified pages (by pmap, offset)
  *		pageable (or not) as requested.
  *
  *		A page which is not pageable may not take
  *		a fault; therefore, its page table entry
  *		must remain valid for the duration.
  *
  *		This routine is merely advisory; pmap_enter
  *		will specify that these pages are to be wired
  *		down (or not) as appropriate.  This
  *		implementation is an empty function.
  */
 void
 pmap_pageable(pmap, sva, eva, pageable)
 	pmap_t pmap;
 	vm_offset_t sva, eva;
 	boolean_t pageable;
 {
 }
 
 /*
  * pmap_page_exists:
  *
  *	Return TRUE if the managed physical page "pa" has at least one
  *	mapping that belongs to "pmap", FALSE otherwise.  Unmanaged
  *	pages always yield FALSE.  The pv list is walked at splhigh.
  */
 boolean_t
 pmap_page_exists(pmap, pa)
 	pmap_t pmap;
 	vm_offset_t pa;
 {
 	register pv_entry_t ent;
 	boolean_t found = FALSE;
 	int s;
 
 	if (!pmap_is_managed(pa))
 		return FALSE;
 
 	ent = pa_to_pvh(pa);
 	s = splhigh();
 
 	/*
 	 * A header with a NULL pmap means the page has no mappings at
 	 * all; otherwise scan the list for an entry owned by pmap.
 	 */
 	if (ent->pv_pmap != NULL) {
 		while (ent != NULL) {
 			if (ent->pv_pmap == pmap) {
 				found = TRUE;
 				break;
 			}
 			ent = ent->pv_next;
 		}
 	}
 	splx(s);
 	return (found);
 }
 
 /*
  * pmap_testbit tests bits in pte's
  * note that the testbit/changebit routines are inline,
  * and a lot of things compile-time evaluate.
  *
  * Returns TRUE as soon as a mapping of the managed page "pa" is
  * found whose pte has any of the bits in "bit" set, subject to the
  * special cases commented below.  Returns FALSE for unmanaged pages
  * and when no mapping qualifies.
  */
 __inline boolean_t
 pmap_testbit(pa, bit)
 	register vm_offset_t pa;
 	int bit;
 {
 	register pv_entry_t pv;
 	pt_entry_t *pte;
 	int s;
 
 	if (!pmap_is_managed(pa))
 		return FALSE;
 
 	pv = pa_to_pvh(pa);
 	s = splhigh();
 
 	/*
 	 * Check the current mappings, returning immediately once the
 	 * answer is known.  A header with a NULL pmap means that the
 	 * page has no mappings.
 	 */
 	if (pv->pv_pmap != NULL) {
 		for (; pv; pv = pv->pv_next) {
 			/*
 			 * When testing the referenced bit, mappings in the
 			 * [clean_sva, clean_eva) range are ignored.
 			 */
 			if (bit & PG_U) {
 				if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) {
 					continue;
 				}
 			}
 			/*
 			 * if the bit being tested is the modified bit, then
 			 * mark UPAGES as always modified, and ptes as never
 			 * modified.  For addresses at or above USRSTACK:
 			 * mappings in the clean range are skipped, the
 			 * UPAGES pages directly above USRSTACK yield TRUE,
 			 * and anything else below KERNBASE yields FALSE.
 			 * Addresses at or above KERNBASE fall through to the
 			 * ordinary pte test.
 			 */
 			if (bit & PG_M) {
 				if (pv->pv_va >= USRSTACK) {
 					if (pv->pv_va >= clean_sva && pv->pv_va < clean_eva) {
 						continue;
 					}
 					if (pv->pv_va < USRSTACK + (UPAGES * NBPG)) {
 						splx(s);
 						return TRUE;
 					} else if (pv->pv_va < KERNBASE) {
 						splx(s);
 						return FALSE;
 					}
 				}
 			}
 			/* a list entry without a pmap is reported and skipped */
 			if (!pv->pv_pmap) {
 				printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
 				continue;
 			}
 			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
 			if ((int) *pte & bit) {
 				splx(s);
 				return TRUE;
 			}
 		}
 	}
 	splx(s);
 	return (FALSE);
 }
 
 /*
  * this routine is used to modify bits in ptes
  */
 __inline void
 pmap_changebit(pa, bit, setem)
 	vm_offset_t pa;
 	int bit;
 	boolean_t setem;
 {
 	register pv_entry_t pv;
 	register pt_entry_t *pte, npte;
 	vm_offset_t va;
 	int s;
 
 	if (!pmap_is_managed(pa))
 		return;
 
 	pv = pa_to_pvh(pa);
 	s = splhigh();
 
 	/*
 	 * Loop over all current mappings setting/clearing as appropos If
 	 * setting RO do we need to clear the VAC?
 	 */
 	if (pv->pv_pmap != NULL) {
 		for (; pv; pv = pv->pv_next) {
 			va = pv->pv_va;
 
 			/*
 			 * don't write protect pager mappings
 			 */
 			if (!setem && (bit == PG_RW)) {
 				if (va >= clean_sva && va < clean_eva)
 					continue;
 			}
 			if (!pv->pv_pmap) {
 				printf("Null pmap (cb) at va: 0x%lx\n", va);
 				continue;
 			}
 			pte = pmap_pte(pv->pv_pmap, va);
 			if (setem)
 				(int) npte = (int) *pte | bit;
 			else
 				(int) npte = (int) *pte & ~bit;
 			*pte = npte;
 		}
 	}
 	splx(s);
 	pmap_update();
 }
 
 /*
  *      pmap_page_protect:
  *
  *      Lower the permission for all mappings to a given page.
  *      A protection that still includes write access changes
  *      nothing.  Read and/or execute access write-protects every
  *      mapping; no access at all removes every mapping.
  */
 void
 pmap_page_protect(phys, prot)
 	vm_offset_t phys;
 	vm_prot_t prot;
 {
 	/* write permission retained: nothing to lower */
 	if ((prot & VM_PROT_WRITE) != 0)
 		return;
 
 	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0) {
 		pmap_remove_all(phys);
 		return;
 	}
 	pmap_changebit(phys, PG_RW, FALSE);
 }
 
 /*
  *	pmap_phys_address:
  *
  *	Convert the physical page number "ppn" to a physical address
  *	using i386_ptob().
  */
 vm_offset_t
 pmap_phys_address(ppn)
 	int ppn;
 {
 	return (i386_ptob(ppn));
 }
 
 /*
  *	pmap_is_referenced:
  *
  *	Report whether the physical page "pa" has the referenced bit
  *	(PG_U) set in any of its mappings, as decided by pmap_testbit.
  */
 boolean_t
 pmap_is_referenced(vm_offset_t pa)
 {
 	return (pmap_testbit(pa, PG_U));
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Report whether the physical page "pa" has the modified bit
  *	(PG_M) set in any of its mappings, as decided by pmap_testbit.
  */
 boolean_t
 pmap_is_modified(vm_offset_t pa)
 {
 	return (pmap_testbit(pa, PG_M));
 }
 
 /*
  *	pmap_clear_modify:
  *
  *	Clear the modified bit (PG_M) in every mapping of the
  *	physical page "pa".
  */
 void
 pmap_clear_modify(vm_offset_t pa)
 {
 	pmap_changebit(pa, PG_M, FALSE);
 }
 
 /*
  *	pmap_clear_reference:
  *
  *	Clear the referenced bit (PG_U) in every mapping of the
  *	physical page "pa".
  */
 void
 pmap_clear_reference(vm_offset_t pa)
 {
 	pmap_changebit(pa, PG_U, FALSE);
 }
 
 /*
  *	Routine:	pmap_copy_on_write
  *	Function:
  *		Clear the write-enable bit (PG_RW) in every mapping
  *		of the physical page "pa", taking write privileges
  *		away from all physical maps that map it.
  */
 void
 pmap_copy_on_write(vm_offset_t pa)
 {
 	pmap_changebit(pa, PG_RW, FALSE);
 }
 
 /*
  * Miscellaneous support routines follow
  */
 
 /*
  * i386_protection_init:
  *
  *	Fill in protection_codes[], the table that pte_prot() indexes
  *	with a machine independent protection value.  One entry is
  *	stored per loop iteration: PG_RW for every combination listed
  *	with VM_PROT_WRITE, 0 for all the others.
  */
 void
 i386_protection_init()
 {
 	register int *kp, prot;
 
 	kp = protection_codes;
 	for (prot = 0; prot < 8; prot++) {
 		switch (prot) {
 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
 			/*
 			 * Read access is also 0. There isn't any execute bit,
 			 * so just make it readable.
 			 */
 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
 			*kp++ = 0;
 			break;
 		/* anything that includes write access maps to PG_RW */
 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
 			*kp++ = PG_RW;
 			break;
 		}
 	}
 }
 
 /*
  * Map a set of physical memory pages into the kernel virtual
  * address space. Return a pointer to where it is mapped. This
  * routine is intended to be used for mapping device memory,
  * NOT real memory. The non-cacheable bits are set on each
  * mapped page.
  *
  * "pa" need not be page aligned: every page touched by
  * [pa, pa + size) is mapped.  The returned pointer is the virtual
  * address of the start of the first mapped page, i.e. it corresponds
  * to trunc_page(pa), not to pa itself.
  *
  * Panics if no kernel virtual memory can be allocated.
  */
 void *
 pmap_mapdev(pa, size)
 	vm_offset_t pa;
 	vm_size_t size;
 {
 	vm_offset_t va, tmpva, offset;
 	pt_entry_t *pte;
 
 	/*
 	 * Include the offset of pa within its page before rounding the
 	 * size up; otherwise a range that straddles a page boundary
 	 * would be mapped one page short.
 	 */
 	offset = pa - trunc_page(pa);
 	pa = trunc_page(pa);
 	size = roundup(size + offset, PAGE_SIZE);
 
 	va = kmem_alloc_pageable(kernel_map, size);
 	if (!va)
 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 
 	/* enter one valid, writable, non-cacheable pte per page */
 	for (tmpva = va; size > 0;) {
 		pte = vtopte(tmpva);
 		*pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N));
 		size -= PAGE_SIZE;
 		tmpva += PAGE_SIZE;
 		pa += PAGE_SIZE;
 	}
 	pmap_update();
 
 	return ((void *) va);
 }
 
 #ifdef DEBUG
 /*
  * pads:
  *
  *	Debug helper: print "va:pte" for every valid pte of a user
  *	pmap, up to and including UPT_MAX_ADDRESS.  Does nothing for
  *	the kernel pmap.
  */
 void
 pads(pm)
 	pmap_t pm;
 {
 	unsigned va, pdi, pti;
 	pt_entry_t *ptep;
 
 	if (pm == kernel_pmap)
 		return;
 
 	for (pdi = 0; pdi < 1024; pdi++) {
 		/* skip page directory slots that have no page table */
 		if (!pm->pm_pdir[pdi])
 			continue;
 		for (pti = 0; pti < 1024; pti++) {
 			va = (pdi << PD_SHIFT) + (pti << PG_SHIFT);
 			/* pm is never the kernel pmap at this point */
 			if (va > UPT_MAX_ADDRESS)
 				continue;
 			ptep = pmap_pte(pm, va);
 			if (pmap_pte_v(ptep))
 				printf("%x:%x ", va, *(int *) ptep);
 		}
 	}
 }
 
 /*
  * pmap_pvdump:
  *
  *	Debug helper: print the pv list of the physical page "pa".
  *	For every entry the owning pmap and virtual address are
  *	printed, followed by a pads() dump of that pmap.
  */
 void
 pmap_pvdump(pa)
 	vm_offset_t pa;
 {
 	register pv_entry_t pv;
 
 	printf("pa %x", pa);
 	for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) {
 		/* old variant that also printed pv_flags; normally compiled out */
 #ifdef used_to_be
 		printf(" -> pmap %x, va %x, flags %x",
 		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
 #endif
 		printf(" -> pmap %x, va %x",
 		    pv->pv_pmap, pv->pv_va);
 		pads(pv->pv_pmap);
 	}
 	printf(" ");
 }
 #endif
Index: head/sys/i386/i386/pmap.c
===================================================================
--- head/sys/i386/i386/pmap.c	(revision 6806)
+++ head/sys/i386/i386/pmap.c	(revision 6807)
@@ -1,2121 +1,2124 @@
 /*
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
- *	$Id: pmap.c,v 1.49 1995/02/15 04:36:31 davidg Exp $
+ *	$Id: pmap.c,v 1.50 1995/02/26 05:14:16 bde Exp $
  */
 
 /*
  * Derived from hp300 version by Mike Hibler, this version by William
  * Jolitz uses a recursive map [a pde points to the page directory] to
  * map the page tables using the pagetables themselves. This is done to
  * reduce the impact on kernel virtual memory for lots of sparse address
  * space, and to reduce the cost of memory to each process.
  *
  *	Derived from: hp300/@(#)pmap.c	7.1 (Berkeley) 12/5/90
  */
 /*
  * Major modifications by John S. Dyson primarily to support
  * pageable page tables, eliminating pmap_attributes,
  * discontiguous memory pages, and using more efficient string
  * instructions. Jan 13, 1994.  Further modifications on Mar 2, 1994,
  * general clean-up and efficiency mods.
  */
 
 /*
  *	Manages physical address maps.
  *
  *	In addition to hardware address maps, this
  *	module is called upon to provide software-use-only
  *	maps which may or may not be stored in the same
  *	form as hardware maps.  These pseudo-maps are
  *	used to store intermediate results from copy
  *	operations to and from address spaces.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 
 #include <machine/cputypes.h>
 
 #include <i386/isa/isa.h>
 
 /*
  * Allocate various and sundry SYSMAPs used in the days of old VM
  * and not yet converted.  XXX.
  */
 #define BSDVM_COMPAT	1
 
 /*
  * Get PDEs and PTEs for user/kernel address space
  *
  * pmap_pde(m, v) yields a pointer to the page directory entry of
  * pmap m that covers virtual address v; pdir_pde(m, v) yields the
  * entry itself, given a page directory m.
  *
  * Every macro argument is parenthesized so that expressions such as
  * "pte + 1" expand correctly.
  */
 #define	pmap_pde(m, v)	(&((m)->pm_pdir[((vm_offset_t)(v) >> PD_SHIFT)&1023]))
 #define pdir_pde(m, v) ((m)[((vm_offset_t)(v) >> PD_SHIFT)&1023])
 
 /* physical frame address stored in a pte */
 #define pmap_pte_pa(pte)	(*(int *)(pte) & PG_FRAME)
 
 /* predicates on individual pde/pte bits; pte is a pointer to the entry */
 #define pmap_pde_v(pte)		((*(int *)(pte) & PG_V) != 0)
 #define pmap_pte_w(pte)		((*(int *)(pte) & PG_W) != 0)
 #define pmap_pte_m(pte)		((*(int *)(pte) & PG_M) != 0)
 #define pmap_pte_u(pte)		((*(int *)(pte) & PG_U) != 0)
 #define pmap_pte_v(pte)		((*(int *)(pte) & PG_V) != 0)
 
 /* set or clear the wired bit; replace the protection bits with v */
 #define pmap_pte_set_w(pte, v)		((v)?(*(int *)(pte) |= PG_W):(*(int *)(pte) &= ~PG_W))
 #define pmap_pte_set_prot(pte, v)	((*(int *)(pte) &= ~PG_PROT), (*(int *)(pte) |= (v)))
 
 /*
  * Given a map and a machine independent protection code,
  * convert to the i386 pte protection bits (the map is not used).
  */
 #define pte_prot(m, p)	(protection_codes[p])
 int protection_codes[8];
 
 struct pmap kernel_pmap_store;
 pmap_t kernel_pmap;
 
 vm_offset_t phys_avail[6];	/* 2 entries + 1 null */
 vm_offset_t avail_start;	/* PA of first available physical page */
 vm_offset_t avail_end;		/* PA of last available physical page */
 vm_size_t mem_size;		/* memory size in bytes */
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 int i386pagesperpage;		/* PAGE_SIZE / I386_PAGE_SIZE */
 boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
 vm_offset_t vm_first_phys, vm_last_phys;
 
 static inline int pmap_is_managed();
 static inline void *vm_get_pmap();
 static inline void vm_put_pmap();
 static void i386_protection_init();
 static void pmap_alloc_pv_entry();
 static inline pv_entry_t get_pv_entry();
 int nkpt;
 
 
 extern vm_offset_t clean_sva, clean_eva;
 extern int cpu_class;
 
 #if BSDVM_COMPAT
 #include <sys/msgbuf.h>
 
 /*
  * All those kernel PT submaps that BSD is so fond of
  */
 pt_entry_t *CMAP1, *CMAP2, *ptmmap;
 caddr_t CADDR1, CADDR2, ptvmmap;
 pt_entry_t *msgbufmap;
 struct msgbuf *msgbufp;
 
 #endif
 
 void 
 init_pv_entries(int);
 
 /*
  *	Routine:	pmap_pte
  *	Function:
  *		Extract the page table entry associated
  *		with the given map/virtual_address pair.
  *
  *		Returns 0 when pmap is NULL or when the page
  *		directory entry covering va is zero.  For the
  *		kernel pmap and for the current address space
  *		the pte is located with vtopte(); for any other
  *		pmap its page directory is first installed in
  *		APTDpde (flushing the TLB if that changes the
  *		entry) and the pte is located with avtopte().
  * [ what about induced faults -wfj]
  */
 
 inline pt_entry_t * const 
 pmap_pte(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 
 	if (pmap && *pmap_pde(pmap, va)) {
 		/* physical frame of this pmap's page directory */
 		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 
 		/* are we current address space or kernel? */
 		if ((pmap == kernel_pmap) || (frame == ((int) PTDpde & PG_FRAME)))
 			return ((pt_entry_t *) vtopte(va));
 		/* otherwise, we are alternate address space */
 		else {
 			/* switch the alternate window only if it points elsewhere */
 			if (frame != ((int) APTDpde & PG_FRAME)) {
 				APTDpde = pmap->pm_pdir[PTDPTDI];
 				pmap_update();
 			}
 			return ((pt_entry_t *) avtopte(va));
 		}
 	}
 	return (0);
 }
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Extract the physical page address associated
  *		with the given map/virtual_address pair.
  *
  *		The result is the frame bits of the pte combined
  *		with the non-frame bits of va.  Returns 0 when pmap
  *		is NULL or the page directory entry covering va is
  *		zero.  The pte itself is not checked for validity.
  */
 
 vm_offset_t
 pmap_extract(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t pa;
 
 	if (pmap && *pmap_pde(pmap, va)) {
 		/* physical frame of this pmap's page directory */
 		vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 
 		/* are we current address space or kernel? */
 		if ((pmap == kernel_pmap)
 		    || (frame == ((int) PTDpde & PG_FRAME))) {
 			pa = *(int *) vtopte(va);
 			/* otherwise, we are alternate address space */
 		} else {
 			/* switch the alternate window only if it points elsewhere */
 			if (frame != ((int) APTDpde & PG_FRAME)) {
 				APTDpde = pmap->pm_pdir[PTDPTDI];
 				pmap_update();
 			}
 			pa = *(int *) avtopte(va);
 		}
 		return ((pa & PG_FRAME) | (va & ~PG_FRAME));
 	}
 	return 0;
 
 }
 
 /*
  * pmap_is_managed:
  *
  *	Determine if a page is managed (memory vs. device).  Returns 1
  *	when pa lies inside one of the [start, end) pairs recorded in
  *	phys_avail[], 0 otherwise.  Always returns 0 until pmap_init
  *	has set pmap_initialized.
  */
 static inline int
 pmap_is_managed(pa)
 	vm_offset_t pa;
 {
 	vm_offset_t *range;
 
 	if (!pmap_initialized)
 		return 0;
 
 	/* the pair list ends at the first pair whose end address is 0 */
 	for (range = phys_avail; range[1]; range += 2) {
 		if (range[0] <= pa && pa < range[1])
 			return 1;
 	}
 	return 0;
 }
 
 /*
  * pmap_pte_vm_page:
  *
  *	Given the virtual address "pt" of a pte and the pmap it belongs
  *	to, return the vm_page_t of the page table page containing
  *	that pte.
  */
 __inline vm_page_t
 pmap_pte_vm_page(pmap, pt)
 	pmap_t pmap;
 	vm_offset_t pt;
 {
 	vm_offset_t pdindex;
 	vm_offset_t ptpa;
 
 	/* page directory index of the page table page holding the pte */
 	pdindex = i386_trunc_page(pt);
 	pdindex = (pdindex - UPT_MIN_ADDRESS) / NBPG;
 
 	/* physical address of that page table page */
 	ptpa = ((vm_offset_t) pmap->pm_pdir[pdindex]) & PG_FRAME;
 
 	return PHYS_TO_VM_PAGE(ptpa);
 }
 
 /*
  * Wire a page table page
  */
 __inline void
 pmap_use_pt(pmap, va)
 	pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t pt;
 
 	if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized)
 		return;
 
 	pt = (vm_offset_t) vtopte(va);
 	vm_page_hold(pmap_pte_vm_page(pmap, pt));
 }
 
 /*
  * pmap_unuse_pt:
  *
  *	Drop a hold reference on the page table page that maps "va".
  *	For a non-kernel pmap, once the page is neither held nor wired
  *	and va is below KPT_MIN_ADDRESS, all mappings of the page table
  *	page are removed and the page is freed.  Nothing is done for
  *	addresses at or above UPT_MIN_ADDRESS or before pmap_init has
  *	completed.
  */
 inline void
 pmap_unuse_pt(pmap, va)
 	pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t pt;
 	vm_page_t m;
 
 	if ((va >= UPT_MIN_ADDRESS) || !pmap_initialized)
 		return;
 
 	pt = (vm_offset_t) vtopte(va);
 	m = pmap_pte_vm_page(pmap, pt);
 	vm_page_unhold(m);
 	/* last reference gone: unmap and release the page table page */
 	if (pmap != kernel_pmap &&
 	    (m->hold_count == 0) &&
 	    (m->wire_count == 0) &&
 	    (va < KPT_MIN_ADDRESS)) {
 		pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
 		vm_page_free(m);
 	}
 }
 
 /*
  * pmap_activate:
  *
  *	Function wrapper around the PMAP_ACTIVATE(pmap, pcbp) macro.
  * [ macro again?, should I force kstack into user map here? -wfj ]
  */
 void
 pmap_activate(pmap, pcbp)
 	register pmap_t pmap;
 	struct pcb *pcbp;
 {
 	PMAP_ACTIVATE(pmap, pcbp);
 }
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  *	Map the kernel's code and data, and allocate the system page table.
  *
  *	On the I386 this is called after mapping has already been enabled
  *	and just syncs the pmap module with what has already been done.
  *	[We can't call it easily with mapping off since the kernel is not
  *	mapped with PA == VA, hence we would have to relocate every address
  *	from the linked base (virtual) address "KERNBASE" to the actual
  *	(physical) address starting relative to 0]
  *
  *	"firstaddr" is the first free physical address; DMAPAGES pages
  *	starting there are reserved as a bounce buffer.  "loadaddr" is
  *	not referenced in this function.
  */
 
 /* number of pages reserved for the bus dma bounce buffer */
 #define DMAPAGES 8
 void
 pmap_bootstrap(firstaddr, loadaddr)
 	vm_offset_t firstaddr;
 	vm_offset_t loadaddr;
 {
 #if BSDVM_COMPAT
 	vm_offset_t va;
 	pt_entry_t *pte;
 
 #endif
 
 	/* available physical memory starts after the bounce buffer pages */
 	avail_start = firstaddr + DMAPAGES * NBPG;
 
 	virtual_avail = (vm_offset_t) KERNBASE + avail_start;
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 	i386pagesperpage = PAGE_SIZE / NBPG;
 
 	/*
 	 * Initialize protection array.
 	 */
 	i386_protection_init();
 
 	/*
 	 * The kernel's pmap is statically allocated so we don't have to use
 	 * pmap_create, which is unlikely to work correctly at this part of
 	 * the boot sequence.
 	 */
 	kernel_pmap = &kernel_pmap_store;
 
 	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);
 
 	simple_lock_init(&kernel_pmap->pm_lock);
 	kernel_pmap->pm_count = 1;
 	nkpt = NKPT;
 
 #if BSDVM_COMPAT
 	/*
 	 * Allocate all the submaps we need.  SYSMAP(c, p, v, n) assigns
 	 * the next n pages of kernel va to v (cast to type c) and the
 	 * matching pte pointer to p, then advances both va and pte.
 	 */
 #define	SYSMAP(c, p, v, n)	\
 	v = (c)va; va += ((n)*NBPG); p = pte; pte += (n);
 
 	va = virtual_avail;
 	pte = pmap_pte(kernel_pmap, va);
 
 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 	    SYSMAP(caddr_t, CMAP2, CADDR2, 1)
 	    SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
 	    SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 1)
 	    virtual_avail = va;
 #endif
 	/*
 	 * Reserve special hunk of memory for use by bus dma as a bounce
 	 * buffer (contiguous virtual *and* physical memory).
 	 *
 	 * NOTE(review): "va" is only declared when BSDVM_COMPAT is
 	 * non-zero, so this block depends on that setting.
 	 */
 	{
 		extern vm_offset_t isaphysmem;
 
 		isaphysmem = va;
 
 		virtual_avail = pmap_map(va, firstaddr,
 		    firstaddr + DMAPAGES * NBPG, VM_PROT_ALL);
 	}
 
 	/* clear both copy windows and the first page directory entry */
 	*(int *) CMAP1 = *(int *) CMAP2 = *(int *) PTD = 0;
 	pmap_update();
 
 }
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  *	pmap_init has been enhanced to support, in a fairly consistent
  *	way, discontiguous physical memory.
  *
  *	The phys_start and phys_end arguments are not referenced; the
  *	physical memory layout is taken from phys_avail[].
  */
 void
 pmap_init(phys_start, phys_end)
 	vm_offset_t phys_start, phys_end;
 {
 	vm_offset_t addr;
 	vm_size_t npg, s;
 	int i;
 
 	/*
 	 * Now that kernel map has been allocated, we can mark as unavailable
 	 * regions which we have mapped in locore.
 	 */
 	addr = atdevbase;
 	(void) vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
 	    &addr, (0x100000 - 0xa0000), FALSE);
 
 	addr = (vm_offset_t) KERNBASE + IdlePTD;
 	vm_object_reference(kernel_object);
 	(void) vm_map_find(kernel_map, kernel_object, addr,
 	    &addr, (4 + NKPDE) * NBPG, FALSE);
 
 	/*
 	 * calculate the number of pv_entries needed: one per page between
 	 * the start of the first phys_avail range and the end of the last
 	 * one.  The empty loop leaves i at the terminating pair.
 	 */
 	vm_first_phys = phys_avail[0];
 	for (i = 0; phys_avail[i + 1]; i += 2);
 	npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG;
 
 	/*
 	 * Allocate memory for random pmap data structures.  Includes the
 	 * pv_head_table.
 	 */
 	s = (vm_size_t) (sizeof(struct pv_entry) * npg);
 	s = i386_round_page(s);
 	addr = (vm_offset_t) kmem_alloc(kernel_map, s);
 	pv_table = (pv_entry_t) addr;
 
 	/*
 	 * init the pv free list
 	 */
 	init_pv_entries(npg);
 	/*
 	 * Now it is safe to enable pv_table recording.
 	 */
 	pmap_initialized = TRUE;
 }
 
 /*
  * pmap_map:
  *
  *	Map the physical address range [start, end) into kernel
  *	virtual address space beginning at "virt", one PAGE_SIZE
  *	step at a time, with protection "prot" and unwired.
  *	Returns the first virtual address past the new mappings.
  *
  *	For now, VM is already on, we only need to map the
  *	specified memory.
  */
 vm_offset_t
 pmap_map(virt, start, end, prot)
 	vm_offset_t virt;
 	vm_offset_t start;
 	vm_offset_t end;
 	int prot;
 {
 	for (; start < end; virt += PAGE_SIZE, start += PAGE_SIZE)
 		pmap_enter(kernel_pmap, virt, start, prot, FALSE);
 
 	return (virt);
 }
 
 /*
  *	Create and return a physical map.
  *
  *	If the size specified for the map
  *	is zero, the map is an actual physical
  *	map, and may be referenced by the
  *	hardware.
  *
  *	If the size specified is non-zero,
  *	the map will be used in software only, and
  *	is bounded by that size.
  *
  * [ just allocate a ptd and mark it uninitialize -- should we track
  *   with a table which process has which ptd? -wfj ]
  */
 
 pmap_t
 pmap_create(size)
 	vm_size_t size;
 {
 	register pmap_t pmap;
 
 	/*
 	 * Software use map does not need a pmap
 	 */
 	if (size)
 		return (NULL);
 
 	pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK);
 	bzero(pmap, sizeof(*pmap));
 	pmap_pinit(pmap);
 	return (pmap);
 }
 
 
 /*
  * Type used for page directory pages handed to and from
  * vm_get_pmap()/vm_put_pmap().  The next field is not referenced by
  * the code visible in this part of the file.
  */
 struct pmaplist {
 	struct pmaplist *next;
 };
 
 /*
  * Allocate one click (ctob(1) bytes) of kernel memory from kernel_map,
  * zero it, and return its address.  Used for page directories.
  */
 static inline void *
 vm_get_pmap()
 {
 	void *page;
 
 	page = (void *) kmem_alloc(kernel_map, ctob(1));
 	bzero(page, ctob(1));
 
 	return page;
 }
 
 static inline void
 vm_put_pmap(up)
 	struct pmaplist *up;
 {
 	kmem_free(kernel_map, (vm_offset_t) up, ctob(1));
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
  */
 void
 pmap_pinit(pmap)
 	register struct pmap *pmap;
 {
 	/*
 	 * No need to allocate page table space yet but we do need a valid
 	 * page directory table.
 	 */
 	pmap->pm_pdir = (pd_entry_t *) vm_get_pmap();
 
 	/* wire in kernel global address entries */
 	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
 
 	/* install self-referential address mapping entry */
 	*(int *) (pmap->pm_pdir + PTDPTDI) =
 	    ((int) pmap_kextract((vm_offset_t) pmap->pm_pdir)) | PG_V | PG_KW;
 
 	pmap->pm_count = 1;
 	simple_lock_init(&pmap->pm_lock);
 }
 
 /*
  * Grow the kernel page tables, if needed, so that they cover addr.
  *
  * kernel_vm_end is the first kernel virtual address not known to be
  * backed by a page table page.  On the first call it is found by
  * walking PTD upward from KERNBASE, counting populated entries into
  * nkpt.  Every empty page directory slot below the rounded-up target
  * then receives a wired, zeroed page, and the new entry is copied into
  * the page directory of every process on allproc and of kernel_pmap.
  *
  * Runs at splhigh().  Panics if a page cannot be allocated.
  */
 
 /* page to install as the next page table page; allocated below when NULL */
 vm_page_t nkpg;
 /* end of kernel VA covered by page table pages; 0 until first computed */
 vm_offset_t kernel_vm_end;
 
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	struct proc *p;
 	struct pmap *pmap;
 	int s;
 
 	s = splhigh();
 	/* first call: discover how far the existing page tables reach */
 	if (kernel_vm_end == 0) {
 		kernel_vm_end = KERNBASE;
 		nkpt = 0;
 		while (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
 			++nkpt;
 		}
 	}
 	/* round the target up to the next page-table-page boundary */
 	addr = (addr + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
 	while (kernel_vm_end < addr) {
 		/* slot already populated: just step past it */
 		if (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
 			continue;
 		}
 		++nkpt;
 		if (!nkpg) {
 			nkpg = vm_page_alloc(kernel_object, 0, VM_ALLOC_SYSTEM);
 			if (!nkpg)
 				panic("pmap_growkernel: no memory to grow kernel");
 			vm_page_wire(nkpg);
 			vm_page_remove(nkpg);
 			pmap_zero_page(VM_PAGE_TO_PHYS(nkpg));
 		}
 		pdir_pde(PTD, kernel_vm_end) = (pd_entry_t) (VM_PAGE_TO_PHYS(nkpg) | PG_V | PG_KW);
 		/* the page is consumed; force a fresh allocation next time */
 		nkpg = NULL;
 
 		/* propagate the new entry to every process page directory */
 		for (p = (struct proc *) allproc; p != NULL; p = p->p_next) {
 			if (p->p_vmspace) {
 				pmap = &p->p_vmspace->vm_pmap;
 				*pmap_pde(pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
 			}
 		}
 		*pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end);
 		kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1);
 	}
 	splx(s);
 }
 
 /*
  *	Drop one reference to the given physical map.  When the last
  *	reference goes away the map's resources are released and the
  *	structure itself is freed.  A NULL pmap is ignored.
  *
  *	Should only be called if the map contains no valid mappings.
  */
 void
 pmap_destroy(pmap)
 	register pmap_t pmap;
 {
 	int remaining;
 
 	if (pmap == NULL)
 		return;
 
 	simple_lock(&pmap->pm_lock);
 	remaining = --pmap->pm_count;
 	simple_unlock(&pmap->pm_lock);
 
 	/* still referenced elsewhere */
 	if (remaining != 0)
 		return;
 
 	pmap_release(pmap);
 	free((caddr_t) pmap, M_VMPMAP);
 }
 
 /*
  * Release the resources held by a pmap that was set up by pmap_pinit:
  * its page directory page is handed back via vm_put_pmap().
  * Should only be called if the map contains no valid mappings.
  */
 void
 pmap_release(pmap)
 	register struct pmap *pmap;
 {
 	struct pmaplist *pdir_page;
 
 	pdir_page = (struct pmaplist *) pmap->pm_pdir;
 	vm_put_pmap(pdir_page);
 }
 
 /*
  *	Take an additional reference on the specified pmap.
  *	A NULL pmap is ignored.
  */
 void
 pmap_reference(pmap)
 	pmap_t pmap;
 {
 	if (pmap == NULL)
 		return;
 
 	simple_lock(&pmap->pm_lock);
 	++pmap->pm_count;
 	simple_unlock(&pmap->pm_lock);
 }
 
 /*
  * Low-water mark for the pv_entry free list: half the number of
  * entries that fit in one page.  get_pv_entry() asks for more when the
  * count drops below this.
  */
 #define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2)
 
 /*
  * Data for the pv entry allocation mechanism
  */
 /* count of entries on pv_freelist (briefly inflated in pmap_alloc_pv_entry) */
 int pv_freelistcnt;
 /* head of the free list, linked through pv_next */
 pv_entry_t pv_freelist;
 /* next unused address in the KVA range reserved by init_pv_entries() */
 vm_offset_t pvva;
 /* number of reserved KVA pages not yet backed by a physical page */
 int npvvapg;
 
 /*
  * free the pv_entry back to the free list
  */
 inline static void
 free_pv_entry(pv)
 	pv_entry_t pv;
 {
 	if (!pv)
 		return;
 	++pv_freelistcnt;
 	pv->pv_next = pv_freelist;
 	pv_freelist = pv;
 }
 
 /*
  * Take a pv_entry from the free list, first asking
  * pmap_alloc_pv_entry() for more when the list is empty or its count
  * is below PV_FREELIST_MIN.
  *
  * The memory allocation bypasses the malloc code because allocations
  * may happen at interrupt time.
  */
 static inline pv_entry_t
 get_pv_entry()
 {
 	pv_entry_t head;
 
 	/* top up the pool when it runs low */
 	if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0)
 		pmap_alloc_pv_entry();
 
 	/* unlink the first free entry */
 	pv_freelistcnt--;
 	head = pv_freelist;
 	pv_freelist = head->pv_next;
 
 	return head;
 }
 
 /*
  * Refill the pv_entry free list with one page worth of entries.
  *
  * The page is mapped at pvva, the next unused address in the KVA range
  * reserved by init_pv_entries(); npvvapg counts the reserved pages that
  * remain.  If no reserved page is left, or the physical page cannot be
  * allocated, nothing is added.  Panics if the free list is empty on
  * the way out.
  *
  * The original author's note: this *strange* allocation routine
  * *statistically* eliminates the *possibility* of a malloc failure
  * (*FATAL*) for a pv_entry_t data structure, and is MUCH faster than
  * the malloc equivalent.
  */
 static void
 pmap_alloc_pv_entry()
 {
 	/*
 	 * do we have any pre-allocated map-pages left?
 	 */
 	if (npvvapg) {
 		vm_page_t m;
 
 		/*
 		 * we do this to keep recursion away: with the count
 		 * raised, a nested get_pv_entry() will not see it below
 		 * PV_FREELIST_MIN and come back in here (provided the
 		 * list is not empty).  Undone at the end of this block.
 		 */
 		pv_freelistcnt += PV_FREELIST_MIN;
 		/*
 		 * allocate a physical page out of the vm system
 		 */
 		m = vm_page_alloc(kernel_object,
 		    pvva - vm_map_min(kernel_map), VM_ALLOC_INTERRUPT);
 		if (m) {
 			int newentries;
 			int i;
 			pv_entry_t entry;
 
 			/* number of pv_entry structures that fit in a page */
 			newentries = (NBPG / sizeof(struct pv_entry));
 			/*
 			 * wire the page
 			 */
 			vm_page_wire(m);
 			m->flags &= ~PG_BUSY;
 			/*
 			 * let the kernel see it
 			 */
 			pmap_kenter(pvva, VM_PAGE_TO_PHYS(m));
 
 			entry = (pv_entry_t) pvva;
 			/*
 			 * update the allocation pointers
 			 */
 			pvva += NBPG;
 			--npvvapg;
 
 			/*
 			 * free the entries into the free list
 			 */
 			for (i = 0; i < newentries; i++) {
 				free_pv_entry(entry);
 				entry++;
 			}
 		}
 		/* drop the temporary recursion guard */
 		pv_freelistcnt -= PV_FREELIST_MIN;
 	}
 	if (!pv_freelist)
 		panic("get_pv_entry: cannot get a pv_entry_t");
 }
 
 
 
 /*
  * init the pv_entry allocation system
  */
 #define PVSPERPAGE 64
 void
 init_pv_entries(npg)
 	int npg;
 {
 	/*
 	 * allocate enough kvm space for PVSPERPAGE entries per page (lots)
 	 * kvm space is fairly cheap, be generous!!!  (the system can panic if
 	 * this is too small.)
 	 */
 	npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1) / NBPG;
 	pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG);
 	/*
 	 * get the first batch of entries
 	 */
 	free_pv_entry(get_pv_entry());
 }
 
 /*
  * Return the base of the page table window through which pmap's ptes
  * can be accessed.
  *
  * The kernel pmap and the currently loaded address space are reached
  * through PTmap.  Any other pmap goes through the alternate window
  * APTmap; APTDpde is repointed (and the TLB flushed) when it does not
  * already refer to this pmap's page directory.
  */
 static pt_entry_t *
 get_pt_entry(pmap)
 	pmap_t pmap;
 {
 	vm_offset_t frame = (int) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 
 	/* kernel or current address space */
 	if (pmap == kernel_pmap || frame == ((int) PTDpde & PG_FRAME))
 		return PTmap;
 
 	/* alternate address space, already mapped */
 	if (frame == ((int) APTDpde & PG_FRAME))
 		return APTmap;
 
 	/* alternate address space, window must be switched */
 	APTDpde = pmap->pm_pdir[PTDPTDI];
 	pmap_update();
 	return APTmap;
 }
 
 /*
  * Remove the (pmap, va) mapping from the pv list whose header is pv.
  *
  * The first entry lives in the header itself: removing it means
  * copying the following entry up into the header (or marking the
  * header empty when there is none).  Any other entry is unlinked from
  * the chain.  The entry that becomes unused is put back on the free
  * list.  Runs at splhigh().
  */
 void
 pmap_remove_entry(pmap, pv, va)
 	struct pmap *pmap;
 	pv_entry_t pv;
 	vm_offset_t va;
 {
 	pv_entry_t npv;
 	pv_entry_t *link;
 	int s;
 
 	s = splhigh();
 	if (pv->pv_pmap == pmap && pv->pv_va == va) {
 		/* match is in the header */
 		npv = pv->pv_next;
 		if (npv == NULL) {
 			pv->pv_pmap = NULL;
 		} else {
 			*pv = *npv;
 			free_pv_entry(npv);
 		}
 	} else {
 		/* search the chain hanging off the header */
 		for (link = &pv->pv_next; (npv = *link) != NULL;
 		    link = &npv->pv_next) {
 			if (npv->pv_pmap == pmap && npv->pv_va == va) {
 				*link = npv->pv_next;
 				free_pv_entry(npv);
 				break;
 			}
 		}
 	}
 	splx(s);
 }
 
 /*
  *	Remove the given range of addresses [sva, eva) from the
  *	specified map.  A NULL pmap is ignored.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  *
  *	For every mapping removed the pmap statistics are updated, the
  *	pte is cleared, the page table page use is dropped with
  *	pmap_unuse_pt(), and, for managed pages, the pv entry is removed
  *	and a set PG_M bit is reflected into the vm_page dirty field
  *	(subject to the address checks below).  The TLB is flushed
  *	before returning.
  *
  *	NOTE(review): the local m is declared but never used.
  */
 void
 pmap_remove(pmap, sva, eva)
 	struct pmap *pmap;
 	register vm_offset_t sva;
 	register vm_offset_t eva;
 {
 	register pt_entry_t *ptp, *ptq;
 	vm_offset_t pa;
 	register pv_entry_t pv;
 	vm_offset_t va;
 	vm_page_t m;
 	pt_entry_t oldpte;
 
 	if (pmap == NULL)
 		return;
 
 	ptp = get_pt_entry(pmap);
 
 	/*
 	 * special handling of removing one page.  a very
 	 * common operation and easy to short circuit some
 	 * code.
 	 */
 	if ((sva + NBPG) == eva) {
 
 		/* no page table page, so nothing can be mapped */
 		if (*pmap_pde(pmap, sva) == 0)
 			return;
 
 		ptq = ptp + i386_btop(sva);
 
 		if (!*ptq)
 			return;
 		/*
 		 * Update statistics
 		 */
 		if (pmap_pte_w(ptq))
 			pmap->pm_stats.wired_count--;
 		pmap->pm_stats.resident_count--;
 
 		pa = pmap_pte_pa(ptq);
 		oldpte = *ptq;
 		*ptq = 0;
 
 		if (pmap_is_managed(pa)) {
 			/*
 			 * Reflect the modified bit into the vm_page, except
 			 * for addresses at or above the end of the UPAGES
 			 * area and below KERNBASE, and for addresses in
 			 * [clean_sva, clean_eva).
 			 */
 			if ((int) oldpte & PG_M) {
 				if ((sva < USRSTACK || sva >= KERNBASE) ||
 				    (sva >= USRSTACK && sva < USRSTACK + (UPAGES * NBPG))) {
 					if (sva < clean_sva || sva >= clean_eva) {
 						PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
 					}
 				}
 			}
 			pv = pa_to_pvh(pa);
 			pmap_remove_entry(pmap, pv, sva);
 		}
 		pmap_unuse_pt(pmap, sva);
 		pmap_update();
 		return;
 	}
 	/* from here on sva and eva are page numbers, not addresses */
 	sva = i386_btop(sva);
 	eva = i386_btop(eva);
 
 	while (sva < eva) {
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 
 		if (*pmap_pde(pmap, i386_ptob(sva)) == 0) {
 			/* We can race ahead here, straight to next pde.. */
 			sva = ((sva + NPTEPG) & ~(NPTEPG - 1));
 			continue;
 		}
 		ptq = ptp + sva;
 
 		/*
 		 * search for page table entries, use string operations that
 		 * are much faster than explicitly scanning when page tables
 		 * are not fully populated.
 		 */
 		if (*ptq == 0) {
 			vm_offset_t pdnxt = ((sva + NPTEPG) & ~(NPTEPG - 1));
 			vm_offset_t nscan = pdnxt - sva;
 			int found = 0;
 
 			/* never scan beyond the end of the requested range */
 			if ((nscan + sva) > eva)
 				nscan = eva - sva;
 
 			/*
 			 * repe scasl skips zero ptes; found is set when a
 			 * non-zero one stopped the scan, and ptq is left
 			 * one entry past it.
 			 */
 			asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" :
 			    "=D"(ptq), "=a"(found) : "c"(nscan), "0"(ptq) : "cx");
 
 			if (!found) {
 				sva = pdnxt;
 				continue;
 			}
 			/* step back onto the non-zero pte */
 			ptq -= 1;
 
 			sva = ptq - ptp;
 		}
 		/*
 		 * Update statistics
 		 */
 		oldpte = *ptq;
 		if (((int) oldpte) & PG_W)
 			pmap->pm_stats.wired_count--;
 		pmap->pm_stats.resident_count--;
 
 		/*
 		 * Invalidate the PTEs. XXX: should cluster them up and
 		 * invalidate as many as possible at once.
 		 */
 		*ptq = 0;
 
 		va = i386_ptob(sva);
 
 		/*
 		 * Remove from the PV table (pmap_remove_entry raises IPL
 		 * since we may be called at interrupt time).  Unmanaged
 		 * pages have no pv entry.
 		 */
 		pa = ((int) oldpte) & PG_FRAME;
 		if (!pmap_is_managed(pa)) {
 			pmap_unuse_pt(pmap, va);
 			++sva;
 			continue;
 		}
 		/* same modified-bit rules as the single page case above */
 		if ((int) oldpte & PG_M) {
 			if ((va < USRSTACK || va >= KERNBASE) ||
 			    (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) {
 				if (va < clean_sva || va >= clean_eva) {
 					PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL;
 				}
 			}
 		}
 		pv = pa_to_pvh(pa);
 		pmap_remove_entry(pmap, pv, va);
 		pmap_unuse_pt(pmap, va);
 		++sva;
 	}
 	pmap_update();
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		The pv list of the page is consumed from the head: each
  *		mapping's pte is cleared and its pv entry released until
  *		the header is empty.  Unmanaged pages are ignored.  The
  *		TLB is flushed only if a valid pte was actually cleared.
  *
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  */
 void
 pmap_remove_all(pa)
 	vm_offset_t pa;
 {
 	register pv_entry_t pv, npv;
 	register pt_entry_t *pte, *ptp;
 	vm_offset_t va;
 	struct pmap *pmap;
 	vm_page_t m;
 	int s;
 	int anyvalid = 0;
 
 	/*
 	 * Not one of ours
 	 */
 	/*
 	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
 	 * pages!
 	 */
 	if (!pmap_is_managed(pa))
 		return;
 
 	pa = i386_trunc_page(pa);
 	pv = pa_to_pvh(pa);
 	/* looked up once; pa does not change inside the loop */
 	m = PHYS_TO_VM_PAGE(pa);
 
 	s = splhigh();
 	while (pv->pv_pmap != NULL) {
 		pmap = pv->pv_pmap;
 		ptp = get_pt_entry(pmap);
 		va = pv->pv_va;
 		pte = ptp + i386_btop(va);
 		if (pmap_pte_w(pte))
 			pmap->pm_stats.wired_count--;
 		if (*pte) {
 			pmap->pm_stats.resident_count--;
 			anyvalid++;
 
 			/*
 			 * Update the vm_page_t dirty bits from PG_M, except
 			 * for addresses at or above the end of the UPAGES
 			 * area and below KERNBASE, and for addresses in
 			 * [clean_sva, clean_eva).
 			 */
 			if ((int) *pte & PG_M) {
 				if ((va < USRSTACK || va >= KERNBASE) ||
 				    (va >= USRSTACK && va < USRSTACK + (UPAGES * NBPG))) {
 					if (va < clean_sva || va >= clean_eva) {
 						m->dirty |= VM_PAGE_BITS_ALL;
 					}
 				}
 			}
 			*pte = 0;
 			pmap_unuse_pt(pmap, va);
 		}
 		/* pop the header: pull the next entry up, or mark it empty */
 		npv = pv->pv_next;
 		if (npv) {
 			*pv = *npv;
 			free_pv_entry(npv);
 		} else {
 			pv->pv_pmap = NULL;
 		}
 	}
 	splx(s);
 	if (anyvalid)
 		pmap_update();
 }
 
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  *
  *	A NULL pmap is ignored.  Protection without read access removes
  *	the range entirely via pmap_remove().  Protection that includes
  *	write access is a no-op here.  Otherwise every non-empty pte in
  *	[sva, eva) has its protection bits rewritten, and the TLB is
  *	flushed if at least one pte was touched.
  */
 void
 pmap_protect(pmap, sva, eva, prot)
 	register pmap_t pmap;
 	vm_offset_t sva, eva;
 	vm_prot_t prot;
 {
 	register pt_entry_t *pte;
 	register vm_offset_t va;
 	int i386prot;
 	register pt_entry_t *ptp;
 	/* page number of eva; bounds the pte scan below */
 	int evap = i386_btop(eva);
 	int anyvalid = 0;;
 
 	if (pmap == NULL)
 		return;
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 	if (prot & VM_PROT_WRITE)
 		return;
 
 	ptp = get_pt_entry(pmap);
 
 	va = sva;
 	while (va < eva) {
 		int found = 0;
 		int svap;
 		vm_offset_t nscan;
 
 		/*
 		 * Page table page is not allocated. Skip it, we don't want to
 		 * force allocation of unnecessary PTE pages just to set the
 		 * protection.
 		 */
 		if (!*pmap_pde(pmap, va)) {
 			/* XXX: avoid address wrap around */
 			/* also the target of the "nothing found" jump below */
 	nextpde:
 			if (va >= i386_trunc_pdr((vm_offset_t) - 1))
 				break;
 			va = i386_round_pdr(va + PAGE_SIZE);
 			continue;
 		}
 		pte = ptp + i386_btop(va);
 
 		if (*pte == 0) {
 			/*
 			 * scan for a non-empty pte, within this page table
 			 * page and not beyond evap
 			 */
 			svap = pte - ptp;
 			nscan = ((svap + NPTEPG) & ~(NPTEPG - 1)) - svap;
 
 			if (nscan + svap > evap)
 				nscan = evap - svap;
 
 			found = 0;
 			/*
 			 * repe scasl skips zero ptes; found is set when a
 			 * non-zero one stopped the scan, and pte is left one
 			 * entry past it.
 			 */
 			if (nscan)
 				asm("xorl %%eax,%%eax;cld;repe;scasl;jz 1f;incl %%eax;1:;" :
 				    "=D"(pte), "=a"(found) : "c"(nscan), "0"(pte) : "cx");
 
 			if (!found)
 				goto nextpde;
 
 			/* step back onto the non-zero pte */
 			pte -= 1;
 			svap = pte - ptp;
 
 			va = i386_ptob(svap);
 		}
 		anyvalid++;
 
 		i386prot = pte_prot(pmap, prot);
 		/*
 		 * Addresses below UPT_MAX_ADDRESS get PG_u; those at or
 		 * above UPT_MIN_ADDRESS additionally keep PG_RW.
 		 */
 		if (va < UPT_MAX_ADDRESS) {
 			i386prot |= PG_u;
 			if (va >= UPT_MIN_ADDRESS)
 				i386prot |= PG_RW;
 		}
 		pmap_pte_set_prot(pte, i386prot);
 		va += PAGE_SIZE;
 	}
 	if (anyvalid)
 		pmap_update();
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	A NULL pmap is ignored.  va and pa are truncated to page
  *	boundaries.  Panics if va is above VM_MAX_KERNEL_ADDRESS or if
  *	the page directory entry covering va is not valid.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  */
 void
 pmap_enter(pmap, va, pa, prot, wired)
 	register pmap_t pmap;
 	vm_offset_t va;
 	register vm_offset_t pa;
 	vm_prot_t prot;
 	boolean_t wired;
 {
 	register pt_entry_t *pte;
 	register pt_entry_t npte;
 	vm_offset_t opa;
 	int ptevalid = 0;
 
 	if (pmap == NULL)
 		return;
 
 	va = i386_trunc_page(va);
 	pa = i386_trunc_page(pa);
 	if (va > VM_MAX_KERNEL_ADDRESS)
 		panic("pmap_enter: toobig");
 
 	/*
 	 * The page directory entry must already be valid: this routine
 	 * does not allocate page table pages and panics otherwise.
 	 */
 	if (*pmap_pde(pmap, va) == 0) {
 		printf("kernel page directory invalid pdir=0x%x, va=0x%x\n", pmap->pm_pdir[PTDPTDI], va);
 		panic("invalid kernel page directory");
 	}
 	pte = pmap_pte(pmap, va);
 	opa = pmap_pte_pa(pte);
 
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (opa == pa) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if (wired && !pmap_pte_w(pte))
 			pmap->pm_stats.wired_count++;
 		else if (!wired && pmap_pte_w(pte))
 			pmap->pm_stats.wired_count--;
 
 		goto validate;
 	}
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		pmap_remove(pmap, va, va + PAGE_SIZE);
 	}
 	/*
 	 * Enter on the PV list if part of our managed memory Note that we
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
 	if (pmap_is_managed(pa)) {
 		register pv_entry_t pv, npv;
 		int s;
 
 		pv = pa_to_pvh(pa);
 		s = splhigh();
 		/*
 		 * No entries yet, use header as the first entry
 		 */
 		if (pv->pv_pmap == NULL) {
 			pv->pv_va = va;
 			pv->pv_pmap = pmap;
 			pv->pv_next = NULL;
 		}
 		/*
 		 * There is at least one other VA mapping this page. Place
 		 * this entry after the header.
 		 */
 		else {
 			npv = get_pv_entry();
 			npv->pv_va = va;
 			npv->pv_pmap = pmap;
 			npv->pv_next = pv->pv_next;
 			pv->pv_next = npv;
 		}
 		splx(s);
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 	if (wired)
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	npte = (pt_entry_t) ((int) (pa | pte_prot(pmap, prot) | PG_V));
 
 	/*
 	 * When forking (copy-on-write, etc): A process will turn off write
 	 * permissions for any of its writable pages.  If the data (object) is
 	 * only referred to by one process, the processes map is modified
 	 * directly as opposed to using the object manipulation routine.  When
 	 * using pmap_protect, the modified bits are not kept in the vm_page_t
 	 * data structure.  Therefore, when using pmap_enter in vm_fault to
 	 * bring back writability of a page, there has been no memory of the
 	 * modified or referenced bits except at the pte level.  this clause
 	 * supports the carryover of the modified and used (referenced) bits.
 	 */
 	if (pa == opa)
 		(int) npte |= (int) *pte & (PG_M | PG_U);
 
 
 	if (wired)
 		(int) npte |= PG_W;
 	/*
 	 * Addresses below UPT_MIN_ADDRESS get PG_u; those from there up to
 	 * UPT_MAX_ADDRESS get PG_u and are always writable.
 	 */
 	if (va < UPT_MIN_ADDRESS)
 		(int) npte |= PG_u;
 	else if (va < UPT_MAX_ADDRESS)
 		(int) npte |= PG_u | PG_RW;
 
 	/* ptevalid records that a non-zero pte was replaced */
 	if (*pte != npte) {
 		if (*pte)
 			ptevalid++;
 		*pte = npte;
 	}
 	/*
 	 * Flush the TLB when an existing pte was replaced; otherwise
 	 * account for the use of the page table page.
 	 */
 	if (ptevalid) {
 		pmap_update();
 	} else {
 		pmap_use_pt(pmap, va);
 	}
 }
 
 /*
  * Map count pages from the array m at consecutive kernel virtual
  * pages starting at va.
  *
  * This routine is only for temporary kernel mappings that do not
  * need page modification or references recorded.  Old mappings are
  * simply written over.  The pages *must* be wired.  The TLB is
  * flushed only if an existing mapping was overwritten.
  */
 void
 pmap_qenter(va, m, count)
 	vm_offset_t va;
 	vm_page_t *m;
 	int count;
 {
 	int idx;
 	int replaced = 0;
 	register pt_entry_t *pte;
 
 	for (idx = 0; idx < count; idx++) {
 		pte = vtopte(va + idx * NBPG);
 		if (*pte)
 			replaced = 1;
 		*pte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[idx]) | PG_RW | PG_V | PG_W));
 	}
 
 	if (replaced)
 		pmap_update();
 }
 /*
  * Clear the ptes of count consecutive kernel virtual pages starting
  * at va and flush the TLB.  Meant only for temporary mappings.
  */
 void
 pmap_qremove(va, count)
 	vm_offset_t va;
 	int count;
 {
 	int idx = 0;
 	register pt_entry_t *pte;
 
 	while (idx < count) {
 		pte = vtopte(va + idx * NBPG);
 		*pte = 0;
 		idx++;
 	}
 	pmap_update();
 }
 
 /*
  * Add a wired, writable mapping of physical page pa at kernel
  * virtual address va.
  *
  * The TLB is flushed here only when a previous mapping at va was
  * overwritten; for a fresh mapping, callers that need it to take
  * effect immediately should do a pmap_update themselves.
  */
 void
 pmap_kenter(va, pa)
 	vm_offset_t va;
 	register vm_offset_t pa;
 {
 	register pt_entry_t *pte = vtopte(va);
 	int had_mapping;
 
 	had_mapping = (*pte != 0);
 
 	*pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_W));
 
 	if (had_mapping)
 		pmap_update();
 }
 
 /*
  * Clear the kernel page table entry for va and flush the TLB.
  */
 void
 pmap_kremove(va)
 	vm_offset_t va;
 {
 	register pt_entry_t *pte = vtopte(va);
 
 	*pte = (pt_entry_t) 0;
 
 	pmap_update();
 }
 
 /*
  * Fast path for entering a read-only, unwired user mapping of a
  * managed page.
  *
  * this code makes some *MAJOR* assumptions:
  * 1. Current pmap & pmap exists.
  * 2. Not wired.
  * 3. Read access.
  * 4. No page table pages.
  * 5. Tlbflush is deferred to calling procedure.
  * 6. Page IS managed.
  * but is *MUCH* faster than pmap_enter...
  */
 
 static inline void
 pmap_enter_quick(pmap, va, pa)
 	register pmap_t pmap;
 	vm_offset_t va;
 	register vm_offset_t pa;
 {
 	register pt_entry_t *pte;
 	register pv_entry_t pv, npv;
 	int s;
 
 	/* the pte is reached through the current address space (vtopte) */
 	pte = vtopte(va);
 
 	/* a fault on the page table might occur here */
 	if (*pte) {
 		/* replace whatever is mapped at va */
 		pmap_remove(pmap, va, va + PAGE_SIZE);
 	}
 	/*
 	 * Enter on the PV list.  IPL is raised while manipulating the
 	 * pv_table since it can also be changed at interrupt time.
 	 */
 	pv = pa_to_pvh(pa);
 	s = splhigh();
 	/*
 	 * No entries yet, use header as the first entry
 	 */
 	if (pv->pv_pmap == NULL) {
 		pv->pv_pmap = pmap;
 		pv->pv_va = va;
 		pv->pv_next = NULL;
 	}
 	/*
 	 * There is at least one other VA mapping this page. Place this entry
 	 * after the header.
 	 */
 	else {
 		npv = get_pv_entry();
 		npv->pv_va = va;
 		npv->pv_pmap = pmap;
 		npv->pv_next = pv->pv_next;
 		pv->pv_next = npv;
 	}
 	splx(s);
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 
 	/*
 	 * Validate the mapping: valid and user accessible, with no write
 	 * or wired bit.
 	 */
 	*pte = (pt_entry_t) ((int) (pa | PG_V | PG_u));
 
 	pmap_use_pt(pmap, va);
 
 	return;
 }
 
 /* size limit (bytes) above which large resident objects are not preloaded */
 #define MAX_INIT_PT (1024*2048)
 /*
  * pmap_object_init_pt preloads the ptes for a given object
  * into the specified pmap.  This eliminates the blast of soft
  * faults on process startup and immediately after an mmap.
  *
  * Pages of object in [offset, offset + size) are mapped read-only at
  * addr plus their offset into that window, using pmap_enter_quick().
  * Only pages that are on the active or inactive queue, fully valid,
  * not busy, not bmapped, and not fictitious or cached are entered.
  *
  * Nothing is done when pmap is NULL, when the request is larger than
  * MAX_INIT_PT and the object has more than MAX_INIT_PT / NBPG resident
  * pages, or when the object lock cannot be taken without waiting.
  *
  * NOTE(review): the local bits is declared but never used.
  */
 void
 pmap_object_init_pt(pmap, addr, object, offset, size)
 	pmap_t pmap;
 	vm_offset_t addr;
 	vm_object_t object;
 	vm_offset_t offset;
 	vm_offset_t size;
 {
 	vm_offset_t tmpoff;
 	vm_page_t p;
 	int bits;
 	int objbytes;
 
 	if (!pmap || ((size > MAX_INIT_PT) &&
 		(object->resident_page_count > (MAX_INIT_PT / NBPG)))) {
 		return;
 	}
 	if (!vm_object_lock_try(object))
 		return;
 
 	/*
 	 * if we are processing a major portion of the object, then scan the
 	 * entire thing.
 	 */
 	if (size > (object->size >> 2)) {
 		/* bytes of the window still to account for; ends the scan early */
 		objbytes = size;
 
 		for (p = object->memq.tqh_first;
 		    ((objbytes > 0) && (p != NULL));
 		    p = p->listq.tqe_next) {
 
 			/* skip pages that lie outside the requested window */
 			tmpoff = p->offset;
 			if (tmpoff < offset) {
 				continue;
 			}
 			tmpoff -= offset;
 			if (tmpoff >= size) {
 				continue;
 			}
 			if (((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) &&
 			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 			    (p->bmapped == 0) &&
 				(p->busy == 0) &&
 			    (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) {
 				/* hold the page across the mapping operation */
 				vm_page_hold(p);
+				p->flags |= PG_MAPPED;
 				pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p));
 				vm_page_unhold(p);
 			}
 			objbytes -= NBPG;
 		}
 	} else {
 		/*
 		 * else lookup the pages one-by-one.
 		 */
 		for (tmpoff = 0; tmpoff < size; tmpoff += NBPG) {
 			p = vm_page_lookup(object, tmpoff + offset);
 			if (p && ((p->flags & (PG_ACTIVE | PG_INACTIVE)) != 0) &&
 			    (p->bmapped == 0) && (p->busy == 0) &&
 			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 			    (p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) == 0) {
 				/* hold the page across the mapping operation */
 				vm_page_hold(p);
+				p->flags |= PG_MAPPED;
 				pmap_enter_quick(pmap, addr + tmpoff, VM_PAGE_TO_PHYS(p));
 				vm_page_unhold(p);
 			}
 		}
 	}
 	vm_object_unlock(object);
 }
 
 #if 0
 /*
  * pmap_prefault provides a quick way of clustering
  * pagefaults into a processes address space.  It is a "cousin"
  * of pmap_object_init_pt, except it runs at page fault time instead
  * of mmap time.
  *
  * Up to PFBAK pages before and PFFOR pages after the faulting address
  * addra are considered, in the order given by pmap_prefault_pageorder,
  * and mapped read-only with pmap_enter_quick() when they are resident
  * and idle.  Nothing is done unless object is the entry's own object
  * and pmap belongs to the current process.
  *
  * NOTE(review): the local pageorder_index is declared but never used.
  */
 #define PFBAK 2
 #define PFFOR 2
 #define PAGEORDER_SIZE (PFBAK+PFFOR)
 
 /* byte offsets from the faulting address, nearest neighbours first */
 static int pmap_prefault_pageorder[] = {
 	-NBPG, NBPG, -2 * NBPG, 2 * NBPG
 };
 
 void
 pmap_prefault(pmap, addra, entry, object)
 	pmap_t pmap;
 	vm_offset_t addra;
 	vm_map_entry_t entry;
 	vm_object_t object;
 {
 	int i;
 	vm_offset_t starta, enda;
 	vm_offset_t offset, addr;
 	vm_page_t m;
 	int pageorder_index;
 
 	if (entry->object.vm_object != object)
 		return;
 
 	if (pmap != &curproc->p_vmspace->vm_pmap)
 		return;
 
 	/* clamp the window to the map entry; starta > addra means wrap */
 	starta = addra - PFBAK * NBPG;
 	if (starta < entry->start) {
 		starta = entry->start;
 	} else if (starta > addra)
 		starta = 0;
 
 	enda = addra + PFFOR * NBPG;
 	if (enda > entry->end)
 		enda = entry->end;
 
 	for (i = 0; i < PAGEORDER_SIZE; i++) {
 		vm_object_t lobject;
 		pt_entry_t *pte;
 
 		addr = addra + pmap_prefault_pageorder[i];
 		if (addr < starta || addr >= enda)
 			continue;
 
 		/* already mapped: leave it alone */
 		pte = vtopte(addr);
 		if (*pte)
 			continue;
 
 		/* look the page up, descending through pagerless shadows */
 		offset = (addr - entry->start) + entry->offset;
 		lobject = object;
 		for (m = vm_page_lookup(lobject, offset);
 		    (!m && lobject->shadow && !lobject->pager);
 		    lobject = lobject->shadow) {
 
 			offset += lobject->shadow_offset;
 			m = vm_page_lookup(lobject->shadow, offset);
 		}
 
 		/*
 		 * give-up when a page is not in memory
 		 */
 		if (m == NULL)
 			break;
 
 		if (((m->flags & (PG_CACHE | PG_ACTIVE | PG_INACTIVE)) != 0) &&
 		    ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 		    (m->busy == 0) &&
 			(m->bmapped == 0) &&
 		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 			/*
 			 * test results show that the system is faster when
 			 * pages are activated.
 			 */
 			if ((m->flags & PG_ACTIVE) == 0) {
 				if( m->flags & PG_CACHE)
 					vm_page_deactivate(m);
 				else
 					vm_page_activate(m);
 			}
 			/* hold the page across the mapping operation */
 			vm_page_hold(m);
+			m->flags |= PG_MAPPED;
 			pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m));
 			vm_page_unhold(m);
 		}
 	}
 }
 #endif
 
 /*
  *	Routine:	pmap_change_wiring
  *	Function:	Change the wiring attribute for a map/virtual-address
  *			pair, keeping the pmap's wired count in step.
  *	In/out conditions:
  *			The mapping must already exist in the pmap.
  *			A NULL pmap is ignored.
  */
 void
 pmap_change_wiring(pmap, va, wired)
 	register pmap_t pmap;
 	vm_offset_t va;
 	boolean_t wired;
 {
 	register pt_entry_t *pte;
 
 	if (pmap == NULL)
 		return;
 
 	pte = pmap_pte(pmap, va);
 
 	/* adjust the statistics only on an actual transition */
 	if (wired) {
 		if (!pmap_pte_w(pte))
 			pmap->pm_stats.wired_count++;
 	} else {
 		if (pmap_pte_w(pte))
 			pmap->pm_stats.wired_count--;
 	}
 
 	/*
 	 * Wiring is a software-only attribute, so the TLB does not need
 	 * to be invalidated.
 	 */
 	pmap_pte_set_w(pte, wired);
 
 	/*
 	 * On unwiring, mark the pte modified: the kernel could have
 	 * changed the page.
 	 */
 	if (!wired)
 		(int) *pte |= PG_M;
 }
 
 
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything;
  *	this implementation is intentionally empty.
  */
 void
 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
 	pmap_t dst_pmap, src_pmap;
 	vm_offset_t dst_addr;
 	vm_size_t len;
 	vm_offset_t src_addr;
 {
 }
 
 /*
  *	Routine:	pmap_kernel
  *	Function:
  *		Hand back the kernel's physical map handle (kernel_pmap).
  */
 pmap_t
 pmap_kernel()
 {
 	return kernel_pmap;
 }
 
 /*
  *	pmap_zero_page zeros the specified physical page by mapping it
  *	temporarily at CADDR2 (through the CMAP2 pte) and clearing NBPG
  *	bytes with bzero.  The temporary mapping is removed and the TLB
  *	flushed afterwards.  Panics if CMAP2 is already in use.
  */
 void
 pmap_zero_page(phys)
 	vm_offset_t phys;
 {
 	int *cmap2 = (int *) CMAP2;
 
 	if (*cmap2)
 		panic("pmap_zero_page: CMAP busy");
 
 	*cmap2 = PG_V | PG_KW | i386_trunc_page(phys);
 	bzero(CADDR2, NBPG);
 
 	*cmap2 = 0;
 	pmap_update();
 }
 
 /*
  *	pmap_copy_page copies physical page src to physical page dst by
  *	mapping them temporarily at CADDR1 and CADDR2 (through the CMAP1
  *	and CMAP2 ptes) and copying NBPG bytes.  Both temporary mappings
  *	are removed and the TLB flushed afterwards.  Panics if either
  *	CMAP slot is already in use.
  */
 void
 pmap_copy_page(src, dst)
 	vm_offset_t src;
 	vm_offset_t dst;
 {
 	int *cmap1 = (int *) CMAP1;
 	int *cmap2 = (int *) CMAP2;
 
 	if (*cmap1 || *cmap2)
 		panic("pmap_copy_page: CMAP busy");
 
 	*cmap1 = PG_V | PG_KW | i386_trunc_page(src);
 	*cmap2 = PG_V | PG_KW | i386_trunc_page(dst);
 
 #if __GNUC__ > 1
 	memcpy(CADDR2, CADDR1, NBPG);
 #else
 	bcopy(CADDR1, CADDR2, NBPG);
 #endif
 	*cmap1 = 0;
 	*cmap2 = 0;
 	pmap_update();
 }
 
 
 /*
  *	Routine:	pmap_pageable
  *	Function:
  *		Make the specified pages (by pmap, offset)
  *		pageable (or not) as requested.
  *
  *		A page which is not pageable may not take
  *		a fault; therefore, its page table entry
  *		must remain valid for the duration.
  *
  *		This routine is merely advisory; pmap_enter
  *		will specify that these pages are to be wired
  *		down (or not) as appropriate.  This implementation
  *		is intentionally empty.
  */
 void
 pmap_pageable(pmap, sva, eva, pageable)
 	pmap_t pmap;
 	vm_offset_t sva, eva;
 	boolean_t pageable;
 {
 }
 
 /*
  * Return TRUE if the physical page pa has a mapping in the given
  * pmap, FALSE otherwise.  Unmanaged pages always yield FALSE.
  * The pv list is walked at splhigh().
  */
 boolean_t
 pmap_page_exists(pmap, pa)
 	pmap_t pmap;
 	vm_offset_t pa;
 {
 	register pv_entry_t pv;
 	boolean_t found = FALSE;
 	int s;
 
 	if (!pmap_is_managed(pa))
 		return FALSE;
 
 	pv = pa_to_pvh(pa);
 	s = splhigh();
 
 	/* an empty header means the page has no mappings at all */
 	if (pv->pv_pmap != NULL) {
 		while (pv != NULL) {
 			if (pv->pv_pmap == pmap) {
 				found = TRUE;
 				break;
 			}
 			pv = pv->pv_next;
 		}
 	}
 	splx(s);
 
 	return found;
 }
 
 /*
  * pmap_testbit reports whether the given pte bit is set in any of the
  * mappings of physical page pa.  Unmanaged pages always yield FALSE.
  * Some address ranges are special-cased for PG_U and PG_M; see the
  * comments in the loop.
  *
  * note that the testbit/changebit routines are inline,
  * and a lot of things compile-time evaluate.
  */
 __inline boolean_t
 pmap_testbit(pa, bit)
 	register vm_offset_t pa;
 	int bit;
 {
 	register pv_entry_t pv;
 	pt_entry_t *pte;
 	int s;
 
 	if (!pmap_is_managed(pa))
 		return FALSE;
 
 	pv = pa_to_pvh(pa);
 	s = splhigh();
 
 	/*
 	 * Walk the page's mappings, returning as soon as the answer is
 	 * known.  An empty header means there are no mappings.
 	 */
 	if (pv->pv_pmap != NULL) {
 		for (; pv; pv = pv->pv_next) {
 			/*
 			 * mappings in [clean_sva, clean_eva) are ignored
 			 * when testing the referenced bit.
 			 */
 			if (bit & PG_U) {
 				if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) {
 					continue;
 				}
 			}
 			/*
 			 * if the bit being tested is the modified bit, then
 			 * mark UPAGES as always modified, and ptes as never
 			 * modified.  Mappings in [clean_sva, clean_eva) are
 			 * ignored here as well.
 			 */
 			if (bit & PG_M) {
 				if (pv->pv_va >= USRSTACK) {
 					if (pv->pv_va >= clean_sva && pv->pv_va < clean_eva) {
 						continue;
 					}
 					if (pv->pv_va < USRSTACK + (UPAGES * NBPG)) {
 						splx(s);
 						return TRUE;
 					} else if (pv->pv_va < KERNBASE) {
 						splx(s);
 						return FALSE;
 					}
 				}
 			}
 			/* defensive: a chained entry without a pmap is skipped */
 			if (!pv->pv_pmap) {
 				printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
 				continue;
 			}
 			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
 			if ((int) *pte & bit) {
 				splx(s);
 				return TRUE;
 			}
 		}
 	}
 	splx(s);
 	return (FALSE);
 }
 
 /*
  * this routine is used to modify bits in ptes
  */
 __inline void
 pmap_changebit(pa, bit, setem)
 	vm_offset_t pa;
 	int bit;
 	boolean_t setem;
 {
 	register pv_entry_t pv;
 	register pt_entry_t *pte, npte;
 	vm_offset_t va;
 	int s;
 
 	if (!pmap_is_managed(pa))
 		return;
 
 	pv = pa_to_pvh(pa);
 	s = splhigh();
 
 	/*
 	 * Loop over all current mappings setting/clearing as appropos If
 	 * setting RO do we need to clear the VAC?
 	 */
 	if (pv->pv_pmap != NULL) {
 		for (; pv; pv = pv->pv_next) {
 			va = pv->pv_va;
 
 			/*
 			 * don't write protect pager mappings
 			 */
 			if (!setem && (bit == PG_RW)) {
 				if (va >= clean_sva && va < clean_eva)
 					continue;
 			}
 			if (!pv->pv_pmap) {
 				printf("Null pmap (cb) at va: 0x%lx\n", va);
 				continue;
 			}
 			pte = pmap_pte(pv->pv_pmap, va);
 			if (setem)
 				(int) npte = (int) *pte | bit;
 			else
 				(int) npte = (int) *pte & ~bit;
 			*pte = npte;
 		}
 	}
 	splx(s);
 	pmap_update();
 }
 
 /*
  *      pmap_page_protect:
  *
  *      Lower the permission for all mappings to a given page.
  *      Protection that includes write access changes nothing.
  *      Read and/or execute access write-protects every mapping.
  *      No access at all removes every mapping.
  */
 void
 pmap_page_protect(phys, prot)
 	vm_offset_t phys;
 	vm_prot_t prot;
 {
 	/* write permission requested: nothing to lower */
 	if ((prot & VM_PROT_WRITE) != 0)
 		return;
 
 	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
 		pmap_changebit(phys, PG_RW, FALSE);
 	else
 		pmap_remove_all(phys);
 }
 
 /*
  * Convert a physical page number to a physical byte address.
  */
 vm_offset_t
 pmap_phys_address(ppn)
 	int ppn;
 {
 	return i386_ptob(ppn);
 }
 
 /*
  *	pmap_is_referenced:
  *
  *	Report whether the specified physical page has the referenced
  *	bit (PG_U) set in any of its mappings, as seen by pmap_testbit.
  */
 boolean_t
 pmap_is_referenced(vm_offset_t pa)
 {
 	return pmap_testbit(pa, PG_U);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Report whether the specified physical page has the modified
  *	bit (PG_M) set in any of its mappings, as seen by pmap_testbit.
  */
 boolean_t
 pmap_is_modified(vm_offset_t pa)
 {
 	return pmap_testbit(pa, PG_M);
 }
 
 /*
  *	Clear the modified bit (PG_M) in every mapping of the
  *	specified physical page.
  */
 void
 pmap_clear_modify(vm_offset_t pa)
 {
 	pmap_changebit(pa, PG_M, FALSE);
 }
 
 /*
  *	pmap_clear_reference:
  *
  *	Clear the referenced bit (PG_U) in every mapping of the
  *	specified physical page.
  */
 void
 pmap_clear_reference(vm_offset_t pa)
 {
 	pmap_changebit(pa, PG_U, FALSE);
 }
 
 /*
  *	Routine:	pmap_copy_on_write
  *	Function:
  *		Take away write permission (PG_RW) from the mappings
  *		of this physical page, by way of pmap_changebit.
  */
 void
 pmap_copy_on_write(vm_offset_t pa)
 {
 	pmap_changebit(pa, PG_RW, FALSE);
 }
 
 /*
  * Miscellaneous support routines follow
  */
 
 /*
  * Fill the protection_codes table, indexed by VM protection value
  * (0..7), with the matching pte protection bits: PG_RW for every
  * combination that includes write access, 0 for all the others.
  */
 void
 i386_protection_init()
 {
 	register int *kp, prot;
 
 	kp = protection_codes;
 	for (prot = 0; prot < 8; prot++) {
 		/* case labels are spelled as read | write | execute */
 		switch (prot) {
 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
 			/*
 			 * Read access is also 0. There isn't any execute bit,
 			 * so just make it readable.
 			 */
 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
 			*kp++ = 0;
 			break;
 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
 			*kp++ = PG_RW;
 			break;
 		}
 	}
 }
 
 /*
  * Map a set of physical memory pages into the kernel virtual
  * address space. Return a pointer to where it is mapped. This
  * routine is intended to be used for mapping device memory,
  * NOT real memory. The non-cacheable bits are set on each
  * mapped page.
  *
  * pa need not be page aligned: every page touched by the range
  * [pa, pa + size) is mapped.  The returned address corresponds to the
  * start of the page containing pa, so a caller passing an unaligned
  * pa must add the offset within the page itself.
  *
  * Panics if kernel virtual memory cannot be allocated.
  */
 void *
 pmap_mapdev(pa, size)
 	vm_offset_t pa;
 	vm_size_t size;
 {
 	vm_offset_t va, tmpva, offset;
 	pt_entry_t *pte;
 
 	/*
 	 * Include the offset of pa within its page when rounding the
 	 * size, otherwise a request that straddles a page boundary would
 	 * be left one page short.
 	 */
 	offset = pa - trunc_page(pa);
 	pa = trunc_page(pa);
 	size = roundup(offset + size, PAGE_SIZE);
 
 	va = kmem_alloc_pageable(kernel_map, size);
 	if (!va)
 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 
 	/* enter one writable, valid, non-cacheable pte per page */
 	for (tmpva = va; size > 0;) {
 		pte = vtopte(tmpva);
 		*pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V | PG_N));
 		size -= PAGE_SIZE;
 		tmpva += PAGE_SIZE;
 		pa += PAGE_SIZE;
 	}
 	pmap_update();
 
 	return ((void *) va);
 }
 
 #ifdef DEBUG
 /*
  * Print the address space of a pmap: one "va:pte" pair for every valid
  * pte at or below UPT_MAX_ADDRESS.  Nothing is printed for the kernel
  * pmap.
  */
 void
 pads(pm)
 	pmap_t pm;
 {
 	unsigned va, i, j;
 	pt_entry_t *ptep;
 
 	if (pm == kernel_pmap)
 		return;
 
 	/* pm is never the kernel pmap from here on */
 	for (i = 0; i < 1024; i++) {
 		/* no page table page behind this directory slot */
 		if (!pm->pm_pdir[i])
 			continue;
 		for (j = 0; j < 1024; j++) {
 			va = (i << PD_SHIFT) + (j << PG_SHIFT);
 			if (va > UPT_MAX_ADDRESS)
 				continue;
 			ptep = pmap_pte(pm, va);
 			if (pmap_pte_v(ptep))
 				printf("%x:%x ", va, *(int *) ptep);
 		}
 	}
 }
 
 /*
  * Debugging aid: print the pv list of physical page pa, and for each
  * entry dump the address space of its pmap with pads().
  */
 void
 pmap_pvdump(pa)
 	vm_offset_t pa;
 {
 	register pv_entry_t pv;
 
 	printf("pa %x", pa);
 	pv = pa_to_pvh(pa);
 	while (pv) {
 #ifdef used_to_be
 		printf(" -> pmap %x, va %x, flags %x",
 		    pv->pv_pmap, pv->pv_va, pv->pv_flags);
 #endif
 		printf(" -> pmap %x, va %x",
 		    pv->pv_pmap, pv->pv_va);
 		pads(pv->pv_pmap);
 		pv = pv->pv_next;
 	}
 	printf(" ");
 }
 #endif
Index: head/sys/kern/vfs_bio.c
===================================================================
--- head/sys/kern/vfs_bio.c	(revision 6806)
+++ head/sys/kern/vfs_bio.c	(revision 6807)
@@ -1,1462 +1,1465 @@
 /*
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice immediately at the beginning of the file, without modification,
  *    this list of conditions, and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Absolutely no warranty of function or purpose is made by the author
  *    John S. Dyson.
  * 4. This work was done expressly for inclusion into FreeBSD.  Other use
  *    is allowed if this notation is included.
  * 5. Modifications may be freely made to this file if the above conditions
  *    are met.
  *
- * $Id: vfs_bio.c,v 1.30 1995/02/22 09:30:13 davidg Exp $
+ * $Id: vfs_bio.c,v 1.31 1995/02/25 01:46:26 davidg Exp $
  */
 
 /*
  * this file contains a new buffer I/O scheme implementing a coherent
  * VM object and buffer cache scheme.  Pains have been taken to make
  * sure that the performance degradation associated with schemes such
  * as this is not realized.
  *
  * Author:  John S. Dyson
  * Significant help during the development and debugging phases
  * was provided by David Greenman, also of the FreeBSD core team.
  */
 
 #define VMIO
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
 #include <vm/vm.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <sys/buf.h>
 #include <sys/mount.h>
 #include <sys/malloc.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
 
 #include <miscfs/specfs/specdev.h>
 
 struct buf *buf;		/* buffer header pool */
 int nbuf;			/* number of buffer headers calculated
 				 * elsewhere */
 /* queue head; initialized (emptied) by bufinit() */
 struct swqueue bswlist;
 /*
  * nvmio: incremented by getblk() for each buffer it sets up as VMIO,
  *	  decremented by brelse() when a VMIO buffer is invalidated.
  * nlru:  incremented by brelse() when a buffer goes onto QUEUE_LRU,
  *	  decremented by bremfree() when one is taken off it.
  */
 int nvmio, nlru;
 
 extern vm_map_t buffer_map, io_map, kernel_map, pager_map;
 
 /* helpers used in this file before their definitions appear */
 void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
 void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
 void vfs_dirty_pages(struct buf * bp);
 void vfs_busy_pages(struct buf *, int clear_modify);
 
 /*
  * Set by getnewbuf() just before it sleeps waiting for a buffer;
  * brelse() clears it and wakes the sleepers.
  */
 int needsbuffer;
 
 /*
  * Internal update daemon, process 3
  *	The variable vfs_update_wakeup allows for internal syncs.
  *	vfs_update() sleeps on its address and clears it before each sync.
  */
 int vfs_update_wakeup;
 
 
 /*
  * buffers base kva: start of the MAXBSIZE * nbuf region allocated from
  * buffer_map in bufinit(); header i uses buffers_kva + i * MAXBSIZE.
  */
 caddr_t buffers_kva;
 
 /*
  * bogus page -- for I/O to/from partially complete buffers.
  * This is a temporary solution to the problem, but it is not
  * really that bad.  It would be better to split the buffer
  * for input in the case of buffers partially already in memory,
  * but the code is intricate enough already.
  * Both variables are set up at the end of bufinit().
  */
 vm_page_t bogus_page;
 vm_offset_t bogus_offset;
 
 /*
  * bufspace is adjusted by allocbuf() and brelse() as buffer storage is
  * attached and released; maxbufspace is the limit computed in bufinit()
  * that getnewbuf() compares it against.
  */
 int bufspace, maxbufspace;
 
 /*
  * advisory minimum for size of LRU queue or VMIO queue
  * (set to nbuf / 3 in bufinit())
  */
 int minbuf;
 
 /*
  * Initialize buffer headers and related structures.
  */
 void
 bufinit()
 {
 	struct buf *bp;
 	int i;
 
 	TAILQ_INIT(&bswlist);
 	LIST_INIT(&invalhash);
 
 	/* first, make a null hash table */
 	for (i = 0; i < BUFHSZ; i++)
 		LIST_INIT(&bufhashtbl[i]);
 
 	/* next, make a null set of free lists */
 	for (i = 0; i < BUFFER_QUEUES; i++)
 		TAILQ_INIT(&bufqueues[i]);
 
 	buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
 	/* finally, initialize each buffer header and stick on empty q */
 	for (i = 0; i < nbuf; i++) {
 		bp = &buf[i];
 		bzero(bp, sizeof *bp);
 		bp->b_flags = B_INVAL;	/* we're just an empty header */
 		bp->b_dev = NODEV;
 		bp->b_vp = NULL;
 		bp->b_rcred = NOCRED;
 		bp->b_wcred = NOCRED;
 		bp->b_qindex = QUEUE_EMPTY;
 		bp->b_vnbufs.le_next = NOLIST;
 		bp->b_data = buffers_kva + i * MAXBSIZE;
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
 		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 	}
 /*
  * this will change later!!!
  */
 	minbuf = nbuf / 3;
 	maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE;
 
 	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 	bogus_page = vm_page_alloc(kernel_object,
 			bogus_offset - VM_MIN_KERNEL_ADDRESS, VM_ALLOC_NORMAL);
 
 }
 
 /*
  * Take a buffer off whichever free queue it is on and mark it as
  * being on none.  Panics if the buffer is not on a queue.
  */
 void
 bremfree(struct buf * bp)
 {
 	int s;
 
 	s = splbio();
 	if (bp->b_qindex == QUEUE_NONE) {
 		panic("bremfree: removing a buffer when not on a queue");
 	} else {
 		/* keep the LRU population count in step */
 		if (bp->b_qindex == QUEUE_LRU)
 			--nlru;
 		TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
 		bp->b_qindex = QUEUE_NONE;
 	}
 	splx(s);
 }
 
 /*
  * Read a block through the buffer cache.
  *
  * The buffer for (vp, blkno) is obtained with getblk() and is always
  * handed back through *bpp.  If the buffer does not carry B_CACHE a
  * read is started with VOP_STRATEGY() and the result of biowait() is
  * returned; otherwise no I/O is done and 0 is returned.
  */
 int
 bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
     struct buf ** bpp)
 {
 	struct buf *bp;
 
 	*bpp = bp = getblk(vp, blkno, size, 0, 0);
 
 	if (bp->b_flags & B_CACHE) {
 		/*
 		 * Found in cache.  If b_blkno still equals b_lblkno,
 		 * let VOP_BMAP() fill in b_blkno.
 		 */
 		if (bp->b_lblkno == bp->b_blkno) {
 			VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
 			    &bp->b_blkno, (int *) 0);
 		}
 		return (0);
 	}
 
 	/* not in cache: count the block input against the caller */
 	if (curproc && curproc->p_stats)
 		curproc->p_stats->p_ru.ru_inblock++;
 
 	bp->b_flags |= B_READ;
 	bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
 
 	/* record read credentials if the buffer has none yet */
 	if (bp->b_rcred == NOCRED) {
 		if (cred != NOCRED)
 			crhold(cred);
 		bp->b_rcred = cred;
 	}
 
 	vfs_busy_pages(bp, 0);
 	VOP_STRATEGY(bp);
 	return (biowait(bp));
 }
 
 /*
  * Like bread(), but additionally starts asynchronous reads on up to
  * cnt read-ahead blocks described by the parallel arrays rablkno[]
  * and rabsize[].  Only the primary block is waited for; the return
  * value is its biowait() status, or 0 if it needed no I/O.
  */
 int
 breadn(struct vnode * vp, daddr_t blkno, int size,
     daddr_t * rablkno, int *rabsize,
     int cnt, struct ucred * cred, struct buf ** bpp)
 {
 	struct buf *bp, *rabp;
 	int idx;
 	int mustwait = 0;
 
 	bp = getblk(vp, blkno, size, 0, 0);
 	*bpp = bp;
 
 	if (bp->b_flags & B_CACHE) {
 		/* cached: let VOP_BMAP() fill in b_blkno if still unset */
 		if (bp->b_lblkno == bp->b_blkno) {
 			VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
 			    &bp->b_blkno, (int *) 0);
 		}
 	} else {
 		/* not cached: start the read now, wait for it at the end */
 		if (curproc && curproc->p_stats)
 			curproc->p_stats->p_ru.ru_inblock++;
 		bp->b_flags |= B_READ;
 		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
 		if (bp->b_rcred == NOCRED) {
 			if (cred != NOCRED)
 				crhold(cred);
 			bp->b_rcred = cred;
 		}
 		vfs_busy_pages(bp, 0);
 		VOP_STRATEGY(bp);
 		mustwait = 1;
 	}
 
 	for (idx = 0; idx < cnt; idx++) {
 		/* nothing to do for blocks that need no I/O */
 		if (inmem(vp, rablkno[idx]))
 			continue;
 
 		rabp = getblk(vp, rablkno[idx], rabsize[idx], 0, 0);
 		if (rabp->b_flags & B_CACHE) {
 			brelse(rabp);
 			continue;
 		}
 
 		/* fire and forget: B_ASYNC read-ahead */
 		if (curproc && curproc->p_stats)
 			curproc->p_stats->p_ru.ru_inblock++;
 		rabp->b_flags |= B_READ | B_ASYNC;
 		rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
 		if (rabp->b_rcred == NOCRED) {
 			if (cred != NOCRED)
 				crhold(cred);
 			rabp->b_rcred = cred;
 		}
 		vfs_busy_pages(rabp, 0);
 		VOP_STRATEGY(rabp);
 	}
 
 	if (!mustwait)
 		return (0);
 	return (biowait(bp));
 }
 
 /*
  * Write a buffer.
  *
  * An invalid buffer is simply released.  Otherwise the write is
  * started with VOP_STRATEGY().  If the buffer was not marked B_ASYNC
  * on entry the call waits for completion, releases the buffer and
  * returns the biowait() status; for B_ASYNC buffers it returns 0 at
  * once (release is done by iodone).
  */
 int
 bwrite(struct buf * bp)
 {
 	int oldflags = bp->b_flags;
 	int rtval;
 
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return (0);
 	}
 	if ((bp->b_flags & B_BUSY) == 0)
 		panic("bwrite: buffer is not busy???");
 
 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
 	bp->b_flags |= B_WRITEINPROG;
 
 	/*
 	 * For async writes the bookkeeping is done before the I/O is
 	 * started; the buffer is not touched again after VOP_STRATEGY().
 	 */
 	if (oldflags & B_ASYNC) {
 		if (oldflags & B_DELWRI)
 			reassignbuf(bp, bp->b_vp);
 		else if (curproc)
 			++curproc->p_stats->p_ru.ru_oublock;
 	}
 
 	bp->b_vp->v_numoutput++;
 	vfs_busy_pages(bp, 1);
 	VOP_STRATEGY(bp);
 
 	if (oldflags & B_ASYNC)
 		return (0);
 
 	/* synchronous: wait, then do the same bookkeeping and release */
 	rtval = biowait(bp);
 	if (oldflags & B_DELWRI)
 		reassignbuf(bp, bp->b_vp);
 	else if (curproc)
 		++curproc->p_stats->p_ru.ru_oublock;
 	brelse(bp);
 	return (rtval);
 }
 
 /*
  * Vnode-operation wrapper: run bwrite() on the buffer carried in the
  * argument structure and pass its result back.
  */
 int
 vn_bwrite(ap)
 	struct vop_bwrite_args *ap;
 {
 	struct buf *bp = ap->a_bp;
 
 	return (bwrite(bp));
 }
 
 /*
  * Delayed write: mark the buffer dirty (B_DELWRI) and release it
  * without starting I/O.  Invalid buffers are just released, and
  * B_TAPE buffers are written asynchronously instead of being delayed.
  * Panics if the buffer is not busy.
  */
 void
 bdwrite(struct buf * bp)
 {
 	if (!(bp->b_flags & B_BUSY))
 		panic("bdwrite: buffer is not busy");
 
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return;
 	}
 	if (bp->b_flags & B_TAPE) {
 		bawrite(bp);
 		return;
 	}
 
 	bp->b_flags &= ~B_READ;
 	vfs_dirty_pages(bp);
 
 	/* first time this buffer is dirtied: account for it and requeue */
 	if (!(bp->b_flags & B_DELWRI)) {
 		if (curproc)
 			++curproc->p_stats->p_ru.ru_oublock;
 		bp->b_flags |= B_DONE | B_DELWRI;
 		reassignbuf(bp, bp->b_vp);
 	}
 	brelse(bp);
 }
 
 /*
  * Asynchronous write.
  * Start output on a buffer, but do not wait for it to complete.
  * The buffer is released when the output completes.
  *
  * To keep one file from flooding the buffer cache with writes, the
  * caller is put to sleep once the vnode has more than nbuf/2 writes
  * outstanding, until the count is no longer above nbuf/4.
  */
 void
 bawrite(struct buf * bp)
 {
 	/* fetch the vnode first; bp is not used again after bwrite() */
 	struct vnode *vp = bp->b_vp;
 	int s;
 
 	bp->b_flags |= B_ASYNC;
 	(void) bwrite(bp);
 
 	if (vp->v_numoutput <= (nbuf/2))
 		return;
 
 	s = splbio();
 	while (vp->v_numoutput > (nbuf/4)) {
 		vp->v_flag |= VBWAIT;
 		tsleep((caddr_t) &vp->v_numoutput, PRIBIO, "bawnmo", 0);
 	}
 	splx(s);
 }
 
 /*
  * Release a buffer.
  *
  * Wakes anyone waiting for a free buffer or for this buffer, runs down
  * the VM pages of a VMIO buffer (fully, if the buffer is B_INVAL),
  * places the buffer on the free queue that matches its state, and
  * finally clears B_BUSY and the other transient flags.
  * B_CLUSTER buffers are handed straight to relpbuf() instead.
  */
 void
 brelse(struct buf * bp)
 {
 	int s;
 
 	if (bp->b_flags & B_CLUSTER) {
 		relpbuf(bp);
 		return;
 	}
 	/* anyone need a "free" block? */
 	s = splbio();
 
 	if (needsbuffer) {
 		needsbuffer = 0;
 		wakeup((caddr_t) &needsbuffer);
 	}
 
 	/* anyone need this block? */
 	if (bp->b_flags & B_WANTED) {
 		bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE);
 		wakeup((caddr_t) bp);
 	} else if (bp->b_flags & B_VMIO) {
 		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
 		wakeup((caddr_t) bp);
 	}
 	/* an error on a locked buffer is dropped so it is not invalidated */
 	if (bp->b_flags & B_LOCKED)
 		bp->b_flags &= ~B_ERROR;
 
 	/*
 	 * Buffers that must not be cached, are invalid, had an error, or
 	 * have no storage become B_INVAL; non-VMIO ones are detached from
 	 * their vnode right away.
 	 */
 	if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) ||
 	    (bp->b_bufsize <= 0)) {
 		bp->b_flags |= B_INVAL;
 		bp->b_flags &= ~(B_DELWRI | B_CACHE);
 		if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp)
 			brelvp(bp);
 	}
 	
 	/*
 	 * VMIO buffer rundown.  It is not very necessary to keep a VMIO buffer
 	 * constituted, so the B_INVAL flag is used to *invalidate* the buffer,
 	 * but the VM object is kept around.  The B_NOCACHE flag is used to
 	 * invalidate the pages in the VM object.
 	 */
 	if (bp->b_flags & B_VMIO) {
 		vm_offset_t foff;
 		vm_object_t obj;
 		int i, resid;
 		vm_page_t m;
 		int iototal = bp->b_bufsize;
 
 		foff = 0;
 		obj = 0;
 		if (bp->b_npages) {
 			if (bp->b_vp && bp->b_vp->v_mount) {
 				foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
 			} else {
 				/*
 				 * vnode pointer has been ripped away --
 				 * probably file gone...
 				 */
 				foff = bp->b_pages[0]->offset;
 			}
 		}
 		/*
 		 * Walk the pages, updating the valid/clean state of the part
 		 * of each page that the buffer covers (resid bytes from foff).
 		 */
 		for (i = 0; i < bp->b_npages; i++) {
 			m = bp->b_pages[i];
 			if (m == bogus_page) {
 				panic("brelse: bogus page found");
 			}
 			resid = (m->offset + PAGE_SIZE) - foff;
 			if (resid > iototal)
 				resid = iototal;
 			if (resid > 0) {
 				if (bp->b_flags & (B_ERROR | B_NOCACHE)) {
 					vm_page_set_invalid(m, foff, resid);
 				} else if ((bp->b_flags & B_DELWRI) == 0) {
 					vm_page_set_clean(m, foff, resid);
 					vm_page_set_valid(m, foff, resid);
 				}
 			} else {
 				vm_page_test_dirty(m);
 			}
 			foff += resid;
 			iototal -= resid;
 		}
 
 		/*
 		 * An invalidated VMIO buffer gives up its pages: drop the
 		 * buffer mapping count on each one and, when it reaches zero,
 		 * free, cache or activate the page depending on its state.
 		 */
 		if (bp->b_flags & B_INVAL) {
 			for(i=0;i<bp->b_npages;i++) {
 				m = bp->b_pages[i];
 				--m->bmapped;
 				if (m->bmapped == 0) {
 					PAGE_WAKEUP(m);
 					if (m->valid == 0) {
-						pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+						vm_page_protect(m, VM_PROT_NONE);
 						vm_page_free(m);
 					} else if ((m->dirty & m->valid) == 0 &&
 						(m->flags & PG_REFERENCED) == 0 &&
 							!pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
 						vm_page_cache(m);
 					else if( (m->flags & PG_ACTIVE) == 0)
 						vm_page_activate(m);
 				}
 			}
 			bufspace -= bp->b_bufsize;
 			pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
 			bp->b_npages = 0;
 			bp->b_bufsize = 0;
 			bp->b_flags &= ~B_VMIO;
 			if (bp->b_vp)
 				brelvp(bp);
 			--nvmio;
 		}
 	}
 	if (bp->b_qindex != QUEUE_NONE)
 		panic("brelse: free buffer onto another queue???");
 
 	/* enqueue */
 	/* buffers with no memory */
 	if (bp->b_bufsize == 0) {
 		bp->b_qindex = QUEUE_EMPTY;
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
 		LIST_REMOVE(bp, b_hash);
 		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 		bp->b_dev = NODEV;
 		/* buffers with junk contents */
 	} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) {
 		bp->b_qindex = QUEUE_AGE;
 		TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist);
 		LIST_REMOVE(bp, b_hash);
 		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 		bp->b_dev = NODEV;
 		/* buffers that are locked */
 	} else if (bp->b_flags & B_LOCKED) {
 		bp->b_qindex = QUEUE_LOCKED;
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
 		/* buffers with stale but valid contents */
 	} else if (bp->b_flags & B_AGE) {
 		bp->b_qindex = QUEUE_AGE;
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist);
 		/* buffers with valid and quite potentially reuseable contents */
 	} else {
 		if (bp->b_flags & B_VMIO)
 			bp->b_qindex = QUEUE_VMIO;
 		else {
 			bp->b_qindex = QUEUE_LRU;
 			++nlru;
 		}
 		TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);
 	}
 
 	/* unlock */
 	bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE);
 	splx(s);
 }
 
 /*
  * This routine implements clustered async writes for
  * clearing out B_DELWRI buffers...  This is much better
  * than the old way of writing only one buffer at a time.
  *
  * Starting at bp, following logical blocks are examined while each
  * one is in core, is an idle delayed-write buffer that allows
  * clustering, has the filesystem block size, and is physically
  * contiguous with bp.  If at least one such neighbour is found the
  * run is handed to cluster_wbuild(); otherwise bp alone is written
  * asynchronously with bwrite().
  */
 void
 vfs_bio_awrite(struct buf * bp)
 {
 	int i;
 	daddr_t lblkno = bp->b_lblkno;
 	struct vnode *vp = bp->b_vp;
 	int s;
 	int ncl;
 	struct buf *bpa;
 
 	s = splbio();
 	if (vp->v_mount && (vp->v_flag & VVMIO) &&
 	    (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
 		int size = vp->v_mount->mnt_stat.f_iosize;
 
 		for (i = 1; i < MAXPHYS / size; i++) {
 			/*
 			 * The neighbour must be exactly "delayed write and
 			 * cluster ok": not busy and not invalid.  The
 			 * right-hand side is parenthesized because == binds
 			 * tighter than |; without the parentheses the test
 			 * was always true.
 			 */
 			if ((bpa = incore(vp, lblkno + i)) &&
 			    ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
 			    (B_DELWRI | B_CLUSTEROK)) &&
 			    (bpa->b_bufsize == size)) {
 				/*
 				 * Stop if the block has no physical address
 				 * assigned yet or is not contiguous with bp.
 				 */
 				if ((bpa->b_blkno == bpa->b_lblkno) ||
 				    (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE))
 					break;
 			} else {
 				break;
 			}
 		}
 		ncl = i;
 		/*
 		 * this is a possible cluster write
 		 */
 		if (ncl != 1) {
 			cluster_wbuild(vp, NULL, size, lblkno, ncl, -1);
 			splx(s);
 			return;
 		}
 	}
 	/*
 	 * default (old) behavior, writing out only one block
 	 */
 	bremfree(bp);
 	bp->b_flags |= B_BUSY | B_ASYNC;
 	bwrite(bp);
 	splx(s);
 }
 
 
 /*
  * Find a buffer header which is available for use.
  *
  * Returns a busy buffer header with no vnode, credentials or pages
  * attached, or 0 if the caller has to retry: either after sleeping
  * for a free buffer, or after a delayed-write buffer was pushed out
  * while neither slpflag nor slptimeo was given.
  * doingvmio selects which of the VMIO/LRU queues is tried first when
  * the advisory limits yield no candidate.
  */
 struct buf *
 getnewbuf(int slpflag, int slptimeo, int doingvmio)
 {
 	struct buf *bp;
 	int s;
 	int firstbp = 1;
 
 	s = splbio();
 start:
 	/* over the space limit: an existing buffer has to be recycled */
 	if (bufspace >= maxbufspace)
 		goto trytofreespace;
 
 	/* can we constitute a new buffer? */
 	if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) {
 		if (bp->b_qindex != QUEUE_EMPTY)
 			panic("getnewbuf: inconsistent EMPTY queue");
 		bremfree(bp);
 		goto fillbuf;
 	}
 trytofreespace:
 	/*
 	 * We keep the file I/O from hogging metadata I/O
 	 * This is desirable because file data is cached in the
 	 * VM/Buffer cache even if a buffer is freed.
 	 */
 	if (bp = bufqueues[QUEUE_AGE].tqh_first) {
 		if (bp->b_qindex != QUEUE_AGE)
 			panic("getnewbuf: inconsistent AGE queue");
 	} else if ((nvmio > nbuf - minbuf)
 	    && (bp = bufqueues[QUEUE_VMIO].tqh_first)) {
 		if (bp->b_qindex != QUEUE_VMIO)
 			panic("getnewbuf: inconsistent VMIO queue");
 	} else if ((nlru > nbuf - minbuf) &&
 	    (bp = bufqueues[QUEUE_LRU].tqh_first)) {
 		if (bp->b_qindex != QUEUE_LRU)
 			panic("getnewbuf: inconsistent LRU queue");
 	}
 	/*
 	 * Nothing chosen by the advisory limits: take the head of the
 	 * VMIO or LRU queue, preferring the caller's own kind first.
 	 */
 	if (!bp) {
 		if (doingvmio) {
 			if (bp = bufqueues[QUEUE_VMIO].tqh_first) {
 				if (bp->b_qindex != QUEUE_VMIO)
 					panic("getnewbuf: inconsistent VMIO queue");
 			} else if (bp = bufqueues[QUEUE_LRU].tqh_first) {
 				if (bp->b_qindex != QUEUE_LRU)
 					panic("getnewbuf: inconsistent LRU queue");
 			}
 		} else {
 			if (bp = bufqueues[QUEUE_LRU].tqh_first) {
 				if (bp->b_qindex != QUEUE_LRU)
 					panic("getnewbuf: inconsistent LRU queue");
 			} else if (bp = bufqueues[QUEUE_VMIO].tqh_first) {
 				if (bp->b_qindex != QUEUE_VMIO)
 					panic("getnewbuf: inconsistent VMIO queue");
 			}
 		}
 	}
 	if (!bp) {
 		/* wait for a free buffer of any kind */
 		needsbuffer = 1;
 		tsleep((caddr_t) &needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo);
 		splx(s);
 		return (0);
 	}
 	/* if we are a delayed write, convert to an async write */
 	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
 		vfs_bio_awrite(bp);
 		if (!slpflag && !slptimeo) {
 			splx(s);
 			return (0);
 		}
 		goto start;
 	}
 	bremfree(bp);
 
 	/*
 	 * A VMIO buffer is invalidated through brelse() so its pages are
 	 * run down, then taken back off the queue brelse() put it on.
 	 */
 	if (bp->b_flags & B_VMIO) {
 		bp->b_flags |= B_INVAL | B_BUSY;
 		brelse(bp);
 		bremfree(bp);
 	}
 	if (bp->b_vp)
 		brelvp(bp);
 
 	/* we are not free, nor do we contain interesting data */
 	if (bp->b_rcred != NOCRED)
 		crfree(bp->b_rcred);
 	if (bp->b_wcred != NOCRED)
 		crfree(bp->b_wcred);
 fillbuf:
 	bp->b_flags |= B_BUSY;
 	LIST_REMOVE(bp, b_hash);
 	LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 	splx(s);
 	/* give back whatever storage the old buffer still holds */
 	if (bp->b_bufsize) {
 		allocbuf(bp, 0, 0);
 	}
 	/* reset the header to a clean, busy, unassociated state */
 	bp->b_flags = B_BUSY;
 	bp->b_dev = NODEV;
 	bp->b_vp = NULL;
 	bp->b_blkno = bp->b_lblkno = 0;
 	bp->b_iodone = 0;
 	bp->b_error = 0;
 	bp->b_resid = 0;
 	bp->b_bcount = 0;
 	bp->b_npages = 0;
 	bp->b_wcred = bp->b_rcred = NOCRED;
 	bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
 	bp->b_dirtyoff = bp->b_dirtyend = 0;
 	bp->b_validoff = bp->b_validend = 0;
 	/* still over the space limit: put this one back and recycle another */
 	if (bufspace >= maxbufspace) {
 		s = splbio();
 		bp->b_flags |= B_INVAL;
 		brelse(bp);
 		goto trytofreespace;
 	}
 	return (bp);
 }
 
 /*
  * Check to see if a block is currently memory resident.
  *
  * Returns the valid (not B_INVAL) buffer for (vp, blkno) found on the
  * hash chain, or 0 if there is none.
  */
 struct buf *
 incore(struct vnode * vp, daddr_t blkno)
 {
 	struct bufhashhdr *bh;
 	struct buf *bp;
 	int s;
 
 	s = splbio();
 	bh = BUFHASH(vp, blkno);
 
 	/* walk the chain; bp ends up NULL when nothing matches */
 	for (bp = bh->lh_first; bp; bp = bp->b_hash.le_next) {
 		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
 		    (bp->b_flags & B_INVAL) == 0)
 			break;
 	}
 	splx(s);
 
 	return (bp);
 }
 
 /*
  * Returns true if no I/O is needed to access the
  * associated VM object.  This is like incore except
  * it also hunts around in the VM system for the data.
  *
  * Returns 1 when the block has a buffer, or when every piece of it is
  * backed by a valid page of the vnode's VM object; 0 otherwise.
  */
 
 int
 inmem(struct vnode * vp, daddr_t blkno)
 {
 	vm_object_t obj;
 	vm_offset_t off, toff, tinc;
 	vm_page_t m;
 
 	if (incore(vp, blkno))
 		return 1;
 
 	/* without a mount point or a VMIO object there is nothing to search */
 	if (vp->v_mount == 0 ||
 	    (vp->v_vmdata == 0) || (vp->v_flag & VVMIO) == 0)
 		return 0;
 
 	obj = (vm_object_t) vp->v_vmdata;
 
 	/* step through the block a page, or a whole small block, at a time */
 	tinc = PAGE_SIZE;
 	if (tinc > vp->v_mount->mnt_stat.f_iosize)
 		tinc = vp->v_mount->mnt_stat.f_iosize;
 	off = blkno * vp->v_mount->mnt_stat.f_iosize;
 
 	toff = 0;
 	while (toff < vp->v_mount->mnt_stat.f_iosize) {
 		m = vm_page_lookup(obj, trunc_page(toff + off));
 		if (!m || vm_page_is_valid(m, toff + off, tinc) == 0)
 			return 0;
 		toff += tinc;
 	}
 	return 1;
 }
 
 /*
  * Get a block given a specified block and offset into a file/device.
  *
  * Returns the busy buffer for (vp, blkno) with the requested size.
  * An existing buffer is reused (sleeping while it is busy); otherwise
  * a new header is obtained from getnewbuf(), entered in the hash and
  * given storage by allocbuf().  NULL is returned when a sleep for a
  * busy buffer is cut short, or when getnewbuf() fails while slpflag or
  * slptimeo is set.  The interrupt priority level in force on entry is
  * restored on every return path.
  */
 struct buf *
 getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
 {
 	struct buf *bp;
 	int s;
 	struct bufhashhdr *bh;
 	vm_offset_t off;
 	int nleft;
 
 	s = splbio();
 loop:
 	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_cache_min)
-		wakeup((caddr_t) &vm_pages_needed);
+		pagedaemon_wakeup();
 
 	if (bp = incore(vp, blkno)) {
 		if (bp->b_flags & B_BUSY) {
 			bp->b_flags |= B_WANTED;
 			if (curproc == pageproc) {
 				bp->b_flags |= B_PDWANTED;
 				wakeup((caddr_t) &cnt.v_free_count);
 			}
 			if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo))
 				goto loop;
 			splx(s);
 			return (struct buf *) NULL;
 		}
 		bp->b_flags |= B_BUSY | B_CACHE;
 		bremfree(bp);
 		/*
 		 * check for size inconsistencies
 		 */
 		if (bp->b_bcount != size) {
 #if defined(VFS_BIO_DEBUG)
 			printf("getblk: invalid buffer size: %ld\n", bp->b_bcount);
 #endif
 			bp->b_flags |= B_INVAL;
 			bwrite(bp);
 			goto loop;
 		}
 		splx(s);
 		return (bp);
 	} else {
 		vm_object_t obj;
 		int doingvmio;
 
 		if ((obj = (vm_object_t) vp->v_vmdata) && (vp->v_flag & VVMIO)) {
 			doingvmio = 1;
 		} else {
 			doingvmio = 0;
 		}
 		if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
 			if (slpflag || slptimeo) {
 				/*
 				 * getnewbuf() only undoes its own splbio();
 				 * ours must be dropped before giving up.
 				 */
 				splx(s);
 				return NULL;
 			}
 			goto loop;
 		}
 		/*
 		 * It is possible that another buffer has been constituted
 		 * during the time that getnewbuf is blocked.  This checks
 		 * for this possibility, and handles it.
 		 */
 		if (incore(vp, blkno)) {
 			bp->b_flags |= B_INVAL;
 			brelse(bp);
 			goto loop;
 		}
 		/*
 		 * Insert the buffer into the hash, so that it can
 		 * be found by incore.
 		 */
 		bp->b_blkno = bp->b_lblkno = blkno;
 		bgetvp(vp, bp);
 		LIST_REMOVE(bp, b_hash);
 		bh = BUFHASH(vp, blkno);
 		LIST_INSERT_HEAD(bh, bp, b_hash);
 
 		if (doingvmio) {
 			bp->b_flags |= (B_VMIO | B_CACHE);
 #if defined(VFS_BIO_DEBUG)
 			if (vp->v_type != VREG)
 				printf("getblk: vmioing file type %d???\n", vp->v_type);
 #endif
 			++nvmio;
 		} else {
 			if (bp->b_flags & B_VMIO)
 				--nvmio;
 			bp->b_flags &= ~B_VMIO;
 		}
 		splx(s);
 
 		/* allocbuf() returning 0 means the buffer is gone; start over */
 		if (!allocbuf(bp, size, 1)) {
 			s = splbio();
 			goto loop;
 		}
 		return (bp);
 	}
 }
 
 /*
  * Get an empty, disassociated buffer of given size.
  * Keeps calling getnewbuf() until it yields a header; the buffer is
  * returned marked B_INVAL.
  */
 struct buf *
 geteblk(int size)
 {
 	struct buf *bp;
 
 	do {
 		bp = getnewbuf(0, 0, 0);
 	} while (bp == 0);
 
 	allocbuf(bp, size, 0);
 	bp->b_flags |= B_INVAL;
 	return (bp);
 }
 
 /*
  * This code constitutes the buffer memory from either anonymous system
  * memory (in the case of non-VMIO operations) or from an associated
  * VM object (in the case of VMIO operations).
  *
  * Note that this code is tricky, and has many complications to resolve
  * deadlock or inconsistent data situations.  Tread lightly!!!
  *
  * Modify the length of a buffer's underlying buffer storage without
  * destroying information (unless, of course the buffer is shrinking).
  *
  * Returns 1 on success.  Returns 0 only on the VMIO paths that find
  * B_PDWANTED set while the vmio argument is non-zero; in that case the
  * buffer has been invalidated and released and must not be used.
  */
 int
 allocbuf(struct buf * bp, int size, int vmio)
 {
 
 	int s;
 	int newbsize, mbsize;
 	int i;
 
 	if ((bp->b_flags & B_VMIO) == 0) {
 		/*
 		 * Just get anonymous memory from the kernel
 		 */
 		mbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
 		newbsize = round_page(size);
 
 		if (newbsize == bp->b_bufsize) {
 			bp->b_bcount = size;
 			return 1;
 		} else if (newbsize < bp->b_bufsize) {
 			/* shrinking: give back the tail pages */
 			vm_hold_free_pages(
 			    bp,
 			    (vm_offset_t) bp->b_data + newbsize,
 			    (vm_offset_t) bp->b_data + bp->b_bufsize);
 			bufspace -= (bp->b_bufsize - newbsize);
 		} else if (newbsize > bp->b_bufsize) {
 			/* growing: load pages for the new tail */
 			vm_hold_load_pages(
 			    bp,
 			    (vm_offset_t) bp->b_data + bp->b_bufsize,
 			    (vm_offset_t) bp->b_data + newbsize);
 			bufspace += (newbsize - bp->b_bufsize);
 		}
 	} else {
 		vm_page_t m;
 		int desiredpages;
 
 		/* VMIO sizes are kept in DEV_BSIZE units, not whole pages */
 		newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
 		desiredpages = round_page(newbsize) / PAGE_SIZE;
 
 		if (newbsize == bp->b_bufsize) {
 			bp->b_bcount = size;
 			return 1;
 		} else if (newbsize < bp->b_bufsize) {
 			/*
 			 * Shrinking: unmap the surplus pages and drop the
 			 * buffer's mapping count on each of them.
 			 */
 			if (desiredpages < bp->b_npages) {
 				pmap_qremove((vm_offset_t) trunc_page(bp->b_data) +
 				    desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages));
 				for (i = desiredpages; i < bp->b_npages; i++) {
 					m = bp->b_pages[i];
 					/* wait until nobody else has the page busy */
 					s = splhigh();
 					while ((m->flags & PG_BUSY) || (m->busy != 0)) {
 						m->flags |= PG_WANTED;
 						tsleep(m, PVM, "biodep", 0);
 					}
 					splx(s);
 
 					if (m->bmapped == 0) {
 						printf("allocbuf: bmapped is zero for page %d\n", i);
 						panic("allocbuf: error");
 					}
 					--m->bmapped;
 					if (m->bmapped == 0) {
 						PAGE_WAKEUP(m);
 						if (m->valid == 0) {
-							pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+							vm_page_protect(m, VM_PROT_NONE);
 							vm_page_free(m);
 						}
 					}
 					bp->b_pages[i] = NULL;
 				}
 				bp->b_npages = desiredpages;
 				bufspace -= (bp->b_bufsize - newbsize);
 			}
 		} else {
 			vm_object_t obj;
 			vm_offset_t tinc, off, toff, objoff;
 			int pageindex, curbpnpages;
 			struct vnode *vp;
 			int bsize;
 
 			vp = bp->b_vp;
 			bsize = vp->v_mount->mnt_stat.f_iosize;
 
 			/*
 			 * Growing: collect the pages of the vnode's VM object
 			 * that back the new size.  Whenever the code below has
 			 * to wait, the pages gathered so far are woken up (and
 			 * freed if unused) and the scan restarts at doretry.
 			 */
 			if (bp->b_npages < desiredpages) {
 				obj = (vm_object_t) vp->v_vmdata;
 				tinc = PAGE_SIZE;
 				if (tinc > bsize)
 					tinc = bsize;
 				off = bp->b_lblkno * bsize;
 				curbpnpages = bp->b_npages;
 		doretry:
 				for (toff = 0; toff < newbsize; toff += tinc) {
 					int mask;
 					int bytesinpage;
 
 					pageindex = toff / PAGE_SIZE;
 					objoff = trunc_page(toff + off);
 					/* page already held: just re-check validity */
 					if (pageindex < curbpnpages) {
 						int pb;
 
 						m = bp->b_pages[pageindex];
 						if (m->offset != objoff)
 							panic("allocbuf: page changed offset??!!!?");
 						bytesinpage = tinc;
 						if (tinc > (newbsize - toff))
 							bytesinpage = newbsize - toff;
 						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
 							bp->b_flags &= ~B_CACHE;
 						}
 						if ((m->flags & PG_ACTIVE) == 0)
 							vm_page_activate(m);
 						continue;
 					}
 					m = vm_page_lookup(obj, objoff);
 					if (!m) {
 						/* not in the object yet: allocate a fresh page */
 						m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
 						if (!m) {
 							int j;
 
 							for (j = bp->b_npages; j < pageindex; j++) {
 								vm_page_t mt = bp->b_pages[j];
 
 								PAGE_WAKEUP(mt);
 								if (mt->valid == 0 && mt->bmapped == 0) {
 									vm_page_free(mt);
 								}
 							}
 							VM_WAIT;
 							if (vmio && (bp->b_flags & B_PDWANTED)) {
 								bp->b_flags |= B_INVAL;
 								brelse(bp);
 								return 0;
 							}
 							curbpnpages = bp->b_npages;
 							goto doretry;
 						}
 						m->valid = 0;
 						vm_page_activate(m);
 					} else if ((m->valid == 0) || (m->flags & PG_BUSY)) {
 						/* page exists but is empty or busy: back off and retry */
 						int j;
 						int bufferdestroyed = 0;
 
 						for (j = bp->b_npages; j < pageindex; j++) {
 							vm_page_t mt = bp->b_pages[j];
 
 							PAGE_WAKEUP(mt);
 							if (mt->valid == 0 && mt->bmapped == 0) {
 								vm_page_free(mt);
 							}
 						}
 						if (vmio && (bp->b_flags & B_PDWANTED)) {
 							bp->b_flags |= B_INVAL;
 							brelse(bp);
 							VM_WAIT;
 							bufferdestroyed = 1;
 						}
 						s = splbio();
 						if (m->flags & PG_BUSY) {
 							m->flags |= PG_WANTED;
 							tsleep(m, PRIBIO, "pgtblk", 0);
 						} else if( m->valid == 0 && m->bmapped == 0) {
 							vm_page_free(m);
 						}
 						splx(s);
 						if (bufferdestroyed)
 							return 0;
 						curbpnpages = bp->b_npages;
 						goto doretry;
 					} else {
 						int pb;
 
 						/* memory is short: wait rather than reuse a cached page */
 						if ((m->flags & PG_CACHE) &&
 						    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
 							int j;
 
 							for (j = bp->b_npages; j < pageindex; j++) {
 								vm_page_t mt = bp->b_pages[j];
 
 								PAGE_WAKEUP(mt);
 								if (mt->valid == 0 && mt->bmapped == 0) {
 									vm_page_free(mt);
 								}
 							}
 							VM_WAIT;
 							if (vmio && (bp->b_flags & B_PDWANTED)) {
 								bp->b_flags |= B_INVAL;
 								brelse(bp);
 								return 0;
 							}
 							curbpnpages = bp->b_npages;
 							goto doretry;
 						}
 						bytesinpage = tinc;
 						if (tinc > (newbsize - toff))
 							bytesinpage = newbsize - toff;
 						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
 							bp->b_flags &= ~B_CACHE;
 						}
 						if ((m->flags & PG_ACTIVE) == 0)
 							vm_page_activate(m);
 						m->flags |= PG_BUSY;
 					}
 					bp->b_pages[pageindex] = m;
 					curbpnpages = pageindex + 1;
 				}
 				/*
 				 * All pages are in hand: count the buffer mapping
 				 * on each new page and wake up any waiters.
 				 */
 				if (bsize >= PAGE_SIZE) {
 					for (i = bp->b_npages; i < curbpnpages; i++) {
 						m = bp->b_pages[i];
 						if (m->valid == 0) {
 							bp->b_flags &= ~B_CACHE;
 						}
 						m->bmapped++;
 						PAGE_WAKEUP(m);
 					}
 				} else {
 					if (!vm_page_is_valid(bp->b_pages[0], off, bsize))
 						bp->b_flags &= ~B_CACHE;
 					bp->b_pages[0]->bmapped++;
 					PAGE_WAKEUP(bp->b_pages[0]);
 				}
 				/* map the pages at this buffer's slot in buffers_kva */
 				bp->b_npages = curbpnpages;
 				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
 				pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages);
 				bp->b_data += off % PAGE_SIZE;
 			}
 			bufspace += (newbsize - bp->b_bufsize);
 		}
 	}
 	bp->b_bufsize = newbsize;
 	bp->b_bcount = size;
 	return 1;
 }
 
 /*
  * Wait for buffer I/O completion, returning error status.
  *
  * Sleeps until B_DONE is set.  Returns 0 if the buffer shows no
  * error; otherwise the buffer is invalidated (if it was not already)
  * and b_error is returned, defaulting to EIO when only B_ERROR was set.
  */
 int
 biowait(register struct buf * bp)
 {
 	int s;
 
 	s = splbio();
 	while (!(bp->b_flags & B_DONE))
 		tsleep((caddr_t) bp, PRIBIO, "biowait", 0);
 
 	/* the common case: completed cleanly */
 	if (!(bp->b_flags & B_ERROR) && !bp->b_error) {
 		splx(s);
 		return (0);
 	}
 
 	/* failed I/O: take the buffer out of the cache */
 	if (!(bp->b_flags & B_INVAL)) {
 		bp->b_flags |= B_INVAL;
 		bp->b_dev = NODEV;
 		LIST_REMOVE(bp, b_hash);
 		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 		wakeup((caddr_t) bp);
 	}
 
 	/* make b_error and B_ERROR agree with each other */
 	if (bp->b_error)
 		bp->b_flags |= B_ERROR;
 	else
 		bp->b_error = EIO;
 
 	splx(s);
 	return (bp->b_error);
 }
 
 /*
  * Finish I/O on a buffer, calling an optional function.
  * This is usually called from interrupt level, so process blocking
  * is not *a good idea*.
  *
  * Marks the buffer B_DONE, does write-completion bookkeeping, and then
  * either calls the B_CALL completion function, or (for VMIO buffers)
  * updates and unbusies the pages before releasing the buffer (B_ASYNC)
  * or waking its waiters.  The interrupt priority level raised on entry
  * is restored on every return path.
  */
 void
 biodone(register struct buf * bp)
 {
 	int s;
 
 	s = splbio();
-	if (bp->b_flags & B_DONE)
+	if (bp->b_flags & B_DONE) {
+		splx(s);
 		printf("biodone: buffer already done\n");
+		return;
+	}
 	bp->b_flags |= B_DONE;
 
 	if ((bp->b_flags & B_READ) == 0) {
 		struct vnode *vp = bp->b_vp;
 		vwakeup(bp);
 		/* let a writer throttled in bawrite() continue */
 		if (vp && (vp->v_numoutput == (nbuf/4)) && (vp->v_flag & VBWAIT)) {
 			vp->v_flag &= ~VBWAIT;
 			wakeup((caddr_t) &vp->v_numoutput);
 		}
 	}
 #ifdef BOUNCE_BUFFERS
 	if (bp->b_flags & B_BOUNCE)
 		vm_bounce_free(bp);
 #endif
 
 	/* call optional completion function if requested */
 	if (bp->b_flags & B_CALL) {
 		bp->b_flags &= ~B_CALL;
 		(*bp->b_iodone) (bp);
 		splx(s);
 		return;
 	}
 	if (bp->b_flags & B_VMIO) {
 		int i, resid;
 		vm_offset_t foff;
 		vm_page_t m;
 		vm_object_t obj;
 		int iosize;
 		struct vnode *vp = bp->b_vp;
 
 		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
 		obj = (vm_object_t) vp->v_vmdata;
 		if (!obj) {
 			/* do not leave with the priority level still raised */
 			splx(s);
 			return;
 		}
 #if defined(VFS_BIO_DEBUG)
 		if (obj->paging_in_progress < bp->b_npages) {
 			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
 			    obj->paging_in_progress, bp->b_npages);
 		}
 #endif
 		iosize = bp->b_bufsize;
 		for (i = 0; i < bp->b_npages; i++) {
 			m = bp->b_pages[i];
 			/* swap a bogus_page placeholder back for the real page */
 			if (m == bogus_page) {
 				m = vm_page_lookup(obj, foff);
 				if (!m) {
 #if defined(VFS_BIO_DEBUG)
 					printf("biodone: page disappeared\n");
 #endif
 					--obj->paging_in_progress;
 					continue;
 				}
 				bp->b_pages[i] = m;
 				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
 			}
 #if defined(VFS_BIO_DEBUG)
 			if (trunc_page(foff) != m->offset) {
 				printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset);
 			}
 #endif
 			/* the part of this page that the buffer covers */
 			resid = (m->offset + PAGE_SIZE) - foff;
 			if (resid > iosize)
 				resid = iosize;
 			if (resid > 0) {
 				vm_page_set_valid(m, foff, resid);
 				vm_page_set_clean(m, foff, resid);
 			}
 
 			/*
 			 * when debugging new filesystems or buffer I/O methods, this
 			 * is the most common error that pops up.  if you see this, you
 			 * have not set the page busy flag correctly!!!
 			 */
 			if (m->busy == 0) {
 				printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n",
 				    m->offset, foff, resid, i);
 				printf(" iosize: %d, lblkno: %d\n",
 				    bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno);
 				printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n",
 				    m->valid, m->dirty, m->bmapped);
 				panic("biodone: page busy < 0\n");
 			}
 			--m->busy;
 			PAGE_WAKEUP(m);
 			--obj->paging_in_progress;
 			foff += resid;
 			iosize -= resid;
 		}
 		if (obj && obj->paging_in_progress == 0 &&
 		    (obj->flags & OBJ_PIPWNT)) {
 			obj->flags &= ~OBJ_PIPWNT;
 			wakeup((caddr_t) obj);
 		}
 	}
 	/*
 	 * For asynchronous completions, release the buffer now. The brelse
 	 * checks for B_WANTED and will do the wakeup there if necessary - so
 	 * no need to do a wakeup here in the async case.
 	 */
 
 	if (bp->b_flags & B_ASYNC) {
 		brelse(bp);
 	} else {
 		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
 		wakeup((caddr_t) bp);
 	}
 	splx(s);
 }
 
 /*
  * Count the buffers currently linked on the QUEUE_LOCKED buffer queue
  * by walking the queue from its head through the b_freelist links.
  *
  * Returns the number of entries found (0 for an empty queue).
  */
 int
 count_lock_queue()
 {
 	struct buf *cur = bufqueues[QUEUE_LOCKED].tqh_first;
 	int nlocked = 0;
 
 	while (cur != NULL) {
 		++nlocked;
 		cur = cur->b_freelist.tqe_next;
 	}
 	return (nlocked);
 }
 
 /*
  * Multiplied by hz to form the tsleep() timeout in vfs_update(), i.e. the
  * longest the update loop sleeps between sync() calls when it is not woken
  * earlier.  Presumably expressed in seconds (assumes hz is ticks per
  * second -- confirm).
  */
 int vfs_update_interval = 30;
 
 /*
  * Periodic sync loop.  After dropping to spl0 this never returns: it
  * sleeps on vfs_update_wakeup (with a timeout of hz * vfs_update_interval
  * ticks, re-read on every pass), clears the wakeup flag and then calls
  * sync().
  */
 void
 vfs_update()
 {
 	(void) spl0();
 	for (;;) {
 		int timo = hz * vfs_update_interval;
 
 		tsleep((caddr_t) &vfs_update_wakeup, PRIBIO, "update", timo);
 		vfs_update_wakeup = 0;
 		sync(curproc, NULL, NULL);
 	}
 }
 
 /*
  * This routine is called in lieu of iodone in the case of
  * incomplete I/O.  This keeps the busy status for pages
  * consistant.
  */
 void
 vfs_unbusy_pages(struct buf * bp)
 {
 	int i;
 
 	if (bp->b_flags & B_VMIO) {
 		struct vnode *vp = bp->b_vp;
 		vm_object_t obj = (vm_object_t) vp->v_vmdata;
 		vm_offset_t foff;
 
 		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
 
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 
 			if (m == bogus_page) {
 				m = vm_page_lookup(obj, foff);
 				if (!m) {
 					panic("vfs_unbusy_pages: page missing\n");
 				}
 				bp->b_pages[i] = m;
 				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
 			}
 			--obj->paging_in_progress;
 			--m->busy;
 			PAGE_WAKEUP(m);
 		}
 		if (obj->paging_in_progress == 0 &&
 		    (obj->flags & OBJ_PIPWNT)) {
 			obj->flags &= ~OBJ_PIPWNT;
 			wakeup((caddr_t) obj);
 		}
 	}
 }
 
 /*
  * This routine is called before a device strategy routine.
  * It is used to tell the VM system that paging I/O is in
  * progress, and treat the pages associated with the buffer
  * almost as being PG_BUSY.  Also the object paging_in_progress
  * flag is handled to make sure that the object doesn't become
  * inconsistent.
  *
  * bp		buffer about to undergo I/O; nothing is done unless
  *		B_VMIO is set in bp->b_flags.
  * clear_modify	when non-zero, each page is passed to
  *		vm_page_test_dirty() and then restricted to VM_PROT_READ.
  *		When zero, and b_bcount is at least PAGE_SIZE, any page
  *		that already has valid bits set while the buffer lacks
  *		B_CACHE is replaced in b_pages[] by bogus_page and the
  *		buffer mapping is re-entered (presumably so the incoming
  *		data cannot overwrite already-valid page contents --
  *		confirm).
  *
  * Every page visited bumps both obj->paging_in_progress and m->busy; the
  * matching decrements are in biodone() and vfs_unbusy_pages().
  */
 void
 vfs_busy_pages(struct buf * bp, int clear_modify)
 {
 	int i;
 
 	if (bp->b_flags & B_VMIO) {
 		vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata;
 		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
 		int iocount = bp->b_bufsize;
 
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 			/*
 			 * Bytes from foff to the end of this page, clamped
 			 * below to what is left of the buffer.  Here it is
 			 * only used to advance foff and iocount.
 			 */
 			int resid = (m->offset + PAGE_SIZE) - foff;
 
 			if (resid > iocount)
 				resid = iocount;
 			obj->paging_in_progress++;
 			m->busy++;
 			if (clear_modify) {
 				vm_page_test_dirty(m);
-				pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
+				vm_page_protect(m, VM_PROT_READ);
 			} else if (bp->b_bcount >= PAGE_SIZE) {
 				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
 					/* swap in the placeholder and remap the whole buffer */
 					bp->b_pages[i] = bogus_page;
 					pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
 				}
 			}
 			foff += resid;
 			iocount -= resid;
 		}
 	}
 }
 
 /*
  * Mark the pages backing a B_VMIO buffer as valid and dirty over the
  * byte range the buffer covers, so the VM system knows about the data
  * should the buffer have to be destroyed before it has been flushed.
  * Each page is also passed to PAGE_WAKEUP.
  *
  * Buffers without B_VMIO are ignored.
  */
 void
 vfs_dirty_pages(struct buf * bp)
 {
 	vm_offset_t off;
 	int remaining;
 	int idx;
 
 	if ((bp->b_flags & B_VMIO) == 0)
 		return;
 
 	/* starting offset of the buffer and the bytes still to account for */
 	off = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
 	remaining = bp->b_bufsize;
 
 	for (idx = 0; idx < bp->b_npages; idx++) {
 		vm_page_t pg = bp->b_pages[idx];
 		/* bytes from off to the end of this page ... */
 		int span = (pg->offset + PAGE_SIZE) - off;
 
 		/* ... but never more than what is left of the buffer */
 		if (span > remaining)
 			span = remaining;
 		if (span > 0) {
 			vm_page_set_valid(pg, off, span);
 			vm_page_set_dirty(pg, off, span);
 		}
 		PAGE_WAKEUP(pg);
 		off += span;
 		remaining -= span;
 	}
 }
 /*
  * vm_hold_load_pages and vm_hold_free_pages add and remove pages in a
  * buffer's kernel address range.  The pages are allocated from
  * kernel_object; they are anonymous and not tied to any file object.
  *
  * vm_hold_load_pages populates every page-sized step of the range
  * [round_page(froma), round_page(toa)): a page is allocated (waiting
  * and retrying while none is available), wired, mapped at that kernel
  * address, recorded in bp->b_pages[] and counted in bp->b_npages.
  */
 void
 vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
 {
 	vm_offset_t start = round_page(froma);
 	vm_offset_t end = round_page(toa);
 	vm_offset_t va;
 	vm_page_t pp;
 
 	for (va = start; va < end; va += PAGE_SIZE) {
 		/* keep trying until the allocator hands back a page */
 		for (;;) {
 			pp = vm_page_alloc(kernel_object,
 			    va - VM_MIN_KERNEL_ADDRESS, VM_ALLOC_NORMAL);
 			if (pp)
 				break;
 			VM_WAIT;
 		}
 
 		vm_page_wire(pp);
 		pmap_kenter(va, VM_PAGE_TO_PHYS(pp));
 		bp->b_pages[((caddr_t) va - bp->b_data) / PAGE_SIZE] = pp;
 		PAGE_WAKEUP(pp);
 		bp->b_npages++;
 	}
 }
 
 /*
  * Release the pages backing the buffer's kernel address range
  * [round_page(froma), round_page(toa)).  For every page-sized step the
  * matching bp->b_pages[] slot is cleared, the kernel mapping is removed,
  * the page is freed and bp->b_npages is decremented.
  */
 void
 vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
 {
 	vm_offset_t va = round_page(froma);
 	vm_offset_t end = round_page(toa);
 
 	while (va < end) {
 		vm_page_t *slot =
 		    &bp->b_pages[((caddr_t) va - bp->b_data) / PAGE_SIZE];
 		vm_page_t victim = *slot;
 
 		*slot = 0;
 		pmap_kremove(va);
 		vm_page_free(victim);
 		--bp->b_npages;
 		va += PAGE_SIZE;
 	}
 }
 
 /*
  * Placeholder: the body is empty, so calling this has no effect.
  */
 void
 bufstats()
 {
 }