Index: head/sys/alpha/alpha/pmap.c =================================================================== --- head/sys/alpha/alpha/pmap.c (revision 71349) +++ head/sys/alpha/alpha/pmap.c (revision 71350) @@ -1,3170 +1,3170 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 1998 Doug Rabson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp * with some ideas from NetBSD's alpha pmap * $FreeBSD$ */ /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ /* * Notes for alpha pmap. 
* * On alpha, pm_pdeobj will hold lev1, lev2 and lev3 page tables. * Indices from 0 to NUSERLEV3MAPS-1 will map user lev3 page tables, * indices from NUSERLEV3MAPS to NUSERLEV3MAPS+NUSERLEV2MAPS-1 will * map user lev2 page tables and index NUSERLEV3MAPS+NUSERLEV2MAPS * will map the lev1 page table. The lev1 table will self map at * address VADDR(PTLEV1I,0,0). * * The vm_object kptobj holds the kernel page tables on i386 (62 or 63 * of them, depending on whether the system is SMP). On alpha, kptobj * will hold the lev3 and lev2 page tables for K1SEG. Indices 0 to * NKLEV3MAPS-1 will map kernel lev3 page tables and indices * NKLEV3MAPS to NKLEV3MAPS+NKLEV2MAPS will map lev2 page tables. (XXX * should the kernel Lev1map be inserted into this object?). * * pvtmmap is not needed for alpha since K0SEG maps all of physical * memory. * * * alpha virtual memory map: * * * Address Lev1 index * * --------------------------------- * 0000000000000000 | | 0 * | | * | | * | | * | | * --- --- * User space (USEG) * --- --- * | | * | | * | | * | | * 000003ffffffffff | | 511=UMAXLEV1I * --------------------------------- * fffffc0000000000 | | 512=K0SEGLEV1I * | Kernel code/data/bss | * | | * | | * | | * --- --- * K0SEG * --- --- * | | * | 1-1 physical/virtual | * | | * | | * fffffdffffffffff | | * --------------------------------- * fffffe0000000000 | | 768=K1SEGLEV1I * | Kernel dynamic data | * | | * | | * | | * --- --- * K1SEG * --- --- * | | * | mapped by ptes | * | | * | | * fffffff7ffffffff | | * --------------------------------- * fffffffe00000000 | | 1023=PTLEV1I * | PTmap (pte self map) | * ffffffffffffffff | | * --------------------------------- * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif #define MINPV 2048 #if 0 #define PMAP_DIAGNOSTIC #define PMAP_DEBUG #endif #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif #if 0 static void pmap_break(void) { } /* #define PMAP_DEBUG_VA(va) if ((va) == 0x120058000) pmap_break(); else */ #endif #ifndef PMAP_DEBUG_VA #define PMAP_DEBUG_VA(va) do {} while(0) #endif /* * Some macros for manipulating virtual addresses */ #define ALPHA_L1SIZE (1L << ALPHA_L1SHIFT) #define ALPHA_L2SIZE (1L << ALPHA_L2SHIFT) #define alpha_l1trunc(va) ((va) & ~(ALPHA_L1SIZE-1)) #define alpha_l2trunc(va) ((va) & ~(ALPHA_L2SIZE-1)) /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pte_w(pte) ((*(pte) & PG_W) != 0) #define pmap_pte_managed(pte) ((*(pte) & PG_MANAGED) != 0) #define pmap_pte_v(pte) ((*(pte) & PG_V) != 0) #define pmap_pte_pa(pte) alpha_ptob(ALPHA_PTE_TO_PFN(*(pte))) #define pmap_pte_prot(pte) (*(pte) & PG_PROT) #define pmap_pte_set_w(pte, v) ((v)?(*pte |= PG_W):(*pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*pte &= ~PG_PROT), (*pte |= (v))) /* * Given a map and a machine independent protection code, * convert to an alpha protection code. */ #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 
0 : 1][p]) int protection_codes[2][8]; /* * Return non-zero if this pmap is currently active */ #define pmap_isactive(pmap) (pmap->pm_active) /* * Extract level 1, 2 and 3 page table indices from a va */ #define PTMASK ((1 << ALPHA_PTSHIFT) - 1) #define pmap_lev1_index(va) (((va) >> ALPHA_L1SHIFT) & PTMASK) #define pmap_lev2_index(va) (((va) >> ALPHA_L2SHIFT) & PTMASK) #define pmap_lev3_index(va) (((va) >> ALPHA_L3SHIFT) & PTMASK) /* * Given a physical address, construct a pte */ #define pmap_phys_to_pte(pa) ALPHA_PTE_FROM_PFN(alpha_btop(pa)) /* * Given a page frame number, construct a k0seg va */ #define pmap_k0seg_to_pfn(va) alpha_btop(ALPHA_K0SEG_TO_PHYS(va)) /* * Given a pte, construct a k0seg va */ #define pmap_k0seg_to_pte(va) ALPHA_PTE_FROM_PFN(pmap_k0seg_to_pfn(va)) /* * Lev1map: * * Kernel level 1 page table. This maps all kernel level 2 * page table pages, and is used as a template for all user * pmap level 1 page tables. When a new user level 1 page * table is allocated, all Lev1map PTEs for kernel addresses * are copied to the new map. * * Lev2map: * * Initial set of kernel level 2 page table pages. These * map the kernel level 3 page table pages. As kernel * level 3 page table pages are added, more level 2 page * table pages may be added to map them. These pages are * never freed. * * Lev3map: * * Initial set of kernel level 3 page table pages. These * map pages in K1SEG. More level 3 page table pages may * be added at run-time if additional K1SEG address space * is required. These pages are never freed. * * Lev2mapsize: * * Number of entries in the initial Lev2map. * * Lev3mapsize: * * Number of entries in the initial Lev3map. * * NOTE: When mappings are inserted into the kernel pmap, all * level 2 and level 3 page table pages must already be allocated * and mapped into the parent page table. */ pt_entry_t *Lev1map, *Lev2map, *Lev3map; vm_size_t Lev2mapsize, Lev3mapsize; /* * Statically allocated kernel pmap */ static struct pmap kernel_pmap_store; pmap_t kernel_pmap; vm_offset_t avail_start; /* PA of first available physical page */ vm_offset_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ static vm_object_t kptobj; static int nklev3, nklev2; vm_offset_t kernel_vm_end; /* * Data for the ASN allocator */ static int pmap_maxasn; static pmap_t pmap_active[MAXCPU]; /* * Data for the pv entry allocation mechanism */ static vm_zone_t pvzone; static struct vm_zone pvzone_store; static struct vm_object pvzone_obj; static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; static int pmap_pagedaemon_waken = 0; static struct pv_entry *pvinit; static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv)); static pv_entry_t get_pv_entry __P((void)); static void alpha_protection_init __P((void)); static void pmap_changebit __P((vm_page_t m, int bit, boolean_t setem)); static void pmap_remove_all __P((vm_page_t m)); static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)); static int pmap_remove_pte __P((pmap_t pmap, pt_entry_t* ptq, vm_offset_t sva)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m, vm_offset_t va)); static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)); static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex)); static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); /* * Routine: pmap_lev1pte * Function: * Extract the level 1 page table entry associated * with the given map/virtual_address pair. */ static PMAP_INLINE pt_entry_t* pmap_lev1pte(pmap_t pmap, vm_offset_t va) { if (!pmap) return 0; return &pmap->pm_lev1[pmap_lev1_index(va)]; } /* * Routine: pmap_lev2pte * Function: * Extract the level 2 page table entry associated * with the given map/virtual_address pair. */ static PMAP_INLINE pt_entry_t* pmap_lev2pte(pmap_t pmap, vm_offset_t va) { pt_entry_t* l1pte; pt_entry_t* l2map; l1pte = pmap_lev1pte(pmap, va); if (!pmap_pte_v(l1pte)) return 0; l2map = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(pmap_pte_pa(l1pte)); return &l2map[pmap_lev2_index(va)]; } /* * Routine: pmap_lev3pte * Function: * Extract the level 3 page table entry associated * with the given map/virtual_address pair. */ static PMAP_INLINE pt_entry_t* pmap_lev3pte(pmap_t pmap, vm_offset_t va) { pt_entry_t* l2pte; pt_entry_t* l3map; l2pte = pmap_lev2pte(pmap, va); if (!l2pte || !pmap_pte_v(l2pte)) return 0; l3map = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(pmap_pte_pa(l2pte)); return &l3map[pmap_lev3_index(va)]; } vm_offset_t pmap_steal_memory(vm_size_t size) { vm_size_t bank_size; vm_offset_t pa, va; size = round_page(size); bank_size = phys_avail[1] - phys_avail[0]; while (size > bank_size) { int i; for (i = 0; phys_avail[i+2]; i+= 2) { phys_avail[i] = phys_avail[i+2]; phys_avail[i+1] = phys_avail[i+3]; } phys_avail[i] = 0; phys_avail[i+1] = 0; if (!phys_avail[0]) panic("pmap_steal_memory: out of memory"); bank_size = phys_avail[1] - phys_avail[0]; } pa = phys_avail[0]; phys_avail[0] += size; va = ALPHA_PHYS_TO_K0SEG(pa); bzero((caddr_t) va, size); return va; } extern pt_entry_t rom_pte; /* XXX */ extern int prom_mapped; /* XXX */ /* * Bootstrap the system enough to run with virtual memory. */ void pmap_bootstrap(vm_offset_t ptaddr, u_int maxasn) { pt_entry_t newpte; int i; /* * Setup ASNs. PCPU_GET(next_asn) and PCPU_GET(current_asngen) are set * up already. 
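 *
 * (Aside on the pmap_lev[123]pte() helpers and index macros defined
 * above -- a rough worked example, assuming the usual alpha layout of
 * 8K pages and 1024 PTEs per table, i.e. level 3/2/1 shifts of 13, 23
 * and 33.  For the debug address va = 0x120058000 used by
 * PMAP_DEBUG_VA:
 *
 *	pmap_lev1_index(va) = (va >> 33) & 0x3ff = 0
 *	pmap_lev2_index(va) = (va >> 23) & 0x3ff = 576
 *	pmap_lev3_index(va) = (va >> 13) & 0x3ff = 44
 *
 * pmap_lev3pte() simply chains pmap_lev1pte() and pmap_lev2pte(),
 * returning NULL as soon as an intermediate entry is invalid.)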
*/ pmap_maxasn = maxasn; /* * Allocate a level 1 map for the kernel. */ Lev1map = (pt_entry_t*) pmap_steal_memory(PAGE_SIZE); /* * Allocate a level 2 map for the kernel */ Lev2map = (pt_entry_t*) pmap_steal_memory(PAGE_SIZE); Lev2mapsize = PAGE_SIZE; /* * Allocate some level 3 maps for the kernel */ Lev3map = (pt_entry_t*) pmap_steal_memory(PAGE_SIZE*NKPT); Lev3mapsize = NKPT * PAGE_SIZE; /* Map all of the level 2 maps */ for (i = 0; i < howmany(Lev2mapsize, PAGE_SIZE); i++) { unsigned long pfn = pmap_k0seg_to_pfn((vm_offset_t) Lev2map) + i; newpte = ALPHA_PTE_FROM_PFN(pfn); newpte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_W; Lev1map[K1SEGLEV1I + i] = newpte; } /* Setup the mapping for the prom console */ { if (pmap_uses_prom_console()) { /* XXX save old pte so that we can remap prom if necessary */ rom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM; /* XXX */ } prom_mapped = 0; /* * Actually, this code lies. The prom is still mapped, and will * remain so until the context switch after alpha_init() returns. * Printfs using the firmware before then will end up frobbing * Lev1map unnecessarily, but that's OK. */ } /* * Level 1 self mapping. * * Don't set PG_ASM since the self-mapping is different for each * address space. */ newpte = pmap_k0seg_to_pte((vm_offset_t) Lev1map); newpte |= PG_V | PG_KRE | PG_KWE; Lev1map[PTLEV1I] = newpte; /* Map all of the level 3 maps */ for (i = 0; i < howmany(Lev3mapsize, PAGE_SIZE); i++) { unsigned long pfn = pmap_k0seg_to_pfn((vm_offset_t) Lev3map) + i; newpte = ALPHA_PTE_FROM_PFN(pfn); newpte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_W; Lev2map[i] = newpte; } avail_start = phys_avail[0]; for (i = 0; phys_avail[i+2]; i+= 2) ; avail_end = phys_avail[i+1]; virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VPTBASE; /* * Initialize protection array. */ alpha_protection_init(); /* * The kernel's pmap is statically allocated so we don't have to use * pmap_create, which is unlikely to work correctly at this part of * the boot sequence (XXX and which no longer exists). */ kernel_pmap = &kernel_pmap_store; kernel_pmap->pm_lev1 = Lev1map; kernel_pmap->pm_count = 1; kernel_pmap->pm_active = ~0; kernel_pmap->pm_asn[alpha_pal_whami()].asn = 0; kernel_pmap->pm_asn[alpha_pal_whami()].gen = 1; TAILQ_INIT(&kernel_pmap->pm_pvlist); nklev3 = NKPT; nklev2 = 1; /* * Set up proc0's PCB such that the ptbr points to the right place * and has the kernel pmap's. */ proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr = ALPHA_K0SEG_TO_PHYS((vm_offset_t)Lev1map) >> PAGE_SHIFT; proc0.p_addr->u_pcb.pcb_hw.apcb_asn = 0; } int pmap_uses_prom_console() { int cputype; cputype = hwrpb->rpb_type; return (cputype == ST_DEC_21000 || ST_DEC_4100 || cputype == ST_DEC_3000_300 || cputype == ST_DEC_3000_500); return 0; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_offset_t phys_start, phys_end; { int i; int initial_pvs; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. 
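 *
 * (A sketch of the pv_entry sizing done here and in pmap_init2(),
 * with purely illustrative numbers: for a machine with 16384 managed
 * pages and maxproc = 512,
 *
 *	initial_pvs = max(vm_page_array_size, MINPV) = 16384
 *	pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size
 *	             = 200 * 512 + 16384 = 118784
 *	pv_entry_high_water = 9 * (pv_entry_max / 10) = 106902
 *
 * so get_pv_entry() starts waking the pagedaemon well before the
 * zone is actually exhausted.)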
*/ for(i = 0; i < vm_page_array_size; i++) { vm_page_t m; m = &vm_page_array[i]; TAILQ_INIT(&m->md.pv_list); m->md.pv_list_count = 0; m->md.pv_flags = 0; } /* * init the pv free list */ initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; pvzone = &pvzone_store; pvinit = (struct pv_entry *) kmem_alloc(kernel_map, initial_pvs * sizeof (struct pv_entry)); zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, vm_page_array_size); /* * object for kernel page table pages */ kptobj = vm_object_allocate(OBJT_DEFAULT, NKLEV3MAPS + NKLEV2MAPS); /* * Now it is safe to enable pv_table recording. */ pmap_initialized = TRUE; } /* * Initialize the address space (zone) for the pv_entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ void pmap_init2() { pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size; pv_entry_high_water = 9 * (pv_entry_max / 10); zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); } /*************************************************** * Manipulate TLBs for a pmap ***************************************************/ static void pmap_invalidate_asn(pmap_t pmap) { pmap->pm_asn[PCPU_GET(cpuno)].gen = 0; } struct pmap_invalidate_page_arg { pmap_t pmap; vm_offset_t va; }; static void pmap_invalidate_page_action(void *arg) { pmap_t pmap = ((struct pmap_invalidate_page_arg *) arg)->pmap; vm_offset_t va = ((struct pmap_invalidate_page_arg *) arg)->va; if (pmap->pm_active & (1 << PCPU_GET(cpuno))) { ALPHA_TBIS(va); alpha_pal_imb(); /* XXX overkill? */ } else { pmap_invalidate_asn(pmap); } } static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { struct pmap_invalidate_page_arg arg; arg.pmap = pmap; arg.va = va; smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *) &arg); } static void pmap_invalidate_all_action(void *arg) { pmap_t pmap = (pmap_t) arg; if (pmap->pm_active & (1 << PCPU_GET(cpuno))) { ALPHA_TBIA(); alpha_pal_imb(); /* XXX overkill? */ } else pmap_invalidate_asn(pmap); } static void pmap_invalidate_all(pmap_t pmap) { smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *) pmap); } static void pmap_get_asn(pmap_t pmap) { if (pmap->pm_asn[PCPU_GET(cpuno)].gen != PCPU_GET(current_asngen)) { if (PCPU_GET(next_asn) > pmap_maxasn) { /* * Start a new ASN generation. * * Invalidate all per-process mappings and I-cache */ PCPU_GET(next_asn) = 0; PCPU_GET(current_asngen)++; PCPU_GET(current_asngen) &= (1 << 24) - 1; if (PCPU_GET(current_asngen) == 0) { /* * Clear the pm_asn[].gen of all pmaps. * This is safe since it is only called from * pmap_activate after it has deactivated * the old pmap and it only affects this cpu. */ struct proc *p; pmap_t tpmap; #ifdef PMAP_DIAGNOSTIC printf("pmap_get_asn: generation rollover\n"); #endif PCPU_GET(current_asngen) = 1; ALLPROC_LOCK(AP_SHARED); LIST_FOREACH(p, &allproc, p_list) { if (p->p_vmspace) { tpmap = vmspace_pmap(p->p_vmspace); tpmap->pm_asn[PCPU_GET(cpuno)].gen = 0; } } ALLPROC_LOCK(AP_RELEASE); } /* * Since we are about to start re-using ASNs, we must * clear out the TLB and the I-cache since they are tagged * with the ASN. */ ALPHA_TBIAP(); alpha_pal_imb(); /* XXX overkill? */ } pmap->pm_asn[PCPU_GET(cpuno)].asn = PCPU_GET(next_asn)++; pmap->pm_asn[PCPU_GET(cpuno)].gen = PCPU_GET(current_asngen); } } /*************************************************** * Low level helper routines..... 
***************************************************/ /* * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ static PMAP_INLINE int pmap_track_modified(vm_offset_t va) { if ((va < clean_sva) || (va >= clean_eva)) return 1; else return 0; } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_offset_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { pt_entry_t* pte = pmap_lev3pte(pmap, va); if (pte) return alpha_ptob(ALPHA_PTE_TO_PFN(*pte)); else return 0; } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. */ void pmap_qenter(vm_offset_t va, vm_page_t *m, int count) { int i; pt_entry_t *pte; for (i = 0; i < count; i++) { vm_offset_t tva = va + i * PAGE_SIZE; pt_entry_t npte = pmap_phys_to_pte(VM_PAGE_TO_PHYS(m[i])) | PG_ASM | PG_KRE | PG_KWE | PG_V; pt_entry_t opte; pte = vtopte(tva); opte = *pte; PMAP_DEBUG_VA(va); *pte = npte; if (opte) pmap_invalidate_page(kernel_pmap, tva); } } /* * this routine jerks page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(va, count) vm_offset_t va; int count; { int i; register pt_entry_t *pte; for (i = 0; i < count; i++) { pte = vtopte(va); PMAP_DEBUG_VA(va); *pte = 0; pmap_invalidate_page(kernel_pmap, va); va += PAGE_SIZE; } } /* * add a wired page to the kva * note that in order for the mapping to take effect -- you * should do a invltlb after doing the pmap_kenter... */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_offset_t pa) { pt_entry_t *pte; pt_entry_t npte, opte; npte = pmap_phys_to_pte(pa) | PG_ASM | PG_KRE | PG_KWE | PG_V; pte = vtopte(va); opte = *pte; PMAP_DEBUG_VA(va); *pte = npte; if (opte) pmap_invalidate_page(kernel_pmap, va); } /* * remove a page from the kernel pagetables */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { register pt_entry_t *pte; pte = vtopte(va); PMAP_DEBUG_VA(va); *pte = 0; pmap_invalidate_page(kernel_pmap, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * For now, VM is already on, we only need to map the * specified memory. */ vm_offset_t pmap_map(vm_offset_t virt, vm_offset_t start, vm_offset_t end, int prot) { while (start < end) { pmap_kenter(virt, start); virt += PAGE_SIZE; start += PAGE_SIZE; } return (virt); } static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; retry: m = vm_page_lookup(object, pindex); if (m && vm_page_sleep_busy(m, FALSE, "pplookp")) goto retry; return m; } /* * Create the UPAGES for a new process. * This routine directly affects the fork perf for a process. 
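 *
 * (For reference, a minimal sketch of how a caller might use the
 * pmap_kenter()/pmap_kremove() pair defined above for a throw-away
 * kernel mapping; "scratch_va" is a hypothetical KVA owned by the
 * caller:
 *
 *	pmap_kenter(scratch_va, VM_PAGE_TO_PHYS(m));
 *	bzero((caddr_t) scratch_va, PAGE_SIZE);
 *	pmap_kremove(scratch_va);
 *
 * On alpha this is rarely necessary, since K0SEG already provides a
 * direct mapping of all physical memory.)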
*/ void pmap_new_proc(struct proc *p) { int i; vm_object_t upobj; vm_page_t m; struct user *up; pt_entry_t *ptek, oldpte; /* * allocate object for the upages */ if ((upobj = p->p_upages_obj) == NULL) { upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES); p->p_upages_obj = upobj; } /* get a kernel virtual address for the UPAGES for this proc */ if ((up = p->p_addr) == NULL) { up = (struct user *) kmem_alloc_nofault(kernel_map, UPAGES * PAGE_SIZE); if (up == NULL) panic("pmap_new_proc: u_map allocation failed"); p->p_addr = up; } ptek = vtopte((vm_offset_t) up); for(i=0;iwire_count++; cnt.v_wire_count++; oldpte = *(ptek + i); /* * Enter the page into the kernel address space. */ *(ptek + i) = pmap_phys_to_pte(VM_PAGE_TO_PHYS(m)) | PG_ASM | PG_KRE | PG_KWE | PG_V; if (oldpte) pmap_invalidate_page(kernel_pmap, (vm_offset_t)up + i * PAGE_SIZE); vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } } /* * Dispose the UPAGES for a process that has exited. * This routine directly impacts the exit perf of a process. */ void pmap_dispose_proc(p) struct proc *p; { int i; vm_object_t upobj; vm_page_t m; pt_entry_t *ptek, oldpte; upobj = p->p_upages_obj; ptek = vtopte((vm_offset_t) p->p_addr); for(i=0;ip_addr + i * PAGE_SIZE); vm_page_unwire(m, 0); vm_page_free(m); } } /* * Allow the UPAGES for a process to be prejudicially paged out. */ void pmap_swapout_proc(p) struct proc *p; { int i; vm_object_t upobj; vm_page_t m; /* * Make sure we aren't fpcurproc. */ alpha_fpstate_save(p, 1); upobj = p->p_upages_obj; /* * let the upages be paged */ for(i=0;ip_addr + PAGE_SIZE * i); } } /* * Bring the UPAGES for a specified process back in. */ void pmap_swapin_proc(p) struct proc *p; { int i,rv; vm_object_t upobj; vm_page_t m; upobj = p->p_upages_obj; for(i=0;ip_addr) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid); m = vm_page_lookup(upobj, i); m->valid = VM_PAGE_BITS_ALL; } vm_page_wire(m); vm_page_wakeup(m); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); } /* * The pcb may be at a different physical address now so cache the * new address. */ p->p_md.md_pcbpaddr = (void*) vtophys((vm_offset_t) &p->p_addr->u_pcb); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) { while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) ; if (m->hold_count == 0) { vm_offset_t pteva; pt_entry_t* pte; /* * unmap the page table page */ if (m->pindex >= NUSERLEV3MAPS) { /* Level 2 page table */ pte = pmap_lev1pte(pmap, va); pteva = (vm_offset_t) PTlev2 + alpha_ptob(m->pindex - NUSERLEV3MAPS); } else { /* Level 3 page table */ pte = pmap_lev2pte(pmap, va); pteva = (vm_offset_t) PTmap + alpha_ptob(m->pindex); } *pte = 0; if (m->pindex < NUSERLEV3MAPS) { /* unhold the level 2 page table */ vm_page_t lev2pg; lev2pg = pmap_page_lookup(pmap->pm_pteobj, NUSERLEV3MAPS + pmap_lev1_index(va)); vm_page_unhold(lev2pg); if (lev2pg->hold_count == 0) _pmap_unwire_pte_hold(pmap, va, lev2pg); } --pmap->pm_stats.resident_count; /* * Do a invltlb to make the invalidated mapping * take effect immediately. 
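 *
 * (Bookkeeping being undone here, in sketch form: pmap_allocpte()
 * wires each page table page once and adds one hold_count reference
 * per pte installed beneath it; pmap_remove_pte() drops that
 * reference via pmap_unuse_pt() -> pmap_unwire_pte_hold().  When
 * hold_count reaches zero the page table page is unmapped from its
 * parent table, and once its wire_count also drops to zero it is
 * returned with vm_page_free_zero(), since it holds no valid ptes at
 * that point.)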
*/ pmap_invalidate_page(pmap, pteva); if (pmap->pm_ptphint == m) pmap->pm_ptphint = NULL; /* * If the page is finally unwired, simply free it. */ --m->wire_count; if (m->wire_count == 0) { if (m->flags & PG_WANTED) { vm_page_flag_clear(m, PG_WANTED); wakeup(m); } vm_page_busy(m); vm_page_free_zero(m); --cnt.v_wire_count; } return 1; } return 0; } static PMAP_INLINE int pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) { vm_page_unhold(m); if (m->hold_count == 0) return _pmap_unwire_pte_hold(pmap, va, m); else return 0; } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) { unsigned ptepindex; if (va >= VM_MAXUSER_ADDRESS) return 0; if (mpte == NULL) { ptepindex = (va >> ALPHA_L2SHIFT); if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } } return pmap_unwire_pte_hold(pmap, va, mpte); } void pmap_pinit0(pmap) struct pmap *pmap; { int i; pmap->pm_lev1 = Lev1map; pmap->pm_flags = 0; pmap->pm_count = 1; pmap->pm_ptphint = NULL; pmap->pm_active = 0; for (i = 0; i < MAXCPU; i++) { pmap->pm_asn[i].asn = 0; pmap->pm_asn[i].gen = 0; } TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t lev1pg; int i; /* * allocate object for the ptes */ if (pmap->pm_pteobj == NULL) pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUSERLEV3MAPS + NUSERLEV2MAPS + 1); /* * allocate the page directory page */ lev1pg = vm_page_grab(pmap->pm_pteobj, NUSERLEV3MAPS + NUSERLEV2MAPS, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); lev1pg->wire_count = 1; ++cnt.v_wire_count; vm_page_flag_clear(lev1pg, PG_MAPPED | PG_BUSY); /* not mapped normally */ lev1pg->valid = VM_PAGE_BITS_ALL; pmap->pm_lev1 = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(VM_PAGE_TO_PHYS(lev1pg)); if ((lev1pg->flags & PG_ZERO) == 0) bzero(pmap->pm_lev1, PAGE_SIZE); /* install self-referential address mapping entry (not PG_ASM) */ pmap->pm_lev1[PTLEV1I] = pmap_phys_to_pte(VM_PAGE_TO_PHYS(lev1pg)) | PG_V | PG_KRE | PG_KWE; pmap->pm_flags = 0; pmap->pm_count = 1; pmap->pm_ptphint = NULL; pmap->pm_active = 0; for (i = 0; i < MAXCPU; i++) { pmap->pm_asn[i].asn = 0; pmap->pm_asn[i].gen = 0; } TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Wire in kernel global address entries. To avoid a race condition * between pmap initialization and pmap_growkernel, this procedure * should be called after the vmspace is attached to the process * but before this pmap is activated. */ void pmap_pinit2(pmap) struct pmap *pmap; { bcopy(PTlev1 + K1SEGLEV1I, pmap->pm_lev1 + K1SEGLEV1I, nklev2 * PTESIZE); } static int pmap_release_free_page(pmap_t pmap, vm_page_t p) { pt_entry_t* pte; pt_entry_t* l2map; if (p->pindex >= NUSERLEV3MAPS + NUSERLEV2MAPS) /* level 1 page table */ pte = &pmap->pm_lev1[PTLEV1I]; else if (p->pindex >= NUSERLEV3MAPS) /* level 2 page table */ pte = &pmap->pm_lev1[p->pindex - NUSERLEV3MAPS]; else { /* level 3 page table */ pte = &pmap->pm_lev1[p->pindex >> ALPHA_PTSHIFT]; l2map = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(pmap_pte_pa(pte)); pte = &l2map[p->pindex & ((1 << ALPHA_PTSHIFT) - 1)]; } /* * This code optimizes the case of freeing non-busy * page-table pages. 
Those pages are zero now, and * might as well be placed directly into the zero queue. */ if (vm_page_sleep_busy(p, FALSE, "pmaprl")) return 0; vm_page_busy(p); /* * Remove the page table page from the processes address space. */ *pte = 0; pmap->pm_stats.resident_count--; #ifdef PMAP_DEBUG if (p->hold_count) { panic("pmap_release: freeing held page table page"); } #endif /* * Level1 pages need to have the kernel * stuff cleared, so they can go into the zero queue also. */ if (p->pindex == NUSERLEV3MAPS + NUSERLEV2MAPS) bzero(pmap->pm_lev1 + K1SEGLEV1I, nklev2 * PTESIZE); if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) pmap->pm_ptphint = NULL; #ifdef PMAP_DEBUG { u_long *lp = (u_long*) ALPHA_PHYS_TO_K0SEG(VM_PAGE_TO_PHYS(p)); u_long *ep = (u_long*) ((char*) lp + PAGE_SIZE); for (; lp < ep; lp++) if (*lp != 0) panic("pmap_release_free_page: page not zero"); } #endif p->wire_count--; cnt.v_wire_count--; vm_page_free_zero(p); return 1; } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; unsigned ptepindex; { pt_entry_t* pte; vm_offset_t ptepa; vm_page_t m; /* * Find or fabricate a new pagetable page */ m = vm_page_grab(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO | VM_ALLOC_RETRY); KASSERT(m->queue == PQ_NONE, ("_pmap_allocpte: %p->queue != PQ_NONE", m)); if (m->wire_count == 0) cnt.v_wire_count++; m->wire_count++; /* * Increment the hold count for the page table page * (denoting a new mapping.) */ m->hold_count++; /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); if (ptepindex >= NUSERLEV3MAPS) { pte = &pmap->pm_lev1[ptepindex - NUSERLEV3MAPS]; } else { int l1index = ptepindex >> ALPHA_PTSHIFT; pt_entry_t* l1pte = &pmap->pm_lev1[l1index]; pt_entry_t* l2map; if (!pmap_pte_v(l1pte)) _pmap_allocpte(pmap, NUSERLEV3MAPS + l1index); else { vm_page_t l2page = pmap_page_lookup(pmap->pm_pteobj, NUSERLEV3MAPS + l1index); l2page->hold_count++; } l2map = (pt_entry_t*) ALPHA_PHYS_TO_K0SEG(pmap_pte_pa(l1pte)); pte = &l2map[ptepindex & ((1 << ALPHA_PTSHIFT) - 1)]; } *pte = pmap_phys_to_pte(ptepa) | PG_KRE | PG_KWE | PG_V; /* * Set the page table hint */ pmap->pm_ptphint = m; if ((m->flags & PG_ZERO) == 0) bzero((caddr_t) ALPHA_PHYS_TO_K0SEG(ptepa), PAGE_SIZE); m->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(m, PG_ZERO | PG_BUSY); vm_page_flag_set(m, PG_MAPPED); return m; } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va) { unsigned ptepindex; pt_entry_t* lev2pte; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> (PAGE_SHIFT + ALPHA_PTSHIFT); /* * Get the level2 entry */ lev2pte = pmap_lev2pte(pmap, va); /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (lev2pte && pmap_pte_v(lev2pte)) { /* * In order to get the page table page, try the * hint first. */ if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { m = pmap->pm_ptphint; } else { m = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = m; } m->hold_count++; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. 
* Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t p,n,lev1pg; vm_object_t object = pmap->pm_pteobj; int curgeneration; #if defined(DIAGNOSTIC) if (object->ref_count != 1) panic("pmap_release: pteobj reference count != 1"); #endif lev1pg = NULL; retry: curgeneration = object->generation; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { n = TAILQ_NEXT(p, listq); if (p->pindex >= NUSERLEV3MAPS) { continue; } while (1) { if (!pmap_release_free_page(pmap, p) && (object->generation != curgeneration)) goto retry; } } for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { n = TAILQ_NEXT(p, listq); if (p->pindex < NUSERLEV3MAPS) { /* can this happen? maybe panic */ goto retry; } if (p->pindex >= NUSERLEV3MAPS + NUSERLEV2MAPS) { lev1pg = p; continue; } while (1) { if (!pmap_release_free_page(pmap, p) && (object->generation != curgeneration)) goto retry; } } if (lev1pg && !pmap_release_free_page(pmap, lev1pg)) goto retry; } /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { /* XXX come back to this */ struct proc *p; struct pmap *pmap; int s; pt_entry_t* pte; pt_entry_t newlev1, newlev2; vm_offset_t pa; vm_page_t nkpg; s = splhigh(); if (kernel_vm_end == 0) { kernel_vm_end = VM_MIN_KERNEL_ADDRESS;; /* Count the level 2 page tables */ nklev2 = 0; nklev3 = 0; while (pmap_pte_v(pmap_lev1pte(kernel_pmap, kernel_vm_end))) { nklev2++; nklev3 += (1L << ALPHA_PTSHIFT); kernel_vm_end += ALPHA_L1SIZE; } /* Count the level 3 page tables in the last level 2 page table */ kernel_vm_end -= ALPHA_L1SIZE; nklev3 -= (1 << ALPHA_PTSHIFT); while (pmap_pte_v(pmap_lev2pte(kernel_pmap, kernel_vm_end))) { nklev3++; kernel_vm_end += ALPHA_L2SIZE; } } addr = (addr + ALPHA_L2SIZE) & ~(ALPHA_L2SIZE - 1); while (kernel_vm_end < addr) { /* * If the level 1 pte is invalid, allocate a new level 2 page table */ pte = pmap_lev1pte(kernel_pmap, kernel_vm_end); if (!pmap_pte_v(pte)) { int pindex = NKLEV3MAPS + pmap_lev1_index(kernel_vm_end) - K1SEGLEV1I; nkpg = vm_page_alloc(kptobj, pindex, VM_ALLOC_SYSTEM); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); printf("pmap_growkernel: growing to %lx\n", addr); printf("pmap_growkernel: adding new level2 page table\n"); nklev2++; vm_page_wire(nkpg); pa = VM_PAGE_TO_PHYS(nkpg); pmap_zero_page(pa); newlev1 = pmap_phys_to_pte(pa) | PG_V | PG_ASM | PG_KRE | PG_KWE; ALLPROC_LOCK(AP_SHARED); LIST_FOREACH(p, &allproc, p_list) { if (p->p_vmspace) { pmap = vmspace_pmap(p->p_vmspace); *pmap_lev1pte(pmap, kernel_vm_end) = newlev1; } } ALLPROC_LOCK(AP_RELEASE); *pte = newlev1; pmap_invalidate_all(kernel_pmap); } /* * If the level 2 pte is invalid, allocate a new level 3 page table */ pte = pmap_lev2pte(kernel_pmap, kernel_vm_end); if (pmap_pte_v(pte)) { kernel_vm_end = (kernel_vm_end + ALPHA_L2SIZE) & ~(ALPHA_L2SIZE - 1); continue; } /* * This index is bogus, but out of the way */ nkpg = vm_page_alloc(kptobj, nklev3, VM_ALLOC_SYSTEM); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nklev3++; vm_page_wire(nkpg); pa = VM_PAGE_TO_PHYS(nkpg); pmap_zero_page(pa); newlev2 = pmap_phys_to_pte(pa) | PG_V | PG_ASM | PG_KRE | PG_KWE; *pte = newlev2; kernel_vm_end = (kernel_vm_end + ALPHA_L2SIZE) & ~(ALPHA_L2SIZE - 1); } splx(s); } /* * Retire the given physical map from service. * Should only be called if the map contains * no valid mappings. 
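 *
 * (Reference counting sketch: pm_count starts at 1 in pmap_pinit(),
 * pmap_reference() bumps it, and pmap_destroy() drops it, calling
 * pmap_release() when the last reference goes away -- although, as
 * the panic below notes, fully destroying a pmap this way is not yet
 * implemented.)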
*/ void pmap_destroy(pmap_t pmap) { int count; if (pmap == NULL) return; count = --pmap->pm_count; if (count == 0) { pmap_release(pmap); panic("destroying a pmap is not yet implemented"); } } /* * Add a reference to the specified pmap. */ void pmap_reference(pmap_t pmap) { if (pmap != NULL) { pmap->pm_count++; } } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ static PMAP_INLINE void free_pv_entry(pv_entry_t pv) { pv_entry_count--; - zfreei(pvzone, pv); + zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ static pv_entry_t get_pv_entry(void) { pv_entry_count++; if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) && (pmap_pagedaemon_waken == 0)) { pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } - return zalloci(pvzone); + return zalloc(pvzone); } /* * This routine is very drastic, but can save the system * in a pinch. */ void pmap_collect() { int i; vm_page_t m; static int warningdone=0; if (pmap_pagedaemon_waken == 0) return; if (warningdone < 5) { printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); warningdone++; } for(i = 0; i < vm_page_array_size; i++) { m = &vm_page_array[i]; if (m->wire_count || m->hold_count || m->busy || (m->flags & PG_BUSY)) continue; pmap_remove_all(m); } pmap_pagedaemon_waken = 0; } /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ static int pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { pv_entry_t pv; int rtval; int s; s = splvm(); if (m->md.pv_list_count < pmap->pm_stats.resident_count) { for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pmap == pv->pv_pmap && va == pv->pv_va) break; } } else { for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = TAILQ_NEXT(pv, pv_plist)) { if (va == pv->pv_va) break; } } rtval = 0; if (pv) { rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } splx(s); return rtval; } /* * Create a pv entry for page at pa for * (pmap, va). 
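 *
 * (Each pv_entry created here is threaded onto two lists at once,
 * which keeps both directions of lookup cheap:
 *
 *	pmap->pm_pvlist		every mapping owned by this pmap
 *	m->md.pv_list		every mapping of this physical page
 *
 * pmap_remove_entry() above searches whichever list is likely
 * shorter, by comparing pv_list_count with the pmap's
 * resident_count.)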
*/ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) { int s; pv_entry_t pv; s = splvm(); pv = get_pv_entry(); pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_ptem = mpte; TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); m->md.pv_list_count++; splx(s); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t* ptq, vm_offset_t va) { pt_entry_t oldpte; vm_page_t m; oldpte = *ptq; PMAP_DEBUG_VA(va); *ptq = 0; if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(pmap_pte_pa(&oldpte)); return pmap_remove_entry(pmap, m, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va) { register pt_entry_t *ptq; ptq = pmap_lev3pte(pmap, va); /* * if there is no pte for this address, just skip it!!! */ if (!ptq || !pmap_pte_v(ptq)) return; /* * get a local va for mappings for this pmap. */ (void) pmap_remove_pte(pmap, ptq, va); pmap_invalidate_page(pmap, va); return; } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va, nva; if (pmap == NULL) return; if (pmap->pm_stats.resident_count == 0) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if (sva + PAGE_SIZE == eva) { pmap_remove_page(pmap, sva); return; } for (va = sva; va < eva; va = nva) { if (!pmap_pte_v(pmap_lev1pte(pmap, va))) { nva = alpha_l1trunc(va + ALPHA_L1SIZE); continue; } if (!pmap_pte_v(pmap_lev2pte(pmap, va))) { nva = alpha_l2trunc(va + ALPHA_L2SIZE); continue; } pmap_remove_page(pmap, va); nva = va + PAGE_SIZE; } } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ static void pmap_remove_all(vm_page_t m) { register pv_entry_t pv; pt_entry_t *pte, tpte; int nmodify; int s; nmodify = 0; #if defined(PMAP_DIAGNOSTIC) /* * XXX this makes pmap_page_protect(NONE) illegal for non-managed * pages! */ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m)); } #endif s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pte = pmap_lev3pte(pv->pv_pmap, pv->pv_va); pv->pv_pmap->pm_stats.resident_count--; if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m)) panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m)); tpte = *pte; PMAP_DEBUG_VA(pv->pv_va); *pte = 0; if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); splx(s); return; } /* * Set the physical protection on the * specified range of this map as requested. 
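 *
 * (Like pmap_remove() above, the loop below advances by ALPHA_L1SIZE
 * or ALPHA_L2SIZE whenever an upper-level pte is invalid, e.g.
 *
 *	sva = alpha_l1trunc(sva) + ALPHA_L1SIZE;
 *
 * so sparse regions are skipped a segment at a time rather than page
 * by page.)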
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { pt_entry_t* pte; int newprot; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; newprot = pte_prot(pmap, prot); if ((sva & PAGE_MASK) || (eva & PAGE_MASK)) panic("pmap_protect: unaligned addresses"); while (sva < eva) { /* * If level 1 pte is invalid, skip this segment */ pte = pmap_lev1pte(pmap, sva); if (!pmap_pte_v(pte)) { sva = alpha_l1trunc(sva) + ALPHA_L1SIZE; continue; } /* * If level 2 pte is invalid, skip this segment */ pte = pmap_lev2pte(pmap, sva); if (!pmap_pte_v(pte)) { sva = alpha_l2trunc(sva) + ALPHA_L2SIZE; continue; } /* * If level 3 pte is invalid, skip this page */ pte = pmap_lev3pte(pmap, sva); if (!pmap_pte_v(pte)) { sva += PAGE_SIZE; continue; } if (pmap_pte_prot(pte) != newprot) { pmap_pte_set_prot(pte, newprot); pmap_invalidate_page(pmap, sva); } sva += PAGE_SIZE; } } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { vm_offset_t pa; pt_entry_t *pte; vm_offset_t opa; pt_entry_t origpte, newpte; vm_page_t mpte; int managed; if (pmap == NULL) return; va &= ~PAGE_MASK; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va); } pte = pmap_lev3pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid kernel page tables pmap=%p, va=0x%lx\n", pmap, va); } origpte = *pte; pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK; managed = 0; opa = pmap_pte_pa(pte); /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; /* * Remove extra pte reference */ if (mpte) mpte->hold_count--; managed = origpte & PG_MANAGED; goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { int err; err = pmap_remove_pte(pmap, pte, va); if (err) panic("pmap_enter: pte vanished, va: 0x%lx", va); } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) { pmap_insert_entry(pmap, va, mpte, m); managed |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. 
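 *
 * (Alpha has no hardware referenced/modified bits, so the code below
 * emulates them with the fault-on bits: a not-yet-referenced page is
 * entered with PG_FOR | PG_FOW | PG_FOE so the first access traps to
 * pmap_emulate_reference(), and a referenced-but-clean page keeps
 * PG_FOW so the first write can be caught and the page dirtied.)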
*/ newpte = pmap_phys_to_pte(pa) | pte_prot(pmap, prot) | PG_V | managed; if (managed) { vm_page_t om; /* * Set up referenced/modified emulation for the new mapping */ om = PHYS_TO_VM_PAGE(pa); if ((om->md.pv_flags & PV_TABLE_REF) == 0) newpte |= PG_FOR | PG_FOW | PG_FOE; else if ((om->md.pv_flags & PV_TABLE_MOD) == 0) newpte |= PG_FOW; } if (wired) newpte |= PG_W; /* * if the mapping or permission bits are different, we need * to update the pte. */ if (origpte != newpte) { PMAP_DEBUG_VA(va); *pte = newpte; if (origpte) pmap_invalidate_page(pmap, va); if (prot & VM_PROT_EXECUTE) alpha_pal_imb(); } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) { register pt_entry_t *pte; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { unsigned ptepindex; pt_entry_t* l2pte; /* * Calculate lev2 page index */ ptepindex = va >> ALPHA_L2SHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->hold_count++; } else { retry: /* * Get the level 2 entry */ l2pte = pmap_lev2pte(pmap, va); /* * If the level 2 page table is mapped, we just increment * the hold count, and activate it. */ if (l2pte && pmap_pte_v(l2pte)) { if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } if (mpte == NULL) goto retry; mpte->hold_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte) pmap_unwire_pte_hold(pmap, va, mpte); alpha_pal_imb(); /* XXX overkill? */ return 0; } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ PMAP_DEBUG_VA(va); pmap_insert_entry(pmap, va, mpte, m); /* * Increment counters */ pmap->pm_stats.resident_count++; /* * Now validate mapping with RO protection */ *pte = pmap_phys_to_pte(VM_PAGE_TO_PHYS(m)) | PG_V | PG_KRE | PG_URE | PG_MANAGED; alpha_pal_imb(); /* XXX overkill? */ return mpte; } /* * Make temporary mapping for a physical address. This is called * during dump. */ void * pmap_kenter_temporary(vm_offset_t pa, int i) { return (void *) ALPHA_PHYS_TO_K0SEG(pa - (i * PAGE_SIZE)); } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. 
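 *
 * (Two strategies are used below: when the requested range covers
 * more than a quarter of the object's resident pages, the object's
 * memq is scanned once in its entirety; otherwise each page index is
 * looked up individually.  Either way, only fully valid, unbusied
 * pages are entered, via pmap_enter_quick().)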
*/ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int limit) { vm_offset_t tmpidx; int psize; vm_page_t p, mpte; int objpgs; if (pmap == NULL || object == NULL) return; psize = alpha_btop(size); if ((object->type != OBJT_VNODE) || (limit && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) psize = object->size - pindex; mpte = NULL; /* * if we are processing a major portion of the object, then scan the * entire thing. */ if (psize > (object->resident_page_count >> 2)) { objpgs = psize; for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { continue; } tmpidx -= pindex; if (tmpidx >= psize) { continue; } if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); mpte = pmap_enter_quick(pmap, addr + alpha_ptob(tmpidx), p, mpte); vm_page_flag_set(p, PG_MAPPED); vm_page_wakeup(p); } objpgs -= 1; } } else { /* * else lookup the pages one-by-one. */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); mpte = pmap_enter_quick(pmap, addr + alpha_ptob(tmpidx), p, mpte); vm_page_flag_set(p, PG_MAPPED); vm_page_wakeup(p); } } } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time. 
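 *
 * (The pmap_prefault_pageorder[] table below is meant to try the
 * pages nearest the faulting address first, alternating before and
 * after it: -1, +1, -2, +2, ... page offsets from addra.)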
*/ #define PFBAK 4 #define PFFOR 4 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE, -3 * PAGE_SIZE, 3 * PAGE_SIZE -4 * PAGE_SIZE, 4 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; vm_page_t m, mpte; vm_object_t object; if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; pt_entry_t *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if (!pmap_pte_v(pmap_lev1pte(pmap, addr)) || !pmap_pte_v(pmap_lev2pte(pmap, addr))) continue; pte = vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give-up when a page is not in memory */ if (m == NULL) break; if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((m->queue - m->pc) == PQ_CACHE) { vm_page_deactivate(m); } vm_page_busy(m); mpte = pmap_enter_quick(pmap, addr, m, mpte); vm_page_flag_set(m, PG_MAPPED); vm_page_wakeup(m); } } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { pt_entry_t *pte; if (pmap == NULL) return; pte = pmap_lev3pte(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * Routine: pmap_kernel * Function: * Returns the physical map handle for the kernel. */ pmap_t pmap_kernel() { return (kernel_pmap); } /* * pmap_zero_page zeros the specified hardware page by * mapping it into virtual memory and using bzero to clear * its contents. */ void pmap_zero_page(vm_offset_t pa) { vm_offset_t va = ALPHA_PHYS_TO_K0SEG(pa); bzero((caddr_t) va, PAGE_SIZE); } /* * pmap_zero_page_area zeros the specified hardware page by * mapping it into virtual memory and using bzero to clear * its contents. * * off and size must reside within a single page. 
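 *
 * (As with pmap_zero_page() above, no temporary mapping is needed:
 * K0SEG provides a direct 1-1 mapping of physical memory, so bzero()
 * and bcopy() can operate on ALPHA_PHYS_TO_K0SEG(pa) directly.)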
*/ void pmap_zero_page_area(vm_offset_t pa, int off, int size) { vm_offset_t va = ALPHA_PHYS_TO_K0SEG(pa); bzero((char *)(caddr_t)va + off, size); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_offset_t src, vm_offset_t dst) { src = ALPHA_PHYS_TO_K0SEG(src); dst = ALPHA_PHYS_TO_K0SEG(dst); bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE); } /* * Routine: pmap_pageable * Function: * Make the specified pages (by pmap, offset) * pageable (or not) as requested. * * A page which is not pageable may not take * a fault; therefore, its page table entry * must remain valid for the duration. * * This routine is merely advisory; pmap_enter * will specify that these pages are to be wired * down (or not) as appropriate. */ void pmap_pageable(pmap, sva, eva, pageable) pmap_t pmap; vm_offset_t sva, eva; boolean_t pageable; { } /* * this routine returns true if a physical page resides * in the given pmap. */ boolean_t pmap_page_exists(pmap, m) pmap_t pmap; vm_page_t m; { register pv_entry_t pv; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); /* * Not found, check current mappings returning immediately if found. */ for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } } splx(s); return (FALSE); } #define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap, sva, eva) pmap_t pmap; vm_offset_t sva, eva; { pt_entry_t *pte, tpte; vm_page_t m; pv_entry_t pv, npv; int s; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } #endif s = splvm(); for(pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY pte = vtopte(pv->pv_va); #else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif if (!pmap_pte_v(pte)) panic("pmap_remove_pages: page on pm_pvlist has no pte\n"); tpte = *pte; /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; } PMAP_DEBUG_VA(pv->pv_va); *pte = 0; m = PHYS_TO_VM_PAGE(pmap_pte_pa(&tpte)); pv->pv_pmap->pm_stats.resident_count--; npv = TAILQ_NEXT(pv, pv_plist); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); m->md.pv_list_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_FIRST(&m->md.pv_list) == NULL) { vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); } pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } splx(s); pmap_invalidate_all(pmap); } /* * this routine is used to modify bits in ptes */ static void pmap_changebit(vm_page_t m, int bit, boolean_t setem) { pv_entry_t pv; pt_entry_t *pte; int changed; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; s = splvm(); changed = 0; /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? 
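 *
 * (pmap_changebit() is the common engine for the routines that
 * follow: pmap_page_protect() clears PG_KWE|PG_UWE through it to
 * write-protect a page, while pmap_ts_referenced(),
 * pmap_clear_modify() and pmap_clear_reference() set the fault-on
 * bits again to re-arm the referenced/modified emulation.)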
*/ for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { /* * don't write protect pager mappings */ if (!setem && bit == (PG_UWE|PG_KWE)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va); continue; } #endif pte = pmap_lev3pte(pv->pv_pmap, pv->pv_va); changed = 0; if (setem) { *pte |= bit; changed = 1; } else { pt_entry_t pbits = *pte; if (pbits & bit) { changed = 1; *pte = pbits & ~bit; } } if (changed) pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } splx(s); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(vm_page_t m, vm_prot_t prot) { if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { pmap_changebit(m, PG_KWE|PG_UWE, FALSE); } else { pmap_remove_all(m); } } } vm_offset_t pmap_phys_address(ppn) int ppn; { return (alpha_ptob(ppn)); } /* * pmap_ts_referenced: * * Return the count of reference bits for a page, clearing all of them. * */ int pmap_ts_referenced(vm_page_t m) { if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return 0; if (m->md.pv_flags & PV_TABLE_REF) { pmap_changebit(m, PG_FOR|PG_FOE|PG_FOW, TRUE); m->md.pv_flags &= ~PV_TABLE_REF; return 1; } return 0; } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; return (m->md.pv_flags & PV_TABLE_MOD) != 0; } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; if (m->md.pv_flags & PV_TABLE_MOD) { pmap_changebit(m, PG_FOW, TRUE); m->md.pv_flags &= ~PV_TABLE_MOD; } } /* * pmap_page_is_free: * * Called when a page is freed to allow pmap to clean up * any extra state associated with the page. In this case * clear modified/referenced bits. */ void pmap_page_is_free(vm_page_t m) { m->md.pv_flags = 0; } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; if (m->md.pv_flags & PV_TABLE_REF) { pmap_changebit(m, PG_FOR|PG_FOE|PG_FOW, TRUE); m->md.pv_flags &= ~PV_TABLE_REF; } } /* * pmap_emulate_reference: * * Emulate reference and/or modified bit hits. * From NetBSD */ void pmap_emulate_reference(struct proc *p, vm_offset_t v, int user, int write) { pt_entry_t faultoff, *pte; vm_offset_t pa; vm_page_t m; /* * Convert process and virtual address to physical address. */ if (v >= VM_MIN_KERNEL_ADDRESS) { if (user) panic("pmap_emulate_reference: user ref to kernel"); pte = vtopte(v); } else { #ifdef DIAGNOSTIC if (p == NULL) panic("pmap_emulate_reference: bad proc"); if (p->p_vmspace == NULL) panic("pmap_emulate_reference: bad p_vmspace"); #endif pte = pmap_lev3pte(p->p_vmspace->vm_map.pmap, v); } #ifdef DEBUG /* These checks are more expensive */ if (!pmap_pte_v(pte)) panic("pmap_emulate_reference: invalid pte"); #if 0 /* * Can't do these, because cpu_fork and cpu_swapin call * pmap_emulate_reference(), and the bits aren't guaranteed, * for them... */ if (write) { if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE))) panic("pmap_emulate_reference: write but unwritable"); if (!(*pte & PG_FOW)) panic("pmap_emulate_reference: write but not FOW"); } else { if (!(*pte & (user ? 
PG_URE : PG_URE | PG_KRE))) panic("pmap_emulate_reference: !write but unreadable"); if (!(*pte & (PG_FOR | PG_FOE))) panic("pmap_emulate_reference: !write but not FOR|FOE"); } #endif /* Other diagnostics? */ #endif pa = pmap_pte_pa(pte); #ifdef DIAGNOSTIC if ((*pte & PG_MANAGED) == 0) panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): pa 0x%lx not managed", p, v, user, write, pa); #endif /* * Twiddle the appropriate bits to reflect the reference * and/or modification.. * * The rules: * (1) always mark page as used, and * (2) if it was a write fault, mark page as modified. */ m = PHYS_TO_VM_PAGE(pa); m->md.pv_flags |= PV_TABLE_REF; faultoff = PG_FOR | PG_FOE; vm_page_flag_set(m, PG_REFERENCED); if (write) { m->md.pv_flags |= PV_TABLE_MOD; vm_page_dirty(m); faultoff |= PG_FOW; } pmap_changebit(m, faultoff, FALSE); if ((*pte & faultoff) != 0) { #if 1 /* * XXX dfr - don't think its possible in our pmap */ /* * This is apparently normal. Why? -- cgd * XXX because was being called on unmanaged pages? */ panic("warning: pmap_changebit didn't."); #endif *pte &= ~faultoff; ALPHA_TBIS(v); } } /* * Miscellaneous support routines follow */ static void alpha_protection_init() { int prot, *kp, *up; kp = protection_codes[0]; up = protection_codes[1]; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: *kp++ = PG_ASM; *up++ = 0; break; case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = PG_ASM | PG_KRE; *up++ = PG_URE | PG_KRE; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: *kp++ = PG_ASM | PG_KWE; *up++ = PG_UWE | PG_KWE; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_ASM | PG_KWE | PG_KRE; *up++ = PG_UWE | PG_URE | PG_KWE | PG_KRE; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
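 *
 * On alpha no page table setup is required; the return value is just
 * the K0SEG alias of the physical address.  A (hypothetical) caller
 *
 *	regs = pmap_mapdev(busaddr, size);
 *
 * simply gets back ALPHA_PHYS_TO_K0SEG(busaddr); nothing is allocated.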
*/ void * pmap_mapdev(pa, size) vm_offset_t pa; vm_size_t size; { return (void*) ALPHA_PHYS_TO_K0SEG(pa); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { pt_entry_t *pte; int val = 0; pte = pmap_lev3pte(pmap, addr); if (pte == 0) { return 0; } if (pmap_pte_v(pte)) { vm_page_t m; vm_offset_t pa; val = MINCORE_INCORE; if ((*pte & PG_MANAGED) == 0) return val; pa = pmap_pte_pa(pte); m = PHYS_TO_VM_PAGE(pa); /* * Modified by us */ if (m->md.pv_flags & PV_TABLE_MOD) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; /* * Modified by someone */ else if (m->dirty || pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; /* * Referenced by us */ if (m->md.pv_flags & PV_TABLE_REF) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; /* * Referenced by someone */ else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } } return val; } void pmap_activate(struct proc *p) { pmap_t pmap; pmap = vmspace_pmap(p->p_vmspace); if (pmap_active[PCPU_GET(cpuno)] && pmap != pmap_active[PCPU_GET(cpuno)]) { atomic_clear_32(&pmap_active[PCPU_GET(cpuno)]->pm_active, 1 << PCPU_GET(cpuno)); pmap_active[PCPU_GET(cpuno)] = 0; } p->p_addr->u_pcb.pcb_hw.apcb_ptbr = ALPHA_K0SEG_TO_PHYS((vm_offset_t) pmap->pm_lev1) >> PAGE_SHIFT; if (pmap->pm_asn[PCPU_GET(cpuno)].gen != PCPU_GET(current_asngen)) pmap_get_asn(pmap); pmap_active[PCPU_GET(cpuno)] = pmap; atomic_set_32(&pmap->pm_active, 1 << PCPU_GET(cpuno)); p->p_addr->u_pcb.pcb_hw.apcb_asn = pmap->pm_asn[PCPU_GET(cpuno)].asn; if (p == curproc) { alpha_pal_swpctx((u_long)p->p_md.md_pcbpaddr); } } void pmap_deactivate(struct proc *p) { pmap_t pmap; pmap = vmspace_pmap(p->p_vmspace); atomic_clear_32(&pmap->pm_active, 1 << PCPU_GET(cpuno)); pmap_active[PCPU_GET(cpuno)] = 0; } vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { return addr; } #if 0 #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; ALLPROC_LOCK(AP_SHARED); LIST_FOREACH(p, &allproc, p_list) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for(i=0;i<1024;i++) { pd_entry_t *pde; pt_entry_t *pte; unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for(j=0;j<1024;j++) { unsigned va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } ALLPROC_LOCK(AP_RELEASE); return npte; } pte = pmap_pte_quick( pmap, va); if (pte && pmap_pte_v(pte)) { vm_offset_t pa; vm_page_t m; pa = *(int *)pte; m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } ALLPROC_LOCK(AP_RELEASE); return npte; } #endif #if defined(DEBUG) static void pads __P((pmap_t pm)); static void pmap_pvdump __P((vm_page_t m)); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { int i, j; vm_offset_t va; pt_entry_t *ptep; if (pm == kernel_pmap) return; for (i = 0; i < 1024; i++) if (pm->pm_pdir[i]) for (j = 0; j < 1024; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte_quick(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *) ptep); }; } static void pmap_pvdump(pa) vm_offset_t pa; { pv_entry_t pv; 
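	vm_page_t m;	/* needed by the PHYS_TO_VM_PAGE() dump loop below */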
printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { #ifdef used_to_be printf(" -> pmap %x, va %x, flags %x", pv->pv_pmap, pv->pv_va, pv->pv_flags); #endif printf(" -> pmap %x, va %x", pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif #endif Index: head/sys/amd64/amd64/pmap.c =================================================================== --- head/sys/amd64/amd64/pmap.c (revision 71349) +++ head/sys/amd64/amd64/pmap.c (revision 71350) @@ -1,3360 +1,3360 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. 
This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_disable_pse.h" #include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_user_ldt.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) || defined(APIC_IO) #include #include #include #include #include #endif /* SMP || APIC_IO */ #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif #define MINPV 2048 #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) static int protection_codes[8]; static struct pmap kernel_pmap_store; pmap_t kernel_pmap; LIST_HEAD(pmaplist, pmap); struct pmaplist allpmaps; vm_offset_t avail_start; /* PA of first available physical page */ vm_offset_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ static vm_object_t kptobj; static int nkpt; vm_offset_t kernel_vm_end; /* * Data for the pv entry allocation mechanism */ static vm_zone_t pvzone; static struct vm_zone pvzone_store; static struct vm_object pvzone_obj; static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; static int pmap_pagedaemon_waken = 0; static struct pv_entry *pvinit; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; static pt_entry_t *CMAP2, *ptmmap; caddr_t CADDR1 = 0, ptvmmap = 0; static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp=0; /* * Crashdump maps. 
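 *
 * crashdumpmap is MAXDUMPPGS pages of reserved kernel VA; the dump
 * code repoints it at arbitrary physical pages, one page at a time,
 * via pmap_kenter_temporary() below.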
*/ static pt_entry_t *pt_crashdumpmap; static caddr_t crashdumpmap; #ifdef SMP extern pt_entry_t *SMPpt; #endif static pt_entry_t *PMAP1 = 0; static unsigned *PADDR1 = 0; static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv)); static unsigned * get_ptbase __P((pmap_t pmap)); static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static __inline void pmap_changebit __P((vm_page_t m, int bit, boolean_t setem)); static void pmap_remove_all __P((vm_page_t m)); static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)); static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m, vm_offset_t va)); static boolean_t pmap_testbit __P((vm_page_t m, int bit)); static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)); static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex)); static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); static unsigned pdir4mb; /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. */ PMAP_INLINE unsigned * pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { unsigned *pdeaddr; if (pmap) { pdeaddr = (unsigned *) pmap_pde(pmap, va); if (*pdeaddr & PG_PS) return pdeaddr; if (*pdeaddr) { return get_ptbase(pmap) + i386_btop(va); } } return (0); } /* * Move the kernel virtual free pointer to the next * 4MB. This is used to help improve performance * by using a large (4MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) { newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); } #endif return newaddr; } /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(firstaddr, loadaddr) vm_offset_t firstaddr; vm_offset_t loadaddr; { vm_offset_t va; pt_entry_t *pte; avail_start = firstaddr; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize protection array. 
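 *
 * protection_codes[] is indexed by the VM_PROT_{READ,WRITE,EXECUTE}
 * bits, so pte_prot() is a plain table lookup and pmap_enter() can
 * OR the result directly into a new pte.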
*/ i386_protection_init(); /* * The kernel's pmap is statically allocated so we don't have to use * pmap_create, which is unlikely to work correctly at this part of * the boot sequence (XXX and which no longer exists). */ kernel_pmap = &kernel_pmap_store; kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); kernel_pmap->pm_count = 1; kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP2, CADDR2, 1) /* * Crashdump maps. */ SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. * XXX ptmmap is not used. */ SYSMAP(caddr_t, ptmmap, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. * XXX msgbufmap is not used. */ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, atop(round_page(MSGBUF_SIZE))) /* * ptemap is used for pmap_pte_quick */ SYSMAP(unsigned *, PMAP1, PADDR1, 1); virtual_avail = va; *(int *) CMAP1 = *(int *) CMAP2 = 0; *(int *) PTD = 0; pgeflag = 0; #if !defined(SMP) /* XXX - see also mp_machdep.c */ if (cpu_feature & CPUID_PGE) { pgeflag = PG_G; } #endif /* * Initialize the 4MB page size flag */ pseflag = 0; /* * The 4MB page version of the initial * kernel page mapping. */ pdir4mb = 0; #if !defined(DISABLE_PSE) if (cpu_feature & CPUID_PSE) { unsigned ptditmp; /* * Note that we have enabled PSE mode */ pseflag = PG_PS; ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE)); ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; #if !defined(SMP) /* * Enable the PSE mode. */ load_cr4(rcr4() | CR4_PSE); /* * We can do the mapping here for the single processor * case. We simply ignore the old page table page from * now on. */ /* * For SMP, we still need 4K pages to bootstrap APs, * PSE will be enabled as soon as all APs are up. */ PTD[KPTDI] = (pd_entry_t) ptditmp; kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; invltlb(); #endif } #endif #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! (non-SMP hardware?)"); /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif invltlb(); } #ifdef SMP /* * Set 4mb pdir for mp startup */ void pmap_set_opt(void) { if (pseflag && (cpu_feature & CPUID_PSE)) { load_cr4(rcr4() | CR4_PSE); if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */ kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = (pd_entry_t)pdir4mb; cpu_invltlb(); } } } #endif /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_offset_t phys_start, phys_end; { int i; int initial_pvs; /* * object for kernel page table pages */ kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE); /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. 
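 *
 * Every vm_page gets an empty pv list here; the pv_entry zone is only
 * bootstrapped at this point and receives its real limit later in
 * pmap_init2():
 *
 *	pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size;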
*/ for(i = 0; i < vm_page_array_size; i++) { vm_page_t m; m = &vm_page_array[i]; TAILQ_INIT(&m->md.pv_list); m->md.pv_list_count = 0; } /* * init the pv free list */ initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; pvzone = &pvzone_store; pvinit = (struct pv_entry *) kmem_alloc(kernel_map, initial_pvs * sizeof (struct pv_entry)); zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, vm_page_array_size); /* * Now it is safe to enable pv_table recording. */ pmap_initialized = TRUE; } /* * Initialize the address space (zone) for the pv_entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ void pmap_init2() { pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size; pv_entry_high_water = 9 * (pv_entry_max / 10); zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); } /*************************************************** * Low level helper routines..... ***************************************************/ #if defined(PMAP_DIAGNOSTIC) /* * This code checks for non-writeable/modified pages. * This should be an invalid condition. */ static int pmap_nw_modified(pt_entry_t ptea) { int pte; pte = (int) ptea; if ((pte & (PG_M|PG_RW)) == PG_M) return 1; else return 0; } #endif /* * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ static PMAP_INLINE int pmap_track_modified(vm_offset_t va) { if ((va < clean_sva) || (va >= clean_eva)) return 1; else return 0; } static PMAP_INLINE void invltlb_1pg(vm_offset_t va) { #ifdef I386_CPU invltlb(); #else invlpg(va); #endif } static __inline void pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va) { #if defined(SMP) if (pmap->pm_active & (1 << PCPU_GET(cpuid))) cpu_invlpg((void *)va); if (pmap->pm_active & PCPU_GET(other_cpus)) smp_invltlb(); #else if (pmap->pm_active) invltlb_1pg(va); #endif } static __inline void pmap_TLB_invalidate_all(pmap_t pmap) { #if defined(SMP) if (pmap->pm_active & (1 << PCPU_GET(cpuid))) cpu_invltlb(); if (pmap->pm_active & PCPU_GET(other_cpus)) smp_invltlb(); #else if (pmap->pm_active) invltlb(); #endif } static unsigned * get_ptbase(pmap) pmap_t pmap; { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; /* are we current address space or kernel? */ if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { return (unsigned *) PTmap; } /* otherwise, we are alternate address space */ if (frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); #if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); #else invltlb(); #endif } return (unsigned *) APTmap; } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. */ static unsigned * pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { unsigned pde, newpf; if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; unsigned index = i386_btop(va); /* are we current address space or kernel? 
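	 * (if so, the pte can be read straight through PTmap; otherwise
	 * the single PMAP1/PADDR1 window is repointed, with a local
	 * invltlb_1pg(), at the page table page we need)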
*/ if ((pmap == kernel_pmap) || (frame == (((unsigned) PTDpde) & PG_FRAME))) { return (unsigned *) PTmap + index; } newpf = pde & PG_FRAME; if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; invltlb_1pg((vm_offset_t) PADDR1); } return PADDR1 + ((unsigned) index & (NPTEPG - 1)); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_offset_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { vm_offset_t rtval; vm_offset_t pdirindex; pdirindex = va >> PDRSHIFT; if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) { unsigned *pte; if ((rtval & PG_PS) != 0) { rtval &= ~(NBPDR - 1); rtval |= va & (NBPDR - 1); return rtval; } pte = get_ptbase(pmap) + i386_btop(va); rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); return rtval; } return 0; } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * add a wired page to the kva * note that in order for the mapping to take effect -- you * should do a invltlb after doing the pmap_kenter... */ PMAP_INLINE void pmap_kenter(va, pa) vm_offset_t va; register vm_offset_t pa; { register unsigned *pte; unsigned npte, opte; npte = pa | PG_RW | PG_V | pgeflag; pte = (unsigned *)vtopte(va); opte = *pte; *pte = npte; /*if (opte)*/ invltlb_1pg(va); /* XXX what about SMP? */ } /* * remove a page from the kernel pagetables */ PMAP_INLINE void pmap_kremove(va) vm_offset_t va; { register unsigned *pte; pte = (unsigned *)vtopte(va); *pte = 0; invltlb_1pg(va); /* XXX what about SMP? */ } /* * Used to map a range of physical addresses into kernel * virtual address space. * * For now, VM is already on, we only need to map the * specified memory. */ vm_offset_t pmap_map(virt, start, end, prot) vm_offset_t virt; vm_offset_t start; vm_offset_t end; int prot; { while (start < end) { pmap_kenter(virt, start); virt += PAGE_SIZE; start += PAGE_SIZE; } return (virt); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. */ void pmap_qenter(va, m, count) vm_offset_t va; vm_page_t *m; int count; { int i; for (i = 0; i < count; i++) { vm_offset_t tva = va + i * PAGE_SIZE; pmap_kenter(tva, VM_PAGE_TO_PHYS(m[i])); } } /* * this routine jerks page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(va, count) vm_offset_t va; int count; { vm_offset_t end_va; end_va = va + count*PAGE_SIZE; while (va < end_va) { unsigned *pte; pte = (unsigned *)vtopte(va); *pte = 0; #ifdef SMP cpu_invlpg((void *)va); #else invltlb_1pg(va); #endif va += PAGE_SIZE; } #ifdef SMP smp_invltlb(); #endif } static vm_page_t pmap_page_lookup(object, pindex) vm_object_t object; vm_pindex_t pindex; { vm_page_t m; retry: m = vm_page_lookup(object, pindex); if (m && vm_page_sleep_busy(m, FALSE, "pplookp")) goto retry; return m; } /* * Create the UPAGES for a new process. * This routine directly affects the fork perf for a process. 
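 *
 * The UPAGES pages are grabbed from p->p_upages_obj, wired, and
 * entered directly into the kernel ptes backing p->p_addr, so the
 * new process's u-area is resident from the start.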
*/ void pmap_new_proc(p) struct proc *p; { #ifdef I386_CPU int updateneeded; #endif int i; vm_object_t upobj; vm_page_t m; struct user *up; unsigned *ptek, oldpte; /* * allocate object for the upages */ if ((upobj = p->p_upages_obj) == NULL) { upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES); p->p_upages_obj = upobj; } /* get a kernel virtual address for the UPAGES for this proc */ if ((up = p->p_addr) == NULL) { up = (struct user *) kmem_alloc_nofault(kernel_map, UPAGES * PAGE_SIZE); if (up == NULL) panic("pmap_new_proc: u_map allocation failed"); p->p_addr = up; } ptek = (unsigned *) vtopte((vm_offset_t) up); #ifdef I386_CPU updateneeded = 0; #endif for(i=0;iwire_count++; cnt.v_wire_count++; oldpte = *(ptek + i); /* * Enter the page into the kernel address space. */ *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; if (oldpte) { #ifdef I386_CPU updateneeded = 1; #else invlpg((vm_offset_t) up + i * PAGE_SIZE); #endif } vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } #ifdef I386_CPU if (updateneeded) invltlb(); #endif } /* * Dispose the UPAGES for a process that has exited. * This routine directly impacts the exit perf of a process. */ void pmap_dispose_proc(p) struct proc *p; { int i; vm_object_t upobj; vm_page_t m; unsigned *ptek, oldpte; upobj = p->p_upages_obj; ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr); for(i=0;ip_addr + i * PAGE_SIZE); #endif vm_page_unwire(m, 0); vm_page_free(m); } #ifdef I386_CPU invltlb(); #endif } /* * Allow the UPAGES for a process to be prejudicially paged out. */ void pmap_swapout_proc(p) struct proc *p; { int i; vm_object_t upobj; vm_page_t m; upobj = p->p_upages_obj; /* * let the upages be paged */ for(i=0;ip_addr + PAGE_SIZE * i); } } /* * Bring the UPAGES for a specified process back in. */ void pmap_swapin_proc(p) struct proc *p; { int i,rv; vm_object_t upobj; vm_page_t m; upobj = p->p_upages_obj; for(i=0;ip_addr) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid); m = vm_page_lookup(upobj, i); m->valid = VM_PAGE_BITS_ALL; } vm_page_wire(m); vm_page_wakeup(m); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); } } /*************************************************** * Page table page management routines..... ***************************************************/ /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) ; if (m->hold_count == 0) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == (((unsigned) PTDpde) & PG_FRAME)) { /* * Do a invltlb to make the invalidated mapping * take effect immediately. */ pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); pmap_TLB_invalidate(pmap, pteva); } if (pmap->pm_ptphint == m) pmap->pm_ptphint = NULL; /* * If the page is finally unwired, simply free it. 
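	 * (every pte in it has been removed by now, so the page is all
	 * zeroes and vm_page_free_zero() can hand it straight back)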
*/ --m->wire_count; if (m->wire_count == 0) { vm_page_flash(m); vm_page_busy(m); vm_page_free_zero(m); --cnt.v_wire_count; } return 1; } return 0; } static PMAP_INLINE int pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { vm_page_unhold(m); if (m->hold_count == 0) return _pmap_unwire_pte_hold(pmap, m); else return 0; } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap, va, mpte) pmap_t pmap; vm_offset_t va; vm_page_t mpte; { unsigned ptepindex; if (va >= UPT_MIN_ADDRESS) return 0; if (mpte == NULL) { ptepindex = (va >> PDRSHIFT); if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } } return pmap_unwire_pte_hold(pmap, mpte); } void pmap_pinit0(pmap) struct pmap *pmap; { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD); pmap->pm_count = 1; pmap->pm_active = 0; pmap->pm_ptphint = NULL; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg; /* * No need to allocate page table space yet but we do need a valid * page directory table. */ if (pmap->pm_pdir == NULL) pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); /* * allocate object for the ptes */ if (pmap->pm_pteobj == NULL) pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1); /* * allocate the page directory page */ ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); ptdpg->wire_count = 1; ++cnt.v_wire_count; vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/ ptdpg->valid = VM_PAGE_BITS_ALL; pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); if ((ptdpg->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir, PAGE_SIZE); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); /* Wire in kernel global address entries. */ /* XXX copies current process, does not fill in MPPTDI */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); #ifdef SMP pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; #endif /* install self-referential address mapping entry */ *(unsigned *) (pmap->pm_pdir + PTDPTDI) = VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M; pmap->pm_count = 1; pmap->pm_active = 0; pmap->pm_ptphint = NULL; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Wire in kernel global address entries. To avoid a race condition * between pmap initialization and pmap_growkernel, this procedure * should be called after the vmspace is attached to the process * but before this pmap is activated. */ void pmap_pinit2(pmap) struct pmap *pmap; { /* XXX: Remove this stub when no longer called */ } static int pmap_release_free_page(pmap, p) struct pmap *pmap; vm_page_t p; { unsigned *pde = (unsigned *) pmap->pm_pdir; /* * This code optimizes the case of freeing non-busy * page-table pages. Those pages are zero now, and * might as well be placed directly into the zero queue. */ if (vm_page_sleep_busy(p, FALSE, "pmaprl")) return 0; vm_page_busy(p); /* * Remove the page table page from the processes address space. 
*/ pde[p->pindex] = 0; pmap->pm_stats.resident_count--; if (p->hold_count) { panic("pmap_release: freeing held page table page"); } /* * Page directory pages need to have the kernel * stuff cleared, so they can go into the zero queue also. */ if (p->pindex == PTDPTDI) { bzero(pde + KPTDI, nkpt * PTESIZE); #ifdef SMP pde[MPPTDI] = 0; #endif pde[APTDPTDI] = 0; pmap_kremove((vm_offset_t) pmap->pm_pdir); } if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) pmap->pm_ptphint = NULL; p->wire_count--; cnt.v_wire_count--; vm_page_free_zero(p); return 1; } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; unsigned ptepindex; { vm_offset_t pteva, ptepa; vm_page_t m; /* * Find or fabricate a new pagetable page */ m = vm_page_grab(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO | VM_ALLOC_RETRY); KASSERT(m->queue == PQ_NONE, ("_pmap_allocpte: %p->queue != PQ_NONE", m)); if (m->wire_count == 0) cnt.v_wire_count++; m->wire_count++; /* * Increment the hold count for the page table page * (denoting a new mapping.) */ m->hold_count++; /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); /* * Set the page table hint */ pmap->pm_ptphint = m; /* * Try to use the new mapping, but if we cannot, then * do it with the routine that maps the page explicitly. */ if ((m->flags & PG_ZERO) == 0) { if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == (((unsigned) PTDpde) & PG_FRAME)) { pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex); bzero((caddr_t) pteva, PAGE_SIZE); } else { pmap_zero_page(ptepa); } } m->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED); vm_page_wakeup(m); return m; } static vm_page_t pmap_allocpte(pmap, va) pmap_t pmap; vm_offset_t va; { unsigned ptepindex; vm_offset_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; invltlb(); } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { /* * In order to get the page table page, try the * hint first. */ if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { m = pmap->pm_ptphint; } else { m = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = m; } m->hold_count++; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
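 *
 * The page directory page (pindex PTDPTDI) is skipped during the
 * scan and only released at the very end, after every page table
 * page has been freed.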
*/ void pmap_release(pmap) register struct pmap *pmap; { vm_page_t p,n,ptdpg; vm_object_t object = pmap->pm_pteobj; int curgeneration; #if defined(DIAGNOSTIC) if (object->ref_count != 1) panic("pmap_release: pteobj reference count != 1"); #endif ptdpg = NULL; LIST_REMOVE(pmap, pm_list); retry: curgeneration = object->generation; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { n = TAILQ_NEXT(p, listq); if (p->pindex == PTDPTDI) { ptdpg = p; continue; } while (1) { if (!pmap_release_free_page(pmap, p) && (object->generation != curgeneration)) goto retry; } } if (ptdpg && !pmap_release_free_page(pmap, ptdpg)) goto retry; } /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct pmap *pmap; int s; vm_offset_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; s = splhigh(); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); nkpt++; } } addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } /* * This index is bogus, but out of the way */ nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; vm_page_wire(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); pmap_zero_page(ptppaddr); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(PTD, kernel_vm_end) = newpdir; LIST_FOREACH(pmap, &allpmaps, pm_list) { *pmap_pde(pmap, kernel_vm_end) = newpdir; } kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } /* * Retire the given physical map from service. * Should only be called if the map contains * no valid mappings. */ void pmap_destroy(pmap) register pmap_t pmap; { int count; if (pmap == NULL) return; count = --pmap->pm_count; if (count == 0) { pmap_release(pmap); panic("destroying a pmap is not yet implemented"); } } /* * Add a reference to the specified pmap. */ void pmap_reference(pmap) pmap_t pmap; { if (pmap != NULL) { pmap->pm_count++; } } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ static PMAP_INLINE void free_pv_entry(pv) pv_entry_t pv; { pv_entry_count--; - zfreei(pvzone, pv); + zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ static pv_entry_t get_pv_entry(void) { pv_entry_count++; if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) && (pmap_pagedaemon_waken == 0)) { pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } - return zalloci(pvzone); + return zalloc(pvzone); } /* * This routine is very drastic, but can save the system * in a pinch. 
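 *
 * It only runs once get_pv_entry() has pushed pv_entry_count past
 * pv_entry_high_water and woken the pagedaemon; it then walks
 * vm_page_array and calls pmap_remove_all() on every page that is
 * not wired, held or busy, returning those pv entries to the zone.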
*/ void pmap_collect() { int i; vm_page_t m; static int warningdone=0; if (pmap_pagedaemon_waken == 0) return; if (warningdone < 5) { printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); warningdone++; } for(i = 0; i < vm_page_array_size; i++) { m = &vm_page_array[i]; if (m->wire_count || m->hold_count || m->busy || (m->flags & PG_BUSY)) continue; pmap_remove_all(m); } pmap_pagedaemon_waken = 0; } /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ static int pmap_remove_entry(pmap, m, va) struct pmap *pmap; vm_page_t m; vm_offset_t va; { pv_entry_t pv; int rtval; int s; s = splvm(); if (m->md.pv_list_count < pmap->pm_stats.resident_count) { for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pmap == pv->pv_pmap && va == pv->pv_va) break; } } else { for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = TAILQ_NEXT(pv, pv_plist)) { if (va == pv->pv_va) break; } } rtval = 0; if (pv) { rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } splx(s); return rtval; } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap, va, mpte, m) pmap_t pmap; vm_offset_t va; vm_page_t mpte; vm_page_t m; { int s; pv_entry_t pv; s = splvm(); pv = get_pv_entry(); pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_ptem = mpte; TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); m->md.pv_list_count++; splx(s); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap, ptq, va) struct pmap *pmap; unsigned *ptq; vm_offset_t va; { unsigned oldpte; vm_page_t m; oldpte = atomic_readandclear_int(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) invlpg(va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) oldpte)) { printf( "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", va, oldpte); } #endif if (pmap_track_modified(va)) vm_page_dirty(m); } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); return pmap_remove_entry(pmap, m, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap, va) struct pmap *pmap; register vm_offset_t va; { register unsigned *ptq; /* * if there is no pte for this address, just skip it!!! */ if (*pmap_pde(pmap, va) == 0) { return; } /* * get a local va for mappings for this pmap. */ ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { (void) pmap_remove_pte(pmap, ptq, va); pmap_TLB_invalidate(pmap, va); } return; } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. 
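 *
 * A single-page removal (the common case) short-circuits to
 * pmap_remove_page(); 4MB PG_PS mappings are dropped one page
 * directory entry at a time; everything else walks the ptes via
 * get_ptbase() and batches a single TLB flush at the end.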
*/ void pmap_remove(pmap, sva, eva) struct pmap *pmap; register vm_offset_t sva; register vm_offset_t eva; { register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; int anyvalid; if (pmap == NULL) return; if (pmap->pm_stats.resident_count == 0) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if (((sva + PAGE_SIZE) == eva) && (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva); return; } anyvalid = 0; /* * Get a local virtual address for the mappings that are being * worked with. */ ptbase = get_ptbase(pmap); sindex = i386_btop(sva); eindex = i386_btop(eva); for (; sindex < eindex; sindex = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. */ pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); if (pmap->pm_stats.resident_count == 0) break; pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anyvalid++; continue; } /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eindex) { pdnxt = eindex; } for ( ;sindex != pdnxt; sindex++) { vm_offset_t va; if (ptbase[sindex] == 0) { continue; } va = i386_ptob(sindex); anyvalid++; if (pmap_remove_pte(pmap, ptbase + sindex, va)) break; } } if (anyvalid) pmap_TLB_invalidate_all(pmap); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ static void pmap_remove_all(m) vm_page_t m; { register pv_entry_t pv; register unsigned *pte, tpte; int s; #if defined(PMAP_DIAGNOSTIC) /* * XXX this makes pmap_page_protect(NONE) illegal for non-managed * pages! */ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); } #endif s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); tpte = atomic_readandclear_int(pte); if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf( "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", pv->pv_va, tpte); } #endif if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); splx(s); } /* * Set the physical protection on the * specified range of this map as requested. 
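 *
 * Removing read access degenerates into pmap_remove(); granting
 * write access is a no-op here; otherwise PG_RW is cleared pte by
 * pte, with any PG_A/PG_M state harvested into the vm_page before
 * it is lost.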
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { register unsigned *ptbase; vm_offset_t pdnxt, ptpaddr; vm_pindex_t sindex, eindex; int anychanged; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; anychanged = 0; ptbase = get_ptbase(pmap); sindex = i386_btop(sva); eindex = i386_btop(eva); for (; sindex < eindex; sindex = pdnxt) { unsigned pdirindex; pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { (unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anychanged++; continue; } /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; if (pdnxt > eindex) { pdnxt = eindex; } for (; sindex != pdnxt; sindex++) { unsigned pbits; vm_page_t m; pbits = ptbase[sindex]; if (pbits & PG_MANAGED) { m = NULL; if (pbits & PG_A) { m = PHYS_TO_VM_PAGE(pbits); vm_page_flag_set(m, PG_REFERENCED); pbits &= ~PG_A; } if (pbits & PG_M) { if (pmap_track_modified(i386_ptob(sindex))) { if (m == NULL) m = PHYS_TO_VM_PAGE(pbits); vm_page_dirty(m); pbits &= ~PG_M; } } } pbits &= ~PG_RW; if (pbits != ptbase[sindex]) { ptbase[sindex] = pbits; anychanged = 1; } } } if (anychanged) pmap_TLB_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { vm_offset_t pa; register unsigned *pte; vm_offset_t opa; vm_offset_t origpte, newpte; vm_page_t mpte; if (pmap == NULL) return; va &= PG_FRAME; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. 
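	 * For user addresses pmap_allocpte() supplies the page table
	 * page and takes an extra hold reference on it, which is
	 * dropped again below if this turns out to be only a
	 * protection or wiring change.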
*/ if (va < UPT_MIN_ADDRESS) { mpte = pmap_allocpte(pmap, va); } #if 0 && defined(PMAP_DIAGNOSTIC) else { vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va); if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) { panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n", pmap->pm_pdir[PTDPTDI], origpte, va); } if (smp_active) { pdeaddr = (vm_offset_t *) IdlePTDS[PCPU_GET(cpuid)]; if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) { if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr)) printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr); printf("cpuid: %d, pdeaddr: 0x%x\n", PCPU_GET(cpuid), pdeaddr); panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n", pmap->pm_pdir[PTDPTDI], newpte, origpte, va); } } } #endif pte = pmap_pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n", (void *)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; origpte = *(vm_offset_t *)pte; opa = origpte & PG_FRAME; if (origpte & PG_PS) panic("pmap_enter: attempted pmap_enter on 4MB page"); /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) origpte)) { printf( "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", va, origpte); } #endif /* * Remove extra pte reference */ if (mpte) mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; #ifdef SMP cpu_invlpg((void *)va); if (pmap->pm_active & PCPU_GET(other_cpus)) smp_invltlb(); #else invltlb_1pg(va); #endif } return; } /* * We might be turning off write access to the page, * so we go ahead and sense modify status. */ if (origpte & PG_MANAGED) { if ((origpte & PG_M) && pmap_track_modified(va)) { vm_page_t om; om = PHYS_TO_VM_PAGE(opa); vm_page_dirty(om); } pa |= PG_MANAGED; } goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { int err; err = pmap_remove_pte(pmap, pte, va); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_initialized && (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { pmap_insert_entry(pmap, va, mpte, m); pa |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; if (va < UPT_MIN_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. 
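	 * (PG_M and PG_A are masked out of the comparison, so a mapping
	 * that differs only in referenced/modified state is left alone
	 * and no TLB invalidation is issued for it)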
*/ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { #ifdef SMP cpu_invlpg((void *)va); if (pmap->pm_active & PCPU_GET(other_cpus)) smp_invltlb(); #else invltlb_1pg(va); #endif } } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ static vm_page_t pmap_enter_quick(pmap, va, m, mpte) register pmap_t pmap; vm_offset_t va; vm_page_t m; vm_page_t mpte; { unsigned *pte; vm_offset_t pa; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < UPT_MIN_ADDRESS) { unsigned ptepindex; vm_offset_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->hold_count++; } else { retry: /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) panic("pmap_enter_quick: unexpected mapping into 4MB page"); if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } if (mpte == NULL) goto retry; mpte->hold_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = (unsigned *)vtopte(va); if (*pte) { if (mpte) pmap_unwire_pte_hold(pmap, mpte); return 0; } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) pmap_insert_entry(pmap, va, mpte, m); /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m); /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) *pte = pa | PG_V | PG_U; else *pte = pa | PG_V | PG_U | PG_MANAGED; return mpte; } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_offset_t pa, int i) { pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); return ((void *)crashdumpmap); } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap, addr, object, pindex, size, limit) pmap_t pmap; vm_offset_t addr; vm_object_t object; vm_pindex_t pindex; vm_size_t size; int limit; { vm_offset_t tmpidx; int psize; vm_page_t p, mpte; int objpgs; if (pmap == NULL || object == NULL) return; /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. 
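	 * That is, a 4MB-aligned OBJT_DEVICE region with PSE available
	 * is entered as PG_PS page directory entries, one per 4MB,
	 * instead of as individual 4K ptes.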
*/ if (pseflag && (object->type == OBJT_DEVICE) && ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0) ) { int i; vm_page_t m[1]; unsigned int ptepindex; int npdes; vm_offset_t ptepa; if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) return; retry: p = vm_page_lookup(object, pindex); if (p && vm_page_sleep_busy(p, FALSE, "init4p")) goto retry; if (p == NULL) { p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); if (p == NULL) return; m[0] = p; if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { vm_page_free(p); return; } p = vm_page_lookup(object, pindex); vm_page_wakeup(p); } ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) { return; } p->valid = VM_PAGE_BITS_ALL; pmap->pm_stats.resident_count += size >> PAGE_SHIFT; npdes = size >> PDRSHIFT; for(i=0;i<npdes;i++) { pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS); ptepa += NBPDR; ptepindex += 1; } vm_page_flag_set(p, PG_MAPPED); invltlb(); return; } psize = i386_btop(size); if ((object->type != OBJT_VNODE) || (limit && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) { if (object->size < pindex) return; psize = object->size - pindex; } mpte = NULL; /* * if we are processing a major portion of the object, then scan the * entire thing. */ if (psize > (object->resident_page_count >> 2)) { objpgs = psize; for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { continue; } tmpidx -= pindex; if (tmpidx >= psize) { continue; } if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), p, mpte); vm_page_flag_set(p, PG_MAPPED); vm_page_wakeup(p); } objpgs -= 1; } } else { /* * else lookup the pages one-by-one. */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), p, mpte); vm_page_flag_set(p, PG_MAPPED); vm_page_wakeup(p); } } } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time.
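 *
 * A minimal sketch of the clustering pattern, for illustration only,
 * assuming PAGE_SIZE == 4096: candidate addresses fan out one to four
 * pages on either side of the faulting address and are skipped when they
 * fall outside the window [starta, entry_end).
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	#define EX_PAGE_SIZE	4096L
 *
 *	static const long ex_pageorder[] = {
 *		-1 * EX_PAGE_SIZE, 1 * EX_PAGE_SIZE,
 *		-2 * EX_PAGE_SIZE, 2 * EX_PAGE_SIZE,
 *		-3 * EX_PAGE_SIZE, 3 * EX_PAGE_SIZE,
 *		-4 * EX_PAGE_SIZE, 4 * EX_PAGE_SIZE,
 *	};
 *
 *	static void
 *	prefault_candidates(uintptr_t addra, uintptr_t starta, uintptr_t end)
 *	{
 *		uintptr_t addr;
 *		int i;
 *
 *		for (i = 0; i < 8; i++) {
 *			addr = addra + ex_pageorder[i];
 *			if (addr < starta || addr >= end)
 *				continue;
 *			printf("would prefault 0x%lx\n", (unsigned long)addr);
 *		}
 *	}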
*/ #define PFBAK 4 #define PFFOR 4 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE, -3 * PAGE_SIZE, 3 * PAGE_SIZE -4 * PAGE_SIZE, 4 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; vm_page_t m, mpte; vm_object_t object; if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; unsigned *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if ((*pmap_pde(pmap, addr)) == NULL) continue; pte = (unsigned *) vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give-up when a page is not in memory */ if (m == NULL) break; if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->busy == 0) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((m->queue - m->pc) == PQ_CACHE) { vm_page_deactivate(m); } vm_page_busy(m); mpte = pmap_enter_quick(pmap, addr, m, mpte); vm_page_flag_set(m, PG_MAPPED); vm_page_wakeup(m); } } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register unsigned *pte; if (pmap == NULL) return; pte = pmap_pte(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
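 *
 * A minimal sketch of how the copy loop below advances, for illustration
 * only, assuming the i386 values PAGE_SIZE == 4096 and NPTEPG == 1024:
 * each iteration handles at most the 4MB slice covered by one page table
 * page, so the next starting point is the current address rounded up to
 * the next page-table-page boundary (and clamped to the end address).
 *
 *	#include <stdint.h>
 *
 *	#define EX_PAGE_SIZE	4096UL
 *	#define EX_NPTEPG	1024UL
 *	#define EX_PTSPAN	(EX_PAGE_SIZE * EX_NPTEPG)
 *
 *	static uintptr_t
 *	next_pt_boundary(uintptr_t addr)
 *	{
 *		return ((addr + EX_PTSPAN) & ~(EX_PTSPAN - 1));
 *	}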
*/ void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) pmap_t dst_pmap, src_pmap; vm_offset_t dst_addr; vm_size_t len; vm_offset_t src_addr; { vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; unsigned src_frame, dst_frame; vm_page_t m; if (dst_addr != src_addr) return; src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) { return; } dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); #if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); #else invltlb(); #endif } for(addr = src_addr; addr < end_addr; addr = pdnxt) { unsigned *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; vm_offset_t srcptepaddr; unsigned ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); /* * Don't let optional prefaulting of pages make us go * way below the low water mark of free pages or way * above high water mark of used pv entries. */ if (cnt.v_free_count < cnt.v_free_reserved || pv_entry_count > pv_entry_high_water) break; pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); ptepindex = addr >> PDRSHIFT; srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0) { dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } continue; } srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); if ((srcmpte == NULL) || (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) continue; if (pdnxt > end_addr) pdnxt = end_addr; src_pte = (unsigned *) vtopte(addr); dst_pte = (unsigned *) avtopte(addr); while (addr < pdnxt) { unsigned ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { /* * We have to check after allocpte for the * pte still being around... allocpte can * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and * accessed (referenced) bits * during the copy. */ m = PHYS_TO_VM_PAGE(ptetemp); *dst_pte = ptetemp & ~(PG_M | PG_A); dst_pmap->pm_stats.resident_count++; pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { pmap_unwire_pte_hold(dst_pmap, dstmpte); } if (dstmpte->hold_count >= srcmpte->hold_count) break; } addr += PAGE_SIZE; src_pte++; dst_pte++; } } } /* * Routine: pmap_kernel * Function: * Returns the physical map handle for the kernel. */ pmap_t pmap_kernel() { return (kernel_pmap); } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(phys) vm_offset_t phys; { if (*(int *) CMAP2) panic("pmap_zero_page: CMAP2 busy"); *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; invltlb_1pg((vm_offset_t)CADDR2); #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); else #endif bzero(CADDR2, PAGE_SIZE); *(int *) CMAP2 = 0; } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. 
*/ void pmap_zero_page_area(phys, off, size) vm_offset_t phys; int off; int size; { if (*(int *) CMAP2) panic("pmap_zero_page: CMAP2 busy"); *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; invltlb_1pg((vm_offset_t)CADDR2); #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); else #endif bzero((char *)CADDR2 + off, size); *(int *) CMAP2 = 0; } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(src, dst) vm_offset_t src; vm_offset_t dst; { if (*(int *) CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*(int *) CMAP2) panic("pmap_copy_page: CMAP2 busy"); *(int *) CMAP1 = PG_V | (src & PG_FRAME) | PG_A; *(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); #endif bcopy(CADDR1, CADDR2, PAGE_SIZE); *(int *) CMAP1 = 0; *(int *) CMAP2 = 0; } /* * Routine: pmap_pageable * Function: * Make the specified pages (by pmap, offset) * pageable (or not) as requested. * * A page which is not pageable may not take * a fault; therefore, its page table entry * must remain valid for the duration. * * This routine is merely advisory; pmap_enter * will specify that these pages are to be wired * down (or not) as appropriate. */ void pmap_pageable(pmap, sva, eva, pageable) pmap_t pmap; vm_offset_t sva, eva; boolean_t pageable; { } /* * this routine returns true if a physical page resides * in the given pmap. */ boolean_t pmap_page_exists(pmap, m) pmap_t pmap; vm_page_t m; { register pv_entry_t pv; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); /* * Not found, check current mappings returning immediately if found. */ for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } } splx(s); return (FALSE); } #define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap, sva, eva) pmap_t pmap; vm_offset_t sva, eva; { unsigned *pte, tpte; pv_entry_t pv, npv; int s; vm_page_t m; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } #endif s = splvm(); for(pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY pte = (unsigned *)vtopte(pv->pv_va); #else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif tpte = *pte; /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; } *pte = 0; m = PHYS_TO_VM_PAGE(tpte); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %x", tpte)); pv->pv_pmap->pm_stats.resident_count--; /* * Update the vm_page_t clean and reference bits. 
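 *
 * A minimal sketch of the reap-and-harvest step performed here, for
 * illustration only, assuming the x86-style values PG_W == 0x200 (a
 * software "wired" bit) and PG_M == 0x040: wired mappings are left in
 * place, and for everything else the pte is cleared and a hardware-set
 * modify bit is reported so the page can be marked dirty.
 *
 *	#include <stdint.h>
 *
 *	#define EX_PG_W	0x200u
 *	#define EX_PG_M	0x040u
 *
 *	static int
 *	reap_mapping(uint32_t *pte, int *dirtied)
 *	{
 *		uint32_t tpte = *pte;
 *
 *		if (tpte & EX_PG_W)
 *			return (0);
 *		*pte = 0;
 *		*dirtied = (tpte & EX_PG_M) != 0;
 *		return (1);
 *	}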
*/ if (tpte & PG_M) { vm_page_dirty(m); } npv = TAILQ_NEXT(pv, pv_plist); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); m->md.pv_list_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_FIRST(&m->md.pv_list) == NULL) { vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); } pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } splx(s); pmap_TLB_invalidate_all(pmap); } /* * pmap_testbit tests bits in pte's * note that the testbit/changebit routines are inline, * and a lot of things compile-time evaluate. */ static boolean_t pmap_testbit(m, bit) vm_page_t m; int bit; { pv_entry_t pv; unsigned *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; if (TAILQ_FIRST(&m->md.pv_list) == NULL) return FALSE; s = splvm(); for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ if (bit & (PG_A|PG_M)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (*pte & bit) { splx(s); return TRUE; } } splx(s); return (FALSE); } /* * this routine is used to modify bits in ptes */ static __inline void pmap_changebit(m, bit, setem) vm_page_t m; int bit; boolean_t setem; { register pv_entry_t pv; register unsigned *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; s = splvm(); /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? */ for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { /* * don't write protect pager mappings */ if (!setem && (bit == PG_RW)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (setem) { *(int *)pte |= bit; pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); } else { vm_offset_t pbits = *(vm_offset_t *)pte; if (pbits & bit) { if (bit == PG_RW) { if (pbits & PG_M) { vm_page_dirty(m); } *(int *)pte = pbits & ~(PG_M|PG_RW); } else { *(int *)pte = pbits & ~bit; } pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); } } } splx(s); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(vm_page_t m, vm_prot_t prot) { if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { pmap_changebit(m, PG_RW, FALSE); } else { pmap_remove_all(m); } } } vm_offset_t pmap_phys_address(ppn) int ppn; { return (i386_ptob(ppn)); } /* * pmap_ts_referenced: * * Return the count of reference bits for a page, clearing all of them. 
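 *
 * A minimal sketch of the count-and-clear policy used below, for
 * illustration only, assuming PG_A == 0x020: each mapping's accessed bit
 * is tested, cleared and counted, and the scan stops early once a handful
 * of references have been seen, since the caller only needs an estimate
 * of how active the page is.
 *
 *	#include <stdint.h>
 *
 *	#define EX_PG_A		0x020u
 *	#define EX_REF_CAP	4
 *
 *	static int
 *	count_and_clear_refs(uint32_t *ptes[], int nptes)
 *	{
 *		int i, rtval = 0;
 *
 *		for (i = 0; i < nptes; i++) {
 *			if ((*ptes[i] & EX_PG_A) == 0)
 *				continue;
 *			*ptes[i] &= ~EX_PG_A;
 *			if (++rtval > EX_REF_CAP)
 *				break;
 *		}
 *		return (rtval);
 *	}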
*/ int pmap_ts_referenced(vm_page_t m) { register pv_entry_t pv, pvf, pvn; unsigned *pte; int s; int rtval = 0; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return (rtval); s = splvm(); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); if (!pmap_track_modified(pv->pv_va)) continue; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { *pte &= ~PG_A; pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); rtval++; if (rtval > 4) { break; } } } while ((pv = pvn) != NULL && pv != pvf); } splx(s); return (rtval); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { return pmap_testbit(m, PG_M); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { pmap_changebit(m, PG_M, FALSE); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { pmap_changebit(m, PG_A, FALSE); } /* * Miscellaneous support routines follow */ static void i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: /* * Read access is also 0. There isn't any execute bit, * so just make it readable. */ case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = 0; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
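 *
 * A minimal sketch of the offset and rounding arithmetic the routine
 * below relies on, for illustration only, assuming PAGE_SIZE == 4096:
 * the physical address is split into a page-aligned base and an offset,
 * the length is rounded up to whole pages, and the caller gets back the
 * mapped base plus the original offset.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	#define EX_PAGE_SIZE	4096UL
 *	#define EX_PAGE_MASK	(EX_PAGE_SIZE - 1)
 *
 *	static void
 *	mapdev_extent(uintptr_t pa, size_t size,
 *	    uintptr_t *base, size_t *len, size_t *off)
 *	{
 *		*off = pa & EX_PAGE_MASK;
 *		*base = pa & ~EX_PAGE_MASK;
 *		*len = (*off + size + EX_PAGE_MASK) & ~EX_PAGE_MASK;
 *	}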
*/ void * pmap_mapdev(pa, size) vm_offset_t pa; vm_size_t size; { vm_offset_t va, tmpva, offset; unsigned *pte; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); va = kmem_alloc_pageable(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { pte = (unsigned *)vtopte(tmpva); *pte = pa | PG_RW | PG_V | pgeflag; size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } invltlb(); return ((void *)(va + offset)); } void pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { vm_offset_t base, offset; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); kmem_free(kernel_map, base, size); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { unsigned *ptep, pte; vm_page_t m; int val = 0; ptep = pmap_pte(pmap, addr); if (ptep == 0) { return 0; } if ((pte = *ptep) != 0) { vm_offset_t pa; val = MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) return val; pa = pte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); /* * Modified by us */ if (pte & PG_M) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; /* * Modified by someone */ else if (m->dirty || pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; /* * Referenced by us */ if (pte & PG_A) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; /* * Referenced by someone */ else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } } return val; } void pmap_activate(struct proc *p) { pmap_t pmap; pmap = vmspace_pmap(p->p_vmspace); #if defined(SMP) pmap->pm_active |= 1 << PCPU_GET(cpuid); #else pmap->pm_active |= 1; #endif #if defined(SWTCH_OPTIM_STATS) tlb_flush_count++; #endif load_cr3(p->p_addr->u_pcb.pcb_cr3 = vtophys(pmap->pm_pdir)); } vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { return addr; } addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return addr; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; ALLPROC_LOCK(AP_SHARED); LIST_FOREACH(p, &allproc, p_list) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for(i=0;i<1024;i++) { pd_entry_t *pde; unsigned *pte; unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for(j=0;j<1024;j++) { unsigned va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } ALLPROC_LOCK(AP_RELEASE); return npte; } pte = pmap_pte_quick( pmap, va); if (pte && pmap_pte_v(pte)) { vm_offset_t pa; vm_page_t m; pa = *(int *)pte; m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } ALLPROC_LOCK(AP_RELEASE); return npte; } #endif #if defined(DEBUG) static void pads __P((pmap_t pm)); void pmap_pvdump __P((vm_offset_t pa)); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { unsigned va, i, j; unsigned *ptep; if (pm == kernel_pmap) return; for (i = 0; i < 1024; i++) if (pm->pm_pdir[i]) for (j = 0; j < 1024; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte_quick(pm, va); if 
(pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *) ptep); }; } void pmap_pvdump(pa) vm_offset_t pa; { register pv_entry_t pv; vm_page_t m; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { #ifdef used_to_be printf(" -> pmap %p, va %x, flags %x", (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags); #endif printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif Index: head/sys/i386/i386/pmap.c =================================================================== --- head/sys/i386/i386/pmap.c (revision 71349) +++ head/sys/i386/i386/pmap.c (revision 71350) @@ -1,3360 +1,3360 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD$ */ /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. 
* * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_disable_pse.h" #include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_user_ldt.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) || defined(APIC_IO) #include #include #include #include #include #endif /* SMP || APIC_IO */ #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif #define MINPV 2048 #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) static int protection_codes[8]; static struct pmap kernel_pmap_store; pmap_t kernel_pmap; LIST_HEAD(pmaplist, pmap); struct pmaplist allpmaps; vm_offset_t avail_start; /* PA of first available physical page */ vm_offset_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ static vm_object_t kptobj; static int nkpt; vm_offset_t kernel_vm_end; /* * Data for the pv entry allocation mechanism */ static vm_zone_t pvzone; static struct vm_zone pvzone_store; static struct vm_object pvzone_obj; static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; static int pmap_pagedaemon_waken = 0; static struct pv_entry *pvinit; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; static pt_entry_t *CMAP2, *ptmmap; caddr_t CADDR1 = 0, ptvmmap = 0; static caddr_t CADDR2; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp=0; /* * Crashdump maps. 
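 *
 * A minimal sketch of the index arithmetic behind the pmap_pde() and
 * pdir_pde() macros above, for illustration only, assuming the i386
 * two-level layout with PDRSHIFT == 22, PAGE_SHIFT == 12 and
 * NPTEPG == 1024: a virtual address selects its page directory slot with
 * va >> PDRSHIFT and its slot within that page table page with
 * (va >> PAGE_SHIFT) & (NPTEPG - 1).
 *
 *	#include <stdint.h>
 *
 *	#define EX_PAGE_SHIFT	12
 *	#define EX_PDRSHIFT	22
 *	#define EX_NPTEPG	1024u
 *
 *	static unsigned
 *	pde_index(uintptr_t va)
 *	{
 *		return ((unsigned)(va >> EX_PDRSHIFT));
 *	}
 *
 *	static unsigned
 *	pte_index(uintptr_t va)
 *	{
 *		return ((unsigned)(va >> EX_PAGE_SHIFT) & (EX_NPTEPG - 1));
 *	}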
*/ static pt_entry_t *pt_crashdumpmap; static caddr_t crashdumpmap; #ifdef SMP extern pt_entry_t *SMPpt; #endif static pt_entry_t *PMAP1 = 0; static unsigned *PADDR1 = 0; static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv)); static unsigned * get_ptbase __P((pmap_t pmap)); static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static __inline void pmap_changebit __P((vm_page_t m, int bit, boolean_t setem)); static void pmap_remove_all __P((vm_page_t m)); static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)); static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m, vm_offset_t va)); static boolean_t pmap_testbit __P((vm_page_t m, int bit)); static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)); static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex)); static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); static unsigned pdir4mb; /* * Routine: pmap_pte * Function: * Extract the page table entry associated * with the given map/virtual_address pair. */ PMAP_INLINE unsigned * pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; { unsigned *pdeaddr; if (pmap) { pdeaddr = (unsigned *) pmap_pde(pmap, va); if (*pdeaddr & PG_PS) return pdeaddr; if (*pdeaddr) { return get_ptbase(pmap) + i386_btop(va); } } return (0); } /* * Move the kernel virtual free pointer to the next * 4MB. This is used to help improve performance * by using a large (4MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) { newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); } #endif return newaddr; } /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(firstaddr, loadaddr) vm_offset_t firstaddr; vm_offset_t loadaddr; { vm_offset_t va; pt_entry_t *pte; avail_start = firstaddr; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize protection array. 
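 *
 * A minimal sketch of the table i386_protection_init() builds, for
 * illustration only, assuming the usual values VM_PROT_WRITE == 0x2 and
 * PG_RW == 0x002: the three VM_PROT bits index an eight entry array, and
 * since the i386 has no separate execute permission, every combination
 * without write access collapses to 0 and every combination with write
 * access collapses to PG_RW.
 *
 *	#define EX_VM_PROT_WRITE	0x2
 *	#define EX_PG_RW		0x002
 *
 *	static int ex_protection_codes[8];
 *
 *	static void
 *	ex_protection_init(void)
 *	{
 *		int prot;
 *
 *		for (prot = 0; prot < 8; prot++)
 *			ex_protection_codes[prot] =
 *			    (prot & EX_VM_PROT_WRITE) ? EX_PG_RW : 0;
 *	}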
*/ i386_protection_init(); /* * The kernel's pmap is statically allocated so we don't have to use * pmap_create, which is unlikely to work correctly at this part of * the boot sequence (XXX and which no longer exists). */ kernel_pmap = &kernel_pmap_store; kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); kernel_pmap->pm_count = 1; kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP2, CADDR2, 1) /* * Crashdump maps. */ SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. * XXX ptmmap is not used. */ SYSMAP(caddr_t, ptmmap, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. * XXX msgbufmap is not used. */ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, atop(round_page(MSGBUF_SIZE))) /* * ptemap is used for pmap_pte_quick */ SYSMAP(unsigned *, PMAP1, PADDR1, 1); virtual_avail = va; *(int *) CMAP1 = *(int *) CMAP2 = 0; *(int *) PTD = 0; pgeflag = 0; #if !defined(SMP) /* XXX - see also mp_machdep.c */ if (cpu_feature & CPUID_PGE) { pgeflag = PG_G; } #endif /* * Initialize the 4MB page size flag */ pseflag = 0; /* * The 4MB page version of the initial * kernel page mapping. */ pdir4mb = 0; #if !defined(DISABLE_PSE) if (cpu_feature & CPUID_PSE) { unsigned ptditmp; /* * Note that we have enabled PSE mode */ pseflag = PG_PS; ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE)); ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; #if !defined(SMP) /* * Enable the PSE mode. */ load_cr4(rcr4() | CR4_PSE); /* * We can do the mapping here for the single processor * case. We simply ignore the old page table page from * now on. */ /* * For SMP, we still need 4K pages to bootstrap APs, * PSE will be enabled as soon as all APs are up. */ PTD[KPTDI] = (pd_entry_t) ptditmp; kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp; invltlb(); #endif } #endif #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! (non-SMP hardware?)"); /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif invltlb(); } #ifdef SMP /* * Set 4mb pdir for mp startup */ void pmap_set_opt(void) { if (pseflag && (cpu_feature & CPUID_PSE)) { load_cr4(rcr4() | CR4_PSE); if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */ kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = (pd_entry_t)pdir4mb; cpu_invltlb(); } } } #endif /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_offset_t phys_start, phys_end; { int i; int initial_pvs; /* * object for kernel page table pages */ kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE); /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. 
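 *
 * A minimal sketch of the SYSMAP() reservation pattern used in
 * pmap_bootstrap() above, for illustration only, assuming
 * PAGE_SIZE == 4096: a virtual address cursor and a pte cursor advance in
 * lockstep, handing out n pages worth of KVA together with the pte slots
 * that will map them.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	#define EX_PAGE_SIZE	4096UL
 *
 *	struct ex_sysmap_cursor {
 *		uintptr_t	va;
 *		uint32_t	*pte;
 *	};
 *
 *	static void
 *	sysmap_reserve(struct ex_sysmap_cursor *c, size_t n,
 *	    uintptr_t *vap, uint32_t **ptep)
 *	{
 *		*vap = c->va;
 *		*ptep = c->pte;
 *		c->va += n * EX_PAGE_SIZE;
 *		c->pte += n;
 *	}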
*/ for(i = 0; i < vm_page_array_size; i++) { vm_page_t m; m = &vm_page_array[i]; TAILQ_INIT(&m->md.pv_list); m->md.pv_list_count = 0; } /* * init the pv free list */ initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; pvzone = &pvzone_store; pvinit = (struct pv_entry *) kmem_alloc(kernel_map, initial_pvs * sizeof (struct pv_entry)); zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, vm_page_array_size); /* * Now it is safe to enable pv_table recording. */ pmap_initialized = TRUE; } /* * Initialize the address space (zone) for the pv_entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ void pmap_init2() { pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size; pv_entry_high_water = 9 * (pv_entry_max / 10); zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); } /*************************************************** * Low level helper routines..... ***************************************************/ #if defined(PMAP_DIAGNOSTIC) /* * This code checks for non-writeable/modified pages. * This should be an invalid condition. */ static int pmap_nw_modified(pt_entry_t ptea) { int pte; pte = (int) ptea; if ((pte & (PG_M|PG_RW)) == PG_M) return 1; else return 0; } #endif /* * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ static PMAP_INLINE int pmap_track_modified(vm_offset_t va) { if ((va < clean_sva) || (va >= clean_eva)) return 1; else return 0; } static PMAP_INLINE void invltlb_1pg(vm_offset_t va) { #ifdef I386_CPU invltlb(); #else invlpg(va); #endif } static __inline void pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va) { #if defined(SMP) if (pmap->pm_active & (1 << PCPU_GET(cpuid))) cpu_invlpg((void *)va); if (pmap->pm_active & PCPU_GET(other_cpus)) smp_invltlb(); #else if (pmap->pm_active) invltlb_1pg(va); #endif } static __inline void pmap_TLB_invalidate_all(pmap_t pmap) { #if defined(SMP) if (pmap->pm_active & (1 << PCPU_GET(cpuid))) cpu_invltlb(); if (pmap->pm_active & PCPU_GET(other_cpus)) smp_invltlb(); #else if (pmap->pm_active) invltlb(); #endif } static unsigned * get_ptbase(pmap) pmap_t pmap; { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; /* are we current address space or kernel? */ if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { return (unsigned *) PTmap; } /* otherwise, we are alternate address space */ if (frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); #if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); #else invltlb(); #endif } return (unsigned *) APTmap; } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. */ static unsigned * pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { unsigned pde, newpf; if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; unsigned index = i386_btop(va); /* are we current address space or kernel? 
*/ if ((pmap == kernel_pmap) || (frame == (((unsigned) PTDpde) & PG_FRAME))) { return (unsigned *) PTmap + index; } newpf = pde & PG_FRAME; if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; invltlb_1pg((vm_offset_t) PADDR1); } return PADDR1 + ((unsigned) index & (NPTEPG - 1)); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_offset_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { vm_offset_t rtval; vm_offset_t pdirindex; pdirindex = va >> PDRSHIFT; if (pmap && (rtval = (unsigned) pmap->pm_pdir[pdirindex])) { unsigned *pte; if ((rtval & PG_PS) != 0) { rtval &= ~(NBPDR - 1); rtval |= va & (NBPDR - 1); return rtval; } pte = get_ptbase(pmap) + i386_btop(va); rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); return rtval; } return 0; } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * add a wired page to the kva * note that in order for the mapping to take effect -- you * should do a invltlb after doing the pmap_kenter... */ PMAP_INLINE void pmap_kenter(va, pa) vm_offset_t va; register vm_offset_t pa; { register unsigned *pte; unsigned npte, opte; npte = pa | PG_RW | PG_V | pgeflag; pte = (unsigned *)vtopte(va); opte = *pte; *pte = npte; /*if (opte)*/ invltlb_1pg(va); /* XXX what about SMP? */ } /* * remove a page from the kernel pagetables */ PMAP_INLINE void pmap_kremove(va) vm_offset_t va; { register unsigned *pte; pte = (unsigned *)vtopte(va); *pte = 0; invltlb_1pg(va); /* XXX what about SMP? */ } /* * Used to map a range of physical addresses into kernel * virtual address space. * * For now, VM is already on, we only need to map the * specified memory. */ vm_offset_t pmap_map(virt, start, end, prot) vm_offset_t virt; vm_offset_t start; vm_offset_t end; int prot; { while (start < end) { pmap_kenter(virt, start); virt += PAGE_SIZE; start += PAGE_SIZE; } return (virt); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. */ void pmap_qenter(va, m, count) vm_offset_t va; vm_page_t *m; int count; { int i; for (i = 0; i < count; i++) { vm_offset_t tva = va + i * PAGE_SIZE; pmap_kenter(tva, VM_PAGE_TO_PHYS(m[i])); } } /* * this routine jerks page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(va, count) vm_offset_t va; int count; { vm_offset_t end_va; end_va = va + count*PAGE_SIZE; while (va < end_va) { unsigned *pte; pte = (unsigned *)vtopte(va); *pte = 0; #ifdef SMP cpu_invlpg((void *)va); #else invltlb_1pg(va); #endif va += PAGE_SIZE; } #ifdef SMP smp_invltlb(); #endif } static vm_page_t pmap_page_lookup(object, pindex) vm_object_t object; vm_pindex_t pindex; { vm_page_t m; retry: m = vm_page_lookup(object, pindex); if (m && vm_page_sleep_busy(m, FALSE, "pplookp")) goto retry; return m; } /* * Create the UPAGES for a new process. * This routine directly affects the fork perf for a process. 
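 *
 * A minimal sketch of the translation performed by pmap_extract() above,
 * for illustration only, assuming i386 constants (PDRSHIFT == 22, 4KB
 * pages, PG_PS == 0x080, PG_FRAME == 0xfffff000): a 4MB mapping keeps the
 * low 22 bits of the virtual address as its offset, while a normal
 * mapping combines the pte frame with the low 12 bits.
 *
 *	#include <stdint.h>
 *
 *	#define EX_PG_PS	0x080u
 *	#define EX_PG_FRAME	0xfffff000u
 *	#define EX_4M_MASK	((1u << 22) - 1)
 *	#define EX_4K_MASK	0xfffu
 *
 *	static uint32_t
 *	extract_pa(uint32_t pde, uint32_t pte, uint32_t va)
 *	{
 *		if (pde & EX_PG_PS)
 *			return ((pde & ~EX_4M_MASK) | (va & EX_4M_MASK));
 *		return ((pte & EX_PG_FRAME) | (va & EX_4K_MASK));
 *	}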
*/ void pmap_new_proc(p) struct proc *p; { #ifdef I386_CPU int updateneeded; #endif int i; vm_object_t upobj; vm_page_t m; struct user *up; unsigned *ptek, oldpte; /* * allocate object for the upages */ if ((upobj = p->p_upages_obj) == NULL) { upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES); p->p_upages_obj = upobj; } /* get a kernel virtual address for the UPAGES for this proc */ if ((up = p->p_addr) == NULL) { up = (struct user *) kmem_alloc_nofault(kernel_map, UPAGES * PAGE_SIZE); if (up == NULL) panic("pmap_new_proc: u_map allocation failed"); p->p_addr = up; } ptek = (unsigned *) vtopte((vm_offset_t) up); #ifdef I386_CPU updateneeded = 0; #endif for(i=0;iwire_count++; cnt.v_wire_count++; oldpte = *(ptek + i); /* * Enter the page into the kernel address space. */ *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; if (oldpte) { #ifdef I386_CPU updateneeded = 1; #else invlpg((vm_offset_t) up + i * PAGE_SIZE); #endif } vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); m->valid = VM_PAGE_BITS_ALL; } #ifdef I386_CPU if (updateneeded) invltlb(); #endif } /* * Dispose the UPAGES for a process that has exited. * This routine directly impacts the exit perf of a process. */ void pmap_dispose_proc(p) struct proc *p; { int i; vm_object_t upobj; vm_page_t m; unsigned *ptek, oldpte; upobj = p->p_upages_obj; ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr); for(i=0;ip_addr + i * PAGE_SIZE); #endif vm_page_unwire(m, 0); vm_page_free(m); } #ifdef I386_CPU invltlb(); #endif } /* * Allow the UPAGES for a process to be prejudicially paged out. */ void pmap_swapout_proc(p) struct proc *p; { int i; vm_object_t upobj; vm_page_t m; upobj = p->p_upages_obj; /* * let the upages be paged */ for(i=0;ip_addr + PAGE_SIZE * i); } } /* * Bring the UPAGES for a specified process back in. */ void pmap_swapin_proc(p) struct proc *p; { int i,rv; vm_object_t upobj; vm_page_t m; upobj = p->p_upages_obj; for(i=0;ip_addr) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid); m = vm_page_lookup(upobj, i); m->valid = VM_PAGE_BITS_ALL; } vm_page_wire(m); vm_page_wakeup(m); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); } } /*************************************************** * Page table page management routines..... ***************************************************/ /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { while (vm_page_sleep_busy(m, FALSE, "pmuwpt")) ; if (m->hold_count == 0) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == (((unsigned) PTDpde) & PG_FRAME)) { /* * Do a invltlb to make the invalidated mapping * take effect immediately. */ pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); pmap_TLB_invalidate(pmap, pteva); } if (pmap->pm_ptphint == m) pmap->pm_ptphint = NULL; /* * If the page is finally unwired, simply free it. 
*/ --m->wire_count; if (m->wire_count == 0) { vm_page_flash(m); vm_page_busy(m); vm_page_free_zero(m); --cnt.v_wire_count; } return 1; } return 0; } static PMAP_INLINE int pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { vm_page_unhold(m); if (m->hold_count == 0) return _pmap_unwire_pte_hold(pmap, m); else return 0; } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap, va, mpte) pmap_t pmap; vm_offset_t va; vm_page_t mpte; { unsigned ptepindex; if (va >= UPT_MIN_ADDRESS) return 0; if (mpte == NULL) { ptepindex = (va >> PDRSHIFT); if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } } return pmap_unwire_pte_hold(pmap, mpte); } void pmap_pinit0(pmap) struct pmap *pmap; { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD); pmap->pm_count = 1; pmap->pm_active = 0; pmap->pm_ptphint = NULL; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg; /* * No need to allocate page table space yet but we do need a valid * page directory table. */ if (pmap->pm_pdir == NULL) pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE); /* * allocate object for the ptes */ if (pmap->pm_pteobj == NULL) pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1); /* * allocate the page directory page */ ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); ptdpg->wire_count = 1; ++cnt.v_wire_count; vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/ ptdpg->valid = VM_PAGE_BITS_ALL; pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg)); if ((ptdpg->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir, PAGE_SIZE); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); /* Wire in kernel global address entries. */ /* XXX copies current process, does not fill in MPPTDI */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); #ifdef SMP pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; #endif /* install self-referential address mapping entry */ *(unsigned *) (pmap->pm_pdir + PTDPTDI) = VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M; pmap->pm_count = 1; pmap->pm_active = 0; pmap->pm_ptphint = NULL; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Wire in kernel global address entries. To avoid a race condition * between pmap initialization and pmap_growkernel, this procedure * should be called after the vmspace is attached to the process * but before this pmap is activated. */ void pmap_pinit2(pmap) struct pmap *pmap; { /* XXX: Remove this stub when no longer called */ } static int pmap_release_free_page(pmap, p) struct pmap *pmap; vm_page_t p; { unsigned *pde = (unsigned *) pmap->pm_pdir; /* * This code optimizes the case of freeing non-busy * page-table pages. Those pages are zero now, and * might as well be placed directly into the zero queue. */ if (vm_page_sleep_busy(p, FALSE, "pmaprl")) return 0; vm_page_busy(p); /* * Remove the page table page from the processes address space. 
*/ pde[p->pindex] = 0; pmap->pm_stats.resident_count--; if (p->hold_count) { panic("pmap_release: freeing held page table page"); } /* * Page directory pages need to have the kernel * stuff cleared, so they can go into the zero queue also. */ if (p->pindex == PTDPTDI) { bzero(pde + KPTDI, nkpt * PTESIZE); #ifdef SMP pde[MPPTDI] = 0; #endif pde[APTDPTDI] = 0; pmap_kremove((vm_offset_t) pmap->pm_pdir); } if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) pmap->pm_ptphint = NULL; p->wire_count--; cnt.v_wire_count--; vm_page_free_zero(p); return 1; } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; unsigned ptepindex; { vm_offset_t pteva, ptepa; vm_page_t m; /* * Find or fabricate a new pagetable page */ m = vm_page_grab(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO | VM_ALLOC_RETRY); KASSERT(m->queue == PQ_NONE, ("_pmap_allocpte: %p->queue != PQ_NONE", m)); if (m->wire_count == 0) cnt.v_wire_count++; m->wire_count++; /* * Increment the hold count for the page table page * (denoting a new mapping.) */ m->hold_count++; /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); /* * Set the page table hint */ pmap->pm_ptphint = m; /* * Try to use the new mapping, but if we cannot, then * do it with the routine that maps the page explicitly. */ if ((m->flags & PG_ZERO) == 0) { if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == (((unsigned) PTDpde) & PG_FRAME)) { pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex); bzero((caddr_t) pteva, PAGE_SIZE); } else { pmap_zero_page(ptepa); } } m->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(m, PG_ZERO); vm_page_flag_set(m, PG_MAPPED); vm_page_wakeup(m); return m; } static vm_page_t pmap_allocpte(pmap, va) pmap_t pmap; vm_offset_t va; { unsigned ptepindex; vm_offset_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; invltlb(); } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { /* * In order to get the page table page, try the * hint first. */ if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { m = pmap->pm_ptphint; } else { m = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = m; } m->hold_count++; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap) register struct pmap *pmap; { vm_page_t p,n,ptdpg; vm_object_t object = pmap->pm_pteobj; int curgeneration; #if defined(DIAGNOSTIC) if (object->ref_count != 1) panic("pmap_release: pteobj reference count != 1"); #endif ptdpg = NULL; LIST_REMOVE(pmap, pm_list); retry: curgeneration = object->generation; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) { n = TAILQ_NEXT(p, listq); if (p->pindex == PTDPTDI) { ptdpg = p; continue; } while (1) { if (!pmap_release_free_page(pmap, p) && (object->generation != curgeneration)) goto retry; } } if (ptdpg && !pmap_release_free_page(pmap, ptdpg)) goto retry; } /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct pmap *pmap; int s; vm_offset_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; s = splhigh(); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); nkpt++; } } addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } /* * This index is bogus, but out of the way */ nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; vm_page_wire(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); pmap_zero_page(ptppaddr); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(PTD, kernel_vm_end) = newpdir; LIST_FOREACH(pmap, &allpmaps, pm_list) { *pmap_pde(pmap, kernel_vm_end) = newpdir; } kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } /* * Retire the given physical map from service. * Should only be called if the map contains * no valid mappings. */ void pmap_destroy(pmap) register pmap_t pmap; { int count; if (pmap == NULL) return; count = --pmap->pm_count; if (count == 0) { pmap_release(pmap); panic("destroying a pmap is not yet implemented"); } } /* * Add a reference to the specified pmap. */ void pmap_reference(pmap) pmap_t pmap; { if (pmap != NULL) { pmap->pm_count++; } } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ static PMAP_INLINE void free_pv_entry(pv) pv_entry_t pv; { pv_entry_count--; - zfreei(pvzone, pv); + zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ static pv_entry_t get_pv_entry(void) { pv_entry_count++; if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) && (pmap_pagedaemon_waken == 0)) { pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } - return zalloci(pvzone); + return zalloc(pvzone); } /* * This routine is very drastic, but can save the system * in a pinch. 
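 *
 * A minimal sketch of the trigger that gets the pagedaemon, and with it
 * this routine, involved, for illustration only: get_pv_entry() above
 * wakes the pagedaemon once the live pv entry count crosses the
 * high-water mark set up in pmap_init2(), i.e. nine tenths of
 * pv_entry_max; in the kernel the final step is the
 * wakeup(&vm_pages_needed) shown above, omitted here.
 *
 *	static int ex_pv_count, ex_pv_high_water, ex_pagedaemon_waken;
 *
 *	static void
 *	ex_note_pv_alloc(void)
 *	{
 *		ex_pv_count++;
 *		if (ex_pv_high_water != 0 &&
 *		    ex_pv_count > ex_pv_high_water &&
 *		    ex_pagedaemon_waken == 0)
 *			ex_pagedaemon_waken = 1;
 *	}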
*/ void pmap_collect() { int i; vm_page_t m; static int warningdone=0; if (pmap_pagedaemon_waken == 0) return; if (warningdone < 5) { printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); warningdone++; } for(i = 0; i < vm_page_array_size; i++) { m = &vm_page_array[i]; if (m->wire_count || m->hold_count || m->busy || (m->flags & PG_BUSY)) continue; pmap_remove_all(m); } pmap_pagedaemon_waken = 0; } /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ static int pmap_remove_entry(pmap, m, va) struct pmap *pmap; vm_page_t m; vm_offset_t va; { pv_entry_t pv; int rtval; int s; s = splvm(); if (m->md.pv_list_count < pmap->pm_stats.resident_count) { for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pmap == pv->pv_pmap && va == pv->pv_va) break; } } else { for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = TAILQ_NEXT(pv, pv_plist)) { if (va == pv->pv_va) break; } } rtval = 0; if (pv) { rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } splx(s); return rtval; } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap, va, mpte, m) pmap_t pmap; vm_offset_t va; vm_page_t mpte; vm_page_t m; { int s; pv_entry_t pv; s = splvm(); pv = get_pv_entry(); pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_ptem = mpte; TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); m->md.pv_list_count++; splx(s); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap, ptq, va) struct pmap *pmap; unsigned *ptq; vm_offset_t va; { unsigned oldpte; vm_page_t m; oldpte = atomic_readandclear_int(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) invlpg(va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) oldpte)) { printf( "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", va, oldpte); } #endif if (pmap_track_modified(va)) vm_page_dirty(m); } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); return pmap_remove_entry(pmap, m, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap, va) struct pmap *pmap; register vm_offset_t va; { register unsigned *ptq; /* * if there is no pte for this address, just skip it!!! */ if (*pmap_pde(pmap, va) == 0) { return; } /* * get a local va for mappings for this pmap. */ ptq = get_ptbase(pmap) + i386_btop(va); if (*ptq) { (void) pmap_remove_pte(pmap, ptq, va); pmap_TLB_invalidate(pmap, va); } return; } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. 
*/ void pmap_remove(pmap, sva, eva) struct pmap *pmap; register vm_offset_t sva; register vm_offset_t eva; { register unsigned *ptbase; vm_offset_t pdnxt; vm_offset_t ptpaddr; vm_offset_t sindex, eindex; int anyvalid; if (pmap == NULL) return; if (pmap->pm_stats.resident_count == 0) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if (((sva + PAGE_SIZE) == eva) && (((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva); return; } anyvalid = 0; /* * Get a local virtual address for the mappings that are being * worked with. */ ptbase = get_ptbase(pmap); sindex = i386_btop(sva); eindex = i386_btop(eva); for (; sindex < eindex; sindex = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. */ pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); if (pmap->pm_stats.resident_count == 0) break; pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anyvalid++; continue; } /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eindex) { pdnxt = eindex; } for ( ;sindex != pdnxt; sindex++) { vm_offset_t va; if (ptbase[sindex] == 0) { continue; } va = i386_ptob(sindex); anyvalid++; if (pmap_remove_pte(pmap, ptbase + sindex, va)) break; } } if (anyvalid) pmap_TLB_invalidate_all(pmap); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ static void pmap_remove_all(m) vm_page_t m; { register pv_entry_t pv; register unsigned *pte, tpte; int s; #if defined(PMAP_DIAGNOSTIC) /* * XXX this makes pmap_page_protect(NONE) illegal for non-managed * pages! */ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); } #endif s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); tpte = atomic_readandclear_int(pte); if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf( "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", pv->pv_va, tpte); } #endif if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); splx(s); } /* * Set the physical protection on the * specified range of this map as requested. 
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { register unsigned *ptbase; vm_offset_t pdnxt, ptpaddr; vm_pindex_t sindex, eindex; int anychanged; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; anychanged = 0; ptbase = get_ptbase(pmap); sindex = i386_btop(sva); eindex = i386_btop(eva); for (; sindex < eindex; sindex = pdnxt) { unsigned pdirindex; pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); pdirindex = sindex / NPDEPG; if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) { (unsigned) pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anychanged++; continue; } /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; if (pdnxt > eindex) { pdnxt = eindex; } for (; sindex != pdnxt; sindex++) { unsigned pbits; vm_page_t m; pbits = ptbase[sindex]; if (pbits & PG_MANAGED) { m = NULL; if (pbits & PG_A) { m = PHYS_TO_VM_PAGE(pbits); vm_page_flag_set(m, PG_REFERENCED); pbits &= ~PG_A; } if (pbits & PG_M) { if (pmap_track_modified(i386_ptob(sindex))) { if (m == NULL) m = PHYS_TO_VM_PAGE(pbits); vm_page_dirty(m); pbits &= ~PG_M; } } } pbits &= ~PG_RW; if (pbits != ptbase[sindex]) { ptbase[sindex] = pbits; anychanged = 1; } } } if (anychanged) pmap_TLB_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { vm_offset_t pa; register unsigned *pte; vm_offset_t opa; vm_offset_t origpte, newpte; vm_page_t mpte; if (pmap == NULL) return; va &= PG_FRAME; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. 
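 *
 * Only user addresses take this path; kernel page table pages are
 * expected to exist already, so the on-demand allocation is guarded
 * by the UPT_MIN_ADDRESS check:
 *
 *	vm_page_t mpte = NULL;
 *	if (va < UPT_MIN_ADDRESS)
 *		mpte = pmap_allocpte(pmap, va);
 *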
*/ if (va < UPT_MIN_ADDRESS) { mpte = pmap_allocpte(pmap, va); } #if 0 && defined(PMAP_DIAGNOSTIC) else { vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va); if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) { panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n", pmap->pm_pdir[PTDPTDI], origpte, va); } if (smp_active) { pdeaddr = (vm_offset_t *) IdlePTDS[PCPU_GET(cpuid)]; if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) { if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr)) printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr); printf("cpuid: %d, pdeaddr: 0x%x\n", PCPU_GET(cpuid), pdeaddr); panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n", pmap->pm_pdir[PTDPTDI], newpte, origpte, va); } } } #endif pte = pmap_pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n", (void *)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; origpte = *(vm_offset_t *)pte; opa = origpte & PG_FRAME; if (origpte & PG_PS) panic("pmap_enter: attempted pmap_enter on 4MB page"); /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) origpte)) { printf( "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", va, origpte); } #endif /* * Remove extra pte reference */ if (mpte) mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; #ifdef SMP cpu_invlpg((void *)va); if (pmap->pm_active & PCPU_GET(other_cpus)) smp_invltlb(); #else invltlb_1pg(va); #endif } return; } /* * We might be turning off write access to the page, * so we go ahead and sense modify status. */ if (origpte & PG_MANAGED) { if ((origpte & PG_M) && pmap_track_modified(va)) { vm_page_t om; om = PHYS_TO_VM_PAGE(opa); vm_page_dirty(om); } pa |= PG_MANAGED; } goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { int err; err = pmap_remove_pte(pmap, pte, va); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_initialized && (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { pmap_insert_entry(pmap, va, mpte, m); pa |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; if (va < UPT_MIN_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. 
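 *
 * The comparison below masks out PG_M and PG_A, so a mapping that
 * differs only in those status bits is left untouched and no TLB
 * invalidation is issued; only a real change of frame, protection,
 * wiring or the user/global attributes rewrites the pte:
 *
 *	if ((origpte & ~(PG_M|PG_A)) != newpte)
 *		*pte = newpte | PG_A;
 *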
*/ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { #ifdef SMP cpu_invlpg((void *)va); if (pmap->pm_active & PCPU_GET(other_cpus)) smp_invltlb(); #else invltlb_1pg(va); #endif } } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ static vm_page_t pmap_enter_quick(pmap, va, m, mpte) register pmap_t pmap; vm_offset_t va; vm_page_t m; vm_page_t mpte; { unsigned *pte; vm_offset_t pa; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < UPT_MIN_ADDRESS) { unsigned ptepindex; vm_offset_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->hold_count++; } else { retry: /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) panic("pmap_enter_quick: unexpected mapping into 4MB page"); if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == ptepindex)) { mpte = pmap->pm_ptphint; } else { mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); pmap->pm_ptphint = mpte; } if (mpte == NULL) goto retry; mpte->hold_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = (unsigned *)vtopte(va); if (*pte) { if (mpte) pmap_unwire_pte_hold(pmap, mpte); return 0; } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) pmap_insert_entry(pmap, va, mpte, m); /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m); /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) *pte = pa | PG_V | PG_U; else *pte = pa | PG_V | PG_U | PG_MANAGED; return mpte; } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_offset_t pa, int i) { pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); return ((void *)crashdumpmap); } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap, addr, object, pindex, size, limit) pmap_t pmap; vm_offset_t addr; vm_object_t object; vm_pindex_t pindex; vm_size_t size; int limit; { vm_offset_t tmpidx; int psize; vm_page_t p, mpte; int objpgs; if (pmap == NULL || object == NULL) return; /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. 
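 *
 * The shortcut only fires when 4MB pages are available (pseflag),
 * the backing object is a device object, and both addr and size are
 * NBPDR aligned; each loop iteration then installs one PG_PS page
 * directory entry covering NBPDR bytes, so the pde count is simply:
 *
 *	npdes = size >> PDRSHIFT;
 *
 * e.g. a hypothetical 8MB frame buffer needs exactly two pdes.
 *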
*/ if (pseflag && (object->type == OBJT_DEVICE) && ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0) ) { int i; vm_page_t m[1]; unsigned int ptepindex; int npdes; vm_offset_t ptepa; if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) return; retry: p = vm_page_lookup(object, pindex); if (p && vm_page_sleep_busy(p, FALSE, "init4p")) goto retry; if (p == NULL) { p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); if (p == NULL) return; m[0] = p; if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { vm_page_free(p); return; } p = vm_page_lookup(object, pindex); vm_page_wakeup(p); } ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) { return; } p->valid = VM_PAGE_BITS_ALL; pmap->pm_stats.resident_count += size >> PAGE_SHIFT; npdes = size >> PDRSHIFT; for(i=0;i<npdes;i++) { pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS); ptepa += NBPDR; ptepindex += 1; } vm_page_flag_set(p, PG_MAPPED); invltlb(); return; } psize = i386_btop(size); if ((object->type != OBJT_VNODE) || (limit && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) { if (object->size < pindex) return; psize = object->size - pindex; } mpte = NULL; /* * if we are processing a major portion of the object, then scan the * entire thing. */ if (psize > (object->resident_page_count >> 2)) { objpgs = psize; for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { continue; } tmpidx -= pindex; if (tmpidx >= psize) { continue; } if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), p, mpte); vm_page_flag_set(p, PG_MAPPED); vm_page_wakeup(p); } objpgs -= 1; } } else { /* * else lookup the pages one-by-one. */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), p, mpte); vm_page_flag_set(p, PG_MAPPED); vm_page_wakeup(p); } } } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time.
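 *
 * The cluster is deliberately small: at most PFBAK pages behind and
 * PFFOR pages ahead of the faulting address, visited in the order
 * given by pmap_prefault_pageorder, clipped to the containing map
 * entry, and skipped entirely where a pte already exists:
 *
 *	addr = addra + pmap_prefault_pageorder[i];
 *	if (addr < starta || addr >= entry->end)
 *		continue;
 *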
*/ #define PFBAK 4 #define PFFOR 4 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE, -3 * PAGE_SIZE, 3 * PAGE_SIZE -4 * PAGE_SIZE, 4 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; vm_page_t m, mpte; vm_object_t object; if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; unsigned *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if ((*pmap_pde(pmap, addr)) == NULL) continue; pte = (unsigned *) vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give-up when a page is not in memory */ if (m == NULL) break; if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->busy == 0) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((m->queue - m->pc) == PQ_CACHE) { vm_page_deactivate(m); } vm_page_busy(m); mpte = pmap_enter_quick(pmap, addr, m, mpte); vm_page_flag_set(m, PG_MAPPED); vm_page_wakeup(m); } } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register unsigned *pte; if (pmap == NULL) return; pte = pmap_pte(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
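 *
 * When a copy is attempted at all (the implementation below bails
 * out unless dst_addr == src_addr), only managed ptes are
 * duplicated, and the modified and accessed bits are stripped so
 * the destination mapping starts out clean:
 *
 *	*dst_pte = ptetemp & ~(PG_M | PG_A);
 *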
*/ void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) pmap_t dst_pmap, src_pmap; vm_offset_t dst_addr; vm_size_t len; vm_offset_t src_addr; { vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; unsigned src_frame, dst_frame; vm_page_t m; if (dst_addr != src_addr) return; src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) { return; } dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME; if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) { APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V); #if defined(SMP) /* The page directory is not shared between CPUs */ cpu_invltlb(); #else invltlb(); #endif } for(addr = src_addr; addr < end_addr; addr = pdnxt) { unsigned *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; vm_offset_t srcptepaddr; unsigned ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); /* * Don't let optional prefaulting of pages make us go * way below the low water mark of free pages or way * above high water mark of used pv entries. */ if (cnt.v_free_count < cnt.v_free_reserved || pv_entry_count > pv_entry_high_water) break; pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); ptepindex = addr >> PDRSHIFT; srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0) { dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } continue; } srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); if ((srcmpte == NULL) || (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) continue; if (pdnxt > end_addr) pdnxt = end_addr; src_pte = (unsigned *) vtopte(addr); dst_pte = (unsigned *) avtopte(addr); while (addr < pdnxt) { unsigned ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { /* * We have to check after allocpte for the * pte still being around... allocpte can * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and * accessed (referenced) bits * during the copy. */ m = PHYS_TO_VM_PAGE(ptetemp); *dst_pte = ptetemp & ~(PG_M | PG_A); dst_pmap->pm_stats.resident_count++; pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { pmap_unwire_pte_hold(dst_pmap, dstmpte); } if (dstmpte->hold_count >= srcmpte->hold_count) break; } addr += PAGE_SIZE; src_pte++; dst_pte++; } } } /* * Routine: pmap_kernel * Function: * Returns the physical map handle for the kernel. */ pmap_t pmap_kernel() { return (kernel_pmap); } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(phys) vm_offset_t phys; { if (*(int *) CMAP2) panic("pmap_zero_page: CMAP2 busy"); *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; invltlb_1pg((vm_offset_t)CADDR2); #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); else #endif bzero(CADDR2, PAGE_SIZE); *(int *) CMAP2 = 0; } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. 
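 *
 * Like pmap_zero_page() above, this routine relies on the reserved
 * CMAP2 pte and its CADDR2 window: install a writable mapping for
 * the target frame, flush the stale TLB entry, touch the memory
 * through CADDR2, then clear the pte again.  The skeleton of that
 * idiom:
 *
 *	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 *	invltlb_1pg((vm_offset_t)CADDR2);
 *	bzero(CADDR2, PAGE_SIZE);
 *	*(int *) CMAP2 = 0;
 *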
*/ void pmap_zero_page_area(phys, off, size) vm_offset_t phys; int off; int size; { if (*(int *) CMAP2) panic("pmap_zero_page: CMAP2 busy"); *(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; invltlb_1pg((vm_offset_t)CADDR2); #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); else #endif bzero((char *)CADDR2 + off, size); *(int *) CMAP2 = 0; } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(src, dst) vm_offset_t src; vm_offset_t dst; { if (*(int *) CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*(int *) CMAP2) panic("pmap_copy_page: CMAP2 busy"); *(int *) CMAP1 = PG_V | (src & PG_FRAME) | PG_A; *(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); #endif bcopy(CADDR1, CADDR2, PAGE_SIZE); *(int *) CMAP1 = 0; *(int *) CMAP2 = 0; } /* * Routine: pmap_pageable * Function: * Make the specified pages (by pmap, offset) * pageable (or not) as requested. * * A page which is not pageable may not take * a fault; therefore, its page table entry * must remain valid for the duration. * * This routine is merely advisory; pmap_enter * will specify that these pages are to be wired * down (or not) as appropriate. */ void pmap_pageable(pmap, sva, eva, pageable) pmap_t pmap; vm_offset_t sva, eva; boolean_t pageable; { } /* * this routine returns true if a physical page resides * in the given pmap. */ boolean_t pmap_page_exists(pmap, m) pmap_t pmap; vm_page_t m; { register pv_entry_t pv; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); /* * Not found, check current mappings returning immediately if found. */ for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } } splx(s); return (FALSE); } #define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap, sva, eva) pmap_t pmap; vm_offset_t sva, eva; { unsigned *pte, tpte; pv_entry_t pv, npv; int s; vm_page_t m; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } #endif s = splvm(); for(pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY pte = (unsigned *)vtopte(pv->pv_va); #else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif tpte = *pte; /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; } *pte = 0; m = PHYS_TO_VM_PAGE(tpte); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %x", tpte)); pv->pv_pmap->pm_stats.resident_count--; /* * Update the vm_page_t clean and reference bits. 
*/ if (tpte & PG_M) { vm_page_dirty(m); } npv = TAILQ_NEXT(pv, pv_plist); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); m->md.pv_list_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_FIRST(&m->md.pv_list) == NULL) { vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); } pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } splx(s); pmap_TLB_invalidate_all(pmap); } /* * pmap_testbit tests bits in pte's * note that the testbit/changebit routines are inline, * and a lot of things compile-time evaluate. */ static boolean_t pmap_testbit(m, bit) vm_page_t m; int bit; { pv_entry_t pv; unsigned *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; if (TAILQ_FIRST(&m->md.pv_list) == NULL) return FALSE; s = splvm(); for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ if (bit & (PG_A|PG_M)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (*pte & bit) { splx(s); return TRUE; } } splx(s); return (FALSE); } /* * this routine is used to modify bits in ptes */ static __inline void pmap_changebit(m, bit, setem) vm_page_t m; int bit; boolean_t setem; { register pv_entry_t pv; register unsigned *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; s = splvm(); /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? */ for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { /* * don't write protect pager mappings */ if (!setem && (bit == PG_RW)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (setem) { *(int *)pte |= bit; pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); } else { vm_offset_t pbits = *(vm_offset_t *)pte; if (pbits & bit) { if (bit == PG_RW) { if (pbits & PG_M) { vm_page_dirty(m); } *(int *)pte = pbits & ~(PG_M|PG_RW); } else { *(int *)pte = pbits & ~bit; } pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); } } } splx(s); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(vm_page_t m, vm_prot_t prot) { if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { pmap_changebit(m, PG_RW, FALSE); } else { pmap_remove_all(m); } } } vm_offset_t pmap_phys_address(ppn) int ppn; { return (i386_ptob(ppn)); } /* * pmap_ts_referenced: * * Return the count of reference bits for a page, clearing all of them. 
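 *
 * Each pv entry visited below is rotated to the tail of the page's
 * pv list, so repeated calls sample different mappings first, and
 * the scan gives up once more than four referenced ptes have been
 * seen and their PG_A bits cleared:
 *
 *	rtval++;
 *	if (rtval > 4)
 *		break;
 *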
*/ int pmap_ts_referenced(vm_page_t m) { register pv_entry_t pv, pvf, pvn; unsigned *pte; int s; int rtval = 0; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return (rtval); s = splvm(); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); if (!pmap_track_modified(pv->pv_va)) continue; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { *pte &= ~PG_A; pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va); rtval++; if (rtval > 4) { break; } } } while ((pv = pvn) != NULL && pv != pvf); } splx(s); return (rtval); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { return pmap_testbit(m, PG_M); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { pmap_changebit(m, PG_M, FALSE); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { pmap_changebit(m, PG_A, FALSE); } /* * Miscellaneous support routines follow */ static void i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: /* * Read access is also 0. There isn't any execute bit, * so just make it readable. */ case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = 0; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
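 *
 * The usual consumer is a driver mapping a device register window:
 * pass in a physical address and length, get back a kernel virtual
 * pointer, and release it later with pmap_unmapdev().  A sketch with
 * a made-up physical address and size:
 *
 *	void *regs = pmap_mapdev(0xfc000000, 0x1000);
 *	...
 *	pmap_unmapdev((vm_offset_t)regs, 0x1000);
 *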
*/ void * pmap_mapdev(pa, size) vm_offset_t pa; vm_size_t size; { vm_offset_t va, tmpva, offset; unsigned *pte; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); va = kmem_alloc_pageable(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; for (tmpva = va; size > 0;) { pte = (unsigned *)vtopte(tmpva); *pte = pa | PG_RW | PG_V | pgeflag; size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } invltlb(); return ((void *)(va + offset)); } void pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { vm_offset_t base, offset; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); kmem_free(kernel_map, base, size); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { unsigned *ptep, pte; vm_page_t m; int val = 0; ptep = pmap_pte(pmap, addr); if (ptep == 0) { return 0; } if ((pte = *ptep) != 0) { vm_offset_t pa; val = MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) return val; pa = pte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); /* * Modified by us */ if (pte & PG_M) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; /* * Modified by someone */ else if (m->dirty || pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; /* * Referenced by us */ if (pte & PG_A) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; /* * Referenced by someone */ else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } } return val; } void pmap_activate(struct proc *p) { pmap_t pmap; pmap = vmspace_pmap(p->p_vmspace); #if defined(SMP) pmap->pm_active |= 1 << PCPU_GET(cpuid); #else pmap->pm_active |= 1; #endif #if defined(SWTCH_OPTIM_STATS) tlb_flush_count++; #endif load_cr3(p->p_addr->u_pcb.pcb_cr3 = vtophys(pmap->pm_pdir)); } vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { return addr; } addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return addr; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; ALLPROC_LOCK(AP_SHARED); LIST_FOREACH(p, &allproc, p_list) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for(i=0;i<1024;i++) { pd_entry_t *pde; unsigned *pte; unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for(j=0;j<1024;j++) { unsigned va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } ALLPROC_LOCK(AP_RELEASE); return npte; } pte = pmap_pte_quick( pmap, va); if (pte && pmap_pte_v(pte)) { vm_offset_t pa; vm_page_t m; pa = *(int *)pte; m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } ALLPROC_LOCK(AP_RELEASE); return npte; } #endif #if defined(DEBUG) static void pads __P((pmap_t pm)); void pmap_pvdump __P((vm_offset_t pa)); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { unsigned va, i, j; unsigned *ptep; if (pm == kernel_pmap) return; for (i = 0; i < 1024; i++) if (pm->pm_pdir[i]) for (j = 0; j < 1024; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte_quick(pm, va); if 
(pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *) ptep); }; } void pmap_pvdump(pa) vm_offset_t pa; { register pv_entry_t pv; vm_page_t m; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { #ifdef used_to_be printf(" -> pmap %p, va %x, flags %x", (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags); #endif printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif Index: head/sys/ia64/ia64/pmap.c =================================================================== --- head/sys/ia64/ia64/pmap.c (revision 71349) +++ head/sys/ia64/ia64/pmap.c (revision 71350) @@ -1,2349 +1,2349 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 1998,2000 Doug Rabson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp * with some ideas from NetBSD's alpha pmap * $FreeBSD$ */ /* * Manages physical address maps. * * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. 
However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ /* * Following the Linux model, region IDs are allocated in groups of * eight so that a single region ID can be used for as many RRs as we * want by encoding the RR number into the low bits of the ID. * * We reserve region ID 0 for the kernel and allocate the remaining * IDs for user pmaps. * * Region 0..4 * User virtually mapped * * Region 5 * Kernel virtually mapped * * Region 6 * Kernel physically mapped uncacheable * * Region 7 * Kernel physically mapped cacheable */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures"); #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif #define MINPV 2048 #if 0 #define PMAP_DIAGNOSTIC #define PMAP_DEBUG #endif #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif #if 0 static void pmap_break(void) { } /* #define PMAP_DEBUG_VA(va) if ((va) == 0x120058000) pmap_break(); else */ #endif #ifndef PMAP_DEBUG_VA #define PMAP_DEBUG_VA(va) do {} while(0) #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pte_w(pte) ((pte)->pte_ig & PTE_IG_WIRED) #define pmap_pte_managed(pte) ((pte)->pte_ig & PTE_IG_MANAGED) #define pmap_pte_v(pte) ((pte)->pte_p) #define pmap_pte_pa(pte) (((pte)->pte_ppn) << 12) #define pmap_pte_prot(pte) (((pte)->pte_ar << 2) | (pte)->pte_pl) #define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \ :((pte)->pte_ig &= ~PTE_IG_WIRED)) #define pmap_pte_set_prot(pte, v) do { \ (pte)->pte_ar = v >> 2; \ (pte)->pte_pl = v & 3; \ } while (0) /* * Given a map and a machine independent protection code, * convert to an ia64 protection code. */ #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p]) int protection_codes[2][8]; /* * Return non-zero if this pmap is currently active */ #define pmap_isactive(pmap) (pmap->pm_active) /* * Statically allocated kernel pmap */ static struct pmap kernel_pmap_store; pmap_t kernel_pmap; vm_offset_t avail_start; /* PA of first available physical page */ vm_offset_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */ vm_offset_t kernel_vm_end; /* * Values for ptc.e. XXX values for SKI. 
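 *
 * These parameters drive the loop pmap_invalidate_all() below has to
 * run to flush the whole TLB with ptc.e: count1 outer iterations and
 * count2 inner iterations, advancing the operand by stride2 inside
 * and stride1 outside, starting from the base address:
 *
 *	addr = pmap_pte_e_base;
 *	for (i = 0; i < pmap_pte_e_count1; i++) {
 *		for (j = 0; j < pmap_pte_e_count2; j++) {
 *			ia64_ptc_e(addr);
 *			addr += pmap_pte_e_stride2;
 *		}
 *		addr += pmap_pte_e_stride1;
 *	}
 *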
*/ static u_int64_t pmap_pte_e_base = 0x100000000; static u_int64_t pmap_pte_e_count1 = 3; static u_int64_t pmap_pte_e_count2 = 2; static u_int64_t pmap_pte_e_stride1 = 0x2000; static u_int64_t pmap_pte_e_stride2 = 0x100000000; /* * Data for the RID allocator */ static int pmap_nextrid; static int pmap_ridbits = 18; /* * Data for the pv entry allocation mechanism */ static vm_zone_t pvzone; static struct vm_zone pvzone_store; static struct vm_object pvzone_obj; static vm_zone_t pvbootzone; static struct vm_zone pvbootzone_store; static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0; static int pmap_pagedaemon_waken = 0; static struct pv_entry *pvinit; static struct pv_entry *pvbootinit; static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv)); static pv_entry_t get_pv_entry __P((void)); static void ia64_protection_init __P((void)); static void pmap_remove_all __P((vm_page_t m)); static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, vm_page_t m)); vm_offset_t pmap_steal_memory(vm_size_t size) { vm_size_t bank_size; vm_offset_t pa, va; size = round_page(size); bank_size = phys_avail[1] - phys_avail[0]; while (size > bank_size) { int i; for (i = 0; phys_avail[i+2]; i+= 2) { phys_avail[i] = phys_avail[i+2]; phys_avail[i+1] = phys_avail[i+3]; } phys_avail[i] = 0; phys_avail[i+1] = 0; if (!phys_avail[0]) panic("pmap_steal_memory: out of memory"); bank_size = phys_avail[1] - phys_avail[0]; } pa = phys_avail[0]; phys_avail[0] += size; va = IA64_PHYS_TO_RR7(pa); bzero((caddr_t) va, size); return va; } /* * Bootstrap the system enough to run with virtual memory. */ void pmap_bootstrap() { int i; int boot_pvs; /* * Setup RIDs. We use the bits above pmap_ridbits for a * generation counter, saving generation zero for * 'invalid'. RIDs 0..7 are reserved for the kernel. */ pmap_nextrid = (1 << pmap_ridbits) + 8; avail_start = phys_avail[0]; for (i = 0; phys_avail[i+2]; i+= 2) ; avail_end = phys_avail[i+1]; virtual_avail = IA64_RR_BASE(5); virtual_end = IA64_RR_BASE(6)-1; /* * Initialize protection array. */ ia64_protection_init(); /* * The kernel's pmap is statically allocated so we don't have to use * pmap_create, which is unlikely to work correctly at this part of * the boot sequence (XXX and which no longer exists). */ kernel_pmap = &kernel_pmap_store; kernel_pmap->pm_rid = 0; kernel_pmap->pm_count = 1; kernel_pmap->pm_active = 1; TAILQ_INIT(&kernel_pmap->pm_pvlist); /* * Region 5 is mapped via the vhpt. */ ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1); /* * Region 6 is direct mapped UC and region 7 is direct mapped * WC. The details of this is controlled by the Alt {I,D}TLB * handlers. Here we just make sure that they have the largest * possible page size to minimise TLB usage. */ ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2)); ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2)); /* * We need some PVs to cope with pmap_kenter() calls prior to * pmap_init(). This is all a bit flaky and needs to be * rethought, probably by avoiding the zone allocator * entirely. */ boot_pvs = 32768; pvbootzone = &pvbootzone_store; pvbootinit = (struct pv_entry *) pmap_steal_memory(boot_pvs * sizeof (struct pv_entry)); zbootinit(pvbootzone, "PV ENTRY", sizeof (struct pv_entry), pvbootinit, boot_pvs); /* * Set up proc0's PCB. */ #if 0 proc0.p_addr->u_pcb.pcb_hw.apcb_asn = 0; #endif } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. 
* pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_offset_t phys_start, phys_end; { int i; int initial_pvs; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ for(i = 0; i < vm_page_array_size; i++) { vm_page_t m; m = &vm_page_array[i]; TAILQ_INIT(&m->md.pv_list); m->md.pv_list_count = 0; } /* * init the pv free list */ initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; pvzone = &pvzone_store; pvinit = (struct pv_entry *) kmem_alloc(kernel_map, initial_pvs * sizeof (struct pv_entry)); zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, vm_page_array_size); /* * Now it is safe to enable pv_table recording. */ pmap_initialized = TRUE; } /* * Initialize the address space (zone) for the pv_entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ void pmap_init2() { pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size; pv_entry_high_water = 9 * (pv_entry_max / 10); zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); } /*************************************************** * Manipulate TLBs for a pmap ***************************************************/ static void pmap_invalidate_rid(pmap_t pmap) { KASSERT(pmap != kernel_pmap, ("changing kernel_pmap's RID")); KASSERT(pmap == PCPU_GET(current_pmap), ("invalidating RID of non-current pmap")); pmap_remove_pages(pmap, IA64_RR_BASE(0), IA64_RR_BASE(5)); pmap->pm_rid = 0; } static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { KASSERT(pmap == PCPU_GET(current_pmap), ("invalidating TLB for non-current pmap")); ia64_ptc_l(va, PAGE_SHIFT << 2); } static void pmap_invalidate_all(pmap_t pmap) { u_int64_t addr; int i, j; u_int32_t psr; KASSERT(pmap == PCPU_GET(current_pmap), ("invalidating TLB for non-current pmap")); psr = save_intr(); disable_intr(); addr = pmap_pte_e_base; for (i = 0; i < pmap_pte_e_count1; i++) { for (j = 0; j < pmap_pte_e_count2; j++) { ia64_ptc_e(addr); addr += pmap_pte_e_stride2; } addr += pmap_pte_e_stride1; } restore_intr(psr); } static void pmap_get_rid(pmap_t pmap) { if ((pmap_nextrid & ((1 << pmap_ridbits) - 1)) == 0) { /* * Start a new ASN generation. * * Invalidate all per-process mappings and I-cache */ pmap_nextrid += 8; /* * Since we are about to start re-using ASNs, we must * clear out the TLB. * with the ASN. */ #if 0 IA64_TBIAP(); ia64_pal_imb(); /* XXX overkill? */ #endif } pmap->pm_rid = pmap_nextrid; pmap_nextrid += 8; } /*************************************************** * Low level helper routines..... ***************************************************/ /* * Install a pte into the VHPT */ static PMAP_INLINE void pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte) { u_int64_t *vhp, *p; /* invalidate the pte */ atomic_set_64(&vhpte->pte_tag, 1L << 63); ia64_mf(); /* make sure everyone sees */ vhp = (u_int64_t *) vhpte; p = (u_int64_t *) pte; vhp[0] = p[0]; vhp[1] = p[1]; vhp[2] = p[2]; /* sets ti to one */ ia64_mf(); } /* * Compare essential parts of pte. */ static PMAP_INLINE int pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2) { return *(u_int64_t *) pte1 == *(u_int64_t *) pte2; } /* * this routine defines the region(s) of memory that should * not be tested for the modified bit. 
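 *
 * In practice that region is the kernel's clean map, the range
 * [clean_sva, clean_eva) used for transient buffer and pager
 * mappings; only addresses outside it have dirty bits reflected
 * back into the vm_page_t, which reduces to:
 *
 *	return ((va < clean_sva) || (va >= clean_eva));
 *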
*/ static PMAP_INLINE int pmap_track_modified(vm_offset_t va) { if ((va < clean_sva) || (va >= clean_eva)) return 1; else return 0; } /* * Create the UPAGES for a new process. * This routine directly affects the fork perf for a process. */ void pmap_new_proc(struct proc *p) { struct user *up; /* * Use contigmalloc for user area so that we can use a region * 7 address for it which makes it impossible to accidentally * lose when recording a trapframe. */ up = contigmalloc(UPAGES * PAGE_SIZE, M_PMAP, M_WAITOK, 0ul, 256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024); p->p_md.md_uservirt = up; p->p_addr = (struct user *) IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t) up)); } /* * Dispose the UPAGES for a process that has exited. * This routine directly impacts the exit perf of a process. */ void pmap_dispose_proc(p) struct proc *p; { contigfree(p->p_md.md_uservirt, UPAGES * PAGE_SIZE, M_PMAP); p->p_md.md_uservirt = 0; p->p_addr = 0; } /* * Allow the UPAGES for a process to be prejudicially paged out. */ void pmap_swapout_proc(p) struct proc *p; { #if 0 int i; vm_object_t upobj; vm_page_t m; /* * Make sure we aren't fpcurproc. */ ia64_fpstate_save(p, 1); upobj = p->p_upages_obj; /* * let the upages be paged */ for(i=0;ip_addr + PAGE_SIZE * i); } #endif } /* * Bring the UPAGES for a specified process back in. */ void pmap_swapin_proc(p) struct proc *p; { #if 0 int i,rv; vm_object_t upobj; vm_page_t m; upobj = p->p_upages_obj; for(i=0;ip_addr) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid); m = vm_page_lookup(upobj, i); m->valid = VM_PAGE_BITS_ALL; } vm_page_wire(m); vm_page_wakeup(m); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); } #endif } /*************************************************** * Page table page management routines..... ***************************************************/ void pmap_pinit0(pmap) struct pmap *pmap; { /* * kernel_pmap is the same as any other pmap. */ pmap_pinit(pmap); pmap->pm_flags = 0; pmap->pm_rid = 0; pmap->pm_count = 1; pmap->pm_ptphint = NULL; pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { pmap->pm_flags = 0; pmap->pm_rid = 0; pmap->pm_count = 1; pmap->pm_ptphint = NULL; pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Wire in kernel global address entries. To avoid a race condition * between pmap initialization and pmap_growkernel, this procedure * should be called after the vmspace is attached to the process * but before this pmap is activated. */ void pmap_pinit2(pmap) struct pmap *pmap; { } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { #if defined(DIAGNOSTIC) if (object->ref_count != 1) panic("pmap_release: pteobj reference count != 1"); #endif } /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { } /* * Retire the given physical map from service. 
* Should only be called if the map contains * no valid mappings. */ void pmap_destroy(pmap_t pmap) { int count; if (pmap == NULL) return; count = --pmap->pm_count; if (count == 0) { pmap_release(pmap); panic("destroying a pmap is not yet implemented"); } } /* * Add a reference to the specified pmap. */ void pmap_reference(pmap_t pmap) { if (pmap != NULL) { pmap->pm_count++; } } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ static PMAP_INLINE void free_pv_entry(pv_entry_t pv) { pv_entry_count--; - zfreei(pvzone, pv); + zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ static pv_entry_t get_pv_entry(void) { if (!pvinit) - return zalloci(pvbootzone); + return zalloc(pvbootzone); pv_entry_count++; if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) && (pmap_pagedaemon_waken == 0)) { pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } - return (pv_entry_t) IA64_PHYS_TO_RR7(vtophys(zalloci(pvzone))); + return (pv_entry_t) IA64_PHYS_TO_RR7(vtophys(zalloc(pvzone))); } /* * Add a pv_entry to the VHPT. */ static void pmap_enter_vhpt(pv_entry_t pv) { struct ia64_lpte *vhpte; vhpte = (struct ia64_lpte *) ia64_thash(pv->pv_va); pv->pv_pte.pte_chain = vhpte->pte_chain; vhpte->pte_chain = ia64_tpa((vm_offset_t) pv); if (!vhpte->pte_p && pv->pv_pte.pte_p) pmap_install_pte(vhpte, &pv->pv_pte); else ia64_mf(); } /* * Update VHPT after pv->pv_pte has changed. */ static void pmap_update_vhpt(pv_entry_t pv) { struct ia64_lpte *vhpte; vhpte = (struct ia64_lpte *) ia64_thash(pv->pv_va); if ((!vhpte->pte_p || vhpte->pte_tag == pv->pv_pte.pte_tag) && pv->pv_pte.pte_p) pmap_install_pte(vhpte, &pv->pv_pte); } /* * Remove a pv_entry from the VHPT. Return true if it worked. */ static int pmap_remove_vhpt(pv_entry_t pv) { struct ia64_lpte *pte; struct ia64_lpte *lpte; struct ia64_lpte *vhpte; u_int64_t tag; vhpte = (struct ia64_lpte *) ia64_thash(pv->pv_va); /* * If the VHPTE is invalid, there can't be a collision chain. */ if (!vhpte->pte_p) { KASSERT(!vhpte->pte_chain, ("bad vhpte")); return 0; } lpte = vhpte; pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain); tag = ia64_ttag(pv->pv_va); while (pte->pte_tag != tag) { lpte = pte; if (pte->pte_chain) pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain); else return 0; /* error here? */ } /* * Snip this pv_entry out of the collision chain. */ lpte->pte_chain = pte->pte_chain; /* * If the VHPTE matches as well, change it to map the first * element from the chain if there is one. */ if (vhpte->pte_tag == tag) { if (vhpte->pte_chain) { pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain); pmap_install_pte(vhpte, pte); } else { vhpte->pte_p = 0; ia64_mf(); } } return 1; } /* * Make a pv_entry_t which maps the given virtual address. The pte * will be initialised with pte_p = 0. The function pmap_set_pv() * should be called to change the value of the pte. * Must be called at splvm(). 
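 *
 * All of the VHPT manipulation above shares one lookup pattern:
 * ia64_thash(va) locates the hash bucket, ia64_ttag(va) supplies the
 * tag to match, and collisions are chained through pte_chain as
 * physical addresses.  In outline, mirroring pmap_find_pv() below:
 *
 *	pte = (struct ia64_lpte *) ia64_thash(va);
 *	if (!pte->pte_chain)
 *		return 0;
 *	tag = ia64_ttag(va);
 *	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
 *	while (pte->pte_tag != tag) {
 *		if (!pte->pte_chain)
 *			return 0;
 *		pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
 *	}
 *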
*/ static pv_entry_t pmap_make_pv(pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = get_pv_entry(); bzero(pv, sizeof(*pv)); pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_pte.pte_p = 0; /* invalid for now */ pv->pv_pte.pte_ma = PTE_MA_WB; /* cacheable, write-back */ pv->pv_pte.pte_a = 0; pv->pv_pte.pte_d = 0; pv->pv_pte.pte_pl = 0; /* privilege level 0 */ pv->pv_pte.pte_ar = 3; /* read/write/execute */ pv->pv_pte.pte_ppn = 0; /* physical address */ pv->pv_pte.pte_ed = 0; pv->pv_pte.pte_ig = 0; pv->pv_pte.pte_ps = PAGE_SHIFT; /* page size */ pv->pv_pte.pte_key = 0; /* protection key */ pv->pv_pte.pte_tag = ia64_ttag(va); pmap_enter_vhpt(pv); TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); pmap->pm_stats.resident_count++; return pv; } /* * Initialise a pv_entry_t with a given physical address and * protection code. If the passed vm_page_t is non-zero, the entry is * added to its list of mappings. * Must be called at splvm(). */ static void pmap_set_pv(pmap_t pmap, pv_entry_t pv, vm_offset_t pa, int prot, vm_page_t m) { if (pv->pv_pte.pte_p && pv->pv_pte.pte_ig & PTE_IG_MANAGED) { vm_offset_t opa = pv->pv_pte.pte_ppn << 12; vm_page_t om = PHYS_TO_VM_PAGE(opa); TAILQ_REMOVE(&om->md.pv_list, pv, pv_list); om->md.pv_list_count--; if (TAILQ_FIRST(&om->md.pv_list) == NULL) vm_page_flag_clear(om, PG_MAPPED | PG_WRITEABLE); } pv->pv_pte.pte_p = 1; /* set to valid */ /* * Only track access/modify for managed pages. */ if (m) { pv->pv_pte.pte_a = 0; pv->pv_pte.pte_d = 0; } else { pv->pv_pte.pte_a = 1; pv->pv_pte.pte_d = 1; } pv->pv_pte.pte_pl = prot & 3; /* privilege level */ pv->pv_pte.pte_ar = prot >> 2; /* access rights */ pv->pv_pte.pte_ppn = pa >> 12; /* physical address */ if (m) { pv->pv_pte.pte_ig |= PTE_IG_MANAGED; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); m->md.pv_list_count++; } /* * Update the VHPT entry if it needs to change. */ pmap_update_vhpt(pv); } /* * Remove a mapping represented by a particular pv_entry_t. If the * passed vm_page_t is non-zero, then the entry is removed from it. * Must be called at splvm(). */ static int pmap_remove_pv(pmap_t pmap, pv_entry_t pv, vm_page_t m) { int rtval; /* * First remove from the VHPT. */ rtval = pmap_remove_vhpt(pv); if (!rtval) return rtval; if ((pv->pv_pte.pte_ig & PTE_IG_MANAGED) && m) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); } TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); pmap->pm_stats.resident_count--; free_pv_entry(pv); return (rtval); } /* * Find a pv given a pmap and virtual address. */ static pv_entry_t pmap_find_pv(pmap_t pmap, vm_offset_t va) { struct ia64_lpte *pte; u_int64_t tag; pte = (struct ia64_lpte *) ia64_thash(va); if (!pte->pte_chain) return 0; tag = ia64_ttag(va); pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain); while (pte->pte_tag != tag) { if (pte->pte_chain) pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain); else return 0; } return (pv_entry_t) pte; /* XXX wrong va */ } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_offset_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { pv_entry_t pv = pmap_find_pv(pmap, va); if (pv) return pmap_pte_pa(&pv->pv_pte); else return 0; } /*************************************************** * Low level mapping routines..... 
***************************************************/ /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. */ void pmap_qenter(vm_offset_t va, vm_page_t *m, int count) { int i, inval; pv_entry_t pv; for (i = 0; i < count; i++) { vm_offset_t tva = va + i * PAGE_SIZE; pv = pmap_find_pv(kernel_pmap, tva); inval = 0; if (!pv) pv = pmap_make_pv(kernel_pmap, tva); else inval = 1; PMAP_DEBUG_VA(va); pmap_set_pv(kernel_pmap, pv, VM_PAGE_TO_PHYS(m[i]), (PTE_AR_RWX<<2) | PTE_PL_KERN, 0); if (inval) pmap_invalidate_page(kernel_pmap, tva); } } /* * this routine jerks page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(va, count) vm_offset_t va; int count; { int i; pv_entry_t pv; for (i = 0; i < count; i++) { pv = pmap_find_pv(kernel_pmap, va); PMAP_DEBUG_VA(va); if (pv) { pmap_remove_pv(kernel_pmap, pv, 0); pmap_invalidate_page(kernel_pmap, va); } va += PAGE_SIZE; } } /* * Add a wired page to the kva. */ void pmap_kenter(vm_offset_t va, vm_offset_t pa) { pv_entry_t pv; pv = pmap_find_pv(kernel_pmap, va); if (!pv) pv = pmap_make_pv(kernel_pmap, va); pmap_set_pv(kernel_pmap, pv, pa, (PTE_AR_RWX<<2) | PTE_PL_KERN, 0); pmap_invalidate_page(kernel_pmap, va); } /* * Remove a page from the kva */ void pmap_kremove(vm_offset_t va) { pv_entry_t pv; pv = pmap_find_pv(kernel_pmap, va); if (pv) { pmap_remove_pv(kernel_pmap, pv, 0); pmap_invalidate_page(kernel_pmap, va); } } /* * Used to map a range of physical addresses into kernel * virtual address space. * * For now, VM is already on, we only need to map the * specified memory. */ vm_offset_t pmap_map(vm_offset_t virt, vm_offset_t start, vm_offset_t end, int prot) { /* * XXX We should really try to use larger pagesizes here to * cut down the number of PVs used. */ while (start < end) { pmap_kenter(virt, start); virt += PAGE_SIZE; start += PAGE_SIZE; } return (virt); } /* * This routine is very drastic, but can save the system * in a pinch. */ void pmap_collect() { int i; vm_page_t m; static int warningdone=0; if (pmap_pagedaemon_waken == 0) return; if (warningdone < 5) { printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); warningdone++; } for(i = 0; i < vm_page_array_size; i++) { m = &vm_page_array[i]; if (m->wire_count || m->hold_count || m->busy || (m->flags & PG_BUSY)) continue; pmap_remove_all(m); } pmap_pagedaemon_waken = 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va) { pv_entry_t pv; vm_page_t m; int rtval; int s; s = splvm(); pv = pmap_find_pv(pmap, va); rtval = 0; if (pv) { m = PHYS_TO_VM_PAGE(pmap_pte_pa(&pv->pv_pte)); rtval = pmap_remove_pv(pmap, pv, m); pmap_invalidate_page(pmap, va); } splx(s); return; } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va, nva; if (pmap == NULL) return; if (pmap->pm_stats.resident_count == 0) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. 
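 *
 * For larger ranges the code below picks whichever walk is cheaper,
 * comparing the span against the pmap's resident page count:
 *
 *	if (atop(eva - sva) < pmap->pm_stats.resident_count) {
 *		for (va = sva; va < eva; va += PAGE_SIZE)
 *			pmap_remove_page(pmap, va);
 *	}
 *
 * otherwise the pmap's own pv list is walked once and every entry
 * whose pv_va falls in [sva, eva) is removed.
 *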
*/ if (sva + PAGE_SIZE == eva) { pmap_remove_page(pmap, sva); return; } if (atop(eva - sva) < pmap->pm_stats.resident_count) { for (va = sva; va < eva; va = nva) { pmap_remove_page(pmap, va); nva = va + PAGE_SIZE; } } else { pv_entry_t pv, pvnext; int s; s = splvm(); for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = pvnext) { pvnext = TAILQ_NEXT(pv, pv_plist); if (pv->pv_va >= sva && pv->pv_va < eva) { vm_page_t m = PHYS_TO_VM_PAGE(pmap_pte_pa(&pv->pv_pte)); va = pv->pv_va; pmap_remove_pv(pmap, pv, m); pmap_invalidate_page(pmap, va); } } splx(s); } } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ static void pmap_remove_all(vm_page_t m) { register pv_entry_t pv; int nmodify; int s; nmodify = 0; #if defined(PMAP_DIAGNOSTIC) /* * XXX this makes pmap_page_protect(NONE) illegal for non-managed * pages! */ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m)); } #endif s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { vm_page_t m = PHYS_TO_VM_PAGE(pmap_pte_pa(&pv->pv_pte)); vm_offset_t va = pv->pv_va; pmap_remove_pv(pv->pv_pmap, pv, m); pmap_invalidate_page(pv->pv_pmap, va); } vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); splx(s); return; } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { pmap_t oldpmap; pv_entry_t pv; int newprot; if (pmap == NULL) return; oldpmap = pmap_install(pmap); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); pmap_install(oldpmap); return; } if (prot & VM_PROT_WRITE) { pmap_install(oldpmap); return; } newprot = pte_prot(pmap, prot); if ((sva & PAGE_MASK) || (eva & PAGE_MASK)) panic("pmap_protect: unaligned addresses"); while (sva < eva) { /* * If page is invalid, skip this page */ pv = pmap_find_pv(pmap, sva); if (!pv) { sva += PAGE_SIZE; continue; } if (pmap_pte_prot(&pv->pv_pte) != newprot) { pmap_pte_set_prot(&pv->pv_pte, newprot); pmap_update_vhpt(pv); pmap_invalidate_page(pmap, sva); } sva += PAGE_SIZE; } pmap_install(oldpmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { pmap_t oldpmap; vm_offset_t pa; pv_entry_t pv; vm_offset_t opa; struct ia64_lpte origpte; int managed; if (pmap == NULL) return; oldpmap = pmap_install(pmap); va &= ~PAGE_MASK; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); #endif pv = pmap_find_pv(pmap, va); if (!pv) pv = pmap_make_pv(pmap, va); origpte = pv->pv_pte; if (origpte.pte_p) opa = pmap_pte_pa(&origpte); else opa = 0; pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK; managed = 0; /* * Mapping has not changed, must be protection or wiring change. */ if (origpte.pte_p && (opa == pa)) { /* * Wiring change, just update stats. 
We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte.pte_ig & PTE_IG_WIRED)) pmap->pm_stats.wired_count--; managed = origpte.pte_ig & PTE_IG_MANAGED; goto validate; } else { /* * Mapping has changed, invalidate old range and fall * through to handle validating new mapping. */ } /* * Increment counters */ if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. * This enters the pv_entry_t on the page's list if necessary. */ pmap_set_pv(pmap, pv, pa, pte_prot(pmap, prot), m); if (wired) pv->pv_pte.pte_ig |= PTE_IG_WIRED; /* * if the mapping or permission bits are different, we need * to invalidate the page. */ if (!pmap_equal_pte(&origpte, &pv->pv_pte)) { PMAP_DEBUG_VA(va); pmap_invalidate_page(pmap, va); } pmap_install(oldpmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ static void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; int s; s = splvm(); pv = pmap_find_pv(pmap, va); if (!pv) pv = pmap_make_pv(pmap, va); /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ PMAP_DEBUG_VA(va); pmap_set_pv(pmap, pv, VM_PAGE_TO_PHYS(m), (PTE_AR_R << 2) | PTE_PL_USER, m); splx(s); } /* * Make temporary mapping for a physical address. This is called * during dump. */ void * pmap_kenter_temporary(vm_offset_t pa, int i) { return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE)); } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int limit) { pmap_t oldpmap; vm_offset_t tmpidx; int psize; vm_page_t p; int objpgs; if (pmap == NULL || object == NULL) return; oldpmap = pmap_install(pmap); psize = ia64_btop(size); if ((object->type != OBJT_VNODE) || (limit && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { pmap_install(oldpmap); return; } if (psize + pindex > object->size) psize = object->size - pindex; /* * if we are processing a major portion of the object, then scan the * entire thing. */ if (psize > (object->resident_page_count >> 2)) { objpgs = psize; for (p = TAILQ_FIRST(&object->memq); ((objpgs > 0) && (p != NULL)); p = TAILQ_NEXT(p, listq)) { tmpidx = p->pindex; if (tmpidx < pindex) { continue; } tmpidx -= pindex; if (tmpidx >= psize) { continue; } if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); pmap_enter_quick(pmap, addr + ia64_ptob(tmpidx), p); vm_page_flag_set(p, PG_MAPPED); vm_page_wakeup(p); } objpgs -= 1; } } else { /* * else lookup the pages one-by-one. 
*/ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); if (p && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); pmap_enter_quick(pmap, addr + ia64_ptob(tmpidx), p); vm_page_flag_set(p, PG_MAPPED); vm_page_wakeup(p); } } } pmap_install(oldpmap); return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time. */ #define PFBAK 4 #define PFFOR 4 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -PAGE_SIZE, PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE, -3 * PAGE_SIZE, 3 * PAGE_SIZE -4 * PAGE_SIZE, 4 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; vm_page_t m, mpte; vm_object_t object; if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; pv_entry_t pv; addr = addra + pmap_prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; pv = pmap_find_pv(pmap, addr); if (pv) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give-up when a page is not in memory */ if (m == NULL) break; if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((m->queue - m->pc) == PQ_CACHE) { vm_page_deactivate(m); } vm_page_busy(m); pmap_enter_quick(pmap, addr, m); vm_page_flag_set(m, PG_MAPPED); vm_page_wakeup(m); } } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { pmap_t oldpmap; pv_entry_t pv; if (pmap == NULL) return; oldpmap = pmap_install(pmap); pv = pmap_find_pv(pmap, va); if (wired && !pmap_pte_w(&pv->pv_pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(&pv->pv_pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(&pv->pv_pte, wired); pmap_install(oldpmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * Routine: pmap_kernel * Function: * Returns the physical map handle for the kernel. 
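/*
 * Editor's sketch (not part of this commit): pmap_prefault() probes outward
 * from the faulting address one page at a time on either side, in the order
 * shown below.  Note that the pmap_prefault_pageorder initializer above
 * appears to be missing a comma between "3 * PAGE_SIZE" and "-4 * PAGE_SIZE",
 * which would fold those two entries into a single -PAGE_SIZE entry.
 */
static int example_prefault_order[PAGEORDER_SIZE] = {
        -1 * PAGE_SIZE,  1 * PAGE_SIZE,         /* immediate neighbours first */
        -2 * PAGE_SIZE,  2 * PAGE_SIZE,
        -3 * PAGE_SIZE,  3 * PAGE_SIZE,
        -4 * PAGE_SIZE,  4 * PAGE_SIZE          /* then progressively farther out */
};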
*/ pmap_t pmap_kernel() { return (kernel_pmap); } /* * pmap_zero_page zeros the specified hardware page by * mapping it into virtual memory and using bzero to clear * its contents. */ void pmap_zero_page(vm_offset_t pa) { vm_offset_t va = IA64_PHYS_TO_RR7(pa); bzero((caddr_t) va, PAGE_SIZE); } /* * pmap_zero_page_area zeros the specified hardware page by * mapping it into virtual memory and using bzero to clear * its contents. * * off and size must reside within a single page. */ void pmap_zero_page_area(vm_offset_t pa, int off, int size) { vm_offset_t va = IA64_PHYS_TO_RR7(pa); bzero((char *)(caddr_t)va + off, size); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_offset_t src, vm_offset_t dst) { src = IA64_PHYS_TO_RR7(src); dst = IA64_PHYS_TO_RR7(dst); bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE); } /* * Routine: pmap_pageable * Function: * Make the specified pages (by pmap, offset) * pageable (or not) as requested. * * A page which is not pageable may not take * a fault; therefore, its page table entry * must remain valid for the duration. * * This routine is merely advisory; pmap_enter * will specify that these pages are to be wired * down (or not) as appropriate. */ void pmap_pageable(pmap, sva, eva, pageable) pmap_t pmap; vm_offset_t sva, eva; boolean_t pageable; { } /* * this routine returns true if a physical page resides * in the given pmap. */ boolean_t pmap_page_exists(pmap, m) pmap_t pmap; vm_page_t m; { register pv_entry_t pv; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); /* * Not found, check current mappings returning immediately if found. */ for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } } splx(s); return (FALSE); } #define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap, sva, eva) pmap_t pmap; vm_offset_t sva, eva; { pv_entry_t pv, npv; int s; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } #endif s = splvm(); for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { vm_page_t m; npv = TAILQ_NEXT(pv, pv_plist); if (pv->pv_va >= eva || pv->pv_va < sva) { continue; } /* * We cannot remove wired pages from a process' mapping at this time */ if (pv->pv_pte.pte_ig & PTE_IG_WIRED) { continue; } PMAP_DEBUG_VA(pv->pv_va); m = PHYS_TO_VM_PAGE(pmap_pte_pa(&pv->pv_pte)); pmap_remove_pv(pmap, pv, m); } splx(s); pmap_invalidate_all(pmap); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. 
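/*
 * Editor's sketch (not part of this commit): pmap_zero_page() and
 * pmap_copy_page() above lean on the region-7 identity mapping, so any
 * physical page can be touched through IA64_PHYS_TO_RR7() without first
 * entering a temporary mapping.  "example_clear_word" is a hypothetical
 * helper shown only to illustrate the pattern.
 */
static void
example_clear_word(vm_offset_t pa)
{
        u_int64_t *p;

        p = (u_int64_t *)IA64_PHYS_TO_RR7(pa); /* direct-mapped kernel alias */
        *p = 0;
}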
*/ void pmap_page_protect(vm_page_t m, vm_prot_t prot) { pv_entry_t pv; if ((prot & VM_PROT_WRITE) != 0) return; if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { int newprot = pte_prot(pv->pv_pmap, prot); pmap_t oldpmap = pmap_install(pv->pv_pmap); pmap_pte_set_prot(&pv->pv_pte, newprot); pmap_update_vhpt(pv); pmap_invalidate_page(pv->pv_pmap, pv->pv_va); pmap_install(oldpmap); } } else { pmap_remove_all(m); } } vm_offset_t pmap_phys_address(ppn) int ppn; { return (ia64_ptob(ppn)); } /* * pmap_ts_referenced: * * Return the count of reference bits for a page, clearing all of them. * */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv; int count = 0; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return 0; for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pte.pte_a) { pmap_t oldpmap = pmap_install(pv->pv_pmap); count++; pv->pv_pte.pte_a = 0; pmap_update_vhpt(pv); pmap_invalidate_page(pv->pv_pmap, pv->pv_va); pmap_install(oldpmap); } } return count; } #if 0 /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ static boolean_t pmap_is_referenced(vm_page_t m) { pv_entry_t pv; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pte.pte_a) { return 1; } } return 0; } #endif /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { pv_entry_t pv; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pte.pte_d) { return 1; } } return 0; } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { pv_entry_t pv; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pte.pte_d) { pmap_t oldpmap = pmap_install(pv->pv_pmap); pv->pv_pte.pte_d = 0; pmap_update_vhpt(pv); pmap_invalidate_page(pv->pv_pmap, pv->pv_va); pmap_install(oldpmap); } } } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. 
*/ void pmap_clear_reference(vm_page_t m) { pv_entry_t pv; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return; for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pte.pte_a) { pmap_t oldpmap = pmap_install(pv->pv_pmap); pv->pv_pte.pte_a = 0; pmap_update_vhpt(pv); pmap_invalidate_page(pv->pv_pmap, pv->pv_va); pmap_install(oldpmap); } } } /* * Miscellaneous support routines follow */ static void ia64_protection_init() { int prot, *kp, *up; kp = protection_codes[0]; up = protection_codes[1]; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: *kp++ = (PTE_AR_R << 2) | PTE_PL_KERN; *up++ = (PTE_AR_R << 2) | PTE_PL_KERN; break; case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN; *up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: *kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN; *up++ = (PTE_AR_RW << 2) | PTE_PL_USER; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN; *up++ = (PTE_AR_RWX << 2) | PTE_PL_USER; break; case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: *kp++ = (PTE_AR_R << 2) | PTE_PL_KERN; *up++ = (PTE_AR_R << 2) | PTE_PL_USER; break; case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN; *up++ = (PTE_AR_RX << 2) | PTE_PL_USER; break; case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: *kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN; *up++ = (PTE_AR_RW << 2) | PTE_PL_USER; break; case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN; *up++ = (PTE_AR_RWX << 2) | PTE_PL_USER; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev(pa, size) vm_offset_t pa; vm_size_t size; { return (void*) IA64_PHYS_TO_RR6(pa); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { pv_entry_t pv; struct ia64_lpte *pte; int val = 0; pv = pmap_find_pv(pmap, addr); if (pv == 0) { return 0; } pte = &pv->pv_pte; if (pmap_pte_v(pte)) { vm_page_t m; vm_offset_t pa; val = MINCORE_INCORE; if ((pte->pte_ig & PTE_IG_MANAGED) == 0) return val; pa = pmap_pte_pa(pte); m = PHYS_TO_VM_PAGE(pa); /* * Modified by us */ if (pte->pte_d) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; /* * Modified by someone */ else if (pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; /* * Referenced by us */ if (pte->pte_a) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; /* * Referenced by someone */ else if (pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } } return val; } void pmap_activate(struct proc *p) { pmap_install(vmspace_pmap(p->p_vmspace)); } pmap_t pmap_install(pmap_t pmap) { pmap_t oldpmap; int rid; oldpmap = PCPU_GET(current_pmap); if (pmap == oldpmap || pmap == kernel_pmap) return pmap; PCPU_SET(current_pmap, pmap); if (!pmap) { /* * RIDs 0..4 have no mappings to make sure we generate * page faults on accesses. 
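/*
 * Editor's note (not part of this commit): the region register values
 * written below pack three fields; reading the code, the layout is
 *
 *      (rid << 8) | (page size << 2) | (VHPT walker enable bit)
 *
 * so each ia64_set_rr() call installs a region id, the system page size and
 * the enable bit in one store.  A hypothetical helper for the same encoding:
 */
#define EXAMPLE_RR_VALUE(rid)   (((rid) << 8) | (PAGE_SHIFT << 2) | 1)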
*/ ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1); ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1); ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1); ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1); ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1); return oldpmap; } pmap->pm_active = 1; /* XXX use bitmap for SMP */ reinstall: rid = pmap->pm_rid & ((1 << pmap_ridbits) - 1); ia64_set_rr(IA64_RR_BASE(0), ((rid + 0) << 8)|(PAGE_SHIFT << 2)|1); ia64_set_rr(IA64_RR_BASE(1), ((rid + 1) << 8)|(PAGE_SHIFT << 2)|1); ia64_set_rr(IA64_RR_BASE(2), ((rid + 2) << 8)|(PAGE_SHIFT << 2)|1); ia64_set_rr(IA64_RR_BASE(3), ((rid + 3) << 8)|(PAGE_SHIFT << 2)|1); ia64_set_rr(IA64_RR_BASE(4), ((rid + 4) << 8)|(PAGE_SHIFT << 2)|1); /* * If we need a new RID, get it now. Note that we need to * remove our old mappings (if any) from the VHTP, so we will * run on the old RID for a moment while we invalidate the old * one. XXX maybe we should just clear out the VHTP when the * RID generation rolls over. */ if ((pmap->pm_rid>>pmap_ridbits) != (pmap_nextrid>>pmap_ridbits)) { if (pmap->pm_rid) pmap_invalidate_rid(pmap); pmap_get_rid(pmap); goto reinstall; } return oldpmap; } vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { return addr; } #if 0 #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; ALLPROC_LOCK(AP_SHARED); LIST_FOREACH(p, &allproc, p_list) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for(i=0;i<1024;i++) { pd_entry_t *pde; pt_entry_t *pte; unsigned base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for(j=0;j<1024;j++) { unsigned va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } ALLPROC_LOCK(AP_RELEASE); return npte; } pte = pmap_pte_quick( pmap, va); if (pte && pmap_pte_v(pte)) { vm_offset_t pa; vm_page_t m; pa = *(int *)pte; m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } ALLPROC_LOCK(AP_RELEASE); return npte; } #endif #if defined(DEBUG) static void pads __P((pmap_t pm)); static void pmap_pvdump __P((vm_page_t m)); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { int i, j; vm_offset_t va; pt_entry_t *ptep; if (pm == kernel_pmap) return; for (i = 0; i < 1024; i++) if (pm->pm_pdir[i]) for (j = 0; j < 1024; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte_quick(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *) ptep); }; } static void pmap_pvdump(pa) vm_offset_t pa; { pv_entry_t pv; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = TAILQ_NEXT(pv, pv_list)) { printf(" -> pmap %x, va %x", pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif #endif Index: head/sys/kern/uipc_socket.c =================================================================== --- head/sys/kern/uipc_socket.c (revision 71349) +++ head/sys/kern/uipc_socket.c (revision 71350) @@ -1,1633 +1,1633 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 * $FreeBSD$ */ #include "opt_inet.h" #include #include #include #include #include #include #include /* for struct knote */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt); #endif static int filt_sorattach(struct knote *kn); static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static int filt_sowattach(struct knote *kn); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static int filt_solisten(struct knote *kn, long hint); static struct filterops solisten_filtops = { 1, filt_sorattach, filt_sordetach, filt_solisten }; struct filterops so_rwfiltops[] = { { 1, filt_sorattach, filt_sordetach, filt_soread }, { 1, filt_sowattach, filt_sowdetach, filt_sowrite }, }; struct vm_zone *socket_zone; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); SYSCTL_DECL(_kern_ipc); static int somaxconn = SOMAXCONN; SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "Maximum pending socket connection queue size"); /* * Socket operation routines. * These routines are called by the routines in * sys_socket.c or from a system process, and * implement the semantics of socket operations by * switching out to the protocol specific routines. */ /* * Get a socket structure from our zone, and initialize it. * We don't implement `waitok' yet (see comments in uipc_domain.c). 
* Note that it would probably be better to allocate socket * and PCB at the same time, but I'm not convinced that all * the protocols can be easily modified to do this. */ struct socket * soalloc(waitok) int waitok; { struct socket *so; - so = zalloci(socket_zone); + so = zalloc(socket_zone); if (so) { /* XXX race condition for reentrant kernel */ bzero(so, sizeof *so); so->so_gencnt = ++so_gencnt; so->so_zone = socket_zone; TAILQ_INIT(&so->so_aiojobq); } return so; } int socreate(dom, aso, type, proto, p) int dom; struct socket **aso; register int type; int proto; struct proc *p; { register struct protosw *prp; register struct socket *so; register int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) return (EPROTONOSUPPORT); if (p->p_prison && jail_socket_unixiproute_only && prp->pr_domain->dom_family != PF_LOCAL && prp->pr_domain->dom_family != PF_INET && prp->pr_domain->dom_family != PF_ROUTE) { return (EPROTONOSUPPORT); } if (prp->pr_type != type) return (EPROTOTYPE); so = soalloc(p != 0); if (so == 0) return (ENOBUFS); TAILQ_INIT(&so->so_incomp); TAILQ_INIT(&so->so_comp); so->so_type = type; so->so_cred = p->p_ucred; crhold(so->so_cred); so->so_proto = prp; error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); if (error) { so->so_state |= SS_NOFDREF; sofree(so); return (error); } *aso = so; return (0); } int sobind(so, nam, p) struct socket *so; struct sockaddr *nam; struct proc *p; { int s = splnet(); int error; error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); splx(s); return (error); } void sodealloc(so) struct socket *so; { so->so_gencnt = ++so_gencnt; if (so->so_rcv.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); if (so->so_snd.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); #ifdef INET if (so->so_accf != NULL) { if (so->so_accf->so_accept_filter != NULL && so->so_accf->so_accept_filter->accf_destroy != NULL) { so->so_accf->so_accept_filter->accf_destroy(so); } if (so->so_accf->so_accept_filter_str != NULL) FREE(so->so_accf->so_accept_filter_str, M_ACCF); FREE(so->so_accf, M_ACCF); } #endif crfree(so->so_cred); - zfreei(so->so_zone, so); + zfree(so->so_zone, so); } int solisten(so, backlog, p) register struct socket *so; int backlog; struct proc *p; { int s, error; s = splnet(); error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); if (error) { splx(s); return (error); } if (TAILQ_EMPTY(&so->so_comp)) so->so_options |= SO_ACCEPTCONN; if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; splx(s); return (0); } void sofree(so) register struct socket *so; { struct socket *head = so->so_head; if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; if (head != NULL) { if (so->so_state & SS_INCOMP) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; } else if (so->so_state & SS_COMP) { /* * We must not decommission a socket that's * on the accept(2) queue. If we do, then * accept(2) may hang after select(2) indicated * that the listening socket was ready. */ return; } else { panic("sofree: not queued"); } head->so_qlen--; so->so_state &= ~SS_INCOMP; so->so_head = NULL; } sbrelease(&so->so_snd, so); sorflush(so); sodealloc(so); } /* * Close a socket on last file table reference removal. * Initiate disconnect if connected. * Free socket when disconnect complete. 
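/*
 * Editor's sketch (not part of this commit): an in-kernel consumer of the
 * allocation path above normally goes through socreate(), which in turn
 * calls soalloc().  Illustration only; error handling trimmed.
 */
static int
example_open_tcp_socket(struct proc *p, struct socket **sop)
{
        /* domain, returned socket, type, protocol, creating process */
        return (socreate(AF_INET, sop, SOCK_STREAM, IPPROTO_TCP, p));
}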
*/ int soclose(so) register struct socket *so; { int s = splnet(); /* conservative */ int error = 0; funsetown(so->so_sigio); if (so->so_options & SO_ACCEPTCONN) { struct socket *sp, *sonext; sp = TAILQ_FIRST(&so->so_incomp); for (; sp != NULL; sp = sonext) { sonext = TAILQ_NEXT(sp, so_list); (void) soabort(sp); } for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) { sonext = TAILQ_NEXT(sp, so_list); /* Dequeue from so_comp since sofree() won't do it */ TAILQ_REMOVE(&so->so_comp, sp, so_list); so->so_qlen--; sp->so_state &= ~SS_COMP; sp->so_head = NULL; (void) soabort(sp); } } if (so->so_pcb == 0) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) goto drop; } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger * hz); if (error) break; } } } drop: if (so->so_pcb) { int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); if (error == 0) error = error2; } discard: if (so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); so->so_state |= SS_NOFDREF; sofree(so); splx(s); return (error); } /* * Must be called at splnet... */ int soabort(so) struct socket *so; { int error; error = (*so->so_proto->pr_usrreqs->pru_abort)(so); if (error) { sofree(so); return error; } return (0); } int soaccept(so, nam) register struct socket *so; struct sockaddr **nam; { int s = splnet(); int error; if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; if ((so->so_state & SS_ISDISCONNECTED) == 0) error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); else { if (nam) *nam = 0; error = 0; } splx(s); return (error); } int soconnect(so, nam, p) register struct socket *so; struct sockaddr *nam; struct proc *p; { int s; int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); s = splnet(); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. * This allows user to disconnect by connecting to, e.g., * a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) error = EISCONN; else error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); splx(s); return (error); } int soconnect2(so1, so2) register struct socket *so1; struct socket *so2; { int s = splnet(); int error; error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); splx(s); return (error); } int sodisconnect(so) register struct socket *so; { int s = splnet(); int error; if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; goto bad; } if (so->so_state & SS_ISDISCONNECTING) { error = EALREADY; goto bad; } error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); bad: splx(s); return (error); } #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) /* * Send on a socket. * If send must go all at once and message is larger than * send buffering, then hard error. * Lock against other senders. * If must go all at once and not enough room now, then * inform user that this would block and do nothing. * Otherwise, if nonblocking, send as much as possible. * The data to be sent is described by "uio" if nonzero, * otherwise by the mbuf chain "top" (which must be null * if uio is not). Data provided in mbuf chain must be small * enough to send all at once. 
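/*
 * Editor's sketch (not part of this commit): data may be handed to sosend()
 * either through a uio or as a prebuilt mbuf chain, never both.  A caller
 * with a chain passes it as "top" and leaves uio NULL (illustration only;
 * "so", "top" and "p" are assumed to exist).
 */
static int
example_send_chain(struct socket *so, struct mbuf *top, struct proc *p)
{
        /* no address, no uio, no control mbufs, no flags: "top" carries the data */
        return (sosend(so, NULL, NULL, top, NULL, 0, p));
}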
* * Returns nonzero on error, timeout or signal; callers * must check for short counts if EINTR/ERESTART are returned. * Data and control buffers are freed on return. */ int sosend(so, addr, uio, top, control, flags, p) register struct socket *so; struct sockaddr *addr; struct uio *uio; struct mbuf *top; struct mbuf *control; int flags; struct proc *p; { struct mbuf **mp; register struct mbuf *m; register long space, len, resid; int clen = 0, error, s, dontroute, mlen; int atomic = sosendallatonce(so) || top; if (uio) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (p) p->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; #define snderr(errno) { error = errno; splx(s); goto release; } restart: error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; do { s = splnet(); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) snderr(ENOTCONN); } else if (addr == 0) snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? ENOTCONN : EDESTADDRREQ); } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); if (space < resid + clen && uio && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); sbunlock(&so->so_snd); error = sbwait(&so->so_snd); splx(s); if (error) goto out; goto restart; } splx(s); mp = ⊤ space -= clen; do { if (uio == NULL) { /* * Data is prepackaged in "top". */ resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else do { if (top == 0) { MGETHDR(m, M_TRYWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; goto release; } mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = (struct ifnet *)0; } else { MGET(m, M_TRYWAIT, MT_DATA); if (m == NULL) { error = ENOBUFS; goto release; } mlen = MLEN; } if (resid >= MINCLSIZE) { MCLGET(m, M_TRYWAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; len = min(min(mlen, resid), space); } else { nopages: len = min(min(mlen, resid), space); /* * For datagram protocols, leave room * for protocol headers in first mbuf. 
*/ if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } space -= len; error = uiomove(mtod(m, caddr_t), (int)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; top->m_pkthdr.len += len; if (error) goto release; mp = &m->m_next; if (resid <= 0) { if (flags & MSG_EOR) top->m_flags |= M_EOR; break; } } while (space > 0 && atomic); if (dontroute) so->so_options |= SO_DONTROUTE; s = splnet(); /* XXX */ /* * XXX all the SS_CANTSENDMORE checks previously * done could be out of date. We could have recieved * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We could * probably recheck again inside the splnet() protection * here, but there are probably other places that this * also happens. We must rethink this. */ error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol * understands this flag and nothing left to * send then use PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, p); splx(s); if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; control = 0; top = 0; mp = ⊤ if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: if (top) m_freem(top); if (control) m_freem(control); return (error); } /* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf * by sbappend*. In particular, each record (mbufs linked through m_next) * must begin with an address if the protocol so specifies, * followed by an optional mbuf or mbufs containing ancillary data, * and then zero or more mbufs of data. * In order to avoid blocking network interrupts for the entire time here, * we splx() while doing the actual copy to user space. * Although the sockbuf is locked, new data may still be appended, * and thus we must maintain consistency of the sockbuf during that time. * * The caller may receive the data as a single mbuf chain by supplying * an mbuf **mp0 for use in returning the chain. The uio is then used * only for the count in uio_resid. */ int soreceive(so, psa, uio, mp0, controlp, flagsp) register struct socket *so; struct sockaddr **psa; struct uio *uio; struct mbuf **mp0; struct mbuf **controlp; int *flagsp; { register struct mbuf *m, **mp; register int flags, len, error, s, offset; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; int orig_resid = uio->uio_resid; mp = mp0; if (psa) *psa = 0; if (controlp) *controlp = 0; if (flagsp) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) { m = m_get(M_TRYWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, caddr_t), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m) m_freem(m); return (error); } if (mp) *mp = (struct mbuf *)0; if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreqs->pru_rcvd)(so, 0); restart: error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) return (error); s = splnet(); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more * (subject to any timeout) if: * 1. the current count is less than the low water mark, or * 2. 
MSG_WAITALL is set, and it is possible to do the entire * receive operation at once if we block (resid <= hiwat). * 3. MSG_DONTWAIT is not set * If MSG_WAITALL is set but resid is larger than the receive buffer, * we have to do the receive in sections, and thus risk returning * a short count if a timeout or signal occurs after we start. */ if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio->uio_resid) && (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); if (so->so_error) { if (m) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; goto release; } if (so->so_state & SS_CANTRCVMORE) { if (m) goto dontblock; else goto release; } for (; m; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { error = ENOTCONN; goto release; } if (uio->uio_resid == 0) goto release; if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { error = EWOULDBLOCK; goto release; } sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); if (error) return (error); goto restart; } dontblock: if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("receive 1a")); orig_resid = 0; if (psa) *psa = dup_sockaddr(mtod(m, struct sockaddr *), mp0 == 0); if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } } while (m && m->m_type == MT_CONTROL && error == 0) { if (flags & MSG_PEEK) { if (controlp) *controlp = m_copy(m, 0, m->m_len); m = m->m_next; } else { sbfree(&so->so_rcv, m); if (controlp) { if (pr->pr_domain->dom_externalize && mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) error = (*pr->pr_domain->dom_externalize)(m); *controlp = m; so->so_rcv.sb_mb = m->m_next; m->m_next = 0; m = so->so_rcv.sb_mb; } else { MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } } if (controlp) { orig_resid = 0; controlp = &(*controlp)->m_next; } } if (m) { if ((flags & MSG_PEEK) == 0) m->m_nextpkt = nextrecord; type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; } else if (type == MT_OOBDATA) break; else KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, ("receive 3")); so->so_state &= ~SS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. * Otherwise copy them out via the uio, then free. * Sockbuf must be consistent here (points to current mbuf, * it points to next record) when we drop priority; * we must note any additions to the sockbuf when we * block interrupts again. 
*/ if (mp == 0) { splx(s); error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); s = splnet(); if (error) goto release; } else uio->uio_resid -= len; if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp) { *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = (struct mbuf *)0; } else { MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } if (m) m->m_nextpkt = nextrecord; } } else { if (flags & MSG_PEEK) moff += len; else { if (mp) *mp = m_copym(m, 0, len, M_TRYWAIT); m->m_data += len; m->m_len -= len; so->so_rcv.sb_cc -= len; } } if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_state |= SS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), * we must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return * with a short count but without error. * Keep sockbuf locked against other readers. */ while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); splx(s); return (0); } m = so->so_rcv.sb_mb; if (m) nextrecord = m->m_nextpkt; } } if (m && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == 0) so->so_rcv.sb_mb = nextrecord; if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { sbunlock(&so->so_rcv); splx(s); goto restart; } if (flagsp) *flagsp |= flags; release: sbunlock(&so->so_rcv); splx(s); return (error); } int soshutdown(so, how) register struct socket *so; register int how; { register struct protosw *pr = so->so_proto; how++; if (how & FREAD) sorflush(so); if (how & FWRITE) return ((*pr->pr_usrreqs->pru_shutdown)(so)); return (0); } void sorflush(so) register struct socket *so; { register struct sockbuf *sb = &so->so_rcv; register struct protosw *pr = so->so_proto; register int s; struct sockbuf asb; sb->sb_flags |= SB_NOINTR; (void) sblock(sb, M_WAITOK); s = splimp(); socantrcvmore(so); sbunlock(sb); asb = *sb; bzero((caddr_t)sb, sizeof (*sb)); splx(s); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); sbrelease(&asb, so); } #ifdef INET static int do_setopt_accept_filter(so, sopt) struct socket *so; struct sockopt *sopt; { struct accept_filter_arg *afap = NULL; struct accept_filter *afp; struct so_accf *af = so->so_accf; int error = 0; /* do not set/remove accept filters on non listen sockets */ if ((so->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; goto out; } /* removing the filter */ if (sopt == NULL) { if (af != NULL) { if (af->so_accept_filter != NULL && af->so_accept_filter->accf_destroy != NULL) { af->so_accept_filter->accf_destroy(so); } if (af->so_accept_filter_str != NULL) { FREE(af->so_accept_filter_str, M_ACCF); } FREE(af, M_ACCF); so->so_accf = NULL; } so->so_options &= ~SO_ACCEPTFILTER; return (0); } /* adding a filter */ /* must remove previous filter first */ if (af != NULL) { error = EINVAL; goto out; } /* don't put large objects on 
the kernel stack */ MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK); error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap); afap->af_name[sizeof(afap->af_name)-1] = '\0'; afap->af_arg[sizeof(afap->af_arg)-1] = '\0'; if (error) goto out; afp = accept_filt_get(afap->af_name); if (afp == NULL) { error = ENOENT; goto out; } MALLOC(af, struct so_accf *, sizeof(*af), M_ACCF, M_WAITOK | M_ZERO); if (afp->accf_create != NULL) { if (afap->af_name[0] != '\0') { int len = strlen(afap->af_name) + 1; MALLOC(af->so_accept_filter_str, char *, len, M_ACCF, M_WAITOK); strcpy(af->so_accept_filter_str, afap->af_name); } af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg); if (af->so_accept_filter_arg == NULL) { FREE(af->so_accept_filter_str, M_ACCF); FREE(af, M_ACCF); so->so_accf = NULL; error = EINVAL; goto out; } } af->so_accept_filter = afp; so->so_accf = af; so->so_options |= SO_ACCEPTFILTER; out: if (afap != NULL) FREE(afap, M_TEMP); return (error); } #endif /* INET */ /* * Perhaps this routine, and sooptcopyout(), below, ought to come in * an additional variant to handle the case where the option value needs * to be some kind of integer, but not a specific size. * In addition to their use here, these functions are also called by the * protocol-level pr_ctloutput() routines. */ int sooptcopyin(sopt, buf, len, minlen) struct sockopt *sopt; void *buf; size_t len; size_t minlen; { size_t valsize; /* * If the user gives us more than we wanted, we ignore it, * but if we don't get the minimum length the caller * wants, we return EINVAL. On success, sopt->sopt_valsize * is set to however much we actually retrieved. */ if ((valsize = sopt->sopt_valsize) < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; if (sopt->sopt_p != 0) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(sopt->sopt_val, buf, valsize); return 0; } int sosetopt(so, sopt) struct socket *so; struct sockopt *sopt; { int error, optval; struct linger l; struct timeval tv; u_long val; error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) return ((*so->so_proto->pr_ctloutput) (so, sopt)); error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { #ifdef INET case SO_ACCEPTFILTER: error = do_setopt_accept_filter(so, sopt); if (error) goto bad; break; #endif case SO_LINGER: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; break; case SO_DEBUG: case SO_KEEPALIVE: case SO_DONTROUTE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval) so->so_options |= sopt->sopt_name; else so->so_options &= ~sopt->sopt_name; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; /* * Values < 1 make no sense for any of these * options, so disallow them. */ if (optval < 1) { error = EINVAL; goto bad; } switch (sopt->sopt_name) { case SO_SNDBUF: case SO_RCVBUF: if (sbreserve(sopt->sopt_name == SO_SNDBUF ? &so->so_snd : &so->so_rcv, (u_long)optval, so, curproc) == 0) { error = ENOBUFS; goto bad; } break; /* * Make sure the low-water is never greater than * the high-water. */ case SO_SNDLOWAT: so->so_snd.sb_lowat = (optval > so->so_snd.sb_hiwat) ? 
so->so_snd.sb_hiwat : optval; break; case SO_RCVLOWAT: so->so_rcv.sb_lowat = (optval > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : optval; break; } break; case SO_SNDTIMEO: case SO_RCVTIMEO: error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv); if (error) goto bad; /* assert(hz > 0); */ if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz || tv.tv_usec < 0 || tv.tv_usec >= 1000000) { error = EDOM; goto bad; } /* assert(tick > 0); */ /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */ val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; if (val > SHRT_MAX) { error = EDOM; goto bad; } switch (sopt->sopt_name) { case SO_SNDTIMEO: so->so_snd.sb_timeo = val; break; case SO_RCVTIMEO: so->so_rcv.sb_timeo = val; break; } break; default: error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { (void) ((*so->so_proto->pr_ctloutput) (so, sopt)); } } bad: return (error); } /* Helper routine for getsockopt */ int sooptcopyout(sopt, buf, len) struct sockopt *sopt; void *buf; size_t len; { int error; size_t valsize; error = 0; /* * Documented get behavior is that we always return a value, * possibly truncated to fit in the user's buffer. * Traditional behavior is that we always tell the user * precisely how much we copied, rather than something useful * like the total amount we had available for her. * Note that this interface is not idempotent; the entire answer must * generated ahead of time. */ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != 0) { if (sopt->sopt_p != 0) error = copyout(buf, sopt->sopt_val, valsize); else bcopy(buf, sopt->sopt_val, valsize); } return error; } int sogetopt(so, sopt) struct socket *so; struct sockopt *sopt; { int error, optval; struct linger l; struct timeval tv; #ifdef INET struct accept_filter_arg *afap; #endif error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) { return ((*so->so_proto->pr_ctloutput) (so, sopt)); } else return (ENOPROTOOPT); } else { switch (sopt->sopt_name) { #ifdef INET case SO_ACCEPTFILTER: if ((so->so_options & SO_ACCEPTCONN) == 0) return (EINVAL); MALLOC(afap, struct accept_filter_arg *, sizeof(*afap), M_TEMP, M_WAITOK | M_ZERO); if ((so->so_options & SO_ACCEPTFILTER) != 0) { strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name); if (so->so_accf->so_accept_filter_str != NULL) strcpy(afap->af_arg, so->so_accf->so_accept_filter_str); } error = sooptcopyout(sopt, afap, sizeof(*afap)); FREE(afap, M_TEMP); break; #endif case SO_LINGER: l.l_onoff = so->so_options & SO_LINGER; l.l_linger = so->so_linger; error = sooptcopyout(sopt, &l, sizeof l); break; case SO_USELOOPBACK: case SO_DONTROUTE: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; case SO_TYPE: optval = so->so_type; goto integer; case SO_ERROR: optval = so->so_error; so->so_error = 0; goto integer; case SO_SNDBUF: optval = so->so_snd.sb_hiwat; goto integer; case SO_RCVBUF: optval = so->so_rcv.sb_hiwat; goto integer; case SO_SNDLOWAT: optval = so->so_snd.sb_lowat; goto integer; case SO_RCVLOWAT: optval = so->so_rcv.sb_lowat; goto integer; case SO_SNDTIMEO: case SO_RCVTIMEO: optval = (sopt->sopt_name == SO_SNDTIMEO ? 
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); tv.tv_sec = optval / hz; tv.tv_usec = (optval % hz) * tick; error = sooptcopyout(sopt, &tv, sizeof tv); break; default: error = ENOPROTOOPT; break; } return (error); } } /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ int soopt_getm(struct sockopt *sopt, struct mbuf **mp) { struct mbuf *m, *m_prev; int sopt_size = sopt->sopt_valsize; MGET(m, sopt->sopt_p ? M_TRYWAIT : M_DONTWAIT, MT_DATA); if (m == 0) return ENOBUFS; if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_p ? M_TRYWAIT : M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return ENOBUFS; } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; *mp = m; m_prev = m; while (sopt_size) { MGET(m, sopt->sopt_p ? M_TRYWAIT : M_DONTWAIT, MT_DATA); if (m == 0) { m_freem(*mp); return ENOBUFS; } if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_p ? M_TRYWAIT : M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_freem(*mp); return ENOBUFS; } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; m_prev->m_next = m; m_prev = m; } return 0; } /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */ int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; if (sopt->sopt_val == NULL) return 0; while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_p != NULL) { int error; error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); if (error != 0) { m_freem(m0); return(error); } } else bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); sopt->sopt_valsize -= m->m_len; (caddr_t)sopt->sopt_val += m->m_len; m = m->m_next; } if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ panic("ip6_sooptmcopyin"); return 0; } /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. 
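/*
 * Editor's sketch (not part of this commit): the SO_SNDTIMEO/SO_RCVTIMEO
 * handling above converts between a struct timeval and scheduler ticks.
 * Worked example assuming the traditional hz = 100, so tick = 10000 us:
 */
static void
example_timeo_roundtrip(void)
{
        struct timeval tv = { 2, 500000 };      /* 2.5 seconds requested */
        u_long val;

        val = (u_long)(tv.tv_sec * 100) + tv.tv_usec / 10000;  /* = 250 ticks */
        tv.tv_sec = val / 100;                  /* back to 2 seconds ... */
        tv.tv_usec = (val % 100) * 10000;       /* ... and 500000 microseconds */
}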
*/ int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; size_t valsize = 0; if (sopt->sopt_val == NULL) return 0; while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_p != NULL) { int error; error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); if (error != 0) { m_freem(m0); return(error); } } else bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); sopt->sopt_valsize -= m->m_len; (caddr_t)sopt->sopt_val += m->m_len; valsize += m->m_len; m = m->m_next; } if (m != NULL) { /* enough soopt buffer should be given from user-land */ m_freem(m0); return(EINVAL); } sopt->sopt_valsize = valsize; return 0; } void sohasoutofband(so) register struct socket *so; { if (so->so_sigio != NULL) pgsigio(so->so_sigio, SIGURG, 0); selwakeup(&so->so_rcv.sb_sel); } int sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) { int revents = 0; int s = splnet(); if (events & (POLLIN | POLLRDNORM)) if (soreadable(so)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); if (revents == 0) { if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { selrecord(p, &so->so_rcv.sb_sel); so->so_rcv.sb_flags |= SB_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(p, &so->so_snd.sb_sel); so->so_snd.sb_flags |= SB_SEL; } } splx(s); return (revents); } static int filt_sorattach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_fp->f_data; int s = splnet(); if (so->so_options & SO_ACCEPTCONN) kn->kn_fop = &solisten_filtops; SLIST_INSERT_HEAD(&so->so_rcv.sb_sel.si_note, kn, kn_selnext); so->so_rcv.sb_flags |= SB_KNOTE; splx(s); return (0); } static void filt_sordetach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_fp->f_data; int s = splnet(); SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note)) so->so_rcv.sb_flags &= ~SB_KNOTE; splx(s); } /*ARGSUSED*/ static int filt_soread(struct knote *kn, long hint) { struct socket *so = (struct socket *)kn->kn_fp->f_data; kn->kn_data = so->so_rcv.sb_cc; if (so->so_state & SS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; return (1); } if (so->so_error) /* temporary udp error */ return (1); return (kn->kn_data >= so->so_rcv.sb_lowat); } static int filt_sowattach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_fp->f_data; int s = splnet(); SLIST_INSERT_HEAD(&so->so_snd.sb_sel.si_note, kn, kn_selnext); so->so_snd.sb_flags |= SB_KNOTE; splx(s); return (0); } static void filt_sowdetach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_fp->f_data; int s = splnet(); SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note)) so->so_snd.sb_flags &= ~SB_KNOTE; splx(s); } /*ARGSUSED*/ static int filt_sowrite(struct knote *kn, long hint) { struct socket *so = (struct socket *)kn->kn_fp->f_data; kn->kn_data = sbspace(&so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; return (1); } if (so->so_error) /* temporary udp error */ return (1); if (((so->so_state & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) return (0); return (kn->kn_data >= so->so_snd.sb_lowat); } /*ARGSUSED*/ static int filt_solisten(struct knote *kn, long hint) { struct socket *so = (struct socket *)kn->kn_fp->f_data; kn->kn_data = 
so->so_qlen - so->so_incqlen; return (! TAILQ_EMPTY(&so->so_comp)); } Index: head/sys/netinet/in_pcb.c =================================================================== --- head/sys/netinet/in_pcb.c (revision 71349) +++ head/sys/netinet/in_pcb.c (revision 71350) @@ -1,1080 +1,1080 @@ /* * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 * $FreeBSD$ */ #include "opt_ipsec.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif /* INET6 */ #include "faith.h" #ifdef IPSEC #include #include #endif /* IPSEC */ struct in_addr zeroin_addr; static void in_rtchange __P((struct inpcb *, int)); /* * These configure the range of local port addresses assigned to * "unspecified" outgoing connections/packets/whatever. 
*/ int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } static int sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) { RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); } return error; } #undef RANGECHK SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); /* * in_pcb.c: manage the Protocol Control Blocks. * * NOTE: It is assumed that most of these functions will be called at * splnet(). XXX - There are, unfortunately, a few exceptions to this * rule that should be fixed. */ /* * Allocate a PCB and associate it with the socket. */ int in_pcballoc(so, pcbinfo, p) struct socket *so; struct inpcbinfo *pcbinfo; struct proc *p; { register struct inpcb *inp; - inp = zalloci(pcbinfo->ipi_zone); + inp = zalloc(pcbinfo->ipi_zone); if (inp == NULL) return (ENOBUFS); bzero((caddr_t)inp, sizeof(*inp)); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; #if defined(INET6) if (ip6_mapped_addr_on) inp->inp_flags &= ~IN6P_BINDV6ONLY; else inp->inp_flags |= IN6P_BINDV6ONLY; #endif LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; return (0); } int in_pcbbind(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { register struct socket *so = inp->inp_socket; unsigned short *lastport; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error, prison = 0; if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. 
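The portrange sysctls above are plain integers guarded by sysctl_net_ipport_check(), which clamps whatever was written back into the legal window (first/last, for instance, stay within [IPPORT_RESERVED, USHRT_MAX]). A small userland sketch, assuming the resulting MIB names net.inet.ip.portrange.first and .last, that narrows the anonymous port range:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
        int first = 20000, last = 30000, cur;
        size_t len = sizeof(cur);

        /* Read the current lower bound of the anonymous port range. */
        if (sysctlbyname("net.inet.ip.portrange.first", &cur, &len,
            NULL, 0) == -1)
                err(1, "sysctl get");
        printf("current first: %d\n", cur);

        /* Narrow the range; out-of-range values are clamped by the handler. */
        if (sysctlbyname("net.inet.ip.portrange.first", NULL, NULL,
            &first, sizeof(first)) == -1 ||
            sysctlbyname("net.inet.ip.portrange.last", NULL, NULL,
            &last, sizeof(last)) == -1)
                err(1, "sysctl set");
        return (0);
}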
*/ if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); #endif if (sin->sin_addr.s_addr != INADDR_ANY) if (prison_ip(p, 0, &sin->sin_addr.s_addr)) return(EINVAL); lport = sin->sin_port; if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... */ if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) < IPPORT_RESERVED && p && suser_xxx(0, p, PRISON_ROOT)) return (EACCES); if (p && p->p_prison) prison = 1; if (so->so_cred->cr_uid != 0 && !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { t = in_pcblookup_local(inp->inp_pcbinfo, sin->sin_addr, lport, prison ? 0 : INPLOOKUP_WILDCARD); if (t && (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) { #if defined(INET6) if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 || ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, prison ? 0 : wild); if (t && (reuseport & t->inp_socket->so_options) == 0) { #if defined(INET6) if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 || ntohl(sin->sin_addr.s_addr) != INADDR_ANY || ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) #endif /* defined(INET6) */ return (EADDRINUSE); } } inp->inp_laddr = sin->sin_addr; } if (lport == 0) { ushort first, last; int count; if (inp->inp_laddr.s_addr != INADDR_ANY) if (prison_ip(p, 0, &inp->inp_laddr.s_addr )) return (EINVAL); inp->inp_flags |= INP_ANONPORT; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { if (p && (error = suser_xxx(0, p, PRISON_ROOT))) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->inp_laddr.s_addr = INADDR_ANY; return (EAGAIN); } --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. 
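The anonymous-port search that follows walks *lastport down (or up), wrapping back to first whenever it steps outside the window, and gives up with EAGAIN once count goes negative. A condensed, self-contained rendering of the counting-down case, with a stub port_in_use() standing in for the in_pcblookup_local() probe:

#include <sys/types.h>
#include <netinet/in.h>
#include <errno.h>

/* Stub standing in for in_pcblookup_local(): pretend every port is free. */
static int
port_in_use(u_short port)
{
        (void)port;
        return (0);
}

/*
 * Counting-down ephemeral port search: first > last, e.g. the low range
 * 1023 down to 600.  Returns the chosen port in network order, or 0 with
 * errno = EAGAIN when the whole range is in use.
 */
static u_short
pick_port_down(u_short first, u_short last, u_short *lastport)
{
        int count = first - last;
        u_short lport;

        do {
                if (count-- < 0) {              /* completely used? */
                        errno = EAGAIN;
                        return (0);
                }
                --*lastport;
                if (*lastport > first || *lastport < last)
                        *lastport = first;      /* wrap back into the window */
                lport = htons(*lastport);
        } while (port_in_use(lport));
        return (lport);
}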
*/ inp->inp_laddr.s_addr = INADDR_ANY; return (EAGAIN); } ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, lport, wild)); } } inp->inp_lport = lport; if (prison_ip(p, 0, &inp->inp_laddr.s_addr)) return(EINVAL); if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } return (0); } /* * Transform old in_pcbconnect() into an inner subroutine for new * in_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ int in_pcbladdr(inp, nam, plocal_sin) register struct inpcb *inp; struct sockaddr *nam; struct sockaddr_in **plocal_sin; { struct in_ifaddr *ia; register struct sockaddr_in *sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. * If the supplied address is INADDR_BROADCAST, * and the primary interface supports broadcast, * choose the broadcast address for that interface. */ #define satosin(sa) ((struct sockaddr_in *)(sa)) #define sintosa(sin) ((struct sockaddr *)(sin)) #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) if (sin->sin_addr.s_addr == INADDR_ANY) sin->sin_addr = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr; else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && (in_ifaddrhead.tqh_first->ia_ifp->if_flags & IFF_BROADCAST)) sin->sin_addr = satosin(&in_ifaddrhead.tqh_first->ia_broadaddr)->sin_addr; } if (inp->inp_laddr.s_addr == INADDR_ANY) { register struct route *ro; ia = (struct in_ifaddr *)0; /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ ro = &inp->inp_route; if (ro->ro_rt && (satosin(&ro->ro_dst)->sin_addr.s_addr != sin->sin_addr.s_addr || inp->inp_socket->so_options & SO_DONTROUTE)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ (ro->ro_rt == (struct rtentry *)0 || ro->ro_rt->rt_ifp == (struct ifnet *)0)) { /* No route yet, so try to acquire one */ ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sin->sin_addr; rtalloc(ro); } /* * If we found a route, use the address * corresponding to the outgoing interface * unless it is the loopback (in case a route * to our address on another net goes to loopback). */ if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) ia = ifatoia(ro->ro_rt->rt_ifa); if (ia == 0) { u_short fport = sin->sin_port; sin->sin_port = 0; ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); if (ia == 0) ia = ifatoia(ifa_ifwithnet(sintosa(sin))); sin->sin_port = fport; if (ia == 0) ia = in_ifaddrhead.tqh_first; if (ia == 0) return (EADDRNOTAVAIL); } /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, use the * address of that interface as our source address. 
*/ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; imo = inp->inp_moptions; if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) if (ia->ia_ifp == ifp) break; if (ia == 0) return (EADDRNOTAVAIL); } } /* * Don't do pcblookup call here; return interface in plocal_sin * and exit to caller, that will do the lookup. */ *plocal_sin = &ia->ia_addr; } return(0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in_pcbconnect(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { struct sockaddr_in *ifaddr; struct sockaddr_in *sin = (struct sockaddr_in *)nam; struct sockaddr_in sa; int error; if (inp->inp_laddr.s_addr == INADDR_ANY && p->p_prison != NULL) { bzero(&sa, sizeof (sa)); sa.sin_addr.s_addr = htonl(p->p_prison->pr_ip); sa.sin_len=sizeof (sa); sa.sin_family = AF_INET; error = in_pcbbind(inp, (struct sockaddr *)&sa, p); if (error) return (error); } /* * Call inner routine, to assign local interface address. */ if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0) return(error); if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (inp->inp_laddr.s_addr == INADDR_ANY) { if (inp->inp_lport == 0) { error = in_pcbbind(inp, (struct sockaddr *)0, p); if (error) return (error); } inp->inp_laddr = ifaddr->sin_addr; } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; in_pcbrehash(inp); return (0); } void in_pcbdisconnect(inp) struct inpcb *inp; { inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; in_pcbrehash(inp); if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } void in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; struct rtentry *rt = inp->inp_route.ro_rt; #ifdef IPSEC ipsec4_delete_pcbpolicy(inp); #endif /*IPSEC*/ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); so->so_pcb = 0; sofree(so); if (inp->inp_options) (void)m_free(inp->inp_options); if (rt) { /* * route deletion requires reference count to be <= zero */ if ((rt->rt_flags & RTF_DELCLONE) && (rt->rt_flags & RTF_WASCLONED)) { if (--rt->rt_refcnt <= 0) { rt->rt_flags &= ~RTF_UP; rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } else /* * more than one reference, bump it up * again. */ rt->rt_refcnt++; } else rtfree(rt); } ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; - zfreei(ipi->ipi_zone, inp); + zfree(ipi->ipi_zone, inp); } /* * The calling convention of in_setsockaddr() and in_setpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly * without the need for a wrapper function. The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... 
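in_pcbconnect() above is also why an application never has to bind(2) before connect(2): the missing pieces are filled in by in_pcbbind() (anonymous port) and in_pcbladdr() (interface address), and in_setsockaddr() later hands that choice back through getsockname(2). A short userland sketch of the visible behavior (the destination address is a placeholder):

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
        struct sockaddr_in dst, local;
        socklen_t len = sizeof(local);
        int s;

        if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
                err(1, "socket");
        memset(&dst, 0, sizeof(dst));
        dst.sin_family = AF_INET;
        dst.sin_len = sizeof(dst);
        dst.sin_port = htons(80);
        dst.sin_addr.s_addr = inet_addr("192.0.2.1");   /* placeholder */

        /* No bind(2): the kernel picks laddr and an anonymous lport. */
        if (connect(s, (struct sockaddr *)&dst, sizeof(dst)) == -1)
                err(1, "connect");
        if (getsockname(s, (struct sockaddr *)&local, &len) == -1)
                err(1, "getsockname");
        printf("bound to %s:%u\n", inet_ntoa(local.sin_addr),
            ntohs(local.sin_port));
        return (0);
}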
XXX) */ int in_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; register struct inpcb *inp; register struct sockaddr_in *sin; /* * Do the malloc first in case it blocks. */ MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin, M_SONAME); return ECONNRESET; } sin->sin_port = inp->inp_lport; sin->sin_addr = inp->inp_laddr; splx(s); *nam = (struct sockaddr *)sin; return 0; } int in_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; struct inpcb *inp; register struct sockaddr_in *sin; /* * Do the malloc first in case it blocks. */ MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin, M_SONAME); return ECONNRESET; } sin->sin_port = inp->inp_fport; sin->sin_addr = inp->inp_faddr; splx(s); *nam = (struct sockaddr *)sin; return 0; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. * * If tcp_seq_check != 0 it also checks if tcp_sequence is * a valid TCP sequence number for the session. */ void in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify, tcp_sequence, tcp_seq_check) struct inpcbhead *head; struct sockaddr *dst; u_int fport_arg, lport_arg; struct in_addr laddr; int cmd; void (*notify) __P((struct inpcb *, int)); u_int32_t tcp_sequence; int tcp_seq_check; { register struct inpcb *inp, *oinp; struct in_addr faddr; u_short fport = fport_arg, lport = lport_arg; int errno, s; if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET) return; faddr = ((struct sockaddr_in *)dst)->sin_addr; if (faddr.s_addr == INADDR_ANY) return; /* * Redirects go to all references to the destination, * and use in_rtchange to invalidate the route cache. * Dead host indications: notify all references to the destination. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; laddr.s_addr = 0; if (cmd != PRC_HOSTDEAD) notify = in_rtchange; } errno = inetctlerrmap[cmd]; s = splnet(); for (inp = head->lh_first; inp != NULL;) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) { inp = LIST_NEXT(inp, inp_list); continue; } #endif if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) || (fport && inp->inp_fport != fport)) { inp = inp->inp_list.le_next; continue; } /* * If tcp_seq_check is set, then skip sessions where * the sequence number is not one of a unacknowledged * packet. * * If it doesn't match, we break the loop, as only a * single session can match on src/dst ip addresses * and TCP port numbers. */ if ((tcp_seq_check == 1) && (tcp_seq_vs_sess(inp, tcp_sequence) == 0)) { inp = inp->inp_list.le_next; break; } oinp = inp; inp = inp->inp_list.le_next; if (notify) (*notify)(oinp, errno); } splx(s); } /* * Check for alternatives when higher level complains * about service problems. 
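in_pcbnotify() above is intended to be driven from a protocol's ctlinput path once ICMP reports trouble. A hypothetical caller using the signature as declared here; the pcb list and the notify routine are placeholders rather than existing kernel symbols, and the trailing tcp_sequence/tcp_seq_check pair is passed as 0/0 since the sequence check only makes sense for TCP:

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>

/* Placeholder pcb list for a hypothetical protocol. */
static struct inpcbhead myproto_pcblist;

static void
myproto_notify(struct inpcb *inp, int error)
{
        /* Record the error on the owning socket, much as udp_notify() does. */
        if (inp->inp_socket != NULL)
                inp->inp_socket->so_error = error;
}

static void
myproto_report_error(struct sockaddr *dst, struct in_addr laddr,
    u_int fport, u_int lport, int cmd)
{
        /* Not TCP: pass 0/0 so no sequence-number validation is done. */
        in_pcbnotify(&myproto_pcblist, dst, fport, laddr, lport, cmd,
            myproto_notify, 0, 0);
}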
For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in_losing(inp) struct inpcb *inp; { register struct rtentry *rt; struct rt_addrinfo info; if ((rt = inp->inp_route.ro_rt)) { inp->inp_route.ro_rt = 0; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&inp->inp_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) (void) rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); else /* * A new route can be allocated * the next time output is attempted. */ rtfree(rt); } } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ static void in_rtchange(inp, errno) register struct inpcb *inp; int errno; { if (inp->inp_route.ro_rt) { rtfree(inp->inp_route.ro_rt); inp->inp_route.ro_rt = 0; /* * A new route can be allocated the next time * output is attempted. */ } } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in_addr laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ for (inp = phd->phd_pcblist.lh_first; inp != NULL; inp = inp->inp_portlist.le_next) { wildcard = 0; #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; else if (inp->inp_laddr.s_addr != laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } /* * Lookup PCB in hash list. */ struct inpcb * in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in_addr faddr, laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. 
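The best-fit branch of in_pcblookup_local() above scores every candidate on the port list by how many wildcard components the match relies on and keeps the lowest score, stopping early at an exact fit of zero. The scoring rule in isolation, with the skip case made explicit (helper name is illustrative):

#include <sys/types.h>
#include <netinet/in.h>

#define NO_MATCH        (-1)    /* candidate cannot match at all */

/*
 * Wildcard score for one candidate PCB: 0 = exact fit, 1-2 = increasingly
 * generic, NO_MATCH = a different specific local address, so skip it.
 * cand_faddr/cand_laddr are the candidate's fields, "want" is the query.
 */
static int
wildcard_score(struct in_addr cand_faddr, struct in_addr cand_laddr,
    struct in_addr want)
{
        int wildcard = 0;

        if (cand_faddr.s_addr != INADDR_ANY)
                wildcard++;
        if (cand_laddr.s_addr != INADDR_ANY) {
                if (want.s_addr == INADDR_ANY)
                        wildcard++;
                else if (cand_laddr.s_addr != want.s_addr)
                        return (NO_MATCH);
        } else {
                if (want.s_addr != INADDR_ANY)
                        wildcard++;
        }
        return (wildcard);
}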
*/ head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; #if defined(INET6) struct inpcb *local_wild_mapped = NULL; #endif /* defined(INET6) */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { #ifdef INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_lport == lport) { #if defined(NFAITH) && NFAITH > 0 if (ifp && ifp->if_type == IFT_FAITH && (inp->inp_flags & INP_FAITH) == 0) continue; #endif if (inp->inp_laddr.s_addr == laddr.s_addr) return (inp); else if (inp->inp_laddr.s_addr == INADDR_ANY) { #if defined(INET6) if (INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) local_wild_mapped = inp; else #endif /* defined(INET6) */ local_wild = inp; } } } #if defined(INET6) if (local_wild == NULL) return (local_wild_mapped); #endif /* defined(INET6) */ return (local_wild); } /* * Not found. */ return (NULL); } /* * Insert PCB onto various hash lists. */ int in_pcbinshash(inp) struct inpcb *inp; { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; u_int32_t hashkey_faddr; #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; /* * Go through port list and look for a head for this lport. */ for (phd = pcbporthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { if (phd->phd_port == inp->inp_lport) break; } /* * If none exists, malloc one and tack it on. */ if (phd == NULL) { MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); return (0); } /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. */ void in_pcbrehash(inp) struct inpcb *inp; { struct inpcbhead *head; u_int32_t hashkey_faddr; #ifdef INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); } /* * Remove PCB from various lists. 
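in_pcbinshash() above hashes on { faddr, fport, lport } and additionally threads the PCB onto a per-lport list, which is why the in_pcbrehash() comment insists the local port must not change afterwards: only the foreign half can be rehashed. A sketch of the ordering a protocol has to respect (assuming inp came from in_pcballoc() and the addresses have already been validated):

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>

static int
example_set_addresses(struct inpcb *inp,
    struct in_addr laddr, u_short lport,
    struct in_addr faddr, u_short fport)
{
        int error;

        /* 1. Settle the local half first; lport is frozen after this. */
        inp->inp_laddr = laddr;
        inp->inp_lport = lport;
        if ((error = in_pcbinshash(inp)) != 0)
                return (error);

        /* 2. The foreign half may still change later on... */
        inp->inp_faddr = faddr;
        inp->inp_fport = fport;
        in_pcbrehash(inp);      /* ...as long as each change is rehashed. */
        return (0);
}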
*/ void in_pcbremlists(inp) struct inpcb *inp; { inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; if (inp->inp_lport) { struct inpcbport *phd = inp->inp_phd; LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (phd->phd_pcblist.lh_first == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } } LIST_REMOVE(inp, inp_list); inp->inp_pcbinfo->ipi_count--; } int prison_xinpcb(struct proc *p, struct inpcb *inp) { if (!p->p_prison) return (0); if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip) return (0); return (1); } Index: head/sys/netinet6/in6_pcb.c =================================================================== --- head/sys/netinet6/in6_pcb.c (revision 71349) +++ head/sys/netinet6/in6_pcb.c (revision 71350) @@ -1,1061 +1,1061 @@ /* $FreeBSD$ */ /* $KAME: in6_pcb.c,v 1.8 2000/06/09 00:37:02 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 */ #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "faith.h" #ifdef IPSEC #include #include #include #include #include #endif /* IPSEC */ struct in6_addr zeroin6_addr; int in6_pcbbind(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { struct socket *so = inp->inp_socket; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); if (!in6_ifaddr) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return(EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin6 = (struct sockaddr_in6 *)nam; if (nam->sa_len != sizeof(*sin6)) return(EINVAL); /* * family check. */ if (nam->sa_family != AF_INET6) return(EAFNOSUPPORT); /* KAME hack: embed scopeid */ if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) return EINVAL; /* this must be cleared for ifa_ifwithaddr() */ sin6->sin6_scope_id = 0; lport = sin6->sin6_port; if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow compepte duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct ifaddr *ia = NULL; sin6->sin6_port = 0; /* yech... */ if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) return(EADDRNOTAVAIL); /* * XXX: bind to an anycast address might accidentally * cause sending a packet with anycast source address. 
*/ if (ia && ((struct in6_ifaddr *)ia)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { return(EADDRNOTAVAIL); } } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) < IPV6PORT_RESERVED && p && suser_xxx(0, p, PRISON_ROOT)) return(EACCES); if (so->so_cred->cr_uid != 0 && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD); if (t && (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid)) return (EADDRINUSE); if (ip6_mapped_addr_on != 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, INPLOOKUP_WILDCARD); if (t && (so->so_cred->cr_uid != t->inp_socket->so_cred->cr_uid) && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))) return (EADDRINUSE); } } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, wild); if (t && (reuseport & t->inp_socket->so_options) == 0) return(EADDRINUSE); if (ip6_mapped_addr_on != 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6); t = in_pcblookup_local(pcbinfo, sin.sin_addr, lport, wild); if (t && (reuseport & t->inp_socket->so_options) == 0 && (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket))) return (EADDRINUSE); } } inp->in6p_laddr = sin6->sin6_addr; } if (lport == 0) { int e; if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p)) != 0) return(e); } else { inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; return (EAGAIN); } } inp->in6p_flowinfo = sin6 ? sin6->sin6_flowinfo : 0; /*XXX*/ return(0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new * in6_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in6_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ int in6_pcbladdr(inp, nam, plocal_addr6) register struct inpcb *inp; struct sockaddr *nam; struct in6_addr **plocal_addr6; { register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_pktinfo *pi; struct ifnet *ifp = NULL; int error = 0; if (nam->sa_len != sizeof (*sin6)) return (EINVAL); if (sin6->sin6_family != AF_INET6) return (EAFNOSUPPORT); if (sin6->sin6_port == 0) return (EADDRNOTAVAIL); /* KAME hack: embed scopeid */ if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0) return EINVAL; if (in6_ifaddr) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. */ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) sin6->sin6_addr = in6addr_loopback; } { /* * XXX: in6_selectsrc might replace the bound local address * with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? 
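Both in6_pcbbind() and in6_pcbladdr() above pass the address through in6_embedscope(), the KAME convention of folding the scope carried in sin6_scope_id into the address before any lookup. For an application that simply means a link-local bind needs the interface index; a sketch (interface name and address are placeholders):

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <string.h>
#include <err.h>

int
main(void)
{
        struct sockaddr_in6 sin6;
        int s;

        if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
                err(1, "socket");
        memset(&sin6, 0, sizeof(sin6));
        sin6.sin6_family = AF_INET6;
        sin6.sin6_len = sizeof(sin6);
        sin6.sin6_port = htons(5353);
        /* fe80::1 is a placeholder link-local address. */
        if (inet_pton(AF_INET6, "fe80::1", &sin6.sin6_addr) != 1)
                errx(1, "inet_pton");
        /* The scope (interface index) disambiguates the link-local address. */
        if ((sin6.sin6_scope_id = if_nametoindex("fxp0")) == 0) /* placeholder */
                errx(1, "if_nametoindex");
        if (bind(s, (struct sockaddr *)&sin6, sizeof(sin6)) == -1)
                err(1, "bind");
        return (0);
}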
*/ *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp->in6p_moptions, &inp->in6p_route, &inp->in6p_laddr, &error); if (*plocal_addr6 == 0) { if (error == 0) error = EADDRNOTAVAIL; return(error); } /* * Don't do pcblookup call here; return interface in * plocal_addr6 * and exit to caller, that will do the lookup. */ } if (inp->in6p_route.ro_rt) ifp = inp->in6p_route.ro_rt->rt_ifp; return(0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in6_pcbconnect(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { struct in6_addr *addr6; register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; int error; /* * Call inner routine, to assign local interface address. */ if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return(error); if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, p); if (error) return (error); } inp->in6p_laddr = *addr6; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; /* * xxx kazu flowlabel is necessary for connect? * but if this line is missing, the garbage value remains. */ inp->in6p_flowinfo = sin6->sin6_flowinfo; if ((inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) == 0 && ip6_auto_flowlabel != 0) inp->in6p_flowinfo |= (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); in_pcbrehash(inp); return (0); } #if 0 /* * Return an IPv6 address, which is the most appropriate for given * destination and user specified options. * If necessary, this function lookups the routing table and return * an entry to the caller for later use. */ struct in6_addr * in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) struct sockaddr_in6 *dstsock; struct ip6_pktopts *opts; struct ip6_moptions *mopts; struct route_in6 *ro; struct in6_addr *laddr; int *errorp; { struct in6_addr *dst; struct in6_ifaddr *ia6 = 0; struct in6_pktinfo *pi = NULL; dst = &dstsock->sin6_addr; *errorp = 0; /* * If the source address is explicitly specified by the caller, * use it. */ if (opts && (pi = opts->ip6po_pktinfo) && !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) return(&pi->ipi6_addr); /* * If the source address is not specified but the socket(if any) * is already bound, use the bound address. */ if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) return(laddr); /* * If the caller doesn't specify the source address but * the outgoing interface, use an address associated with * the interface. */ if (pi && pi->ipi6_ifindex) { /* XXX boundary check is assumed to be already done. */ ia6 = in6_ifawithscope(ifindex2ifnet[pi->ipi6_ifindex], dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } /* * If the destination address is a link-local unicast address or * a multicast address, and if the outgoing interface is specified * by the sin6_scope_id filed, use an address associated with the * interface. * XXX: We're now trying to define more specific semantics of * sin6_scope_id field, so this part will be rewritten in * the near future. 
*/ if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) && dstsock->sin6_scope_id) { /* * I'm not sure if boundary check for scope_id is done * somewhere... */ if (dstsock->sin6_scope_id < 0 || if_index < dstsock->sin6_scope_id) { *errorp = ENXIO; /* XXX: better error? */ return(0); } ia6 = in6_ifawithscope(ifindex2ifnet[dstsock->sin6_scope_id], dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } /* * If the destination address is a multicast address and * the outgoing interface for the address is specified * by the caller, use an address associated with the interface. * There is a sanity check here; if the destination has node-local * scope, the outgoing interfacde should be a loopback address. * Even if the outgoing interface is not specified, we also * choose a loopback interface as the outgoing interface. */ if (IN6_IS_ADDR_MULTICAST(dst)) { struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL; if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) { ifp = &loif[0]; } if (ifp) { ia6 = in6_ifawithscope(ifp, dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&ia6->ia_addr.sin6_addr); } } /* * If the next hop address for the packet is specified * by caller, use an address associated with the route * to the next hop. */ { struct sockaddr_in6 *sin6_next; struct rtentry *rt; if (opts && opts->ip6po_nexthop) { sin6_next = satosin6(opts->ip6po_nexthop); rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL); if (rt) { ia6 = in6_ifawithscope(rt->rt_ifp, dst); if (ia6 == 0) ia6 = ifatoia6(rt->rt_ifa); } if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } } /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ if (ro) { if (ro->ro_rt && !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == (struct rtentry *)0 || ro->ro_rt->rt_ifp == (struct ifnet *)0) { /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); ro->ro_dst.sin6_family = AF_INET6; ro->ro_dst.sin6_len = sizeof(struct sockaddr_in6); ro->ro_dst.sin6_addr = *dst; if (IN6_IS_ADDR_MULTICAST(dst)) { ro->ro_rt = rtalloc1(&((struct route *)ro) ->ro_dst, 0, 0UL); } else { rtalloc((struct route *)ro); } } /* * in_pcbconnect() checks out IFF_LOOPBACK to skip using * the address. But we don't know why it does so. * It is necessary to ensure the scope even for lo0 * so doesn't check out IFF_LOOPBACK. */ if (ro->ro_rt) { ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); if (ia6 == 0) /* xxx scope error ?*/ ia6 = ifatoia6(ro->ro_rt->rt_ifa); } if (ia6 == 0) { *errorp = EHOSTUNREACH; /* no route */ return(0); } return(&satosin6(&ia6->ia_addr)->sin6_addr); } *errorp = EADDRNOTAVAIL; return(0); } /* * Default hop limit selection. The precedence is as follows: * 1. Hoplimit valued specified via ioctl. * 2. (If the outgoing interface is detected) the current * hop limit of the interface specified by router advertisement. * 3. The system default hoplimit. 
*/ int in6_selecthlim(in6p, ifp) struct in6pcb *in6p; struct ifnet *ifp; { if (in6p && in6p->in6p_hops >= 0) return(in6p->in6p_hops); else if (ifp) return(nd_ifinfo[ifp->if_index].chlim); else return(ip6_defhlim); } #endif void in6_pcbdisconnect(inp) struct inpcb *inp; { bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; in_pcbrehash(inp); if (inp->inp_socket->so_state & SS_NOFDREF) in6_pcbdetach(inp); } void in6_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; #ifdef IPSEC if (inp->in6p_sp != NULL) ipsec6_delete_pcbpolicy(inp); #endif /* IPSEC */ inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); sotoinpcb(so) = 0; sofree(so); if (inp->in6p_options) m_freem(inp->in6p_options); if (inp->in6p_outputopts) { if (inp->in6p_outputopts->ip6po_rthdr && inp->in6p_outputopts->ip6po_route.ro_rt) RTFREE(inp->in6p_outputopts->ip6po_route.ro_rt); if (inp->in6p_outputopts->ip6po_m) (void)m_free(inp->in6p_outputopts->ip6po_m); free(inp->in6p_outputopts, M_IP6OPT); } if (inp->in6p_route.ro_rt) rtfree(inp->in6p_route.ro_rt); ip6_freemoptions(inp->in6p_moptions); /* Check and free IPv4 related resources in case of mapped addr */ if (inp->inp_options) (void)m_free(inp->inp_options); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; - zfreei(ipi->ipi_zone, inp); + zfree(ipi->ipi_zone, inp); } /* * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly * without the need for a wrapper function. The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in6_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; register struct inpcb *inp; register struct sockaddr_in6 *sin6; /* * Do the malloc first in case it blocks. */ MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin6, M_SONAME); return EINVAL; } sin6->sin6_port = inp->inp_lport; sin6->sin6_addr = inp->in6p_laddr; splx(s); if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); else sin6->sin6_scope_id = 0; /*XXX*/ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; *nam = (struct sockaddr *)sin6; return 0; } int in6_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; struct inpcb *inp; register struct sockaddr_in6 *sin6; /* * Do the malloc first in case it blocks. 
*/ MALLOC(sin6, struct sockaddr_in6 *, sizeof(*sin6), M_SONAME, M_WAITOK); bzero((caddr_t)sin6, sizeof (*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(struct sockaddr_in6); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin6, M_SONAME); return EINVAL; } sin6->sin6_port = inp->inp_fport; sin6->sin6_addr = inp->in6p_faddr; splx(s); if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); else sin6->sin6_scope_id = 0; /*XXX*/ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; *nam = (struct sockaddr *)sin6; return 0; } int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return EINVAL; if (inp->inp_vflag & INP_IPV4) { error = in_setsockaddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else error = in6_setsockaddr(so, nam); return error; } int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp = sotoinpcb(so); int error; if (inp == NULL) return EINVAL; if (inp->inp_vflag & INP_IPV4) { error = in_setpeeraddr(so, nam); if (error == 0) in6_sin_2_v4mapsin6_in_sock(nam); } else error = in6_setpeeraddr(so, nam); return error; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. * * Must be called at splnet. */ void in6_pcbnotify(head, dst, fport_arg, laddr6, lport_arg, cmd, notify) struct inpcbhead *head; struct sockaddr *dst; u_int fport_arg, lport_arg; struct in6_addr *laddr6; int cmd; void (*notify) __P((struct inpcb *, int)); { struct inpcb *inp, *ninp; struct in6_addr faddr6; u_short fport = fport_arg, lport = lport_arg; int errno, s; int do_rtchange = (notify == in6_rtchange); if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6) return; faddr6 = ((struct sockaddr_in6 *)dst)->sin6_addr; if (IN6_IS_ADDR_UNSPECIFIED(&faddr6)) return; /* * Redirects go to all references to the destination, * and use in6_rtchange to invalidate the route cache. * Dead host indications: also use in6_rtchange to invalidate * the cache, and deliver the error to all the sockets. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; bzero((caddr_t)laddr6, sizeof(*laddr6)); do_rtchange = 1; } errno = inet6ctlerrmap[cmd]; s = splnet(); for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { ninp = LIST_NEXT(inp, inp_list); if ((inp->inp_vflag & INP_IPV6) == NULL) continue; if (do_rtchange) { /* * Since a non-connected PCB might have a cached route, * we always call in6_rtchange without matching * the PCB to the src/dst pair. * * XXX: we assume in6_rtchange does not free the PCB. 
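in6_mapped_peeraddr() above is what lets an AF_INET6 socket that is really carrying IPv4 traffic report its peer in mapped form: in_setpeeraddr() builds a sockaddr_in and in6_sin_2_v4mapsin6_in_sock() rewrites it as ::ffff:a.b.c.d. A sketch of what an accept loop would observe (descriptor and address values are placeholders):

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>

/*
 * Print the peer of a connected AF_INET6 socket; an IPv4 peer shows up
 * as a v4-mapped address such as ::ffff:192.0.2.7 (placeholder).
 */
static void
print_peer(int fd)
{
        struct sockaddr_in6 peer;
        socklen_t len = sizeof(peer);
        char buf[INET6_ADDRSTRLEN];

        if (getpeername(fd, (struct sockaddr *)&peer, &len) == -1)
                return;
        if (inet_ntop(AF_INET6, &peer.sin6_addr, buf, sizeof(buf)) != NULL)
                printf("peer %s port %u\n", buf, ntohs(peer.sin6_port));
}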
*/ if (IN6_ARE_ADDR_EQUAL(&inp->in6p_route.ro_dst.sin6_addr, &faddr6)) in6_rtchange(inp, errno); if (notify == in6_rtchange) continue; /* there's nothing to do any more */ } if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &faddr6) || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(laddr6) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr6)) || (fport && inp->inp_fport != fport)) continue; if (notify) (*notify)(inp, errno); } splx(s); } /* * Lookup a PCB based on the local address and port. */ struct inpcb * in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in6_addr *laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) wildcard++; if (!IN6_IS_ADDR_UNSPECIFIED( &inp->in6p_laddr)) { if (IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; else if (!IN6_ARE_ADDR_EQUAL( &inp->in6p_laddr, laddr)) continue; } else { if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in6_losing(in6p) struct inpcb *in6p; { struct rtentry *rt; struct rt_addrinfo info; if ((rt = in6p->in6p_route.ro_rt) != NULL) { in6p->in6p_route.ro_rt = 0; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&in6p->in6p_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) (void)rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); else /* * A new route can be allocated * the next time output is attempted. */ rtfree(rt); } } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ void in6_rtchange(inp, errno) struct inpcb *inp; int errno; { if (inp->in6p_route.ro_rt) { rtfree(inp->in6p_route.ro_rt); inp->in6p_route.ro_rt = 0; /* * A new route can be allocated the next time * output is attempted. */ } } /* * Lookup PCB in hash list. 
*/ struct inpcb * in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) struct inpcbinfo *pcbinfo; struct in6_addr *faddr, *laddr; u_int fport_arg, lport_arg; int wildcard; struct ifnet *ifp; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; LIST_FOREACH(inp, head, inp_hash) { if ((inp->inp_vflag & INP_IPV6) == 0) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && inp->inp_lport == lport) { #if defined(NFAITH) && NFAITH > 0 if (ifp && ifp->if_type == IFT_FAITH && (inp->inp_flags & INP_FAITH) == 0) continue; #endif if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) return (inp); else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) local_wild = inp; } } return (local_wild); } /* * Not found. */ return (NULL); } void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) { struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = ip->ip6_src; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) ? m->m_pkthdr.rcvif->if_index : 0; return; } Index: head/sys/vm/vm_zone.c =================================================================== --- head/sys/vm/vm_zone.c (revision 71349) +++ head/sys/vm/vm_zone.c (revision 71350) @@ -1,554 +1,519 @@ /* * Copyright (c) 1997, 1998 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header"); #define ZONE_ERROR_INVALID 0 #define ZONE_ERROR_NOTFREE 1 #define ZONE_ERROR_ALREADYFREE 2 #define ZONE_ROUNDING 32 #define ZENTRY_FREE 0x12342378 + +static void *_zget(vm_zone_t z); + /* * void *zalloc(vm_zone_t zone) -- * Returns an item from a specified zone. * * void zfree(vm_zone_t zone, void *item) -- * Frees an item back to a specified zone. 
*/ static __inline__ void * _zalloc(vm_zone_t z) { void *item; #ifdef INVARIANTS if (z == 0) zerror(ZONE_ERROR_INVALID); #endif if (z->zfreecnt <= z->zfreemin) return _zget(z); item = z->zitems; z->zitems = ((void **) item)[0]; #ifdef INVARIANTS if (((void **) item)[1] != (void *) ZENTRY_FREE) zerror(ZONE_ERROR_NOTFREE); ((void **) item)[1] = 0; #endif z->zfreecnt--; z->znalloc++; return item; } static __inline__ void _zfree(vm_zone_t z, void *item) { ((void **) item)[0] = z->zitems; #ifdef INVARIANTS if (((void **) item)[1] == (void *) ZENTRY_FREE) zerror(ZONE_ERROR_ALREADYFREE); ((void **) item)[1] = (void *) ZENTRY_FREE; #endif z->zitems = item; z->zfreecnt++; } /* * This file comprises a very simple zone allocator. This is used * in lieu of the malloc allocator, where needed or more optimal. * * Note that the initial implementation of this had coloring, and * absolutely no improvement (actually perf degradation) occurred. * * Note also that the zones are type stable. The only restriction is * that the first two longwords of a data structure can be changed * between allocations. Any data that must be stable between allocations * must reside in areas after the first two longwords. * * zinitna, zinit, zbootinit are the initialization routines. * zalloc, zfree, are the interrupt/lock unsafe allocation/free routines. * zalloci, zfreei, are the interrupt/lock safe allocation/free routines. */ static struct vm_zone *zlist; static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); static int zone_kmem_pages, zone_kern_pages, zone_kmem_kvaspace; /* * Create a zone, but don't allocate the zone structure. If the * zone had been previously created by the zone boot code, initialize * various parts of the zone code. * * If waits are not allowed during allocation (e.g. during interrupt * code), a-priori allocate the kernel virtual space, and allocate * only pages when needed. * * Arguments: * z pointer to zone structure. * obj pointer to VM object (opt). * name name of zone. * size size of zone entries. * nentries number of zone entries allocated (only ZONE_INTERRUPT.) * flags ZONE_INTERRUPT -- items can be allocated at interrupt time. * zalloc number of pages allocated when memory is needed. * * Note that when using ZONE_INTERRUPT, the size of the zone is limited * by the nentries argument. The size of the memory allocatable is * unlimited if ZONE_INTERRUPT is not set. * */ int zinitna(vm_zone_t z, vm_object_t obj, char *name, int size, int nentries, int flags, int zalloc) { int totsize, oldzflags; vm_zone_t oldzlist; oldzflags = z->zflags; oldzlist = zlist; if ((z->zflags & ZONE_BOOT) == 0) { z->zsize = (size + ZONE_ROUNDING - 1) & ~(ZONE_ROUNDING - 1); simple_lock_init(&z->zlock); z->zfreecnt = 0; z->ztotal = 0; z->zmax = 0; z->zname = name; z->znalloc = 0; z->zitems = NULL; if (zlist == 0) { zlist = z; } else { z->znext = zlist; zlist = z; } } z->zflags |= flags; /* * If we cannot wait, allocate KVA space up front, and we will fill * in pages as needed. */ if (z->zflags & ZONE_INTERRUPT) { totsize = round_page(z->zsize * nentries); zone_kmem_kvaspace += totsize; z->zkva = kmem_alloc_pageable(kernel_map, totsize); if (z->zkva == 0) { /* Clean up the zlist in case we messed it. 
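The free-list encoding in _zalloc()/_zfree() above is the reason the header comment restricts type stability to data past the first two longwords: word 0 of a free item carries the next-free pointer and, under INVARIANTS, word 1 carries the ZENTRY_FREE magic. A sketch of what that implies for a zone-backed structure (the struct is illustrative):

/*
 * Illustrative layout of an item allocated from a vm_zone.  The first two
 * pointer-sized words are scribbled on while the item sits on the zone
 * free list, so nothing that must survive a zfree()/zalloc() cycle may
 * live there; storage after those words is what the type-stability
 * guarantee covers.
 */
struct zitem_example {
        void    *scratch0;      /* overwritten while free: next-free pointer */
        void    *scratch1;      /* overwritten while free: ZENTRY_FREE magic */
        int      refcnt;        /* past the first two words: type-stable */
        char     payload[64];
};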
*/ if ((oldzflags & ZONE_BOOT) == 0) zlist = oldzlist; return 0; } z->zpagemax = totsize / PAGE_SIZE; if (obj == NULL) { z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax); } else { z->zobj = obj; _vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj); } z->zallocflag = VM_ALLOC_INTERRUPT; z->zmax += nentries; } else { z->zallocflag = VM_ALLOC_SYSTEM; z->zmax = 0; } if (z->zsize > PAGE_SIZE) z->zfreemin = 1; else z->zfreemin = PAGE_SIZE / z->zsize; z->zpagecount = 0; if (zalloc) z->zalloc = zalloc; else z->zalloc = 1; return 1; } /* * Subroutine same as zinitna, except zone data structure is allocated * automatically by malloc. This routine should normally be used, except * in certain tricky startup conditions in the VM system -- then * zbootinit and zinitna can be used. Zinit is the standard zone * initialization call. */ vm_zone_t zinit(char *name, int size, int nentries, int flags, int zalloc) { vm_zone_t z; z = (vm_zone_t) malloc(sizeof (struct vm_zone), M_ZONE, M_NOWAIT | M_ZERO); if (z == NULL) return NULL; if (zinitna(z, NULL, name, size, nentries, flags, zalloc) == 0) { free(z, M_ZONE); return NULL; } return z; } /* * Initialize a zone before the system is fully up. This routine should * only be called before full VM startup. */ void zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems) { int i; z->zname = name; z->zsize = size; z->zpagemax = 0; z->zobj = NULL; z->zflags = ZONE_BOOT; z->zfreemin = 0; z->zallocflag = 0; z->zpagecount = 0; z->zalloc = 0; z->znalloc = 0; simple_lock_init(&z->zlock); bzero(item, nitems * z->zsize); z->zitems = NULL; for (i = 0; i < nitems; i++) { ((void **) item)[0] = z->zitems; #ifdef INVARIANTS ((void **) item)[1] = (void *) ZENTRY_FREE; #endif z->zitems = item; (char *) item += z->zsize; } z->zfreecnt = nitems; z->zmax = nitems; z->ztotal = nitems; if (zlist == 0) { zlist = z; } else { z->znext = zlist; zlist = z; } } /* * Zone critical region locks. */ static __inline int zlock(vm_zone_t z) { int s; s = splhigh(); simple_lock(&z->zlock); return s; } static __inline void zunlock(vm_zone_t z, int s) { simple_unlock(&z->zlock); splx(s); } /* * void *zalloc(vm_zone_t zone) -- * Returns an item from a specified zone. * * void zfree(vm_zone_t zone, void *item) -- * Frees an item back to a specified zone. * - * void *zalloci(vm_zone_t zone) -- - * Returns an item from a specified zone, interrupt safe. - * - * void zfreei(vm_zone_t zone, void *item) -- - * Frees an item back to a specified zone, interrupt safe. - * */ void * zalloc(vm_zone_t z) { -#if defined(SMP) - return zalloci(z); -#else - return _zalloc(z); -#endif -} - -void -zfree(vm_zone_t z, void *item) -{ -#ifdef SMP - zfreei(z, item); -#else - _zfree(z, item); -#endif -} - -/* - * Zone allocator/deallocator. These are interrupt / (or potentially SMP) - * safe. The raw zalloc/zfree routines are not interrupt safe, but are fast. - */ -void * -zalloci(vm_zone_t z) -{ int s; void *item; s = zlock(z); item = _zalloc(z); zunlock(z, s); return item; } void -zfreei(vm_zone_t z, void *item) +zfree(vm_zone_t z, void *item) { int s; s = zlock(z); _zfree(z, item); zunlock(z, s); return; } /* * Internal zone routine. Not to be called from external (non vm_zone) code. 
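With this revision zalloc() and zfree() always take the zone's simple lock under splhigh(), and the separate zalloci()/zfreei() entry points are removed, which is what the in_pcballoc()/in_pcbdetach() hunks earlier in the diff track. A minimal sketch of the zone API as it now stands (the zone and structure names are illustrative):

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm_zone.h>

struct widget {                 /* illustrative payload */
        void    *w_link0;       /* first two words belong to the free list */
        void    *w_link1;
        int      w_state;
};

static vm_zone_t widget_zone;

static void
widget_init(void)
{
        /* name, item size, nentries (ZONE_INTERRUPT only), flags, pages/alloc */
        widget_zone = zinit("widget", sizeof(struct widget), 0, 0, 4);
}

static struct widget *
widget_get(void)
{
        struct widget *w;

        /* Interrupt/SMP safe in all configurations now. */
        w = zalloc(widget_zone);
        if (w != NULL)
                bzero(w, sizeof(*w));
        return (w);
}

static void
widget_put(struct widget *w)
{
        zfree(widget_zone, w);
}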
 */
-void *
+static void *
_zget(vm_zone_t z)
{
        int i;
        vm_page_t m;
        int nitems, nbytes;
        void *item;

        if (z == NULL)
                panic("zget: null zone");

        if (z->zflags & ZONE_INTERRUPT) {
                item = (char *) z->zkva + z->zpagecount * PAGE_SIZE;
                for (i = 0; ((i < z->zalloc) && (z->zpagecount < z->zpagemax));
                     i++) {
                        vm_offset_t zkva;

                        m = vm_page_alloc(z->zobj, z->zpagecount, z->zallocflag);
                        if (m == NULL)
                                break;

                        zkva = z->zkva + z->zpagecount * PAGE_SIZE;
                        pmap_kenter(zkva, VM_PAGE_TO_PHYS(m));
                        bzero((caddr_t) zkva, PAGE_SIZE);
                        z->zpagecount++;
                        zone_kmem_pages++;
                        cnt.v_wire_count++;
                }
                nitems = (i * PAGE_SIZE) / z->zsize;
        } else {
                nbytes = z->zalloc * PAGE_SIZE;

                /*
                 * Check to see if the kernel map is already locked.  We could allow
                 * for recursive locks, but that eliminates a valuable debugging
                 * mechanism, and opens up the kernel map for potential corruption
                 * by inconsistent data structure manipulation.  We could also use
                 * the interrupt allocation mechanism, but that has size limitations.
                 * Luckily, we have kmem_map that is a submap of kernel map available
                 * for memory allocation, and manipulation of that map doesn't affect
                 * the kernel map structures themselves.
                 *
                 * We can wait, so just do normal map allocation in the appropriate
                 * map.
                 */
                if (lockstatus(&kernel_map->lock, NULL)) {
                        int s;
                        s = splvm();
-#ifdef SMP
                        simple_unlock(&z->zlock);
-#endif
                        item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK);
-#ifdef SMP
                        simple_lock(&z->zlock);
-#endif
                        if (item != NULL)
                                zone_kmem_pages += z->zalloc;
                        splx(s);
                } else {
-#ifdef SMP
                        simple_unlock(&z->zlock);
-#endif
                        item = (void *) kmem_alloc(kernel_map, nbytes);
-#ifdef SMP
                        simple_lock(&z->zlock);
-#endif
                        if (item != NULL)
                                zone_kern_pages += z->zalloc;
                }
                if (item != NULL) {
                        bzero(item, nbytes);
                } else {
                        nbytes = 0;
                }
                nitems = nbytes / z->zsize;
        }
        z->ztotal += nitems;

        /*
         * Save one for immediate allocation
         */
        if (nitems != 0) {
                nitems -= 1;
                for (i = 0; i < nitems; i++) {
                        ((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
                        ((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
                        z->zitems = item;
                        (char *) item += z->zsize;
                }
                z->zfreecnt += nitems;
                z->znalloc++;
        } else if (z->zfreecnt > 0) {
                item = z->zitems;
                z->zitems = ((void **) item)[0];
#ifdef INVARIANTS
                if (((void **) item)[1] != (void *) ZENTRY_FREE)
                        zerror(ZONE_ERROR_NOTFREE);
                ((void **) item)[1] = 0;
#endif
                z->zfreecnt--;
                z->znalloc++;
        } else {
                item = NULL;
        }

        return item;
}

static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
        int error=0;
        vm_zone_t curzone, nextzone;
        char tmpbuf[128];
        char tmpname[14];

        snprintf(tmpbuf, sizeof(tmpbuf),
            "\nITEM SIZE LIMIT USED FREE REQUESTS\n");
        error = SYSCTL_OUT(req, tmpbuf, strlen(tmpbuf));
        if (error)
                return (error);

        for (curzone = zlist; curzone; curzone = nextzone) {
                int i;
                int len;
                int offset;

                nextzone = curzone->znext;
                len = strlen(curzone->zname);
                if (len >= (sizeof(tmpname) - 1))
                        len = (sizeof(tmpname) - 1);
                for(i = 0; i < sizeof(tmpname) - 1; i++)
                        tmpname[i] = ' ';
                tmpname[i] = 0;
                memcpy(tmpname, curzone->zname, len);
                tmpname[len] = ':';
                offset = 0;
                if (curzone == zlist) {
                        offset = 1;
                        tmpbuf[0] = '\n';
                }

                snprintf(tmpbuf + offset, sizeof(tmpbuf) - offset,
                    "%s %6.6u, %8.8u, %6.6u, %6.6u, %8.8u\n",
                    tmpname, curzone->zsize, curzone->zmax,
                    (curzone->ztotal - curzone->zfreecnt),
                    curzone->zfreecnt, curzone->znalloc);

                len = strlen((char *)tmpbuf);
                if (nextzone == NULL)
                        tmpbuf[len - 1] = 0;

                error = SYSCTL_OUT(req, tmpbuf, len);
                if (error)
                        return (error);
        }
        return (0);
}

#ifdef INVARIANT_SUPPORT
void
zerror(int error)
{
        char *msg;

        switch (error) {
        case ZONE_ERROR_INVALID:
zone"; break; case ZONE_ERROR_NOTFREE: msg = "zone: entry not free"; break; case ZONE_ERROR_ALREADYFREE: msg = "zone: freeing free entry"; break; default: msg = "zone: invalid error"; break; } panic(msg); } #endif SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, \ NULL, 0, sysctl_vm_zone, "A", "Zone Info"); SYSCTL_INT(_vm, OID_AUTO, zone_kmem_pages, CTLFLAG_RD, &zone_kmem_pages, 0, "Number of interrupt safe pages allocated by zone"); SYSCTL_INT(_vm, OID_AUTO, zone_kmem_kvaspace, CTLFLAG_RD, &zone_kmem_kvaspace, 0, "KVA space allocated by zone"); SYSCTL_INT(_vm, OID_AUTO, zone_kern_pages, CTLFLAG_RD, &zone_kern_pages, 0, "Number of non-interrupt safe pages allocated by zone"); Index: head/sys/vm/vm_zone.h =================================================================== --- head/sys/vm/vm_zone.h (revision 71349) +++ head/sys/vm/vm_zone.h (revision 71350) @@ -1,60 +1,57 @@ /* * Copyright (c) 1997, 1998 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * * $FreeBSD$ */ #ifndef _SYS_ZONE_H #define _SYS_ZONE_H #define ZONE_INTERRUPT 1 /* Use this if you need to allocate at int time */ #define ZONE_BOOT 16 /* This is an internal flag used by zbootinit */ #include typedef struct vm_zone { struct simplelock zlock; /* lock for data structure */ void *zitems; /* linked list of items */ int zfreecnt; /* free entries */ int zfreemin; /* minimum number of free entries */ int znalloc; /* number of allocations */ vm_offset_t zkva; /* Base kva of zone */ int zpagecount; /* Total # of allocated pages */ int zpagemax; /* Max address space */ int zmax; /* Max number of entries allocated */ int ztotal; /* Total entries allocated now */ int zsize; /* size of each entry */ int zalloc; /* hint for # of pages to alloc */ int zflags; /* flags for zone */ int zallocflag; /* flag for allocation */ struct vm_object *zobj; /* object to hold zone */ char *zname; /* name for diags */ struct vm_zone *znext; /* list of zones for sysctl */ } *vm_zone_t; void zerror __P((int)) __dead2; vm_zone_t zinit __P((char *name, int size, int nentries, int flags, int zalloc)); int zinitna __P((vm_zone_t z, struct vm_object *obj, char *name, int size, int nentries, int flags, int zalloc)); void * zalloc __P((vm_zone_t z)); void zfree __P((vm_zone_t z, void *item)); -void * zalloci __P((vm_zone_t z)); -void zfreei __P((vm_zone_t z, void *item)); void zbootinit __P((vm_zone_t z, char *name, int size, void *item, int nitems)); -void * _zget __P((vm_zone_t z)); #endif /* _SYS_ZONE_H */