diff --git a/sys/kern/subr_mbuf.c b/sys/kern/subr_mbuf.c index f5e8abff85c1..123553bc53dd 100644 --- a/sys/kern/subr_mbuf.c +++ b/sys/kern/subr_mbuf.c @@ -1,1595 +1,1594 @@ /*- * Copyright (c) 2001, 2002, 2003 * Bosko Milekic . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_mac.h" #include "opt_param.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * mb_alloc: network buffer allocator */ /* * Maximum number of PCPU containers. If you know what you're doing you could * explicitly define MBALLOC_NCPU to be exactly the number of CPUs on your * system during compilation, and thus prevent kernel structure bloat. * * SMP and non-SMP kernels clearly have a different number of possible CPUs, * but because we cannot assume a dense array of CPUs, we always allocate * and traverse PCPU containers up to NCPU amount and merely check for * CPU availability. */ #ifdef MBALLOC_NCPU #define NCPU MBALLOC_NCPU #else #define NCPU MAXCPU #endif /*- * The mbuf allocator is based on Alfred Perlstein's * "memcache" proof-of-concept allocator which was itself based on * several well-known SMP-friendly allocators. * * The mb_alloc mbuf allocator is a special when compared to other * general-purpose allocators. Some things to take note of: * * Mbufs and mbuf clusters are two different objects. Sometimes we * will allocate a single mbuf, other times a single cluster, * other times both. Further, we may sometimes wish to allocate a * whole chain of mbufs with clusters. This allocator will perform * the common case of each scenario in one function call (this * includes constructing or destructing the object) while only * locking/unlocking the cache once, if it can get away with it. * The caches consist of pure mbufs and pure clusters; that is * there are no 'zones' containing mbufs with already pre-hooked * clusters. Since we can allocate both objects atomically anyway, * we don't bother fragmenting our caches for any particular 'scenarios.' 
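* (For example, m_getcl() below allocates an mbuf and a cluster in a single call while taking the per-CPU cache lock only once, by way of the MBP_PERSIST/MBP_PERSISTENT flags.)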
* * We allocate from seperate sub-maps of kmem_map, thus imposing * an ultimate upper-limit on the number of allocatable clusters * and mbufs and also, since the clusters all come from a * virtually contiguous region, we can keep reference counters * for them and "allocate" them purely by indexing into a * dense refcount vector. * * We call out to protocol drain routines (which can be hooked * into us) when we're low on space. * * The mbuf allocator keeps all objects that it allocates in mb_buckets. * The buckets keep a number of objects (an object can be an mbuf or an * mbuf cluster) and facilitate moving larger sets of contiguous objects * from the per-CPU caches to the global cache. The buckets also have * the added advantage that objects, when migrated from cache to cache, * are migrated in chunks that keep contiguous objects together, * minimizing TLB pollution. * * The buckets are kept on singly-linked lists called "containers." A container * is protected by a mutex in order to ensure consistency. The mutex * itself is allocated separately and attached to the container at boot time, * thus allowing for certain containers to share the same lock. Per-CPU * containers for mbufs and mbuf clusters all share the same per-CPU * lock whereas the global cache containers for these objects share one * global lock. */ struct mb_bucket { SLIST_ENTRY(mb_bucket) mb_blist; int mb_owner; int mb_numfree; void *mb_free[0]; }; struct mb_container { SLIST_HEAD(mc_buckethd, mb_bucket) mc_bhead; struct mtx *mc_lock; int mc_numowner; u_int mc_starved; long *mc_types; u_long *mc_objcount; u_long *mc_numbucks; }; struct mb_gen_list { struct mb_container mb_cont; struct cv mgl_mstarved; }; struct mb_pcpu_list { struct mb_container mb_cont; }; /* * Boot-time configurable object counts that will determine the maximum * number of permitted objects in the mbuf and mcluster cases. In the * ext counter (nmbcnt) case, it's just an indicator serving to scale * kmem_map size properly - in other words, we may be allowed to allocate * more than nmbcnt counters, whereas we will never be allowed to allocate * more than nmbufs mbufs or nmbclusters mclusters. * As for nsfbufs, it is used to indicate how many sendfile(2) buffers will be * allocatable by the sfbuf allocator (found in uipc_syscalls.c) */ #ifndef NMBCLUSTERS #define NMBCLUSTERS (1024 + maxusers * 64) #endif #ifndef NMBUFS #define NMBUFS (nmbclusters * 2) #endif #ifndef NSFBUFS #define NSFBUFS (512 + maxusers * 16) #endif #ifndef NMBCNTS #define NMBCNTS (nmbclusters + nsfbufs) #endif int nmbufs; int nmbclusters; int nmbcnt; int nsfbufs; /* * Sizes of objects per bucket. There are this size's worth of mbufs * or clusters in each bucket. Please keep these a power-of-2. */ #define MBUF_BUCK_SZ (PAGE_SIZE * 2) #define CLUST_BUCK_SZ (PAGE_SIZE * 4) /* * Perform sanity checks of tunables declared above. */ static void tunable_mbinit(void *dummy) { /* * This has to be done before VM init. */ nmbclusters = NMBCLUSTERS; TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); nmbufs = NMBUFS; TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); nsfbufs = NSFBUFS; TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); nmbcnt = NMBCNTS; TUNABLE_INT_FETCH("kern.ipc.nmbcnt", &nmbcnt); /* Sanity checks */ if (nmbufs < nmbclusters * 2) nmbufs = nmbclusters * 2; if (nmbcnt < nmbclusters + nsfbufs) nmbcnt = nmbclusters + nsfbufs; } SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); /* * The freelist structures and mutex locks. 
The number statically declared * here depends on the number of CPUs. * * We set up in such a way that all the objects (mbufs, clusters) * share the same mutex lock. It has been established that we do not benefit * from different locks for different objects, so we use the same lock, * regardless of object type. This also allows us to do optimised * multi-object allocations without dropping the lock in between. */ struct mb_lstmngr { struct mb_gen_list *ml_genlist; struct mb_pcpu_list *ml_cntlst[NCPU]; struct mb_bucket **ml_btable; vm_map_t ml_map; vm_offset_t ml_mapbase; vm_offset_t ml_maptop; int ml_mapfull; u_int ml_objsize; u_int ml_objbucks; u_int *ml_wmhigh; u_int *ml_wmlow; }; static struct mb_lstmngr mb_list_mbuf, mb_list_clust; static struct mtx mbuf_gen, mbuf_pcpu[NCPU]; u_int *cl_refcntmap; /* * Local macros for internal allocator structure manipulations. */ #ifdef SMP #define MB_GET_PCPU_LIST(mb_lst) (mb_lst)->ml_cntlst[PCPU_GET(cpuid)] #else #define MB_GET_PCPU_LIST(mb_lst) (mb_lst)->ml_cntlst[0] #endif #define MB_GET_GEN_LIST(mb_lst) (mb_lst)->ml_genlist #define MB_LOCK_CONT(mb_cnt) mtx_lock((mb_cnt)->mb_cont.mc_lock) #define MB_UNLOCK_CONT(mb_cnt) mtx_unlock((mb_cnt)->mb_cont.mc_lock) #define MB_GET_PCPU_LIST_NUM(mb_lst, num) \ (mb_lst)->ml_cntlst[(num)] #define MB_BUCKET_INDX(mb_obj, mb_lst) \ (int)(((caddr_t)(mb_obj) - (caddr_t)(mb_lst)->ml_mapbase) / \ ((mb_lst)->ml_objbucks * (mb_lst)->ml_objsize)) #define MB_GET_OBJECT(mb_objp, mb_bckt, mb_lst) \ { \ struct mc_buckethd *_mchd = &((mb_lst)->mb_cont.mc_bhead); \ \ (mb_bckt)->mb_numfree--; \ (mb_objp) = (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)]; \ (*((mb_lst)->mb_cont.mc_objcount))--; \ if ((mb_bckt)->mb_numfree == 0) { \ SLIST_REMOVE_HEAD(_mchd, mb_blist); \ SLIST_NEXT((mb_bckt), mb_blist) = NULL; \ (mb_bckt)->mb_owner |= MB_BUCKET_FREE; \ } \ } #define MB_PUT_OBJECT(mb_objp, mb_bckt, mb_lst) \ (mb_bckt)->mb_free[((mb_bckt)->mb_numfree)] = (mb_objp); \ (mb_bckt)->mb_numfree++; \ (*((mb_lst)->mb_cont.mc_objcount))++; #define MB_MBTYPES_INC(mb_cnt, mb_type, mb_num) \ if ((mb_type) != MT_NOTMBUF) \ (*((mb_cnt)->mb_cont.mc_types + (mb_type))) += (mb_num) #define MB_MBTYPES_DEC(mb_cnt, mb_type, mb_num) \ if ((mb_type) != MT_NOTMBUF) \ (*((mb_cnt)->mb_cont.mc_types + (mb_type))) -= (mb_num) /* * Ownership of buckets/containers is represented by integers. The PCPU * lists range from 0 to NCPU-1. We need a free numerical id for the general * list (we use NCPU). We also need a non-conflicting free bit to indicate * that the bucket is free and removed from a container, while not losing * the bucket's originating container id. We use the highest bit * for the free marker. */ #define MB_GENLIST_OWNER (NCPU) #define MB_BUCKET_FREE (1 << (sizeof(int) * 8 - 1)) /* Statistics structures for allocator (per-CPU and general). */ static struct mbpstat mb_statpcpu[NCPU + 1]; struct mbstat mbstat; /* Sleep time for wait code (in ticks). */ static int mbuf_wait = 64; static u_int mbuf_hiwm = 512; /* High wm on # of mbufs per cache */ static u_int mbuf_lowm = 128; /* Low wm on # of mbufs per cache */ static u_int clust_hiwm = 128; /* High wm on # of clusters per cache */ static u_int clust_lowm = 16; /* Low wm on # of clusters per cache */ /* * Objects exported by sysctl(8). 
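* The nmb* limits below are boot-time tunables (see tunable_mbinit() above) and are exported read-only, while mbuf_wait and the per-cache watermarks may be adjusted at run time.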
*/ SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "Maximum number of mbuf clusters available"); SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0, "Maximum number of mbufs available"); SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0, "Number used to scale kmem_map to ensure sufficient space for counters"); SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RD, &nsfbufs, 0, "Maximum number of sendfile(2) sf_bufs available"); SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0, "Sleep time of mbuf subsystem wait allocations during exhaustion"); SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_hiwm, CTLFLAG_RW, &mbuf_hiwm, 0, "Upper limit of number of mbufs allowed in each cache"); SYSCTL_UINT(_kern_ipc, OID_AUTO, mbuf_lowm, CTLFLAG_RW, &mbuf_lowm, 0, "Lower limit of number of mbufs allowed in each cache"); SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_hiwm, CTLFLAG_RW, &clust_hiwm, 0, "Upper limit of number of mbuf clusters allowed in each cache"); SYSCTL_UINT(_kern_ipc, OID_AUTO, clust_lowm, CTLFLAG_RW, &clust_lowm, 0, "Lower limit of number of mbuf clusters allowed in each cache"); SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, "Mbuf general information and statistics"); SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mb_statpcpu, CTLFLAG_RD, mb_statpcpu, sizeof(mb_statpcpu), "S,", "Mbuf allocator per CPU statistics"); /* * Prototypes of local allocator routines. */ static void *mb_alloc_wait(struct mb_lstmngr *, short); static struct mb_bucket *mb_pop_cont(struct mb_lstmngr *, int, struct mb_pcpu_list *); static void mb_reclaim(void); static void mbuf_init(void *); /* * Initial allocation numbers. Each parameter represents the number of buckets * of each object that will be placed initially in each PCPU container for * said object. */ #define NMB_MBUF_INIT 2 #define NMB_CLUST_INIT 8 /* * Internal flags that allow for cache locks to remain "persistent" across * allocation and free calls. They may be used in combination. */ #define MBP_PERSIST 0x1 /* Return with lock still held. */ #define MBP_PERSISTENT 0x2 /* Cache lock is already held coming in. */ /* * Initialize the mbuf subsystem. * * We sub-divide the kmem_map into several submaps; this way, we don't have * to worry about artificially limiting the number of mbuf or mbuf cluster * allocations, due to fear of one type of allocation "stealing" address * space initially reserved for another. * * Set up both the general containers and all the PCPU containers. Populate * the PCPU containers with initial numbers. */ MALLOC_DEFINE(M_MBUF, "mbufmgr", "mbuf subsystem management structures"); SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL) static void mbuf_init(void *dummy) { struct mb_pcpu_list *pcpu_cnt; vm_size_t mb_map_size; int i, j; /* * Set up all the submaps, for each type of object that we deal * with in this allocator. 
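* Each submap is sized from its tunable (nmbufs or nmbclusters) and rounded down to a whole number of buckets, which is what lets MB_BUCKET_INDX() map any object address back to its owning bucket.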
*/ mb_map_size = (vm_size_t)(nmbufs * MSIZE); mb_map_size = rounddown(mb_map_size, MBUF_BUCK_SZ); mb_list_mbuf.ml_btable = malloc((unsigned long)mb_map_size / MBUF_BUCK_SZ * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT); if (mb_list_mbuf.ml_btable == NULL) goto bad; mb_list_mbuf.ml_map = kmem_suballoc(kmem_map,&(mb_list_mbuf.ml_mapbase), &(mb_list_mbuf.ml_maptop), mb_map_size); mb_list_mbuf.ml_map->system_map = 1; mb_list_mbuf.ml_mapfull = 0; mb_list_mbuf.ml_objsize = MSIZE; mb_list_mbuf.ml_objbucks = MBUF_BUCK_SZ / MSIZE; mb_list_mbuf.ml_wmhigh = &mbuf_hiwm; mb_list_mbuf.ml_wmlow = &mbuf_lowm; mb_map_size = (vm_size_t)(nmbclusters * MCLBYTES); mb_map_size = rounddown(mb_map_size, CLUST_BUCK_SZ); mb_list_clust.ml_btable = malloc((unsigned long)mb_map_size / CLUST_BUCK_SZ * sizeof(struct mb_bucket *), M_MBUF, M_NOWAIT); if (mb_list_clust.ml_btable == NULL) goto bad; mb_list_clust.ml_map = kmem_suballoc(kmem_map, &(mb_list_clust.ml_mapbase), &(mb_list_clust.ml_maptop), mb_map_size); mb_list_clust.ml_map->system_map = 1; mb_list_clust.ml_mapfull = 0; mb_list_clust.ml_objsize = MCLBYTES; mb_list_clust.ml_objbucks = CLUST_BUCK_SZ / MCLBYTES; mb_list_clust.ml_wmhigh = &clust_hiwm; mb_list_clust.ml_wmlow = &clust_lowm; /* * Allocate required general (global) containers for each object type. */ mb_list_mbuf.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF, M_NOWAIT); mb_list_clust.ml_genlist = malloc(sizeof(struct mb_gen_list), M_MBUF, M_NOWAIT); if ((mb_list_mbuf.ml_genlist == NULL) || (mb_list_clust.ml_genlist == NULL)) goto bad; /* * Initialize condition variables and general container mutex locks. */ mtx_init(&mbuf_gen, "mbuf subsystem general lists lock", NULL, 0); cv_init(&(mb_list_mbuf.ml_genlist->mgl_mstarved), "mbuf pool starved"); cv_init(&(mb_list_clust.ml_genlist->mgl_mstarved), "mcluster pool starved"); mb_list_mbuf.ml_genlist->mb_cont.mc_lock = mb_list_clust.ml_genlist->mb_cont.mc_lock = &mbuf_gen; /* * Set up the general containers for each object. */ mb_list_mbuf.ml_genlist->mb_cont.mc_numowner = mb_list_clust.ml_genlist->mb_cont.mc_numowner = MB_GENLIST_OWNER; mb_list_mbuf.ml_genlist->mb_cont.mc_starved = mb_list_clust.ml_genlist->mb_cont.mc_starved = 0; mb_list_mbuf.ml_genlist->mb_cont.mc_objcount = &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbfree); mb_list_clust.ml_genlist->mb_cont.mc_objcount = &(mb_statpcpu[MB_GENLIST_OWNER].mb_clfree); mb_list_mbuf.ml_genlist->mb_cont.mc_numbucks = &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbbucks); mb_list_clust.ml_genlist->mb_cont.mc_numbucks = &(mb_statpcpu[MB_GENLIST_OWNER].mb_clbucks); mb_list_mbuf.ml_genlist->mb_cont.mc_types = &(mb_statpcpu[MB_GENLIST_OWNER].mb_mbtypes[0]); mb_list_clust.ml_genlist->mb_cont.mc_types = NULL; SLIST_INIT(&(mb_list_mbuf.ml_genlist->mb_cont.mc_bhead)); SLIST_INIT(&(mb_list_clust.ml_genlist->mb_cont.mc_bhead)); /* * Allocate all the required counters for clusters. This makes * cluster allocations/deallocations much faster. */ cl_refcntmap = malloc(nmbclusters * sizeof(u_int), M_MBUF, M_NOWAIT); if (cl_refcntmap == NULL) goto bad; /* * Initialize general mbuf statistics. */ mbstat.m_msize = MSIZE; mbstat.m_mclbytes = MCLBYTES; mbstat.m_minclsize = MINCLSIZE; mbstat.m_mlen = MLEN; mbstat.m_mhlen = MHLEN; mbstat.m_numtypes = MT_NTYPES; mbstat.m_mbperbuck = MBUF_BUCK_SZ / MSIZE; mbstat.m_clperbuck = CLUST_BUCK_SZ / MCLBYTES; /* * Allocate and initialize PCPU containers. 
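* Absent CPUs are merely marked inactive in the statistics; each present CPU gets a pair of containers (mbuf and cluster) sharing a single per-CPU lock and is primed with NMB_MBUF_INIT mbuf buckets and NMB_CLUST_INIT cluster buckets.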
*/ for (i = 0; i < NCPU; i++) { if (CPU_ABSENT(i)) { mb_statpcpu[i].mb_active = 0; continue; } mb_list_mbuf.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list), M_MBUF, M_NOWAIT); mb_list_clust.ml_cntlst[i] = malloc(sizeof(struct mb_pcpu_list), M_MBUF, M_NOWAIT); if ((mb_list_mbuf.ml_cntlst[i] == NULL) || (mb_list_clust.ml_cntlst[i] == NULL)) goto bad; mtx_init(&mbuf_pcpu[i], "mbuf PCPU list lock", NULL, 0); mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_lock = mb_list_clust.ml_cntlst[i]->mb_cont.mc_lock = &mbuf_pcpu[i]; mb_statpcpu[i].mb_active = 1; mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numowner = mb_list_clust.ml_cntlst[i]->mb_cont.mc_numowner = i; mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_starved = mb_list_clust.ml_cntlst[i]->mb_cont.mc_starved = 0; mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_objcount = &(mb_statpcpu[i].mb_mbfree); mb_list_clust.ml_cntlst[i]->mb_cont.mc_objcount = &(mb_statpcpu[i].mb_clfree); mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_numbucks = &(mb_statpcpu[i].mb_mbbucks); mb_list_clust.ml_cntlst[i]->mb_cont.mc_numbucks = &(mb_statpcpu[i].mb_clbucks); mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_types = &(mb_statpcpu[i].mb_mbtypes[0]); mb_list_clust.ml_cntlst[i]->mb_cont.mc_types = NULL; SLIST_INIT(&(mb_list_mbuf.ml_cntlst[i]->mb_cont.mc_bhead)); SLIST_INIT(&(mb_list_clust.ml_cntlst[i]->mb_cont.mc_bhead)); /* * Perform initial allocations. */ pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_mbuf, i); MB_LOCK_CONT(pcpu_cnt); for (j = 0; j < NMB_MBUF_INIT; j++) { if (mb_pop_cont(&mb_list_mbuf, M_DONTWAIT, pcpu_cnt) == NULL) goto bad; } MB_UNLOCK_CONT(pcpu_cnt); pcpu_cnt = MB_GET_PCPU_LIST_NUM(&mb_list_clust, i); MB_LOCK_CONT(pcpu_cnt); for (j = 0; j < NMB_CLUST_INIT; j++) { if (mb_pop_cont(&mb_list_clust, M_DONTWAIT, pcpu_cnt) == NULL) goto bad; } MB_UNLOCK_CONT(pcpu_cnt); } return; bad: panic("mbuf_init(): failed to initialize mbuf subsystem!"); } /* * Populate a given mbuf PCPU container with a bucket full of fresh new * buffers. Return a pointer to the new bucket (already in the container if * successful), or return NULL on failure. * * LOCKING NOTES: * PCPU container lock must be held when this is called. * The lock is dropped here so that we can cleanly call the underlying VM * code. If we fail, we return with no locks held. If we succeed (i.e., return * non-NULL), we return with the PCPU lock held, ready for allocation from * the returned bucket. */ static struct mb_bucket * mb_pop_cont(struct mb_lstmngr *mb_list, int how, struct mb_pcpu_list *cnt_lst) { struct mb_bucket *bucket; caddr_t p; int i; MB_UNLOCK_CONT(cnt_lst); /* * If our object's (finite) map is starved now (i.e., no more address * space), bail out now. */ if (mb_list->ml_mapfull) return (NULL); bucket = malloc(sizeof(struct mb_bucket) + - mb_list->ml_objbucks * sizeof(void *), M_MBUF, - how == M_TRYWAIT ? M_WAITOK : M_NOWAIT); + mb_list->ml_objbucks * sizeof(void *), M_MBUF, MBTOM(how)); if (bucket == NULL) return (NULL); p = (caddr_t)kmem_malloc(mb_list->ml_map, mb_list->ml_objsize * - mb_list->ml_objbucks, how == M_TRYWAIT ? 
M_WAITOK : M_NOWAIT); + mb_list->ml_objbucks, MBTOM(how)); if (p == NULL) { free(bucket, M_MBUF); if (how == M_TRYWAIT) mb_list->ml_mapfull = 1; return (NULL); } bucket->mb_numfree = 0; mb_list->ml_btable[MB_BUCKET_INDX(p, mb_list)] = bucket; for (i = 0; i < mb_list->ml_objbucks; i++) { bucket->mb_free[i] = p; bucket->mb_numfree++; p += mb_list->ml_objsize; } MB_LOCK_CONT(cnt_lst); bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), bucket, mb_blist); (*(cnt_lst->mb_cont.mc_numbucks))++; *(cnt_lst->mb_cont.mc_objcount) += bucket->mb_numfree; return (bucket); } /* * Allocate a network buffer. * The general case is very easy. Complications only arise if our PCPU * container is empty. Things get worse if the PCPU container is empty, * the general container is empty, and we've run out of address space * in our map; then we try to block if we're willing to (M_TRYWAIT). */ static __inline void * mb_alloc(struct mb_lstmngr *mb_list, int how, short type, short persist, int *pers_list) { static int last_report; struct mb_pcpu_list *cnt_lst; struct mb_bucket *bucket; void *m; #ifdef INVARIANTS int flags; flags = how & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT); if (flags != M_DONTWAIT && flags != M_TRYWAIT) { static struct timeval lasterr; static int curerr; if (ppsratecheck(&lasterr, &curerr, 1)) { printf("Bad mbuf alloc flags: %x\n", flags); backtrace(); how = M_TRYWAIT; } } #endif m = NULL; if ((persist & MBP_PERSISTENT) != 0) { /* * If we're a "persistent" call, then the per-CPU #(pers_list) * cache lock is already held, and we just need to refer to * the correct cache descriptor. */ cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, *pers_list); } else { cnt_lst = MB_GET_PCPU_LIST(mb_list); MB_LOCK_CONT(cnt_lst); } if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != NULL) { /* * This is the easy allocation case. We just grab an object * from a bucket in the PCPU container. At worst, we * have just emptied the bucket and so we remove it * from the container. */ MB_GET_OBJECT(m, bucket, cnt_lst); MB_MBTYPES_INC(cnt_lst, type, 1); /* If asked to persist, do not drop the lock. */ if ((persist & MBP_PERSIST) == 0) MB_UNLOCK_CONT(cnt_lst); else *pers_list = cnt_lst->mb_cont.mc_numowner; } else { struct mb_gen_list *gen_list; /* * This is the less-common more difficult case. We must * first verify if the general list has anything for us * and if that also fails, we must allocate a page from * the map and create a new bucket to place in our PCPU * container (already locked). If the map is starved then * we're really in for trouble, as we have to wait on * the general container's condition variable. */ gen_list = MB_GET_GEN_LIST(mb_list); MB_LOCK_CONT(gen_list); if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL) { /* * Give ownership of the bucket to our CPU's * container, but only actually put the bucket * in the container if it doesn't become free * upon removing an mbuf from it. 
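* (A bucket emptied by this allocation is instead tagged MB_BUCKET_FREE and left unlinked; mb_free() re-inserts it once objects are returned to it.)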
*/ SLIST_REMOVE_HEAD(&(gen_list->mb_cont.mc_bhead), mb_blist); bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; (*(gen_list->mb_cont.mc_numbucks))--; (*(cnt_lst->mb_cont.mc_numbucks))++; *(gen_list->mb_cont.mc_objcount) -= bucket->mb_numfree; bucket->mb_numfree--; m = bucket->mb_free[(bucket->mb_numfree)]; if (bucket->mb_numfree == 0) { SLIST_NEXT(bucket, mb_blist) = NULL; bucket->mb_owner |= MB_BUCKET_FREE; } else { SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), bucket, mb_blist); *(cnt_lst->mb_cont.mc_objcount) += bucket->mb_numfree; } MB_UNLOCK_CONT(gen_list); MB_MBTYPES_INC(cnt_lst, type, 1); /* If asked to persist, do not drop the lock. */ if ((persist & MBP_PERSIST) == 0) MB_UNLOCK_CONT(cnt_lst); else *pers_list = cnt_lst->mb_cont.mc_numowner; } else { /* * We'll have to allocate a new page. */ MB_UNLOCK_CONT(gen_list); bucket = mb_pop_cont(mb_list, how, cnt_lst); if (bucket != NULL) { MB_GET_OBJECT(m, bucket, cnt_lst); MB_MBTYPES_INC(cnt_lst, type, 1); /* If asked to persist, do not drop the lock. */ if ((persist & MBP_PERSIST) == 0) MB_UNLOCK_CONT(cnt_lst); else *pers_list=cnt_lst->mb_cont.mc_numowner; } else { if (how == M_TRYWAIT) { /* * Absolute worst-case scenario. * We block if we're willing to, but * only after trying to steal from * other lists. */ m = mb_alloc_wait(mb_list, type); } else { /* XXX: No consistency. */ mbstat.m_drops++; if (ticks < last_report || (ticks - last_report) >= hz) { last_report = ticks; printf( "All mbufs or mbuf clusters exhausted, please see tuning(7).\n"); } } if (m != NULL && (persist & MBP_PERSIST) != 0) { cnt_lst = MB_GET_PCPU_LIST(mb_list); MB_LOCK_CONT(cnt_lst); *pers_list=cnt_lst->mb_cont.mc_numowner; } } } } return (m); } /* * This is the worst-case scenario called only if we're allocating with * M_TRYWAIT. We first drain all the protocols, then try to find an mbuf * by looking in every PCPU container. If we're still unsuccesful, we * try the general container one last time and possibly block on our * starved cv. */ static void * mb_alloc_wait(struct mb_lstmngr *mb_list, short type) { struct mb_pcpu_list *cnt_lst; struct mb_gen_list *gen_list; struct mb_bucket *bucket; void *m; int i, cv_ret; /* * Try to reclaim mbuf-related objects (mbufs, clusters). */ mb_reclaim(); /* * Cycle all the PCPU containers. Increment starved counts if found * empty. */ for (i = 0; i < NCPU; i++) { if (CPU_ABSENT(i)) continue; cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, i); MB_LOCK_CONT(cnt_lst); /* * If container is non-empty, get a single object from it. * If empty, increment starved count. */ if ((bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead))) != NULL) { MB_GET_OBJECT(m, bucket, cnt_lst); MB_MBTYPES_INC(cnt_lst, type, 1); MB_UNLOCK_CONT(cnt_lst); mbstat.m_wait++; /* XXX: No consistency. */ return (m); } else cnt_lst->mb_cont.mc_starved++; MB_UNLOCK_CONT(cnt_lst); } /* * We're still here, so that means it's time to get the general * container lock, check it one more time (now that mb_reclaim() * has been called) and if we still get nothing, block on the cv. */ gen_list = MB_GET_GEN_LIST(mb_list); MB_LOCK_CONT(gen_list); if ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL) { MB_GET_OBJECT(m, bucket, gen_list); MB_MBTYPES_INC(gen_list, type, 1); MB_UNLOCK_CONT(gen_list); mbstat.m_wait++; /* XXX: No consistency. 
*/ return (m); } gen_list->mb_cont.mc_starved++; cv_ret = cv_timedwait(&(gen_list->mgl_mstarved), gen_list->mb_cont.mc_lock, mbuf_wait); gen_list->mb_cont.mc_starved--; if ((cv_ret == 0) && ((bucket = SLIST_FIRST(&(gen_list->mb_cont.mc_bhead))) != NULL)) { MB_GET_OBJECT(m, bucket, gen_list); MB_MBTYPES_INC(gen_list, type, 1); mbstat.m_wait++; /* XXX: No consistency. */ } else { mbstat.m_drops++; /* XXX: No consistency. */ m = NULL; } MB_UNLOCK_CONT(gen_list); return (m); } /*- * Free an object to its rightful container. * In the very general case, this operation is really very easy. * Complications arise primarily if: * (a) We've hit the high limit on number of free objects allowed in * our PCPU container. * (b) We're in a critical situation where our container has been * marked 'starved' and we need to issue wakeups on the starved * condition variable. * (c) Minor (odd) cases: our bucket has migrated while we were * waiting for the lock; our bucket is in the general container; * our bucket is empty. */ static __inline void mb_free(struct mb_lstmngr *mb_list, void *m, short type, short persist, int *pers_list) { struct mb_pcpu_list *cnt_lst; struct mb_gen_list *gen_list; struct mb_bucket *bucket; u_int owner; bucket = mb_list->ml_btable[MB_BUCKET_INDX(m, mb_list)]; /* * Make sure that if after we lock the bucket's present container the * bucket has migrated, that we drop the lock and get the new one. */ retry_lock: owner = bucket->mb_owner & ~MB_BUCKET_FREE; switch (owner) { case MB_GENLIST_OWNER: gen_list = MB_GET_GEN_LIST(mb_list); if (((persist & MBP_PERSISTENT) != 0) && (*pers_list >= 0)) { if (*pers_list != MB_GENLIST_OWNER) { cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, *pers_list); MB_UNLOCK_CONT(cnt_lst); MB_LOCK_CONT(gen_list); } } else { MB_LOCK_CONT(gen_list); } if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) { MB_UNLOCK_CONT(gen_list); *pers_list = -1; goto retry_lock; } /* * If we're intended for the general container, this is * real easy: no migrating required. The only `bogon' * is that we're now contending with all the threads * dealing with the general list, but this is expected. */ MB_PUT_OBJECT(m, bucket, gen_list); MB_MBTYPES_DEC(gen_list, type, 1); if (gen_list->mb_cont.mc_starved > 0) cv_signal(&(gen_list->mgl_mstarved)); if ((persist & MBP_PERSIST) == 0) MB_UNLOCK_CONT(gen_list); else *pers_list = MB_GENLIST_OWNER; break; default: cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, owner); if (((persist & MBP_PERSISTENT) != 0) && (*pers_list >= 0)) { if (*pers_list == MB_GENLIST_OWNER) { gen_list = MB_GET_GEN_LIST(mb_list); MB_UNLOCK_CONT(gen_list); MB_LOCK_CONT(cnt_lst); } else { cnt_lst = MB_GET_PCPU_LIST_NUM(mb_list, *pers_list); owner = *pers_list; } } else { MB_LOCK_CONT(cnt_lst); } if (owner != (bucket->mb_owner & ~MB_BUCKET_FREE)) { MB_UNLOCK_CONT(cnt_lst); *pers_list = -1; goto retry_lock; } MB_PUT_OBJECT(m, bucket, cnt_lst); MB_MBTYPES_DEC(cnt_lst, type, 1); if (cnt_lst->mb_cont.mc_starved > 0) { /* * This is a tough case. It means that we've * been flagged at least once to indicate that * we're empty, and that the system is in a critical * situation, so we ought to migrate at least one * bucket over to the general container. * There may or may not be a thread blocking on * the starved condition variable, but chances * are that one will eventually come up soon so * it's better to migrate now than never. 
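* The bucket handed over here is expected to be one that was emptied earlier (and so is still tagged MB_BUCKET_FREE) and has just received its first object back; the KASSERT below enforces this.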
*/ gen_list = MB_GET_GEN_LIST(mb_list); MB_LOCK_CONT(gen_list); KASSERT((bucket->mb_owner & MB_BUCKET_FREE) != 0, ("mb_free: corrupt bucket %p\n", bucket)); SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead), bucket, mb_blist); bucket->mb_owner = MB_GENLIST_OWNER; (*(cnt_lst->mb_cont.mc_objcount))--; (*(gen_list->mb_cont.mc_objcount))++; (*(cnt_lst->mb_cont.mc_numbucks))--; (*(gen_list->mb_cont.mc_numbucks))++; /* * Determine whether or not to keep transferring * buckets to the general list or whether we've * transferred enough already. * We realize that although we may flag another * bucket to be migrated to the general container * that in the meantime, the thread that was * blocked on the cv is already woken up and * long gone. But in that case, the worst * consequence is that we will end up migrating * one bucket too many, which is really not a big * deal, especially if we're close to a critical * situation. */ if (gen_list->mb_cont.mc_starved > 0) { cnt_lst->mb_cont.mc_starved--; cv_signal(&(gen_list->mgl_mstarved)); } else cnt_lst->mb_cont.mc_starved = 0; MB_UNLOCK_CONT(gen_list); if ((persist & MBP_PERSIST) == 0) MB_UNLOCK_CONT(cnt_lst); else *pers_list = owner; break; } if (*(cnt_lst->mb_cont.mc_objcount) > *(mb_list->ml_wmhigh)) { /* * We've hit the high limit of allowed numbers of mbufs * on this PCPU list. We must now migrate a bucket * over to the general container. */ gen_list = MB_GET_GEN_LIST(mb_list); MB_LOCK_CONT(gen_list); if ((bucket->mb_owner & MB_BUCKET_FREE) == 0) { bucket = SLIST_FIRST(&(cnt_lst->mb_cont.mc_bhead)); SLIST_REMOVE_HEAD(&(cnt_lst->mb_cont.mc_bhead), mb_blist); } SLIST_INSERT_HEAD(&(gen_list->mb_cont.mc_bhead), bucket, mb_blist); bucket->mb_owner = MB_GENLIST_OWNER; *(cnt_lst->mb_cont.mc_objcount) -= bucket->mb_numfree; *(gen_list->mb_cont.mc_objcount) += bucket->mb_numfree; (*(cnt_lst->mb_cont.mc_numbucks))--; (*(gen_list->mb_cont.mc_numbucks))++; /* * While we're at it, transfer some of the mbtypes * "count load" onto the general list's mbtypes * array, seeing as how we're moving the bucket * there now, meaning that the freeing of objects * there will now decrement the _general list's_ * mbtypes counters, and no longer our PCPU list's * mbtypes counters. We do this for the type presently * being freed in an effort to keep the mbtypes * counters approximately balanced across all lists. */ MB_MBTYPES_DEC(cnt_lst, type, mb_list->ml_objbucks - bucket->mb_numfree); MB_MBTYPES_INC(gen_list, type, mb_list->ml_objbucks - bucket->mb_numfree); MB_UNLOCK_CONT(gen_list); if ((persist & MBP_PERSIST) == 0) MB_UNLOCK_CONT(cnt_lst); else *pers_list = owner; break; } if (bucket->mb_owner & MB_BUCKET_FREE) { SLIST_INSERT_HEAD(&(cnt_lst->mb_cont.mc_bhead), bucket, mb_blist); bucket->mb_owner = cnt_lst->mb_cont.mc_numowner; } if ((persist & MBP_PERSIST) == 0) MB_UNLOCK_CONT(cnt_lst); else *pers_list = owner; break; } } /* * Drain protocols in hopes to free up some resources. * * LOCKING NOTES: * No locks should be held when this is called. The drain routines have to * presently acquire some locks which raises the possibility of lock order * violation if we're holding any mutex if that mutex is acquired in reverse * order relative to one of the locks in the drain routines. */ static void mb_reclaim(void) { struct domain *dp; struct protosw *pr; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, "mb_reclaim()"); mbstat.m_drain++; /* XXX: No consistency. 
*/ for (dp = domains; dp != NULL; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain != NULL) (*pr->pr_drain)(); } /****************************************************************************** * Internal setup macros. */ #define _mb_setup(m, type) do { \ (m)->m_type = (type); \ (m)->m_next = NULL; \ (m)->m_nextpkt = NULL; \ (m)->m_data = (m)->m_dat; \ (m)->m_flags = 0; \ } while (0) #define _mbhdr_setup(m, type) do { \ (m)->m_type = (type); \ (m)->m_next = NULL; \ (m)->m_nextpkt = NULL; \ (m)->m_data = (m)->m_pktdat; \ (m)->m_flags = M_PKTHDR; \ (m)->m_pkthdr.rcvif = NULL; \ (m)->m_pkthdr.csum_flags = 0; \ SLIST_INIT(&(m)->m_pkthdr.tags); \ } while (0) #define _mcl_setup(m) do { \ (m)->m_data = (m)->m_ext.ext_buf; \ (m)->m_flags |= M_EXT; \ (m)->m_ext.ext_free = NULL; \ (m)->m_ext.ext_args = NULL; \ (m)->m_ext.ext_size = MCLBYTES; \ (m)->m_ext.ext_type = EXT_CLUSTER; \ } while (0) #define _mext_init_ref(m, ref) do { \ (m)->m_ext.ref_cnt = ((ref) == NULL) ? \ malloc(sizeof(u_int), M_MBUF, M_NOWAIT) : (u_int *)(ref); \ if ((m)->m_ext.ref_cnt != NULL) { \ *((m)->m_ext.ref_cnt) = 0; \ MEXT_ADD_REF((m)); \ } \ } while (0) #define cl2ref(cl) \ (((uintptr_t)(cl) - (uintptr_t)mb_list_clust.ml_mapbase) >> MCLSHIFT) #define _mext_dealloc_ref(m) \ if ((m)->m_ext.ext_type != EXT_EXTREF) \ free((m)->m_ext.ref_cnt, M_MBUF) /****************************************************************************** * Internal routines. * * Because mb_alloc() and mb_free() are inlines (to keep the common * cases down to a maximum of one function call), below are a few * routines used only internally for the sole purpose of making certain * functions smaller. * * - _mext_free(): frees associated storage when the ref. count is * exactly one and we're freeing. * * - _mgetm_internal(): common "persistent-lock" routine that allocates * an mbuf and a cluster in one shot, but where the lock is already * held coming in (which is what makes it different from the exported * m_getcl()). The lock is dropped when done. This is used by m_getm() * and, therefore, is very m_getm()-specific. */ static struct mbuf *_mgetm_internal(int, short, short, int); void _mext_free(struct mbuf *mb) { if (mb->m_ext.ext_type == EXT_CLUSTER) { mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf, MT_NOTMBUF, 0, NULL); } else { (*(mb->m_ext.ext_free))(mb->m_ext.ext_buf, mb->m_ext.ext_args); _mext_dealloc_ref(mb); } } static struct mbuf * _mgetm_internal(int how, short type, short persist, int cchnum) { struct mbuf *mb; mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, persist,&cchnum); if (mb == NULL) return NULL; _mb_setup(mb, type); if ((persist & MBP_PERSIST) != 0) { mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, how, MT_NOTMBUF, MBP_PERSISTENT, &cchnum); if (mb->m_ext.ext_buf == NULL) { (void)m_free(mb); mb = NULL; } _mcl_setup(mb); _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]); } return (mb); } /****************************************************************************** * Exported buffer allocation and de-allocation routines. */ /* * Allocate and return a single (normal) mbuf. NULL is returned on failure. * * Arguments: * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks * if really starved for memory. M_DONTWAIT to never block. * - type: the type of the mbuf being allocated. 
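* * A minimal usage sketch (hypothetical caller, not part of this change): m = m_get(M_DONTWAIT, MT_DATA); if (m == NULL) return (ENOBUFS);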
*/ struct mbuf * m_get(int how, short type) { struct mbuf *mb; mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL); if (mb != NULL) _mb_setup(mb, type); return (mb); } /* * Allocate a given length worth of mbufs and/or clusters (whatever fits * best) and return a pointer to the top of the allocated chain. If an * existing mbuf chain is provided, then we will append the new chain * to the existing one but still return the top of the newly allocated * chain. NULL is returned on failure, in which case the [optional] * provided chain is left untouched, and any memory already allocated * is freed. * * Arguments: * - m: existing chain to which to append new chain (optional). * - len: total length of data to append, either in mbufs or clusters * (we allocate whatever combination yields the best fit). * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks * if really starved for memory. M_DONTWAIT to never block. * - type: the type of the mbuf being allocated. */ struct mbuf * m_getm(struct mbuf *m, int len, int how, short type) { struct mbuf *mb, *top, *cur, *mtail; int num, rem, cchnum; short persist; int i; KASSERT(len >= 0, ("m_getm(): len is < 0")); /* If m != NULL, we will append to the end of that chain. */ if (m != NULL) for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next); else mtail = NULL; /* * In the best-case scenario (which should be the common case * unless we're in a starvation situation), we will be able to * go through the allocation of all the desired mbufs and clusters * here without dropping our per-CPU cache lock in between. */ num = len / MCLBYTES; rem = len % MCLBYTES; persist = 0; cchnum = -1; top = cur = NULL; for (i = 0; i < num; i++) { mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, MBP_PERSIST | persist, &cchnum); if (mb == NULL) goto failed; _mb_setup(mb, type); mb->m_len = 0; persist = (i != (num - 1) || rem > 0) ? MBP_PERSIST : 0; mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, how, MT_NOTMBUF, persist | MBP_PERSISTENT, &cchnum); if (mb->m_ext.ext_buf == NULL) { (void)m_free(mb); goto failed; } _mcl_setup(mb); _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]); persist = MBP_PERSISTENT; if (cur == NULL) top = cur = mb; else cur = (cur->m_next = mb); } if (rem > 0) { if (cchnum >= 0) { persist = MBP_PERSISTENT; persist |= (rem > MINCLSIZE) ? MBP_PERSIST : 0; mb = _mgetm_internal(how, type, persist, cchnum); if (mb == NULL) goto failed; } else if (rem > MINCLSIZE) { mb = m_getcl(how, type, 0); } else { mb = m_get(how, type); } if (mb != NULL) { mb->m_len = 0; if (cur == NULL) top = mb; else cur->m_next = mb; } else goto failed; } if (mtail != NULL) mtail->m_next = top; return top; failed: if (top != NULL) m_freem(top); return NULL; } /* * Allocate and return a single M_PKTHDR mbuf. NULL is returned on failure. * * Arguments: * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks * if really starved for memory. M_DONTWAIT to never block. * - type: the type of the mbuf being allocated. */ struct mbuf * m_gethdr(int how, short type) { struct mbuf *mb; mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL); if (mb != NULL) { _mbhdr_setup(mb, type); #ifdef MAC if (mac_init_mbuf(mb, how) != 0) { m_free(mb); return NULL; } #endif } return (mb); } /* * Allocate and return a single (normal) pre-zero'd mbuf. NULL is * returned on failure. * * Arguments: * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks * if really starved for memory. M_DONTWAIT to never block. 
* - type: the type of the mbuf being allocated. */ struct mbuf * m_get_clrd(int how, short type) { struct mbuf *mb; mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL); if (mb != NULL) { _mb_setup(mb, type); bzero(mtod(mb, caddr_t), MLEN); } return (mb); } /* * Allocate and return a single M_PKTHDR pre-zero'd mbuf. NULL is * returned on failure. * * Arguments: * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks * if really starved for memory. M_DONTWAIT to never block. * - type: the type of the mbuf being allocated. */ struct mbuf * m_gethdr_clrd(int how, short type) { struct mbuf *mb; mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, 0, NULL); if (mb != NULL) { _mbhdr_setup(mb, type); #ifdef MAC if (mac_init_mbuf(mb, how) != 0) { m_free(mb); return NULL; } #endif bzero(mtod(mb, caddr_t), MHLEN); } return (mb); } /* * Free a single mbuf and any associated storage that it may have attached * to it. The associated storage may not be immediately freed if its * reference count is above 1. Returns the next mbuf in the chain following * the mbuf being freed. * * Arguments: * - mb: the mbuf to free. */ struct mbuf * m_free(struct mbuf *mb) { struct mbuf *nb; int cchnum; short persist = 0; if ((mb->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(mb, NULL); #ifdef MAC if ((mb->m_flags & M_PKTHDR) && (mb->m_pkthdr.label.l_flags & MAC_FLAG_INITIALIZED)) mac_destroy_mbuf(mb); #endif nb = mb->m_next; if ((mb->m_flags & M_EXT) != 0) { MEXT_REM_REF(mb); if (atomic_cmpset_int(mb->m_ext.ref_cnt, 0, 1)) { if (mb->m_ext.ext_type == EXT_CLUSTER) { mb_free(&mb_list_clust, (caddr_t)mb->m_ext.ext_buf, MT_NOTMBUF, MBP_PERSIST, &cchnum); persist = MBP_PERSISTENT; } else { (*(mb->m_ext.ext_free))(mb->m_ext.ext_buf, mb->m_ext.ext_args); _mext_dealloc_ref(mb); persist = 0; } } } mb_free(&mb_list_mbuf, mb, mb->m_type, persist, &cchnum); return (nb); } /* * Free an entire chain of mbufs and associated external buffers, if * applicable. Right now, we only optimize a little so that the cache * lock may be held across a single mbuf+cluster free. Hopefully, * we'll eventually be holding the lock across more than merely two * consecutive frees but right now this is hard to implement because of * things like _mext_dealloc_ref (may do a free()) and atomic ops in the * loop. * * - mb: the mbuf chain to free. */ void m_freem(struct mbuf *mb) { struct mbuf *m; int cchnum; short persist; while (mb != NULL) { if ((mb->m_flags & M_PKTHDR) != 0) m_tag_delete_chain(mb, NULL); #ifdef MAC if ((mb->m_flags & M_PKTHDR) && (mb->m_pkthdr.label.l_flags & MAC_FLAG_INITIALIZED)) mac_destroy_mbuf(mb); #endif persist = 0; m = mb; mb = mb->m_next; if ((m->m_flags & M_EXT) != 0) { MEXT_REM_REF(m); if (atomic_cmpset_int(m->m_ext.ref_cnt, 0, 1)) { if (m->m_ext.ext_type == EXT_CLUSTER) { mb_free(&mb_list_clust, (caddr_t)m->m_ext.ext_buf, MT_NOTMBUF, MBP_PERSIST, &cchnum); persist = MBP_PERSISTENT; } else { (*(m->m_ext.ext_free))(m->m_ext.ext_buf, m->m_ext.ext_args); _mext_dealloc_ref(m); persist = 0; } } } mb_free(&mb_list_mbuf, m, m->m_type, persist, &cchnum); } } /* * Fetch an mbuf with a cluster attached to it. If one of the * allocations fails, the entire allocation fails. This routine is * the preferred way of fetching both the mbuf and cluster together, * as it avoids having to unlock/relock between allocations. Returns * NULL on failure. * * Arguments: * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks * if really starved for memory. M_DONTWAIT to never block. 
* - type: the type of the mbuf being allocated. * - flags: any flags to pass to the mbuf being allocated; if this includes * the M_PKTHDR bit, then the mbuf is configured as a M_PKTHDR mbuf. */ struct mbuf * m_getcl(int how, short type, int flags) { struct mbuf *mb; int cchnum; mb = (struct mbuf *)mb_alloc(&mb_list_mbuf, how, type, MBP_PERSIST, &cchnum); if (mb == NULL) return NULL; mb->m_type = type; mb->m_next = NULL; mb->m_flags = flags; if ((flags & M_PKTHDR) != 0) { mb->m_nextpkt = NULL; mb->m_pkthdr.rcvif = NULL; mb->m_pkthdr.csum_flags = 0; SLIST_INIT(&mb->m_pkthdr.tags); } mb->m_ext.ext_buf = (caddr_t)mb_alloc(&mb_list_clust, how, MT_NOTMBUF, MBP_PERSISTENT, &cchnum); if (mb->m_ext.ext_buf == NULL) { (void)m_free(mb); mb = NULL; } else { _mcl_setup(mb); _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]); } #ifdef MAC if ((flags & M_PKTHDR) && (mac_init_mbuf(mb, how) != 0)) { m_free(mb); return NULL; } #endif return (mb); } /* * Fetch a single mbuf cluster and attach it to an existing mbuf. If * successfull, configures the provided mbuf to have mbuf->m_ext.ext_buf * pointing to the cluster, and sets the M_EXT bit in the mbuf's flags. * The M_EXT bit is not set on failure. * * Arguments: * - mb: the existing mbuf to which to attach the allocated cluster. * - how: M_TRYWAIT to try to block for kern.ipc.mbuf_wait number of ticks * if really starved for memory. M_DONTWAIT to never block. */ void m_clget(struct mbuf *mb, int how) { mb->m_ext.ext_buf= (caddr_t)mb_alloc(&mb_list_clust,how,MT_NOTMBUF, 0, NULL); if (mb->m_ext.ext_buf != NULL) { _mcl_setup(mb); _mext_init_ref(mb, &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]); } } /* * Configure a provided mbuf to refer to the provided external storage * buffer and setup a reference count for said buffer. If the setting * up of the reference count fails, the M_EXT bit will not be set. If * successfull, the M_EXT bit is set in the mbuf's flags. * * Arguments: * - mb: the existing mbuf to which to attach the provided buffer. * - buf: the address of the provided external storage buffer. * - size: the size of the provided buffer. * - freef: a pointer to a routine that is responsible for freeing the * provided external storage buffer. * - args: a pointer to an argument structure (of any type) to be passed * to the provided freef routine (may be NULL). * - flags: any other flags to be passed to the provided mbuf. * - type: the type that the external storage buffer should be labeled with. */ void m_extadd(struct mbuf *mb, caddr_t buf, u_int size, void (*freef)(void *, void *), void *args, int flags, int type) { u_int *ref_cnt = NULL; if (type == EXT_CLUSTER) ref_cnt = &cl_refcntmap[cl2ref(mb->m_ext.ext_buf)]; else if (type == EXT_EXTREF) ref_cnt = mb->m_ext.ref_cnt; _mext_init_ref(mb, ref_cnt); if (mb->m_ext.ref_cnt != NULL) { mb->m_flags |= (M_EXT | flags); mb->m_ext.ext_buf = buf; mb->m_data = mb->m_ext.ext_buf; mb->m_ext.ext_size = size; mb->m_ext.ext_free = freef; mb->m_ext.ext_args = args; mb->m_ext.ext_type = type; } } /* * Change type of provided mbuf. This is a relatively expensive operation * (due to the cost of statistics manipulations) and should be avoided, where * possible. * * Arguments: * - mb: the provided mbuf for which the type needs to be changed. * - new_type: the new type to change the mbuf to. 
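* Note that only the general list's mbtypes counters are adjusted here, in keeping with the relaxed ("XXX: No consistency") statistics used elsewhere in this file.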
*/ void m_chtype(struct mbuf *mb, short new_type) { struct mb_gen_list *gen_list; gen_list = MB_GET_GEN_LIST(&mb_list_mbuf); MB_LOCK_CONT(gen_list); MB_MBTYPES_DEC(gen_list, mb->m_type, 1); MB_MBTYPES_INC(gen_list, new_type, 1); MB_UNLOCK_CONT(gen_list); mb->m_type = new_type; } diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index 9caeed9454e1..55d828df28ee 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -1,862 +1,861 @@ /* * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #include "opt_mac.h" #include "opt_param.h" #include #include #include #include #include #include #include #include #include #include int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; int m_defragpackets; int m_defragbytes; int m_defraguseless; int m_defragfailure; /* * sysctl(8) exported objects */ SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, &max_linkhdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, &max_protohdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, &max_datalen, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, &m_defragpackets, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, &m_defragbytes, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, &m_defraguseless, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, &m_defragfailure, 0, ""); /* * "Move" mbuf pkthdr from "from" to "to". * "from" must have M_PKTHDR set, and "to" must be empty. 
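* Unlike m_dup_pkthdr(), the packet tags are handed over rather than deep-copied, so no allocation is needed and the operation cannot fail.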
*/ void m_move_pkthdr(struct mbuf *to, struct mbuf *from) { #if 0 /* see below for why these are not enabled */ M_ASSERTPKTHDR(to); KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_move_pkthdr: to has tags")); #endif KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster")); #ifdef MAC if (to->m_flags & M_PKTHDR) mac_destroy_mbuf(to); #endif to->m_flags = from->m_flags & M_COPYFLAGS; to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; /* especially tags */ #ifdef MAC mac_init_mbuf(to, 1); /* XXXMAC no way to fail */ mac_create_mbuf_from_mbuf(from, to); #endif SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ from->m_flags &= ~M_PKTHDR; } /* * Duplicate "from"'s mbuf pkthdr in "to". * "from" must have M_PKTHDR set, and "to" must be empty. * In particular, this does a deep copy of the packet tags. */ int m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) { #if 0 /* * The mbuf allocator only initializes the pkthdr * when the mbuf is allocated with MGETHDR. Many users * (e.g. m_copy*, m_prepend) use MGET and then * smash the pkthdr as needed causing these * assertions to trip. For now just disable them. */ M_ASSERTPKTHDR(to); KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags")); #endif #ifdef MAC if (to->m_flags & M_PKTHDR) mac_destroy_mbuf(to); #endif to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; #ifdef MAC mac_init_mbuf(to, 1); /* XXXMAC no way to fail */ mac_create_mbuf_from_mbuf(from, to); #endif SLIST_INIT(&to->m_pkthdr.tags); - return (m_tag_copy_chain(to, from, (how & M_TRYWAIT) ? M_WAITOK : - M_NOWAIT)); + return (m_tag_copy_chain(to, from, MBTOM(how))); } /* * Lesser-used path for M_PREPEND: * allocate new mbuf to prepend to chain, * copy junk along. */ struct mbuf * m_prepend(struct mbuf *m, int len, int how) { struct mbuf *mn; MGET(mn, how, m->m_type); if (mn == NULL) { m_freem(m); return (NULL); } if (m->m_flags & M_PKTHDR) { M_MOVE_PKTHDR(mn, m); #ifdef MAC mac_destroy_mbuf(m); #endif } mn->m_next = m; m = mn; if (len < MHLEN) MH_ALIGN(m, len); m->m_len = len; return (m); } /* * Make a copy of an mbuf chain starting "off0" bytes from the beginning, * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. 
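* Callers that need a writable copy of a packet should look at m_dup() instead.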
*/ struct mbuf * m_copym(struct mbuf *m, int off0, int len, int wait) { struct mbuf *n, **np; int off = off0; struct mbuf *top; int copyhdr = 0; KASSERT(off >= 0, ("m_copym, negative off %d", off)); KASSERT(len >= 0, ("m_copym, negative len %d", len)); if (off == 0 && m->m_flags & M_PKTHDR) copyhdr = 1; while (off > 0) { KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } np = ⊤ top = 0; while (len > 0) { if (m == NULL) { KASSERT(len == M_COPYALL, ("m_copym, length > size of mbuf chain")); break; } MGET(n, wait, m->m_type); *np = n; if (n == NULL) goto nospace; if (copyhdr) { if (!m_dup_pkthdr(n, m, wait)) goto nospace; if (len == M_COPYALL) n->m_pkthdr.len -= off0; else n->m_pkthdr.len = len; copyhdr = 0; } n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { n->m_data = m->m_data + off; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), (u_int)n->m_len); if (len != M_COPYALL) len -= n->m_len; off = 0; m = m->m_next; np = &n->m_next; } if (top == NULL) mbstat.m_mcfail++; /* XXX: No consistency. */ return (top); nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Copy an entire packet, including header (which must be present). * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. * Preserve alignment of the first mbuf so if the creator has left * some room at the beginning (e.g. for inserting protocol headers) * the copies still have the room available. */ struct mbuf * m_copypacket(struct mbuf *m, int how) { struct mbuf *top, *n, *o; MGET(n, how, m->m_type); top = n; if (n == NULL) goto nospace; if (!m_dup_pkthdr(n, m, how)) goto nospace; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else { n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; while (m) { MGET(o, how, m->m_type); if (o == NULL) goto nospace; n->m_next = o; n = n->m_next; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else { bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; } return top; nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. */ void m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) { u_int count; KASSERT(off >= 0, ("m_copydata, negative off %d", off)); KASSERT(len >= 0, ("m_copydata, negative len %d", len)); while (off > 0) { KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); count = min(m->m_len - off, len); bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; m = m->m_next; } } /* * Copy a packet header mbuf chain into a completely new chain, including * copying any mbuf clusters. Use this instead of m_copypacket() when * you need a writable copy of an mbuf chain. 
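* The new chain is packed into clusters whenever at least MINCLSIZE bytes remain to be copied, so it may end up with fewer mbufs than the original.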
*/ struct mbuf * m_dup(struct mbuf *m, int how) { struct mbuf **p, *top = NULL; int remain, moff, nsize; /* Sanity check */ if (m == NULL) return (NULL); M_ASSERTPKTHDR(m); /* While there's more data, get a new mbuf, tack it on, and fill it */ remain = m->m_pkthdr.len; moff = 0; p = ⊤ while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ struct mbuf *n; /* Get the next new mbuf */ MGET(n, how, m->m_type); if (n == NULL) goto nospace; if (top == NULL) { /* first one, must be PKTHDR */ if (!m_dup_pkthdr(n, m, how)) goto nospace; nsize = MHLEN; } else /* not the first one */ nsize = MLEN; if (remain >= MINCLSIZE) { MCLGET(n, how); if ((n->m_flags & M_EXT) == 0) { (void)m_free(n); goto nospace; } nsize = MCLBYTES; } n->m_len = 0; /* Link it into the new chain */ *p = n; p = &n->m_next; /* Copy data from original mbuf(s) into new mbuf */ while (n->m_len < nsize && m != NULL) { int chunk = min(nsize - n->m_len, m->m_len - moff); bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); moff += chunk; n->m_len += chunk; remain -= chunk; if (moff == m->m_len) { m = m->m_next; moff = 0; } } /* Check correct total mbuf length */ KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), ("%s: bogus m_pkthdr.len", __func__)); } return (top); nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Concatenate mbuf chain n to m. * Both chains must be of the same type (e.g. MT_DATA). * Any m_pkthdr is not updated. */ void m_cat(struct mbuf *m, struct mbuf *n) { while (m->m_next) m = m->m_next; while (n) { if (m->m_flags & M_EXT || m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { /* just join the two chains */ m->m_next = n; return; } /* splat the data from one into the other */ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } } void m_adj(struct mbuf *mp, int req_len) { int len = req_len; struct mbuf *m; int count; if ((m = mp) == NULL) return; if (len >= 0) { /* * Trim from head. */ while (m != NULL && len > 0) { if (m->m_len <= len) { len -= m->m_len; m->m_len = 0; m = m->m_next; } else { m->m_len -= len; m->m_data += len; len = 0; } } m = mp; if (mp->m_flags & M_PKTHDR) m->m_pkthdr.len -= (req_len - len); } else { /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ len = -len; count = 0; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len >= len) { m->m_len -= len; if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= len; return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ m = mp; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len = count; for (; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; break; } count -= m->m_len; } while (m->m_next) (m = m->m_next) ->m_len = 0; } } /* * Rearange an mbuf chain so that len bytes are contiguous * and in the data area of an mbuf (so that mtod and dtom * will work for a structure of size len). Returns the resulting * mbuf chain on success, frees it and returns null on failure. * If there is room, it will add up to max_protohdr-len extra bytes to the * contiguous region in an attempt to avoid being called next time. 
/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns NULL on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void)m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	mbstat.m_mpfail++;	/* XXX: No consistency. */
	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf.  Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void)m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
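
/*
 * Usage note (illustrative sketch only, not compiled): the canonical
 * m_pullup() idiom used by protocol input routines to make a fixed-size
 * header contiguous before casting it with mtod().  "struct foo_hdr" and
 * "foostat" are hypothetical stand-ins for a real protocol's header type
 * and statistics; on failure m_pullup() has already freed the chain.
 *
 *	if (m->m_len < sizeof(struct foo_hdr) &&
 *	    (m = m_pullup(m, sizeof(struct foo_hdr))) == NULL) {
 *		foostat.foos_tooshort++;
 *		return;
 *	}
 *	fh = mtod(m, struct foo_hdr *);
 */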
/*
 * Routine to copy from device local memory into mbufs.
 * Note that the `off' argument is the offset into the first mbuf of the
 * target chain at which to begin placing the copied data.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
    void (*copy)(char *from, caddr_t to, u_int len))
{
	struct mbuf *m;
	struct mbuf *top = NULL, **mp = &top;
	int len;

	if (off < 0 || off > MHLEN)
		return (NULL);
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}
		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}
		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}
		m->m_len = len = min(totlen, len);
		if (copy)
			copy(buf, mtod(m, caddr_t), (u_int)len);
		else
			bcopy(buf, mtod(m, caddr_t), (u_int)len);
		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
	int mlen;
	struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == NULL)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			n = m_get_clrd(M_DONTWAIT, m->m_type);
			if (n == NULL)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

/*
 * Dump the contents of a packet header mbuf chain to the console, one mbuf
 * per line, for debugging.
 */
void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}

/*
 * Set the packet header length to the actual amount of data in the chain.
 */
u_int
m_fixhdr(struct mbuf *m0)
{
	u_int len;

	len = m_length(m0, NULL);
	m0->m_pkthdr.len = len;
	return (len);
}

/*
 * Return the total data length of a chain and, optionally, a pointer to
 * its last mbuf.
 */
u_int
m_length(struct mbuf *m0, struct mbuf **last)
{
	struct mbuf *m;
	u_int len;

	len = 0;
	for (m = m0; m != NULL; m = m->m_next) {
		len += m->m_len;
		if (m->m_next == NULL)
			break;
	}
	if (last != NULL)
		*last = m;
	return (len);
}

/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If an mbuf without a packet header is passed in, the original
 * chain will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	if (m0->m_next == NULL)
		m_defraguseless++;
	m_freem(m0);
	m0 = m_final;
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
	return (m0);
nospace:
	m_defragfailure++;
	if (m_new)
		m_free(m_new);
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
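
/*
 * Usage note (illustrative sketch only, not compiled): m_defrag() is
 * typically called from a driver's transmit path when a chain has more
 * fragments than the hardware can describe.  On failure the original chain
 * is untouched, so the caller may free it or try to send it as-is; the name
 * "m_head" below is a hypothetical stand-in for the driver's chain pointer.
 *
 *	struct mbuf *m_new;
 *
 *	m_new = m_defrag(m_head, M_DONTWAIT);
 *	if (m_new == NULL) {
 *		m_freem(m_head);
 *		return (ENOBUFS);
 *	}
 *	m_head = m_new;
 */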