Index: head/sys/cam/scsi/scsi_pass.c
===================================================================
--- head/sys/cam/scsi/scsi_pass.c	(revision 299863)
+++ head/sys/cam/scsi/scsi_pass.c	(revision 299864)
@@ -1,2222 +1,2220 @@
 /*-
  * Copyright (c) 1997, 1998, 2000 Justin T. Gibbs.
  * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include "opt_kdtrace.h"
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/types.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/devicestat.h>
 #include <sys/errno.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/poll.h>
 #include <sys/selinfo.h>
 #include <sys/sdt.h>
 #include <sys/taskqueue.h>
 #include <vm/uma.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <machine/bus.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_periph.h>
 #include <cam/cam_queue.h>
 #include <cam/cam_xpt.h>
 #include <cam/cam_xpt_periph.h>
 #include <cam/cam_debug.h>
 #include <cam/cam_compat.h>
 #include <cam/cam_xpt_periph.h>
 
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_pass.h>
 
 /*
  * Per-instance flag bits kept in pass_softc.flags.
  *
  * PASS_FLAG_INVALID is set by passoninvalidate() and makes passopen()
  * fail with ENXIO.  PASS_FLAG_INITIAL_PHYSPATH is set by
  * pass_add_physpath() once it has run.  PASS_FLAG_ZONE_INPROG and
  * PASS_FLAG_ZONE_VALID track creation of the UMA zones in
  * passcreatezone().  PASS_FLAG_UNMAPPED_CAPABLE is set in passregister()
  * when the path inquiry reports PIM_UNMAPPED.
  * PASS_FLAG_ABANDONED_REF_SET records that a periph reference is being
  * held on behalf of I/O sitting on the abandoned queue.
  */
 typedef enum {
 	PASS_FLAG_OPEN			= 0x01,
 	PASS_FLAG_LOCKED		= 0x02,
 	PASS_FLAG_INVALID		= 0x04,
 	PASS_FLAG_INITIAL_PHYSPATH	= 0x08,
 	PASS_FLAG_ZONE_INPROG		= 0x10,
 	PASS_FLAG_ZONE_VALID		= 0x20,
 	PASS_FLAG_UNMAPPED_CAPABLE	= 0x40,
 	PASS_FLAG_ABANDONED_REF_SET	= 0x80
 } pass_flags;
 
 /*
  * Driver state stored in pass_softc.state.  passstart() only dispatches
  * queued I/O while in PASS_STATE_NORMAL, the only state defined.
  */
 typedef enum {
 	PASS_STATE_NORMAL
 } pass_state;
 
 /*
  * Tag stored in a CCB's peripheral-private area (see ccb_type below).
  * passstart() marks every CCB it dispatches as PASS_CCB_QUEUED_IO and
  * passdone() switches on the tag to find the owning request.
  */
 typedef enum {
 	PASS_CCB_BUFFER_IO,
 	PASS_CCB_QUEUED_IO
 } pass_ccb_types;
 
 /*
  * Aliases for the CCB header's peripheral-private fields: ccb_type holds
  * a pass_ccb_types value and ccb_ioreq points at the struct pass_io_req
  * that the CCB was issued for.
  */
 #define ccb_type	ppriv_field0
 #define ccb_ioreq	ppriv_ptr1
 
 /*
  * The maximum number of memory segments we preallocate.
  */
 #define	PASS_MAX_SEGS	16
 
 /*
  * Per-request flag bits kept in pass_io_req.flags.
  *
  * PASS_IO_USER_SEG_MALLOC: user_segptr was allocated with malloc(9) and
  * is freed by passiocleanup().
  * PASS_IO_KERN_SEG_MALLOC: presumably the same for kern_segptr -- TODO
  * confirm against the code that sets it.
  * PASS_IO_ABANDONED: the request was moved to the abandoned queue (final
  * close or device departure) and is freed on completion instead of being
  * reported to the user.
  */
 typedef enum {
 	PASS_IO_NONE		= 0x00,
 	PASS_IO_USER_SEG_MALLOC	= 0x01,
 	PASS_IO_KERN_SEG_MALLOC	= 0x02,
 	PASS_IO_ABANDONED	= 0x04
 } pass_io_flags; 
 
 /*
  * Bookkeeping for one queued pass(4) I/O request.  Requests are items of
  * softc->pass_zone and move between the softc's incoming, active,
  * abandoned and done queues through the "links" entry.
  *
  * ccb is the driver's copy of the request: passstart() merges it into
  * the CCB it is handed (remembered in alloced_ccb) and passdone() copies
  * the completed CCB back into it.  start_time is taken in passstart() and
  * passed to devstat(9) when the request completes.
  *
  * passiocleanup() frees user_segptr when PASS_IO_USER_SEG_MALLOC is set,
  * frees the kern_bufs[] entries when data_flags is CAM_DATA_VADDR, and
  * returns the kern_segptr[] segment addresses to softc->pass_io_zone
  * when data_flags is CAM_DATA_SG.
  *
  * NOTE(review): user_ccb_ptr, user_periph_links and user_periph_priv
  * look like saved pieces of the caller's CCB; confirm against the ioctl
  * path.
  */
 struct pass_io_req {
 	union ccb			 ccb;
 	union ccb			*alloced_ccb;
 	union ccb			*user_ccb_ptr;
 	camq_entry			 user_periph_links;
 	ccb_ppriv_area			 user_periph_priv;
 	struct cam_periph_map_info	 mapinfo;
 	pass_io_flags			 flags;
 	ccb_flags			 data_flags;
 	int				 num_user_segs;
 	bus_dma_segment_t		 user_segs[PASS_MAX_SEGS];
 	int				 num_kern_segs;
 	bus_dma_segment_t		 kern_segs[PASS_MAX_SEGS];
 	bus_dma_segment_t		*user_segptr;
 	bus_dma_segment_t		*kern_segptr;
 	int				 num_bufs;
 	uint32_t			 dirs[CAM_PERIPH_MAXMAPS];
 	uint32_t			 lengths[CAM_PERIPH_MAXMAPS];
 	uint8_t				*user_bufs[CAM_PERIPH_MAXMAPS];
 	uint8_t				*kern_bufs[CAM_PERIPH_MAXMAPS];
 	struct bintime			 start_time;
 	TAILQ_ENTRY(pass_io_req)	 links;
 };
 
 /*
  * Per-device software context, allocated in passregister() and freed in
  * passcleanup().
  *
  * open_count tracks opens of the device node (incremented in passopen(),
  * decremented in passclose()).  maxio is derived from the path inquiry
  * in passregister() and never exceeds MAXPHYS.  The four queues hold
  * pass_io_req structures in their respective stages of processing.
  * pass_zone and pass_io_zone are created on demand by passcreatezone()
  * under the names in zone_name and io_zone_name; io_zone_size is the
  * item size used for pass_io_zone.
  */
 struct pass_softc {
 	pass_state		  state;
 	pass_flags		  flags;
 	u_int8_t		  pd_type;
 	union ccb		  saved_ccb;
 	int			  open_count;
 	u_int		 	  maxio;
 	struct devstat		 *device_stats;
 	struct cdev		 *dev;
 	struct cdev		 *alias_dev;
 	struct task		  add_physpath_task;
 	struct task		  shutdown_kqueue_task;
 	struct selinfo		  read_select;
 	TAILQ_HEAD(, pass_io_req) incoming_queue;
 	TAILQ_HEAD(, pass_io_req) active_queue;
 	TAILQ_HEAD(, pass_io_req) abandoned_queue;
 	TAILQ_HEAD(, pass_io_req) done_queue;
 	struct cam_periph	 *periph;
 	char			  zone_name[12];
 	char			  io_zone_name[12];
 	uma_zone_t		  pass_zone;
 	uma_zone_t		  pass_io_zone;
 	size_t			  io_zone_size;
 };
 
 /*
  * Character device entry points (wired up in pass_cdevsw) and the kqueue
  * read-filter hooks (wired up in passread_filtops).
  */
 static	d_open_t	passopen;
 static	d_close_t	passclose;
 static	d_ioctl_t	passioctl;
 static	d_ioctl_t	passdoioctl;
 static	d_poll_t	passpoll;
 static	d_kqfilter_t	passkqfilter;
 static	void		passreadfiltdetach(struct knote *kn);
 static	int		passreadfilt(struct knote *kn, long hint);
 
 /*
  * CAM peripheral driver callbacks, taskqueue handlers and internal
  * helpers.
  */
 static	periph_init_t	passinit;
 static	periph_ctor_t	passregister;
 static	periph_oninv_t	passoninvalidate;
 static	periph_dtor_t	passcleanup;
 static	periph_start_t	passstart;
 static	void		pass_shutdown_kqueue(void *context, int pending);
 static	void		pass_add_physpath(void *context, int pending);
 static	void		passasync(void *callback_arg, u_int32_t code,
 				  struct cam_path *path, void *arg);
 static	void		passdone(struct cam_periph *periph, 
 				 union ccb *done_ccb);
 static	int		passcreatezone(struct cam_periph *periph);
 static	void		passiocleanup(struct pass_softc *softc, 
 				      struct pass_io_req *io_req);
 static	int		passcopysglist(struct cam_periph *periph,
 				       struct pass_io_req *io_req,
 				       ccb_flags direction);
 static	int		passmemsetup(struct cam_periph *periph,
 				     struct pass_io_req *io_req);
 static	int		passmemdone(struct cam_periph *periph,
 				    struct pass_io_req *io_req);
 static	int		passerror(union ccb *ccb, u_int32_t cam_flags, 
 				  u_int32_t sense_flags);
 static 	int		passsendccb(struct cam_periph *periph, union ccb *ccb,
 				    union ccb *inccb);
 
 /*
  * Peripheral driver descriptor for "pass": passinit() is the driver's
  * init routine, and the unit list starts out empty with generation 0.
  */
 static struct periph_driver passdriver =
 {
 	passinit, "pass",
 	TAILQ_HEAD_INITIALIZER(passdriver.units), /* generation */ 0
 };
 
 PERIPHDRIVER_DECLARE(pass, passdriver);
 
 /*
  * Character device switch for the pass device nodes created in
  * passregister().  D_TRACKCLOSE is requested so that passclose() is
  * invoked for every close, matching the per-open reference and
  * open_count accounting done in passopen().
  */
 static struct cdevsw pass_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	D_TRACKCLOSE,
 	.d_open =	passopen,
 	.d_close =	passclose,
 	.d_ioctl =	passioctl,
 	.d_poll = 	passpoll,
 	.d_kqfilter = 	passkqfilter,
 	.d_name =	"pass",
 };
 
 /*
  * kqueue read filter operations for the pass device; the filter is
  * attached to a file descriptor (f_isfd = 1).
  */
 static struct filterops passread_filtops = {
 	.f_isfd	=	1,
 	.f_detach =	passreadfiltdetach,
 	.f_event =	passreadfilt
 };
 
 /*
  * malloc(9) type used for the driver's data buffers and user segment
  * lists (see the free() calls in passiocleanup()).
  */
 static MALLOC_DEFINE(M_SCSIPASS, "scsi_pass", "scsi passthrough buffers");
 
 /*
  * Driver-wide initialization.  Registers passasync() as a global async
  * handler for AC_FOUND_DEVICE so that a pass instance can be attached to
  * every device that shows up.  A registration failure is only reported;
  * there is nothing to unwind.
  */
 static void
 passinit(void)
 {
 	cam_status rc;
 
 	rc = xpt_register_async(AC_FOUND_DEVICE, passasync, NULL, NULL);
 	if (rc == CAM_REQ_CMP)
 		return;
 
 	printf("pass: Failed to attach master async callback "
 	       "due to status 0x%x!\n", rc);
 }
 
 static void
 passrejectios(struct cam_periph *periph)
 {
 	struct pass_io_req *io_req, *io_req2;
 	struct pass_softc *softc;
 
 	softc = (struct pass_softc *)periph->softc;
 
 	/*
 	 * The user can no longer get status for I/O on the done queue, so
 	 * clean up all outstanding I/O on the done queue.
 	 */
 	TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) {
 		TAILQ_REMOVE(&softc->done_queue, io_req, links);
 		passiocleanup(softc, io_req);
 		uma_zfree(softc->pass_zone, io_req);
 	}
 
 	/*
 	 * The underlying device is gone, so we can't issue these I/Os.
 	 * The devfs node has been shut down, so we can't return status to
 	 * the user.  Free any I/O left on the incoming queue.
 	 */
 	TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links, io_req2) {
 		TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
 		passiocleanup(softc, io_req);
 		uma_zfree(softc->pass_zone, io_req);
 	}
 
 	/*
 	 * Normally we would put I/Os on the abandoned queue and acquire a
 	 * reference when we saw the final close.  But, the device went
 	 * away and devfs may have moved everything off to deadfs by the
 	 * time the I/O done callback is called; as a result, we won't see
 	 * any more closes.  So, if we have any active I/Os, we need to put
 	 * them on the abandoned queue.  When the abandoned queue is empty,
 	 * we'll release the remaining reference (see below) to the peripheral.
 	 */
 	TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links, io_req2) {
 		TAILQ_REMOVE(&softc->active_queue, io_req, links);
 		io_req->flags |= PASS_IO_ABANDONED;
 		TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req, links);
 	}
 
 	/*
 	 * If we put any I/O on the abandoned queue, acquire a reference.
 	 */
 	if ((!TAILQ_EMPTY(&softc->abandoned_queue))
 	 && ((softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0)) {
 		cam_periph_doacquire(periph);
 		softc->flags |= PASS_FLAG_ABANDONED_REF_SET;
 	}
 }
 
 /*
  * Callback scheduled through destroy_dev_sched_cb() in passoninvalidate();
  * arg is the periph.  Drops the references that were held for
  * outstanding opens and for the device node, rejects all queued I/O, and
  * finally hands the kqueue teardown to a taskqueue thread.
  */
 static void
 passdevgonecb(void *arg)
 {
 	struct cam_periph *periph;
 	struct mtx *mtx;
 	struct pass_softc *softc;
 	int i;
 
 	periph = (struct cam_periph *)arg;
 	/* Cache the mutex pointer; see the comment above mtx_unlock(). */
 	mtx = cam_periph_mtx(periph);
 	mtx_lock(mtx);
 
 	softc = (struct pass_softc *)periph->softc;
 	KASSERT(softc->open_count >= 0, ("Negative open count %d",
 		softc->open_count));
 
 	/*
 	 * When we get this callback, we will get no more close calls from
 	 * devfs.  So if we have any dangling opens, we need to release the
 	 * reference held for that particular context.
 	 */
 	for (i = 0; i < softc->open_count; i++)
 		cam_periph_release_locked(periph);
 
 	softc->open_count = 0;
 
 	/*
 	 * Release the reference held for the device node, it is gone now.
 	 * Accordingly, inform all queued I/Os of their fate.
 	 */
 	cam_periph_release_locked(periph);
 	passrejectios(periph);
 
 	/*
 	 * We reference the SIM lock directly here, instead of using
 	 * cam_periph_unlock().  The reason is that the final call to
 	 * cam_periph_release_locked() above could result in the periph
 	 * getting freed.  If that is the case, dereferencing the periph
 	 * with a cam_periph_unlock() call would cause a page fault.
 	 */
 	mtx_unlock(mtx);
 
 	/*
 	 * We have to remove our kqueue context from a thread because it
 	 * may sleep.  It would be nice if we could get a callback from
 	 * kqueue when it is done cleaning up resources.
 	 *
 	 * NOTE(review): softc is still dereferenced here; this presumably
 	 * relies on the kqueue reference taken in passregister() keeping
 	 * the periph alive until pass_shutdown_kqueue() runs -- confirm.
 	 */
 	taskqueue_enqueue(taskqueue_thread, &softc->shutdown_kqueue_task);
 }
 
 /*
  * Invalidation callback for the peripheral.  Unhooks the per-device
  * async handler, marks the softc invalid so that new opens are refused,
  * and schedules destruction of the devfs node; passdevgonecb() runs once
  * devfs has finished tearing the node down.
  */
 static void
 passoninvalidate(struct cam_periph *periph)
 {
 	struct pass_softc *sc = periph->softc;
 
 	/* An event mask of 0 de-registers the callback. */
 	xpt_register_async(0, passasync, periph, periph->path);
 
 	sc->flags |= PASS_FLAG_INVALID;
 
 	destroy_dev_sched_cb(sc->dev, passdevgonecb, periph);
 }
 
 static void
 passcleanup(struct cam_periph *periph)
 {
 	struct pass_softc *softc;
 
 	softc = (struct pass_softc *)periph->softc;
 
 	cam_periph_assert(periph, MA_OWNED);
 	KASSERT(TAILQ_EMPTY(&softc->active_queue),
 		("%s called when there are commands on the active queue!\n",
 		__func__));
 	KASSERT(TAILQ_EMPTY(&softc->abandoned_queue),
 		("%s called when there are commands on the abandoned queue!\n",
 		__func__));
 	KASSERT(TAILQ_EMPTY(&softc->incoming_queue),
 		("%s called when there are commands on the incoming queue!\n",
 		__func__));
 	KASSERT(TAILQ_EMPTY(&softc->done_queue),
 		("%s called when there are commands on the done queue!\n",
 		__func__));
 
 	devstat_remove_entry(softc->device_stats);
 
 	cam_periph_unlock(periph);
 
 	/*
 	 * We call taskqueue_drain() for the physpath task to make sure it
 	 * is complete.  We drop the lock because this can potentially
 	 * sleep.  XXX KDM that is bad.  Need a way to get a callback when
 	 * a taskqueue is drained.
 	 *
  	 * Note that we don't drain the kqueue shutdown task queue.  This
 	 * is because we hold a reference on the periph for kqueue, and
 	 * release that reference from the kqueue shutdown task queue.  So
 	 * we cannot come into this routine unless we've released that
 	 * reference.  Also, because that could be the last reference, we
 	 * could be called from the cam_periph_release() call in
 	 * pass_shutdown_kqueue().  In that case, the taskqueue_drain()
 	 * would deadlock.  It would be preferable if we had a way to
 	 * get a callback when a taskqueue is done.
 	 */
 	taskqueue_drain(taskqueue_thread, &softc->add_physpath_task);
 
 	cam_periph_lock(periph);
 
 	free(softc, M_DEVBUF);
 }
 
 /*
  * Taskqueue handler (queued from passdevgonecb()) that tears down the
  * kqueue note list of the device and then drops the periph reference
  * that passregister() took on behalf of kqueue.  context is the periph;
  * pending is unused.
  */
 static void
 pass_shutdown_kqueue(void *context, int pending)
 {
 	struct cam_periph *periph = context;
 	struct pass_softc *softc = periph->softc;
 	struct knlist *notes = &softc->read_select.si_note;
 
 	knlist_clear(notes, /*is_locked*/ 0);
 	knlist_destroy(notes);
 
 	/* This may be the last reference to the periph. */
 	cam_periph_release(periph);
 }
 
 /*
  * Taskqueue handler that creates (or updates) the devfs alias named
  * after the device's physical path.  context is the periph; pending is
  * the number of times the task was queued before it ran, and one periph
  * reference is released for each of them.
  */
 static void
 pass_add_physpath(void *context, int pending)
 {
 	struct cam_periph *periph;
 	struct pass_softc *softc;
 	struct mtx *mtx;
 	char *physpath;
 
 	/*
 	 * If we have one, create a devfs alias for our
 	 * physical path.
 	 */
 	periph = context;
 	softc = periph->softc;
 	/* Allocate before taking the mutex: M_WAITOK may sleep. */
 	physpath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK);
 	mtx = cam_periph_mtx(periph);
 	mtx_lock(mtx);
 
 	if (periph->flags & CAM_PERIPH_INVALID)
 		goto out;
 
 	if (xpt_getattr(physpath, MAXPATHLEN,
 			"GEOM::physpath", periph->path) == 0
 	 && strlen(physpath) != 0) {
 
 		/* The alias is created with MAKEDEV_WAITOK; drop the mutex. */
 		mtx_unlock(mtx);
 		make_dev_physpath_alias(MAKEDEV_WAITOK, &softc->alias_dev,
 					softc->dev, softc->alias_dev, physpath);
 		mtx_lock(mtx);
 	}
 
 out:
 	/*
 	 * Record that the initial physical path pass has been made,
 	 * whether or not an alias was actually created.
 	 */
 	if ((softc->flags & PASS_FLAG_INITIAL_PHYSPATH) == 0)
 		softc->flags |= PASS_FLAG_INITIAL_PHYSPATH;
 
 	/*
 	 * We always acquire a reference to the periph before queueing this
 	 * task queue function, so it won't go away before we run.
 	 */
 	while (pending-- > 0)
 		cam_periph_release_locked(periph);
 	/* Unlock through the cached pointer, not through the periph. */
 	mtx_unlock(mtx);
 
 	free(physpath, M_DEVBUF);
 }
 
 /*
  * Async event handler.
  *
  * AC_FOUND_DEVICE: arg is a struct ccb_getdev for the new device; a new
  * pass peripheral is allocated on the given path.
  * AC_ADVINFO_CHANGED: arg carries the advanced-info buffer type; when
  * the physical path changed, the add_physpath task is queued to refresh
  * the devfs alias.
  * Everything else is passed on to cam_periph_async().
  *
  * callback_arg is the periph for per-device registrations and NULL for
  * the global registration made in passinit().
  */
 static void
 passasync(void *callback_arg, u_int32_t code,
 	  struct cam_path *path, void *arg)
 {
 	struct cam_periph *periph;
 
 	periph = (struct cam_periph *)callback_arg;
 
 	switch (code) {
 	case AC_FOUND_DEVICE:
 	{
 		struct ccb_getdev *cgd;
 		cam_status status;
 
 		cgd = (struct ccb_getdev *)arg;
 		if (cgd == NULL)
 			break;
 
 		/*
 		 * Allocate a peripheral instance for
 		 * this device and start the probe
 		 * process.
 		 */
 		status = cam_periph_alloc(passregister, passoninvalidate,
 					  passcleanup, passstart, "pass",
 					  CAM_PERIPH_BIO, path,
 					  passasync, AC_FOUND_DEVICE, cgd);
 
 		if (status != CAM_REQ_CMP
 		 && status != CAM_REQ_INPROG) {
 			const struct cam_status_entry *entry;
 
 			entry = cam_fetch_status_entry(status);
 
 			printf("passasync: Unable to attach new device "
 			       "due to status %#x: %s\n", status, entry ?
 			       entry->status_text : "Unknown");
 		}
 
 		break;
 	}
 	case AC_ADVINFO_CHANGED:
 	{
 		uintptr_t buftype;
 
 		buftype = (uintptr_t)arg;
 		if (buftype == CDAI_TYPE_PHYS_PATH) {
 			struct pass_softc *softc;
 			cam_status status;
 
 			softc = (struct pass_softc *)periph->softc;
 			/*
 			 * Acquire a reference to the periph before we
 			 * start the taskqueue, so that we don't run into
 			 * a situation where the periph goes away before
 			 * the task queue has a chance to run.
 			 */
 			status = cam_periph_acquire(periph);
 			if (status != CAM_REQ_CMP)
 				break;
 
 			/* pass_add_physpath() releases the reference. */
 			taskqueue_enqueue(taskqueue_thread,
 					  &softc->add_physpath_task);
 		}
 		break;
 	}
 	default:
 		cam_periph_async(periph, code, path, arg);
 		break;
 	}
 }
 
 /*
  * Peripheral constructor, invoked through cam_periph_alloc() from
  * passasync().  arg is the struct ccb_getdev describing the device.
  *
  * Allocates and initializes the softc, queries the path for its I/O
  * limits, creates the devstat entry and the devfs node, queues the
  * initial physical-path task and registers for AC_LOST_DEVICE and
  * AC_ADVINFO_CHANGED events.  Three periph references are taken along
  * the way: one for kqueue, one for the devfs node and one for the
  * physical-path task.
  *
  * Returns CAM_REQ_CMP on success and CAM_REQ_CMP_ERR on failure.  The
  * periph lock is dropped while devstat/devfs are set up and is held
  * again on every return path after that point.
  *
  * NOTE(review): the error returns after the softc allocation do not
  * free the softc, the devstat entry or earlier references in this
  * function; verify whether the caller cleans these up.
  */
 static cam_status
 passregister(struct cam_periph *periph, void *arg)
 {
 	struct pass_softc *softc;
 	struct ccb_getdev *cgd;
 	struct ccb_pathinq cpi;
 	struct make_dev_args args;
 	int error, no_tags;
 
 	cgd = (struct ccb_getdev *)arg;
 	if (cgd == NULL) {
 		printf("%s: no getdev CCB, can't register device\n", __func__);
 		return(CAM_REQ_CMP_ERR);
 	}
 
 	softc = (struct pass_softc *)malloc(sizeof(*softc),
 					    M_DEVBUF, M_NOWAIT);
 
 	if (softc == NULL) {
 		printf("%s: Unable to probe new device. "
 		       "Unable to allocate softc\n", __func__);
 		return(CAM_REQ_CMP_ERR);
 	}
 
 	bzero(softc, sizeof(*softc));
 	softc->state = PASS_STATE_NORMAL;
 	/* Pick the device type reported to devstat from the protocol. */
 	if (cgd->protocol == PROTO_SCSI || cgd->protocol == PROTO_ATAPI)
 		softc->pd_type = SID_TYPE(&cgd->inq_data);
 	else if (cgd->protocol == PROTO_SATAPM)
 		softc->pd_type = T_ENCLOSURE;
 	else
 		softc->pd_type = T_DIRECT;
 
 	periph->softc = softc;
 	softc->periph = periph;
 	TAILQ_INIT(&softc->incoming_queue);
 	TAILQ_INIT(&softc->active_queue);
 	TAILQ_INIT(&softc->abandoned_queue);
 	TAILQ_INIT(&softc->done_queue);
 	/* Names for the UMA zones created later by passcreatezone(). */
 	snprintf(softc->zone_name, sizeof(softc->zone_name), "%s%d",
 		 periph->periph_name, periph->unit_number);
 	snprintf(softc->io_zone_name, sizeof(softc->io_zone_name), "%s%dIO",
 		 periph->periph_name, periph->unit_number);
 	softc->io_zone_size = MAXPHYS;
 	knlist_init_mtx(&softc->read_select.si_note, cam_periph_mtx(periph));
 
 	bzero(&cpi, sizeof(cpi));
 	xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
 	cpi.ccb_h.func_code = XPT_PATH_INQ;
 	xpt_action((union ccb *)&cpi);
 
 	if (cpi.maxio == 0)
 		softc->maxio = DFLTPHYS;	/* traditional default */
 	else if (cpi.maxio > MAXPHYS)
 		softc->maxio = MAXPHYS;		/* for safety */
 	else
 		softc->maxio = cpi.maxio;	/* real value */
 
 	if (cpi.hba_misc & PIM_UNMAPPED)
 		softc->flags |= PASS_FLAG_UNMAPPED_CAPABLE;
 
 	/*
 	 * We pass in 0 for a blocksize, since we don't
 	 * know what the blocksize of this device is, if
 	 * it even has a blocksize.
 	 */
 	cam_periph_unlock(periph);
 	no_tags = (cgd->inq_data.flags & SID_CmdQue) == 0;
 	softc->device_stats = devstat_new_entry("pass",
 			  periph->unit_number, 0,
 			  DEVSTAT_NO_BLOCKSIZE
 			  | (no_tags ? DEVSTAT_NO_ORDERED_TAGS : 0),
 			  softc->pd_type |
 			  XPORT_DEVSTAT_TYPE(cpi.transport) |
 			  DEVSTAT_TYPE_PASS,
 			  DEVSTAT_PRIORITY_PASS);
 
 	/*
 	 * Initialize the taskqueue handler for shutting down kqueue.
 	 */
 	TASK_INIT(&softc->shutdown_kqueue_task, /*priority*/ 0,
 		  pass_shutdown_kqueue, periph);
 
 	/*
 	 * Acquire a reference to the periph that we can release once we've
 	 * cleaned up the kqueue.
 	 */
 	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
 		xpt_print(periph->path, "%s: lost periph during "
 			  "registration!\n", __func__);
 		cam_periph_lock(periph);
 		return (CAM_REQ_CMP_ERR);
 	}
 
 	/*
 	 * Acquire a reference to the periph before we create the devfs
 	 * instance for it.  We'll release this reference once the devfs
 	 * instance has been freed.
 	 */
 	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
 		xpt_print(periph->path, "%s: lost periph during "
 			  "registration!\n", __func__);
 		cam_periph_lock(periph);
 		return (CAM_REQ_CMP_ERR);
 	}
 
 	/* Register the device */
 	make_dev_args_init(&args);
 	args.mda_devsw = &pass_cdevsw;
 	args.mda_unit = periph->unit_number;
 	args.mda_uid = UID_ROOT;
 	args.mda_gid = GID_OPERATOR;
 	args.mda_mode = 0600;
 	args.mda_si_drv1 = periph;
 	error = make_dev_s(&args, &softc->dev, "%s%d", periph->periph_name,
 	    periph->unit_number);
 	if (error != 0) {
 		/* No devfs node was created; drop the reference held for it. */
 		cam_periph_lock(periph);
 		cam_periph_release_locked(periph);
 		return (CAM_REQ_CMP_ERR);
 	}
 
 	/*
 	 * Hold a reference to the periph before we create the physical
 	 * path alias so it can't go away.
 	 */
 	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
 		xpt_print(periph->path, "%s: lost periph during "
 			  "registration!\n", __func__);
 		cam_periph_lock(periph);
 		return (CAM_REQ_CMP_ERR);
 	}
 
 	cam_periph_lock(periph);
 
 	TASK_INIT(&softc->add_physpath_task, /*priority*/0,
 		  pass_add_physpath, periph);
 
 	/*
 	 * See if physical path information is already available.
 	 */
 	taskqueue_enqueue(taskqueue_thread, &softc->add_physpath_task);
 
 	/*
 	 * Add an async callback so that we get notified if
 	 * this device goes away or its physical path
 	 * (stored in the advanced info data of the EDT) has
 	 * changed.
 	 */
 	xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED,
 			   passasync, periph, periph->path);
 
 	if (bootverbose)
 		xpt_announce_periph(periph, NULL);
 
 	return(CAM_REQ_CMP);
 }
 
 static int
 passopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct cam_periph *periph;
 	struct pass_softc *softc;
 	int error;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 	if (cam_periph_acquire(periph) != CAM_REQ_CMP)
 		return (ENXIO);
 
 	cam_periph_lock(periph);
 
 	softc = (struct pass_softc *)periph->softc;
 
 	if (softc->flags & PASS_FLAG_INVALID) {
 		cam_periph_release_locked(periph);
 		cam_periph_unlock(periph);
 		return(ENXIO);
 	}
 
 	/*
 	 * Don't allow access when we're running at a high securelevel.
 	 */
 	error = securelevel_gt(td->td_ucred, 1);
 	if (error) {
 		cam_periph_release_locked(periph);
 		cam_periph_unlock(periph);
 		return(error);
 	}
 
 	/*
 	 * Only allow read-write access.
 	 */
 	if (((flags & FWRITE) == 0) || ((flags & FREAD) == 0)) {
 		cam_periph_release_locked(periph);
 		cam_periph_unlock(periph);
 		return(EPERM);
 	}
 
 	/*
 	 * We don't allow nonblocking access.
 	 */
 	if ((flags & O_NONBLOCK) != 0) {
 		xpt_print(periph->path, "can't do nonblocking access\n");
 		cam_periph_release_locked(periph);
 		cam_periph_unlock(periph);
 		return(EINVAL);
 	}
 
 	softc->open_count++;
 
 	cam_periph_unlock(periph);
 
 	return (error);
 }
 
 /*
  * Close entry point for the pass device node.
  *
  * Decrements open_count and, on the final close, frees every request on
  * the done and incoming queues and abandons the requests that are still
  * active (they are freed by passdone() when they complete).  The periph
  * reference taken by the matching passopen() is released last.  Always
  * returns 0.
  */
 static int
 passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
 	struct 	cam_periph *periph;
 	struct  pass_softc *softc;
 	struct mtx *mtx;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 	/* Cache the mutex pointer; see the comment above mtx_unlock(). */
 	mtx = cam_periph_mtx(periph);
 	mtx_lock(mtx);
 
 	softc = periph->softc;
 	softc->open_count--;
 
 	if (softc->open_count == 0) {
 		struct pass_io_req *io_req, *io_req2;
 
 		/* Completed I/O nobody will fetch any more. */
 		TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) {
 			TAILQ_REMOVE(&softc->done_queue, io_req, links);
 			passiocleanup(softc, io_req);
 			uma_zfree(softc->pass_zone, io_req);
 		}
 
 		/* Queued I/O that was never issued. */
 		TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links,
 				   io_req2) {
 			TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
 			passiocleanup(softc, io_req);
 			uma_zfree(softc->pass_zone, io_req);
 		}
 
 		/*
 		 * If there are any active I/Os, we need to forcibly acquire a
 		 * reference to the peripheral so that we don't go away
 		 * before they complete.  We'll release the reference when
 		 * the abandoned queue is empty.
 		 */
 		io_req = TAILQ_FIRST(&softc->active_queue);
 		if ((io_req != NULL)
 		 && (softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0) {
 			cam_periph_doacquire(periph);
 			softc->flags |= PASS_FLAG_ABANDONED_REF_SET;
 		}
 
 		/*
 		 * Since the I/O in the active queue is not under our
 		 * control, just set a flag so that we can clean it up when
 		 * it completes and put it on the abandoned queue.  This
 		 * will prevent our sending spurious completions in the
 		 * event that the device is opened again before these I/Os
 		 * complete.
 		 */
 		TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links,
 				   io_req2) {
 			TAILQ_REMOVE(&softc->active_queue, io_req, links);
 			io_req->flags |= PASS_IO_ABANDONED;
 			TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req,
 					  links);
 		}
 	}
 
 	/* Drop the reference taken in passopen(). */
 	cam_periph_release_locked(periph);
 
 	/*
 	 * We reference the lock directly here, instead of using
 	 * cam_periph_unlock().  The reason is that the call to
 	 * cam_periph_release_locked() above could result in the periph
 	 * getting freed.  If that is the case, dereferencing the periph
 	 * with a cam_periph_unlock() call would cause a page fault.
 	 *
 	 * cam_periph_release() avoids this problem using the same method,
 	 * but we're manually acquiring and dropping the lock here to
 	 * protect the open count and avoid another lock acquisition and
 	 * release.
 	 */
 	mtx_unlock(mtx);
 
 	return (0);
 }
 
 
 /*
  * Start routine of the peripheral: called with a freshly allocated CCB
  * (start_ccb) whenever the periph has been scheduled.
  *
  * Takes the oldest request off the incoming queue, moves it to the
  * active queue, merges the request's CCB into start_ccb and dispatches
  * it with passdone() as the completion callback.  If nothing is queued
  * the CCB is handed straight back.  The periph is rescheduled while more
  * requests are waiting.
  */
 static void
 passstart(struct cam_periph *periph, union ccb *start_ccb)
 {
 	struct pass_softc *sc = periph->softc;
 	struct pass_io_req *req;
 
 	if (sc->state != PASS_STATE_NORMAL)
 		return;
 
 	/* Is there a queued request waiting for a CCB? */
 	req = TAILQ_FIRST(&sc->incoming_queue);
 	if (req == NULL) {
 		xpt_release_ccb(start_ccb);
 		return;
 	}
 
 	TAILQ_REMOVE(&sc->incoming_queue, req, links);
 	TAILQ_INSERT_TAIL(&sc->active_queue, req, links);
 
 	/* Fold the user's CCB into the one we were allocated. */
 	xpt_merge_ccb(start_ccb, &req->ccb);
 	start_ccb->ccb_h.ccb_type = PASS_CCB_QUEUED_IO;
 	start_ccb->ccb_h.ccb_ioreq = req;
 	start_ccb->ccb_h.cbfcnp = passdone;
 	req->alloced_ccb = start_ccb;
 
 	/* Start the devstat(9) clock for this transaction. */
 	binuptime(&req->start_time);
 	devstat_start_transaction(sc->device_stats, &req->start_time);
 
 	xpt_action(start_ccb);
 
 	/* More work pending?  Ask to be called again. */
 	if (!TAILQ_EMPTY(&sc->incoming_queue))
 		xpt_schedule(periph, CAM_PRIORITY_NORMAL);
 }
 
 /*
  * Completion callback for CCBs dispatched by passstart().  Called with
  * the periph lock held.
  *
  * For queued I/O: optionally runs error recovery (only when the user
  * asked for it with CAM_PASS_ERR_RECOVER and the request has not been
  * abandoned), copies the completed CCB back into the request, closes the
  * devstat(9) transaction and then either moves the request to the done
  * queue and notifies select/kqueue waiters, or -- for an abandoned
  * request -- frees it and drops the abandoned-I/O periph reference once
  * the abandoned queue is empty.  done_ccb is released in all cases except
  * when a retry was scheduled.
  */
 static void
 passdone(struct cam_periph *periph, union ccb *done_ccb)
 { 
 	struct pass_softc *softc;
 	struct ccb_scsiio *csio;
 
 	softc = (struct pass_softc *)periph->softc;
 
 	cam_periph_assert(periph, MA_OWNED);
 
 	csio = &done_ccb->csio;
 	switch (csio->ccb_h.ccb_type) {
 	case PASS_CCB_QUEUED_IO: {
 		struct pass_io_req *io_req;
 
 		io_req = done_ccb->ccb_h.ccb_ioreq;
 #if 0
 		xpt_print(periph->path, "%s: called for user CCB %p\n",
 			  __func__, io_req->user_ccb_ptr);
 #endif
 		if (((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
 		 && (done_ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER)
 		 && ((io_req->flags & PASS_IO_ABANDONED) == 0)) {
 			int error;
 
 			error = passerror(done_ccb, CAM_RETRY_SELTO,
 					  SF_RETRY_UA | SF_NO_PRINT);
 
 			if (error == ERESTART) {
 				/*
 				 * A retry was scheduled, so
 				 * just return.
 				 */
 				return;
 			}
 		}
 
 		/*
 		 * Copy the allocated CCB contents back to the malloced CCB
 		 * so we can give status back to the user when he requests it.
 		 */
 		bcopy(done_ccb, &io_req->ccb, sizeof(*done_ccb));
 
 		/*
 		 * Log data/transaction completion with devstat(9).
 		 */
 		switch (done_ccb->ccb_h.func_code) {
 		case XPT_SCSI_IO:
 			devstat_end_transaction(softc->device_stats,
 			    done_ccb->csio.dxfer_len - done_ccb->csio.resid,
 			    done_ccb->csio.tag_action & 0x3,
 			    ((done_ccb->ccb_h.flags & CAM_DIR_MASK) ==
 			    CAM_DIR_NONE) ? DEVSTAT_NO_DATA :
 			    (done_ccb->ccb_h.flags & CAM_DIR_OUT) ?
 			    DEVSTAT_WRITE : DEVSTAT_READ, NULL,
 			    &io_req->start_time);
 			break;
 		case XPT_ATA_IO:
 			devstat_end_transaction(softc->device_stats,
 			    done_ccb->ataio.dxfer_len - done_ccb->ataio.resid,
 			    0, /* Not used in ATA */
 			    ((done_ccb->ccb_h.flags & CAM_DIR_MASK) ==
 			    CAM_DIR_NONE) ? DEVSTAT_NO_DATA : 
 			    (done_ccb->ccb_h.flags & CAM_DIR_OUT) ?
 			    DEVSTAT_WRITE : DEVSTAT_READ, NULL,
 			    &io_req->start_time);
 			break;
 		case XPT_SMP_IO:
 			/*
 			 * XXX KDM this isn't quite right, but there isn't
 			 * currently an easy way to represent a bidirectional
 			 * transfer in devstat.  The only way to do it
 			 * and have the byte counts come out right would
 			 * mean that we would have to record two
 			 * transactions, one for the request and one for the
 			 * response.  For now, so that we report something,
 			 * just treat the entire thing as a read.
 			 */
 			devstat_end_transaction(softc->device_stats,
 			    done_ccb->smpio.smp_request_len +
 			    done_ccb->smpio.smp_response_len,
 			    DEVSTAT_TAG_SIMPLE, DEVSTAT_READ, NULL,
 			    &io_req->start_time);
 			break;
 		default:
 			devstat_end_transaction(softc->device_stats, 0,
 			    DEVSTAT_TAG_NONE, DEVSTAT_NO_DATA, NULL,
 			    &io_req->start_time);
 			break;
 		}
 
 		/*
 		 * In the normal case, take the completed I/O off of the
 		 * active queue and put it on the done queue.  Notify the
 		 * user that we have a completed I/O.
 		 */
 		if ((io_req->flags & PASS_IO_ABANDONED) == 0) {
 			TAILQ_REMOVE(&softc->active_queue, io_req, links);
 			TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links);
 			selwakeuppri(&softc->read_select, PRIBIO);
 			KNOTE_LOCKED(&softc->read_select.si_note, 0);
 		} else {
 			/*
 			 * In the case of an abandoned I/O (final close
 			 * without fetching the I/O), take it off of the
 			 * abandoned queue and free it.
 			 */
 			TAILQ_REMOVE(&softc->abandoned_queue, io_req, links);
 			passiocleanup(softc, io_req);
 			uma_zfree(softc->pass_zone, io_req);
 
 			/*
 			 * Release the done_ccb here, since we may wind up
 			 * freeing the peripheral when we decrement the
 			 * reference count below.
 			 */
 			xpt_release_ccb(done_ccb);
 
 			/*
 			 * If the abandoned queue is empty, we can release
 			 * our reference to the periph since we won't have
 			 * any more completions coming.
 			 */
 			if ((TAILQ_EMPTY(&softc->abandoned_queue))
 			 && (softc->flags & PASS_FLAG_ABANDONED_REF_SET)) {
 				softc->flags &= ~PASS_FLAG_ABANDONED_REF_SET;
 				cam_periph_release_locked(periph);
 			}
 
 			/*
 			 * We have already released the CCB, so we can
 			 * return.
 			 */
 			return;
 		}
 		break;
 	}
 	}
 	xpt_release_ccb(done_ccb);
 }
 
 /*
  * Create the UMA zones used for queued I/O: pass_zone for struct
  * pass_io_req items and pass_io_zone for I/O data segments.
  *
  * Must be called with the periph lock held and only while
  * PASS_FLAG_ZONE_VALID is clear.  The first caller sets
  * PASS_FLAG_ZONE_INPROG, drops the lock and creates the zones; callers
  * that arrive while creation is in progress sleep on &softc->pass_zone
  * until it finishes.
  *
  * Returns 0 when the zones are valid, ENOMEM when creation failed, or
  * the error from msleep() when the wait was interrupted.  The periph
  * lock is held on return.
  */
 static int
 passcreatezone(struct cam_periph *periph)
 {
 	struct pass_softc *softc;
 	int error;
 
 	error = 0;
 	softc = (struct pass_softc *)periph->softc;
 
 	cam_periph_assert(periph, MA_OWNED);
 	KASSERT(((softc->flags & PASS_FLAG_ZONE_VALID) == 0), 
 		("%s called when the pass(4) zone is valid!\n", __func__));
 	KASSERT((softc->pass_zone == NULL), 
 		("%s called when the pass(4) zone is allocated!\n", __func__));
 
 	if ((softc->flags & PASS_FLAG_ZONE_INPROG) == 0) {
 
 		/*
 		 * We're the first context through, so we need to create
 		 * the pass(4) UMA zone for I/O requests.
 		 */
 		softc->flags |= PASS_FLAG_ZONE_INPROG;
 
 		/*
 		 * uma_zcreate() does a blocking (M_WAITOK) allocation,
 		 * so we cannot hold a mutex while we call it.
 		 */
 		cam_periph_unlock(periph);
 
 		softc->pass_zone = uma_zcreate(softc->zone_name,
 		    sizeof(struct pass_io_req), NULL, NULL, NULL, NULL,
 		    /*align*/ 0, /*flags*/ 0);
 
 		softc->pass_io_zone = uma_zcreate(softc->io_zone_name,
 		    softc->io_zone_size, NULL, NULL, NULL, NULL,
 		    /*align*/ 0, /*flags*/ 0);
 
 		cam_periph_lock(periph);
 
 		if ((softc->pass_zone == NULL)
 		 || (softc->pass_io_zone == NULL)) {
 			if (softc->pass_zone == NULL)
 				xpt_print(periph->path, "unable to allocate "
 				    "IO Req UMA zone\n");
 			else
 				xpt_print(periph->path, "unable to allocate "
 				    "IO UMA zone\n");
 			softc->flags &= ~PASS_FLAG_ZONE_INPROG;
 			/*
 			 * Report the failure to our caller, and wake any
 			 * contexts waiting below so that they see INPROG
 			 * cleared and fail as well instead of sleeping
 			 * forever.
 			 *
 			 * NOTE(review): a zone that was created
 			 * successfully is left allocated here; confirm
 			 * whether it should be destroyed before a retry.
 			 */
 			error = ENOMEM;
 			wakeup(&softc->pass_zone);
 			goto bailout;
 		}
 
 		/*
 		 * Set the flags appropriately and notify any other waiters.
 		 * Only the INPROG bit may be cleared here; every other
 		 * flag in the softc must be preserved.
 		 */
 		softc->flags &= ~PASS_FLAG_ZONE_INPROG;
 		softc->flags |= PASS_FLAG_ZONE_VALID;
 		wakeup(&softc->pass_zone);
 	} else {
 		/*
 		 * In this case, the UMA zone has not yet been created, but
 		 * another context is in the process of creating it.  We
 		 * need to sleep until the creation is either done or has
 		 * failed.
 		 */
 		while ((softc->flags & PASS_FLAG_ZONE_INPROG)
 		    && ((softc->flags & PASS_FLAG_ZONE_VALID) == 0)) {
 			error = msleep(&softc->pass_zone,
 				       cam_periph_mtx(periph), PRIBIO,
 				       "paszon", 0);
 			if (error != 0)
 				goto bailout;
 		}
 		/*
 		 * If the zone creation failed, no luck for the user.
 		 */
 		if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0){
 			error = ENOMEM;
 			goto bailout;
 		}
 	}
 bailout:
 	return (error);
 }
 
 /*
  * Undo the buffer setup done for a pass(4) I/O request: free any kernel
  * buffers and S/G lists attached to io_req and put the user's original
  * data pointers back into the embedded CCB.
  *
  * This is called from passmemsetup() when setup fails part way through as
  * well as from passmemdone(), so it has to tolerate a request whose
  * kernel-side pointers were never filled in.
  */
 static void
 passiocleanup(struct pass_softc *softc, struct pass_io_req *io_req)
 {
 	union ccb *ccb;
 	u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
 	int i, numbufs;
 
 	ccb = &io_req->ccb;
 
 	/*
 	 * Locate the CCB fields that hold data pointers for this function
 	 * code so that they can be restored at the end.
 	 */
 	switch (ccb->ccb_h.func_code) {
 	case XPT_DEV_MATCH:
 		numbufs = min(io_req->num_bufs, 2);
 
 		if (numbufs == 1) {
 			data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches;
 		} else {
 			data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns;
 			data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches;
 		}
 		break;
 	case XPT_SCSI_IO:
 	case XPT_CONT_TARGET_IO:
 		data_ptrs[0] = &ccb->csio.data_ptr;
 		numbufs = min(io_req->num_bufs, 1);
 		break;
 	case XPT_ATA_IO:
 		data_ptrs[0] = &ccb->ataio.data_ptr;
 		numbufs = min(io_req->num_bufs, 1);
 		break;
 	case XPT_SMP_IO:
 		numbufs = min(io_req->num_bufs, 2);
 		data_ptrs[0] = &ccb->smpio.smp_request;
 		data_ptrs[1] = &ccb->smpio.smp_response;
 		break;
 	case XPT_DEV_ADVINFO:
 		numbufs = min(io_req->num_bufs, 1);
 		data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
 		break;
 	default:
 		/* No data buffers are set up for any other CCB type. */
 		return;
 	}
 
 	if (io_req->flags & PASS_IO_USER_SEG_MALLOC) {
 		free(io_req->user_segptr, M_SCSIPASS);
 		io_req->user_segptr = NULL;
 	}
 
 	/*
 	 * We only want to free memory we malloced.
 	 */
 	if (io_req->data_flags == CAM_DATA_VADDR) {
 		for (i = 0; i < io_req->num_bufs; i++) {
 			if (io_req->kern_bufs[i] == NULL)
 				continue;
 
 			free(io_req->kern_bufs[i], M_SCSIPASS);
 			io_req->kern_bufs[i] = NULL;
 		}
 	} else if (io_req->data_flags == CAM_DATA_SG &&
 	    io_req->kern_segptr != NULL) {
 		/*
 		 * kern_segptr is still NULL if passmemsetup() failed before
 		 * it picked a kernel S/G list (e.g. the user's S/G list could
 		 * not be copied in), even though num_kern_segs may already
 		 * be non-zero.  Don't walk a list that doesn't exist.
 		 */
 		for (i = 0; i < io_req->num_kern_segs; i++) {
 			if ((uint8_t *)(uintptr_t)
 			    io_req->kern_segptr[i].ds_addr == NULL)
 				continue;
 
 			uma_zfree(softc->pass_io_zone, (uint8_t *)(uintptr_t)
 			    io_req->kern_segptr[i].ds_addr);
 			io_req->kern_segptr[i].ds_addr = 0;
 		}
 	}
 
 	if (io_req->flags & PASS_IO_KERN_SEG_MALLOC) {
 		free(io_req->kern_segptr, M_SCSIPASS);
 		io_req->kern_segptr = NULL;
 	}
 
 	if (io_req->data_flags != CAM_DATA_PADDR) {
 		for (i = 0; i < numbufs; i++) {
 			/*
 			 * Restore the user's buffer pointers to their
 			 * previous values.
 			 */
 			if (io_req->user_bufs[i] != NULL)
 				*data_ptrs[i] = io_req->user_bufs[i];
 		}
 	}
 
 }
 
 /*
  * Copy data between the user's S/G list and the kernel S/G list of an
  * I/O request.
  *
  * The two lists may be segmented differently, so a watermark (offset into
  * the current segment) is kept for each side and the copy proceeds in
  * chunks that fit in both current segments.
  *
  * direction == CAM_DIR_IN copies kernel -> user (copyout); any other value
  * copies user -> kernel (copyin).
  *
  * Returns 0 on success, EFAULT if a user segment is not accessible, or the
  * error returned by copyin()/copyout().
  */
 static int
 passcopysglist(struct cam_periph *periph, struct pass_io_req *io_req,
 	       ccb_flags direction)
 {
 	bus_size_t kern_watermark, user_watermark, len_to_copy;
 	bus_dma_segment_t *user_sglist, *kern_sglist;
 	int i, j, error;
 
 	error = 0;
 	kern_watermark = 0;
 	user_watermark = 0;
 	len_to_copy = 0;
 	user_sglist = io_req->user_segptr;
 	kern_sglist = io_req->kern_segptr;
 
 	for (i = 0, j = 0; i < io_req->num_user_segs &&
 	     j < io_req->num_kern_segs;) {
 		uint8_t *user_ptr, *kern_ptr;
 
 		/* Largest chunk that fits in both current segments. */
 		len_to_copy = min(user_sglist[i].ds_len -user_watermark,
 		    kern_sglist[j].ds_len - kern_watermark);
 
 		user_ptr = (uint8_t *)(uintptr_t)user_sglist[i].ds_addr;
 		user_ptr = user_ptr + user_watermark;
 		kern_ptr = (uint8_t *)(uintptr_t)kern_sglist[j].ds_addr;
 		kern_ptr = kern_ptr + kern_watermark;
 
 		user_watermark += len_to_copy;
 		kern_watermark += len_to_copy;
 
 		if (!useracc(user_ptr, len_to_copy,
 		    (direction == CAM_DIR_IN) ? VM_PROT_WRITE : VM_PROT_READ)) {
 			/*
 			 * bus_size_t has no printf length modifier of its
 			 * own; widen to uintmax_t and print with %ju.
 			 */
 			xpt_print(periph->path, "%s: unable to access user "
 				  "S/G list element %p len %ju\n", __func__,
 				  user_ptr, (uintmax_t)len_to_copy);
 			error = EFAULT;
 			goto bailout;
 		}
 
 		if (direction == CAM_DIR_IN) {
 			error = copyout(kern_ptr, user_ptr, len_to_copy);
 			if (error != 0) {
 				xpt_print(periph->path, "%s: copyout of %ju "
 					  "bytes from %p to %p failed with "
 					  "error %d\n", __func__,
 					  (uintmax_t)len_to_copy,
 					  kern_ptr, user_ptr, error);
 				goto bailout;
 			}
 		} else {
 			error = copyin(user_ptr, kern_ptr, len_to_copy);
 			if (error != 0) {
 				xpt_print(periph->path, "%s: copyin of %ju "
 					  "bytes from %p to %p failed with "
 					  "error %d\n", __func__,
 					  (uintmax_t)len_to_copy,
 					  user_ptr, kern_ptr, error);
 				goto bailout;
 			}
 		}
 
 		/* Advance to the next segment on whichever side is full. */
 		if (user_sglist[i].ds_len == user_watermark) {
 			i++;
 			user_watermark = 0;
 		}
 
 		if (kern_sglist[j].ds_len == kern_watermark) {
 			j++;
 			kern_watermark = 0;
 		}
 	}
 
 bailout:
 
 	return (error);
 }
 
 /*
  * Prepare the data buffers of a user-supplied CCB for submission.
  *
  * Depending on the CCB's function code and data flags, this allocates
  * kernel buffers (or a kernel S/G list), copies outbound data in from
  * userland, and rewrites the CCB's data pointers to reference the kernel
  * copies.  The user's original pointers, directions and lengths are saved
  * in io_req so that passmemdone()/passiocleanup() can copy results back
  * and restore them.
  *
  * Must be called without the periph lock held, since it performs blocking
  * allocations and user-space copies.
  *
  * Returns 0 on success or an errno value.  On any failure reached through
  * the bailout label, passiocleanup() has already been run on io_req.
  */
 static int
 passmemsetup(struct cam_periph *periph, struct pass_io_req *io_req)
 {
 	union ccb *ccb;
 	struct pass_softc *softc;
 	int numbufs, i;
 	uint8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
 	uint32_t lengths[CAM_PERIPH_MAXMAPS];
 	uint32_t dirs[CAM_PERIPH_MAXMAPS];
 	uint32_t num_segs;
 	uint16_t *seg_cnt_ptr;
 	size_t maxmap;
 	int error;
 
 	cam_periph_assert(periph, MA_NOTOWNED);
 
 	softc = periph->softc;
 
 	error = 0;
 	ccb = &io_req->ccb;
 	maxmap = 0;
 	num_segs = 0;
 	seg_cnt_ptr = NULL;
 
 	/*
 	 * Gather the data pointer(s), length(s) and direction(s) for this
 	 * CCB type.
 	 */
 	switch(ccb->ccb_h.func_code) {
 	case XPT_DEV_MATCH:
 		if (ccb->cdm.match_buf_len == 0) {
 			printf("%s: invalid match buffer length 0\n", __func__);
 			return(EINVAL);
 		}
 		if (ccb->cdm.pattern_buf_len > 0) {
 			data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns;
 			lengths[0] = ccb->cdm.pattern_buf_len;
 			dirs[0] = CAM_DIR_OUT;
 			data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches;
 			lengths[1] = ccb->cdm.match_buf_len;
 			dirs[1] = CAM_DIR_IN;
 			numbufs = 2;
 		} else {
 			data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches;
 			lengths[0] = ccb->cdm.match_buf_len;
 			dirs[0] = CAM_DIR_IN;
 			numbufs = 1;
 		}
 		io_req->data_flags = CAM_DATA_VADDR;
 		break;
 	case XPT_SCSI_IO:
 	case XPT_CONT_TARGET_IO:
 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
 			return(0);
 
 		/*
 		 * The user shouldn't be able to supply a bio.
 		 */
 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
 			return (EINVAL);
 
 		io_req->data_flags = ccb->ccb_h.flags & CAM_DATA_MASK;
 
 		data_ptrs[0] = &ccb->csio.data_ptr;
 		lengths[0] = ccb->csio.dxfer_len;
 		dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
 		num_segs = ccb->csio.sglist_cnt;
 		seg_cnt_ptr = &ccb->csio.sglist_cnt;
 		numbufs = 1;
 		maxmap = softc->maxio;
 		break;
 	case XPT_ATA_IO:
 		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
 			return(0);
 
 		/*
 		 * We only support a single virtual address for ATA I/O.
 		 */
 		if ((ccb->ccb_h.flags & CAM_DATA_MASK) != CAM_DATA_VADDR)
 			return (EINVAL);
 
 		io_req->data_flags = CAM_DATA_VADDR;
 
 		data_ptrs[0] = &ccb->ataio.data_ptr;
 		lengths[0] = ccb->ataio.dxfer_len;
 		dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
 		numbufs = 1;
 		maxmap = softc->maxio;
 		break;
 	case XPT_SMP_IO:
 		io_req->data_flags = CAM_DATA_VADDR;
 
 		data_ptrs[0] = &ccb->smpio.smp_request;
 		lengths[0] = ccb->smpio.smp_request_len;
 		dirs[0] = CAM_DIR_OUT;
 		data_ptrs[1] = &ccb->smpio.smp_response;
 		lengths[1] = ccb->smpio.smp_response_len;
 		dirs[1] = CAM_DIR_IN;
 		numbufs = 2;
 		maxmap = softc->maxio;
 		break;
 	case XPT_DEV_ADVINFO:
 		if (ccb->cdai.bufsiz == 0)
 			return (0);
 
 		io_req->data_flags = CAM_DATA_VADDR;
 
 		data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
 		lengths[0] = ccb->cdai.bufsiz;
 		dirs[0] = CAM_DIR_IN;
 		numbufs = 1;
 		break;
 	default:
 		return(EINVAL);
 		break; /* NOTREACHED */
 	}
 
 	io_req->num_bufs = numbufs;
 
 	/*
 	 * If there is a maximum, check to make sure that the user's
 	 * request fits within the limit.  In general, we should only have
 	 * a maximum length for requests that go to hardware.  Otherwise it
 	 * is whatever we're able to malloc.
 	 */
 	for (i = 0; i < numbufs; i++) {
 		io_req->user_bufs[i] = *data_ptrs[i];
 		io_req->dirs[i] = dirs[i];
 		io_req->lengths[i] = lengths[i];
 
 		if (maxmap == 0)
 			continue;
 
 		if (lengths[i] <= maxmap)
 			continue;
 
 		/* maxmap is a size_t, so it needs %zu rather than %u. */
 		xpt_print(periph->path, "%s: data length %u > max allowed %zu "
 			  "bytes\n", __func__, lengths[i], maxmap);
 		error = EINVAL;
 		goto bailout;
 	}
 
 	switch (io_req->data_flags) {
 	case CAM_DATA_VADDR:
 		/* Map or copy the buffer into kernel address space */
 		for (i = 0; i < numbufs; i++) {
 			uint8_t *tmp_buf;
 
 			/*
 			 * If for some reason no length is specified, we
 			 * don't need to allocate anything.
 			 */
 			if (io_req->lengths[i] == 0)
 				continue;
 
 			/*
 			 * Make sure that the user's buffer is accessible
 			 * to that process.
 			 */
 			if (!useracc(io_req->user_bufs[i], io_req->lengths[i],
 			    (io_req->dirs[i] == CAM_DIR_IN) ? VM_PROT_WRITE :
 			     VM_PROT_READ)) {
 				xpt_print(periph->path, "%s: user address %p "
 				    "length %u is not accessible\n", __func__,
 				    io_req->user_bufs[i], io_req->lengths[i]);
 				error = EFAULT;
 				goto bailout;
 			}
 
 			tmp_buf = malloc(lengths[i], M_SCSIPASS,
 					 M_WAITOK | M_ZERO);
 			io_req->kern_bufs[i] = tmp_buf;
 			*data_ptrs[i] = tmp_buf;
 
 #if 0
 			xpt_print(periph->path, "%s: malloced %p len %u, user "
 				  "buffer %p, operation: %s\n", __func__,
 				  tmp_buf, lengths[i], io_req->user_bufs[i],
 				  (dirs[i] == CAM_DIR_IN) ? "read" : "write");
 #endif
 			/*
 			 * We only need to copy in if the user is writing.
 			 */
 			if (dirs[i] != CAM_DIR_OUT)
 				continue;
 
 			error = copyin(io_req->user_bufs[i],
 				       io_req->kern_bufs[i], lengths[i]);
 			if (error != 0) {
 				xpt_print(periph->path, "%s: copy of user "
 					  "buffer from %p to %p failed with "
 					  "error %d\n", __func__,
 					  io_req->user_bufs[i],
 					  io_req->kern_bufs[i], error);
 				goto bailout;
 			}
 		}
 		break;
 	case CAM_DATA_PADDR:
 		/* Pass down the pointer as-is */
 		break;
 	case CAM_DATA_SG: {
 		size_t sg_length, size_to_go, alloc_size;
 		uint32_t num_segs_needed;
 
 		/*
 		 * Copy the user S/G list in, and then copy in the
 		 * individual segments.
 		 */
 		/*
 		 * We shouldn't see this, but check just in case.
 		 */
 		if (numbufs != 1) {
 			xpt_print(periph->path, "%s: cannot currently handle "
 				  "more than one S/G list per CCB\n", __func__);
 			error = EINVAL;
 			goto bailout;
 		}
 
 		/*
 		 * We have to have at least one segment.
 		 */
 		if (num_segs == 0) {
 			xpt_print(periph->path, "%s: CAM_DATA_SG flag set, "
 				  "but sglist_cnt=0!\n", __func__);
 			error = EINVAL;
 			goto bailout;
 		}
 
 		/*
 		 * Make sure the user specified the total length and didn't
 		 * just leave it to us to decode the S/G list.
 		 */
 		if (lengths[0] == 0) {
 			xpt_print(periph->path, "%s: no dxfer_len specified, "
 				  "but CAM_DATA_SG flag is set!\n", __func__);
 			error = EINVAL;
 			goto bailout;
 		}
 
 		/*
 		 * We allocate buffers in io_zone_size increments for an
 		 * S/G list.  This will generally be MAXPHYS.
 		 */
 		if (lengths[0] <= softc->io_zone_size)
 			num_segs_needed = 1;
 		else {
 			num_segs_needed = lengths[0] / softc->io_zone_size;
 			if ((lengths[0] % softc->io_zone_size) != 0)
 				num_segs_needed++;
 		}
 
 		/* Figure out the size of the S/G list */
 		sg_length = num_segs * sizeof(bus_dma_segment_t);
 		io_req->num_user_segs = num_segs;
 
 		/* Save the user's S/G list pointer for later restoration */
 		io_req->user_bufs[0] = *data_ptrs[0];
 
 		/*
 		 * If the user's S/G list has more entries than the array
 		 * embedded in the request can hold, allocate a larger one;
 		 * otherwise use the embedded array.
 		 */
 		if (num_segs > PASS_MAX_SEGS) {
 			io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) *
 			    num_segs, M_SCSIPASS, M_WAITOK | M_ZERO);
 			io_req->flags |= PASS_IO_USER_SEG_MALLOC;
 		} else
 			io_req->user_segptr = io_req->user_segs;
 
 		if (!useracc(*data_ptrs[0], sg_length, VM_PROT_READ)) {
 			xpt_print(periph->path, "%s: unable to access user "
 				  "S/G list at %p\n", __func__, *data_ptrs[0]);
 			error = EFAULT;
 			goto bailout;
 		}
 
 		error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length);
 		if (error != 0) {
 			xpt_print(periph->path, "%s: copy of user S/G list "
 				  "from %p to %p failed with error %d\n",
 				  __func__, *data_ptrs[0], io_req->user_segptr,
 				  error);
 			goto bailout;
 		}
 
 		if (num_segs_needed > PASS_MAX_SEGS) {
 			io_req->kern_segptr = malloc(sizeof(bus_dma_segment_t) *
 			    num_segs_needed, M_SCSIPASS, M_WAITOK | M_ZERO);
 			io_req->flags |= PASS_IO_KERN_SEG_MALLOC;
 		} else {
 			io_req->kern_segptr = io_req->kern_segs;
 		}
 
 		/*
 		 * Only publish the kernel segment count once kern_segptr
 		 * points at a real list.  The error paths above end up in
 		 * passiocleanup(), which walks num_kern_segs entries of
 		 * kern_segptr; a non-zero count with a NULL list would be a
 		 * NULL dereference there.
 		 */
 		io_req->num_kern_segs = num_segs_needed;
 
 		/*
 		 * Allocate the kernel S/G list.
 		 */
 		for (size_to_go = lengths[0], i = 0;
 		     size_to_go > 0 && i < num_segs_needed;
 		     i++, size_to_go -= alloc_size) {
 			uint8_t *kern_ptr;
 
 			alloc_size = min(size_to_go, softc->io_zone_size);
 			kern_ptr = uma_zalloc(softc->pass_io_zone, M_WAITOK);
 			io_req->kern_segptr[i].ds_addr =
 			    (bus_addr_t)(uintptr_t)kern_ptr;
 			io_req->kern_segptr[i].ds_len = alloc_size;
 		}
 		if (size_to_go > 0) {
 			printf("%s: size_to_go = %zu, software error!\n",
 			       __func__, size_to_go);
 			error = EINVAL;
 			goto bailout;
 		}
 
 		*data_ptrs[0] = (uint8_t *)io_req->kern_segptr;
 		*seg_cnt_ptr = io_req->num_kern_segs;
 
 		/*
 		 * We only need to copy data here if the user is writing.
 		 */
 		if (dirs[0] == CAM_DIR_OUT)
 			error = passcopysglist(periph, io_req, dirs[0]);
 		break;
 	}
 	case CAM_DATA_SG_PADDR: {
 		size_t sg_length;
 
 		/*
 		 * We shouldn't see this, but check just in case.
 		 */
 		if (numbufs != 1) {
 			printf("%s: cannot currently handle more than one "
 			       "S/G list per CCB\n", __func__);
 			error = EINVAL;
 			goto bailout;
 		}
 
 		/*
 		 * We have to have at least one segment.
 		 */
 		if (num_segs == 0) {
 			xpt_print(periph->path, "%s: CAM_DATA_SG_PADDR flag "
 				  "set, but sglist_cnt=0!\n", __func__);
 			error = EINVAL;
 			goto bailout;
 		}
 
 		/*
 		 * Make sure the user specified the total length and didn't
 		 * just leave it to us to decode the S/G list.
 		 */
 		if (lengths[0] == 0) {
 			xpt_print(periph->path, "%s: no dxfer_len specified, "
 				  "but CAM_DATA_SG_PADDR flag is set!\n",
 				  __func__);
 			error = EINVAL;
 			goto bailout;
 		}
 
 		/* Figure out the size of the S/G list */
 		sg_length = num_segs * sizeof(bus_dma_segment_t);
 		io_req->num_user_segs = num_segs;
 		io_req->num_kern_segs = io_req->num_user_segs;
 
 		/* Save the user's S/G list pointer for later restoration */
 		io_req->user_bufs[0] = *data_ptrs[0];
 
 		if (num_segs > PASS_MAX_SEGS) {
 			io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) *
 			    num_segs, M_SCSIPASS, M_WAITOK | M_ZERO);
 			io_req->flags |= PASS_IO_USER_SEG_MALLOC;
 		} else
 			io_req->user_segptr = io_req->user_segs;
 
 		/* The kernel list is simply the copied-in user list here. */
 		io_req->kern_segptr = io_req->user_segptr;
 
 		error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length);
 		if (error != 0) {
 			xpt_print(periph->path, "%s: copy of user S/G list "
 				  "from %p to %p failed with error %d\n",
 				  __func__, *data_ptrs[0], io_req->user_segptr,
 				  error);
 			goto bailout;
 		}
 		break;
 	}
 	default:
 	case CAM_DATA_BIO:
 		/*
 		 * A user shouldn't be attaching a bio to the CCB.  It
 		 * isn't a user-accessible structure.
 		 */
 		error = EINVAL;
 		break;
 	}
 
 bailout:
 	if (error != 0)
 		passiocleanup(softc, io_req);
 
 	return (error);
 }
 
 static int
 passmemdone(struct cam_periph *periph, struct pass_io_req *io_req)
 {
 	struct pass_softc *softc;
 	union ccb *ccb;
 	int error;
 	int i;
 
 	error = 0;
 	softc = (struct pass_softc *)periph->softc;
 	ccb = &io_req->ccb;
 
 	switch (io_req->data_flags) {
 	case CAM_DATA_VADDR:
 		/*
 		 * Copy back to the user buffer if this was a read.
 		 */
 		for (i = 0; i < io_req->num_bufs; i++) {
 			if (io_req->dirs[i] != CAM_DIR_IN)
 				continue;
 
 			error = copyout(io_req->kern_bufs[i],
 			    io_req->user_bufs[i], io_req->lengths[i]);
 			if (error != 0) {
 				xpt_print(periph->path, "Unable to copy %u "
 					  "bytes from %p to user address %p\n",
 					  io_req->lengths[i],
 					  io_req->kern_bufs[i],
 					  io_req->user_bufs[i]);
 				goto bailout;
 			}
 
 		}
 		break;
 	case CAM_DATA_PADDR:
 		/* Do nothing.  The pointer is a physical address already */
 		break;
 	case CAM_DATA_SG:
 		/*
 		 * Copy back to the user buffer if this was a read.
 		 * Restore the user's S/G list buffer pointer.
 		 */
 		if (io_req->dirs[0] == CAM_DIR_IN)
 			error = passcopysglist(periph, io_req, io_req->dirs[0]);
 		break;
 	case CAM_DATA_SG_PADDR:
 		/*
 		 * Restore the user's S/G list buffer pointer.  No need to
 		 * copy.
 		 */
 		break;
 	default:
 	case CAM_DATA_BIO:
 		error = EINVAL;
 		break;
 	}
 
 bailout:
 	/*
 	 * Reset the user's pointers to their original values and free
 	 * allocated memory.
 	 */
 	passiocleanup(softc, io_req);
 
 	return (error);
 }
 
 /*
  * ioctl entry point.  Try the native handler first; if it does not
  * recognize the command (ENOTTY), retry through the CAM compatibility
  * shim, which calls back into passdoioctl().
  */
 static int
 passioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
 {
 	int error;
 
 	error = passdoioctl(dev, cmd, addr, flag, td);
 	if (error != ENOTTY)
 		return (error);
 
 	return (cam_compat_ioctl(dev, cmd, addr, flag, td, passdoioctl));
 }
 
 /*
  * ioctl worker for the pass(4) device.
  *
  * Handles three commands directly:
  *   CAMIOCOMMAND - run the caller's CCB synchronously via passsendccb().
  *   CAMIOQUEUE   - copy in a user CCB, set up its buffers and queue it for
  *                  asynchronous execution.
  *   CAMIOGET     - hand the first request on the done queue back to the
  *                  user.
  * Anything else is forwarded to cam_periph_ioctl().
  *
  * The periph lock is taken on entry and dropped at the bailout label, so
  * every path that temporarily releases the lock must reacquire it before
  * leaving the switch.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 passdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
 {
 	struct	cam_periph *periph;
 	struct	pass_softc *softc;
 	int	error;
 	uint32_t priority;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 	cam_periph_lock(periph);
 	softc = (struct pass_softc *)periph->softc;
 
 	error = 0;
 
 	switch (cmd) {
 
 	case CAMIOCOMMAND:
 	{
 		union ccb *inccb;
 		union ccb *ccb;
 		int ccb_malloced;
 
 		inccb = (union ccb *)addr;
 
 		/*
 		 * Some CCB types, like scan bus and scan lun can only go
 		 * through the transport layer device.
 		 */
 		if (inccb->ccb_h.func_code & XPT_FC_XPT_ONLY) {
 			xpt_print(periph->path, "CCB function code %#x is "
 			    "restricted to the XPT device\n",
 			    inccb->ccb_h.func_code);
 			error = ENODEV;
 			break;
 		}
 
 		/* Compatibility for RL/priority-unaware code. */
 		priority = inccb->ccb_h.pinfo.priority;
 		if (priority <= CAM_PRIORITY_OOB)
 		    priority += CAM_PRIORITY_OOB + 1;
 
 		/*
 		 * Non-immediate CCBs need a CCB from the per-device pool
 		 * of CCBs, which is scheduled by the transport layer.
 		 * Immediate CCBs and user-supplied CCBs should just be
 		 * malloced.
 		 */
 		if ((inccb->ccb_h.func_code & XPT_FC_QUEUED)
 		 && ((inccb->ccb_h.func_code & XPT_FC_USER_CCB) == 0)) {
 			ccb = cam_periph_getccb(periph, priority);
 			ccb_malloced = 0;
 		} else {
 			ccb = xpt_alloc_ccb_nowait();
 
 			if (ccb != NULL)
 				xpt_setup_ccb(&ccb->ccb_h, periph->path,
 					      priority);
 			ccb_malloced = 1;
 		}
 
 		if (ccb == NULL) {
 			xpt_print(periph->path, "unable to allocate CCB\n");
 			error = ENOMEM;
 			break;
 		}
 
 		error = passsendccb(periph, ccb, inccb);
 
 		/* Give the CCB back the same way it was obtained. */
 		if (ccb_malloced)
 			xpt_free_ccb(ccb);
 		else
 			xpt_release_ccb(ccb);
 
 		break;
 	}
 	case CAMIOQUEUE:
 	{
 		struct pass_io_req *io_req;
 		union ccb **user_ccb, *ccb;
 		xpt_opcode fc;
 
 		/* Create the UMA zones on first use. */
 		if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0) {
 			error = passcreatezone(periph);
 			if (error != 0)
 				goto bailout;
 		}
 
 		/*
 		 * We're going to do a blocking allocation for this I/O
 		 * request, so we have to drop the lock.
 		 */
 		cam_periph_unlock(periph);
 
 		io_req = uma_zalloc(softc->pass_zone, M_WAITOK | M_ZERO);
 		ccb = &io_req->ccb;
 		user_ccb = (union ccb **)addr;
 
 		/*
 		 * Unlike the CAMIOCOMMAND ioctl above, we only have a
 		 * pointer to the user's CCB, so we have to copy the whole
 		 * thing in to a buffer we have allocated (above) instead
 		 * of allowing the ioctl code to malloc a buffer and copy
 		 * it in.
 		 *
 		 * This is an advantage for this asynchronous interface,
 		 * since we don't want the memory to get freed while the
 		 * CCB is outstanding.
 		 */
 #if 0
 		xpt_print(periph->path, "Copying user CCB %p to "
 			  "kernel address %p\n", *user_ccb, ccb);
 #endif
 		error = copyin(*user_ccb, ccb, sizeof(*ccb));
 		if (error != 0) {
 			xpt_print(periph->path, "Copy of user CCB %p to "
 				  "kernel address %p failed with error %d\n",
 				  *user_ccb, ccb, error);
 			uma_zfree(softc->pass_zone, io_req);
 			/* Reacquire the lock; bailout will drop it. */
 			cam_periph_lock(periph);
 			break;
 		}
 
 		/*
 		 * Some CCB types, like scan bus and scan lun can only go
 		 * through the transport layer device.
 		 */
 		if (ccb->ccb_h.func_code & XPT_FC_XPT_ONLY) {
 			xpt_print(periph->path, "CCB function code %#x is "
 			    "restricted to the XPT device\n",
 			    ccb->ccb_h.func_code);
 			uma_zfree(softc->pass_zone, io_req);
 			cam_periph_lock(periph);
 			error = ENODEV;
 			break;
 		}
 
 		/*
 		 * Save the user's CCB pointer as well as his linked list
 		 * pointers and peripheral private area so that we can
 		 * restore these later.
 		 */
 		io_req->user_ccb_ptr = *user_ccb;
 		io_req->user_periph_links = ccb->ccb_h.periph_links;
 		io_req->user_periph_priv = ccb->ccb_h.periph_priv;
 
 		/*
 		 * Now that we've saved the user's values, we can set our
 		 * own peripheral private entry.
 		 */
 		ccb->ccb_h.ccb_ioreq = io_req;
 
 		/* Compatibility for RL/priority-unaware code. */
 		priority = ccb->ccb_h.pinfo.priority;
 		if (priority <= CAM_PRIORITY_OOB)
 		    priority += CAM_PRIORITY_OOB + 1;
 
 		/*
 		 * Setup fields in the CCB like the path and the priority.
 		 * The path in particular cannot be done in userland, since
 		 * it is a pointer to a kernel data structure.
 		 */
 		xpt_setup_ccb_flags(&ccb->ccb_h, periph->path, priority,
 				    ccb->ccb_h.flags);
 
 		/*
 		 * Setup our done routine.  There is no way for the user to
 		 * have a valid pointer here.
 		 */
 		ccb->ccb_h.cbfcnp = passdone;
 
 		fc = ccb->ccb_h.func_code;
 		/*
 		 * If this function code has memory that can be mapped in
 		 * or out, we need to call passmemsetup().
 		 */
 		if ((fc == XPT_SCSI_IO) || (fc == XPT_ATA_IO)
 		 || (fc == XPT_SMP_IO) || (fc == XPT_DEV_MATCH)
 		 || (fc == XPT_DEV_ADVINFO)) {
 			error = passmemsetup(periph, io_req);
 			if (error != 0) {
 				uma_zfree(softc->pass_zone, io_req);
 				cam_periph_lock(periph);
 				break;
 			}
 		} else
 			io_req->mapinfo.num_bufs_used = 0;
 
 		/* Queue manipulation below is done with the lock held. */
 		cam_periph_lock(periph);
 
 		/*
 		 * Everything goes on the incoming queue initially.
 		 */
 		TAILQ_INSERT_TAIL(&softc->incoming_queue, io_req, links);
 
 		/*
 		 * If the CCB is queued, and is not a user CCB, then
 		 * we need to allocate a slot for it.  Call xpt_schedule()
 		 * so that our start routine will get called when a CCB is
 		 * available.
 		 */
 		if ((fc & XPT_FC_QUEUED)
 		 && ((fc & XPT_FC_USER_CCB) == 0)) {
 			xpt_schedule(periph, priority);
 			break;
 		} 
 
 		/*
 		 * At this point, the CCB in question is either an
 		 * immediate CCB (like XPT_DEV_ADVINFO) or it is a user CCB
 		 * and therefore should be malloced, not allocated via a slot.
 		 * Remove the CCB from the incoming queue and add it to the
 		 * active queue.
 		 */
 		TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
 		TAILQ_INSERT_TAIL(&softc->active_queue, io_req, links);
 
 		xpt_action(ccb);
 
 		/*
 		 * If this is not a queued CCB (i.e. it is an immediate CCB),
 		 * then it is already done.  We need to put it on the done
 		 * queue for the user to fetch.
 		 */
 		if ((fc & XPT_FC_QUEUED) == 0) {
 			TAILQ_REMOVE(&softc->active_queue, io_req, links);
 			TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links);
 		}
 		break;
 	}
 	case CAMIOGET:
 	{
 		union ccb **user_ccb;
 		struct pass_io_req *io_req;
 		int old_error;
 
 		user_ccb = (union ccb **)addr;
 		old_error = 0;
 
 		/* ENOENT tells the caller that nothing has completed yet. */
 		io_req = TAILQ_FIRST(&softc->done_queue);
 		if (io_req == NULL) {
 			error = ENOENT;
 			break;
 		}
 
 		/*
 		 * Remove the I/O from the done queue.
 		 */
 		TAILQ_REMOVE(&softc->done_queue, io_req, links);
 
 		/*
 		 * We have to drop the lock during the copyout because the
 		 * copyout can result in VM faults that require sleeping.
 		 */
 		cam_periph_unlock(periph);
 
 		/*
 		 * Do any needed copies (e.g. for reads) and revert the
 		 * pointers in the CCB back to the user's pointers.
 		 */
 		error = passmemdone(periph, io_req);
 
 		old_error = error;
 
 		/* Put back the header fields saved at CAMIOQUEUE time. */
 		io_req->ccb.ccb_h.periph_links = io_req->user_periph_links;
 		io_req->ccb.ccb_h.periph_priv = io_req->user_periph_priv;
 
 #if 0
 		xpt_print(periph->path, "Copying to user CCB %p from "
 			  "kernel address %p\n", *user_ccb, &io_req->ccb);
 #endif
 
 		error = copyout(&io_req->ccb, *user_ccb, sizeof(union ccb));
 		if (error != 0) {
 			xpt_print(periph->path, "Copy to user CCB %p from "
 				  "kernel address %p failed with error %d\n",
 				  *user_ccb, &io_req->ccb, error);
 		}
 
 		/*
 		 * Prefer the first error we got back, and make sure we
 		 * don't overwrite bad status with good.
 		 */
 		if (old_error != 0)
 			error = old_error;
 
 		cam_periph_lock(periph);
 
 		/*
 		 * At this point, if there was an error, we could potentially
 		 * re-queue the I/O and try again.  But why?  The error
 		 * would almost certainly happen again.  We might as well
 		 * not leak memory.
 		 */
 		uma_zfree(softc->pass_zone, io_req);
 		break;
 	}
 	default:
 		error = cam_periph_ioctl(periph, cmd, addr, passerror);
 		break;
 	}
 
 bailout:
 	/* Every path reaches here with the periph lock held. */
 	cam_periph_unlock(periph);
 
 	return(error);
 }
 
 /*
  * poll(2) handler.  The device is always reported writable; it is
  * reported readable when at least one request sits on the done queue.
  * If read events were requested and nothing at all is ready, the thread
  * is recorded on read_select.
  */
 static int
 passpoll(struct cdev *dev, int poll_events, struct thread *td)
 {
 	struct cam_periph *periph;
 	struct pass_softc *softc;
 	int read_events, ready;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 	softc = (struct pass_softc *)periph->softc;
 
 	ready = poll_events & (POLLOUT | POLLWRNORM);
 	read_events = poll_events & (POLLIN | POLLRDNORM);
 
 	/* Caller isn't interested in reads; no need to look at the queue. */
 	if (read_events == 0)
 		return (ready);
 
 	cam_periph_lock(periph);
 	if (!TAILQ_EMPTY(&softc->done_queue))
 		ready |= read_events;
 	cam_periph_unlock(periph);
 
 	if (ready == 0)
 		selrecord(td, &softc->read_select);
 
 	return (ready);
 }
 
 /*
  * kqueue attach handler: point the knote at this periph, install the
  * pass(4) read filter ops and add the knote to the read_select note list.
  * Always returns 0.
  */
 static int
 passkqfilter(struct cdev *dev, struct knote *kn)
 {
 	struct cam_periph *periph;
 	struct pass_softc *softc;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 
 	kn->kn_fop = &passread_filtops;
 	kn->kn_hook = (caddr_t)periph;
 
 	softc = (struct pass_softc *)periph->softc;
 	knlist_add(&softc->read_select.si_note, kn, 0);
 
 	return (0);
 }
 
 /*
  * kqueue detach handler: take the knote back off the read_select note
  * list of the periph stored in kn_hook.
  */
 static void
 passreadfiltdetach(struct knote *kn)
 {
 	struct cam_periph *periph;
 	struct pass_softc *softc;
 
 	periph = (struct cam_periph *)kn->kn_hook;
 	softc = (struct pass_softc *)periph->softc;
 	knlist_remove(&softc->read_select.si_note, kn, 0);
 }
 
 /*
  * kqueue read filter: returns 1 when the done queue holds at least one
  * request and 0 otherwise.  Asserts that the periph lock is held; hint is
  * unused.
  */
 static int
 passreadfilt(struct knote *kn, long hint)
 {
 	struct cam_periph *periph;
 	struct pass_softc *softc;
 
 	periph = (struct cam_periph *)kn->kn_hook;
 	softc = (struct pass_softc *)periph->softc;
 
 	cam_periph_assert(periph, MA_OWNED);
 
 	return (TAILQ_EMPTY(&softc->done_queue) ? 0 : 1);
 }
 
 /*
  * Generally, "ccb" should be the CCB supplied by the kernel.  "inccb"
  * should be the CCB that is copied in from the user.
  */
 static int
 passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb)
 {
 	struct pass_softc *softc;
 	struct cam_periph_map_info mapinfo;
 	xpt_opcode fc;
 	int error;
 
 	softc = (struct pass_softc *)periph->softc;
 
 	/*
 	 * There are some fields in the CCB header that need to be
 	 * preserved, the rest we get from the user.
 	 */
 	xpt_merge_ccb(ccb, inccb);
 
 	/*
 	 */
 	ccb->ccb_h.cbfcnp = passdone;
 
 	/*
 	 * Let cam_periph_mapmem do a sanity check on the data pointer format.
 	 * Even if no data transfer is needed, it's a cheap check and it
 	 * simplifies the code.
 	 */
 	fc = ccb->ccb_h.func_code;
 	if ((fc == XPT_SCSI_IO) || (fc == XPT_ATA_IO) || (fc == XPT_SMP_IO)
 	 || (fc == XPT_DEV_MATCH) || (fc == XPT_DEV_ADVINFO)) {
 		bzero(&mapinfo, sizeof(mapinfo));
 
 		/*
 		 * cam_periph_mapmem calls into proc and vm functions that can
 		 * sleep as well as trigger I/O, so we can't hold the lock.
 		 * Dropping it here is reasonably safe.
 		 */
 		cam_periph_unlock(periph);
 		error = cam_periph_mapmem(ccb, &mapinfo, softc->maxio);
 		cam_periph_lock(periph);
 
 		/*
 		 * cam_periph_mapmem returned an error, we can't continue.
 		 * Return the error to the user.
 		 */
 		if (error)
 			return(error);
 	} else
 		/* Ensure that the unmap call later on is a no-op. */
 		mapinfo.num_bufs_used = 0;
 
 	/*
 	 * If the user wants us to perform any error recovery, then honor
 	 * that request.  Otherwise, it's up to the user to perform any
 	 * error recovery.
 	 */
 	cam_periph_runccb(ccb, passerror, /* cam_flags */ CAM_RETRY_SELTO,
 	    /* sense_flags */ ((ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER) ?
 	     SF_RETRY_UA : SF_NO_RECOVERY) | SF_NO_PRINT,
 	    softc->device_stats);
 
 	cam_periph_unmapmem(ccb, &mapinfo);
 
 	ccb->ccb_h.cbfcnp = NULL;
 	ccb->ccb_h.periph_priv = inccb->ccb_h.periph_priv;
 	bcopy(ccb, inccb, sizeof(union ccb));
 
 	return(0);
 }
 
 /*
  * Error handler for pass(4) CCBs: look up the periph that owns the CCB's
  * path and defer to cam_periph_error(), passing this instance's saved CCB.
  */
 static int
 passerror(union ccb *ccb, u_int32_t cam_flags, u_int32_t sense_flags)
 {
 	struct cam_periph *periph;
 	struct pass_softc *softc;
 	int error;
 
 	periph = xpt_path_periph(ccb->ccb_h.path);
 	softc = (struct pass_softc *)periph->softc;
 
 	error = cam_periph_error(ccb, cam_flags, sense_flags,
 	    &softc->saved_ccb);
 
 	return(error);
 }
Index: head/sys/modules/cam/Makefile
===================================================================
--- head/sys/modules/cam/Makefile	(revision 299863)
+++ head/sys/modules/cam/Makefile	(revision 299864)
@@ -1,47 +1,46 @@
 # $FreeBSD$
 
 S=	${.CURDIR}/../..
 
 .PATH: $S/cam $S/cam/scsi $S/cam/ata $S/${MACHINE}/${MACHINE}
 
 KMOD=	cam
 
 # See sys/conf/options for the flags that go into the different opt_*.h files.
 SRCS=	opt_cam.h
 SRCS+=	opt_ada.h
 SRCS+=	opt_scsi.h
 SRCS+=	opt_cd.h
-SRCS+=	opt_kdtrace.h
 SRCS+=	opt_pt.h
 SRCS+=	opt_sa.h
 SRCS+=	opt_ses.h
 SRCS+=	device_if.h bus_if.h vnode_if.h
 SRCS+=	cam.c
 SRCS+=	cam_compat.c
 .if exists($S/${MACHINE}/${MACHINE}/cam_machdep.c)
 SRCS+=	cam_machdep.c
 .endif
 SRCS+=	cam_iosched.c cam_periph.c cam_queue.c cam_sim.c cam_xpt.c
 SRCS+=	scsi_all.c scsi_cd.c scsi_ch.c
 SRCS+=	scsi_da.c
 SRCS+=	scsi_pass.c
 SRCS+=	scsi_pt.c
 SRCS+=	scsi_sa.c
 SRCS+=	scsi_enc.c
 SRCS+=	scsi_enc_ses.c
 SRCS+=	scsi_enc_safte.c
 SRCS+=	scsi_sg.c
 SRCS+=	scsi_targ_bh.c scsi_target.c
 SRCS+=	scsi_xpt.c
 SRCS+=	smp_all.c
 SRCS+=	ata_all.c
 SRCS+=	ata_xpt.c
 SRCS+=	ata_da.c
 .if exists($S/${MACHINE}/${MACHINE}/ata_machdep.c)
 SRCS+=	ata_machdep.c
 .endif
 SRCS+=	ata_pmp.c
 
 EXPORT_SYMS=	YES	# XXX evaluate
 
 .include <bsd.kmod.mk>
Index: head/sys/modules/tcp/fastpath/Makefile
===================================================================
--- head/sys/modules/tcp/fastpath/Makefile	(revision 299863)
+++ head/sys/modules/tcp/fastpath/Makefile	(revision 299864)
@@ -1,18 +1,18 @@
 #
 # $FreeBSD$
 #
 
 .PATH: ${.CURDIR}/../../../netinet/tcp_stacks
 
 KMOD=	fastpath
 SRCS=	fastpath.c
 
-SRCS+=	opt_ipfw.h opt_inet.h opt_inet6.h opt_ipsec.h opt_kdtrace.h
+SRCS+=	opt_ipfw.h opt_inet.h opt_inet6.h opt_ipsec.h
 SRCS+=	opt_tcpdebug.h
 
 #
 # Enable full debugging
 #
 #CFLAGS += -g
 
 .include <bsd.kmod.mk>
Index: head/sys/netinet/tcp_stacks/fastpath.c
===================================================================
--- head/sys/netinet/tcp_stacks/fastpath.c	(revision 299863)
+++ head/sys/netinet/tcp_stacks/fastpath.c	(revision 299864)
@@ -1,2455 +1,2454 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2007-2008,2010
  *	Swinburne University of Technology, Melbourne, Australia.
  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
  * Copyright (c) 2010 The FreeBSD Foundation
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * Copyright (c) 2015 Netflix Inc.
  * All rights reserved.
  *
  * Portions of this software were developed at the Centre for Advanced Internet
  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
  * James Healy and David Hayes, made possible in part by a grant from the Cisco
  * University Research Program Fund at Community Foundation Silicon Valley.
  *
  * Portions of this software were developed at the Centre for Advanced
  * Internet Architectures, Swinburne University of Technology, Melbourne,
  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Portions of this software were developed by Randall R. Stewart while
  * working for Netflix Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipfw.h"		/* for ipfw_fwd	*/
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
-#include "opt_kdtrace.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #include <sys/hhook.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
 
 #include <vm/uma.h>
 
 #include <net/route.h>
 #include <net/vnet.h>
 
 #define TCPSTATES		/* for logging */
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/cc/cc.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Tunables consumed by this TCP stack.  Each VNET_DECLARE() is paired with
  * a V_-prefixed accessor macro that wraps the variable in VNET().
  * NOTE(review): the variables themselves are presumably defined by the
  * base TCP input code rather than here -- confirm.
  */
 VNET_DECLARE(int, tcp_autorcvbuf_inc);
 #define	V_tcp_autorcvbuf_inc	VNET(tcp_autorcvbuf_inc)
 VNET_DECLARE(int, tcp_autorcvbuf_max);
 #define	V_tcp_autorcvbuf_max	VNET(tcp_autorcvbuf_max)
 VNET_DECLARE(int, tcp_do_rfc3042);
 #define	V_tcp_do_rfc3042	VNET(tcp_do_rfc3042)
 VNET_DECLARE(int, tcp_do_autorcvbuf);
 #define	V_tcp_do_autorcvbuf	VNET(tcp_do_autorcvbuf)
 VNET_DECLARE(int, tcp_insecure_rst);
 #define	V_tcp_insecure_rst	VNET(tcp_insecure_rst)
 VNET_DECLARE(int, tcp_insecure_syn);
 #define	V_tcp_insecure_syn	VNET(tcp_insecure_syn)
 
 /*
  * Forward declarations of the two file-local segment-processing entry
  * points.  Both take the same argument list.
  * NOTE(review): presumably defined further down in this file -- confirm.
  */
 static void	 tcp_do_segment_fastslow(struct mbuf *, struct tcphdr *,
 			struct socket *, struct tcpcb *, int, int, uint8_t,
 			int);
 
 static void	 tcp_do_segment_fastack(struct mbuf *, struct tcphdr *,
 			struct socket *, struct tcpcb *, int, int, uint8_t,
 			int);
 
 /*
  * Indicate whether this ack should be delayed.  We can delay the ack if
  * following conditions are met:
  *	- There is no delayed ack timer in progress.
  *	- Our last ack wasn't a 0-sized window. We never want to delay
  *	  the ack that opens up a 0-sized window.
  *	- LRO wasn't used for this segment. We make sure by checking that the
  *	  segment size is not larger than the MSS.
  *	- Delayed acks are enabled (V_tcp_delack_enabled), or the connection
  *	  has TF_NEEDSYN set.
  *
  * Both macro parameters are parenthesized in the expansion so that
  * expression arguments (e.g. "a + b" for tlen) group as the caller
  * intended.  Note that tp is expanded more than once, so it must not have
  * side effects.
  */
 #define DELAY_ACK(tp, tlen)						\
 	((!tcp_timer_active((tp), TT_DELACK) &&				\
 	    ((tp)->t_flags & TF_RXWIN0SENT) == 0) &&			\
 	    ((tlen) <= (tp)->t_maxseg) &&				\
 	    (V_tcp_delack_enabled || ((tp)->t_flags & TF_NEEDSYN)))
 
 /*
  * tcp_do_fastack: fast-path handling of a pure ACK or a pure window
  * update.
  *
  * So how is this faster than the normal fast ack?
  * It basically allows us to also stay in the fastpath
  * when a window-update ack also arrives. In testing
  * we saw only 25-30% of connections doing fastpath
  * due to the fact that along with moving forward
  * in sequence the window was also updated.
  *
  * Parameters:
  *	m		- received mbuf chain; always released with m_freem()
  *			  in this function.
  *	th		- TCP header of the segment (th_seq/th_ack are read).
  *	so		- socket; acked bytes are dropped from so->so_snd and
  *			  writers are woken with sowwakeup().
  *	tp		- TCP control block whose send-side state is updated.
  *	to		- parsed TCP options; only the timestamp fields are
  *			  used here.
  *	drop_hdrlen	- not referenced in this function.
  *	tlen		- not referenced in this function.
  *	ti_locked	- TI_RLOCKED if the V_tcbinfo read lock is held on
  *			  entry, in which case it is released here.
  *	tiwin		- send window carried by the segment.
  *			  NOTE(review): presumably already scaled by the
  *			  caller -- confirm.
  *
  * Locking: the inpcb write lock is asserted held near the end of the
  * function and is released with INP_WUNLOCK() before returning.
  */
 static void
 tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	       struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, 
 	       int ti_locked, u_long tiwin)
 {
 	int acked;
 	int winup_only=0;
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 *
 	 * NOTE(review): nothing in this function writes tcp_saveipgen or
 	 * tcp_savetcp, and ostate stays 0, before they are handed to
 	 * tcp_trace() below -- confirm whether the trace output is meaningful.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
         /*
 	 * The following if statement will be true if any of these hold:
 	 * - We have more new data (SEQ_LT(tp->snd_wl1, th->th_seq)) <or>
 	 * - No more new data, but we have an ack for new data
 	 *   (tp->snd_wl1 == th->th_seq && SEQ_LT(tp->snd_wl2, th->th_ack))
 	 * - No more new data, the same ack point but the window grew
 	 *   (tp->snd_wl1 == th->th_seq && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
 	 *
 	 * NOTE(review): earlier wording of this comment mentioned a
 	 * "win_up_in_fp" precondition; no such condition is tested here.
 	 */
 	if ((SEQ_LT(tp->snd_wl1, th->th_seq) ||
 	     (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
 					    (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
 		/* keep track of pure window updates */
 		if (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) {
 			winup_only = 1;
 			TCPSTAT_INC(tcps_rcvwinupd);
 		}
 		/* Record the new window and the seq/ack it was learned from. */
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		tp->snd_wl2 = th->th_ack;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 	}
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record the timestamp.
 	 * NOTE that the test is modified according to the latest
 	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
 	/*
 	 * This is a pure ack for outstanding data.
 	 * Release the tcbinfo read lock if the caller passed it in held;
 	 * from here on ti_locked is TI_UNLOCKED (asserted further down).
 	 */
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	/* Counted for window-update-only segments as well. */
 	TCPSTAT_INC(tcps_predack);
 
 	/*
 	 * "bad retransmit" recovery.
 	 */
 	if (tp->t_rxtshift == 1 &&
 	    tp->t_flags & TF_PREVVALID &&
 	    (int)(ticks - tp->t_badrxtwin) < 0) {
 		cc_cong_signal(tp, th, CC_RTO_ERR);
 	}
 
 	/*
 	 * Recalculate the transmit timer / rtt.
 	 *
 	 * Some boxes send broken timestamp replies
 	 * during the SYN+ACK phase, ignore
 	 * timestamps of 0 or we could calculate a
 	 * huge RTT and blow up the retransmit timer.
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    to->to_tsecr) {
 		u_int t;
 
 		/* RTT sample from the echoed timestamp. */
 		t = tcp_ts_getticks() - to->to_tsecr;
 		if (!tp->t_rttlow || tp->t_rttlow > t)
 			tp->t_rttlow = t;
 		tcp_xmit_timer(tp,
 			       TCP_TS_TO_TICKS(t) + 1);
 	} else if (tp->t_rtttime &&
 		   SEQ_GT(th->th_ack, tp->t_rtseq)) {
 		/* No usable timestamp: fall back to the timed segment. */
 		if (!tp->t_rttlow ||
 		    tp->t_rttlow > ticks - tp->t_rtttime)
 			tp->t_rttlow = ticks - tp->t_rtttime;
 		tcp_xmit_timer(tp,
 			       ticks - tp->t_rtttime);
 	}
 	if (winup_only == 0) {
 		/* The segment acknowledges new data. */
 		acked = BYTES_THIS_ACK(tp, th);
 
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, to);
 
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 		sbdrop(&so->so_snd, acked);
 		if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 				
 		/*
 		 * Let the congestion control algorithm update
 		 * congestion control related information. This
 		 * typically means increasing the congestion
 		 * window.
 		 */
 		cc_ack_received(tp, th, CC_ACK);
 
 		tp->snd_una = th->th_ack;
 		/*
 		 * Pull snd_wl2 up to prevent seq wrap relative
 		 * to th_ack.
 		 */
 		tp->snd_wl2 = th->th_ack;
 		tp->t_dupacks = 0;
 
 		/*
 		 * If all outstanding data are acked, stop
 		 * retransmit timer, otherwise restart timer
 		 * using current (possibly backed-off) value.
 		 * If process is waiting for space,
 		 * wakeup/selwakeup/signal.  If data
 		 * are ready to send, let tcp_output
 		 * decide between more output or persist.
 		 */
 #ifdef TCPDEBUG
 		if (so->so_options & SO_DEBUG)
 			tcp_trace(TA_INPUT, ostate, tp,
 				  (void *)tcp_saveipgen,
 				  &tcp_savetcp, 0);
 #endif
 		/* The probe reads the mbuf data, so fire it before freeing m. */
 		TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 		m_freem(m);
 		if (tp->snd_una == tp->snd_max)
 			tcp_timer_activate(tp, TT_REXMT, 0);
 		else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT,
 					   tp->t_rxtcur);
 	} else {
 		/* 
 		 * Window update only, just free the mbufs and
 		 * send out whatever we can.
 		 */
 		m_freem(m);
 	}
 	/* Wake any writer, then transmit if the send buffer holds data. */
 	sowwakeup(so);
 	if (sbavail(&so->so_snd))
 		(void) tcp_output(tp);
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 					    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* Turn a pending delayed-ack request into a running delack timer. */
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 }
 
 /*
  * tcp_do_fastnewdata: fast-path handling of a pure, in-sequence data
  * segment.
  *
  * Here nothing is really faster, it's just that we
  * have broken out the fast-data path also just like
  * the fast-ack.
  *
  * Parameters:
  *	m		- received mbuf chain; appended to so->so_rcv after
  *			  the headers are trimmed, or freed if the socket can
  *			  no longer receive (SBS_CANTRCVMORE).
  *	th		- TCP header of the segment (th_seq is read).
  *	so		- socket whose receive buffer gets the data.
  *	tp		- TCP control block; rcv_nxt is advanced by tlen.
  *	to		- parsed TCP options; only the timestamp fields are
  *			  used here.
  *	drop_hdrlen	- number of leading bytes removed from m with m_adj()
  *			  before it is appended.
  *	tlen		- data length of the segment.
  *	ti_locked	- TI_RLOCKED if the V_tcbinfo read lock is held on
  *			  entry, in which case it is released here.
  *	tiwin		- not referenced in this function.
  *
  * Locking: the inpcb write lock is asserted held near the end of the
  * function and is released with INP_WUNLOCK() before returning.
  */
 static void
 tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		   struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, 
 		   int ti_locked, u_long tiwin)
 {
 	int newsize = 0;	/* automatic sockbuf scaling */
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 *
 	 * NOTE(review): nothing in this function writes tcp_saveipgen or
 	 * tcp_savetcp, and ostate stays 0, before they are handed to
 	 * tcp_trace() below -- confirm whether the trace output is meaningful.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record the timestamp.
 	 * NOTE that the test is modified according to the latest
 	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
 
 	/*
 	 * This is a pure, in-sequence data packet with
 	 * nothing on the reassembly queue and we have enough
 	 * buffer space to take it.
 	 * Release the tcbinfo read lock if the caller passed it in held;
 	 * from here on ti_locked is TI_UNLOCKED (asserted further down).
 	 */
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	/* Clean receiver SACK report if present */
 	if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
 		tcp_clean_sackreport(tp);
 	TCPSTAT_INC(tcps_preddat);
 	tp->rcv_nxt += tlen;
 	/*
 	 * Pull snd_wl1 up to prevent seq wrap relative to
 	 * th_seq.
 	 */
 	tp->snd_wl1 = th->th_seq;
 	/*
 	 * Pull rcv_up up to prevent seq wrap relative to
 	 * rcv_nxt.
 	 */
 	tp->rcv_up = tp->rcv_nxt;
 	TCPSTAT_ADD(tcps_rcvbyte, tlen);
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp,
 			  (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 	/*
 	 * Automatic sizing of receive socket buffer.  Often the send
 	 * buffer size is not optimally adjusted to the actual network
 	 * conditions at hand (delay bandwidth product).  Setting the
 	 * buffer size too small limits throughput on links with high
 	 * bandwidth and high delay (eg. trans-continental/oceanic links).
 	 *
 	 * On the receive side the socket buffer memory is only rarely
 	 * used to any significant extent.  This allows us to be much
 	 * more aggressive in scaling the receive socket buffer.  For
 	 * the case that the buffer space is actually used to a large
 	 * extent and we run out of kernel memory we can simply drop
 	 * the new segments; TCP on the sender will just retransmit it
 	 * later.  Setting the buffer size too big may only consume too
 	 * much kernel memory if the application doesn't read() from
 	 * the socket or packet loss or reordering makes use of the
 	 * reassembly queue.
 	 *
 	 * The criteria to step up the receive buffer one notch are:
 	 *  1. Application has not set receive buffer size with
 	 *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
 	 *  2. the number of bytes received during the time it takes
 	 *     one timestamp to be reflected back to us (the RTT);
 	 *  3. received bytes per RTT is within seven eighth of the
 	 *     current socket buffer size;
 	 *  4. receive buffer size has not hit maximal automatic size;
 	 *
 	 * This algorithm does one step per RTT at most and only if
 	 * we receive a bulk stream w/o packet losses or reorderings.
 	 * Shrinking the buffer during idle times is not necessary as
 	 * it doesn't consume any memory when idle.
 	 *
 	 * TODO: Only step up if the application is actually serving
 	 * the buffer to better manage the socket buffer resources.
 	 */
 	if (V_tcp_do_autorcvbuf &&
 	    (to->to_flags & TOF_TS) &&
 	    to->to_tsecr &&
 	    (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
 		if (TSTMP_GT(to->to_tsecr, tp->rfbuf_ts) &&
 		    to->to_tsecr - tp->rfbuf_ts < hz) {
 			if (tp->rfbuf_cnt >
 			    (so->so_rcv.sb_hiwat / 8 * 7) &&
 			    so->so_rcv.sb_hiwat <
 			    V_tcp_autorcvbuf_max) {
 				/* Grow by one increment, capped at the max. */
 				newsize =
 					min(so->so_rcv.sb_hiwat +
 					    V_tcp_autorcvbuf_inc,
 					    V_tcp_autorcvbuf_max);
 			}
 			/* Start over with next RTT. */
 			tp->rfbuf_ts = 0;
 			tp->rfbuf_cnt = 0;
 		} else
 			tp->rfbuf_cnt += tlen;	/* add up */
 	}
 
 	/* Add data to socket buffer. */
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		m_freem(m);
 	} else {
 		/*
 		 * Set new socket buffer size.
 		 * Give up when limit is reached.
 		 */
 		if (newsize)
 			if (!sbreserve_locked(&so->so_rcv,
 					      newsize, so, NULL))
 				so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		sbappendstream_locked(&so->so_rcv, m, 0);
 	}
 	/* NB: sorwakeup_locked() does an implicit unlock. */
 	sorwakeup_locked(so);
 	/*
 	 * Either request a delayed ack (turned into a timer below) or
 	 * force an immediate one.  The tcp_output() return value is not
 	 * checked here.
 	 */
 	if (DELAY_ACK(tp, tlen)) {
 		tp->t_flags |= TF_DELACK;
 	} else {
 		tp->t_flags |= TF_ACKNOW;
 		tcp_output(tp);
 	}
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 					    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* Turn a pending delayed-ack request into a running delack timer. */
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 }
 
 /*
  * The slow-path is the clone of the long long part
  * of tcp_do_segment past all the fast-path stuff. We
  * use it here by two different callers, the fast/slow and
  * the fastack only.
  */
 static void
 tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, 
 		int ti_locked, u_long tiwin, int thflags)
 {
 	int  acked, ourfinisacked, needoutput = 0;
 	int rstreason, todrop, win;
 	char *s;
 	struct in_conninfo *inc;
 	struct mbuf *mfree = NULL;
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 	/*
 	 * Calculate amount of space in receive window,
 	 * and then do TCP input processing.
 	 * Receive window is amount of space in rcv queue,
 	 * but not less than advertised window.
 	 */
 	inc = &tp->t_inpcb->inp_inc;
 	win = sbspace(&so->so_rcv);
 	if (win < 0)
 		win = 0;
 	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 
 	/* Reset receive buffer auto scaling when not in bulk receive mode. */
 	tp->rfbuf_ts = 0;
 	tp->rfbuf_cnt = 0;
 
 	switch (tp->t_state) {
 
 	/*
 	 * If the state is SYN_RECEIVED:
 	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.
 	 */
 	case TCPS_SYN_RECEIVED:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 		}
 		break;
 
 	/*
 	 * If the state is SYN_SENT:
 	 *	if seg contains an ACK, but not for our SYN, drop the input.
 	 *	if seg contains a RST, then drop the connection.
 	 *	if seg does not contain SYN, then drop it.
 	 * Otherwise this is an acceptable SYN segment
 	 *	initialize tp->rcv_nxt and tp->irs
 	 *	if seg contains ack then advance tp->snd_una
 	 *	if seg contains an ECE and ECN support is enabled, the stream
 	 *	    is ECN capable.
 	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
 	 *	arrange for segment to be acked (eventually)
 	 *	continue processing rest of data/controls, beginning with URG
 	 */
 	case TCPS_SYN_SENT:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->iss) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 			rstreason = BANDLIM_UNLIMITED;
 			goto dropwithreset;
 		}
 		if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
 			TCP_PROBE5(connect__refused, NULL, tp,
 			    mtod(m, const char *), tp, th);
 			tp = tcp_drop(tp, ECONNREFUSED);
 		}
 		if (thflags & TH_RST)
 			goto drop;
 		if (!(thflags & TH_SYN))
 			goto drop;
 
 		tp->irs = th->th_seq;
 		tcp_rcvseqinit(tp);
 		if (thflags & TH_ACK) {
 			TCPSTAT_INC(tcps_connects);
 			soisconnected(so);
 #ifdef MAC
 			mac_socketpeer_set_from_mbuf(m, so);
 #endif
 			/* Do window scaling on this connection? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 			}
 			tp->rcv_adv += imin(tp->rcv_wnd,
 			    TCP_MAXWIN << tp->rcv_scale);
 			tp->snd_una++;		/* SYN is acked */
 			/*
 			 * If there's data, delay ACK; if there's also a FIN
 			 * ACKNOW will be turned on later.
 			 */
 			if (DELAY_ACK(tp, tlen) && tlen != 0)
 				tcp_timer_activate(tp, TT_DELACK,
 				    tcp_delacktime);
 			else
 				tp->t_flags |= TF_ACKNOW;
 
 			if ((thflags & TH_ECE) && V_tcp_do_ecn) {
 				tp->t_flags |= TF_ECN_PERMIT;
 				TCPSTAT_INC(tcps_ecn_shs);
 			}
 			
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
 			 * Transitions:
 			 *	SYN_SENT  --> ESTABLISHED
 			 *	SYN_SENT* --> FIN_WAIT_1
 			 */
 			tp->t_starttime = ticks;
 			if (tp->t_flags & TF_NEEDFIN) {
 				tcp_state_change(tp, TCPS_FIN_WAIT_1);
 				tp->t_flags &= ~TF_NEEDFIN;
 				thflags &= ~TH_SYN;
 			} else {
 				tcp_state_change(tp, TCPS_ESTABLISHED);
 				TCP_PROBE5(connect__established, NULL, tp,
 				    mtod(m, const char *), tp, th);
 				cc_conn_init(tp);
 				tcp_timer_activate(tp, TT_KEEP,
 				    TP_KEEPIDLE(tp));
 			}
 		} else {
 			/*
 			 * Received initial SYN in SYN-SENT[*] state =>
 			 * simultaneous open.
 			 * If it succeeds, connection is * half-synchronized.
 			 * Otherwise, do 3-way handshake:
 			 *        SYN-SENT -> SYN-RECEIVED
 			 *        SYN-SENT* -> SYN-RECEIVED*
 			 */
 			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tcp_state_change(tp, TCPS_SYN_RECEIVED);
 		}
 
 		KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
 		    "ti_locked %d", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
 		 * Advance th->th_seq to correspond to first data byte.
 		 * If data, trim to stay within window,
 		 * dropping FIN if necessary.
 		 */
 		th->th_seq++;
 		if (tlen > tp->rcv_wnd) {
 			todrop = tlen - tp->rcv_wnd;
 			m_adj(m, -todrop);
 			tlen = tp->rcv_wnd;
 			thflags &= ~TH_FIN;
 			TCPSTAT_INC(tcps_rcvpackafterwin);
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		}
 		tp->snd_wl1 = th->th_seq - 1;
 		tp->rcv_up = th->th_seq;
 		/*
 		 * Client side of transaction: already sent SYN and data.
 		 * If the remote host used T/TCP to validate the SYN,
 		 * our data will be ACK'd; if so, enter normal data segment
 		 * processing in the middle of step 5, ack processing.
 		 * Otherwise, goto step 6.
 		 */
 		if (thflags & TH_ACK)
 			goto process_ACK;
 
 		goto step6;
 
 	/*
 	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
 	 *      do normal processing.
 	 *
 	 * NB: Leftover from RFC1644 T/TCP.  Cases to be reused later.
 	 */
 	case TCPS_LAST_ACK:
 	case TCPS_CLOSING:
 		break;  /* continue normal processing */
 	}
 
 	/*
 	 * States other than LISTEN or SYN_SENT.
 	 * First check the RST flag and sequence number since reset segments
 	 * are exempt from the timestamp and connection count tests.  This
 	 * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
 	 * below which allowed reset segments in half the sequence space
 	 * to fall though and be processed (which gives forged reset
 	 * segments with a random sequence number a 50 percent chance of
 	 * killing a connection).
 	 * Then check timestamp, if present.
 	 * Then check the connection count, if present.
 	 * Then check that at least some bytes of segment are within
 	 * receive window.  If segment begins before rcv_nxt,
 	 * drop leading data (and SYN); if nothing left, just ack.
 	 */
 	if (thflags & TH_RST) {
 		/*
 		 * RFC5961 Section 3.2
 		 *
 		 * - RST drops connection only if SEG.SEQ == RCV.NXT.
 		 * - If RST is in window, we send challenge ACK.
 		 *
 		 * Note: to take into account delayed ACKs, we should
 		 *   test against last_ack_sent instead of rcv_nxt.
 		 * Note 2: we handle special case of closed window, not
 		 *   covered by the RFC.
 		 */
 		if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
 		    (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			KASSERT(ti_locked == TI_RLOCKED,
 			    ("%s: TH_RST ti_locked %d, th %p tp %p",
 			    __func__, ti_locked, th, tp));
 			KASSERT(tp->t_state != TCPS_SYN_SENT,
 			    ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
 			    __func__, th, tp));
 
 			if (V_tcp_insecure_rst ||
 			    tp->last_ack_sent == th->th_seq) {
 				TCPSTAT_INC(tcps_drops);
 				/* Drop the connection. */
 				switch (tp->t_state) {
 				case TCPS_SYN_RECEIVED:
 					so->so_error = ECONNREFUSED;
 					goto close;
 				case TCPS_ESTABLISHED:
 				case TCPS_FIN_WAIT_1:
 				case TCPS_FIN_WAIT_2:
 				case TCPS_CLOSE_WAIT:
 					so->so_error = ECONNRESET;
 				close:
 					tcp_state_change(tp, TCPS_CLOSED);
 					/* FALLTHROUGH */
 				default:
 					tp = tcp_close(tp);
 				}
 			} else {
 				TCPSTAT_INC(tcps_badrst);
 				/* Send challenge ACK. */
 				tcp_respond(tp, mtod(m, void *), th, m,
 				    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
 				tp->last_ack_sent = tp->rcv_nxt;
 				m = NULL;
 			}
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC5961 Section 4.2
 	 * Send challenge ACK for any SYN in synchronized state.
 	 */
 	if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
 		KASSERT(ti_locked == TI_RLOCKED,
 		    ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		TCPSTAT_INC(tcps_badsyn);
 		if (V_tcp_insecure_syn &&
 		    SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
 			tp = tcp_drop(tp, ECONNRESET);
 			rstreason = BANDLIM_UNLIMITED;
 		} else {
 			/* Send challenge ACK. */
 			tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
 			    tp->snd_nxt, TH_ACK);
 			tp->last_ack_sent = tp->rcv_nxt;
 			m = NULL;
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
 	 * and it's less than ts_recent, drop it.
 	 */
 	if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
 	    TSTMP_LT(to->to_tsval, tp->ts_recent)) {
 
 		/* Check to see if ts_recent is over 24 days old.  */
 		if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
 			/*
 			 * Invalidate ts_recent.  If this segment updates
 			 * ts_recent, the age will be reset later and ts_recent
 			 * will get a valid value.  If it does not, setting
 			 * ts_recent to zero will at least satisfy the
 			 * requirement that zero be placed in the timestamp
 			 * echo reply when ts_recent isn't valid.  The
 			 * age isn't reset until we get a valid ts_recent
 			 * because we don't want out-of-order segments to be
 			 * dropped when ts_recent is old.
 			 */
 			tp->ts_recent = 0;
 		} else {
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, tlen);
 			TCPSTAT_INC(tcps_pawsdrop);
 			if (tlen)
 				goto dropafterack;
 			goto drop;
 		}
 	}
 
 	/*
 	 * In the SYN-RECEIVED state, validate that the packet belongs to
 	 * this connection before trimming the data to fit the receive
 	 * window.  Check the sequence number versus IRS since we know
 	 * the sequence numbers haven't wrapped.  This is a partial fix
 	 * for the "LAND" DoS attack.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 
 	todrop = tp->rcv_nxt - th->th_seq;
 	if (todrop > 0) {
 		if (thflags & TH_SYN) {
 			thflags &= ~TH_SYN;
 			th->th_seq++;
 			if (th->th_urp > 1)
 				th->th_urp--;
 			else
 				thflags &= ~TH_URG;
 			todrop--;
 		}
 		/*
 		 * Following if statement from Stevens, vol. 2, p. 960.
 		 */
 		if (todrop > tlen
 		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
 			/*
 			 * Any valid FIN must be to the left of the window.
 			 * At this point the FIN must be a duplicate or out
 			 * of sequence; drop it.
 			 */
 			thflags &= ~TH_FIN;
 
 			/*
 			 * Send an ACK to resynchronize and drop any data.
 			 * But keep on processing for RST or ACK.
 			 */
 			tp->t_flags |= TF_ACKNOW;
 			todrop = tlen;
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, todrop);
 		} else {
 			TCPSTAT_INC(tcps_rcvpartduppack);
 			TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
 		}
 		drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
 		if (th->th_urp > todrop)
 			th->th_urp -= todrop;
 		else {
 			thflags &= ~TH_URG;
 			th->th_urp = 0;
 		}
 	}
 
 	/*
 	 * If new data are received on a connection after the
 	 * user processes are gone, then RST the other end.
 	 */
 	if ((so->so_state & SS_NOFDREF) &&
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
 		KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
 		    "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
 			    "after socket was closed, "
 			    "sending RST and removing tcpcb\n",
 			    s, __func__, tcpstates[tp->t_state], tlen);
 			free(s, M_TCPLOG);
 		}
 		tp = tcp_close(tp);
 		TCPSTAT_INC(tcps_rcvafterclose);
 		rstreason = BANDLIM_UNLIMITED;
 		goto dropwithreset;
 	}
 
 	/*
 	 * If segment ends after window, drop trailing data
 	 * (and PUSH and FIN); if nothing left, just ACK.
 	 */
 	todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
 	if (todrop > 0) {
 		TCPSTAT_INC(tcps_rcvpackafterwin);
 		if (todrop >= tlen) {
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen);
 			/*
 			 * If window is closed can only take segments at
 			 * window edge, and have to drop data and PUSH from
 			 * incoming segments.  Continue processing, but
 			 * remember to ack.  Otherwise, drop segment
 			 * and ack.
 			 */
 			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
 				tp->t_flags |= TF_ACKNOW;
 				TCPSTAT_INC(tcps_rcvwinprobe);
 			} else
 				goto dropafterack;
 		} else
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		m_adj(m, -todrop);
 		tlen -= todrop;
 		thflags &= ~(TH_PUSH|TH_FIN);
 	}
 
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record its timestamp.
 	 * NOTE: 
 	 * 1) That the test incorporates suggestions from the latest
 	 *    proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 * 2) That updating only on newer timestamps interferes with
 	 *    our earlier PAWS tests, so this check should be solely
 	 *    predicated on the sequence space of this segment.
 	 * 3) That we modify the segment boundary check to be 
 	 *        Last.ACK.Sent <= SEG.SEQ + SEG.Len  
 	 *    instead of RFC1323's
 	 *        Last.ACK.Sent < SEG.SEQ + SEG.Len,
 	 *    This modified check allows us to overcome RFC1323's
 	 *    limitations as described in Stevens TCP/IP Illustrated
 	 *    Vol. 2 p.869. In such cases, we can still calculate the
 	 *    RTT correctly when RCV.NXT == Last.ACK.Sent.
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
 	    SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
 		((thflags & (TH_SYN|TH_FIN)) != 0))) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
 
 	/*
 	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
 	 * flag is on (half-synchronized state), then queue data for
 	 * later processing; else drop segment and return.
 	 */
 	if ((thflags & TH_ACK) == 0) {
 		if (tp->t_state == TCPS_SYN_RECEIVED ||
 		    (tp->t_flags & TF_NEEDSYN))
 			goto step6;
 		else if (tp->t_flags & TF_ACKNOW)
 			goto dropafterack;
 		else
 			goto drop;
 	}
 
 	/*
 	 * Ack processing.
 	 */
 	switch (tp->t_state) {
 
 	/*
 	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
 	 * ESTABLISHED state and continue processing.
 	 * The ACK was checked above.
 	 */
 	case TCPS_SYN_RECEIVED:
 
 		TCPSTAT_INC(tcps_connects);
 		soisconnected(so);
 		/* Do window scaling? */
 		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 			tp->rcv_scale = tp->request_r_scale;
 			tp->snd_wnd = tiwin;
 		}
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
 		 *      SYN-RECEIVED* -> FIN-WAIT-1
 		 */
 		tp->t_starttime = ticks;
 		if (tp->t_flags & TF_NEEDFIN) {
 			tcp_state_change(tp, TCPS_FIN_WAIT_1);
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
 			tcp_state_change(tp, TCPS_ESTABLISHED);
 			TCP_PROBE5(accept__established, NULL, tp,
 			    mtod(m, const char *), tp, th);
 			cc_conn_init(tp);
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 		}
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
 		 * later; if not, do so now to pass queued data to user.
 		 */
 		if (tlen == 0 && (thflags & TH_FIN) == 0)
 			(void) tcp_reass(tp, (struct tcphdr *)0, 0,
 			    (struct mbuf *)0);
 		tp->snd_wl1 = th->th_seq - 1;
 		/* FALLTHROUGH */
 
 	/*
 	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
 	 * ACKs.  If the ack is in the range
 	 *	tp->snd_una < th->th_ack <= tp->snd_max
 	 * then advance tp->snd_una to th->th_ack and drop
 	 * data from the retransmission queue.  If this ACK reflects
 	 * more up to date window information we update our window information.
 	 */
 	case TCPS_ESTABLISHED:
 	case TCPS_FIN_WAIT_1:
 	case TCPS_FIN_WAIT_2:
 	case TCPS_CLOSE_WAIT:
 	case TCPS_CLOSING:
 	case TCPS_LAST_ACK:
 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
 			TCPSTAT_INC(tcps_rcvacktoomuch);
 			goto dropafterack;
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to->to_flags & TOF_SACK) ||
 		     !TAILQ_EMPTY(&tp->snd_holes)))
 			tcp_sack_doack(tp, to, th->th_ack);
 		else
 			/*
 			 * Reset the value so that previous (valid) value
 			 * from the last ack with SACK doesn't get used.
 			 */
 			tp->sackhint.sacked_bytes = 0;
 
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, to);
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
 			if (tlen == 0 && tiwin == tp->snd_wnd) {
 				/*
 				 * If this is the first time we've seen a
 				 * FIN from the remote, this is not a
 				 * duplicate and it needs to be processed
 				 * normally.  This happens during a
 				 * simultaneous close.
 				 */
 				if ((thflags & TH_FIN) &&
 				    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
 					tp->t_dupacks = 0;
 					break;
 				}
 				TCPSTAT_INC(tcps_rcvdupack);
 				/*
 				 * If we have outstanding data (other than
 				 * a window probe), this is a completely
 				 * duplicate ack (ie, window info didn't
 				 * change and FIN isn't set),
 				 * the ack is the biggest we've
 				 * seen and we've seen exactly our rexmt
 				 * threshold of them, assume a packet
 				 * has been dropped and retransmit it.
 				 * Kludge snd_nxt & the congestion
 				 * window so we send only this one
 				 * packet.
 				 *
 				 * We know we're losing at the current
 				 * window size so do congestion avoidance
 				 * (set ssthresh to half the current window
 				 * and pull our congestion window back to
 				 * the new ssthresh).
 				 *
 				 * Dup acks mean that packets have left the
 				 * network (they're now cached at the receiver)
 				 * so bump cwnd by the amount in the receiver
 				 * to keep a constant cwnd packets in the
 				 * network.
 				 *
 				 * When using TCP ECN, notify the peer that
 				 * we reduced the cwnd.
 				 */
 				if (!tcp_timer_active(tp, TT_REXMT) ||
 				    th->th_ack != tp->snd_una)
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
 				     IN_FASTRECOVERY(tp->t_flags)) {
 					cc_ack_received(tp, th, CC_DUPACK);
 					if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    IN_FASTRECOVERY(tp->t_flags)) {
 						int awnd;
 						
 						/*
 						 * Compute the amount of data in flight first.
 						 * We can inject new data into the pipe iff 
 						 * we have less than 1/2 the original window's
 						 * worth of data in flight.
 						 */
 						if (V_tcp_do_rfc6675_pipe)
 							awnd = tcp_compute_pipe(tp);
 						else
 							awnd = (tp->snd_nxt - tp->snd_fack) +
 								tp->sackhint.sack_bytes_rexmit;
 
 						if (awnd < tp->snd_ssthresh) {
 							tp->snd_cwnd += tp->t_maxseg;
 							if (tp->snd_cwnd > tp->snd_ssthresh)
 								tp->snd_cwnd = tp->snd_ssthresh;
 						}
 					} else
 						tp->snd_cwnd += tp->t_maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					goto drop;
 				} else if (tp->t_dupacks == tcprexmtthresh) {
 					tcp_seq onxt = tp->snd_nxt;
 
 					/*
 					 * If we're doing sack, check to
 					 * see if we're already in sack
 					 * recovery. If we're not doing sack,
 					 * check to see if we're in newreno
 					 * recovery.
 					 */
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						if (IN_FASTRECOVERY(tp->t_flags)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					} else {
 						if (SEQ_LEQ(th->th_ack,
 						    tp->snd_recover)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					}
 					/* Congestion signal before ack. */
 					cc_cong_signal(tp, th, CC_NDUPACK);
 					cc_ack_received(tp, th, CC_DUPACK);
 					tcp_timer_activate(tp, TT_REXMT, 0);
 					tp->t_rtttime = 0;
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						TCPSTAT_INC(
 						    tcps_sack_recovery_episode);
 						tp->sack_newdata = tp->snd_nxt;
 						tp->snd_cwnd = tp->t_maxseg;
 						(void) tp->t_fb->tfb_tcp_output(tp);
 						goto drop;
 					}
 					tp->snd_nxt = th->th_ack;
 					tp->snd_cwnd = tp->t_maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					KASSERT(tp->snd_limited <= 2,
 					    ("%s: tp->snd_limited too big",
 					    __func__));
 					tp->snd_cwnd = tp->snd_ssthresh +
 					     tp->t_maxseg *
 					     (tp->t_dupacks - tp->snd_limited);
 					if (SEQ_GT(onxt, tp->snd_nxt))
 						tp->snd_nxt = onxt;
 					goto drop;
 				} else if (V_tcp_do_rfc3042) {
 					/*
 					 * Process first and second duplicate
 					 * ACKs. Each indicates a segment
 					 * leaving the network, creating room
 					 * for more. Make sure we can send a
 					 * packet on reception of each duplicate
 					 * ACK by increasing snd_cwnd by one
 					 * segment. Restore the original
 					 * snd_cwnd after packet transmission.
 					 */
 					cc_ack_received(tp, th, CC_DUPACK);
 					u_long oldcwnd = tp->snd_cwnd;
 					tcp_seq oldsndmax = tp->snd_max;
 					u_int sent;
 					int avail;
 
 					KASSERT(tp->t_dupacks == 1 ||
 					    tp->t_dupacks == 2,
 					    ("%s: dupacks not 1 or 2",
 					    __func__));
 					if (tp->t_dupacks == 1)
 						tp->snd_limited = 0;
 					tp->snd_cwnd =
 					    (tp->snd_nxt - tp->snd_una) +
 					    (tp->t_dupacks - tp->snd_limited) *
 					    tp->t_maxseg;
 					/*
 					 * Only call tcp_output when there
 					 * is new data available to be sent.
 					 * Otherwise we would send pure ACKs.
 					 */
 					SOCKBUF_LOCK(&so->so_snd);
 					avail = sbavail(&so->so_snd) -
 					    (tp->snd_nxt - tp->snd_una);
 					SOCKBUF_UNLOCK(&so->so_snd);
 					if (avail > 0)
 						(void) tp->t_fb->tfb_tcp_output(tp);
 					sent = tp->snd_max - oldsndmax;
 					if (sent > tp->t_maxseg) {
 						KASSERT((tp->t_dupacks == 2 &&
 						    tp->snd_limited == 0) ||
 						   (sent == tp->t_maxseg + 1 &&
 						    tp->t_flags & TF_SENTFIN),
 						    ("%s: sent too much",
 						    __func__));
 						tp->snd_limited = 2;
 					} else if (sent > 0)
 						++tp->snd_limited;
 					tp->snd_cwnd = oldcwnd;
 					goto drop;
 				}
 			} else
 				tp->t_dupacks = 0;
 			break;
 		}
 
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
 		    ("%s: th_ack <= snd_una", __func__));
 
 		/*
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
 		if (IN_FASTRECOVERY(tp->t_flags)) {
 			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 				if (tp->t_flags & TF_SACK_PERMIT)
 					tcp_sack_partialack(tp, th);
 				else
 					tcp_newreno_partial_ack(tp, th);
 			} else
 				cc_post_recovery(tp, th);
 		}
 		tp->t_dupacks = 0;
 		/*
 		 * If we reach this point, ACK is not a duplicate,
 		 *     i.e., it ACKs something we sent.
 		 */
 		if (tp->t_flags & TF_NEEDSYN) {
 			/*
 			 * T/TCP: Connection was half-synchronized, and our
 			 * SYN has been ACK'd (so connection is now fully
 			 * synchronized).  Go to non-starred state,
 			 * increment snd_una for ACK of SYN, and check if
 			 * we can do window scaling.
 			 */
 			tp->t_flags &= ~TF_NEEDSYN;
 			tp->snd_una++;
 			/* Do window scaling? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 				/* Send window already scaled. */
 			}
 		}
 
 process_ACK:
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		acked = BYTES_THIS_ACK(tp, th);
 		TCPSTAT_INC(tcps_rcvackpack);
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 
 		/*
 		 * If we just performed our first retransmit, and the ACK
 		 * arrives within our recovery window, then it was a mistake
 		 * to do the retransmit in the first place.  Recover our
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
 		if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
 		    (int)(ticks - tp->t_badrxtwin) < 0)
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 
 		/*
 		 * If we have a timestamp reply, update smoothed
 		 * round trip time.  If no timestamp is present but
 		 * transmit timer is running and timed sequence
 		 * number was acked, update smoothed round trip time.
 		 * Since we now have an rtt measurement, cancel the
 		 * timer backoff (cf., Phil Karn's retransmit alg.).
 		 * Recompute the initial retransmit timer.
 		 *
 		 * Some boxes send broken timestamp replies
 		 * during the SYN+ACK phase, ignore
 		 * timestamps of 0 or we could calculate a
 		 * huge RTT and blow up the retransmit timer.
 		 */
 		if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
 			u_int t;
 
 			t = tcp_ts_getticks() - to->to_tsecr;
 			if (!tp->t_rttlow || tp->t_rttlow > t)
 				tp->t_rttlow = t;
 			tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
 		} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
 			if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
 				tp->t_rttlow = ticks - tp->t_rtttime;
 			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
 		}
 
 		/*
 		 * If all outstanding data is acked, stop retransmit
 		 * timer and remember to restart (more output or persist).
 		 * If there is more data to be acked, restart retransmit
 		 * timer, using current (possibly backed-off) value.
 		 */
 		if (th->th_ack == tp->snd_max) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			needoutput = 1;
 		} else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 
 		/*
 		 * If no data (only SYN) was ACK'd,
 		 *    skip rest of ACK processing.
 		 */
 		if (acked == 0)
 			goto step6;
 
 		/*
 		 * Let the congestion control algorithm update congestion
 		 * control related information. This typically means increasing
 		 * the congestion window.
 		 */
 		cc_ack_received(tp, th, CC_ACK);
 
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > sbavail(&so->so_snd)) {
 			tp->snd_wnd -= sbavail(&so->so_snd);
 			mfree = sbcut_locked(&so->so_snd,
 			    (int)sbavail(&so->so_snd));
 			ourfinisacked = 1;
 		} else {
 			mfree = sbcut_locked(&so->so_snd, acked);
 			tp->snd_wnd -= acked;
 			ourfinisacked = 0;
 		}
 		/* NB: sowwakeup_locked() does an implicit unlock. */
 		sowwakeup_locked(so);
 		m_freem(mfree);
 		/* Detect una wraparound. */
 		if (!IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 		/* XXXLAS: Can this be moved up into cc_post_recovery? */
 		if (IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_RECOVERY(tp->t_flags);
 		}
 		tp->snd_una = th->th_ack;
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (SEQ_GT(tp->snd_una, tp->snd_recover))
 				tp->snd_recover = tp->snd_una;
 		}
 		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
 			tp->snd_nxt = tp->snd_una;
 
 		switch (tp->t_state) {
 
 		/*
 		 * In FIN_WAIT_1 STATE in addition to the processing
 		 * for the ESTABLISHED state if our FIN is now acknowledged
 		 * then enter FIN_WAIT_2.
 		 */
 		case TCPS_FIN_WAIT_1:
 			if (ourfinisacked) {
 				/*
 				 * If we can't receive any more
 				 * data, then closing user can proceed.
 				 * Starting the timer is contrary to the
 				 * specification, but if we don't get a FIN
 				 * we'll hang forever.
 				 *
 				 * XXXjl:
 				 * we should release the tp also, and use a
 				 * compressed state.
 				 */
 				if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 					soisdisconnected(so);
 					tcp_timer_activate(tp, TT_2MSL,
 					    (tcp_fast_finwait2_recycle ?
 					    tcp_finwait2_timeout :
 					    TP_MAXIDLE(tp)));
 				}
 				tcp_state_change(tp, TCPS_FIN_WAIT_2);
 			}
 			break;
 
 		/*
 		 * In CLOSING STATE in addition to the processing for
 		 * the ESTABLISHED state if the ACK acknowledges our FIN
 		 * then enter the TIME-WAIT state, otherwise ignore
 		 * the segment.
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
 				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 				tcp_twstart(tp);
 				INP_INFO_RUNLOCK(&V_tcbinfo);
 				m_freem(m);
 				return;
 			}
 			break;
 
 		/*
 		 * In LAST_ACK, we may still be waiting for data to drain
 		 * and/or to be acked, as well as for the ack of our FIN.
 		 * If our FIN is now acknowledged, delete the TCB,
 		 * enter the closed state and return.
 		 */
 		case TCPS_LAST_ACK:
 			if (ourfinisacked) {
 				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 				tp = tcp_close(tp);
 				goto drop;
 			}
 			break;
 		}
 	}
 
 step6:
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Update window information.
 	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
 	 */
 	if ((thflags & TH_ACK) &&
 	    (SEQ_LT(tp->snd_wl1, th->th_seq) ||
 	    (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
 	     (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
 		/* keep track of pure window updates */
 		if (tlen == 0 &&
 		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
 			TCPSTAT_INC(tcps_rcvwinupd);
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		tp->snd_wl2 = th->th_ack;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 		needoutput = 1;
 	}
 
 	/*
 	 * Process segments with URG.
 	 */
 	if ((thflags & TH_URG) && th->th_urp &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		/*
 		 * This is a kludge, but if we receive and accept
 		 * random urgent pointers, we'll crash in
 		 * soreceive.  It's hard to imagine someone
 		 * actually wanting to send this much urgent data.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
 			th->th_urp = 0;			/* XXX */
 			thflags &= ~TH_URG;		/* XXX */
 			SOCKBUF_UNLOCK(&so->so_rcv);	/* XXX */
 			goto dodata;			/* XXX */
 		}
 		/*
 		 * If this segment advances the known urgent pointer,
 		 * then mark the data stream.  This should not happen
 		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
 		 * a FIN has been received from the remote side.
 		 * In these states we ignore the URG.
 		 *
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section as the original
 		 * spec states (in one of two places).
 		 */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
 			so->so_oobmark = sbavail(&so->so_rcv) +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
 			if (so->so_oobmark == 0)
 				so->so_rcv.sb_state |= SBS_RCVATMARK;
 			sohasoutofband(so);
 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		/*
 		 * Remove out of band data so doesn't get presented to user.
 		 * This can happen independent of advancing the URG pointer,
 		 * but if two URG's are pending at once, some out-of-band
 		 * data may creep in... ick.
 		 */
 		if (th->th_urp <= (u_long)tlen &&
 		    !(so->so_options & SO_OOBINLINE)) {
 			/* hdr drop is delayed */
 			tcp_pulloutofband(so, th, m, drop_hdrlen);
 		}
 	} else {
 		/*
 		 * If no out of band data is expected,
 		 * pull receive urgent pointer along
 		 * with the receive window.
 		 */
 		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
 			tp->rcv_up = tp->rcv_nxt;
 	}
 dodata:							/* XXX */
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Process the segment text, merging it into the TCP sequencing queue,
 	 * and arranging for acknowledgment of receipt if necessary.
 	 * This process logically involves adjusting tp->rcv_wnd as data
 	 * is presented to the user (this happens in tcp_usrreq.c,
 	 * case PRU_RCVD).  If a FIN has already been received on this
 	 * connection then we just ignore the text.
 	 */
 	if ((tlen || (thflags & TH_FIN)) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		/*
 		 * Insert segment which includes th into TCP reassembly queue
 		 * with control block tp.  Set thflags to whether reassembly now
 		 * includes a segment with FIN.  This handles the common case
 		 * inline (segment is the next to be received on an established
 		 * connection, and the queue is empty), avoiding linkage into
 		 * and removal from the queue and repetition of various
 		 * conversions.
 		 * Set DELACK for segments received in order, but ack
 		 * immediately when segments are out of order (so
 		 * fast retransmit can work).
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
 		    LIST_EMPTY(&tp->t_segq) &&
 		    TCPS_HAVEESTABLISHED(tp->t_state)) {
 			if (DELAY_ACK(tp, tlen))
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt += tlen;
 			thflags = th->th_flags & TH_FIN;
 			TCPSTAT_INC(tcps_rcvpack);
 			TCPSTAT_ADD(tcps_rcvbyte, tlen);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				m_freem(m);
 			else
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 		} else {
 			/*
 			 * XXX: Due to the header drop above "th" is
 			 * theoretically invalid by now.  Fortunately
 			 * m_adj() doesn't actually free any mbufs
 			 * when trimming from the head.
 			 */
 			thflags = tcp_reass(tp, th, &tlen, m);
 			tp->t_flags |= TF_ACKNOW;
 		}
 		if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
 			tcp_update_sack_list(tp, save_start, save_start + tlen);
 #if 0
 		/*
 		 * Note the amount of data that peer has sent into
 		 * our window, in order to estimate the sender's
 		 * buffer size.
 		 * XXX: Unused.
 		 */
 		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
 			len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
 		else
 			len = so->so_rcv.sb_hiwat;
 #endif
 	} else {
 		m_freem(m);
 		thflags &= ~TH_FIN;
 	}
 
 	/*
 	 * If FIN is received ACK the FIN and let the user know
 	 * that the connection is closing.
 	 */
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			socantrcvmore(so);
 			/*
 			 * If connection is half-synchronized
 			 * (ie NEEDSYN flag on) then delay ACK,
 			 * so it may be piggybacked when SYN is sent.
 			 * Otherwise, since we received a FIN then no
 			 * more input can be expected, send ACK now.
 			 */
 			if (tp->t_flags & TF_NEEDSYN)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt++;
 		}
 		switch (tp->t_state) {
 
 		/*
 		 * In SYN_RECEIVED and ESTABLISHED STATES
 		 * enter the CLOSE_WAIT state.
 		 */
 		case TCPS_SYN_RECEIVED:
 			tp->t_starttime = ticks;
 			/* FALLTHROUGH */
 		case TCPS_ESTABLISHED:
 			tcp_state_change(tp, TCPS_CLOSE_WAIT);
 			break;
 
 		/*
 		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
 		 * enter the CLOSING state.
 		 */
 		case TCPS_FIN_WAIT_1:
 			tcp_state_change(tp, TCPS_CLOSING);
 			break;
 
 		/*
 		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
 		 * starting the time-wait timer, turning off the other
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
 			    "TCP_FIN_WAIT_2 ti_locked: %d", __func__,
 			    ti_locked));
 
 			tcp_twstart(tp);
 			INP_INFO_RUNLOCK(&V_tcbinfo);
 			return;
 		}
 	}
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 
 	/*
 	 * Return any desired output.
 	 */
 	if (needoutput || (tp->t_flags & TF_ACKNOW))
 		(void) tp->t_fb->tfb_tcp_output(tp);
 
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 	    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 	return;
 
 dropafterack:
 	/*
 	 * Generate an ACK dropping incoming segment if it occupies
 	 * sequence space, where the ACK reflects our state.
 	 *
 	 * We can now skip the test for the RST flag since all
 	 * paths to this code happen after packets containing
 	 * RST have been dropped.
 	 *
 	 * In the SYN-RECEIVED state, don't send an ACK unless the
 	 * segment we received passes the SYN-RECEIVED ACK test.
 	 * If it fails send a RST.  This breaks the loop in the
 	 * "LAND" DoS attack, and also prevents an ACK storm
 	 * between two listening ports that have been sent forged
 	 * SYN segments, each with the source address of the other.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
 	    (SEQ_GT(tp->snd_una, th->th_ack) ||
 	     SEQ_GT(th->th_ack, tp->snd_max)) ) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__drop, tp, th, mtod(m, const char *));
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	tp->t_flags |= TF_ACKNOW;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 	return;
 
 dropwithreset:
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	if (tp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
 		INP_WUNLOCK(tp->t_inpcb);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 	return;
 
 drop:
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
 	else
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 #endif
 
 	/*
 	 * Drop space held by incoming segment and return.
 	 */
 #ifdef TCPDEBUG
 	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__drop, tp, th, mtod(m, const char *));
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 }
 
 
 /*
  * tcp_do_segment_fastslow() is a combination of the original
  * tcp_do_segment() and a split fast path.  It performs the common
  * per-segment preamble (lock-state assertions, idle/keep-alive reset,
  * window unscaling, ECN processing, option parsing, timestamp sanity
  * checks and SYN_SENT option handling) and then hands the segment to
  * exactly one of:
  *
  *   tcp_do_fastack()     - zero-length segment (tlen == 0) that passes
  *                          the header-prediction checks; this includes
  *                          in-sequence ACKs that only advance the window.
  *   tcp_do_fastnewdata() - segment carrying data (tlen != 0) whose ACK
  *                          did not move and which fits in the receive
  *                          socket buffer.
  *   tcp_do_slowpath()    - everything else.
  *
  * Parameters, as used by the code below:
  *   m           - passed through untouched to the selected handler
  *                 (presumably the mbuf chain holding the segment; not
  *                 inspected here).
  *   th          - TCP header; flags, seq, ack, window and data offset
  *                 are read.
  *   so          - socket; only so->so_rcv is consulted here (free space).
  *   tp          - TCP control block; read and updated.
  *   drop_hdrlen - passed through to the selected handler.
  *   tlen        - segment length; 0 selects the ACK-only criteria.
  *   iptos       - IP TOS byte; only the ECN bits are examined.
  *   ti_locked   - TI_RLOCKED or TI_UNLOCKED, describing the caller's
  *                 hold on V_tcbinfo (checked by the assertions below).
  *
  * The function returns nothing; once a handler has been called it
  * returns immediately without touching m, th or tp again.
  */
 void
 tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
 			int ti_locked)
 {
 	int thflags;
 	u_long tiwin;
 	char *s;
 	int can_enter;
 	struct in_conninfo *inc;
 	struct tcpopt to;
 
 	thflags = th->th_flags;
 	tp->sackhint.last_sack_ack = 0;
 	/* Connection info is only needed for the debug log messages below. */
 	inc = &tp->t_inpcb->inp_inc;
 	/*
 	 * If this is either a state-changing packet or current state isn't
 	 * established, tcbinfo must be held: the code asserts
 	 * ti_locked == TI_RLOCKED and checks the read lock.  Otherwise, we
 	 * allow the tcbinfo to be either locked or unlocked, as the
 	 * caller may have acquired the lock unnecessarily due to a race.
 	 * The second set of checks is compiled only under INVARIANTS.
 	 */
 	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
 	    tp->t_state != TCPS_ESTABLISHED) {
 		KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
 						  "SYN/FIN/RST/!EST", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 #ifdef INVARIANTS
 		if (ti_locked == TI_RLOCKED) {
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		} else {
 			KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
 							   "ti_locked: %d", __func__, ti_locked));
 			INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 		}
 #endif
 	}
 	/* The inpcb must be write-locked; LISTEN and TIME_WAIT never get here. */
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 					    __func__));
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 						__func__));
 
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
 	tp->t_rcvtime = ticks;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
 		tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 
 	/*
 	 * Unscale the window into a 32-bit value.
 	 * For the SYN_SENT state the scale is zero.
 	 * Note that this uses the current tp->snd_scale; the SYN_SENT
 	 * block further down may change snd_scale afterwards, but tiwin
 	 * is not recomputed.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
 
 	/*
 	 * TCP ECN processing.
 	 * CWR from the peer clears our pending ECE; a CE mark in the IP
 	 * header sets it again.  The ECT0/ECT1 cases only bump counters.
 	 */
 	if (tp->t_flags & TF_ECN_PERMIT) {
 		if (thflags & TH_CWR)
 			tp->t_flags &= ~TF_ECN_SND_ECE;
 		switch (iptos & IPTOS_ECN_MASK) {
 		case IPTOS_ECN_CE:
 			tp->t_flags |= TF_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_ce);
 			break;
 		case IPTOS_ECN_ECT0:
 			TCPSTAT_INC(tcps_ecn_ect0);
 			break;
 		case IPTOS_ECN_ECT1:
 			TCPSTAT_INC(tcps_ecn_ect1);
 			break;
 		}
 		/* Congestion experienced. */
 		if (thflags & TH_ECE) {
 			cc_cong_signal(tp, th, CC_ECN);
 		}
 	}
 
 	/*
 	 * Parse options on any incoming segment.
 	 * The options start right after the fixed header and span
 	 * (th_off * 4) - sizeof(struct tcphdr) bytes.
 	 */
 	tcp_dooptions(&to, (u_char *)(th + 1),
 		      (th->th_off << 2) - sizeof(struct tcphdr),
 		      (thflags & TH_SYN) ? TO_SYN : 0);
 
 	/*
 	 * If echoed timestamp is later than the current time,
 	 * fall back to non RFC1323 RTT calculation.  Normalize
 	 * timestamp if syncookies were used when this connection
 	 * was established.
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
 	}
 	/*
 	 * If timestamps were negotiated during SYN/ACK they should
 	 * appear on every segment during this session and vice versa.
 	 * A mismatch in either direction is only logged at LOG_DEBUG;
 	 * the segment is still processed.  The string returned by
 	 * tcp_log_addrs() is released with free(s, M_TCPLOG).
 	 */
 	if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 	if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.
 	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE)) {
 			tp->t_flags |= TF_RCVD_SCALE;
 			tp->snd_scale = to.to_wscale;
 		}
 		/*
 		 * Initial send window.  It will be updated with
 		 * the next incoming segment to the scaled value.
 		 */
 		tp->snd_wnd = th->th_win;
 		if (to.to_flags & TOF_TS) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
 			tp->ts_recent_age = tcp_ts_getticks();
 		}
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
 		/* Peer did not offer SACK-permitted: stop using SACK. */
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (to.to_flags & TOF_SACKPERM) == 0)
 			tp->t_flags &= ~TF_SACK_PERMIT;
 	}
 	/*
 	 * Decide whether the window/ack state allows the fast path at all.
 	 * The criteria differ for pure ACKs and for segments with data;
 	 * the result is one input to the header-prediction test below.
 	 */
 	can_enter = 0;
 	if (__predict_true((tlen == 0))) {
 		/*
 		 * The ack moved forward and we have a window (non-zero)
 		 * <or>
 		 * The ack did not move forward, but the window increased.
 		 */
 		if (__predict_true((SEQ_GT(th->th_ack, tp->snd_una) && tiwin) ||
 				   ((th->th_ack == tp->snd_una) && tiwin && (tiwin > tp->snd_wnd)))) {
 			can_enter = 1;
 		}
 	} else {
 		/*
 		 * Data incoming, use the old entry criteria
 		 * for fast-path with data: a non-zero window
 		 * that did not change.
 		 */
 		if ((tiwin && tiwin == tp->snd_wnd)) {
 			can_enter = 1;
 		}
 	}
 	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
 	 * no control flags, is in-sequence, the window passes
 	 * the can_enter criteria computed above and we're not
 	 * retransmitting, it's a candidate.  If the length is
 	 * zero and the ack moved forward (or only the window
 	 * grew), we're the sender side of the xfer.  Just
 	 * free the data acked & wake any higher level process
 	 * that was blocked waiting for space.  If the length
 	 * is non-zero and the ack didn't move, we're the
 	 * receiver side.  If we're getting packets in-order
 	 * (the reassembly queue is empty), add the data to
 	 * the socket buffer and note that we need a delayed ack.
 	 * Make sure that the hidden state-flags are also off.
 	 * Since we check for TCPS_ESTABLISHED first, it can only
 	 * be TF_NEEDSYN.
 	 */
 	if (__predict_true(tp->t_state == TCPS_ESTABLISHED &&
 	    th->th_seq == tp->rcv_nxt &&
 	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
 	    tp->snd_nxt == tp->snd_max &&
 	    can_enter &&
 	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
 	    LIST_EMPTY(&tp->t_segq) &&
 	    ((to.to_flags & TOF_TS) == 0 ||
 	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)))) {
 		/*
 		 * Pure ACK: additionally require that the ack is not
 		 * beyond snd_max, that we are not in recovery, and that
 		 * no SACK information is involved (none in this segment,
 		 * no holes on the scoreboard).
 		 */
 		if (__predict_true((tlen == 0) &&
 		    (SEQ_LEQ(th->th_ack, tp->snd_max) &&
 		     !IN_RECOVERY(tp->t_flags) &&
 		     (to.to_flags & TOF_SACK) == 0 &&
 		     TAILQ_EMPTY(&tp->snd_holes)))) {
 			/* We are done */
 			tcp_do_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, 
 				       ti_locked, tiwin);
 			return;
 		} else if ((tlen) &&
 			   (th->th_ack == tp->snd_una &&
 			    tlen <= sbspace(&so->so_rcv))) {
 			/*
 			 * In-sequence data, nothing newly acked, and the
 			 * data fits in the receive socket buffer.
 			 */
 			tcp_do_fastnewdata(m, th, so, tp, &to, drop_hdrlen, tlen, 
 					   ti_locked, tiwin);
 			/* We are done */
 			return;
 		}
 	}
 	/* Neither fast path applied: take the full slow path. */
 	tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen,
 			ti_locked, tiwin, thflags);
 }
 
 
 /*
  * This subfunction tries to keep the fast path as cheap as
  * possible.  In-sequence window updates are again allowed to
  * remain in the fast path, and __predict_*() hints are added
  * to help the compiler.
  * Note that a return value of 0 means the segment can *not* be
  * processed here, and the caller should push the packet into
  * the slow path.
  */
 /*
  * Arguments:
  *	m		mbuf chain holding the segment; freed here on the
  *			accepted (return 1) path, untouched otherwise.
  *	th		TCP header of the segment.
  *	so		socket of the connection; acked bytes are dropped
  *			from so->so_snd.
  *	tp		TCP control block of the connection.
  *	to		TCP options already parsed by the caller.
  *	drop_hdrlen,
  *	tlen		not referenced in this function.
  *	ti_locked	TI_RLOCKED if the caller holds the tcbinfo read lock;
  *			that lock is released here once the ack is accepted.
  *	tiwin		peer's advertised window as passed in by the caller.
  *
  * Returns 0 when the segment does not qualify for the fast path; every
  * such return happens before any connection state is written, so the
  * caller can hand the unmodified segment to the slow path.
  *
  * Returns 1 when the ack was fully processed.  In that case the mbuf has
  * been freed, the tcbinfo read lock (if held) has been dropped and the
  * inpcb lock has been released with INP_WUNLOCK(); the caller must not
  * touch m or rely on either lock afterwards.
  */
 static int
 tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	       struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
 	       int ti_locked, u_long tiwin)
 {
 	int acked;
 	int winup_only=0;
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 *
 	 * NOTE(review): tcp_saveipgen and tcp_savetcp are never filled in
 	 * within this function before being handed to tcp_trace() below;
 	 * confirm whether the trace output is expected to be meaningful.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 
 
 	/*
 	 * Qualification checks.  Each one bails out to the slow path
 	 * without having modified anything.
 	 */
 	if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) {
 		/*
 		 * Old ack, behind (or duplicate to) the last one rcv'd.
 		 * Because th_ack == snd_una is rejected right here, no
 		 * separate duplicate-ack test is needed afterwards (the
 		 * former "th_ack == snd_una && tiwin <= snd_wnd" check
 		 * could never be true and has been dropped).
 		 */
 		return (0);
 	}
 	if (__predict_false(tiwin == 0)) {
 		/* zero window */
 		return (0);
 	}
 	if (__predict_false(SEQ_GT(th->th_ack, tp->snd_max))) {
 		/* Above what we have sent? */
 		return (0);
 	}
 	if (__predict_false(tp->snd_nxt != tp->snd_max)) {
 		/* We are retransmitting */
 		return (0);
 	}
 	if (__predict_false(tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN))) {
 		/* We need a SYN or a FIN, unlikely.. */
 		return (0);
 	}
 	if ((to->to_flags & TOF_TS) &&
 	    __predict_false(TSTMP_LT(to->to_tsval, tp->ts_recent))) {
 		/* Timestamp is behind .. old ack with seq wrap? */
 		return (0);
 	}
 	if (__predict_false(IN_RECOVERY(tp->t_flags))) {
 		/* Still recovering */
 		return (0);
 	}
 	if (__predict_false(to->to_flags & TOF_SACK)) {
 		/* Sack included in the ack..  */
 		return (0);
 	}
 	if (!TAILQ_EMPTY(&tp->snd_holes)) {
 		/* We have sack holes on our scoreboard */
 		return (0);
 	}
 	/* Ok if we reach here, we can process a fast-ack */
 
 	/* Did the window get updated? */
 	if (tiwin != tp->snd_wnd) {
 		/* keep track of pure window updates */
 		if (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) {
 			winup_only = 1;
 			TCPSTAT_INC(tcps_rcvwinupd);
 		}
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 	}
 	/*
 	 * Pull snd_wl2 up to prevent seq wrap relative
 	 * to th_ack.
 	 */
 	tp->snd_wl2 = th->th_ack;
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record the timestamp.
 	 * NOTE that the test is modified according to the latest
 	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
 	/*
 	 * This is a pure ack for outstanding data.
 	 * The tcbinfo read lock is not needed past this point, so drop
 	 * it if the caller took it.
 	 */
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	TCPSTAT_INC(tcps_predack);
 
 	/*
 	 * "bad retransmit" recovery.
 	 */
 	if (tp->t_rxtshift == 1 &&
 	    tp->t_flags & TF_PREVVALID &&
 	    (int)(ticks - tp->t_badrxtwin) < 0) {
 		cc_cong_signal(tp, th, CC_RTO_ERR);
 	}
 
 	/*
 	 * Recalculate the transmit timer / rtt.
 	 *
 	 * Some boxes send broken timestamp replies
 	 * during the SYN+ACK phase, ignore
 	 * timestamps of 0 or we could calculate a
 	 * huge RTT and blow up the retransmit timer.
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    to->to_tsecr) {
 		u_int t;
 
 		t = tcp_ts_getticks() - to->to_tsecr;
 		if (!tp->t_rttlow || tp->t_rttlow > t)
 			tp->t_rttlow = t;
 		tcp_xmit_timer(tp,
 			       TCP_TS_TO_TICKS(t) + 1);
 	} else if (tp->t_rtttime &&
 		   SEQ_GT(th->th_ack, tp->t_rtseq)) {
 		if (!tp->t_rttlow ||
 		    tp->t_rttlow > ticks - tp->t_rtttime)
 			tp->t_rttlow = ticks - tp->t_rtttime;
 		tcp_xmit_timer(tp,
 			       ticks - tp->t_rtttime);
 	}
 	if (winup_only == 0) {
 		/* The ack covers new data: release it from the send buffer. */
 		acked = BYTES_THIS_ACK(tp, th);
 
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, to);
 
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 		sbdrop(&so->so_snd, acked);
 		if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 
 		/*
 		 * Let the congestion control algorithm update
 		 * congestion control related information. This
 		 * typically means increasing the congestion
 		 * window.
 		 */
 		cc_ack_received(tp, th, CC_ACK);
 
 		tp->snd_una = th->th_ack;
 		tp->t_dupacks = 0;
 
 		/*
 		 * If all outstanding data are acked, stop
 		 * retransmit timer, otherwise restart timer
 		 * using current (possibly backed-off) value.
 		 * If process is waiting for space,
 		 * wakeup/selwakeup/signal.  If data
 		 * are ready to send, let tcp_output
 		 * decide between more output or persist.
 		 */
 #ifdef TCPDEBUG
 		if (so->so_options & SO_DEBUG)
 			tcp_trace(TA_INPUT, ostate, tp,
 				  (void *)tcp_saveipgen,
 				  &tcp_savetcp, 0);
 #endif
 		/* The probe reads from m, so it must run before m_freem(). */
 		TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 		m_freem(m);
 		if (tp->snd_una == tp->snd_max)
 			tcp_timer_activate(tp, TT_REXMT, 0);
 		else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT,
 					   tp->t_rxtcur);
 		/* Wake up the socket if we have room to write more */
 		sowwakeup(so);
 	} else {
 		/*
 		 * Window update only, just free the mbufs and
 		 * send out whatever we can.
 		 */
 		m_freem(m);
 	}
 	if (sbavail(&so->so_snd))
 		(void) tcp_output(tp);
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 					    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* Convert a pending delayed-ack request into an armed timer. */
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 	return (1);
 }
 
 /*
  * This tcp-do-segment concentrates on making the fastest
  * ack processing path. It does not have a fast-path for
  * data (it possibly could, which would then eliminate the
  * need for fast-slow above). For a content distributor having
  * large outgoing elephants and very very little coming in,
  * having no fastpath for data does not really hurt (since you
  * don't get much data in). The most important thing is
  * processing acks quickly and getting the rest of the data
  * output to the peer as quickly as possible. This routine
  * seems to be about an overall 3% faster than the old
  * tcp_do_segment and keeps us in the fast-path for packets
  * much more (by allowing window updates to also stay in the fastpath).
  *
  * Arguments:
  *	m		mbuf chain holding the segment.
  *	th		TCP header of the segment.
  *	so		socket of the connection.
  *	tp		TCP control block; its inpcb must be write-locked
  *			on entry (asserted below).
  *	drop_hdrlen	only passed through to the fast/slow handlers.
  *	tlen		must be 0 for the fast-ack path to be attempted
  *			(presumably the payload length; confirm with caller).
  *	iptos		IP TOS byte; only its ECN bits are examined here.
  *	ti_locked	TI_RLOCKED or TI_UNLOCKED, describing whether the
  *			caller holds the tcbinfo read lock.
  *
  * The segment is always consumed: either tcp_fastack() accepts it, or it
  * is handed to tcp_do_slowpath().
  */
 void
 tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		       struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
 		       int ti_locked)
 {
 	int thflags;
 	u_long tiwin;
 	char *s;
 	struct in_conninfo *inc;
 	struct tcpopt to;
 
 	thflags = th->th_flags;
 	tp->sackhint.last_sack_ack = 0;
 	inc = &tp->t_inpcb->inp_inc;
 	/*
 	 * If this is either a state-changing packet or current state isn't
 	 * established, we require the tcbinfo read lock.  Otherwise, we
 	 * allow the tcbinfo to be either locked or unlocked, as the
 	 * caller may have unnecessarily acquired the lock due to a race.
 	 */
 	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
 	    tp->t_state != TCPS_ESTABLISHED) {
 		KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
 						  "SYN/FIN/RST/!EST", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 #ifdef INVARIANTS
 		if (ti_locked == TI_RLOCKED) {
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		} else {
 			KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
 							   "ti_locked: %d", __func__, ti_locked));
 			INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 		}
 #endif
 	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	/* LISTEN and TIME_WAIT connections must never reach this routine. */
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 					    __func__));
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 						__func__));
 
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
 	tp->t_rcvtime = ticks;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
 		tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 
 	/*
 	 * Unscale the window into a 32-bit value.
 	 * For the SYN_SENT state the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
 
 	/*
 	 * TCP ECN processing.
 	 * CWR from the peer clears our pending ECE; a CE mark on the IP
 	 * header sets it again.  ECE from the peer is reported to the
 	 * congestion control module.
 	 */
 	if (tp->t_flags & TF_ECN_PERMIT) {
 		if (thflags & TH_CWR)
 			tp->t_flags &= ~TF_ECN_SND_ECE;
 		switch (iptos & IPTOS_ECN_MASK) {
 		case IPTOS_ECN_CE:
 			tp->t_flags |= TF_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_ce);
 			break;
 		case IPTOS_ECN_ECT0:
 			TCPSTAT_INC(tcps_ecn_ect0);
 			break;
 		case IPTOS_ECN_ECT1:
 			TCPSTAT_INC(tcps_ecn_ect1);
 			break;
 		}
 		/* Congestion experienced. */
 		if (thflags & TH_ECE) {
 			cc_cong_signal(tp, th, CC_ECN);
 		}
 	}
 
 	/*
 	 * Parse options on any incoming segment.
 	 * The option bytes start right after the fixed TCP header and run
 	 * for (th_off * 4 - sizeof(struct tcphdr)) bytes.
 	 */
 	tcp_dooptions(&to, (u_char *)(th + 1),
 		      (th->th_off << 2) - sizeof(struct tcphdr),
 		      (thflags & TH_SYN) ? TO_SYN : 0);
 
 	/*
 	 * If echoed timestamp is later than the current time,
 	 * fall back to non RFC1323 RTT calculation.  Normalize
 	 * timestamp if syncookies were used when this connection
 	 * was established.
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
 	}
 	/*
 	 * If timestamps were negotiated during SYN/ACK they should
 	 * appear on every segment during this session and vice versa.
 	 * A mismatch is only logged; the segment is still processed.
 	 */
 	if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 	if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.
 	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE)) {
 			tp->t_flags |= TF_RCVD_SCALE;
 			tp->snd_scale = to.to_wscale;
 		}
 		/*
 		 * Initial send window.  It will be updated with
 		 * the next incoming segment to the scaled value.
 		 */
 		tp->snd_wnd = th->th_win;
 		if (to.to_flags & TOF_TS) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
 			tp->ts_recent_age = tcp_ts_getticks();
 		}
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
 		/* Peer did not offer SACK: stop using it on this connection. */
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (to.to_flags & TOF_SACKPERM) == 0)
 			tp->t_flags &= ~TF_SACK_PERMIT;
 	}
 	/*
 	 * Header prediction, ack side only.  A segment is a fast-ack
 	 * candidate when the connection is ESTABLISHED, the segment
 	 * carries no SACK option and no payload (tlen == 0), ACK is the
 	 * only flag set among SYN/FIN/RST/URG/ACK, the reassembly queue
 	 * is empty and the segment is in sequence (th_seq == rcv_nxt).
 	 * Unlike the classic header prediction, a changed window does
 	 * not disqualify the segment; tcp_fastack() makes the remaining
 	 * decisions (including the hidden TF_NEEDSYN/TF_NEEDFIN state
 	 * flags) and returns non-zero once it has consumed the segment
 	 * and released the locks.  Anything it declines, and everything
 	 * that is not a candidate, goes to the slow path.
 	 */
 	if (__predict_true(tp->t_state == TCPS_ESTABLISHED) &&
 	    __predict_true(((to.to_flags & TOF_SACK) == 0)) &&
 	    __predict_true(tlen == 0) &&
 	    __predict_true((thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK) &&
 	    __predict_true(LIST_EMPTY(&tp->t_segq)) &&
 	    __predict_true(th->th_seq == tp->rcv_nxt)) {
 		    if (tcp_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, 
 				    ti_locked, tiwin)) {
 			    return;
 		    }
 	} 
 	tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen,
 			ti_locked, tiwin, thflags);
 }
 
 /*
  * Function block registered under the name "fastslow"; it pairs the
  * stock tcp_output and tcp_default_ctloutput with the
  * tcp_do_segment_fastslow input routine.
  *
  * The members are given positionally, so their order must match the
  * declaration of struct tcp_function_block (declared elsewhere).  The
  * NULL/0 entries leave the remaining members unset; one of them is the
  * tfb_refcnt counter that tcp_addfastpaths() checks on MOD_QUIESCE.
  */
 struct tcp_function_block __tcp_fastslow = {
 	"fastslow",
 	tcp_output,
 	tcp_do_segment_fastslow,
 	tcp_default_ctloutput,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	0,
 	0
 
 };
 
 /*
  * Function block registered under the name "fastack"; it pairs the
  * stock tcp_output and tcp_default_ctloutput with the
  * tcp_do_segment_fastack input routine.
  *
  * The members are given positionally, so their order must match the
  * declaration of struct tcp_function_block (declared elsewhere).  The
  * NULL/0 entries leave the remaining members unset; one of them is the
  * tfb_refcnt counter that tcp_addfastpaths() checks on MOD_QUIESCE.
  */
 struct tcp_function_block __tcp_fastack = {
 	"fastack",
 	tcp_output,
 	tcp_do_segment_fastack,
 	tcp_default_ctloutput,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	0,
 	0
 };
 
 /*
  * Module event handler for the fast-path function blocks.
  *
  *	MOD_LOAD	registers "fastack" and then "fastslow"; if the second
  *			registration fails the first one is rolled back.
  *	MOD_QUIESCE	answers EBUSY while either block still has references.
  *	MOD_UNLOAD	deregisters both blocks; only EBUSY is reported, any
  *			other deregistration result is treated as success.
  *	anything else	EOPNOTSUPP.
  *
  * Returns 0 on success or an errno value.  mod and data are unused.
  *
  * NOTE(review): on MOD_UNLOAD, an EBUSY from the second deregistration is
  * returned after the first block has already been deregistered; confirm
  * that this partial state is acceptable.
  */
 static int
 tcp_addfastpaths(module_t mod, int type, void *data)
 {
 	int rc;
 
 	if (type == MOD_LOAD) {
 		rc = register_tcp_functions(&__tcp_fastack, M_WAITOK);
 		if (rc != 0) {
 			printf("Failed to register fastack module -- err:%d\n", rc);
 			return (rc);
 		}
 		rc = register_tcp_functions(&__tcp_fastslow, M_WAITOK);
 		if (rc != 0) {
 			printf("Failed to register fastslow module -- err:%d\n", rc);
 			/* Undo the first registration so nothing is left behind. */
 			deregister_tcp_functions(&__tcp_fastack);
 			return (rc);
 		}
 		return (0);
 	}
 
 	if (type == MOD_QUIESCE) {
 		if (__tcp_fastslow.tfb_refcnt != 0 ||
 		    __tcp_fastack.tfb_refcnt != 0)
 			return (EBUSY);
 		return (0);
 	}
 
 	if (type == MOD_UNLOAD) {
 		if (deregister_tcp_functions(&__tcp_fastack) == EBUSY)
 			return (EBUSY);
 		if (deregister_tcp_functions(&__tcp_fastslow) == EBUSY)
 			return (EBUSY);
 		return (0);
 	}
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module description handed to DECLARE_MODULE(): names the module
  * "tcp_fastpaths" and routes its events to tcp_addfastpaths().  No
  * private data is attached.
  */
 static moduledata_t new_tcp_fastpaths = {
 	.name = "tcp_fastpaths",
 	.evhand = tcp_addfastpaths,
 	.priv = 0
 };
 
 /*
  * Declare the module as kern_tcpfastpaths, version 1, initialized at
  * SI_SUB_PROTO_DOMAIN with SI_ORDER_ANY.
  */
 MODULE_VERSION(kern_tcpfastpaths, 1);
 DECLARE_MODULE(kern_tcpfastpaths, new_tcp_fastpaths, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);