Index: projects/hps_head/sys/dev/nand/nandsim_chip.c
===================================================================
--- projects/hps_head/sys/dev/nand/nandsim_chip.c	(revision 309217)
+++ projects/hps_head/sys/dev/nand/nandsim_chip.c	(revision 309218)
@@ -1,899 +1,898 @@
 /*-
  * Copyright (C) 2009-2012 Semihalf
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/kthread.h>
 #include <sys/unistd.h>
 
 #include <dev/nand/nand.h>
 #include <dev/nand/nandsim_chip.h>
 #include <dev/nand/nandsim_log.h>
 #include <dev/nand/nandsim_swap.h>
 
 /* Malloc type used for every allocation made in this file. */
 MALLOC_DEFINE(M_NANDSIM, "NANDsim", "NANDsim dynamic data");
 
 /* Acquire / release the per-chip mutex (chip->ns_lock). */
 #define NANDSIM_CHIP_LOCK(chip)		mtx_lock(&(chip)->ns_lock)
 #define	NANDSIM_CHIP_UNLOCK(chip)	mtx_unlock(&(chip)->ns_lock)
 
 /*
  * State-machine event handlers.  The active one is stored in
  * chip->ev_handler by nandsim_start_handler() and invoked from
  * nandsim_loop() for every event taken off the chip's queue.
  */
 static nandsim_evh_t erase_evh;
 static nandsim_evh_t idle_evh;
 static nandsim_evh_t poweron_evh;
 static nandsim_evh_t reset_evh;
 static nandsim_evh_t read_evh;
 static nandsim_evh_t readid_evh;
 static nandsim_evh_t readparam_evh;
 static nandsim_evh_t write_evh;
 
 /* Chip thread body and state-machine diagnostics. */
 static void nandsim_loop(void *);
 static void nandsim_undefined(struct nandsim_chip *, uint8_t);
 static void nandsim_bad_address(struct nandsim_chip *, uint8_t *);
 static void nandsim_ignore_address(struct nandsim_chip *, uint8_t);
 static void nandsim_sm_error(struct nandsim_chip *);
 static void nandsim_start_handler(struct nandsim_chip *, nandsim_evh_t);
 
 /* Delayed (callout driven) NANDSIM_EV_TIMEOUT delivery. */
 static void nandsim_callout_eh(void *);
 static int  nandsim_delay(struct nandsim_chip *, int);
 
 /* Per-block state (wear level, bad-block flag) management. */
 static int  nandsim_bbm_init(struct nandsim_chip *, uint32_t, uint32_t *);
 static int  nandsim_blk_state_init(struct nandsim_chip *, uint32_t, uint32_t);
 static void nandsim_blk_state_destroy(struct nandsim_chip *);
 static int  nandchip_is_block_valid(struct nandsim_chip *, int);
 
 /* Helpers that set / clear bits in chip->chip_status. */
 static void nandchip_set_status(struct nandsim_chip *, uint8_t);
 static void nandchip_clear_status(struct nandsim_chip *, uint8_t);
 
 /*
  * Process handle handed to kproc_kthread_add() when chip threads are
  * created; reset to NULL by a chip thread when it exits.
  */
 struct proc *nandsim_proc;
 
 /*
  * Allocate and initialise a simulated chip from the configuration in
  * sim_chip, start its event-processing thread and attach its swap storage.
  *
  * sc       - owning simulator softc, stored in the chip.
  * chip_num - chip number, stored in the chip and used in the swap file name.
  * sim_chip - configuration (ids, geometry, timings, bad block map).
  *
  * Returns the new chip, or NULL when block-state setup, bad-block-map
  * setup, thread creation or swap initialisation fails.
  */
 struct nandsim_chip *
 nandsim_chip_init(struct nandsim_softc* sc, uint8_t chip_num,
     struct sim_chip *sim_chip)
 {
 	struct nandsim_chip *chip;
 	struct onfi_params *chip_param;
 	struct nandsim_ev *ev;
 	char swapfile[20];
 	uint32_t size;
 	int error;
 
 	chip = malloc(sizeof(*chip), M_NANDSIM, M_WAITOK | M_ZERO);
 
 	mtx_init(&chip->ns_lock, "nandsim lock", NULL, MTX_DEF);
 	callout_init(&chip->ns_callout, 1);
 	STAILQ_INIT(&chip->nandsim_events);
 
 	chip->chip_num = chip_num;
 	chip->ctrl_num = sim_chip->ctrl_num;
 	chip->sc = sc;
 
 	/* The WP status bit is set when the chip is NOT configured as WP. */
 	if (!sim_chip->is_wp)
 		nandchip_set_status(chip, NAND_STATUS_WP);
 
 	chip_param = &chip->params;
 
 	chip->id.dev_id = sim_chip->device_id;
 	chip->id.man_id = sim_chip->manufact_id;
 
 	chip->error_ratio = sim_chip->error_ratio;
 	chip->wear_level = sim_chip->wear_level;
 	chip->prog_delay = sim_chip->prog_time;
 	chip->erase_delay = sim_chip->erase_time;
 	chip->read_delay = sim_chip->read_time;
 
 	chip_param->t_prog = sim_chip->prog_time;
 	chip_param->t_bers = sim_chip->erase_time;
 	chip_param->t_r = sim_chip->read_time;
 	bcopy("onfi", &chip_param->signature, 4);
 
 	chip_param->manufacturer_id = sim_chip->manufact_id;
 	/* strncpy() does not guarantee termination; terminate explicitly. */
 	strncpy(chip_param->manufacturer_name, sim_chip->manufacturer, 12);
 	chip_param->manufacturer_name[11] = 0;
 	strncpy(chip_param->device_model, sim_chip->device_model, 20);
 	chip_param->device_model[19] = 0;
 
 	chip_param->bytes_per_page = sim_chip->page_size;
 	chip_param->spare_bytes_per_page = sim_chip->oob_size;
 	chip_param->pages_per_block = sim_chip->pgs_per_blk;
 	chip_param->blocks_per_lun = sim_chip->blks_per_lun;
 	chip_param->luns = sim_chip->luns;
 
 	init_chip_geom(&chip->cg, chip_param->luns, chip_param->blocks_per_lun,
 	    chip_param->pages_per_block, chip_param->bytes_per_page,
 	    chip_param->spare_bytes_per_page);
 
 	/* Row cycles in the low nibble, column cycles in the high nibble. */
 	chip_param->address_cycles = sim_chip->row_addr_cycles |
 	    (sim_chip->col_addr_cycles << 4);
 	chip_param->features = sim_chip->features;
 	if (sim_chip->width == 16)
 		chip_param->features |= ONFI_FEAT_16BIT;
 
 	/* Total number of blocks on the chip. */
 	size = chip_param->blocks_per_lun * chip_param->luns;
 
 	error = nandsim_blk_state_init(chip, size, sim_chip->wear_level);
 	if (error) {
 		mtx_destroy(&chip->ns_lock);
 		free(chip, M_NANDSIM);
 		return (NULL);
 	}
 
 	error = nandsim_bbm_init(chip, size, sim_chip->bad_block_map);
 	if (error) {
 		mtx_destroy(&chip->ns_lock);
 		nandsim_blk_state_destroy(chip);
 		free(chip, M_NANDSIM);
 		return (NULL);
 	}
 
 	/* Queues the initial START event; no thread exists yet to sleep on. */
 	nandsim_start_handler(chip, poweron_evh);
 
 	nand_debug(NDBG_SIM,"Create thread for chip%d [%8p]", chip->chip_num,
 	    chip);
 	/* Create chip thread */
 	error = kproc_kthread_add(nandsim_loop, chip, &nandsim_proc,
 	    &chip->nandsim_td, RFSTOPPED | RFHIGHPID,
 	    0, "nandsim", "chip");
 	if (error) {
 		/* Nobody will consume the queued events; release them. */
 		while (!STAILQ_EMPTY(&chip->nandsim_events)) {
 			ev = STAILQ_FIRST(&chip->nandsim_events);
 			STAILQ_REMOVE_HEAD(&chip->nandsim_events, links);
 			destroy_event(ev);
 		}
 		mtx_destroy(&chip->ns_lock);
 		nandsim_blk_state_destroy(chip);
 		free(chip, M_NANDSIM);
 		return (NULL);
 	}
 
 	thread_lock(chip->nandsim_td);
 	sched_class(chip->nandsim_td, PRI_REALTIME);
 	sched_add(chip->nandsim_td, SRQ_BORING);
 	thread_unlock(chip->nandsim_td);
 
 	/* Size of one block including the spare area of each page. */
 	size = (chip_param->bytes_per_page +
 	    chip_param->spare_bytes_per_page) *
 	    chip_param->pages_per_block;
 
 	snprintf(swapfile, sizeof(swapfile), "chip%d%d.swp", chip->ctrl_num,
 	    chip->chip_num);
 	chip->swap = nandsim_swap_init(swapfile, chip_param->blocks_per_lun *
 	    chip_param->luns, size);
 	if (!chip->swap) {
 		/*
 		 * The chip thread frees the chip when it handles the EXIT
 		 * event, so the pointer must not be handed to the caller.
 		 */
 		nandsim_chip_destroy(chip);
 		return (NULL);
 	}
 
 	/* Wait for new thread to enter main loop */
 	tsleep(chip->nandsim_td, PWAIT, "ns_chip", 1 * hz);
 
 	return (chip);
 }
 
 /*
  * Allocate the per-block state array (size entries) and set the initial
  * wear level of every block: wear_lev when it is non-zero, -1 otherwise.
  *
  * Returns 0 on success, -1 when chip is NULL or size is 0.
  */
 static int
 nandsim_blk_state_init(struct nandsim_chip *chip, uint32_t size,
     uint32_t wear_lev)
 {
 	uint32_t i;
 
 	if (!chip || size == 0)
 		return (-1);
 
 	chip->blk_state = malloc(size * sizeof(struct nandsim_block_state),
 	    M_NANDSIM, M_WAITOK | M_ZERO);
 
 	/* Unsigned index: matches the type of size and cannot overflow. */
 	for (i = 0; i < size; i++) {
 		if (wear_lev)
 			chip->blk_state[i].wear_lev = wear_lev;
 		else
 			chip->blk_state[i].wear_lev = -1;
 	}
 
 	return (0);
 }
 
 static void
 nandsim_blk_state_destroy(struct nandsim_chip *chip)
 {
 
 	if (chip && chip->blk_state)
 		free(chip->blk_state, M_NANDSIM);
 }
 
 /*
  * Mark the blocks listed in sim_bbm as bad in the chip's block state.
  *
  * size is the number of entries in chip->blk_state.  sim_bbm holds up to
  * MAX_BAD_BLOCKS block indices and may be terminated early by 0xffffffff;
  * a NULL map means there are no bad blocks.
  *
  * Returns 0 on success, -1 on invalid arguments, missing block state or a
  * block index outside the array.
  */
 static int
 nandsim_bbm_init(struct nandsim_chip *chip, uint32_t size,
     uint32_t *sim_bbm)
 {
 	uint32_t index;
 	int i;
 
 	if ((chip == NULL) || (size == 0))
 		return (-1);
 
 	if (chip->blk_state == NULL)
 		return (-1);
 
 	if (sim_bbm == NULL)
 		return (0);
 
 	for (i = 0; i < MAX_BAD_BLOCKS; i++) {
 		index = sim_bbm[i];
 
 		if (index == 0xffffffff)
 			break;
 		else if (index >= size)
 			/* Valid indices are 0 .. size - 1. */
 			return (-1);
 		else
 			chip->blk_state[index].is_bad = 1;
 	}
 
 	return (0);
 }
 
 /*
  * Ask the chip to shut down by sending it a NANDSIM_EV_EXIT event.
  * Nothing is sent when the event cannot be allocated.
  */
 void
 nandsim_chip_destroy(struct nandsim_chip *chip)
 {
 	struct nandsim_ev *exit_ev;
 
 	exit_ev = create_event(chip, NANDSIM_EV_EXIT, 0);
 	if (exit_ev == NULL)
 		return;
 
 	send_event(exit_ev);
 }
 
 /*
  * Set NANDSIM_CHIP_FROZEN in the chip flags.  While the flag is set,
  * send_event() does not queue events and nandsim_loop() does not run
  * the event handler.
  */
 void
 nandsim_chip_freeze(struct nandsim_chip *chip)
 {
 
 	chip->flags = chip->flags | NANDSIM_CHIP_FROZEN;
 }
 
 /*
  * Body of the per-chip thread.
  *
  * Marks the chip active (waking the creator sleeping on the thread
  * pointer), then repeatedly takes the first queued event and passes it to
  * the current event handler unless the chip is frozen.  Whoever sleeps on
  * the event address is woken once the event has been handled.
  *
  * A NANDSIM_EV_EXIT event drops all remaining events, frees every chip
  * resource and terminates the thread.
  */
 static void
 nandsim_loop(void *arg)
 {
 	struct nandsim_chip *chip = (struct nandsim_chip *)arg;
 	struct nandsim_ev *ev;
 
 	nand_debug(NDBG_SIM,"Start main loop for chip%d [%8p]", chip->chip_num,
 	    chip);
 	for(;;) {
 		NANDSIM_CHIP_LOCK(chip);
 		if (!(chip->flags & NANDSIM_CHIP_ACTIVE)) {
 			chip->flags |= NANDSIM_CHIP_ACTIVE;
 			wakeup(chip->nandsim_td);
 		}
 
 		/*
 		 * Re-check the queue after every wakeup so that an empty
 		 * queue is never dereferenced below.
 		 */
 		while (STAILQ_EMPTY(&chip->nandsim_events)) {
 			nand_debug(NDBG_SIM,"Chip%d [%8p] going sleep",
 			    chip->chip_num, chip);
 			msleep(chip, &chip->ns_lock, PRIBIO, "nandev", 0);
 		}
 
 		ev = STAILQ_FIRST(&chip->nandsim_events);
 		STAILQ_REMOVE_HEAD(&chip->nandsim_events, links);
 		NANDSIM_CHIP_UNLOCK(chip);
 		if (ev->type == NANDSIM_EV_EXIT) {
 			NANDSIM_CHIP_LOCK(chip);
 			/* Wake the sender before the event memory is freed. */
 			wakeup(ev);
 			destroy_event(ev);
 			while (!STAILQ_EMPTY(&chip->nandsim_events)) {
 				ev = STAILQ_FIRST(&chip->nandsim_events);
 				STAILQ_REMOVE_HEAD(&chip->nandsim_events,
 				    links);
 				wakeup(ev);
 				destroy_event(ev);
 			}
 			NANDSIM_CHIP_UNLOCK(chip);
 			nandsim_log(chip, NANDSIM_LOG_SM, "destroyed\n");
 			mtx_destroy(&chip->ns_lock);
 			nandsim_blk_state_destroy(chip);
 			nandsim_swap_destroy(chip->swap);
 			free(chip, M_NANDSIM);
 			nandsim_proc = NULL;
 
 			kthread_exit();
 		}
 
 		if (!(chip->flags & NANDSIM_CHIP_FROZEN)) {
 			nand_debug(NDBG_SIM,"Chip [%x] get event [%x]",
 			    chip->chip_num, ev->type);
 			chip->ev_handler(chip, ev->type, ev->data);
 		}
 
 		wakeup(ev);
 		destroy_event(ev);
 	}
 
 }
 
 struct nandsim_ev *
 create_event(struct nandsim_chip *chip, uint8_t type, uint8_t data_size)
 {
 	struct nandsim_ev *ev;
 
 	ev = malloc(sizeof(*ev), M_NANDSIM, M_NOWAIT | M_ZERO);
 	if (!ev) {
 		nand_debug(NDBG_SIM,"Cannot create event");
 		return (NULL);
 	}
 
 	if (data_size > 0)
 		ev->data = malloc(sizeof(*ev), M_NANDSIM, M_NOWAIT | M_ZERO);
 	ev->type = type;
 	ev->chip = chip;
 
 	return (ev);
 }
 
 /* Free an event together with its optional payload buffer. */
 void
 destroy_event(struct nandsim_ev *ev)
 {
 
 	/* free(9) ignores a NULL address, so an absent payload is fine. */
 	free(ev->data, M_NANDSIM);
 	free(ev, M_NANDSIM);
 }
 
 /*
  * Queue ev on its chip and wake the chip thread.
  *
  * For every event type except NANDSIM_EV_TIMEOUT, and when called from a
  * thread other than the chip thread, the caller then sleeps on the event
  * address for up to five seconds; nandsim_loop() issues the wakeup.
  *
  * Nothing is queued while the chip is frozen.
  * NOTE(review): in that case the event is neither queued nor freed here;
  * confirm that callers do not rely on send_event() consuming it.
  *
  * Always returns 0.
  */
 int
 send_event(struct nandsim_ev *ev)
 {
 	struct nandsim_chip *chip = ev->chip;
 	uint32_t type;
 
 	if (!(chip->flags & NANDSIM_CHIP_FROZEN)) {
 		/*
 		 * Once the event is queued the chip thread may handle and
 		 * free it at any time, so read its type beforehand.
 		 */
 		type = ev->type;
 
 		nand_debug(NDBG_SIM,"Chip%d [%p] send event %x",
 		    chip->chip_num, chip, type);
 
 		NANDSIM_CHIP_LOCK(chip);
 		STAILQ_INSERT_TAIL(&chip->nandsim_events, ev, links);
 		NANDSIM_CHIP_UNLOCK(chip);
 
 		wakeup(chip);
 		/* From here on ev is used only as a sleep channel address. */
 		if ((type != NANDSIM_EV_TIMEOUT) && chip->nandsim_td &&
 		    (curthread != chip->nandsim_td))
 			tsleep(ev, PWAIT, "ns_ev", 5 * hz);
 	}
 
 	return (0);
 }
 
 /*
  * Callout handler armed by nandsim_delay(): arg is the event to deliver,
  * which is forwarded to send_event().
  */
 static void
 nandsim_callout_eh(void *arg)
 {
 
 	send_event((struct nandsim_ev *)arg);
 }
 
 /*
  * Arm the chip's callout so that a NANDSIM_EV_TIMEOUT event is sent to the
  * chip later, and move the state machine to NANDSIM_STATE_TIMEOUT.
  *
  * timeout - requested delay; presumably microseconds, like the *_delay
  *           fields split into tv_sec/tv_usec below (confirm with callers).
  *
  * Returns 0 on success, -1 when the event cannot be allocated or when
  * callout_reset() reports that a pending callout was cancelled.
  */
 static int
 nandsim_delay(struct nandsim_chip *chip, int timeout)
 {
 	struct nandsim_ev *ev;
 	struct timeval delay;
 	int tm;
 
 	nand_debug(NDBG_SIM,"Chip[%d] Set delay: %d", chip->chip_num, timeout);
 
 	ev = create_event(chip, NANDSIM_EV_TIMEOUT, 0);
 	if (!ev)
 		return (-1);
 
 	chip->sm_state = NANDSIM_STATE_TIMEOUT;
 	/* Integer division: a timeout below 10000 yields 0 ticks. */
 	tm = (timeout/10000) * (hz / 100);
-	if (callout_reset(&chip->ns_callout, tm, nandsim_callout_eh, ev) &
-	    CALLOUT_RET_CANCELLED) {
+	if (callout_reset(&chip->ns_callout, tm, nandsim_callout_eh, ev).bit.cancelled) {
 		/* XXX we are leaking the old event */
 		return (-1);
 	}
 
 	/*
 	 * Advance the time checked by nandsim_chip_timeout().
 	 * NOTE(review): this always adds chip->read_delay, not the timeout
 	 * argument, even for program and erase delays; confirm intent.
 	 */
 	delay.tv_sec = chip->read_delay / 1000000;
 	delay.tv_usec = chip->read_delay % 1000000;
 	timevaladd(&chip->delay_tv, &delay);
 
 	return (0);
 }
 
 /*
  * Install evh as the chip's event handler and send the chip a
  * NANDSIM_EV_START event so that the new handler initialises its state.
  *
  * When the event cannot be allocated a state machine error is logged and
  * nothing is sent.
  */
 static void
 nandsim_start_handler(struct nandsim_chip *chip, nandsim_evh_t evh)
 {
 	struct nandsim_ev *ev;
 
 	chip->ev_handler = evh;
 
 	nand_debug(NDBG_SIM,"Start handler %p for chip%d [%p]", evh,
 	    chip->chip_num, chip);
 	ev = create_event(chip, NANDSIM_EV_START, 0);
 	if (!ev) {
 		nandsim_sm_error(chip);
 		/* send_event() dereferences its argument; never pass NULL. */
 		return;
 	}
 
 	send_event(ev);
 }
 
 /*
  * Record the chip's current data window: buffer pointer, its length and
  * the starting index within it.
  */
 static void
 nandchip_set_data(struct nandsim_chip *chip, uint8_t *data, uint32_t len,
     uint32_t idx)
 {
 
 	nand_debug(NDBG_SIM,"Chip [%x] data %p [%x] at %x", chip->chip_num,
 	    data, len, idx);
 
 	chip->data.index = idx;
 	chip->data.size = len;
 	chip->data.data_ptr = data;
 }
 
 /*
  * Point the chip's data window at the storage backing the given row.
  *
  * row/column - address to access.
  * size       - requested length; clamped to one block (data plus spare).
  *              A request covering the whole block starts at offset 0 with
  *              column 0; otherwise the window starts at the row's page.
  * writing    - passed through to get_bs().
  *
  * Returns 0 on success.  Returns -1 when the row cannot be translated, or
  * when the block is invalid or has no backing space; in the latter two
  * cases the data window is also cleared.
  */
 static int
 nandchip_chip_space(struct nandsim_chip *chip, int32_t row, int32_t column,
     size_t size, uint8_t writing)
 {
 	struct block_space *bs;
 	uint32_t lun, block, page, start, whole_block;
 
 	if (nand_row_to_blkpg(&chip->cg, row, &lun, &block, &page) != 0) {
 		nand_debug(NDBG_SIM,"cannot get address\n");
 		return (-1);
 	}
 
 	if (!nandchip_is_block_valid(chip, block))
 		goto no_data;
 
 	bs = get_bs(chip->swap, block, writing);
 	if (bs == NULL)
 		goto no_data;
 
 	whole_block = chip->cg.block_size +
 	    (chip->cg.oob_size * chip->cg.pgs_per_blk);
 
 	if (size >= whole_block) {
 		/* Whole-block access always starts at the block's beginning. */
 		size = whole_block;
 		start = 0;
 		column = 0;
 	} else {
 		start = page * (chip->cg.page_size + chip->cg.oob_size);
 	}
 
 	nandchip_set_data(chip, &bs->blk_ptr[start], size, column);
 	return (0);
 
 no_data:
 	nandchip_set_data(chip, NULL, 0, 0);
 	return (-1);
 }
 
 /*
  * Store one received address byte into *value at the position given by
  * the chip's address cycle counter.
  *
  * The number of expected cycles is taken from params.address_cycles: the
  * low nibble in the WAIT_ADDR_ROW state, the high nibble in the
  * WAIT_ADDR_COL state.  Bytes that would not fit into *value are counted
  * but not stored.
  *
  * Returns 0 when the last expected byte has been received (the cycle
  * counter is reset), 1 when more bytes are expected.
  */
 static int
 nandchip_get_addr_byte(struct nandsim_chip *chip, void *data, uint32_t *value)
 {
 	int ncycles = 0;
 	uint8_t byte;
 	uint8_t *buffer;
 
 	buffer = (uint8_t *)value;
 	byte = *((uint8_t *)data);
 
 	KASSERT((chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW ||
 	    chip->sm_state == NANDSIM_STATE_WAIT_ADDR_COL),
 	    ("unexpected state"));
 
 	if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW ||
 	    chip->sm_state == NANDSIM_STATE_WAIT_ADDR_COL) {
 		if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW)
 			ncycles = chip->params.address_cycles & 0xf;
 		else
 			ncycles = (chip->params.address_cycles >> 4) & 0xf;
 
 		/*
 		 * A nibble allows up to 15 cycles but the destination holds
 		 * only sizeof(*value) bytes; never write past it.
 		 */
 		if ((size_t)chip->sm_addr_cycle < sizeof(*value))
 			buffer[chip->sm_addr_cycle] = byte;
 		chip->sm_addr_cycle++;
 	}
 
 	nand_debug(NDBG_SIM, "Chip [%x] read addr byte: %02x (%d of %d)\n",
 	    chip->chip_num, byte, chip->sm_addr_cycle, ncycles);
 
 	if (chip->sm_addr_cycle == ncycles) {
 		chip->sm_addr_cycle = 0;
 		return (0);
 	}
 
 	return (1);
 }
 
 /*
  * Tell whether block_num can be used.
  *
  * Returns 1 for a usable block.  Returns 0 when the chip or its block
  * state is missing, when block_num lies outside the block state array
  * (blocks_per_lun * luns entries), when the block's wear level is 0, or
  * when it is marked bad.
  */
 static int
 nandchip_is_block_valid(struct nandsim_chip *chip, int block_num)
 {
 	uint32_t nblocks;
 
 	if (!chip || !chip->blk_state)
 		return (0);
 
 	/* Same element count that nandsim_chip_init() allocates. */
 	nblocks = chip->params.blocks_per_lun * chip->params.luns;
 	if (block_num < 0 || (uint32_t)block_num >= nblocks)
 		return (0);
 
 	if (chip->blk_state[block_num].wear_lev == 0 ||
 	    chip->blk_state[block_num].is_bad)
 		return (0);
 
 	return (1);
 }
 
 /* Set the given bits in the chip status byte. */
 static void
 nandchip_set_status(struct nandsim_chip *chip, uint8_t flags)
 {
 
 	chip->chip_status = chip->chip_status | flags;
 }
 
 /* Clear the given bits in the chip status byte. */
 static void
 nandchip_clear_status(struct nandsim_chip *chip, uint8_t flags)
 {
 
 	chip->chip_status = chip->chip_status & ~flags;
 }
 
 /* Return the current chip status byte. */
 uint8_t
 nandchip_get_status(struct nandsim_chip *chip)
 {
 	uint8_t status;
 
 	status = chip->chip_status;
 	return (status);
 }
 
 void
 nandsim_chip_timeout(struct nandsim_chip *chip)
 {
 	struct timeval tv;
 
 	getmicrotime(&tv);
 
 	if (chip->sm_state == NANDSIM_STATE_TIMEOUT &&
 	    timevalcmp(&tv, &chip->delay_tv, >=)) {
 		nandchip_set_status(chip, NAND_STATUS_RDY);
 	}
 }
 /*
  * Event handler installed at chip creation.  START moves the state
  * machine to IDLE; the only accepted command is NAND_CMD_RESET, which
  * switches to reset_evh.  Everything else is reported as undefined.
  */
 void
 poweron_evh(struct nandsim_chip *chip, uint32_t type, void *data)
 {
 
 	switch (type) {
 	case NANDSIM_EV_START:
 		chip->sm_state = NANDSIM_STATE_IDLE;
 		break;
 	case NANDSIM_EV_CMD:
 		if (*(uint8_t *)data == NAND_CMD_RESET) {
 			nandsim_log(chip, NANDSIM_LOG_SM, "in RESET state\n");
 			nandsim_start_handler(chip, reset_evh);
 		} else {
 			nandsim_undefined(chip, type);
 		}
 		break;
 	default:
 		nandsim_undefined(chip, type);
 		break;
 	}
 }
 
 void
 idle_evh(struct nandsim_chip *chip, uint32_t type, void *data)
 {
 	uint8_t cmd;
 
 	if (type == NANDSIM_EV_START) {
 		nandsim_log(chip, NANDSIM_LOG_SM, "in IDLE state\n");
 		chip->sm_state = NANDSIM_STATE_WAIT_CMD;
 	} else if (type == NANDSIM_EV_CMD) {
 		nandchip_clear_status(chip, NAND_STATUS_FAIL);
 		getmicrotime(&chip->delay_tv);
 		cmd = *(uint8_t *)data;
 		switch(cmd) {
 		case NAND_CMD_READ_ID:
 			nandsim_start_handler(chip, readid_evh);
 			break;
 		case NAND_CMD_READ_PARAMETER:
 			nandsim_start_handler(chip, readparam_evh);
 			break;
 		case NAND_CMD_READ:
 			nandsim_start_handler(chip, read_evh);
 			break;
 		case NAND_CMD_PROG:
 			nandsim_start_handler(chip, write_evh);
 			break;
 		case NAND_CMD_ERASE:
 			nandsim_start_handler(chip, erase_evh);
 			break;
 		default:
 			nandsim_undefined(chip, type);
 			break;
 		}
 	} else
 		nandsim_undefined(chip, type);
 }
 
 /*
  * READ ID handler.  After START it waits for one address byte: address 0
  * exposes the two id bytes, ONFI_SIG_ADDR exposes the four signature
  * bytes, any other address is logged as bad.  The chip then returns to
  * the idle handler.  Other events are reported as undefined.
  */
 void
 readid_evh(struct nandsim_chip *chip, uint32_t type, void *data)
 {
 	uint8_t addr;
 
 	if (type == NANDSIM_EV_START) {
 		nandsim_log(chip, NANDSIM_LOG_SM, "in READID state\n");
 		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_BYTE;
 		return;
 	}
 
 	if (type != NANDSIM_EV_ADDR) {
 		nandsim_undefined(chip, type);
 		return;
 	}
 
 	addr = *((uint8_t *)data);
 
 	if (addr == 0x0) {
 		nandchip_set_data(chip, (uint8_t *)&chip->id, 2, 0);
 	} else if (addr == ONFI_SIG_ADDR) {
 		nandchip_set_data(chip, (uint8_t *)&chip->params.signature,
 		    4, 0);
 	} else {
 		nandsim_bad_address(chip, &addr);
 	}
 
 	nandsim_start_handler(chip, idle_evh);
 }
 
 /*
  * READ PARAMETER handler.  After START it waits for one address byte:
  * address 0 exposes the whole parameter structure, any other address is
  * logged as bad.  The chip then returns to the idle handler.  Other
  * events are reported as undefined.
  */
 void
 readparam_evh(struct nandsim_chip *chip, uint32_t type, void *data)
 {
 	uint8_t addr;
 
 	if (type == NANDSIM_EV_START) {
 		nandsim_log(chip, NANDSIM_LOG_SM, "in READPARAM state\n");
 		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_BYTE;
 		return;
 	}
 
 	if (type != NANDSIM_EV_ADDR) {
 		nandsim_undefined(chip, type);
 		return;
 	}
 
 	addr = *((uint8_t *)data);
 
 	if (addr != 0)
 		nandsim_bad_address(chip, &addr);
 	else
 		nandchip_set_data(chip, (uint8_t *)&chip->params,
 		    sizeof(chip->params), 0);
 
 	nandsim_start_handler(chip, idle_evh);
 }
 
 /*
  * READ handler.  Collects the column and then the row address bytes and
  * waits for NAND_CMD_READ_END.  With a non-zero read delay that could be
  * armed, RDY is cleared until the TIMEOUT event arrives; otherwise (and
  * on TIMEOUT) the data window is set to one page plus spare at the
  * collected address, RDY is set and the chip returns to idle.
  *
  * The collected column/row are kept in function-static storage.
  */
 void
 read_evh(struct nandsim_chip *chip, uint32_t type, void *data)
 {
 	static uint32_t column = 0, row = 0;
 	uint32_t size;
 	uint8_t cmd;
 
 	size = chip->cg.page_size + chip->cg.oob_size;
 
 	switch (type) {
 	case NANDSIM_EV_START:
 		nandsim_log(chip, NANDSIM_LOG_SM, "in READ state\n");
 		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_COL;
 		break;
 	case NANDSIM_EV_ADDR:
 		if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_COL) {
 			/* Advance only once the last column byte is in. */
 			if (nandchip_get_addr_byte(chip, data, &column) == 0)
 				chip->sm_state = NANDSIM_STATE_WAIT_ADDR_ROW;
 		} else if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW) {
 			/* Advance only once the last row byte is in. */
 			if (nandchip_get_addr_byte(chip, data, &row) == 0)
 				chip->sm_state = NANDSIM_STATE_WAIT_CMD;
 		} else {
 			nandsim_ignore_address(chip, *((uint8_t *)data));
 		}
 		break;
 	case NANDSIM_EV_CMD:
 		cmd = *(uint8_t *)data;
 		if (chip->sm_state != NANDSIM_STATE_WAIT_CMD ||
 		    cmd != NAND_CMD_READ_END) {
 			nandsim_undefined(chip, type);
 			break;
 		}
 
 		if (chip->read_delay == 0 ||
 		    nandsim_delay(chip, chip->read_delay) != 0) {
 			/* No delay configured or it could not be armed. */
 			nandchip_chip_space(chip, row, column, size, 0);
 			nandchip_set_status(chip, NAND_STATUS_RDY);
 			nandsim_start_handler(chip, idle_evh);
 		} else {
 			nandchip_clear_status(chip, NAND_STATUS_RDY);
 		}
 		break;
 	case NANDSIM_EV_TIMEOUT:
 		if (chip->sm_state != NANDSIM_STATE_TIMEOUT) {
 			nandsim_undefined(chip, type);
 			break;
 		}
 
 		nandchip_chip_space(chip, row, column, size, 0);
 		nandchip_set_status(chip, NAND_STATUS_RDY);
 		nandsim_start_handler(chip, idle_evh);
 		break;
 	}
 }
 /*
  * PROGRAM handler.  Collects the column and then the row address bytes;
  * once the row is complete the data window is set to one page plus spare
  * at that address (FAIL is set when that is not possible).  On
  * NAND_CMD_PROG_END either a program delay is armed and RDY cleared, or
  * RDY is set and the chip returns to idle; TIMEOUT also returns to idle.
  *
  * The collected column/row are kept in function-static storage.
  */
 void
 write_evh(struct nandsim_chip *chip, uint32_t type, void *data)
 {
 	static uint32_t column, row;
 	uint32_t size;
 	uint8_t cmd;
 
 	size = chip->cg.page_size + chip->cg.oob_size;
 
 	switch(type) {
 	case NANDSIM_EV_START:
 		nandsim_log(chip, NANDSIM_LOG_SM, "in WRITE state\n");
 		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_COL;
 		break;
 	case NANDSIM_EV_ADDR:
 		if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_COL) {
 			/* Advance only once the last column byte is in. */
 			if (nandchip_get_addr_byte(chip, data, &column) == 0)
 				chip->sm_state = NANDSIM_STATE_WAIT_ADDR_ROW;
 		} else if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW) {
 			if (nandchip_get_addr_byte(chip, data, &row) != 0)
 				break;
 
 			if (nandchip_chip_space(chip, row, column, size, 1) ==
 			    -1)
 				nandchip_set_status(chip, NAND_STATUS_FAIL);
 
 			chip->sm_state = NANDSIM_STATE_WAIT_CMD;
 		} else {
 			nandsim_ignore_address(chip, *((uint8_t *)data));
 		}
 		break;
 	case NANDSIM_EV_CMD:
 		cmd = *(uint8_t *)data;
 		if (chip->sm_state != NANDSIM_STATE_WAIT_CMD ||
 		    cmd != NAND_CMD_PROG_END) {
 			nandsim_undefined(chip, type);
 			break;
 		}
 
 		if (chip->prog_delay == 0 ||
 		    nandsim_delay(chip, chip->prog_delay) != 0) {
 			/* No delay configured or it could not be armed. */
 			nandchip_set_status(chip, NAND_STATUS_RDY);
 			nandsim_start_handler(chip, idle_evh);
 		} else {
 			nandchip_clear_status(chip, NAND_STATUS_RDY);
 		}
 		break;
 	case NANDSIM_EV_TIMEOUT:
 		if (chip->sm_state != NANDSIM_STATE_TIMEOUT) {
 			nandsim_undefined(chip, type);
 			break;
 		}
 
 		nandsim_start_handler(chip, idle_evh);
 		nandchip_set_status(chip, NAND_STATUS_RDY);
 		break;
 	}
 }
 
 void
 erase_evh(struct nandsim_chip *chip, uint32_t type, void *data)
 {
 	static uint32_t row, block_size;
 	uint32_t lun, block, page;
 	int err;
 	uint8_t cmd;
 
 	block_size = chip->cg.block_size +
 	    (chip->cg.oob_size * chip->cg.pgs_per_blk);
 
 	switch (type) {
 	case NANDSIM_EV_START:
 		nandsim_log(chip, NANDSIM_LOG_SM, "in ERASE state\n");
 		chip->sm_state = NANDSIM_STATE_WAIT_ADDR_ROW;
 		break;
 	case NANDSIM_EV_CMD:
 		cmd = *(uint8_t *)data;
 		if (chip->sm_state == NANDSIM_STATE_WAIT_CMD &&
 		    cmd == NAND_CMD_ERASE_END) {
 			if (chip->data.data_ptr != NULL &&
 			    chip->data.size == block_size)
 				memset(chip->data.data_ptr, 0xff, block_size);
 			else
 				nand_debug(NDBG_SIM,"Bad block erase data\n");
 
 			err = nand_row_to_blkpg(&chip->cg, row, &lun,
 			    &block, &page);
 			if (!err) {
 				if (chip->blk_state[block].wear_lev > 0)
 					chip->blk_state[block].wear_lev--;
 			}
 
 			if (chip->erase_delay != 0 &&
 			    nandsim_delay(chip, chip->erase_delay) == 0)
 				nandchip_clear_status(chip, NAND_STATUS_RDY);
 			else {
 				nandchip_set_status(chip, NAND_STATUS_RDY);
 				nandsim_start_handler(chip, idle_evh);
 			}
 		} else
 			nandsim_undefined(chip, type);
 		break;
 	case NANDSIM_EV_ADDR:
 		if (chip->sm_state == NANDSIM_STATE_WAIT_ADDR_ROW) {
 			if (nandchip_get_addr_byte(chip, data, &row))
 				break;
 
 			err = nandchip_chip_space(chip, row, 0, block_size, 1);
 			if (err == -1) {
 				nandchip_set_status(chip, NAND_STATUS_FAIL);
 			}
 			chip->sm_state = NANDSIM_STATE_WAIT_CMD;
 		} else
 			nandsim_ignore_address(chip, *((uint8_t *)data));
 		break;
 	case NANDSIM_EV_TIMEOUT:
 		if (chip->sm_state == NANDSIM_STATE_TIMEOUT) {
 			nandchip_set_status(chip, NAND_STATUS_RDY);
 			nandsim_start_handler(chip, idle_evh);
 		} else
 			nandsim_undefined(chip, type);
 		break;
 	}
 }
 
 /*
  * RESET handler.  On START it enters the TIMEOUT state, clears the data
  * window, busy-waits via DELAY(500) and returns to the idle handler.  Any
  * other event is reported as undefined.
  */
 void
 reset_evh(struct nandsim_chip *chip, uint32_t type, void *data)
 {
 
 	if (type != NANDSIM_EV_START) {
 		nandsim_undefined(chip, type);
 		return;
 	}
 
 	nandsim_log(chip, NANDSIM_LOG_SM, "in RESET state\n");
 	chip->sm_state = NANDSIM_STATE_TIMEOUT;
 	nandchip_set_data(chip, NULL, 0, 0);
 	DELAY(500);
 	nandsim_start_handler(chip, idle_evh);
 }
 
 /*
  * Log an event that is not valid in the current state and fall back to
  * the idle handler.
  */
 static void
 nandsim_undefined(struct nandsim_chip *chip, uint8_t type)
 {
 
 	nandsim_log(chip, NANDSIM_LOG_ERR,
 	    "ERR: Chip received ev %x in state %x\n", type, chip->sm_state);
 
 	nandsim_start_handler(chip, idle_evh);
 }
 
 /*
  * Log an address byte that is not valid for the current command.
  *
  * addr points to a single byte: the callers in this file pass the address
  * of a local uint8_t, so only addr[0] may be read.
  */
 static void
 nandsim_bad_address(struct nandsim_chip *chip, uint8_t *addr)
 {
 
 	nandsim_log(chip, NANDSIM_LOG_ERR,
 	    "ERR: Chip received out of range address %02x\n", addr[0]);
 }
 
 /* Log an address byte that arrived while no address was expected. */
 static void
 nandsim_ignore_address(struct nandsim_chip *chip, uint8_t byte)
 {
 	int value = byte;
 
 	nandsim_log(chip, NANDSIM_LOG_SM, "ignored address byte: %d\n",
 	    value);
 }
 
 /* Log an unrecoverable state machine error for chip. */
 static void
 nandsim_sm_error(struct nandsim_chip *chip)
 {
 
 	nandsim_log(chip, NANDSIM_LOG_ERR,
 	    "ERR: State machine error.Restart required.\n");
 }
Index: projects/hps_head/sys/dev/oce/oce_if.c
===================================================================
--- projects/hps_head/sys/dev/oce/oce_if.c	(revision 309217)
+++ projects/hps_head/sys/dev/oce/oce_if.c	(revision 309218)
@@ -1,2993 +1,2993 @@
 /*-
  * Copyright (C) 2013 Emulex
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * 3. Neither the name of the Emulex Corporation nor the names of its
  *    contributors may be used to endorse or promote products derived from
  *    this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * Contact Information:
  * freebsd-drivers@emulex.com
  *
  * Emulex
  * 3333 Susan Street
  * Costa Mesa, CA 92626
  */
 
 /* $FreeBSD$ */
 
 #include "opt_inet6.h"
 #include "opt_inet.h"
 
 #include "oce_if.h"
 #include "oce_user.h"
 
 /*
  * Non-zero when the packet header of mbuf 'm' has CSUM_TSO set in its
  * csum_flags.  The argument is parenthesized so that any pointer
  * expression (e.g. "p + 1" or "&x") can be passed safely.
  */
 #define is_tso_pkt(m) ((m)->m_pkthdr.csum_flags & CSUM_TSO)
 
 /*
  * Descriptive names for the UE Status Low CSR.  The table has 32 entries;
  * presumably entry N names bit N of the register -- confirm against the
  * code that consumes this table.  Some names carry a trailing space.
  */
 static char *ue_status_low_desc[] = {
         "CEV",
         "CTX",
         "DBUF",
         "ERX",
         "Host",
         "MPU",
         "NDMA",
         "PTC ",
         "RDMA ",
         "RXF ",
         "RXIPS ",
         "RXULP0 ",
         "RXULP1 ",
         "RXULP2 ",
         "TIM ",
         "TPOST ",
         "TPRE ",
         "TXIPS ",
         "TXULP0 ",
         "TXULP1 ",
         "UC ",
         "WDMA ",
         "TXULP2 ",
         "HOST1 ",
         "P0_OB_LINK ",
         "P1_OB_LINK ",
         "HOST_GPIO ",
         "MBOX ",
         "AXGMAC0",
         "AXGMAC1",
         "JTAG",
         "MPU_INTPEND"
 };
 
 /*
  * Descriptive names for the UE Status High CSR.  The table has 32 entries;
  * presumably entry N names bit N of the register -- confirm against the
  * code that consumes this table.  The last eight entries are "Unknown".
  */
 static char *ue_status_hi_desc[] = {
         "LPCMEMHOST",
         "MGMT_MAC",
         "PCS0ONLINE",
         "MPU_IRAM",
         "PCS1ONLINE",
         "PCTL0",
         "PCTL1",
         "PMEM",
         "RR",
         "TXPB",
         "RXPP",
         "XAUI",
         "TXP",
         "ARM",
         "IPC",
         "HOST2",
         "HOST3",
         "HOST4",
         "HOST5",
         "HOST6",
         "HOST7",
         "HOST8",
         "HOST9",
         "NETC",
         "Unknown",
         "Unknown",
         "Unknown",
         "Unknown",
         "Unknown",
         "Unknown",
         "Unknown",
         "Unknown"
 };
 
 /*
  * Receive completion information in a form that does not depend on the
  * CQE layout; this is the type taken by oce_rx_mbuf_chain().  The first
  * byte packs five one-bit flags plus three reserved bits.
  */
 struct oce_common_cqe_info{
         uint8_t vtp:1;
         uint8_t l4_cksum_pass:1;
         uint8_t ip_cksum_pass:1;
         uint8_t ipv6_frame:1;
         uint8_t qnq:1;
         uint8_t rsvd:3;
         uint8_t num_frags;
         uint16_t pkt_size;
         uint16_t vtag;
 };
 
 
 /* Driver entry points prototypes */
 static int  oce_probe(device_t dev);
 static int  oce_attach(device_t dev);
 static int  oce_detach(device_t dev);
 static int  oce_shutdown(device_t dev);
 static int  oce_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
 static void oce_init(void *xsc);
 static int  oce_multiq_start(struct ifnet *ifp, struct mbuf *m);
 static void oce_multiq_flush(struct ifnet *ifp);
 
 /* Driver interrupt routines prototypes */
 static void oce_intr(void *arg, int pending);
 static int  oce_setup_intr(POCE_SOFTC sc);
 static int  oce_fast_isr(void *arg);
 static int  oce_alloc_intr(POCE_SOFTC sc, int vector,
 			  void (*isr) (void *arg, int pending));
 
 /* Media callbacks prototypes */
 static void oce_media_status(struct ifnet *ifp, struct ifmediareq *req);
 static int  oce_media_change(struct ifnet *ifp);
 
 /* Transmit routines prototypes */
 static int  oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index);
 static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq);
 static void oce_process_tx_completion(struct oce_wq *wq);
 static int  oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m,
 				 struct oce_wq *wq);
 
 /* Receive routines prototypes */
 static int  oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe);
 static int  oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe);
 static void oce_rx(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe);
 static void oce_check_rx_bufs(POCE_SOFTC sc, uint32_t num_cqes, struct oce_rq *rq);
 static uint16_t oce_rq_handler_lro(void *arg);
 static void oce_correct_header(struct mbuf *m, struct nic_hwlro_cqe_part1 *cqe1, struct nic_hwlro_cqe_part2 *cqe2);
 static void oce_rx_lro(struct oce_rq *rq, struct nic_hwlro_singleton_cqe *cqe, struct nic_hwlro_cqe_part2 *cqe2);
 static void oce_rx_mbuf_chain(struct oce_rq *rq, struct oce_common_cqe_info *cqe_info, struct mbuf **m);
 
 /* Helper function prototypes in this file */
 static int  oce_attach_ifp(POCE_SOFTC sc);
 static void oce_add_vlan(void *arg, struct ifnet *ifp, uint16_t vtag);
 static void oce_del_vlan(void *arg, struct ifnet *ifp, uint16_t vtag);
 static int  oce_vid_config(POCE_SOFTC sc);
 static void oce_mac_addr_set(POCE_SOFTC sc);
 static int  oce_handle_passthrough(struct ifnet *ifp, caddr_t data);
 static void oce_local_timer(void *arg);
 static void oce_if_deactivate(POCE_SOFTC sc);
 static void oce_if_activate(POCE_SOFTC sc);
 static void setup_max_queues_want(POCE_SOFTC sc);
 static void update_queues_got(POCE_SOFTC sc);
 static void process_link_state(POCE_SOFTC sc,
 		 struct oce_async_cqe_link_state *acqe);
 static int oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m);
 static void oce_get_config(POCE_SOFTC sc);
 static struct mbuf *oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete);
 static void oce_read_env_variables(POCE_SOFTC sc);
 
 
 /* IP specific */
 #if defined(INET6) || defined(INET)
 static int  oce_init_lro(POCE_SOFTC sc);
 static struct mbuf * oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp);
 #endif
 
 /*
  * Device method table: maps the probe, attach, detach and shutdown device
  * methods to this driver's entry points.
  */
 static device_method_t oce_dispatch[] = {
 	DEVMETHOD(device_probe, oce_probe),
 	DEVMETHOD(device_attach, oce_attach),
 	DEVMETHOD(device_detach, oce_detach),
 	DEVMETHOD(device_shutdown, oce_shutdown),
 
 	DEVMETHOD_END
 };
 
 /*
  * Driver description: driver name "oce", the method table above and the
  * size of the per-device softc (OCE_SOFTC).
  */
 static driver_t oce_driver = {
 	"oce",
 	oce_dispatch,
 	sizeof(OCE_SOFTC)
 };
 static devclass_t oce_devclass;
 
 
 DRIVER_MODULE(oce, pci, oce_driver, oce_devclass, 0, 0);
 MODULE_DEPEND(oce, pci, 1, 1, 1);
 MODULE_DEPEND(oce, ether, 1, 1, 1);
 MODULE_VERSION(oce, 1);
 
 
 /* global vars */
 const char component_revision[32] = {"///" COMPONENT_REVISION "///"};
 
 /* Module capabilities and parameters */
 uint32_t oce_max_rsp_handled = OCE_MAX_RSP_HANDLED;
 uint32_t oce_enable_rss = OCE_MODCAP_RSS;
 uint32_t oce_rq_buf_size = 2048;
 
 TUNABLE_INT("hw.oce.max_rsp_handled", &oce_max_rsp_handled);
 TUNABLE_INT("hw.oce.enable_rss", &oce_enable_rss);
 
 
 /*
  * Supported devices table.  Each entry packs the PCI vendor ID into the
  * upper 16 bits and the PCI device ID into the lower 16 bits; oce_probe()
  * splits the entries the same way when matching a device.
  */
 static uint32_t supportedDevices[] =  {
 	(PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE2,
 	(PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE3,
 	(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_BE3,
 	(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201,
 	(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201_VF,
 	(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_SH
 };
 
 /*
  * Head and tail of the singly linked list (chained through ->next) of
  * softcs; oce_attach() appends to it and oce_detach() unlinks from it.
  */
 POCE_SOFTC softc_head = NULL;
 POCE_SOFTC softc_tail = NULL;
 
 struct oce_rdma_if *oce_rdma_if = NULL;
 
 /*****************************************************************************
  *			Driver entry points functions                        *
  *****************************************************************************/
 
 /*
  * device_probe method.
  *
  * Zeroes the softc, records 'dev' in it and compares the PCI vendor/device
  * IDs of 'dev' against supportedDevices[].  On a match the device
  * description is set, the matching OCE_FLAGS_* chip flag is stored in
  * sc->flags and BUS_PROBE_DEFAULT is returned.  ENXIO is returned when the
  * device is not in the table or its device ID has no chip flag.
  */
 static int
 oce_probe(device_t dev)
 {
 	uint16_t vendor = 0;
 	uint16_t device = 0;
 	int i = 0;
 	char str[256] = {0};
 	POCE_SOFTC sc;
 
 	sc = device_get_softc(dev);
 	bzero(sc, sizeof(OCE_SOFTC));
 	sc->dev = dev;
 
 	vendor = pci_get_vendor(dev);
 	device = pci_get_device(dev);
 
 	for (i = 0; i < (sizeof(supportedDevices) / sizeof(uint32_t)); i++) {
 		/* Upper 16 bits hold the vendor ID, lower 16 the device ID. */
 		if (vendor != ((supportedDevices[i] >> 16) & 0xffff) ||
 		    device != (supportedDevices[i] & 0xffff))
 			continue;
 
 		/* Bounded formatting: never write past the end of str. */
 		snprintf(str, sizeof(str), "%s:%s", "Emulex CNA NIC function",
 		    component_revision);
 		device_set_desc_copy(dev, str);
 
 		switch (device) {
 		case PCI_PRODUCT_BE2:
 			sc->flags |= OCE_FLAGS_BE2;
 			break;
 		case PCI_PRODUCT_BE3:
 			sc->flags |= OCE_FLAGS_BE3;
 			break;
 		case PCI_PRODUCT_XE201:
 		case PCI_PRODUCT_XE201_VF:
 			sc->flags |= OCE_FLAGS_XE201;
 			break;
 		case PCI_PRODUCT_SH:
 			sc->flags |= OCE_FLAGS_SH;
 			break;
 		default:
 			return ENXIO;
 		}
 		return BUS_PROBE_DEFAULT;
 	}
 
 	return ENXIO;
 }
 
 
 /*
  * device_attach method.
  *
  * Brings the adapter up step by step: PCI resources, locks, hardware
  * init, configuration, interrupts, queues, ifnet, LRO (when INET or INET6
  * is configured), hardware start, VLAN event handlers, statistics,
  * sysctls and the periodic timer.  On success the softc is appended to
  * the softc_head/softc_tail list and 0 is returned.
  *
  * On failure the labels at the end of the function unwind the steps in
  * reverse order, starting from the label that matches the step that
  * failed, and 'rc' is returned.
  */
 static int
 oce_attach(device_t dev)
 {
 	POCE_SOFTC sc;
 	int rc = 0;
 
 	sc = device_get_softc(dev);
 
 	rc = oce_hw_pci_alloc(sc);
 	if (rc)
 		return rc;
 
 	sc->tx_ring_size = OCE_TX_RING_SIZE;
 	sc->rx_ring_size = OCE_RX_RING_SIZE;
 	/* receive fragment size should be multiple of 2K */
 	sc->rq_frag_size = ((oce_rq_buf_size / 2048) * 2048);
 	sc->flow_control = OCE_DEFAULT_FLOW_CONTROL;
 	sc->promisc	 = OCE_DEFAULT_PROMISCUOUS;
 
 	LOCK_CREATE(&sc->bmbx_lock, "Mailbox_lock");
 	LOCK_CREATE(&sc->dev_lock,  "Device_lock");
 
 	/* initialise the hardware */
 	rc = oce_hw_init(sc);
 	if (rc)
 		goto pci_res_free;
 
 	oce_read_env_variables(sc);
 
 	oce_get_config(sc);
 
 	setup_max_queues_want(sc);	
 
 	rc = oce_setup_intr(sc);
 	if (rc)
 		goto mbox_free;
 
 	rc = oce_queue_init_all(sc);
 	if (rc)
 		goto intr_free;
 
 	rc = oce_attach_ifp(sc);
 	if (rc)
 		goto queues_free;
 
 #if defined(INET6) || defined(INET)
 	rc = oce_init_lro(sc);
 	if (rc)
 		goto ifp_free;
 #endif
 
 	rc = oce_hw_start(sc);
 	if (rc)
 		goto lro_free;
 
 	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 				oce_add_vlan, sc, EVENTHANDLER_PRI_FIRST);
 	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 				oce_del_vlan, sc, EVENTHANDLER_PRI_FIRST);
 
 	rc = oce_stats_init(sc);
 	if (rc)
 		goto vlan_free;
 
 	oce_add_sysctls(sc);
 
 	/* Arm the periodic timer; oce_local_timer is scheduled 2 * hz ticks out. */
 	callout_init(&sc->timer, CALLOUT_MPSAFE);
-	rc = callout_reset(&sc->timer, 2 * hz, oce_local_timer, sc);
-	if (rc & CALLOUT_RET_CANCELLED)
+	rc = callout_reset(&sc->timer, 2 * hz, oce_local_timer, sc).bit.cancelled;
+	if (rc)
 		goto stats_free;
 
 	/* Append this adapter to the tail of the global softc list. */
 	sc->next =NULL;
 	if (softc_tail != NULL) {
 	  softc_tail->next = sc;
 	} else {
 	  softc_head = sc;
 	}
 	softc_tail = sc;
 
 	return 0;
 
 	/* Error unwinding: each label falls through into the ones below it. */
 stats_free:
 	callout_drain(&sc->timer);
 	oce_stats_free(sc);
 vlan_free:
 	if (sc->vlan_attach)
 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
 	if (sc->vlan_detach)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
 	oce_hw_intr_disable(sc);
 lro_free:
 #if defined(INET6) || defined(INET)
 	oce_free_lro(sc);
 ifp_free:
 #endif
 	ether_ifdetach(sc->ifp);
 	if_free(sc->ifp);
 queues_free:
 	oce_queue_release_all(sc);
 intr_free:
 	oce_intr_free(sc);
 mbox_free:
 	oce_dma_free(sc, &sc->bsmbx);
 pci_res_free:
 	oce_hw_pci_free(sc);
 	LOCK_DESTROY(&sc->dev_lock);
 	LOCK_DESTROY(&sc->bmbx_lock);
 	return rc;
 
 }
 
 
 /*
  * device_detach method.
  *
  * Unlinks the softc from the global softc_head/softc_tail list,
  * deactivates the interface under the device lock, drains the timer,
  * deregisters the VLAN event handlers, detaches and frees the ifnet, shuts
  * the hardware down and detaches child devices.  Always returns 0.
  */
 static int
 oce_detach(device_t dev)
 {
 	POCE_SOFTC sc = device_get_softc(dev);
 	POCE_SOFTC cur, prev = NULL;
 	POCE_SOFTC *linkp = &softc_head;
 
 	/*
 	 * Walk the list while remembering both the previous element and the
 	 * link that points at the current one, so that the unlink works for
 	 * the head, an interior element and the tail alike.
 	 */
 	for (cur = softc_head; cur != NULL; cur = cur->next) {
 		if (cur == sc) {
 			*linkp = sc->next;
 			if (sc->next == NULL)
 				softc_tail = prev;
 			break;
 		}
 		prev = cur;
 		linkp = &cur->next;
 	}
 
 	LOCK(&sc->dev_lock);
 	oce_if_deactivate(sc);
 	UNLOCK(&sc->dev_lock);
 
 	callout_drain(&sc->timer);
 
 	if (sc->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
 	if (sc->vlan_detach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
 
 	ether_ifdetach(sc->ifp);
 	if_free(sc->ifp);
 
 	oce_hw_shutdown(sc);
 
 	bus_generic_detach(dev);
 
 	return 0;
 }
 
 
 /*
  * device_shutdown method: shutting down is implemented as a full detach,
  * and the result of oce_detach() is returned unchanged.
  */
 static int
 oce_shutdown(device_t dev)
 {
 
 	return oce_detach(dev);
 }
 
 
 /*
  * Interface ioctl handler.
  *
  * Handles media queries, MTU changes, interface flag changes (up/down and
  * promiscuous mode), multicast list updates, capability changes and the
  * SIOCGPRIVATE_0 passthrough; everything else is forwarded to
  * ether_ioctl().  Returns 0 on success or an errno value.
  */
 static int
 oce_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	POCE_SOFTC sc = ifp->if_softc;
 	int rc = 0;
 	uint32_t u;
 
 	switch (command) {
 
 	case SIOCGIFMEDIA:
 		rc = ifmedia_ioctl(ifp, ifr, &sc->media, command);
 		break;
 
 	case SIOCSIFMTU:
 		/* Only the upper bound of the requested MTU is checked here. */
 		if (ifr->ifr_mtu > OCE_MAX_MTU)
 			rc = EINVAL;
 		else
 			ifp->if_mtu = ifr->ifr_mtu;
 		break;
 
 	case SIOCSIFFLAGS:
 		if (ifp->if_flags & IFF_UP) {
 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;	
 				oce_init(sc);
 			}
 			device_printf(sc->dev, "Interface Up\n");	
 		} else {
 			LOCK(&sc->dev_lock);
 
 			sc->ifp->if_drv_flags &=
 			    ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 			oce_if_deactivate(sc);
 
 			UNLOCK(&sc->dev_lock);
 
 			device_printf(sc->dev, "Interface Down\n");
 		}
 
 		/*
 		 * Keep sc->promisc in step with IFF_PROMISC; the cached value
 		 * is only updated when the hardware call returns 0.
 		 */
 		if ((ifp->if_flags & IFF_PROMISC) && !sc->promisc) {
 			if (!oce_rxf_set_promiscuous(sc, (1 | (1 << 1))))
 				sc->promisc = TRUE;
 		} else if (!(ifp->if_flags & IFF_PROMISC) && sc->promisc) {
 			if (!oce_rxf_set_promiscuous(sc, 0))
 				sc->promisc = FALSE;
 		}
 
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		rc = oce_hw_update_multicast(sc);
 		if (rc)
 			device_printf(sc->dev,
 				"Update multicast address failed\n");
 		break;
 
 	case SIOCSIFCAP:
 		/* 'u' holds the capability bits that differ from the current set. */
 		u = ifr->ifr_reqcap ^ ifp->if_capenable;
 
 		if (u & IFCAP_TXCSUM) {
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
 			
 			/* TSO is switched off whenever TX checksum offload is off. */
 			if (IFCAP_TSO & ifp->if_capenable &&
 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
 				ifp->if_capenable &= ~IFCAP_TSO;
 				ifp->if_hwassist &= ~CSUM_TSO;
 				if_printf(ifp,
 					 "TSO disabled due to -txcsum.\n");
 			}
 		}
 
 		if (u & IFCAP_RXCSUM)
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 
 		if (u & IFCAP_TSO4) {
 			ifp->if_capenable ^= IFCAP_TSO4;
 
 			if (IFCAP_TSO & ifp->if_capenable) {
 				if (IFCAP_TXCSUM & ifp->if_capenable)
 					ifp->if_hwassist |= CSUM_TSO;
 				else {
 					/* Enabling TSO without txcsum is rejected with EAGAIN. */
 					ifp->if_capenable &= ~IFCAP_TSO;
 					ifp->if_hwassist &= ~CSUM_TSO;
 					if_printf(ifp,
 					    "Enable txcsum first.\n");
 					rc = EAGAIN;
 				}
 			} else
 				ifp->if_hwassist &= ~CSUM_TSO;
 		}
 
 		if (u & IFCAP_VLAN_HWTAGGING)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 
 		if (u & IFCAP_VLAN_HWFILTER) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 			oce_vid_config(sc);
 		}
 #if defined(INET6) || defined(INET)
 		if (u & IFCAP_LRO) {
 			ifp->if_capenable ^= IFCAP_LRO;
 			/* With hardware LRO enabled, pass the new setting to the adapter. */
 			if(sc->enable_hwlro) {
 				if(ifp->if_capenable & IFCAP_LRO) {
 					rc = oce_mbox_nic_set_iface_lro_config(sc, 1);
 				}else {
 					rc = oce_mbox_nic_set_iface_lro_config(sc, 0);
 				}
 			}
 		}
 #endif
 
 		break;
 
 	case SIOCGPRIVATE_0:
 		rc = oce_handle_passthrough(ifp, data);
 		break;
 	default:
 		rc = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return rc;
 }
 
 
 /*
  * (Re)initialize the interface.  'arg' is the softc.  While holding the
  * device lock, and only if the interface is marked IFF_UP, the interface
  * is deactivated and then activated again.
  */
 static void
 oce_init(void *arg)
 {
 	POCE_SOFTC sc = arg;
 	
 	LOCK(&sc->dev_lock);
 
 	if (sc->ifp->if_flags & IFF_UP) {
 		oce_if_deactivate(sc);
 		oce_if_activate(sc);
 	}
 	
 	UNLOCK(&sc->dev_lock);
 
 }
 
 
 /*
  * Multiqueue transmit entry point.
  *
  * Returns ENXIO while the link is down.  Otherwise the packet is handed to
  * oce_multiq_transmit() under the TX lock of the selected work queue:
  * queue (flowid % nwqs) when the mbuf carries a hash type, queue 0 when it
  * does not.  The result of oce_multiq_transmit() is returned.
  */
 static int
 oce_multiq_start(struct ifnet *ifp, struct mbuf *m)
 {
 	POCE_SOFTC sc = ifp->if_softc;
 	struct oce_wq *txq;
 	int qidx = 0;
 	int rc;
 
 	if (!sc->link_status)
 		return ENXIO;
 
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 		qidx = m->m_pkthdr.flowid % sc->nwqs;
 	txq = sc->wq[qidx];
 
 	LOCK(&txq->tx_lock);
 	rc = oce_multiq_transmit(ifp, m, txq);
 	UNLOCK(&txq->tx_lock);
 
 	return rc;
 }
 
 
 /*
  * Flush all transmit queues: every mbuf still sitting in the buf_ring of
  * each work queue is dequeued and freed, then the generic interface queue
  * is flushed with if_qflush().
  */
 static void
 oce_multiq_flush(struct ifnet *ifp)
 {
 	POCE_SOFTC sc = ifp->if_softc;
 	struct mbuf *pkt;
 	int q;
 
 	for (q = 0; q < sc->nwqs; q++) {
 		for (;;) {
 			pkt = buf_ring_dequeue_sc(sc->wq[q]->br);
 			if (pkt == NULL)
 				break;
 			m_freem(pkt);
 		}
 	}
 
 	if_qflush(ifp);
 }
 
 
 
 /*****************************************************************************
  *                   Driver interrupt routines functions                     *
  *****************************************************************************/
 
 /*
  * Deferred interrupt handler, run as a taskqueue task.
  *
  * 'arg' is the POCE_INTR_INFO of the vector; 'pending' is not used.  The
  * handler consumes every event queue entry whose 'evnt' field is set,
  * acknowledges them, runs the handler of each completion queue attached
  * to the event queue, re-arms those completion queues and finally re-arms
  * the event queue.  When no entry was pending only the final re-arm is
  * done.
  */
 static void
 oce_intr(void *arg, int pending)
 {
 	POCE_INTR_INFO ii = (POCE_INTR_INFO) arg;
 	POCE_SOFTC sc = ii->sc;
 	struct oce_eq *eq = ii->eq;
 	struct oce_eqe *eqe;
 	struct oce_cq *cq = NULL;
 	int num_eqes = 0;
 	int i;
 
 	bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map,
 	    BUS_DMASYNC_POSTWRITE);
 
 	/* Consume event queue entries until one with evnt == 0 is found. */
 	for (;;) {
 		eqe = RING_GET_CONSUMER_ITEM_VA(eq->ring, struct oce_eqe);
 		if (eqe->evnt == 0)
 			break;
 		eqe->evnt = 0;
 		bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map,
 		    BUS_DMASYNC_POSTWRITE);
 		RING_GET(eq->ring, 1);
 		num_eqes++;
 	}
 
 	/* num_eqes == 0 means the interrupt was spurious. */
 	if (num_eqes != 0) {
 		/* Clear EQ entries, but don't arm. */
 		oce_arm_eq(sc, eq->eq_id, num_eqes, FALSE, FALSE);
 
 		/* Process TX, RX and MCC, but don't arm the CQs yet. */
 		for (i = 0; i < eq->cq_valid; i++) {
 			cq = eq->cq[i];
 			(*cq->cq_handler)(cq->cb_arg);
 		}
 
 		/* Arm all CQs connected to this EQ. */
 		for (i = 0; i < eq->cq_valid; i++) {
 			cq = eq->cq[i];
 			oce_arm_cq(sc, cq->cq_id, 0, TRUE);
 		}
 	}
 
 	oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE);
 }
 
 
 /*
  * Decide how many interrupt vectors to use and allocate them.
  *
  * With RSS enabled MAX(nrqs - 1, nwqs) vectors are wanted, otherwise one.
  * When RDMA is supported and more than one vector is wanted,
  * OCE_RDMA_VECTORS additional vectors are requested.  MSI-X is tried
  * first when the adapter is MSI-X capable; if that allocation fails, or
  * the adapter is not capable, a single legacy interrupt is used.
  *
  * Returns 0 on success.  On failure everything allocated so far is
  * released through oce_intr_free() and the error code is returned.
  */
 static int
 oce_setup_intr(POCE_SOFTC sc)
 {
 	int rc = 0, use_intx = 0;
 	int vector = 0, req_vectors = 0;
 	int tot_req_vectors, tot_vectors;
 
 	if (is_rss_enabled(sc))
 		req_vectors = MAX((sc->nrqs - 1), sc->nwqs);
 	else
 		req_vectors = 1;
 
 	tot_req_vectors = req_vectors;
 	if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) {
 	  if (req_vectors > 1) {
 	    tot_req_vectors += OCE_RDMA_VECTORS;
 	    sc->roce_intr_count = OCE_RDMA_VECTORS;
 	  }
 	}
 
         if (sc->flags & OCE_FLAGS_MSIX_CAPABLE) {
 		sc->intr_count = req_vectors;
                 tot_vectors = tot_req_vectors;
 		/* pci_alloc_msix() updates tot_vectors to the count actually granted. */
 		rc = pci_alloc_msix(sc->dev, &tot_vectors);
 		if (rc != 0) {
 			use_intx = 1;
 			pci_release_msi(sc->dev);
 		} else {
 		  if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) {
 		    /*
 		     * Fewer vectors than requested: split what was granted
 		     * between the NIC and the RDMA side.
 		     */
 		    if (tot_vectors < tot_req_vectors) {
 		      if (sc->intr_count < (2 * OCE_RDMA_VECTORS)) {
 			sc->roce_intr_count = (tot_vectors / 2);
 		      }
 		      sc->intr_count = tot_vectors - sc->roce_intr_count;
 		    }
 		  } else {
 		    sc->intr_count = tot_vectors;
 		  }
     		  sc->flags |= OCE_FLAGS_USING_MSIX;
 		}
 	} else
 		use_intx = 1;
 
 	if (use_intx)
 		sc->intr_count = 1;
 
 	/* Scale number of queues based on intr we got */
 	update_queues_got(sc);
 
 	if (use_intx) {
 		device_printf(sc->dev, "Using legacy interrupt\n");
 		rc = oce_alloc_intr(sc, vector, oce_intr);
 		if (rc)
 			goto error;		
 	} else {
 		for (; vector < sc->intr_count; vector++) {
 			rc = oce_alloc_intr(sc, vector, oce_intr);
 			if (rc)
 				goto error;
 		}
 	}
 
 	return 0;
 error:
 	oce_intr_free(sc);
 	return rc;
 }
 
 
 static int
 oce_fast_isr(void *arg)
 {
 	POCE_INTR_INFO ii = (POCE_INTR_INFO) arg;
 	POCE_SOFTC sc = ii->sc;
 
 	if (ii->eq == NULL)
 		return FILTER_STRAY;
 
 	oce_arm_eq(sc, ii->eq->eq_id, 0, FALSE, TRUE);
 
 	taskqueue_enqueue(ii->tq, &ii->task);
 
  	ii->eq->intr++;	
 
 	return FILTER_HANDLED;
 }
 
 
 /*
  * Allocate and wire up one interrupt vector.
  *
  * sc     - adapter softc
  * vector - index into sc->intrs[]; must be in [0, OCE_MAX_EQ)
  * isr    - task function run from the vector's taskqueue
  *
  * Allocates the IRQ resource, creates a taskqueue with one thread for the
  * deferred handler and installs oce_fast_isr() as the interrupt filter.
  *
  * Returns 0 on success, EINVAL for an out-of-range vector, ENXIO when the
  * IRQ resource cannot be allocated, ENOMEM when the taskqueue cannot be
  * created, or the error from bus_setup_intr().  On failure, whatever was
  * already stored in sc->intrs[vector] is left in place; oce_setup_intr()
  * releases it through oce_intr_free().
  */
 static int
 oce_alloc_intr(POCE_SOFTC sc, int vector, void (*isr) (void *arg, int pending))
 {
 	POCE_INTR_INFO ii;
 	int rc = 0, rr;
 
 	/* Validate the index before forming a pointer into sc->intrs[]. */
 	if (vector < 0 || vector >= OCE_MAX_EQ)
 		return (EINVAL);
 	ii = &sc->intrs[vector];
 
 	/* Set the resource id for the interrupt.
 	 * MSIx is vector + 1 for the resource id,
 	 * INTx is 0 for the resource id.
 	 */
 	if (sc->flags & OCE_FLAGS_USING_MSIX)
 		rr = vector + 1;
 	else
 		rr = 0;
 	ii->intr_res = bus_alloc_resource_any(sc->dev,
 					      SYS_RES_IRQ,
 					      &rr, RF_ACTIVE|RF_SHAREABLE);
 	ii->irq_rr = rr;
 	if (ii->intr_res == NULL) {
 		device_printf(sc->dev,
 			  "Could not allocate interrupt\n");
 		rc = ENXIO;
 		return rc;
 	}
 
 	TASK_INIT(&ii->task, 0, isr, ii);
 	ii->vector = vector;
 	sprintf(ii->task_name, "oce_task[%d]", ii->vector);
 	ii->tq = taskqueue_create_fast(ii->task_name,
 			M_NOWAIT,
 			taskqueue_thread_enqueue,
 			&ii->tq);
 	/* An M_NOWAIT allocation may fail; do not start threads on NULL. */
 	if (ii->tq == NULL) {
 		device_printf(sc->dev,
 			  "Could not create interrupt taskqueue\n");
 		return (ENOMEM);
 	}
 	taskqueue_start_threads(&ii->tq, 1, PI_NET, "%s taskq",
 			device_get_nameunit(sc->dev));
 
 	ii->sc = sc;
 	rc = bus_setup_intr(sc->dev,
 			ii->intr_res,
 			INTR_TYPE_NET,
 			oce_fast_isr, NULL, ii, &ii->tag);
 	return rc;
 
 }
 
 
 /*
  * Release the interrupt state of the first sc->intr_count vectors: tear
  * down the handler, free the taskqueue and release the IRQ resource, each
  * only if present.  The corresponding fields are reset to NULL so that a
  * repeated call does not touch released objects.  When MSI-X is in use
  * the MSI-X allocation is released as well.
  */
 void
 oce_intr_free(POCE_SOFTC sc)
 {
 	int i = 0;
 
 	for (i = 0; i < sc->intr_count; i++) {
 
 		if (sc->intrs[i].tag != NULL)
 			bus_teardown_intr(sc->dev, sc->intrs[i].intr_res,
 						sc->intrs[i].tag);
 		if (sc->intrs[i].tq != NULL)
 			taskqueue_free(sc->intrs[i].tq);
 
 		if (sc->intrs[i].intr_res != NULL)
 			bus_release_resource(sc->dev, SYS_RES_IRQ,
 						sc->intrs[i].irq_rr,
 						sc->intrs[i].intr_res);
 		sc->intrs[i].tag = NULL;
 		/* Clear tq too, so the freed taskqueue is never freed twice. */
 		sc->intrs[i].tq = NULL;
 		sc->intrs[i].intr_res = NULL;
 	}
 
 	if (sc->flags & OCE_FLAGS_USING_MSIX)
 		pci_release_msi(sc->dev);
 
 }
 
 
 
 /******************************************************************************
 *			  Media callbacks functions 			      *
 ******************************************************************************/
 
 /*
  * ifmedia status callback.
  *
  * Always reports IFM_AVALID and IFM_ETHER.  When sc->link_status is 1 the
  * link is additionally reported as active, and sc->link_speed is
  * translated into a full-duplex media type and a value for sc->speed.  An
  * unrecognized link_speed sets sc->speed to 0 and adds no media type.
  */
 static void
 oce_media_status(struct ifnet *ifp, struct ifmediareq *req)
 {
 	POCE_SOFTC sc = (POCE_SOFTC) ifp->if_softc;
 	int media;
 
 	req->ifm_status = IFM_AVALID;
 	req->ifm_active = IFM_ETHER;
 
 	/* Nothing more to report unless the link is up. */
 	if (sc->link_status != 1)
 		return;
 	req->ifm_status |= IFM_ACTIVE;
 
 	switch (sc->link_speed) {
 	case 1: /* 10 Mbps */
 		media = IFM_10_T;
 		sc->speed = 10;
 		break;
 	case 2: /* 100 Mbps */
 		media = IFM_100_TX;
 		sc->speed = 100;
 		break;
 	case 3: /* 1 Gbps */
 		media = IFM_1000_T;
 		sc->speed = 1000;
 		break;
 	case 4: /* 10 Gbps */
 		media = IFM_10G_SR;
 		sc->speed = 10000;
 		break;
 	case 5: /* 20 Gbps, reported with the 10G media type */
 		media = IFM_10G_SR;
 		sc->speed = 20000;
 		break;
 	case 6: /* 25 Gbps, reported with the 10G media type */
 		media = IFM_10G_SR;
 		sc->speed = 25000;
 		break;
 	case 7: /* 40 Gbps */
 		media = IFM_40G_SR4;
 		sc->speed = 40000;
 		break;
 	default:
 		sc->speed = 0;
 		return;
 	}
 
 	req->ifm_active |= media | IFM_FDX;
 }
 
 
 /*
  * ifmedia change callback.  Nothing is changed; 0 is always returned.
  */
 int
 oce_media_change(struct ifnet *ifp)
 {
 	return 0;
 }
 
 
 /*
  * Decide whether the outgoing packet 'm' must also be sent to the BMC.
  *
  * os2bmc - in/out flag.  If OS2BMC is disabled, or the flag is already
  *          TRUE on entry, it is set to FALSE and nothing else happens.
  *          Otherwise it is set according to the filter checks below.
  * m_new  - when *os2bmc ends up TRUE, receives a copy of 'm' (made with
  *          m_dup()) after it was passed through oce_insert_vlan_tag().
  *          If the copy cannot be allocated, *os2bmc is reset to FALSE.
  *
  * Only packets with a multicast destination address are considered.
  */
 static void oce_is_pkt_dest_bmc(POCE_SOFTC sc,
 				struct mbuf *m, boolean_t *os2bmc,
 				struct mbuf **m_new)
 {
 	struct ether_header *eh = NULL;
 
 	eh = mtod(m, struct ether_header *);
 
 	if (!is_os2bmc_enabled(sc) || *os2bmc) {
 		*os2bmc = FALSE;
 		goto done;
 	}
 	if (!ETHER_IS_MULTICAST(eh->ether_dhost))
 		goto done;
 
 	if (is_mc_allowed_on_bmc(sc, eh) ||
 	    is_bc_allowed_on_bmc(sc, eh) ||
 	    is_arp_allowed_on_bmc(sc, ntohs(eh->ether_type))) {
 		*os2bmc = TRUE;
 		goto done;
 	}
 
 	/*
 	 * NOTE(review): mtod() is applied to the same mbuf as for the
 	 * Ethernet header above, with no offset added -- confirm that this
 	 * is the intended position of the IP/IPv6 header.
 	 */
 	if (mtod(m, struct ip *)->ip_p == IPPROTO_IPV6) {
 		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 		uint8_t nexthdr = ip6->ip6_nxt;
 		if (nexthdr == IPPROTO_ICMPV6) {
 			struct icmp6_hdr *icmp6 = (struct icmp6_hdr *)(ip6 + 1);
 			switch (icmp6->icmp6_type) {
 			case ND_ROUTER_ADVERT:
 				*os2bmc = is_ipv6_ra_filt_enabled(sc);
 				goto done;
 			case ND_NEIGHBOR_ADVERT:
 				*os2bmc = is_ipv6_na_filt_enabled(sc);
 				goto done;
 			default:
 				break;
 			}
 		}
 	}
 
 	if (mtod(m, struct ip *)->ip_p == IPPROTO_UDP) {
 		struct ip *ip = mtod(m, struct ip *);
 		int iphlen = ip->ip_hl << 2;
 		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
 		/*
 		 * NOTE(review): uh_dport is compared without a byte-order
 		 * conversion -- confirm the byte order of the *_PORT constants.
 		 */
 		switch (uh->uh_dport) {
 		case DHCP_CLIENT_PORT:
 			*os2bmc = is_dhcp_client_filt_enabled(sc);
 			goto done;
 		case DHCP_SERVER_PORT:
 			*os2bmc = is_dhcp_srvr_filt_enabled(sc);
 			goto done;
 		case NET_BIOS_PORT1:
 		case NET_BIOS_PORT2:
 			*os2bmc = is_nbios_filt_enabled(sc);
 			goto done;
 		case DHCPV6_RAS_PORT:
 			*os2bmc = is_ipv6_ras_filt_enabled(sc);
 			goto done;
 		default:
 			break;
 		}
 	}
 done:
 	if (*os2bmc) {
 		*m_new = m_dup(m, M_NOWAIT);
 		if (!*m_new) {
 			*os2bmc = FALSE;
 			return;
 		}
 		*m_new = oce_insert_vlan_tag(sc, *m_new, NULL);
 	}
 }
 
 
 
 /*****************************************************************************
  *			  Transmit routines functions			     *
  *****************************************************************************/
 
 /*
  * Post one packet on work queue 'wq_index'.
  *
  * mpp - in/out pointer to the packet.  On the free_ret path the mbuf is
  *       freed and *mpp is set to NULL.
  *
  * The packet is DMA-mapped (with one m_defrag() retry on EFBIG), a header
  * WQE and one fragment WQE per DMA segment are written to the ring, and
  * the doorbell register is written.  When oce_is_pkt_dest_bmc() asks for
  * it, the copy it produced is then sent through the same path.
  *
  * Returns 0 on success, EINVAL for a NULL packet, EBUSY when the ring has
  * too few free entries, ENOMEM when the DMA load reports ENOMEM, or
  * another error code after the packet has been freed.
  */
 static int
 oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index)
 {
 	int rc = 0, i, retry_cnt = 0;
 	bus_dma_segment_t segs[OCE_MAX_TX_ELEMENTS];
 	struct mbuf *m, *m_temp, *m_new = NULL;
 	struct oce_wq *wq = sc->wq[wq_index];
 	struct oce_packet_desc *pd;
 	struct oce_nic_hdr_wqe *nichdr;
 	struct oce_nic_frag_wqe *nicfrag;
 	struct ether_header *eh = NULL;
 	int num_wqes;
 	uint32_t reg_value;
 	boolean_t complete = TRUE;
 	boolean_t os2bmc = FALSE;
 
 	m = *mpp;
 	if (!m)
 		return EINVAL;
 
 	if (!(m->m_flags & M_PKTHDR)) {
 		rc = ENXIO;
 		goto free_ret;
 	}
 
 	/* Don't allow non-TSO packets longer than MTU */
 	if (!is_tso_pkt(m)) {
 		eh = mtod(m, struct ether_header *);
 		/* rc is still 0 here: the packet is freed and 0 is returned. */
 		if(m->m_pkthdr.len > ETHER_MAX_FRAME(sc->ifp, eh->ether_type, FALSE))
 			 goto free_ret;
 	}
 
 	if(oce_tx_asic_stall_verify(sc, m)) {
 		/*
 		 * NOTE(review): 'm' is replaced by the returned mbuf, but
 		 * *mpp is not updated here -- confirm against
 		 * oce_insert_vlan_tag() whether the old pointer can go stale.
 		 */
 		m = oce_insert_vlan_tag(sc, m, &complete);
 		if(!m) {
 			device_printf(sc->dev, "Insertion unsuccessful\n");
 			return 0;
 		}
 
 	}
 
 	/* Lancer, SH ASIC has a bug wherein Packets that are 32 bytes or less
 	 * may cause a transmit stall on that port. So the work-around is to
 	 * pad short packets (<= 32 bytes) to a 36-byte length.
 	*/
 	if(IS_SH(sc) || IS_XE201(sc) ) {
 		if(m->m_pkthdr.len <= 32) {
 			char buf[36];
 			bzero((void *)buf, 36);
 			m_append(m, (36 - m->m_pkthdr.len), buf);
 		}
 	}
 
 tx_start:
 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
 		/* consolidate packet buffers for TSO/LSO segment offload */
 #if defined(INET6) || defined(INET)
 		m = oce_tso_setup(sc, mpp);
 #else
 		m = NULL;
 #endif
 		if (m == NULL) {
 			rc = ENXIO;
 			goto free_ret;
 		}
 	}
 
 
 	pd = &wq->pckts[wq->pkt_desc_head];
 
 retry:
 	rc = bus_dmamap_load_mbuf_sg(wq->tag,
 				     pd->map,
 				     m, segs, &pd->nsegs, BUS_DMA_NOWAIT);
 	if (rc == 0) {
 		/* One header WQE plus one fragment WQE per DMA segment. */
 		num_wqes = pd->nsegs + 1;
 		if (IS_BE(sc) || IS_SH(sc)) {
 			/*Dummy required only for BE3.*/
 			if (num_wqes & 1)
 				num_wqes++;
 		}
 		if (num_wqes >= RING_NUM_FREE(wq->ring)) {
 			bus_dmamap_unload(wq->tag, pd->map);
 			return EBUSY;
 		}
 		atomic_store_rel_int(&wq->pkt_desc_head,
 				     (wq->pkt_desc_head + 1) % \
 				      OCE_WQ_PACKET_ARRAY_SIZE);
 		bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_PREWRITE);
 		pd->mbuf = m;
 
 		nichdr =
 		    RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_hdr_wqe);
 		nichdr->u0.dw[0] = 0;
 		nichdr->u0.dw[1] = 0;
 		nichdr->u0.dw[2] = 0;
 		nichdr->u0.dw[3] = 0;
 
 		nichdr->u0.s.complete = complete;
 		nichdr->u0.s.mgmt = os2bmc;
 		nichdr->u0.s.event = 1;
 		nichdr->u0.s.crc = 1;
 		nichdr->u0.s.forward = 0;
 		nichdr->u0.s.ipcs = (m->m_pkthdr.csum_flags & CSUM_IP) ? 1 : 0;
 		nichdr->u0.s.udpcs =
 			(m->m_pkthdr.csum_flags & CSUM_UDP) ? 1 : 0;
 		nichdr->u0.s.tcpcs =
 			(m->m_pkthdr.csum_flags & CSUM_TCP) ? 1 : 0;
 		nichdr->u0.s.num_wqe = num_wqes;
 		nichdr->u0.s.total_length = m->m_pkthdr.len;
 
 		if (m->m_flags & M_VLANTAG) {
 			nichdr->u0.s.vlan = 1; /*Vlan present*/
 			nichdr->u0.s.vlan_tag = m->m_pkthdr.ether_vtag;
 		}
 
 		if (m->m_pkthdr.csum_flags & CSUM_TSO) {
 			if (m->m_pkthdr.tso_segsz) {
 				nichdr->u0.s.lso = 1;
 				nichdr->u0.s.lso_mss  = m->m_pkthdr.tso_segsz;
 			}
 			/*
 			 * NOTE(review): this condition is false only when both
 			 * IS_BE() and IS_SH() hold -- confirm whether '&&' was
 			 * intended.
 			 */
 			if (!IS_BE(sc) || !IS_SH(sc))
 				nichdr->u0.s.ipcs = 1;
 		}
 
 		RING_PUT(wq->ring, 1);
 		atomic_add_int(&wq->ring->num_used, 1);
 
 		for (i = 0; i < pd->nsegs; i++) {
 			nicfrag =
 			    RING_GET_PRODUCER_ITEM_VA(wq->ring,
 						      struct oce_nic_frag_wqe);
 			nicfrag->u0.s.rsvd0 = 0;
 			nicfrag->u0.s.frag_pa_hi = ADDR_HI(segs[i].ds_addr);
 			nicfrag->u0.s.frag_pa_lo = ADDR_LO(segs[i].ds_addr);
 			nicfrag->u0.s.frag_len = segs[i].ds_len;
 			pd->wqe_idx = wq->ring->pidx;
 			RING_PUT(wq->ring, 1);
 			atomic_add_int(&wq->ring->num_used, 1);
 		}
 		/* Emit the zeroed dummy WQE when num_wqes was rounded up above. */
 		if (num_wqes > (pd->nsegs + 1)) {
 			nicfrag =
 			    RING_GET_PRODUCER_ITEM_VA(wq->ring,
 						      struct oce_nic_frag_wqe);
 			nicfrag->u0.dw[0] = 0;
 			nicfrag->u0.dw[1] = 0;
 			nicfrag->u0.dw[2] = 0;
 			nicfrag->u0.dw[3] = 0;
 			pd->wqe_idx = wq->ring->pidx;
 			RING_PUT(wq->ring, 1);
 			atomic_add_int(&wq->ring->num_used, 1);
 			pd->nsegs++;
 		}
 
 		if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1);
 		wq->tx_stats.tx_reqs++;
 		wq->tx_stats.tx_wrbs += num_wqes;
 		wq->tx_stats.tx_bytes += m->m_pkthdr.len;
 		wq->tx_stats.tx_pkts++;
 
 		bus_dmamap_sync(wq->ring->dma.tag, wq->ring->dma.map,
 				BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 		/* Doorbell value: WQE count in bits 16 and up, queue id in the low bits. */
 		reg_value = (num_wqes << 16) | wq->wq_id;
 
 		/* if os2bmc is not enabled or if the pkt is already tagged as
 		   bmc, do nothing
 		 */
 		oce_is_pkt_dest_bmc(sc, m, &os2bmc, &m_new);
 
 		OCE_WRITE_REG32(sc, db, wq->db_offset, reg_value);
 
 	} else if (rc == EFBIG)	{
 		/* Too many segments: defragment once and retry the DMA load. */
 		if (retry_cnt == 0) {
 			m_temp = m_defrag(m, M_NOWAIT);
 			if (m_temp == NULL)
 				goto free_ret;
 			m = m_temp;
 			*mpp = m_temp;
 			retry_cnt = retry_cnt + 1;
 			goto retry;
 		} else
 			goto free_ret;
 	} else if (rc == ENOMEM)
 		return rc;
 	else
 		goto free_ret;
 
 	/* Send the copy destined for the BMC through the same path. */
 	if (os2bmc) {
 		m = m_new;
 		goto tx_start;
 	}
 	
 	return 0;
 
 free_ret:
 	m_freem(*mpp);
 	*mpp = NULL;
 	return rc;
 }
 
 
 /*
  * Handle the completion of one transmitted packet on 'wq'.
  *
  * Advances the packet descriptor tail, gives the packet's ring entries
  * back, unloads the DMA map and frees the mbuf.  If the interface is
  * marked IFF_DRV_OACTIVE and less than half of the ring is in use, the
  * flag is cleared and transmission is restarted.
  */
 static void
 oce_process_tx_completion(struct oce_wq *wq)
 {
 	POCE_SOFTC sc = (POCE_SOFTC) wq->parent;
 	struct oce_packet_desc *pd = &wq->pckts[wq->pkt_desc_tail];
 
 	atomic_store_rel_int(&wq->pkt_desc_tail,
 	    (wq->pkt_desc_tail + 1) % OCE_WQ_PACKET_ARRAY_SIZE);
 	atomic_subtract_int(&wq->ring->num_used, pd->nsegs + 1);
 	bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_POSTWRITE);
 	bus_dmamap_unload(wq->tag, pd->map);
 
 	m_freem(pd->mbuf);
 	pd->mbuf = NULL;
 
 	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
 	    wq->ring->num_used < (wq->ring->num_items / 2)) {
 		sc->ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE);
 		oce_tx_restart(sc, wq);
 	}
 }
 
 
 /*
  * Restart transmission on 'wq'.  Nothing happens unless the interface is
  * IFF_DRV_RUNNING.  If packets are still queued (in the queue's buf_ring
  * on FreeBSD 8 and later, in if_snd otherwise), the queue's TX task is
  * enqueued on taskqueue_swi.
  */
 static void
 oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq)
 {
 
 	if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING)
 		return;
 
 	/* Both preprocessor branches supply the condition for the statement below. */
 #if __FreeBSD_version >= 800000
 	if (!drbr_empty(sc->ifp, wq->br))
 #else
 	if (!IFQ_DRV_IS_EMPTY(&sc->ifp->if_snd))
 #endif
 		taskqueue_enqueue(taskqueue_swi, &wq->txtask);
 
 }
 
 
 #if defined(INET6) || defined(INET)
 /*
  * Prepare a TSO packet for transmission.
  *
  * mpp - in/out pointer to the packet.  It is updated whenever the mbuf is
  *       replaced (writable copy, pulled-up chain) and is set to NULL when
  *       the chain has already been freed, so that the caller can always
  *       pass *mpp to m_freem() on the error path.
  *
  * A non-writable mbuf is replaced by a copy made with m_dup().  The
  * Ethernet (optionally VLAN), IPv4/IPv6 and TCP header lengths are added
  * up and that many bytes are made contiguous with m_pullup().
  *
  * Returns the (possibly new) mbuf, or NULL when the copy fails, the packet
  * is not TCP over IPv4/IPv6, or m_pullup() fails.
  */
 static struct mbuf *
 oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp)
 {
 	struct mbuf *m;
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct ether_vlan_header *eh;
 	struct tcphdr *th;
 	uint16_t etype;
 	int total_len = 0, ehdrlen = 0;
 
 	m = *mpp;
 
 	if (M_WRITABLE(m) == 0) {
 		m = m_dup(*mpp, M_NOWAIT);
 		if (!m)
 			return NULL;
 		m_freem(*mpp);
 		*mpp = m;
 	}
 
 	eh = mtod(m, struct ether_vlan_header *);
 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		etype = ntohs(eh->evl_proto);
 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	} else {
 		etype = ntohs(eh->evl_encap_proto);
 		ehdrlen = ETHER_HDR_LEN;
 	}
 
 	switch (etype) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		ip = (struct ip *)(m->m_data + ehdrlen);
 		if (ip->ip_p != IPPROTO_TCP)
 			return NULL;
 		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 
 		total_len = ehdrlen + (ip->ip_hl << 2) + (th->th_off << 2);
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		ip6 = (struct ip6_hdr *)(m->m_data + ehdrlen);
 		if (ip6->ip6_nxt != IPPROTO_TCP)
 			return NULL;
 		th = (struct tcphdr *)((caddr_t)ip6 + sizeof(struct ip6_hdr));
 
 		total_len = ehdrlen + sizeof(struct ip6_hdr) + (th->th_off << 2);
 		break;
 #endif
 	default:
 		return NULL;
 	}
 
 	/*
 	 * m_pullup() frees the whole chain when it fails.  Store its result
 	 * in *mpp in both cases: on failure the caller must see NULL, or its
 	 * m_freem(*mpp) on the error path would free the chain a second time.
 	 */
 	m = m_pullup(m, total_len);
 	*mpp = m;
 	return m;
 
 }
 #endif /* INET6 || INET */
 
 /*
  * Taskqueue handler that resumes transmission on one work queue.
  *
  * arg is the struct oce_wq to service; npending is not used.  On FreeBSD 8
  * and later the queue's buf_ring is drained under the TX lock, otherwise
  * the legacy if_start path is used.
  */
 void
 oce_tx_task(void *arg, int npending)
 {
 	struct oce_wq *wq = arg;
 	POCE_SOFTC sc = wq->parent;
 	struct ifnet *ifp = sc->ifp;
 	int error = 0;
 
 #if __FreeBSD_version >= 800000
 	LOCK(&wq->tx_lock);
 	error = oce_multiq_transmit(ifp, NULL, wq);
 	if (error != 0)
 		device_printf(sc->dev,
 		    "TX[%d] restart failed\n", wq->queue_index);
 	UNLOCK(&wq->tx_lock);
 #else
 	oce_start(ifp);
 #endif
 
 }
 
 
 void
 oce_start(struct ifnet *ifp)
 {
 	POCE_SOFTC sc = ifp->if_softc;
 	struct mbuf *m;
 	int rc = 0;
 	int def_q = 0; /* Defualt tx queue is 0*/
 
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 			IFF_DRV_RUNNING)
 		return;
 
 	if (!sc->link_status)
 		return;
 	
 	do {
 		IF_DEQUEUE(&sc->ifp->if_snd, m);
 		if (m == NULL)
 			break;
 
 		LOCK(&sc->wq[def_q]->tx_lock);
 		rc = oce_tx(sc, &m, def_q);
 		UNLOCK(&sc->wq[def_q]->tx_lock);
 		if (rc) {
 			if (m != NULL) {
 				sc->wq[def_q]->tx_stats.tx_stops ++;
 				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 				IFQ_DRV_PREPEND(&ifp->if_snd, m);
 				m = NULL;
 			}
 			break;
 		}
 		if (m != NULL)
 			ETHER_BPF_MTAP(ifp, m);
 
 	} while (TRUE);
 
 	return;
 }
 
 
 /*
  * Handle the Completion Queue for transmit.
  *
  * arg is the struct oce_wq whose completion queue is to be serviced.
  * Runs under the queue's tx_compl_lock.  Every completion entry whose
  * fourth dword is non-zero is consumed: the work queue consumer index is
  * moved just past the completed WQE, the matching packet descriptor is
  * retired and the entry is cleared.  If anything was consumed the CQ is
  * re-armed with that count.
  *
  * Returns the number of completion entries processed.
  */
 uint16_t
 oce_wq_handler(void *arg)
 {
 	struct oce_wq *wq = (struct oce_wq *)arg;
 	POCE_SOFTC sc = wq->parent;
 	struct oce_cq *cq = wq->cq;
 	struct oce_nic_tx_cqe *cqe;
 	int num_cqes = 0;
 
 	LOCK(&wq->tx_compl_lock);
 	bus_dmamap_sync(cq->ring->dma.tag,
 			cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
 	cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe);
 	/* A non-zero dw[3] marks an entry that has not been consumed yet. */
 	while (cqe->u0.dw[3]) {
 		DW_SWAP((uint32_t *) cqe, sizeof(oce_wq_cqe));
 
 		/* Consumer index becomes wqe_index + 1, wrapped to the ring size. */
 		wq->ring->cidx = cqe->u0.s.wqe_index + 1;
 		if (wq->ring->cidx >= wq->ring->num_items)
 			wq->ring->cidx -= wq->ring->num_items;
 
 		oce_process_tx_completion(wq);
 		wq->tx_stats.tx_compl++;
 		/* Clear the marker so this entry is not processed again. */
 		cqe->u0.dw[3] = 0;
 		RING_GET(cq->ring, 1);
 		bus_dmamap_sync(cq->ring->dma.tag,
 				cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
 		cqe =
 		    RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe);
 		num_cqes++;
 	}
 
 	/* Re-arm the CQ only when at least one entry was consumed. */
 	if (num_cqes)
 		oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE);
 	
 	UNLOCK(&wq->tx_compl_lock);
 	return num_cqes;
 }
 
 
 static int 
 oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq)
 {
 	POCE_SOFTC sc = ifp->if_softc;
 	int status = 0, queue_index = 0;
 	struct mbuf *next = NULL;
 	struct buf_ring *br = NULL;
 
 	br  = wq->br;
 	queue_index = wq->queue_index;
 
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 		IFF_DRV_RUNNING) {
 		if (m != NULL)
 			status = drbr_enqueue(ifp, br, m);
 		return status;
 	}
 
 	if (m != NULL) {
 		if ((status = drbr_enqueue(ifp, br, m)) != 0)
 			return status;
 	} 
 	while ((next = drbr_peek(ifp, br)) != NULL) {
 		if (oce_tx(sc, &next, queue_index)) {
 			if (next == NULL) {
 				drbr_advance(ifp, br);
 			} else {
 				drbr_putback(ifp, br, next);
 				wq->tx_stats.tx_stops ++;
 				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			}  
 			break;
 		}
 		drbr_advance(ifp, br);
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
 		if (next->m_flags & M_MCAST)
 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 		ETHER_BPF_MTAP(ifp, next);
 	}
 
 	return 0;
 }
 
 
 
 
 /*****************************************************************************
  *			    Receive routines				     *
  *****************************************************************************/
 
 static void
 oce_correct_header(struct mbuf *m, struct nic_hwlro_cqe_part1 *cqe1, struct nic_hwlro_cqe_part2 *cqe2)
 {
 	uint32_t *p;
         struct ether_header *eh = NULL;
         struct tcphdr *tcp_hdr = NULL;
         struct ip *ip4_hdr = NULL;
         struct ip6_hdr *ip6 = NULL;
         uint32_t payload_len = 0;
 
         eh = mtod(m, struct ether_header *);
         /* correct IP header */
         if(!cqe2->ipv6_frame) {
 		ip4_hdr = (struct ip *)((char*)eh + sizeof(struct ether_header));
                 ip4_hdr->ip_ttl = cqe2->frame_lifespan;
                 ip4_hdr->ip_len = htons(cqe2->coalesced_size - sizeof(struct ether_header));
                 tcp_hdr = (struct tcphdr *)((char*)ip4_hdr + sizeof(struct ip));
         }else {
         	ip6 = (struct ip6_hdr *)((char*)eh + sizeof(struct ether_header));
                 ip6->ip6_ctlun.ip6_un1.ip6_un1_hlim = cqe2->frame_lifespan;
                 payload_len = cqe2->coalesced_size - sizeof(struct ether_header)
                                                 - sizeof(struct ip6_hdr);
                 ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(payload_len);
                 tcp_hdr = (struct tcphdr *)((char*)ip6 + sizeof(struct ip6_hdr));
         }
 
         /* correct tcp header */
         tcp_hdr->th_ack = htonl(cqe2->tcp_ack_num);
         if(cqe2->push) {
         	tcp_hdr->th_flags |= TH_PUSH;
         }
         tcp_hdr->th_win = htons(cqe2->tcp_window);
         tcp_hdr->th_sum = 0xffff;
         if(cqe2->ts_opt) {
                 p = (uint32_t *)((char*)tcp_hdr + sizeof(struct tcphdr) + 2);
                 *p = cqe1->tcp_timestamp_val;
                 *(p+1) = cqe1->tcp_timestamp_ecr;
         }
 
 	return;
 }
 
 /*
  * Pull cqe_info->num_frags posted buffers off the receive ring and link
  * them into one mbuf chain, returned through *m.
  *
  * Each fragment is unloaded from DMA and trimmed to at most the configured
  * fragment size.  The first fragment carries the packet header (total
  * length and checksum-offload flags); later fragments lose M_PKTHDR.  If
  * the ring runs empty early a message is logged and the function returns
  * with whatever has been chained so far.
  */
 static void
 oce_rx_mbuf_chain(struct oce_rq *rq, struct oce_common_cqe_info *cqe_info, struct mbuf **m)
 {
 	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
 	struct oce_packet_desc *pd;
 	struct mbuf *frag, *tail = NULL;
 	uint32_t remaining = cqe_info->pkt_size;
 	uint32_t frag_len = 0, n = 0;
 
 	for (n = 0; n < cqe_info->num_frags; n++) {
 		if (rq->ring->cidx == rq->ring->pidx) {
 			device_printf(sc->dev,
 			    "oce_rx_mbuf_chain: Invalid RX completion - Queue is empty\n");
 			return;
 		}
 		pd = &rq->pckts[rq->ring->cidx];
 
 		bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(rq->tag, pd->map);
 		RING_GET(rq->ring, 1);
 		rq->pending--;
 
 		frag = pd->mbuf;
 		frag_len = (remaining > rq->cfg.frag_size) ? rq->cfg.frag_size : remaining;
 		frag->m_len = frag_len;
 
 		if (tail == NULL) {
 			/* first fragment, fill out much of the packet header */
 			frag->m_pkthdr.len = remaining;
 			if (rq->islro)
 				frag->m_nextpkt = NULL;
 			frag->m_pkthdr.csum_flags = 0;
 			if (IF_CSUM_ENABLED(sc)) {
 				if (cqe_info->l4_cksum_pass) {
 					/* IPv4 always; IPv6 only on LRO queues */
 					if (!cqe_info->ipv6_frame || rq->islro) {
 						frag->m_pkthdr.csum_flags |=
 						    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 					}
 					frag->m_pkthdr.csum_data = 0xffff;
 				}
 				if (cqe_info->ip_cksum_pass) {
 					frag->m_pkthdr.csum_flags |=
 					    (CSUM_IP_CHECKED|CSUM_IP_VALID);
 				}
 			}
 			*m = frag;
 		} else {
 			/* additional fragments */
 			frag->m_flags &= ~M_PKTHDR;
 			tail->m_next = frag;
 			if (rq->islro)
 				tail->m_nextpkt = NULL;
 		}
 		tail = frag;
 
 		/* The chain owns the buffer now. */
 		pd->mbuf = NULL;
 		remaining -= frag_len;
 	}
 
 	return;
 }
 
 static void
 oce_rx_lro(struct oce_rq *rq, struct nic_hwlro_singleton_cqe *cqe, struct nic_hwlro_cqe_part2 *cqe2)
 {
         POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
         struct nic_hwlro_cqe_part1 *cqe1 = NULL;
         struct mbuf *m = NULL;
 	struct oce_common_cqe_info cq_info;
 
 	/* parse cqe */
         if(cqe2 == NULL) {
                 cq_info.pkt_size =  cqe->pkt_size;
                 cq_info.vtag = cqe->vlan_tag;
                 cq_info.l4_cksum_pass = cqe->l4_cksum_pass;
                 cq_info.ip_cksum_pass = cqe->ip_cksum_pass;
                 cq_info.ipv6_frame = cqe->ipv6_frame;
                 cq_info.vtp = cqe->vtp;
                 cq_info.qnq = cqe->qnq;
         }else {
                 cqe1 = (struct nic_hwlro_cqe_part1 *)cqe;
                 cq_info.pkt_size =  cqe2->coalesced_size;
                 cq_info.vtag = cqe2->vlan_tag;
                 cq_info.l4_cksum_pass = cqe2->l4_cksum_pass;
                 cq_info.ip_cksum_pass = cqe2->ip_cksum_pass;
                 cq_info.ipv6_frame = cqe2->ipv6_frame;
                 cq_info.vtp = cqe2->vtp;
                 cq_info.qnq = cqe1->qnq;
         }
         
 	cq_info.vtag = BSWAP_16(cq_info.vtag);
 
         cq_info.num_frags = cq_info.pkt_size / rq->cfg.frag_size;
         if(cq_info.pkt_size % rq->cfg.frag_size)
                 cq_info.num_frags++;
 
 	oce_rx_mbuf_chain(rq, &cq_info, &m);
 
 	if (m) {
 		if(cqe2) {
 			//assert(cqe2->valid != 0);
 			
 			//assert(cqe2->cqe_type != 2);
 			oce_correct_header(m, cqe1, cqe2);
 		}
 
 		m->m_pkthdr.rcvif = sc->ifp;
 #if __FreeBSD_version >= 800000
 		if (rq->queue_index)
 			m->m_pkthdr.flowid = (rq->queue_index - 1);
 		else
 			m->m_pkthdr.flowid = rq->queue_index;
 		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 #endif
 		/* This deternies if vlan tag is Valid */
 		if (cq_info.vtp) {
 			if (sc->function_mode & FNM_FLEX10_MODE) {
 				/* FLEX10. If QnQ is not set, neglect VLAN */
 				if (cq_info.qnq) {
 					m->m_pkthdr.ether_vtag = cq_info.vtag;
 					m->m_flags |= M_VLANTAG;
 				}
 			} else if (sc->pvid != (cq_info.vtag & VLAN_VID_MASK))  {
 				/* In UMC mode generally pvid will be striped by
 				   hw. But in some cases we have seen it comes
 				   with pvid. So if pvid == vlan, neglect vlan.
 				 */
 				m->m_pkthdr.ether_vtag = cq_info.vtag;
 				m->m_flags |= M_VLANTAG;
 			}
 		}
 		if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1);
 		
 		(*sc->ifp->if_input) (sc->ifp, m);
 
 		/* Update rx stats per queue */
 		rq->rx_stats.rx_pkts++;
 		rq->rx_stats.rx_bytes += cq_info.pkt_size;
 		rq->rx_stats.rx_frags += cq_info.num_frags;
 		rq->rx_stats.rx_ucast_pkts++;
 	}
         return;
 }
 
 /*
  * Process one (non hardware-LRO) receive completion.
  *
  * Completions without fragments are ignored.  Completions with a zero
  * packet size, or that fail the port-id check, have their buffers
  * discarded.  Otherwise the fragments are chained into an mbuf, flow id and
  * VLAN tag are filled in, and the packet is handed either to software LRO
  * (IPv4, both checksums passed, LRO enabled and initialised) or straight to
  * the stack's if_input.  Per-queue receive statistics are updated for every
  * delivered packet.
  */
 static void
 oce_rx(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe)
 {
 	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
 	int len;
 	struct mbuf *m = NULL;
 	struct oce_common_cqe_info cq_info;
 	uint16_t vtag = 0;
 
 	/* Is it a flush completion that has no data? */
 	if(!cqe->u0.s.num_fragments)
 		goto exit;
 
 	len = cqe->u0.s.pkt_size;
 	if (!len) {
 		/* partial DMA workaround for Lancer */
 		oce_discard_rx_comp(rq, cqe->u0.s.num_fragments);
 		goto exit;
 	}
 
 	/* Drop frames whose port id fails the check for this function. */
 	if (!oce_cqe_portid_valid(sc, cqe)) {
 		oce_discard_rx_comp(rq, cqe->u0.s.num_fragments);
 		goto exit;
 	}
 
 	 /* Get vlan_tag value (byte-swapped on BE and SH chips) */
 	if(IS_BE(sc) || IS_SH(sc))
 		vtag = BSWAP_16(cqe->u0.s.vlan_tag);
 	else
 		vtag = cqe->u0.s.vlan_tag;
 	
 	/* Only the fields oce_rx_mbuf_chain() reads are filled in here. */
 	cq_info.l4_cksum_pass = cqe->u0.s.l4_cksum_pass;
 	cq_info.ip_cksum_pass = cqe->u0.s.ip_cksum_pass;
 	cq_info.ipv6_frame = cqe->u0.s.ip_ver;
 	cq_info.num_frags = cqe->u0.s.num_fragments;
 	cq_info.pkt_size = cqe->u0.s.pkt_size;
 
 	oce_rx_mbuf_chain(rq, &cq_info, &m);
 
 	if (m) {
 		m->m_pkthdr.rcvif = sc->ifp;
 #if __FreeBSD_version >= 800000
 		/* flowid is queue_index - 1, except that queue 0 stays 0 */
 		if (rq->queue_index)
 			m->m_pkthdr.flowid = (rq->queue_index - 1);
 		else
 			m->m_pkthdr.flowid = rq->queue_index;
 		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 #endif
 		/* This determines if the vlan tag is valid */
 		if (oce_cqe_vtp_valid(sc, cqe)) { 
 			if (sc->function_mode & FNM_FLEX10_MODE) {
 				/* FLEX10. If QnQ is not set, neglect VLAN */
 				if (cqe->u0.s.qnq) {
 					m->m_pkthdr.ether_vtag = vtag;
 					m->m_flags |= M_VLANTAG;
 				}
 			} else if (sc->pvid != (vtag & VLAN_VID_MASK))  {
 				/* In UMC mode the pvid is generally stripped
 				   by hw. But in some cases we have seen it come
 				   with the pvid. So if pvid == vlan, neglect vlan.
 				*/
 				m->m_pkthdr.ether_vtag = vtag;
 				m->m_flags |= M_VLANTAG;
 			}
 		}
 
 		if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1);
 #if defined(INET6) || defined(INET)
 		/* Try to queue to LRO */
 		if (IF_LRO_ENABLED(sc) &&
 		    (cqe->u0.s.ip_cksum_pass) &&
 		    (cqe->u0.s.l4_cksum_pass) &&
 		    (!cqe->u0.s.ip_ver)       &&
 		    (rq->lro.lro_cnt != 0)) {
 
 			if (tcp_lro_rx(&rq->lro, m, 0) == 0) {
 				rq->lro_pkts_queued ++;		
 				goto post_done;
 			}
 			/* If LRO posting fails then try to post to STACK */
 		}
 #endif
 	
 		(*sc->ifp->if_input) (sc->ifp, m);
 #if defined(INET6) || defined(INET)
 post_done:
 #endif
 		/* Update rx stats per queue */
 		rq->rx_stats.rx_pkts++;
 		rq->rx_stats.rx_bytes += cqe->u0.s.pkt_size;
 		rq->rx_stats.rx_frags += cqe->u0.s.num_fragments;
 		if (cqe->u0.s.pkt_type == OCE_MULTICAST_PACKET)
 			rq->rx_stats.rx_mcast_pkts++;
 		if (cqe->u0.s.pkt_type == OCE_UNICAST_PACKET)
 			rq->rx_stats.rx_ucast_pkts++;
 	}
 exit:
 	return;
 }
 
 
 /*
  * Throw away the num_frags receive buffers that belong to a completion:
  * unload each buffer's DMA map, free its mbuf if one is attached and
  * advance the ring.  Stops early, with a log message, when the ring is
  * found empty.
  */
 void
 oce_discard_rx_comp(struct oce_rq *rq, int num_frags)
 {
 	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
 	struct oce_packet_desc *desc;
 	uint32_t n = 0;
 
 	for (n = 0; n < num_frags; n++) {
 		if (rq->ring->cidx == rq->ring->pidx) {
 			device_printf(sc->dev,
 			    "oce_discard_rx_comp: Invalid RX completion - Queue is empty\n");
 			return;
 		}
 
 		desc = &rq->pckts[rq->ring->cidx];
 		bus_dmamap_sync(rq->tag, desc->map, BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(rq->tag, desc->map);
 		if (desc->mbuf != NULL) {
 			m_freem(desc->mbuf);
 			desc->mbuf = NULL;
 		}
 
 		RING_GET(rq->ring, 1);
 		rq->pending--;
 	}
 }
 
 
 /*
  * Return the "VLAN tag present" bit of a receive completion, read through
  * the v1 completion layout when the adapter runs in BE3 native mode.
  */
 static int
 oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe)
 {
 	int present;
 
 	if (sc->be3_native)
 		present = ((struct oce_nic_rx_cqe_v1 *)cqe)->u0.s.vlan_tag_present;
 	else
 		present = cqe->u0.s.vlan_tag_present;
 
 	return present;
 }
 
 
 /*
  * Check that a receive completion was generated for our port.
  *
  * Only meaningful in BE3 native mode on BE/SH chips, where the v1
  * completion carries a port number.  For BE3 legacy and Lancer this is a
  * dummy check that always succeeds.  Returns 1 when valid, 0 otherwise.
  */
 static int
 oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe)
 {
 	struct oce_nic_rx_cqe_v1 *cqe_v1;
 	int port_id = 0;
 
 	if (!(sc->be3_native && (IS_BE(sc) || IS_SH(sc))))
 		return 1;
 
 	cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe;
 	port_id = cqe_v1->u0.s.port;
 
 	return (sc->port_id != port_id) ? 0 : 1;
 }
 
 #if defined(INET6) || defined(INET)
 /*
  * Flush everything held by the receive queue's software LRO context and
  * reset the queued-packet counter.  No-op when LRO is disabled.
  */
 void
 oce_rx_flush_lro(struct oce_rq *rq)
 {
 	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
 
 	if (IF_LRO_ENABLED(sc)) {
 		tcp_lro_flush_all(&rq->lro);
 		rq->lro_pkts_queued = 0;
 	}
 }
 
 
 /*
  * Initialise a software LRO context for every receive queue and bind it to
  * the interface.  Returns 0 on success or the first tcp_lro_init() error;
  * initialisation stops at the first failure.
  */
 static int
 oce_init_lro(POCE_SOFTC sc)
 {
 	struct lro_ctrl *lro = NULL;
 	int error = 0, q = 0;
 
 	for (q = 0; q < sc->nrqs; q++) {
 		lro = &sc->rq[q]->lro;
 		error = tcp_lro_init(lro);
 		if (error != 0) {
 			device_printf(sc->dev, "LRO init failed\n");
 			break;
 		}
 		lro->ifp = sc->ifp;
 	}
 
 	return error;
 }
 
 
 /*
  * Release the software LRO context of every receive queue.
  */
 void
 oce_free_lro(POCE_SOFTC sc)
 {
 	struct lro_ctrl *lro = NULL;
 	int q = 0;
 
 	while (q < sc->nrqs) {
 		lro = &sc->rq[q]->lro;
 		if (lro != NULL)
 			tcp_lro_free(lro);
 		q++;
 	}
 }
 #endif
 
 /*
  * Post up to `count` fresh receive buffers on the ring and tell the
  * adapter about them.
  *
  * For every slot an mbuf cluster of oce_rq_buf_size bytes is allocated and
  * DMA-loaded, and its bus address is written into the next ring entry.
  * Posting stops early when an mbuf cannot be allocated or mapped.  The
  * doorbell is then rung in batches of at most OCE_MAX_RQ_POSTS (or
  * OCE_HWLRO_MAX_RQ_POSTS when hardware LRO is enabled) buffers; LRO queues
  * use the DB_OFFSET doorbell, all others PD_RXULP_DB.
  *
  * Always returns 0; callers cannot tell how many buffers were posted other
  * than through rq->pending.
  */
 int
 oce_alloc_rx_bufs(struct oce_rq *rq, int count)
 {
 	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
 	int i, rc;
 	struct oce_packet_desc *pd;
 	bus_dma_segment_t segs[6];
 	int nsegs, added = 0;
 	struct oce_nic_rqe *rqe;
 	pd_rxulp_db_t rxdb_reg;
 	uint32_t val = 0;
 	uint32_t nposted = 0;
 	uint32_t oce_max_rq_posts = 64;
 
 	bzero(&rxdb_reg, sizeof(pd_rxulp_db_t));
 	for (i = 0; i < count; i++) {
 		pd = &rq->pckts[rq->ring->pidx];
 		pd->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, oce_rq_buf_size);
 		if (pd->mbuf == NULL) {
 			device_printf(sc->dev, "mbuf allocation failed, size = %d\n",oce_rq_buf_size);
 			break;
 		}
 		pd->mbuf->m_nextpkt = NULL;
 
 		pd->mbuf->m_len = pd->mbuf->m_pkthdr.len = rq->cfg.frag_size;
 
 		rc = bus_dmamap_load_mbuf_sg(rq->tag,
 					     pd->map,
 					     pd->mbuf,
 					     segs, &nsegs, BUS_DMA_NOWAIT);
 		if (rc) {
 			m_free(pd->mbuf);
 			/* Do not leave a pointer to the freed mbuf in the slot. */
 			pd->mbuf = NULL;
 			device_printf(sc->dev, "bus_dmamap_load_mbuf_sg failed rc = %d\n", rc);
 			break;
 		}
 
 		if (nsegs != 1) {
 			/*
 			 * The ring entry holds a single address.  Release the
 			 * mapping and the mbuf before retrying this slot so
 			 * that neither is leaked and the map is not loaded
 			 * twice.
 			 */
 			bus_dmamap_unload(rq->tag, pd->map);
 			m_free(pd->mbuf);
 			pd->mbuf = NULL;
 			i--;
 			continue;
 		}
 
 		bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_PREREAD);
 
 		rqe = RING_GET_PRODUCER_ITEM_VA(rq->ring, struct oce_nic_rqe);
 		rqe->u0.s.frag_pa_hi = ADDR_HI(segs[0].ds_addr);
 		rqe->u0.s.frag_pa_lo = ADDR_LO(segs[0].ds_addr);
 		DW_SWAP(u32ptr(rqe), sizeof(struct oce_nic_rqe));
 		RING_PUT(rq->ring, 1);
 		added++;
 		rq->pending++;
 	}
 
 	/* Ring the doorbell: full batches first, then the remainder. */
 	oce_max_rq_posts = sc->enable_hwlro ? OCE_HWLRO_MAX_RQ_POSTS : OCE_MAX_RQ_POSTS;
 	while (added > 0) {
 		nposted = ((uint32_t)added > oce_max_rq_posts) ?
 		    oce_max_rq_posts : (uint32_t)added;
 		if(rq->islro) {
 			/*
 			 * Build the doorbell word from scratch every time;
 			 * OR-ing into a stale value would merge the counts of
 			 * successive writes.
 			 */
 			val = rq->rq_id & DB_LRO_RQ_ID_MASK;
 			val |= nposted << 16;
 			OCE_WRITE_REG32(sc, db, DB_OFFSET, val);
 		}else {
 			rxdb_reg.bits.num_posted = nposted;
 			rxdb_reg.bits.qid = rq->rq_id;
 			OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0);
 		}
 		added -= (int)nposted;
 	}
 
 	return 0;
 }
 
 /*
  * After num_cqes receive completions have been handled, re-arm the
  * completion queue and top up the receive ring.
  *
  * Without hardware LRO the ring is refilled to one short of the packet
  * array size.  With hardware LRO, 64 buffers are posted once more than 64
  * slots are free.  Nothing happens when num_cqes is zero.
  */
 static void
 oce_check_rx_bufs(POCE_SOFTC sc, uint32_t num_cqes, struct oce_rq *rq)
 {
 	if (num_cqes == 0)
 		return;
 
 	oce_arm_cq(sc, rq->cq->cq_id, num_cqes, FALSE);
 
 	if (sc->enable_hwlro) {
 		if ((OCE_RQ_PACKET_ARRAY_SIZE -1 - rq->pending) > 64)
 			oce_alloc_rx_bufs(rq, 64);
 	} else {
 		if ((OCE_RQ_PACKET_ARRAY_SIZE - rq->pending) > 1)
 			oce_alloc_rx_bufs(rq, ((OCE_RQ_PACKET_ARRAY_SIZE - rq->pending) - 1));
 	}
 
 	return;
 }
 
 /*
  * Service the completion queue of a hardware-LRO receive queue.
  *
  * arg is the struct oce_rq to service.  Three completion types are handled
  * while the entry at the consumer index is marked valid:
  *   type 0 - singleton: one complete packet, delivered at once;
  *   type 1 - first part of a coalesced completion: remembered in
  *            rq->cqe_firstpart until its second part arrives;
  *   type 2 - second part: delivered together with the remembered first part.
  * An out-of-sequence completion (singleton or part 1 while a part 1 is
  * pending, or part 2 without one) is logged and ends processing
  * immediately, skipping the re-arm/refill step.  Processing also stops once
  * the per-call budget (8 on XE201, otherwise oce_max_rsp_handled) has been
  * reached.
  *
  * Runs under rq->rx_lock.  Always returns 0.
  */
 uint16_t
 oce_rq_handler_lro(void *arg)
 {
         struct oce_rq *rq = (struct oce_rq *)arg;
         struct oce_cq *cq = rq->cq;
         POCE_SOFTC sc = rq->parent;
         struct nic_hwlro_singleton_cqe *cqe;
         struct nic_hwlro_cqe_part2 *cqe2;
         int num_cqes = 0;
 
 	LOCK(&rq->rx_lock);
         bus_dmamap_sync(cq->ring->dma.tag,cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
         cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct nic_hwlro_singleton_cqe);
         while (cqe->valid) {
                 if(cqe->cqe_type == 0) { /* singleton cqe */
 			/* we should not get singleton cqe after cqe1 on same rq */
 			if(rq->cqe_firstpart != NULL) {
 				device_printf(sc->dev, "Got singleton cqe after cqe1 \n");
 				goto exit_rq_handler_lro;
 			}							
                         /* Errors are only counted; the packet is still delivered. */
                         if(cqe->error != 0) {
                                 rq->rx_stats.rxcp_err++;
 				if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
                         }
                         oce_rx_lro(rq, cqe, NULL);
                         rq->rx_stats.rx_compl++;
                         cqe->valid = 0;
                         RING_GET(cq->ring, 1);
                         num_cqes++;
                         if (num_cqes >= (IS_XE201(sc) ? 8 : oce_max_rsp_handled))
                                 break;
                 }else if(cqe->cqe_type == 0x1) { /* first part */
 			/* we should not get cqe1 after cqe1 on same rq */
 			if(rq->cqe_firstpart != NULL) {
 				device_printf(sc->dev, "Got cqe1 after cqe1 \n");
 				goto exit_rq_handler_lro;
 			}
 			/* Park the first part; it is counted once part 2 arrives. */
 			rq->cqe_firstpart = (struct nic_hwlro_cqe_part1 *)cqe;
                         RING_GET(cq->ring, 1);
                 }else if(cqe->cqe_type == 0x2) { /* second part */
 			cqe2 = (struct nic_hwlro_cqe_part2 *)cqe;
                         if(cqe2->error != 0) {
                                 rq->rx_stats.rxcp_err++;
 				if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
                         }
 			/* We should not get cqe2 without cqe1 */
 			if(rq->cqe_firstpart == NULL) {
 				device_printf(sc->dev, "Got cqe2 without cqe1 \n");
 				goto exit_rq_handler_lro;
 			}
                         oce_rx_lro(rq, (struct nic_hwlro_singleton_cqe *)rq->cqe_firstpart, cqe2);
 
                         rq->rx_stats.rx_compl++;
                         /* Invalidate both halves and forget the parked first part. */
                         rq->cqe_firstpart->valid = 0;
                         cqe2->valid = 0;
 			rq->cqe_firstpart = NULL;
 
                         RING_GET(cq->ring, 1);
                         /* Both parts of the completion are accounted for here. */
                         num_cqes += 2;
                         if (num_cqes >= (IS_XE201(sc) ? 8 : oce_max_rsp_handled))
                                 break;
 		}
 
                 bus_dmamap_sync(cq->ring->dma.tag,cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
                 cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct nic_hwlro_singleton_cqe);
         }
 	/* Re-arm the CQ and replenish receive buffers. */
 	oce_check_rx_bufs(sc, num_cqes, rq);
 exit_rq_handler_lro:
 	UNLOCK(&rq->rx_lock);
 	return 0;
 }
 
 /* Handle the Completion Queue for receive */
 uint16_t
 oce_rq_handler(void *arg)
 {
 	struct oce_rq *rq = (struct oce_rq *)arg;
 	struct oce_cq *cq = rq->cq;
 	POCE_SOFTC sc = rq->parent;
 	struct oce_nic_rx_cqe *cqe;
 	int num_cqes = 0;
 
 	if(rq->islro) {
 		oce_rq_handler_lro(arg);
 		return 0;
 	}
 	LOCK(&rq->rx_lock);
 	bus_dmamap_sync(cq->ring->dma.tag,
 			cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
 	cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe);
 	while (cqe->u0.dw[2]) {
 		DW_SWAP((uint32_t *) cqe, sizeof(oce_rq_cqe));
 
 		if (cqe->u0.s.error == 0) {
 			oce_rx(rq, cqe);
 		} else {
 			rq->rx_stats.rxcp_err++;
 			if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
 			/* Post L3/L4 errors to stack.*/
 			oce_rx(rq, cqe);
 		}
 		rq->rx_stats.rx_compl++;
 		cqe->u0.dw[2] = 0;
 
 #if defined(INET6) || defined(INET)
 		if (IF_LRO_ENABLED(sc) && rq->lro_pkts_queued >= 16) {
 			oce_rx_flush_lro(rq);
 		}
 #endif
 
 		RING_GET(cq->ring, 1);
 		bus_dmamap_sync(cq->ring->dma.tag,
 				cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
 		cqe =
 		    RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe);
 		num_cqes++;
 		if (num_cqes >= (IS_XE201(sc) ? 8 : oce_max_rsp_handled))
 			break;
 	}
 
 #if defined(INET6) || defined(INET)
         if (IF_LRO_ENABLED(sc))
                 oce_rx_flush_lro(rq);
 #endif
 
 	oce_check_rx_bufs(sc, num_cqes, rq);
 	UNLOCK(&rq->rx_lock);
 	return 0;
 
 }
 
 
 
 
 /*****************************************************************************
  *		   Helper functions in this file			     *
  *****************************************************************************/
 
 /*
  * Allocate the network interface, fill in its methods, queue limits and
  * offload capabilities, and attach it to the Ethernet layer using the
  * adapter's MAC address.  Returns 0 on success or ENOMEM when the ifnet
  * cannot be allocated.
  */
 static int 
 oce_attach_ifp(POCE_SOFTC sc)
 {
 	struct ifnet *ifp;
 
 	ifp = if_alloc(IFT_ETHER);
 	sc->ifp = ifp;
 	if (ifp == NULL)
 		return ENOMEM;
 
 	/* A single auto-select media entry is advertised. */
 	ifmedia_init(&sc->media, IFM_IMASK, oce_media_change, oce_media_status);
 	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);
 
 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
 	ifp->if_ioctl = oce_ioctl;
 	ifp->if_start = oce_start;
 	ifp->if_init = oce_init;
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_softc = sc;
 #if __FreeBSD_version >= 800000
 	ifp->if_transmit = oce_multiq_start;
 	ifp->if_qflush = oce_multiq_flush;
 #endif
 
 	if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev));
 
 	ifp->if_snd.ifq_drv_maxlen = OCE_MAX_TX_DESC - 1;
 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
 	IFQ_SET_READY(&ifp->if_snd);
 
 	/* Checksum and segmentation offloads. */
 	ifp->if_hwassist = OCE_IF_HWASSIST;
 	ifp->if_hwassist |= CSUM_TSO;
 	ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP);
 
 	ifp->if_capabilities = OCE_IF_CAPABILITIES;
 	ifp->if_capabilities |= IFCAP_HWCSUM;
 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
 
 #if defined(INET6) || defined(INET)
 	ifp->if_capabilities |= IFCAP_TSO;
 	ifp->if_capabilities |= IFCAP_LRO;
 	ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
 #endif
 
 	/* Everything the hardware supports is enabled by default. */
 	ifp->if_capenable = ifp->if_capabilities;
 	ifp->if_baudrate = IF_Gbps(10);
 
 #if __FreeBSD_version >= 1000000
 	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
 	ifp->if_hw_tsomaxsegcount = OCE_MAX_TX_ELEMENTS;
 	ifp->if_hw_tsomaxsegsize = 4096;
 #endif
 
 	ether_ifattach(ifp, sc->macaddr.mac_addr);
 
 	return 0;
 }
 
 
 /*
  * VLAN registration callback: record vtag as configured and reprogram the
  * hardware filter while the count stays within max_vlans + 1.  Events for
  * other interfaces and tags outside 1..4095 are ignored.
  */
 static void
 oce_add_vlan(void *arg, struct ifnet *ifp, uint16_t vtag)
 {
 	POCE_SOFTC sc = ifp->if_softc;
 
 	if (ifp->if_softc != arg || vtag == 0 || vtag > 4095)
 		return;
 
 	sc->vlan_tag[vtag] = 1;
 	sc->vlans_added++;
 	if (sc->vlans_added <= (sc->max_vlans + 1))
 		oce_vid_config(sc);
 }
 
 
 /*
  * VLAN unregistration callback: clear vtag from the configured set and
  * reprogram the hardware filter.  Events for other interfaces and tags
  * outside 1..4095 are ignored.
  */
 static void
 oce_del_vlan(void *arg, struct ifnet *ifp, uint16_t vtag)
 {
 	POCE_SOFTC sc = ifp->if_softc;
 
 	if (ifp->if_softc != arg || vtag == 0 || vtag > 4095)
 		return;
 
 	sc->vlan_tag[vtag] = 0;
 	sc->vlans_added--;
 	oce_vid_config(sc);
 }
 
 
 /*
  * Program the adapter's VLAN filter from sc->vlan_tag[].
  *
  * At most MAX_VLANFILTER_SIZE vlans can be configured in the hardware
  * filter.  If the user configures more, or hardware VLAN filtering is
  * disabled on the interface, the card is placed in vlan promiscuous mode.
  *
  * The filter table is filled from the tag bitmap rather than from the
  * vlans_added counter, so the fill is bounded explicitly: should the bitmap
  * hold more tags than the counter suggests, the function falls back to
  * promiscuous mode instead of writing past the end of the on-stack table.
  *
  * Returns the status of oce_config_vlan(), or 0 when filtering is active
  * but no tag is configured (in which case nothing is sent to the adapter).
  */
 static int
 oce_vid_config(POCE_SOFTC sc)
 {
 	struct normal_vlan vtags[MAX_VLANFILTER_SIZE];
 	uint16_t ntags = 0, i;
 	int status = 0;
 	int promisc = 1;
 
 	if ((sc->vlans_added <= MAX_VLANFILTER_SIZE) && 
 			(sc->ifp->if_capenable & IFCAP_VLAN_HWFILTER)) {
 		promisc = 0;
 		for (i = 0; i < MAX_VLANS; i++) {
 			if (!sc->vlan_tag[i])
 				continue;
 			if (ntags >= MAX_VLANFILTER_SIZE) {
 				/* More tags than the table can hold. */
 				promisc = 1;
 				break;
 			}
 			vtags[ntags].vtag = i;
 			ntags++;
 		}
 	}
 
 	if (promisc)
 		status = oce_config_vlan(sc, (uint8_t) sc->if_id,
 					 	NULL, 0, 1, 1);
 	else if (ntags)
 		status = oce_config_vlan(sc, (uint8_t) sc->if_id,
 						vtags, ntags, 1, 0); 
 	return status;
 }
 
 
 /*
  * Bring the adapter's MAC filter in line with the interface's link-level
  * address.
  *
  * When the address differs from the cached copy, the new address is added
  * first; on success the old filter entry is deleted and the cached copy is
  * updated.  A failure of either mailbox call is logged.
  */
 static void
 oce_mac_addr_set(POCE_SOFTC sc)
 {
 	uint32_t prev_pmac_id = sc->pmac_id;
 	int error = 0;
 
 	/* Nothing to do while the address is unchanged. */
 	if (bcmp((IF_LLADDR(sc->ifp)), sc->macaddr.mac_addr,
 	    sc->macaddr.size_of_struct) == 0)
 		return;
 
 	error = oce_mbox_macaddr_add(sc, (uint8_t *)(IF_LLADDR(sc->ifp)),
 	    sc->if_id, &sc->pmac_id);
 	if (error == 0) {
 		error = oce_mbox_macaddr_del(sc, sc->if_id, prev_pmac_id);
 		bcopy((IF_LLADDR(sc->ifp)), sc->macaddr.mac_addr,
 		    sc->macaddr.size_of_struct);
 	}
 	if (error != 0)
 		device_printf(sc->dev, "Failed update macaddress\n");
 
 }
 
 
 /*
  * Pass a user-supplied mailbox command through to the firmware.
  *
  * The user buffer (ifr_data) starts with IOCTL_COOKIE, followed directly by
  * a struct mbx_hdr and the request payload.  The request is copied into a
  * DMA buffer, executed with oce_pass_through_mbox() and the response is
  * copied back over the request in user space.  For
  * OPCODE_COMMON_GET_CNTL_ATTRIBUTES the driver version string, which the
  * firmware does not fill in, is patched into the user's copy of the
  * response.
  *
  * Returns 0 on success, EFAULT when user memory cannot be accessed, EINVAL
  * for a bad cookie or a payload larger than 65536 bytes, ENOMEM when the
  * DMA buffer cannot be allocated and EIO when the mailbox command fails.
  */
 static int
 oce_handle_passthrough(struct ifnet *ifp, caddr_t data)
 {
 	POCE_SOFTC sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *)data;
 	int rc = ENXIO;
 	char cookie[32] = {0};
 	void *priv_data = (void *)ifr->ifr_data;
 	void *ioctl_ptr;
 	uint32_t req_size;
 	struct mbx_hdr req;
 	OCE_DMA_MEM dma_mem;
 	struct mbx_common_get_cntl_attr *fw_cmd;
 
 	if (copyin(priv_data, cookie, strlen(IOCTL_COOKIE)))
 		return EFAULT;
 
 	if (memcmp(cookie, IOCTL_COOKIE, strlen(IOCTL_COOKIE)))
 		return EINVAL;
 
 	/* The mailbox header follows the cookie in the user buffer. */
 	ioctl_ptr = (char *)priv_data + strlen(IOCTL_COOKIE);
 	if (copyin(ioctl_ptr, &req, sizeof(struct mbx_hdr)))
 		return EFAULT;
 
 	req_size = le32toh(req.u0.req.request_length);
 	if (req_size > 65536)
 		return EINVAL;
 
 	/* request_length covers the payload only; add the header. */
 	req_size += sizeof(struct mbx_hdr);
 	rc = oce_dma_alloc(sc, req_size, &dma_mem, 0);
 	if (rc)
 		return ENOMEM;
 
 	if (copyin(ioctl_ptr, OCE_DMAPTR(&dma_mem,char), req_size)) {
 		rc = EFAULT;
 		goto dma_free;
 	}
 
 	rc = oce_pass_through_mbox(sc, &dma_mem, req_size);
 	if (rc) {
 		rc = EIO;
 		goto dma_free;
 	}
 
 	if (copyout(OCE_DMAPTR(&dma_mem,char), ioctl_ptr, req_size)) {
 		rc =  EFAULT;
 		goto dma_free;
 	}
 
 	/* 
 	   firmware is filling all the attributes for this ioctl except
 	   the driver version..so fill it 
 	 */
 	if(req.u0.rsp.opcode == OPCODE_COMMON_GET_CNTL_ATTRIBUTES) {
 		/*
 		 * ioctl_ptr is a user-space address: fw_cmd is used only to
 		 * compute the address of the version field, and the bytes
 		 * are written with copyout() rather than through the
 		 * pointer.
 		 */
 		fw_cmd = (struct mbx_common_get_cntl_attr *) ioctl_ptr;
 		if (copyout(COMPONENT_REVISION,
 		    fw_cmd->params.rsp.cntl_attr_info.hba_attr.drv_ver_str,
 		    strlen(COMPONENT_REVISION)))
 			rc = EFAULT;
 	}
 
 dma_free:
 	oce_dma_free(sc, &dma_mem);
 	return rc;
 
 }
 
 /*
  * Adaptive interrupt coalescing: recompute the event-queue delay of every
  * EQ and hand the changed values to oce_mbox_eqd_modify_periodic() in
  * batches of at most 8.
  *
  * For an EQ with adaptive coalescing disabled the static delay (et_eqd) is
  * used.  Otherwise the combined RX packet and TX request rate since the
  * previous sample is turned into a delay, clamped to the EQ's
  * [min_eqd, max_eqd] range.  A sample is only recorded, without changing
  * the delay, on the first call, after a tick or counter wrap, and when less
  * than one millisecond has elapsed since the previous sample.
  */
 static void
 oce_eqd_set_periodic(POCE_SOFTC sc)
 {
 	struct oce_set_eqd set_eqd[OCE_MAX_EQ];
 	struct oce_aic_obj *aic;
 	struct oce_eq *eqo;
 	uint64_t now = 0, delta;
 	int eqd, i, num = 0;
 	uint32_t tx_reqs = 0, rxpkts = 0, pps;
 	struct oce_wq *wq;
 	struct oce_rq *rq;
 
 	#define ticks_to_msecs(t)       (1000 * (t) / hz)
 
 	for (i = 0 ; i < sc->neqs; i++) {
 		eqo = sc->eq[i];
 		aic = &sc->aic_obj[i];
 		/* When setting the static eq delay from the user space */
 		if (!aic->enable) {
 			if (aic->ticks)
 				aic->ticks = 0;
 			eqd = aic->et_eqd;
 			goto modify_eqd;
 		}
 
 		rq = sc->rq[i];
 		rxpkts = rq->rx_stats.rx_pkts;
 		wq = sc->wq[i];
 		tx_reqs = wq->tx_stats.tx_reqs;
 		now = ticks;
 
 		/* First sample, or a counter/tick wrap: just take a baseline. */
 		if (!aic->ticks || now < aic->ticks ||
 		    rxpkts < aic->prev_rxpkts || tx_reqs < aic->prev_txreqs) {
 			aic->prev_rxpkts = rxpkts;
 			aic->prev_txreqs = tx_reqs;
 			aic->ticks = now;
 			continue;
 		}
 
 		delta = ticks_to_msecs(now - aic->ticks);
 		/*
 		 * Less than 1ms since the last sample: no rate can be
 		 * computed (delta is used as a divisor below).  Keep the
 		 * baseline and try again on the next call.
 		 */
 		if (delta == 0)
 			continue;
 
 		pps = (((uint32_t)(rxpkts - aic->prev_rxpkts) * 1000) / delta) +
 		      (((uint32_t)(tx_reqs - aic->prev_txreqs) * 1000) / delta);
 		eqd = (pps / 15000) << 2;
 		if (eqd < 8)
 			eqd = 0;
 
 		/* Make sure that the eq delay is in the known range */
 		eqd = min(eqd, aic->max_eqd);
 		eqd = max(eqd, aic->min_eqd);
 
 		aic->prev_rxpkts = rxpkts;
 		aic->prev_txreqs = tx_reqs;
 		aic->ticks = now;
 
 modify_eqd:
 		/* Queue an update only when the delay actually changes. */
 		if (eqd != aic->cur_eqd) {
 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
 			set_eqd[num].eq_id = eqo->eq_id;
 			aic->cur_eqd = eqd;
 			num++;
 		}
 	}
 
 	/* Send the pending updates, at most 8 per mailbox request. */
 	for (i = 0; i < num; i += 8) {
 		if ((num - i) >= 8)
 			oce_mbox_eqd_modify_periodic(sc, &set_eqd[i], 8);
 		else
 			oce_mbox_eqd_modify_periodic(sc, &set_eqd[i], (num - i));
 	}
 
 }
 
 /*
  * Poll the adapter's error registers and report what is found.
  *
  * On XE201 the SLIPORT status register is read, plus the two error
  * registers when the error bit is set.  Other chips have their unmasked
  * "unrecoverable error" (UE) status bits read from PCI config space.  Only
  * a SLIPORT error latches sc->hw_error; UE bits are merely logged.  Does
  * nothing once hw_error is already set.
  */
 static void oce_detect_hw_error(POCE_SOFTC sc)
 {
 
 	uint32_t ue_low = 0, ue_high = 0, ue_low_mask = 0, ue_high_mask = 0;
 	uint32_t sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
 	uint32_t bit;
 
 	if (sc->hw_error)
 		return;
 
 	if (IS_XE201(sc)) {
 		sliport_status = OCE_READ_REG32(sc, db, SLIPORT_STATUS_OFFSET);
 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
 			sliport_err1 = OCE_READ_REG32(sc, db, SLIPORT_ERROR1_OFFSET);
 			sliport_err2 = OCE_READ_REG32(sc, db, SLIPORT_ERROR2_OFFSET);
 		}
 	} else {
 		ue_low = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW);
 		ue_high = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HIGH);
 		ue_low_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW_MASK);
 		ue_high_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HI_MASK);
 
 		/* Keep only the bits that are not masked off. */
 		ue_low &= ~ue_low_mask;
 		ue_high &= ~ue_high_mask;
 	}
 
 	/* On certain platforms BE hardware can indicate spurious UEs.
 	 * Allow the h/w to stop working completely in case of a real UE.
 	 * Hence not setting the hw_error for UE detection.
 	 */
 	if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
 		sc->hw_error = TRUE;
 		device_printf(sc->dev, "Error detected in the card\n");
 		device_printf(sc->dev,
 		    "ERR: sliport status 0x%x\n", sliport_status);
 		device_printf(sc->dev,
 		    "ERR: sliport error1 0x%x\n", sliport_err1);
 		device_printf(sc->dev,
 		    "ERR: sliport error2 0x%x\n", sliport_err2);
 	}
 
 	/* Name every UE bit that is set, lowest bit first. */
 	for (bit = 0; ue_low; ue_low >>= 1, bit++) {
 		if (ue_low & 1)
 			device_printf(sc->dev, "UE: %s bit set\n",
 			    ue_status_low_desc[bit]);
 	}
 
 	for (bit = 0; ue_high; ue_high >>= 1, bit++) {
 		if (ue_high & 1)
 			device_printf(sc->dev, "UE: %s bit set\n",
 			    ue_status_hi_desc[bit]);
 	}
 
 }
 
 
 /*
  * Periodic housekeeping callout.  Checks for hardware errors, refreshes
  * the NIC and queue statistics, re-applies the MAC address, kicks every
  * TX queue, retunes the EQ delay on BE/SH parts, and re-arms itself to
  * fire again after hz ticks.
  */
 static void
 oce_local_timer(void *arg)
 {
 	POCE_SOFTC sc = arg;
 	int q;
 
 	oce_detect_hw_error(sc);
 	oce_refresh_nic_stats(sc);
 	oce_refresh_queue_stats(sc);
 	oce_mac_addr_set(sc);
 
 	/* TX watchdog: give every work queue a restart attempt. */
 	for (q = 0; q < sc->nwqs; q++) {
 		oce_tx_restart(sc, sc->wq[q]);
 	}
 
 	/* Calculate and set the EQ delay for an optimal interrupt rate. */
 	if (IS_BE(sc) || IS_SH(sc)) {
 		oce_eqd_set_periodic(sc);
 	}
 
 	callout_reset(&sc->timer, hz, oce_local_timer, sc);
 }
 
 /*
  * Reap outstanding TX completions on every work queue.
  *
  * Polls the queues in 1000us steps and stops when no queue has ring
  * entries in use, when more than 10 consecutive polls produced no
  * completions, or when a hardware error has been flagged.  Any entries
  * still in use afterwards are completed by hand.
  */
 static void
 oce_tx_compl_clean(POCE_SOFTC sc)
 {
 	struct oce_wq *wq;
 	int i = 0, idle_polls = 0, reaped = 0;
 	int busy_txqs;
 
 	for (;;) {
 		busy_txqs = sc->nwqs;
 
 		for_all_wq_queues(sc, wq, i) {
 			reaped = oce_wq_handler(wq);
 
 			/* Any progress restarts the silence counter. */
 			if (reaped)
 				idle_polls = 0;
 
 			if (wq->ring->num_used == 0)
 				busy_txqs--;
 		}
 
 		if (busy_txqs == 0 || ++idle_polls > 10 || sc->hw_error)
 			break;
 
 		DELAY(1000);
 	}
 
 	/* Force-complete whatever the hardware never reported. */
 	for_all_wq_queues(sc, wq, i) {
 		while (wq->ring->num_used != 0) {
 			LOCK(&wq->tx_compl_lock);
 			oce_process_tx_completion(wq);
 			UNLOCK(&wq->tx_compl_lock);
 		}
 	}
 }
 
 /*
  * Quiesce the interface: clear the running/oactive driver flags, reap TX
  * completions, stop interrupts, drain the interrupt taskqueues, stop RX
  * and drain every EQ/CQ.  Interrupts and the first EQ are re-enabled at
  * the end so that MCC async events keep arriving.
  *
  * NOTE : This should only be called holding
  *        DEVICE_LOCK.  The lock is dropped and reacquired internally
  *        around taskqueue_drain().
  */
 static void
 oce_if_deactivate(POCE_SOFTC sc)
 {
 	int i;
 	struct oce_rq *rq;
 	struct oce_wq *wq;
 	struct oce_eq *eq;
 
 	sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
 	oce_tx_compl_clean(sc);
 
 	/* Stop intrs and finish any bottom halves pending */
 	oce_hw_intr_disable(sc);
 
 	/* Since taskqueue_drain takes a Giant Lock, we should not hold
 	   any other lock. So unlock the device lock and reacquire it after
 	   completing taskqueue_drain.
 	*/
 	UNLOCK(&sc->dev_lock);
 	for (i = 0; i < sc->intr_count; i++) {
 		if (sc->intrs[i].tq != NULL) {
 			taskqueue_drain(sc->intrs[i].tq, &sc->intrs[i].task);
 		}
 	}
 	LOCK(&sc->dev_lock);
 
 	/* Delete RX queue in card with flush param */
 	oce_stop_rx(sc);
 
 	/* Invalidate any pending cq and eq entries */
 	for_all_evnt_queues(sc, eq, i)	
 		oce_drain_eq(eq);
 	for_all_rq_queues(sc, rq, i)
 		oce_drain_rq_cq(rq);
 	for_all_wq_queues(sc, wq, i)
 		oce_drain_wq_cq(wq);
 
 	/* But we still need to get MCC async events.
 	   So enable intrs and also arm the first EQ.
 	*/
 	oce_hw_intr_enable(sc);
 	oce_arm_eq(sc, sc->eq[0]->eq_id, 0, TRUE, FALSE);
 
 	DELAY(10);
 }
 
 
 /*
  * Bring the interface up: mark it running, then, with interrupts
  * disabled, start RX, start every receive and work queue, arm every
  * event queue and finally re-enable interrupts.  Queue start failures
  * are logged but do not abort the sequence.
  */
 static void
 oce_if_activate(POCE_SOFTC sc)
 {
 	struct oce_eq *eq;
 	struct oce_rq *rq;
 	struct oce_wq *wq;
 	int i;
 
 	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
 
 	oce_hw_intr_disable(sc);
 
 	oce_start_rx(sc);
 
 	for_all_rq_queues(sc, rq, i) {
 		if (oce_start_rq(rq))
 			device_printf(sc->dev, "Unable to start RX\n");
 	}
 
 	for_all_wq_queues(sc, wq, i) {
 		if (oce_start_wq(wq))
 			device_printf(sc->dev, "Unable to start TX\n");
 	}
 
 	for_all_evnt_queues(sc, eq, i) {
 		oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE);
 	}
 
 	oce_hw_intr_enable(sc);
 }
 
 /*
  * Record the link state carried by an async link-state CQE and notify
  * the network stack.  The ASYNC_EVENT_LOGICAL bit is ignored when
  * deciding whether the link is up.
  */
 static void
 process_link_state(POCE_SOFTC sc, struct oce_async_cqe_link_state *acqe)
 {
 	const int link_up =
 	    ((acqe->u0.s.link_status & ~ASYNC_EVENT_LOGICAL) ==
 	     ASYNC_EVENT_LINK_UP);
 
 	if (link_up) {
 		sc->link_status = ASYNC_EVENT_LINK_UP;
 		if_link_state_change(sc->ifp, LINK_STATE_UP);
 		return;
 	}
 
 	sc->link_status = ASYNC_EVENT_LINK_DOWN;
 	if_link_state_change(sc->ifp, LINK_STATE_DOWN);
 }
 
 
 /*
  * Handle a GRP5 OS2BMC async event.  When the event has mgmt_enable set,
  * flag OS2BMC in sc->flags and rebuild sc->bmc_filt_mask from the
  * individual filter bits of the event; otherwise leave the softc alone.
  */
 static void oce_async_grp5_osbmc_process(POCE_SOFTC sc,
 					 struct oce_async_evt_grp5_os2bmc *evt)
 {
 	DW_SWAP(evt, sizeof(struct oce_async_evt_grp5_os2bmc));
 
 	if (!evt->u.s.mgmt_enable)
 		return;
 
 	sc->flags |= OCE_FLAGS_OS2BMC;
 
 	/* One mask bit per filter, bit 0 (ARP) through bit 8 (multicast). */
 	sc->bmc_filt_mask = evt->u.s.arp_filter |
 	    (evt->u.s.dhcp_client_filt << 1) |
 	    (evt->u.s.dhcp_server_filt << 2) |
 	    (evt->u.s.net_bios_filt << 3) |
 	    (evt->u.s.bcast_filt << 4) |
 	    (evt->u.s.ipv6_nbr_filt << 5) |
 	    (evt->u.s.ipv6_ra_filt << 6) |
 	    (evt->u.s.ipv6_ras_filt << 7) |
 	    (evt->u.s.mcast_filt << 8);
 }
 
 
 /*
  * Dispatch a GRP5 async event by its async_type.  PVID-state events
  * update sc->pvid (0 when disabled), OS2BMC events go to
  * oce_async_grp5_osbmc_process(), and everything else is ignored.
  */
 static void oce_process_grp5_events(POCE_SOFTC sc, struct oce_mq_cqe *cqe)
 {
 	struct oce_async_event_grp5_pvid_state *pvid_evt;
 
 	switch (cqe->u0.s.async_type) {
 	case ASYNC_EVENT_PVID_STATE:
 		pvid_evt = (struct oce_async_event_grp5_pvid_state *)cqe;
 		sc->pvid = pvid_evt->enabled ?
 		    (pvid_evt->tag & VLAN_VID_MASK) : 0;
 		break;
 	case ASYNC_EVENT_OS2BMC:
 		oce_async_grp5_osbmc_process(sc,
 		    (struct oce_async_evt_grp5_os2bmc *)cqe);
 		break;
 	default:
 		break;
 	}
 }
 
 /*
  * Handle the Completion Queue for the Mailbox/Async notifications.
  *
  * Walks the MQ's completion ring for as long as the consumer entry has a
  * non-zero dw[3].  Async entries are dispatched by event type (link
  * state, GRP5, debug QnQ); every entry is then cleared and consumed.
  * If anything was consumed the CQ is re-armed with that count.
  *
  * arg is the struct oce_mq being serviced.  Always returns 0.
  */
 uint16_t
 oce_mq_handler(void *arg)
 {
 	struct oce_mq *mq = (struct oce_mq *)arg;
 	POCE_SOFTC sc = mq->parent;
 	struct oce_cq *cq = mq->cq;
 	int num_cqes = 0, evt_type = 0, optype = 0;
 	struct oce_mq_cqe *cqe;
 	struct oce_async_cqe_link_state *acqe;
 	struct oce_async_event_qnq *dbgcqe;
 
 
 	bus_dmamap_sync(cq->ring->dma.tag,
 			cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
 	cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe);
 
 	while (cqe->u0.dw[3]) {
 		DW_SWAP((uint32_t *) cqe, sizeof(oce_mq_cqe));
 		if (cqe->u0.s.async_event) {
 			evt_type = cqe->u0.s.event_type;
 			optype = cqe->u0.s.async_type;
 			if (evt_type  == ASYNC_EVENT_CODE_LINK_STATE) {
 				/* Link status evt */
 				acqe = (struct oce_async_cqe_link_state *)cqe;
 				process_link_state(sc, acqe);
 			} else if (evt_type == ASYNC_EVENT_GRP5) {
 				oce_process_grp5_events(sc, cqe);
 			} else if (evt_type == ASYNC_EVENT_CODE_DEBUG &&
 					optype == ASYNC_EVENT_DEBUG_QNQ) {
 				/* QnQ debug event: latch the tag if valid */
 				dbgcqe =  (struct oce_async_event_qnq *)cqe;
 				if(dbgcqe->valid)
 					sc->qnqid = dbgcqe->vlan_tag;
 				sc->qnq_debug_event = TRUE;
 			}
 		}
 		/* Clear the word the loop tests, then advance the ring */
 		cqe->u0.dw[3] = 0;
 		RING_GET(cq->ring, 1);
 		bus_dmamap_sync(cq->ring->dma.tag,
 				cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
 		cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe);
 		num_cqes++;
 	}
 
 	if (num_cqes)
 		oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE);
 
 	return 0;
 }
 
 
 /*
  * Decide how many RX/TX queues to ask for.
  *
  * A single RQ/WQ pair is used in FLEX10, UMC or VNIC function mode, when
  * RSS is disabled, or on BE2.  Otherwise the count is bounded by
  * OCE_NCPUS and sc->nrssqs, with one extra RQ.  BE2 with RSS enabled
  * still gets the RSS-sized RQ count (but keeps one WQ).
  */
 static void
 setup_max_queues_want(POCE_SOFTC sc)
 {
 	const int single_queue =
 	    (sc->function_mode & FNM_FLEX10_MODE) ||
 	    (sc->function_mode & FNM_UMC_MODE) ||
 	    (sc->function_mode & FNM_VNIC_MODE) ||
 	    (!is_rss_enabled(sc)) ||
 	    IS_BE2(sc);
 
 	if (single_queue) {
 		sc->nrqs = 1;
 		sc->nwqs = 1;
 	} else {
 		sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1;
 		sc->nwqs = MIN(OCE_NCPUS, sc->nrssqs);
 	}
 
 	if (IS_BE2(sc) && is_rss_enabled(sc)) {
 		sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1;
 	}
 }
 
 
 /*
  * Set the final queue counts from the number of interrupts obtained.
  * Without RSS there is one RQ and one WQ.  With RSS there is one WQ per
  * interrupt and one RQ more than that, except that BE2 is always limited
  * to a single WQ.
  */
 static void
 update_queues_got(POCE_SOFTC sc)
 {
 	if (!is_rss_enabled(sc)) {
 		sc->nrqs = 1;
 		sc->nwqs = 1;
 		return;
 	}
 
 	sc->nrqs = sc->intr_count + 1;
 	sc->nwqs = (IS_BE2(sc)) ? 1 : sc->intr_count;
 }
 
 /*
  * Return TRUE when the frame in m is IPv6, its next header is neither
  * TCP nor UDP, and the length byte of the header that follows the IPv6
  * header is 0xff; FALSE otherwise.
  *
  * m->m_data is advanced temporarily to reach the headers and is always
  * restored before returning.
  */
 static int
 oce_check_ipv6_ext_hdr(struct mbuf *m)
 {
 	caddr_t saved_data = m->m_data;
 	struct ether_header *eh = mtod(m, struct ether_header *);
 	struct ip6_hdr *ip6;
 	struct ip6_ext *ip6e;
 	int found = FALSE;
 
 	if (eh->ether_type != htons(ETHERTYPE_IPV6))
 		return FALSE;
 
 	/* Step over the Ethernet header to the IPv6 header. */
 	m->m_data += sizeof(struct ether_header);
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	if ((ip6->ip6_nxt != IPPROTO_TCP) &&
 	    (ip6->ip6_nxt != IPPROTO_UDP)) {
 		/* Step over the IPv6 header to whatever follows it. */
 		m->m_data += sizeof(struct ip6_hdr);
 		ip6e = mtod(m, struct ip6_ext *);
 		if (ip6e->ip6e_len == 0xff)
 			found = TRUE;
 	}
 
 	m->m_data = saved_data;
 	return found;
 }
 
 /*
  * TRUE when the adapter is flagged as BE3 and the low byte of its ASIC
  * revision is below 2; FALSE otherwise.
  */
 static int
 is_be3_a1(POCE_SOFTC sc)
 {
 	return ((sc->flags & OCE_FLAGS_BE3) &&
 	    ((sc->asic_revision & 0xFF) < 2)) ? TRUE : FALSE;
 }
 
 /*
  * Embed VLAN information in the packet data itself.
  *
  * sc       -- adapter softc; supplies pvid and qnqid
  * m        -- packet to tag
  * complete -- optional; set to FALSE whenever a pvid or qnqid applies
  *
  * Returns the (possibly reallocated) mbuf, or NULL when the mbuf is not
  * writable or an encapsulation failed.  In the not-writable case the
  * mbuf passed in is left untouched.
  */
 static struct mbuf *
 oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete)
 {
 	uint16_t vlan_tag = 0;
 
 	if(!M_WRITABLE(m))
 		return NULL;
 
 	/* Embed vlan tag in the packet if it is not part of it */
 	if(m->m_flags & M_VLANTAG) {
 		vlan_tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
 		m->m_flags &= ~M_VLANTAG;
 	}
 
 	/* With a pvid set, untagged packets are tagged with the pvid */
 	if(sc->pvid) {
 		if(!vlan_tag)
 			vlan_tag = sc->pvid;
 		if (complete)
 			*complete = FALSE;
 	}
 
 	if(vlan_tag) {
 		m = ether_vlanencap(m, vlan_tag);
 		/*
 		 * The encapsulation can fail and hand back NULL; stop here
 		 * so the NULL mbuf is never passed to the outer-tag
 		 * encapsulation below.
 		 */
 		if (m == NULL)
 			return NULL;
 	}
 
 	/* Outer (QnQ) tag, when one has been configured */
 	if(sc->qnqid) {
 		m = ether_vlanencap(m, sc->qnqid);
 
 		if (complete)
 			*complete = FALSE;
 	}
 	return m;
 }
 
 /*
  * TRUE only when all three hold: the adapter is a BE3 A1
  * (is_be3_a1()), IS_QNQ_OR_UMC(sc) is set, and the packet matches
  * oce_check_ipv6_ext_hdr().  The checks run in that order and stop at
  * the first one that fails.
  */
 static int
 oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m)
 {
 	if (is_be3_a1(sc)) {
 		if (IS_QNQ_OR_UMC(sc)) {
 			if (oce_check_ipv6_ext_hdr(m))
 				return TRUE;
 		}
 	}
 	return FALSE;
 }
 
 /*
  * Establish the queue resource limits in the softc.
  *
  * BE parts never consult the firmware: they get the static defaults
  * plus max_vlans.  Other parts call oce_get_profile_config() and only
  * fall back to the static defaults when that call fails.
  */
 static void
 oce_get_config(POCE_SOFTC sc)
 {
 	uint32_t max_rss;
 
 	max_rss = ((IS_BE(sc) || IS_SH(sc)) && (!sc->be3_native)) ?
 	    OCE_LEGACY_MODE_RSS : OCE_MAX_RSS;
 
 	if (IS_BE(sc)) {
 		/* For BE3 don't rely on fw for determining the resources */
 		sc->max_vlans = MAX_VLANFILTER_SIZE;
 	} else if (oce_get_profile_config(sc, max_rss) == 0) {
 		/* The profile query succeeded; nothing left to default. */
 		return;
 	}
 
 	sc->nrssqs = max_rss;
 	sc->nrqs = sc->nrssqs + 1;
 	sc->nwqs = OCE_MAX_WQ;
 }
 
 /* Drop the registered RDMA interface, if there is one. */
 static void
 oce_rdma_close(void)
 {
 	if (oce_rdma_if == NULL)
 		return;
 
 	oce_rdma_if = NULL;
 }
 
 /* Copy the adapter's 6-byte MAC address from the softc into macaddr. */
 static void
 oce_get_mac_addr(POCE_SOFTC sc, uint8_t *macaddr)
 {
 	const size_t mac_len = 6;
 
 	memcpy(macaddr, sc->macaddr.mac_addr, mac_len);
 }
 
 /*
  * Register an RDMA consumer with the driver.
  *
  * Fills in the driver-side callbacks of rdma_info, records rdma_if as
  * the active RDMA interface and, when rdma_if provides an announce
  * callback, announces every adapter on the softc list to it.
  *
  * Returns 0 on success, -EINVAL when either argument is NULL and -ENXIO
  * when either structure's size field does not match what the driver
  * expects.
  */
 int
 oce_register_rdma(POCE_RDMA_INFO rdma_info, POCE_RDMA_IF rdma_if)
 {
 	POCE_SOFTC sc;
 	struct oce_dev_info di;
 	int i;
 
 	if ((rdma_info == NULL) || (rdma_if == NULL)) {
 		return -EINVAL;
 	}
 
 	if ((rdma_info->size != OCE_RDMA_INFO_SIZE) ||
 	    (rdma_if->size != OCE_RDMA_IF_SIZE)) {
 		return -ENXIO;
 	}
 
 	rdma_info->close = oce_rdma_close;
 	rdma_info->mbox_post = oce_mbox_post;
 	rdma_info->common_req_hdr_init = mbx_common_req_hdr_init;
 	rdma_info->get_mac_addr = oce_get_mac_addr;
 
 	oce_rdma_if = rdma_if;
 
 	/*
 	 * Without an announce callback there is nothing to do per adapter.
 	 * Return here rather than walking the list: the walk must never
 	 * depend on the callback being present to make progress.
 	 */
 	if (oce_rdma_if->announce == NULL) {
 		return 0;
 	}
 
 	for (sc = softc_head; sc != NULL; sc = sc->next) {
 		memset(&di, 0, sizeof(di));
 		di.dev = sc->dev;
 		di.softc = sc;
 		di.ifp = sc->ifp;
 		di.db_bhandle = sc->db_bhandle;
 		di.db_btag = sc->db_btag;
 		di.db_page_size = 4096;
 		if (sc->flags & OCE_FLAGS_USING_MSIX) {
 			di.intr_mode = OCE_INTERRUPT_MODE_MSIX;
 		} else if (sc->flags & OCE_FLAGS_USING_MSI) {
 			di.intr_mode = OCE_INTERRUPT_MODE_MSI;
 		} else {
 			di.intr_mode = OCE_INTERRUPT_MODE_INTX;
 		}
 		di.dev_family = OCE_GEN2_FAMILY; /* fixme: must detect skyhawk */
 		if (di.intr_mode != OCE_INTERRUPT_MODE_INTX) {
 			/* NIC vectors first, RoCE vectors after them */
 			di.msix.num_vectors = sc->intr_count + sc->roce_intr_count;
 			di.msix.start_vector = sc->intr_count;
 			for (i = 0; i < di.msix.num_vectors; i++) {
 				di.msix.vector_list[i] = sc->intrs[i].vector;
 			}
 		}
 		memcpy(di.mac_addr, sc->macaddr.mac_addr, 6);
 		di.vendor_id = pci_get_vendor(sc->dev);
 		di.dev_id = pci_get_device(sc->dev);
 
 		if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) {
 			di.flags |= OCE_RDMA_INFO_RDMA_SUPPORTED;
 		}
 
 		rdma_if->announce(&di);
 	}
 
 	return 0;
 }
 
 /*
  * Apply the LRO and RX buffer size tunables.
  *
  * Both environment lookups are currently disabled, so value stays NULL:
  * the only effect as written is that sc->enable_hwlro is cleared.  The
  * parsing paths are kept for when the lookups are restored.
  */
 static void
 oce_read_env_variables( POCE_SOFTC sc )
 {
 	char *value = NULL;
 
 	/* read if user wants to enable hwlro or swlro */
 	/* lookup disabled: value = getenv("oce_enable_hwlro"); */
 	if (value && IS_SH(sc)) {
 		sc->enable_hwlro = strtol(value, NULL, 10);
 		if (!sc->enable_hwlro) {
 			device_printf(sc->dev, "software lro enabled\n");
 		} else if (oce_mbox_nic_query_lro_capabilities(sc, NULL, NULL)) {
 			/* The capability query failed: fall back to swlro. */
 			device_printf(sc->dev, "no hardware lro support\n");
 			device_printf(sc->dev, "software lro enabled\n");
 			sc->enable_hwlro = 0;
 		} else {
 			device_printf(sc->dev, "hardware lro enabled\n");
 			oce_max_rsp_handled = 32;
 		}
 	} else {
 		sc->enable_hwlro = 0;
 	}
 
 	/* read mbuf size */
 	/* lookup disabled: value = getenv("oce_rq_buf_size"); */
 	if (value && IS_SH(sc)) {
 		oce_rq_buf_size = strtol(value, NULL, 10);
 		if (oce_rq_buf_size != 2048 && oce_rq_buf_size != 4096 &&
 		    oce_rq_buf_size != 9216 && oce_rq_buf_size != 16384) {
 			device_printf(sc->dev, " Supported oce_rq_buf_size values are 2K, 4K, 9K, 16K \n");
 			oce_rq_buf_size = 2048;
 		}
 	}
 }
Index: projects/hps_head/sys/dev/twa/tw_osl_freebsd.c
===================================================================
--- projects/hps_head/sys/dev/twa/tw_osl_freebsd.c	(revision 309217)
+++ projects/hps_head/sys/dev/twa/tw_osl_freebsd.c	(revision 309218)
@@ -1,1712 +1,1712 @@
 /*
  * Copyright (c) 2004-07 Applied Micro Circuits Corporation.
  * Copyright (c) 2004-05 Vinod Kashyap.
  * Copyright (c) 2000 Michael Smith
  * Copyright (c) 2000 BSDi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * AMCC'S 3ware driver for 9000 series storage controllers.
  *
  * Author: Vinod Kashyap
  * Modifications by: Adam Radford
  * Modifications by: Manjunath Ranganathaiah
  */
 
 
 /*
  * FreeBSD specific functions not related to CAM, and other
  * miscellaneous functions.
  */
 
 
 #include <dev/twa/tw_osl_includes.h>
 #include <dev/twa/tw_cl_fwif.h>
 #include <dev/twa/tw_cl_ioctl.h>
 #include <dev/twa/tw_osl_ioctl.h>
 
 #ifdef TW_OSL_DEBUG
 TW_INT32	TW_DEBUG_LEVEL_FOR_OSL = TW_OSL_DEBUG;
 TW_INT32	TW_OSL_DEBUG_LEVEL_FOR_CL = TW_OSL_DEBUG;
 #endif /* TW_OSL_DEBUG */
 
 static MALLOC_DEFINE(TW_OSLI_MALLOC_CLASS, "twa_commands", "twa commands");
 
 
 /* Entry points of the control character device, defined below. */
 static	d_open_t		twa_open;
 static	d_close_t		twa_close;
 static	d_ioctl_t		twa_ioctl;
 
 /*
  * Switch table for the "twa" control device; twa_attach() passes it to
  * make_dev() when it creates the per-controller node.
  */
 static struct cdevsw twa_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_open =	twa_open,
 	.d_close =	twa_close,
 	.d_ioctl =	twa_ioctl,
 	.d_name =	"twa",
 };
 
 static devclass_t	twa_devclass;
 
 
 /*
  * Function name:	twa_open
  * Description:		Open entry point of the control device.
  *			Only records that the controller is open.
  *
  * Input:		dev	-- control device; si_drv1 holds the softc
  *			flags	-- mode of open (not used)
  *			fmt	-- device type (not used)
  *			proc	-- calling thread (not used)
  * Output:		None
  * Return value:	0	-- always
  */
 static TW_INT32
 twa_open(struct cdev *dev, TW_INT32 flags, TW_INT32 fmt, struct thread *proc)
 {
 	struct twa_softc	*sc;
 
 	sc = dev->si_drv1;
 	tw_osli_dbg_dprintf(5, sc, "entered");
 	sc->open = TW_CL_TRUE;
 	return(0);
 }
 
 
 
 /*
  * Function name:	twa_close
  * Description:		Close entry point of the control device.
  *			Only records that the controller is no longer open.
  *
  * Input:		dev	-- control device; si_drv1 holds the softc
  *			flags	-- mode of corresponding open (not used)
  *			fmt	-- device type (not used)
  *			proc	-- calling thread (not used)
  * Output:		None
  * Return value:	0	-- always
  */
 static TW_INT32
 twa_close(struct cdev *dev, TW_INT32 flags, TW_INT32 fmt, struct thread *proc)
 {
 	struct twa_softc	*sc;
 
 	sc = dev->si_drv1;
 	tw_osli_dbg_dprintf(5, sc, "entered");
 	sc->open = TW_CL_FALSE;
 	return(0);
 }
 
 
 
 /*
  * Function name:	twa_ioctl
  * Description:		Ioctl entry point of the control device.
  *			Firmware pass-through and bus-scan requests are
  *			handled by the OS Layer; every other command is
  *			forwarded to the Common Layer.
  *
  * Input:		dev	-- control device; si_drv1 holds the softc
  *			cmd	-- ioctl cmd
  *			buf	-- ptr to buffer in kernel memory, which is
  *				   a copy of the input buffer in user-space
  *			flags	-- mode of corresponding open (not used)
  *			proc	-- calling thread (not used)
  * Output:		buf	-- ptr to buffer in kernel memory, which will
  *				   be copied to the output buffer in user-space
  * Return value:	0	-- success
  *			non-zero-- failure, as reported by the handler
  */
 static TW_INT32
 twa_ioctl(struct cdev *dev, u_long cmd, caddr_t buf, TW_INT32 flags, struct thread *proc)
 {
 	struct twa_softc	*sc = dev->si_drv1;
 
 	tw_osli_dbg_dprintf(5, sc, "entered");
 
 	switch (cmd) {
 	case TW_OSL_IOCTL_FIRMWARE_PASS_THROUGH:
 		tw_osli_dbg_dprintf(6, sc, "ioctl: fw_passthru");
 		return(tw_osli_fw_passthru(sc, (TW_INT8 *)buf));
 
 	case TW_OSL_IOCTL_SCAN_BUS:
 		/* Request CAM for a bus scan. */
 		tw_osli_dbg_dprintf(6, sc, "ioctl: scan bus");
 		return(tw_osli_request_bus_scan(sc));
 
 	default:
 		tw_osli_dbg_dprintf(6, sc, "ioctl: 0x%lx", cmd);
 		return(tw_cl_ioctl(&sc->ctlr_handle, cmd, buf));
 	}
 }
 
 
 
 /* Newbus device methods and interrupt/watchdog handlers. */
 static TW_INT32	twa_probe(device_t dev);
 static TW_INT32	twa_attach(device_t dev);
 static TW_INT32	twa_detach(device_t dev);
 static TW_INT32	twa_shutdown(device_t dev);
 static TW_VOID	twa_busdma_lock(TW_VOID *lock_arg, bus_dma_lock_op_t op);
 static TW_VOID	twa_pci_intr(TW_VOID *arg);
 static TW_VOID	twa_watchdog(TW_VOID *arg);
 /* Not static: these two have external linkage. */
 int twa_setup_intr(struct twa_softc *sc);
 int twa_teardown_intr(struct twa_softc *sc);
 
 /* Allocation and release of the controller's resources. */
 static TW_INT32	tw_osli_alloc_mem(struct twa_softc *sc);
 static TW_VOID	tw_osli_free_resources(struct twa_softc *sc);
 
 /* Callbacks handed to bus_dmamap_load(). */
 static TW_VOID	twa_map_load_data_callback(TW_VOID *arg,
 	bus_dma_segment_t *segs, TW_INT32 nsegments, TW_INT32 error);
 static TW_VOID	twa_map_load_callback(TW_VOID *arg,
 	bus_dma_segment_t *segs, TW_INT32 nsegments, TW_INT32 error);
 
 
 /* Method table binding the generic device interface to this driver. */
 static device_method_t	twa_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		twa_probe),
 	DEVMETHOD(device_attach,	twa_attach),
 	DEVMETHOD(device_detach,	twa_detach),
 	DEVMETHOD(device_shutdown,	twa_shutdown),
 
 	DEVMETHOD_END
 };
 
 /* Driver description: name, methods and per-device softc size. */
 static driver_t	twa_pci_driver = {
 	"twa",
 	twa_methods,
 	sizeof(struct twa_softc)
 };
 
 /* Register on the pci bus; the module depends on cam and pci. */
 DRIVER_MODULE(twa, pci, twa_pci_driver, twa_devclass, 0, 0);
 MODULE_DEPEND(twa, cam, 1, 1, 1);
 MODULE_DEPEND(twa, pci, 1, 1, 1);
 
 
 /*
  * Function name:	twa_probe
  * Description:		Probe method.  Claims the device when the Common
  *			Layer reports its PCI vendor/device id as supported,
  *			and prints the driver version the first time a
  *			controller is claimed.
  *
  * Input:		dev	-- bus device corresponding to the ctlr
  * Output:		None
  * Return value:	0	-- device claimed
  *			ENXIO	-- device not supported
  */
 static TW_INT32
 twa_probe(device_t dev)
 {
 	static TW_UINT8	first_ctlr = 1;
 
 	tw_osli_dbg_printf(3, "entered");
 
 	if (!tw_cl_ctlr_supported(pci_get_vendor(dev), pci_get_device(dev)))
 		return(ENXIO);
 
 	device_set_desc(dev, TW_OSLI_DEVICE_NAME);
 
 	/* Print the driver version only once. */
 	if (first_ctlr) {
 		printf("3ware device driver for 9000 series storage "
 			"controllers, version: %s\n",
 			TW_OSL_DRIVER_VERSION_STRING);
 		first_ctlr = 0;
 	}
 	return(0);
 }
 
 /*
  * Hook twa_pci_intr() up to the controller's IRQ resource.  Does nothing
  * and returns 0 when a handler is already installed or no IRQ resource
  * has been allocated; otherwise returns bus_setup_intr()'s result.
  */
 int twa_setup_intr(struct twa_softc *sc)
 {
 	if (sc->intr_handle || !sc->irq_res)
 		return( 0 );
 
 	return( bus_setup_intr(sc->bus_dev, sc->irq_res,
 				INTR_TYPE_CAM | INTR_MPSAFE,
 				NULL, twa_pci_intr,
 				sc, &sc->intr_handle) );
 }
 
 
 /*
  * Remove the interrupt handler installed by twa_setup_intr().  Does
  * nothing and returns 0 when no handler is installed or there is no IRQ
  * resource; otherwise forgets the handle and returns
  * bus_teardown_intr()'s result.
  */
 int twa_teardown_intr(struct twa_softc *sc)
 {
 	int rc;
 
 	if (!sc->intr_handle || !sc->irq_res)
 		return( 0 );
 
 	rc = bus_teardown_intr(sc->bus_dev, sc->irq_res, sc->intr_handle);
 	sc->intr_handle = NULL;
 	return( rc );
 }
 
 
 
 /*
  * Function name:	twa_attach
  * Description:		Allocates pci resources; updates sc; adds a node to the
  *			sysctl tree to expose the driver version; makes calls
  *			(to the Common Layer) to initialize ctlr, and to
  *			attach to CAM.  Finally creates the control device
  *			and starts the watchdog callout.
  *
  * Input:		dev	-- bus device corresponding to the ctlr
  * Output:		None
  * Return value:	0	-- success
  *			non-zero-- failure
  */
 static TW_INT32
 twa_attach(device_t dev)
 {
 	struct twa_softc	*sc = device_get_softc(dev);
 	TW_INT32		bar_num;
 	TW_INT32		bar0_offset;
 	TW_INT32		bar_size;
 	TW_INT32		error;
 
 	tw_osli_dbg_dprintf(3, sc, "entered");
 
 	sc->ctlr_handle.osl_ctlr_ctxt = sc;
 
 	/* Initialize the softc structure. */
 	sc->bus_dev = dev;
 	sc->device_id = pci_get_device(dev);
 
 	/* Initialize the mutexes right here. */
 	sc->io_lock = &(sc->io_lock_handle);
 	mtx_init(sc->io_lock, "tw_osl_io_lock", NULL, MTX_SPIN);
 	sc->q_lock = &(sc->q_lock_handle);
 	mtx_init(sc->q_lock, "tw_osl_q_lock", NULL, MTX_SPIN);
 	sc->sim_lock = &(sc->sim_lock_handle);
 	mtx_init(sc->sim_lock, "tw_osl_sim_lock", NULL, MTX_DEF | MTX_RECURSE);
 
 	/* Publish hw.<nameunit>.driver_version through sysctl. */
 	sysctl_ctx_init(&sc->sysctl_ctxt);
 	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctxt,
 		SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
 		device_get_nameunit(dev), CTLFLAG_RD, 0, "");
 	if (sc->sysctl_tree == NULL) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2000,
 			"Cannot add sysctl tree node",
 			ENXIO);
 		/*
 		 * NOTE(review): unlike every later failure path, this one
 		 * returns without calling tw_osli_free_resources() --
 		 * confirm whether the mutexes and sysctl context set up
 		 * above need releasing here.
 		 */
 		return(ENXIO);
 	}
 	SYSCTL_ADD_STRING(&sc->sysctl_ctxt, SYSCTL_CHILDREN(sc->sysctl_tree),
 		OID_AUTO, "driver_version", CTLFLAG_RD,
 		TW_OSL_DRIVER_VERSION_STRING, 0, "TWA driver version");
 
 	/* Force the busmaster enable bit on, in case the BIOS forgot. */
 	pci_enable_busmaster(dev);
 
 	/* Allocate the PCI register window. */
 	if ((error = tw_cl_get_pci_bar_info(sc->device_id, TW_CL_BAR_TYPE_MEM,
 		&bar_num, &bar0_offset, &bar_size))) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x201F,
 			"Can't get PCI BAR info",
 			error);
 		tw_osli_free_resources(sc);
 		return(error);
 	}
 	sc->reg_res_id = PCIR_BARS + bar0_offset;
 	if ((sc->reg_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 				&(sc->reg_res_id), RF_ACTIVE))
 				== NULL) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2002,
 			"Can't allocate register window",
 			ENXIO);
 		tw_osli_free_resources(sc);
 		return(ENXIO);
 	}
 	sc->bus_tag = rman_get_bustag(sc->reg_res);
 	sc->bus_handle = rman_get_bushandle(sc->reg_res);
 
 	/* Allocate and register our interrupt. */
 	sc->irq_res_id = 0;
 	if ((sc->irq_res = bus_alloc_resource_any(sc->bus_dev, SYS_RES_IRQ,
 				&(sc->irq_res_id),
 				RF_SHAREABLE | RF_ACTIVE)) == NULL) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2003,
 			"Can't allocate interrupt",
 			ENXIO);
 		tw_osli_free_resources(sc);
 		return(ENXIO);
 	}
 	if ((error = twa_setup_intr(sc))) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2004,
 			"Can't set up interrupt",
 			error);
 		tw_osli_free_resources(sc);
 		return(error);
 	}
 
 	/* Allocate the memory needed by both the OSL and the CL. */
 	if ((error = tw_osli_alloc_mem(sc))) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2005,
 			"Memory allocation failure",
 			error);
 		tw_osli_free_resources(sc);
 		return(error);
 	}
 
 	/* Initialize the Common Layer for this controller. */
 	if ((error = tw_cl_init_ctlr(&sc->ctlr_handle, sc->flags, sc->device_id,
 			TW_OSLI_MAX_NUM_REQUESTS, TW_OSLI_MAX_NUM_AENS,
 			sc->non_dma_mem, sc->dma_mem,
 			sc->dma_mem_phys
 			))) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2006,
 			"Failed to initialize Common Layer/controller",
 			error);
 		tw_osli_free_resources(sc);
 		return(error);
 	}
 
 	/* Create the control device. */
 	sc->ctrl_dev = make_dev(&twa_cdevsw, device_get_unit(sc->bus_dev),
 			UID_ROOT, GID_OPERATOR, S_IRUSR | S_IWUSR,
 			"twa%d", device_get_unit(sc->bus_dev));
 	sc->ctrl_dev->si_drv1 = sc;
 
 	/*
 	 * NOTE(review): this is the only failure path that frees the
 	 * resources before logging the error; the others log first.
 	 */
 	if ((error = tw_osli_cam_attach(sc))) {
 		tw_osli_free_resources(sc);
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2007,
 			"Failed to initialize CAM",
 			error);
 		return(error);
 	}
 
 	/* Start the watchdog on callout 0; first run is 5*hz ticks out. */
 	sc->watchdog_index = 0;
 	callout_init(&(sc->watchdog_callout[0]), 1);
 	callout_init(&(sc->watchdog_callout[1]), 1);
 	callout_reset(&(sc->watchdog_callout[0]), 5*hz, twa_watchdog, &sc->ctlr_handle);
 
 	return(0);
 }
 
 
 /*
  * Watchdog callout handler; arg is the controller's tw_cl_ctlr_handle.
  *
  * Flags a controller reset when a busy request is found whose non-zero
  * deadline has passed (the scan stops at the first such request).  It
  * then reschedules itself on one of the two watchdog callouts, chosen by
  * the low bit of watchdog_index: 70*hz ticks out, followed by a
  * controller reset, when a reset is needed; 5*hz ticks out when the
  * driver is merely active; not at all otherwise.
  */
 static TW_VOID
 twa_watchdog(TW_VOID *arg)
 {
 	struct tw_cl_ctlr_handle *ctlr_handle =
 		(struct tw_cl_ctlr_handle *)arg;
 	struct twa_softc		*sc = ctlr_handle->osl_ctlr_ctxt;
 	int				i;
 	int				i_need_a_reset = 0;
 	int				driver_is_active = 0;
 	/* 1234 is what remains if no callout_reset() below is reached. */
 	int				my_watchdog_was_pending = 1234;
 	TW_UINT64			current_time;
 	struct tw_osli_req_context	*my_req;
 
 
 //==============================================================================
 	current_time = (TW_UINT64) (tw_osl_get_local_time());
 
 	for (i = 0; i < TW_OSLI_MAX_NUM_REQUESTS; i++) {
 		my_req = &(sc->req_ctx_buf[i]);
 
 		if ((my_req->state == TW_OSLI_REQ_STATE_BUSY) &&
 			(my_req->deadline) &&
 			(my_req->deadline < current_time)) {
 			tw_cl_set_reset_needed(ctlr_handle);
 #ifdef    TW_OSL_DEBUG
 			device_printf((sc)->bus_dev, "Request %d timed out! d = %llu, c = %llu\n", i, my_req->deadline, current_time);
 #else  /* TW_OSL_DEBUG */
 			device_printf((sc)->bus_dev, "Request %d timed out!\n", i);
 #endif /* TW_OSL_DEBUG */
 			break;
 		}
 	}
 //==============================================================================
 
 	i_need_a_reset = tw_cl_is_reset_needed(ctlr_handle);
 
 	/* Alternate between watchdog_callout[0] and watchdog_callout[1]. */
 	i = (int) ((sc->watchdog_index++) & 1);
 
 	driver_is_active = tw_cl_is_active(ctlr_handle);
 
 	if (i_need_a_reset) {
 #ifdef    TW_OSL_DEBUG
 		device_printf((sc)->bus_dev, "Watchdog rescheduled in 70 seconds\n");
 #endif /* TW_OSL_DEBUG */
 		my_watchdog_was_pending =
-			callout_reset(&(sc->watchdog_callout[i]), 70*hz, twa_watchdog, &sc->ctlr_handle);
+			callout_reset(&(sc->watchdog_callout[i]), 70*hz, twa_watchdog, &sc->ctlr_handle).bit.cancelled;
 		tw_cl_reset_ctlr(ctlr_handle);
 #ifdef    TW_OSL_DEBUG
 		device_printf((sc)->bus_dev, "Watchdog reset completed!\n");
 #endif /* TW_OSL_DEBUG */
 	} else if (driver_is_active) {
 		my_watchdog_was_pending =
-			callout_reset(&(sc->watchdog_callout[i]),  5*hz, twa_watchdog, &sc->ctlr_handle);
+			callout_reset(&(sc->watchdog_callout[i]),  5*hz, twa_watchdog, &sc->ctlr_handle).bit.cancelled;
 	}
 #ifdef    TW_OSL_DEBUG
 	if (i_need_a_reset || my_watchdog_was_pending)
 		device_printf((sc)->bus_dev, "i_need_a_reset = %d, "
 		"driver_is_active = %d, my_watchdog_was_pending = %d\n",
 		i_need_a_reset, driver_is_active, my_watchdog_was_pending);
 #endif /* TW_OSL_DEBUG */
 }
 
 
 /*
  * Function name:	tw_osli_alloc_mem
  * Description:		Allocates memory needed both by CL and OSL.
  *
  * Input:		sc	-- OSL internal controller context
  * Output:		None
  * Return value:	0	-- success
  *			non-zero-- failure
  */
 static TW_INT32
 tw_osli_alloc_mem(struct twa_softc *sc)
 {
 	struct tw_osli_req_context	*req;
 	TW_UINT32			max_sg_elements;
 	TW_UINT32			non_dma_mem_size;
 	TW_UINT32			dma_mem_size;
 	TW_INT32			error;
 	TW_INT32			i;
 
 	tw_osli_dbg_dprintf(3, sc, "entered");
 
 	sc->flags |= (sizeof(bus_addr_t) == 8) ? TW_CL_64BIT_ADDRESSES : 0;
 	sc->flags |= (sizeof(bus_size_t) == 8) ? TW_CL_64BIT_SG_LENGTH : 0;
 
 	max_sg_elements = (sizeof(bus_addr_t) == 8) ?
 		TW_CL_MAX_64BIT_SG_ELEMENTS : TW_CL_MAX_32BIT_SG_ELEMENTS;
 
 	if ((error = tw_cl_get_mem_requirements(&sc->ctlr_handle, sc->flags,
 			sc->device_id, TW_OSLI_MAX_NUM_REQUESTS,  TW_OSLI_MAX_NUM_AENS,
 			&(sc->alignment), &(sc->sg_size_factor),
 			&non_dma_mem_size, &dma_mem_size
 			))) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2008,
 			"Can't get Common Layer's memory requirements",
 			error);
 		return(error);
 	}
 
 	if ((sc->non_dma_mem = malloc(non_dma_mem_size, TW_OSLI_MALLOC_CLASS,
 				M_WAITOK)) == NULL) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2009,
 			"Can't allocate non-dma memory",
 			ENOMEM);
 		return(ENOMEM);
 	}
 
 	/* Create the parent dma tag. */
 	if (bus_dma_tag_create(bus_get_dma_tag(sc->bus_dev), /* parent */
 				sc->alignment,		/* alignment */
 				0,			/* boundary */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR, 	/* highaddr */
 				NULL, NULL, 		/* filter, filterarg */
 				TW_CL_MAX_IO_SIZE,	/* maxsize */
 				max_sg_elements,	/* nsegments */
 				TW_CL_MAX_IO_SIZE,	/* maxsegsize */
 				0,			/* flags */
 				NULL,			/* lockfunc */
 				NULL,			/* lockfuncarg */
 				&sc->parent_tag		/* tag */)) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x200A,
 			"Can't allocate parent DMA tag",
 			ENOMEM);
 		return(ENOMEM);
 	}
 
 	/* Create a dma tag for Common Layer's DMA'able memory (dma_mem). */
 	if (bus_dma_tag_create(sc->parent_tag,		/* parent */
 				sc->alignment,		/* alignment */
 				0,			/* boundary */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR, 	/* highaddr */
 				NULL, NULL, 		/* filter, filterarg */
 				dma_mem_size,		/* maxsize */
 				1,			/* nsegments */
 				BUS_SPACE_MAXSIZE,	/* maxsegsize */
 				0,			/* flags */
 				NULL,			/* lockfunc */
 				NULL,			/* lockfuncarg */
 				&sc->cmd_tag		/* tag */)) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x200B,
 			"Can't allocate DMA tag for Common Layer's "
 			"DMA'able memory",
 			ENOMEM);
 		return(ENOMEM);
 	}
 
 	if (bus_dmamem_alloc(sc->cmd_tag, &sc->dma_mem,
 		BUS_DMA_NOWAIT, &sc->cmd_map)) {
 		/* Try a second time. */
 		if (bus_dmamem_alloc(sc->cmd_tag, &sc->dma_mem,
 			BUS_DMA_NOWAIT, &sc->cmd_map)) {
 			tw_osli_printf(sc, "error = %d",
 				TW_CL_SEVERITY_ERROR_STRING,
 				TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 				0x200C,
 				"Can't allocate DMA'able memory for the"
 				"Common Layer",
 				ENOMEM);
 			return(ENOMEM);
 		}
 	}
 
 	bus_dmamap_load(sc->cmd_tag, sc->cmd_map, sc->dma_mem,
 		dma_mem_size, twa_map_load_callback,
 		&sc->dma_mem_phys, 0);
 
 	/*
 	 * Create a dma tag for data buffers; size will be the maximum
 	 * possible I/O size (128kB).
 	 */
 	if (bus_dma_tag_create(sc->parent_tag,		/* parent */
 				sc->alignment,		/* alignment */
 				0,			/* boundary */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR, 	/* highaddr */
 				NULL, NULL, 		/* filter, filterarg */
 				TW_CL_MAX_IO_SIZE,	/* maxsize */
 				max_sg_elements,	/* nsegments */
 				TW_CL_MAX_IO_SIZE,	/* maxsegsize */
 				BUS_DMA_ALLOCNOW,	/* flags */
 				twa_busdma_lock,	/* lockfunc */
 				sc->io_lock,		/* lockfuncarg */
 				&sc->dma_tag		/* tag */)) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x200F,
 			"Can't allocate DMA tag for data buffers",
 			ENOMEM);
 		return(ENOMEM);
 	}
 
 	/*
 	 * Create a dma tag for ioctl data buffers; size will be the maximum
 	 * possible I/O size (128kB).
 	 */
 	if (bus_dma_tag_create(sc->parent_tag,		/* parent */
 				sc->alignment,		/* alignment */
 				0,			/* boundary */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR, 	/* highaddr */
 				NULL, NULL, 		/* filter, filterarg */
 				TW_CL_MAX_IO_SIZE,	/* maxsize */
 				max_sg_elements,	/* nsegments */
 				TW_CL_MAX_IO_SIZE,	/* maxsegsize */
 				BUS_DMA_ALLOCNOW,	/* flags */
 				twa_busdma_lock,	/* lockfunc */
 				sc->io_lock,		/* lockfuncarg */
 				&sc->ioctl_tag		/* tag */)) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2010,
 			"Can't allocate DMA tag for ioctl data buffers",
 			ENOMEM);
 		return(ENOMEM);
 	}
 
 	/* Create just one map for all ioctl request data buffers. */
 	if (bus_dmamap_create(sc->ioctl_tag, 0, &sc->ioctl_map)) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2011,
 			"Can't create ioctl map",
 			ENOMEM);
 		return(ENOMEM);
 	}
 
 
 	/* Initialize request queues. */
 	tw_osli_req_q_init(sc, TW_OSLI_FREE_Q);
 	tw_osli_req_q_init(sc, TW_OSLI_BUSY_Q);
 
 	if ((sc->req_ctx_buf = (struct tw_osli_req_context *)
 			malloc((sizeof(struct tw_osli_req_context) *
 				TW_OSLI_MAX_NUM_REQUESTS),
 				TW_OSLI_MALLOC_CLASS, M_WAITOK)) == NULL) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2012,
 			"Failed to allocate request packets",
 			ENOMEM);
 		return(ENOMEM);
 	}
 	bzero(sc->req_ctx_buf,
 		sizeof(struct tw_osli_req_context) * TW_OSLI_MAX_NUM_REQUESTS);
 
 	for (i = 0; i < TW_OSLI_MAX_NUM_REQUESTS; i++) {
 		req = &(sc->req_ctx_buf[i]);
 		req->ctlr = sc;
 		if (bus_dmamap_create(sc->dma_tag, 0, &req->dma_map)) {
 			tw_osli_printf(sc, "request # = %d, error = %d",
 				TW_CL_SEVERITY_ERROR_STRING,
 				TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 				0x2013,
 				"Can't create dma map",
 				i, ENOMEM);
 			return(ENOMEM);
 		}
 
 		/* Initialize the ioctl wakeup/ timeout mutex */
 		req->ioctl_wake_timeout_lock = &(req->ioctl_wake_timeout_lock_handle);
 		mtx_init(req->ioctl_wake_timeout_lock, "tw_ioctl_wake_timeout_lock", NULL, MTX_DEF);
 
 		/* Insert request into the free queue. */
 		tw_osli_req_q_insert_tail(req, TW_OSLI_FREE_Q);
 	}
 
 	return(0);
 }
 
 
 
 /*
  * Function name:	tw_osli_free_resources
  * Description:		Performs clean-up at the time of going down.
  *
  * Input:		sc	-- ptr to OSL internal ctlr context
  * Output:		None
  * Return value:	None
  */
 static TW_VOID
 tw_osli_free_resources(struct twa_softc *sc)
 {
 	struct tw_osli_req_context	*req;
 	TW_INT32			error = 0;
 
 	tw_osli_dbg_dprintf(3, sc, "entered");
 
 	/* Detach from CAM */
 	tw_osli_cam_detach(sc);
 
 	if (sc->req_ctx_buf)
 		while ((req = tw_osli_req_q_remove_head(sc, TW_OSLI_FREE_Q)) !=
 			NULL) {
 			mtx_destroy(req->ioctl_wake_timeout_lock);
 
 			if ((error = bus_dmamap_destroy(sc->dma_tag,
 					req->dma_map)))
 				tw_osli_dbg_dprintf(1, sc,
 					"dmamap_destroy(dma) returned %d",
 					error);
 		}
 
 	if ((sc->ioctl_tag) && (sc->ioctl_map))
 		if ((error = bus_dmamap_destroy(sc->ioctl_tag, sc->ioctl_map)))
 			tw_osli_dbg_dprintf(1, sc,
 				"dmamap_destroy(ioctl) returned %d", error);
 
 	/* Free all memory allocated so far. */
 	if (sc->req_ctx_buf)
 		free(sc->req_ctx_buf, TW_OSLI_MALLOC_CLASS);
 
 	if (sc->non_dma_mem)
 		free(sc->non_dma_mem, TW_OSLI_MALLOC_CLASS);
 
 	if (sc->dma_mem) {
 		bus_dmamap_unload(sc->cmd_tag, sc->cmd_map);
 		bus_dmamem_free(sc->cmd_tag, sc->dma_mem,
 			sc->cmd_map);
 	}
 	if (sc->cmd_tag)
 		if ((error = bus_dma_tag_destroy(sc->cmd_tag)))
 			tw_osli_dbg_dprintf(1, sc,
 				"dma_tag_destroy(cmd) returned %d", error);
 
 	if (sc->dma_tag)
 		if ((error = bus_dma_tag_destroy(sc->dma_tag)))
 			tw_osli_dbg_dprintf(1, sc,
 				"dma_tag_destroy(dma) returned %d", error);
 
 	if (sc->ioctl_tag)
 		if ((error = bus_dma_tag_destroy(sc->ioctl_tag)))
 			tw_osli_dbg_dprintf(1, sc,
 				"dma_tag_destroy(ioctl) returned %d", error);
 
 	if (sc->parent_tag)
 		if ((error = bus_dma_tag_destroy(sc->parent_tag)))
 			tw_osli_dbg_dprintf(1, sc,
 				"dma_tag_destroy(parent) returned %d", error);
 
 
 	/* Disconnect the interrupt handler. */
 	if ((error = twa_teardown_intr(sc)))
 			tw_osli_dbg_dprintf(1, sc,
 				"teardown_intr returned %d", error);
 
 	if (sc->irq_res != NULL)
 		if ((error = bus_release_resource(sc->bus_dev,
 				SYS_RES_IRQ, sc->irq_res_id, sc->irq_res)))
 			tw_osli_dbg_dprintf(1, sc,
 				"release_resource(irq) returned %d", error);
 
 
 	/* Release the register window mapping. */
 	if (sc->reg_res != NULL)
 		if ((error = bus_release_resource(sc->bus_dev,
 				SYS_RES_MEMORY, sc->reg_res_id, sc->reg_res)))
 			tw_osli_dbg_dprintf(1, sc,
 				"release_resource(io) returned %d", error);
 
 
 	/* Destroy the control device. */
 	if (sc->ctrl_dev != (struct cdev *)NULL)
 		destroy_dev(sc->ctrl_dev);
 
 	if ((error = sysctl_ctx_free(&sc->sysctl_ctxt)))
 		tw_osli_dbg_dprintf(1, sc,
 			"sysctl_ctx_free returned %d", error);
 
 }
 
 
 
 /*
  * Function name:	twa_detach
  * Description:		Called when the controller is being detached from
  *			the pci bus.  Refuses with EBUSY while sc->open is
  *			set; otherwise shuts the controller down and, only
  *			if that succeeded, frees its resources.
  *
  * Input:		dev	-- bus device corresponding to the ctlr
  * Output:		None
  * Return value:	0	-- success
  *			non-zero-- failure
  */
 static TW_INT32
 twa_detach(device_t dev)
 {
 	struct twa_softc	*sc = device_get_softc(dev);
 	TW_INT32		error;
 
 	tw_osli_dbg_dprintf(3, sc, "entered");
 
 	/* Somebody still has the device open: report it and bail out. */
 	if (sc->open) {
 		error = EBUSY;
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2014,
 			"Device open",
 			error);
 		return(error);
 	}
 
 	/* Shut the controller down; keep all resources if that fails. */
 	error = twa_shutdown(dev);
 	if (error)
 		return(error);
 
 	/* Free all resources associated with this controller. */
 	tw_osli_free_resources(sc);
 	return(0);
 }
 
 
 
 /*
  * Function name:	twa_shutdown
  * Description:		Called at unload/shutdown time.  Lets the controller
  *			know that we are going down: disconnects the
  *			interrupt, drains both watchdog callouts, and asks
  *			the Common Layer to shut the controller down.
  *
  * Input:		dev	-- bus device corresponding to the ctlr
  * Output:		None
  * Return value:	0	-- success
  *			non-zero-- failure reported by tw_cl_shutdown_ctlr
  */
 static TW_INT32
 twa_shutdown(device_t dev)
 {
 	struct twa_softc	*sc = device_get_softc(dev);
 	TW_INT32		error = 0;
 
 	tw_osli_dbg_dprintf(3, sc, "entered");
 
 	/*
 	 * Disconnect interrupts.  The result does not affect the return
 	 * value (it is overwritten below), so at least report a failure
 	 * instead of dropping it silently.
 	 */
 	if ((error = twa_teardown_intr(sc)))
 		tw_osli_dbg_dprintf(1, sc,
 			"teardown_intr returned %d", error);
 
 	/* Stop watchdog task. */
 	callout_drain(&(sc->watchdog_callout[0]));
 	callout_drain(&(sc->watchdog_callout[1]));
 
 	/* Disconnect from the controller. */
 	if ((error = tw_cl_shutdown_ctlr(&(sc->ctlr_handle), 0))) {
 		tw_osli_printf(sc, "error = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x2015,
 			"Failed to shutdown Common Layer/controller",
 			error);
 	}
 	return(error);
 }
 
 
 
 /*
  * Function name:	twa_busdma_lock
  * Description:		Lock callback handed to bus_dma_tag_create; takes or
  *			drops the given mutex as a spin lock depending on
  *			the requested operation, and panics on any other
  *			operation value.
  *
  * Input:		lock_arg -- lock mutex sent as argument
  *			op -- operation (lock/unlock) expected of the function
  * Output:		None
  * Return value:	None
  */
 TW_VOID
 twa_busdma_lock(TW_VOID *lock_arg, bus_dma_lock_op_t op)
 {
 	struct mtx	*io_mtx = (struct mtx *)lock_arg;
 
 	if (op == BUS_DMA_LOCK) {
 		mtx_lock_spin(io_mtx);
 	} else if (op == BUS_DMA_UNLOCK) {
 		mtx_unlock_spin(io_mtx);
 	} else {
 		panic("Unknown operation 0x%x for twa_busdma_lock!", op);
 	}
 }
 
 
 /*
  * Function name:	twa_pci_intr
  * Description:		Interrupt handler.  Recovers the softc from the
  *			opaque argument and forwards to tw_cl_interrupt.
  *
  * Input:		arg	-- ptr to OSL internal ctlr context
  * Output:		None
  * Return value:	None
  */
 static TW_VOID
 twa_pci_intr(TW_VOID *arg)
 {
 	struct twa_softc	*sc;
 
 	sc = (struct twa_softc *)arg;
 	tw_osli_dbg_dprintf(10, sc, "entered");
 
 	/* All the real work happens in the Common Layer. */
 	tw_cl_interrupt(&sc->ctlr_handle);
 }
 
 
 /*
  * Function name:	tw_osli_fw_passthru
  * Description:		Builds a fw passthru cmd pkt, and submits it to CL,
  *			then sleeps until the request completes, fails or
  *			times out.  Any payload is staged through a kernel
  *			buffer: copied in from user_buf->pdata before the
  *			request is mapped, and copied back out on success.
  *
  * Input:		sc	-- ptr to OSL internal ctlr context
  *			buf	-- ptr to ioctl pkt understood by CL
  * Output:		buf	-- driver_pkt.os_status receives the result
  * Return value:	0	-- success
  *			EBUSY	-- no free request, or request hit a bus reset
  *			non-zero-- other failure
  */
 TW_INT32
 tw_osli_fw_passthru(struct twa_softc *sc, TW_INT8 *buf)
 {
 	struct tw_osli_req_context		*req;
 	struct tw_osli_ioctl_no_data_buf	*user_buf =
 		(struct tw_osli_ioctl_no_data_buf *)buf;
 	TW_TIME					end_time;
 	TW_UINT32				timeout = 60;
 	TW_UINT32				data_buf_size_adjusted;
 	struct tw_cl_req_packet			*req_pkt;
 	struct tw_cl_passthru_req_packet	*pt_req;
 	TW_INT32				error;
 
 	tw_osli_dbg_dprintf(5, sc, "ioctl: passthru");
 		
 	/* No free request context: the caller gets EBUSY. */
 	if ((req = tw_osli_get_request(sc)) == NULL)
 		return(EBUSY);
 
 	/* Let the completion callback find its way back to this request. */
 	req->req_handle.osl_req_ctxt = req;
 	req->orig_req = buf;
 	req->flags |= TW_OSLI_REQ_FLAGS_PASSTHRU;
 
 	req_pkt = &(req->req_pkt);
 	req_pkt->status = 0;
 	req_pkt->tw_osl_callback = tw_osl_complete_passthru;
 	/* Let the Common Layer retry the request on cmd queue full. */
 	req_pkt->flags |= TW_CL_REQ_RETRY_ON_BUSY;
 
 	pt_req = &(req_pkt->gen_req_pkt.pt_req);
 	/*
 	 * Make sure that the data buffer sent to firmware is a 
 	 * 512 byte multiple in size.  The code rounds the user-supplied
 	 * length up to a multiple of sc->sg_size_factor; the mask
 	 * arithmetic assumes that value is a power of two -- TODO confirm.
 	 */
 	data_buf_size_adjusted =
 		(user_buf->driver_pkt.buffer_length +
 		(sc->sg_size_factor - 1)) & ~(sc->sg_size_factor - 1);
 	if ((req->length = data_buf_size_adjusted)) {
 		if ((req->data = malloc(data_buf_size_adjusted,
 			TW_OSLI_MALLOC_CLASS, M_WAITOK)) == NULL) {
 			error = ENOMEM;
 			tw_osli_printf(sc, "error = %d",
 				TW_CL_SEVERITY_ERROR_STRING,
 				TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 				0x2016,
 				"Could not alloc mem for "
 				"fw_passthru data_buf",
 				error);
 			goto fw_passthru_err;
 		}
 		/*
 		 * Copy the payload.  Only buffer_length bytes are copied;
 		 * the rounding pad at the end of req->data is not written
 		 * here.
 		 */
 		if ((error = copyin((TW_VOID *)(user_buf->pdata), 
 			req->data,
 			user_buf->driver_pkt.buffer_length)) != 0) {
 			tw_osli_printf(sc, "error = %d",
 				TW_CL_SEVERITY_ERROR_STRING,
 				TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 				0x2017,
 				"Could not copyin fw_passthru data_buf",
 				error);
 			goto fw_passthru_err;
 		}
 		pt_req->sgl_entries = 1; /* will be updated during mapping */
 		/* The buffer is treated as both input and output. */
 		req->flags |= (TW_OSLI_REQ_FLAGS_DATA_IN |
 			TW_OSLI_REQ_FLAGS_DATA_OUT);
 	} else
 		pt_req->sgl_entries = 0; /* no payload */
 
 	/* The command packet is taken directly from the ioctl buffer. */
 	pt_req->cmd_pkt = (TW_VOID *)(&(user_buf->cmd_pkt));
 	pt_req->cmd_pkt_length = sizeof(struct tw_cl_command_packet);
 
 	/* Map the request; any error here goes straight to clean-up. */
 	if ((error = tw_osli_map_request(req)))
 		goto fw_passthru_err;
 
 	/*
 	 * Wait for completion.  Note that req->state is tested before the
 	 * wake/timeout lock is taken; the SLEEPING flag is what the
 	 * completion callback checks to decide whether to issue a wakeup.
 	 */
 	end_time = tw_osl_get_local_time() + timeout;
 	while (req->state != TW_OSLI_REQ_STATE_COMPLETE) {
 		mtx_lock(req->ioctl_wake_timeout_lock);
 		req->flags |= TW_OSLI_REQ_FLAGS_SLEEPING;
 
 		error = mtx_sleep(req, req->ioctl_wake_timeout_lock, 0,
 			    "twa_passthru", timeout*hz);
 		mtx_unlock(req->ioctl_wake_timeout_lock);
 
 		/*
 		 * If SLEEPING is already clear, something else (the
 		 * completion callback does this before its wakeup) cleared
 		 * it, so whatever mtx_sleep returned is treated as success.
 		 */
 		if (!(req->flags & TW_OSLI_REQ_FLAGS_SLEEPING))
 			error = 0;
 		req->flags &= ~TW_OSLI_REQ_FLAGS_SLEEPING;
 
 		if (! error) {
 			/*
 			 * Woken normally: fail on a recorded error code, on a
 			 * state other than COMPLETE (error becomes 1), or on
 			 * a non-zero packet status; otherwise we are done.
 			 */
 			if (((error = req->error_code)) ||
 				((error = (req->state !=
 				TW_OSLI_REQ_STATE_COMPLETE))) ||
 				((error = req_pkt->status)))
 				goto fw_passthru_err;
 			break;
 		}
 
 		/* A non-zero packet status takes precedence over the sleep error. */
 		if (req_pkt->status) {
 			error = req_pkt->status;
 			goto fw_passthru_err;
 		}
 
 		if (error == EWOULDBLOCK) {
 			/* Time out! */
 			if ((!(req->error_code))                       &&
 			    (req->state == TW_OSLI_REQ_STATE_COMPLETE) &&
 			    (!(req_pkt->status))			  ) {
 #ifdef    TW_OSL_DEBUG
 				tw_osli_printf(sc, "request = %p",
 					TW_CL_SEVERITY_ERROR_STRING,
 					TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 					0x7777,
 					"FALSE Passthru timeout!",
 					req);
 #endif /* TW_OSL_DEBUG */
 				error = 0; /* False error */
 				break;
 			}
 			/* Reset the controller unless the CL reports one is already needed. */
 			if (!(tw_cl_is_reset_needed(&(req->ctlr->ctlr_handle)))) {
 #ifdef    TW_OSL_DEBUG
 				tw_osli_printf(sc, "request = %p",
 					TW_CL_SEVERITY_ERROR_STRING,
 					TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 					0x2018,
 					"Passthru request timed out!",
 					req);
 #else  /* TW_OSL_DEBUG */
 			device_printf((sc)->bus_dev, "Passthru request timed out!\n");
 #endif /* TW_OSL_DEBUG */
 				tw_cl_reset_ctlr(&(req->ctlr->ctlr_handle));
 			}
 
 			/* Restart the deadline and go back to waiting. */
 			error = 0;
 			end_time = tw_osl_get_local_time() + timeout;
 			continue;
 			/*
 			 * Don't touch req after a reset.  It (and any
 			 * associated data) will be
 			 * unmapped by the callback.
 			 */
 		}
 		/* 
 		 * Either the request got completed, or we were woken up by a
 		 * signal.  Calculate the new timeout, in case it was the latter.
 		 * NOTE(review): timeout is unsigned; if the deadline has
 		 * already passed this subtraction does not go negative --
 		 * confirm the resulting value is acceptable to mtx_sleep.
 		 */
 		timeout = (end_time - tw_osl_get_local_time());
 	} /* End of while loop */
 
 	/* If there was a payload, copy it back. */
 	if ((!error) && (req->length))
 		if ((error = copyout(req->data, user_buf->pdata,
 			user_buf->driver_pkt.buffer_length)))
 			tw_osli_printf(sc, "error = %d",
 				TW_CL_SEVERITY_ERROR_STRING,
 				TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 				0x2019,
 				"Could not copyout fw_passthru data_buf",
 				error);
 	
 	/* Common exit: reached on success as well as on every error path. */
 fw_passthru_err:
 
 	/* A request killed by a bus reset is reported to the caller as EBUSY. */
 	if (req_pkt->status == TW_CL_ERR_REQ_BUS_RESET)
 		error = EBUSY;
 
 	user_buf->driver_pkt.os_status = error;
 	/* Free resources. */
 	if (req->data)
 		free(req->data, TW_OSLI_MALLOC_CLASS);
 	tw_osli_req_q_insert_tail(req, TW_OSLI_FREE_Q);
 	return(error);
 }
 
 
 
 /*
  * Function name:	tw_osl_complete_passthru
  * Description:		Completion callback for passthru requests.  Marks
  *			the request complete, takes it off the busy queue,
  *			unmaps it, and wakes the originator if it is
  *			sleeping on the request.
  *
  * Input:		req_handle	-- ptr to request handle
  * Output:		None
  * Return value:	None
  */
 TW_VOID
 tw_osl_complete_passthru(struct tw_cl_req_handle *req_handle)
 {
 	struct tw_osli_req_context	*req = req_handle->osl_req_ctxt;
 	struct tw_cl_req_packet		*req_pkt = &(req->req_pkt);
 	struct twa_softc		*sc = req->ctlr;
 
 	tw_osli_dbg_dprintf(5, sc, "entered");
 
 	/* Completing a request that was never marked busy is worth a log. */
 	if (req->state != TW_OSLI_REQ_STATE_BUSY) {
 		tw_osli_printf(sc, "request = %p, status = %d",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x201B,
 			"Unposted command completed!!",
 			req, req->state);
 	}
 
 	/*
 	 * The request is finished as far as the queues are concerned: flag
 	 * it complete and drop it from the busy queue.  It is not moved to
 	 * a complete queue since it is dealt with right here.
 	 */
 	req->state = TW_OSLI_REQ_STATE_COMPLETE;
 	tw_osli_req_q_remove_item(req, TW_OSLI_BUSY_Q);
 
 	tw_osli_unmap_request(req);
 
 	/*
 	 * A failure that happened before the request reached the Common
 	 * Layer, without EINPROGRESS having been seen, needs no wakeup:
 	 * the originator gets the error back directly and cleans up.
 	 */
 	if (req->error_code != 0 &&
 	    (req->flags & TW_OSLI_REQ_FLAGS_IN_PROGRESS) == 0)
 		return;
 
 	/* This callback should only ever see passthru requests. */
 	if (!(req->flags & TW_OSLI_REQ_FLAGS_PASSTHRU)) {
 		tw_osli_printf(sc, "request = %p",
 			TW_CL_SEVERITY_ERROR_STRING,
 			TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 			0x201D,
 			"Passthru callback called for non-passthru request",
 			req);
 		return;
 	}
 
 	/* Usual case: the originator is asleep on the request. */
 	if (req->flags & TW_OSLI_REQ_FLAGS_SLEEPING) {
 		tw_osli_dbg_dprintf(5, sc,
 			"Waking up originator of request %p", req);
 		req->flags &= ~TW_OSLI_REQ_FLAGS_SLEEPING;
 		wakeup_one(req);
 		return;
 	}
 
 	/*
 	 * Nobody is sleeping.  That is fine when the request completed
 	 * before mtx_sleep was called (MAPPED is set) or when it was
 	 * ended by a bus reset; anything else is reported.
 	 */
 	if ((req->flags & TW_OSLI_REQ_FLAGS_MAPPED) ||
 	    (req_pkt->status == TW_CL_ERR_REQ_BUS_RESET))
 		return;
 
 	tw_osli_printf(sc, "request = %p",
 		TW_CL_SEVERITY_ERROR_STRING,
 		TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 		0x201C,
 		"Passthru callback called, "
 		"and caller not sleeping",
 		req);
 }
 
 
 
 /*
  * Function name:	tw_osli_get_request
  * Description:		Takes a request pkt off the head of the free queue
  *			and resets its fields to their defaults.
  *
  * Input:		sc	-- ptr to OSL internal ctlr context
  * Output:		None
  * Return value:	ptr to request pkt	-- success
  *			NULL			-- failure
  */
 struct tw_osli_req_context *
 tw_osli_get_request(struct twa_softc *sc)
 {
 	struct tw_osli_req_context	*req;
 
 	tw_osli_dbg_dprintf(4, sc, "entered");
 
 	/* Nothing on the free queue means nothing to hand out. */
 	req = tw_osli_req_q_remove_head(sc, TW_OSLI_FREE_Q);
 	if (req == NULL)
 		return(NULL);
 
 	/* Request handle shared with the Common Layer. */
 	req->req_handle.osl_req_ctxt = NULL;
 	req->req_handle.cl_req_ctxt = NULL;
 	req->req_handle.is_io = 0;
 
 	/* Data buffer bookkeeping, including the bounce-buffer shadow. */
 	req->data = NULL;
 	req->length = 0;
 	req->real_data = NULL;
 	req->real_length = 0;
 
 	/* Life-cycle state; the request is still being initialized. */
 	req->state = TW_OSLI_REQ_STATE_INIT;
 	req->flags = 0;
 	req->error_code = 0;
 	req->deadline = 0;
 	req->orig_req = NULL;
 
 	bzero(&(req->req_pkt), sizeof(struct tw_cl_req_packet));
 
 	return(req);
 }
 
 
 
 /*
  * Function name:	twa_map_load_data_callback
  * Description:		Callback of bus_dmamap_load for the buffer associated
  *			with data.  Updates the cmd pkt (size/sgl_entries
  *			fields, as applicable) to reflect the number of sg
  *			elements, then submits the request to the Common
  *			Layer (tw_cl_fw_passthru or tw_cl_start_io).
  *
  * Input:		arg	-- ptr to OSL internal request context
  *			segs	-- ptr to a list of segment descriptors
  *			nsegments--# of segments
  *			error	-- 0 if no errors encountered before callback,
  *				   non-zero if errors were encountered
  * Output:		None
  * Return value:	None
  */
 static TW_VOID
 twa_map_load_data_callback(TW_VOID *arg, bus_dma_segment_t *segs,
 	TW_INT32 nsegments, TW_INT32 error)
 {
 	struct tw_osli_req_context	*req =
 		(struct tw_osli_req_context *)arg;
 	struct twa_softc		*sc = req->ctlr;
 	struct tw_cl_req_packet		*req_pkt = &(req->req_pkt);
 
 	tw_osli_dbg_dprintf(10, sc, "entered");
 
 	/*
 	 * EINVAL: just record the error and return; the request is not
 	 * queued and the completion callback is not invoked from here.
 	 */
 	if (error == EINVAL) {
 		req->error_code = error;
 		return;
 	}
 
 	/* Mark the request as currently being processed. */
 	req->state = TW_OSLI_REQ_STATE_BUSY;
 	/* Move the request into the busy queue. */
 	tw_osli_req_q_insert_tail(req, TW_OSLI_BUSY_Q);
 
 	req->flags |= TW_OSLI_REQ_FLAGS_MAPPED;
 
 	/*
 	 * EFBIG: the request has already been queued as busy above, so it
 	 * is failed through the completion callback at "out".
 	 * NOTE(review): only EINVAL and EFBIG are tested; any other
 	 * non-zero error value proceeds to submission below -- confirm.
 	 */
 	if (error == EFBIG) {
 		req->error_code = error;
 		goto out;
 	}
 
 	if (req->flags & TW_OSLI_REQ_FLAGS_PASSTHRU) {
 		struct tw_cl_passthru_req_packet	*pt_req;
 
 		/* Passthru data uses the shared ioctl tag and map. */
 		if (req->flags & TW_OSLI_REQ_FLAGS_DATA_IN)
 			bus_dmamap_sync(sc->ioctl_tag, sc->ioctl_map,
 				BUS_DMASYNC_PREREAD);
 
 		if (req->flags & TW_OSLI_REQ_FLAGS_DATA_OUT) {
 			/* 
 			 * If we're using an alignment buffer, and we're
 			 * writing data, copy the real data out.
 			 */
 			if (req->flags & TW_OSLI_REQ_FLAGS_DATA_COPY_NEEDED)
 				bcopy(req->real_data, req->data, req->real_length);
 			bus_dmamap_sync(sc->ioctl_tag, sc->ioctl_map,
 				BUS_DMASYNC_PREWRITE);
 		}
 
 		/*
 		 * Hand the segment list to the CL.  sgl_entries was preset
 		 * by the request builder, so adding (nsegments - 1) turns
 		 * a preset of 1 into the actual segment count.
 		 */
 		pt_req = &(req_pkt->gen_req_pkt.pt_req);
 		pt_req->sg_list = (TW_UINT8 *)segs;
 		pt_req->sgl_entries += (nsegments - 1);
 		error = tw_cl_fw_passthru(&(sc->ctlr_handle), req_pkt,
 			&(req->req_handle));
 	} else {
 		struct tw_cl_scsi_req_packet	*scsi_req;
 
 		/* Regular I/O uses the data tag and the per-request map. */
 		if (req->flags & TW_OSLI_REQ_FLAGS_DATA_IN)
 			bus_dmamap_sync(sc->dma_tag, req->dma_map,
 				BUS_DMASYNC_PREREAD);
 
 		if (req->flags & TW_OSLI_REQ_FLAGS_DATA_OUT) {
 			/* 
 			 * If we're using an alignment buffer, and we're
 			 * writing data, copy the real data out.
 			 */
 			if (req->flags & TW_OSLI_REQ_FLAGS_DATA_COPY_NEEDED)
 				bcopy(req->real_data, req->data, req->real_length);
 			bus_dmamap_sync(sc->dma_tag, req->dma_map,
 				BUS_DMASYNC_PREWRITE);
 		}
 
 		/* Same segment-list hand-off as the passthru branch. */
 		scsi_req = &(req_pkt->gen_req_pkt.scsi_req);
 		scsi_req->sg_list = (TW_UINT8 *)segs;
 		scsi_req->sgl_entries += (nsegments - 1);
 		error = tw_cl_start_io(&(sc->ctlr_handle), req_pkt,
 			&(req->req_handle));
 	}
 
 out:
 	if (error) {
 		/* Submission failed: complete the request with the error. */
 		req->error_code = error;
 		req_pkt->tw_osl_callback(&(req->req_handle));
 		/*
 		 * If the caller had been returned EINPROGRESS, and he has
 		 * registered a callback for handling completion, the callback
 		 * will never get called because we were unable to submit the
 		 * request.  So, free up the request right here.
 		 */
 		if (req->flags & TW_OSLI_REQ_FLAGS_IN_PROGRESS)
 			tw_osli_req_q_insert_tail(req, TW_OSLI_FREE_Q);
 	}
 }
 
 
 
 /*
  * Function name:	twa_map_load_callback
  * Description:		Callback of bus_dmamap_load for the buffer associated
  *			with a cmd pkt.  Stores the bus address of the first
  *			segment into the variable passed as arg.  If the load
  *			failed, the variable is left untouched, because the
  *			segment list does not describe a valid mapping then.
  *
  * Input:		arg	-- ptr to variable to hold phys addr
  *			segs	-- ptr to a list of segment descriptors
  *			nsegments--# of segments
  *			error	-- 0 if no errors encountered before callback,
  *				   non-zero if errors were encountered
  * Output:		*arg	-- bus address of segs[0] (on success only)
  * Return value:	None
  */
 static TW_VOID
 twa_map_load_callback(TW_VOID *arg, bus_dma_segment_t *segs,
 	TW_INT32 nsegments, TW_INT32 error)
 {
 	/* Never read segs[0] unless the load actually produced a segment. */
 	if (error != 0 || nsegments < 1)
 		return;
 
 	*((bus_addr_t *)arg) = segs[0].ds_addr;
 }
 
 
 
 /*
  * Function name:	tw_osli_map_request
  * Description:		Maps a cmd pkt and data associated with it, into
  *			DMA'able memory.  Requests with data are submitted
  *			from the bus_dmamap_load callback
  *			(twa_map_load_data_callback); requests without data
  *			are queued as busy and submitted to the Common Layer
  *			directly from here.
  *
  * Input:		req	-- ptr to request pkt
  * Output:		None
  * Return value:	0	-- success (a deferred load, EINPROGRESS, is
  *				   also reported as 0)
  *			non-zero-- failure
  */
 TW_INT32
 tw_osli_map_request(struct tw_osli_req_context *req)
 {
 	struct twa_softc	*sc = req->ctlr;
 	TW_INT32		error = 0;
 
 	tw_osli_dbg_dprintf(10, sc, "entered");
 
 	/* If the command involves data, map that too. */
 	if (req->data != NULL) {
 		/*
 		 * It's sufficient for the data pointer to be 4-byte aligned
 		 * to work with 9000.  However, if 4-byte aligned addresses
 		 * are passed to bus_dmamap_load, we can get back sg elements
 		 * that are not 512-byte multiples in size.  So, we will let
 		 * only those buffers that are 512-byte aligned to pass
 		 * through, and bounce the rest, so as to make sure that we
 		 * always get back sg elements that are 512-byte multiples
 		 * in size.  (The code tests against sc->sg_size_factor.)
 		 */
 		if (((vm_offset_t)req->data % sc->sg_size_factor) ||
 			(req->length % sc->sg_size_factor)) {
 			req->flags |= TW_OSLI_REQ_FLAGS_DATA_COPY_NEEDED;
 			/* Save original data pointer and length. */
 			req->real_data = req->data;
 			req->real_length = req->length;
 			/* Round the length up to a multiple of sg_size_factor. */
 			req->length = (req->length +
 				(sc->sg_size_factor - 1)) &
 				~(sc->sg_size_factor - 1);
 			req->data = malloc(req->length, TW_OSLI_MALLOC_CLASS,
 					M_NOWAIT);
 			if (req->data == NULL) {
 				tw_osli_printf(sc, "error = %d",
 					TW_CL_SEVERITY_ERROR_STRING,
 					TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 					0x201E,
 					"Failed to allocate memory "
 					"for bounce buffer",
 					ENOMEM);
 				/* Restore original data pointer and length. */
 				req->data = req->real_data;
 				req->length = req->real_length;
 				return(ENOMEM);
 			}
 		}
 	
 		/*
 		 * Map the data buffer into bus space and build the SG list.
 		 * In every branch the actual submission happens in
 		 * twa_map_load_data_callback.
 		 */
 		if (req->flags & TW_OSLI_REQ_FLAGS_PASSTHRU) {
 			/*
 			 * Lock against multiple simultaneous ioctl calls.
 			 * NOTE(review): the load is requested with
 			 * BUS_DMA_WAITOK while sc->io_lock is held through
 			 * mtx_lock_spin -- confirm this is intended.
 			 */
 			mtx_lock_spin(sc->io_lock);
 			error = bus_dmamap_load(sc->ioctl_tag, sc->ioctl_map,
 				req->data, req->length,
 				twa_map_load_data_callback, req,
 				BUS_DMA_WAITOK);
 			mtx_unlock_spin(sc->io_lock);
 		} else if (req->flags & TW_OSLI_REQ_FLAGS_CCB) {
 			/* CCB-backed request: the CCB in orig_req describes the data. */
 			error = bus_dmamap_load_ccb(sc->dma_tag, req->dma_map,
 				req->orig_req, twa_map_load_data_callback, req,
 				BUS_DMA_WAITOK);
 		} else {
 			/*
 			 * There's only one CAM I/O thread running at a time.
 			 * So, there's no need to hold the io_lock.
 			 */
 			error = bus_dmamap_load(sc->dma_tag, req->dma_map,
 				req->data, req->length,
 				twa_map_load_data_callback, req,
 				BUS_DMA_WAITOK);
 		}
 		
 		/*
 		 * A zero return means the callback has already run; report
 		 * whatever error it recorded on the request.
 		 */
 		if (!error)
 			error = req->error_code;
 		else {
 			if (error == EINPROGRESS) {
 				/*
 				 * Specifying sc->io_lock as the lockfuncarg
 				 * in ...tag_create should protect the access
 				 * of ...FLAGS_MAPPED from the callback.
 				 */
 				mtx_lock_spin(sc->io_lock);
 				/* Only flag IN_PROGRESS if the callback has not run yet. */
 				if (!(req->flags & TW_OSLI_REQ_FLAGS_MAPPED))
 					req->flags |= TW_OSLI_REQ_FLAGS_IN_PROGRESS;
 				tw_osli_disallow_new_requests(sc, &(req->req_handle));
 				mtx_unlock_spin(sc->io_lock);
 				error = 0;
 			} else {
 				tw_osli_printf(sc, "error = %d",
 					TW_CL_SEVERITY_ERROR_STRING,
 					TW_CL_MESSAGE_SOURCE_FREEBSD_DRIVER,
 					0x9999,
 					"Failed to map DMA memory "
 					"for I/O request",
 					error);
 				req->flags |= TW_OSLI_REQ_FLAGS_FAILED;
 				/*
 				 * Free alignment buffer if it was used.
 				 * NOTE(review): the DATA_COPY_NEEDED flag is
 				 * not cleared on this path; confirm that a
 				 * later tw_osli_unmap_request() cannot free
 				 * the restored original pointer.
 				 */
 				if (req->flags &
 					TW_OSLI_REQ_FLAGS_DATA_COPY_NEEDED) {
 					free(req->data, TW_OSLI_MALLOC_CLASS);
 					/*
 					 * Restore original data pointer
 					 * and length.
 					 */
 					req->data = req->real_data;
 					req->length = req->real_length;
 				}
 			}
 		}
 
 	} else {
 		/* Mark the request as currently being processed. */
 		req->state = TW_OSLI_REQ_STATE_BUSY;
 		/* Move the request into the busy queue. */
 		tw_osli_req_q_insert_tail(req, TW_OSLI_BUSY_Q);
 		/* No data to map: submit to the Common Layer right away. */
 		if (req->flags & TW_OSLI_REQ_FLAGS_PASSTHRU)
 			error = tw_cl_fw_passthru(&sc->ctlr_handle,
 					&(req->req_pkt), &(req->req_handle));
 		else
 			error = tw_cl_start_io(&sc->ctlr_handle,
 					&(req->req_pkt), &(req->req_handle));
 		if (error) {
 			/* Submission failed: complete the request with the error. */
 			req->error_code = error;
 			req->req_pkt.tw_osl_callback(&(req->req_handle));
 		}
 	}
 	return(error);
 }
 
 
 
 /*
  * Function name:	tw_osli_unmap_request
  * Description:		Reverses tw_osli_map_request: syncs and unloads the
  *			DMA map used for the request's data, copies read
  *			data back out of a bounce buffer if one was used,
  *			and releases that bounce buffer.
  *
  * Input:		req	-- ptr to request pkt
  * Output:		None
  * Return value:	None
  */
 TW_VOID
 tw_osli_unmap_request(struct tw_osli_req_context *req)
 {
 	struct twa_softc	*sc = req->ctlr;
 
 	tw_osli_dbg_dprintf(10, sc, "entered");
 
 	if ((req->data != NULL) &&
 	    (req->flags & TW_OSLI_REQ_FLAGS_PASSTHRU)) {
 		/*
 		 * Passthru data lives in the shared ioctl map; hold the
 		 * io_lock against multiple simultaneous ioctl calls.
 		 */
 		mtx_lock_spin(sc->io_lock);
 
 		if (req->flags & TW_OSLI_REQ_FLAGS_DATA_IN) {
 			bus_dmamap_sync(sc->ioctl_tag, sc->ioctl_map,
 				BUS_DMASYNC_POSTREAD);
 
 			/* Data was read into a bounce buffer: copy it home. */
 			if (req->flags & TW_OSLI_REQ_FLAGS_DATA_COPY_NEEDED) {
 				bcopy(req->data, req->real_data,
 					req->real_length);
 			}
 		}
 
 		if (req->flags & TW_OSLI_REQ_FLAGS_DATA_OUT) {
 			bus_dmamap_sync(sc->ioctl_tag, sc->ioctl_map,
 				BUS_DMASYNC_POSTWRITE);
 		}
 
 		bus_dmamap_unload(sc->ioctl_tag, sc->ioctl_map);
 
 		mtx_unlock_spin(sc->io_lock);
 	} else if (req->data != NULL) {
 		/* Regular I/O data uses the request's own map; no lock. */
 		if (req->flags & TW_OSLI_REQ_FLAGS_DATA_IN) {
 			bus_dmamap_sync(sc->dma_tag, req->dma_map,
 				BUS_DMASYNC_POSTREAD);
 
 			/* Data was read into a bounce buffer: copy it home. */
 			if (req->flags & TW_OSLI_REQ_FLAGS_DATA_COPY_NEEDED) {
 				bcopy(req->data, req->real_data,
 					req->real_length);
 			}
 		}
 
 		if (req->flags & TW_OSLI_REQ_FLAGS_DATA_OUT) {
 			bus_dmamap_sync(sc->dma_tag, req->dma_map,
 				BUS_DMASYNC_POSTWRITE);
 		}
 
 		bus_dmamap_unload(sc->dma_tag, req->dma_map);
 	}
 
 	/* Drop the bounce buffer and put the caller's buffer back. */
 	if (req->flags & TW_OSLI_REQ_FLAGS_DATA_COPY_NEEDED) {
 		free(req->data, TW_OSLI_MALLOC_CLASS);
 		req->data = req->real_data;
 		req->length = req->real_length;
 	}
 }
 
 
 
 #ifdef TW_OSL_DEBUG
 
 /*
  * Prototypes for the debug-only helpers defined below (compiled only when
  * TW_OSL_DEBUG is defined).  Their headers describe them as being for
  * calling from ddb.
  */
 TW_VOID	twa_report_stats(TW_VOID);
 TW_VOID	twa_reset_stats(TW_VOID);
 TW_VOID	tw_osli_print_ctlr_stats(struct twa_softc *sc);
 TW_VOID twa_print_req_info(struct tw_osli_req_context *req);
 
 
 /*
  * Function name:	twa_report_stats
  * Description:		For being called from ddb.  Walks the twa devclass
  *			units from 0 until the first unit without a softc,
  *			printing the OSL and CL internal stats of each
  *			controller found.
  *
  * Input:		None
  * Output:		None
  * Return value:	None
  */
 TW_VOID
 twa_report_stats(TW_VOID)
 {
 	struct twa_softc	*sc;
 	TW_INT32		unit = 0;
 
 	while ((sc = devclass_get_softc(twa_devclass, unit)) != NULL) {
 		tw_osli_print_ctlr_stats(sc);
 		tw_cl_print_ctlr_stats(&sc->ctlr_handle);
 		unit++;
 	}
 }
 
 
 
 /*
  * Function name:	tw_osli_print_ctlr_stats
  * Description:		For being called from ddb.  Prints OSL controller stats:
  *			the softc pointer, then the current and maximum
  *			lengths recorded for the free and busy queues.
  *
  * Input:		sc	-- ptr to OSL internal controller context
  * Output:		None
  * Return value:	None
  */
 TW_VOID
 tw_osli_print_ctlr_stats(struct twa_softc *sc)
 {
 	twa_printf(sc, "osl_ctlr_ctxt = %p\n", sc);
 	/* Column header for the two per-queue rows that follow. */
 	twa_printf(sc, "OSLq type  current  max\n");
 	twa_printf(sc, "free      %04d     %04d\n",
 		sc->q_stats[TW_OSLI_FREE_Q].cur_len,
 		sc->q_stats[TW_OSLI_FREE_Q].max_len);
 	twa_printf(sc, "busy      %04d     %04d\n",
 		sc->q_stats[TW_OSLI_BUSY_Q].cur_len,
 		sc->q_stats[TW_OSLI_BUSY_Q].max_len);
 }	
 
 
 
 /*
  * Function name:	twa_print_req_info
  * Description:		For being called from ddb.  Calls functions that print
  *			OSL and CL internal details for the request: first
  *			the OSL request context fields, then whatever
  *			tw_cl_print_req_info reports for the same handle.
  *
  * Input:		req	-- ptr to OSL internal request context
  * Output:		None
  * Return value:	None
  */
 TW_VOID
 twa_print_req_info(struct tw_osli_req_context *req)
 {
 	struct twa_softc	*sc = req->ctlr;
 
 	twa_printf(sc, "OSL details for request:\n");
 	/*
 	 * One call, four output lines: handle contexts; data buffer and
 	 * its bounce-buffer shadow; state/flags/error/originating request;
 	 * queue links and DMA map.  The arguments follow the same order.
 	 */
 	twa_printf(sc, "osl_req_ctxt = %p, cl_req_ctxt = %p\n"
 		"data = %p, length = 0x%x, real_data = %p, real_length = 0x%x\n"
 		"state = 0x%x, flags = 0x%x, error = 0x%x, orig_req = %p\n"
 		"next_req = %p, prev_req = %p, dma_map = %p\n",
 		req->req_handle.osl_req_ctxt, req->req_handle.cl_req_ctxt,
 		req->data, req->length, req->real_data, req->real_length,
 		req->state, req->flags, req->error_code, req->orig_req,
 		req->link.next, req->link.prev, req->dma_map);
 	/* Let the Common Layer print its own view of the request. */
 	tw_cl_print_req_info(&(req->req_handle));
 }
 
 
 
 /*
  * Function name:	twa_reset_stats
  * Description:		For being called from ddb.  For every twa unit, from
  *			0 until the first unit without a softc, clears the
  *			recorded maximum lengths of the free and busy queues
  *			and asks the CL to reset its own stats.
  *
  * Input:		None
  * Output:		None
  * Return value:	None
  */
 TW_VOID
 twa_reset_stats(TW_VOID)
 {
 	struct twa_softc	*sc;
 	TW_INT32		unit = 0;
 
 	while ((sc = devclass_get_softc(twa_devclass, unit)) != NULL) {
 		sc->q_stats[TW_OSLI_FREE_Q].max_len = 0;
 		sc->q_stats[TW_OSLI_BUSY_Q].max_len = 0;
 		tw_cl_reset_stats(&sc->ctlr_handle);
 		unit++;
 	}
 }
 
 #endif /* TW_OSL_DEBUG */
Index: projects/hps_head/sys/kern/kern_exit.c
===================================================================
--- projects/hps_head/sys/kern/kern_exit.c	(revision 309217)
+++ projects/hps_head/sys/kern/kern_exit.c	(revision 309218)
@@ -1,1328 +1,1328 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/capsicum.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/procdesc.h>
 #include <sys/pioctl.h>
 #include <sys/jail.h>
 #include <sys/tty.h>
 #include <sys/wait.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sbuf.h>
 #include <sys/signalvar.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/syslog.h>
 #include <sys/ptrace.h>
 #include <sys/acct.h>		/* for acct_process() function prototype */
 #include <sys/filedesc.h>
 #include <sys/sdt.h>
 #include <sys/shm.h>
 #include <sys/sem.h>
 #include <sys/umtx.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 #include <vm/vm_domain.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 dtrace_execexit_func_t	dtrace_fasttrap_exit;
 #endif
 
 SDT_PROVIDER_DECLARE(proc);
 SDT_PROBE_DEFINE1(proc, , , exit, "int");
 
 /* Hook for NFS teardown procedure. */
 void (*nlminfo_release_p)(struct proc *p);
 
 /*
  * Return the real parent of 'child'.  For a process that is not on an
  * orphan list this is p_pptr, unless p_oppid is set and differs from
  * p_pptr's pid, in which case initproc is returned.  For an orphaned
  * process the orphan list is walked backwards to its first entry, whose
  * le_prev points into the owning process.
  *
  * The proctree lock must be held.
  */
 struct proc *
 proc_realparent(struct proc *child)
 {
 	struct proc *cur;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 
 	if ((child->p_treeflag & P_TREE_ORPHANED) == 0) {
 		if (child->p_oppid != 0 &&
 		    child->p_pptr->p_pid != child->p_oppid)
 			return (initproc);
 		return (child->p_pptr);
 	}
 
 	cur = child;
 	while ((cur->p_treeflag & P_TREE_FIRST_ORPHAN) == 0) {
 		/* Cannot use LIST_PREV(), since the list head is not known. */
 		cur = __containerof(cur->p_orphan.le_prev, struct proc,
 		    p_orphan.le_next);
 		KASSERT((cur->p_treeflag & P_TREE_ORPHANED) != 0,
 		    ("missing P_ORPHAN %p", cur));
 	}
 
 	/* The first orphan's le_prev is the list head inside the owner. */
 	return (__containerof(cur->p_orphan.le_prev, struct proc,
 	    p_orphans.lh_first));
 }
 
 /*
  * If 'p' is a reaper, move every process on its reap list to p's own
  * reaper and clear P_TREE_REAPER on p.  When 'exiting' is true, moved
  * processes whose parent is p are also reparented to the new reaper.
  *
  * The proctree lock must be held; must not be called for initproc.
  */
 void
 reaper_abandon_children(struct proc *p, bool exiting)
 {
 	struct proc *heir, *desc, *next;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	KASSERT(p != initproc, ("reaper_abandon_children for initproc"));
 
 	if ((p->p_treeflag & P_TREE_REAPER) != 0) {
 		heir = p->p_reaper;
 		LIST_FOREACH_SAFE(desc, &p->p_reaplist, p_reapsibling, next) {
 			/* Move desc from p's reap list to the heir's. */
 			LIST_REMOVE(desc, p_reapsibling);
 			desc->p_reaper = heir;
 			desc->p_reapsubtree = p->p_reapsubtree;
 			LIST_INSERT_HEAD(&heir->p_reaplist, desc,
 			    p_reapsibling);
 			if (!exiting || desc->p_pptr != p)
 				continue;
 			PROC_LOCK(desc);
 			proc_reparent(desc, heir);
 			PROC_UNLOCK(desc);
 		}
 		KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty"));
 		p->p_treeflag &= ~P_TREE_REAPER;
 	}
 }
 
 /*
  * Take 'p' off the orphan list it is on, if any.  When p is the entry
  * marked P_TREE_FIRST_ORPHAN, the mark is handed to the next entry
  * before p is unlinked.
  *
  * The proctree lock must be held exclusively.
  */
 static void
 clear_orphan(struct proc *p)
 {
 	struct proc *succ;
 
 	sx_assert(&proctree_lock, SA_XLOCKED);
 
 	if ((p->p_treeflag & P_TREE_ORPHANED) != 0) {
 		if ((p->p_treeflag & P_TREE_FIRST_ORPHAN) != 0) {
 			succ = LIST_NEXT(p, p_orphan);
 			if (succ != NULL)
 				succ->p_treeflag |= P_TREE_FIRST_ORPHAN;
 			p->p_treeflag &= ~P_TREE_FIRST_ORPHAN;
 		}
 		LIST_REMOVE(p, p_orphan);
 		p->p_treeflag &= ~P_TREE_ORPHANED;
 	}
 }
 
 /*
  * exit -- death of process.
  *
  * System call entry point: hands uap->rval to exit1() as the exit value,
  * with a signal number of 0.
  */
 void
 sys_sys_exit(struct thread *td, struct sys_exit_args *uap)
 {
 
 	exit1(td, uap->rval, 0);
 	/* NOTREACHED */
 }
 
 /*
  * Exit: deallocate address space and other resources, change proc state to
  * zombie, and unlink proc from allproc and parent's lists.  Save exit status
  * and rusage for wait().  Check for child processes and orphan them.
  *
  * td is the exiting thread; the process is td->td_proc.  rval is stored in
  * p_xexit and signo in p_xsig; the KASSERT below requires that at most one
  * of the two is non-zero.  The function finishes by calling thread_exit().
  */
 void
 exit1(struct thread *td, int rval, int signo)
 {
 	struct proc *p, *nq, *q, *t;
 	struct thread *tdt;
 	int drain;
 
 	mtx_assert(&Giant, MA_NOTOWNED);
 	KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
 
 	p = td->td_proc;
 	/*
 	 * XXX in case we're rebooting we just let init die in order to
 	 * work around an unsolved stack overflow seen very late during
 	 * shutdown on sparc64 when the gmirror worker process exits.
 	 */
 	if (p == initproc && rebooting == 0) {
 		printf("init died (signal %d, exit %d)\n", signo, rval);
 		panic("Going nowhere without my init!");
 	}
 
 	/*
 	 * Deref SU mp, since the thread does not return to userspace.
 	 */
 	if (softdep_ast_cleanup != NULL)
 		softdep_ast_cleanup();
 
 	/*
 	 * MUST abort all other threads before proceeding past here.
 	 */
 	PROC_LOCK(p);
 	/*
 	 * First check if some other thread or external request got
 	 * here before us.  If so, act appropriately: exit or suspend.
 	 * We must ensure that stop requests are handled before we set
 	 * P_WEXIT.
 	 */
 	thread_suspend_check(0);
 	while (p->p_flag & P_HADTHREADS) {
 		/*
 		 * Kill off the other threads. This requires
 		 * some co-operation from other parts of the kernel
 		 * so it may not be instantaneous.  With this state set
 		 * any thread entering the kernel from userspace will
 		 * thread_exit() in trap().  Any thread attempting to
 		 * sleep will return immediately with EINTR or EWOULDBLOCK
 		 * which will hopefully force them to back out to userland
 		 * freeing resources as they go.  Any thread attempting
 		 * to return to userland will thread_exit() from userret().
 		 * thread_exit() will unsuspend us when the last of the
 		 * other threads exits.
 		 * If there is already a thread singler after resumption,
 		 * calling thread_single will fail; in that case, we just
 		 * re-check all suspension request, the thread should
 		 * either be suspended there or exit.
 		 */
 		if (!thread_single(p, SINGLE_EXIT))
 			/*
 			 * All other activity in this process is now
 			 * stopped.  Threading support has been turned
 			 * off.
 			 */
 			break;
 		/*
 		 * Recheck for new stop or suspend requests which
 		 * might appear while process lock was dropped in
 		 * thread_single().
 		 */
 		thread_suspend_check(0);
 	}
 	KASSERT(p->p_numthreads == 1,
 	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
 	racct_sub(p, RACCT_NTHR, 1);
 
 	/* Let event handler change exit status */
 	p->p_xexit = rval;
 	p->p_xsig = signo;
 
 	/*
 	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
 	 * on our vmspace, so we should block below until they have
 	 * released their reference to us.  Note that if they have
 	 * requested S_EXIT stops we will block here until they ack
 	 * via PIOCCONT.
 	 */
 	_STOPEVENT(p, S_EXIT, 0);
 
 	/*
 	 * Ignore any pending request to stop due to a stop signal.
 	 * Once P_WEXIT is set, future requests will be ignored as
 	 * well.
 	 */
 	p->p_flag &= ~P_STOPPED_SIG;
 	KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped"));
 
 	/*
 	 * Note that we are exiting and do another wakeup of anyone in
 	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
 	 * decided to wait again after we told them we are exiting.
 	 */
 	p->p_flag |= P_WEXIT;
 	wakeup(&p->p_stype);
 
 	/*
 	 * Wait for any processes that have a hold on our vmspace to
 	 * release their reference.
 	 */
 	while (p->p_lock > 0)
 		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);
 
 	PROC_UNLOCK(p);
 	/* Drain the limit callout while we don't have the proc locked */
 	callout_drain(&p->p_limco);
 
 #ifdef AUDIT
 	/*
 	 * The Sun BSM exit token contains two components: an exit status as
 	 * passed to exit(), and a return value to indicate what sort of exit
 	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
 	 * what the return value is.
 	 */
 	AUDIT_ARG_EXIT(rval, 0);
 	AUDIT_SYSCALL_EXIT(0, td);
 #endif
 
 	/* Are we a task leader with peers? */
 	if (p->p_peers != NULL && p == p->p_leader) {
 		mtx_lock(&ppeers_lock);
 		q = p->p_peers;
 		while (q != NULL) {
 			PROC_LOCK(q);
 			kern_psignal(q, SIGKILL);
 			PROC_UNLOCK(q);
 			q = q->p_peers;
 		}
 		while (p->p_peers != NULL)
 			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
 		mtx_unlock(&ppeers_lock);
 	}
 
 	/*
 	 * Check if any loadable modules need anything done at process exit.
 	 * E.g. SYSV IPC stuff.
 	 * Event handler could change exit status.
 	 * XXX what if one of these generates an error?
 	 */
 	EVENTHANDLER_INVOKE(process_exit, p);
 
 	/*
 	 * If parent is waiting for us to exit or exec,
 	 * P_PPWAIT is set; we will wakeup the parent below.
 	 */
 	PROC_LOCK(p);
 	stopprofclock(p);
 	p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);
 	p->p_ptevents = 0;
 
 	/*
 	 * Stop the real interval timer.  If the handler is currently
 	 * executing, prevent it from rearming itself and let it finish.
 	 * The callout is stopped while the proc lock is held; the result
 	 * is remembered in 'drain' so that callout_drain() is only called
 	 * after the proc lock has been dropped.
 	 */
 	if (timevalisset(&p->p_realtimer.it_value)) {
 		timevalclear(&p->p_realtimer.it_interval);
-		drain = callout_stop(&p->p_itcallout);
+		drain = callout_stop(&p->p_itcallout).bit.draining;
 	} else {
 		drain = 0;
 	}
 	PROC_UNLOCK(p);
 
-	if (drain & CALLOUT_RET_DRAINING)
+	if (drain)
 		callout_drain(&p->p_itcallout);
 
 	umtx_thread_exit(td);
 
 	/*
 	 * Reset any sigio structures pointing to us as a result of
 	 * F_SETOWN with our pid.
 	 */
 	funsetownlst(&p->p_sigiolst);
 
 	/*
 	 * If this process has an nlminfo data area (for lockd), release it
 	 */
 	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
 		(*nlminfo_release_p)(p);
 
 	/*
 	 * Close open files and release open-file table.
 	 * This may block!
 	 */
 	fdescfree(td);
 
 	/*
 	 * If this thread tickled GEOM, we need to wait for the giggling to
 	 * stop before we return to userland
 	 */
 	if (td->td_pflags & TDP_GEOM)
 		g_waitidle();
 
 	/*
 	 * Remove ourself from our leader's peer list and wake our leader.
 	 * The list is checked once without ppeers_lock and again with it.
 	 */
 	if (p->p_leader->p_peers != NULL) {
 		mtx_lock(&ppeers_lock);
 		if (p->p_leader->p_peers != NULL) {
 			q = p->p_leader;
 			while (q->p_peers != p)
 				q = q->p_peers;
 			q->p_peers = p->p_peers;
 			wakeup(p->p_leader);
 		}
 		mtx_unlock(&ppeers_lock);
 	}
 
 	vmspace_exit(td);
 	killjobc();
 	(void)acct_process(td);
 
 #ifdef KTRACE
 	ktrprocexit(td);
 #endif
 	/*
 	 * Release reference to text vnode
 	 */
 	if (p->p_textvp != NULL) {
 		vrele(p->p_textvp);
 		p->p_textvp = NULL;
 	}
 
 	/*
 	 * Release our limits structure.
 	 */
 	lim_free(p->p_limit);
 	p->p_limit = NULL;
 
 	tidhash_remove(td);
 
 	/*
 	 * Remove proc from allproc queue and pidhash chain.
 	 * Place onto zombproc.  Unlink from parent's child list.
 	 */
 	sx_xlock(&allproc_lock);
 	LIST_REMOVE(p, p_list);
 	LIST_INSERT_HEAD(&zombproc, p, p_list);
 	LIST_REMOVE(p, p_hash);
 	sx_xunlock(&allproc_lock);
 
 	/*
 	 * Call machine-dependent code to release any
 	 * machine-dependent resources other than the address space.
 	 * The address space is released by "vmspace_exitfree(p)" in
 	 * vm_waitproc().
 	 */
 	cpu_exit(td);
 
 	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);
 
 	/*
 	 * Reparent all children processes:
 	 * - traced ones to the original parent (or init if we are that parent)
 	 * - the rest to init
 	 */
 	sx_xlock(&proctree_lock);
 	q = LIST_FIRST(&p->p_children);
 	if (q != NULL)		/* only need this if any child is S_ZOMB */
 		wakeup(q->p_reaper);
 	/* nq is fetched up front because proc_reparent() unlinks q. */
 	for (; q != NULL; q = nq) {
 		nq = LIST_NEXT(q, p_sibling);
 		PROC_LOCK(q);
 		q->p_sigparent = SIGCHLD;
 
 		if (!(q->p_flag & P_TRACED)) {
 			proc_reparent(q, q->p_reaper);
 		} else {
 			/*
 			 * Traced processes are killed since their existence
 			 * means someone is screwing up.
 			 */
 			t = proc_realparent(q);
 			if (t == p) {
 				proc_reparent(q, q->p_reaper);
 			} else {
 				PROC_LOCK(t);
 				proc_reparent(q, t);
 				PROC_UNLOCK(t);
 			}
 			/*
 			 * Since q was found on our children list, the
 			 * proc_reparent() call moved q to the orphan
 			 * list due to present P_TRACED flag. Clear
 			 * orphan link for q now while q is locked.
 			 */
 			clear_orphan(q);
 			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
 			q->p_flag2 &= ~P2_PTRACE_FSTP;
 			q->p_ptevents = 0;
 			FOREACH_THREAD_IN_PROC(q, tdt) {
 				tdt->td_dbgflags &= ~(TDB_SUSPEND | TDB_XSIG |
 				    TDB_FSTP);
 			}
 			kern_psignal(q, SIGKILL);
 		}
 		PROC_UNLOCK(q);
 	}
 
 	/*
 	 * Also get rid of our orphans.
 	 */
 	while ((q = LIST_FIRST(&p->p_orphans)) != NULL) {
 		PROC_LOCK(q);
 		CTR2(KTR_PTRACE, "exit: pid %d, clearing orphan %d", p->p_pid,
 		    q->p_pid);
 		clear_orphan(q);
 		PROC_UNLOCK(q);
 	}
 
 	/* Save exit status. */
 	PROC_LOCK(p);
 	p->p_xthread = td;
 
 	/* Tell the prison that we are gone. */
 	prison_proc_free(p->p_ucred->cr_prison);
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * Tell the DTrace fasttrap provider about the exit if it
 	 * has declared an interest.
 	 */
 	if (dtrace_fasttrap_exit)
 		dtrace_fasttrap_exit(p);
 #endif
 
 	/*
 	 * Notify interested parties of our demise.
 	 */
 	KNOTE_LOCKED(p->p_klist, NOTE_EXIT);
 
 #ifdef KDTRACE_HOOKS
 	/* Derive the CLD_* reason passed to the proc exit SDT probe. */
 	int reason = CLD_EXITED;
 	if (WCOREDUMP(signo))
 		reason = CLD_DUMPED;
 	else if (WIFSIGNALED(signo))
 		reason = CLD_KILLED;
 	SDT_PROBE1(proc, , , exit, reason);
 #endif
 
 	/*
 	 * If this is a process with a descriptor, we may not need to deliver
 	 * a signal to the parent.  proctree_lock is held over
 	 * procdesc_exit() to serialize concurrent calls to close() and
 	 * exit().
 	 *
 	 * Both arms of this statement leave the parent's proc lock held;
 	 * it is released below, after p_state has been set to PRS_ZOMBIE.
 	 */
 	if (p->p_procdesc == NULL || procdesc_exit(p)) {
 		/*
 		 * Notify parent that we're gone.  If parent has the
 		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
 		 * notify process 1 instead (and hope it will handle this
 		 * situation).
 		 */
 		PROC_LOCK(p->p_pptr);
 		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
 		if (p->p_pptr->p_sigacts->ps_flag &
 		    (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
 			struct proc *pp;
 
 			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
 			pp = p->p_pptr;
 			PROC_UNLOCK(pp);
 			proc_reparent(p, p->p_reaper);
 			p->p_sigparent = SIGCHLD;
 			PROC_LOCK(p->p_pptr);
 
 			/*
 			 * Notify parent, so in case he was wait(2)ing or
 			 * executing waitpid(2) with our pid, he will
 			 * continue.
 			 */
 			wakeup(pp);
 		} else
 			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
 
 		if (p->p_pptr == p->p_reaper || p->p_pptr == initproc)
 			childproc_exited(p);
 		else if (p->p_sigparent != 0) {
 			if (p->p_sigparent == SIGCHLD)
 				childproc_exited(p);
 			else	/* LINUX thread */
 				kern_psignal(p->p_pptr, p->p_sigparent);
 		}
 	} else
 		PROC_LOCK(p->p_pptr);
 	sx_xunlock(&proctree_lock);
 
 	/*
 	 * The state PRS_ZOMBIE prevents other processes from sending
 	 * signal to the process, to avoid memory leak, we free memory
 	 * for signal queue at the time when the state is set.
 	 */
 	sigqueue_flush(&p->p_sigqueue);
 	sigqueue_flush(&td->td_sigqueue);
 
 	/*
 	 * We have to wait until after acquiring all locks before
 	 * changing p_state.  We need to avoid all possible context
 	 * switches (including ones from blocking on a mutex) while
 	 * marked as a zombie.  We also have to set the zombie state
 	 * before we release the parent process' proc lock to avoid
 	 * a lost wakeup.  So, we first call wakeup, then we grab the
 	 * sched lock, update the state, and release the parent process'
 	 * proc lock.
 	 */
 	wakeup(p->p_pptr);
 	cv_broadcast(&p->p_pwait);
 	sched_exit(p->p_pptr, td);
 	PROC_SLOCK(p);
 	p->p_state = PRS_ZOMBIE;
 	PROC_UNLOCK(p->p_pptr);
 
 	/*
 	 * Save our children's rusage information in our exit rusage.
 	 */
 	PROC_STATLOCK(p);
 	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);
 	PROC_STATUNLOCK(p);
 
 	/*
 	 * Make sure the scheduler takes this thread out of its tables etc.
 	 * This will also release this thread's reference to the ucred.
 	 * Other thread parts to release include pcb bits and such.
 	 */
 	thread_exit();
 }
 
 
 #ifndef _SYS_SYSPROTO_H_
 struct abort2_args {
 	char *why;
 	int nargs;
 	void **args;
 };
 #endif
 
 /*
  * abort2 system call: log a message describing why the process aborted
  * and terminate it through exit1().  The process exits with SIGABRT when
  * the arguments could be read, and with SIGKILL when they were invalid or
  * could not be copied in.
  */
 int
 sys_abort2(struct thread *td, struct abort2_args *uap)
 {
 	struct proc *p;
 	struct sbuf *msg;
 	void *ptrs[16];
 	int idx, rc, sig;
 
 	p = td->td_proc;
 
 	/*
 	 * Build the log prefix first, so that both a proper abort2() call
 	 * and one with bad arguments get logged.  512 bytes are enough for
 	 * the descriptions of 16 arguments plus additional comments.
 	 */
 	msg = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN);
 	sbuf_clear(msg);
 	sbuf_printf(msg, "%s(pid %d uid %d) aborted: ",
 	    p->p_comm, p->p_pid, td->td_ucred->cr_uid);
 
 	/*
 	 * abort2() cannot return, so SIGKILL is the outcome until all of
 	 * the arguments have been validated.
 	 */
 	sig = SIGKILL;
 
 	/* Bound the argument count to prevent DoS from user-space. */
 	if (uap->nargs < 0 || uap->nargs > 16)
 		goto out;
 	if (uap->nargs > 0) {
 		if (uap->args == NULL)
 			goto out;
 		rc = copyin(uap->args, ptrs, uap->nargs * sizeof(void *));
 		if (rc != 0)
 			goto out;
 	}
 
 	/*
 	 * The 'reason' string is limited to 128 bytes, which fits even
 	 * when the maximal number of arguments is logged.
 	 */
 	if (uap->why == NULL) {
 		sbuf_printf(msg, "(null)");
 	} else {
 		rc = sbuf_copyin(msg, uap->why, 128);
 		if (rc < 0)
 			goto out;
 	}
 
 	if (uap->nargs > 0) {
 		sbuf_printf(msg, "(");
 		for (idx = 0; idx < uap->nargs; idx++) {
 			sbuf_printf(msg, "%s%p", idx == 0 ? "" : ", ",
 			    ptrs[idx]);
 		}
 		sbuf_printf(msg, ")");
 	}
 
 	/*
 	 * Everything was valid and was copied from user-space
 	 * successfully.
 	 */
 	sig = SIGABRT;
 out:
 	if (sig == SIGKILL) {
 		sbuf_trim(msg);
 		sbuf_printf(msg, " (Reason text inaccessible)");
 	}
 	sbuf_cat(msg, "\n");
 	sbuf_finish(msg);
 	log(LOG_INFO, "%s", sbuf_data(msg));
 	sbuf_delete(msg);
 	exit1(td, 0, sig);
 	return (0);
 }
 
 
 #ifdef COMPAT_43
 /*
  * Old wait entry point: waits for any child through kern_wait() and,
  * on success, returns the status in the second return value.
  */
 int
 owait(struct thread *td, struct owait_args *uap __unused)
 {
 	int error, status;
 
 	error = kern_wait(td, WAIT_ANY, &status, 0, NULL);
 	if (error != 0)
 		return (error);
 	td->td_retval[1] = status;
 	return (0);
 }
 #endif /* COMPAT_43 */
 
 /*
  * The dirty work is handled by kern_wait().
  */
 int
 sys_wait4(struct thread *td, struct wait4_args *uap)
 {
 	struct rusage ru, *rup;
 	int error, status;
 
 	if (uap->rusage != NULL)
 		rup = &ru;
 	else
 		rup = NULL;
 	error = kern_wait(td, uap->pid, &status, uap->options, rup);
 	if (uap->status != NULL && error == 0 && td->td_retval[0] != 0)
 		error = copyout(&status, uap->status, sizeof(status));
 	if (uap->rusage != NULL && error == 0 && td->td_retval[0] != 0)
 		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
 	return (error);
 }
 
 int
 sys_wait6(struct thread *td, struct wait6_args *uap)
 {
 	struct __wrusage wru, *wrup;
 	siginfo_t si, *sip;
 	idtype_t idtype;
 	id_t id;
 	int error, status;
 
 	idtype = uap->idtype;
 	id = uap->id;
 
 	if (uap->wrusage != NULL)
 		wrup = &wru;
 	else
 		wrup = NULL;
 
 	if (uap->info != NULL) {
 		sip = &si;
 		bzero(sip, sizeof(*sip));
 	} else
 		sip = NULL;
 
 	/*
 	 *  We expect all callers of wait6() to know about WEXITED and
 	 *  WTRAPPED.
 	 */
 	error = kern_wait6(td, idtype, id, &status, uap->options, wrup, sip);
 
 	if (uap->status != NULL && error == 0 && td->td_retval[0] != 0)
 		error = copyout(&status, uap->status, sizeof(status));
 	if (uap->wrusage != NULL && error == 0 && td->td_retval[0] != 0)
 		error = copyout(&wru, uap->wrusage, sizeof(wru));
 	if (uap->info != NULL && error == 0)
 		error = copyout(&si, uap->info, sizeof(si));
 	return (error);
 }
 
 /*
  * Reap the remains of a zombie process and optionally return status and
  * rusage.  Asserts and will release both the proctree_lock and the process
  * lock as part of its work.
  *
  * td is the thread doing the wait; q below is its process.  p is the
  * zombie; it must be entered with its proc lock and its spin lock held
  * (both are asserted).  If status is not NULL it receives
  * KW_EXITCODE(p->p_xexit, p->p_xsig).  Of the wait options only WNOWAIT
  * is examined here.
  */
 void
 proc_reap(struct thread *td, struct proc *p, int *status, int options)
 {
 	struct proc *q, *t;
 
 	sx_assert(&proctree_lock, SA_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE"));
 
 	q = td->td_proc;
 
 	PROC_SUNLOCK(p);
 	if (status)
 		*status = KW_EXITCODE(p->p_xexit, p->p_xsig);
 	if (options & WNOWAIT) {
 		/*
 		 *  Only poll, returning the status.  Caller does not wish to
 		 * release the proc struct just yet.
 		 */
 		PROC_UNLOCK(p);
 		sx_xunlock(&proctree_lock);
 		return;
 	}
 
 	PROC_LOCK(q);
 	sigqueue_take(p->p_ksi);
 	PROC_UNLOCK(q);
 
 	/*
 	 * If we got the child via a ptrace 'attach', we need to give it back
 	 * to the old parent.  In that case the process is not freed here:
 	 * it is reparented, the old parent is signalled and woken up, and
 	 * the function returns.
 	 */
 	if (p->p_oppid != 0 && p->p_oppid != p->p_pptr->p_pid) {
 		PROC_UNLOCK(p);
 		t = proc_realparent(p);
 		PROC_LOCK(t);
 		PROC_LOCK(p);
 		CTR2(KTR_PTRACE,
 		    "wait: traced child %d moved back to parent %d", p->p_pid,
 		    t->p_pid);
 		proc_reparent(p, t);
 		p->p_oppid = 0;
 		PROC_UNLOCK(p);
 		pksignal(t, SIGCHLD, p->p_ksi);
 		wakeup(t);
 		cv_broadcast(&p->p_pwait);
 		PROC_UNLOCK(t);
 		sx_xunlock(&proctree_lock);
 		return;
 	}
 	p->p_oppid = 0;
 	PROC_UNLOCK(p);
 
 	/*
 	 * Remove other references to this process to ensure we have an
 	 * exclusive reference.
 	 */
 	sx_xlock(&allproc_lock);
 	LIST_REMOVE(p, p_list);	/* off zombproc */
 	sx_xunlock(&allproc_lock);
 	LIST_REMOVE(p, p_sibling);
 	reaper_abandon_children(p, true);
 	LIST_REMOVE(p, p_reapsibling);
 	PROC_LOCK(p);
 	clear_orphan(p);
 	PROC_UNLOCK(p);
 	leavepgrp(p);
 	if (p->p_procdesc != NULL)
 		procdesc_reap(p);
 	sx_xunlock(&proctree_lock);
 
 	PROC_LOCK(p);
 	knlist_detach(p->p_klist);
 	p->p_klist = NULL;
 	PROC_UNLOCK(p);
 
 	/*
 	 * Removal from allproc list and process group list paired with
 	 * PROC_LOCK which was executed during that time should guarantee
 	 * nothing can reach this process anymore. As such further locking
 	 * is unnecessary.
 	 */
 	p->p_xexit = p->p_xsig = 0;		/* XXX: why? */
 
 	/* Add the reaped process' rusage to the waiter's child totals. */
 	PROC_LOCK(q);
 	ruadd(&q->p_stats->p_cru, &q->p_crux, &p->p_ru, &p->p_rux);
 	PROC_UNLOCK(q);
 
 	/*
 	 * Decrement the count of procs running with this uid.
 	 */
 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
 
 	/*
 	 * Destroy resource accounting information associated with the process.
 	 */
 #ifdef RACCT
 	if (racct_enable) {
 		PROC_LOCK(p);
 		racct_sub(p, RACCT_NPROC, 1);
 		PROC_UNLOCK(p);
 	}
 #endif
 	racct_proc_exit(p);
 
 	/*
 	 * Free credentials, arguments, and sigacts.
 	 */
 	crfree(p->p_ucred);
 	proc_set_cred(p, NULL);
 	pargs_drop(p->p_args);
 	p->p_args = NULL;
 	sigacts_free(p->p_sigacts);
 	p->p_sigacts = NULL;
 
 	/*
 	 * Do any thread-system specific cleanups.
 	 */
 	thread_wait(p);
 
 	/*
 	 * Give vm and machine-dependent layer a chance to free anything that
 	 * cpu_exit couldn't release while still running in process context.
 	 */
 	vm_waitproc(p);
 #ifdef MAC
 	mac_proc_destroy(p);
 #endif
 	/*
 	 * Free any domain policy that's still hiding around.
 	 */
 	vm_domain_policy_cleanup(&p->p_vm_dom_policy);
 
 	KASSERT(FIRST_THREAD_IN_PROC(p),
 	    ("proc_reap: no residual thread!"));
 	/* Return the proc structure to its zone and drop the process count. */
 	uma_zfree(proc_zone, p);
 	atomic_add_int(&nprocs, -1);
 }
 
 /*
  * Decide whether process 'p' matches the (idtype, id) selector and the
  * wait options of the waiting thread 'td'.
  *
  * Returns 0 when p does not match, 1 when it matches but was not reaped,
  * and -1 when it was a zombie and has been handed to proc_reap() (which
  * only happens when check_only is zero).  On a match, siginfo and wrusage
  * are filled in when they are not NULL.
  *
  * Called with the proctree lock held exclusively.  The proc lock of p is
  * taken here; it is dropped before returning 0 or 1.
  */
 static int
 proc_to_reap(struct thread *td, struct proc *p, idtype_t idtype, id_t id,
     int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo,
     int check_only)
 {
 	int mismatch;
 
 	sx_assert(&proctree_lock, SA_XLOCKED);
 
 	PROC_LOCK(p);
 
 	switch (idtype) {
 	case P_ALL:
 		mismatch = (p->p_procdesc != NULL);
 		break;
 	case P_PID:
 		mismatch = (p->p_pid != (pid_t)id);
 		break;
 	case P_PGID:
 		mismatch = (p->p_pgid != (pid_t)id);
 		break;
 	case P_SID:
 		mismatch = (p->p_session->s_sid != (pid_t)id);
 		break;
 	case P_UID:
 		mismatch = (p->p_ucred->cr_uid != (uid_t)id);
 		break;
 	case P_GID:
 		mismatch = (p->p_ucred->cr_gid != (gid_t)id);
 		break;
 	case P_JAILID:
 		mismatch = (p->p_ucred->cr_prison->pr_id != (int)id);
 		break;
 	/*
 	 * It seems that the thread structures get zeroed out
 	 * at process exit.  This makes it impossible to
 	 * support P_SETID, P_CID or P_CPUID.
 	 */
 	default:
 		mismatch = 1;
 		break;
 	}
 	if (mismatch)
 		goto nomatch;
 
 	if (p_canwait(td, p))
 		goto nomatch;
 
 	/* Zombies are only of interest when WEXITED was requested. */
 	if ((options & WEXITED) == 0 && p->p_state == PRS_ZOMBIE)
 		goto nomatch;
 
 	/*
 	 * A kthread spawned by linux_clone (see linux_misc.c) is a
 	 * special case: linux_wait4 and linux_waitpid must be able to
 	 * tell waiting on a process from waiting on a thread.  It is a
 	 * thread if p_sigparent is not SIGCHLD, and WLINUXCLONE says that
 	 * threads rather than processes are wanted.  Skip p when the two
 	 * disagree.
 	 */
 	if ((p->p_sigparent != SIGCHLD) != ((options & WLINUXCLONE) != 0))
 		goto nomatch;
 
 	if (siginfo != NULL) {
 		bzero(siginfo, sizeof(*siginfo));
 		siginfo->si_errno = 0;
 
 		/*
 		 * SUSv4 requires si_signo to always be SIGCHLD, even
 		 * though rfork(2) allows another signal to be requested
 		 * for child exit notification.
 		 */
 		siginfo->si_signo = SIGCHLD;
 
 		/*
 		 *  This is still a rough estimate.  The TRAPPED, STOPPED
 		 *  and CONTINUED cases are fixed up later.
 		 */
 		if (WCOREDUMP(p->p_xsig) || WIFSIGNALED(p->p_xsig)) {
 			siginfo->si_code = WCOREDUMP(p->p_xsig) ?
 			    CLD_DUMPED : CLD_KILLED;
 			siginfo->si_status = WTERMSIG(p->p_xsig);
 		} else {
 			siginfo->si_code = CLD_EXITED;
 			siginfo->si_status = p->p_xexit;
 		}
 
 		siginfo->si_pid = p->p_pid;
 		siginfo->si_uid = p->p_ucred->cr_uid;
 
 		/*
 		 * si_addr would be useful additional detail, but the PC
 		 * value may apparently be lost by the time we get here.
 		 * The bzero() above leaves si_addr NULL.
 		 */
 	}
 
 	/*
 	 * Resource usage is reported for any matching process, not only
 	 * for exited ones: a snapshot of what a stopped process has used
 	 * may be exactly what is needed.
 	 */
 	if (wrusage != NULL) {
 		wrusage->wru_self = p->p_ru;
 		PROC_STATLOCK(p);
 		calcru(p, &wrusage->wru_self.ru_utime,
 		    &wrusage->wru_self.ru_stime);
 		PROC_STATUNLOCK(p);
 
 		wrusage->wru_children = p->p_stats->p_cru;
 		calccru(p, &wrusage->wru_children.ru_utime,
 		    &wrusage->wru_children.ru_stime);
 	}
 
 	if (p->p_state != PRS_ZOMBIE || check_only) {
 		PROC_UNLOCK(p);
 		return (1);
 	}
 
 	/* proc_reap() expects the spin lock and takes over unlocking. */
 	PROC_SLOCK(p);
 	proc_reap(td, p, status, options);
 	return (-1);
 
 nomatch:
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 /*
  * wait4-style front end to kern_wait6().  Translates 'pid' into an
  * (idtype, id) pair, adds WEXITED and WTRAPPED to the options, and
  * returns the waited-for process' own rusage through 'rusage' when that
  * pointer is not NULL.
  */
 int
 kern_wait(struct thread *td, pid_t pid, int *status, int options,
     struct rusage *rusage)
 {
 	struct __wrusage wru;
 	idtype_t idtype;
 	id_t id;
 	int error;
 
 	/*
 	 * Map the special pid values onto the (idtype, id) pair taken by
 	 * kern_wait6().  WAIT_MYPGRP is left for kern_wait6() to resolve.
 	 */
 	if (pid == WAIT_ANY) {
 		idtype = P_ALL;
 		id = 0;
 	} else if (pid < 0) {
 		idtype = P_PGID;
 		id = (id_t)-pid;
 	} else {
 		idtype = P_PID;
 		id = (id_t)pid;
 	}
 
 	/*
 	 * WEXITED and WTRAPPED are added implicitly for backward
 	 * compatibility.
 	 */
 	error = kern_wait6(td, idtype, id, status,
 	    options | WEXITED | WTRAPPED,
 	    rusage != NULL ? &wru : NULL, NULL);
 
 	if (rusage != NULL)
 		*rusage = wru.wru_self;
 	return (error);
 }
 
 /*
  * Common implementation of the wait family; called by kern_wait() and
  * sys_wait6() in this file.  Walks the calling process' child list with
  * proc_to_reap() and, if nothing matched there, its orphan list.
  *
  * Returns EINVAL for unknown option bits or when none of WEXITED,
  * WUNTRACED, WCONTINUED and WTRAPPED is requested, ECHILD when no process
  * matches, and the msleep() error when the sleep fails.  On success
  * returns 0 with td->td_retval[0] set to the pid of the reported process,
  * or to 0 when WNOHANG is set and no matching process has an event to
  * report.  Otherwise it sleeps on the calling process and rescans.
  */
 int
 kern_wait6(struct thread *td, idtype_t idtype, id_t id, int *status,
     int options, struct __wrusage *wrusage, siginfo_t *siginfo)
 {
 	struct proc *p, *q;
 	pid_t pid;
 	int error, nfound, ret;
 
 	AUDIT_ARG_VALUE((int)idtype);	/* XXX - This is likely wrong! */
 	AUDIT_ARG_PID((pid_t)id);	/* XXX - This may be wrong! */
 	AUDIT_ARG_VALUE(options);
 
 	q = td->td_proc;
 
 	/* WAIT_MYPGRP is replaced by the caller's own process group id. */
 	if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) {
 		PROC_LOCK(q);
 		id = (id_t)q->p_pgid;
 		PROC_UNLOCK(q);
 		idtype = P_PGID;
 	}
 
 	/* If we don't know the option, just return. */
 	if ((options & ~(WUNTRACED | WNOHANG | WCONTINUED | WNOWAIT |
 	    WEXITED | WTRAPPED | WLINUXCLONE)) != 0)
 		return (EINVAL);
 	if ((options & (WEXITED | WUNTRACED | WCONTINUED | WTRAPPED)) == 0) {
 		/*
 		 * We will be unable to find any matching processes,
 		 * because there are no known events to look for.
 		 * Prefer to return error instead of blocking
 		 * indefinitely.
 		 */
 		return (EINVAL);
 	}
 
 loop:
 	if (q->p_flag & P_STATCHILD) {
 		PROC_LOCK(q);
 		q->p_flag &= ~P_STATCHILD;
 		PROC_UNLOCK(q);
 	}
 	nfound = 0;
 	sx_xlock(&proctree_lock);
 	LIST_FOREACH(p, &q->p_children, p_sibling) {
 		/* Saved up front: p may be freed by proc_reap(). */
 		pid = p->p_pid;
 		ret = proc_to_reap(td, p, idtype, id, status, options,
 		    wrusage, siginfo, 0);
 		if (ret == 0)
 			continue;
 		else if (ret == 1)
 			nfound++;
 		else {
 			/*
 			 * proc_to_reap() passed p to proc_reap(), which
 			 * releases the proctree_lock and the process lock.
 			 */
 			td->td_retval[0] = pid;
 			return (0);
 		}
 
 		PROC_LOCK(p);
 		PROC_SLOCK(p);
 
 		if ((options & WTRAPPED) != 0 &&
 		    (p->p_flag & P_TRACED) != 0 &&
 		    (p->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) != 0 &&
 		    (p->p_suspcount == p->p_numthreads) &&
 		    ((p->p_flag & P_WAITED) == 0)) {
 			PROC_SUNLOCK(p);
 			if ((options & WNOWAIT) == 0)
 				p->p_flag |= P_WAITED;
 			sx_xunlock(&proctree_lock);
 
 			if (status != NULL)
 				*status = W_STOPCODE(p->p_xsig);
 			if (siginfo != NULL) {
 				siginfo->si_status = p->p_xsig;
 				siginfo->si_code = CLD_TRAPPED;
 			}
 			if ((options & WNOWAIT) == 0) {
 				PROC_LOCK(q);
 				sigqueue_take(p->p_ksi);
 				PROC_UNLOCK(q);
 			}
 
 			CTR4(KTR_PTRACE,
 	    "wait: returning trapped pid %d status %#x (xstat %d) xthread %d",
 			    p->p_pid, W_STOPCODE(p->p_xsig), p->p_xsig,
 			    p->p_xthread != NULL ? p->p_xthread->td_tid : -1);
 			PROC_UNLOCK(p);
 			td->td_retval[0] = pid;
 			return (0);
 		}
 		if ((options & WUNTRACED) != 0 &&
 		    (p->p_flag & P_STOPPED_SIG) != 0 &&
 		    (p->p_suspcount == p->p_numthreads) &&
 		    ((p->p_flag & P_WAITED) == 0)) {
 			PROC_SUNLOCK(p);
 			if ((options & WNOWAIT) == 0)
 				p->p_flag |= P_WAITED;
 			sx_xunlock(&proctree_lock);
 
 			if (status != NULL)
 				*status = W_STOPCODE(p->p_xsig);
 			if (siginfo != NULL) {
 				siginfo->si_status = p->p_xsig;
 				siginfo->si_code = CLD_STOPPED;
 			}
 			if ((options & WNOWAIT) == 0) {
 				PROC_LOCK(q);
 				sigqueue_take(p->p_ksi);
 				PROC_UNLOCK(q);
 			}
 
 			PROC_UNLOCK(p);
 			td->td_retval[0] = pid;
 			return (0);
 		}
 		PROC_SUNLOCK(p);
 		if ((options & WCONTINUED) != 0 &&
 		    (p->p_flag & P_CONTINUED) != 0) {
 			sx_xunlock(&proctree_lock);
 			if ((options & WNOWAIT) == 0) {
 				p->p_flag &= ~P_CONTINUED;
 				PROC_LOCK(q);
 				sigqueue_take(p->p_ksi);
 				PROC_UNLOCK(q);
 			}
 			PROC_UNLOCK(p);
 
 			if (status != NULL)
 				*status = SIGCONT;
 			if (siginfo != NULL) {
 				siginfo->si_status = SIGCONT;
 				siginfo->si_code = CLD_CONTINUED;
 			}
 			td->td_retval[0] = pid;
 			return (0);
 		}
 		PROC_UNLOCK(p);
 	}
 
 	/*
 	 * Look in the orphans list too, to allow the parent to
 	 * collect its child exit status even if child is being
 	 * debugged.
 	 *
 	 * Debugger detaches from the parent upon successful
 	 * switch-over from parent to child.  At this point due to
 	 * re-parenting the parent loses the child to debugger and a
 	 * wait4(2) call would report that it has no children to wait
 	 * for.  By maintaining a list of orphans we allow the parent
 	 * to successfully wait until the child becomes a zombie.
 	 */
 	if (nfound == 0) {
 		LIST_FOREACH(p, &q->p_orphans, p_orphan) {
 			/* check_only is set, so no orphan is reaped here. */
 			ret = proc_to_reap(td, p, idtype, id, NULL, options,
 			    NULL, NULL, 1);
 			if (ret != 0) {
 				KASSERT(ret != -1, ("reaped an orphan (pid %d)",
 				    (int)td->td_retval[0]));
 				nfound++;
 				break;
 			}
 		}
 	}
 	if (nfound == 0) {
 		sx_xunlock(&proctree_lock);
 		return (ECHILD);
 	}
 	if (options & WNOHANG) {
 		sx_xunlock(&proctree_lock);
 		td->td_retval[0] = 0;
 		return (0);
 	}
 	/*
 	 * The proc lock of q is taken before the proctree_lock is released.
 	 * If P_STATCHILD is set by then, rescan without sleeping.
 	 */
 	PROC_LOCK(q);
 	sx_xunlock(&proctree_lock);
 	if (q->p_flag & P_STATCHILD) {
 		q->p_flag &= ~P_STATCHILD;
 		error = 0;
 	} else
 		error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0);
 	PROC_UNLOCK(q);
 	if (error)
 		return (error);
 	goto loop;
 }
 
 /*
  * Make process 'parent' the new parent of process 'child'.
  * Must be called with an exclusive hold of proctree lock.
  */
 void
 proc_reparent(struct proc *child, struct proc *parent)
 {
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(child, MA_OWNED);
 	if (child->p_pptr == parent)
 		return;
 
 	PROC_LOCK(child->p_pptr);
 	sigqueue_take(child->p_ksi);
 	PROC_UNLOCK(child->p_pptr);
 	LIST_REMOVE(child, p_sibling);
 	LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
 
 	clear_orphan(child);
 	if (child->p_flag & P_TRACED) {
 		if (LIST_EMPTY(&child->p_pptr->p_orphans)) {
 			child->p_treeflag |= P_TREE_FIRST_ORPHAN;
 			LIST_INSERT_HEAD(&child->p_pptr->p_orphans, child,
 			    p_orphan);
 		} else {
 			LIST_INSERT_AFTER(LIST_FIRST(&child->p_pptr->p_orphans),
 			    child, p_orphan);
 		}
 		child->p_treeflag |= P_TREE_ORPHANED;
 	}
 
 	child->p_pptr = parent;
 }
Index: projects/hps_head/sys/kern/kern_timeout.c
===================================================================
--- projects/hps_head/sys/kern/kern_timeout.c	(revision 309217)
+++ projects/hps_head/sys/kern/kern_timeout.c	(revision 309218)
@@ -1,1575 +1,1575 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_callout_profiling.h"
 #include "opt_ddb.h"
 #if defined(__arm__)
 #include "opt_timer.h"
 #endif
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/callout.h>
 #include <sys/file.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/rmlock.h>
 #include <sys/rwlock.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #include <machine/_inttypes.h>
 #endif
 
 #ifdef SMP
 #include <machine/cpu.h>
 #endif
 
 #ifndef NO_EVENTTIMERS
 /*
  * Per-CPU "hardclocktime" variable.  It is only declared here and is
  * read by callout_when() when event timers are compiled in.
  */
 DPCPU_DECLARE(sbintime_t, hardclocktime);
 #endif
 
 /*
  * SDT provider "callout_execute" with two probes, each taking a
  * "struct callout *".  They are fired immediately before and after the
  * callback function is invoked in softclock_call_cc().
  */
 SDT_PROVIDER_DEFINE(callout_execute);
 SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
 SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
 
 #ifdef CALLOUT_PROFILING
 /*
  * Profiling averages, updated by callout_update_stats() and exported
  * read-only under the "debug" sysctl tree.  In each array, index 0 is
  * used for the softclock() path and index 1 for the direct path run
  * from callout_process().  avg_gcalls[1] has no sysctl of its own.
  */
 static int avg_depth[2];
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth[0], 0,
     "Average number of items examined per softclock call. Units = 1/1000");
 static int avg_gcalls[2];
 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls[0], 0,
     "Average number of Giant callouts made per softclock call. Units = 1/1000");
 static int avg_lockcalls[2];
 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls[0], 0,
     "Average number of lock callouts made per softclock call. Units = 1/1000");
 static int avg_mpcalls[2];
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls[0], 0,
     "Average number of MP callouts made per softclock call. Units = 1/1000");
 /* Sysctls for the direct-execution (index 1) counters. */
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth[1], 0,
     "Average number of direct callouts examined per callout_process call. "
     "Units = 1/1000");
 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
     &avg_lockcalls[1], 0, "Average number of lock direct callouts made per "
     "callout_process call. Units = 1/1000");
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls[1],
     0, "Average number of MP direct callouts made per callout_process call. "
     "Units = 1/1000");
 #endif
 
 /*
  * Number of preallocated timeout(9) callout structures.  The value is
  * computed, and the "kern.ncallout" tunable fetched by hand, in
  * callout_callwheel_init(); hence CTLFLAG_NOFETCH here.
  */
 static int ncallout;
 SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0,
     "Number of entries in callwheel and size of timeout() preallocation");
 
 /*
  * Defaults for pinning the softclock SWI threads: enabled when the
  * kernel is built with RSS, disabled otherwise.
  */
 #ifdef	RSS
 static int pin_default_swi = 1;
 static int pin_pcpu_swi = 1;
 #else
 static int pin_default_swi = 0;
 static int pin_pcpu_swi = 0;
 #endif
 
 /*
  * Both knobs are declared CTLFLAG_NOFETCH; the matching tunables are
  * fetched explicitly in callout_callwheel_init().
  * NOTE(review): the pin_pcpu_swi description string is missing its
  * closing parenthesis.
  */
 SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi,
     0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)");
 SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi,
     0, "Pin the per-CPU swis (except PCPU 0, which is also default");
 
 /*
  * TODO:
  *	allocate more timeout table slots when table overflows.
  *
  * Both values are set once in callout_callwheel_init(): "callwheelsize"
  * is the number of buckets per callwheel (a power of two) and
  * "callwheelmask" is "callwheelsize - 1".
  */
 u_int callwheelsize, callwheelmask;
 
 /*
  * Arguments describing one (re)schedule request for a callout.  They
  * are consumed by callout_cc_add_locked() and can be parked in
  * "struct cc_exec" for a deferred restart.
  */
 struct callout_args {
 	sbintime_t time;		/* absolute time for the event */
 	sbintime_t precision;		/* delta allowed wrt opt */
 	void	*arg;			/* function argument */
 	callout_func_t *func;		/* function to call */
 	int	flags;			/* flags passed to callout_reset() */
 	int	cpu;			/* CPU we're scheduled on */
 };
 
 /*
  * Signature of a client lock operation: it receives the callout's
  * argument and the callout's lock object.
  */
 typedef void callout_mutex_op_t(void *, struct lock_object *);
 
 /* Lock/unlock handler pair for one class of client lock. */
 struct callout_mutex_ops {
 	callout_mutex_op_t *lock;
 	callout_mutex_op_t *unlock;
 };
 
 /*
  * Lock class identifiers, used as indices into callout_mutex_ops[].
  * The index of a callout is obtained with CALLOUT_GET_LC() from its
  * flags.  The three "unused" classes map to no-op handlers.
  */
 enum {
 	CALLOUT_LC_UNUSED_0,
 	CALLOUT_LC_UNUSED_1,
 	CALLOUT_LC_UNUSED_2,
 	CALLOUT_LC_FUNCTION,
 	CALLOUT_LC_SPIN,
 	CALLOUT_LC_MUTEX,
 	CALLOUT_LC_RW,
 	CALLOUT_LC_RM,
 };
 
 /* No-op lock handler, used for the unused lock class slots. */
 static void
 callout_mutex_op_none(void *arg, struct lock_object *lock)
 {
 }
 
 /*
  * Lock handler for CALLOUT_LC_FUNCTION: 'lock' is really a
  * callout_lock_func_t pointer, which is called with (arg, 1).
  */
 static void
 callout_function_lock(void *arg, struct lock_object *lock)
 {
 
 	((callout_lock_func_t *)lock)(arg, 1);
 }
 
 /*
  * Unlock handler for CALLOUT_LC_FUNCTION: 'lock' is really a
  * callout_lock_func_t pointer, which is called with (arg, 0).
  */
 static void
 callout_function_unlock(void *arg, struct lock_object *lock)
 {
 
 	((callout_lock_func_t *)lock)(arg, 0);
 }
 
 /* Lock handler for CALLOUT_LC_MUTEX; 'arg' is unused. */
 static void
 callout_mutex_lock(void *arg, struct lock_object *lock)
 {
 
 	mtx_lock((struct mtx *)lock);
 }
 
 /* Unlock handler for CALLOUT_LC_MUTEX; 'arg' is unused. */
 static void
 callout_mutex_unlock(void *arg, struct lock_object *lock)
 {
 
 	mtx_unlock((struct mtx *)lock);
 }
 
 /* Lock handler for CALLOUT_LC_SPIN; 'arg' is unused. */
 static void
 callout_mutex_lock_spin(void *arg, struct lock_object *lock)
 {
 
 	mtx_lock_spin((struct mtx *)lock);
 }
 
 /* Unlock handler for CALLOUT_LC_SPIN; 'arg' is unused. */
 static void
 callout_mutex_unlock_spin(void *arg, struct lock_object *lock)
 {
 
 	mtx_unlock_spin((struct mtx *)lock);
 }
 
 /* Lock handler for CALLOUT_LC_RM: takes the rmlock for writing. */
 static void
 callout_rm_wlock(void *arg, struct lock_object *lock)
 {
 
 	rm_wlock((struct rmlock *)lock);
 }
 
 /* Unlock handler for CALLOUT_LC_RM: drops the rmlock write lock. */
 static void
 callout_rm_wunlock(void *arg, struct lock_object *lock)
 {
 
 	rm_wunlock((struct rmlock *)lock);
 }
 
 /* Lock handler for CALLOUT_LC_RW: takes the rwlock for writing. */
 static void
 callout_rw_wlock(void *arg, struct lock_object *lock)
 {
 
 	rw_wlock((struct rwlock *)lock);
 }
 
 /* Unlock handler for CALLOUT_LC_RW: drops the rwlock write lock. */
 static void
 callout_rw_wunlock(void *arg, struct lock_object *lock)
 {
 
 	rw_wunlock((struct rwlock *)lock);
 }
 
 /*
  * Dispatch table from lock class (CALLOUT_LC_*) to the lock/unlock
  * handlers for that class.  All eight slots are populated, so any
  * index in the range 0..7 resolves to callable handlers.
  */
 static const struct callout_mutex_ops callout_mutex_ops[8] = {
 	[CALLOUT_LC_UNUSED_0] = {
 		.lock = callout_mutex_op_none,
 		.unlock = callout_mutex_op_none,
 	},
 	[CALLOUT_LC_UNUSED_1] = {
 		.lock = callout_mutex_op_none,
 		.unlock = callout_mutex_op_none,
 	},
 	[CALLOUT_LC_UNUSED_2] = {
 		.lock = callout_mutex_op_none,
 		.unlock = callout_mutex_op_none,
 	},
 	[CALLOUT_LC_FUNCTION] = {
 		.lock = callout_function_lock,
 		.unlock = callout_function_unlock,
 	},
 	[CALLOUT_LC_SPIN] = {
 		.lock = callout_mutex_lock_spin,
 		.unlock = callout_mutex_unlock_spin,
 	},
 	[CALLOUT_LC_MUTEX] = {
 		.lock = callout_mutex_lock,
 		.unlock = callout_mutex_unlock,
 	},
 	[CALLOUT_LC_RW] = {
 		.lock = callout_rw_wlock,
 		.unlock = callout_rw_wunlock,
 	},
 	[CALLOUT_LC_RM] = {
 		.lock = callout_rm_wlock,
 		.unlock = callout_rm_wunlock,
 	},
 };
 
 /*
  * Acquire the client lock of a callout by dispatching on the lock
  * class encoded in 'c_flags'.
  */
 static inline void
 callout_lock_client(int c_flags, void *c_arg, struct lock_object *c_lock)
 {
 
 	callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_arg, c_lock);
 }
 
 /*
  * Release the client lock of a callout by dispatching on the lock
  * class encoded in 'c_flags'.
  */
 static inline void
 callout_unlock_client(int c_flags, void *c_arg, struct lock_object *c_lock)
 {
 
 	callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_arg, c_lock);
 }
 
 /*
  * The callout CPU exec structure represents the information necessary
  * for describing the state of callouts currently running on the CPU
  * and for handling deferred callout restarts.
  *
  * In particular, the first entry of the array cc_exec_entity holds
  * information for callouts running from the SWI thread context, while
  * the second one holds information for callouts running directly from
  * the hardware interrupt context.
  */
 struct cc_exec {
 	/*
 	 * The "cc_curr" points to the currently executing callout and
 	 * is protected by the "cc_lock" spinlock. If no callback is
 	 * currently executing it is equal to "NULL".
 	 */
 	struct callout		*cc_curr;
 	/*
 	 * The "cc_restart_args" structure holds the argument for a
 	 * deferred callback restart and is protected by the "cc_lock"
 	 * spinlock. The structure is only valid if "cc_restart" is
 	 * "true". If "cc_restart" is "false" the information in the
 	 * "cc_restart_args" structure shall be ignored.
 	 */
 	struct callout_args	cc_restart_args;
 	bool			cc_restart;
 	/*
 	 * The "cc_cancel" variable allows the currently pending
 	 * callback to be atomically cancelled. This field is write
 	 * protected by the "cc_lock" spinlock.
 	 */
 	bool cc_cancel;
 	/*
 	 * The "cc_drain_fn" points to a function which shall be
 	 * called when an asynchronous drain is performed. This field
 	 * is write protected by the "cc_lock" spinlock.
 	 */
 	callout_func_t *cc_drain_fn;
 	/*
 	 * The following fields are used for callout profiling only:
 	 */
 #ifdef CALLOUT_PROFILING
 	int cc_depth;
 	int cc_mpcalls;
 	int cc_lockcalls;
 	int cc_gcalls;
 #endif
 };
 
 /*
  * There is one "struct callout_cpu" per CPU, holding all relevant
  * state for the callout processing thread on the individual CPU.
  *
  * Fields:
  *	cc_lock		 spin mutex protecting this structure
  *	cc_exec_entity	 execution state: [0] SWI context, [1] direct
  *	cc_callout	 preallocated timeout(9) callouts; NULL on CPUs
  *			 other than "timeout_cpu"
  *	cc_callwheel	 array of "callwheelsize" hash buckets
  *	cc_directlist	 expired CALLOUT_DIRECT callouts, executed from
  *			 callout_process()
  *	cc_expireq	 expired callouts to be executed by softclock()
  *	cc_callfree	 free list of preallocated callouts
  *	cc_firstevent	 time last passed on as the next event time;
  *			 SBT_MAX after initialization
  *	cc_lastscan	 time of the last callout_process() scan
  *	cc_cookie	 SWI cookie from swi_add(), used with swi_sched()
  *	cc_ktr_event_name name used in KTR scheduler traces
  */
 struct callout_cpu {
 	struct mtx_padalign	cc_lock;
 	struct cc_exec 		cc_exec_entity[2];
 	struct callout		*cc_callout;
 	struct callout_list	*cc_callwheel;
 	struct callout_list	cc_directlist;
 	struct callout_tailq	cc_expireq;
 	struct callout_slist	cc_callfree;
 	sbintime_t		cc_firstevent;
 	sbintime_t		cc_lastscan;
 	void			*cc_cookie;
 	char			cc_ktr_event_name[20];
 };
 
 /*
  * Accessors for the fields of cc_exec_entity[dir], where "dir" is 0
  * for the SWI context and 1 for the direct context.  The depth,
  * mpcalls, lockcalls and gcalls accessors are only usable when
  * CALLOUT_PROFILING is defined, because the fields they name only
  * exist then.
  */
 #define	cc_exec_curr(cc, dir)		(cc)->cc_exec_entity[(dir)].cc_curr
 #define	cc_exec_restart_args(cc, dir)	(cc)->cc_exec_entity[(dir)].cc_restart_args
 #define	cc_exec_restart(cc, dir)	(cc)->cc_exec_entity[(dir)].cc_restart
 #define	cc_exec_cancel(cc, dir)		(cc)->cc_exec_entity[(dir)].cc_cancel
 #define	cc_exec_drain_fn(cc, dir)	(cc)->cc_exec_entity[(dir)].cc_drain_fn
 #define	cc_exec_depth(cc, dir)		(cc)->cc_exec_entity[(dir)].cc_depth
 #define	cc_exec_mpcalls(cc, dir)	(cc)->cc_exec_entity[(dir)].cc_mpcalls
 #define	cc_exec_lockcalls(cc, dir)	(cc)->cc_exec_entity[(dir)].cc_lockcalls
 #define	cc_exec_gcalls(cc, dir)		(cc)->cc_exec_entity[(dir)].cc_gcalls
 
 /*
  * Per-CPU callout state and the macros to reach and lock it.  With
  * SMP there is one entry per possible CPU; without SMP a single
  * instance is used and the "cpu" argument of CC_CPU() is ignored.
  *
  * CPUBLOCK is a marker stored in a callout's "c_cpu" while the callout
  * is being moved between CPUs; callout_lock() spins until it is gone.
  */
 #ifdef SMP
 struct callout_cpu cc_cpu[MAXCPU];
 #define	CPUBLOCK	-1
 #define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
 #define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
 #else
 struct callout_cpu cc_cpu;
 #define	CC_CPU(cpu)	&cc_cpu
 #define	CC_SELF()	&cc_cpu
 #endif
 /* The per-CPU lock is a spin mutex, see callout_cpu_init(). */
 #define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
 #define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
 
 /*
  * CPU that owns the preallocated timeout(9) callouts; assigned in
  * callout_callwheel_init().
  */
 static int timeout_cpu;
 
 /* Forward declarations for functions used before their definition. */
 static void	callout_cpu_init(struct callout_cpu *cc, int cpu);
 static void	softclock_call_cc(struct callout *c, struct callout_cpu *cc, const int direct);
 
 /* Malloc type for the callwheels and the preallocated callouts. */
 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
 
 /*
  * Kernel low level callwheel initialization called from cpu0 during
  * kernel startup:
  *
  * Clears all per-CPU callout state, sizes the callwheel, fetches the
  * related tunables and fully initializes the callout state of the
  * CPU this function runs on, which becomes "timeout_cpu".  The state
  * of the remaining CPUs is initialized later by start_softclock().
  * The 'dummy' argument is unused.
  */
 static void
 callout_callwheel_init(void *dummy)
 {
 	struct callout_cpu *cc;
 
 	/*
 	 * Calculate the size of the callout wheel and the preallocated
 	 * timeout() structures.
 	 * XXX: Clip callout to result of previous function of maxusers
 	 * maximum 384.  This is still huge, but acceptable.
 	 */
 	/* sizeof(cc_cpu) covers the whole object: every CPU's entry on SMP. */
 	memset(CC_CPU(0), 0, sizeof(cc_cpu));
 	ncallout = imin(16 + maxproc + maxfiles, 18508);
 	/* The tunable, when set, replaces the computed default. */
 	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
 
 	/*
 	 * Calculate callout wheel size, should be next power of two higher
 	 * than 'ncallout'.
 	 */
 	callwheelsize = 1 << fls(ncallout);
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Fetch whether we're pinning the swi's or not.
 	 */
 	TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi);
 	TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi);
 
 	/*
 	 * Only cpu0 handles timeout(9) and receives a preallocation.
 	 *
 	 * XXX: Once all timeout(9) consumers are converted this can
 	 * be removed.
 	 */
 	timeout_cpu = PCPU_GET(cpuid);
 	cc = CC_CPU(timeout_cpu);
 	cc->cc_callout = malloc(ncallout * sizeof(struct callout),
 	    M_CALLOUT, M_WAITOK);
 	callout_cpu_init(cc, timeout_cpu);
 }
 SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);
 
 /*
  * Initialize the per-cpu callout structures.
  *
  * 'cc' is the structure to set up and 'cpu' the CPU number it belongs
  * to, which is only used for naming.  The caller must have set
  * "cc->cc_callout" beforehand: either to the preallocated callout
  * array or to NULL.
  */
 static void
 callout_cpu_init(struct callout_cpu *cc, int cpu)
 {
 	struct callout *c;
 	int i;
 
 	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
 	SLIST_INIT(&cc->cc_callfree);
 	/* One list head per callwheel bucket. */
 	cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
 	    M_CALLOUT, M_WAITOK);
 	for (i = 0; i < callwheelsize; i++)
 		LIST_INIT(&cc->cc_callwheel[i]);
 	TAILQ_INIT(&cc->cc_expireq);
 	LIST_INIT(&cc->cc_directlist);
 	/* No event is scheduled yet. */
 	cc->cc_firstevent = SBT_MAX;
 	snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
 	    "callwheel cpu %d", cpu);
 	if (cc->cc_callout == NULL)	/* Only cpu0 handles timeout(9) */
 		return;
 	/* Put every preallocated callout on the free list for timeout(). */
 	for (i = 0; i < ncallout; i++) {
 		c = &cc->cc_callout[i];
 		callout_init(c, 0);
 		c->c_flags |= CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
 	}
 }
 
 #ifdef CALLOUT_PROFILING
 /*
  * Reset the profiling counters of one execution context of 'cc'
  * ('direct' selects the cc_exec_entity[] slot).
  */
 static inline void
 callout_clear_stats(struct callout_cpu *cc, const int direct)
 {
 	cc_exec_depth(cc, direct) = 0;
 	cc_exec_mpcalls(cc, direct) = 0;
 	cc_exec_lockcalls(cc, direct) = 0;
 	cc_exec_gcalls(cc, direct) = 0;
 }
 #endif
 
 #ifdef CALLOUT_PROFILING
 /*
  * Fold the profiling counters of one execution context into the
  * global averages.  Each average moves towards "counter * 1000" by
  * 1/256 of the difference (the ">> 8").
  */
 static inline void
 callout_update_stats(struct callout_cpu *cc, const int direct)
 {
 	avg_depth[direct] +=
 	    (cc_exec_depth(cc, direct) * 1000 -
 	    avg_depth[direct]) >> 8;
 	avg_mpcalls[direct] +=
 	    (cc_exec_mpcalls(cc, direct) * 1000 -
 	    avg_mpcalls[direct]) >> 8;
 	avg_lockcalls[direct] +=
 	    (cc_exec_lockcalls(cc, direct) * 1000 -
 	    avg_lockcalls[direct]) >> 8;
 	avg_gcalls[direct] +=
 	    (cc_exec_gcalls(cc, direct) * 1000 -
 	    avg_gcalls[direct]) >> 8;
 }
 #endif
 
 /*
  * Start standard softclock thread.
  *
  * Registers softclock() as a software interrupt handler for
  * "timeout_cpu" and, on SMP kernels, initializes the callout state of
  * every other CPU and registers one handler for each of them too.
  * Failure to register a handler is fatal; failure to pin one is only
  * reported.  The 'dummy' argument is unused.
  */
 static void
 start_softclock(void *dummy)
 {
 	struct callout_cpu *cc;
 	char name[MAXCOMLEN];
 #ifdef SMP
 	int cpu;
 	struct intr_event *ie;
 #endif
 
 	/* The handler for timeout_cpu is attached to "clk_intr_event". */
 	cc = CC_CPU(timeout_cpu);
 	snprintf(name, sizeof(name), "clock (%d)", timeout_cpu);
 	if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK,
 	    INTR_MPSAFE, &cc->cc_cookie))
 		panic("died while creating standard software ithreads");
 	if (pin_default_swi &&
 	    (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) {
 		printf("%s: timeout clock couldn't be pinned to cpu %d\n",
 		    __func__,
 		    timeout_cpu);
 	}
 
 #ifdef SMP
 	CPU_FOREACH(cpu) {
 		if (cpu == timeout_cpu)
 			continue;
 		cc = CC_CPU(cpu);
 		cc->cc_callout = NULL;	/* Only cpu0 handles timeout(9). */
 		callout_cpu_init(cc, cpu);
 		snprintf(name, sizeof(name), "clock (%d)", cpu);
 		/* Passing a NULL event so that each CPU gets its own one. */
 		ie = NULL;
 		if (swi_add(&ie, name, softclock, cc, SWI_CLOCK,
 		    INTR_MPSAFE, &cc->cc_cookie))
 			panic("died while creating standard software ithreads");
 		if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) {
 			printf("%s: per-cpu clock couldn't be pinned to "
 			    "cpu %d\n",
 			    __func__,
 			    cpu);
 		}
 	}
 #endif
 }
 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
 
 /*
  * The hash of a time keeps CC_HASH_SHIFT bits below bit 32 of the
  * sbintime_t value, i.e. the low (32 - CC_HASH_SHIFT) bits are dropped.
  */
 #define	CC_HASH_SHIFT	8
 
 /*
  * Convert a time into an unbounded bucket number.  The result is
  * truncated to u_int and not yet reduced to the callwheel size.
  */
 static inline u_int
 callout_hash(sbintime_t sbt)
 {
 
 	return (sbt >> (32 - CC_HASH_SHIFT));
 }
 
 /* Convert a time into a valid index into the callwheel array. */
 static inline u_int
 callout_get_bucket(sbintime_t sbt)
 {
 
 	return (callout_hash(sbt) & callwheelmask);
 }
 
 /*
  * Scan the callwheel of the current CPU for the time 'now'.
  *
  * Callouts whose time has come are removed from the wheel: direct
  * ones are executed from here, the others are queued on "cc_expireq"
  * and the softclock SWI is scheduled for them.  While scanning, the
  * time window of the next pending event is computed and, when event
  * timers are in use, handed to cpu_new_callout().
  */
 void
 callout_process(sbintime_t now)
 {
 	struct callout *tmp;
 	struct callout *next;
 	struct callout_cpu *cc;
 	struct callout_list *sc;
 	sbintime_t first, last, max, tmp_max;
 	uint32_t lookahead;
 	u_int firstb, lastb, nowb;
 
 	cc = CC_SELF();
 	CC_LOCK(cc);
 
 #ifdef CALLOUT_PROFILING
 	callout_clear_stats(cc, 1);
 #endif
 	/* Compute the buckets of the last scan and present times. */
 	firstb = callout_hash(cc->cc_lastscan);
 	cc->cc_lastscan = now;
 	nowb = callout_hash(now);
 
 	/* Compute the last bucket and minimum time of the bucket after it. */
 	/* The more buckets passed since the last scan, the larger the lookahead. */
 	if (nowb == firstb)
 		lookahead = (SBT_1S / 16);
 	else if (nowb - firstb == 1)
 		lookahead = (SBT_1S / 8);
 	else
 		lookahead = (SBT_1S / 2);
 	first = last = now;
 	first += (lookahead / 2);
 	last += lookahead;
 	/* Round "last" down to the start of its bucket. */
 	last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
 	lastb = callout_hash(last) - 1;
 	max = last;
 
 	/*
 	 * Check if we wrapped around the entire wheel from the last scan.
 	 * In case, we need to scan entirely the wheel for pending callouts.
 	 */
 	if (lastb - firstb >= callwheelsize) {
 		lastb = firstb + callwheelsize - 1;
 		if (nowb - firstb >= callwheelsize)
 			nowb = lastb;
 	}
 
 	/* Iterate callwheel from firstb to nowb and then up to lastb. */
 	do {
 		sc = &cc->cc_callwheel[firstb & callwheelmask];
 
 		/* Iterate all callouts in the current bucket */
 		LIST_FOREACH_SAFE(tmp, sc, c_links.le, next) {
 			/* Run the callout if present time within allowed. */
 			if (tmp->c_time <= now) {
 				/* Remove callout from bucket */
 				LIST_REMOVE(tmp, c_links.le);
 				if (tmp->c_flags & CALLOUT_DIRECT) {
 					/* Insert callout into direct list */
 					LIST_INSERT_HEAD(&cc->cc_directlist, tmp, c_links.le);
 				} else {
 					/* Insert callout into expired list */
 					TAILQ_INSERT_TAIL(&cc->cc_expireq, tmp, c_links.tqe);
 					tmp->c_flags |= CALLOUT_PROCESSED;
 				}
 				continue;
 			}
 
 			/* Skip events from distant future. */
 			if (tmp->c_time >= max)
 				continue;
 
 			/*
 			 * Event minimal time is bigger than present maximal
 			 * time, so it cannot be aggregated.
 			 */
 			if (tmp->c_time > last) {
 				lastb = nowb;
 				continue;
 			}
 			/* Update first and last time, respecting this event. */
 			if (tmp->c_time < first)
 				first = tmp->c_time;
 			tmp_max = tmp->c_time + tmp->c_precision;
 			if (tmp_max < last)
 				last = tmp_max;
 		}
 
 		/* Proceed with the next bucket. */
 		firstb++;
 
 		/*
 		 * Stop if we looked after present time and found
 		 * some event we can't execute at now.
 		 * Stop if we looked far enough into the future.
 		 */
 	} while (((int)(firstb - lastb)) <= 0);
 
 	/* Remember the next event time and pass the window on. */
 	cc->cc_firstevent = last;
 #ifndef NO_EVENTTIMERS
 	cpu_new_callout(curcpu, last, first);
 #endif
 	/*
 	 * Check for expired direct callouts, if any:
 	 */
 	while ((tmp = LIST_FIRST(&cc->cc_directlist)) != NULL) {
 		LIST_REMOVE(tmp, c_links.le);
 		softclock_call_cc(tmp, cc, 1);
 	}
 #ifdef CALLOUT_PROFILING
 	callout_update_stats(cc, 1);
 #endif
 	CC_UNLOCK(cc);
 	/*
 	 * "swi_sched()" acquires the thread lock and we don't want to
 	 * call it having cc_lock held because it leads to a locking
 	 * order reversal issue.
 	 */
 	if (!TAILQ_EMPTY(&cc->cc_expireq))
 		swi_sched(cc->cc_cookie, 0);
 }
 
 /*
  * Lock the per-CPU callout structure the callout 'c' currently
  * belongs to and return it, locked.
  *
  * "c->c_cpu" is sampled without a lock, so it is checked again once
  * the lock is held and the whole sequence is retried if it changed.
  * On SMP the function spins while "c_cpu" holds the CPUBLOCK marker.
  */
 static struct callout_cpu *
 callout_lock(struct callout *c)
 {
 	struct callout_cpu *cc;
 	int cpu;
 
 	for (;;) {
 		cpu = c->c_cpu;
 #ifdef SMP
 		if (cpu == CPUBLOCK) {
 			cpu_spinwait();
 			continue;
 		}
 #endif
 		cc = CC_CPU(cpu);
 		CC_LOCK(cc);
 		/* Done if the callout did not change CPU in the meantime. */
 		if (cpu == c->c_cpu)
 			break;
 		CC_UNLOCK(cc);
 	}
 	return (cc);
 }
 
 /*
  * Insert the callout 'c' into a callwheel using the arguments in
  * 'coa'.  Must be called with the lock of 'cc' held.
  *
  * On SMP the callout is moved to the CPU given by "coa->cpu" when that
  * CPU differs from the current one and is valid; in this case the lock
  * of 'cc' is dropped and the lock of the new CPU is taken instead.
  *
  * Returns the callout CPU structure whose lock is held on return.
  * Note that "coa->time" may be adjusted in place.
  */
 static struct callout_cpu *
 callout_cc_add_locked(struct callout *c, struct callout_cpu *cc,
     struct callout_args *coa)
 {
 #ifndef NO_EVENTTIMERS
 	sbintime_t sbt;
 #endif
 	u_int bucket;
 
 	CC_LOCK_ASSERT(cc);
 
 	/* update flags before swapping locks, if any */
 	c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT);
 	if (coa->flags & C_DIRECT_EXEC)
 		c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT);
 	else
 		c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
 
 #ifdef SMP
 	/* only set the "c_cpu" if the CPU number changed and is valid */
 	if (c->c_cpu != coa->cpu && coa->cpu > CPUBLOCK &&
 	    coa->cpu <= mp_maxid && !CPU_ABSENT(coa->cpu)) {
 		/*
 		 * Avoid interrupts and preemption firing after the
 		 * callout CPU is blocked in order to avoid deadlocks
 		 * as the new thread may be willing to acquire the
 		 * callout CPU lock:
 		 */
 		c->c_cpu = CPUBLOCK;
 		spinlock_enter();
 		CC_UNLOCK(cc);
 		cc = CC_CPU(coa->cpu);
 		CC_LOCK(cc);
 		spinlock_exit();
 		c->c_cpu = coa->cpu;
 	}
 #endif
 	/* Never schedule before the time of the last callwheel scan. */
 	if (coa->time < cc->cc_lastscan)
 		coa->time = cc->cc_lastscan;
 	c->c_arg = coa->arg;
 	c->c_func = coa->func;
 	c->c_time = coa->time;
 	c->c_precision = coa->precision;
 
 	bucket = callout_get_bucket(c->c_time);
 	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
 	    c, (int)(c->c_precision >> 32),
 	    (u_int)(c->c_precision & 0xffffffff));
 	LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
 
 #ifndef NO_EVENTTIMERS
 	/*
 	 * Inform the eventtimers(4) subsystem there's a new callout
 	 * that has been inserted, but only if really required.
 	 */
 	/* Clamp the precision so that the sum below cannot exceed SBT_MAX. */
 	if (SBT_MAX - c->c_time < c->c_precision)
 		c->c_precision = SBT_MAX - c->c_time;
 	sbt = c->c_time + c->c_precision;
 	if (sbt < cc->cc_firstevent) {
 		cc->cc_firstevent = sbt;
 		cpu_new_callout(c->c_cpu, sbt, c->c_time);
 	}
 #endif
 	return (cc);
 }
 
 /*
  * Return a preallocated callout to the free list of 'cc' and clear
  * its function pointer.
  */
 static inline void
 callout_cc_del(struct callout *c, struct callout_cpu *cc)
 {
 
 	c->c_func = NULL;
 	SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
 }
 
 /*
  * Execute the callback of the expired callout 'c'.
  *
  * 'cc' is the callout CPU structure the callout was taken from and
  * 'direct' selects the execution context slot (0 = SWI, 1 = direct).
  * The function is entered with the lock of 'cc' held and returns with
  * it held; the lock is dropped while the client lock is acquired and
  * while the callback runs.  After the callback has returned, a
  * pending drain function is called, a preallocated callout is put
  * back on the free list, or a deferred restart is carried out.
  */
 static inline void
 softclock_call_cc(struct callout *c, struct callout_cpu *cc,
     const int direct)
 {
 	callout_func_t *c_func;
 	void *c_arg;
 	struct lock_object *c_lock;
 	int c_flags;
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 
 	sbintime_t sbt1, sbt2;
 	struct timespec ts2;
 	static sbintime_t maxdt = 2 * SBT_1MS;	/* 2 msec */
 	static timeout_t *lastfunc;
 #endif
 
 	KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
 	    (CALLOUT_PENDING | CALLOUT_ACTIVE),
 	    ("softclock_call_cc: pend|act %p %x", c, c->c_flags));
 
 	/* Take private copies; "c" is not dereferenced after the callback. */
 	c_lock = c->c_lock;
 	c_func = c->c_func;
 	c_arg = c->c_arg;
 	c_flags = c->c_flags;
 
 	/* remove pending bit */
 	c->c_flags &= ~CALLOUT_PENDING;
 
 	/* reset our local state */
 	cc_exec_curr(cc, direct) = c;
 	cc_exec_restart(cc, direct) = false;
 	cc_exec_drain_fn(cc, direct) = NULL;
 
 	if (c_lock != NULL) {
 		cc_exec_cancel(cc, direct) = false;
 		CC_UNLOCK(cc);
 
 		/* unlocked region for switching locks */
 
 		callout_lock_client(c_flags, c_arg, c_lock);
 
 		/*
 		 * Check if the callout may have been cancelled while
 		 * we were switching locks. Even though the callout is
 		 * specifying a lock, it might not be certain this
 		 * lock is locked when starting and stopping callouts.
 		 */
 		CC_LOCK(cc);
 		if (cc_exec_cancel(cc, direct)) {
 			callout_unlock_client(c_flags, c_arg, c_lock);
 			goto skip_cc_locked;
 		}
 		if (c_lock == &Giant.lock_object) {
 #ifdef CALLOUT_PROFILING
 			cc_exec_gcalls(cc, direct)++;
 #endif
 			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
 			    c, c_func, c_arg);
 		} else {
 #ifdef CALLOUT_PROFILING
 			cc_exec_lockcalls(cc, direct)++;
 #endif
 			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
 			    c, c_func, c_arg);
 		}
 	} else {
 #ifdef CALLOUT_PROFILING
 		cc_exec_mpcalls(cc, direct)++;
 #endif
 		CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
 		    c, c_func, c_arg);
 	}
 	/* The callout cannot be stopped now! */
 	cc_exec_cancel(cc, direct) = true;
 	CC_UNLOCK(cc);
 
 	/* unlocked region */
 	KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
 	    "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	sbt1 = sbinuptime();
 #endif
 	THREAD_NO_SLEEPING();
 	SDT_PROBE1(callout_execute, , , callout__start, c);
 	c_func(c_arg);
 	SDT_PROBE1(callout_execute, , , callout__end, c);
 	THREAD_SLEEPING_OK();
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	/*
 	 * Report callbacks running longer than any seen before, starting
 	 * at a threshold of 2 msec.  The same function is only reported
 	 * again once it takes more than twice the previous maximum.
 	 */
 	sbt2 = sbinuptime();
 	sbt2 -= sbt1;
 	if (sbt2 > maxdt) {
 		if (lastfunc != c_func || sbt2 > maxdt * 2) {
 			ts2 = sbttots(sbt2);
 			printf(
 		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
 			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
 		}
 		maxdt = sbt2;
 		lastfunc = c_func;
 	}
 #endif
 	KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
 	CTR1(KTR_CALLOUT, "callout %p finished", c);
 
 	/*
 	 * At this point the callback structure might have been freed,
 	 * so we need to check the previously copied value of
 	 * "c->c_flags":
 	 */
 	if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
 		callout_unlock_client(c_flags, c_arg, c_lock);
 
 	CC_LOCK(cc);
 
 skip_cc_locked:
 	KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
 	cc_exec_curr(cc, direct) = NULL;
 
 	/* Check if there is anything which needs draining */
 	if (cc_exec_drain_fn(cc, direct) != NULL) {
 		/*
 		 * Unlock the CPU callout last, so that any use of
 		 * structures belonging to the callout are complete:
 		 */
 		CC_UNLOCK(cc);
 		/* call drain function unlocked */
 		cc_exec_drain_fn(cc, direct)(c_arg);
 		CC_LOCK(cc);
 	} else if (c_flags & CALLOUT_LOCAL_ALLOC) {
 		/* return callout back to freelist */
 		callout_cc_del(c, cc);
 	} else if (cc_exec_restart(cc, direct)) {
 		struct callout_cpu *new_cc;
 		/* [re-]schedule callout, if any */
 		new_cc = callout_cc_add_locked(c, cc,
 		    &cc_exec_restart_args(cc, direct));
 		if (new_cc != cc) {
 			/* switch locks back again */
 			CC_UNLOCK(new_cc);
 			CC_LOCK(cc);
 		}
 	}
 }
 
 /*
  * The callout mechanism is based on the work of Adam M. Costello and
  * George Varghese, published in a technical report entitled "Redesigning
  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
  * used in this implementation was published by G. Varghese and T. Lauck in
  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
  * the Efficient Implementation of a Timer Facility" in the Proceedings of
  * the 11th ACM Annual Symposium on Operating Systems Principles,
  * Austin, Texas Nov 1987.
  */
 
 /*
  * Software (low priority) clock interrupt.
  * Run periodic events from timeout queue.
  *
  * 'arg' is the "struct callout_cpu" registered together with this
  * handler.  All callouts queued on its "cc_expireq" are executed in
  * order with 'direct' set to 0.
  */
 void
 softclock(void *arg)
 {
 	struct callout_cpu *cc;
 	struct callout *c;
 
 	cc = (struct callout_cpu *)arg;
 	CC_LOCK(cc);
 #ifdef CALLOUT_PROFILING
 	callout_clear_stats(cc, 0);
 #endif
 	/* softclock_call_cc() returns with the lock held again. */
 	while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
 		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		softclock_call_cc(c, cc, 0);
 	}
 #ifdef CALLOUT_PROFILING
 	callout_update_stats(cc, 0);
 #endif
 	CC_UNLOCK(cc);
 }
 
 /*
  * timeout --
  *	Execute a function after a specified length of time.
  *
  * untimeout --
  *	Cancel previous timeout function call.
  *
  * callout_handle_init --
  *	Initialize a handle so that using it with untimeout is benign.
  *
  *	See AT&T BCI Driver Reference Manual for specification.  This
  *	implementation differs from that one in that although an
  *	identification value is returned from timeout, the original
  *	arguments to timeout as well as the identifier are used to
  *	identify entries for untimeout.
  *
  * timeout() takes a callout from the preallocated free list of
  * "timeout_cpu", schedules it with callout_reset() and returns a
  * handle referring to it.  It panics when the free list is empty.
  */
 struct callout_handle
 timeout(timeout_t *ftn, void *arg, int to_ticks)
 {
 	struct callout_cpu *cc;
 	struct callout *new;
 	struct callout_handle handle;
 
 	cc = CC_CPU(timeout_cpu);
 	CC_LOCK(cc);
 	/* Fill in the next free callout structure. */
 	new = SLIST_FIRST(&cc->cc_callfree);
 	if (new == NULL)
 		/* XXX Attempt to malloc first */
 		panic("timeout table full");
 	SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
 	handle.callout = new;
 	CC_UNLOCK(cc);
 
 	/* Schedule the callout after the callout CPU lock was dropped. */
 	callout_reset(new, to_ticks, ftn, arg);
 
 	return (handle);
 }
 
 /*
  * Cancel a timeout previously established with timeout().
  *
  * The callout is only stopped when the function and the argument
  * stored in it still match 'ftn' and 'arg'.  The comparison is made
  * under the callout CPU lock, while callout_stop() is called after
  * the lock has been released.
  */
 void
 untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
 {
 	struct callout_cpu *cc;
 	bool match;
 
 	/*
 	 * Check for a handle that was initialized
 	 * by callout_handle_init, but never used
 	 * for a real timeout.
 	 */
 	if (handle.callout == NULL)
 		return;
 
 	cc = callout_lock(handle.callout);
 	match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg);
 	CC_UNLOCK(cc);
 
 	if (match)
 		callout_stop(handle.callout);
 }
 
 /*
  * Initialize a handle to refer to no callout, which untimeout()
  * recognizes and ignores.
  */
 void
 callout_handle_init(struct callout_handle *handle)
 {
 	handle->callout = NULL;
 }
 
 #ifdef KTR
 /*
  * Map a callout return value to a descriptive string for KTR trace
  * records.  Any value not listed explicitly is reported as
  * "already stopped".
  */
 static const char *
-callout_retvalstring(int retval)
+callout_retvalstring(callout_ret_t retval)
 {
-	switch (retval) {
+	switch (retval.value) {
 	case CALLOUT_RET_DRAINING:
 		return ("callout cannot be stopped and needs drain");
 	case CALLOUT_RET_CANCELLED:
 		return ("callout was successfully stopped");
 	case CALLOUT_RET_CANCELLED_AND_DRAINING:
 		return ("callout was successfully stopped while being serviced");
 	default:
 		return ("callout was already stopped");
 	}
 }
 #endif
 
-static int
+static callout_ret_t
 callout_restart_async(struct callout *c, struct callout_args *coa,
     callout_func_t *drain_fn)
 {
 	/*
 	 * Stop the callout 'c' and optionally schedule it again.
 	 *
 	 * 'coa' holds the arguments for the new schedule; NULL means the
 	 * callout is only stopped.  'drain_fn', when not NULL, is recorded
 	 * as the function to call once a currently executing callback has
 	 * completed.  If the callback is executing right now, a restart is
 	 * not performed here but left to softclock_call_cc().
 	 *
 	 * The return value tells whether the callout was cancelled, was
 	 * already stopped and/or is still being serviced (CALLOUT_RET_*).
 	 */
 	struct callout_cpu *cc;
-	int retval;
+	callout_ret_t retval;
 	int direct;
 
 	cc = callout_lock(c);
 
 	/* Figure out if the callout is direct or not */
 	direct = ((c->c_flags & CALLOUT_DIRECT) != 0);
 
 	/*
 	 * Check if the callback is currently scheduled for
 	 * completion:
 	 */
 	if (cc_exec_curr(cc, direct) == c) {
 
-		retval = CALLOUT_RET_DRAINING;
+		retval.value = CALLOUT_RET_DRAINING;
 
 		/* set drain function, if any */
 		if (drain_fn != NULL)
 			cc_exec_drain_fn(cc, direct) = drain_fn;
 
 		/*
 		 * Try to prevent the callback from running by setting
 		 * the "cc_exec_cancel()" variable to "true".
 		 */
 		if (cc_exec_cancel(cc, direct) == false ||
 		    cc_exec_restart(cc, direct) == true) {
 			cc_exec_cancel(cc, direct) = true;
-			retval |= CALLOUT_RET_CANCELLED;
+			retval.value |= CALLOUT_RET_CANCELLED;
 		}
 
 		/*
 		 * Prevent callback restart if "callout_drain_xxx()"
 		 * is being called or we are stopping the callout or
 		 * the callback was preallocated by us:
 		 */
 		if (cc_exec_drain_fn(cc, direct) != NULL ||
 		    coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) {
 			CTR4(KTR_CALLOUT, "%s: %p func %p arg %p",
 			    callout_retvalstring(retval),
 			    c, c->c_func, c->c_arg);
 
 			/* clear old flags, if any */
 			c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
 			    CALLOUT_PROCESSED);
 
 			/* clear restart flag, if any */
 			cc_exec_restart(cc, direct) = false;
 		} else {
 			CTR4(KTR_CALLOUT, "%s: %p func %p arg %p",
 			    callout_retvalstring(retval),
 			    c, c->c_func, c->c_arg);
 
 			/* get us back into the game */
 			c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
 			c->c_flags &= ~CALLOUT_PROCESSED;
 
 			/* enable deferred restart */
 			cc_exec_restart(cc, direct) = true;
 
 			/* store arguments for the deferred restart, if any */
 			cc_exec_restart_args(cc, direct) = *coa;
 		}
 	} else {
 		/* stop callout */
 		if (c->c_flags & CALLOUT_PENDING) {
 			/*
 			 * The callback has not yet been executed, and
 			 * we simply just need to unlink it:
 			 */
 			/* CALLOUT_PROCESSED tells which list holds the callout. */
 			if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
 				LIST_REMOVE(c, c_links.le);
 			} else {
 				TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 			}
-			retval = CALLOUT_RET_CANCELLED;
+			retval.value = CALLOUT_RET_CANCELLED;
 		} else {
-			retval = CALLOUT_RET_STOPPED;
+			retval.value = CALLOUT_RET_STOPPED;
 		}
 
 		CTR4(KTR_CALLOUT, "%s: %p func %p arg %p",
 		    callout_retvalstring(retval),
 		    c, c->c_func, c->c_arg);
 
 		/* [re-]schedule callout, if any */
 		if (coa != NULL) {
 			/* "cc" may change here; the returned one is locked. */
 			cc = callout_cc_add_locked(c, cc, coa);
 		} else {
 			/* clear old flags, if any */
 			c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
 			    CALLOUT_PROCESSED);
 
 			/* return callback to pre-allocated list, if any */
 			if ((c->c_flags & CALLOUT_LOCAL_ALLOC) &&
-			    retval != CALLOUT_RET_STOPPED) {
+			    retval.value != CALLOUT_RET_STOPPED) {
 				callout_cc_del(c, cc);
 			}
 		}
 	}
 	CC_UNLOCK(cc);
 	return (retval);
 }
 
 void
 callout_when(sbintime_t sbt, sbintime_t precision, int flags,
     sbintime_t *res, sbintime_t *prec_res)
 {
 	sbintime_t to_sbt, to_pr;
 
 	if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) {
 		*res = sbt;
 		*prec_res = precision;
 		return;
 	}
 	if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt)
 		sbt = tick_sbt;
 	if ((flags & C_HARDCLOCK) != 0 ||
 #ifdef NO_EVENTTIMERS
 	    sbt >= sbt_timethreshold) {
 		to_sbt = getsbinuptime();
 
 		/* Add safety belt for the case of hz > 1000. */
 		to_sbt += tc_tick_sbt - tick_sbt;
 #else
 	    sbt >= sbt_tickthreshold) {
 		/*
 		 * Obtain the time of the last hardclock() call on
 		 * this CPU directly from the kern_clocksource.c.
 		 * This value is per-CPU, but it is equal for all
 		 * active ones.
 		 */
 #ifdef __LP64__
 		to_sbt = DPCPU_GET(hardclocktime);
 #else
 		spinlock_enter();
 		to_sbt = DPCPU_GET(hardclocktime);
 		spinlock_exit();
 #endif
 #endif
 		if ((flags & C_HARDCLOCK) == 0)
 			to_sbt += tick_sbt;
 	} else
 		to_sbt = sbinuptime();
 	if (SBT_MAX - to_sbt < sbt)
 		to_sbt = SBT_MAX;
 	else
 		to_sbt += sbt;
 	*res = to_sbt;
 	to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
 	    sbt >> C_PRELGET(flags));
 	*prec_res = to_pr > precision ? to_pr : precision;
 }
 
 /*
  * New interface; clients allocate their own callout structures.
  *
  * callout_reset() - establish or change a timeout
  * callout_stop() - disestablish a timeout
  * callout_init() - initialize a callout structure so that it can
  *	safely be passed to callout_reset() and callout_stop()
  *
  * <sys/callout.h> defines three convenience macros:
  *
  * callout_active() - returns truth if callout has not been stopped,
  *	drained, or deactivated since the last time the callout was
  *	reset.
  * callout_pending() - returns truth if callout is still waiting for timeout
  * callout_deactivate() - marks the callout as having been serviced
  */
-int
+callout_ret_t
 callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
     callout_func_t *ftn, void *arg, int cpu, int flags)
 {
 	struct callout_args coa;
 
 	/* store arguments for callout add function */
 	coa.func = ftn;
 	coa.arg = arg;
 	coa.flags = flags;
 	coa.cpu = cpu;
 
 	/* compute trigger time for callout */
 	callout_when(sbt, prec, flags, &coa.time, &coa.precision);
 
 	/* get callback started, if any */
 	return (callout_restart_async(c, &coa, NULL));
 }
 
 /*
  * Common idioms that can be optimized in the future.
  */
-int
+callout_ret_t
 callout_schedule_on(struct callout *c, int to_ticks, int cpu)
 {
 	return (callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu));
 }
 
-int
+callout_ret_t
 callout_schedule(struct callout *c, int to_ticks)
 {
 	return (callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu));
 }
 
-int
+callout_ret_t
 callout_stop(struct callout *c)
 {
 	/* get callback stopped, if any */
 	return (callout_restart_async(c, NULL, NULL));
 }
 
 static void
 callout_drain_function(void *arg)
 {
 	wakeup(&callout_drain_function);
 }
 
-int
+callout_ret_t
 callout_async_drain(struct callout *c, callout_func_t *fn)
 {
 	/* get callback stopped, if any */
 	return (callout_restart_async(c, NULL, fn));
 }
 
-int
+callout_ret_t
 callout_drain(struct callout *c)
 {
-	int retval;
+	callout_ret_t retval;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 	    "Draining callout");
 
 	/* at this point the "c->c_cpu" field is not changing */
 
 	retval = callout_async_drain(c, &callout_drain_function);
 
-	if (retval & CALLOUT_RET_DRAINING) {
+	if (retval.bit.draining) {
 		void *ident = &callout_drain_function;
 		struct callout_cpu *cc;
 		int direct;
 		int busy;
 
 		CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p",
 		    c, c->c_func, c->c_arg);
 
 		do {
 			/*
 			 * The sleepq_lock() is lower rank than the
 			 * callout_lock() and must be locked first:
 			 */
 			sleepq_lock(ident);
 			cc = callout_lock(c);
 			direct = ((c->c_flags & CALLOUT_DIRECT) != 0);
 			busy = (cc_exec_curr(cc, direct) == c);
 			CC_UNLOCK(cc);
 			DROP_GIANT();
 
 			if (busy && !SCHEDULER_STOPPED()) {
 				/* Wait for drain to complete */
 				sleepq_add(ident, &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0);
 				sleepq_wait(ident, 0);
 			} else {
 				sleepq_release(ident);
 			}
 
 			PICKUP_GIANT();
 		} while (busy);
 	}
 
 	CTR4(KTR_CALLOUT, "%s: %p func %p arg %p",
 	    callout_retvalstring(retval),
 	    c, c->c_func, c->c_arg);
 
 	return (retval);
 }
 
 void
 callout_init(struct callout *c, int mpsafe)
 {
 	if (mpsafe) {
 		callout_init_lock_object(c, NULL, CALLOUT_RETURNUNLOCKED);
 	} else {
 		callout_init_lock_object(c, &Giant.lock_object, 0);
 	}
 }
 
 void
 callout_init_lock_function(struct callout *c, callout_lock_func_t *lock_fn, int flags)
 {
 	bzero(c, sizeof *c);
 
 	KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0,
 	    ("callout_init_lock_function: bad flags 0x%08x", flags));
 	KASSERT(lock_fn != NULL,
 	    ("callout_init_lock_function: lock function is NULL"));
 	flags &= CALLOUT_RETURNUNLOCKED;
 	flags |= CALLOUT_SET_LC(CALLOUT_LC_FUNCTION);
 	c->c_lock = (struct lock_object *)lock_fn;
 	c->c_flags = flags;
 	c->c_cpu = timeout_cpu;
 }
 
 void
 callout_init_lock_object(struct callout *c, struct lock_object *lock, int flags)
 {
 	bzero(c, sizeof *c);
 	KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0,
 	    ("callout_init_lock_object: bad flags 0x%08x", flags));
 	flags &= CALLOUT_RETURNUNLOCKED;
 	if (lock != NULL) {
 		struct lock_class *class = LOCK_CLASS(lock);
 		if (class == &lock_class_mtx_sleep)
 			flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX);
 		else if (class == &lock_class_mtx_spin)
 			flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN);
 		else if (class == &lock_class_rm)
 			flags |= CALLOUT_SET_LC(CALLOUT_LC_RM);
 		else if (class == &lock_class_rw)
 			flags |= CALLOUT_SET_LC(CALLOUT_LC_RW);
 		else
 			panic("callout_init_lock_object: Unsupported lock class '%s'\n",
 			    class->lc_name);
 	} else {
 		flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0);
 	}
 	c->c_lock = lock;
 	c->c_flags = flags;
 	c->c_cpu = timeout_cpu;
 }
 
 #ifdef APM_FIXUP_CALLTODO
 /* 
  * Adjust the kernel calltodo timeout list.  This routine is used after 
  * an APM resume to recalculate the calltodo timer list values with the 
  * number of hz's we have been sleeping.  The next hardclock() will detect 
  * that there are fired timers and run softclock() to execute them.
  *
  * Please note, I have not done an exhaustive analysis of what code this
  * might break.  I am motivated to have my select()'s and alarm()'s that
  * have expired during suspend firing upon resume so that the applications
  * which set the timer can do the maintanence the timer was for as close
  * as possible to the originally intended time.  Testing this code for a 
  * week showed that resuming from a suspend resulted in 22 to 25 timers 
  * firing, which seemed independent on whether the suspend was 2 hours or
  * 2 days.  Your milage may vary.   - Ken Key <key@cs.utk.edu>
  */
 void
 adjust_timeout_calltodo(struct timeval *time_change)
 {
 	register struct callout *p;
 	unsigned long delta_ticks;
 
 	/* 
 	 * How many ticks were we asleep?
 	 * (stolen from tvtohz()).
 	 */
 
 	/* Don't do anything */
 	if (time_change->tv_sec < 0)
 		return;
 	else if (time_change->tv_sec <= LONG_MAX / 1000000)
 		delta_ticks = howmany(time_change->tv_sec * 1000000 +
 		    time_change->tv_usec, tick) + 1;
 	else if (time_change->tv_sec <= LONG_MAX / hz)
 		delta_ticks = time_change->tv_sec * hz +
 		    howmany(time_change->tv_usec, tick) + 1;
 	else
 		delta_ticks = LONG_MAX;
 
 	if (delta_ticks > INT_MAX)
 		delta_ticks = INT_MAX;
 
 	/* 
 	 * Now rip through the timer calltodo list looking for timers
 	 * to expire.
 	 */
 
 	/* don't collide with softclock() */
 	CC_LOCK(cc);
 	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
 		p->c_time -= delta_ticks;
 
 		/* Break if the timer had more time on it than delta_ticks */
 		if (p->c_time > 0)
 			break;
 
 		/* take back the ticks the timer didn't use (p->c_time <= 0) */
 		delta_ticks = -p->c_time;
 	}
 	CC_UNLOCK(cc);
 
 	return;
 }
 #endif /* APM_FIXUP_CALLTODO */
 
 static int
 flssbt(sbintime_t sbt)
 {
 
 	sbt += (uint64_t)sbt >> 1;
 	if (sizeof(long) >= sizeof(sbintime_t))
 		return (flsl(sbt));
 	if (sbt >= SBT_1S)
 		return (flsl(((uint64_t)sbt) >> 32) + 32);
 	return (flsl(sbt));
 }
 
 /*
  * Dump immediate statistic snapshot of the scheduled callouts.
  */
 static int
 sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
 {
 	struct callout *tmp;
 	struct callout_cpu *cc;
 	struct callout_list *sc;
 	sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
 	int ct[64], cpr[64], ccpbk[32];
 	int error, val, i, count, tcum, pcum, maxc, c, medc;
 #ifdef SMP
 	int cpu;
 #endif
 
 	val = 0;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	count = maxc = 0;
 	st = spr = maxt = maxpr = 0;
 	bzero(ccpbk, sizeof(ccpbk));
 	bzero(ct, sizeof(ct));
 	bzero(cpr, sizeof(cpr));
 	now = sbinuptime();
 #ifdef SMP
 	CPU_FOREACH(cpu) {
 		cc = CC_CPU(cpu);
 #else
 		cc = CC_CPU(timeout_cpu);
 #endif
 		CC_LOCK(cc);
 		for (i = 0; i < callwheelsize; i++) {
 			sc = &cc->cc_callwheel[i];
 			c = 0;
 			LIST_FOREACH(tmp, sc, c_links.le) {
 				c++;
 				t = tmp->c_time - now;
 				if (t < 0)
 					t = 0;
 				st += t / SBT_1US;
 				spr += tmp->c_precision / SBT_1US;
 				if (t > maxt)
 					maxt = t;
 				if (tmp->c_precision > maxpr)
 					maxpr = tmp->c_precision;
 				ct[flssbt(t)]++;
 				cpr[flssbt(tmp->c_precision)]++;
 			}
 			if (c > maxc)
 				maxc = c;
 			ccpbk[fls(c + c / 2)]++;
 			count += c;
 		}
 		CC_UNLOCK(cc);
 #ifdef SMP
 	}
 #endif
 
 	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
 		tcum += ct[i];
 	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
 		pcum += cpr[i];
 	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
 		c += ccpbk[i];
 	medc = (i >= 2) ? (1 << (i - 2)) : 0;
 
 	printf("Scheduled callouts statistic snapshot:\n");
 	printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
 	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
 	printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
 	    medc,
 	    count / callwheelsize / mp_ncpus,
 	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
 	    maxc);
 	printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
 	    (st / count) / 1000000, (st / count) % 1000000,
 	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
 	printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
 	    (spr / count) / 1000000, (spr / count) % 1000000,
 	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
 	printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
 	    "   prec\t   pcum\n");
 	for (i = 0, tcum = pcum = 0; i < 64; i++) {
 		if (ct[i] == 0 && cpr[i] == 0)
 			continue;
 		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
 		tcum += ct[i];
 		pcum += cpr[i];
 		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
 		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
 		    i - 1 - (32 - CC_HASH_SHIFT),
 		    ct[i], tcum, cpr[i], pcum);
 	}
 	return (error);
 }
 SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, sysctl_kern_callout_stat, "I",
     "Dump immediate statistic snapshot of the scheduled callouts");
 
 #ifdef DDB
 static void
 _show_callout(struct callout *c)
 {
 
 	db_printf("callout %p\n", c);
 #define	C_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, c->e);
 	db_printf("   &c_links = %p\n", &(c->c_links));
 	C_DB_PRINTF("%" PRId64,	c_time);
 	C_DB_PRINTF("%" PRId64,	c_precision);
 	C_DB_PRINTF("%p",	c_arg);
 	C_DB_PRINTF("%p",	c_func);
 	C_DB_PRINTF("%p",	c_lock);
 	C_DB_PRINTF("%#x",	c_flags);
 	C_DB_PRINTF("%d",	c_cpu);
 #undef	C_DB_PRINTF
 }
 
 DB_SHOW_COMMAND(callout, db_show_callout)
 {
 
 	if (!have_addr) {
 		db_printf("usage: show callout <struct callout *>\n");
 		return;
 	}
 
 	_show_callout((struct callout *)addr);
 }
 #endif /* DDB */
Index: projects/hps_head/sys/kern/subr_taskqueue.c
===================================================================
--- projects/hps_head/sys/kern/subr_taskqueue.c	(revision 309217)
+++ projects/hps_head/sys/kern/subr_taskqueue.c	(revision 309218)
@@ -1,818 +1,818 @@
 /*-
  * Copyright (c) 2000 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cpuset.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/libkern.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/taskqueue.h>
 #include <sys/unistd.h>
 #include <machine/stdarg.h>
 
 static MALLOC_DEFINE(M_TASKQUEUE, "taskqueue", "Task Queues");
 static void	*taskqueue_giant_ih;
 static void	*taskqueue_ih;
 static void	 taskqueue_fast_enqueue(void *);
 static void	 taskqueue_swi_enqueue(void *);
 static void	 taskqueue_swi_giant_enqueue(void *);
 
 struct taskqueue_busy {
 	struct task	*tb_running;
 	TAILQ_ENTRY(taskqueue_busy) tb_link;
 };
 
 struct task * const TB_DRAIN_WAITER = (struct task *)0x1;
 
 struct taskqueue {
 	STAILQ_HEAD(, task)	tq_queue;
 	taskqueue_enqueue_fn	tq_enqueue;
 	void			*tq_context;
 	char			*tq_name;
 	TAILQ_HEAD(, taskqueue_busy) tq_active;
 	struct mtx		tq_mutex;
 	struct thread		**tq_threads;
 	int			tq_tcount;
 	int			tq_spin;
 	int			tq_flags;
 	int			tq_callouts;
 	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
 	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
 };
 
 #define	TQ_FLAGS_ACTIVE		(1 << 0)
 #define	TQ_FLAGS_BLOCKED	(1 << 1)
 #define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)
 
 #define	DT_CALLOUT_ARMED	(1 << 0)
 #define	DT_DRAIN_IN_PROGRESS	(1 << 1)
 
 #define	TQ_LOCK(tq)							\
 	do {								\
 		if ((tq)->tq_spin)					\
 			mtx_lock_spin(&(tq)->tq_mutex);			\
 		else							\
 			mtx_lock(&(tq)->tq_mutex);			\
 	} while (0)
 #define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)
 
 #define	TQ_UNLOCK(tq)							\
 	do {								\
 		if ((tq)->tq_spin)					\
 			mtx_unlock_spin(&(tq)->tq_mutex);		\
 		else							\
 			mtx_unlock(&(tq)->tq_mutex);			\
 	} while (0)
 #define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
 
 void
 _timeout_task_init(struct taskqueue *queue, struct timeout_task *timeout_task,
     int priority, task_fn_t func, void *context)
 {
 
 	TASK_INIT(&timeout_task->t, priority, func, context);
 	callout_init_mtx(&timeout_task->c, &queue->tq_mutex,
 	    CALLOUT_RETURNUNLOCKED);
 	timeout_task->q = queue;
 	timeout_task->f = 0;
 }
 
 static __inline int
 TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
     int t)
 {
 	if (tq->tq_spin)
 		return (msleep_spin(p, m, wm, t));
 	return (msleep(p, m, pri, wm, t));
 }
 
 static struct taskqueue *
 _taskqueue_create(const char *name, int mflags,
 		 taskqueue_enqueue_fn enqueue, void *context,
 		 int mtxflags, const char *mtxname __unused)
 {
 	struct taskqueue *queue;
 	char *tq_name;
 
 	tq_name = malloc(TASKQUEUE_NAMELEN, M_TASKQUEUE, mflags | M_ZERO);
 	if (tq_name == NULL)
 		return (NULL);
 
 	queue = malloc(sizeof(struct taskqueue), M_TASKQUEUE, mflags | M_ZERO);
 	if (queue == NULL) {
 		free(tq_name, M_TASKQUEUE);
 		return (NULL);
 	}
 
 	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
 
 	STAILQ_INIT(&queue->tq_queue);
 	TAILQ_INIT(&queue->tq_active);
 	queue->tq_enqueue = enqueue;
 	queue->tq_context = context;
 	queue->tq_name = tq_name;
 	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
 	queue->tq_flags |= TQ_FLAGS_ACTIVE;
 	if (enqueue == taskqueue_fast_enqueue ||
 	    enqueue == taskqueue_swi_enqueue ||
 	    enqueue == taskqueue_swi_giant_enqueue ||
 	    enqueue == taskqueue_thread_enqueue)
 		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
 	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
 
 	return (queue);
 }
 
 struct taskqueue *
 taskqueue_create(const char *name, int mflags,
 		 taskqueue_enqueue_fn enqueue, void *context)
 {
 
 	return _taskqueue_create(name, mflags, enqueue, context,
 			MTX_DEF, name);
 }
 
 void
 taskqueue_set_callback(struct taskqueue *queue,
     enum taskqueue_callback_type cb_type, taskqueue_callback_fn callback,
     void *context)
 {
 
 	KASSERT(((cb_type >= TASKQUEUE_CALLBACK_TYPE_MIN) &&
 	    (cb_type <= TASKQUEUE_CALLBACK_TYPE_MAX)),
 	    ("Callback type %d not valid, must be %d-%d", cb_type,
 	    TASKQUEUE_CALLBACK_TYPE_MIN, TASKQUEUE_CALLBACK_TYPE_MAX));
 	KASSERT((queue->tq_callbacks[cb_type] == NULL),
 	    ("Re-initialization of taskqueue callback?"));
 
 	queue->tq_callbacks[cb_type] = callback;
 	queue->tq_cb_contexts[cb_type] = context;
 }
 
 /*
  * Signal a taskqueue thread to terminate.
  */
 static void
 taskqueue_terminate(struct thread **pp, struct taskqueue *tq)
 {
 
 	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 		wakeup(tq);
 		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
 	}
 }
 
 void
 taskqueue_free(struct taskqueue *queue)
 {
 
 	TQ_LOCK(queue);
 	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 	taskqueue_terminate(queue->tq_threads, queue);
 	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
 	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 	mtx_destroy(&queue->tq_mutex);
 	free(queue->tq_threads, M_TASKQUEUE);
 	free(queue->tq_name, M_TASKQUEUE);
 	free(queue, M_TASKQUEUE);
 }
 
 static int
 taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
 {
 	struct task *ins;
 	struct task *prev;
 
 	KASSERT(task->ta_func != NULL, ("enqueueing task with NULL func"));
 	/*
 	 * Count multiple enqueues.
 	 */
 	if (task->ta_pending) {
 		if (task->ta_pending < USHRT_MAX)
 			task->ta_pending++;
 		TQ_UNLOCK(queue);
 		return (0);
 	}
 
 	/*
 	 * Optimise the case when all tasks have the same priority.
 	 */
 	prev = STAILQ_LAST(&queue->tq_queue, task, ta_link);
 	if (!prev || prev->ta_priority >= task->ta_priority) {
 		STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
 	} else {
 		prev = NULL;
 		for (ins = STAILQ_FIRST(&queue->tq_queue); ins;
 		     prev = ins, ins = STAILQ_NEXT(ins, ta_link))
 			if (ins->ta_priority < task->ta_priority)
 				break;
 
 		if (prev)
 			STAILQ_INSERT_AFTER(&queue->tq_queue, prev, task, ta_link);
 		else
 			STAILQ_INSERT_HEAD(&queue->tq_queue, task, ta_link);
 	}
 
 	task->ta_pending = 1;
 	if ((queue->tq_flags & TQ_FLAGS_UNLOCKED_ENQUEUE) != 0)
 		TQ_UNLOCK(queue);
 	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
 		queue->tq_enqueue(queue->tq_context);
 	if ((queue->tq_flags & TQ_FLAGS_UNLOCKED_ENQUEUE) == 0)
 		TQ_UNLOCK(queue);
 
 	/* Return with lock released. */
 	return (0);
 }
 
 int
 taskqueue_enqueue(struct taskqueue *queue, struct task *task)
 {
 	int res;
 
 	TQ_LOCK(queue);
 	res = taskqueue_enqueue_locked(queue, task);
 	/* The lock is released inside. */
 
 	return (res);
 }
 
 static void
 taskqueue_timeout_func(void *arg)
 {
 	struct taskqueue *queue;
 	struct timeout_task *timeout_task;
 
 	timeout_task = arg;
 	queue = timeout_task->q;
 	KASSERT((timeout_task->f & DT_CALLOUT_ARMED) != 0, ("Stray timeout"));
 	timeout_task->f &= ~DT_CALLOUT_ARMED;
 	queue->tq_callouts--;
 	taskqueue_enqueue_locked(timeout_task->q, &timeout_task->t);
 	/* The lock is released inside. */
 }
 
 int
 taskqueue_enqueue_timeout(struct taskqueue *queue,
     struct timeout_task *timeout_task, int ticks)
 {
 	int res;
 
 	TQ_LOCK(queue);
 	KASSERT(timeout_task->q == NULL || timeout_task->q == queue,
 	    ("Migrated queue"));
 	KASSERT(!queue->tq_spin, ("Timeout for spin-queue"));
 	timeout_task->q = queue;
 	res = timeout_task->t.ta_pending;
 	if (timeout_task->f & DT_DRAIN_IN_PROGRESS) {
 		/* Do nothing */
 		TQ_UNLOCK(queue);
 		res = -1;
 	} else if (ticks == 0) {
 		taskqueue_enqueue_locked(queue, &timeout_task->t);
 		/* The lock is released inside. */
 	} else {
 		if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
 			res++;
 		} else {
 			queue->tq_callouts++;
 			timeout_task->f |= DT_CALLOUT_ARMED;
 			if (ticks < 0)
 				ticks = -ticks; /* Ignore overflow. */
 		}
 		if (ticks > 0) {
 			callout_reset(&timeout_task->c, ticks,
 			    taskqueue_timeout_func, timeout_task);
 		}
 		TQ_UNLOCK(queue);
 	}
 	return (res);
 }
 
 static void
 taskqueue_task_nop_fn(void *context, int pending)
 {
 }
 
 /*
  * Block until all currently queued tasks in this taskqueue
  * have begun execution.  Tasks queued during execution of
  * this function are ignored.
  */
 static void
 taskqueue_drain_tq_queue(struct taskqueue *queue)
 {
 	struct task t_barrier;
 
 	if (STAILQ_EMPTY(&queue->tq_queue))
 		return;
 
 	/*
 	 * Enqueue our barrier after all current tasks, but with
 	 * the highest priority so that newly queued tasks cannot
 	 * pass it.  Because of the high priority, we can not use
 	 * taskqueue_enqueue_locked directly (which drops the lock
 	 * anyway) so just insert it at tail while we have the
 	 * queue lock.
 	 */
 	TASK_INIT(&t_barrier, USHRT_MAX, taskqueue_task_nop_fn, &t_barrier);
 	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
 	t_barrier.ta_pending = 1;
 
 	/*
 	 * Once the barrier has executed, all previously queued tasks
 	 * have completed or are currently executing.
 	 */
 	while (t_barrier.ta_pending != 0)
 		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
 }
 
 /*
  * Block until all currently executing tasks for this taskqueue
  * complete.  Tasks that begin execution during the execution
  * of this function are ignored.
  */
 static void
 taskqueue_drain_tq_active(struct taskqueue *queue)
 {
 	struct taskqueue_busy tb_marker, *tb_first;
 
 	if (TAILQ_EMPTY(&queue->tq_active))
 		return;
 
 	/* Block taskq_terminate().*/
 	queue->tq_callouts++;
 
 	/*
 	 * Wait for all currently executing taskqueue threads
 	 * to go idle.
 	 */
 	tb_marker.tb_running = TB_DRAIN_WAITER;
 	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
 	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
 		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
 	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
 
 	/*
 	 * Wakeup any other drain waiter that happened to queue up
 	 * without any intervening active thread.
 	 */
 	tb_first = TAILQ_FIRST(&queue->tq_active);
 	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
 		wakeup(tb_first);
 
 	/* Release taskqueue_terminate(). */
 	queue->tq_callouts--;
 	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 		wakeup_one(queue->tq_threads);
 }
 
 void
 taskqueue_block(struct taskqueue *queue)
 {
 
 	TQ_LOCK(queue);
 	queue->tq_flags |= TQ_FLAGS_BLOCKED;
 	TQ_UNLOCK(queue);
 }
 
 void
 taskqueue_unblock(struct taskqueue *queue)
 {
 
 	TQ_LOCK(queue);
 	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
 	if (!STAILQ_EMPTY(&queue->tq_queue))
 		queue->tq_enqueue(queue->tq_context);
 	TQ_UNLOCK(queue);
 }
 
 static void
 taskqueue_run_locked(struct taskqueue *queue)
 {
 	struct taskqueue_busy tb;
 	struct taskqueue_busy *tb_first;
 	struct task *task;
 	int pending;
 
 	KASSERT(queue != NULL, ("tq is NULL"));
 	TQ_ASSERT_LOCKED(queue);
 	tb.tb_running = NULL;
 
 	while (STAILQ_FIRST(&queue->tq_queue)) {
 		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
 
 		/*
 		 * Carefully remove the first task from the queue and
 		 * zero its pending count.
 		 */
 		task = STAILQ_FIRST(&queue->tq_queue);
 		KASSERT(task != NULL, ("task is NULL"));
 		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 		pending = task->ta_pending;
 		task->ta_pending = 0;
 		tb.tb_running = task;
 		TQ_UNLOCK(queue);
 
 		KASSERT(task->ta_func != NULL, ("task->ta_func is NULL"));
 		task->ta_func(task->ta_context, pending);
 
 		TQ_LOCK(queue);
 		tb.tb_running = NULL;
 		wakeup(task);
 
 		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
 		tb_first = TAILQ_FIRST(&queue->tq_active);
 		if (tb_first != NULL &&
 		    tb_first->tb_running == TB_DRAIN_WAITER)
 			wakeup(tb_first);
 	}
 }
 
 void
 taskqueue_run(struct taskqueue *queue)
 {
 
 	TQ_LOCK(queue);
 	taskqueue_run_locked(queue);
 	TQ_UNLOCK(queue);
 }
 
 static int
 task_is_running(struct taskqueue *queue, struct task *task)
 {
 	struct taskqueue_busy *tb;
 
 	TQ_ASSERT_LOCKED(queue);
 	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
 		if (tb->tb_running == task)
 			return (1);
 	}
 	return (0);
 }
 
 static int
 taskqueue_cancel_locked(struct taskqueue *queue, struct task *task,
     u_int *pendp)
 {
 
 	if (task->ta_pending > 0)
 		STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link);
 	if (pendp != NULL)
 		*pendp = task->ta_pending;
 	task->ta_pending = 0;
 	return (task_is_running(queue, task) ? EBUSY : 0);
 }
 
 int
 taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp)
 {
 	int error;
 
 	TQ_LOCK(queue);
 	error = taskqueue_cancel_locked(queue, task, pendp);
 	TQ_UNLOCK(queue);
 
 	return (error);
 }
 
 int
 taskqueue_cancel_timeout(struct taskqueue *queue,
     struct timeout_task *timeout_task, u_int *pendp)
 {
 	u_int pending, pending1;
 	int error;
 
 	TQ_LOCK(queue);
-	pending = (callout_stop(&timeout_task->c) & CALLOUT_RET_CANCELLED) ? 1 : 0;
+	pending = callout_stop(&timeout_task->c).bit.cancelled;
 	error = taskqueue_cancel_locked(queue, &timeout_task->t, &pending1);
 	if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) {
 		timeout_task->f &= ~DT_CALLOUT_ARMED;
 		queue->tq_callouts--;
 	}
 	TQ_UNLOCK(queue);
 
 	if (pendp != NULL)
 		*pendp = pending + pending1;
 	return (error);
 }
 
 void
 taskqueue_drain(struct taskqueue *queue, struct task *task)
 {
 
 	if (!queue->tq_spin)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 
 	TQ_LOCK(queue);
 	while (task->ta_pending != 0 || task_is_running(queue, task))
 		TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0);
 	TQ_UNLOCK(queue);
 }
 
 void
 taskqueue_drain_all(struct taskqueue *queue)
 {
 
 	if (!queue->tq_spin)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 
 	TQ_LOCK(queue);
 	taskqueue_drain_tq_queue(queue);
 	taskqueue_drain_tq_active(queue);
 	TQ_UNLOCK(queue);
 }
 
 void
 taskqueue_drain_timeout(struct taskqueue *queue,
     struct timeout_task *timeout_task)
 {
 
 	/*
 	 * Set flag to prevent timer from re-starting during drain:
 	 */
 	TQ_LOCK(queue);
 	KASSERT((timeout_task->f & DT_DRAIN_IN_PROGRESS) == 0,
 	    ("Drain already in progress"));
 	timeout_task->f |= DT_DRAIN_IN_PROGRESS;
 	TQ_UNLOCK(queue);
 
 	callout_drain(&timeout_task->c);
 	taskqueue_drain(queue, &timeout_task->t);
 
 	/*
 	 * Clear flag to allow timer to re-start:
 	 */
 	TQ_LOCK(queue);
 	timeout_task->f &= ~DT_DRAIN_IN_PROGRESS;
 	TQ_UNLOCK(queue);
 }
 
 static void
 taskqueue_swi_enqueue(void *context)
 {
 	swi_sched(taskqueue_ih, 0);
 }
 
 static void
 taskqueue_swi_run(void *dummy)
 {
 	taskqueue_run(taskqueue_swi);
 }
 
 static void
 taskqueue_swi_giant_enqueue(void *context)
 {
 	swi_sched(taskqueue_giant_ih, 0);
 }
 
 static void
 taskqueue_swi_giant_run(void *dummy)
 {
 	taskqueue_run(taskqueue_swi_giant);
 }
 
 static int
 _taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
     cpuset_t *mask, const char *name, va_list ap)
 {
 	char ktname[MAXCOMLEN + 1];
 	struct thread *td;
 	struct taskqueue *tq;
 	int i, error;
 
 	if (count <= 0)
 		return (EINVAL);
 
 	vsnprintf(ktname, sizeof(ktname), name, ap);
 	tq = *tqp;
 
 	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_TASKQUEUE,
 	    M_NOWAIT | M_ZERO);
 	if (tq->tq_threads == NULL) {
 		printf("%s: no memory for %s threads\n", __func__, ktname);
 		return (ENOMEM);
 	}
 
 	for (i = 0; i < count; i++) {
 		if (count == 1)
 			error = kthread_add(taskqueue_thread_loop, tqp, NULL,
 			    &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
 		else
 			error = kthread_add(taskqueue_thread_loop, tqp, NULL,
 			    &tq->tq_threads[i], RFSTOPPED, 0,
 			    "%s_%d", ktname, i);
 		if (error) {
 			/* should be ok to continue, taskqueue_free will dtrt */
 			printf("%s: kthread_add(%s): error %d", __func__,
 			    ktname, error);
 			tq->tq_threads[i] = NULL;		/* paranoid */
 		} else
 			tq->tq_tcount++;
 	}
 	if (tq->tq_tcount == 0) {
 		free(tq->tq_threads, M_TASKQUEUE);
 		tq->tq_threads = NULL;
 		return (ENOMEM);
 	}
 	for (i = 0; i < count; i++) {
 		if (tq->tq_threads[i] == NULL)
 			continue;
 		td = tq->tq_threads[i];
 		if (mask) {
 			error = cpuset_setthread(td->td_tid, mask);
 			/*
 			 * Failing to pin is rarely an actual fatal error;
 			 * it'll just affect performance.
 			 */
 			if (error)
 				printf("%s: curthread=%llu: can't pin; "
 				    "error=%d\n",
 				    __func__,
 				    (unsigned long long) td->td_tid,
 				    error);
 		}
 		thread_lock(td);
 		sched_prio(td, pri);
 		sched_add(td, SRQ_BORING);
 		thread_unlock(td);
 	}
 
 	return (0);
 }
 
 int
 taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
     const char *name, ...)
 {
 	va_list ap;
 	int error;
 
 	va_start(ap, name);
 	error = _taskqueue_start_threads(tqp, count, pri, NULL, name, ap);
 	va_end(ap);
 	return (error);
 }
 
 int
 taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, int pri,
     cpuset_t *mask, const char *name, ...)
 {
 	va_list ap;
 	int error;
 
 	va_start(ap, name);
 	error = _taskqueue_start_threads(tqp, count, pri, mask, name, ap);
 	va_end(ap);
 	return (error);
 }
 
 static inline void
 taskqueue_run_callback(struct taskqueue *tq,
     enum taskqueue_callback_type cb_type)
 {
 	taskqueue_callback_fn tq_callback;
 
 	TQ_ASSERT_UNLOCKED(tq);
 	tq_callback = tq->tq_callbacks[cb_type];
 	if (tq_callback != NULL)
 		tq_callback(tq->tq_cb_contexts[cb_type]);
 }
 
 void
 taskqueue_thread_loop(void *arg)
 {
 	struct taskqueue **tqp, *tq;
 
 	tqp = arg;
 	tq = *tqp;
 	taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
 	TQ_LOCK(tq);
 	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
 		/* XXX ? */
 		taskqueue_run_locked(tq);
 		/*
 		 * Because taskqueue_run() can drop tq_mutex, we need to
 		 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
 		 * meantime, which means we missed a wakeup.
 		 */
 		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 			break;
 		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
 	}
 	taskqueue_run_locked(tq);
 	/*
 	 * This thread is on its way out, so just drop the lock temporarily
 	 * in order to call the shutdown callback.  This allows the callback
 	 * to look at the taskqueue, even just before it dies.
 	 */
 	TQ_UNLOCK(tq);
 	taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
 	TQ_LOCK(tq);
 
 	/* rendezvous with thread that asked us to terminate */
 	tq->tq_tcount--;
 	wakeup_one(tq->tq_threads);
 	TQ_UNLOCK(tq);
 	kthread_exit();
 }
 
 void
 taskqueue_thread_enqueue(void *context)
 {
 	struct taskqueue **tqp, *tq;
 
 	tqp = context;
 	tq = *tqp;
 	wakeup_one(tq);
 }
 
 TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,
 		 swi_add(NULL, "task queue", taskqueue_swi_run, NULL, SWI_TQ,
 		     INTR_MPSAFE, &taskqueue_ih));
 
 TASKQUEUE_DEFINE(swi_giant, taskqueue_swi_giant_enqueue, NULL,
 		 swi_add(NULL, "Giant taskq", taskqueue_swi_giant_run,
 		     NULL, SWI_TQ_GIANT, 0, &taskqueue_giant_ih));
 
 TASKQUEUE_DEFINE_THREAD(thread);
 
 struct taskqueue *
 taskqueue_create_fast(const char *name, int mflags,
 		 taskqueue_enqueue_fn enqueue, void *context)
 {
 	return _taskqueue_create(name, mflags, enqueue, context,
 			MTX_SPIN, "fast_taskqueue");
 }
 
 static void	*taskqueue_fast_ih;
 
 static void
 taskqueue_fast_enqueue(void *context)
 {
 	swi_sched(taskqueue_fast_ih, 0);
 }
 
 static void
 taskqueue_fast_run(void *dummy)
 {
 	taskqueue_run(taskqueue_fast);
 }
 
 TASKQUEUE_FAST_DEFINE(fast, taskqueue_fast_enqueue, NULL,
 	swi_add(NULL, "fast taskq", taskqueue_fast_run, NULL,
 	SWI_TQ_FAST, INTR_MPSAFE, &taskqueue_fast_ih));
 
 int
 taskqueue_member(struct taskqueue *queue, struct thread *td)
 {
 	int i, j, ret = 0;
 
 	for (i = 0, j = 0; ; i++) {
 		if (queue->tq_threads[i] == NULL)
 			continue;
 		if (queue->tq_threads[i] == td) {
 			ret = 1;
 			break;
 		}
 		if (++j >= queue->tq_tcount)
 			break;
 	}
 	return (ret);
 }
Index: projects/hps_head/sys/net/if_llatbl.c
===================================================================
--- projects/hps_head/sys/net/if_llatbl.c	(revision 309217)
+++ projects/hps_head/sys/net/if_llatbl.c	(revision 309218)
@@ -1,963 +1,963 @@
 /*
  * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved.
  * Copyright (c) 2004-2008 Qing Li. All rights reserved.
  * Copyright (c) 2008 Kip Macy. All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/uma.h>
 
 #include <netinet/in.h>
 #include <net/if_llatbl.h>
 #include <net/if.h>
 #include <net/if_dl.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 #include <netinet/if_ether.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/nd6.h>
 
 MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
 
 static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) =
     SLIST_HEAD_INITIALIZER(lltables);
 #define	V_lltables	VNET(lltables)
 
 static struct rwlock lltable_list_lock;
 RW_SYSINIT(lltable_list_lock, &lltable_list_lock, "lltable_list_lock");
 #define	LLTABLE_LIST_RLOCK()		rw_rlock(&lltable_list_lock)
 #define	LLTABLE_LIST_RUNLOCK()		rw_runlock(&lltable_list_lock)
 #define	LLTABLE_LIST_WLOCK()		rw_wlock(&lltable_list_lock)
 #define	LLTABLE_LIST_WUNLOCK()		rw_wunlock(&lltable_list_lock)
 #define	LLTABLE_LIST_LOCK_ASSERT()	rw_assert(&lltable_list_lock, RA_LOCKED)
 
 static void lltable_unlink(struct lltable *llt);
 static void llentries_unlink(struct lltable *llt, struct llentries *head);
 
 static void htable_unlink_entry(struct llentry *lle);
 static void htable_link_entry(struct lltable *llt, struct llentry *lle);
 static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
     void *farg);
 
 /*
  * Dump lle state for a specific address family.
  */
 static int
 lltable_dump_af(struct lltable *llt, struct sysctl_req *wr)
 {
 	int error;
 
 	LLTABLE_LIST_LOCK_ASSERT();
 
 	if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
 		return (0);
 	error = 0;
 
 	IF_AFDATA_RLOCK(llt->llt_ifp);
 	error = lltable_foreach_lle(llt,
 	    (llt_foreach_cb_t *)llt->llt_dump_entry, wr);
 	IF_AFDATA_RUNLOCK(llt->llt_ifp);
 
 	return (error);
 }
 
 /*
  * Dump arp state for a specific address family.
  */
 int
 lltable_sysctl_dumparp(int af, struct sysctl_req *wr)
 {
 	struct lltable *llt;
 	int error = 0;
 
 	LLTABLE_LIST_RLOCK();
 	SLIST_FOREACH(llt, &V_lltables, llt_link) {
 		if (llt->llt_af == af) {
 			error = lltable_dump_af(llt, wr);
 			if (error != 0)
 				goto done;
 		}
 	}
 done:
 	LLTABLE_LIST_RUNLOCK();
 	return (error);
 }
 
 /*
  * Common function helpers for chained hash table.
  */
 
 /*
  * Runs specified callback for each entry in @llt.
  * Caller does the locking.
  *
  */
 static int
 htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
 {
 	struct llentry *lle, *next;
 	int i, error;
 
 	error = 0;
 
 	for (i = 0; i < llt->llt_hsize; i++) {
 		LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) {
 			error = f(llt, lle, farg);
 			if (error != 0)
 				break;
 		}
 	}
 
 	return (error);
 }
 
 static void
 htable_link_entry(struct lltable *llt, struct llentry *lle)
 {
 	struct llentries *lleh;
 	uint32_t hashidx;
 
 	if ((lle->la_flags & LLE_LINKED) != 0)
 		return;
 
 	IF_AFDATA_WLOCK_ASSERT(llt->llt_ifp);
 
 	hashidx = llt->llt_hash(lle, llt->llt_hsize);
 	lleh = &llt->lle_head[hashidx];
 
 	lle->lle_tbl  = llt;
 	lle->lle_head = lleh;
 	lle->la_flags |= LLE_LINKED;
 	LIST_INSERT_HEAD(lleh, lle, lle_next);
 }
 
 static void
 htable_unlink_entry(struct llentry *lle)
 {
 
 	if ((lle->la_flags & LLE_LINKED) != 0) {
 		IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
 		LIST_REMOVE(lle, lle_next);
 		lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
 #if 0
 		lle->lle_tbl = NULL;
 		lle->lle_head = NULL;
 #endif
 	}
 }
 
 struct prefix_match_data {
 	const struct sockaddr *addr;
 	const struct sockaddr *mask;
 	struct llentries dchain;
 	u_int flags;
 };
 
 static int
 htable_prefix_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
 {
 	struct prefix_match_data *pmd;
 
 	pmd = (struct prefix_match_data *)farg;
 
 	if (llt->llt_match_prefix(pmd->addr, pmd->mask, pmd->flags, lle)) {
 		LLE_WLOCK(lle);
 		LIST_INSERT_HEAD(&pmd->dchain, lle, lle_chain);
 	}
 
 	return (0);
 }
 
 static void
 htable_prefix_free(struct lltable *llt, const struct sockaddr *addr,
     const struct sockaddr *mask, u_int flags)
 {
 	struct llentry *lle, *next;
 	struct prefix_match_data pmd;
 
 	bzero(&pmd, sizeof(pmd));
 	pmd.addr = addr;
 	pmd.mask = mask;
 	pmd.flags = flags;
 	LIST_INIT(&pmd.dchain);
 
 	IF_AFDATA_WLOCK(llt->llt_ifp);
 	/* Push matching lles to chain */
 	lltable_foreach_lle(llt, htable_prefix_free_cb, &pmd);
 
 	llentries_unlink(llt, &pmd.dchain);
 	IF_AFDATA_WUNLOCK(llt->llt_ifp);
 
 	LIST_FOREACH_SAFE(lle, &pmd.dchain, lle_chain, next)
 		lltable_free_entry(llt, lle);
 }
 
 static void
 htable_free_tbl(struct lltable *llt)
 {
 
 	free(llt->lle_head, M_LLTABLE);
 	free(llt, M_LLTABLE);
 }
 
 static void
 llentries_unlink(struct lltable *llt, struct llentries *head)
 {
 	struct llentry *lle, *next;
 
 	LIST_FOREACH_SAFE(lle, head, lle_chain, next)
 		llt->llt_unlink_entry(lle);
 }
 
 /*
  * Helper function used to drop all mbufs in hold queue.
  *
  * Returns the number of held packets, if any, that were dropped.
  */
 size_t
 lltable_drop_entry_queue(struct llentry *lle)
 {
 	size_t pkts_dropped;
 	struct mbuf *next;
 
 	LLE_WLOCK_ASSERT(lle);
 
 	pkts_dropped = 0;
 	while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) {
 		next = lle->la_hold->m_nextpkt;
 		m_freem(lle->la_hold);
 		lle->la_hold = next;
 		lle->la_numheld--;
 		pkts_dropped++;
 	}
 
 	KASSERT(lle->la_numheld == 0,
 		("%s: la_numheld %d > 0, pkts_droped %zd", __func__,
 		 lle->la_numheld, pkts_dropped));
 
 	return (pkts_dropped);
 }
 
 void
 lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
     const char *linkhdr, size_t linkhdrsize, int lladdr_off)
 {
 
 	memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
 	lle->r_hdrlen = linkhdrsize;
 	lle->ll_addr = &lle->r_linkdata[lladdr_off];
 	lle->la_flags |= LLE_VALID;
 	lle->r_flags |= RLLE_VALID;
 }
 
 /*
  * Tries to update @lle link-level address.
  * Since update requires AFDATA WLOCK, function
  * drops @lle lock, acquires AFDATA lock and then acquires
  * @lle lock to maintain lock order.
  *
  * Returns 1 on success.
  */
 int
 lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
     const char *linkhdr, size_t linkhdrsize, int lladdr_off)
 {
 
 	/* Perform real LLE update */
 	/* use afdata WLOCK to update fields */
 	LLE_WLOCK_ASSERT(lle);
 	LLE_ADDREF(lle);
 	LLE_WUNLOCK(lle);
 	IF_AFDATA_WLOCK(ifp);
 	LLE_WLOCK(lle);
 
 	/*
 	 * Since we droppped LLE lock, other thread might have deleted
 	 * this lle. Check and return
 	 */
 	if ((lle->la_flags & LLE_DELETED) != 0) {
 		IF_AFDATA_WUNLOCK(ifp);
 		LLE_FREE_LOCKED(lle);
 		return (0);
 	}
 
 	/* Update data */
 	lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off);
 
 	IF_AFDATA_WUNLOCK(ifp);
 
 	LLE_REMREF(lle);
 
 	return (1);
 }
 
  /*
  * Helper function used to pre-compute full/partial link-layer
  * header data suitable for feeding into if_output().
  */
 int
 lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
     char *buf, size_t *bufsize, int *lladdr_off)
 {
 	struct if_encap_req ereq;
 	int error;
 
 	bzero(buf, *bufsize);
 	bzero(&ereq, sizeof(ereq));
 	ereq.buf = buf;
 	ereq.bufsize = *bufsize;
 	ereq.rtype = IFENCAP_LL;
 	ereq.family = family;
 	ereq.lladdr = lladdr;
 	ereq.lladdr_len = ifp->if_addrlen;
 	error = ifp->if_requestencap(ifp, &ereq);
 	if (error == 0) {
 		*bufsize = ereq.bufsize;
 		*lladdr_off = ereq.lladdr_off;
 	}
 
 	return (error);
 }
 
 /*
  * Update link-layer header for given @lle after
  * interface lladdr was changed.
  */
 static int
 llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg)
 {
 	struct ifnet *ifp;
 	u_char linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	u_char *lladdr;
 	int lladdr_off;
 
 	ifp = (struct ifnet *)farg;
 
 	lladdr = lle->ll_addr;
 
 	LLE_WLOCK(lle);
 	if ((lle->la_flags & LLE_VALID) == 0) {
 		LLE_WUNLOCK(lle);
 		return (0);
 	}
 
 	if ((lle->la_flags & LLE_IFADDR) != 0)
 		lladdr = IF_LLADDR(ifp);
 
 	linkhdrsize = sizeof(linkhdr);
 	lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr, &linkhdrsize,
 	    &lladdr_off);
 	memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
 	LLE_WUNLOCK(lle);
 
 	return (0);
 }
 
 /*
  * Update all calculated headers for given @llt
  */
 void
 lltable_update_ifaddr(struct lltable *llt)
 {
 
 	if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
 		return;
 
 	IF_AFDATA_WLOCK(llt->llt_ifp);
 	lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp);
 	IF_AFDATA_WUNLOCK(llt->llt_ifp);
 }
 
 /*
  *
  * Performs generic cleanup routines and frees lle.
  *
  * Called for non-linked entries, with callouts and
  * other AF-specific cleanups performed.
  *
  * @lle must be passed WLOCK'ed
  *
  * Returns the number of held packets, if any, that were dropped.
  */
 size_t
 llentry_free(struct llentry *lle)
 {
 	size_t pkts_dropped;
 
 	LLE_WLOCK_ASSERT(lle);
 
 	KASSERT((lle->la_flags & LLE_LINKED) == 0, ("freeing linked lle"));
 
 	pkts_dropped = lltable_drop_entry_queue(lle);
 
 	LLE_FREE_LOCKED(lle);
 
 	return (pkts_dropped);
 }
 
 /*
  * (al)locate an llentry for address dst (equivalent to rtalloc for new-arp).
  *
  * If found the llentry * is returned referenced and unlocked.
  */
 struct llentry *
 llentry_alloc(struct ifnet *ifp, struct lltable *lt,
     struct sockaddr_storage *dst)
 {
 	struct llentry *la, *la_tmp;
 
 	IF_AFDATA_RLOCK(ifp);
 	la = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
 	IF_AFDATA_RUNLOCK(ifp);
 
 	if (la != NULL) {
 		LLE_ADDREF(la);
 		LLE_WUNLOCK(la);
 		return (la);
 	}
 
 	if ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
 		la = lltable_alloc_entry(lt, 0, (struct sockaddr *)dst);
 		if (la == NULL)
 			return (NULL);
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(la);
 		/* Prefer any existing LLE over newly-created one */
 		la_tmp = lla_lookup(lt, LLE_EXCLUSIVE, (struct sockaddr *)dst);
 		if (la_tmp == NULL)
 			lltable_link_entry(lt, la);
 		IF_AFDATA_WUNLOCK(ifp);
 		if (la_tmp != NULL) {
 			lltable_free_entry(lt, la);
 			la = la_tmp;
 		}
 		LLE_ADDREF(la);
 		LLE_WUNLOCK(la);
 	}
 
 	return (la);
 }
 
 /*
  * Free all entries from given table and free itself.
  */
 
 static int
 lltable_free_cb(struct lltable *llt, struct llentry *lle, void *farg)
 {
 	struct llentries *dchain;
 
 	dchain = (struct llentries *)farg;
 
 	LLE_WLOCK(lle);
 	LIST_INSERT_HEAD(dchain, lle, lle_chain);
 
 	return (0);
 }
 
 /*
  * Free all entries from given table and free itself.
  */
 void
 lltable_free(struct lltable *llt)
 {
 	struct llentry *lle, *next;
 	struct llentries dchain;
 
 	KASSERT(llt != NULL, ("%s: llt is NULL", __func__));
 
 	lltable_unlink(llt);
 
 	LIST_INIT(&dchain);
 	IF_AFDATA_WLOCK(llt->llt_ifp);
 	/* Push all lles to @dchain */
 	lltable_foreach_lle(llt, lltable_free_cb, &dchain);
 	llentries_unlink(llt, &dchain);
 	IF_AFDATA_WUNLOCK(llt->llt_ifp);
 
 	LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) {
-		if (callout_stop(&lle->lle_timer) & CALLOUT_RET_CANCELLED)
+		if (callout_stop(&lle->lle_timer).bit.cancelled)
 			LLE_REMREF(lle);
 		llentry_free(lle);
 	}
 
 	llt->llt_free_tbl(llt);
 }
 
 #if 0
 void
 lltable_drain(int af)
 {
 	struct lltable	*llt;
 	struct llentry	*lle;
 	register int i;
 
 	LLTABLE_LIST_RLOCK();
 	SLIST_FOREACH(llt, &V_lltables, llt_link) {
 		if (llt->llt_af != af)
 			continue;
 
 		for (i=0; i < llt->llt_hsize; i++) {
 			LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
 				LLE_WLOCK(lle);
 				if (lle->la_hold) {
 					m_freem(lle->la_hold);
 					lle->la_hold = NULL;
 				}
 				LLE_WUNLOCK(lle);
 			}
 		}
 	}
 	LLTABLE_LIST_RUNLOCK();
 }
 #endif
 
 /*
  * Deletes an address from given lltable.
  * Used for userland interaction to remove
  * individual entries. Skips entries added by OS.
  */
 int
 lltable_delete_addr(struct lltable *llt, u_int flags,
     const struct sockaddr *l3addr)
 {
 	struct llentry *lle;
 	struct ifnet *ifp;
 
 	ifp = llt->llt_ifp;
 	IF_AFDATA_WLOCK(ifp);
 	lle = lla_lookup(llt, LLE_EXCLUSIVE, l3addr);
 
 	if (lle == NULL) {
 		IF_AFDATA_WUNLOCK(ifp);
 		return (ENOENT);
 	}
 	if ((lle->la_flags & LLE_IFADDR) != 0 && (flags & LLE_IFADDR) == 0) {
 		IF_AFDATA_WUNLOCK(ifp);
 		LLE_WUNLOCK(lle);
 		return (EPERM);
 	}
 
 	lltable_unlink_entry(llt, lle);
 	IF_AFDATA_WUNLOCK(ifp);
 
 	llt->llt_delete_entry(llt, lle);
 
 	return (0);
 }
 
 void
 lltable_prefix_free(int af, struct sockaddr *addr, struct sockaddr *mask,
     u_int flags)
 {
 	struct lltable *llt;
 
 	LLTABLE_LIST_RLOCK();
 	SLIST_FOREACH(llt, &V_lltables, llt_link) {
 		if (llt->llt_af != af)
 			continue;
 
 		llt->llt_prefix_free(llt, addr, mask, flags);
 	}
 	LLTABLE_LIST_RUNLOCK();
 }
 
 struct lltable *
 lltable_allocate_htbl(uint32_t hsize)
 {
 	struct lltable *llt;
 	int i;
 
 	llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK | M_ZERO);
 	llt->llt_hsize = hsize;
 	llt->lle_head = malloc(sizeof(struct llentries) * hsize,
 	    M_LLTABLE, M_WAITOK | M_ZERO);
 
 	for (i = 0; i < llt->llt_hsize; i++)
 		LIST_INIT(&llt->lle_head[i]);
 
 	/* Set some default callbacks */
 	llt->llt_link_entry = htable_link_entry;
 	llt->llt_unlink_entry = htable_unlink_entry;
 	llt->llt_prefix_free = htable_prefix_free;
 	llt->llt_foreach_entry = htable_foreach_lle;
 	llt->llt_free_tbl = htable_free_tbl;
 
 	return (llt);
 }
 
 /*
  * Links lltable to global llt list.
  */
 void
 lltable_link(struct lltable *llt)
 {
 
 	LLTABLE_LIST_WLOCK();
 	SLIST_INSERT_HEAD(&V_lltables, llt, llt_link);
 	LLTABLE_LIST_WUNLOCK();
 }
 
 static void
 lltable_unlink(struct lltable *llt)
 {
 
 	LLTABLE_LIST_WLOCK();
 	SLIST_REMOVE(&V_lltables, llt, lltable, llt_link);
 	LLTABLE_LIST_WUNLOCK();
 
 }
 
 /*
  * External methods used by lltable consumers
  */
 
 int
 lltable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f, void *farg)
 {
 
 	return (llt->llt_foreach_entry(llt, f, farg));
 }
 
 struct llentry *
 lltable_alloc_entry(struct lltable *llt, u_int flags,
     const struct sockaddr *l3addr)
 {
 
 	return (llt->llt_alloc_entry(llt, flags, l3addr));
 }
 
 void
 lltable_free_entry(struct lltable *llt, struct llentry *lle)
 {
 
 	llt->llt_free_entry(llt, lle);
 }
 
 void
 lltable_link_entry(struct lltable *llt, struct llentry *lle)
 {
 
 	llt->llt_link_entry(llt, lle);
 }
 
 void
 lltable_unlink_entry(struct lltable *llt, struct llentry *lle)
 {
 
 	llt->llt_unlink_entry(lle);
 }
 
 void
 lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
 {
 	struct lltable *llt;
 
 	llt = lle->lle_tbl;
 	llt->llt_fill_sa_entry(lle, sa);
 }
 
 struct ifnet *
 lltable_get_ifp(const struct lltable *llt)
 {
 
 	return (llt->llt_ifp);
 }
 
 int
 lltable_get_af(const struct lltable *llt)
 {
 
 	return (llt->llt_af);
 }
 
 /*
  * Called in route_output when rtm_flags contains RTF_LLDATA.
  */
 int
 lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
 {
 	struct sockaddr_dl *dl =
 	    (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
 	struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST];
 	struct ifnet *ifp;
 	struct lltable *llt;
 	struct llentry *lle, *lle_tmp;
 	uint8_t linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	int lladdr_off;
 	u_int laflags = 0;
 	int error;
 
 	KASSERT(dl != NULL && dl->sdl_family == AF_LINK,
 	    ("%s: invalid dl\n", __func__));
 
 	ifp = ifnet_byindex(dl->sdl_index);
 	if (ifp == NULL) {
 		log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
 		    __func__, dl->sdl_index);
 		return EINVAL;
 	}
 
 	/* XXX linked list may be too expensive */
 	LLTABLE_LIST_RLOCK();
 	SLIST_FOREACH(llt, &V_lltables, llt_link) {
 		if (llt->llt_af == dst->sa_family &&
 		    llt->llt_ifp == ifp)
 			break;
 	}
 	LLTABLE_LIST_RUNLOCK();
 	KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n"));
 
 	error = 0;
 
 	switch (rtm->rtm_type) {
 	case RTM_ADD:
 		/* Add static LLE */
 		laflags = 0;
 		if (rtm->rtm_rmx.rmx_expire == 0)
 			laflags = LLE_STATIC;
 		lle = lltable_alloc_entry(llt, laflags, dst);
 		if (lle == NULL)
 			return (ENOMEM);
 
 		linkhdrsize = sizeof(linkhdr);
 		if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl),
 		    linkhdr, &linkhdrsize, &lladdr_off) != 0)
 			return (EINVAL);
 		lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
 		    lladdr_off);
 		if ((rtm->rtm_flags & RTF_ANNOUNCE))
 			lle->la_flags |= LLE_PUB;
 		lle->la_expire = rtm->rtm_rmx.rmx_expire;
 
 		laflags = lle->la_flags;
 
 		/* Try to link new entry */
 		lle_tmp = NULL;
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(lle);
 		lle_tmp = lla_lookup(llt, LLE_EXCLUSIVE, dst);
 		if (lle_tmp != NULL) {
 			/* Check if we are trying to replace immutable entry */
 			if ((lle_tmp->la_flags & LLE_IFADDR) != 0) {
 				IF_AFDATA_WUNLOCK(ifp);
 				LLE_WUNLOCK(lle_tmp);
 				lltable_free_entry(llt, lle);
 				return (EPERM);
 			}
 			/* Unlink existing entry from table */
 			lltable_unlink_entry(llt, lle_tmp);
 		}
 		lltable_link_entry(llt, lle);
 		IF_AFDATA_WUNLOCK(ifp);
 
 		if (lle_tmp != NULL) {
 			EVENTHANDLER_INVOKE(lle_event, lle_tmp,LLENTRY_EXPIRED);
 			lltable_free_entry(llt, lle_tmp);
 		}
 
 		/*
 		 * By invoking LLE handler here we might get
 		 * two events on static LLE entry insertion
 		 * in routing socket. However, since we might have
 		 * other subscribers we need to generate this event.
 		 */
 		EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
 		LLE_WUNLOCK(lle);
 #ifdef INET
 		/* gratuitous ARP */
 		if ((laflags & LLE_PUB) && dst->sa_family == AF_INET)
 			arprequest(ifp,
 			    &((struct sockaddr_in *)dst)->sin_addr,
 			    &((struct sockaddr_in *)dst)->sin_addr,
 			    (u_char *)LLADDR(dl));
 #endif
 
 		break;
 
 	case RTM_DELETE:
 		return (lltable_delete_addr(llt, 0, dst));
 
 	default:
 		error = EINVAL;
 	}
 
 	return (error);
 }
 
 #ifdef DDB
 struct llentry_sa {
 	struct llentry		base;
 	struct sockaddr		l3_addr;
 };
 
 static void
 llatbl_lle_show(struct llentry_sa *la)
 {
 	struct llentry *lle;
 	uint8_t octet[6];
 
 	lle = &la->base;
 	db_printf("lle=%p\n", lle);
 	db_printf(" lle_next=%p\n", lle->lle_next.le_next);
 	db_printf(" lle_lock=%p\n", &lle->lle_lock);
 	db_printf(" lle_tbl=%p\n", lle->lle_tbl);
 	db_printf(" lle_head=%p\n", lle->lle_head);
 	db_printf(" la_hold=%p\n", lle->la_hold);
 	db_printf(" la_numheld=%d\n", lle->la_numheld);
 	db_printf(" la_expire=%ju\n", (uintmax_t)lle->la_expire);
 	db_printf(" la_flags=0x%04x\n", lle->la_flags);
 	db_printf(" la_asked=%u\n", lle->la_asked);
 	db_printf(" la_preempt=%u\n", lle->la_preempt);
 	db_printf(" ln_state=%d\n", lle->ln_state);
 	db_printf(" ln_router=%u\n", lle->ln_router);
 	db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
 	db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
 	bcopy(lle->ll_addr, octet, sizeof(octet));
 	db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
 	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
 	db_printf(" lle_timer=%p\n", &lle->lle_timer);
 
 	switch (la->l3_addr.sa_family) {
 #ifdef INET
 	case AF_INET:
 	{
 		struct sockaddr_in *sin;
 		char l3s[INET_ADDRSTRLEN];
 
 		sin = (struct sockaddr_in *)&la->l3_addr;
 		inet_ntoa_r(sin->sin_addr, l3s);
 		db_printf(" l3_addr=%s\n", l3s);
 		break;
 	}
 #endif
 #ifdef INET6
 	case AF_INET6:
 	{
 		struct sockaddr_in6 *sin6;
 		char l3s[INET6_ADDRSTRLEN];
 
 		sin6 = (struct sockaddr_in6 *)&la->l3_addr;
 		ip6_sprintf(l3s, &sin6->sin6_addr);
 		db_printf(" l3_addr=%s\n", l3s);
 		break;
 	}
 #endif
 	default:
 		db_printf(" l3_addr=N/A (af=%d)\n", la->l3_addr.sa_family);
 		break;
 	}
 }
 
 DB_SHOW_COMMAND(llentry, db_show_llentry)
 {
 
 	if (!have_addr) {
 		db_printf("usage: show llentry <struct llentry *>\n");
 		return;
 	}
 
 	llatbl_lle_show((struct llentry_sa *)addr);
 }
 
 static void
 llatbl_llt_show(struct lltable *llt)
 {
 	int i;
 	struct llentry *lle;
 
 	db_printf("llt=%p llt_af=%d llt_ifp=%p\n",
 	    llt, llt->llt_af, llt->llt_ifp);
 
 	for (i = 0; i < llt->llt_hsize; i++) {
 		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
 
 			llatbl_lle_show((struct llentry_sa *)lle);
 			if (db_pager_quit)
 				return;
 		}
 	}
 }
 
 DB_SHOW_COMMAND(lltable, db_show_lltable)
 {
 
 	if (!have_addr) {
 		db_printf("usage: show lltable <struct lltable *>\n");
 		return;
 	}
 
 	llatbl_llt_show((struct lltable *)addr);
 }
 
 DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 	struct lltable *llt;
 
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET_QUIET(vnet_iter);
 #ifdef VIMAGE
 		db_printf("vnet=%p\n", curvnet);
 #endif
 		SLIST_FOREACH(llt, &V_lltables, llt_link) {
 			db_printf("llt=%p llt_af=%d llt_ifp=%p(%s)\n",
 			    llt, llt->llt_af, llt->llt_ifp,
 			    (llt->llt_ifp != NULL) ?
 				llt->llt_ifp->if_xname : "?");
 			if (have_addr && addr != 0) /* verbose */
 				llatbl_llt_show(llt);
 			if (db_pager_quit) {
 				CURVNET_RESTORE();
 				return;
 			}
 		}
 		CURVNET_RESTORE();
 	}
 }
 #endif
Index: projects/hps_head/sys/netgraph/ng_base.c
===================================================================
--- projects/hps_head/sys/netgraph/ng_base.c	(revision 309217)
+++ projects/hps_head/sys/netgraph/ng_base.c	(revision 309218)
@@ -1,3847 +1,3847 @@
 /*-
  * Copyright (c) 1996-1999 Whistle Communications, Inc.
  * All rights reserved.
  *
  * Subject to the following obligations and disclaimer of warranty, use and
  * redistribution of this software, in source or object code forms, with or
  * without modifications are expressly permitted by Whistle Communications;
  * provided, however, that:
  * 1. Any and all reproductions of the source or object code must include the
  *    copyright notice above and the following disclaimer of warranties; and
  * 2. No rights are granted, in any manner or form, to use Whistle
  *    Communications, Inc. trademarks, including the mark "WHISTLE
  *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
  *    such appears in the above copyright notice or in the software.
  *
  * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
  * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
  * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
  * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
  * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
  * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
  * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
  * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
  * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
  * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
  * OF SUCH DAMAGE.
  *
  * Authors: Julian Elischer <julian@freebsd.org>
  *          Archie Cobbs <archie@freebsd.org>
  *
  * $FreeBSD$
  * $Whistle: ng_base.c,v 1.39 1999/01/28 23:54:53 julian Exp $
  */
 
 /*
  * This file implements the base netgraph code.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/ctype.h>
 #include <sys/hash.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/unistd.h>
 #include <machine/cpu.h>
 #include <vm/uma.h>
 
 #include <net/netisr.h>
 #include <net/vnet.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/ng_parse.h>
 
 MODULE_VERSION(netgraph, NG_ABI_VERSION);
 
 /* Mutex to protect topology events. */
 static struct rwlock	ng_topo_lock;
 #define	TOPOLOGY_RLOCK()	rw_rlock(&ng_topo_lock)
 #define	TOPOLOGY_RUNLOCK()	rw_runlock(&ng_topo_lock)
 #define	TOPOLOGY_WLOCK()	rw_wlock(&ng_topo_lock)
 #define	TOPOLOGY_WUNLOCK()	rw_wunlock(&ng_topo_lock)
 #define	TOPOLOGY_NOTOWNED()	rw_assert(&ng_topo_lock, RA_UNLOCKED)
 
 #ifdef	NETGRAPH_DEBUG
 static struct mtx	ng_nodelist_mtx; /* protects global node/hook lists */
 static struct mtx	ngq_mtx;	/* protects the queue item list */
 
 static SLIST_HEAD(, ng_node) ng_allnodes;
 static LIST_HEAD(, ng_node) ng_freenodes; /* in debug, we never free() them */
 static SLIST_HEAD(, ng_hook) ng_allhooks;
 static LIST_HEAD(, ng_hook) ng_freehooks; /* in debug, we never free() them */
 
 static void ng_dumpitems(void);
 static void ng_dumpnodes(void);
 static void ng_dumphooks(void);
 
 #endif	/* NETGRAPH_DEBUG */
 /*
  * DEAD versions of the structures.
  * In order to avoid races, it is sometimes necessary to point
  * at SOMETHING even though theoretically, the current entity is
  * INVALID. Use these to avoid these races.
  */
 struct ng_type ng_deadtype = {
 	NG_ABI_VERSION,
 	"dead",
 	NULL,	/* modevent */
 	NULL,	/* constructor */
 	NULL,	/* rcvmsg */
 	NULL,	/* shutdown */
 	NULL,	/* newhook */
 	NULL,	/* findhook */
 	NULL,	/* connect */
 	NULL,	/* rcvdata */
 	NULL,	/* disconnect */
 	NULL, 	/* cmdlist */
 };
 
 struct ng_node ng_deadnode = {
 	"dead",
 	&ng_deadtype,	
 	NGF_INVALID,
 	0,	/* numhooks */
 	NULL,	/* private */
 	0,	/* ID */
 	LIST_HEAD_INITIALIZER(ng_deadnode.nd_hooks),
 	{},	/* all_nodes list entry */
 	{},	/* id hashtable list entry */
 	{	0,
 		0,
 		{}, /* should never use! (should hang) */
 		{}, /* workqueue entry */
 		STAILQ_HEAD_INITIALIZER(ng_deadnode.nd_input_queue.queue),
 	},
 	1,	/* refs */
 	NULL,	/* vnet */
 #ifdef	NETGRAPH_DEBUG
 	ND_MAGIC,
 	__FILE__,
 	__LINE__,
 	{NULL}
 #endif	/* NETGRAPH_DEBUG */
 };
 
 struct ng_hook ng_deadhook = {
 	"dead",
 	NULL,		/* private */
 	HK_INVALID | HK_DEAD,
 	0,		/* undefined data link type */
 	&ng_deadhook,	/* Peer is self */
 	&ng_deadnode,	/* attached to deadnode */
 	{},		/* hooks list */
 	NULL,		/* override rcvmsg() */
 	NULL,		/* override rcvdata() */
 	1,		/* refs always >= 1 */
 #ifdef	NETGRAPH_DEBUG
 	HK_MAGIC,
 	__FILE__,
 	__LINE__,
 	{NULL}
 #endif	/* NETGRAPH_DEBUG */
 };
 
 /*
  * END DEAD STRUCTURES
  */
 /* List nodes with unallocated work */
 static STAILQ_HEAD(, ng_node) ng_worklist = STAILQ_HEAD_INITIALIZER(ng_worklist);
 static struct mtx	ng_worklist_mtx;   /* MUST LOCK NODE FIRST */
 
 /* List of installed types */
 static LIST_HEAD(, ng_type) ng_typelist;
 static struct rwlock	ng_typelist_lock;
 #define	TYPELIST_RLOCK()	rw_rlock(&ng_typelist_lock)
 #define	TYPELIST_RUNLOCK()	rw_runlock(&ng_typelist_lock)
 #define	TYPELIST_WLOCK()	rw_wlock(&ng_typelist_lock)
 #define	TYPELIST_WUNLOCK()	rw_wunlock(&ng_typelist_lock)
 
 /* Hash related definitions. */
 LIST_HEAD(nodehash, ng_node);
 static VNET_DEFINE(struct nodehash *, ng_ID_hash);
 static VNET_DEFINE(u_long, ng_ID_hmask);
 static VNET_DEFINE(u_long, ng_nodes);
 static VNET_DEFINE(struct nodehash *, ng_name_hash);
 static VNET_DEFINE(u_long, ng_name_hmask);
 static VNET_DEFINE(u_long, ng_named_nodes);
 #define	V_ng_ID_hash		VNET(ng_ID_hash)
 #define	V_ng_ID_hmask		VNET(ng_ID_hmask)
 #define	V_ng_nodes		VNET(ng_nodes)
 #define	V_ng_name_hash		VNET(ng_name_hash)
 #define	V_ng_name_hmask		VNET(ng_name_hmask)
 #define	V_ng_named_nodes	VNET(ng_named_nodes)
 
 static struct rwlock	ng_idhash_lock;
 #define	IDHASH_RLOCK()		rw_rlock(&ng_idhash_lock)
 #define	IDHASH_RUNLOCK()	rw_runlock(&ng_idhash_lock)
 #define	IDHASH_WLOCK()		rw_wlock(&ng_idhash_lock)
 #define	IDHASH_WUNLOCK()	rw_wunlock(&ng_idhash_lock)
 
 /* Method to find a node.. used twice so do it here */
 #define NG_IDHASH_FN(ID) ((ID) % (V_ng_ID_hmask + 1))
 #define NG_IDHASH_FIND(ID, node)					\
 	do { 								\
 		rw_assert(&ng_idhash_lock, RA_LOCKED);			\
 		LIST_FOREACH(node, &V_ng_ID_hash[NG_IDHASH_FN(ID)],	\
 						nd_idnodes) {		\
 			if (NG_NODE_IS_VALID(node)			\
 			&& (NG_NODE_ID(node) == ID)) {			\
 				break;					\
 			}						\
 		}							\
 	} while (0)
 
 static struct rwlock	ng_namehash_lock;
 #define	NAMEHASH_RLOCK()	rw_rlock(&ng_namehash_lock)
 #define	NAMEHASH_RUNLOCK()	rw_runlock(&ng_namehash_lock)
 #define	NAMEHASH_WLOCK()	rw_wlock(&ng_namehash_lock)
 #define	NAMEHASH_WUNLOCK()	rw_wunlock(&ng_namehash_lock)
 
 /* Internal functions */
 static int	ng_add_hook(node_p node, const char *name, hook_p * hookp);
 static int	ng_generic_msg(node_p here, item_p item, hook_p lasthook);
 static ng_ID_t	ng_decodeidname(const char *name);
 static int	ngb_mod_event(module_t mod, int event, void *data);
 static void	ng_worklist_add(node_p node);
 static void	ngthread(void *);
 static int	ng_apply_item(node_p node, item_p item, int rw);
 static void	ng_flush_input_queue(node_p node);
 static node_p	ng_ID2noderef(ng_ID_t ID);
 static int	ng_con_nodes(item_p item, node_p node, const char *name,
 		    node_p node2, const char *name2);
 static int	ng_con_part2(node_p node, item_p item, hook_p hook);
 static int	ng_con_part3(node_p node, item_p item, hook_p hook);
 static int	ng_mkpeer(node_p node, const char *name, const char *name2,
 		    char *type);
 static void	ng_name_rehash(void);
 static void	ng_ID_rehash(void);
 
 /* Imported, these used to be externally visible, some may go back. */
 void	ng_destroy_hook(hook_p hook);
 int	ng_path2noderef(node_p here, const char *path,
 	node_p *dest, hook_p *lasthook);
 int	ng_make_node(const char *type, node_p *nodepp);
 int	ng_path_parse(char *addr, char **node, char **path, char **hook);
 void	ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3);
 void	ng_unname(node_p node);
 
 /*
  * Our own netgraph malloc types.  M_NETGRAPH and M_NETGRAPH_MSG are
  * defined without "static"; the hook, node and item types are private
  * to this file.
  */
 MALLOC_DEFINE(M_NETGRAPH, "netgraph", "netgraph structures and ctrl messages");
 MALLOC_DEFINE(M_NETGRAPH_MSG, "netgraph_msg", "netgraph name storage");
 static MALLOC_DEFINE(M_NETGRAPH_HOOK, "netgraph_hook",
     "netgraph hook structures");
 static MALLOC_DEFINE(M_NETGRAPH_NODE, "netgraph_node",
     "netgraph node structures");
 static MALLOC_DEFINE(M_NETGRAPH_ITEM, "netgraph_item",
     "netgraph item structures");
 
 /* Should not be visible outside this file */
 
 /*
  * Raw allocators: zero-filled (M_ZERO) and requested with M_NOWAIT, so
  * the assigned pointer must be checked for NULL by the user of the macro.
  */
 #define _NG_ALLOC_HOOK(hook) \
 	hook = malloc(sizeof(*hook), M_NETGRAPH_HOOK, M_NOWAIT | M_ZERO)
 #define _NG_ALLOC_NODE(node) \
 	node = malloc(sizeof(*node), M_NETGRAPH_NODE, M_NOWAIT | M_ZERO)
 
 /* Per-queue mutex helpers; (n) is a pointer to the queue structure. */
 #define	NG_QUEUE_LOCK_INIT(n)			\
 	mtx_init(&(n)->q_mtx, "ng_node", NULL, MTX_DEF)
 #define	NG_QUEUE_LOCK(n)			\
 	mtx_lock(&(n)->q_mtx)
 #define	NG_QUEUE_UNLOCK(n)			\
 	mtx_unlock(&(n)->q_mtx)
 /* Worklist mutex helpers, plus sleep/wakeup on the ng_worklist channel. */
 #define	NG_WORKLIST_LOCK_INIT()			\
 	mtx_init(&ng_worklist_mtx, "ng_worklist", NULL, MTX_DEF)
 #define	NG_WORKLIST_LOCK()			\
 	mtx_lock(&ng_worklist_mtx)
 #define	NG_WORKLIST_UNLOCK()			\
 	mtx_unlock(&ng_worklist_mtx)
 #define	NG_WORKLIST_SLEEP()			\
 	mtx_sleep(&ng_worklist, &ng_worklist_mtx, PI_NET, "sleep", 0)
 #define	NG_WORKLIST_WAKEUP()			\
 	wakeup_one(&ng_worklist)
 
 #ifdef NETGRAPH_DEBUG /*----------------------------------------------*/
 /*
  * In debug mode:
  * In an attempt to help track reference count screwups
  * we do not free objects back to the malloc system, but keep them
  * in a local cache where we can examine them and keep information safely
  * after they have been freed.
  * We use this scheme for nodes and hooks, and to some extent for items.
  */
 static __inline hook_p
 ng_alloc_hook(void)
 {
 	hook_p hook;
 	SLIST_ENTRY(ng_hook) temp;
 	mtx_lock(&ng_nodelist_mtx);
 	hook = LIST_FIRST(&ng_freehooks);
 	if (hook) {
 		LIST_REMOVE(hook, hk_hooks);
 		bcopy(&hook->hk_all, &temp, sizeof(temp));
 		bzero(hook, sizeof(struct ng_hook));
 		bcopy(&temp, &hook->hk_all, sizeof(temp));
 		mtx_unlock(&ng_nodelist_mtx);
 		hook->hk_magic = HK_MAGIC;
 	} else {
 		mtx_unlock(&ng_nodelist_mtx);
 		_NG_ALLOC_HOOK(hook);
 		if (hook) {
 			hook->hk_magic = HK_MAGIC;
 			mtx_lock(&ng_nodelist_mtx);
 			SLIST_INSERT_HEAD(&ng_allhooks, hook, hk_all);
 			mtx_unlock(&ng_nodelist_mtx);
 		}
 	}
 	return (hook);
 }
 
 static __inline node_p
 ng_alloc_node(void)
 {
 	node_p node;
 	SLIST_ENTRY(ng_node) temp;
 	mtx_lock(&ng_nodelist_mtx);
 	node = LIST_FIRST(&ng_freenodes);
 	if (node) {
 		LIST_REMOVE(node, nd_nodes);
 		bcopy(&node->nd_all, &temp, sizeof(temp));
 		bzero(node, sizeof(struct ng_node));
 		bcopy(&temp, &node->nd_all, sizeof(temp));
 		mtx_unlock(&ng_nodelist_mtx);
 		node->nd_magic = ND_MAGIC;
 	} else {
 		mtx_unlock(&ng_nodelist_mtx);
 		_NG_ALLOC_NODE(node);
 		if (node) {
 			node->nd_magic = ND_MAGIC;
 			mtx_lock(&ng_nodelist_mtx);
 			SLIST_INSERT_HEAD(&ng_allnodes, node, nd_all);
 			mtx_unlock(&ng_nodelist_mtx);
 		}
 	}
 	return (node);
 }
 
 /* Debug-mode allocation goes through the recycling allocators above. */
 #define NG_ALLOC_HOOK(hook) do { (hook) = ng_alloc_hook(); } while (0)
 #define NG_ALLOC_NODE(node) do { (node) = ng_alloc_node(); } while (0)
 
 /*
  * Debug-mode "free": the object is not returned to malloc but parked on
  * the free list with its magic cleared, so it can still be examined.
  * The macro argument is parenthesized wherever it is dereferenced so
  * that any pointer expression can be passed safely.
  */
 #define NG_FREE_HOOK(hook)						\
 	do {								\
 		mtx_lock(&ng_nodelist_mtx);				\
 		LIST_INSERT_HEAD(&ng_freehooks, (hook), hk_hooks);	\
 		(hook)->hk_magic = 0;					\
 		mtx_unlock(&ng_nodelist_mtx);				\
 	} while (0)
 
 #define NG_FREE_NODE(node)						\
 	do {								\
 		mtx_lock(&ng_nodelist_mtx);				\
 		LIST_INSERT_HEAD(&ng_freenodes, (node), nd_nodes);	\
 		(node)->nd_magic = 0;					\
 		mtx_unlock(&ng_nodelist_mtx);				\
 	} while (0)
 
 #else /* NETGRAPH_DEBUG */ /*----------------------------------------------*/
 
 /* Non-debug build: allocate and free directly through malloc(9). */
 #define NG_ALLOC_HOOK(hook) _NG_ALLOC_HOOK(hook)
 #define NG_ALLOC_NODE(node) _NG_ALLOC_NODE(node)
 
 #define NG_FREE_HOOK(hook) do { free((hook), M_NETGRAPH_HOOK); } while (0)
 #define NG_FREE_NODE(node) do { free((node), M_NETGRAPH_NODE); } while (0)
 
 #endif /* NETGRAPH_DEBUG */ /*----------------------------------------------*/
 
 /*
  * Set this to kdb_enter("X") to catch all errors as they occur.
  * By default it expands to nothing.
  */
 #ifndef TRAP_ERROR
 #define TRAP_ERROR()
 #endif
 
 /* Next node ID to hand out (per vnet); starts at 1. */
 static VNET_DEFINE(ng_ID_t, nextID) = 1;
 #define	V_nextID			VNET(nextID)
 
 /*
  * Sanity check for a data mbuf chain, active only under INVARIANTS.
  * Asserts that (m) carries a packet header, panics if any mbuf in the
  * chain has m_nextpkt set, and panics if the sum of the m_len values
  * differs from m_pkthdr.len.  Expands to nothing otherwise.
  */
 #ifdef INVARIANTS
 #define CHECK_DATA_MBUF(m)	do {					\
 		struct mbuf *n;						\
 		int total;						\
 									\
 		M_ASSERTPKTHDR(m);					\
 		for (total = 0, n = (m); n != NULL; n = n->m_next) {	\
 			total += n->m_len;				\
 			if (n->m_nextpkt != NULL)			\
 				panic("%s: m_nextpkt", __func__);	\
 		}							\
 									\
 		if ((m)->m_pkthdr.len != total) {			\
 			panic("%s: %d != %d",				\
 			    __func__, (m)->m_pkthdr.len, total);	\
 		}							\
 	} while (0)
 #else
 #define CHECK_DATA_MBUF(m)
 #endif
 
 /*
  * Set the local variable "error" to (x) and jump to the "done" label;
  * the using function must provide both.
  */
 #define ERROUT(x)	do { error = (x); goto done; } while (0)
 
 /************************************************************************
 	Parse type definitions for generic messages
 ************************************************************************/
 
 /*
  * Handy structure parse type defining macro.
  *
  * For a lower-case name "lo" and upper-case name "up" this defines:
  *   ng_<lo>_type_fields[]  - the field table, initialised from
  *                            NG_GENERIC_<up>_INFO applied to "args";
  *   ng_generic_<lo>_type   - a struct parse type using that table.
  * "args" is a parenthesised argument list, possibly empty.
  */
 #define DEFINE_PARSE_STRUCT_TYPE(lo, up, args)				\
 static const struct ng_parse_struct_field				\
 	ng_ ## lo ## _type_fields[] = NG_GENERIC_ ## up ## _INFO args;	\
 static const struct ng_parse_type ng_generic_ ## lo ## _type = {	\
 	&ng_parse_struct_type,						\
 	&ng_ ## lo ## _type_fields					\
 }
 
 DEFINE_PARSE_STRUCT_TYPE(mkpeer, MKPEER, ());
 DEFINE_PARSE_STRUCT_TYPE(connect, CONNECT, ());
 DEFINE_PARSE_STRUCT_TYPE(name, NAME, ());
 DEFINE_PARSE_STRUCT_TYPE(rmhook, RMHOOK, ());
 DEFINE_PARSE_STRUCT_TYPE(nodeinfo, NODEINFO, ());
 DEFINE_PARSE_STRUCT_TYPE(typeinfo, TYPEINFO, ());
 /* linkinfo embeds a nodeinfo, so it is handed the nodeinfo parse type. */
 DEFINE_PARSE_STRUCT_TYPE(linkinfo, LINKINFO, (&ng_generic_nodeinfo_type));
 
 /*
  * Array length callback for arrays whose element count is stored as a
  * 32 bit value immediately preceding the array data (as with struct
  * namelist and struct typelist).  "buf" points at the array itself.
  */
 static int
 ng_generic_list_getLength(const struct ng_parse_type *type,
 	const u_char *start, const u_char *buf)
 {
 	const u_int32_t *count;
 
 	/* Step back over the 32 bit counter that sits just before buf. */
 	count = (const u_int32_t *)(buf - sizeof(*count));
 	return (*count);
 }
 
 /*
  * Array length callback for the struct linkinfo array inside a struct
  * hooklist: the count is the "hooks" member of the embedded nodeinfo.
  * "start" points at the enclosing struct hooklist.
  */
 static int
 ng_generic_linkinfo_getLength(const struct ng_parse_type *type,
 	const u_char *start, const u_char *buf)
 {
 	return (((const struct hooklist *)start)->nodeinfo.hooks);
 }
 
 /*
  * Array type for a variable length array of struct nodeinfo, as found
  * in struct namelist; the length comes from the 32 bit count stored
  * just before the array.
  */
 static const struct ng_parse_array_info ng_nodeinfoarray_type_info = {
 	&ng_generic_nodeinfo_type,
 	&ng_generic_list_getLength
 };
 static const struct ng_parse_type ng_generic_nodeinfoarray_type = {
 	&ng_parse_array_type,
 	&ng_nodeinfoarray_type_info
 };
 
 /*
  * Array type for a variable length array of struct typeinfo, as found
  * in struct typelist; same length convention as above.
  */
 static const struct ng_parse_array_info ng_typeinfoarray_type_info = {
 	&ng_generic_typeinfo_type,
 	&ng_generic_list_getLength
 };
 static const struct ng_parse_type ng_generic_typeinfoarray_type = {
 	&ng_parse_array_type,
 	&ng_typeinfoarray_type_info
 };
 
 /*
  * Array type for array of struct linkinfo in struct hooklist; the
  * length is taken from the hooklist's nodeinfo.hooks member.
  */
 static const struct ng_parse_array_info ng_generic_linkinfo_array_type_info = {
 	&ng_generic_linkinfo_type,
 	&ng_generic_linkinfo_getLength
 };
 static const struct ng_parse_type ng_generic_linkinfo_array_type = {
 	&ng_parse_array_type,
 	&ng_generic_linkinfo_array_type_info
 };
 
 /* Composite list types built from the array types defined above. */
 DEFINE_PARSE_STRUCT_TYPE(typelist, TYPELIST, (&ng_generic_typeinfoarray_type));
 DEFINE_PARSE_STRUCT_TYPE(hooklist, HOOKLIST,
 	(&ng_generic_nodeinfo_type, &ng_generic_linkinfo_array_type));
 DEFINE_PARSE_STRUCT_TYPE(listnodes, LISTNODES,
 	(&ng_generic_nodeinfoarray_type));
 
 /*
  * List of commands and how to convert arguments to/from ASCII.
  *
  * Each entry lists, in order: the type cookie, the command number, the
  * ASCII command name and two parse type pointers.
  * NOTE(review): the two parse types are presumably the argument type
  * and the response type, with NULL meaning "no payload" - confirm
  * against the definition of struct ng_cmdlist.
  * The table is terminated by an all-zero entry.
  */
 static const struct ng_cmdlist ng_generic_cmds[] = {
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_SHUTDOWN,
 	  "shutdown",
 	  NULL,
 	  NULL
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_MKPEER,
 	  "mkpeer",
 	  &ng_generic_mkpeer_type,
 	  NULL
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_CONNECT,
 	  "connect",
 	  &ng_generic_connect_type,
 	  NULL
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_NAME,
 	  "name",
 	  &ng_generic_name_type,
 	  NULL
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_RMHOOK,
 	  "rmhook",
 	  &ng_generic_rmhook_type,
 	  NULL
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_NODEINFO,
 	  "nodeinfo",
 	  NULL,
 	  &ng_generic_nodeinfo_type
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_LISTHOOKS,
 	  "listhooks",
 	  NULL,
 	  &ng_generic_hooklist_type
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_LISTNAMES,
 	  "listnames",
 	  NULL,
 	  &ng_generic_listnodes_type	/* same as NGM_LISTNODES */
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_LISTNODES,
 	  "listnodes",
 	  NULL,
 	  &ng_generic_listnodes_type
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_LISTTYPES,
 	  "listtypes",
 	  NULL,
 	  &ng_generic_typelist_type
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_TEXT_CONFIG,
 	  "textconfig",
 	  NULL,
 	  &ng_parse_string_type
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_TEXT_STATUS,
 	  "textstatus",
 	  NULL,
 	  &ng_parse_string_type
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_ASCII2BINARY,
 	  "ascii2binary",
 	  &ng_parse_ng_mesg_type,
 	  &ng_parse_ng_mesg_type
 	},
 	{
 	  NGM_GENERIC_COOKIE,
 	  NGM_BINARY2ASCII,
 	  "binary2ascii",
 	  &ng_parse_ng_mesg_type,
 	  &ng_parse_ng_mesg_type
 	},
 	{ 0 }
 };
 
 /************************************************************************
 			Node routines
 ************************************************************************/
 
 /*
  * Instantiate a node of the requested type.
  *
  * Returns EINVAL when no type name is given or when the type has no
  * constructor, ENXIO when the type is not registered (no attempt is
  * made to load a module), otherwise the result of creating the generic
  * node and running the type's constructor on it.  If the constructor
  * fails, the reference on the new node is dropped again.
  */
 int
 ng_make_node(const char *typename, node_p *nodepp)
 {
 	struct ng_type *type;
 	int	error;
 
 	/* A type name is mandatory. */
 	if (typename == NULL) {
 		TRAP_ERROR();
 		return (EINVAL);
 	}
 
 	/* Look the type up; a miss is simply reported to the caller. */
 	type = ng_findtype(typename);
 	if (type == NULL)
 		return (ENXIO);
 
 	/*
 	 * Without a constructor we cannot make the node ourselves.  Such
 	 * nodes must be brought into existence by some external agency,
 	 * which should call ng_make_node_common() directly to get the
 	 * netgraph part initialised.
 	 */
 	if (type->constructor == NULL) {
 		TRAP_ERROR();
 		return (EINVAL);
 	}
 
 	/* Build the generic part, then do type specific initialisation. */
 	error = ng_make_node_common(type, nodepp);
 	if (error != 0)
 		return (error);
 	error = (*type->constructor)(*nodepp);
 	if (error != 0) {
 		NG_NODE_UNREF(*nodepp);
 	}
 	return (error);
 }
 
 /*
  * Generic node creation. Called by node initialisation for externally
  * instantiated nodes (e.g. hardware, sockets, etc ).
  * The returned node has a reference count of 1.
  *
  * Returns EINVAL if the type is not registered, ENOMEM if no node could
  * be allocated, and 0 with the new node stored in *nodepp otherwise.
  */
 int
 ng_make_node_common(struct ng_type *type, node_p *nodepp)
 {
 	node_p node, clash;
 
 	/* The node type must already have been installed. */
 	if (ng_findtype(type->name) == NULL) {
 		TRAP_ERROR();
 		return (EINVAL);
 	}
 
 	/* Allocate the node and attach it to its type. */
 	NG_ALLOC_NODE(node);
 	if (node == NULL) {
 		TRAP_ERROR();
 		return (ENOMEM);
 	}
 	node->nd_type = type;
 #ifdef VIMAGE
 	node->nd_vnet = curvnet;
 #endif
 	NG_NODE_REF(node);				/* note reference */
 	type->refs++;
 
 	/* Empty input queue with its own mutex. */
 	NG_QUEUE_LOCK_INIT(&node->nd_input_queue);
 	STAILQ_INIT(&node->nd_input_queue.queue);
 	node->nd_input_queue.q_flags = 0;
 
 	/* No hooks yet. */
 	LIST_INIT(&node->nd_hooks);
 
 	/*
 	 * Pick an ID and enter the node in the ID hash.  Keep drawing
 	 * numbers until we get one that is neither zero nor already in
 	 * use (wrap protection, even if silly: 137/sec for 1 year before
 	 * wrap).
 	 */
 	IDHASH_WLOCK();
 	do {
 		clash = NULL;
 		node->nd_ID = V_nextID++;
 		NG_IDHASH_FIND(node->nd_ID, clash);
 	} while (node->nd_ID == 0 || clash != NULL);
 	V_ng_nodes++;
 	/* Grow the ID hash once it gets too densely populated. */
 	if (V_ng_nodes * 2 > V_ng_ID_hmask)
 		ng_ID_rehash();
 	LIST_INSERT_HEAD(&V_ng_ID_hash[NG_IDHASH_FN(node->nd_ID)], node,
 	    nd_idnodes);
 	IDHASH_WUNLOCK();
 
 	*nodepp = node;
 	return (0);
 }
 
 /*
  * Forcibly start the shutdown process on a node. Either call
  * its shutdown method, or do the default shutdown if there is
  * no type-specific method.
  *
  * We can only be called from a shutdown message, so we know we have
  * a writer lock, and therefore exclusive access. It also means
  * that we should not be on the work queue, but we check anyhow.
  *
  * Persistent node types must have a type-specific method which
  * allocates a new node in which case, this one is irretrievably going away,
  * or cleans up anything it needs, and just makes the node valid again,
  * in which case we allow the node to survive.
  *
  * The dummy1, dummy2 and dummy3 arguments are not used by this function.
  *
  * XXX We need to think of how to tell a persistent node that we
  * REALLY need to go away because the hardware has gone or we
  * are rebooting.... etc.
  */
 void
 ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3)
 {
 	hook_p hook;
 
 	/* Check if it's already shutting down */
 	if ((node->nd_flags & NGF_CLOSING) != 0)
 		return;
 
 	/* The shared dead node is never shut down. */
 	if (node == &ng_deadnode) {
 		printf ("shutdown called on deadnode\n");
 		return;
 	}
 
 	/* Add an extra reference so it doesn't go away during this */
 	NG_NODE_REF(node);
 
 	/*
 	 * Mark it invalid so any newcomers know not to try use it
 	 * Also add our own mark so we can't recurse
 	 * note that NGF_INVALID does not do this as it's also set during
 	 * creation
 	 */
 	node->nd_flags |= NGF_INVALID|NGF_CLOSING;
 
 	/* If node has its pre-shutdown method, then call it first*/
 	if (node->nd_type && node->nd_type->close)
 		(*node->nd_type->close)(node);
 
 	/*
 	 * Notify all remaining connected nodes to disconnect.
 	 * ng_destroy_hook() removes the hook from nd_hooks, so the loop
 	 * ends when the list is empty.
 	 */
 	while ((hook = LIST_FIRST(&node->nd_hooks)) != NULL)
 		ng_destroy_hook(hook);
 
 	/*
 	 * Drain the input queue forcibly.
 	 * it has no hooks so what's it going to do, bleed on someone?
 	 * Theoretically we came here from a queue entry that was added
 	 * Just before the queue was closed, so it should be empty anyway.
 	 * Also removes us from worklist if needed.
 	 */
 	ng_flush_input_queue(node);
 
 	/* Ask the type if it has anything to do in this case */
 	if (node->nd_type && node->nd_type->shutdown) {
 		(*node->nd_type->shutdown)(node);
 		if (NG_NODE_IS_VALID(node)) {
 			/*
 			 * Well, blow me down if the node code hasn't declared
 			 * that it doesn't want to die.
 			 * Presumably it is a persistent node.
 			 * If we REALLY want it to go away,
 			 *  e.g. hardware going away,
 			 * Our caller should set NGF_REALLY_DIE in nd_flags.
 			 */
 			node->nd_flags &= ~(NGF_INVALID|NGF_CLOSING);
 			NG_NODE_UNREF(node); /* Assume they still have theirs */
 			return;
 		}
 	} else {				/* do the default thing */
 		NG_NODE_UNREF(node);
 	}
 
 	ng_unname(node); /* basically a NOP these days */
 
 	/*
 	 * Remove extra reference, possibly the last
 	 * Possible other holders of references may include
 	 * timeout callouts, but theoretically the node's supposed to
 	 * have cancelled them. Possibly hardware dependencies may
 	 * force a driver to 'linger' with a reference.
 	 */
 	NG_NODE_UNREF(node);
 }
 
 /*
  * Remove a reference to the node, possibly the last.
  * ng_deadnode is never released: for it this call is a no-op.
  *
  * When the last reference goes away the node is removed from the name
  * hash (if it has a name) and from the ID hash, its input queue mutex
  * is destroyed and the node is freed.
  */
 void
 ng_unref_node(node_p node)
 {
 
 	if (node == &ng_deadnode)
 		return;
 
 	/* Operate in the vnet the node belongs to; restored before return. */
 	CURVNET_SET(node->nd_vnet);
 
 	if (refcount_release(&node->nd_refs)) { /* we were the last */
 
 		node->nd_type->refs--; /* XXX maybe should get types lock? */
 		NAMEHASH_WLOCK();
 		if (NG_NODE_HAS_NAME(node)) {
 			V_ng_named_nodes--;
 			LIST_REMOVE(node, nd_nodes);
 		}
 		NAMEHASH_WUNLOCK();
 
 		IDHASH_WLOCK();
 		V_ng_nodes--;
 		LIST_REMOVE(node, nd_idnodes);
 		IDHASH_WUNLOCK();
 
 		mtx_destroy(&node->nd_input_queue.q_mtx);
 		NG_FREE_NODE(node);
 	}
 	CURVNET_RESTORE();
 }
 
 /************************************************************************
 			Node ID handling
 ************************************************************************/
 /*
  * Look a node up by ID under the ID hash read lock.
  * Returns the node with a reference added for the caller, or NULL when
  * no valid node has that ID.
  */
 static node_p
 ng_ID2noderef(ng_ID_t ID)
 {
 	node_p found;
 
 	IDHASH_RLOCK();
 	NG_IDHASH_FIND(ID, found);
 	/* Take the reference while the lock still pins the node. */
 	if (found != NULL)
 		NG_NODE_REF(found);
 	IDHASH_RUNLOCK();
 
 	return (found);
 }
 
 /*
  * Return the ID of a node, or 0 when no node is given.
  */
 ng_ID_t
 ng_node2ID(node_p node)
 {
 	if (!node)
 		return (0);
 	return (NG_NODE_ID(node));
 }
 
 /************************************************************************
 			Node name handling
 ************************************************************************/
 
 /*
  * Assign a node a name.
  *
  * A valid name is non-empty, fits in NG_NODESIZ bytes including its
  * terminating NUL, contains no '.' or ':' and does not decode as an ID
  * name ("[hex]").
  *
  * Returns 0 on success, EINVAL for an invalid name and EADDRINUSE when
  * a valid node already carries the name.  If the node already had a
  * name it is moved to the hash chain of the new one.
  */
 int
 ng_name_node(node_p node, const char *name)
 {
 	uint32_t hash;
 	node_p node2;
 	int i;
 
 	/* Check the name is valid */
 	for (i = 0; i < NG_NODESIZ; i++) {
 		if (name[i] == '\0' || name[i] == '.' || name[i] == ':')
 			break;
 	}
 	/*
 	 * i == NG_NODESIZ means no terminator was found within the space
 	 * available for a node name.  Such a name must be rejected: it
 	 * would be truncated by the strlcpy() below while being hashed
 	 * untruncated, leaving the node on the wrong hash chain.  Testing
 	 * it first also avoids reading name[NG_NODESIZ].
 	 */
 	if (i == 0 || i == NG_NODESIZ || name[i] != '\0') {
 		TRAP_ERROR();
 		return (EINVAL);
 	}
 	if (ng_decodeidname(name) != 0) { /* valid IDs not allowed here */
 		TRAP_ERROR();
 		return (EINVAL);
 	}
 
 	NAMEHASH_WLOCK();
 	/* Grow the name hash first if it is getting crowded. */
 	if (V_ng_named_nodes * 2 > V_ng_name_hmask)
 		ng_name_rehash();
 
 	hash = hash32_str(name, HASHINIT) & V_ng_name_hmask;
 	/* Check the name isn't already being used. */
 	LIST_FOREACH(node2, &V_ng_name_hash[hash], nd_nodes)
 		if (NG_NODE_IS_VALID(node2) &&
 		    (strcmp(NG_NODE_NAME(node2), name) == 0)) {
 			NAMEHASH_WUNLOCK();
 			return (EADDRINUSE);
 		}
 
 	/* A rename leaves the count alone but must unhook the old entry. */
 	if (NG_NODE_HAS_NAME(node))
 		LIST_REMOVE(node, nd_nodes);
 	else
 		V_ng_named_nodes++;
 	/* Copy it. */
 	strlcpy(NG_NODE_NAME(node), name, NG_NODESIZ);
 	/* Update name hash. */
 	LIST_INSERT_HEAD(&V_ng_name_hash[hash], node, nd_nodes);
 	NAMEHASH_WUNLOCK();
 
 	return (0);
 }
 
 /*
  * Find a node by absolute name. The name should NOT end with ':'
  * The name "." means "this node" and "[xxx]" means "the node
  * with ID (ie, at address) xxx".
  *
  * Returns the node if found, else NULL.
  * Note it acquires a reference on the node so you can be sure it's still
  * there.
  */
 node_p
 ng_name2noderef(node_p here, const char *name)
 {
 	node_p node;
 	ng_ID_t temp;
 	uint32_t hash;
 
 	/* "." means "this node" */
 	if (strcmp(name, ".") == 0) {
 		NG_NODE_REF(here);
 		return(here);
 	}
 
 	/* Check for name-by-ID */
 	if ((temp = ng_decodeidname(name)) != 0) {
 		return (ng_ID2noderef(temp));
 	}
 
 	/*
 	 * Find node by name.  The bucket index depends on the current hash
 	 * mask, which ng_name_rehash() changes under the write lock, so it
 	 * must be computed only after the read lock has been taken;
 	 * otherwise a concurrent rehash could make us search a stale
 	 * bucket and miss an existing node.
 	 */
 	NAMEHASH_RLOCK();
 	hash = hash32_str(name, HASHINIT) & V_ng_name_hmask;
 	LIST_FOREACH(node, &V_ng_name_hash[hash], nd_nodes)
 		if (NG_NODE_IS_VALID(node) &&
 		    (strcmp(NG_NODE_NAME(node), name) == 0)) {
 			NG_NODE_REF(node);
 			break;
 		}
 	NAMEHASH_RUNLOCK();
 
 	/* node is NULL here if the chain was exhausted without a match. */
 	return (node);
 }
 
 /*
  * Decode an ID name, eg. "[f03034de]". Returns 0 if the
  * string is not valid, otherwise returns the value.
  */
 static ng_ID_t
 ng_decodeidname(const char *name)
 {
 	const int len = strlen(name);
 	char *eptr;
 	u_long val;
 
 	/* Check for proper length, brackets, no leading junk */
 	if ((len < 3) || (name[0] != '[') || (name[len - 1] != ']') ||
 	    (!isxdigit(name[1])))
 		return ((ng_ID_t)0);
 
 	/* Decode number */
 	val = strtoul(name + 1, &eptr, 16);
 	if ((eptr - name != len - 1) || (val == ULONG_MAX) || (val == 0))
 		return ((ng_ID_t)0);
 
 	return ((ng_ID_t)val);
 }
 
 /*
  * Remove a name from a node. This should only be called
  * when shutting down and removing the node.
  *
  * The body is intentionally empty: the name hash entry is removed in
  * ng_unref_node() when the last reference is dropped.
  */
 void
 ng_unname(node_p node)
 {
 }
 
 /*
  * Allocate a bigger name hash.
  *
  * Doubles the number of buckets and moves every named node to the new
  * table.  The new table is allocated with HASH_NOWAIT; if that fails
  * the old table is kept unchanged.  The caller in this file holds the
  * name hash write lock.
  */
 static void
 ng_name_rehash(void)
 {
 	struct nodehash *new;
 	uint32_t hash;
 	u_long hmask;
 	node_p node, node2;
 	int i;
 
 	new = hashinit_flags((V_ng_name_hmask + 1) * 2, M_NETGRAPH_NODE, &hmask,
 	    HASH_NOWAIT);
 	if (new == NULL)
 		return;
 
 	for (i = 0; i <= V_ng_name_hmask; i++)
 		LIST_FOREACH_SAFE(node, &V_ng_name_hash[i], nd_nodes, node2) {
 			/*
 			 * The old table is thrown away below, so unlinking
 			 * is only done under INVARIANTS.
 			 * NOTE(review): presumably because hashdestroy()
 			 * then checks that the buckets are empty - confirm.
 			 */
 #ifdef INVARIANTS
 			LIST_REMOVE(node, nd_nodes);
 #endif
 			hash = hash32_str(NG_NODE_NAME(node), HASHINIT) & hmask;
 			LIST_INSERT_HEAD(&new[hash], node, nd_nodes);
 		}
 
 	hashdestroy(V_ng_name_hash, M_NETGRAPH_NODE, V_ng_name_hmask);
 	V_ng_name_hash = new;
 	V_ng_name_hmask = hmask;
 }
 
 /*
  * Allocate a bigger ID hash.
  *
  * Doubles the number of buckets and moves every node to the new table.
  * The new table is allocated with HASH_NOWAIT; if that fails the old
  * table is kept unchanged.  The caller in this file holds the ID hash
  * write lock.
  */
 static void
 ng_ID_rehash(void)
 {
 	struct nodehash *new;
 	uint32_t hash;
 	u_long hmask;
 	node_p node, node2;
 	int i;
 
 	new = hashinit_flags((V_ng_ID_hmask + 1) * 2, M_NETGRAPH_NODE, &hmask,
 	    HASH_NOWAIT);
 	if (new == NULL)
 		return;
 
 	for (i = 0; i <= V_ng_ID_hmask; i++)
 		LIST_FOREACH_SAFE(node, &V_ng_ID_hash[i], nd_idnodes, node2) {
 			/*
 			 * The old table is thrown away below, so unlinking
 			 * is only done under INVARIANTS.
 			 * NOTE(review): presumably because hashdestroy()
 			 * then checks that the buckets are empty - confirm.
 			 */
 #ifdef INVARIANTS
 			LIST_REMOVE(node, nd_idnodes);
 #endif
 			hash = (node->nd_ID % (hmask + 1));
 			LIST_INSERT_HEAD(&new[hash], node, nd_idnodes);
 		}
 
 	/*
 	 * Destroy the old ID table using the ID hash's own mask; the name
 	 * hash mask describes a different table of possibly different size.
 	 */
 	hashdestroy(V_ng_ID_hash, M_NETGRAPH_NODE, V_ng_ID_hmask);
 	V_ng_ID_hash = new;
 	V_ng_ID_hmask = hmask;
 }
 
 /************************************************************************
 			Hook routines
  Names are not optional. Hooks are always connected, except for a
  brief moment within these routines. On invalidation or during creation
  they are connected to the 'dead' hook.
 ************************************************************************/
 
 /*
  * Drop one reference on a hook.
  *
  * The shared dead hook is never released.  When the last reference
  * goes away, the hook's reference on its node (it'll probably be
  * ng_deadnode) is dropped too and the hook itself is freed.
  */
 void
 ng_unref_hook(hook_p hook)
 {
 
 	if (hook == &ng_deadhook)
 		return;
 
 	/* Somebody else still holds a reference: nothing more to do. */
 	if (!refcount_release(&hook->hk_refs))
 		return;
 
 	/* We were the last. */
 	if (_NG_HOOK_NODE(hook))
 		_NG_NODE_UNREF((_NG_HOOK_NODE(hook)));
 	NG_FREE_HOOK(hook);
 }
 
 /*
  * Add an unconnected hook to a node. Only used internally.
  * Assumes node is locked. (XXX not yet true )
  *
  * Returns EINVAL without a name, EEXIST if the node already has a hook
  * of that name, ENOMEM if no hook could be allocated, or whatever the
  * node type's newhook method returns when it rejects the hook.  On
  * success the hook is linked into the node's hook list and, if hookp
  * is not NULL, returned through it with a reference for the caller.
  */
 static int
 ng_add_hook(node_p node, const char *name, hook_p *hookp)
 {
 	hook_p hk;
 	int error;
 
 	/* A name is required and must not be in use on this node. */
 	if (name == NULL) {
 		TRAP_ERROR();
 		return (EINVAL);
 	}
 	if (ng_findhook(node, name) != NULL) {
 		TRAP_ERROR();
 		return (EEXIST);
 	}
 
 	NG_ALLOC_HOOK(hk);
 	if (hk == NULL) {
 		TRAP_ERROR();
 		return (ENOMEM);
 	}
 
 	/* New hooks start out invalid and peered with the dead hook. */
 	hk->hk_refs = 1;		/* the reference we hand back */
 	hk->hk_flags = HK_INVALID;
 	hk->hk_peer = &ng_deadhook;
 	hk->hk_node = node;
 	NG_NODE_REF(node);		/* each hook counts as a reference */
 
 	strlcpy(NG_HOOK_NAME(hk), name, NG_HOOKSIZ);
 
 	/*
 	 * Let the node type veto the hook.  On refusal, dropping our
 	 * hook reference frees the hook and also unrefs the node.
 	 */
 	if (node->nd_type->newhook != NULL) {
 		error = (*node->nd_type->newhook)(node, hk, name);
 		if (error != 0) {
 			NG_HOOK_UNREF(hk);
 			return (error);
 		}
 	}
 
 	/*
 	 * Accepted so far: link it in.  The type is asked again later
 	 * when the hooks are actually connected.
 	 */
 	LIST_INSERT_HEAD(&node->nd_hooks, hk, hk_hooks);
 	node->nd_numhooks++;
 	NG_HOOK_REF(hk);	/* one for the node */
 
 	if (hookp != NULL)
 		*hookp = hk;
 	return (0);
 }
 
 /*
  * Find a hook by name on a node.
  *
  * If the node type supplies its own findhook method, its result is
  * returned directly; otherwise the node's hook list is searched
  * linearly for a valid hook with a matching name.  Returns NULL when
  * there is none.
  * XXX Possibly we should add a reference to the hook?
  */
 hook_p
 ng_findhook(node_p node, const char *name)
 {
 	hook_p cur;
 
 	if (node->nd_type->findhook != NULL)
 		return (*node->nd_type->findhook)(node, name);
 
 	LIST_FOREACH(cur, &node->nd_hooks, hk_hooks) {
 		if (!NG_HOOK_IS_VALID(cur))
 			continue;
 		if (strcmp(NG_HOOK_NAME(cur), name) == 0)
 			return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * Destroy a hook
  *
  * As hooks are always attached, this really destroys two hooks.
  * The one given, and the one attached to it. Disconnect the hooks
  * from each other first. We reconnect the peer hook to the 'dead'
  * hook so that it can still exist after we depart. We then
  * send the peer its own destroy message. This ensures that we only
  * interact with the peer's structures when it is locked processing that
  * message. We hold a reference to the peer hook so we are guaranteed that
  * the peer hook and node are still going to exist until
  * we are finished there as the hook holds a ref on the node.
  * We run this same code again on the peer hook, but that time it is already
  * attached to the 'dead' hook.
  *
  * This routine is called at all stages of hook creation
  * on error detection and must be able to handle any such stage.
  */
 void
 ng_destroy_hook(hook_p hook)
 {
 	hook_p peer;
 	node_p node;
 
 	if (hook == &ng_deadhook) {	/* better safe than sorry */
 		printf("ng_destroy_hook called on deadhook\n");
 		return;
 	}
 
 	/*
 	 * Protect divorce process with mutex, to avoid races on
 	 * simultaneous disconnect.
 	 */
 	TOPOLOGY_WLOCK();
 
 	hook->hk_flags |= HK_INVALID;
 
 	peer = NG_HOOK_PEER(hook);
 	node = NG_HOOK_NODE(hook);
 
 	if (peer && (peer != &ng_deadhook)) {
 		/*
 		 * Set the peer to point to ng_deadhook;
 		 * from this moment on we are effectively independent of it.
 		 * Send it an rmhook message of its own.
 		 */
 		peer->hk_peer = &ng_deadhook;	/* They no longer know us */
 		hook->hk_peer = &ng_deadhook;	/* Nor us, them */
 		if (NG_HOOK_NODE(peer) == &ng_deadnode) {
 			/*
 			 * If it's already divorced from a node,
 			 * just free it.
 			 */
 			TOPOLOGY_WUNLOCK();
 		} else {
 			TOPOLOGY_WUNLOCK();
 			ng_rmhook_self(peer); 	/* Send it a surprise */
 		}
 		NG_HOOK_UNREF(peer);		/* account for peer link */
 		NG_HOOK_UNREF(hook);		/* account for peer link */
 	} else
 		TOPOLOGY_WUNLOCK();
 
 	/* Every path above has released the topology lock. */
 	TOPOLOGY_NOTOWNED();
 
 	/*
 	 * Remove the hook from the node's list to avoid possible recursion
 	 * in case the disconnection results in node shutdown.
 	 */
 	if (node == &ng_deadnode) { /* happens if called from ng_con_nodes() */
 		return;
 	}
 	LIST_REMOVE(hook, hk_hooks);
 	node->nd_numhooks--;
 	if (node->nd_type->disconnect) {
 		/*
 		 * The type handler may elect to destroy the node so don't
 		 * trust its existence after this point (except
 		 * that we still hold a reference on it, which we
 		 * inherited from the hook we are destroying).
 		 */
 		(*node->nd_type->disconnect) (hook);
 	}
 
 	/*
 	 * Note that because we will point to ng_deadnode, the original node
 	 * is not decremented automatically so we do that manually.
 	 */
 	_NG_HOOK_NODE(hook) = &ng_deadnode;
 	NG_NODE_UNREF(node);	/* We no longer point to it so adjust count */
 	NG_HOOK_UNREF(hook);	/* Account for linkage (in list) to node */
 }
 
 /*
  * Take two hooks on a node and merge the connection so that the given node
  * is effectively bypassed.
  *
  * Returns EINVAL if the hooks belong to different nodes or if either
  * hook is not valid; otherwise the two peers are joined to each other,
  * both given hooks are destroyed and 0 is returned.
  */
 int
 ng_bypass(hook_p hook1, hook_p hook2)
 {
 	if (hook1->hk_node != hook2->hk_node) {
 		TRAP_ERROR();
 		return (EINVAL);
 	}
 	TOPOLOGY_WLOCK();
 	if (NG_HOOK_NOT_VALID(hook1) || NG_HOOK_NOT_VALID(hook2)) {
 		TOPOLOGY_WUNLOCK();
 		return (EINVAL);
 	}
 	/* Cross-connect the two peers so they point at each other. */
 	hook1->hk_peer->hk_peer = hook2->hk_peer;
 	hook2->hk_peer->hk_peer = hook1->hk_peer;
 
 	/* Our own hooks are now orphans, peered with the dead hook. */
 	hook1->hk_peer = &ng_deadhook;
 	hook2->hk_peer = &ng_deadhook;
 	TOPOLOGY_WUNLOCK();
 
 	/*
 	 * Drop one reference on each of our hooks.
 	 * NOTE(review): presumably the reference held for the former peer
 	 * link, which ng_destroy_hook() will not drop now that the peer is
 	 * ng_deadhook - confirm.
 	 */
 	NG_HOOK_UNREF(hook1);
 	NG_HOOK_UNREF(hook2);
 
 	/* XXX If we ever cache methods on hooks update them as well */
 	ng_destroy_hook(hook1);
 	ng_destroy_hook(hook2);
 	return (0);
 }
 
 /*
  * Install a new netgraph type.
  *
  * Returns EINVAL if the type was built against a different ABI version
  * or its name is empty or too long for NG_TYPESIZ, EEXIST if a type of
  * that name is already registered, and 0 once the type has been linked
  * into the type list.
  */
 int
 ng_newtype(struct ng_type *tp)
 {
 	const size_t namelen = strlen(tp->name);
 	const int badversion = (tp->version != NG_ABI_VERSION);
 
 	/* Check version and type name fields */
 	if (badversion || (namelen == 0) || (namelen >= NG_TYPESIZ)) {
 		TRAP_ERROR();
 		if (badversion) {
 			printf("Netgraph: Node type rejected. ABI mismatch. "
 			    "Suggest recompile\n");
 		}
 		return (EINVAL);
 	}
 
 	/* Refuse a second type with the same name. */
 	if (ng_findtype(tp->name) != NULL) {
 		TRAP_ERROR();
 		return (EEXIST);
 	}
 
 	/* Link in new type; the list itself holds the first reference. */
 	TYPELIST_WLOCK();
 	LIST_INSERT_HEAD(&ng_typelist, tp, types);
 	tp->refs = 1;
 	TYPELIST_WUNLOCK();
 
 	return (0);
 }
 
 /*
  * Unlink a netgraph type, provided no instances of it exist.
  *
  * A reference count of exactly 1 means only the type list refers to
  * the type.  Returns EBUSY if anything else still does, 0 after the
  * type has been removed from the list.
  */
 int
 ng_rmtype(struct ng_type *tp)
 {
 	if (tp->refs == 1) {
 		/* Unlink type */
 		TYPELIST_WLOCK();
 		LIST_REMOVE(tp, types);
 		TYPELIST_WUNLOCK();
 		return (0);
 	}
 
 	/* Still referenced by something other than the list. */
 	TRAP_ERROR();
 	return (EBUSY);
 }
 
 /*
  * Look for a type of the name given
  */
 struct ng_type *
 ng_findtype(const char *typename)
 {
 	struct ng_type *type;
 
 	TYPELIST_RLOCK();
 	LIST_FOREACH(type, &ng_typelist, types) {
 		if (strcmp(type->name, typename) == 0)
 			break;
 	}
 	TYPELIST_RUNLOCK();
 	return (type);
 }
 
 /************************************************************************
 			Composite routines
 ************************************************************************/
 /*
  * Connect two nodes using the specified hooks, using queued functions.
  *
  * ng_con_part3() is the last stage: it calls the connect method (if
  * any) of the hook's node type and then clears HK_INVALID on the hook.
  * The item is always freed here.  Returns 0 on success, ENOENT if the
  * hook has been destroyed in the meantime, or the error from the
  * connect method.
  */
 static int
 ng_con_part3(node_p node, item_p item, hook_p hook)
 {
 	int	error = 0;
 
 	/*
 	 * When we run, we know that the node 'node' is locked for us.
 	 * Our caller has a reference on the hook.
 	 * Our caller has a reference on the node.
 	 * (In this case our caller is ng_apply_item() ).
 	 * The peer hook has a reference on the hook.
 	 * We are all set up except for the final call to the node, and
 	 * the clearing of the INVALID flag.
 	 */
 	if (NG_HOOK_NODE(hook) == &ng_deadnode) {
 		/*
 		 * The node must have been freed again since we last visited
 		 * here. ng_destroy_hook() has this effect but nothing else does.
 		 * We should just release our references and
 		 * free anything we can think of.
 		 * Since we know it's been destroyed, and it's our caller
 		 * that holds the references, just return.
 		 */
 		ERROUT(ENOENT);
 	}
 	if (hook->hk_node->nd_type->connect) {
 		if ((error = (*hook->hk_node->nd_type->connect) (hook))) {
 			ng_destroy_hook(hook);	/* also zaps peer */
 			printf("failed in ng_con_part3()\n");
 			ERROUT(error);
 		}
 	}
 	/*
 	 *  XXX this is wrong for SMP. Possibly we need
 	 * to separate out 'create' and 'invalid' flags.
 	 * should only set flags on hooks we have locked under our node.
 	 */
 	hook->hk_flags &= ~HK_INVALID;
 done:
 	NG_FREE_ITEM(item);
 	return (error);
 }
 
 /*
  * Second stage of connecting two nodes, run in the context of the node
  * that receives the second hook.
  *
  * Attaches 'hook' (until now pointing at ng_deadnode) to 'node', lets
  * the node type accept and connect it, and then queues ng_con_part3()
  * to the peer's node, reusing the item.  On any failure the hook is
  * destroyed.  The item is either freed here (paths through "done") or
  * handed to ng_send_fn2().
  */
 static int
 ng_con_part2(node_p node, item_p item, hook_p hook)
 {
 	hook_p	peer;
 	int	error = 0;
 
 	/*
 	 * When we run, we know that the node 'node' is locked for us.
 	 * Our caller has a reference on the hook.
 	 * Our caller has a reference on the node.
 	 * (In this case our caller is ng_apply_item() ).
 	 * The peer hook has a reference on the hook.
 	 * our node pointer points to the 'dead' node.
 	 * First check the hook name is unique.
 	 * Should not happen because we checked before queueing this.
 	 */
 	if (ng_findhook(node, NG_HOOK_NAME(hook)) != NULL) {
 		TRAP_ERROR();
 		ng_destroy_hook(hook); /* should destroy peer too */
 		printf("failed in ng_con_part2()\n");
 		ERROUT(EEXIST);
 	}
 	/*
 	 * Check if the node type code has something to say about it
 	 * If it fails, the unref of the hook will also unref the attached node,
 	 * however since that node is 'ng_deadnode' this will do nothing.
 	 * The peer hook will also be destroyed.
 	 */
 	if (node->nd_type->newhook != NULL) {
 		if ((error = (*node->nd_type->newhook)(node, hook,
 		    hook->hk_name))) {
 			ng_destroy_hook(hook); /* should destroy peer too */
 			printf("failed in ng_con_part2()\n");
 			ERROUT(error);
 		}
 	}
 
 	/*
 	 * The 'type' agrees so far, so go ahead and link it in.
 	 * We'll ask again later when we actually connect the hooks.
 	 */
 	hook->hk_node = node;		/* just overwrite ng_deadnode */
 	NG_NODE_REF(node);		/* each hook counts as a reference */
 	LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks);
 	node->nd_numhooks++;
 	NG_HOOK_REF(hook);	/* one for the node */
 	
 	/*
 	 * We now have a symmetrical situation, where both hooks have been
 	 * linked to their nodes, the newhook methods have been called
 	 * And the references are all correct. The hooks are still marked
 	 * as invalid, as we have not called the 'connect' methods
 	 * yet.
 	 * We can call the local one immediately as we have the
 	 * node locked, but we need to queue the remote one.
 	 */
 	if (hook->hk_node->nd_type->connect) {
 		if ((error = (*hook->hk_node->nd_type->connect) (hook))) {
 			ng_destroy_hook(hook);	/* also zaps peer */
 			printf("failed in ng_con_part2(A)\n");
 			ERROUT(error);
 		}
 	}
 
 	/*
 	 * Acquire topo mutex to avoid race with ng_destroy_hook().
 	 * If the peer has already been replaced by the dead hook, the
 	 * other side is gone and the connection is abandoned.
 	 */
 	TOPOLOGY_RLOCK();
 	peer = hook->hk_peer;
 	if (peer == &ng_deadhook) {
 		TOPOLOGY_RUNLOCK();
 		printf("failed in ng_con_part2(B)\n");
 		ng_destroy_hook(hook);
 		ERROUT(ENOENT);
 	}
 	TOPOLOGY_RUNLOCK();
 
 	/* Finish on the peer's node; the item is passed on for reuse. */
 	if ((error = ng_send_fn2(peer->hk_node, peer, item, &ng_con_part3,
 	    NULL, 0, NG_REUSE_ITEM))) {
 		printf("failed in ng_con_part2(C)\n");
 		ng_destroy_hook(hook);	/* also zaps peer */
 		return (error);		/* item was consumed. */
 	}
 	hook->hk_flags &= ~HK_INVALID; /* need both to be able to work */
 	return (0);			/* item was consumed. */
 done:
 	NG_FREE_ITEM(item);
 	return (error);
 }
 
 /*
  * Connect this node with another node. We assume that this node is
  * currently locked, as we are only called from an NGM_CONNECT message.
  *
  * A hook called 'name' is added to 'node', and a second hook called 'name2'
  * is allocated for 'node2' but initially attached to ng_deadnode.  The two
  * hooks are cross-linked as peers and the rest of the work is handed to
  * ng_con_part2() through ng_send_fn2(), reusing 'item'.
  *
  * Returns 0 on success, EEXIST if node2 already has a hook called 'name2',
  * ENOMEM if the second hook cannot be allocated, or whatever error
  * ng_add_hook() or ng_send_fn2() reports.
  */
 static int
 ng_con_nodes(item_p item, node_p node, const char *name,
     node_p node2, const char *name2)
 {
 	int	error;
 	hook_p	hook;
 	hook_p	hook2;
 
 	/* Refuse early if the remote hook name is already taken. */
 	if (ng_findhook(node2, name2) != NULL) {
 		return(EEXIST);
 	}
 	if ((error = ng_add_hook(node, name, &hook)))  /* gives us a ref */
 		return (error);
 	/* Allocate the other hook and link it up */
 	NG_ALLOC_HOOK(hook2);
 	if (hook2 == NULL) {
 		TRAP_ERROR();
 		ng_destroy_hook(hook);	/* XXX check ref counts so far */
 		NG_HOOK_UNREF(hook);	/* including our ref */
 		return (ENOMEM);
 	}
 	hook2->hk_refs = 1;		/* start with a reference for us. */
 	hook2->hk_flags = HK_INVALID;	/* not usable until fully connected */
 	hook2->hk_peer = hook;		/* Link the two together */
 	hook->hk_peer = hook2;	
 	NG_HOOK_REF(hook);		/* Add a ref for the peer to each*/
 	NG_HOOK_REF(hook2);
 	/* Placeholder node; ng_con_part2() overwrites it with the real one. */
 	hook2->hk_node = &ng_deadnode;
 	strlcpy(NG_HOOK_NAME(hook2), name2, NG_HOOKSIZ);
 
 	/*
 	 * Queue the function above.
 	 * Processing continues in that function in the lock context of
 	 * the other node.
 	 */
 	if ((error = ng_send_fn2(node2, hook2, item, &ng_con_part2, NULL, 0,
 	    NG_NOFLAGS))) {
 		printf("failed in ng_con_nodes(): %d\n", error);
 		ng_destroy_hook(hook);	/* also zaps peer */
 	}
 
 	/* Drop the references this function itself was holding. */
 	NG_HOOK_UNREF(hook);		/* Let each hook go if it wants to */
 	NG_HOOK_UNREF(hook2);
 	return (error);
 }
 
 /*
  * Create a new node of the given type and connect it to the local node.
  * The local node is assumed to be locked.
  *
  * The new node probably does not need a lock until it has a hook, since
  * it cannot really have any work before then, but this deserves a bit
  * more thought: the other node may also fire up some hardware, a timer
  * or some other source of activation, and it may already receive a
  * command message via its ID.
  *
  * The same method as ng_con_nodes() could be used, but the ability to
  * remove the node on failure would have to be added (not hard, just make
  * arg1 point to the node to remove) - unless failure to connect is simply
  * ignored and an unconnected node is left behind.
  *
  * node:  local node that gets the hook called 'name'.
  * name2: name of the hook created on the new node.
  * type:  type name handed to ng_make_node().
  *
  * Returns 0 on success, otherwise the first error reported by
  * ng_make_node(), ng_add_hook() or one of the connect methods.
  */
 static int
 ng_mkpeer(node_p node, const char *name, const char *name2, char *type)
 {
 	node_p	peer_node;
 	hook_p	local_hook, peer_hook;
 	int	error;
 
 	error = ng_make_node(type, &peer_node);
 	if (error != 0)
 		return (error);
 
 	/* ng_add_hook() hands back a reference on the new hook. */
 	error = ng_add_hook(node, name, &local_hook);
 	if (error != 0) {
 		ng_rmnode(peer_node, NULL, NULL, 0);
 		return (error);
 	}
 
 	error = ng_add_hook(peer_node, name2, &peer_hook);
 	if (error != 0) {
 		ng_rmnode(peer_node, NULL, NULL, 0);
 		ng_destroy_hook(local_hook);
 		NG_HOOK_UNREF(local_hook);
 		return (error);
 	}
 
 	/* Cross-link the hooks; each one holds a reference on the other. */
 	local_hook->hk_peer = peer_hook;
 	peer_hook->hk_peer = local_hook;
 	NG_HOOK_REF(local_hook);
 	NG_HOOK_REF(peer_hook);
 
 	/*
 	 * Let each node type veto the pending connection; the second
 	 * connect method is only asked when the first one agreed.
 	 */
 	if (local_hook->hk_node->nd_type->connect != NULL)
 		error = (*local_hook->hk_node->nd_type->connect)(local_hook);
 	if (error == 0 && peer_hook->hk_node->nd_type->connect != NULL)
 		error = (*peer_hook->hk_node->nd_type->connect)(peer_hook);
 
 	if (error == 0) {
 		/* As a last act, allow the hooks to be used. */
 		local_hook->hk_flags &= ~HK_INVALID;
 		peer_hook->hk_flags &= ~HK_INVALID;
 	} else {
 		ng_destroy_hook(peer_hook);	/* also zaps local_hook */
 		ng_rmnode(peer_node, NULL, NULL, 0);
 	}
 
 	/* Drop the references we were holding on the two hooks. */
 	NG_HOOK_UNREF(local_hook);
 	NG_HOOK_UNREF(peer_hook);
 	return (error);
 }
 
 /************************************************************************
 		Utility routines to send self messages
 ************************************************************************/
 	
 /*
  * Shut this node down as soon as everyone is clear of it.
  * Should add arg "immediately" to jump the queue.
  *
  * The node is flagged NGF_INVALID right away; unless it is ng_deadnode or
  * already has NGF_CLOSING set, ng_rmnode() is then sent to it through
  * ng_send_fn().  Returns 0 or the error from ng_send_fn().
  */
 int
 ng_rmnode_self(node_p node)
 {
 	/* Nothing to do for the dead-node placeholder. */
 	if (node == &ng_deadnode)
 		return (0);
 
 	node->nd_flags |= NGF_INVALID;
 	if ((node->nd_flags & NGF_CLOSING) != 0)
 		return (0);
 
 	return (ng_send_fn(node, NULL, &ng_rmnode, NULL, 0));
 }
 
 /*
  * Function item sent by ng_rmhook_self(): destroy the given hook.
  * The node, arg1 and arg2 parameters are not used.
  */
 static void
 ng_rmhook_part2(node_p node, hook_p hook, void *arg1, int arg2)
 {
 
 	ng_destroy_hook(hook);
 }
 
 /*
  * Ask the node owning 'hook' to destroy it, by sending ng_rmhook_part2()
  * to that node through ng_send_fn().  A hook whose node is ng_deadnode is
  * left alone.  Returns 0 or the error from ng_send_fn().
  */
 int
 ng_rmhook_self(hook_p hook)
 {
 	node_p	owner;
 
 	owner = NG_HOOK_NODE(hook);
 	if (owner == &ng_deadnode)
 		return (0);
 
 	return (ng_send_fn(owner, hook, &ng_rmhook_part2, NULL, 0));
 }
 
 /***********************************************************************
  * Parse and verify a string of the form:  <NODE:><PATH>
  *
  * Such a string can refer to a specific node or a specific hook
  * on a specific node, depending on how you look at it. In the
  * latter case, the PATH component must not end in a dot.
  *
  * Both <NODE:> and <PATH> are optional. The <PATH> is a string
  * of hook names separated by dots. The string is split in place
  * (the ':' and a trailing '.' are overwritten with NUL), setting
  * *nodep to "NODE" (or NULL if none) and *pathp to "PATH" (or NULL
  * if degenerate). Also, *hookp will point to the final hook
  * component of <PATH> when the path is a single component, otherwise
  * NULL.
  *
  * This returns -1 if the path is malformed and 0 otherwise. The
  * char ** arguments are optional and are only written on success.
  ***********************************************************************/
 int
 ng_path_parse(char *addr, char **nodep, char **pathp, char **hookp)
 {
 	char	*node, *path, *hook, *cp;
 	size_t	len;
 
 	/* Look for the ':' that ends an absolute NODE, if there is one. */
 	cp = addr;
 	while (*cp != '\0' && *cp != ':')
 		cp++;
 
 	if (*cp == '\0') {
 		/* No absolute NODE; the whole string is the PATH. */
 		node = NULL;
 		path = addr;
 	} else {
 		/* Cut the string at the colon: NODE before, PATH after. */
 		node = addr;
 		*cp = '\0';
 		path = cp + 1;
 
 		/* Node name must not be empty */
 		if (*node == '\0')
 			return (-1);
 
 		/* A name of "." is OK; otherwise '.' not allowed */
 		if (strcmp(node, ".") != 0) {
 			for (cp = node; *cp != '\0'; cp++) {
 				if (*cp == '.')
 					return (-1);
 			}
 		}
 	}
 
 	/* PATH may contain neither a second ':' nor repeated dots. */
 	for (cp = path; *cp != '\0'; cp++) {
 		if (*cp == ':')
 			return (-1);
 		if (cp[0] == '.' && cp[1] == '.')
 			return (-1);
 	}
 
 	/* Remove extra (degenerate) dots from beginning or end of PATH */
 	if (*path == '.')
 		path++;
 	len = strlen(path);
 	if (len != 0 && path[len - 1] == '.')
 		path[len - 1] = '\0';
 
 	if (*path == '\0') {
 		/* Degenerate PATH: report neither a path nor a hook. */
 		path = NULL;
 		hook = NULL;
 	} else {
 		/* If PATH has a dot, then we're not talking about a hook */
 		hook = path;
 		for (cp = path; *cp != '\0'; cp++) {
 			if (*cp == '.') {
 				hook = NULL;
 				break;
 			}
 		}
 	}
 
 	/* Done */
 	if (nodep != NULL)
 		*nodep = node;
 	if (pathp != NULL)
 		*pathp = path;
 	if (hookp != NULL)
 		*hookp = hook;
 	return (0);
 }
 
 /*
  * Given a path, which may be absolute or relative, and a starting node,
  * return the destination node.
  *
  * here:     starting node for a relative address; may be NULL only when
  *           the address names an absolute node (EINVAL otherwise).
  * address:  "<NODE:><PATH>" string; it is copied (truncated to
  *           NG_PATHSIZ - 1 characters) before being parsed.
  * destp:    mandatory; set to NULL on entry and to the destination node,
  *           with a reference held for the caller, on success.
  * lasthook: optional; on success it receives NULL when the address has
  *           no path component, otherwise the peer of the last hook that
  *           was followed, with a reference added.  It is not written on
  *           the error paths.
  *
  * Returns 0 on success, EINVAL for a NULL destp, a malformed address or a
  * relative address without a starting node, ENOENT when the named node or
  * a hook along the path cannot be used, and ENXIO when a node reached
  * along the path is no longer valid.
  */
 int
 ng_path2noderef(node_p here, const char *address, node_p *destp,
     hook_p *lasthook)
 {
 	char    fullpath[NG_PATHSIZ];
 	char   *nodename, *path;
 	node_p  node, oldnode;
 
 	/* Initialize */
 	if (destp == NULL) {
 		TRAP_ERROR();
 		return EINVAL;
 	}
 	*destp = NULL;
 
 	/* Make a writable copy of address for ng_path_parse() */
 	strncpy(fullpath, address, sizeof(fullpath) - 1);
 	/* strncpy() does not terminate a truncated copy, so do it here. */
 	fullpath[sizeof(fullpath) - 1] = '\0';
 
 	/* Parse out node and sequence of hooks */
 	if (ng_path_parse(fullpath, &nodename, &path, NULL) < 0) {
 		TRAP_ERROR();
 		return EINVAL;
 	}
 
 	/*
 	 * For an absolute address, jump to the starting node.
 	 * Note that this holds a reference on the node for us.
 	 * Don't forget to drop the reference if we don't need it.
 	 */
 	if (nodename) {
 		node = ng_name2noderef(here, nodename);
 		if (node == NULL) {
 			TRAP_ERROR();
 			return (ENOENT);
 		}
 	} else {
 		if (here == NULL) {
 			TRAP_ERROR();
 			return (EINVAL);
 		}
 		node = here;
 		NG_NODE_REF(node);
 	}
 
 	/* No path component: the starting node is the destination. */
 	if (path == NULL) {
 		if (lasthook != NULL)
 			*lasthook = NULL;
 		*destp = node;
 		return (0);
 	}
 
 	/*
 	 * Now follow the sequence of hooks
 	 *
 	 * XXXGL: The path may demolish as we go the sequence, but if
 	 * we hold the topology mutex at critical places, then, I hope,
 	 * we would always have valid pointers in hand, although the
 	 * path behind us may no longer exist.
 	 */
 	for (;;) {
 		hook_p hook;
 		char *segment;
 
 		/*
 		 * Break out the next path segment. Replace the dot we just
 		 * found with a NUL; "path" points to the next segment (or the
 		 * NUL at the end).
 		 */
 		for (segment = path; *path != '\0'; path++) {
 			if (*path == '.') {
 				*path++ = '\0';
 				break;
 			}
 		}
 
 		/* We have a segment, so look for a hook by that name */
 		hook = ng_findhook(node, segment);
 
 		TOPOLOGY_WLOCK();
 		/* Can't get there from here... */
 		if (hook == NULL || NG_HOOK_PEER(hook) == NULL ||
 		    NG_HOOK_NOT_VALID(hook) ||
 		    NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))) {
 			TRAP_ERROR();
 			NG_NODE_UNREF(node);
 			TOPOLOGY_WUNLOCK();
 			return (ENOENT);
 		}
 
 		/*
 		 * Hop on over to the next node
 		 * XXX
 		 * Big race conditions here as hooks and nodes go away
 		 * *** Idea.. store an ng_ID_t in each hook and use that
 		 * instead of the direct hook in this crawl?
 		 */
 		oldnode = node;
 		if ((node = NG_PEER_NODE(hook)))
 			NG_NODE_REF(node);	/* XXX RACE */
 		NG_NODE_UNREF(oldnode);	/* XXX another race */
 		if (NG_NODE_NOT_VALID(node)) {
 			NG_NODE_UNREF(node);	/* XXX more races */
 			TOPOLOGY_WUNLOCK();
 			TRAP_ERROR();
 			return (ENXIO);
 		}
 
 		/* Last segment consumed: hand the results to the caller. */
 		if (*path == '\0') {
 			if (lasthook != NULL) {
 				/*
 				 * hook was already checked against NULL
 				 * above, so the else branch is not expected
 				 * to be taken.
 				 */
 				if (hook != NULL) {
 					*lasthook = NG_HOOK_PEER(hook);
 					NG_HOOK_REF(*lasthook);
 				} else
 					*lasthook = NULL;
 			}
 			TOPOLOGY_WUNLOCK();
 			*destp = node;
 			return (0);
 		}
 		TOPOLOGY_WUNLOCK();
 	}
 }
 
 /***************************************************************\
 * Input queue handling.
 * All activities are submitted to the node via the input queue
 * which implements a multiple-reader/single-writer gate.
 * Items which cannot be handled immediately are queued.
 *
 * read-write queue locking inline functions			*
 \***************************************************************/
 
 /* Forward declarations of the input queue helpers defined further down. */
 static __inline void	ng_queue_rw(node_p node, item_p  item, int rw);
 static __inline item_p	ng_dequeue(node_p node, int *rw);
 static __inline item_p	ng_acquire_read(node_p node, item_p  item);
 static __inline item_p	ng_acquire_write(node_p node, item_p  item);
 static __inline void	ng_leave_read(node_p node);
 static __inline void	ng_leave_write(node_p node);
 
 /*
  * Definition of the bits fields in the ng_queue flag word.
  * Defined here rather than in netgraph.h because no-one should fiddle
  * with them.
  *
  * The ordering here may be important! don't shuffle these.
  */
 /*-
  Safety Barrier--------+ (adjustable to suit taste) (not used yet)
                        |
                        V
 +-------+-------+-------+-------+-------+-------+-------+-------+
   | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
   | |A|c|t|i|v|e| |R|e|a|d|e|r| |C|o|u|n|t| | | | | | | | | |P|A|
   | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |O|W|
 +-------+-------+-------+-------+-------+-------+-------+-------+
   \___________________________ ____________________________/ | |
                             V                                | |
                   [active reader count]                      | |
                                                              | |
             Operation Pending -------------------------------+ |
                                                                |
           Active Writer ---------------------------------------+
 
 Node queue has such semantics:
 - All flags modifications are atomic.
 - Reader count can be incremented only if neither the writer nor the pending
   flag is set. Since this can't be done with a single operation, it is
   implemented with a spin loop and atomic_cmpset().
 - Writer flag can be set only if no other bits are set.
   It is implemented with atomic_cmpset().
 - Pending flag can be set any time, but to avoid collision on queue processing
   all queue fields are protected by the mutex.
 - Queue processing thread reads queue holding the mutex, but releases it while
   processing. When queue is empty pending flag is removed.
 */
 
 /* Bits of the input queue flag word (q_flags). */
 #define WRITER_ACTIVE	0x00000001
 #define OP_PENDING	0x00000002
 /* Amount added to or subtracted from q_flags per active reader. */
 #define READER_INCREMENT 0x00000004
 #define READER_MASK	0xfffffffc	/* Not valid if WRITER_ACTIVE is set */
 #define SAFETY_BARRIER	0x00100000	/* 128K items queued should be enough */
 
 /* Defines of more elaborate states on the queue */
 /* Mask of bits a new read cares about */
 #define NGQ_RMASK	(WRITER_ACTIVE|OP_PENDING)
 
 /* Mask of bits a new write cares about */
 #define NGQ_WMASK	(NGQ_RMASK|READER_MASK)
 
 /* Test to decide if there is something on the queue. */
 #define QUEUE_ACTIVE(QP) ((QP)->q_flags & OP_PENDING)
 
 /* How to decide what the next queued item is. */
 #define HEAD_IS_READER(QP)  NGI_QUEUED_READER(STAILQ_FIRST(&(QP)->queue))
 #define HEAD_IS_WRITER(QP)  NGI_QUEUED_WRITER(STAILQ_FIRST(&(QP)->queue)) /* notused */
 
 /*
  * Read the status to decide if the next item on the queue can now run.
  * OP_PENDING is masked out of both tests: a queued reader only needs the
  * writer bit clear, a queued writer needs the writer bit and the reader
  * count clear.
  */
 #define QUEUED_READER_CAN_PROCEED(QP)			\
 		(((QP)->q_flags & (NGQ_RMASK & ~OP_PENDING)) == 0)
 #define QUEUED_WRITER_CAN_PROCEED(QP)			\
 		(((QP)->q_flags & (NGQ_WMASK & ~OP_PENDING)) == 0)
 
 /* Is there a chance of getting ANY work off the queue? */
 #define NEXT_QUEUED_ITEM_CAN_PROCEED(QP)				\
 	((HEAD_IS_READER(QP)) ? QUEUED_READER_CAN_PROCEED(QP) :		\
 				QUEUED_WRITER_CAN_PROCEED(QP))
 
 /* Values of the 'rw' argument used by the queue helpers. */
 #define NGQRW_R 0
 #define NGQRW_W 1
 
 /*
  * NOTE(review): not referenced in this part of the file; presumably a
  * q_flags2 bit marking the node as being on the worklist - confirm.
  */
 #define NGQ2_WORKQ	0x00000001
 
 /*
  * Taking into account the current state of the queue and node, possibly take
  * the next entry off the queue and return it. Return NULL if there was
  * nothing we could return, either because there really was nothing there, or
  * because the node was in a state where it cannot yet process the next item
  * on the queue.
  *
  * The queue mutex must be held by the caller.  When an item is returned,
  * the matching reader or writer access to the node has already been taken
  * and *rw is set to NGQRW_R or NGQRW_W accordingly; *rw is left untouched
  * when NULL is returned.
  */
 static __inline item_p
 ng_dequeue(node_p node, int *rw)
 {
 	item_p item;
 	struct ng_queue *ngq = &node->nd_input_queue;
 
 	/* This MUST be called with the mutex held. */
 	mtx_assert(&ngq->q_mtx, MA_OWNED);
 
 	/* If there is nothing queued, then just return. */
 	if (!QUEUE_ACTIVE(ngq)) {
 		CTR4(KTR_NET, "%20s: node [%x] (%p) queue empty; "
 		    "queue flags 0x%lx", __func__,
 		    node->nd_ID, node, ngq->q_flags);
 		return (NULL);
 	}
 
 	/*
 	 * From here, we can assume there is a head item.
 	 * We need to find out what it is and if it can be dequeued, given
 	 * the current state of the node.
 	 */
 	if (HEAD_IS_READER(ngq)) {
 		/*
 		 * Retry until the reader count has been bumped, or give up
 		 * as soon as a writer is seen to be active.
 		 */
 		while (1) {
 			long t = ngq->q_flags;
 			if (t & WRITER_ACTIVE) {
 				/* There is writer, reader can't proceed. */
 				CTR4(KTR_NET, "%20s: node [%x] (%p) queued "
 				    "reader can't proceed; queue flags 0x%lx",
 				    __func__, node->nd_ID, node, t);
 				return (NULL);
 			}
 			if (atomic_cmpset_acq_int(&ngq->q_flags, t,
 			    t + READER_INCREMENT))
 				break;
 			cpu_spinwait();
 		}
 		/* We have got reader lock for the node. */
 		*rw = NGQRW_R;
 	} else if (atomic_cmpset_acq_int(&ngq->q_flags, OP_PENDING,
 	    OP_PENDING + WRITER_ACTIVE)) {
 		/*
 		 * The flag word was exactly OP_PENDING, i.e. no readers and
 		 * no writer were active.
 		 */
 		/* We have got writer lock for the node. */
 		*rw = NGQRW_W;
 	} else {
 		/* There is somebody other, writer can't proceed. */
 		CTR4(KTR_NET, "%20s: node [%x] (%p) queued writer can't "
 		    "proceed; queue flags 0x%lx", __func__, node->nd_ID, node,
 		    ngq->q_flags);
 		return (NULL);
 	}
 
 	/*
 	 * Now we dequeue the request (whatever it may be) and correct the
 	 * pending flags and the next and last pointers.
 	 */
 	item = STAILQ_FIRST(&ngq->queue);
 	STAILQ_REMOVE_HEAD(&ngq->queue, el_next);
 	/* OP_PENDING only stays set while items remain queued. */
 	if (STAILQ_EMPTY(&ngq->queue))
 		atomic_clear_int(&ngq->q_flags, OP_PENDING);
 	CTR6(KTR_NET, "%20s: node [%x] (%p) returning item %p as %s; queue "
 	    "flags 0x%lx", __func__, node->nd_ID, node, item, *rw ? "WRITER" :
 	    "READER", ngq->q_flags);
 	return (item);
 }
 
 /*
  * Queue an item on the node's input queue so that it gets picked up later
  * by someone else.  The item is tagged as a writer when rw is NGQRW_W and
  * as a reader otherwise, and its depth is reset to 1.
  * If the queue could be run now, add node to the queue handler's worklist.
  */
 static __inline void
 ng_queue_rw(node_p node, item_p  item, int rw)
 {
 	struct ng_queue *inq;
 
 	inq = &node->nd_input_queue;
 
 	/* Record the kind of access the item is waiting for. */
 	if (rw != NGQRW_W) {
 		NGI_SET_READER(item);
 	} else {
 		NGI_SET_WRITER(item);
 	}
 	item->depth = 1;
 
 	NG_QUEUE_LOCK(inq);
 
 	/* Mark the queue as non-empty, then append the item to it. */
 	atomic_set_int(&inq->q_flags, OP_PENDING);
 	STAILQ_INSERT_TAIL(&inq->queue, item, el_next);
 
 	CTR5(KTR_NET, "%20s: node [%x] (%p) queued item %p as %s", __func__,
 	    node->nd_ID, node, item, rw ? "WRITER" : "READER" );
 
 	/*
 	 * Taking the worklist lock while the node queue is locked is fine;
 	 * the reverse order is NOT allowed.
 	 */
 	if (NEXT_QUEUED_ITEM_CAN_PROCEED(inq)) {
 		ng_worklist_add(node);
 	}
 
 	NG_QUEUE_UNLOCK(inq);
 }
 
 /*
  * Acquire reader lock on node. If node is busy, queue the packet.
  *
  * Returns the item when reader access was obtained, or NULL when the item
  * was queued through ng_queue_rw() instead.
  */
 static __inline item_p
 ng_acquire_read(node_p node, item_p item)
 {
 	struct ng_queue *ngq = &node->nd_input_queue;
 	long seen;
 
 	KASSERT(node != &ng_deadnode,
 	    ("%s: working on deadnode", __func__));
 
 	/*
 	 * A reader needs a node with neither an active writer nor pending
 	 * items.  Keep trying to bump the reader count for as long as the
 	 * flag word says so.
 	 */
 	while (((seen = ngq->q_flags) & NGQ_RMASK) == 0) {
 		if (atomic_cmpset_acq_int(&ngq->q_flags, seen,
 		    seen + READER_INCREMENT)) {
 			/* Successfully grabbed node */
 			CTR4(KTR_NET, "%20s: node [%x] (%p) acquired item %p",
 			    __func__, node->nd_ID, node, item);
 			return (item);
 		}
 		cpu_spinwait();
 	}
 
 	/* Node is not ready for a reader; queue the request for later. */
 	ng_queue_rw(node, item, NGQRW_R);
 	return (NULL);
 }
 
 /*
  * Acquire writer lock on node. If node is busy, queue the packet.
  *
  * Returns the item when writer access was obtained, or NULL when the item
  * was queued through ng_queue_rw() instead.
  */
 static __inline item_p
 ng_acquire_write(node_p node, item_p item)
 {
 	struct ng_queue *ngq = &node->nd_input_queue;
 
 	KASSERT(node != &ng_deadnode,
 	    ("%s: working on deadnode", __func__));
 
 	/* Writer needs completely idle node: the flag word must be zero. */
 	if (!atomic_cmpset_acq_int(&ngq->q_flags, 0, WRITER_ACTIVE)) {
 		/* Queue the request for later. */
 		ng_queue_rw(node, item, NGQRW_W);
 		return (NULL);
 	}
 
 	/* Successfully grabbed node */
 	CTR4(KTR_NET, "%20s: node [%x] (%p) acquired item %p",
 	    __func__, node->nd_ID, node, item);
 	return (item);
 }
 
 /*
  * Disabled code: upgrade a reader to a writer for a single item.
  * NOTE(review): the function is declared to return item_p but only uses
  * bare "return;" statements, so it would need fixing before the
  * "#if 0" could be removed.
  */
 #if 0
 static __inline item_p
 ng_upgrade_write(node_p node, item_p item)
 {
 	struct ng_queue *ngq = &node->nd_input_queue;
 	KASSERT(node != &ng_deadnode,
 	    ("%s: working on deadnode", __func__));
 
 	NGI_SET_WRITER(item);
 
 	NG_QUEUE_LOCK(ngq);
 
 	/*
 	 * There will never be no readers as we are there ourselves.
 	 * Set the WRITER_ACTIVE flags ASAP to block out fast track readers.
 	 * The caller we are running from will call ng_leave_read()
 	 * soon, so we must account for that. We must leave again with the
 	 * READER lock. If we find other readers, then
 	 * queue the request for later. However "later" may be right now
 	 * if there are no readers. We don't really care if there are queued
 	 * items as we will bypass them anyhow.
 	 */
 	atomic_add_int(&ngq->q_flags, WRITER_ACTIVE - READER_INCREMENT);
 	if ((ngq->q_flags & (NGQ_WMASK & ~OP_PENDING)) == WRITER_ACTIVE) {
 		NG_QUEUE_UNLOCK(ngq);
 		
 		/* It's just us, act on the item. */
 		/* will NOT drop writer lock when done */
 		ng_apply_item(node, item, 0);
 
 		/*
 		 * Having acted on the item, atomically
 		 * downgrade back to READER and finish up.
 	 	 */
 		atomic_add_int(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE);
 
 		/* Our caller will call ng_leave_read() */
 		return;
 	}
 	/*
 	 * It's not just us active, so queue us AT THE HEAD.
 	 * "Why?" I hear you ask.
 	 * Put us at the head of the queue as we've already been
 	 * through it once. If there is nothing else waiting,
 	 * set the correct flags.
 	 */
 	if (STAILQ_EMPTY(&ngq->queue)) {
 		/* We've gone from, 0 to 1 item in the queue */
 		atomic_set_int(&ngq->q_flags, OP_PENDING);
 
 		CTR3(KTR_NET, "%20s: node [%x] (%p) set OP_PENDING", __func__,
 		    node->nd_ID, node);
 	};
 	STAILQ_INSERT_HEAD(&ngq->queue, item, el_next);
 	CTR4(KTR_NET, "%20s: node [%x] (%p) requeued item %p as WRITER",
 	    __func__, node->nd_ID, node, item );
 
 	/* Reverse what we did above. That downgrades us back to reader */
 	atomic_add_int(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE);
 	if (QUEUE_ACTIVE(ngq) && NEXT_QUEUED_ITEM_CAN_PROCEED(ngq))
 		ng_worklist_add(node);
 	NG_QUEUE_UNLOCK(ngq);
 
 	return;
 }
 #endif
 
 /*
  * Release reader lock: take one READER_INCREMENT back out of the node's
  * queue flag word.
  */
 static __inline void
 ng_leave_read(node_p node)
 {
 	struct ng_queue *ngq = &node->nd_input_queue;
 
 	atomic_subtract_rel_int(&ngq->q_flags, READER_INCREMENT);
 }
 
 /*
  * Release writer lock: clear WRITER_ACTIVE in the node's queue flag word.
  */
 static __inline void
 ng_leave_write(node_p node)
 {
 	struct ng_queue *ngq = &node->nd_input_queue;
 
 	atomic_clear_rel_int(&ngq->q_flags, WRITER_ACTIVE);
 }
 
 /*
  * Purge node queue. Called on node shutdown.
  *
  * Every queued item is removed and freed.  The queue lock is dropped
  * while a single item is being disposed of and retaken before the queue
  * is looked at again.
  */
 static void
 ng_flush_input_queue(node_p node)
 {
 	struct ng_queue *ngq = &node->nd_input_queue;
 	struct ng_apply_info *apply;
 	item_p item;
 
 	NG_QUEUE_LOCK(ngq);
 	for (;;) {
 		item = STAILQ_FIRST(&ngq->queue);
 		if (item == NULL)
 			break;
 
 		STAILQ_REMOVE_HEAD(&ngq->queue, el_next);
 		/* Nothing pending any more once the last item is gone. */
 		if (STAILQ_EMPTY(&ngq->queue))
 			atomic_clear_int(&ngq->q_flags, OP_PENDING);
 		NG_QUEUE_UNLOCK(ngq);
 
 		/* If the item is supplying a callback, call it with an error */
 		apply = item->apply;
 		if (apply != NULL) {
 			if (item->depth == 1)
 				apply->error = ENOENT;
 			/* Only the holder of the last reference calls back. */
 			if (refcount_release(&apply->refs))
 				(*apply->apply)(apply->context, apply->error);
 		}
 		NG_FREE_ITEM(item);
 
 		NG_QUEUE_LOCK(ngq);
 	}
 	NG_QUEUE_UNLOCK(ngq);
 }
 
 /***********************************************************************
 * Externally visible method for sending or queueing messages or data.
 ***********************************************************************/
 
 /*
  * The module code should have filled out the item correctly by this stage:
  * Common:
  *    reference to destination node.
  *    Reference to destination rcv hook if relevant.
  *    apply pointer must be either NULL or reference a valid
  *    struct ng_apply_info.
  * Data:
  *    pointer to mbuf
  * Control_Message:
  *    pointer to msg.
  *    ID of original sender node. (return address)
  * Function:
  *    Function pointer
  *    void * argument
  *    integer argument
  *
  * The nodes have several routines and macros to help with this task:
  */
 
 /*
  * Deliver an item to its destination node, either by applying it right
  * away or by placing it on the node's input queue.
  *
  * item:  the item to send; must not be NULL and must carry a node.
  * flags: NG_QUEUE forces queued delivery; NG_PROGRESS makes a queued
  *        delivery return EINPROGRESS instead of 0.
  *
  * Returns EINVAL for a data item without an mbuf (the item is freed
  * here), 0 or EINPROGRESS when the item was queued, and otherwise the
  * result of ng_apply_item().
  */
 int
 ng_snd_item(item_p item, int flags)
 {
 	hook_p hook;
 	node_p node;
 	int queue, rw;
 	struct ng_queue *ngq;
 	int error = 0;
 
 	/* We are sending item, so it must be present! */
 	KASSERT(item != NULL, ("ng_snd_item: item is NULL"));
 
 #ifdef	NETGRAPH_DEBUG
 	_ngi_check(item, __FILE__, __LINE__);
 #endif
 
 	/* Item was sent once more, postpone apply() call. */
 	if (item->apply)
 		refcount_acquire(&item->apply->refs);
 
 	node = NGI_NODE(item);
 	/* Node is never optional. */
 	KASSERT(node != NULL, ("ng_snd_item: node is NULL"));
 
 	hook = NGI_HOOK(item);
 	/* Valid hook and mbuf are mandatory for data. */
 	if ((item->el_flags & NGQF_TYPE) == NGQF_DATA) {
 		KASSERT(hook != NULL, ("ng_snd_item: hook for data is NULL"));
 		if (NGI_M(item) == NULL)
 			ERROUT(EINVAL);
 		CHECK_DATA_MBUF(NGI_M(item));
 	}
 
 	/*
 	 * If the item or the node specifies single threading, force
 	 * writer semantics. Similarly, the node may say one hook always
 	 * produces writers. These are overrides.
 	 */
 	if (((item->el_flags & NGQF_RW) == NGQF_WRITER) ||
 	    (node->nd_flags & NGF_FORCE_WRITER) ||
 	    (hook && (hook->hk_flags & HK_FORCE_WRITER))) {
 		rw = NGQRW_W;
 	} else {
 		rw = NGQRW_R;
 	}
 
 	/*
 	 * If sender or receiver requests queued delivery, or call graph
 	 * loops back from outbound to inbound path, or stack usage
 	 * level is dangerous - enqueue message.
 	 */
 	if ((flags & NG_QUEUE) || (hook && (hook->hk_flags & HK_QUEUE))) {
 		queue = 1;
 	} else if (hook && (hook->hk_flags & HK_TO_INBOUND) &&
 	    curthread->td_ng_outbound) {
 		queue = 1;
 	} else {
 		queue = 0;
 #ifdef GET_STACK_USAGE
 		/*
 		 * Most of netgraph nodes have small stack consumption and
 		 * for them 25% of free stack space is more than enough.
 		 * Nodes/hooks with higher stack usage should be marked as
 		 * HI_STACK. For them 50% of stack will be guaranteed then.
 		 * XXX: Values 25% and 50% are completely empirical.
 		 */
 		size_t	st, su, sl;
 		GET_STACK_USAGE(st, su);
 		/* sl is the stack space still free (total minus used). */
 		sl = st - su;
 		if ((sl * 4 < st) || ((sl * 2 < st) &&
 		    ((node->nd_flags & NGF_HI_STACK) || (hook &&
 		    (hook->hk_flags & HK_HI_STACK)))))
 			queue = 1;
 #endif
 	}
 
 	if (queue) {
 		/* Put it on the queue for that node*/
 		ng_queue_rw(node, item, rw);
 		return ((flags & NG_PROGRESS) ? EINPROGRESS : 0);
 	}
 
 	/*
 	 * We already decided how we will be queued or treated.
 	 * Try get the appropriate operating permission.
 	 */
  	if (rw == NGQRW_R)
 		item = ng_acquire_read(node, item);
 	else
 		item = ng_acquire_write(node, item);
 
 	/* Item was queued while trying to get permission. */
 	if (item == NULL)
 		return ((flags & NG_PROGRESS) ? EINPROGRESS : 0);
 
 	NGI_GET_NODE(item, node); /* zaps stored node */
 
 	/* One more level of direct delivery for this item. */
 	item->depth++;
 	error = ng_apply_item(node, item, rw); /* drops r/w lock when done */
 
 	/* If something is waiting on queue and ready, schedule it. */
 	ngq = &node->nd_input_queue;
 	if (QUEUE_ACTIVE(ngq)) {
 		NG_QUEUE_LOCK(ngq);
 		/* Test again now that the queue lock is held. */
 		if (QUEUE_ACTIVE(ngq) && NEXT_QUEUED_ITEM_CAN_PROCEED(ngq))
 			ng_worklist_add(node);
 		NG_QUEUE_UNLOCK(ngq);
 	}
 
 	/*
 	 * Node may go away as soon as we remove the reference.
 	 * Whatever we do, DO NOT access the node again!
 	 */
 	NG_NODE_UNREF(node);
 
 	return (error);
 
 done:
 	/* If was not sent, apply callback here. */
 	if (item->apply != NULL) {
 		if (item->depth == 0 && error != 0)
 			item->apply->error = error;
 		/* Only the holder of the last reference calls back. */
 		if (refcount_release(&item->apply->refs)) {
 			(*item->apply->apply)(item->apply->context,
 			    item->apply->error);
 		}
 	}
 
 	NG_FREE_ITEM(item);
 	return (error);
 }
 
 /*
  * We have an item that was possibly queued somewhere.
  * It should contain all the information needed
  * to run it on the appropriate node/hook.
  * If there is apply pointer and we own the last reference, call apply().
  *
  * node: node the item is run on; the caller already holds the reader or
  *       writer access named by rw, and it is released here.
  * item: the item to run; it is either freed here or handed on to the
  *       handler that is called.
  * rw:   NGQRW_R releases reader access when done, any other value
  *       releases writer access.
  *
  * Returns 0 or the error produced while handling the item.
  */
 static int
 ng_apply_item(node_p node, item_p item, int rw)
 {
 	hook_p  hook;
 	ng_rcvdata_t *rcvdata;
 	ng_rcvmsg_t *rcvmsg;
 	struct ng_apply_info *apply;
 	int	error = 0, depth;
 
 	/* Node and item are never optional. */
 	KASSERT(node != NULL, ("ng_apply_item: node is NULL"));
 	KASSERT(item != NULL, ("ng_apply_item: item is NULL"));
 
 	NGI_GET_HOOK(item, hook); /* clears stored hook */
 #ifdef	NETGRAPH_DEBUG
 	_ngi_check(item, __FILE__, __LINE__);
 #endif
 
 	/*
 	 * Remember these now: the item may already be freed or handed on
 	 * by the time they are needed at the end of this function.
 	 */
 	apply = item->apply;
 	depth = item->depth;
 
 	switch (item->el_flags & NGQF_TYPE) {
 	case NGQF_DATA:
 		/*
 		 * Check things are still ok as when we were queued.
 		 */
 		KASSERT(hook != NULL, ("ng_apply_item: hook for data is NULL"));
 		if (NG_HOOK_NOT_VALID(hook) ||
 		    NG_NODE_NOT_VALID(node)) {
 			error = EIO;
 			NG_FREE_ITEM(item);
 			break;
 		}
 		/*
 		 * If no receive method, just silently drop it.
 		 * Give preference to the hook over-ride method.
 		 */
 		if ((!(rcvdata = hook->hk_rcvdata)) &&
 		    (!(rcvdata = NG_HOOK_NODE(hook)->nd_type->rcvdata))) {
 			error = 0;
 			NG_FREE_ITEM(item);
 			break;
 		}
 		error = (*rcvdata)(hook, item);
 		break;
 	case NGQF_MESG:
 		if (hook && NG_HOOK_NOT_VALID(hook)) {
 			/*
 			 * The hook has been zapped then we can't use it.
 			 * Immediately drop its reference.
 			 * The message may not need it.
 			 */
 			NG_HOOK_UNREF(hook);
 			hook = NULL;
 		}
 		/*
 		 * Similarly, if the node is a zombie there is
 		 * nothing we can do with it, drop everything.
 		 */
 		if (NG_NODE_NOT_VALID(node)) {
 			TRAP_ERROR();
 			error = EINVAL;
 			NG_FREE_ITEM(item);
 			break;
 		}
 		/*
 		 * Call the appropriate message handler for the object.
 		 * It is up to the message handler to free the message.
 		 * If it's a generic message, handle it generically,
 		 * otherwise call the type's message handler (if it exists).
 		 * XXX (race). Remember that a queued message may
 		 * reference a node or hook that has just been
 		 * invalidated. It will exist as the queue code
 		 * is holding a reference, but..
 		 */
 		if ((NGI_MSG(item)->header.typecookie == NGM_GENERIC_COOKIE) &&
 		    ((NGI_MSG(item)->header.flags & NGF_RESP) == 0)) {
 			error = ng_generic_msg(node, item, hook);
 			break;
 		}
 		/* Prefer the hook's own handler over the node type's one. */
 		if (((!hook) || (!(rcvmsg = hook->hk_rcvmsg))) &&
 		    (!(rcvmsg = node->nd_type->rcvmsg))) {
 			TRAP_ERROR();
 			error = 0;
 			NG_FREE_ITEM(item);
 			break;
 		}
 		error = (*rcvmsg)(node, item, hook);
 		break;
 	case NGQF_FN:
 	case NGQF_FN2:
 		/*
 		 * In the case of the shutdown message we allow it to hit
 		 * even if the node is invalid.
 		 */
 		if (NG_NODE_NOT_VALID(node) &&
 		    NGI_FN(item) != &ng_rmnode) {
 			TRAP_ERROR();
 			error = EINVAL;
 			NG_FREE_ITEM(item);
 			break;
 		}
 		/* Same is about some internal functions and invalid hook. */
 		if (hook && NG_HOOK_NOT_VALID(hook) &&
 		    NGI_FN2(item) != &ng_con_part2 &&
 		    NGI_FN2(item) != &ng_con_part3 &&
 		    NGI_FN(item) != &ng_rmhook_part2) {
 			TRAP_ERROR();
 			error = EINVAL;
 			NG_FREE_ITEM(item);
 			break;
 		}
 		
 		/*
 		 * An NGQF_FN function does not get the item, so it is freed
 		 * here; an NGQF_FN2 function receives the item itself.
 		 */
 		if ((item->el_flags & NGQF_TYPE) == NGQF_FN) {
 			(*NGI_FN(item))(node, hook, NGI_ARG1(item),
 			    NGI_ARG2(item));
 			NG_FREE_ITEM(item);
 		} else	/* it is NGQF_FN2 */
 			error = (*NGI_FN2(item))(node, item, hook);
 		break;
 	}
 	/*
 	 * We held references on some of the resources
 	 * that we took from the item. Now that we have
 	 * finished doing everything, drop those references.
 	 */
 	if (hook)
 		NG_HOOK_UNREF(hook);
 
 	/* Give back the node access the caller obtained for us. */
  	if (rw == NGQRW_R)
 		ng_leave_read(node);
 	else
 		ng_leave_write(node);
 
 	/* Apply callback. */
 	if (apply != NULL) {
 		if (depth == 1 && error != 0)
 			apply->error = error;
 		/* Only the holder of the last reference calls back. */
 		if (refcount_release(&apply->refs))
 			(*apply->apply)(apply->context, apply->error);
 	}
 
 	return (error);
 }
 
 /***********************************************************************
  * Implement the 'generic' control messages
  ***********************************************************************/
 /*
  * Handle a generic (NGM_GENERIC_COOKIE) control message on behalf of
  * node 'here'.
  *
  * The message is taken from 'item' with NGI_GET_MSG() and dispatched on
  * header.cmd.  Whatever response was built in 'resp' (possibly none) is
  * passed to NG_RESPOND_MSG() together with the item, and the request
  * message is freed before returning.  The one exception is
  * NGM_TEXT_CONFIG/NGM_TEXT_STATUS: there the message is put back on the
  * item and both are handed to the node type's rcvmsg method, whose return
  * value is returned directly.
  *
  * Returns 0 or an errno value: EINVAL for a wrong cookie, a bad argument
  * length or an unsupported command, ENOMEM when a response cannot be
  * allocated, ENOSYS when no command matches during a binary/ASCII
  * conversion, or the error reported by the helper that was called.
  */
 static int
 ng_generic_msg(node_p here, item_p item, hook_p lasthook)
 {
 	int error = 0;
 	struct ng_mesg *msg;
 	struct ng_mesg *resp = NULL;
 
 	NGI_GET_MSG(item, msg);
 	if (msg->header.typecookie != NGM_GENERIC_COOKIE) {
 		TRAP_ERROR();
 		error = EINVAL;
 		goto out;
 	}
 	switch (msg->header.cmd) {
 	case NGM_SHUTDOWN:
 		ng_rmnode(here, NULL, NULL, 0);
 		break;
 	case NGM_MKPEER:
 	    {
 		struct ngm_mkpeer *const mkp = (struct ngm_mkpeer *) msg->data;
 
 		if (msg->header.arglen != sizeof(*mkp)) {
 			TRAP_ERROR();
 			error = EINVAL;
 			break;
 		}
 		/* Force NUL termination of the strings found in the message. */
 		mkp->type[sizeof(mkp->type) - 1] = '\0';
 		mkp->ourhook[sizeof(mkp->ourhook) - 1] = '\0';
 		mkp->peerhook[sizeof(mkp->peerhook) - 1] = '\0';
 		error = ng_mkpeer(here, mkp->ourhook, mkp->peerhook, mkp->type);
 		break;
 	    }
 	case NGM_CONNECT:
 	    {
 		struct ngm_connect *const con =
 			(struct ngm_connect *) msg->data;
 		node_p node2;
 
 		if (msg->header.arglen != sizeof(*con)) {
 			TRAP_ERROR();
 			error = EINVAL;
 			break;
 		}
 		con->path[sizeof(con->path) - 1] = '\0';
 		con->ourhook[sizeof(con->ourhook) - 1] = '\0';
 		con->peerhook[sizeof(con->peerhook) - 1] = '\0';
 		/* Don't forget we get a reference.. */
 		error = ng_path2noderef(here, con->path, &node2, NULL);
 		if (error)
 			break;
 		error = ng_con_nodes(item, here, con->ourhook,
 		    node2, con->peerhook);
 		/* Give back the reference obtained from ng_path2noderef(). */
 		NG_NODE_UNREF(node2);
 		break;
 	    }
 	case NGM_NAME:
 	    {
 		struct ngm_name *const nam = (struct ngm_name *) msg->data;
 
 		if (msg->header.arglen != sizeof(*nam)) {
 			TRAP_ERROR();
 			error = EINVAL;
 			break;
 		}
 		nam->name[sizeof(nam->name) - 1] = '\0';
 		error = ng_name_node(here, nam->name);
 		break;
 	    }
 	case NGM_RMHOOK:
 	    {
 		struct ngm_rmhook *const rmh = (struct ngm_rmhook *) msg->data;
 		hook_p hook;
 
 		if (msg->header.arglen != sizeof(*rmh)) {
 			TRAP_ERROR();
 			error = EINVAL;
 			break;
 		}
 		rmh->ourhook[sizeof(rmh->ourhook) - 1] = '\0';
 		/* A hook that cannot be found is not treated as an error. */
 		if ((hook = ng_findhook(here, rmh->ourhook)) != NULL)
 			ng_destroy_hook(hook);
 		break;
 	    }
 	case NGM_NODEINFO:
 	    {
 		struct nodeinfo *ni;
 
 		NG_MKRESPONSE(resp, msg, sizeof(*ni), M_NOWAIT);
 		if (resp == NULL) {
 			error = ENOMEM;
 			break;
 		}
 
 		/* Fill in node info */
 		ni = (struct nodeinfo *) resp->data;
 		if (NG_NODE_HAS_NAME(here))
 			strcpy(ni->name, NG_NODE_NAME(here));
 		strcpy(ni->type, here->nd_type->name);
 		ni->id = ng_node2ID(here);
 		ni->hooks = here->nd_numhooks;
 		break;
 	    }
 	case NGM_LISTHOOKS:
 	    {
 		/* Snapshot of the hook count; the response is sized from it. */
 		const int nhooks = here->nd_numhooks;
 		struct hooklist *hl;
 		struct nodeinfo *ni;
 		hook_p hook;
 
 		/* Get response struct */
 		NG_MKRESPONSE(resp, msg, sizeof(*hl) +
 		    (nhooks * sizeof(struct linkinfo)), M_NOWAIT);
 		if (resp == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		hl = (struct hooklist *) resp->data;
 		ni = &hl->nodeinfo;
 
 		/* Fill in node info */
 		if (NG_NODE_HAS_NAME(here))
 			strcpy(ni->name, NG_NODE_NAME(here));
 		strcpy(ni->type, here->nd_type->name);
 		ni->id = ng_node2ID(here);
 
 		/* Cycle through the linked list of hooks */
 		ni->hooks = 0;
 		LIST_FOREACH(hook, &here->nd_hooks, hk_hooks) {
 			struct linkinfo *const link = &hl->link[ni->hooks];
 
 			/* Never write past the entries that were allocated. */
 			if (ni->hooks >= nhooks) {
 				log(LOG_ERR, "%s: number of %s changed\n",
 				    __func__, "hooks");
 				break;
 			}
 			if (NG_HOOK_NOT_VALID(hook))
 				continue;
 			strcpy(link->ourhook, NG_HOOK_NAME(hook));
 			strcpy(link->peerhook, NG_PEER_HOOK_NAME(hook));
 			if (NG_PEER_NODE_NAME(hook)[0] != '\0')
 				strcpy(link->nodeinfo.name,
 				    NG_PEER_NODE_NAME(hook));
 			strcpy(link->nodeinfo.type,
 			   NG_PEER_NODE(hook)->nd_type->name);
 			link->nodeinfo.id = ng_node2ID(NG_PEER_NODE(hook));
 			link->nodeinfo.hooks = NG_PEER_NODE(hook)->nd_numhooks;
 			ni->hooks++;
 		}
 		break;
 	    }
 
 	case NGM_LISTNODES:
 	    {
 		struct namelist *nl;
 		node_p node;
 		int i;
 
 		/* The ID hash stays read-locked while it is sized and walked. */
 		IDHASH_RLOCK();
 		/* Get response struct. */
 		NG_MKRESPONSE(resp, msg, sizeof(*nl) +
 		    (V_ng_nodes * sizeof(struct nodeinfo)), M_NOWAIT | M_ZERO);
 		if (resp == NULL) {
 			IDHASH_RUNLOCK();
 			error = ENOMEM;
 			break;
 		}
 		nl = (struct namelist *) resp->data;
 
 		/* Cycle through the lists of nodes. */
 		nl->numnames = 0;
 		for (i = 0; i <= V_ng_ID_hmask; i++) {
 			LIST_FOREACH(node, &V_ng_ID_hash[i], nd_idnodes) {
 				struct nodeinfo *const np =
 				    &nl->nodeinfo[nl->numnames];
 
 				if (NG_NODE_NOT_VALID(node))
 					continue;
 				if (NG_NODE_HAS_NAME(node))
 					strcpy(np->name, NG_NODE_NAME(node));
 				strcpy(np->type, node->nd_type->name);
 				np->id = ng_node2ID(node);
 				np->hooks = node->nd_numhooks;
 				KASSERT(nl->numnames < V_ng_nodes,
 				    ("%s: no space", __func__));
 				nl->numnames++;
 			}
 		}
 		IDHASH_RUNLOCK();
 		break;
 	    }
 	case NGM_LISTNAMES:
 	    {
 		struct namelist *nl;
 		node_p node;
 		int i;
 
 		/* Same as NGM_LISTNODES, but walking the name hash. */
 		NAMEHASH_RLOCK();
 		/* Get response struct. */
 		NG_MKRESPONSE(resp, msg, sizeof(*nl) +
 		    (V_ng_named_nodes * sizeof(struct nodeinfo)), M_NOWAIT);
 		if (resp == NULL) {
 			NAMEHASH_RUNLOCK();
 			error = ENOMEM;
 			break;
 		}
 		nl = (struct namelist *) resp->data;
 
 		/* Cycle through the lists of nodes. */
 		nl->numnames = 0;
 		for (i = 0; i <= V_ng_name_hmask; i++) {
 			LIST_FOREACH(node, &V_ng_name_hash[i], nd_nodes) {
 				struct nodeinfo *const np =
 				    &nl->nodeinfo[nl->numnames];
 
 				if (NG_NODE_NOT_VALID(node))
 					continue;
 				strcpy(np->name, NG_NODE_NAME(node));
 				strcpy(np->type, node->nd_type->name);
 				np->id = ng_node2ID(node);
 				np->hooks = node->nd_numhooks;
 				KASSERT(nl->numnames < V_ng_named_nodes,
 				    ("%s: no space", __func__));
 				nl->numnames++;
 			}
 		}
 		NAMEHASH_RUNLOCK();
 		break;
 	    }
 
 	case NGM_LISTTYPES:
 	    {
 		struct typelist *tl;
 		struct ng_type *type;
 		int num = 0;
 
 		/* Count and fill under one read lock so both passes agree. */
 		TYPELIST_RLOCK();
 		/* Count number of types */
 		LIST_FOREACH(type, &ng_typelist, types)
 			num++;
 
 		/* Get response struct */
 		NG_MKRESPONSE(resp, msg, sizeof(*tl) +
 		    (num * sizeof(struct typeinfo)), M_NOWAIT);
 		if (resp == NULL) {
 			TYPELIST_RUNLOCK();
 			error = ENOMEM;
 			break;
 		}
 		tl = (struct typelist *) resp->data;
 
 		/* Cycle through the linked list of types */
 		tl->numtypes = 0;
 		LIST_FOREACH(type, &ng_typelist, types) {
 			struct typeinfo *const tp = &tl->typeinfo[tl->numtypes];
 
 			strcpy(tp->type_name, type->name);
 			tp->numnodes = type->refs - 1; /* don't count list */
 			KASSERT(tl->numtypes < num, ("%s: no space", __func__));
 			tl->numtypes++;
 		}
 		TYPELIST_RUNLOCK();
 		break;
 	    }
 
 	case NGM_BINARY2ASCII:
 	    {
 		int bufSize = 20 * 1024;	/* XXX hard coded constant */
 		const struct ng_parse_type *argstype;
 		const struct ng_cmdlist *c;
 		struct ng_mesg *binary, *ascii;
 
 		/* Data area must contain a valid netgraph message */
 		binary = (struct ng_mesg *)msg->data;
 		if (msg->header.arglen < sizeof(struct ng_mesg) ||
 		    (msg->header.arglen - sizeof(struct ng_mesg) <
 		    binary->header.arglen)) {
 			TRAP_ERROR();
 			error = EINVAL;
 			break;
 		}
 
 		/* Get a response message with lots of room */
 		NG_MKRESPONSE(resp, msg, sizeof(*ascii) + bufSize, M_NOWAIT);
 		if (resp == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		ascii = (struct ng_mesg *)resp->data;
 
 		/* Copy binary message header to response message payload */
 		bcopy(binary, ascii, sizeof(*binary));
 
 		/* Find command by matching typecookie and command number */
 		for (c = here->nd_type->cmdlist; c != NULL && c->name != NULL;
 		    c++) {
 			if (binary->header.typecookie == c->cookie &&
 			    binary->header.cmd == c->cmd)
 				break;
 		}
 		/* Not in the node type's list: try the generic commands. */
 		if (c == NULL || c->name == NULL) {
 			for (c = ng_generic_cmds; c->name != NULL; c++) {
 				if (binary->header.typecookie == c->cookie &&
 				    binary->header.cmd == c->cmd)
 					break;
 			}
 			if (c->name == NULL) {
 				NG_FREE_MSG(resp);
 				error = ENOSYS;
 				break;
 			}
 		}
 
 		/* Convert command name to ASCII */
 		snprintf(ascii->header.cmdstr, sizeof(ascii->header.cmdstr),
 		    "%s", c->name);
 
 		/* Convert command arguments to ASCII */
 		argstype = (binary->header.flags & NGF_RESP) ?
 		    c->respType : c->mesgType;
 		if (argstype == NULL) {
 			*ascii->data = '\0';
 		} else {
 			if ((error = ng_unparse(argstype,
 			    (u_char *)binary->data,
 			    ascii->data, bufSize)) != 0) {
 				NG_FREE_MSG(resp);
 				break;
 			}
 		}
 
 		/* Return the result as struct ng_mesg plus ASCII string */
 		bufSize = strlen(ascii->data) + 1;
 		ascii->header.arglen = bufSize;
 		resp->header.arglen = sizeof(*ascii) + bufSize;
 		break;
 	    }
 
 	case NGM_ASCII2BINARY:
 	    {
 		int bufSize = 20 * 1024;	/* XXX hard coded constant */
 		const struct ng_cmdlist *c;
 		const struct ng_parse_type *argstype;
 		struct ng_mesg *ascii, *binary;
 		int off = 0;
 
 		/* Data area must contain at least a struct ng_mesg + '\0' */
 		ascii = (struct ng_mesg *)msg->data;
 		if ((msg->header.arglen < sizeof(*ascii) + 1) ||
 		    (ascii->header.arglen < 1) ||
 		    (msg->header.arglen < sizeof(*ascii) +
 		    ascii->header.arglen)) {
 			TRAP_ERROR();
 			error = EINVAL;
 			break;
 		}
 		/* Make sure the embedded argument string is terminated. */
 		ascii->data[ascii->header.arglen - 1] = '\0';
 
 		/* Get a response message with lots of room */
 		NG_MKRESPONSE(resp, msg, sizeof(*binary) + bufSize, M_NOWAIT);
 		if (resp == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		binary = (struct ng_mesg *)resp->data;
 
 		/* Copy ASCII message header to response message payload */
 		bcopy(ascii, binary, sizeof(*ascii));
 
 		/* Find command by matching ASCII command string */
 		for (c = here->nd_type->cmdlist;
 		    c != NULL && c->name != NULL; c++) {
 			if (strcmp(ascii->header.cmdstr, c->name) == 0)
 				break;
 		}
 		/* Not in the node type's list: try the generic commands. */
 		if (c == NULL || c->name == NULL) {
 			for (c = ng_generic_cmds; c->name != NULL; c++) {
 				if (strcmp(ascii->header.cmdstr, c->name) == 0)
 					break;
 			}
 			if (c->name == NULL) {
 				NG_FREE_MSG(resp);
 				error = ENOSYS;
 				break;
 			}
 		}
 
 		/* Convert command name to binary */
 		binary->header.cmd = c->cmd;
 		binary->header.typecookie = c->cookie;
 
 		/* Convert command arguments to binary */
 		argstype = (binary->header.flags & NGF_RESP) ?
 		    c->respType : c->mesgType;
 		if (argstype == NULL) {
 			bufSize = 0;
 		} else {
 			if ((error = ng_parse(argstype, ascii->data, &off,
 			    (u_char *)binary->data, &bufSize)) != 0) {
 				NG_FREE_MSG(resp);
 				break;
 			}
 		}
 
 		/* Return the result */
 		binary->header.arglen = bufSize;
 		resp->header.arglen = sizeof(*binary) + bufSize;
 		break;
 	    }
 
 	case NGM_TEXT_CONFIG:
 	case NGM_TEXT_STATUS:
 		/*
 		 * This one is tricky as it passes the command down to the
 		 * actual node, even though it is a generic type command.
 		 * This means we must assume that the item/msg is already freed
 		 * when control passes back to us.
 		 */
 		if (here->nd_type->rcvmsg != NULL) {
 			NGI_MSG(item) = msg; /* put it back as we found it */
 			return((*here->nd_type->rcvmsg)(here, item, lasthook));
 		}
 		/* Fall through if rcvmsg not supported */
 	default:
 		TRAP_ERROR();
 		error = EINVAL;
 	}
 	/*
 	 * Sometimes a generic message may be statically allocated
 	 * to avoid problems with allocating when in tight memory situations.
 	 * Don't free it if it is so.
 	 * I break them apart here, because errors may cause a free of the
 	 * item, in which case we'd be doing it twice.
 	 * They are kept together above, to simplify freeing.
 	 * NOTE(review): the code below frees 'msg' unconditionally; the
 	 * remark about statically allocated messages looks stale - confirm.
 	 */
 out:
 	NG_RESPOND_MSG(error, here, item, resp);
 	NG_FREE_MSG(msg);
 	return (error);
 }
 
 /************************************************************************
 			Queue element get/free routines
 ************************************************************************/
 
 uma_zone_t			ng_qzone;
 uma_zone_t			ng_qdzone;
 static int			numthreads = 0; /* number of queue threads */
 static int			maxalloc = 4096;/* limit the damage of a leak */
 static int			maxdata = 4096;	/* limit the damage of a DoS */
 
 SYSCTL_INT(_net_graph, OID_AUTO, threads, CTLFLAG_RDTUN, &numthreads,
     0, "Number of queue processing threads");
 SYSCTL_INT(_net_graph, OID_AUTO, maxalloc, CTLFLAG_RDTUN, &maxalloc,
     0, "Maximum number of non-data queue items to allocate");
 SYSCTL_INT(_net_graph, OID_AUTO, maxdata, CTLFLAG_RDTUN, &maxdata,
     0, "Maximum number of data queue items to allocate");
 
 #ifdef	NETGRAPH_DEBUG
 static TAILQ_HEAD(, ng_item) ng_itemlist = TAILQ_HEAD_INITIALIZER(ng_itemlist);
 static int allocated;	/* number of items malloc'd */
 #endif
 
 /*
  * Get a queue entry.
  * This is usually called when a packet first enters netgraph.
  * By definition, this is usually from an interrupt, or from a user.
  * Users are not so important, but try be quick for the times that it's
  * an interrupt.
  */
 static __inline item_p
 ng_alloc_item(int type, int flags)
 {
 	item_p item;
 
 	KASSERT(((type & ~NGQF_TYPE) == 0),
 	    ("%s: incorrect item type: %d", __func__, type));
 
 	item = uma_zalloc((type == NGQF_DATA) ? ng_qdzone : ng_qzone,
 	    ((flags & NG_WAITOK) ? M_WAITOK : M_NOWAIT) | M_ZERO);
 
 	if (item) {
 		item->el_flags = type;
 #ifdef	NETGRAPH_DEBUG
 		mtx_lock(&ngq_mtx);
 		TAILQ_INSERT_TAIL(&ng_itemlist, item, all);
 		allocated++;
 		mtx_unlock(&ngq_mtx);
 #endif
 	}
 
 	return (item);
 }
 
 /*
  * Release a queue entry.
  * Frees whatever the item still carries (mbuf or message, depending on
  * its type), clears any node and hook still recorded on it, and returns
  * the item to the UMA zone matching its type.
  */
 void
 ng_free_item(item_p item)
 {
 	/*
 	 * The item may hold resources of its own. We need to free
 	 * these before we can free the item. What they are depends upon
 	 * what kind of item it is. It is important that nodes zero
 	 * out pointers to resources that they remove from the item
 	 * or we release them again here.
 	 */
 	switch (item->el_flags & NGQF_TYPE) {
 	case NGQF_DATA:
 		/* If we have an mbuf still attached.. */
 		NG_FREE_M(_NGI_M(item));
 		break;
 	case NGQF_MESG:
 		_NGI_RETADDR(item) = 0;
 		NG_FREE_MSG(_NGI_MSG(item));
 		break;
 	case NGQF_FN:
 	case NGQF_FN2:
 		/* nothing to free really, */
 		_NGI_FN(item) = NULL;
 		_NGI_ARG1(item) = NULL;
 		_NGI_ARG2(item) = 0;
 		break;
 	}
 	/* If we still have a node or hook referenced... */
 	_NGI_CLR_NODE(item);
 	_NGI_CLR_HOOK(item);
 
 #ifdef	NETGRAPH_DEBUG
 	/* Take the item off the global debug list. */
 	mtx_lock(&ngq_mtx);
 	TAILQ_REMOVE(&ng_itemlist, item, all);
 	allocated--;
 	mtx_unlock(&ngq_mtx);
 #endif
 	/* Data items live in ng_qdzone, everything else in ng_qzone. */
 	uma_zfree(((item->el_flags & NGQF_TYPE) == NGQF_DATA) ?
 	    ng_qdzone : ng_qzone, item);
 }
 
 /*
  * Change the type of a queue entry.
  * Data and non-data items come from different UMA zones, so when the
  * change crosses that boundary a new item is allocated, the old one is
  * copied into it and then freed.  If that allocation fails the old item
  * is freed as well and NULL is returned.
  */
 static __inline item_p
 ng_realloc_item(item_p pitem, int type, int flags)
 {
 	item_p nitem;
 	int was_data, want_data;
 
 	KASSERT((pitem != NULL), ("%s: can't reallocate NULL", __func__));
 	KASSERT(((type & ~NGQF_TYPE) == 0),
 	    ("%s: incorrect item type: %d", __func__, type));
 
 	was_data = ((pitem->el_flags & NGQF_TYPE) == NGQF_DATA);
 	want_data = (type == NGQF_DATA);
 
 	nitem = pitem;
 	if (was_data != want_data) {
 		/* Zone changes: move the contents into a fresh item. */
 		nitem = ng_alloc_item(type, flags);
 		if (nitem == NULL) {
 			ng_free_item(pitem);
 			return (NULL);
 		}
 		*nitem = *pitem;
 		ng_free_item(pitem);
 	}
 
 	/* Replace only the type bits, keep every other flag. */
 	nitem->el_flags = (nitem->el_flags & ~NGQF_TYPE) | type;
 	return (nitem);
 }
 
 /************************************************************************
 			Module routines
 ************************************************************************/
 
 /*
  * Module event handler for netgraph node type modules.
  * 'data' is the struct ng_type being loaded or unloaded.  Events other
  * than load/unload go to the type's own handler when it has one, and
  * yield EOPNOTSUPP otherwise.
  */
 int
 ng_mod_event(module_t mod, int event, void *data)
 {
 	struct ng_type *const type = data;
 	int rc = 0;
 
 	switch (event) {
 	case MOD_LOAD:
 		/* Register new netgraph node type */
 		rc = ng_newtype(type);
 		if (rc != 0)
 			break;
 
 		/* Call type specific code, if there is any */
 		if (type->mod_event == NULL)
 			break;
 		rc = (*type->mod_event)(mod, event, data);
 		if (rc != 0) {
 			/* The type refused to load: undo the registration. */
 			TYPELIST_WLOCK();
 			type->refs--;
 			LIST_REMOVE(type, types);
 			TYPELIST_WUNLOCK();
 		}
 		break;
 
 	case MOD_UNLOAD:
 		/* Make sure no nodes exist! */
 		if (type->refs > 1) {
 			rc = EBUSY;
 			break;
 		}
 		/* Failed load, nothing to undo. */
 		if (type->refs == 0)
 			break;
 		/* Check with the type; it may refuse. */
 		if (type->mod_event != NULL) {
 			rc = (*type->mod_event)(mod, event, data);
 			if (rc != 0)
 				break;
 		}
 		TYPELIST_WLOCK();
 		LIST_REMOVE(type, types);
 		TYPELIST_WUNLOCK();
 		break;
 
 	default:
 		rc = (type->mod_event != NULL) ?
 		    (*type->mod_event)(mod, event, data) :
 		    EOPNOTSUPP;		/* XXX ? */
 		break;
 	}
 	return (rc);
 }
 
 /*
  * Set up the node ID hash and the node name hash (V_ng_ID_hash and
  * V_ng_name_hash) together with their masks.
  */
 static void
 vnet_netgraph_init(const void *unused __unused)
 {
 
 	/* We start with small hashes, but they can grow. */
 	V_ng_ID_hash = hashinit(16, M_NETGRAPH_NODE, &V_ng_ID_hmask);
 	V_ng_name_hash = hashinit(16, M_NETGRAPH_NODE, &V_ng_name_hmask);
 }
 VNET_SYSINIT(vnet_netgraph_init, SI_SUB_NETGRAPH, SI_ORDER_FIRST,
     vnet_netgraph_init, NULL);
 
 #ifdef VIMAGE
 /*
  * Tear down the netgraph state set up by vnet_netgraph_init().
  * Repeatedly pick a node out of the ID hash and ask it to go away with
  * ng_rmnode() until the hash yields no more nodes, then destroy both
  * hash tables.
  */
 static void
 vnet_netgraph_uninit(const void *unused __unused)
 {
 	node_p node = NULL, last_killed = NULL;
 	int i;
 
 	do {
 		/* Find a node to kill */
 		IDHASH_RLOCK();
 		for (i = 0; i <= V_ng_ID_hmask; i++) {
 			LIST_FOREACH(node, &V_ng_ID_hash[i], nd_idnodes) {
 				if (node != &ng_deadnode) {
 					/* Hold it across dropping the lock. */
 					NG_NODE_REF(node);
 					break;
 				}
 			}
 			/* node is NULL here when this bucket ran out. */
 			if (node != NULL)
 				break;
 		}
 		IDHASH_RUNLOCK();
 
 		/* Attempt to kill it only if it is a regular node */
 		if (node != NULL) {
 			/*
 			 * Finding the node we just tried to remove means it
 			 * survived: escalate with NGF_REALLY_DIE, and panic
 			 * if that flag had already been set.
 			 */
 			if (node == last_killed) {
 				/* This should never happen */
 				printf("ng node %s needs NGF_REALLY_DIE\n",
 				    node->nd_name);
 				if (node->nd_flags & NGF_REALLY_DIE)
 					panic("ng node %s won't die",
 					    node->nd_name);
 				node->nd_flags |= NGF_REALLY_DIE;
 			}
 			ng_rmnode(node, NULL, NULL, 0);
 			NG_NODE_UNREF(node);
 			last_killed = node;
 		}
 	} while (node != NULL);
 
 	hashdestroy(V_ng_name_hash, M_NETGRAPH_NODE, V_ng_name_hmask);
 	hashdestroy(V_ng_ID_hash, M_NETGRAPH_NODE, V_ng_ID_hmask);
 }
 VNET_SYSUNINIT(vnet_netgraph_uninit, SI_SUB_NETGRAPH, SI_ORDER_FIRST,
     vnet_netgraph_uninit, NULL);
 #endif /* VIMAGE */
 
 /*
  * Module event handler for the netgraph base code itself.
  * On load it initializes the locks, creates the two item zones with
  * their limits and starts the queue processing threads.  Unloading is
  * always refused with EBUSY; any other event yields EOPNOTSUPP.
  */
 static int
 ngb_mod_event(module_t mod, int event, void *data)
 {
 	struct proc *p;
 	struct thread *td;
 	int i;
 
 	/* You can't unload it because an interface may be using it. */
 	if (event == MOD_UNLOAD)
 		return (EBUSY);
 	if (event != MOD_LOAD)
 		return (EOPNOTSUPP);
 
 	/* Initialize everything. */
 	NG_WORKLIST_LOCK_INIT();
 	rw_init(&ng_typelist_lock, "netgraph types");
 	rw_init(&ng_idhash_lock, "netgraph idhash");
 	rw_init(&ng_namehash_lock, "netgraph namehash");
 	rw_init(&ng_topo_lock, "netgraph topology mutex");
 #ifdef	NETGRAPH_DEBUG
 	mtx_init(&ng_nodelist_mtx, "netgraph nodelist mutex", NULL,
 	    MTX_DEF);
 	mtx_init(&ngq_mtx, "netgraph item list mutex", NULL,
 	    MTX_DEF);
 #endif
 
 	/* One zone for non-data items, one for data items. */
 	ng_qzone = uma_zcreate("NetGraph items", sizeof(struct ng_item),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(ng_qzone, maxalloc);
 	ng_qdzone = uma_zcreate("NetGraph data items",
 	    sizeof(struct ng_item), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(ng_qdzone, maxdata);
 
 	/* Autoconfigure number of threads. */
 	if (numthreads <= 0)
 		numthreads = mp_ncpus;
 
 	/* Create threads, starting with no process. */
 	p = NULL;
 	for (i = 0; i < numthreads; i++) {
 		if (kproc_kthread_add(ngthread, NULL, &p, &td,
 		    RFHIGHPID, 0, "ng_queue", "ng_queue%d", i) != 0) {
 			/* Record how many threads were actually started. */
 			numthreads = i;
 			break;
 		}
 	}
 	return (0);
 }
 
 static moduledata_t netgraph_mod = {
 	"netgraph",
 	ngb_mod_event,
 	(NULL)
 };
 DECLARE_MODULE(netgraph, netgraph_mod, SI_SUB_NETGRAPH, SI_ORDER_FIRST);
 SYSCTL_NODE(_net, OID_AUTO, graph, CTLFLAG_RW, 0, "netgraph Family");
 SYSCTL_INT(_net_graph, OID_AUTO, abi_version, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_ABI_VERSION,"");
 SYSCTL_INT(_net_graph, OID_AUTO, msg_version, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_VERSION, "");
 
 #ifdef	NETGRAPH_DEBUG
 /*
  * Print a hook's name, reference count and the place it was last
  * touched.  A non-zero 'line' additionally reports file/line as the
  * place a problem was discovered and, with KDB, prints a backtrace.
  */
 void
 dumphook(hook_p hook, char *file, int line)
 {
 	printf("hook: name %s, %d refs, Last touched:\n",
 	    _NG_HOOK_NAME(hook), hook->hk_refs);
 	printf("	Last active @ %s, line %d\n",
 	    hook->lastfile, hook->lastline);
 	if (line == 0)
 		return;
 
 	printf(" problem discovered at file %s, line %d\n", file, line);
 #ifdef KDB
 	kdb_backtrace();
 #endif
 }
 
 /*
  * Print a node's ID, type, hook count, flags, reference count, name and
  * the place it was last touched.  A non-zero 'line' additionally reports
  * file/line as the place a problem was discovered and, with KDB, prints
  * a backtrace.
  */
 void
 dumpnode(node_p node, char *file, int line)
 {
 	printf("node: ID [%x]: type '%s', %d hooks, flags 0x%x, %d refs, %s:\n",
 	    _NG_NODE_ID(node), node->nd_type->name,
 	    node->nd_numhooks, node->nd_flags,
 	    node->nd_refs, node->nd_name);
 	printf("	Last active @ %s, line %d\n",
 	    node->lastfile, node->lastline);
 	if (line == 0)
 		return;
 
 	printf(" problem discovered at file %s, line %d\n", file, line);
 #ifdef KDB
 	kdb_backtrace();
 #endif
 }
 
 /*
  * Print where an item was last used plus a type specific summary of its
  * contents.  A non-zero 'line' additionally reports file/line as the
  * place a problem was discovered and, if the item records a node, that
  * node's address and ID.
  */
 void
 dumpitem(item_p item, char *file, int line)
 {
 	const int kind = item->el_flags & NGQF_TYPE;
 
 	printf(" ACTIVE item, last used at %s, line %d",
 	    item->lastfile, item->lastline);
 
 	switch (kind) {
 	case NGQF_DATA:
 		printf(" - [data]\n");
 		break;
 	case NGQF_MESG:
 		printf(" - retaddr[%d]:\n", _NGI_RETADDR(item));
 		break;
 	case NGQF_FN:
 		printf(" - fn@%p (%p, %p, %p, %d (%x))\n",
 		    _NGI_FN(item),
 		    _NGI_NODE(item),
 		    _NGI_HOOK(item),
 		    item->body.fn.fn_arg1,
 		    item->body.fn.fn_arg2,
 		    item->body.fn.fn_arg2);
 		break;
 	case NGQF_FN2:
 		printf(" - fn2@%p (%p, %p, %p, %d (%x))\n",
 		    _NGI_FN2(item),
 		    _NGI_NODE(item),
 		    _NGI_HOOK(item),
 		    item->body.fn.fn_arg1,
 		    item->body.fn.fn_arg2,
 		    item->body.fn.fn_arg2);
 		break;
 	}
 
 	if (line == 0)
 		return;
 
 	printf(" problem discovered at file %s, line %d\n", file, line);
 	if (_NGI_NODE(item)) {
 		printf("node %p ([%x])\n",
 		    _NGI_NODE(item), ng_node2ID(_NGI_NODE(item)));
 	}
 }
 
 static void
 ng_dumpitems(void)
 {
 	item_p item;
 	int i = 1;
 	TAILQ_FOREACH(item, &ng_itemlist, all) {
 		printf("[%d] ", i++);
 		dumpitem(item, NULL, 0);
 	}
 }
 
 /*
  * Print every node on ng_allnodes, numbered from 1, while holding
  * ng_nodelist_mtx.
  */
 static void
 ng_dumpnodes(void)
 {
 	node_p nd;
 	int idx = 0;
 
 	mtx_lock(&ng_nodelist_mtx);
 	SLIST_FOREACH(nd, &ng_allnodes, nd_all) {
 		idx++;
 		printf("[%d] ", idx);
 		dumpnode(nd, NULL, 0);
 	}
 	mtx_unlock(&ng_nodelist_mtx);
 }
 
 /*
  * Print every hook on ng_allhooks, numbered from 1, while holding
  * ng_nodelist_mtx.
  */
 static void
 ng_dumphooks(void)
 {
 	hook_p hk;
 	int idx = 0;
 
 	mtx_lock(&ng_nodelist_mtx);
 	SLIST_FOREACH(hk, &ng_allhooks, hk_all) {
 		idx++;
 		printf("[%d] ", idx);
 		dumphook(hk, NULL, 0);
 	}
 	mtx_unlock(&ng_nodelist_mtx);
 }
 
 /*
  * Sysctl handler for the debug item counter.
  * Reading reports the current number of allocated items.  Writing the
  * value 42 dumps all items, nodes and hooks to the console; any other
  * written value is ignored.
  * Returns the error from sysctl_handle_int(), or 0.
  */
 static int
 sysctl_debug_ng_dump_items(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int val;
 
 	val = allocated;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	/* Stop here on failure or when nothing was written. */
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (val == 42) {
 		ng_dumpitems();
 		ng_dumpnodes();
 		ng_dumphooks();
 	}
 	return (0);
 }
 
 SYSCTL_PROC(_debug, OID_AUTO, ng_dump_items, CTLTYPE_INT | CTLFLAG_RW,
     0, sizeof(int), sysctl_debug_ng_dump_items, "I", "Number of allocated items");
 #endif	/* NETGRAPH_DEBUG */
 
 /***********************************************************************
 * Worklist routines
 **********************************************************************/
 /*
  * Body of a queue processing thread (started from ngb_mod_event());
  * it loops forever.
  * Pick a node off the list of nodes with work, remove it from the list,
  * then dequeue and apply items from that node until ng_dequeue() has
  * nothing more to hand out.
  */
 static void
 ngthread(void *arg)
 {
 	for (;;) {
 		node_p  node;
 
 		/* Get node from the worklist. */
 		NG_WORKLIST_LOCK();
 		while ((node = STAILQ_FIRST(&ng_worklist)) == NULL)
 			NG_WORKLIST_SLEEP();
 		STAILQ_REMOVE_HEAD(&ng_worklist, nd_input_queue.q_work);
 		NG_WORKLIST_UNLOCK();
 		CURVNET_SET(node->nd_vnet);
 		CTR3(KTR_NET, "%20s: node [%x] (%p) taken off worklist",
 		    __func__, node->nd_ID, node);
 		/*
 		 * We have the node. We also take over the reference
 		 * that the list had on it.
 		 * Now process as much as you can, until it won't
 		 * let you have another item off the queue.
 		 * All this time, keep the reference
 		 * that lets us be sure that the node still exists.
 		 * Let the reference go at the last minute.
 		 */
 		for (;;) {
 			item_p item;
 			int rw;
 
 			NG_QUEUE_LOCK(&node->nd_input_queue);
 			item = ng_dequeue(node, &rw);
 			if (item == NULL) {
 				/*
 				 * Clear the flag ng_worklist_add() tests, so
 				 * the node can be put on the worklist again.
 				 */
 				node->nd_input_queue.q_flags2 &= ~NGQ2_WORKQ;
 				NG_QUEUE_UNLOCK(&node->nd_input_queue);
 				break; /* go look for another node */
 			} else {
 				NG_QUEUE_UNLOCK(&node->nd_input_queue);
 				NGI_GET_NODE(item, node); /* zaps stored node */
 				ng_apply_item(node, item, rw);
 				NG_NODE_UNREF(node);
 			}
 		}
 		/* Release the reference taken over from the worklist. */
 		NG_NODE_UNREF(node);
 		CURVNET_RESTORE();
 	}
 }
 
 /*
  * Put a node on the worklist unless it is already there, and wake up a
  * queue thread.  The caller must hold the node's input queue mutex.
  *
  * XXX
  * It's possible that a debugging NG_NODE_REF may need
  * to be outside the mutex zone
  */
 static void
 ng_worklist_add(node_p node)
 {
 
 	mtx_assert(&node->nd_input_queue.q_mtx, MA_OWNED);
 
 	/* Nothing to do when the node is already queued for work. */
 	if ((node->nd_input_queue.q_flags2 & NGQ2_WORKQ) != 0) {
 		CTR3(KTR_NET, "%20s: node [%x] (%p) already on worklist",
 		    __func__, node->nd_ID, node);
 		return;
 	}
 
 	/* Mark it as queued; the worklist gets its own node reference. */
 	node->nd_input_queue.q_flags2 |= NGQ2_WORKQ;
 	NG_NODE_REF(node); /* XXX safe in mutex? */
 	NG_WORKLIST_LOCK();
 	STAILQ_INSERT_TAIL(&ng_worklist, node, nd_input_queue.q_work);
 	NG_WORKLIST_UNLOCK();
 	CTR3(KTR_NET, "%20s: node [%x] (%p) put on worklist", __func__,
 	    node->nd_ID, node);
 	NG_WORKLIST_WAKEUP();
 }
 
 /***********************************************************************
 * Externally useable functions to set up a queue item ready for sending
 ***********************************************************************/
 
 /*
  * Sanity check for the item set-up routines.
  * With NETGRAPH_DEBUG: if the variable 'item' already has a node or a
  * hook recorded, print a complaint, enter the debugger and clear it.
  * Without NETGRAPH_DEBUG the macro expands to nothing.
  * The macro refers to a variable literally named 'item', which must be
  * in scope wherever it is used.
  */
 #ifdef	NETGRAPH_DEBUG
 #define	ITEM_DEBUG_CHECKS						\
 	do {								\
 		if (NGI_NODE(item) ) {					\
 			printf("item already has node");		\
 			kdb_enter(KDB_WHY_NETGRAPH, "has node");	\
 			NGI_CLR_NODE(item);				\
 		}							\
 		if (NGI_HOOK(item) ) {					\
 			printf("item already has hook");		\
 			kdb_enter(KDB_WHY_NETGRAPH, "has hook");	\
 			NGI_CLR_HOOK(item);				\
 		}							\
 	} while (0)
 #else
 #define ITEM_DEBUG_CHECKS
 #endif
 
 /*
  * Put mbuf into the item.
  * Hook and node references will be removed when the item is dequeued.
  * (or equivalent)
  * (XXX) Unsafe because no reference held by peer on remote node.
  * remote node might go away in this timescale.
  * We know the hooks can't go away because that would require getting
  * a writer item on both nodes and we must have at least a  reader
  * here to be able to do this.
  * Note that the hook loaded is the REMOTE hook.
  *
  * This is possibly in the critical path for new data.
  */
 item_p
 ng_package_data(struct mbuf *m, int flags)
 {
 	item_p item;
 
 	item = ng_alloc_item(NGQF_DATA, flags);
 	if (item == NULL) {
 		/* No item to carry it: the mbuf is freed, caller gets NULL. */
 		NG_FREE_M(m);
 		return (NULL);
 	}
 	ITEM_DEBUG_CHECKS;
 
 	/* Data items are marked as readers. */
 	item->el_flags |= NGQF_READER;
 	NGI_M(item) = m;
 	return (item);
 }
 
 /*
  * Allocate a queue item and put items into it..
  * Evaluate the address as this will be needed to queue it and
  * to work out what some of the fields should be.
  * Hook and node references will be removed when the item is dequeued.
  * (or equivalent)
  */
 item_p
 ng_package_msg(struct ng_mesg *msg, int flags)
 {
 	item_p item;
 
 	item = ng_alloc_item(NGQF_MESG, flags);
 	if (item == NULL) {
 		/* No item to carry it: the message is freed. */
 		NG_FREE_MSG(msg);
 		return (NULL);
 	}
 	ITEM_DEBUG_CHECKS;
 
 	/* Message items count as writers unless explicitly exempted. */
 	item->el_flags |= (msg->header.cmd & NGM_READONLY) ?
 	    NGQF_READER : NGQF_WRITER;
 
 	/* Attach the message; the return address starts out unset. */
 	NGI_MSG(item) = msg;
 	NGI_RETADDR(item) = 0;
 	return (item);
 }
 
 /*
  * Set the return address of a message item.
  * Items of any other type are left untouched.  A non-zero 'retaddr' is
  * stored as given; otherwise an address already present on the item is
  * kept, and only when there is none is the ID of node 'here' used.
  */
 #define SET_RETADDR(item, here, retaddr)				\
 	do {	/* Data or fn items don't have retaddrs */		\
 		if ((item->el_flags & NGQF_TYPE) == NGQF_MESG) {	\
 			if (retaddr) {					\
 				NGI_RETADDR(item) = retaddr;		\
 			} else {					\
 				/*					\
 				 * The old return address should be ok.	\
 				 * If there isn't one, use the address	\
 				 * here.				\
 				 */					\
 				if (NGI_RETADDR(item) == 0) {		\
 					NGI_RETADDR(item)		\
 						= ng_node2ID(here);	\
 				}					\
 			}						\
 		}							\
 	} while (0)
 
 /*
  * Address 'item' to the peer of 'hook'.
  * Under the topology read lock, check that the hook, its peer hook and
  * the peer node are all valid.  On success the item is given references
  * on the peer hook and the peer node, its return address is set with
  * SET_RETADDR(), and 0 is returned.  On failure the item is freed and
  * ENETDOWN is returned.
  */
 int
 ng_address_hook(node_p here, item_p item, hook_p hook, ng_ID_t retaddr)
 {
 	hook_p peer;
 	node_p peernode;
 	ITEM_DEBUG_CHECKS;
 	/*
 	 * Quick sanity check..
 	 * Since a hook holds a reference on its node, once we know
 	 * that the peer is still connected (even if invalid,) we know
 	 * that the peer node is present, though maybe invalid.
 	 */
 	TOPOLOGY_RLOCK();
 	/* 'peer' and 'peernode' are assigned as the checks proceed. */
 	if ((hook == NULL) || NG_HOOK_NOT_VALID(hook) ||
 	    NG_HOOK_NOT_VALID(peer = NG_HOOK_PEER(hook)) ||
 	    NG_NODE_NOT_VALID(peernode = NG_PEER_NODE(hook))) {
 		NG_FREE_ITEM(item);
 		TRAP_ERROR();
 		TOPOLOGY_RUNLOCK();
 		return (ENETDOWN);
 	}
 
 	/*
 	 * Transfer our interest to the other (peer) end.
 	 */
 	NG_HOOK_REF(peer);
 	NG_NODE_REF(peernode);
 	NGI_SET_HOOK(item, peer);
 	NGI_SET_NODE(item, peernode);
 	SET_RETADDR(item, here, retaddr);
 
 	TOPOLOGY_RUNLOCK();
 
 	return (0);
 }
 
 /*
  * Address 'item' to the node (and optionally hook) named by the path
  * 'address', resolved relative to 'here'.
  * Returns 0 on success.  If the path cannot be resolved the item is
  * freed and the error from ng_path2noderef() is returned.
  */
 int
 ng_address_path(node_p here, item_p item, const char *address, ng_ID_t retaddr)
 {
 	node_p	target = NULL;
 	hook_p	inhook = NULL;
 	int	rc;
 
 	ITEM_DEBUG_CHECKS;
 	/*
 	 * ng_path2noderef() already takes a reference on the node it
 	 * finds, so none is added here.
 	 */
 	rc = ng_path2noderef(here, address, &target, &inhook);
 	if (rc != 0) {
 		NG_FREE_ITEM(item);
 		return (rc);
 	}
 
 	NGI_SET_NODE(item, target);
 	if (inhook != NULL)
 		NGI_SET_HOOK(item, inhook);
 	SET_RETADDR(item, here, retaddr);
 	return (0);
 }
 
 /*
  * Address 'item' to the node with the given ID; no hook is recorded.
  * Returns 0 on success.  If no such node is found the item is freed and
  * EINVAL is returned.
  */
 int
 ng_address_ID(node_p here, item_p item, ng_ID_t ID, ng_ID_t retaddr)
 {
 	node_p target;
 
 	ITEM_DEBUG_CHECKS;
 	/* Look the node up; a successful lookup GETS A REFERENCE. */
 	target = ng_ID2noderef(ID);
 	if (target == NULL) {
 		NG_FREE_ITEM(item);
 		TRAP_ERROR();
 		return(EINVAL);
 	}
 
 	/* Fill out the contents */
 	NGI_SET_NODE(item, target);
 	NGI_CLR_HOOK(item);
 	SET_RETADDR(item, here, retaddr);
 	return (0);
 }
 
 /*
  * Special case to send a message to self (e.g. destroy node).
  * Builds a writer message item addressed to 'here', with 'here' as the
  * return address too.  An arrival hook may be given as well, which is
  * useful for removing that hook :-)
  * Returns the item, or NULL (after freeing the message) when no item
  * can be allocated.
  */
 item_p
 ng_package_msg_self(node_p here, hook_p hook, struct ng_mesg *msg)
 {
 	item_p it;
 
 	it = ng_alloc_item(NGQF_MESG, NG_NOFLAGS);
 	if (it == NULL) {
 		NG_FREE_MSG(msg);
 		return (NULL);
 	}
 
 	/* Fill out the contents */
 	it->el_flags |= NGQF_WRITER;
 	NG_NODE_REF(here);
 	NGI_SET_NODE(it, here);
 	if (hook != NULL) {
 		NG_HOOK_REF(hook);
 		NGI_SET_HOOK(it, hook);
 	}
 	NGI_MSG(it) = msg;
 	NGI_RETADDR(it) = ng_node2ID(here);
 	return (it);
 }
 
 /*
  * Send ng_item_fn function call to the specified node.
  * Same as ng_send_fn1() with NG_NOFLAGS.
  */
 int
 ng_send_fn(node_p node, hook_p hook, ng_item_fn *fn, void * arg1, int arg2)
 {
 
 	return (ng_send_fn1(node, hook, fn, arg1, arg2, NG_NOFLAGS));
 }
 
 /*
  * Send ng_item_fn function call to the specified node, with 'flags'
  * used both for the item allocation and for ng_snd_item().
  * Returns ENOMEM when no item can be allocated, otherwise the result of
  * ng_snd_item().
  */
 int
 ng_send_fn1(node_p node, hook_p hook, ng_item_fn *fn, void * arg1, int arg2,
 	int flags)
 {
 	item_p it;
 
 	it = ng_alloc_item(NGQF_FN, flags);
 	if (it == NULL)
 		return (ENOMEM);
 
 	/* Function items are sent as writers. */
 	it->el_flags |= NGQF_WRITER;
 	NG_NODE_REF(node); /* and one for the item */
 	NGI_SET_NODE(it, node);
 	if (hook != NULL) {
 		NG_HOOK_REF(hook);
 		NGI_SET_HOOK(it, hook);
 	}
 	NGI_FN(it) = fn;
 	NGI_ARG1(it) = arg1;
 	NGI_ARG2(it) = arg2;
 	return(ng_snd_item(it, flags));
 }
 
 /*
  * Send ng_item_fn2 function call to the specified node.
  *
  * 'pitem' is optional.  When it is given together with NG_REUSE_ITEM,
  * pitem itself is converted and sent instead of a new item.  When it is
  * given without that flag, a new item is allocated and only pitem's
  * apply callback is copied to it.
  * Returns ENOMEM when no item can be obtained, otherwise the result of
  * ng_snd_item().
  */
 int
 ng_send_fn2(node_p node, hook_p hook, item_p pitem, ng_item_fn2 *fn, void *arg1,
 	int arg2, int flags)
 {
 	item_p it;
 
 	KASSERT((pitem != NULL || (flags & NG_REUSE_ITEM) == 0),
 	    ("%s: NG_REUSE_ITEM but no pitem", __func__));
 
 	if (pitem != NULL && (flags & NG_REUSE_ITEM) != 0) {
 		/* Recycle the supplied item, switching its type. */
 		it = ng_realloc_item(pitem, NGQF_FN2, flags);
 		if (it == NULL)
 			return (ENOMEM);
 	} else {
 		/* None supplied, or we may not use the supplied one. */
 		it = ng_alloc_item(NGQF_FN2, flags);
 		if (it == NULL)
 			return (ENOMEM);
 		if (pitem != NULL)
 			it->apply = pitem->apply;
 	}
 
 	/* Whatever the item was before, it is sent as a writer. */
 	it->el_flags = (it->el_flags & ~NGQF_RW) | NGQF_WRITER;
 	NG_NODE_REF(node); /* and one for the item */
 	NGI_SET_NODE(it, node);
 	if (hook != NULL) {
 		NG_HOOK_REF(hook);
 		NGI_SET_HOOK(it, hook);
 	}
 	NGI_FN2(it) = fn;
 	NGI_ARG1(it) = arg1;
 	NGI_ARG2(it) = arg2;
 	return(ng_snd_item(it, flags));
 }
 
 /*
  * Official timeout routines for Netgraph nodes.
  */
 /*
  * Callout handler installed by ng_callout().
  * 'arg' is the item prepared there; it is sent with ng_snd_item() in the
  * vnet context of the node recorded on the item.
  */
 static void
 ng_callout_trampoline(void *arg)
 {
 	item_p item = arg;
 
 	CURVNET_SET(NGI_NODE(item)->nd_vnet);
 	ng_snd_item(item, 0);
 	CURVNET_RESTORE();
 }
 
 /*
  * Schedule a function item for 'fn' to be sent to 'node' (optionally
  * arriving on 'hook') via callout 'c' after 'ticks'.
  * The item holds references on the node and, if given, the hook.  If
  * resetting the callout cancelled a previously scheduled one and that
  * one had an item as argument, the old item is freed.
  * Returns ENOMEM when no item can be allocated, 0 otherwise.
  */
 int
 ng_callout(struct callout *c, node_p node, hook_p hook, int ticks,
     ng_item_fn *fn, void * arg1, int arg2)
 {
 	item_p item, oitem;
 
 	if ((item = ng_alloc_item(NGQF_FN, NG_NOFLAGS)) == NULL)
 		return (ENOMEM);
 
 	item->el_flags |= NGQF_WRITER;
 	NG_NODE_REF(node);		/* and one for the item */
 	NGI_SET_NODE(item, node);
 	if (hook) {
 		NG_HOOK_REF(hook);
 		NGI_SET_HOOK(item, hook);
 	}
 	NGI_FN(item) = fn;
 	NGI_ARG1(item) = arg1;
 	NGI_ARG2(item) = arg2;
 	/* Remember the old argument before callout_reset() replaces it. */
 	oitem = c->c_arg;
-	if ((callout_reset(c, ticks, &ng_callout_trampoline, item) &
-	     CALLOUT_RET_CANCELLED) && oitem != NULL)
+	if (callout_reset(c, ticks, &ng_callout_trampoline, item).bit.cancelled
+	    && oitem != NULL)
 		NG_FREE_ITEM(oitem);
 	return (0);
 }
 
 /*
  * A special modified version of untimeout().
  * Stops callout 'c'.  If the stop reports that a pending callout was
  * cancelled, and that callout was one set up by ng_callout() for 'node',
  * the item it carried is freed.  c->c_arg is cleared in every case.
  * Returns the 'cancelled' indication obtained from callout_stop().
  */
 int
 ng_uncallout(struct callout *c, node_p node)
 {
 	item_p item;
 	int rval;
 
 	KASSERT(c != NULL, ("ng_uncallout: NULL callout"));
 	KASSERT(node != NULL, ("ng_uncallout: NULL node"));
 
-	rval = callout_stop(c);
+	rval = callout_stop(c).bit.cancelled;
 	item = c->c_arg;
 	/* Do an extra check */
-	if ((rval & CALLOUT_RET_CANCELLED) &&
+	if ((rval != 0) &&
 	    (c->c_func == &ng_callout_trampoline) &&
 	    (item != NULL) && (NGI_NODE(item) == node)) {
 		/*
 		 * We successfully removed it from the queue before it ran
 		 * So now we need to unreference everything that was
 		 * given extra references. (NG_FREE_ITEM does this).
 		 */
 		NG_FREE_ITEM(item);
 	}
 	c->c_arg = NULL;
 
 	return (rval);
 }
 
 /*
  * Replace the return address of 'item'.
  * A non-zero 'retaddr' is stored as given; otherwise the ID of node
  * 'here' is stored.
  */
 void
 ng_replace_retaddr(node_p here, item_p item, ng_ID_t retaddr)
 {
 	if (retaddr) {
 		NGI_RETADDR(item) = retaddr;
 	} else {
 		/*
 		 * No address given: use the address of the node here.
 		 * NOTE(review): unlike SET_RETADDR(), a return address
 		 * already present on the item is overwritten, not kept -
 		 * confirm this is intended.
 		 */
 		NGI_RETADDR(item) = ng_node2ID(here);
 	}
 }
Index: projects/hps_head/sys/netinet/if_ether.c
===================================================================
--- projects/hps_head/sys/netinet/if_ether.c	(revision 309217)
+++ projects/hps_head/sys/netinet/if_ether.c	(revision 309218)
@@ -1,1503 +1,1498 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ether.c	8.1 (Berkeley) 6/10/93
  */
 
 /*
  * Ethernet address resolution protocol.
  * TODO:
  *	add "inuse/lock" bit (or ref. count) along with valid bit
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/ethernet.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_var.h>
 #include <net/if_llatbl.h>
 #include <netinet/if_ether.h>
 #ifdef INET
 #include <netinet/ip_carp.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 /* View a generic sockaddr pointer as a read-only IPv4 sockaddr. */
 #define SIN(s) ((const struct sockaddr_in *)(s))
 
 /*
  * Rate-limit state for ARP_LOG(): last log timestamp, messages counted in
  * the current second, and the per-second ceiling (tunable through the
  * max_log_per_second sysctl declared in this file).
  */
 static struct timeval arp_lastlog;
 static int arp_curpps;
 static int arp_maxpps = 1;
 
 /*
  * Simple ARP state machine.
  * The value is kept in the entry's ln_state field and drives arptimer().
  */
 enum arp_llinfo_state {
 	ARP_LLINFO_INCOMPLETE = 0, /* No LLE data */
 	ARP_LLINFO_REACHABLE,	/* LLE is valid */
 	ARP_LLINFO_VERIFY,	/* LLE is valid, need refresh */
 	ARP_LLINFO_DELETED,	/* LLE is deleted */
 };
 
 /* sysctl tree nodes: net.link.ether.inet and net.link.ether.arp */
 SYSCTL_DECL(_net_link_ether);
 static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
 static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
 
 /* timer values (per-vnet) */
 static VNET_DEFINE(int, arpt_keep) = (20*60);	/* once resolved, good for 20
 						 * minutes */
 static VNET_DEFINE(int, arp_maxtries) = 5;	/* requests before an error
 						 * is returned */
 static VNET_DEFINE(int, arp_proxyall) = 0;	/* proxy ARP for all suitable
 						 * requests when non-zero */
 static VNET_DEFINE(int, arpt_down) = 20;	/* keep incomplete entries for
 						 * 20 seconds */
 static VNET_DEFINE(int, arpt_rexmit) = 1;	/* retransmit arp entries, sec*/
 VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
 VNET_PCPUSTAT_SYSINIT(arpstat);
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(arpstat);
 #endif /* VIMAGE */
 
 /* Number of packets held per unresolved entry (see maxhold sysctl). */
 static VNET_DEFINE(int, arp_maxhold) = 1;
 
 /* Accessors for the per-vnet variables above. */
 #define	V_arpt_keep		VNET(arpt_keep)
 #define	V_arpt_down		VNET(arpt_down)
 #define	V_arpt_rexmit		VNET(arpt_rexmit)
 #define	V_arp_maxtries		VNET(arp_maxtries)
 #define	V_arp_proxyall		VNET(arp_proxyall)
 #define	V_arp_maxhold		VNET(arp_maxhold)
 
 /* Export the tunables under net.link.ether.inet / net.link.ether.arp. */
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arpt_keep), 0,
 	"ARP entry lifetime in seconds");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arp_maxtries), 0,
 	"ARP resolution attempts before returning error");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arp_proxyall), 0,
 	"Enable proxy ARP for all suitable requests");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arpt_down), 0,
 	"Incomplete ARP entry lifetime in seconds");
 SYSCTL_VNET_PCPUSTAT(_net_link_ether_arp, OID_AUTO, stats, struct arpstat,
     arpstat, "ARP statistics (struct arpstat, net/if_arp.h)");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(arp_maxhold), 0,
 	"Number of packets to hold per ARP entry");
 /* Not per-vnet: the log rate limit is a single global. */
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
 	CTLFLAG_RW, &arp_maxpps, 0,
 	"Maximum number of remotely triggered ARP messages that can be "
 	"logged per second");
 
 /*
  * Due to the exponential backoff algorithm used for the interval between GARP
  * retransmissions, the maximum number of retransmissions is limited for
  * sanity. This limit corresponds to a maximum interval between retransmissions
  * of 2^16 seconds ~= 18 hours.
  *
  * Making this limit more dynamic is more complicated than worthwhile,
  * especially since sending out GARPs spaced days apart would be of little
  * use. A maximum dynamic limit would look something like:
  *
  * const int max = fls(INT_MAX / hz) - 1;
  */
 #define MAX_GARP_RETRANSMITS 16
 static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS);
 static int garp_rexmit_count = 0; /* GARP retransmission setting. */
 
 /* Writes go through sysctl_garp_rexmit() rather than a plain SYSCTL_INT. */
 SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count,
     CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
     &garp_rexmit_count, 0, sysctl_garp_rexmit, "I",
     "Number of times to retransmit GARP packets;"
     " 0 to disable, maximum of 16");
 
 /*
  * Rate-limited logging helper: emits an "arp: "-prefixed message at
  * priority @pri only while ppsratecheck() permits it, i.e. at most
  * arp_maxpps messages per second.
  */
 #define	ARP_LOG(pri, ...)	do {					\
 	if (ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps))	\
 		log((pri), "arp: " __VA_ARGS__);			\
 } while (0)
 
 
 /* Forward declarations for file-local routines. */
 static void	arpintr(struct mbuf *);
 static void	arptimer(void *);
 #ifdef INET
 static void	in_arpinput(struct mbuf *);
 #endif
 
 static void arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr,
     struct ifnet *ifp, int bridged, struct llentry *la);
 static void arp_mark_lle_reachable(struct llentry *la);
 static void arp_iflladdr(void *arg __unused, struct ifnet *ifp);
 
 /* Event handler registration tag (presumably for arp_iflladdr; confirm). */
 static eventhandler_tag iflladdr_tag;
 
 /* netisr registration record: ARP packets are dispatched to arpintr(). */
 static const struct netisr_handler arp_nh = {
 	.nh_name = "arp",
 	.nh_handler = arpintr,
 	.nh_proto = NETISR_ARP,
 	.nh_policy = NETISR_POLICY_SOURCE,
 };
 
 /*
  * Timeout routine.  Age arp_tab entries periodically.
  *
  * @arg is the struct llentry whose lle_timer fired.  Static entries are
  * left alone.  Depending on ln_state the entry is either moved to the
  * VERIFY state and re-armed, refreshed with a new ARP request, re-armed
  * unchanged, or unlinked from its table and freed.
  */
 static void
 arptimer(void *arg)
 {
 	struct llentry *lle = (struct llentry *)arg;
 	struct ifnet *ifp;
 	int r_skip_req;
 
 	/* Static entries never age; checked before taking the entry lock. */
 	if (lle->la_flags & LLE_STATIC) {
 		return;
 	}
 	LLE_WLOCK(lle);
 	if (callout_pending(&lle->lle_timer)) {
 		/*
 		 * We are a bit odd here in the treatment of
 		 * active/pending. If the pending bit is set, it got
 		 * rescheduled before I ran. The active
 		 * bit we ignore, since if it was stopped
 		 * in ll_tablefree() and was currently running
 		 * it would have return 0 so the code would
 		 * not have deleted it since the callout could
 		 * not be stopped so we want to go through
 		 * with the delete here now. If the callout
 		 * was restarted, the pending bit will be back on and
 		 * we just want to bail since the callout_reset would
 		 * return 1 and our reference would have been removed
 		 * by arpresolve() below.
 		 */
 		LLE_WUNLOCK(lle);
  		return;
  	}
 	ifp = lle->lle_tbl->llt_ifp;
 	CURVNET_SET(ifp->if_vnet);
 
 	switch (lle->ln_state) {
 	case ARP_LLINFO_REACHABLE:
 
 		/*
 		 * Expiration time is approaching.
 		 * Let's try to refresh entry if it is still
 		 * in use.
 		 *
 		 * Set r_skip_req to get feedback from
 		 * fast path. Change state and re-schedule
 		 * ourselves.
 		 */
 		LLE_REQ_LOCK(lle);
 		lle->r_skip_req = 1;
 		LLE_REQ_UNLOCK(lle);
 		lle->ln_state = ARP_LLINFO_VERIFY;
 		callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
 		LLE_WUNLOCK(lle);
 		CURVNET_RESTORE();
 		return;
 	case ARP_LLINFO_VERIFY:
 		/* Read the feedback flag that the resolve paths clear on use. */
 		LLE_REQ_LOCK(lle);
 		r_skip_req = lle->r_skip_req;
 		LLE_REQ_UNLOCK(lle);
 
 		if (r_skip_req == 0 && lle->la_preempt > 0) {
 			/* Entry was used, issue refresh request */
 			struct in_addr dst;
 			/* Copy the address out: the lock is dropped below. */
 			dst = lle->r_l3addr.addr4;
 			lle->la_preempt--;
 			callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
 			LLE_WUNLOCK(lle);
 			arprequest(ifp, NULL, &dst, NULL);
 			CURVNET_RESTORE();
 			return;
 		}
 		/* Nothing happened. Reschedule if not too late */
 		if (lle->la_expire > time_uptime) {
 			callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
 			LLE_WUNLOCK(lle);
 			CURVNET_RESTORE();
 			return;
 		}
 		/* Expired without being used: fall out to the delete path. */
 		break;
 	case ARP_LLINFO_INCOMPLETE:
 	case ARP_LLINFO_DELETED:
 		break;
 	}
 
 	/* Notify listeners once, unless the entry was already deleted. */
 	if ((lle->la_flags & LLE_DELETED) == 0) {
 		int evt;
 
 		if (lle->la_flags & LLE_VALID)
 			evt = LLENTRY_EXPIRED;
 		else
 			evt = LLENTRY_TIMEDOUT;
 		EVENTHANDLER_INVOKE(lle_event, lle, evt);
 	}
 
 	callout_stop(&lle->lle_timer);
 
 	/* XXX: LOR avoidance. We still have ref on lle. */
 	LLE_WUNLOCK(lle);
 	IF_AFDATA_LOCK(ifp);
 	LLE_WLOCK(lle);
 
 	/* Guard against race with other llentry_free(). */
 	if (lle->la_flags & LLE_LINKED) {
 		LLE_REMREF(lle);
 		lltable_unlink_entry(lle->lle_tbl, lle);
 	}
 	IF_AFDATA_UNLOCK(ifp);
 
 	/* llentry_free() reports how many held packets it discarded. */
 	size_t pkts_dropped = llentry_free(lle);
 
 	ARPSTAT_ADD(dropped, pkts_dropped);
 	ARPSTAT_INC(timeouts);
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Build the link-layer header used to send ARP message @ah out of @ifp.
  *
  * @buf receives the header; on entry *@bufsize is the capacity of @buf and
  * on success it is updated to the length of the generated header.  A
  * non-zero @bcast requests a broadcast header.
  *
  * Returns 0 on success, otherwise the error reported by the interface's
  * if_requestencap() method (in which case *@bufsize is left untouched).
  */
 static int
 arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf,
     size_t *bufsize)
 {
 	struct if_encap_req req;
 	int rc;
 
 	/* Start from a clean output buffer and a clean request. */
 	bzero(buf, *bufsize);
 	bzero(&req, sizeof(req));
 
 	req.rtype = IFENCAP_LL;
 	req.family = AF_ARP;
 	req.flags = (bcast != 0) ? IFENCAP_FLAG_BROADCAST : 0;
 	req.buf = buf;
 	req.bufsize = *bufsize;
 	req.hdata = (u_char *)ah;
 	req.lladdr = ar_tha(ah);
 
 	rc = ifp->if_requestencap(ifp, &req);
 	if (rc != 0)
 		return (rc);
 
 	/* Report how much of the buffer the header occupies. */
 	*bufsize = req.bufsize;
 	return (0);
 }
 
 
 /*
  * Broadcast an ARP request. Caller specifies:
  *	- arp header source ip address
  *	- arp header target ip address
  *	- arp header source ethernet address
  *
  * When @sip is NULL an IPv4 address configured on @ifp is chosen,
  * preferring one on the same subnet as @tip.  When @enaddr is NULL the
  * CARP address selected during that search is used, or else the
  * interface's own link-level address.
  *
  * The request mbuf is allocated here; every exit after the allocation
  * either hands it to if_output() or frees it.
  */
 void
 arprequest(struct ifnet *ifp, const struct in_addr *sip,
     const struct in_addr *tip, u_char *enaddr)
 {
 	struct mbuf *m;
 	struct arphdr *ah;
 	struct sockaddr sa;
 	u_char *carpaddr = NULL;
 	uint8_t linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	struct route ro;
 	int error;
 
 	if (sip == NULL) {
 		/*
 		 * The caller did not supply a source address, try to find
 		 * a compatible one among those assigned to this interface.
 		 */
 		struct ifaddr *ifa;
 
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 
 			if (ifa->ifa_carp) {
 				/* Skip CARP addresses we do not answer for. */
 				if ((*carp_iamatch_p)(ifa, &carpaddr) == 0)
 					continue;
 				sip = &IA_SIN(ifa)->sin_addr;
 			} else {
 				carpaddr = NULL;
 				sip = &IA_SIN(ifa)->sin_addr;
 			}
 
 			/* Same subnet as the target under this address' mask? */
 			if (0 == ((sip->s_addr ^ tip->s_addr) &
 			    IA_MASKSIN(ifa)->sin_addr.s_addr))
 				break;  /* found it. */
 		}
 		IF_ADDR_RUNLOCK(ifp);
 		if (sip == NULL) {
 			printf("%s: cannot find matching address\n", __func__);
 			return;
 		}
 	}
 	if (enaddr == NULL)
 		enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp);
 
 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
 		return;
 	/* Fixed ARP header plus two hardware and two protocol addresses. */
 	m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
 		2 * ifp->if_addrlen;
 	m->m_pkthdr.len = m->m_len;
 	M_ALIGN(m, m->m_len);
 	ah = mtod(m, struct arphdr *);
 	bzero((caddr_t)ah, m->m_len);
 #ifdef MAC
 	mac_netinet_arp_send(ifp, m);
 #endif
 	ah->ar_pro = htons(ETHERTYPE_IP);
 	ah->ar_hln = ifp->if_addrlen;		/* hardware address length */
 	ah->ar_pln = sizeof(struct in_addr);	/* protocol address length */
 	ah->ar_op = htons(ARPOP_REQUEST);
 	bcopy(enaddr, ar_sha(ah), ah->ar_hln);
 	bcopy(sip, ar_spa(ah), ah->ar_pln);
 	bcopy(tip, ar_tpa(ah), ah->ar_pln);
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 
 	/* Calculate link header for sending frame */
 	bzero(&ro, sizeof(ro));
 	linkhdrsize = sizeof(linkhdr);
 	error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize);
 	/* EAFNOSUPPORT is tolerated: the frame is still passed to if_output(). */
 	if (error != 0 && error != EAFNOSUPPORT) {
 		/* The mbuf is still ours here; release it before bailing out. */
 		m_freem(m);
 		ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
 		    if_name(ifp), error);
 		return;
 	}
 
 	ro.ro_prepend = linkhdr;
 	ro.ro_plen = linkhdrsize;
 	ro.ro_flags = 0;
 
 	m->m_flags |= M_BCAST;
 	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 	(*ifp->if_output)(ifp, m, &sa, &ro);
 	ARPSTAT_INC(txrequests);
 }
 
 
 /*
  * Resolve an IP address into an ethernet address - heavy version.
  * Used internally by arpresolve().
  * We have already checked that we can't use existing lle without
  * modification so we have to acquire LLE_EXCLUSIVE lle lock.
  *
  * @flags may carry LLE_CREATE (skip the initial lookup and go straight to
  * allocation) and LLE_ADDRONLY (copy only the link-layer address into
  * @desten instead of the full prepend header).
  *
  * On success, desten and flags are filled in and the function returns 0;
  * If the packet must be held pending resolution, we return EWOULDBLOCK
  * On other errors, we return the corresponding error code.
  * Note that m_freem() handles NULL.
  */
 static int
 arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
 	const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
 	struct llentry **plle)
 {
 	struct llentry *la = NULL, *la_tmp;
 	struct mbuf *curr = NULL;
 	struct mbuf *next = NULL;
 	int error, renew;
 	char *lladdr;
 	int ll_len;
 
 	if (pflags != NULL)
 		*pflags = 0;
 	if (plle != NULL)
 		*plle = NULL;
 
 	/* Unless told to create, first look for an existing entry. */
 	if ((flags & LLE_CREATE) == 0) {
 		IF_AFDATA_RLOCK(ifp);
 		la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 		IF_AFDATA_RUNLOCK(ifp);
 	}
 	/* No entry: create one, unless the interface does no dynamic ARP. */
 	if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
 		la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
 		if (la == NULL) {
 			log(LOG_DEBUG,
 			    "arpresolve: can't allocate llinfo for %s on %s\n",
 			    inet_ntoa(SIN(dst)->sin_addr), if_name(ifp));
 			m_freem(m);
 			return (EINVAL);
 		}
 
 		/* Re-check under the write lock before linking the new entry. */
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(la);
 		la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 		/* Prefer ANY existing lle over newly-created one */
 		if (la_tmp == NULL)
 			lltable_link_entry(LLTABLE(ifp), la);
 		IF_AFDATA_WUNLOCK(ifp);
 		if (la_tmp != NULL) {
 			lltable_free_entry(LLTABLE(ifp), la);
 			la = la_tmp;
 		}
 	}
 	if (la == NULL) {
 		m_freem(m);
 		return (EINVAL);
 	}
 
 	/* Usable entry: valid and either static or not yet expired. */
 	if ((la->la_flags & LLE_VALID) &&
 	    ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
 		if (flags & LLE_ADDRONLY) {
 			lladdr = la->ll_addr;
 			ll_len = ifp->if_addrlen;
 		} else {
 			lladdr = la->r_linkdata;
 			ll_len = la->r_hdrlen;
 		}
 		bcopy(lladdr, desten, ll_len);
 
 		/* Check if we have feedback request from arptimer() */
 		if (la->r_skip_req != 0) {
 			LLE_REQ_LOCK(la);
 			la->r_skip_req = 0; /* Notify that entry was used */
 			LLE_REQ_UNLOCK(la);
 		}
 		if (pflags != NULL)
 			*pflags = la->la_flags & (LLE_VALID|LLE_IFADDR);
 		/* The caller gets its own reference on the returned entry. */
 		if (plle) {
 			LLE_ADDREF(la);
 			*plle = la;
 		}
 		LLE_WUNLOCK(la);
 		return (0);
 	}
 
 	/*
 	 * Send a request if none was sent yet, or if the last one was not
 	 * sent during the current second (la_expire is stamped with
 	 * time_uptime whenever a request goes out below).
 	 */
 	renew = (la->la_asked == 0 || la->la_expire != time_uptime);
 	/*
 	 * There is an arptab entry, but no ethernet address
 	 * response yet.  Add the mbuf to the list, dropping
 	 * the oldest packet if we have exceeded the system
 	 * setting.
 	 */
 	if (m != NULL) {
 		if (la->la_numheld >= V_arp_maxhold) {
 			if (la->la_hold != NULL) {
 				next = la->la_hold->m_nextpkt;
 				m_freem(la->la_hold);
 				la->la_hold = next;
 				la->la_numheld--;
 				ARPSTAT_INC(dropped);
 			}
 		}
 		/* Append the new packet at the tail of the hold queue. */
 		if (la->la_hold != NULL) {
 			curr = la->la_hold;
 			while (curr->m_nextpkt != NULL)
 				curr = curr->m_nextpkt;
 			curr->m_nextpkt = m;
 		} else
 			la->la_hold = m;
 		la->la_numheld++;
 	}
 	/*
 	 * Return EWOULDBLOCK if we have tried less than arp_maxtries. It
 	 * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH
 	 * if we have already sent arp_maxtries ARP requests. Retransmit the
 	 * ARP request, but not faster than one request per second.
 	 */
 	if (la->la_asked < V_arp_maxtries)
 		error = EWOULDBLOCK;	/* First request. */
 	else
 		error = is_gw != 0 ? EHOSTUNREACH : EHOSTDOWN;
 
 	if (renew) {
-		int canceled;
-
 		/*
 		 * Take a reference for the timer being armed; if arming
 		 * cancelled an already-pending timer, drop the extra one.
 		 */
 		LLE_ADDREF(la);
 		la->la_expire = time_uptime;
-		canceled = callout_reset(&la->lle_timer, hz * V_arpt_down,
-		    arptimer, la);
-		if (canceled & CALLOUT_RET_CANCELLED)
+		if (callout_reset(&la->lle_timer, hz * V_arpt_down,
+		    arptimer, la).bit.cancelled)
 			LLE_REMREF(la);
 		la->la_asked++;
 		LLE_WUNLOCK(la);
 		/* Send the request only after the entry lock is released. */
 		arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
 		return (error);
 	}
 
 	LLE_WUNLOCK(la);
 	return (error);
 }
 
 /*
  * Resolve an IP address into an ethernet address.
  *
  * Thin wrapper around arpresolve_full(): no packet is supplied, the
  * destination is not treated as a gateway, and LLE_ADDRONLY is forced so
  * that only the link-layer address is copied into @desten.
  */
 int
 arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
     char *desten, uint32_t *pflags, struct llentry **plle)
 {
 
 	return (arpresolve_full(ifp, 0, flags | LLE_ADDRONLY, NULL, dst,
 	    desten, pflags, plle));
 }
 
 
 /*
  * Look up the link header for an IP next hop.
  *
  * Arguments:
  *    ifp     interface the packet will leave through
  *    is_gw   non-zero when @dst is a gateway towards the real destination
  *    m       packet being sent; may be NULL when there is none
  *    dst     next-hop address
  *    desten  storage that receives the link-layer header
  *    pflags  optional; receives a subset of lle flags (LLE_VALID, LLE_IFADDR)
  *    plle    optional; receives a referenced pointer to the entry used
  *
  * Returns 0 once the full/partial link header and flags are filled in.
  * Returns EWOULDBLOCK when the packet is being held pending resolution,
  * or another error code on failure.
  * Note that m_freem() handles NULL.
  */
 int
 arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
 	const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
 	struct llentry **plle)
 {
 	struct llentry *lle = NULL;
 
 	if (pflags != NULL)
 		*pflags = 0;
 	if (plle != NULL)
 		*plle = NULL;
 
 	/* Broadcast and multicast destinations need no table lookup. */
 	if (m != NULL && (m->m_flags & M_BCAST) != 0) {
 		(void)memcpy(desten,
 		    ifp->if_broadcastaddr, ifp->if_addrlen);
 		return (0);
 	}
 	if (m != NULL && (m->m_flags & M_MCAST) != 0) {
 		ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
 		return (0);
 	}
 
 	/* The entry is only locked when the caller wants it handed back. */
 	IF_AFDATA_RLOCK(ifp);
 	lle = lla_lookup(LLTABLE(ifp),
 	    (plle != NULL) ? LLE_EXCLUSIVE : LLE_UNLOCKED, dst);
 
 	if (lle == NULL || (lle->r_flags & RLLE_VALID) == 0) {
 		/* Nothing usable on the fast path: defer to the heavy version. */
 		if (plle != NULL && lle != NULL)
 			LLE_WUNLOCK(lle);
 		IF_AFDATA_RUNLOCK(ifp);
 
 		return (arpresolve_full(ifp, is_gw,
 		    (lle == NULL) ? LLE_CREATE : 0, m, dst, desten, pflags,
 		    plle));
 	}
 
 	/* Valid entry: copy out the prepared header and flags. */
 	bcopy(lle->r_linkdata, desten, lle->r_hdrlen);
 	if (pflags != NULL)
 		*pflags = LLE_VALID | (lle->r_flags & RLLE_IFADDR);
 
 	/* Acknowledge a pending "was this entry used?" query from arptimer(). */
 	if (lle->r_skip_req != 0) {
 		LLE_REQ_LOCK(lle);
 		lle->r_skip_req = 0;
 		LLE_REQ_UNLOCK(lle);
 	}
 
 	if (plle != NULL) {
 		LLE_ADDREF(lle);
 		*plle = lle;
 		LLE_WUNLOCK(lle);
 	}
 	IF_AFDATA_RUNLOCK(ifp);
 	return (0);
 }
 
 /*
  * Common length and type checks are done here,
  * then the protocol-specific routine is called.
  *
  * Takes ownership of @m: the mbuf is either passed on to in_arpinput()
  * or freed here (m_pullup() frees it itself when it fails).
  */
 static void
 arpintr(struct mbuf *m)
 {
 	struct arphdr *ar;
 	struct ifnet *ifp;
 	char *layer;
 	int hlen;
 
 	/* Remember the interface now; m may be gone after a failed pullup. */
 	ifp = m->m_pkthdr.rcvif;
 
 	if (m->m_len < sizeof(struct arphdr) &&
 	    ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
 		ARP_LOG(LOG_NOTICE, "packet with short header received on %s\n",
 		    if_name(ifp));
 		return;
 	}
 	ar = mtod(m, struct arphdr *);
 
 	/* Check if length is sufficient */
 	if (m->m_len <  arphdr_len(ar)) {
 		m = m_pullup(m, arphdr_len(ar));
 		if (m == NULL) {
 			ARP_LOG(LOG_NOTICE, "short packet received on %s\n",
 			    if_name(ifp));
 			return;
 		}
 		/* The data may have moved; refresh the header pointer. */
 		ar = mtod(m, struct arphdr *);
 	}
 
 	/* Expected hardware address length per hardware type (0: unchecked). */
 	hlen = 0;
 	layer = "";
 	switch (ntohs(ar->ar_hrd)) {
 	case ARPHRD_ETHER:
 		hlen = ETHER_ADDR_LEN; /* RFC 826 */
 		layer = "ethernet";
 		break;
 	case ARPHRD_IEEE802:
 		hlen = 6; /* RFC 1390, FDDI_ADDR_LEN */
 		layer = "fddi";
 		break;
 	case ARPHRD_ARCNET:
 		hlen = 1; /* RFC 1201, ARC_ADDR_LEN */
 		layer = "arcnet";
 		break;
 	case ARPHRD_INFINIBAND:
 		hlen = 20;	/* RFC 4391, INFINIBAND_ALEN */
 		layer = "infiniband";
 		break;
 	case ARPHRD_IEEE1394:
 		hlen = 0; /* SHALL be 16 */ /* RFC 2734 */
 		layer = "firewire";
 
 		/*
 		 * Restrict too long hardware addresses.
 		 * Currently we are capable of handling 20-byte
 		 * addresses ( sizeof(lle->ll_addr) )
 		 */
 		if (ar->ar_hln >= 20)
 			hlen = 16;
 		break;
 	default:
 		/* Print the type in hex so the value matches its "0x" prefix. */
 		ARP_LOG(LOG_NOTICE,
 		    "packet with unknown hardware format 0x%04x received on "
 		    "%s\n", ntohs(ar->ar_hrd), if_name(ifp));
 		m_freem(m);
 		return;
 	}
 
 	if (hlen != 0 && hlen != ar->ar_hln) {
 		ARP_LOG(LOG_NOTICE,
 		    "packet with invalid %s address length %d received on %s\n",
 		    layer, ar->ar_hln, if_name(ifp));
 		m_freem(m);
 		return;
 	}
 
 	ARPSTAT_INC(received);
 	switch (ntohs(ar->ar_pro)) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		in_arpinput(m);
 		return;
 #endif
 	}
 	/* Unhandled protocol type: discard. */
 	m_freem(m);
 }
 
 #ifdef INET
 /*
  * ARP for Internet protocols on 10 Mb/s Ethernet.
  * Algorithm is that given in RFC 826.
  * In addition, a sanity check is performed on the sender
  * protocol address, to catch impersonators.
  * We no longer handle negotiations for use of trailer protocol:
  * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
  * along with IP replies if we wanted trailers sent to us,
  * and also sent them in response to IP replies.
  * This allowed either end to announce the desire to receive
  * trailer packets.
  * We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
  * but formerly didn't normally send requests.
  */
 /*
  * Behaviour and logging switches for received ARP packets; each is
  * exported read-write under net.link.ether.inet by the sysctls below.
  */
 static int log_arp_wrong_iface = 1;
 static int log_arp_movements = 1;
 static int log_arp_permanent_modify = 1;
 static int allow_multicast = 0;	/* accept multicast sender addresses */
 
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
 	&log_arp_wrong_iface, 0,
 	"log arp packets arriving on the wrong interface");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
 	&log_arp_movements, 0,
 	"log arp replies from MACs different than the one in the cache");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
 	&log_arp_permanent_modify, 0,
 	"log arp replies from MACs different than the one in the permanent arp entry");
 SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW,
 	&allow_multicast, 0, "accept multicast addresses");
 
 /*
  * Process one received IPv4 ARP message.
  *
  * The caller (arpintr()) has already made the whole ARP message contiguous
  * in @m.  The routine picks the local address the message relates to,
  * updates or creates the link-layer entry for the sender, and answers
  * requests, reusing @m as the reply.
  *
  * Takes ownership of @m: it is either handed to the interface's
  * if_output() routine or freed through the "drop" label.
  */
 static void
 in_arpinput(struct mbuf *m)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct arphdr *ah;
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct llentry *la = NULL, *la_tmp;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	struct sockaddr sa;
 	struct in_addr isaddr, itaddr, myaddr;
 	u_int8_t *enaddr = NULL;
 	int op;
 	int bridged = 0, is_bridge = 0;
 	int carped;
 	struct sockaddr_in sin;
 	struct sockaddr *dst;
 	struct nhop4_basic nh4;
 	uint8_t linkhdr[LLE_MAX_LINKHDR];
 	struct route ro;
 	size_t linkhdrsize;
 	int lladdr_off;
 	int error;
 
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr.s_addr = 0;
 
 	if (ifp->if_bridge)
 		bridged = 1;
 	if (ifp->if_type == IFT_BRIDGE)
 		is_bridge = 1;
 
 	/*
 	 * We already have checked that mbuf contains enough contiguous data
 	 * to hold entire arp message according to the arp header.
 	 */
 	ah = mtod(m, struct arphdr *);
 
 	/*
 	 * ARP is only for IPv4 so we can reject packets with
 	 * a protocol length not equal to an IPv4 address.
 	 */
 	if (ah->ar_pln != sizeof(struct in_addr)) {
 		ARP_LOG(LOG_NOTICE, "requested protocol length != %zu\n",
 		    sizeof(struct in_addr));
 		goto drop;
 	}
 
 	if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) {
 		ARP_LOG(LOG_NOTICE, "%*D is multicast\n",
 		    ifp->if_addrlen, (u_char *)ar_sha(ah), ":");
 		goto drop;
 	}
 
 	/* Pull the opcode and both protocol addresses out of the message. */
 	op = ntohs(ah->ar_op);
 	(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
 	(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
 
 	if (op == ARPOP_REPLY)
 		ARPSTAT_INC(rxreplies);
 
 	/*
 	 * For a bridge, we want to check the address irrespective
 	 * of the receive interface. (This will change slightly
 	 * when we have clusters of interfaces).
 	 */
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	/* First choice: a local address equal to the target address. */
 	LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
 		if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
 		    ia->ia_ifp == ifp) &&
 		    itaddr.s_addr == ia->ia_addr.sin_addr.s_addr &&
 		    (ia->ia_ifa.ifa_carp == NULL ||
 		    (*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) {
 			ifa_ref(&ia->ia_ifa);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			goto match;
 		}
 	}
 	/* Second choice: a local address equal to the sender address. */
 	LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
 		if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
 		    ia->ia_ifp == ifp) &&
 		    isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
 			ifa_ref(&ia->ia_ifa);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			goto match;
 		}
 
 #define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia)				\
   (ia->ia_ifp->if_bridge == ifp->if_softc &&				\
   !bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) &&	\
   addr == ia->ia_addr.sin_addr.s_addr)
 	/*
 	 * Check the case when bridge shares its MAC address with
 	 * some of its children, so packets are claimed by bridge
 	 * itself (bridge_input() does it first), but they are really
 	 * meant to be destined to the bridge member.
 	 */
 	if (is_bridge) {
 		LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
 			if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) {
 				ifa_ref(&ia->ia_ifa);
 				/* Continue as if received on the member. */
 				ifp = ia->ia_ifp;
 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 				goto match;
 			}
 		}
 	}
 #undef BDG_MEMBER_MATCHES_ARP
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	/*
 	 * No match, use the first inet address on the receive interface
 	 * as a dummy address for the rest of the function.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_INET &&
 		    (ifa->ifa_carp == NULL ||
 		    (*carp_iamatch_p)(ifa, &enaddr))) {
 			ia = ifatoia(ifa);
 			ifa_ref(ifa);
 			IF_ADDR_RUNLOCK(ifp);
 			goto match;
 		}
 	IF_ADDR_RUNLOCK(ifp);
 
 	/*
 	 * If bridging, fall back to using any inet address.
 	 */
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	if (!bridged || (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL) {
 		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		goto drop;
 	}
 	ifa_ref(&ia->ia_ifa);
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 match:
 	/* enaddr is already set if a CARP match supplied an address. */
 	if (!enaddr)
 		enaddr = (u_int8_t *)IF_LLADDR(ifp);
 	/* Copy what is needed from ia, then release the reference. */
 	carped = (ia->ia_ifa.ifa_carp != NULL);
 	myaddr = ia->ia_addr.sin_addr;
 	ifa_free(&ia->ia_ifa);
 	if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
 		goto drop;	/* it's from me, ignore it. */
 	if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
 		ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address "
 		    "%s!\n", inet_ntoa(isaddr));
 		goto drop;
 	}
 
 	if (ifp->if_addrlen != ah->ar_hln) {
 		ARP_LOG(LOG_WARNING, "from %*D: addr len: new %d, "
 		    "i/f %d (ignored)\n", ifp->if_addrlen,
 		    (u_char *) ar_sha(ah), ":", ah->ar_hln,
 		    ifp->if_addrlen);
 		goto drop;
 	}
 
 	/*
 	 * Warn if another host is using the same IP address, but only if the
 	 * IP address isn't 0.0.0.0, which is used for DHCP only, in which
 	 * case we suppress the warning to avoid false positive complaints of
 	 * potential misconfiguration.
 	 */
 	if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr &&
 	    myaddr.s_addr != 0) {
 		ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n",
 		   ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 		   inet_ntoa(isaddr), ifp->if_xname);
 		itaddr = myaddr;
 		ARPSTAT_INC(dupips);
 		goto reply;
 	}
 	/* With static ARP the table is never updated from received packets. */
 	if (ifp->if_flags & IFF_STATICARP)
 		goto reply;
 
 	/* Look up the sender's entry. */
 	bzero(&sin, sizeof(sin));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_family = AF_INET;
 	sin.sin_addr = isaddr;
 	dst = (struct sockaddr *)&sin;
 	IF_AFDATA_RLOCK(ifp);
 	la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 	IF_AFDATA_RUNLOCK(ifp);
 	if (la != NULL)
 		arp_check_update_lle(ah, isaddr, ifp, bridged, la);
 	else if (itaddr.s_addr == myaddr.s_addr) {
 		/*
 		 * Request/reply to our address, but no lle exists yet.
 		 * Calculate full link prepend to use in lle.
 		 */
 		linkhdrsize = sizeof(linkhdr);
 		if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
 		    &linkhdrsize, &lladdr_off) != 0)
 			goto reply;
 
 		/* Allocate new entry */
 		la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
 		if (la == NULL) {
 
 			/*
 			 * lle creation may fail if source address belongs
 			 * to non-directly connected subnet. However, we
 			 * will try to answer the request instead of dropping
 			 * frame.
 			 */
 			goto reply;
 		}
 		lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
 		    lladdr_off);
 
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(la);
 		la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 
 		/*
 		 * Check if lle still does not exists.
 		 * If it does, that means that we either
 		 * 1) have configured it explicitly, via
 		 * 1a) 'arp -s' static entry or
 		 * 1b) interface address static record
 		 * or
 		 * 2) it was the result of sending first packet to-host
 		 * or
 		 * 3) it was another arp reply packet we handled in
 		 * different thread.
 		 *
 		 * In all cases except 3) we definitely need to prefer
 		 * existing lle. For the sake of simplicity, prefer any
 		 * existing lle over newly-create one.
 		 */
 		if (la_tmp == NULL)
 			lltable_link_entry(LLTABLE(ifp), la);
 		IF_AFDATA_WUNLOCK(ifp);
 
 		if (la_tmp == NULL) {
 			arp_mark_lle_reachable(la);
 			LLE_WUNLOCK(la);
 		} else {
 			/* Free newly-create entry and handle packet */
 			lltable_free_entry(LLTABLE(ifp), la);
 			la = la_tmp;
 			la_tmp = NULL;
 			arp_check_update_lle(ah, isaddr, ifp, bridged, la);
 			/* arp_check_update_lle() returns @la unlocked */
 		}
 		la = NULL;
 	}
 reply:
 	/* Only requests are answered; everything else ends here. */
 	if (op != ARPOP_REQUEST)
 		goto drop;
 	ARPSTAT_INC(rxrequests);
 
 	if (itaddr.s_addr == myaddr.s_addr) {
 		/* Shortcut.. the receiving interface is the target. */
 		(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 		(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 	} else {
 		struct llentry *lle = NULL;
 
 		/* Not for us: see whether we publish an entry for the target. */
 		sin.sin_addr = itaddr;
 		IF_AFDATA_RLOCK(ifp);
 		lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
 		IF_AFDATA_RUNLOCK(ifp);
 
 		if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			(void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln);
 			LLE_RUNLOCK(lle);
 		} else {
 
 			if (lle != NULL)
 				LLE_RUNLOCK(lle);
 
 			if (!V_arp_proxyall)
 				goto drop;
 
 			/* XXX MRT use table 0 for arp reply  */
 			if (fib4_lookup_nh_basic(0, itaddr, 0, 0, &nh4) != 0)
 				goto drop;
 
 			/*
 			 * Don't send proxies for nodes on the same interface
 			 * as this one came out of, or we'll get into a fight
 			 * over who claims what Ether address.
 			 */
 			if (nh4.nh_ifp == ifp)
 				goto drop;
 
 			(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 			(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 
 			/*
 			 * Also check that the node which sent the ARP packet
 			 * is on the interface we expect it to be on. This
 			 * avoids ARP chaos if an interface is connected to the
 			 * wrong network.
 			 */
 
 			/* XXX MRT use table 0 for arp checks */
 			if (fib4_lookup_nh_basic(0, isaddr, 0, 0, &nh4) != 0)
 				goto drop;
 			if (nh4.nh_ifp != ifp) {
 				ARP_LOG(LOG_INFO, "proxy: ignoring request"
 				    " from %s via %s\n",
 				    inet_ntoa(isaddr), ifp->if_xname);
 				goto drop;
 			}
 
 #ifdef DEBUG_PROXY
 			printf("arp: proxying for %s\n", inet_ntoa(itaddr));
 #endif
 		}
 	}
 
 	if (itaddr.s_addr == myaddr.s_addr &&
 	    IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
 		/* RFC 3927 link-local IPv4; always reply by broadcast. */
 #ifdef DEBUG_LINKLOCAL
 		printf("arp: sending reply for link-local addr %s\n",
 		    inet_ntoa(itaddr));
 #endif
 		m->m_flags |= M_BCAST;
 		m->m_flags &= ~M_MCAST;
 	} else {
 		/* default behaviour; never reply by broadcast. */
 		m->m_flags &= ~(M_BCAST|M_MCAST);
 	}
 	/* Turn the received request into the reply, in place. */
 	(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
 	(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
 	ah->ar_op = htons(ARPOP_REPLY);
 	ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
 	m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
 	m->m_pkthdr.len = m->m_len;
 	m->m_pkthdr.rcvif = NULL;
 	sa.sa_family = AF_ARP;
 	sa.sa_len = 2;
 
 	/* Calculate link header for sending frame */
 	bzero(&ro, sizeof(ro));
 	linkhdrsize = sizeof(linkhdr);
 	error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize);
 
 	/*
 	 * arp_fillheader() may fail due to lack of support inside encap request
 	 * routing. This is not necessary an error, AF_ARP can/should be handled
 	 * by if_output().
 	 */
 	if (error != 0 && error != EAFNOSUPPORT) {
 		ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
 		    if_name(ifp), error);
 		/* The mbuf is still ours here; free it instead of leaking it. */
 		goto drop;
 	}
 
 	ro.ro_prepend = linkhdr;
 	ro.ro_plen = linkhdrsize;
 	ro.ro_flags = 0;
 
 	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 	(*ifp->if_output)(ifp, m, &sa, &ro);
 	ARPSTAT_INC(txreplies);
 	return;
 
 drop:
 	m_freem(m);
 }
 #endif
 
 /*
  * Checks received arp data against existing @la.
  * Updates lle state/performs notification if necessary.
  */
 static void
 arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp,
     int bridged, struct llentry *la)
 {
 	struct sockaddr sa;
 	struct mbuf *m_hold, *m_hold_next;
 	uint8_t linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	int lladdr_off;
 
 	LLE_WLOCK_ASSERT(la);
 
 	/* the following is not an error when doing bridging */
 	if (!bridged && la->lle_tbl->llt_ifp != ifp) {
 		if (log_arp_wrong_iface)
 			ARP_LOG(LOG_WARNING, "%s is on %s "
 			    "but got reply from %*D on %s\n",
 			    inet_ntoa(isaddr),
 			    la->lle_tbl->llt_ifp->if_xname,
 			    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 			    ifp->if_xname);
 		LLE_WUNLOCK(la);
 		return;
 	}
 	if ((la->la_flags & LLE_VALID) &&
 	    bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) {
 		if (la->la_flags & LLE_STATIC) {
 			LLE_WUNLOCK(la);
 			if (log_arp_permanent_modify)
 				ARP_LOG(LOG_ERR,
 				    "%*D attempts to modify "
 				    "permanent entry for %s on %s\n",
 				    ifp->if_addrlen,
 				    (u_char *)ar_sha(ah), ":",
 				    inet_ntoa(isaddr), ifp->if_xname);
 			return;
 		}
 		if (log_arp_movements) {
 			ARP_LOG(LOG_INFO, "%s moved from %*D "
 			    "to %*D on %s\n",
 			    inet_ntoa(isaddr),
 			    ifp->if_addrlen,
 			    (u_char *)&la->ll_addr, ":",
 			    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 			    ifp->if_xname);
 		}
 	}
 
 	/* Calculate full link prepend to use in lle */
 	linkhdrsize = sizeof(linkhdr);
 	if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
 	    &linkhdrsize, &lladdr_off) != 0)
 		return;
 
 	/* Check if something has changed */
 	if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 ||
 	    (la->la_flags & LLE_VALID) == 0) {
 		/* Try to perform LLE update */
 		if (lltable_try_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
 		    lladdr_off) == 0)
 			return;
 
 		/* Clear fast path feedback request if set */
 		la->r_skip_req = 0;
 	}
 
 	arp_mark_lle_reachable(la);
 
 	/*
 	 * The packets are all freed within the call to the output
 	 * routine.
 	 *
 	 * NB: The lock MUST be released before the call to the
 	 * output routine.
 	 */
 	if (la->la_hold != NULL) {
 		m_hold = la->la_hold;
 		la->la_hold = NULL;
 		la->la_numheld = 0;
 		lltable_fill_sa_entry(la, &sa);
 		LLE_WUNLOCK(la);
 		for (; m_hold != NULL; m_hold = m_hold_next) {
 			m_hold_next = m_hold->m_nextpkt;
 			m_hold->m_nextpkt = NULL;
 			/* Avoid confusing lower layers. */
 			m_clrprotoflags(m_hold);
 			(*ifp->if_output)(ifp, m_hold, &sa, NULL);
 		}
 	} else
 		LLE_WUNLOCK(la);
 }
 
 static void
 arp_mark_lle_reachable(struct llentry *la)
 {
-	int canceled, wtime;
+	int wtime;
 
 	LLE_WLOCK_ASSERT(la);
 
 	la->ln_state = ARP_LLINFO_REACHABLE;
 	EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
 
 	if (!(la->la_flags & LLE_STATIC)) {
 		LLE_ADDREF(la);
 		la->la_expire = time_uptime + V_arpt_keep;
 		wtime = V_arpt_keep - V_arp_maxtries * V_arpt_rexmit;
 		if (wtime < 0)
 			wtime = V_arpt_keep;
-		canceled = callout_reset(&la->lle_timer,
-		    hz * wtime, arptimer, la);
-		if (canceled & CALLOUT_RET_CANCELLED)
+		if (callout_reset(&la->lle_timer,
+		    hz * wtime, arptimer, la).bit.cancelled)
 			LLE_REMREF(la);
 	}
 	la->la_asked = 0;
 	la->la_preempt = V_arp_maxtries;
 }
 
 /*
  * Add pernament link-layer record for given interface address.
  */
 static __noinline void
 arp_add_ifa_lle(struct ifnet *ifp, const struct sockaddr *dst)
 {
 	struct llentry *lle, *lle_tmp;
 
 	/*
 	 * Interface address LLE record is considered static
 	 * because kernel code relies on LLE_STATIC flag to check
 	 * if these entries can be rewriten by arp updates.
 	 */
 	lle = lltable_alloc_entry(LLTABLE(ifp), LLE_IFADDR | LLE_STATIC, dst);
 	if (lle == NULL) {
 		log(LOG_INFO, "arp_ifinit: cannot create arp "
 		    "entry for interface address\n");
 		return;
 	}
 
 	IF_AFDATA_WLOCK(ifp);
 	LLE_WLOCK(lle);
 	/* Unlink any entry if exists */
 	lle_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
 	if (lle_tmp != NULL)
 		lltable_unlink_entry(LLTABLE(ifp), lle_tmp);
 
 	lltable_link_entry(LLTABLE(ifp), lle);
 	IF_AFDATA_WUNLOCK(ifp);
 
 	if (lle_tmp != NULL)
 		EVENTHANDLER_INVOKE(lle_event, lle_tmp, LLENTRY_EXPIRED);
 
 	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
 	LLE_WUNLOCK(lle);
 	if (lle_tmp != NULL)
 		lltable_free_entry(LLTABLE(ifp), lle_tmp);
 }
 
 /*
  * Handle the garp_rexmit_count. Like sysctl_handle_int(), but limits the range
  * of valid values.
  */
 static int
 sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int rexmit_count = *(int *)arg1;
 
 	error = sysctl_handle_int(oidp, &rexmit_count, 0, req);
 
 	/* Enforce limits on any new value that may have been set. */
 	if (!error && req->newptr) {
 		/* A new value was set. */
 		if (rexmit_count < 0) {
 			rexmit_count = 0;
 		} else if (rexmit_count > MAX_GARP_RETRANSMITS) {
 			rexmit_count = MAX_GARP_RETRANSMITS;
 		}
 		*(int *)arg1 = rexmit_count;
 	}
 
 	return (error);
 }
 
 /*
  * Retransmit a Gratuitous ARP (GARP) and, if necessary, schedule a callout to
  * retransmit it again. A pending callout owns a reference to the ifa.
  */
 static void
 garp_rexmit(void *arg)
 {
 	struct in_ifaddr *ia = arg;
 
 	if (callout_pending(&ia->ia_garp_timer) ||
 	    !callout_active(&ia->ia_garp_timer)) {
 		IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
 		ifa_free(&ia->ia_ifa);
 		return;
 	}
 
 	/*
 	 * Drop lock while the ARP request is generated.
 	 */
 	IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
 
 	arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr,
 	    &IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp));
 
 	/*
 	 * Increment the count of retransmissions. If the count has reached the
 	 * maximum value, stop sending the GARP packets. Otherwise, schedule
 	 * the callout to retransmit another GARP packet.
 	 */
 	++ia->ia_garp_count;
 	if (ia->ia_garp_count >= garp_rexmit_count) {
 		ifa_free(&ia->ia_ifa);
 	} else {
-		int rescheduled;
+		int cancelled;
 		IF_ADDR_WLOCK(ia->ia_ifa.ifa_ifp);
-		rescheduled = callout_reset(&ia->ia_garp_timer,
+		cancelled = callout_reset(&ia->ia_garp_timer,
 		    (1 << ia->ia_garp_count) * hz,
-		    garp_rexmit, ia);
+		    garp_rexmit, ia).bit.cancelled;
 		IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
-		if (rescheduled & CALLOUT_RET_CANCELLED) {
+		if (cancelled)
 			ifa_free(&ia->ia_ifa);
-		}
 	}
 }
 
 /*
  * Start the GARP retransmit timer.
  *
  * A single GARP is always transmitted when an IPv4 address is added
  * to an interface and that is usually sufficient. However, in some
  * circumstances, such as when a shared address is passed between
  * cluster nodes, this single GARP may occasionally be dropped or
  * lost. This can lead to neighbors on the network link working with a
  * stale ARP cache and sending packets destined for that address to
  * the node that previously owned the address, which may not respond.
  *
  * To avoid this situation, GARP retransmits can be enabled by setting
  * the net.link.ether.inet.garp_rexmit_count sysctl to a value greater
  * than zero. The setting represents the maximum number of
  * retransmissions. The interval between retransmissions is calculated
  * using an exponential backoff algorithm, doubling each time, so the
  * retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).
  */
 static void
 garp_timer_start(struct ifaddr *ifa)
 {
 	struct in_ifaddr *ia = (struct in_ifaddr *) ifa;
 
 	IF_ADDR_WLOCK(ia->ia_ifa.ifa_ifp);
 	ia->ia_garp_count = 0;
 	if (!(callout_reset(&ia->ia_garp_timer, (1 << ia->ia_garp_count) * hz,
-	    garp_rexmit, ia) & CALLOUT_RET_CANCELLED)) {
+	    garp_rexmit, ia).bit.cancelled)) {
 		ifa_ref(ifa);
 	}
 	IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
 }
 
 void
 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 {
 	const struct sockaddr_in *dst_in;
 	const struct sockaddr *dst;
 
 	if (ifa->ifa_carp != NULL)
 		return;
 
 	dst = ifa->ifa_addr;
 	dst_in = (const struct sockaddr_in *)dst;
 
 	if (ntohl(dst_in->sin_addr.s_addr) == INADDR_ANY)
 		return;
 	arp_announce_ifaddr(ifp, dst_in->sin_addr, IF_LLADDR(ifp));
 	if (garp_rexmit_count > 0) {
 		garp_timer_start(ifa);
 	}
 
 	arp_add_ifa_lle(ifp, dst);
 }
 
 void
 arp_announce_ifaddr(struct ifnet *ifp, struct in_addr addr, u_char *enaddr)
 {
 
 	if (ntohl(addr.s_addr) != INADDR_ANY)
 		arprequest(ifp, &addr, &addr, enaddr);
 }
 
 /*
  * Sends gratuitous ARPs for each ifaddr to notify other
  * nodes about the address change.
  */
 static __noinline void
 arp_handle_ifllchange(struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family == AF_INET)
 			arp_ifinit(ifp, ifa);
 	}
 }
 
 /*
  * A handler for interface link layer address change event.
  */
 static void
 arp_iflladdr(void *arg __unused, struct ifnet *ifp)
 {
 
 	lltable_update_ifaddr(LLTABLE(ifp));
 
 	if ((ifp->if_flags & IFF_UP) != 0)
 		arp_handle_ifllchange(ifp);
 }
 
 static void
 vnet_arp_init(void)
 {
 
 	if (IS_DEFAULT_VNET(curvnet)) {
 		netisr_register(&arp_nh);
 		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
 		    arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
 	}
 #ifdef VIMAGE
 	else
 		netisr_register_vnet(&arp_nh);
 #endif
 }
 VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND,
     vnet_arp_init, 0);
 
 #ifdef VIMAGE
 /*
  * We have to unregister ARP along with IP otherwise we risk doing INADDR_HASH
  * lookups after destroying the hash.  Ideally this would go on SI_ORDER_3.5.
  */
 static void
 vnet_arp_destroy(__unused void *arg)
 {
 
 	netisr_unregister_vnet(&arp_nh);
 }
 VNET_SYSUNINIT(vnet_arp_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
     vnet_arp_destroy, NULL);
 #endif
Index: projects/hps_head/sys/netinet/in.c
===================================================================
--- projects/hps_head/sys/netinet/in.c	(revision 309217)
+++ projects/hps_head/sys/netinet/in.c	(revision 309218)
@@ -1,1493 +1,1492 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (C) 2001 WIDE Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.c	8.4 (Berkeley) 1/9/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/malloc.h>
 #include <sys/priv.h>
 #include <sys/socket.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/rmlock.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/sx.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_carp.h>
 #include <netinet/igmp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
 static int in_difaddr_ioctl(caddr_t, struct ifnet *, struct thread *);
 
 static void	in_socktrim(struct sockaddr_in *);
 static void	in_purgemaddrs(struct ifnet *);
 
 static VNET_DEFINE(int, nosameprefix);
 #define	V_nosameprefix			VNET(nosameprefix)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(nosameprefix), 0,
 	"Refuse to create same prefixes on different interfaces");
 
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcbinfo			VNET(ripcbinfo)
 
 static struct sx in_control_sx;
 SX_SYSINIT(in_control_sx, &in_control_sx, "in_control");
 
 /*
  * Return 1 if an internet address is for a ``local'' host
  * (one to which we have a connection).
  */
 int
 in_localaddr(struct in_addr in)
 {
 	struct rm_priotracker in_ifa_tracker;
 	register u_long i = ntohl(in.s_addr);
 	register struct in_ifaddr *ia;
 
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			return (1);
 		}
 	}
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 	return (0);
 }
 
 /*
  * Return 1 if an internet address is for the local host and configured
  * on one of its interfaces.
  */
 int
 in_localip(struct in_addr in)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct in_ifaddr *ia;
 
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
 		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			return (1);
 		}
 	}
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 	return (0);
 }
 
 /*
  * Return 1 if an internet address is configured on an interface.
  */
 int
 in_ifhasaddr(struct ifnet *ifp, struct in_addr in)
 {
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		ia = (struct in_ifaddr *)ifa;
 		if (ia->ia_addr.sin_addr.s_addr == in.s_addr) {
 			IF_ADDR_RUNLOCK(ifp);
 			return (1);
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return (0);
 }
 
 /*
  * Return a reference to the interface address which is different to
  * the supplied one but with same IP address value.
  */
 static struct in_ifaddr *
 in_localip_more(struct in_ifaddr *ia)
 {
 	struct rm_priotracker in_ifa_tracker;
 	in_addr_t in = IA_SIN(ia)->sin_addr.s_addr;
 	struct in_ifaddr *it;
 
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	LIST_FOREACH(it, INADDR_HASH(in), ia_hash) {
 		if (it != ia && IA_SIN(it)->sin_addr.s_addr == in) {
 			ifa_ref(&it->ia_ifa);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			return (it);
 		}
 	}
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	return (NULL);
 }
 
 /*
  * Determine whether an IP address is in a reserved set of addresses
  * that may not be forwarded, or whether datagrams to that destination
  * may be forwarded.
  */
 int
 in_canforward(struct in_addr in)
 {
 	register u_long i = ntohl(in.s_addr);
 	register u_long net;
 
 	if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
 		return (0);
 	if (IN_CLASSA(i)) {
 		net = i & IN_CLASSA_NET;
 		if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * Trim a mask in a sockaddr
  */
 static void
 in_socktrim(struct sockaddr_in *ap)
 {
     register char *cplim = (char *) &ap->sin_addr;
     register char *cp = (char *) (&ap->sin_addr + 1);
 
     ap->sin_len = 0;
     while (--cp >= cplim)
 	if (*cp) {
 	    (ap)->sin_len = cp - (char *) (ap) + 1;
 	    break;
 	}
 }
 
 /*
  * Generic internet control operations (ioctl's).
  */
 int
 in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
     struct thread *td)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	int error;
 
 	if (ifp == NULL)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Filter out 4 ioctls we implement directly.  Forward the rest
 	 * to specific functions and ifp->if_ioctl().
 	 */
 	switch (cmd) {
 	case SIOCGIFADDR:
 	case SIOCGIFBRDADDR:
 	case SIOCGIFDSTADDR:
 	case SIOCGIFNETMASK:
 		break;
 	case SIOCDIFADDR:
 		sx_xlock(&in_control_sx);
 		error = in_difaddr_ioctl(data, ifp, td);
 		sx_xunlock(&in_control_sx);
 		return (error);
 	case OSIOCAIFADDR:	/* 9.x compat */
 	case SIOCAIFADDR:
 		sx_xlock(&in_control_sx);
 		error = in_aifaddr_ioctl(cmd, data, ifp, td);
 		sx_xunlock(&in_control_sx);
 		return (error);
 	case SIOCSIFADDR:
 	case SIOCSIFBRDADDR:
 	case SIOCSIFDSTADDR:
 	case SIOCSIFNETMASK:
 		/* We no longer support that old commands. */
 		return (EINVAL);
 	default:
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		return ((*ifp->if_ioctl)(ifp, cmd, data));
 	}
 
 	if (addr->sin_addr.s_addr != INADDR_ANY &&
 	    prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Find address for this interface, if it exists.  If an
 	 * address was specified, find that one instead of the
 	 * first one on the interface, if possible.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		ia = (struct in_ifaddr *)ifa;
 		if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr)
 			break;
 	}
 	if (ifa == NULL)
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 			if (ifa->ifa_addr->sa_family == AF_INET) {
 				ia = (struct in_ifaddr *)ifa;
 				if (prison_check_ip4(td->td_ucred,
 				    &ia->ia_addr.sin_addr) == 0)
 					break;
 			}
 
 	if (ifa == NULL) {
 		IF_ADDR_RUNLOCK(ifp);
 		return (EADDRNOTAVAIL);
 	}
 
 	error = 0;
 	switch (cmd) {
 	case SIOCGIFADDR:
 		*addr = ia->ia_addr;
 		break;
 
 	case SIOCGIFBRDADDR:
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EINVAL;
 			break;
 		}
 		*addr = ia->ia_broadaddr;
 		break;
 
 	case SIOCGIFDSTADDR:
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
 			error = EINVAL;
 			break;
 		}
 		*addr = ia->ia_dstaddr;
 		break;
 
 	case SIOCGIFNETMASK:
 		*addr = ia->ia_sockmask;
 		break;
 	}
 
 	IF_ADDR_RUNLOCK(ifp);
 
 	return (error);
 }
 
 static int
 in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
 {
 	const struct in_aliasreq *ifra = (struct in_aliasreq *)data;
 	const struct sockaddr_in *addr = &ifra->ifra_addr;
 	const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr;
 	const struct sockaddr_in *mask = &ifra->ifra_mask;
 	const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr;
 	const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	bool iaIsFirst;
 	int error = 0;
 
 	error = priv_check(td, PRIV_NET_ADDIFADDR);
 	if (error)
 		return (error);
 
 	/*
 	 * ifra_addr must be present and be of INET family.
 	 * ifra_broadaddr/ifra_dstaddr and ifra_mask are optional.
 	 */
 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
 	    addr->sin_family != AF_INET)
 		return (EINVAL);
 	if (broadaddr->sin_len != 0 &&
 	    (broadaddr->sin_len != sizeof(struct sockaddr_in) ||
 	    broadaddr->sin_family != AF_INET))
 		return (EINVAL);
 	if (mask->sin_len != 0 &&
 	    (mask->sin_len != sizeof(struct sockaddr_in) ||
 	    mask->sin_family != AF_INET))
 		return (EINVAL);
 	if ((ifp->if_flags & IFF_POINTOPOINT) &&
 	    (dstaddr->sin_len != sizeof(struct sockaddr_in) ||
 	     dstaddr->sin_addr.s_addr == INADDR_ANY))
 		return (EDESTADDRREQ);
 	if (vhid > 0 && carp_attach_p == NULL)
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * See whether address already exist.
 	 */
 	iaIsFirst = true;
 	ia = NULL;
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in_ifaddr *it;
 
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 
 		it = (struct in_ifaddr *)ifa;
 		iaIsFirst = false;
 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
 		    prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)
 			ia = it;
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	if (ia != NULL)
 		(void )in_difaddr_ioctl(data, ifp, td);
 
 	ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK);
 	ia = (struct in_ifaddr *)ifa;
 	ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
 	ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
 	ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
 	callout_init_rw(&ia->ia_garp_timer, &ifp->if_addr_lock,
 	    CALLOUT_RETURNUNLOCKED);
 
 	ia->ia_ifp = ifp;
 	ia->ia_addr = *addr;
 	if (mask->sin_len != 0) {
 		ia->ia_sockmask = *mask;
 		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
 	} else {
 		in_addr_t i = ntohl(addr->sin_addr.s_addr);
 
 		/*
 	 	 * Be compatible with network classes, if netmask isn't
 		 * supplied, guess it based on classes.
 	 	 */
 		if (IN_CLASSA(i))
 			ia->ia_subnetmask = IN_CLASSA_NET;
 		else if (IN_CLASSB(i))
 			ia->ia_subnetmask = IN_CLASSB_NET;
 		else
 			ia->ia_subnetmask = IN_CLASSC_NET;
 		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
 	}
 	ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask;
 	in_socktrim(&ia->ia_sockmask);
 
 	if (ifp->if_flags & IFF_BROADCAST) {
 		if (broadaddr->sin_len != 0) {
 			ia->ia_broadaddr = *broadaddr;
 		} else if (ia->ia_subnetmask == IN_RFC3021_MASK) {
 			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
 			ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
 			ia->ia_broadaddr.sin_family = AF_INET;
 		} else {
 			ia->ia_broadaddr.sin_addr.s_addr =
 			    htonl(ia->ia_subnet | ~ia->ia_subnetmask);
 			ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
 			ia->ia_broadaddr.sin_family = AF_INET;
 		}
 	}
 
 	if (ifp->if_flags & IFF_POINTOPOINT)
 		ia->ia_dstaddr = *dstaddr;
 
 	/* XXXGL: rtinit() needs this strange assignment. */
 	if (ifp->if_flags & IFF_LOOPBACK)
                 ia->ia_dstaddr = ia->ia_addr;
 
 	if (vhid != 0) {
 		error = (*carp_attach_p)(&ia->ia_ifa, vhid);
 		if (error)
 			return (error);
 	}
 
 	/* if_addrhead is already referenced by ifa_alloc() */
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 
 	ifa_ref(ifa);			/* in_ifaddrhead */
 	IN_IFADDR_WLOCK();
 	TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
 	LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
 	IN_IFADDR_WUNLOCK();
 
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 * and to validate the address if necessary.
 	 */
 	if (ifp->if_ioctl != NULL) {
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
 		if (error)
 			goto fail1;
 	}
 
 	/*
 	 * Add route for the network.
 	 */
 	if (vhid == 0) {
 		int flags = RTF_UP;
 
 		if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
 			flags |= RTF_HOST;
 
 		error = in_addprefix(ia, flags);
 		if (error)
 			goto fail1;
 	}
 
 	/*
 	 * Add a loopback route to self.
 	 */
 	if (vhid == 0 && (ifp->if_flags & IFF_LOOPBACK) == 0 &&
 	    ia->ia_addr.sin_addr.s_addr != INADDR_ANY &&
 	    !((ifp->if_flags & IFF_POINTOPOINT) &&
 	     ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)) {
 		struct in_ifaddr *eia;
 
 		eia = in_localip_more(ia);
 
 		if (eia == NULL) {
 			error = ifa_add_loopback_route((struct ifaddr *)ia,
 			    (struct sockaddr *)&ia->ia_addr);
 			if (error)
 				goto fail2;
 		} else
 			ifa_free(&eia->ia_ifa);
 	}
 
 	if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) {
 		struct in_addr allhosts_addr;
 		struct in_ifinfo *ii;
 
 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
 		allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
 
 		error = in_joingroup(ifp, &allhosts_addr, NULL,
 			&ii->ii_allhosts);
 	}
 
 	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 
 	return (error);
 
 fail2:
 	if (vhid == 0)
 		(void )in_scrubprefix(ia, LLE_STATIC);
 
 fail1:
 	if (ia->ia_ifa.ifa_carp)
 		(*carp_detach_p)(&ia->ia_ifa);
 
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);		/* if_addrhead */
 
 	IN_IFADDR_WLOCK();
 	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
 	LIST_REMOVE(ia, ia_hash);
 	IN_IFADDR_WUNLOCK();
 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
 
 	return (error);
 }
 
 static int
 in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td)
 {
 	const struct ifreq *ifr = (struct ifreq *)data;
 	const struct sockaddr_in *addr = (const struct sockaddr_in *)
 	    &ifr->ifr_addr;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	bool deleteAny, iaIsLast;
 	int error;
 
 	if (td != NULL) {
 		error = priv_check(td, PRIV_NET_DELIFADDR);
 		if (error)
 			return (error);
 	}
 
 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
 	    addr->sin_family != AF_INET)
 		deleteAny = true;
 	else
 		deleteAny = false;
 
 	iaIsLast = true;
 	ia = NULL;
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in_ifaddr *it;
 
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 
 		it = (struct in_ifaddr *)ifa;
 		if (deleteAny && ia == NULL && (td == NULL ||
 		    prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0))
 			ia = it;
 
 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
 		    (td == NULL || prison_check_ip4(td->td_ucred,
 		    &addr->sin_addr) == 0))
 			ia = it;
 
 		if (it != ia)
 			iaIsLast = false;
 	}
 
 	if (ia == NULL) {
 		IF_ADDR_WUNLOCK(ifp);
 		return (EADDRNOTAVAIL);
 	}
 
 	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);		/* if_addrhead */
 
 	IN_IFADDR_WLOCK();
 	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
 	LIST_REMOVE(ia, ia_hash);
 	IN_IFADDR_WUNLOCK();
 
 	/*
 	 * in_scrubprefix() kills the interface route.
 	 */
 	in_scrubprefix(ia, LLE_STATIC);
 
 	/*
 	 * in_ifadown gets rid of all the rest of
 	 * the routes.  This is not quite the right
 	 * thing to do, but at least if we are running
 	 * a routing process they will come back.
 	 */
 	in_ifadown(&ia->ia_ifa, 1);
 
 	if (ia->ia_ifa.ifa_carp)
 		(*carp_detach_p)(&ia->ia_ifa);
 
 	/*
 	 * If this is the last IPv4 address configured on this
 	 * interface, leave the all-hosts group.
 	 * No state-change report need be transmitted.
 	 */
 	if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) {
 		struct in_ifinfo *ii;
 
 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
 		IN_MULTI_LOCK();
 		if (ii->ii_allhosts) {
 			(void)in_leavegroup_locked(ii->ii_allhosts, NULL);
 			ii->ii_allhosts = NULL;
 		}
 		IN_MULTI_UNLOCK();
 	}
 
 	IF_ADDR_WLOCK(ifp);
-	if (callout_stop(&ia->ia_garp_timer) & CALLOUT_RET_CANCELLED) {
+	if (callout_stop(&ia->ia_garp_timer).bit.cancelled)
 		ifa_free(&ia->ia_ifa);
-	}
 	IF_ADDR_WUNLOCK(ifp);
 
 	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
 
 	return (0);
 }
 
 #define rtinitflags(x) \
 	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
 	    ? RTF_HOST : 0)
 
 /*
  * Check if we have a route for the given prefix already or add one accordingly.
  */
 int
 in_addprefix(struct in_ifaddr *target, int flags)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct in_ifaddr *ia;
 	struct in_addr prefix, mask, p, m;
 	int error;
 
 	if ((flags & RTF_HOST) != 0) {
 		prefix = target->ia_dstaddr.sin_addr;
 		mask.s_addr = 0;
 	} else {
 		prefix = target->ia_addr.sin_addr;
 		mask = target->ia_sockmask.sin_addr;
 		prefix.s_addr &= mask.s_addr;
 	}
 
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	/* Look for an existing address with the same prefix, mask, and fib */
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		if (rtinitflags(ia)) {
 			p = ia->ia_dstaddr.sin_addr;
 
 			if (prefix.s_addr != p.s_addr)
 				continue;
 		} else {
 			p = ia->ia_addr.sin_addr;
 			m = ia->ia_sockmask.sin_addr;
 			p.s_addr &= m.s_addr;
 
 			if (prefix.s_addr != p.s_addr ||
 			    mask.s_addr != m.s_addr)
 				continue;
 		}
 		if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib)
 			continue;
 
 		/*
 		 * If we got a matching prefix route inserted by other
 		 * interface address, we are done here.
 		 */
 		if (ia->ia_flags & IFA_ROUTE) {
 #ifdef RADIX_MPATH
 			if (ia->ia_addr.sin_addr.s_addr ==
 			    target->ia_addr.sin_addr.s_addr) {
 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 				return (EEXIST);
 			} else
 				break;
 #endif
 			if (V_nosameprefix) {
 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 				return (EEXIST);
 			} else {
 				int fibnum;
 
 				fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS :
 					target->ia_ifp->if_fib;
 				rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum);
 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 				return (0);
 			}
 		}
 	}
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	/*
 	 * No-one seem to have this prefix route, so we try to insert it.
 	 */
 	error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
 	if (!error)
 		target->ia_flags |= IFA_ROUTE;
 	return (error);
 }
 
 /*
  * Purge link-layer entries that belong to @ia.
  *
  * With a non-zero @all, every entry covered by @ia's prefix is handed to
  * lltable_prefix_free() together with @flags; otherwise only the entry
  * for @ia's own address (LLE_IFADDR) is deleted from the interface table.
  */
 static void
 in_scrubprefixlle(struct in_ifaddr *ia, int all, u_int flags)
 {
 	struct sockaddr_in key, netmask;
 	struct ifnet *ifp = ia->ia_ifp;
 
 	bzero(&key, sizeof(key));
 	key.sin_family = AF_INET;
 	key.sin_len = sizeof(key);
 
 	bzero(&netmask, sizeof(netmask));
 	netmask.sin_family = AF_INET;
 	netmask.sin_len = sizeof(netmask);
 	netmask.sin_addr.s_addr = ia->ia_subnetmask;
 
 	if (!all) {
 		/* Only the interface address itself goes away. */
 		key.sin_addr.s_addr = ia->ia_addr.sin_addr.s_addr;
 		lltable_delete_addr(LLTABLE(ifp), LLE_IFADDR,
 		    (struct sockaddr *)&key);
 		return;
 	}
 
 	/*
 	 * Prefix-wide removal.  The address is converted to host
 	 * representation once here so that it need not be done on
 	 * every callback; ia_subnetmask is already stored in host
 	 * representation.
 	 */
 	key.sin_addr.s_addr = ntohl(ia->ia_addr.sin_addr.s_addr);
 	lltable_prefix_free(AF_INET, (struct sockaddr *)&key,
 	    (struct sockaddr *)&netmask, flags);
 }
 
 /*
  * If there is no other address in the system that can serve a route to the
  * same prefix, remove the route.  Hand over the route to the new address
  * otherwise.
  */
 /*
  * @target: address whose prefix route is being given up.
  * @flags:  passed on to in_scrubprefixlle(); LLE_STATIC additionally
  *          enables the loopback-route handling at the top.
  *
  * Returns 0 when @target did not own the route (IFA_ROUTE clear),
  * otherwise the result of the last rtinit() call that was made.
  * NOTE(review): the error stored by the loopback-route calls below is
  * never returned; every later path either returns 0 or overwrites it.
  */
 int
 in_scrubprefix(struct in_ifaddr *target, u_int flags)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct in_ifaddr *ia;
 	struct in_addr prefix, mask, p, m;
 	int error = 0;
 
 	/*
 	 * Remove the loopback route to the interface address.
 	 */
 	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
 	    !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
 	    (flags & LLE_STATIC)) {
 		struct in_ifaddr *eia;
 
 		/*
 		 * XXXME: add fib-aware in_localip.
 		 * We definitely don't want to switch between
 		 * prefixes in different fibs.
 		 */
 		eia = in_localip_more(target);
 
 		if (eia != NULL) {
 			/* Another address was returned: switch the route to it. */
 			error = ifa_switch_loopback_route((struct ifaddr *)eia,
 			    (struct sockaddr *)&target->ia_addr);
 			ifa_free(&eia->ia_ifa);
 		} else {
 			error = ifa_del_loopback_route((struct ifaddr *)target,
 			    (struct sockaddr *)&target->ia_addr);
 		}
 	}
 
 	/*
 	 * Work out the prefix/mask pair used to match other addresses:
 	 * the destination address with a zero mask when rtinitflags()
 	 * is non-zero, the masked local address otherwise.
 	 */
 	if (rtinitflags(target)) {
 		prefix = target->ia_dstaddr.sin_addr;
 		mask.s_addr = 0;
 	} else {
 		prefix = target->ia_addr.sin_addr;
 		mask = target->ia_sockmask.sin_addr;
 		prefix.s_addr &= mask.s_addr;
 	}
 
 	if ((target->ia_flags & IFA_ROUTE) == 0) {
 		int fibnum;
 		
 		fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS :
 			target->ia_ifp->if_fib;
 		rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum);
 	
 		/*
 		 * Removing address from !IFF_UP interface or
 		 * prefix which exists on other interface (along with route).
 		 * No entries should exist here except target addr.
 		 * Given that, delete this entry only.
 		 */
 		in_scrubprefixlle(target, 0, flags);
 		return (0);
 	}
 
 	/* Look for another address on an IFF_UP interface with this prefix. */
 	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		if (rtinitflags(ia)) {
 			p = ia->ia_dstaddr.sin_addr;
 
 			if (prefix.s_addr != p.s_addr)
 				continue;
 		} else {
 			p = ia->ia_addr.sin_addr;
 			m = ia->ia_sockmask.sin_addr;
 			p.s_addr &= m.s_addr;
 
 			if (prefix.s_addr != p.s_addr ||
 			    mask.s_addr != m.s_addr)
 				continue;
 		}
 
 		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
 			continue;
 
 		/*
 		 * If we got a matching prefix address, move IFA_ROUTE and
 		 * the route itself to it.  Make sure that routing daemons
 		 * get a heads-up.
 		 */
 		if ((ia->ia_flags & IFA_ROUTE) == 0) {
 			/*
 			 * Take a reference on ia before the list lock is
 			 * dropped; it is released by ifa_free() below.
 			 */
 			ifa_ref(&ia->ia_ifa);
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
 			    rtinitflags(target));
 			if (error == 0)
 				target->ia_flags &= ~IFA_ROUTE;
 			else
 				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
 					error);
 			/* Scrub all entries IFF interface is different */
 			in_scrubprefixlle(target, target->ia_ifp != ia->ia_ifp,
 			    flags);
 			/* The delete result above is overwritten by the add result. */
 			error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
 			    rtinitflags(ia) | RTF_UP);
 			if (error == 0)
 				ia->ia_flags |= IFA_ROUTE;
 			else
 				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
 					error);
 			ifa_free(&ia->ia_ifa);
 			return (error);
 		}
 	}
 	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	/*
 	 * remove all L2 entries on the given prefix
 	 */
 	in_scrubprefixlle(target, 1, flags);
 
 	/*
 	 * As no-one seems to have this prefix, we can remove the route.
 	 */
 	error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
 	if (error == 0)
 		target->ia_flags &= ~IFA_ROUTE;
 	else
 		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
 	return (error);
 }
 
 #undef rtinitflags
 
 void
 in_ifscrub_all(void)
 {
 	struct ifnet *ifp;
 	struct ifaddr *ifa, *nifa;
 	struct ifaliasreq ifr;
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		/* Cannot lock here - lock recursion. */
 		/* IF_ADDR_RLOCK(ifp); */
 		TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 
 			/*
 			 * This is ugly but the only way for legacy IP to
 			 * cleanly remove addresses and everything attached.
 			 */
 			bzero(&ifr, sizeof(ifr));
 			ifr.ifra_addr = *ifa->ifa_addr;
 			if (ifa->ifa_dstaddr)
 			ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
 			(void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr,
 			    ifp, NULL);
 		}
 		/* IF_ADDR_RUNLOCK(ifp); */
 		in_purgemaddrs(ifp);
 		igmp_domifdetach(ifp);
 	}
 	IFNET_RUNLOCK();
 }
 
 /*
  * Return 1 if @in is a broadcast address for @ia, 0 otherwise.
  */
 int
 in_ifaddr_broadcast(struct in_addr in, struct in_ifaddr *ia)
 {
 
 	/*
 	 * An all one subnetmask never matches.  These only exist when
 	 * an interface gets a secondary address.
 	 */
 	if (ia->ia_subnetmask == (u_long)0xffffffff)
 		return (0);
 
 	if (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr)
 		return (1);
 
 	/*
 	 * Check for old-style (host 0) broadcast, but
 	 * taking into account that RFC 3021 obsoletes it.
 	 */
 	if (ia->ia_subnetmask != IN_RFC3021_MASK &&
 	    ntohl(in.s_addr) == ia->ia_subnet)
 		return (1);
 
 	return (0);
 }
 
 /*
  * Return 1 if the address might be a local broadcast address.
  */
 int
 in_broadcast(struct in_addr in, struct ifnet *ifp)
 {
 	register struct ifaddr *ifa;
 	int found;
 
 	if (in.s_addr == INADDR_BROADCAST ||
 	    in.s_addr == INADDR_ANY)
 		return (1);
 	if ((ifp->if_flags & IFF_BROADCAST) == 0)
 		return (0);
 	found = 0;
 	/*
 	 * Look through the list of addresses for a match
 	 * with a broadcast address.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_INET &&
 		    in_ifaddr_broadcast(in, (struct in_ifaddr *)ifa)) {
 			found = 1;
 			break;
 		}
 	IF_ADDR_RUNLOCK(ifp);
 	return (found);
 }
 
 /*
  * On interface removal, clean up IPv4 data structures hung off of the ifnet.
  */
 void
 in_ifdetach(struct ifnet *ifp)
 {
 
 	in_pcbpurgeif0(&V_ripcbinfo, ifp);
 	in_pcbpurgeif0(&V_udbinfo, ifp);
 	in_pcbpurgeif0(&V_ulitecbinfo, ifp);
 	in_purgemaddrs(ifp);
 }
 
 /*
  * Delete all IPv4 multicast address records, and associated link-layer
  * multicast address records, associated with ifp.
  * XXX It looks like domifdetach runs AFTER the link layer cleanup.
  * XXX This should not race with ifma_protospec being set during
  * a new allocation, if it does, we have bigger problems.
  */
 static void
 in_purgemaddrs(struct ifnet *ifp)
 {
 	LIST_HEAD(,in_multi) purgeinms;
 	struct in_multi		*inm, *tinm;
 	struct ifmultiaddr	*ifma;
 
 	LIST_INIT(&purgeinms);
 	IN_MULTI_LOCK();
 
 	/*
 	 * Extract list of in_multi associated with the detaching ifp
 	 * which the PF_INET layer is about to release.
 	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
 	 * by code further down.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 #if 0
 		KASSERT(ifma->ifma_protospec != NULL,
 		    ("%s: ifma_protospec is NULL", __func__));
 #endif
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
 		LIST_REMOVE(inm, inm_link);
 		inm_release_locked(inm);
 	}
 	igmp_ifdetach(ifp);
 
 	IN_MULTI_UNLOCK();
 }
 
 /*
  * IPv4 link-layer table entry.  It currently carries nothing beyond the
  * generic llentry; in_lltable_new() allocates this type and hands out a
  * pointer to the embedded base.
  */
 struct in_llentry {
 	struct llentry		base;
 };
 
 /* Hash table size passed to lltable_allocate_htbl() for IPv4 tables. */
 #define	IN_LLTBL_DEFAULT_HSIZE	32
 /*
  * Fold the bytes of key k together and reduce the result to a bucket
  * index by masking with (h - 1); the mask only covers every bucket when
  * h is a power of two.  Both arguments are fully parenthesized so that
  * an expression argument (e.g. "a | b") hashes the same as its value;
  * note that k is evaluated more than once.
  */
 #define	IN_LLTBL_HASH(k, h) \
 	((((((((k) >> 8) ^ (k)) >> 8) ^ (k)) >> 8) ^ (k)) & ((h) - 1))
 
 /*
  * Do actual deallocation of @lle: destroy the locks set up by
  * LLE_LOCK_INIT()/LLE_REQ_INIT() and return the memory to M_LLTABLE.
  * As the name says, the entry is expected to be unlocked here.
  */
 static void
 in_lltable_destroy_lle_unlocked(struct llentry *lle)
 {
 
 	LLE_LOCK_DESTROY(lle);
 	LLE_REQ_DESTROY(lle);
 	free(lle, M_LLTABLE);
 }
 
 /*
  * Called by LLE_FREE_LOCKED when number of references
  * drops to zero.  Installed as the lle_free method by in_lltable_new().
  * Drops the write lock on @lle and then deallocates it.
  */
 static void
 in_lltable_destroy_lle(struct llentry *lle)
 {
 
 	LLE_WUNLOCK(lle);
 	in_lltable_destroy_lle_unlocked(lle);
 }
 
 /*
  * Allocate and initialise a fresh IPv4 llentry for @addr4.
  *
  * Returns the embedded generic entry holding a single reference, or
  * NULL when the M_NOWAIT allocation fails (the caller generates the
  * message).  @flags is not consulted here.
  */
 static struct llentry *
 in_lltable_new(struct in_addr addr4, u_int flags)
 {
 	struct in_llentry *entry;
 	struct llentry *base;
 
 	entry = malloc(sizeof(*entry), M_LLTABLE, M_NOWAIT | M_ZERO);
 	if (entry == NULL)
 		return (NULL);
 	base = &entry->base;
 
 	/*
 	 * Start out marked as expired; for IPv4 this will trigger
 	 * "arpresolve" to generate an ARP request.
 	 */
 	base->la_expire = time_uptime;
 	base->r_l3addr.addr4 = addr4;
 	base->lle_refcnt = 1;
 	base->lle_free = in_lltable_destroy_lle;
 	LLE_LOCK_INIT(base);
 	LLE_REQ_INIT(base);
 	callout_init(&base->lle_timer, 1);
 
 	return (base);
 }
 
 /*
  * Evaluates to 1 when addresses d and a agree in every bit selected by
  * mask m, and to 0 otherwise.  All three arguments are objects with an
  * s_addr member.
  */
 #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)			\
 	(!(((d).s_addr ^ (a).s_addr) & (m).s_addr))
 
 /*
  * Decide whether @lle falls under the prefix @saddr/@smask and should
  * be selected given @flags.  Returns 1 for a match, 0 otherwise.
  *
  * The entry's address is converted with ntohl() before the comparison,
  * while @saddr and @smask are used as passed in.
  */
 static int
 in_lltable_match_prefix(const struct sockaddr *saddr,
     const struct sockaddr *smask, u_int flags, struct llentry *lle)
 {
 	struct in_addr want, netmask, have;
 
 	want = ((const struct sockaddr_in *)saddr)->sin_addr;
 	netmask = ((const struct sockaddr_in *)smask)->sin_addr;
 	have.s_addr = ntohl(lle->r_l3addr.addr4.s_addr);
 
 	if (!IN_ARE_MASKED_ADDR_EQUAL(have, want, netmask))
 		return (0);
 
 	/*
 	 * LLE_IFADDR records are selected IFF address & flag matches.
 	 * Note that @saddr is the interface address within the prefix
 	 * being matched.  This also handles 'ifdown' cases without
 	 * removing ifaddr macs.
 	 */
 	if ((lle->la_flags & LLE_IFADDR) != 0)
 		return (want.s_addr == have.s_addr &&
 		    (flags & LLE_STATIC) != 0);
 
 	/*
 	 * LLE_STATIC in @flags selects both dynamic and static entries;
 	 * without it only non-static entries are selected.
 	 */
 	return ((flags & LLE_STATIC) != 0 ||
 	    (lle->la_flags & LLE_STATIC) == 0);
 }
 
 /*
  * Detach @lle from @llt (when still linked), stop its timer and free it
  * through llentry_free().  The caller must hold the entry write-locked
  * (LLE_WLOCK_ASSERT) and, for a linked entry, the interface afdata
  * write lock (IF_AFDATA_WLOCK_ASSERT).  The value returned by
  * llentry_free() is added to the ARP "dropped" statistic.
  */
 static void
 in_lltable_free_entry(struct lltable *llt, struct llentry *lle)
 {
 	struct ifnet *ifp;
 	size_t pkts_dropped;
 
 	LLE_WLOCK_ASSERT(lle);
 	KASSERT(llt != NULL, ("lltable is NULL"));
 
 	/* Unlink entry from table if not already */
 	if ((lle->la_flags & LLE_LINKED) != 0) {
 		ifp = llt->llt_ifp;
 		IF_AFDATA_WLOCK_ASSERT(ifp);
 		lltable_unlink_entry(llt, lle);
 	}
 
 	/*
 	 * cancel timer; when callout_stop() reports a cancellation, drop
 	 * one reference (presumably the one held for the pending timer).
 	 */
-	if (callout_stop(&lle->lle_timer) & CALLOUT_RET_CANCELLED)
+	if (callout_stop(&lle->lle_timer).bit.cancelled)
 		LLE_REMREF(lle);
 
 	/* Drop hold queue */
 	pkts_dropped = llentry_free(lle);
 	ARPSTAT_ADD(dropped, pkts_dropped);
 }
 
 /*
  * Check, via a route lookup in @ifp's FIB, whether @l3addr may get a
  * link-layer entry on @ifp.
  *
  * Returns 0 when the lookup result is acceptable and EINVAL when the
  * lookup fails, when a gateway route does not meet the exception
  * below, or when the address is not covered by the returned route.
  * @flags is not referenced in the body.
  */
 static int
 in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
 {
 	struct rt_addrinfo info;
 	struct sockaddr_in rt_key, rt_mask;
 	struct sockaddr rt_gateway;
 	int rt_flags;
 
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/* Zeroed, length-tagged buffers handed to the lookup via info. */
 	bzero(&rt_key, sizeof(rt_key));
 	rt_key.sin_len = sizeof(rt_key);
 	bzero(&rt_mask, sizeof(rt_mask));
 	rt_mask.sin_len = sizeof(rt_mask);
 	bzero(&rt_gateway, sizeof(rt_gateway));
 	rt_gateway.sa_len = sizeof(rt_gateway);
 
 	bzero(&info, sizeof(info));
 	info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;
 	info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&rt_mask;
 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
 
 	/* Looked up with NHR_REF; released with rib_free_info() below. */
 	if (rib_lookup_info(ifp->if_fib, l3addr, NHR_REF, 0, &info) != 0)
 		return (EINVAL);
 
 	rt_flags = info.rti_flags;
 
 	/*
 	 * If the gateway for an existing host route matches the target L3
 	 * address, which is a special route inserted by some implementation
 	 * such as MANET, and the interface is of the correct type, then
 	 * allow for ARP to proceed.
 	 */
 	if (rt_flags & RTF_GATEWAY) {
 		if (!(rt_flags & RTF_HOST) || !info.rti_ifp ||
 		    info.rti_ifp->if_type != IFT_ETHER ||
 		    (info.rti_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 ||
 		    memcmp(rt_gateway.sa_data, l3addr->sa_data,
 		    sizeof(in_addr_t)) != 0) {
 			rib_free_info(&info);
 			return (EINVAL);
 		}
 	}
 	rib_free_info(&info);
 
 	/*
 	 * Make sure that at least the destination address is covered
 	 * by the route. This is for handling the case where 2 or more
 	 * interfaces have the same prefix. An incoming packet arrives
 	 * on one interface and the corresponding outgoing packet leaves
 	 * another interface.
 	 *
 	 * NOTE(review): info.rti_ifp is read here after rib_free_info();
 	 * it is only compared against ifp, never dereferenced.
 	 */
 	if (!(rt_flags & RTF_HOST) && info.rti_ifp != ifp) {
 		const char *sa, *mask, *addr, *lim;
 		int len;
 
 		mask = (const char *)&rt_mask;
 		/*
 		 * Just being extra cautious to avoid some custom
 		 * code getting into trouble.
 		 */
 		if ((info.rti_addrs & RTA_NETMASK) == 0)
 			return (EINVAL);
 
 		sa = (const char *)&rt_key;
 		addr = (const char *)l3addr;
 		len = ((const struct sockaddr_in *)l3addr)->sin_len;
 		lim = addr + len;
 
 		/* Byte-wise masked compare of the route key and l3addr. */
 		for ( ; addr < lim; sa++, mask++, addr++) {
 			if ((*sa ^ *addr) & *mask) {
 #ifdef DIAGNOSTIC
 				log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
 				    inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
 #endif
 				return (EINVAL);
 			}
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Map IPv4 address @dst to a bucket index for a table of @hsize slots.
  */
 static inline uint32_t
 in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize)
 {
 	uint32_t bucket;
 
 	bucket = IN_LLTBL_HASH(dst.s_addr, hsize);
 	return (bucket);
 }
 
 /*
  * llt_hash method: bucket index of @lle, derived from its IPv4 address.
  */
 static uint32_t
 in_lltable_hash(const struct llentry *lle, uint32_t hsize)
 {
 	const struct in_addr key = lle->r_l3addr.addr4;
 
 	return (in_lltable_hash_dst(key, hsize));
 }
 
 /*
  * llt_fill_sa_entry method: write the IPv4 address of @lle into @sa as
  * a zero-padded sockaddr_in.  @sa must have room for a sockaddr_in.
  */
 static void
 in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
 {
 	struct sockaddr_in *dst = (struct sockaddr_in *)sa;
 
 	bzero(dst, sizeof(*dst));
 	dst->sin_len = sizeof(*dst);
 	dst->sin_family = AF_INET;
 	dst->sin_addr = lle->r_l3addr.addr4;
 }
 
 /*
  * Search the hash bucket of @dst in @llt for an entry with that exact
  * address, ignoring entries flagged LLE_DELETED.  Returns the entry, or
  * NULL when the bucket holds no such entry.  No entry lock is taken.
  */
 static inline struct llentry *
 in_lltable_find_dst(struct lltable *llt, struct in_addr dst)
 {
 	struct llentries *bucket;
 	struct llentry *cand;
 
 	bucket = &llt->lle_head[in_lltable_hash_dst(dst, llt->llt_hsize)];
 	LIST_FOREACH(cand, bucket, lle_next) {
 		if ((cand->la_flags & LLE_DELETED) == 0 &&
 		    cand->r_l3addr.addr4.s_addr == dst.s_addr)
 			return (cand);
 	}
 
 	return (NULL);
 }
 
 /*
  * llt_delete_entry method: flag @lle as LLE_DELETED, notify lle_event
  * handlers with LLENTRY_DELETED and then hand the entry to
  * llentry_free().  @llt is not referenced in the body.
  */
 static void
 in_lltable_delete_entry(struct lltable *llt, struct llentry *lle)
 {
 
 	/* Flag first so the handlers invoked below see the deleted state. */
 	lle->la_flags |= LLE_DELETED;
 	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
 #ifdef DIAGNOSTIC
 	log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
 	llentry_free(lle);
 }
 
 /*
  * llt_alloc_entry method: create a new entry for @l3addr on @llt's
  * interface.
  *
  * Unless LLE_IFADDR is requested, the address must first pass
  * in_lltable_rtcheck().  For LLE_IFADDR entries the link-layer header
  * is pre-computed from the interface's own link-layer address and the
  * entry is marked static and valid.
  *
  * Returns the new entry, or NULL on route-check, allocation or header
  * calculation failure.
  */
 static struct llentry *
 in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in *sin4;
 	struct ifnet *ifp;
 	struct llentry *entry;
 	char hdr[LLE_MAX_LINKHDR];
 	size_t hdrlen;
 	int lladdr_off;
 
 	sin4 = (const struct sockaddr_in *)l3addr;
 	ifp = llt->llt_ifp;
 
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/*
 	 * A route that covers the given address must have
 	 * been installed 1st because we are doing a resolution,
 	 * verify this.
 	 */
 	if ((flags & LLE_IFADDR) == 0 &&
 	    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
 		return (NULL);
 
 	entry = in_lltable_new(sin4->sin_addr, flags);
 	if (entry == NULL) {
 		log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
 		return (NULL);
 	}
 	entry->la_flags = flags;
 	if ((flags & LLE_STATIC) != 0)
 		entry->r_flags |= RLLE_VALID;
 
 	/* Nothing more to set up unless this is an interface address. */
 	if ((flags & LLE_IFADDR) != LLE_IFADDR)
 		return (entry);
 
 	hdrlen = LLE_MAX_LINKHDR;
 	if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp),
 	    hdr, &hdrlen, &lladdr_off) != 0) {
 		/* The entry was never linked or locked; free it directly. */
 		in_lltable_destroy_lle_unlocked(entry);
 		return (NULL);
 	}
 	lltable_set_entry_addr(ifp, entry, hdr, hdrlen, lladdr_off);
 	entry->la_flags |= LLE_STATIC;
 	entry->r_flags |= (RLLE_VALID | RLLE_IFADDR);
 
 	return (entry);
 }
 
 /*
  * Return NULL if not found or marked for deletion.
  * If found, the entry is returned write-locked when LLE_EXCLUSIVE is
  * set in @flags, not locked at all when LLE_UNLOCKED is set, and
  * read-locked otherwise.  The two flags must not be combined.
  */
 static struct llentry *
 in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in *sin4;
 	struct llentry *found;
 
 	IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
 	sin4 = (const struct sockaddr_in *)l3addr;
 	found = in_lltable_find_dst(llt, sin4->sin_addr);
 	if (found == NULL)
 		return (NULL);
 
 	KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
 	    (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
 	    flags));
 
 	if ((flags & LLE_UNLOCKED) == 0) {
 		if ((flags & LLE_EXCLUSIVE) != 0) {
 			LLE_WLOCK(found);
 		} else {
 			LLE_RLOCK(found);
 		}
 	}
 
 	return (found);
 }
 
 static int
 in_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
     struct sysctl_req *wr)
 {
 	struct ifnet *ifp = llt->llt_ifp;
 	/* XXX stack use */
 	struct {
 		struct rt_msghdr	rtm;
 		struct sockaddr_in	sin;
 		struct sockaddr_dl	sdl;
 	} arpc;
 	struct sockaddr_dl *sdl;
 	int error;
 
 	bzero(&arpc, sizeof(arpc));
 			/* skip deleted entries */
 			if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
 				return (0);
 			/* Skip if jailed and not a valid IP of the prison. */
 			lltable_fill_sa_entry(lle,(struct sockaddr *)&arpc.sin);
 			if (prison_if(wr->td->td_ucred,
 			    (struct sockaddr *)&arpc.sin) != 0)
 				return (0);
 			/*
 			 * produce a msg made of:
 			 *  struct rt_msghdr;
 			 *  struct sockaddr_in; (IPv4)
 			 *  struct sockaddr_dl;
 			 */
 			arpc.rtm.rtm_msglen = sizeof(arpc);
 			arpc.rtm.rtm_version = RTM_VERSION;
 			arpc.rtm.rtm_type = RTM_GET;
 			arpc.rtm.rtm_flags = RTF_UP;
 			arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
 
 			/* publish */
 			if (lle->la_flags & LLE_PUB)
 				arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
 
 			sdl = &arpc.sdl;
 			sdl->sdl_family = AF_LINK;
 			sdl->sdl_len = sizeof(*sdl);
 			sdl->sdl_index = ifp->if_index;
 			sdl->sdl_type = ifp->if_type;
 			if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
 				sdl->sdl_alen = ifp->if_addrlen;
 				bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
 			} else {
 				sdl->sdl_alen = 0;
 				bzero(LLADDR(sdl), ifp->if_addrlen);
 			}
 
 			arpc.rtm.rtm_rmx.rmx_expire =
 			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
 			arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
 			if (lle->la_flags & LLE_STATIC)
 				arpc.rtm.rtm_flags |= RTF_STATIC;
 			if (lle->la_flags & LLE_IFADDR)
 				arpc.rtm.rtm_flags |= RTF_PINNED;
 			arpc.rtm.rtm_index = ifp->if_index;
 			error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
 
 	return (error);
 }
 
 /*
  * Allocate the IPv4 link-layer table for @ifp, install the in_lltable_*
  * methods and link the table in.  Returns the new table.
  */
 static struct lltable *
 in_lltattach(struct ifnet *ifp)
 {
 	struct lltable *table;
 
 	table = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE);
 	table->llt_ifp = ifp;
 	table->llt_af = AF_INET;
 
 	/* Method table. */
 	table->llt_hash = in_lltable_hash;
 	table->llt_lookup = in_lltable_lookup;
 	table->llt_alloc_entry = in_lltable_alloc;
 	table->llt_delete_entry = in_lltable_delete_entry;
 	table->llt_free_entry = in_lltable_free_entry;
 	table->llt_dump_entry = in_lltable_dump_entry;
 	table->llt_fill_sa_entry = in_lltable_fill_sa_entry;
 	table->llt_match_prefix = in_lltable_match_prefix;
 
 	/* Link only once the table is fully set up. */
 	lltable_link(table);
 
 	return (table);
 }
 
 /*
  * Allocate the per-interface IPv4 state for @ifp: a zeroed in_ifinfo
  * with a link-layer table and the IGMP state attached.  The allocation
  * uses M_WAITOK.  Returns the new in_ifinfo.
  */
 void *
 in_domifattach(struct ifnet *ifp)
 {
 	struct in_ifinfo *info;
 
 	info = malloc(sizeof(*info), M_IFADDR, M_WAITOK | M_ZERO);
 	info->ii_llt = in_lltattach(ifp);
 	info->ii_igmp = igmp_domifattach(ifp);
 
 	return (info);
 }
 
 /*
  * Tear down the per-interface IPv4 state: @aux is the in_ifinfo that
  * in_domifattach() returned for @ifp.
  */
 void
 in_domifdetach(struct ifnet *ifp, void *aux)
 {
 	struct in_ifinfo *info = aux;
 
 	igmp_domifdetach(ifp);
 	lltable_free(info->ii_llt);
 	free(info, M_IFADDR);
 }
Index: projects/hps_head/sys/netinet/tcp_timer.c
===================================================================
--- projects/hps_head/sys/netinet/tcp_timer.c	(revision 309217)
+++ projects/hps_head/sys/netinet/tcp_timer.c	(revision 309218)
@@ -1,1037 +1,1037 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_tcpdebug.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 #include <net/netisr.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #include <netinet/in_systm.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/cc/cc.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 
 /*
  * TCP timer tunables, all registered under the _net_inet_tcp sysctl
  * parent.  The SYSCTL_PROC entries go through the sysctl_msec_to_ticks
  * handler (presumably exposing milliseconds while the variable holds
  * ticks -- confirm against the handler); the SYSCTL_INT entries are
  * exported as plain integers.
  */
 int    tcp_persmin;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");
 
 int    tcp_persmax;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");
 
 int	tcp_keepinit;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
 
 int	tcp_keepidle;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
 
 int	tcp_keepintvl;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
 
 int	tcp_delacktime;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
     "Time before a delayed ACK is sent");
 
 int	tcp_msl;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
 
 int	tcp_rexmit_min;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
     "Minimum Retransmission Timeout");
 
 int	tcp_rexmit_slop;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
     "Retransmission Timer Slop");
 
 /* Enabled by default. */
 static int	always_keepalive = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
 
 /* Disabled by default; consulted by tcp_timer_2msl(). */
 int    tcp_fast_finwait2_recycle = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 
     &tcp_fast_finwait2_recycle, 0,
     "Recycle closed FIN_WAIT_2 connections faster");
 
 int    tcp_finwait2_timeout;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
 
 int	tcp_keepcnt = TCPTV_KEEPCNT;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
     "Number of keepalive probes to send");
 
 	/* max idle probes */
 int	tcp_maxpersistidle;
 
 /* Off by default. */
 static int	tcp_rexmit_drop_options = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
     &tcp_rexmit_drop_options, 0,
     "Drop TCP options from 3rd and later retransmitted SYN");
 
 /*
  * Path MTU discovery black hole detection.  These are per-vnet
  * variables (VNET_DEFINE) accessed through the V_ macros: one
  * read-write enable knob followed by three read-only counters.
  */
 static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
 #define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
     "Path MTU Discovery Black Hole Detection Enabled");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
 #define	V_tcp_pmtud_blackhole_activated \
     VNET(tcp_pmtud_blackhole_activated)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
     "Path MTU Discovery Black Hole Detection, Activation Count");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
 #define	V_tcp_pmtud_blackhole_activated_min_mss \
     VNET(tcp_pmtud_blackhole_activated_min_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
     "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
 #define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
     "Path MTU Discovery Black Hole Detection, Failure Count");
 
 /* Lowered MSS for IPv4; defaults to 1200 and only exists with INET. */
 #ifdef INET
 static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
 #define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
     "Path MTU Discovery Black Hole Detection lowered MSS");
 #endif
 
 /* Lowered MSS for IPv6; defaults to 1220 and only exists with INET6. */
 #ifdef INET6
 static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
 #define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
     "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
 #endif
 
 /* Defaults to 1 when RSS is compiled in, 0 otherwise. */
 #ifdef	RSS
 static int	per_cpu_timers = 1;
 #else
 static int	per_cpu_timers = 0;
 #endif
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
     &per_cpu_timers , 0, "run tcp timers on all cpus");
 
 /* Compiled out; inp_to_cpuid() below covers the same mapping. */
 #if 0
 #define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
 		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
 #endif
 
 /*
  * Map the given inp to a CPU id.
  *
  * This queries RSS if it's compiled in, else it derives the CPU from
  * the flowid, falling back to the current CPU ID when that yields no
  * usable CPU.  With per_cpu_timers disabled the result is always 0.
  *
  * Note that the trailing "else" pairs with whichever
  * "if (per_cpu_timers)" the preprocessor selected above it.
  */
 static inline int
 inp_to_cpuid(struct inpcb *inp)
 {
 	u_int cpuid;
 
 #ifdef	RSS
 	if (per_cpu_timers) {
 		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
 		if (cpuid == NETISR_CPUID_NONE)
 			return (curcpu);	/* XXX */
 		else
 			return (cpuid);
 	}
 #else
 	/* Legacy, pre-RSS behaviour */
 	if (per_cpu_timers) {
 		/*
 		 * We don't have a flowid -> cpuid mapping, so cheat and
 		 * just map unknown cpuids to curcpu.  Not the best, but
 		 * apparently better than defaulting to swi 0.
 		 */
 		cpuid = inp->inp_flowid % (mp_maxid + 1);
 		if (! CPU_ABSENT(cpuid))
 			return (cpuid);
 		return (curcpu);
 	}
 #endif
 	/* Default for RSS and non-RSS - cpuid 0 */
 	else {
 		return (0);
 	}
 }
 
 /*
  * Tcp protocol timeout routine called every 500 ms.
  * Updates timestamps used for TCP and
  * causes finite state machine actions if timers expire.
  */
 void
 tcp_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	/*
 	 * Run the time-wait scan once in the context of every vnet;
 	 * the value returned by tcp_tw_2msl_scan() is ignored.
 	 */
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		(void) tcp_tw_2msl_scan(0);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Backoff multiplier tables with TCP_MAXRXTSHIFT + 1 slots each
  * (presumably indexed by the retransmit shift count -- confirm at the
  * users).  The SYN table stays at 1 for its first five slots and is
  * capped at 64; the general table doubles from 1 and is capped at 512.
  */
 int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
 
 int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
 
 /* Must be kept in step with the tcp_backoff[] initializer above. */
 static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
 
 /*
  * TCP timer processing.
  */
 
 /*
  * Delayed-ACK timer handler; @xtp is the tcpcb the timer belongs to.
  *
  * With the inp write-locked, the handler does nothing if the callout
  * has been rescheduled (pending) or stopped (no longer active), or if
  * the inp has been dropped.  Otherwise it sets TF_ACKNOW and calls the
  * connection's output routine.
  */
 void
 tcp_timer_delack(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 
 	if (callout_pending(&tp->t_timers->tt_delack) ||
 	    !callout_active(&tp->t_timers->tt_delack))
 		goto done;
 	callout_deactivate(&tp->t_timers->tt_delack);
 
 	if ((inp->inp_flags & INP_DROPPED) != 0)
 		goto done;
 
 	tp->t_flags |= TF_ACKNOW;
 	TCPSTAT_INC(tcps_delack);
 	(void) tp->t_fb->tfb_tcp_output(tp);
 
 done:
 	/* Every path leaves with the inp unlocked and the vnet restored. */
 	INP_WUNLOCK(inp);
 	CURVNET_RESTORE();
 }
 
 /*
  * When a timer wants to remove a TCB it must
  * hold the INP_INFO_RLOCK(). The timer function
  * should only have grabbed the INP_WLOCK() when
  * it entered. To safely switch to holding both the
  * INP_INFO_RLOCK() and the INP_WLOCK() we must first
  * grab a reference on the inp, which will hold the inp
  * so that it can't be removed. We then unlock the INP_WLOCK(), 
  * and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK()
  * we proceed again to get the INP_WLOCK() (this preserves proper
  * lock order). After acquiring the INP_WLOCK we must check if someone 
  * else deleted the pcb i.e. the inp_flags check.
  * If so we return 1 otherwise we return 0.
  *
  * No matter what the tcp_inpinfo_lock_add() function
  * returns the caller must afterwards call tcp_inpinfo_lock_del()
  * to drop the locks and reference properly.
  */
 
 int
 tcp_inpinfo_lock_add(struct inpcb *inp)
 {
 
 	/* Pin the inp so it stays around while its lock is dropped. */
 	in_pcbref(inp);
 	INP_WUNLOCK(inp);
 
 	/* Take both locks: the info read lock first, then the inp lock. */
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 
 	/* Report 1 if the pcb went to time-wait or was dropped meanwhile. */
 	return ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0);
 }
 
 /*
  * Undo tcp_inpinfo_lock_add(): release the info read lock and the
  * reference taken on @inp.  @tp is the tcpcb as left by the caller;
  * NULL means tcp_close/drop() already released the inp lock.
  */
 void
 tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp)
 {
 
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	if (inp == NULL)
 		return;
 
 	/*
 	 * If tcp_close/drop() got called and tp came back NULL, that
 	 * function dropped the inp lock.  Our reference keeps the inp
 	 * around, so re-acquire the INP_WLOCK() before the reference
 	 * is dropped.
 	 */
 	if (tp == NULL)
 		INP_WLOCK(inp);
 
 	/* Unlock only when in_pcbrele_wlocked() returns 0. */
 	if (in_pcbrele_wlocked(inp) == 0)
 		INP_WUNLOCK(inp);
 }
 
 /*
  * 2MSL timer handler; @xtp is the tcpcb the timer belongs to.
  *
  * Depending on connection state the handler returns without action,
  * re-arms itself for another TP_KEEPINTVL(tp), or closes the
  * connection through tcp_close() with the extra locking provided by
  * tcp_inpinfo_lock_add()/tcp_inpinfo_lock_del().
  */
 void
 tcp_timer_2msl(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	/* Note: the SACK holes are freed before the callout checks below. */
 	tcp_free_sackholes(tp);
 	/* Nothing to do if the timer was rescheduled or stopped meanwhile. */
 	if (callout_pending(&tp->t_timers->tt_2msl) ||
 	    !callout_active(&tp->t_timers->tt_2msl)) {
 		INP_WUNLOCK(tp->t_inpcb);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_2msl);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		CURVNET_RESTORE();
 		return;
 	}
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 	/*
 	 * 2 MSL timeout in shutdown went off.  If we're closed but
 	 * still waiting for peer to close and connection has been idle
 	 * too long delete connection control block.  Otherwise, check
 	 * again in a bit.
 	 *
 	 * If in TIME_WAIT state just ignore as this timeout is handled in
 	 * tcp_tw_2msl_scan().
 	 *
 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
 	 * Ignore fact that there were recent incoming segments.
 	 */
 	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
 		INP_WUNLOCK(inp);
 		CURVNET_RESTORE();
 		return;
 	}
 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
 	    tp->t_inpcb && tp->t_inpcb->inp_socket && 
 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
 		TCPSTAT_INC(tcps_finwait2_drops);
 		/*
 		 * A non-zero return means the pcb went to time-wait or was
 		 * dropped while the locks were being switched; just undo.
 		 */
 		if (tcp_inpinfo_lock_add(inp)) {
 			tcp_inpinfo_lock_del(inp, tp);
 			goto out;
 		}
 		tp = tcp_close(tp);             
 		tcp_inpinfo_lock_del(inp, tp);
 		goto out;
 	} else {
 		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
 			/* Not idle for too long yet: check again later. */
 			callout_reset(&tp->t_timers->tt_2msl,
 				      TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
 		} else {
 			if (tcp_inpinfo_lock_add(inp)) {
 				tcp_inpinfo_lock_del(inp, tp);
 				goto out;
 			}
 			tp = tcp_close(tp);
 			tcp_inpinfo_lock_del(inp, tp);
 			goto out;
 		}
        }
 
 	/*
 	 * Only the callout_reset() branch above falls through to here;
 	 * every other branch has already jumped to "out", skipping the
 	 * trace and probe below.
 	 */
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
 out:
 	CURVNET_RESTORE();
 }
 
 /*
  * Callout handler for the keepalive timer.  xtp is the tcpcb that owns
  * the timer.  Depending on connection state and idle time this either
  * re-arms the timer, sends a keepalive probe and re-arms, or drops the
  * connection with ETIMEDOUT.
  */
 void
 tcp_timer_keep(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct tcptemp *t_template;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	/* Nothing to do if the callout was re-armed or is not active. */
 	if (callout_pending(&tp->t_timers->tt_keep) ||
 	    !callout_active(&tp->t_timers->tt_keep)) {
 		INP_WUNLOCK(inp);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_keep);
 	/* The inp has already been dropped; leave it alone. */
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		CURVNET_RESTORE();
 		return;
 	}
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 
 	/*
 	 * Because we don't regularly reset the keepalive callout in
 	 * the ESTABLISHED state, it may be that we don't actually need
 	 * to send a keepalive yet. If that occurs, schedule another
 	 * call for the next time the keepalive timer might expire.
 	 */
 	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
 		u_int idletime;
 
 		idletime = ticks - tp->t_rcvtime;
 		if (idletime < TP_KEEPIDLE(tp)) {
 			/* Re-arm for the remainder of the idle period. */
 			callout_reset(&tp->t_timers->tt_keep,
 			    TP_KEEPIDLE(tp) - idletime, tcp_timer_keep, tp);
 			INP_WUNLOCK(inp);
 			CURVNET_RESTORE();
 			return;
 		}
 	}
 
 	/*
 	 * Keep-alive timer went off; send something
 	 * or drop connection if idle for too long.
 	 */
 	TCPSTAT_INC(tcps_keeptimeo);
 	if (tp->t_state < TCPS_ESTABLISHED)
 		goto dropit;
 	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
 	    tp->t_state <= TCPS_CLOSING) {
 		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
 			goto dropit;
 		/*
 		 * Send a packet designed to force a response
 		 * if the peer is up and reachable:
 		 * either an ACK if the connection is still alive,
 		 * or an RST if the peer has closed the connection
 		 * due to timeout or reboot.
 		 * Using sequence number tp->snd_una-1
 		 * causes the transmitted zero-length segment
 		 * to lie outside the receive window;
 		 * by the protocol spec, this requires the
 		 * correspondent TCP to respond.
 		 */
 		TCPSTAT_INC(tcps_keepprobe);
 		t_template = tcpip_maketemplate(inp);
 		/* A failed template allocation just skips this probe. */
 		if (t_template) {
 			tcp_respond(tp, t_template->tt_ipgen,
 				    &t_template->tt_t, (struct mbuf *)NULL,
 				    tp->rcv_nxt, tp->snd_una - 1, 0);
 			free(t_template, M_TEMP);
 		}
 		callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
 			      tcp_timer_keep, tp);
 	} else
 		callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
 			      tcp_timer_keep, tp);
 
 #ifdef TCPDEBUG
 	if (inp->inp_socket->so_options & SO_DEBUG)
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	INP_WUNLOCK(inp);
 	CURVNET_RESTORE();
 	return;
 
 dropit:
 	TCPSTAT_INC(tcps_keepdrops);
 
 	/*
 	 * Pick up the tcbinfo lock before dropping.  A non-zero return
 	 * means the inp was dropped or entered TIME_WAIT while its lock
 	 * was released, so only the locks and reference are undone.
 	 */
 	if (tcp_inpinfo_lock_add(inp)) {
 		tcp_inpinfo_lock_del(inp, tp);
 		goto out;
 	}
 	tp = tcp_drop(tp, ETIMEDOUT);
 
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	tcp_inpinfo_lock_del(inp, tp);
 out:
 	CURVNET_RESTORE();
 }
 
 /*
  * Callout handler for the persist timer.  xtp is the tcpcb that owns
  * the timer.  Either drops a connection that has been unresponsive for
  * too long, or forces a segment out into the closed window and re-arms
  * the persist timer.
  */
 void
 tcp_timer_persist(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 
 	/* Nothing to do if the callout was re-armed or is not active. */
 	if (callout_pending(&tp->t_timers->tt_persist) ||
 	    !callout_active(&tp->t_timers->tt_persist))
 		goto unlock;
 	callout_deactivate(&tp->t_timers->tt_persist);
 	if ((inp->inp_flags & INP_DROPPED) != 0)
 		goto unlock;
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 
 	/* Persistence timer into zero window. */
 	TCPSTAT_INC(tcps_persisttimeo);
 
 	/*
 	 * Two reasons to give up on the connection:
 	 *  - after a full backoff, the idle time (no responses to
 	 *    probes) has reached the maximum backoff we would use if
 	 *    retransmitting, so the peer is treated as dead or
 	 *    unreachable;
 	 *  - the user has closed the socket, in which case a much
 	 *    reduced timeout applies.
 	 */
 	if ((tp->t_rxtshift == TCP_MAXRXTSHIFT &&
 	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
 	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) ||
 	    (tp->t_state > TCPS_CLOSE_WAIT &&
 	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX)) {
 		TCPSTAT_INC(tcps_persistdrop);
 		/*
 		 * Only drop when the inp was not dropped or moved to
 		 * TIME_WAIT while its lock was released.
 		 */
 		if (tcp_inpinfo_lock_add(inp) == 0)
 			tp = tcp_drop(tp, ETIMEDOUT);
 		tcp_inpinfo_lock_del(inp, tp);
 		goto out;
 	}
 
 	/* Re-arm and force a byte to be output, if possible. */
 	tcp_setpersist(tp);
 	tp->t_flags |= TF_FORCEDATA;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	tp->t_flags &= ~TF_FORCEDATA;
 
 #ifdef TCPDEBUG
 	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
 		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 unlock:
 	INP_WUNLOCK(inp);
 out:
 	CURVNET_RESTORE();
 }
 
 /*
  * Callout handler for the retransmit timer.  xtp is the tcpcb that owns
  * the timer.  Backs off the retransmit interval, optionally runs path
  * MTU black-hole detection, rewinds snd_nxt to snd_una and calls the
  * stack's output routine; after too many backoffs the connection is
  * dropped instead.
  */
 void
 tcp_timer_rexmt(void * xtp)
 {
 	struct tcpcb *tp = xtp;
 	CURVNET_SET(tp->t_vnet);
 	int rexmt;
 	struct inpcb *inp;
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	/* Nothing to do if the callout was re-armed or is not active. */
 	if (callout_pending(&tp->t_timers->tt_rexmt) ||
 	    !callout_active(&tp->t_timers->tt_rexmt)) {
 		INP_WUNLOCK(inp);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_rexmt);
 	/* The inp has already been dropped; leave it alone. */
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		CURVNET_RESTORE();
 		return;
 	}
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 	tcp_free_sackholes(tp);
 	if (tp->t_fb->tfb_tcp_rexmit_tmr) {
 		/* The stack has a timer action too. */
 		(*tp->t_fb->tfb_tcp_rexmit_tmr)(tp);
 	}
 	/*
 	 * Retransmission timer went off.  Message has not
 	 * been acked within retransmit interval.  Back off
 	 * to a longer retransmit interval and retransmit one segment.
 	 */
 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
 		/* Out of backoff steps: clamp the shift and drop. */
 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
 		TCPSTAT_INC(tcps_timeoutdrop);
 		/*
 		 * Non-zero means the inp was dropped or entered
 		 * TIME_WAIT while its lock was released; just undo the
 		 * locks and reference.
 		 */
 		if (tcp_inpinfo_lock_add(inp)) {
 			tcp_inpinfo_lock_del(inp, tp);
 			goto out;
 		}
 		/* Report the pending soft error if any, else ETIMEDOUT. */
 		tp = tcp_drop(tp, tp->t_softerror ?
 			      tp->t_softerror : ETIMEDOUT);
 		tcp_inpinfo_lock_del(inp, tp);
 		goto out;
 	}
 	if (tp->t_state == TCPS_SYN_SENT) {
 		/*
 		 * If the SYN was retransmitted, indicate CWND to be
 		 * limited to 1 segment in cc_conn_init().
 		 */
 		tp->snd_cwnd = 1;
 	} else if (tp->t_rxtshift == 1) {
 		/*
 		 * first retransmit; record ssthresh and cwnd so they can
 		 * be recovered if this turns out to be a "bad" retransmit.
 		 * A retransmit is considered "bad" if an ACK for this
 		 * segment is received within RTT/2 interval; the assumption
 		 * here is that the ACK was already in flight.  See
 		 * "On Estimating End-to-End Network Path Properties" by
 		 * Allman and Paxson for more details.
 		 */
 		tp->snd_cwnd_prev = tp->snd_cwnd;
 		tp->snd_ssthresh_prev = tp->snd_ssthresh;
 		tp->snd_recover_prev = tp->snd_recover;
 		if (IN_FASTRECOVERY(tp->t_flags))
 			tp->t_flags |= TF_WASFRECOVERY;
 		else
 			tp->t_flags &= ~TF_WASFRECOVERY;
 		if (IN_CONGRECOVERY(tp->t_flags))
 			tp->t_flags |= TF_WASCRECOVERY;
 		else
 			tp->t_flags &= ~TF_WASCRECOVERY;
 		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
 		tp->t_flags |= TF_PREVVALID;
 	} else
 		tp->t_flags &= ~TF_PREVVALID;
 	TCPSTAT_INC(tcps_rexmttimeo);
 	/* SYN states use a dedicated backoff table and a fixed base RTO. */
 	if ((tp->t_state == TCPS_SYN_SENT) ||
 	    (tp->t_state == TCPS_SYN_RECEIVED))
 		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
 	else
 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
 		      tp->t_rttmin, TCPTV_REXMTMAX);
 
 	/*
 	 * We enter the path for PLMTUD if connection is established or, if
 	 * connection is FIN_WAIT_1 status, reason for the last is that if
 	 * amount of data we send is very small, we could send it in couple of
 	 * packets and process straight to FIN. In that case we won't catch
 	 * ESTABLISHED state.
 	 */
 	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
 	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
 #ifdef INET6
 		int isipv6;
 #endif
 
 		/*
 		 * Idea here is that at each stage of mtu probe (usually, 1448
 		 * -> 1188 -> 524) should be given 2 chances to recover before
 		 *  further clamping down. 'tp->t_rxtshift % 2 == 0' should
 		 *  take care of that.
 		 */
 		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
 		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
 		    (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) {
 			/*
 			 * Enter Path MTU Black-hole Detection mechanism:
 			 * - Disable Path MTU Discovery (IP "DF" bit).
 			 * - Reduce MTU to lower value than what we
 			 *   negotiated with peer.
 			 */
 			/* Record that we may have found a black hole. */
 			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
 
 			/* Keep track of previous MSS. */
 			tp->t_pmtud_saved_maxseg = tp->t_maxseg;
 
 			/*
 			 * Reduce the MSS to blackhole value or to the default
 			 * in an attempt to retransmit.
 			 */
 #ifdef INET6
 			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
 			if (isipv6 &&
 			    tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) {
 				/* Use the sysctl tuneable blackhole MSS. */
 				tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss;
 				V_tcp_pmtud_blackhole_activated++;
 			} else if (isipv6) {
 				/* Use the default MSS. */
 				tp->t_maxseg = V_tcp_v6mssdflt;
 				/*
 				 * Disable Path MTU Discovery when we switch to
 				 * minmss.
 				 */
 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 				V_tcp_pmtud_blackhole_activated_min_mss++;
 			}
 #endif
 			/*
 			 * When both INET6 and INET are configured, the bare
 			 * "else" below chains the IPv4 branch onto the IPv6
 			 * if/else-if above.
 			 */
 #if defined(INET6) && defined(INET)
 			else
 #endif
 #ifdef INET
 			if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) {
 				/* Use the sysctl tuneable blackhole MSS. */
 				tp->t_maxseg = V_tcp_pmtud_blackhole_mss;
 				V_tcp_pmtud_blackhole_activated++;
 			} else {
 				/* Use the default MSS. */
 				tp->t_maxseg = V_tcp_mssdflt;
 				/*
 				 * Disable Path MTU Discovery when we switch to
 				 * minmss.
 				 */
 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 				V_tcp_pmtud_blackhole_activated_min_mss++;
 			}
 #endif
 			/*
 			 * Reset the slow-start flight size
 			 * as it may depend on the new MSS.
 			 */
 			if (CC_ALGO(tp)->conn_init != NULL)
 				CC_ALGO(tp)->conn_init(tp->ccv);
 		} else {
 			/*
 			 * If further retransmissions are still unsuccessful
 			 * with a lowered MTU, maybe this isn't a blackhole and
 			 * we restore the previous MSS and blackhole detection
 			 * flags.
 			 * The limit '6' is determined by giving each probe
 			 * stage (1448, 1188, 524) 2 chances to recover.
 			 */
 			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
 			    (tp->t_rxtshift > 6)) {
 				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
 				tp->t_maxseg = tp->t_pmtud_saved_maxseg;
 				V_tcp_pmtud_blackhole_failed++;
 				/*
 				 * Reset the slow-start flight size as it
 				 * may depend on the new MSS.
 				 */
 				if (CC_ALGO(tp)->conn_init != NULL)
 					CC_ALGO(tp)->conn_init(tp->ccv);
 			}
 		}
 	}
 
 	/*
 	 * Disable RFC1323 and SACK if we haven't got any response to
 	 * our third SYN to work-around some broken terminal servers
 	 * (most of which have hopefully been retired) that have bad VJ
 	 * header compression code which trashes TCP segments containing
 	 * unknown-to-them TCP options.
 	 */
 	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
 	    (tp->t_rxtshift == 3))
 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
 	/*
 	 * If we backed off this far, our srtt estimate is probably bogus.
 	 * Clobber it so we'll take the next rtt measurement as our srtt;
 	 * move the current srtt into rttvar to keep the current
 	 * retransmit times until then.
 	 */
 	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
 #ifdef INET6
 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 			in6_losing(tp->t_inpcb);
 		else
 #endif
 			in_losing(tp->t_inpcb);
 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 		tp->t_srtt = 0;
 	}
 	/* Rewind the send pointer to the oldest unacknowledged byte. */
 	tp->snd_nxt = tp->snd_una;
 	tp->snd_recover = tp->snd_max;
 	/*
 	 * Force a segment to be sent.
 	 */
 	tp->t_flags |= TF_ACKNOW;
 	/*
 	 * If timing a segment in this window, stop the timer.
 	 */
 	tp->t_rtttime = 0;
 
 	cc_cong_signal(tp, NULL, CC_RTO);
 
 	(void) tp->t_fb->tfb_tcp_output(tp);
 
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	INP_WUNLOCK(inp);
 out:
 	CURVNET_RESTORE();
 }
 
 /*
  * Arm or disarm one of a connection's timers.
  *
  * tp:         connection whose timer is changed.
  * timer_type: one of TT_DELACK, TT_REXMT, TT_PERSIST, TT_KEEP, TT_2MSL;
  *             any other value is passed to the stack's
  *             tfb_tcp_timer_activate hook, or panics when the stack
  *             provides none.
  * delta:      timeout handed to callout_reset_on(); 0 stops the timer.
  *
  * Does nothing once the timers are flagged TT_STOPPED, nor (with
  * TCP_OFFLOAD) when the connection has TF_TOE set.
  */
 void
 tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
 {
 	struct callout *co;
 	timeout_t *handler;
 	struct inpcb *inp = tp->t_inpcb;
 	int cpu = inp_to_cpuid(inp);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE)
 		return;
 #endif
 
 	if (tp->t_timers->tt_flags & TT_STOPPED)
 		return;
 
 	/* Map the timer type onto its callout and handler. */
 	switch (timer_type) {
 	case TT_DELACK:
 		co = &tp->t_timers->tt_delack;
 		handler = tcp_timer_delack;
 		break;
 	case TT_REXMT:
 		co = &tp->t_timers->tt_rexmt;
 		handler = tcp_timer_rexmt;
 		break;
 	case TT_PERSIST:
 		co = &tp->t_timers->tt_persist;
 		handler = tcp_timer_persist;
 		break;
 	case TT_KEEP:
 		co = &tp->t_timers->tt_keep;
 		handler = tcp_timer_keep;
 		break;
 	case TT_2MSL:
 		co = &tp->t_timers->tt_2msl;
 		handler = tcp_timer_2msl;
 		break;
 	default:
 		/* Not a base timer: the stack must know about it. */
 		if (tp->t_fb->tfb_tcp_timer_activate == NULL)
 			panic("tp %p bad timer_type %#x", tp, timer_type);
 		tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
 		return;
 	}
 
 	if (delta != 0) {
 		callout_reset_on(co, delta, handler, tp, cpu);
 	} else {
 		callout_stop(co);
 	}
 }
 
 /*
  * Report whether one of a connection's timers is active.
  *
  * For the base timer types the result is callout_active() on the
  * matching callout.  Any other timer_type is answered by the stack's
  * tfb_tcp_timer_active hook, or panics when the stack provides none.
  */
 int
 tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
 {
 	struct callout *co;
 
 	switch (timer_type) {
 	case TT_DELACK:
 		co = &tp->t_timers->tt_delack;
 		break;
 	case TT_REXMT:
 		co = &tp->t_timers->tt_rexmt;
 		break;
 	case TT_PERSIST:
 		co = &tp->t_timers->tt_persist;
 		break;
 	case TT_KEEP:
 		co = &tp->t_timers->tt_keep;
 		break;
 	case TT_2MSL:
 		co = &tp->t_timers->tt_2msl;
 		break;
 	default:
 		/* Not a base timer: the stack must know about it. */
 		if (tp->t_fb->tfb_tcp_timer_active == NULL)
 			panic("tp %p bad timer_type %#x", tp, timer_type);
 		return (tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
 	}
 
 	return (callout_active(co));
 }
 
 /*
  * Stop one of a connection's timers and flag the timer set TT_STOPPED.
  *
  * For the base timer types the matching callout is drained
  * asynchronously with tcp_timer_discard as the drain function; when
  * the drain is still in progress on return, tt_draincnt is bumped.
  * Any other timer_type is handed to the stack's tfb_tcp_timer_stop
  * hook, or panics when the stack provides none.
  */
 void
 tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
 {
 	struct callout *t_callout;
 
 	/* Set unconditionally, before timer_type is even looked at. */
 	tp->t_timers->tt_flags |= TT_STOPPED;
 	switch (timer_type) {
 		case TT_DELACK:
 			t_callout = &tp->t_timers->tt_delack;
 			break;
 		case TT_REXMT:
 			t_callout = &tp->t_timers->tt_rexmt;
 			break;
 		case TT_PERSIST:
 			t_callout = &tp->t_timers->tt_persist;
 			break;
 		case TT_KEEP:
 			t_callout = &tp->t_timers->tt_keep;
 			break;
 		case TT_2MSL:
 			t_callout = &tp->t_timers->tt_2msl;
 			break;
 		default:
 			if (tp->t_fb->tfb_tcp_timer_stop) {
 				/*
 				 * XXXrrs we need to look at this with the
 				 * stop case below (flags).
 				 */
 				tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
 				return;
 			}
 			panic("tp %p bad timer_type %#x", tp, timer_type);
 		}
 
-	if (callout_async_drain(t_callout, tcp_timer_discard) & CALLOUT_RET_DRAINING) {
+	if (callout_async_drain(t_callout, tcp_timer_discard).bit.draining) {
 		/*
 		 * Can't stop the callout, defer tcpcb actual deletion
 		 * to the last one. We do this using the async drain
 		 * function and incrementing the count in
 		 * tp->t_timers->tt_draincnt.
 		 */
 		tp->t_timers->tt_draincnt++;
 	}
 }
 
 #define	ticks_to_msecs(t)	(1000*(t) / hz)
 
 /*
  * Fill in the exported timer snapshot *xtimer from *timer.
  *
  * *xtimer is zeroed first and stays all-zero when timer is NULL.
  * Otherwise each active callout contributes the difference between its
  * c_time and the current uptime, divided by SBT_1MS, and t_rcvtime is
  * set to the ticks elapsed since tp->t_rcvtime, converted with
  * ticks_to_msecs().
  */
 void
 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
     struct xtcp_timer *xtimer)
 {
 	sbintime_t uptime;
 
 	bzero(xtimer, sizeof(*xtimer));
 	if (timer != NULL) {
 		uptime = getsbinuptime();
 
 		/* Inactive timers keep the zero written by bzero(). */
 		if (callout_active(&timer->tt_delack)) {
 			xtimer->tt_delack =
 			    (timer->tt_delack.c_time - uptime) / SBT_1MS;
 		}
 		if (callout_active(&timer->tt_rexmt)) {
 			xtimer->tt_rexmt =
 			    (timer->tt_rexmt.c_time - uptime) / SBT_1MS;
 		}
 		if (callout_active(&timer->tt_persist)) {
 			xtimer->tt_persist =
 			    (timer->tt_persist.c_time - uptime) / SBT_1MS;
 		}
 		if (callout_active(&timer->tt_keep)) {
 			xtimer->tt_keep =
 			    (timer->tt_keep.c_time - uptime) / SBT_1MS;
 		}
 		if (callout_active(&timer->tt_2msl)) {
 			xtimer->tt_2msl =
 			    (timer->tt_2msl.c_time - uptime) / SBT_1MS;
 		}
 
 		xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
 	}
 }
Index: projects/hps_head/sys/netinet6/in6.c
===================================================================
--- projects/hps_head/sys/netinet6/in6.c	(revision 309217)
+++ projects/hps_head/sys/netinet6/in6.c	(revision 309218)
@@ -1,2501 +1,2501 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.c	8.2 (Berkeley) 11/15/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/errno.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/if_dl.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <net/if_llatbl.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_carp.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 
 VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix);
 #define V_icmp6_nodeinfo_oldmcprefix	VNET(icmp6_nodeinfo_oldmcprefix)
 
 /*
  * Definitions of some constant IP6 addresses.
  */
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 const struct in6_addr in6addr_nodelocal_allnodes =
 	IN6ADDR_NODELOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allnodes =
 	IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters =
 	IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
 const struct in6_addr in6addr_linklocal_allv2routers =
 	IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT;
 
 const struct in6_addr in6mask0 = IN6MASK0;
 const struct in6_addr in6mask32 = IN6MASK32;
 const struct in6_addr in6mask64 = IN6MASK64;
 const struct in6_addr in6mask96 = IN6MASK96;
 const struct in6_addr in6mask128 = IN6MASK128;
 
 const struct sockaddr_in6 sa6_any =
 	{ sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };
 
 static int in6_notify_ifa(struct ifnet *, struct in6_ifaddr *,
 	struct in6_aliasreq *, int);
 static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
 
 static int in6_validate_ifra(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int);
 static struct in6_ifaddr *in6_alloc_ifa(struct ifnet *,
     struct in6_aliasreq *, int flags);
 static int in6_update_ifa_internal(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int, int);
 static int in6_broadcast_ifa(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int);
 
 #define ifa2ia6(ifa)	((struct in6_ifaddr *)(ifa))
 #define ia62ifa(ia6)	(&((ia6)->ia_ifa))
 
 
 void
 in6_newaddrmsg(struct in6_ifaddr *ia, int cmd)
 {
 	struct sockaddr_dl gateway;
 	struct sockaddr_in6 mask, addr;
 	struct rtentry rt;
 
 	/*
 	 * initialize for rtmsg generation
 	 */
 	bzero(&gateway, sizeof(gateway));
 	gateway.sdl_len = sizeof(gateway);
 	gateway.sdl_family = AF_LINK;
 
 	bzero(&rt, sizeof(rt));
 	rt.rt_gateway = (struct sockaddr *)&gateway;
 	memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
 	memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
 	rt_mask(&rt) = (struct sockaddr *)&mask;
 	rt_key(&rt) = (struct sockaddr *)&addr;
 	rt.rt_flags = RTF_HOST | RTF_STATIC;
 	if (cmd == RTM_ADD)
 		rt.rt_flags |= RTF_UP;
 	/* Announce arrival of local address to all FIBs. */
 	rt_newaddrmsg(cmd, &ia->ia_ifa, 0, &rt);
 }
 
 /*
  * Convert an IPv6 netmask into a prefix length.
  *
  * mask: netmask to examine.
  * lim0: optional end pointer; bytes at and beyond it are not examined.
  *       When it is NULL, or when lim0 - mask compares greater than
  *       sizeof(*mask), all sizeof(*mask) bytes are examined.
  *
  * Returns the number of leading one bits in the examined range, or -1
  * when the mask is not contiguous, i.e. some one bit follows the first
  * zero bit inside the examined range.
  */
 int
 in6_mask2len(struct in6_addr *mask, u_char *lim0)
 {
 	int x = 0, y;
 	u_char *lim = lim0, *p;
 
 	/* ignore the scope_id part */
 	if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask))
 		lim = (u_char *)mask + sizeof(*mask);
 
 	/* x counts the leading all-ones bytes. */
 	for (p = (u_char *)mask; p < lim; x++, p++) {
 		if (*p != 0xff)
 			break;
 	}
 
 	/*
 	 * y counts the leading one bits of the first byte that is not
 	 * 0xff; such a byte has at least one zero bit, so y stays < 8.
 	 */
 	y = 0;
 	if (p < lim) {
 		for (y = 0; y < 8; y++) {
 			if ((*p & (0x80 >> y)) == 0)
 				break;
 		}
 	}
 
 	/*
 	 * Everything after the first zero bit must be zero.  The partial
 	 * byte is checked even when y == 0: a byte such as 0x7f has no
 	 * leading one bit, yet its low bits make the mask non-contiguous.
 	 */
 	if (p < lim) {
 		if ((*p & (0x00ff >> y)) != 0)
 			return (-1);
 		for (p = p + 1; p < lim; p++)
 			if (*p != 0)
 				return (-1);
 	}
 
 	return x * 8 + y;
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * Argument layout of SIOCGDEFIFACE32_IN6, built only when
  * COMPAT_FREEBSD32 is defined.  ifindex has a fixed 32-bit width.
  */
 struct in6_ndifreq32 {
 	char ifname[IFNAMSIZ];
 	uint32_t ifindex;
 };
 /* ioctl command number: group 'i', number 86, sized by the struct above. */
 #define	SIOCGDEFIFACE32_IN6	_IOWR('i', 86, struct in6_ndifreq32)
 #endif
 
 int
 in6_control(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 	struct	in6_ifreq *ifr = (struct in6_ifreq *)data;
 	struct	in6_ifaddr *ia = NULL;
 	struct	in6_aliasreq *ifra = (struct in6_aliasreq *)data;
 	struct sockaddr_in6 *sa6;
 	int carp_attached = 0;
 	int error;
 	u_long ocmd = cmd;
 
 	/*
 	 * Compat to make pre-10.x ifconfig(8) operable.
 	 */
 	if (cmd == OSIOCAIFADDR_IN6)
 		cmd = SIOCAIFADDR_IN6;
 
 	switch (cmd) {
 	case SIOCGETSGCNT_IN6:
 	case SIOCGETMIFCNT_IN6:
 		/*
 		 * XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c.
 		 * We cannot see how that would be needed, so do not adjust the
 		 * KPI blindly; more likely should clean up the IPv4 variant.
 		 */
 		return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
 	}
 
 	switch (cmd) {
 	case SIOCAADDRCTL_POLICY:
 	case SIOCDADDRCTL_POLICY:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ADDRCTRL6);
 			if (error)
 				return (error);
 		}
 		return (in6_src_ioctl(cmd, data));
 	}
 
 	if (ifp == NULL)
 		return (EOPNOTSUPP);
 
 	switch (cmd) {
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCSDEFIFACE_IN6:
 	case SIOCSIFINFO_FLAGS:
 	case SIOCSIFINFO_IN6:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ND6);
 			if (error)
 				return (error);
 		}
 		/* FALLTHROUGH */
 	case OSIOCGIFINFO_IN6:
 	case SIOCGIFINFO_IN6:
 	case SIOCGNBRINFO_IN6:
 	case SIOCGDEFIFACE_IN6:
 		return (nd6_ioctl(cmd, data, ifp));
 
 #ifdef COMPAT_FREEBSD32
 	case SIOCGDEFIFACE32_IN6:
 		{
 			struct in6_ndifreq ndif;
 			struct in6_ndifreq32 *ndif32;
 
 			error = nd6_ioctl(SIOCGDEFIFACE_IN6, (caddr_t)&ndif,
 			    ifp);
 			if (error)
 				return (error);
 			ndif32 = (struct in6_ndifreq32 *)data;
 			ndif32->ifindex = ndif.ifindex;
 			return (0);
 		}
 #endif
 	}
 
 	switch (cmd) {
 	case SIOCSIFPREFIX_IN6:
 	case SIOCDIFPREFIX_IN6:
 	case SIOCAIFPREFIX_IN6:
 	case SIOCCIFPREFIX_IN6:
 	case SIOCSGIFPREFIX_IN6:
 	case SIOCGIFPREFIX_IN6:
 		log(LOG_NOTICE,
 		    "prefix ioctls are now invalidated. "
 		    "please use ifconfig.\n");
 		return (EOPNOTSUPP);
 	}
 
 	switch (cmd) {
 	case SIOCSSCOPE6:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_SCOPE6);
 			if (error)
 				return (error);
 		}
 		/* FALLTHROUGH */
 	case SIOCGSCOPE6:
 	case SIOCGSCOPE6DEF:
 		return (scope6_ioctl(cmd, data, ifp));
 	}
 
 	/*
 	 * Find address for this interface, if it exists.
 	 *
 	 * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
 	 * only, and used the first interface address as the target of other
 	 * operations (without checking ifra_addr).  This was because netinet
 	 * code/API assumed at most 1 interface address per interface.
 	 * Since IPv6 allows a node to assign multiple addresses
 	 * on a single interface, we almost always look and check the
 	 * presence of ifra_addr, and reject invalid ones here.
 	 * It also decreases duplicated code among SIOC*_IN6 operations.
 	 */
 	switch (cmd) {
 	case SIOCAIFADDR_IN6:
 	case SIOCSIFPHYADDR_IN6:
 		sa6 = &ifra->ifra_addr;
 		break;
 	case SIOCSIFADDR_IN6:
 	case SIOCGIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCDIFADDR_IN6:
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 	case SIOCGIFAFLAG_IN6:
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCGIFALIFETIME_IN6:
 	case SIOCGIFSTAT_IN6:
 	case SIOCGIFSTAT_ICMP6:
 		sa6 = &ifr->ifr_addr;
 		break;
 	case SIOCSIFADDR:
 	case SIOCSIFBRDADDR:
 	case SIOCSIFDSTADDR:
 	case SIOCSIFNETMASK:
 		/*
 		 * Although we should pass any non-INET6 ioctl requests
 		 * down to driver, we filter some legacy INET requests.
 		 * Drivers trust SIOCSIFADDR et al to come from an already
 		 * privileged layer, and do not perform any credentials
 		 * checks or input validation.
 		 */
 		return (EINVAL);
 	default:
 		sa6 = NULL;
 		break;
 	}
 	if (sa6 && sa6->sin6_family == AF_INET6) {
 		if (sa6->sin6_scope_id != 0)
 			error = sa6_embedscope(sa6, 0);
 		else
 			error = in6_setscope(&sa6->sin6_addr, ifp, NULL);
 		if (error != 0)
 			return (error);
 		if (td != NULL && (error = prison_check_ip6(td->td_ucred,
 		    &sa6->sin6_addr)) != 0)
 			return (error);
 		ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
 	} else
 		ia = NULL;
 
 	switch (cmd) {
 	case SIOCSIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 		/*
 		 * Since IPv6 allows a node to assign multiple addresses
 		 * on a single interface, SIOCSIFxxx ioctls are deprecated.
 		 */
 		/* we decided to obsolete this command (20000704) */
 		error = EINVAL;
 		goto out;
 
 	case SIOCDIFADDR_IN6:
 		/*
 		 * for IPv4, we look for existing in_ifaddr here to allow
 		 * "ifconfig if0 delete" to remove the first IPv4 address on
 		 * the interface.  For IPv6, as the spec allows multiple
 		 * interface address from the day one, we consider "remove the
 		 * first one" semantics to be not preferable.
 		 */
 		if (ia == NULL) {
 			error = EADDRNOTAVAIL;
 			goto out;
 		}
 		/* FALLTHROUGH */
 	case SIOCAIFADDR_IN6:
 		/*
 		 * We always require users to specify a valid IPv6 address for
 		 * the corresponding operation.
 		 */
 		if (ifra->ifra_addr.sin6_family != AF_INET6 ||
 		    ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 
 		if (td != NULL) {
 			error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ?
 			    PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
 			if (error)
 				goto out;
 		}
 		/* FALLTHROUGH */
 	case SIOCGIFSTAT_IN6:
 	case SIOCGIFSTAT_ICMP6:
 		if (ifp->if_afdata[AF_INET6] == NULL) {
 			error = EPFNOSUPPORT;
 			goto out;
 		}
 		break;
 
 	case SIOCGIFADDR_IN6:
 		/* This interface is basically deprecated. use SIOCGIFCONF. */
 		/* FALLTHROUGH */
 	case SIOCGIFAFLAG_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFALIFETIME_IN6:
 		/* must think again about its semantics */
 		if (ia == NULL) {
 			error = EADDRNOTAVAIL;
 			goto out;
 		}
 		break;
 	}
 
 	switch (cmd) {
 	case SIOCGIFADDR_IN6:
 		ifr->ifr_addr = ia->ia_addr;
 		if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0)
 			goto out;
 		break;
 
 	case SIOCGIFDSTADDR_IN6:
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
 			error = EINVAL;
 			goto out;
 		}
 		/*
 		 * XXX: should we check if ifa_dstaddr is NULL and return
 		 * an error?
 		 */
 		ifr->ifr_dstaddr = ia->ia_dstaddr;
 		if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0)
 			goto out;
 		break;
 
 	case SIOCGIFNETMASK_IN6:
 		ifr->ifr_addr = ia->ia_prefixmask;
 		break;
 
 	case SIOCGIFAFLAG_IN6:
 		ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
 		break;
 
 	case SIOCGIFSTAT_IN6:
 		COUNTER_ARRAY_COPY(((struct in6_ifextra *)
 		    ifp->if_afdata[AF_INET6])->in6_ifstat,
 		    &ifr->ifr_ifru.ifru_stat,
 		    sizeof(struct in6_ifstat) / sizeof(uint64_t));
 		break;
 
 	case SIOCGIFSTAT_ICMP6:
 		COUNTER_ARRAY_COPY(((struct in6_ifextra *)
 		    ifp->if_afdata[AF_INET6])->icmp6_ifstat,
 		    &ifr->ifr_ifru.ifru_icmp6stat,
 		    sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
 		break;
 
 	case SIOCGIFALIFETIME_IN6:
 		ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime;
 		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 			time_t maxexpire;
 			struct in6_addrlifetime *retlt =
 			    &ifr->ifr_ifru.ifru_lifetime;
 
 			/*
 			 * XXX: adjust expiration time assuming time_t is
 			 * signed.
 			 */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (ia->ia6_lifetime.ia6t_vltime <
 			    maxexpire - ia->ia6_updatetime) {
 				retlt->ia6t_expire = ia->ia6_updatetime +
 				    ia->ia6_lifetime.ia6t_vltime;
 			} else
 				retlt->ia6t_expire = maxexpire;
 		}
 		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 			time_t maxexpire;
 			struct in6_addrlifetime *retlt =
 			    &ifr->ifr_ifru.ifru_lifetime;
 
 			/*
 			 * XXX: adjust expiration time assuming time_t is
 			 * signed.
 			 */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (ia->ia6_lifetime.ia6t_pltime <
 			    maxexpire - ia->ia6_updatetime) {
 				retlt->ia6t_preferred = ia->ia6_updatetime +
 				    ia->ia6_lifetime.ia6t_pltime;
 			} else
 				retlt->ia6t_preferred = maxexpire;
 		}
 		break;
 
 	case SIOCAIFADDR_IN6:
 	{
 		struct nd_prefixctl pr0;
 		struct nd_prefix *pr;
 
 		/*
 		 * first, make or update the interface address structure,
 		 * and link it to the list.
 		 */
 		if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0)
 			goto out;
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
 		    == NULL) {
 			/*
 			 * this can happen when the user specify the 0 valid
 			 * lifetime.
 			 */
 			break;
 		}
 
 		if (cmd == ocmd && ifra->ifra_vhid > 0) {
 			if (carp_attach_p != NULL)
 				error = (*carp_attach_p)(&ia->ia_ifa,
 				    ifra->ifra_vhid);
 			else
 				error = EPROTONOSUPPORT;
 			if (error)
 				goto out;
 			else
 				carp_attached = 1;
 		}
 
 		/*
 		 * then, make the prefix on-link on the interface.
 		 * XXX: we'd rather create the prefix before the address, but
 		 * we need at least one address to install the corresponding
 		 * interface route, so we configure the address first.
 		 */
 
 		/*
 		 * convert mask to prefix length (prefixmask has already
 		 * been validated in in6_update_ifa().
 		 */
 		bzero(&pr0, sizeof(pr0));
 		pr0.ndpr_ifp = ifp;
 		pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    NULL);
 		if (pr0.ndpr_plen == 128) {
 			/* we don't need to install a host route. */
 			goto aifaddr_out;
 		}
 		pr0.ndpr_prefix = ifra->ifra_addr;
 		/* apply the mask for safety. */
 		IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr,
 		    &ifra->ifra_prefixmask.sin6_addr);
 
 		/*
 		 * XXX: since we don't have an API to set prefix (not address)
 		 * lifetimes, we just use the same lifetimes as addresses.
 		 * The (temporarily) installed lifetimes can be overridden by
 		 * later advertised RAs (when accept_rtadv is non 0), which is
 		 * an intended behavior.
 		 */
 		pr0.ndpr_raf_onlink = 1; /* should be configurable? */
 		pr0.ndpr_raf_auto =
 		    ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0);
 		pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime;
 		pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime;
 
 		/* add the prefix if not yet. */
 		if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
 			/*
 			 * nd6_prelist_add will install the corresponding
 			 * interface route.
 			 */
 			if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
 				if (carp_attached)
 					(*carp_detach_p)(&ia->ia_ifa);
 				goto out;
 			}
 		}
 
 		/* relate the address to the prefix */
 		if (ia->ia6_ndpr == NULL) {
 			ia->ia6_ndpr = pr;
 			pr->ndpr_addrcnt++;
 
 			/*
 			 * If this is the first autoconf address from the
 			 * prefix, create a temporary address as well
 			 * (when required).
 			 */
 			if ((ia->ia6_flags & IN6_IFF_AUTOCONF) &&
 			    V_ip6_use_tempaddr && pr->ndpr_addrcnt == 1) {
 				int e;
 				if ((e = in6_tmpifadd(ia, 1, 0)) != 0) {
 					log(LOG_NOTICE, "in6_control: failed "
 					    "to create a temporary address, "
 					    "errno=%d\n", e);
 				}
 			}
 		}
 		nd6_prefix_rele(pr);
 
 		/*
 		 * this might affect the status of autoconfigured addresses,
 		 * that is, this address might make other addresses detached.
 		 */
 		pfxlist_onlink_check();
 
 aifaddr_out:
 		/*
 		 * Try to clear the flag when a new IPv6 address is added
 		 * onto an IFDISABLED interface and it succeeds.
 		 */
 		if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
 			struct in6_ndireq nd;
 
 			memset(&nd, 0, sizeof(nd));
 			nd.ndi.flags = ND_IFINFO(ifp)->flags;
 			nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
 			if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0)
 				log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
 				    "SIOCSIFINFO_FLAGS for -ifdisabled "
 				    "failed.");
 			/*
 			 * Ignore failure of clearing the flag intentionally.
 			 * The failure means address duplication was detected.
 			 */
 		}
 		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}
 
 	case SIOCDIFADDR_IN6:
 	{
 		struct nd_prefix *pr;
 
 		/*
 		 * If the address being deleted is the only one that owns
 		 * the corresponding prefix, expire the prefix as well.
 		 * XXX: theoretically, we don't have to worry about such
 		 * relationship, since we separate the address management
 		 * and the prefix management.  We do this, however, to provide
 		 * as much backward compatibility as possible in terms of
 		 * the ioctl operation.
 		 * Note that in6_purgeaddr() will decrement ndpr_addrcnt.
 		 */
 		pr = ia->ia6_ndpr;
 		in6_purgeaddr(&ia->ia_ifa);
 		if (pr != NULL && pr->ndpr_addrcnt == 0) {
 			ND6_WLOCK();
 			nd6_prefix_unlink(pr, NULL);
 			ND6_WUNLOCK();
 			nd6_prefix_del(pr);
 		}
 		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}
 
 	default:
 		if (ifp->if_ioctl == NULL) {
 			error = EOPNOTSUPP;
 			goto out;
 		}
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		goto out;
 	}
 
 	error = 0;
 out:
 	if (ia != NULL)
 		ifa_free(&ia->ia_ifa);
 	return (error);
 }
 
 
 /*
  * Join necessary multicast groups.  Factored out from in6_update_ifa().
  * This entire work should only be done once, for the default FIB.
  *
  * Groups are joined in this order: the solicited-node group derived from
  * ifra->ifra_addr, link-local all-nodes, the node information group(s)
  * (a failed join there is only logged) and interface-local all-nodes.
  * Every membership obtained is linked onto ia->ia6_memberships.
  *
  * in6m_sol must not be NULL; after the solicited-node join succeeds,
  * *in6m_sol is set to that membership's i6mm_maddr.
  *
  * The return value is whatever the last in6_setscope()/in6_joingroup()
  * call left in 'error'.  Memberships added before a failure are not
  * undone here.
  */
 static int
 in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol)
 {
 	char ip6buf[INET6_ADDRSTRLEN];
 	struct in6_addr mltaddr;
 	struct in6_multi_mship *imm;
 	int delay, error;
 
 	KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__));
 
 	/*
 	 * Join solicited multicast addr for new host id.
 	 * The last 32-bit word is copied from the unicast address and its
 	 * first byte (byte 12) is then forced to 0xff, so only the low
 	 * 24 bits of the unicast address survive.
 	 */
 	bzero(&mltaddr, sizeof(struct in6_addr));
 	mltaddr.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
 	mltaddr.s6_addr32[2] = htonl(1);
 	mltaddr.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
 	mltaddr.s6_addr8[12] = 0xff;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) {
 		/* XXX: should not happen */
 		log(LOG_ERR, "%s: in6_setscope failed\n", __func__);
 		goto cleanup;
 	}
 	delay = error = 0;
 	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 		/*
 		 * We need a random delay for DAD on the address being
 		 * configured.  It also means delaying transmission of the
 		 * corresponding MLD report to avoid report collision.
 		 * [RFC 4861, Section 6.3.7]
 		 */
 		delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
 	}
 	imm = in6_joingroup(ifp, &mltaddr, &error, delay);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
 		    if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	/* Hand the solicited-node group back to the caller. */
 	*in6m_sol = imm->i6mm_maddr;
 
 	/*
 	 * Join link-local all-nodes address.
 	 */
 	mltaddr = in6addr_linklocal_allnodes;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
 		goto cleanup; /* XXX: should not fail */
 
 	imm = in6_joingroup(ifp, &mltaddr, &error, 0);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
 		    if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 
 	/*
 	 * Join node information group address.
 	 * A failure to join either node information group is logged but
 	 * does not abort the function.
 	 */
 	delay = 0;
 	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 		/*
 		 * The spec does not say anything about delay for this group,
 		 * but the same logic should apply.
 		 */
 		delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
 	}
 	if (in6_nigroup(ifp, NULL, -1, &mltaddr) == 0) {
 		/* XXX jinmei */
 		imm = in6_joingroup(ifp, &mltaddr, &error, delay);
 		if (imm == NULL)
 			nd6log((LOG_WARNING,
 			    "%s: in6_joingroup failed for %s on %s "
 			    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 			    &mltaddr), if_name(ifp), error));
 			/* XXX not very fatal, go on... */
 		else
 			LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	}
 	/* Same again for the old-prefix group, only when the knob is set. */
 	if (V_icmp6_nodeinfo_oldmcprefix &&
 	    in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr) == 0) {
 		imm = in6_joingroup(ifp, &mltaddr, &error, delay);
 		if (imm == NULL)
 			nd6log((LOG_WARNING,
 			    "%s: in6_joingroup failed for %s on %s "
 			    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 			    &mltaddr), if_name(ifp), error));
 			/* XXX not very fatal, go on... */
 		else
 			LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	}
 
 	/*
 	 * Join interface-local all-nodes address.
 	 * (ff01::1%ifN, and ff01::%ifN/32)
 	 */
 	mltaddr = in6addr_nodelocal_allnodes;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
 		goto cleanup; /* XXX: should not fail */
 
 	imm = in6_joingroup(ifp, &mltaddr, &error, 0);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 		    &mltaddr), if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 
 cleanup:
 	return (error);
 }
 
 /*
  * Update parameters of an IPv6 interface address.
  * If necessary, a new entry is created and linked into address chains.
  * This function is separated from in6_control().
  */
 int
 in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	int error, hostIsNew = 0;
 
 	if ((error = in6_validate_ifra(ifp, ifra, ia, flags)) != 0)
 		return (error);
 
 	if (ia == NULL) {
 		hostIsNew = 1;
 		if ((ia = in6_alloc_ifa(ifp, ifra, flags)) == NULL)
 			return (ENOBUFS);
 	}
 
 	error = in6_update_ifa_internal(ifp, ifra, ia, hostIsNew, flags);
 	if (error != 0) {
 		if (hostIsNew != 0) {
 			in6_unlink_ifa(ia, ifp);
 			ifa_free(&ia->ia_ifa);
 		}
 		return (error);
 	}
 
 	if (hostIsNew)
 		error = in6_broadcast_ifa(ifp, ifra, ia, flags);
 
 	return (error);
 }
 
 /*
  * Fill in basic IPv6 address request info.
  */
 void
 in6_prepare_ifra(struct in6_aliasreq *ifra, const struct in6_addr *addr,
     const struct in6_addr *mask)
 {
 
 	memset(ifra, 0, sizeof(struct in6_aliasreq));
 
 	ifra->ifra_addr.sin6_family = AF_INET6;
 	ifra->ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	if (addr != NULL)
 		ifra->ifra_addr.sin6_addr = *addr;
 
 	ifra->ifra_prefixmask.sin6_family = AF_INET6;
 	ifra->ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	if (mask != NULL)
 		ifra->ifra_prefixmask.sin6_addr = *mask;
 }
 
 /*
  * Sanity-check an address request; in6_update_ifa() calls this before
  * doing anything else.
  *
  * ifp   - target interface.
  * ifra  - the request to check.
  * ia    - the existing address being updated, or NULL when a new address
  *         is being requested.
  * flags - not examined here.
  *
  * Side effect: ifra->ifra_dstaddr is written back.  For an AF_INET6
  * destination on a p2p or loopback interface the scope zone is filled in
  * (when the caller left it 0) and converted to the embedded form.
  *
  * Returns 0 when the request is acceptable, EINVAL or EAFNOSUPPORT
  * otherwise.  Note that 0 is also returned for a new address (ia == NULL)
  * whose valid lifetime is 0; the checks after that point are skipped in
  * that case.
  */
 static int
 in6_validate_ifra(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	int plen = -1;
 	struct sockaddr_in6 dst6;
 	struct in6_addrlifetime *lt;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* Validate parameters */
 	if (ifp == NULL || ifra == NULL) /* this maybe redundant */
 		return (EINVAL);
 
 	/*
 	 * The destination address for a p2p link must have a family
 	 * of AF_UNSPEC or AF_INET6.
 	 */
 	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
 		return (EAFNOSUPPORT);
 
 	/*
 	 * Validate address
 	 */
 	if (ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6) ||
 	    ifra->ifra_addr.sin6_family != AF_INET6)
 		return (EINVAL);
 
 	/*
 	 * validate ifra_prefixmask.  don't check sin6_family, netmask
 	 * does not carry fields other than sin6_len.
 	 */
 	if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6))
 		return (EINVAL);
 	/*
 	 * Because the IPv6 address architecture is classless, we require
 	 * users to specify a (non 0) prefix length (mask) for a new address.
 	 * We also require the prefix (when specified) mask is valid, and thus
 	 * reject a non-consecutive mask.
 	 */
 	if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0)
 		return (EINVAL);
 	if (ifra->ifra_prefixmask.sin6_len != 0) {
 		/*
 		 * The second argument bounds the scan to the bytes covered
 		 * by sin6_len.
 		 */
 		plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    (u_char *)&ifra->ifra_prefixmask +
 		    ifra->ifra_prefixmask.sin6_len);
 		if (plen <= 0)
 			return (EINVAL);
 	} else {
 		/*
 		 * In this case, ia must not be NULL.  We just use its prefix
 		 * length.  (ia == NULL with an empty mask was rejected just
 		 * above.)
 		 */
 		plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
 	}
 	/*
 	 * If the destination address on a p2p interface is specified,
 	 * and the address is a scoped one, validate/set the scope
 	 * zone identifier.
 	 */
 	dst6 = ifra->ifra_dstaddr;
 	if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 &&
 	    (dst6.sin6_family == AF_INET6)) {
 		struct in6_addr in6_tmp;
 		u_int32_t zoneid;
 
 		/* Work on a copy; only the resulting zone id is wanted. */
 		in6_tmp = dst6.sin6_addr;
 		if (in6_setscope(&in6_tmp, ifp, &zoneid))
 			return (EINVAL); /* XXX: should be impossible */
 
 		if (dst6.sin6_scope_id != 0) {
 			if (dst6.sin6_scope_id != zoneid)
 				return (EINVAL);
 		} else		/* user omit to specify the ID. */
 			dst6.sin6_scope_id = zoneid;
 
 		/* convert into the internal form */
 		if (sa6_embedscope(&dst6, 0))
 			return (EINVAL); /* XXX: should be impossible */
 	}
 	/* Modify original ifra_dstaddr to reflect changes */
 	ifra->ifra_dstaddr = dst6;
 
 	/*
 	 * The destination address can be specified only for a p2p or a
 	 * loopback interface.  If specified, the corresponding prefix length
 	 * must be 128.
 	 */
 	if (ifra->ifra_dstaddr.sin6_family == AF_INET6) {
 		if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) {
 			/* XXX: noisy message */
 			nd6log((LOG_INFO, "in6_update_ifa: a destination can "
 			    "be specified for a p2p or a loopback IF only\n"));
 			return (EINVAL);
 		}
 		if (plen != 128) {
 			nd6log((LOG_INFO, "in6_update_ifa: prefixlen should "
 			    "be 128 when dstaddr is specified\n"));
 			return (EINVAL);
 		}
 	}
 	/* lifetime consistency check */
 	lt = &ifra->ifra_lifetime;
 	if (lt->ia6t_pltime > lt->ia6t_vltime)
 		return (EINVAL);
 	if (lt->ia6t_vltime == 0) {
 		/*
 		 * the following log might be noisy, but this is a typical
 		 * configuration mistake or a tool's bug.
 		 */
 		nd6log((LOG_INFO,
 		    "in6_update_ifa: valid lifetime is 0 for %s\n",
 		    ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr)));
 
 		/*
 		 * NOTE(review): this returns plain success, so the caller
 		 * cannot tell "nothing to do" apart from "go ahead".
 		 */
 		if (ia == NULL)
 			return (0); /* there's nothing to do */
 	}
 
 	/* Check prefix mask */
 	if (ia != NULL && ifra->ifra_prefixmask.sin6_len != 0) {
 		/*
 		 * We prohibit changing the prefix length of an existing
 		 * address, because
 		 * + such an operation should be rare in IPv6, and
 		 * + the operation would confuse prefix management.
 		 */
 		if (ia->ia_prefixmask.sin6_len != 0 &&
 		    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
 			nd6log((LOG_INFO, "in6_validate_ifa: the prefix length "
 			    "of an existing %s address should not be changed\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 
 			return (EINVAL);
 		}
 	}
 
 	return (0);
 }
 
 
 /*
  * Allocate a new ifaddr and link it into chains.
  *
  * The new in6_ifaddr takes its address and (if given) prefix mask from
  * ifra, and is linked onto ifp's address list, the global
  * V_in6_ifaddrhead list and the address hash.  One reference is taken
  * for each of the two lists.  flags is not used here.
  *
  * Returns the new address, or NULL if the allocation fails.
  * NOTE(review): the pointer returned presumably carries the reference
  * created by ifa_alloc() itself -- confirm against ifa_alloc().
  */
 static struct in6_ifaddr *
 in6_alloc_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int flags)
 {
 	struct in6_ifaddr *ia;
 
 	/*
 	 * When in6_alloc_ifa() is called in a process of a received
 	 * RA, it is called under an interrupt context.  So, we should
 	 * call malloc with M_NOWAIT.
 	 */
 	ia = (struct in6_ifaddr *)ifa_alloc(sizeof(*ia), M_NOWAIT);
 	if (ia == NULL)
 		return (NULL);
 	LIST_INIT(&ia->ia6_memberships);
 	/* Initialize the address and masks, and put time stamp */
 	ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
 	ia->ia_addr.sin6_family = AF_INET6;
 	ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
 	/* XXX: Can we assign ,sin6_addr and skip the rest? */
 	/* The whole-struct copy overwrites the family/len set just above. */
 	ia->ia_addr = ifra->ifra_addr;
 	ia->ia6_createtime = time_uptime;
 	if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
 		/*
 		 * Some functions expect that ifa_dstaddr is not
 		 * NULL for p2p interfaces.
 		 */
 		ia->ia_ifa.ifa_dstaddr =
 		    (struct sockaddr *)&ia->ia_dstaddr;
 	} else {
 		ia->ia_ifa.ifa_dstaddr = NULL;
 	}
 
 	/* set prefix mask if any */
 	ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
 	if (ifra->ifra_prefixmask.sin6_len != 0) {
 		ia->ia_prefixmask.sin6_family = AF_INET6;
 		ia->ia_prefixmask.sin6_len = ifra->ifra_prefixmask.sin6_len;
 		ia->ia_prefixmask.sin6_addr = ifra->ifra_prefixmask.sin6_addr;
 	}
 
 	/* Link onto the per-interface list; that list holds a reference. */
 	ia->ia_ifp = ifp;
 	ifa_ref(&ia->ia_ifa);			/* if_addrhead */
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 
 	/* Link onto the global list and the hash under a second reference. */
 	ifa_ref(&ia->ia_ifa);			/* in6_ifaddrhead */
 	IN6_IFADDR_WLOCK();
 	TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
 	LIST_INSERT_HEAD(IN6ADDR_HASH(&ia->ia_addr.sin6_addr), ia, ia6_hash);
 	IN6_IFADDR_WUNLOCK();
 
 	return (ia);
 }
 
 /*
  * Update/configure interface address parameters:
  *
  * 1) Update lifetime
  * 2) Update interface metric ad flags
  * 3) Notify other subsystems
  */
 static int
 in6_update_ifa_internal(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int hostIsNew, int flags)
 {
 	int error;
 
 	/* update timestamp */
 	ia->ia6_updatetime = time_uptime;
 
 	/*
 	 * Set lifetimes.  We do not refer to ia6t_expire and ia6t_preferred
 	 * to see if the address is deprecated or invalidated, but initialize
 	 * these members for applications.
 	 */
 	ia->ia6_lifetime = ifra->ifra_lifetime;
 	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_expire =
 		    time_uptime + ia->ia6_lifetime.ia6t_vltime;
 	} else
 		ia->ia6_lifetime.ia6t_expire = 0;
 	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_preferred =
 		    time_uptime + ia->ia6_lifetime.ia6t_pltime;
 	} else
 		ia->ia6_lifetime.ia6t_preferred = 0;
 
 	/*
 	 * backward compatibility - if IN6_IFF_DEPRECATED is set from the
 	 * userland, make it deprecated.
 	 */
 	if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
 		ia->ia6_lifetime.ia6t_pltime = 0;
 		ia->ia6_lifetime.ia6t_preferred = time_uptime;
 	}
 
 	/*
 	 * configure address flags.
 	 */
 	ia->ia6_flags = ifra->ifra_flags;
 
 	/*
 	 * Make the address tentative before joining multicast addresses,
 	 * so that corresponding MLD responses would not have a tentative
 	 * source address.
 	 */
 	ia->ia6_flags &= ~IN6_IFF_DUPLICATED;	/* safety */
 
 	/*
 	 * DAD should be performed for an new address or addresses on
 	 * an interface with ND6_IFF_IFDISABLED.
 	 */
 	if (in6if_do_dad(ifp) &&
 	    (hostIsNew || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)))
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 
 	/* notify other subsystems */
 	error = in6_notify_ifa(ifp, ia, ifra, hostIsNew);
 
 	return (error);
 }
 
 /*
  * Do link-level ifa job, in this order:
  * 1) Add lle entry for added address
  * 2) join appropriate multicast groups (IFF_MULTICAST interfaces only)
  * 3) start DAD if the address is tentative
  * 4) Notifies routing socket users about new address
  *
  * One reference on ia is released on every return path.  If adding the
  * lle entry or joining the multicast groups fails, the address is torn
  * down with in6_purgeaddr() before that reference is dropped.
  *
  * Returns 0 on success, otherwise the error from nd6_add_ifa_lle() or
  * in6_update_ifa_join_mc().
  */
 static int
 in6_broadcast_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	struct in6_multi *in6m_sol;
 	int error = 0;
 
 	/* Add local address to lltable, if necessary (ex. on p2p link). */
 	if ((error = nd6_add_ifa_lle(ia)) != 0) {
 		in6_purgeaddr(&ia->ia_ifa);
 		ifa_free(&ia->ia_ifa);
 		return (error);
 	}
 
 	/* Join necessary multicast groups. */
 	in6m_sol = NULL;
 	if ((ifp->if_flags & IFF_MULTICAST) != 0) {
 		error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol);
 		if (error != 0) {
 			in6_purgeaddr(&ia->ia_ifa);
 			ifa_free(&ia->ia_ifa);
 			return (error);
 		}
 	}
 
 	/* Perform DAD, if the address is TENTATIVE. */
 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
 		int delay, mindelay, maxdelay;
 
 		delay = 0;
 		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 			/*
 			 * We need to impose a delay before sending an NS
 			 * for DAD.  Check if we also needed a delay for the
 			 * corresponding MLD message.  If we did, the delay
 			 * should be larger than the MLD delay (this could be
 			 * relaxed a bit, but this simple logic is at least
 			 * safe).
 			 * XXX: Break data hiding guidelines and look at
 			 * state for the solicited multicast group.
 			 */
 			mindelay = 0;
 			if (in6m_sol != NULL &&
 			    in6m_sol->in6m_state == MLD_REPORTING_MEMBER) {
 				mindelay = in6m_sol->in6m_timer;
 			}
 			maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
 			/* Avoid a modulo by zero when the window is empty. */
 			if (maxdelay - mindelay == 0)
 				delay = 0;
 			else {
 				delay =
 				    (arc4random() % (maxdelay - mindelay)) +
 				    mindelay;
 			}
 		}
 		nd6_dad_start((struct ifaddr *)ia, delay);
 	}
 
 	in6_newaddrmsg(ia, RTM_ADD);
 	ifa_free(&ia->ia_ifa);
 	return (error);
 }
 
 /*
  * Tear down an IPv6 interface address.
  *
  * In order: detach CARP if attached, remove the loopback route to the
  * address, stop DAD, leave every multicast group recorded on
  * ia6_memberships, delete the host route installed for a /128 address,
  * announce the deletion with in6_newaddrmsg(RTM_DELETE), and finally
  * unlink the address from all lists with in6_unlink_ifa().
  *
  * ifa must be the ifaddr embedded in a struct in6_ifaddr; it is cast
  * back to one below.
  */
 void
 in6_purgeaddr(struct ifaddr *ifa)
 {
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
 	struct in6_multi_mship *imm;
 	int plen, error;
 
 	if (ifa->ifa_carp)
 		(*carp_detach_p)(ifa);
 
 	/*
 	 * Remove the loopback route to the interface address.
 	 * The check for the current setting of "nd6_useloopback"
 	 * is not needed.
 	 */
 	if (ia->ia_flags & IFA_RTSELF) {
 		error = ifa_del_loopback_route((struct ifaddr *)ia,
 		    (struct sockaddr *)&ia->ia_addr);
 		/* The flag is kept if the route could not be removed. */
 		if (error == 0)
 			ia->ia_flags &= ~IFA_RTSELF;
 	}
 
 	/* stop DAD processing */
 	nd6_dad_stop(ifa);
 
 	/* Leave multicast groups. */
 	while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
 		LIST_REMOVE(imm, i6mm_chain);
 		in6_leavegroup(imm);
 	}
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if ((ia->ia_flags & IFA_ROUTE) && plen == 128) {
 		error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags |
 		    (ia->ia_dstaddr.sin6_family == AF_INET6 ? RTF_HOST : 0));
 		if (error != 0)
 			log(LOG_INFO, "%s: err=%d, destination address delete "
 			    "failed\n", __func__, error);
 		/* IFA_ROUTE is cleared whether or not the delete worked. */
 		ia->ia_flags &= ~IFA_ROUTE;
 	}
 
 	in6_newaddrmsg(ia, RTM_DELETE);
 	in6_unlink_ifa(ia, ifp);
 }
 
 /*
  * Remove an address from ifp's address list, the global
  * V_in6_ifaddrhead list and the address hash, dropping the reference
  * held for each of the two lists.
  *
  * The address is also dissociated from its prefix (decrementing
  * ndpr_addrcnt) and nd6_rem_ifa_lle() is called for it.  For an
  * autoconfigured address pfxlist_onlink_check() is run as well.
  */
 static void
 in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
 {
 	char ip6buf[INET6_ADDRSTRLEN];
 	int remove_lle;
 
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);			/* if_addrhead */
 
 	/*
 	 * Defer the release of what might be the last reference to the
 	 * in6_ifaddr so that it can't be freed before the remainder of the
 	 * cleanup.  The matching ifa_free() is the last statement of this
 	 * function.
 	 */
 	IN6_IFADDR_WLOCK();
 	TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
 	LIST_REMOVE(ia, ia6_hash);
 	IN6_IFADDR_WUNLOCK();
 
 	/*
 	 * Release the reference to the base prefix.  There should be a
 	 * positive reference.
 	 */
 	remove_lle = 0;
 	if (ia->ia6_ndpr == NULL) {
 		nd6log((LOG_NOTICE,
 		    "in6_unlink_ifa: autoconf'ed address "
 		    "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
 	} else {
 		ia->ia6_ndpr->ndpr_addrcnt--;
 		/* Do not delete lles within prefix if refcont != 0 */
 		if (ia->ia6_ndpr->ndpr_addrcnt == 0)
 			remove_lle = 1;
 		ia->ia6_ndpr = NULL;
 	}
 
 	/* remove_lle is 1 only when this was the prefix's last address. */
 	nd6_rem_ifa_lle(ia, remove_lle);
 
 	/*
 	 * Also, if the address being removed is autoconf'ed, call
 	 * pfxlist_onlink_check() since the release might affect the status of
 	 * other (detached) addresses.
 	 */
 	if ((ia->ia6_flags & IN6_IFF_AUTOCONF)) {
 		pfxlist_onlink_check();
 	}
 	ifa_free(&ia->ia_ifa);			/* in6_ifaddrhead */
 }
 
 /*
  * Notifies other subsystems about address change/arrival:
  * 1) Notifies device handler on the first IPv6 address assignment
  * 2) Handle routing table changes for P2P links and route
  * 3) Handle routing table changes for address host route
  *
  * hostIsNew is non-zero when ia has just been created.
  *
  * Returns 0 on success, or the error from the driver's SIOCSIFADDR
  * handler, from rtinit() when installing the destination route, or
  * from ifa_add_loopback_route().
  */
 static int
 in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia,
     struct in6_aliasreq *ifra, int hostIsNew)
 {
 	int	error = 0, plen, ifacount = 0;
 	struct ifaddr *ifa;
 	struct sockaddr_in6 *pdst;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 */
 	if (hostIsNew != 0) {
 		/* Count the AF_INET6 addresses currently on the interface. */
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ifacount++;
 		}
 		IF_ADDR_RUNLOCK(ifp);
 	}
 
 	/*
 	 * NOTE(review): ifacount stays 0 when hostIsNew is 0, so the driver
 	 * is also called for every update of an existing address.
 	 */
 	if (ifacount <= 1 && ifp->if_ioctl) {
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * If a new destination address is specified, scrub the old one and
 	 * install the new destination.  Note that the interface must be
 	 * p2p or loopback.
 	 */
 	pdst = &ifra->ifra_dstaddr;
 	if (pdst->sin6_family == AF_INET6 &&
 	    !IN6_ARE_ADDR_EQUAL(&pdst->sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
 		if ((ia->ia_flags & IFA_ROUTE) != 0 &&
 		    (rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST) != 0)) {
 			nd6log((LOG_ERR, "in6_update_ifa_internal: failed to "
 			    "remove a route to the old destination: %s\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 			/* proceed anyway... */
 		} else
 			ia->ia_flags &= ~IFA_ROUTE;
 		ia->ia_dstaddr = *pdst;
 	}
 
 	/*
 	 * If a new destination address is specified for a point-to-point
 	 * interface, install a route to the destination as an interface
 	 * direct route.
 	 * XXX: the logic below rejects assigning multiple addresses on a p2p
 	 * interface that share the same destination.
 	 */
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
 	    ia->ia_dstaddr.sin6_family == AF_INET6) {
 		int rtflags = RTF_UP | RTF_HOST;
 		/*
 		 * Handle the case for ::1 .
 		 * Setting IFA_RTSELF here also skips the loopback-route
 		 * step at the end of this function.
 		 */
 		if (ifp->if_flags & IFF_LOOPBACK)
 			ia->ia_flags |= IFA_RTSELF;
 		error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags);
 		if (error)
 			return (error);
 		ia->ia_flags |= IFA_ROUTE;
 	}
 
 	/*
 	 * add a loopback route to self if not exists
 	 */
 	if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) {
 		error = ifa_add_loopback_route((struct ifaddr *)ia,
 		    (struct sockaddr *)&ia->ia_addr);
 		if (error == 0)
 			ia->ia_flags |= IFA_RTSELF;
 	}
 
 	return (error);
 }
 
 /*
  * Find an IPv6 interface link-local address specific to an interface.
  * ifaddr is returned referenced.
  */
 struct in6_ifaddr *
 in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
 {
 	struct ifaddr *ifa;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) {
 			if ((((struct in6_ifaddr *)ifa)->ia6_flags &
 			    ignoreflags) != 0)
 				continue;
 			ifa_ref(ifa);
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return ((struct in6_ifaddr *)ifa);
 }
 
 
 /*
  * Look up the interface address equal to addr in the address hash.
  * A non-zero zoneid must additionally match the entry's sin6_scope_id;
  * a zoneid of 0 matches any zone.
  * The address found is returned with a reference held; NULL is
  * returned when there is no match.
  */
 struct in6_ifaddr *
 in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *cur, *found = NULL;
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	LIST_FOREACH(cur, IN6ADDR_HASH(addr), ia6_hash) {
 		if (!IN6_ARE_ADDR_EQUAL(IA6_IN6(cur), addr))
 			continue;
 		if (zoneid == 0 || zoneid == cur->ia_addr.sin6_scope_id) {
 			ifa_ref(&cur->ia_ifa);
 			found = cur;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 
 	return (found);
 }
 
 /*
  * find the internet address corresponding to a given interface and address.
  * ifaddr is returned referenced.
  */
 struct in6_ifaddr *
 in6ifa_ifpwithaddr(struct ifnet *ifp, const struct in6_addr *addr)
 {
 	struct ifaddr *ifa;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) {
 			ifa_ref(ifa);
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return ((struct in6_ifaddr *)ifa);
 }
 
 /*
  * Return the first link-local scoped address found on ifp, or NULL if
  * there is none or the interface is flagged ND6_IFF_IFDISABLED.
  * Unlike the other lookup helpers, no reference is taken on the
  * address returned.
  */
 struct in6_ifaddr *
 in6ifa_llaonifp(struct ifnet *ifp)
 {
 	struct ifaddr *cur, *found = NULL;
 	struct sockaddr_in6 *sa6;
 
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
 		return (NULL);
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(cur, &ifp->if_addrhead, ifa_link) {
 		if (cur->ifa_addr->sa_family != AF_INET6)
 			continue;
 		sa6 = (struct sockaddr_in6 *)cur->ifa_addr;
 		if (IN6_IS_SCOPE_LINKLOCAL(&sa6->sin6_addr) ||
 		    IN6_IS_ADDR_MC_INTFACELOCAL(&sa6->sin6_addr) ||
 		    IN6_IS_ADDR_MC_NODELOCAL(&sa6->sin6_addr)) {
 			found = cur;
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return ((struct in6_ifaddr *)found);
 }
 
 /*
  * Convert IP6 address to printable (loggable) representation. Caller
  * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
  *
  * The address is written as eight colon-separated hexadecimal groups
  * with leading zeros dropped.  The first longest run of two or more
  * all-zero groups is collapsed to "::"; a lone zero group is printed
  * as "0".  Returns ip6buf.
  */
 static char digits[] = "0123456789abcdef";
 char *
 ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
 {
 	int i, cnt = 0, maxcnt = 0, idx = 0, index = 0;
 	char *cp;
 	const u_int16_t *a = (const u_int16_t *)addr;
 	const u_int8_t *d;
 	int dcolon = 0, zero = 0;
 
 	cp = ip6buf;
 
 	/*
 	 * Pass 1: find the first longest run of zero groups.  'index' is
 	 * where it starts and 'maxcnt' how long it is.  The running count
 	 * is reset at every non-zero group so that separate runs are never
 	 * merged into one.
 	 */
 	for (i = 0; i < 8; i++) {
 		if (*(a + i) == 0) {
 			cnt++;
 			if (cnt == 1)
 				idx = i;
 			continue;
 		}
 		if (maxcnt < cnt) {
 			maxcnt = cnt;
 			index = idx;
 		}
 		cnt = 0;
 	}
 	if (maxcnt < cnt) {
 		maxcnt = cnt;
 		index = idx;
 	}
 
 	/*
 	 * Pass 2: emit the groups.  dcolon is 0 before "::" has been
 	 * written, 1 while the collapsed run is being skipped and 2 once
 	 * it is over.
 	 */
 	for (i = 0; i < 8; i++) {
 		if (dcolon == 1) {
 			if (*a == 0) {
 				/*
 				 * The run reaches the end of the address:
 				 * add the colon that the terminator below
 				 * is going to overwrite.
 				 */
 				if (i == 7)
 					*cp++ = ':';
 				a++;
 				continue;
 			} else
 				dcolon = 2;
 		}
 		if (*a == 0) {
 			/*
 			 * Collapse only at the start of the chosen run and
 			 * only if it spans at least two groups.  Using
 			 * maxcnt here avoids peeking at the next group,
 			 * which does not exist for the last one.
 			 */
 			if (dcolon == 0 && i == index && maxcnt > 1) {
 				if (i == 0)
 					*cp++ = ':';
 				*cp++ = ':';
 				dcolon = 1;
 			} else {
 				*cp++ = '0';
 				*cp++ = ':';
 			}
 			a++;
 			continue;
 		}
 		/* The two bytes of a group are printed in memory order. */
 		d = (const u_int8_t *)a;
 		/* Try to eliminate leading zeros in printout like in :0001. */
 		zero = 1;
 		*cp = digits[*d >> 4];
 		if (*cp != '0') {
 			zero = 0;
 			cp++;
 		}
 		*cp = digits[*d++ & 0xf];
 		if (zero == 0 || (*cp != '0')) {
 			zero = 0;
 			cp++;
 		}
 		*cp = digits[*d >> 4];
 		if (zero == 0 || (*cp != '0')) {
 			zero = 0;
 			cp++;
 		}
 		*cp++ = digits[*d & 0xf];
 		*cp++ = ':';
 		a++;
 	}
 	/* Replace the trailing separator with the terminator. */
 	*--cp = '\0';
 	return (ip6buf);
 }
 
 /*
  * Return 1 if in6 is loopback, link-local, or falls inside the prefix
  * of any address on the global IPv6 address list; 0 otherwise.
  */
 int
 in6_localaddr(struct in6_addr *in6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ia;
 	int covered = 0;
 
 	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
 		return (1);
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 		if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
 		    &ia->ia_prefixmask.sin6_addr)) {
 			covered = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 
 	return (covered);
 }
 
 /*
  * Return 1 if an internet address is for the local host and configured
  * on one of its interfaces.
  */
 int
 in6_localip(struct in6_addr *in6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ia;
 	int configured = 0;
 
 	/* Only the hash bucket selected by in6 needs to be searched. */
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	LIST_FOREACH(ia, IN6ADDR_HASH(in6), ia6_hash) {
 		if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) {
 			configured = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 
 	return (configured);
 }
  
 /*
  * Return 1 if an internet address is configured on an interface.
  */
 int
 in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr)
 {
 	struct in6_addr in6;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ia6;
 
 	in6 = *addr;
 	if (in6_clearscope(&in6))
 		return (0);
 	in6_setscope(&in6, ifp, NULL);
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ia6 = (struct in6_ifaddr *)ifa;
 		if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6)) {
 			IF_ADDR_RUNLOCK(ifp);
 			return (1);
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return (0);
 }
 
 /*
  * Return 1 when the first configured address equal to sa6's address
  * carries IN6_IFF_DEPRECATED, 0 otherwise (including "not found").
  */
 int
 in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_ifaddr *ia;
 	int deprecated = 0;
 
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	LIST_FOREACH(ia, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) {
 		if (!IN6_ARE_ADDR_EQUAL(IA6_IN6(ia), &sa6->sin6_addr))
 			continue;
 		/* Only the first match is examined. */
 		if ((ia->ia6_flags & IN6_IFF_DEPRECATED) != 0)
 			deprecated = 1;
 		break;
 	}
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 
 	return (deprecated);
 }
 
 /*
  * return length of part which dst and src are equal
  * hard coding...
  */
 int
 in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
 {
 	const u_char *sp = (const u_char *)src;
 	const u_char *dp = (const u_char *)dst;
 	u_char diff;
 	int nbits = 0;
 	int i;
 
 	for (i = 0; i < 16; i++) {
 		diff = dp[i] ^ sp[i];
 		if (diff == 0) {
 			/* Whole byte agrees. */
 			nbits += 8;
 			continue;
 		}
 		/* Count the agreeing high-order bits of the first differing byte. */
 		for (; (diff & 0x80) == 0; diff <<= 1)
 			nbits++;
 		break;
 	}
 	return (nbits);
 }
 
 /* XXX: to be scope conscious */
 int
 in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len)
 {
 	int whole, rest, shift;
 
 	/* Reject prefix lengths outside 0..128. */
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n",
 		    len);
 		return (0);
 	}
 
 	whole = len / 8;	/* complete bytes covered by the prefix */
 	rest = len % 8;		/* leftover high-order bits of the next byte */
 
 	if (bcmp(&p1->s6_addr, &p2->s6_addr, whole) != 0)
 		return (0);
 	if (rest == 0)
 		return (1);
 
 	/* Compare only the top 'rest' bits of the partial byte. */
 	shift = 8 - rest;
 	return ((p1->s6_addr[whole] >> shift) ==
 	    (p2->s6_addr[whole] >> shift));
 }
 
 /*
  * Fill *maskp with the netmask that has its top len bits set.
  * An out-of-range len is logged and *maskp is left untouched.
  */
 void
 in6_prefixlen2mask(struct in6_addr *maskp, int len)
 {
 	int full, rem, i;
 
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n",
 		    len);
 		return;
 	}
 
 	bzero(maskp, sizeof(*maskp));
 	full = len / 8;
 	rem = len % 8;
 	for (i = 0; i < full; i++)
 		maskp->s6_addr[i] = 0xff;
 	/* Partial byte: keep the rem high-order bits (0x80, 0xc0, ... 0xfe). */
 	if (rem != 0)
 		maskp->s6_addr[full] = (0xff << (8 - rem)) & 0xff;
 }
 
 /*
  * return the best address out of the same scope. if no address was
  * found, return the first valid address from designated IF.
  */
 struct in6_ifaddr *
 in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
 {
 	int want_scope = in6_addrscope(dst), best_len = -1, cur_len;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *cand;
 	struct in6_ifaddr *best = NULL;
 	struct in6_ifaddr *fallback[2] = { NULL, NULL }; /* deprecated */
 
 	/*
 	 * Pass 1: usable addresses whose scope equals that of dst.
 	 * With several candidates, the one sharing the longest prefix
 	 * with dst wins (the earlier one on a tie).
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		cand = (struct in6_ifaddr *)ifa;
 		/* Skip anycast, not-ready and detached addresses. */
 		if ((cand->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY |
 		    IN6_IFF_DETACHED)) != 0)
 			continue;
 		if ((cand->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
 			/* Remember it as a last resort only. */
 			if (V_ip6_use_deprecated)
 				fallback[0] = cand;
 			continue;
 		}
 		if (want_scope != in6_addrscope(IFA_IN6(ifa)))
 			continue;
 		if (best == NULL) {
 			best = cand;
 			continue;
 		}
 		/* Compute the incumbent's match length lazily. */
 		if (best_len == -1)
 			best_len = in6_matchlen(&best->ia_addr.sin6_addr, dst);
 		cur_len = in6_matchlen(IFA_IN6(ifa), dst);
 		if (cur_len > best_len) {
 			best_len = cur_len;
 			best = cand;
 		}
 	}
 	if (best != NULL) {
 		ifa_ref(&best->ia_ifa);
 		IF_ADDR_RUNLOCK(ifp);
 		return (best);
 	}
 
 	/* Pass 2: first usable, non-deprecated address of any scope. */
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		cand = (struct in6_ifaddr *)ifa;
 		if ((cand->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY |
 		    IN6_IFF_DETACHED)) != 0)
 			continue;
 		if ((cand->ia6_flags & IN6_IFF_DEPRECATED) != 0) {
 			if (V_ip6_use_deprecated)
 				fallback[1] = cand;
 			continue;
 		}
 
 		ifa_ref(ifa);
 		IF_ADDR_RUNLOCK(ifp);
 		return (cand);
 	}
 
 	/* Last resort: a deprecated address, pass-1 candidate first. */
 	cand = (fallback[0] != NULL) ? fallback[0] : fallback[1];
 	if (cand != NULL)
 		ifa_ref((struct ifaddr *)cand);
 	IF_ADDR_RUNLOCK(ifp);
 	return (cand);
 }
 
 /*
  * perform DAD when interface becomes IFF_UP.
  */
 void
 in6_if_up(struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		/*
 		 * A TENTATIVE flag at this point was most likely set by
 		 * hand to request DAD.  The random delay could perhaps be
 		 * skipped, but it is kept to be on the safe side.
 		 */
 		if (ifa->ifa_addr->sa_family == AF_INET6 &&
 		    (((struct in6_ifaddr *)ifa)->ia6_flags &
 		    IN6_IFF_TENTATIVE) != 0)
 			nd6_dad_start(ifa,
 			    arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz));
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	/* Special cases such as 6to4 are dealt with by in6_ifattach(). */
 	in6_ifattach(ifp, NULL);
 }
 
 /*
  * Tell whether DAD should be performed on ifp:
  *   0       never (loopback, IPv6 disabled, or DAD switched off),
  *   EAGAIN  not yet, the interface is not both up and running,
  *   1       yes.
  */
 int
 in6if_do_dad(struct ifnet *ifp)
 {
 	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 		return (0);
 
 	if ((ND_IFINFO(ifp)->flags &
 	    (ND6_IFF_IFDISABLED | ND6_IFF_NO_DAD)) != 0)
 		return (0);
 
 	/*
 	 * DAD needs the interface up and running.  Some interfaces are
 	 * up before they are RUNNING, and addresses may be assigned
 	 * before the interface comes up at all.  In those cases the
 	 * caller should mark the address "tentative" instead of running
 	 * DAD right away.
 	 */
 	if ((ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return (EAGAIN);
 
 	return (1);
 }
 
 /*
  * Calculate max IPv6 MTU through all the interfaces and store it
  * to in6_maxmtu.
  */
 void
 in6_setmaxmtu(void)
 {
 	struct ifnet *ifp;
 	unsigned long largest = 0;
 
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		/* May run during ifnet setup, before IPv6 data is attached. */
 		if (ifp->if_afdata[AF_INET6] == NULL)
 			continue;
 		/* Loopback interfaces do not take part. */
 		if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 			continue;
 		if (IN6_LINKMTU(ifp) > largest)
 			largest = IN6_LINKMTU(ifp);
 	}
 	IFNET_RUNLOCK_NOSLEEP();
 
 	/* Leave the current value alone when nothing qualified. */
 	if (largest != 0)
 		V_in6_maxmtu = largest;
 }
 
 /*
  * Provide the length of interface identifiers to be used for the link attached
  * to the given interface.  The length should be defined in "IPv6 over
  * xxx-link" document.  Note that address architecture might also define
  * the length for a particular set of address prefixes, regardless of the
  * link type.  As clarified in rfc2462bis, those two definitions should be
  * consistent, and those really are as of August 2004.
  */
 int
 in6_if2idlen(struct ifnet *ifp)
 {
 	/* Every link type yields 64; unknown types are reported as well. */
 	switch (ifp->if_type) {
 	case IFT_ETHER:		/* RFC2464 */
 	case IFT_PROPVIRTUAL:	/* XXX: no RFC. treat it as ether */
 	case IFT_L2VLAN:	/* ditto */
 	case IFT_IEEE80211:	/* ditto */
 	case IFT_BRIDGE:	/* bridge(4) only does Ethernet-like links */
 	case IFT_INFINIBAND:
 	case IFT_FDDI:		/* RFC2467 */
 	case IFT_ISO88025:	/* RFC2470 (IPv6 over Token Ring) */
 	case IFT_PPP:		/* RFC2472 */
 	case IFT_ARCNET:	/* RFC2497 */
 	case IFT_FRELAY:	/* RFC2590 */
 	case IFT_IEEE1394:	/* RFC3146 */
 	case IFT_GIF:		/* draft-ietf-v6ops-mech-v2-07 */
 	case IFT_LOOP:		/* XXX: is this really correct? */
 		break;
 	default:
 		/*
 		 * Unknown link type.  Strictly this should be an error,
 		 * but a missing or future "IPv6 over foo" specification
 		 * will very likely use the common constant too.  As a
 		 * compromise the constant is used and the unknown case is
 		 * reported.
 		 */
 		printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type);
 		break;
 	}
 	return (64);
 }
 
 #include <sys/sysctl.h>
 
 /*
  * IPv6 link-layer table entry.  It currently wraps only the generic
  * llentry; in6_lltable_new() allocates this type and hands out &base.
  */
 struct in6_llentry {
 	struct llentry		base;
 };
 
 #define	IN6_LLTBL_DEFAULT_HSIZE	32
 /*
  * Fold the 32-bit key k bytewise and reduce it to a bucket index.
  * h is the table size; the final mask only spreads over all buckets
  * when h is a power of two.  Both arguments are fully parenthesised so
  * that expression arguments expand correctly; k is evaluated more than
  * once.
  */
 #define	IN6_LLTBL_HASH(k, h) \
 	((((((((k) >> 8) ^ (k)) >> 8) ^ (k)) >> 8) ^ (k)) & ((h) - 1))
 
 /*
  * Do actual deallocation of @lle.
  */
 static void
 in6_lltable_destroy_lle_unlocked(struct llentry *lle)
 {
 
 	/* Tear down the entry's locks before releasing its memory. */
 	LLE_LOCK_DESTROY(lle);
 	LLE_REQ_DESTROY(lle);
 	free(lle, M_LLTABLE);
 }
 
 /*
  * Called by LLE_FREE_LOCKED when number of references
  * drops to zero.
  */
 static void
 in6_lltable_destroy_lle(struct llentry *lle)
 {
 
 	/* Drop the write lock first; the lock itself is destroyed next. */
 	LLE_WUNLOCK(lle);
 	in6_lltable_destroy_lle_unlocked(lle);
 }
 
 /*
  * Allocate a zeroed entry for addr6 with one reference held.
  * Returns NULL when memory is not available; flags is unused here.
  */
 static struct llentry *
 in6_lltable_new(const struct in6_addr *addr6, u_int flags)
 {
 	struct in6_llentry *ent;
 	struct llentry *lle;
 
 	ent = malloc(sizeof(*ent), M_LLTABLE, M_NOWAIT | M_ZERO);
 	if (ent == NULL)		/* NB: caller generates msg */
 		return (NULL);
 
 	lle = &ent->base;
 	lle->lle_free = in6_lltable_destroy_lle;
 	lle->lle_refcnt = 1;
 	lle->r_l3addr.addr6 = *addr6;
 	LLE_LOCK_INIT(lle);
 	LLE_REQ_INIT(lle);
 	callout_init(&lle->lle_timer, 1);
 
 	return (lle);
 }
 
 /*
  * Decide whether lle is selected by the prefix saddr/smask.
  * Returns 1 for a match and 0 otherwise.
  */
 static int
 in6_lltable_match_prefix(const struct sockaddr *saddr,
     const struct sockaddr *smask, u_int flags, struct llentry *lle)
 {
 	const struct in6_addr *want, *netmask, *have;
 
 	want = &((const struct sockaddr_in6 *)saddr)->sin6_addr;
 	netmask = &((const struct sockaddr_in6 *)smask)->sin6_addr;
 	have = &lle->r_l3addr.addr6;
 
 	/* Entries outside the prefix never match. */
 	if (!IN6_ARE_MASKED_ADDR_EQUAL(have, want, netmask))
 		return (0);
 
 	/*
 	 * LLE_IFADDR records match only when the address is exactly the
 	 * interface address being matched and LLE_STATIC was requested.
 	 */
 	if ((lle->la_flags & LLE_IFADDR) != 0)
 		return (IN6_ARE_ADDR_EQUAL(want, have) &&
 		    (flags & LLE_STATIC) != 0);
 
 	/*
 	 * With LLE_STATIC requested both dynamic and static entries
 	 * match; without it only dynamic ones do.
 	 */
 	return ((flags & LLE_STATIC) != 0 ||
 	    (lle->la_flags & LLE_STATIC) == 0);
 }
 
 /*
  * Unlink lle from llt if it is still linked, stop its timer and free it.
  * The entry's write lock is asserted on entry.
  */
 static void
 in6_lltable_free_entry(struct lltable *llt, struct llentry *lle)
 {
 	struct ifnet *ifp;
 
 	LLE_WLOCK_ASSERT(lle);
 	KASSERT(llt != NULL, ("lltable is NULL"));
 
 	/* Unlink entry from table */
 	if ((lle->la_flags & LLE_LINKED) != 0) {
 
 		ifp = llt->llt_ifp;
 		/* Unlinking is done with the afdata write lock asserted. */
 		IF_AFDATA_WLOCK_ASSERT(ifp);
 		lltable_unlink_entry(llt, lle);
 	}
 
 	/*
 	 * A cancelled timer will not run any more, so one reference is
 	 * removed here (presumably the one held for the pending callout;
 	 * confirm against the code that arms lle_timer).
 	 */
-	if (callout_stop(&lle->lle_timer) & CALLOUT_RET_CANCELLED)
+	if (callout_stop(&lle->lle_timer).bit.cancelled)
 		LLE_REMREF(lle);
 
 	llentry_free(lle);
 }
 
 /*
  * Check that l3addr may get a neighbour cache entry on ifp.
  * Returns 0 when it may and EINVAL (after logging) when it may not.
  * flags is unused.
  */
 static int
 in6_lltable_rtcheck(struct ifnet *ifp, u_int flags,
     const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in6 *sin6;
 	struct nhop6_basic nh6;
 	struct in6_addr dst;
 	struct ifaddr *ifa;
 	uint32_t scopeid;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	KASSERT(l3addr->sa_family == AF_INET6,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/* Our local addresses are always only installed on the default FIB. */
 	sin6 = (const struct sockaddr_in6 *)l3addr;
 	in6_splitscope(&sin6->sin6_addr, &dst, &scopeid);
 
 	/* A direct (non-gateway) route through ifp settles it. */
 	if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &dst, scopeid, 0, 0,
 	    &nh6) == 0 &&
 	    (nh6.nh_flags & NHF_GATEWAY) == 0 && nh6.nh_ifp == ifp)
 		return (0);
 
 	/*
 	 * Otherwise still allow an ND6 cache entry for a neighbour that
 	 * is not covered by our own prefix, as long as ifp has an
 	 * address suitable for reaching it.
 	 */
 	ifa = ifaof_ifpforaddr(l3addr, ifp);
 	if (ifa != NULL) {
 		ifa_free(ifa);
 		return (0);
 	}
 
 	log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
 	    ip6_sprintf(ip6buf, &sin6->sin6_addr));
 	return (EINVAL);
 }
 
 /*
  * Hash an IPv6 address into a table of hsize buckets.  Only the last
  * 32-bit word of the address feeds the hash.
  */
 static inline uint32_t
 in6_lltable_hash_dst(const struct in6_addr *dst, uint32_t hsize)
 {
 
 	return (IN6_LLTBL_HASH(dst->s6_addr32[3], hsize));
 }
 
 /*
  * Hash callback installed as llt_hash: hashes the IPv6 address stored
  * in the entry.
  */
 static uint32_t
 in6_lltable_hash(const struct llentry *lle, uint32_t hsize)
 {
 
 	return (in6_lltable_hash_dst(&lle->r_l3addr.addr6, hsize));
 }
 
 /*
  * Write the entry's IPv6 address into sa as a zero-filled sockaddr_in6
  * (port, flow info and scope id stay 0).
  */
 static void
 in6_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
 {
 	struct sockaddr_in6 *out = (struct sockaddr_in6 *)sa;
 
 	bzero(out, sizeof(*out));
 	out->sin6_len = sizeof(*out);
 	out->sin6_family = AF_INET6;
 	out->sin6_addr = lle->r_l3addr.addr6;
 }
 
 /*
  * Return the first entry for dst in llt that is not marked deleted,
  * or NULL when there is none.  No entry lock is taken here.
  */
 static inline struct llentry *
 in6_lltable_find_dst(struct lltable *llt, const struct in6_addr *dst)
 {
 	struct llentry *lle;
 	u_int bucket;
 
 	bucket = in6_lltable_hash_dst(dst, llt->llt_hsize);
 	LIST_FOREACH(lle, &llt->lle_head[bucket], lle_next) {
 		if ((lle->la_flags & LLE_DELETED) == 0 &&
 		    IN6_ARE_ADDR_EQUAL(&lle->r_l3addr.addr6, dst))
 			return (lle);
 	}
 
 	return (NULL);
 }
 
 /*
  * Delete callback installed as llt_delete_entry: flag the entry as
  * deleted, notify lle_event listeners and hand it to llentry_free().
  * llt is not used.
  */
 static void
 in6_lltable_delete_entry(struct lltable *llt, struct llentry *lle)
 {
 
 	/* Set the flag first so lookups skip the entry from here on. */
 	lle->la_flags |= LLE_DELETED;
 	EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
 #ifdef DIAGNOSTIC
 	log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
 	llentry_free(lle);
 }
 
 /*
  * Allocation callback installed as llt_alloc_entry.  Creates an entry
  * for l3addr; nothing is linked into the table here.
  * Returns NULL when the route check, the allocation, or the
  * link-header computation fails.
  */
 static struct llentry *
 in6_lltable_alloc(struct lltable *llt, u_int flags,
 	const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in6 *sin6;
 	struct ifnet *ifp;
 	struct llentry *lle;
 	char linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	int lladdr_off;
 
 	sin6 = (const struct sockaddr_in6 *)l3addr;
 	ifp = llt->llt_ifp;
 
 	KASSERT(l3addr->sa_family == AF_INET6,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/*
 	 * Since this is a resolution, a route covering the address must
 	 * already be installed; verify that, except for our own
 	 * interface addresses.
 	 */
 	if ((flags & LLE_IFADDR) == 0 &&
 	    in6_lltable_rtcheck(ifp, flags, l3addr) != 0)
 		return (NULL);
 
 	lle = in6_lltable_new(&sin6->sin6_addr, flags);
 	if (lle == NULL) {
 		log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
 		return (NULL);
 	}
 	lle->la_flags = flags;
 
 	if ((flags & LLE_IFADDR) == LLE_IFADDR) {
 		/* Own address: pre-fill the link header and pin the entry. */
 		linkhdrsize = LLE_MAX_LINKHDR;
 		if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp),
 		    linkhdr, &linkhdrsize, &lladdr_off) != 0) {
 			in6_lltable_destroy_lle_unlocked(lle);
 			return (NULL);
 		}
 		lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
 		    lladdr_off);
 		lle->la_flags |= LLE_STATIC;
 	}
 
 	/* Static entries start out reachable. */
 	if ((lle->la_flags & LLE_STATIC) != 0)
 		lle->ln_state = ND6_LLINFO_REACHABLE;
 
 	return (lle);
 }
 
 /*
  * Lookup callback installed as llt_lookup.  Returns the entry for
  * l3addr or NULL.  Depending on flags the entry comes back unlocked
  * (LLE_UNLOCKED), write-locked (LLE_EXCLUSIVE) or read-locked.
  */
 static struct llentry *
 in6_lltable_lookup(struct lltable *llt, u_int flags,
 	const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in6 *sin6;
 	struct llentry *lle;
 
 	sin6 = (const struct sockaddr_in6 *)l3addr;
 
 	IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
 	KASSERT(l3addr->sa_family == AF_INET6,
 	    ("sin_family %d", l3addr->sa_family));
 
 	lle = in6_lltable_find_dst(llt, &sin6->sin6_addr);
 	if (lle == NULL)
 		return (NULL);
 
 	/* The two lock-mode requests are mutually exclusive. */
 	KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
 	    (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
 	    flags));
 
 	if ((flags & LLE_UNLOCKED) == 0) {
 		if ((flags & LLE_EXCLUSIVE) != 0) {
 			LLE_WLOCK(lle);
 		} else {
 			LLE_RLOCK(lle);
 		}
 	}
 	return (lle);
 }
 
 static int
 in6_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
     struct sysctl_req *wr)
 {
 	struct ifnet *ifp = llt->llt_ifp;
 	/* XXX stack use */
 	struct {
 		struct rt_msghdr	rtm;
 		struct sockaddr_in6	sin6;
 		/*
 		 * ndp.c assumes that sdl is word aligned
 		 */
 #ifdef __LP64__
 		uint32_t		pad;
 #endif
 		struct sockaddr_dl	sdl;
 	} ndpc;
 	struct sockaddr_dl *sdl;
 	int error;
 
 	bzero(&ndpc, sizeof(ndpc));
 			/* skip deleted entries */
 			if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
 				return (0);
 			/* Skip if jailed and not a valid IP of the prison. */
 			lltable_fill_sa_entry(lle,
 			    (struct sockaddr *)&ndpc.sin6);
 			if (prison_if(wr->td->td_ucred,
 			    (struct sockaddr *)&ndpc.sin6) != 0)
 				return (0);
 			/*
 			 * produce a msg made of:
 			 *  struct rt_msghdr;
 			 *  struct sockaddr_in6 (IPv6)
 			 *  struct sockaddr_dl;
 			 */
 			ndpc.rtm.rtm_msglen = sizeof(ndpc);
 			ndpc.rtm.rtm_version = RTM_VERSION;
 			ndpc.rtm.rtm_type = RTM_GET;
 			ndpc.rtm.rtm_flags = RTF_UP;
 			ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
 			if (V_deembed_scopeid)
 				sa6_recoverscope(&ndpc.sin6);
 
 			/* publish */
 			if (lle->la_flags & LLE_PUB)
 				ndpc.rtm.rtm_flags |= RTF_ANNOUNCE;
 
 			sdl = &ndpc.sdl;
 			sdl->sdl_family = AF_LINK;
 			sdl->sdl_len = sizeof(*sdl);
 			sdl->sdl_index = ifp->if_index;
 			sdl->sdl_type = ifp->if_type;
 			if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
 				sdl->sdl_alen = ifp->if_addrlen;
 				bcopy(lle->ll_addr, LLADDR(sdl),
 				    ifp->if_addrlen);
 			} else {
 				sdl->sdl_alen = 0;
 				bzero(LLADDR(sdl), ifp->if_addrlen);
 			}
 			if (lle->la_expire != 0)
 				ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire +
 				    lle->lle_remtime / hz +
 				    time_second - time_uptime;
 			ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
 			if (lle->la_flags & LLE_STATIC)
 				ndpc.rtm.rtm_flags |= RTF_STATIC;
 			if (lle->la_flags & LLE_IFADDR)
 				ndpc.rtm.rtm_flags |= RTF_PINNED;
 			if (lle->ln_router != 0)
 				ndpc.rtm.rtm_flags |= RTF_GATEWAY;
 			ndpc.rtm.rtm_rmx.rmx_pksent = lle->la_asked;
 			/* Store state in rmx_weight value */
 			ndpc.rtm.rtm_rmx.rmx_state = lle->ln_state;
 			ndpc.rtm.rtm_index = ifp->if_index;
 			error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc));
 
 	return (error);
 }
 
 /*
  * Create the IPv6 link-layer table for ifp, install the in6_lltable_*
  * callbacks and link the table.
  */
 static struct lltable *
 in6_lltattach(struct ifnet *ifp)
 {
 	struct lltable *tbl;
 
 	tbl = lltable_allocate_htbl(IN6_LLTBL_DEFAULT_HSIZE);
 	tbl->llt_ifp = ifp;
 	tbl->llt_af = AF_INET6;
 
 	/* Entry life cycle. */
 	tbl->llt_alloc_entry = in6_lltable_alloc;
 	tbl->llt_delete_entry = in6_lltable_delete_entry;
 	tbl->llt_free_entry = in6_lltable_free_entry;
 
 	/* Search, hashing and export. */
 	tbl->llt_lookup = in6_lltable_lookup;
 	tbl->llt_match_prefix = in6_lltable_match_prefix;
 	tbl->llt_hash = in6_lltable_hash;
 	tbl->llt_fill_sa_entry = in6_lltable_fill_sa_entry;
 	tbl->llt_dump_entry = in6_lltable_dump_entry;
 
 	/* Link the table only once it is fully set up. */
 	lltable_link(tbl);
 
 	return (tbl);
 }
 
 /*
  * Allocate and initialise the per-interface IPv6 data for ifp.
  * Returns NULL for interface types that do not carry IPv6.
  */
 void *
 in6_domifattach(struct ifnet *ifp)
 {
 	struct in6_ifextra *ext;
 
 	/* These interface types are not IPv6-capable. */
 	if (ifp->if_type == IFT_PFLOG || ifp->if_type == IFT_PFSYNC ||
 	    ifp->if_type == IFT_USB)
 		return (NULL);
 
 	ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
 	bzero(ext, sizeof(*ext));
 
 	/* One counter per 64-bit field of struct in6_ifstat. */
 	ext->in6_ifstat = malloc(sizeof(counter_u64_t) *
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK);
 	COUNTER_ARRAY_ALLOC(ext->in6_ifstat,
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK);
 
 	/* Likewise for struct icmp6_ifstat. */
 	ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) *
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR,
 	    M_WAITOK);
 	COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat,
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK);
 
 	/* Attach the protocol sub-modules. */
 	ext->nd_ifinfo = nd6_ifattach(ifp);
 	ext->scope6_id = scope6_ifattach(ifp);
 	ext->lltable = in6_lltattach(ifp);
 
 	ext->mld_ifinfo = mld_domifattach(ifp);
 
 	return (ext);
 }
 
 /*
  * Return the IPv6 link MTU of ifp, or the plain interface MTU when no
  * IPv6 data is attached.
  */
 int
 in6_domifmtu(struct ifnet *ifp)
 {
 	if (ifp->if_afdata[AF_INET6] != NULL)
 		return (IN6_LINKMTU(ifp));
 
 	return (ifp->if_mtu);
 }
 
 /*
  * Release the per-interface IPv6 data passed in as aux (it is used as
  * a struct in6_ifextra): detach the sub-modules, free the link-layer
  * table and both statistics arrays, then the structure itself.
  */
 void
 in6_domifdetach(struct ifnet *ifp, void *aux)
 {
 	struct in6_ifextra *ext = (struct in6_ifextra *)aux;
 
 	mld_domifdetach(ifp);
 	scope6_ifdetach(ext->scope6_id);
 	nd6_ifdetach(ifp, ext->nd_ifinfo);
 	lltable_free(ext->lltable);
 	/* Free the counters before the arrays that hold them. */
 	COUNTER_ARRAY_FREE(ext->in6_ifstat,
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t));
 	free(ext->in6_ifstat, M_IFADDR);
 	COUNTER_ARRAY_FREE(ext->icmp6_ifstat,
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
 	free(ext->icmp6_ifstat, M_IFADDR);
 	free(ext, M_IFADDR);
 }
 
 /*
  * Convert sockaddr_in6 to sockaddr_in.  Original sockaddr_in6 must be
  * v4 mapped addr or v4 compat addr
  */
 void
 in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 
 	bzero(sin, sizeof(*sin));
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(struct sockaddr_in);
 	/* The IPv4 address is the last 32-bit word of the IPv6 address. */
 	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];
 	sin->sin_port = sin6->sin6_port;
 }
 
 /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */
 void
 in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 	bzero(sin6, sizeof(*sin6));
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_len = sizeof(struct sockaddr_in6);
 	sin6->sin6_port = sin->sin_port;
 	/* Two zero words, the mapping marker, then the IPv4 address. */
 	sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
 	sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
 	sin6->sin6_addr.s6_addr32[1] = 0;
 	sin6->sin6_addr.s6_addr32[0] = 0;
 }
 
 /* Convert sockaddr_in6 into sockaddr_in. */
 void
 in6_sin6_2_sin_in_sock(struct sockaddr *nam)
 {
 	struct sockaddr_in *sin_p;
 	struct sockaddr_in6 sin6;
 
 	/*
 	 * Save original sockaddr_in6 addr and convert it
 	 * to sockaddr_in.
 	 */
 	sin6 = *(struct sockaddr_in6 *)nam;
 	sin_p = (struct sockaddr_in *)nam;
 	in6_sin6_2_sin(sin_p, &sin6);
 }
 
 /* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. */
 void
 in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
 {
 	struct sockaddr_in6 *mapped;
 
 	/* Build the v4-mapped address in fresh storage ... */
 	mapped = malloc(sizeof(*mapped), M_SONAME, M_WAITOK);
 	in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, mapped);
 
 	/* ... then release the old sockaddr and hand back the new one. */
 	free(*nam, M_SONAME);
 	*nam = (struct sockaddr *)mapped;
 }
Index: projects/hps_head/sys/netinet6/nd6.c
===================================================================
--- projects/hps_head/sys/netinet6/nd6.c	(revision 309217)
+++ projects/hps_head/sys/netinet6/nd6.c	(revision 309218)
@@ -1,2753 +1,2753 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/protosw.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/rwlock.h>
 #include <sys/queue.h>
 #include <sys/sdt.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/iso88025.h>
 #include <net/fddi.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <net/if_llatbl.h>
 #include <netinet/if_ether.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet/icmp6.h>
 #include <netinet6/send.h>
 
 #include <sys/limits.h>
 
 #include <security/mac/mac_framework.h>
 
 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
 
 #define SIN6(s) ((const struct sockaddr_in6 *)(s))
 
 MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
 
 /* timer values */
 VNET_DEFINE(int, nd6_prune)	= 1;	/* walk list every 1 seconds */
 VNET_DEFINE(int, nd6_delay)	= 5;	/* delay first probe time 5 second */
 VNET_DEFINE(int, nd6_umaxtries)	= 3;	/* maximum unicast query */
 VNET_DEFINE(int, nd6_mmaxtries)	= 3;	/* maximum multicast query */
 VNET_DEFINE(int, nd6_useloopback) = 1;	/* use loopback interface for
 					 * local traffic */
 VNET_DEFINE(int, nd6_gctimer)	= (60 * 60 * 24); /* 1 day: garbage
 					 * collection timer */
 
 /* preventing too many loops in ND option parsing */
 static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */
 
 VNET_DEFINE(int, nd6_maxnudhint) = 0;	/* max # of subsequent upper
 					 * layer hints */
 static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved
 					 * ND entries */
 #define	V_nd6_maxndopt			VNET(nd6_maxndopt)
 #define	V_nd6_maxqueuelen		VNET(nd6_maxqueuelen)
 
 #ifdef ND6_DEBUG
 VNET_DEFINE(int, nd6_debug) = 1;
 #else
 VNET_DEFINE(int, nd6_debug) = 0;
 #endif
 
 static eventhandler_tag lle_event_eh, iflladdr_event_eh;
 
 VNET_DEFINE(struct nd_drhead, nd_defrouter);
 VNET_DEFINE(struct nd_prhead, nd_prefix);
 VNET_DEFINE(struct rwlock, nd6_lock);
 VNET_DEFINE(uint64_t, nd6_list_genid);
 VNET_DEFINE(struct mtx, nd6_onlink_mtx);
 
 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
 #define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
 
 int	(*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int);
 
 static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *,
 	struct ifnet *);
 static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
 static void nd6_slowtimo(void *);
 static int regen_tmpaddr(struct in6_ifaddr *);
 static void nd6_free(struct llentry **, int);
 static void nd6_free_redirect(const struct llentry *);
 static void nd6_llinfo_timer(void *);
 static void nd6_llinfo_settimer_locked(struct llentry *, long);
 static void clear_llinfo_pqueue(struct llentry *);
 static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
 static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
     const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **);
 static int nd6_need_cache(struct ifnet *);
  
 
 static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
 #define	V_nd6_slowtimo_ch		VNET(nd6_slowtimo_ch)
 
 VNET_DEFINE(struct callout, nd6_timer_ch);
 #define	V_nd6_timer_ch			VNET(nd6_timer_ch)
 
 static void
 nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
 {
 	struct rt_addrinfo rtinfo;
 	struct sockaddr_in6 dst;
 	struct sockaddr_dl gw;
 	struct ifnet *ifp;
 	int type;
 
 	LLE_WLOCK_ASSERT(lle);
 
 	if (lltable_get_af(lle->lle_tbl) != AF_INET6)
 		return;
 
 	switch (evt) {
 	case LLENTRY_RESOLVED:
 		type = RTM_ADD;
 		KASSERT(lle->la_flags & LLE_VALID,
 		    ("%s: %p resolved but not valid?", __func__, lle));
 		break;
 	case LLENTRY_EXPIRED:
 		type = RTM_DELETE;
 		break;
 	default:
 		return;
 	}
 
 	ifp = lltable_get_ifp(lle->lle_tbl);
 
 	bzero(&dst, sizeof(dst));
 	bzero(&gw, sizeof(gw));
 	bzero(&rtinfo, sizeof(rtinfo));
 	lltable_fill_sa_entry(lle, (struct sockaddr *)&dst);
 	dst.sin6_scope_id = in6_getscopezone(ifp,
 	    in6_addrscope(&dst.sin6_addr));
 	gw.sdl_len = sizeof(struct sockaddr_dl);
 	gw.sdl_family = AF_LINK;
 	gw.sdl_alen = ifp->if_addrlen;
 	gw.sdl_index = ifp->if_index;
 	gw.sdl_type = ifp->if_type;
 	if (evt == LLENTRY_RESOLVED)
 		bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
 	rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
 	rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
 	rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
 	rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | (
 	    type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB);
 }
 
 /*
  * A handler for interface link layer address change event.
  */
 static void
 nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
 {
 
 	lltable_update_ifaddr(LLTABLE6(ifp));
 }
 
 void
 nd6_init(void)
 {
 
 	mtx_init(&V_nd6_onlink_mtx, "nd6 onlink", NULL, MTX_DEF);
 	rw_init(&V_nd6_lock, "nd6 list");
 
 	LIST_INIT(&V_nd_prefix);
 	TAILQ_INIT(&V_nd_defrouter);
 
 	/* Start timers. */
 	callout_init(&V_nd6_slowtimo_ch, 0);
 	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 	    nd6_slowtimo, curvnet);
 
 	callout_init(&V_nd6_timer_ch, 0);
 	callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet);
 
 	nd6_dad_init();
 	if (IS_DEFAULT_VNET(curvnet)) {
 		lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
 		    NULL, EVENTHANDLER_PRI_ANY);
 		iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
 		    nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
 	}
 }
 
 #ifdef VIMAGE
 void
 nd6_destroy()
 {
 
 	callout_drain(&V_nd6_slowtimo_ch);
 	callout_drain(&V_nd6_timer_ch);
 	if (IS_DEFAULT_VNET(curvnet)) {
 		EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
 		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
 	}
 	rw_destroy(&V_nd6_lock);
 	mtx_destroy(&V_nd6_onlink_mtx);
 }
 #endif
 
 struct nd_ifinfo *
 nd6_ifattach(struct ifnet *ifp)
 {
 	struct nd_ifinfo *nd;
 
 	nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO);
 	nd->initialized = 1;
 
 	nd->chlim = IPV6_DEFHLIM;
 	nd->basereachable = REACHABLE_TIME;
 	nd->reachable = ND_COMPUTE_RTIME(nd->basereachable);
 	nd->retrans = RETRANS_TIMER;
 
 	nd->flags = ND6_IFF_PERFORMNUD;
 
 	/* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL.
 	 * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by
 	 * default regardless of the V_ip6_auto_linklocal configuration to
 	 * give a reasonable default behavior.
 	 */
 	if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) ||
 	    (ifp->if_flags & IFF_LOOPBACK))
 		nd->flags |= ND6_IFF_AUTO_LINKLOCAL;
 	/*
 	 * A loopback interface does not need to accept RTADV.
 	 * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by
 	 * default regardless of the V_ip6_accept_rtadv configuration to
 	 * prevent the interface from accepting RA messages arrived
 	 * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV.
 	 */
 	if (V_ip6_accept_rtadv &&
 	    !(ifp->if_flags & IFF_LOOPBACK) &&
 	    (ifp->if_type != IFT_BRIDGE))
 			nd->flags |= ND6_IFF_ACCEPT_RTADV;
 	if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK))
 		nd->flags |= ND6_IFF_NO_RADR;
 
 	/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
 	nd6_setmtu0(ifp, nd);
 
 	return nd;
 }
 
 void
 nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd)
 {
 	struct ifaddr *ifa, *next;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		/* stop DAD processing */
 		nd6_dad_stop(ifa);
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	free(nd, M_IP6NDP);
 }
 
 /*
  * Reset ND level link MTU. This function is called when the physical MTU
  * changes, which means we might have to adjust the ND level MTU.
  */
 void
 nd6_setmtu(struct ifnet *ifp)
 {
 	if (ifp->if_afdata[AF_INET6] == NULL)
 		return;
 
 	nd6_setmtu0(ifp, ND_IFINFO(ifp));
 }
 
 /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
 void
 nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
 {
 	u_int32_t omaxmtu;
 
 	omaxmtu = ndi->maxmtu;
 
 	switch (ifp->if_type) {
 	case IFT_ARCNET:
 		ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
 		break;
 	case IFT_FDDI:
 		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
 		break;
 	case IFT_ISO88025:
 		 ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
 		 break;
 	default:
 		ndi->maxmtu = ifp->if_mtu;
 		break;
 	}
 
 	/*
 	 * Decreasing the interface MTU under IPV6 minimum MTU may cause
 	 * undesirable situation.  We thus notify the operator of the change
 	 * explicitly.  The check for omaxmtu is necessary to restrict the
 	 * log to the case of changing the MTU, not initializing it.
 	 */
 	if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
 		log(LOG_NOTICE, "nd6_setmtu0: "
 		    "new link MTU on %s (%lu) is too small for IPv6\n",
 		    if_name(ifp), (unsigned long)ndi->maxmtu);
 	}
 
 	if (ndi->maxmtu > V_in6_maxmtu)
 		in6_setmaxmtu(); /* check all interfaces just in case */
 
 }
 
 void
 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
 {
 
 	bzero(ndopts, sizeof(*ndopts));
 	ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
 	ndopts->nd_opts_last
 		= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
 
 	if (icmp6len == 0) {
 		ndopts->nd_opts_done = 1;
 		ndopts->nd_opts_search = NULL;
 	}
 }
 
 /*
  * Take one ND option.
  */
 struct nd_opt_hdr *
 nd6_option(union nd_opts *ndopts)
 {
 	struct nd_opt_hdr *nd_opt;
 	int olen;
 
 	KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
 	KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
 	    __func__));
 	if (ndopts->nd_opts_search == NULL)
 		return NULL;
 	if (ndopts->nd_opts_done)
 		return NULL;
 
 	nd_opt = ndopts->nd_opts_search;
 
 	/* make sure nd_opt_len is inside the buffer */
 	if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
 		bzero(ndopts, sizeof(*ndopts));
 		return NULL;
 	}
 
 	olen = nd_opt->nd_opt_len << 3;
 	if (olen == 0) {
 		/*
 		 * Message validation requires that all included
 		 * options have a length that is greater than zero.
 		 */
 		bzero(ndopts, sizeof(*ndopts));
 		return NULL;
 	}
 
 	ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
 	if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
 		/* option overruns the end of buffer, invalid */
 		bzero(ndopts, sizeof(*ndopts));
 		return NULL;
 	} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
 		/* reached the end of options chain */
 		ndopts->nd_opts_done = 1;
 		ndopts->nd_opts_search = NULL;
 	}
 	return nd_opt;
 }
 
 /*
  * Parse multiple ND options.
  * This function is much easier to use, for ND routines that do not need
  * multiple options of the same type.
  */
 int
 nd6_options(union nd_opts *ndopts)
 {
 	struct nd_opt_hdr *nd_opt;
 	int i = 0;
 
 	KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__));
 	KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts",
 	    __func__));
 	if (ndopts->nd_opts_search == NULL)
 		return 0;
 
 	while (1) {
 		nd_opt = nd6_option(ndopts);
 		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
 			/*
 			 * Message validation requires that all included
 			 * options have a length that is greater than zero.
 			 */
 			ICMP6STAT_INC(icp6s_nd_badopt);
 			bzero(ndopts, sizeof(*ndopts));
 			return -1;
 		}
 
 		if (nd_opt == NULL)
 			goto skip1;
 
 		switch (nd_opt->nd_opt_type) {
 		case ND_OPT_SOURCE_LINKADDR:
 		case ND_OPT_TARGET_LINKADDR:
 		case ND_OPT_MTU:
 		case ND_OPT_REDIRECTED_HEADER:
 		case ND_OPT_NONCE:
 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
 				nd6log((LOG_INFO,
 				    "duplicated ND6 option found (type=%d)\n",
 				    nd_opt->nd_opt_type));
 				/* XXX bark? */
 			} else {
 				ndopts->nd_opt_array[nd_opt->nd_opt_type]
 					= nd_opt;
 			}
 			break;
 		case ND_OPT_PREFIX_INFORMATION:
 			if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
 				ndopts->nd_opt_array[nd_opt->nd_opt_type]
 					= nd_opt;
 			}
 			ndopts->nd_opts_pi_end =
 				(struct nd_opt_prefix_info *)nd_opt;
 			break;
 		/* What about ND_OPT_ROUTE_INFO? RFC 4191 */
 		case ND_OPT_RDNSS:	/* RFC 6106 */
 		case ND_OPT_DNSSL:	/* RFC 6106 */
 			/*
 			 * Silently ignore options we know and do not care about
 			 * in the kernel.
 			 */
 			break;
 		default:
 			/*
 			 * Unknown options must be silently ignored,
 			 * to accommodate future extension to the protocol.
 			 */
 			nd6log((LOG_DEBUG,
 			    "nd6_options: unsupported option %d - "
 			    "option ignored\n", nd_opt->nd_opt_type));
 		}
 
 skip1:
 		i++;
 		if (i > V_nd6_maxndopt) {
 			ICMP6STAT_INC(icp6s_nd_toomanyopt);
 			nd6log((LOG_INFO, "too many loop in nd opt\n"));
 			break;
 		}
 
 		if (ndopts->nd_opts_done)
 			break;
 	}
 
 	return 0;
 }
 
 /*
  * ND6 timer routine to handle ND6 entries
  */
 static void
 nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
 {
-	int canceled;
+	int cancelled;
 
 	LLE_WLOCK_ASSERT(ln);
 
 	if (tick < 0) {
 		ln->la_expire = 0;
 		ln->ln_ntick = 0;
-		canceled = callout_stop(&ln->lle_timer);
+		cancelled = callout_stop(&ln->lle_timer).bit.cancelled;
 	} else {
 		ln->la_expire = time_uptime + tick / hz;
 		LLE_ADDREF(ln);
 		if (tick > INT_MAX) {
 			ln->ln_ntick = tick - INT_MAX;
-			canceled = callout_reset(&ln->lle_timer, INT_MAX,
-			    nd6_llinfo_timer, ln);
+			cancelled = callout_reset(&ln->lle_timer, INT_MAX,
+			    nd6_llinfo_timer, ln).bit.cancelled;
 		} else {
 			ln->ln_ntick = 0;
-			canceled = callout_reset(&ln->lle_timer, tick,
-			    nd6_llinfo_timer, ln);
+			cancelled = callout_reset(&ln->lle_timer, tick,
+			    nd6_llinfo_timer, ln).bit.cancelled;
 		}
 	}
-	if (canceled & CALLOUT_RET_CANCELLED)
+	if (cancelled)
 		LLE_REMREF(ln);
 }
 
 /*
  * Gets source address of the first packet in hold queue
  * and stores it in @src.
  * Returns pointer to @src (if hold queue is not empty) or NULL.
  *
  * Set noinline to be dtrace-friendly
  */
 static __noinline struct in6_addr *
 nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src)
 {
 	struct ip6_hdr hdr;
 	struct mbuf *m;
 
 	if (ln->la_hold == NULL)
 		return (NULL);
 
 	/*
 	 * assume every packet in la_hold has the same IP header
 	 */
 	m = ln->la_hold;
 	if (sizeof(hdr) > m->m_len)
 		return (NULL);
 
 	m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr);
 	*src = hdr.ip6_src;
 
 	return (src);
 }
 
 /*
  * Checks if we need to switch from STALE state.
  *
  * RFC 4861 requires switching from STALE to DELAY state
  * on first packet matching entry, waiting V_nd6_delay and
  * transition to PROBE state (if upper layer confirmation was
  * not received).
  *
  * This code performs a bit differently:
  * On packet hit we don't change state (but desired state
  * can be guessed by control plane). However, after V_nd6_delay
  * seconds code will transition to PROBE state (so DELAY state
  * is kinda skipped in most situations).
  *
  * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
  * we perform the following upon entering STALE state:
  *
  * 1) Arm timer to run each V_nd6_delay seconds to make sure that
  * if packet was transmitted at the start of given interval, we
  * would be able to switch to PROBE state in V_nd6_delay seconds
  * as user expects.
  *
  * 2) Reschedule timer until original V_nd6_gctimer expires keeping
  * lle in STALE state (remaining timer value stored in lle_remtime).
  *
  * 3) Reschedule timer if packet was transmitted less that V_nd6_delay
  * seconds ago.
  *
  * Returns non-zero value if the entry is still STALE (storing
  * the next timer interval in @pdelay).
  *
  * Returns zero value if original timer expired or we need to switch to
  * PROBE (store that in @do_switch variable).
  */
 static int
 nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
 {
 	int nd_delay, nd_gctimer, r_skip_req;
 	time_t lle_hittime;
 	long delay;
 
 	*do_switch = 0;
 	nd_gctimer = V_nd6_gctimer;
 	nd_delay = V_nd6_delay;
 
 	LLE_REQ_LOCK(lle);
 	r_skip_req = lle->r_skip_req;
 	lle_hittime = lle->lle_hittime;
 	LLE_REQ_UNLOCK(lle);
 
 	if (r_skip_req > 0) {
 
 		/*
 		 * Nonzero r_skip_req value was set upon entering
 		 * STALE state. Since value was not changed, no
 		 * packets were passed using this lle. Ask for
 		 * timer reschedule and keep STALE state.
 		 */
 		delay = (long)(MIN(nd_gctimer, nd_delay));
 		delay *= hz;
 		if (lle->lle_remtime > delay)
 			lle->lle_remtime -= delay;
 		else {
 			delay = lle->lle_remtime;
 			lle->lle_remtime = 0;
 		}
 
 		if (delay == 0) {
 
 			/*
 			 * The original ng6_gctime timeout ended,
 			 * no more rescheduling.
 			 */
 			return (0);
 		}
 
 		*pdelay = delay;
 		return (1);
 	}
 
 	/*
 	 * Packet received. Verify timestamp
 	 */
 	delay = (long)(time_uptime - lle_hittime);
 	if (delay < nd_delay) {
 
 		/*
 		 * V_nd6_delay still not passed since the first
 		 * hit in STALE state.
 		 * Reshedule timer and return.
 		 */
 		*pdelay = (long)(nd_delay - delay) * hz;
 		return (1);
 	}
 
 	/* Request switching to probe */
 	*do_switch = 1;
 	return (0);
 }
 
 
 /*
  * Switch @lle state to new state optionally arming timers.
  *
  * Set noinline to be dtrace-friendly
  */
 __noinline void
 nd6_llinfo_setstate(struct llentry *lle, int newstate)
 {
 	struct ifnet *ifp;
 	int nd_gctimer, nd_delay;
 	long delay, remtime;
 
 	delay = 0;
 	remtime = 0;
 
 	switch (newstate) {
 	case ND6_LLINFO_INCOMPLETE:
 		ifp = lle->lle_tbl->llt_ifp;
 		delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
 		break;
 	case ND6_LLINFO_REACHABLE:
 		if (!ND6_LLINFO_PERMANENT(lle)) {
 			ifp = lle->lle_tbl->llt_ifp;
 			delay = (long)ND_IFINFO(ifp)->reachable * hz;
 		}
 		break;
 	case ND6_LLINFO_STALE:
 
 		/*
 		 * Notify fast path that we want to know if any packet
 		 * is transmitted by setting r_skip_req.
 		 */
 		LLE_REQ_LOCK(lle);
 		lle->r_skip_req = 1;
 		LLE_REQ_UNLOCK(lle);
 		nd_delay = V_nd6_delay;
 		nd_gctimer = V_nd6_gctimer;
 
 		delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
 		remtime = (long)nd_gctimer * hz - delay;
 		break;
 	case ND6_LLINFO_DELAY:
 		lle->la_asked = 0;
 		delay = (long)V_nd6_delay * hz;
 		break;
 	}
 
 	if (delay > 0)
 		nd6_llinfo_settimer_locked(lle, delay);
 
 	lle->lle_remtime = remtime;
 	lle->ln_state = newstate;
 }
 
 /*
  * Timer-dependent part of nd state machine.
  *
  * Set noinline to be dtrace-friendly
  */
 static __noinline void
 nd6_llinfo_timer(void *arg)
 {
 	struct llentry *ln;
 	struct in6_addr *dst, *pdst, *psrc, src;
 	struct ifnet *ifp;
 	struct nd_ifinfo *ndi;
 	int do_switch, send_ns;
 	long delay;
 
 	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
 	ln = (struct llentry *)arg;
 	ifp = lltable_get_ifp(ln->lle_tbl);
 	CURVNET_SET(ifp->if_vnet);
 
 	ND6_RLOCK();
 	LLE_WLOCK(ln);
 	if (callout_pending(&ln->lle_timer)) {
 		/*
 		 * Here we are a bit odd here in the treatment of 
 		 * active/pending. If the pending bit is set, it got
 		 * rescheduled before I ran. The active
 		 * bit we ignore, since if it was stopped
 		 * in ll_tablefree() and was currently running
 		 * it would have return 0 so the code would
 		 * not have deleted it since the callout could
 		 * not be stopped so we want to go through
 		 * with the delete here now. If the callout
 		 * was restarted, the pending bit will be back on and
 		 * we just want to bail since the callout_reset would
 		 * return 1 and our reference would have been removed
 		 * by nd6_llinfo_settimer_locked above since canceled
 		 * would have been 1.
 		 */
 		LLE_WUNLOCK(ln);
 		ND6_RUNLOCK();
 		CURVNET_RESTORE();
 		return;
 	}
 	ndi = ND_IFINFO(ifp);
 	send_ns = 0;
 	dst = &ln->r_l3addr.addr6;
 	pdst = dst;
 
 	if (ln->ln_ntick > 0) {
 		if (ln->ln_ntick > INT_MAX) {
 			ln->ln_ntick -= INT_MAX;
 			nd6_llinfo_settimer_locked(ln, INT_MAX);
 		} else {
 			ln->ln_ntick = 0;
 			nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
 		}
 		goto done;
 	}
 
 	if (ln->la_flags & LLE_STATIC) {
 		goto done;
 	}
 
 	if (ln->la_flags & LLE_DELETED) {
 		nd6_free(&ln, 0);
 		goto done;
 	}
 
 	switch (ln->ln_state) {
 	case ND6_LLINFO_INCOMPLETE:
 		if (ln->la_asked < V_nd6_mmaxtries) {
 			ln->la_asked++;
 			send_ns = 1;
 			/* Send NS to multicast address */
 			pdst = NULL;
 		} else {
 			struct mbuf *m = ln->la_hold;
 			if (m) {
 				struct mbuf *m0;
 
 				/*
 				 * assuming every packet in la_hold has the
 				 * same IP header.  Send error after unlock.
 				 */
 				m0 = m->m_nextpkt;
 				m->m_nextpkt = NULL;
 				ln->la_hold = m0;
 				clear_llinfo_pqueue(ln);
 			}
 			nd6_free(&ln, 0);
 			if (m != NULL)
 				icmp6_error2(m, ICMP6_DST_UNREACH,
 				    ICMP6_DST_UNREACH_ADDR, 0, ifp);
 		}
 		break;
 	case ND6_LLINFO_REACHABLE:
 		if (!ND6_LLINFO_PERMANENT(ln))
 			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
 		break;
 
 	case ND6_LLINFO_STALE:
 		if (nd6_is_stale(ln, &delay, &do_switch) != 0) {
 
 			/*
 			 * No packet has used this entry and GC timeout
 			 * has not been passed. Reshedule timer and
 			 * return.
 			 */
 			nd6_llinfo_settimer_locked(ln, delay);
 			break;
 		}
 
 		if (do_switch == 0) {
 
 			/*
 			 * GC timer has ended and entry hasn't been used.
 			 * Run Garbage collector (RFC 4861, 5.3)
 			 */
 			if (!ND6_LLINFO_PERMANENT(ln))
 				nd6_free(&ln, 1);
 			break;
 		}
 
 		/* Entry has been used AND delay timer has ended. */
 
 		/* FALLTHROUGH */
 
 	case ND6_LLINFO_DELAY:
 		if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
 			/* We need NUD */
 			ln->la_asked = 1;
 			nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE);
 			send_ns = 1;
 		} else
 			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */
 		break;
 	case ND6_LLINFO_PROBE:
 		if (ln->la_asked < V_nd6_umaxtries) {
 			ln->la_asked++;
 			send_ns = 1;
 		} else {
 			nd6_free(&ln, 0);
 		}
 		break;
 	default:
 		panic("%s: paths in a dark night can be confusing: %d",
 		    __func__, ln->ln_state);
 	}
 done:
 	if (ln != NULL)
 		ND6_RUNLOCK();
 	if (send_ns != 0) {
 		nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
 		psrc = nd6_llinfo_get_holdsrc(ln, &src);
 		LLE_FREE_LOCKED(ln);
 		ln = NULL;
 		nd6_ns_output(ifp, psrc, pdst, dst, NULL);
 	}
 
 	if (ln != NULL)
 		LLE_FREE_LOCKED(ln);
 	CURVNET_RESTORE();
 }
 
 
 /*
  * ND6 timer routine to expire default route list and prefix list
  */
 void
 nd6_timer(void *arg)
 {
 	CURVNET_SET((struct vnet *) arg);
 	struct nd_drhead drq;
 	struct nd_prhead prl;
 	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 	struct in6_ifaddr *ia6, *nia6;
 	bool onlink_locked;
 
 	TAILQ_INIT(&drq);
 	LIST_INIT(&prl);
 
 	ND6_WLOCK();
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
 		if (dr->expire && dr->expire < time_uptime)
 			defrouter_unlink(dr, &drq);
 	ND6_WUNLOCK();
 
 	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
 		TAILQ_REMOVE(&drq, dr, dr_entry);
 		defrouter_del(dr);
 	}
 
 	/*
 	 * expire interface addresses.
 	 * in the past the loop was inside prefix expiry processing.
 	 * However, from a stricter speci-confrmance standpoint, we should
 	 * rather separate address lifetimes and prefix lifetimes.
 	 *
 	 * XXXRW: in6_ifaddrhead locking.
 	 */
   addrloop:
 	TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) {
 		/* check address lifetime */
 		if (IFA6_IS_INVALID(ia6)) {
 			int regen = 0;
 
 			/*
 			 * If the expiring address is temporary, try
 			 * regenerating a new one.  This would be useful when
 			 * we suspended a laptop PC, then turned it on after a
 			 * period that could invalidate all temporary
 			 * addresses.  Although we may have to restart the
 			 * loop (see below), it must be after purging the
 			 * address.  Otherwise, we'd see an infinite loop of
 			 * regeneration.
 			 */
 			if (V_ip6_use_tempaddr &&
 			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
 				if (regen_tmpaddr(ia6) == 0)
 					regen = 1;
 			}
 
 			in6_purgeaddr(&ia6->ia_ifa);
 
 			if (regen)
 				goto addrloop; /* XXX: see below */
 		} else if (IFA6_IS_DEPRECATED(ia6)) {
 			int oldflags = ia6->ia6_flags;
 
 			ia6->ia6_flags |= IN6_IFF_DEPRECATED;
 
 			/*
 			 * If a temporary address has just become deprecated,
 			 * regenerate a new one if possible.
 			 */
 			if (V_ip6_use_tempaddr &&
 			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 			    (oldflags & IN6_IFF_DEPRECATED) == 0) {
 
 				if (regen_tmpaddr(ia6) == 0) {
 					/*
 					 * A new temporary address is
 					 * generated.
 					 * XXX: this means the address chain
 					 * has changed while we are still in
 					 * the loop.  Although the change
 					 * would not cause disaster (because
 					 * it's not a deletion, but an
 					 * addition,) we'd rather restart the
 					 * loop just for safety.  Or does this
 					 * significantly reduce performance??
 					 */
 					goto addrloop;
 				}
 			}
 		} else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) {
 			/*
 			 * Schedule DAD for a tentative address.  This happens
 			 * if the interface was down or not running
 			 * when the address was configured.
 			 */
 			int delay;
 
 			delay = arc4random() %
 			    (MAX_RTR_SOLICITATION_DELAY * hz);
 			nd6_dad_start((struct ifaddr *)ia6, delay);
 		} else {
 			/*
 			 * Check status of the interface.  If it is down,
 			 * mark the address as tentative for future DAD.
 			 */
 			if ((ia6->ia_ifp->if_flags & IFF_UP) == 0 ||
 			    (ia6->ia_ifp->if_drv_flags & IFF_DRV_RUNNING)
 				== 0 ||
 			    (ND_IFINFO(ia6->ia_ifp)->flags &
 				ND6_IFF_IFDISABLED) != 0) {
 				ia6->ia6_flags &= ~IN6_IFF_DUPLICATED;
 				ia6->ia6_flags |= IN6_IFF_TENTATIVE;
 			}
 			/*
 			 * A new RA might have made a deprecated address
 			 * preferred.
 			 */
 			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
 		}
 	}
 
 	ND6_WLOCK();
 	onlink_locked = false;
 restart:
 	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
 		/*
 		 * Expire prefixes. Since the pltime is only used for
 		 * autoconfigured addresses, pltime processing for prefixes is
 		 * not necessary.
 		 *
 		 * Only unlink after all derived addresses have expired. This
 		 * may not occur until two hours after the prefix has expired
 		 * per RFC 4862. If the prefix expires before its derived
 		 * addresses, mark it off-link. This will be done automatically
 		 * after unlinking if no address references remain.
 		 */
 		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME ||
 		    time_uptime - pr->ndpr_lastupdate <= pr->ndpr_vltime)
 			continue;
 
 		if (pr->ndpr_addrcnt == 0) {
 			nd6_prefix_unlink(pr, &prl);
 			continue;
 		}
 		if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
 			if (!onlink_locked) {
 				onlink_locked = ND6_ONLINK_TRYLOCK();
 				if (!onlink_locked) {
 					ND6_WUNLOCK();
 					ND6_ONLINK_LOCK();
 					onlink_locked = true;
 					ND6_WLOCK();
 					goto restart;
 				}
 			}
 			(void)nd6_prefix_offlink(pr);
 		}
 	}
 	ND6_WUNLOCK();
 	if (onlink_locked)
 		ND6_ONLINK_UNLOCK();
 
 	while ((pr = LIST_FIRST(&prl)) != NULL) {
 		LIST_REMOVE(pr, ndpr_entry);
 		nd6_prefix_del(pr);
 	}
 
 	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
 	    nd6_timer, curvnet);
 
 	CURVNET_RESTORE();
 }
 
 /*
  * ia6 - deprecated/invalidated temporary address
  */
 static int
 regen_tmpaddr(struct in6_ifaddr *ia6)
 {
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	struct in6_ifaddr *public_ifa6 = NULL;
 
 	ifp = ia6->ia_ifa.ifa_ifp;
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in6_ifaddr *it6;
 
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 
 		it6 = (struct in6_ifaddr *)ifa;
 
 		/* ignore no autoconf addresses. */
 		if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 			continue;
 
 		/* ignore autoconf addresses with different prefixes. */
 		if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
 			continue;
 
 		/*
 		 * Now we are looking at an autoconf address with the same
 		 * prefix as ours.  If the address is temporary and is still
 		 * preferred, do not create another one.  It would be rare, but
 		 * could happen, for example, when we resume a laptop PC after
 		 * a long period.
 		 */
 		if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 		    !IFA6_IS_DEPRECATED(it6)) {
 			public_ifa6 = NULL;
 			break;
 		}
 
 		/*
 		 * This is a public autoconf address that has the same prefix
 		 * as ours.  If it is preferred, keep it.  We can't break the
 		 * loop here, because there may be a still-preferred temporary
 		 * address with the prefix.
 		 */
 		if (!IFA6_IS_DEPRECATED(it6))
 			public_ifa6 = it6;
 	}
 	if (public_ifa6 != NULL)
 		ifa_ref(&public_ifa6->ia_ifa);
 	IF_ADDR_RUNLOCK(ifp);
 
 	if (public_ifa6 != NULL) {
 		int e;
 
 		if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) {
 			ifa_free(&public_ifa6->ia_ifa);
 			log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
 			    " tmp addr,errno=%d\n", e);
 			return (-1);
 		}
 		ifa_free(&public_ifa6->ia_ifa);
 		return (0);
 	}
 
 	return (-1);
 }
 
 /*
  * Remove prefix and default router list entries corresponding to ifp. Neighbor
  * cache entries are freed in in6_domifdetach().
  */
 void
 nd6_purge(struct ifnet *ifp)
 {
 	struct nd_drhead drq;
 	struct nd_prhead prl;
 	struct nd_defrouter *dr, *ndr;
 	struct nd_prefix *pr, *npr;
 
 	TAILQ_INIT(&drq);
 	LIST_INIT(&prl);
 
 	/*
 	 * Nuke default router list entries toward ifp.
 	 * We defer removal of default router list entries that is installed
 	 * in the routing table, in order to keep additional side effects as
 	 * small as possible.
 	 */
 	ND6_WLOCK();
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
 		if (dr->installed)
 			continue;
 		if (dr->ifp == ifp)
 			defrouter_unlink(dr, &drq);
 	}
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
 		if (!dr->installed)
 			continue;
 		if (dr->ifp == ifp)
 			defrouter_unlink(dr, &drq);
 	}
 
 	/*
 	 * Remove prefixes on ifp. We should have already removed addresses on
 	 * this interface, so no addresses should be referencing these prefixes.
 	 */
 	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
 		if (pr->ndpr_ifp == ifp)
 			nd6_prefix_unlink(pr, &prl);
 	}
 	ND6_WUNLOCK();
 
 	/* Delete the unlinked router and prefix objects. */
 	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
 		TAILQ_REMOVE(&drq, dr, dr_entry);
 		defrouter_del(dr);
 	}
 	while ((pr = LIST_FIRST(&prl)) != NULL) {
 		LIST_REMOVE(pr, ndpr_entry);
 		nd6_prefix_del(pr);
 	}
 
 	/* cancel default outgoing interface setting */
 	if (V_nd6_defifindex == ifp->if_index)
 		nd6_setdefaultiface(0);
 
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
 		/* Refresh default router list. */
 		defrouter_select();
 	}
 }
 
 /* 
  * the caller acquires and releases the lock on the lltbls
  * Returns the llentry locked
  */
 struct llentry *
 nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
 {
 	struct sockaddr_in6 sin6;
 	struct llentry *ln;
 	
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_addr = *addr6;
 
 	IF_AFDATA_LOCK_ASSERT(ifp);
 
 	ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6);
 
 	return (ln);
 }
 
 struct llentry *
 nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp)
 {
 	struct sockaddr_in6 sin6;
 	struct llentry *ln;
 
 	bzero(&sin6, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_addr = *addr6;
 
 	ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6);
 	if (ln != NULL)
 		ln->ln_state = ND6_LLINFO_NOSTATE;
 
 	return (ln);
 }
 
 /*
  * Test whether a given IPv6 address is a neighbor or not, ignoring
  * the actual neighbor cache.  The neighbor cache is ignored in order
  * to not reenter the routing code from within itself.
  */
 static int
 nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
 {
 	struct nd_prefix *pr;
 	struct ifaddr *dstaddr;
 	struct rt_addrinfo info;
 	struct sockaddr_in6 rt_key;
 	const struct sockaddr *dst6;
 	uint64_t genid;
 	int error, fibnum;
 
 	/*
 	 * A link-local address is always a neighbor.
 	 * XXX: a link does not necessarily specify a single interface.
 	 */
 	if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
 		struct sockaddr_in6 sin6_copy;
 		u_int32_t zone;
 
 		/*
 		 * We need sin6_copy since sa6_recoverscope() may modify the
 		 * content (XXX).
 		 */
 		sin6_copy = *addr;
 		if (sa6_recoverscope(&sin6_copy))
 			return (0); /* XXX: should be impossible */
 		if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
 			return (0);
 		if (sin6_copy.sin6_scope_id == zone)
 			return (1);
 		else
 			return (0);
 	}
 
 	bzero(&rt_key, sizeof(rt_key));
 	bzero(&info, sizeof(info));
 	info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key;
 
 	/* Always use the default FIB here. XXME - why? */
 	fibnum = RT_DEFAULT_FIB;
 
 	/*
 	 * If the address matches one of our addresses,
 	 * it should be a neighbor.
 	 * If the address matches one of our on-link prefixes, it should be a
 	 * neighbor.
 	 */
 	ND6_RLOCK();
 restart:
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		if (pr->ndpr_ifp != ifp)
 			continue;
 
 		if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
 			/* Always use the default FIB here. */
 			dst6 = (const struct sockaddr *)&pr->ndpr_prefix;
 
 			genid = V_nd6_list_genid;
 			ND6_RUNLOCK();
 
 			/* Restore length field before retrying lookup */
 			rt_key.sin6_len = sizeof(rt_key);
 			error = rib_lookup_info(fibnum, dst6, 0, 0, &info);
 
 			ND6_RLOCK();
 			if (genid != V_nd6_list_genid)
 				goto restart;
 			if (error != 0)
 				continue;
 
 			/*
 			 * This is the case where multiple interfaces
 			 * have the same prefix, but only one is installed 
 			 * into the routing table and that prefix entry
 			 * is not the one being examined here. In the case
 			 * where RADIX_MPATH is enabled, multiple route
 			 * entries (of the same rt_key value) will be 
 			 * installed because the interface addresses all
 			 * differ.
 			 */
 			if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
 			    &rt_key.sin6_addr))
 				continue;
 		}
 
 		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
 		    &addr->sin6_addr, &pr->ndpr_mask)) {
 			ND6_RUNLOCK();
 			return (1);
 		}
 	}
 	ND6_RUNLOCK();
 
 	/*
 	 * If the address is assigned on the node of the other side of
 	 * a p2p interface, the address should be a neighbor.
 	 */
 	dstaddr = ifa_ifwithdstaddr((const struct sockaddr *)addr, RT_ALL_FIBS);
 	if (dstaddr != NULL) {
 		if (dstaddr->ifa_ifp == ifp) {
 			ifa_free(dstaddr);
 			return (1);
 		}
 		ifa_free(dstaddr);
 	}
 
 	/*
 	 * If the default router list is empty, all addresses are regarded
 	 * as on-link, and thus, as a neighbor.
 	 */
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV &&
 	    TAILQ_EMPTY(&V_nd_defrouter) &&
 	    V_nd6_defifindex == ifp->if_index) {
 		return (1);
 	}
 
 	return (0);
 }
 
 
 /*
  * Decide whether addr identifies a neighbor on the link of ifp.
  * The address/prefix based test is tried first; failing that, presence of
  * the address in the neighbor cache also makes it a neighbor.
  * Must be called without the afdata lock of ifp held.
  * Returns 1 for a neighbor, 0 otherwise.
  * XXX: should take care of the destination of a p2p link?
  */
 int
 nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp)
 {
 	struct llentry *cached;
 
 	IF_AFDATA_UNLOCK_ASSERT(ifp);
 
 	if (nd6_is_new_addr_neighbor(addr, ifp) != 0)
 		return (1);
 
 	/*
 	 * Not matched by our addresses or prefixes; it may still be
 	 * present in the neighbor cache.
 	 */
 	IF_AFDATA_RLOCK(ifp);
 	cached = nd6_lookup(&addr->sin6_addr, 0, ifp);
 	if (cached != NULL)
 		LLE_RUNLOCK(cached);
 	IF_AFDATA_RUNLOCK(ifp);
 
 	/* Only the fact that an entry existed matters from here on. */
 	return (cached != NULL ? 1 : 0);
 }
 
 /*
  * Free an nd6 llinfo entry.
  * Since the function would cause significant changes in the kernel, DO NOT
  * make it global, unless you have a strong reason for the change, and are sure
  * that the change is safe.
  *
  * lnp - pointer to the entry pointer; *lnp is set to NULL on entry so the
  *       caller cannot keep using it.
  * gc  - non-zero when the entry is released for garbage collection only;
  *       in that case a STALE entry of a default router with a non-zero
  *       expire value is kept and merely has its timer re-armed.
  *
  * Locking: the entry must be write-locked and the ND6 lock held on entry
  * (both asserted below).  The ND6 lock is released here via ND6_RUNLOCK(),
  * and the entry lock is released on every return path.
  *
  * Set noinline to be dtrace-friendly
  */
 static __noinline void
 nd6_free(struct llentry **lnp, int gc)
 {
 	struct ifnet *ifp;
 	struct llentry *ln;
 	struct nd_defrouter *dr;
 
 	ln = *lnp;
 	*lnp = NULL;
 
 	LLE_WLOCK_ASSERT(ln);
 	ND6_RLOCK_ASSERT();
 
 	/*
 	 * Look up the default router entry for this neighbor while the ND6
 	 * lock is still held.  dr is released with defrouter_rele() on the
 	 * return paths below.
 	 */
 	ifp = lltable_get_ifp(ln->lle_tbl);
 	if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0)
 		dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, ifp);
 	else
 		dr = NULL;
 	ND6_RUNLOCK();
 
 	/* Announce expiration unless the entry is already marked deleted. */
 	if ((ln->la_flags & LLE_DELETED) == 0)
 		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED);
 
 	/*
 	 * we used to have pfctlinput(PRC_HOSTDEAD) here.
 	 * even though it is not harmful, it was not really necessary.
 	 */
 
 	/* cancel timer */
 	nd6_llinfo_settimer_locked(ln, -1);
 
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
 		if (dr != NULL && dr->expire &&
 		    ln->ln_state == ND6_LLINFO_STALE && gc) {
 			/*
 			 * If the reason for the deletion is just garbage
 			 * collection, and the neighbor is an active default
 			 * router, do not delete it.  Instead, reset the GC
 			 * timer using the router's lifetime.
 			 * Simply deleting the entry would affect default
 			 * router selection, which is not necessarily a good
 			 * thing, especially when we're using router preference
 			 * values.
 			 * XXX: the check for ln_state would be redundant,
 			 *      but we intentionally keep it just in case.
 			 */
 			if (dr->expire > time_uptime)
 				nd6_llinfo_settimer_locked(ln,
 				    (dr->expire - time_uptime) * hz);
 			else
 				nd6_llinfo_settimer_locked(ln,
 				    (long)V_nd6_gctimer * hz);
 
 			/* Entry is kept: drop one reference, unlock, done. */
 			LLE_REMREF(ln);
 			LLE_WUNLOCK(ln);
 			defrouter_rele(dr);
 			return;
 		}
 
 		if (dr) {
 			/*
 			 * Unreachability of a router might affect the default
 			 * router selection and on-link detection of advertised
 			 * prefixes.
 			 */
 
 			/*
 			 * Temporarily fake the state to choose a new default
 			 * router and to perform on-link determination of
 			 * prefixes correctly.
 			 * Below the state will be set correctly,
 			 * or the entry itself will be deleted.
 			 */
 			ln->ln_state = ND6_LLINFO_INCOMPLETE;
 		}
 
 		if (ln->ln_router || dr) {
 
 			/*
 			 * We need to unlock to avoid a LOR with rt6_flush() with the
 			 * rnh and for the calls to pfxlist_onlink_check() and
 			 * defrouter_select() in the block further down for calls
 			 * into nd6_lookup().  We still hold a ref.
 			 */
 			LLE_WUNLOCK(ln);
 
 			/*
 			 * rt6_flush must be called whether or not the neighbor
 			 * is in the Default Router List.
 			 * See a corresponding comment in nd6_na_input().
 			 */
 			rt6_flush(&ln->r_l3addr.addr6, ifp);
 		}
 
 		if (dr) {
 			/*
 			 * Since defrouter_select() does not affect the
 			 * on-link determination and MIP6 needs the check
 			 * before the default router selection, we perform
 			 * the check now.
 			 */
 			pfxlist_onlink_check();
 
 			/*
 			 * Refresh default router list.
 			 */
 			defrouter_select();
 		}
 
 		/*
 		 * If this entry was added by an on-link redirect, remove the
 		 * corresponding host route.
 		 */
 		if (ln->la_flags & LLE_REDIRECT)
 			nd6_free_redirect(ln);
 
 		/*
 		 * Re-take the entry lock under the same condition it was
 		 * dropped above, so it is held again when this block ends.
 		 */
 		if (ln->ln_router || dr)
 			LLE_WLOCK(ln);
 	}
 
 	/*
 	 * Safe to unlock. We still hold an extra reference and will not
 	 * free(9) in llentry_free() if someone else holds one as well.
 	 * The entry lock is dropped and re-taken so that the afdata lock
 	 * is acquired before it.
 	 */
 	LLE_WUNLOCK(ln);
 	IF_AFDATA_LOCK(ifp);
 	LLE_WLOCK(ln);
 	/* Guard against race with other llentry_free(). */
 	if (ln->la_flags & LLE_LINKED) {
 		/* Remove callout reference */
 		LLE_REMREF(ln);
 		lltable_unlink_entry(ln->lle_tbl, ln);
 	}
 	IF_AFDATA_UNLOCK(ifp);
 
 	llentry_free(ln);
 	if (dr != NULL)
 		defrouter_rele(dr);
 }
 
 /*
  * Route filter callback: returns 1 when the flags of rt are exactly
  * RTF_UP | RTF_HOST | RTF_DYNAMIC, 0 otherwise.  xap is not used.
  */
 static int
 nd6_isdynrte(const struct rtentry *rt, void *xap)
 {
 	const int dynhost = RTF_UP | RTF_HOST | RTF_DYNAMIC;
 
 	return (rt->rt_flags == dynhost ? 1 : 0);
 }
 /*
  * Remove the rtentry for the given llentry,
  * both of which were installed by a redirect.
  */
 static void
 nd6_free_redirect(const struct llentry *ln)
 {
 	int fibnum;
 	struct sockaddr_in6 sin6;
 	struct rt_addrinfo info;
 
 	lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6);
 	memset(&info, 0, sizeof(info));
 	info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6;
 	info.rti_filter = nd6_isdynrte;
 
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
 		rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
 }
 
 /*
  * Routing request hook.  Only RTM_DELETE does any work: when the deleted
  * route is a default route through a gateway, the matching default router
  * entry is marked as no longer installed.  info is not used.
  */
 void
 nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
 {
 	struct sockaddr_in6 *gw;
 	struct nd_defrouter *router;
 	struct ifnet *ifp;
 
 	gw = (struct sockaddr_in6 *)rt->rt_gateway;
 	ifp = rt->rt_ifp;
 
 	/* RTM_ADD and any other request need no processing. */
 	if (req != RTM_DELETE)
 		return;
 	if (ifp == NULL)
 		return;
 
 	/* Only indirect routes are interesting. */
 	if ((rt->rt_flags & RTF_GATEWAY) == 0)
 		return;
 
 	/* Only the default route (all-zero destination) matters. */
 	if (!IN6_ARE_ADDR_EQUAL(&in6addr_any, &SIN6(rt_key(rt))->sin6_addr))
 		return;
 
 	router = defrouter_lookup(&gw->sin6_addr, ifp);
 	if (router == NULL)
 		return;
 	router->installed = 0;
 	defrouter_rele(router);
 }
 
 
 /*
  * Handle the neighbor discovery ioctls for ifp.
  *
  * cmd  - ioctl command
  * data - command argument; viewed as struct in6_ndireq, struct in6_nbrinfo
  *        or struct in6_ndifreq depending on cmd
  * ifp  - interface the request applies to
  *
  * Returns 0 on success or an errno value.  EPFNOSUPPORT is returned when
  * ifp has no AF_INET6 afdata.  Commands that are not listed in the switch
  * fall out of it and return 0.
  */
 int
 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 {
 	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
 	struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
 	struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
 	int error = 0;
 
 	if (ifp->if_afdata[AF_INET6] == NULL)
 		return (EPFNOSUPPORT);
 	switch (cmd) {
 	case OSIOCGIFINFO_IN6:
 #define ND	ndi->ndi
 		/* XXX: old ndp(8) assumes a positive value for linkmtu. */
 		bzero(&ND, sizeof(ND));
 		ND.linkmtu = IN6_LINKMTU(ifp);
 		ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
 		ND.basereachable = ND_IFINFO(ifp)->basereachable;
 		ND.reachable = ND_IFINFO(ifp)->reachable;
 		ND.retrans = ND_IFINFO(ifp)->retrans;
 		ND.flags = ND_IFINFO(ifp)->flags;
 		ND.recalctm = ND_IFINFO(ifp)->recalctm;
 		ND.chlim = ND_IFINFO(ifp)->chlim;
 		break;
 	case SIOCGIFINFO_IN6:
 		ND = *ND_IFINFO(ifp);
 		break;
 	case SIOCSIFINFO_IN6:
 		/*
 		 * used to change host variables from userland.
 		 * intended for a use on router to reflect RA configurations.
 		 */
 		/* 0 means 'unspecified' */
 		if (ND.linkmtu != 0) {
 			if (ND.linkmtu < IPV6_MMTU ||
 			    ND.linkmtu > IN6_LINKMTU(ifp)) {
 				error = EINVAL;
 				break;
 			}
 			ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
 		}
 
 		if (ND.basereachable != 0) {
 			int obasereachable = ND_IFINFO(ifp)->basereachable;
 
 			ND_IFINFO(ifp)->basereachable = ND.basereachable;
 			/* Recompute reachable only when the base changed. */
 			if (ND.basereachable != obasereachable)
 				ND_IFINFO(ifp)->reachable =
 				    ND_COMPUTE_RTIME(ND.basereachable);
 		}
 		if (ND.retrans != 0)
 			ND_IFINFO(ifp)->retrans = ND.retrans;
 		if (ND.chlim != 0)
 			ND_IFINFO(ifp)->chlim = ND.chlim;
 		/* FALLTHROUGH */
 	case SIOCSIFINFO_FLAGS:
 	{
 		struct ifaddr *ifa;
 		struct in6_ifaddr *ia;
 
 		if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
 		    !(ND.flags & ND6_IFF_IFDISABLED)) {
 			/* ifdisabled 1->0 transition */
 
 			/*
 			 * If the interface is marked as ND6_IFF_IFDISABLED and
 			 * has an link-local address with IN6_IFF_DUPLICATED,
 			 * do not clear ND6_IFF_IFDISABLED.
 			 * See RFC 4862, Section 5.4.5.
 			 */
 			IF_ADDR_RLOCK(ifp);
 			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 				if (ifa->ifa_addr->sa_family != AF_INET6)
 					continue;
 				ia = (struct in6_ifaddr *)ifa;
 				if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
 				    IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
 					break;
 			}
 			IF_ADDR_RUNLOCK(ifp);
 
 			/* ifa is non-NULL only if the loop hit the break. */
 			if (ifa != NULL) {
 				/* LLA is duplicated. */
 				ND.flags |= ND6_IFF_IFDISABLED;
 				log(LOG_ERR, "Cannot enable an interface"
 				    " with a link-local address marked"
 				    " duplicate.\n");
 			} else {
 				ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED;
 				if (ifp->if_flags & IFF_UP)
 					in6_if_up(ifp);
 			}
 		} else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
 			    (ND.flags & ND6_IFF_IFDISABLED)) {
 			/* ifdisabled 0->1 transition */
 			/* Mark all IPv6 address as tentative. */
 
 			ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
 			if (V_ip6_dad_count > 0 &&
 			    (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) {
 				IF_ADDR_RLOCK(ifp);
 				TAILQ_FOREACH(ifa, &ifp->if_addrhead,
 				    ifa_link) {
 					if (ifa->ifa_addr->sa_family !=
 					    AF_INET6)
 						continue;
 					ia = (struct in6_ifaddr *)ifa;
 					ia->ia6_flags |= IN6_IFF_TENTATIVE;
 				}
 				IF_ADDR_RUNLOCK(ifp);
 			}
 		}
 
 		if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) {
 			if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) {
 				/* auto_linklocal 0->1 transition */
 
 				/* If no link-local address on ifp, configure */
 				ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL;
 				in6_ifattach(ifp, NULL);
 			} else if (!(ND.flags & ND6_IFF_IFDISABLED) &&
 			    ifp->if_flags & IFF_UP) {
 				/*
 				 * When the IF already has
 				 * ND6_IFF_AUTO_LINKLOCAL, no link-local
 				 * address is assigned, and IFF_UP, try to
 				 * assign one.
 				 */
 				IF_ADDR_RLOCK(ifp);
 				TAILQ_FOREACH(ifa, &ifp->if_addrhead,
 				    ifa_link) {
 					if (ifa->ifa_addr->sa_family !=
 					    AF_INET6)
 						continue;
 					ia = (struct in6_ifaddr *)ifa;
 					if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia)))
 						break;
 				}
 				IF_ADDR_RUNLOCK(ifp);
 				/*
 				 * The loop breaks on the first link-local
 				 * address, so ifa is NULL exactly when the
 				 * interface has none.
 				 */
 				if (ifa == NULL)
 					/* No LLA is configured. */
 					in6_ifattach(ifp, NULL);
 			}
 		}
 	}
 		/* Finally store the (possibly adjusted) requested flags. */
 		ND_IFINFO(ifp)->flags = ND.flags;
 		break;
 #undef ND
 	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
 		/* sync kernel routing table with the default router list */
 		defrouter_reset();
 		defrouter_select();
 		break;
 	case SIOCSPFXFLUSH_IN6:
 	{
 		/* flush all the prefix advertised by routers */
 		struct in6_ifaddr *ia, *ia_next;
 		struct nd_prefix *pr, *next;
 		struct nd_prhead prl;
 
 		LIST_INIT(&prl);
 
 		/* Unlink under the lock, destroy after dropping it. */
 		ND6_WLOCK();
 		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
 			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
 				continue; /* XXX */
 			nd6_prefix_unlink(pr, &prl);
 		}
 		ND6_WUNLOCK();
 
 		while ((pr = LIST_FIRST(&prl)) != NULL) {
 			LIST_REMOVE(pr, ndpr_entry);
 			/* XXXRW: in6_ifaddrhead locking. */
 			TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link,
 			    ia_next) {
 				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
 					continue;
 
 				/* Purge autoconf addresses using this prefix. */
 				if (ia->ia6_ndpr == pr)
 					in6_purgeaddr(&ia->ia_ifa);
 			}
 			nd6_prefix_del(pr);
 		}
 		break;
 	}
 	case SIOCSRTRFLUSH_IN6:
 	{
 		/* flush all the default routers */
 		struct nd_drhead drq;
 		struct nd_defrouter *dr;
 
 		TAILQ_INIT(&drq);
 
 		defrouter_reset();
 
 		/* Unlink under the lock, destroy after dropping it. */
 		ND6_WLOCK();
 		while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL)
 			defrouter_unlink(dr, &drq);
 		ND6_WUNLOCK();
 		while ((dr = TAILQ_FIRST(&drq)) != NULL) {
 			TAILQ_REMOVE(&drq, dr, dr_entry);
 			defrouter_del(dr);
 		}
 
 		defrouter_select();
 		break;
 	}
 	case SIOCGNBRINFO_IN6:
 	{
 		struct llentry *ln;
 		struct in6_addr nb_addr = nbi->addr; /* make local for safety */
 
 		if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
 			return (error);
 
 		IF_AFDATA_RLOCK(ifp);
 		ln = nd6_lookup(&nb_addr, 0, ifp);
 		IF_AFDATA_RUNLOCK(ifp);
 
 		if (ln == NULL) {
 			error = EINVAL;
 			break;
 		}
 		nbi->state = ln->ln_state;
 		nbi->asked = ln->la_asked;
 		nbi->isrouter = ln->ln_router;
 		if (ln->la_expire == 0)
 			nbi->expire = 0;
 		else
 			nbi->expire = ln->la_expire + ln->lle_remtime / hz +
 			    (time_second - time_uptime);
 		LLE_RUNLOCK(ln);
 		break;
 	}
 	case SIOCGDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
 		ndif->ifindex = V_nd6_defifindex;
 		break;
 	case SIOCSDEFIFACE_IN6:	/* XXX: should be implemented as a sysctl? */
 		return (nd6_setdefaultiface(ndif->ifindex));
 	}
 	return (error);
 }
 
 /*
  * Compute the new IsRouter value of a neighbor cache entry and return it.
  *
  * type      - ICMP6 type of the received message (low 8 bits are used)
  * code      - ICMP6 code; only consulted for redirects
  * is_new    - non-zero if the cache entry was just created
  * old_addr  - non-zero if the entry already had a link-layer address
  * new_addr  - non-zero if the message carried a link-layer address
  * ln_router - current IsRouter value, returned when nothing applies
  *
  * Per-type behavior:
  *   NS:       cleared for a new entry, otherwise unchanged
  *   RS:       always cleared
  *   redirect: set for ND_REDIRECT_ROUTER; otherwise cleared for a new
  *             entry and unchanged for an existing one
  *   RA:       set when a link-layer address is known, i.e. for an existing
  *             entry with an old or a new address, or for a new entry with
  *             a new address; otherwise unchanged.  An existing entry with
  *             no address at all is deliberately NOT marked as a router.
  *   other:    unchanged
  */
 static int
 nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr,
     int ln_router)
 {
 	int lladdr_known;
 
 	switch (type & 0xff) {
 	case ND_ROUTER_SOLICIT:
 		/* The flag must always be cleared. */
 		return (0);
 
 	case ND_NEIGHBOR_SOLICIT:
 		/* A new entry must start with the flag cleared. */
 		return (is_new ? 0 : ln_router);
 
 	case ND_REDIRECT:
 		/*
 		 * A redirect to a better router always sets the flag.
 		 * Otherwise a newly created entry gets it cleared.
 		 * [RFC 2461, sec 8.3]
 		 */
 		if (code == ND_REDIRECT_ROUTER)
 			return (1);
 		return (is_new ? 0 : ln_router);
 
 	case ND_ROUTER_ADVERT:
 		/* Mark an entry with lladdr as a router. */
 		if (is_new)
 			lladdr_known = (new_addr != 0);
 		else
 			lladdr_known = (old_addr || new_addr);
 		return (lladdr_known ? 1 : ln_router);
 
 	default:
 		return (ln_router);
 	}
 }
 
 /*
  * Create neighbor cache entry and cache link-layer address,
  * on reception of inbound ND6 packets.  (RS/RA/NS/redirect)
  *
  * type - ICMP6 type
  * code - type dependent information
  *
  */
 void
 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
     int lladdrlen, int type, int code)
 {
 	struct llentry *ln = NULL, *ln_tmp;
 	int is_newentry;
 	int do_update;
 	int olladdr;
 	int llchange;
 	int flags;
 	uint16_t router = 0;
 	struct sockaddr_in6 sin6;
 	struct mbuf *chain = NULL;
 	u_char linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	int lladdr_off;
 
 	IF_AFDATA_UNLOCK_ASSERT(ifp);
 
 	KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__));
 	KASSERT(from != NULL, ("%s: from == NULL", __func__));
 
 	/* nothing must be updated for unspecified address */
 	if (IN6_IS_ADDR_UNSPECIFIED(from))
 		return;
 
 	/*
 	 * Validation about ifp->if_addrlen and lladdrlen must be done in
 	 * the caller.
 	 *
 	 * XXX If the link does not have link-layer adderss, what should
 	 * we do? (ifp->if_addrlen == 0)
 	 * Spec says nothing in sections for RA, RS and NA.  There's small
 	 * description on it in NS section (RFC 2461 7.2.3).
 	 */
 	flags = lladdr ? LLE_EXCLUSIVE : 0;
 	IF_AFDATA_RLOCK(ifp);
 	ln = nd6_lookup(from, flags, ifp);
 	IF_AFDATA_RUNLOCK(ifp);
 	is_newentry = 0;
 	if (ln == NULL) {
 		flags |= LLE_EXCLUSIVE;
 		ln = nd6_alloc(from, 0, ifp);
 		if (ln == NULL)
 			return;
 
 		/*
 		 * Since we already know all the data for the new entry,
 		 * fill it before insertion.
 		 */
 		if (lladdr != NULL) {
 			linkhdrsize = sizeof(linkhdr);
 			if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
 			    linkhdr, &linkhdrsize, &lladdr_off) != 0)
 				return;
 			lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
 			    lladdr_off);
 		}
 
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(ln);
 		/* Prefer any existing lle over newly-created one */
 		ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp);
 		if (ln_tmp == NULL)
 			lltable_link_entry(LLTABLE6(ifp), ln);
 		IF_AFDATA_WUNLOCK(ifp);
 		if (ln_tmp == NULL) {
 			/* No existing lle, mark as new entry (6,7) */
 			is_newentry = 1;
 			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
 			if (lladdr != NULL)	/* (7) */
 				EVENTHANDLER_INVOKE(lle_event, ln,
 				    LLENTRY_RESOLVED);
 		} else {
 			lltable_free_entry(LLTABLE6(ifp), ln);
 			ln = ln_tmp;
 			ln_tmp = NULL;
 		}
 	} 
 	/* do nothing if static ndp is set */
 	if ((ln->la_flags & LLE_STATIC)) {
 		if (flags & LLE_EXCLUSIVE)
 			LLE_WUNLOCK(ln);
 		else
 			LLE_RUNLOCK(ln);
 		return;
 	}
 
 	olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
 	if (olladdr && lladdr) {
 		llchange = bcmp(lladdr, ln->ll_addr,
 		    ifp->if_addrlen);
 	} else if (!olladdr && lladdr)
 		llchange = 1;
 	else
 		llchange = 0;
 
 	/*
 	 * newentry olladdr  lladdr  llchange	(*=record)
 	 *	0	n	n	--	(1)
 	 *	0	y	n	--	(2)
 	 *	0	n	y	y	(3) * STALE
 	 *	0	y	y	n	(4) *
 	 *	0	y	y	y	(5) * STALE
 	 *	1	--	n	--	(6)   NOSTATE(= PASSIVE)
 	 *	1	--	y	--	(7) * STALE
 	 */
 
 	do_update = 0;
 	if (is_newentry == 0 && llchange != 0) {
 		do_update = 1;	/* (3,5) */
 
 		/*
 		 * Record source link-layer address
 		 * XXX is it dependent to ifp->if_type?
 		 */
 		linkhdrsize = sizeof(linkhdr);
 		if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
 		    linkhdr, &linkhdrsize, &lladdr_off) != 0)
 			return;
 
 		if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
 		    lladdr_off) == 0) {
 			/* Entry was deleted */
 			return;
 		}
 
 		nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
 
 		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
 
 		if (ln->la_hold != NULL)
 			nd6_grab_holdchain(ln, &chain, &sin6);
 	}
 
 	/* Calculates new router status */
 	router = nd6_is_router(type, code, is_newentry, olladdr,
 	    lladdr != NULL ? 1 : 0, ln->ln_router);
 
 	ln->ln_router = router;
 	/* Mark non-router redirects with special flag */
 	if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER)
 		ln->la_flags |= LLE_REDIRECT;
 
 	if (flags & LLE_EXCLUSIVE)
 		LLE_WUNLOCK(ln);
 	else
 		LLE_RUNLOCK(ln);
 
 	if (chain != NULL)
 		nd6_flush_holdchain(ifp, ifp, chain, &sin6);
 	
 	/*
 	 * When the link-layer address of a router changes, select the
 	 * best router again.  In particular, when the neighbor entry is newly
 	 * created, it might affect the selection policy.
 	 * Question: can we restrict the first condition to the "is_newentry"
 	 * case?
 	 * XXX: when we hear an RA from a new router with the link-layer
 	 * address option, defrouter_select() is called twice, since
 	 * defrtrlist_update called the function as well.  However, I believe
 	 * we can compromise the overhead, since it only happens the first
 	 * time.
 	 * XXX: although defrouter_select() should not have a bad effect
 	 * for those are not autoconfigured hosts, we explicitly avoid such
 	 * cases for safety.
 	 */
 	if ((do_update || is_newentry) && router &&
 	    ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
 		/*
 		 * guaranteed recursion
 		 */
 		defrouter_select();
 	}
 }
 
 /*
  * Slow timer callout; arg is the vnet to run in.  Re-arms itself every
  * ND6_SLOWTIMER_INTERVAL seconds and, for each interface with AF_INET6
  * afdata, counts down recalctm and recomputes the reachable time once it
  * runs out.
  */
 static void
 nd6_slowtimo(void *arg)
 {
 	CURVNET_SET((struct vnet *) arg);
 	struct ifnet *ifp;
 	struct nd_ifinfo *ndi;
 
 	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
 	    nd6_slowtimo, curvnet);
 
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		if (ifp->if_afdata[AF_INET6] == NULL)
 			continue;
 
 		ndi = ND_IFINFO(ifp);
 		/* Skip interfaces that are not initialized yet. */
 		if (ndi->basereachable == 0)
 			continue;
 
 		ndi->recalctm -= ND6_SLOWTIMER_INTERVAL;
 		if (ndi->recalctm > 0)
 			continue;
 
 		/*
 		 * Reachable time rarely changes through router
 		 * advertisements, so a fresh random value SHOULD be
 		 * recomputed at least once every few hours.
 		 * (RFC 2461, 6.3.4)
 		 */
 		ndi->recalctm = V_nd6_recalc_reachtm_interval;
 		ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
 	}
 	IFNET_RUNLOCK_NOSLEEP();
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Detach the chain of held packets from ln and hand it to the caller.
  *
  * ln    - entry, must be write-locked
  * chain - receives the former la_hold chain; la_hold is cleared
  * sin6  - filled with the address of the entry
  *
  * A STALE entry is moved to DELAY as a side effect.
  */
 void
 nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain,
     struct sockaddr_in6 *sin6)
 {
 	struct mbuf *held;
 
 	LLE_WLOCK_ASSERT(ln);
 
 	held = ln->la_hold;
 	*chain = held;
 	ln->la_hold = NULL;
 	lltable_fill_sa_entry(ln, (struct sockaddr *)sin6);
 
 	if (ln->ln_state != ND6_LLINFO_STALE)
 		return;
 
 	/*
 	 * The first packet sent to a neighbor whose entry is STALE moves
 	 * the entry to DELAY, which arms a DELAY_FIRST_PROBE_TIME timer so
 	 * that neighbor unreachability detection runs on expiration.
 	 * (RFC 2461 7.3.3)
 	 */
 	nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY);
 }
 
 /*
  * Hand the packet m to the output routine of ifp.
  *
  * ifp     - interface whose if_output is called
  * origifp - interface passed to if_output when ifp is a loopback
  *           interface; otherwise ifp itself is passed
  * m       - packet to send
  * dst     - destination passed to if_output
  * ro      - route passed to if_output
  *
  * Packets tagged PACKET_TAG_ND_OUTGOING are first offered to the SEND
  * hook, if one is registered.  Returns the hook's result when it is
  * anything other than -1, else the result of if_output.
  */
 int
 nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
     struct sockaddr_in6 *dst, struct route *ro)
 {
 	struct ip6_hdr *hdr;
 	struct m_tag *tag;
 	int pktlen;
 	int ret;
 
 #ifdef MAC
 	mac_netinet6_nd6_send(ifp, m);
 #endif
 
 	/*
 	 * If called from nd6_ns_output() (NS), nd6_na_output() (NA),
 	 * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA
 	 * as handled by rtsol and rtadvd), mbufs will be tagged for SeND
 	 * to be diverted to user space.  When re-injected into the kernel,
 	 * send_output() will directly dispatch them to the outgoing interface.
 	 */
 	tag = NULL;
 	if (send_sendso_input_hook != NULL)
 		tag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL);
 	if (tag != NULL) {
 		hdr = mtod(m, struct ip6_hdr *);
 		pktlen = sizeof(struct ip6_hdr) + ntohs(hdr->ip6_plen);
 		/* Use the SEND socket */
 		ret = send_sendso_input_hook(m, ifp, SND_OUT, pktlen);
 		/* -1 == no app on SEND socket; fall through to if_output */
 		if (ret != -1)
 			return (ret);
 	}
 
 	m_clrprotoflags(m);	/* Avoid confusing lower layers. */
 	IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL,
 	    mtod(m, struct ip6_hdr *));
 
 	if ((ifp->if_flags & IFF_LOOPBACK) == 0)
 		origifp = ifp;
 
 	ret = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro);
 	return (ret);
 }
 
 /*
  * Look up the link header for the @sa_dst address.  Stores the data
  * found in the @desten buffer.  A copy of the lle ln_flags can also be
  * saved in @pflags if @pflags is non-NULL.
  *
  * If the destination LLE does not exist or an lle state modification
  * is required, call the "slow" version.
  *
  * Return values:
  * - 0 on success (address copied to buffer).
  * - EWOULDBLOCK (no local error, but address is still unresolved)
  * - other errors (alloc failure, etc)
  */
 int
 nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
     const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags,
     struct llentry **plle)
 {
 	const struct sockaddr_in6 *dst6;
 	struct llentry *ln;
 	int lkflags;
 
 	dst6 = (const struct sockaddr_in6 *)sa_dst;
 
 	if (pflags != NULL)
 		*pflags = 0;
 
 	/* discard the packet if IPv6 operation is disabled on the interface */
 	if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0) {
 		m_freem(m);
 		return (ENETDOWN); /* better error? */
 	}
 
 	/* Multicast: the link-layer address is derived, not looked up. */
 	if (m != NULL && (m->m_flags & M_MCAST) != 0) {
 		switch (ifp->if_type) {
 		case IFT_ETHER:
 		case IFT_FDDI:
 		case IFT_L2VLAN:
 		case IFT_IEEE80211:
 		case IFT_BRIDGE:
 		case IFT_ISO88025:
 			ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr,
 						 desten);
 			return (0);
 		default:
 			m_freem(m);
 			return (EAFNOSUPPORT);
 		}
 	}
 
 	/*
 	 * The entry is write-locked by the lookup only when the caller
 	 * wants it back through plle.
 	 */
 	lkflags = (plle != NULL) ? LLE_EXCLUSIVE : LLE_UNLOCKED;
 
 	IF_AFDATA_RLOCK(ifp);
 	ln = nd6_lookup(&dst6->sin6_addr, lkflags, ifp);
 	if (ln == NULL || (ln->r_flags & RLLE_VALID) == 0) {
 		/* No usable entry: release what we hold, take the slow path. */
 		if (plle != NULL && ln != NULL)
 			LLE_WUNLOCK(ln);
 		IF_AFDATA_RUNLOCK(ifp);
 		return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags,
 		    plle));
 	}
 
 	/* Entry found, let's copy lle info */
 	bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
 	if (pflags != NULL)
 		*pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
 
 	/* Check if we have feedback request from nd6 timer */
 	if (ln->r_skip_req != 0) {
 		LLE_REQ_LOCK(ln);
 		ln->r_skip_req = 0; /* Notify that entry was used */
 		ln->lle_hittime = time_uptime;
 		LLE_REQ_UNLOCK(ln);
 	}
 
 	/* Hand a referenced, unlocked entry back when requested. */
 	if (plle != NULL) {
 		LLE_ADDREF(ln);
 		*plle = ln;
 		LLE_WUNLOCK(ln);
 	}
 	IF_AFDATA_RUNLOCK(ifp);
 
 	return (0);
 }
 
 
 /*
  * Do L2 address resolution for the @dst address (heavy version).
  *
  * On success the link-layer data is copied into @desten: the entry's
  * ll_addr (ifp->if_addrlen bytes) when LLE_ADDRONLY is set in @flags,
  * otherwise its r_linkdata (r_hdrlen bytes).  A copy of the entry's
  * la_flags is stored in @pflags if @pflags is non-NULL, and a referenced
  * pointer to the entry is stored in @plle if @plle is non-NULL.
  *
  * The function assumes that the destination LLE does not exist, is
  * invalid or is stale, so the entry is looked up with LLE_EXCLUSIVE.
  *
  * Return values:
  * - 0 when the address was copied to @desten;
  * - EWOULDBLOCK when the address is still unresolved (@m was appended
  *   to the entry's hold queue);
  * - ENOBUFS when no entry could be found or allocated (@m is freed).
  *
  * Set noinline to be dtrace-friendly
  */
 static __noinline int
 nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
     const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags,
     struct llentry **plle)
 {
 	struct llentry *lle, *lle_tmp;
 	struct in6_addr *psrc, src;
 	int send_ns, ll_len;
 	char *lladdr;
 
 	/*
 	 * Address resolution or Neighbor Unreachability Detection
 	 * for the next hop.
 	 * At this point, the destination of the packet must be a unicast
 	 * or an anycast address (i.e. not a multicast).
 	 */
 	IF_AFDATA_RLOCK(ifp);
 	lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
 	IF_AFDATA_RUNLOCK(ifp);
 	if (lle == NULL && nd6_is_addr_neighbor(dst, ifp)) {
 		/*
 		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 		 * the condition above is not very efficient.  But we believe
 		 * it is tolerable, because this should be a rare case.
 		 */
 		lle = nd6_alloc(&dst->sin6_addr, 0, ifp);
 		if (lle == NULL) {
 			char ip6buf[INET6_ADDRSTRLEN];
 
 			log(LOG_DEBUG,
 			    "nd6_output: can't allocate llinfo for %s "
 			    "(ln=%p)\n",
 			    ip6_sprintf(ip6buf, &dst->sin6_addr), lle);
 			m_freem(m);
 			return (ENOBUFS);
 		}
 
 		IF_AFDATA_WLOCK(ifp);
 		LLE_WLOCK(lle);
 		/* Prefer any existing entry over newly-created one */
 		lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp);
 		if (lle_tmp == NULL)
 			lltable_link_entry(LLTABLE6(ifp), lle);
 		IF_AFDATA_WUNLOCK(ifp);
 		if (lle_tmp != NULL) {
 			/* Somebody linked an entry meanwhile; drop ours. */
 			lltable_free_entry(LLTABLE6(ifp), lle);
 			lle = lle_tmp;
 		}
 	}
 	if (lle == NULL) {
 		/* No entry and the destination is not a neighbor: give up. */
 		if (m != NULL)
 			m_freem(m);
 		return (ENOBUFS);
 	}
 
 	LLE_WLOCK_ASSERT(lle);
 
 	/*
 	 * The first time we send a packet to a neighbor whose entry is
 	 * STALE, we have to change the state to DELAY and set a timer to
 	 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure that
 	 * neighbor unreachability detection is done on expiration.
 	 * (RFC 2461 7.3.3)
 	 */
 	if (lle->ln_state == ND6_LLINFO_STALE)
 		nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY);
 
 	/*
 	 * If the neighbor cache entry has a state other than INCOMPLETE
 	 * (i.e. its link-layer address is already resolved), just
 	 * send the packet.
 	 */
 	if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
 		if (flags & LLE_ADDRONLY) {
 			lladdr = lle->ll_addr;
 			ll_len = ifp->if_addrlen;
 		} else {
 			lladdr = lle->r_linkdata;
 			ll_len = lle->r_hdrlen;
 		}
 		bcopy(lladdr, desten, ll_len);
 		if (pflags != NULL)
 			*pflags = lle->la_flags;
 		if (plle) {
 			/* The caller receives its own reference. */
 			LLE_ADDREF(lle);
 			*plle = lle;
 		}
 		LLE_WUNLOCK(lle);
 		return (0);
 	}
 
 	/*
 	 * There is a neighbor cache entry, but no ethernet address
 	 * response yet.  Append this latest packet to the end of the
 	 * packet queue in the mbuf.  When it exceeds nd6_maxqueuelen,
 	 * the oldest packet in the queue will be removed.
 	 *
 	 * NOTE(review): when m is NULL (as passed by nd6_resolve_addr())
 	 * nothing is appended, yet the trimming loop below can still drop
 	 * the oldest held packet once the queue already holds
 	 * V_nd6_maxqueuelen packets -- confirm this is intended.
 	 */
 	if (lle->la_hold != NULL) {
 		struct mbuf *m_hold;
 		int i;
 
 		/* i counts the packets that were queued before appending m. */
 		i = 0;
 		for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){
 			i++;
 			if (m_hold->m_nextpkt == NULL) {
 				m_hold->m_nextpkt = m;
 				break;
 			}
 		}
 		while (i >= V_nd6_maxqueuelen) {
 			m_hold = lle->la_hold;
 			lle->la_hold = lle->la_hold->m_nextpkt;
 			m_freem(m_hold);
 			i--;
 		}
 	} else {
 		lle->la_hold = m;
 	}
 
 	/*
 	 * If there has been no NS for the neighbor after entering the
 	 * INCOMPLETE state, send the first solicitation.
 	 * Note that for newly-created lle la_asked will be 0,
 	 * so we will transition from ND6_LLINFO_NOSTATE to
 	 * ND6_LLINFO_INCOMPLETE state here.
 	 */
 	psrc = NULL;
 	send_ns = 0;
 	if (lle->la_asked == 0) {
 		lle->la_asked++;
 		send_ns = 1;
 		psrc = nd6_llinfo_get_holdsrc(lle, &src);
 
 		nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE);
 	}
 	LLE_WUNLOCK(lle);
 	/* The solicitation is sent only after the entry lock is dropped. */
 	if (send_ns != 0)
 		nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL);
 
 	return (EWOULDBLOCK);
 }
 
 /*
  * Resolve the link-layer address for @dst and store it in the @desten
  * buffer.  A copy of the lle la_flags is also saved in @pflags when
  * @pflags is non-NULL.
  *
  * This is a thin front end to nd6_resolve_slow(): LLE_ADDRONLY is always
  * added to @flags, and neither a packet nor an lle pointer is passed.
  *
  * Return values:
  * - 0 on success (address copied to buffer).
  * - EWOULDBLOCK (no local error, but address is still unresolved)
  * - other errors (alloc failure, etc)
  */
 int
 nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
     char *desten, uint32_t *pflags)
 {
 	const struct sockaddr_in6 *dst6;
 
 	dst6 = (const struct sockaddr_in6 *)dst;
 	return (nd6_resolve_slow(ifp, flags | LLE_ADDRONLY, NULL, dst6,
 	    desten, pflags, NULL));
 }
 
 /*
  * Send every packet of the hold @chain (linked through m_nextpkt) to
  * nd6_output_ifp() for destination @dst.
  *
  * Each packet is unlinked from the chain before it is handed over, so
  * the output path never sees a stale m_nextpkt pointing at packets that
  * are transmitted separately.
  *
  * Returns the result of the last nd6_output_ifp() call, or 0 for an
  * empty chain.
  */
 int
 nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,
     struct sockaddr_in6 *dst)
 {
 	struct mbuf *m, *m_head;
 	int error = 0;
 
 	m_head = chain;
 	while (m_head != NULL) {
 		m = m_head;
 		m_head = m_head->m_nextpkt;
 		m->m_nextpkt = NULL;
 		error = nd6_output_ifp(ifp, origifp, m, dst, NULL);
 	}
 
 	/*
 	 * XXX
 	 * note that intermediate errors are blindly ignored
 	 */
 	return (error);
 }
 
 /*
  * Tell whether a neighbor cache is kept for @ifp, judging by its if_type.
  * Returns 1 for the link types listed in the switch below and 0 for
  * every other type.
  *
  * RFC2893 says:
  * - unidirectional tunnels needs no ND
  */
 static int
 nd6_need_cache(struct ifnet *ifp)
 {
 	int cached;
 
 	switch (ifp->if_type) {
 	case IFT_ARCNET:
 	case IFT_ETHER:
 	case IFT_FDDI:
 	case IFT_IEEE1394:
 	case IFT_L2VLAN:
 	case IFT_IEEE80211:
 	case IFT_INFINIBAND:
 	case IFT_BRIDGE:
 	case IFT_PROPVIRTUAL:
 		cached = 1;
 		break;
 	default:
 		/* XXX: no neighbor cache on any other interface type. */
 		cached = 0;
 		break;
 	}
 
 	return (cached);
 }
 
 /*
  * Add a permanent ND6 link-layer record for the given
  * interface address.
  *
  * Very similar to IPv4 arp_ifinit(), but:
  * 1) IPv6 DAD is performed in a different place
  * 2) It is called by the IPv6 protocol stack, in contrast to
  * arp_ifinit() which is typically called in the SIOCSIFADDR
  * driver ioctl handler.
  *
  * Returns 0 on success, or when the interface type keeps no neighbor
  * cache (nd6_need_cache() returned 0), and ENOBUFS when the entry
  * cannot be allocated.
  */
 int
 nd6_add_ifa_lle(struct in6_ifaddr *ia)
 {
 	struct ifnet *ifp;
 	struct llentry *ln, *ln_tmp;
 	struct sockaddr *dst;
 
 	ifp = ia->ia_ifa.ifa_ifp;
 	if (nd6_need_cache(ifp) == 0)
 		return (0);
 
 	ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
 	dst = (struct sockaddr *)&ia->ia_addr;
 	ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst);
 	if (ln == NULL)
 		return (ENOBUFS);
 
 	/* The table is write-locked while the old entry is swapped out. */
 	IF_AFDATA_WLOCK(ifp);
 	LLE_WLOCK(ln);
 	/* Unlink any entry if exists */
 	ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst);
 	if (ln_tmp != NULL)
 		lltable_unlink_entry(LLTABLE6(ifp), ln_tmp);
 	lltable_link_entry(LLTABLE6(ifp), ln);
 	IF_AFDATA_WUNLOCK(ifp);
 
 	/* Listeners are notified after the table lock has been dropped. */
 	if (ln_tmp != NULL)
 		EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED);
 	EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
 
 	LLE_WUNLOCK(ln);
 	/*
 	 * NOTE(review): ln_tmp was looked up with LLE_EXCLUSIVE and is never
 	 * unlocked here; presumably llentry_free() consumes the lock --
 	 * confirm against its definition.
 	 */
 	if (ln_tmp != NULL)
 		llentry_free(ln_tmp);
 
 	return (0);
 }
 
 /*
  * Remove link-layer entries belonging to @ia.
  *
  * With @all non-zero, lltable_prefix_free() is called with the address
  * and prefix mask of @ia; otherwise only the LLE_IFADDR entry for the
  * address of @ia is deleted from the interface's IPv6 table.
  */
 void
 nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all)
 {
 	struct sockaddr_in6 addr, mask;
 	struct ifnet *ifp;
 
 	ifp = ia->ia_ifa.ifa_ifp;
 
 	/* Work on a private copy of the address. */
 	memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
 
 	if (all == 0) {
 		lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR,
 		    (struct sockaddr *)&addr);
 		return;
 	}
 
 	/* The prefix mask is needed only for the prefix-wide removal. */
 	memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
 	lltable_prefix_free(AF_INET6, (struct sockaddr *)&addr,
 	    (struct sockaddr *)&mask, LLE_STATIC);
 }
 
 /*
  * Free every packet held on @ln (the la_hold chain, linked through
  * m_nextpkt) and leave the hold queue empty.
  */
 static void
 clear_llinfo_pqueue(struct llentry *ln)
 {
 	struct mbuf *pkt, *next;
 
 	pkt = ln->la_hold;
 	while (pkt != NULL) {
 		/* Fetch the successor before the packet is released. */
 		next = pkt->m_nextpkt;
 		m_freem(pkt);
 		pkt = next;
 	}
 
 	ln->la_hold = NULL;
 }
 
 /* Handlers for the two opaque, read-only list nodes declared below. */
 static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
 static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
 
 /*
  * Neighbor discovery nodes under the _net_inet6_icmp6 sysctl parent:
  * the default router list, the prefix list, and two per-vnet integer
  * tunables (nd6_maxqueuelen and nd6_gctimer).
  */
 SYSCTL_DECL(_net_inet6_icmp6);
 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
 	CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
 	NULL, 0, nd6_sysctl_drlist, "S,in6_defrouter",
 	"NDP default router list");
 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
 	CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
 	NULL, 0, nd6_sysctl_prlist, "S,in6_prefix",
 	"NDP prefix list");
 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
 SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
 
 static int
 nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
 {
 	struct in6_defrouter d;
 	struct nd_defrouter *dr;
 	int error;
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 
 	bzero(&d, sizeof(d));
 	d.rtaddr.sin6_family = AF_INET6;
 	d.rtaddr.sin6_len = sizeof(d.rtaddr);
 
 	ND6_RLOCK();
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		d.rtaddr.sin6_addr = dr->rtaddr;
 		error = sa6_recoverscope(&d.rtaddr);
 		if (error != 0)
 			break;
 		d.flags = dr->raflags;
 		d.rtlifetime = dr->rtlifetime;
 		d.expire = dr->expire + (time_second - time_uptime);
 		d.if_index = dr->ifp->if_index;
 		error = SYSCTL_OUT(req, &d, sizeof(d));
 		if (error != 0)
 			break;
 	}
 	ND6_RUNLOCK();
 	return (error);
 }
 
 static int
 nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 {
 	struct in6_prefix p;
 	struct sockaddr_in6 s6;
 	struct nd_prefix *pr;
 	struct nd_pfxrouter *pfr;
 	time_t maxexpire;
 	int error;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	if (req->newptr)
 		return (EPERM);
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 
 	bzero(&p, sizeof(p));
 	p.origin = PR_ORIG_RA;
 	bzero(&s6, sizeof(s6));
 	s6.sin6_family = AF_INET6;
 	s6.sin6_len = sizeof(s6);
 
 	ND6_RLOCK();
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		p.prefix = pr->ndpr_prefix;
 		if (sa6_recoverscope(&p.prefix)) {
 			log(LOG_ERR, "scope error in prefix list (%s)\n",
 			    ip6_sprintf(ip6buf, &p.prefix.sin6_addr));
 			/* XXX: press on... */
 		}
 		p.raflags = pr->ndpr_raf;
 		p.prefixlen = pr->ndpr_plen;
 		p.vltime = pr->ndpr_vltime;
 		p.pltime = pr->ndpr_pltime;
 		p.if_index = pr->ndpr_ifp->if_index;
 		if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
 			p.expire = 0;
 		else {
 			/* XXX: we assume time_t is signed. */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate)
 				p.expire = pr->ndpr_lastupdate +
 				    pr->ndpr_vltime +
 				    (time_second - time_uptime);
 			else
 				p.expire = maxexpire;
 		}
 		p.refcnt = pr->ndpr_addrcnt;
 		p.flags = pr->ndpr_stateflags;
 		p.advrtrs = 0;
 		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
 			p.advrtrs++;
 		error = SYSCTL_OUT(req, &p, sizeof(p));
 		if (error != 0)
 			break;
 		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
 			s6.sin6_addr = pfr->router->rtaddr;
 			if (sa6_recoverscope(&s6))
 				log(LOG_ERR,
 				    "scope error in prefix list (%s)\n",
 				    ip6_sprintf(ip6buf, &pfr->router->rtaddr));
 			error = SYSCTL_OUT(req, &s6, sizeof(s6));
 			if (error != 0)
 				goto out;
 		}
 	}
 out:
 	ND6_RUNLOCK();
 	return (error);
 }
Index: projects/hps_head/sys/netpfil/pf/if_pfsync.c
===================================================================
--- projects/hps_head/sys/netpfil/pf/if_pfsync.c	(revision 309217)
+++ projects/hps_head/sys/netpfil/pf/if_pfsync.c	(revision 309218)
@@ -1,2419 +1,2419 @@
 /*-
  * Copyright (c) 2002 Michael Shalayeff
  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 /*
  * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $
  *
  * Revisions picked from OpenBSD after revision 1.110 import:
  * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input()
  * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates
  * 1.120, 1.175 - use monotonic time_uptime
  * 1.122 - reduce number of updates for non-TCP sessions
  * 1.125, 1.127 - rewrite merge or stale processing
  * 1.128 - cleanups
  * 1.146 - bzero() mbuf before sparsely filling it with data
  * 1.170 - SIOCSIFMTU checks
  * 1.126, 1.142 - deferred packets processing
  * 1.173 - correct expire time processing
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_pf.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 
 #include <net/bpf.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/vnet.h>
 #include <net/pfvar.h>
 #include <net/if_pfsync.h>
 
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 
 /*
  * Size of a pfsync packet that carries no messages: IP header, pfsync
  * header and one subheader.  pfsync_clone_create() uses it as the initial
  * sc_len.
  */
 #define PFSYNC_MINPKT ( \
 	sizeof(struct ip) + \
 	sizeof(struct pfsync_header) + \
 	sizeof(struct pfsync_subheader) )
 
 /*
  * Per-packet context that pfsync_input() fills in and hands to every
  * action handler.
  */
 struct pfsync_pkt {
 	struct ip *ip;		/* IP header of the received packet */
 	struct in_addr src;	/* copy of ip->ip_src */
 	u_int8_t flags;		/* PFSYNC_SI_* flags, e.g. PFSYNC_SI_CKSUM */
 };
 
 static int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
 		    struct pfsync_state_peer *);
 /*
  * Input action handlers.  pfsync_input() calls them as
  * (pkt, mbuf, offset of the first message, message count) and expects
  * the number of bytes consumed, or -1 to stop processing the packet.
  */
 static int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
 static int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);
 
 /*
  * Dispatch table indexed by the subheader action (PFSYNC_ACT_*);
  * pfsync_input() rejects actions >= PFSYNC_ACT_MAX before indexing it.
  */
 static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
 	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
 	pfsync_in_ins,			/* PFSYNC_ACT_INS */
 	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
 	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
 	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
 	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
 	pfsync_in_del,			/* PFSYNC_ACT_DEL */
 	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
 	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
 	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
 	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
 	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
 	pfsync_in_eof			/* PFSYNC_ACT_EOF */
 };
 
 /*
  * Description of one outgoing message queue: how a state is serialized,
  * how large the resulting message is, and which action code it carries.
  */
 struct pfsync_q {
 	void		(*write)(struct pf_state *, void *);	/* serializer */
 	size_t		len;		/* size of one message */
 	u_int8_t	action;		/* PFSYNC_ACT_* code */
 };
 
 /* we have one of these for every PFSYNC_S_ */
 static void	pfsync_out_state(struct pf_state *, void *);
 static void	pfsync_out_iack(struct pf_state *, void *);
 static void	pfsync_out_upd_c(struct pf_state *, void *);
 static void	pfsync_out_del(struct pf_state *, void *);
 
 /*
  * Queue descriptors: writer, message size and action code.
  * NOTE(review): the table is presumably indexed by the PFSYNC_S_*
  * queue number -- confirm against the PFSYNC_S_* definitions.
  */
 static struct pfsync_q pfsync_qs[] = {
 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
 };
 
 static void	pfsync_q_ins(struct pf_state *, int);
 static void	pfsync_q_del(struct pf_state *);
 
 static void	pfsync_update_state(struct pf_state *);
 
 /* One update request message, linked on the softc's sc_upd_req_list. */
 struct pfsync_upd_req_item {
 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;	/* list linkage */
 	struct pfsync_upd_req			ur_msg;		/* the message */
 };
 
 /*
  * A deferred packet together with the state it belongs to, linked on
  * the softc's sc_deferrals list and guarded by a timeout callout.
  */
 struct pfsync_deferral {
 	struct pfsync_softc		*pd_sc;		/* owning softc */
 	TAILQ_ENTRY(pfsync_deferral)	pd_entry;	/* sc_deferrals linkage */
 	u_int				pd_refs;	/* bumped before draining pd_tmo */
 	struct callout			pd_tmo;		/* deferral timeout */
 
 	struct pf_state			*pd_st;		/* state reference held */
 	struct mbuf			*pd_m;		/* the deferred packet */
 };
 
 /*
  * Software context of the pfsync interface.  It is allocated zeroed in
  * pfsync_clone_create(), published through V_pfsyncif and released in
  * pfsync_clone_destroy().
  */
 struct pfsync_softc {
 	/* Configuration */
 	struct ifnet		*sc_ifp;	/* the pfsync interface itself */
 	struct ifnet		*sc_sync_if;	/* interface packets must arrive on */
 	struct ip_moptions	sc_imo;
 	struct in_addr		sc_sync_peer;
 	uint32_t		sc_flags;	/* PFSYNCF_* below */
 #define	PFSYNCF_OK		0x00000001
 #define	PFSYNCF_DEFER		0x00000002
 #define	PFSYNCF_PUSH		0x00000004
 	uint8_t			sc_maxupdates;	/* initialized to 128 */
 	struct ip		sc_template;
 	struct callout		sc_tmo;
 	struct mtx		sc_mtx;		/* taken by PFSYNC_LOCK() */
 
 	/* Queued data */
 	size_t			sc_len;		/* starts at PFSYNC_MINPKT */
 	TAILQ_HEAD(, pf_state)			sc_qs[PFSYNC_S_COUNT];
 	TAILQ_HEAD(, pfsync_upd_req_item)	sc_upd_req_list;
 	TAILQ_HEAD(, pfsync_deferral)		sc_deferrals;
 	u_int			sc_deferred;	/* entries on sc_deferrals */
 	void			*sc_plus;
 	size_t			sc_pluslen;
 
 	/* Bulk update info */
 	struct mtx		sc_bulk_mtx;	/* taken by PFSYNC_BLOCK() */
 	uint32_t		sc_ureq_sent;
 	int			sc_bulk_tries;
 	uint32_t		sc_ureq_received;
 	int			sc_bulk_hashid;
 	uint64_t		sc_bulk_stateid;
 	uint32_t		sc_bulk_creatorid;
 	struct callout		sc_bulk_tmo;	/* callout tied to sc_bulk_mtx */
 	struct callout		sc_bulkfail_tmo; /* callout tied to sc_bulk_mtx */
 };
 
 /* Lock, unlock and ownership assertion for the softc mutex sc_mtx. */
 #define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
 #define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
 #define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)
 
 /* The same three operations for the bulk update mutex sc_bulk_mtx. */
 #define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
 #define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
 #define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
 
 /* Name used for the interface, the malloc type and the softc mutex. */
 static const char pfsyncname[] = "pfsync";
 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
 /* Per-vnet softc pointer: set on clone create, cleared on destroy. */
 static VNET_DEFINE(struct pfsync_softc	*, pfsyncif) = NULL;
 #define	V_pfsyncif		VNET(pfsyncif)
 static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
 #define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
 /* Per-vnet counters, exported through the net.pfsync.stats sysctl. */
 static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
 #define	V_pfsyncstats		VNET(pfsyncstats)
 /* Per-vnet CARP demotion adjustment, CARP_MAXSKEW by default. */
 static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
 #define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)
 
 static void	pfsync_timeout(void *);
 static void	pfsync_push(struct pfsync_softc *);
 static void	pfsyncintr(void *);
 static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
 		    void *);
 static void	pfsync_multicast_cleanup(struct pfsync_softc *);
 static void	pfsync_pointers_init(void);
 static void	pfsync_pointers_uninit(void);
 static int	pfsync_init(void);
 static void	pfsync_uninit(void);
 
 /*
  * net.pfsync sysctl tree.  Both leaves point at per-vnet variables via
  * VNET_NAME(), so both carry CTLFLAG_VNET.
  */
 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(pfsyncstats), pfsyncstats,
     "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0,
     "pfsync's CARP demotion factor adjustment");
 
 static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
 static void	pfsync_clone_destroy(struct ifnet *);
 static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
 		    struct pf_state_peer *);
 static int	pfsyncoutput(struct ifnet *, struct mbuf *,
 		    const struct sockaddr *, struct route *);
 static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
 
 static int	pfsync_defer(struct pf_state *, struct mbuf *);
 static void	pfsync_undefer(struct pfsync_deferral *, int);
 static void	pfsync_undefer_state(struct pf_state *, int);
 static void	pfsync_defer_tmo(void *);
 
 static void	pfsync_request_update(u_int32_t, u_int64_t);
 static void	pfsync_update_state_req(struct pf_state *);
 
 static void	pfsync_drop(struct pfsync_softc *);
 static void	pfsync_sendout(int);
 static void	pfsync_send_plus(void *, size_t);
 
 static void	pfsync_bulk_start(void);
 static void	pfsync_bulk_status(u_int8_t);
 static void	pfsync_bulk_update(void *);
 static void	pfsync_bulk_fail(void *);
 
 #ifdef IPSEC
 static void	pfsync_update_net_tdb(struct pfsync_tdb *);
 #endif
 
 /*
  * NOTE(review): presumably the cap compared against sc_bulk_tries;
  * the use is outside this view -- confirm.
  */
 #define PFSYNC_MAX_BULKTRIES	12
 
 /* Per-vnet interface cloner handle for pfsync. */
 VNET_DEFINE(struct if_clone *, pfsync_cloner);
 #define	V_pfsync_cloner	VNET(pfsync_cloner)
 
 /*
  * Cloner create callback: allocate and initialize the pfsync softc and
  * its ifnet, attach the interface and bpf, and publish the softc in
  * V_pfsyncif.
  *
  * Returns EINVAL for any unit other than 0, ENOSPC when no ifnet can be
  * allocated, and 0 on success.
  */
 static int
 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
 {
 	struct pfsync_softc *sc;
 	struct ifnet *ifp;
 	int q;
 
 	if (unit != 0)
 		return (EINVAL);
 
 	sc = malloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO);
 
 	ifp = if_alloc(IFT_PFSYNC);
 	if (ifp == NULL) {
 		free(sc, M_PFSYNC);
 		return (ENOSPC);
 	}
 	sc->sc_ifp = ifp;
 
 	/* Softc defaults and empty queues. */
 	sc->sc_flags |= PFSYNCF_OK;
 	sc->sc_len = PFSYNC_MINPKT;
 	sc->sc_maxupdates = 128;
 	q = 0;
 	while (q < PFSYNC_S_COUNT) {
 		TAILQ_INIT(&sc->sc_qs[q]);
 		q++;
 	}
 	TAILQ_INIT(&sc->sc_upd_req_list);
 	TAILQ_INIT(&sc->sc_deferrals);
 
 	/* Locks and callouts; the bulk callouts run under sc_bulk_mtx. */
 	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
 	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
 	callout_init(&sc->sc_tmo, 1);
 	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
 	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
 
 	/* Interface parameters. */
 	if_initname(ifp, pfsyncname, unit);
 	ifp->if_softc = sc;
 	ifp->if_type = IFT_PFSYNC;
 	ifp->if_ioctl = pfsyncioctl;
 	ifp->if_output = pfsyncoutput;
 	ifp->if_hdrlen = sizeof(struct pfsync_header);
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 
 	if_attach(ifp);
 	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
 
 	V_pfsyncif = sc;
 
 	return (0);
 }
 
 /*
  * Cloner destroy callback: dispose of all pending deferrals, drain the
  * softc callouts, detach and free the interface, then free the softc
  * and clear V_pfsyncif.
  */
 static void
 pfsync_clone_destroy(struct ifnet *ifp)
 {
 	struct pfsync_softc *sc = ifp->if_softc;
 
 	/*
 	 * At this stage, everything should have already been
 	 * cleared by pfsync_uninit(), and we have only to
 	 * drain callouts.
 	 */
 	while (sc->sc_deferred > 0) {
 		struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);
 
 		TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
 		sc->sc_deferred--;
 		/*
 		 * This hunk changes only how the callout_stop() result is
 		 * tested.  When the callout was cancelled, the state
 		 * reference and the mbuf are released here; otherwise the
 		 * callout is drained first.
 		 * NOTE(review): pd_refs is presumably what tells the timeout
 		 * handler not to free pd itself -- confirm in
 		 * pfsync_defer_tmo().
 		 */
-		if (callout_stop(&pd->pd_tmo) & CALLOUT_RET_CANCELLED) {
+		if (callout_stop(&pd->pd_tmo).bit.cancelled) {
 			pf_release_state(pd->pd_st);
 			m_freem(pd->pd_m);
 			free(pd, M_PFSYNC);
 		} else {
 			pd->pd_refs++;
 			callout_drain(&pd->pd_tmo);
 			free(pd, M_PFSYNC);
 		}
 	}
 
 	callout_drain(&sc->sc_tmo);
 	callout_drain(&sc->sc_bulkfail_tmo);
 	callout_drain(&sc->sc_bulk_tmo);
 
 	/* Undo the CARP demotion if PFSYNCF_OK is not set. */
 	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
 		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
 	bpfdetach(ifp);
 	if_detach(ifp);
 
 	pfsync_drop(sc);
 
 	if_free(ifp);
 	if (sc->sc_imo.imo_membership)
 		pfsync_multicast_cleanup(sc);
 	mtx_destroy(&sc->sc_mtx);
 	mtx_destroy(&sc->sc_bulk_mtx);
 	free(sc, M_PFSYNC);
 
 	V_pfsyncif = NULL;
 }
 
 /*
  * Make sure @d has a scrub area when the wire peer @s has scrub_flag
  * set.  Nothing is allocated when the flag is clear or @d already has
  * one.  Returns 0 on success and ENOMEM when the zone allocation fails.
  */
 static int
 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
     struct pf_state_peer *d)
 {
 	if (!s->scrub.scrub_flag || d->scrub != NULL)
 		return (0);
 
 	d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
 
 	return (d->scrub == NULL ? ENOMEM : 0);
 }
 
 
 /*
  * Create a local pf state from the wire-format state @sp and insert it.
  *
  * @flags carries PFSYNC_SI_* bits: PFSYNC_SI_IOCTL marks a state that
  * comes from the ioctl path, PFSYNC_SI_CKSUM that the ruleset checksums
  * match.
  *
  * Returns 0 on success, and also when the state names an unknown
  * interface and is skipped (non-ioctl path).  Returns EINVAL for a zero
  * creator id or, on the ioctl path, an unknown interface; ENOMEM when an
  * allocation fails or the rule's max_states limit is reached; otherwise
  * the error from pf_state_insert().
  */
 static int
 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 #ifndef	__NO_STRICT_ALIGNMENT
 	struct pfsync_state_key key[2];
 #endif
 	struct pfsync_state_key *kw, *ks;
 	struct pf_state	*st = NULL;
 	struct pf_state_key *skw = NULL, *sks = NULL;
 	struct pf_rule *r = NULL;
 	struct pfi_kif	*kif;
 	int error;
 
 	PF_RULES_RASSERT();
 
 	if (sp->creatorid == 0) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("%s: invalid creator id: %08x\n", __func__,
 			    ntohl(sp->creatorid));
 		return (EINVAL);
 	}
 
 	if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("%s: unknown interface: %s\n", __func__,
 			    sp->ifname);
 		if (flags & PFSYNC_SI_IOCTL)
 			return (EINVAL);
 		return (0);	/* skip this state */
 	}
 
 	/*
 	 * If the ruleset checksums match or the state is coming from the ioctl,
 	 * it's safe to associate the state with the rule of that number.
 	 */
 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
 	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
 		r = pf_main_ruleset.rules[
 		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
 	else
 		r = &V_pf_default_rule;
 
 	/* Refuse the state when the rule is already at its state limit. */
 	if ((r->max_states &&
 	    counter_u64_fetch(r->states_cur) >= r->max_states))
 		goto cleanup;
 
 	/*
 	 * XXXGL: consider M_WAITOK in ioctl path after.
 	 */
 	if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
 		goto cleanup;
 
 	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
 		goto cleanup;
 
 	/* On strict-alignment platforms the keys are read from a copy. */
 #ifndef	__NO_STRICT_ALIGNMENT
 	bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2);
 	kw = &key[PF_SK_WIRE];
 	ks = &key[PF_SK_STACK];
 #else
 	kw = &sp->key[PF_SK_WIRE];
 	ks = &sp->key[PF_SK_STACK];
 #endif
 
 	/*
 	 * A separate stack key is allocated only when it differs from the
 	 * wire key; otherwise both pointers share one key.
 	 */
 	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) ||
 	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) ||
 	    kw->port[0] != ks->port[0] ||
 	    kw->port[1] != ks->port[1]) {
 		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
 		if (sks == NULL)
 			goto cleanup;
 	} else
 		sks = skw;
 
 	/* allocate memory for scrub info */
 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
 		goto cleanup;
 
 	/* Copy to state key(s). */
 	skw->addr[0] = kw->addr[0];
 	skw->addr[1] = kw->addr[1];
 	skw->port[0] = kw->port[0];
 	skw->port[1] = kw->port[1];
 	skw->proto = sp->proto;
 	skw->af = sp->af;
 	if (sks != skw) {
 		sks->addr[0] = ks->addr[0];
 		sks->addr[1] = ks->addr[1];
 		sks->port[0] = ks->port[0];
 		sks->port[1] = ks->port[1];
 		sks->proto = sp->proto;
 		sks->af = sp->af;
 	}
 
 	/* copy to state */
 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
 	st->creation = time_uptime - ntohl(sp->creation);
 	st->expire = time_uptime;
 	if (sp->expire) {
 		uint32_t timeout;
 
 		/* Use the rule's timeout, or the default rule's if unset. */
 		timeout = r->timeout[sp->timeout];
 		if (!timeout)
 			timeout = V_pf_default_rule.timeout[sp->timeout];
 
 		/* sp->expire may have been adaptively scaled by export. */
 		st->expire -= timeout - ntohl(sp->expire);
 	}
 
 	st->direction = sp->direction;
 	st->log = sp->log;
 	st->timeout = sp->timeout;
 	st->state_flags = sp->state_flags;
 
 	st->id = sp->id;
 	st->creatorid = sp->creatorid;
 	pf_state_peer_ntoh(&sp->src, &st->src);
 	pf_state_peer_ntoh(&sp->dst, &st->dst);
 
 	st->rule.ptr = r;
 	st->nat_rule.ptr = NULL;
 	st->anchor.ptr = NULL;
 	st->rt_kif = NULL;
 
 	st->pfsync_time = time_uptime;
 	st->sync_state = PFSYNC_S_NONE;
 
 	/* PFSTATE_NOSYNC is set for the insert and cleared again below. */
 	if (!(flags & PFSYNC_SI_IOCTL))
 		st->state_flags |= PFSTATE_NOSYNC;
 
 	if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
 		goto cleanup_state;
 
 	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
 	counter_u64_add(r->states_cur, 1);
 	counter_u64_add(r->states_tot, 1);
 
 	if (!(flags & PFSYNC_SI_IOCTL)) {
 		st->state_flags &= ~PFSTATE_NOSYNC;
 		/* The sender asked for an acknowledgement: queue one. */
 		if (st->state_flags & PFSTATE_ACK) {
 			pfsync_q_ins(st, PFSYNC_S_IACK);
 			pfsync_push(sc);
 		}
 	}
 	st->state_flags &= ~PFSTATE_ACK;
 	PF_STATE_UNLOCK(st);
 
 	return (0);
 
 	/* Failure before insertion: the keys are still owned by us. */
 cleanup:
 	error = ENOMEM;
 	/* A shared key must be freed only once. */
 	if (skw == sks)
 		sks = NULL;
 	if (skw != NULL)
 		uma_zfree(V_pf_state_key_z, skw);
 	if (sks != NULL)
 		uma_zfree(V_pf_state_key_z, sks);
 
 cleanup_state:	/* pf_state_insert() frees the state keys. */
 	if (st) {
 		if (st->dst.scrub)
 			uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
 		if (st->src.scrub)
 			uma_zfree(V_pf_state_scrub_z, st->src.scrub);
 		uma_zfree(V_pf_state_z, st);
 	}
 	return (error);
 }
 
 /*
  * Protocol input routine for pfsync packets.
  *
  * Takes over the packet in *mp (which is set to NULL), validates the
  * receiving interface, TTL, header length, version and total length,
  * and then walks the subheaders, dispatching each one through
  * pfsync_acts[].  Always returns IPPROTO_DONE.
  *
  * The packet is freed at "done".  The two early returns skip that
  * because the chain is already gone: after a failed m_pullup(), and
  * after an action handler returned -1 (the handlers visible in this
  * file do so when m_pulldown() fails).
  */
 static int
 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_pkt pkt;
 	struct mbuf *m = *mp;
 	struct ip *ip = mtod(m, struct ip *);
 	struct pfsync_header *ph;
 	struct pfsync_subheader subh;
 
 	int offset, len;
 	int rv;
 	uint16_t count;
 
 	*mp = NULL;
 	V_pfsyncstats.pfsyncs_ipackets++;
 
 	/* Verify that we have a sync interface configured. */
 	if (!sc || !sc->sc_sync_if || !V_pf_status.running ||
 	    (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		goto done;
 
 	/* verify that the packet came in on the right interface */
 	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
 		V_pfsyncstats.pfsyncs_badif++;
 		goto done;
 	}
 
 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	/* verify that the IP TTL is 255. */
 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
 		V_pfsyncstats.pfsyncs_badttl++;
 		goto done;
 	}
 
 	offset = ip->ip_hl << 2;
 	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
 		V_pfsyncstats.pfsyncs_hdrops++;
 		goto done;
 	}
 
 	if (offset + sizeof(*ph) > m->m_len) {
 		/*
 		 * m_pullup() may hand back a different mbuf than it was
 		 * given, so its result must replace m.
 		 */
 		m = m_pullup(m, offset + sizeof(*ph));
 		if (m == NULL) {
 			V_pfsyncstats.pfsyncs_hdrops++;
 			return (IPPROTO_DONE);
 		}
 		ip = mtod(m, struct ip *);
 	}
 	ph = (struct pfsync_header *)((char *)ip + offset);
 
 	/* verify the version */
 	if (ph->version != PFSYNC_VERSION) {
 		V_pfsyncstats.pfsyncs_badver++;
 		goto done;
 	}
 
 	/* len becomes the end of the pfsync payload within the packet. */
 	len = ntohs(ph->len) + offset;
 	if (m->m_pkthdr.len < len) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		goto done;
 	}
 
 	/* Cheaper to grab this now than having to mess with mbufs later */
 	pkt.ip = ip;
 	pkt.src = ip->ip_src;
 	pkt.flags = 0;
 
 	/*
 	 * Trusting pf_chksum during packet processing, as well as seeking
 	 * in interface name tree, require holding PF_RULES_RLOCK().
 	 */
 	PF_RULES_RLOCK();
 	if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
 		pkt.flags |= PFSYNC_SI_CKSUM;
 
 	offset += sizeof(*ph);
 	while (offset <= len - sizeof(subh)) {
 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
 		offset += sizeof(subh);
 
 		if (subh.action >= PFSYNC_ACT_MAX) {
 			V_pfsyncstats.pfsyncs_badact++;
 			PF_RULES_RUNLOCK();
 			goto done;
 		}
 
 		count = ntohs(subh.count);
 		V_pfsyncstats.pfsyncs_iacts[subh.action] += count;
 		rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count);
 		if (rv == -1) {
 			PF_RULES_RUNLOCK();
 			return (IPPROTO_DONE);
 		}
 
 		/* The handler reports how many bytes its messages took. */
 		offset += rv;
 	}
 	PF_RULES_RUNLOCK();
 
 done:
 	m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * PFSYNC_ACT_CLR handler: for each of the @count clear messages found
  * at @offset, unlink every state whose creatorid matches the message.
  * A message that names an interface unknown to pfi_kif_find() is
  * skipped.
  *
  * Returns the number of bytes consumed, or -1 when m_pulldown() fails.
  */
 static int
 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_clr *clr;
 	struct mbuf *mp;
 	int len = sizeof(*clr) * count;
 	int i, offp;
 	u_int32_t creatorid;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	clr = (struct pfsync_clr *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		creatorid = clr[i].creatorid;
 
 		if (clr[i].ifname[0] != '\0' &&
 		    pfi_kif_find(clr[i].ifname) == NULL)
 			continue;
 
 		/*
 		 * Walk every id-hash row.  The row index has its own name
 		 * so that it does not shadow the message index i.
 		 */
 		for (int row = 0; row <= pf_hashmask; row++) {
 			struct pf_idhash *ih = &V_pf_idhash[row];
 			struct pf_state *s;
 relock:
 			PF_HASHROW_LOCK(ih);
 			LIST_FOREACH(s, &ih->states, entry) {
 				if (s->creatorid == creatorid) {
 					s->state_flags |= PFSTATE_NOSYNC;
 					/*
 					 * The row is re-locked and rescanned
 					 * after each unlink.
 					 * NOTE(review): presumably because
 					 * pf_unlink_state() with
 					 * PF_ENTER_LOCKED drops the row lock
 					 * -- confirm.
 					 */
 					pf_unlink_state(s, PF_ENTER_LOCKED);
 					goto relock;
 				}
 			}
 			PF_HASHROW_UNLOCK(ih);
 		}
 	}
 
 	return (len);
 }
 
 /*
  * Handle PFSYNC_ACT_INS: import each full state record in the message.
  * Records with out-of-range fields are counted as bad values and skipped.
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct mbuf *mp;
 	struct pfsync_state *sa, *sp;
 	int len = sizeof(*sp) * count;
 	int idx, offp;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	sa = (struct pfsync_state *)(mp->m_data + offp);
 
 	for (idx = 0; idx < count; idx++) {
 		int invalid;
 
 		sp = sa + idx;
 
 		/* Check for invalid values. */
 		invalid = sp->timeout >= PFTM_MAX ||
 		    sp->src.state > PF_TCPS_PROXY_DST ||
 		    sp->dst.state > PF_TCPS_PROXY_DST ||
 		    sp->direction > PF_OUT ||
 		    (sp->af != AF_INET && sp->af != AF_INET6);
 		if (invalid) {
 			if (V_pf_status.debug >= PF_DEBUG_MISC)
 				printf("%s: invalid value\n", __func__);
 			V_pfsyncstats.pfsyncs_badval++;
 			continue;
 		}
 
 		/*
 		 * On ENOMEM stop importing, but still report the whole
 		 * message as consumed so the rest of the actions are
 		 * processed.
 		 */
 		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
 			break;
 	}
 
 	return (len);
 }
 
 /*
  * Handle PFSYNC_ACT_INS_ACK: for each acknowledged state that is still
  * flagged PFSTATE_ACK, release its deferred packet (without dropping it).
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_ins_ack *ia, *iaa;
 	struct pf_state *st;
 
 	struct mbuf *mp;
 	int len = count * sizeof(*ia);
 	int offp, i;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		ia = &iaa[i];
 
 		/* Unknown states are silently ignored. */
 		st = pf_find_state_byid(ia->id, ia->creatorid);
 		if (st == NULL)
 			continue;
 
 		if (st->state_flags & PFSTATE_ACK) {
 			PFSYNC_LOCK(V_pfsyncif);
 			pfsync_undefer_state(st, 0);
 			PFSYNC_UNLOCK(V_pfsyncif);
 		}
 		PF_STATE_UNLOCK(st);
 	}
 
 	/* Same value as before; now spelled like the sibling handlers. */
 	return (len);
 }
 
 /*
  * Merge the TCP peer information of a received update into a local state.
  *
  * For each direction the received peer data is copied into the state
  * unless the local copy is judged to be ahead of it.  The return value is
  * the number of directions (0, 1 or 2) that were NOT updated.  The caller
  * must hold the state lock.
  */
 static int
 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
     struct pfsync_state_peer *dst)
 {
 	int src_ahead, dst_ahead;
 
 	PF_STATE_LOCK_ASSERT(st);
 
 	/*
 	 * The state should never go backwards except
 	 * for syn-proxy states.  Neither should the
 	 * sequence window slide backwards.
 	 */
 	if (st->src.state > src->state)
 		src_ahead = (st->src.state < PF_TCPS_PROXY_SRC ||
 		    src->state >= PF_TCPS_PROXY_SRC);
 	else if (st->src.state == src->state)
 		src_ahead = SEQ_GT(st->src.seqlo, ntohl(src->seqlo));
 	else
 		src_ahead = 0;
 
 	if (!src_ahead)
 		pf_state_peer_ntoh(src, &st->src);
 
 	if (st->dst.state > dst->state)
 		dst_ahead = 1;
 	else
 		dst_ahead = (st->dst.state >= TCPS_SYN_SENT &&
 		    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)));
 
 	if (!dst_ahead)
 		pf_state_peer_ntoh(dst, &st->dst);
 
 	return ((src_ahead ? 1 : 0) + (dst_ahead ? 1 : 0));
 }
 
 /*
  * Handle PFSYNC_ACT_UPD: apply full-state updates from a peer.
  *
  * Each record is validated, then matched against the local state table by
  * id and creator id.  Unknown states are imported; known states have their
  * peer data merged.  When the local copy is ahead in at least one
  * direction the update is counted as stale and the local state is queued
  * for sending instead.
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_state *sa, *sp;
 	struct pf_state *st;
 	int sync;
 
 	struct mbuf *mp;
 	int len = count * sizeof(*sp);
 	int offp, i;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	sa = (struct pfsync_state *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		sp = &sa[i];
 
 		/* check for invalid values */
 		if (sp->timeout >= PFTM_MAX ||
 		    sp->src.state > PF_TCPS_PROXY_DST ||
 		    sp->dst.state > PF_TCPS_PROXY_DST) {
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				printf("pfsync_input: PFSYNC_ACT_UPD: "
 				    "invalid value\n");
 			}
 			V_pfsyncstats.pfsyncs_badval++;
 			continue;
 		}
 
 		st = pf_find_state_byid(sp->id, sp->creatorid);
 		if (st == NULL) {
 			/* insert the update */
 			if (pfsync_state_import(sp, 0))
 				V_pfsyncstats.pfsyncs_badstate++;
 			continue;
 		}
 
 		/* A peer update for a deferred state: drop the held packet. */
 		if (st->state_flags & PFSTATE_ACK) {
 			PFSYNC_LOCK(sc);
 			pfsync_undefer_state(st, 1);
 			PFSYNC_UNLOCK(sc);
 		}
 
 		/* sync counts the directions in which our copy is ahead. */
 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
 			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
 		else {
 			sync = 0;
 
 			/*
 			 * Non-TCP protocol state machine always go
 			 * forwards
 			 */
 			if (st->src.state > sp->src.state)
 				sync++;
 			else
 				pf_state_peer_ntoh(&sp->src, &st->src);
 			if (st->dst.state > sp->dst.state)
 				sync++;
 			else
 				pf_state_peer_ntoh(&sp->dst, &st->dst);
 		}
 		/* At least one direction was accepted: refresh the timers. */
 		if (sync < 2) {
 			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
 			pf_state_peer_ntoh(&sp->dst, &st->dst);
 			st->expire = time_uptime;
 			st->timeout = sp->timeout;
 		}
 		st->pfsync_time = time_uptime;
 
 		if (sync) {
 			V_pfsyncstats.pfsyncs_stale++;
 
 			/*
 			 * Queue our version of the state, then release the
 			 * state lock before taking the pfsync lock to push.
 			 */
 			pfsync_update_state(st);
 			PF_STATE_UNLOCK(st);
 			PFSYNC_LOCK(sc);
 			pfsync_push(sc);
 			PFSYNC_UNLOCK(sc);
 			continue;
 		}
 		PF_STATE_UNLOCK(st);
 	}
 
 	return (len);
 }
 
 /*
  * Handle PFSYNC_ACT_UPD_C: apply compressed state updates from a peer.
  *
  * Works like the full-update handler, except that a compressed record
  * cannot be imported: when the referenced state is unknown locally an
  * update request for it is queued instead.
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_upd_c *ua, *up;
 	struct pf_state *st;
 	int len = count * sizeof(*up);
 	int sync;
 	struct mbuf *mp;
 	int offp, i;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	ua = (struct pfsync_upd_c *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++) {
 		up = &ua[i];
 
 		/* check for invalid values */
 		if (up->timeout >= PFTM_MAX ||
 		    up->src.state > PF_TCPS_PROXY_DST ||
 		    up->dst.state > PF_TCPS_PROXY_DST) {
 			if (V_pf_status.debug >= PF_DEBUG_MISC) {
 				printf("pfsync_input: "
 				    "PFSYNC_ACT_UPD_C: "
 				    "invalid value\n");
 			}
 			V_pfsyncstats.pfsyncs_badval++;
 			continue;
 		}
 
 		st = pf_find_state_byid(up->id, up->creatorid);
 		if (st == NULL) {
 			/* We don't have this state. Ask for it. */
 			PFSYNC_LOCK(sc);
 			pfsync_request_update(up->creatorid, up->id);
 			PFSYNC_UNLOCK(sc);
 			continue;
 		}
 
 		/* A peer update for a deferred state: drop the held packet. */
 		if (st->state_flags & PFSTATE_ACK) {
 			PFSYNC_LOCK(sc);
 			pfsync_undefer_state(st, 1);
 			PFSYNC_UNLOCK(sc);
 		}
 
 		/* sync counts the directions in which our copy is ahead. */
 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
 		else {
 			sync = 0;
 
 			/*
 			 * Non-TCP protocol state machine always go
 			 * forwards
 			 */
 			if (st->src.state > up->src.state)
 				sync++;
 			else
 				pf_state_peer_ntoh(&up->src, &st->src);
 			if (st->dst.state > up->dst.state)
 				sync++;
 			else
 				pf_state_peer_ntoh(&up->dst, &st->dst);
 		}
 		/* At least one direction was accepted: refresh the timers. */
 		if (sync < 2) {
 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
 			pf_state_peer_ntoh(&up->dst, &st->dst);
 			st->expire = time_uptime;
 			st->timeout = up->timeout;
 		}
 		st->pfsync_time = time_uptime;
 
 		if (sync) {
 			V_pfsyncstats.pfsyncs_stale++;
 
 			/*
 			 * Queue our version of the state, then release the
 			 * state lock before taking the pfsync lock to push.
 			 */
 			pfsync_update_state(st);
 			PF_STATE_UNLOCK(st);
 			PFSYNC_LOCK(sc);
 			pfsync_push(sc);
 			PFSYNC_UNLOCK(sc);
 			continue;
 		}
 		PF_STATE_UNLOCK(st);
 	}
 
 	return (len);
 }
 
 /*
  * Handle PFSYNC_ACT_UPD_REQ: a peer asks for state updates.
  *
  * A request with both id and creator id zero starts a bulk update.  Any
  * other request queues a full update for the named state, unless that
  * state is unknown (counted as a bad state) or marked PFSTATE_NOSYNC.
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_upd_req *req, *reqs;
 	struct pf_state *st;
 	struct mbuf *mp;
 	int len = count * sizeof(*req);
 	int n, offp;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	reqs = (struct pfsync_upd_req *)(mp->m_data + offp);
 
 	for (n = 0; n < count; n++) {
 		req = reqs + n;
 
 		/* The all-zero request means "send me everything". */
 		if (req->id == 0 && req->creatorid == 0) {
 			pfsync_bulk_start();
 			continue;
 		}
 
 		st = pf_find_state_byid(req->id, req->creatorid);
 		if (st == NULL) {
 			V_pfsyncstats.pfsyncs_badstate++;
 			continue;
 		}
 
 		if (!(st->state_flags & PFSTATE_NOSYNC))
 			pfsync_update_state_req(st);
 		PF_STATE_UNLOCK(st);
 	}
 
 	return (len);
 }
 
 /*
  * Handle PFSYNC_ACT_DEL: unlink every local state named by a full state
  * record in the message.  States are flagged PFSTATE_NOSYNC before being
  * unlinked; unknown states are counted as bad states.
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct mbuf *mp;
 	struct pfsync_state *recs, *rec;
 	struct pf_state *st;
 	int len = count * sizeof(*rec);
 	int offp, n;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	recs = (struct pfsync_state *)(mp->m_data + offp);
 
 	for (n = 0; n < count; n++) {
 		rec = recs + n;
 
 		st = pf_find_state_byid(rec->id, rec->creatorid);
 		if (st != NULL) {
 			st->state_flags |= PFSTATE_NOSYNC;
 			pf_unlink_state(st, PF_ENTER_LOCKED);
 		} else
 			V_pfsyncstats.pfsyncs_badstate++;
 	}
 
 	return (len);
 }
 
 /*
  * Handle PFSYNC_ACT_DEL_C: unlink every local state named by a compressed
  * delete record in the message.  States are flagged PFSTATE_NOSYNC before
  * being unlinked; unknown states are counted as bad states.
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct mbuf *mp;
 	struct pfsync_del_c *recs, *rec;
 	struct pf_state *st;
 	int len = count * sizeof(*rec);
 	int offp, n;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	recs = (struct pfsync_del_c *)(mp->m_data + offp);
 
 	for (n = 0; n < count; n++) {
 		rec = recs + n;
 
 		st = pf_find_state_byid(rec->id, rec->creatorid);
 		if (st != NULL) {
 			st->state_flags |= PFSTATE_NOSYNC;
 			pf_unlink_state(st, PF_ENTER_LOCKED);
 		} else
 			V_pfsyncstats.pfsyncs_badstate++;
 	}
 
 	return (len);
 }
 
 /*
  * Handle PFSYNC_ACT_BUS: bulk update status from a peer.
  *
  * The message is ignored (but still reported as consumed) unless we have
  * an outstanding bulk update request, i.e. sc_ureq_sent is non-zero.
  * PFSYNC_BUS_START re-arms the bulk failure callout; PFSYNC_BUS_END with
  * an acceptable timestamp completes the bulk update and marks pfsync OK.
  *
  * Only the first pfsync_bus record is examined, whatever 'count' says.
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_bus *bus;
 	struct mbuf *mp;
 	int len = count * sizeof(*bus);
 	int offp;
 
 	PFSYNC_BLOCK(sc);
 
 	/* If we're not waiting for a bulk update, who cares. */
 	if (sc->sc_ureq_sent == 0) {
 		PFSYNC_BUNLOCK(sc);
 		return (len);
 	}
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		PFSYNC_BUNLOCK(sc);
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	bus = (struct pfsync_bus *)(mp->m_data + offp);
 
 	switch (bus->status) {
 	case PFSYNC_BUS_START:
 		/*
 		 * Timeout is 4 seconds plus one tick per packet needed to
 		 * carry the configured state limit at the current MTU.
 		 * NOTE(review): the divisor is zero when
 		 * if_mtu - PFSYNC_MINPKT < sizeof(struct pfsync_state);
 		 * confirm the MTU is constrained elsewhere.
 		 */
 		callout_reset(&sc->sc_bulkfail_tmo, 4 * hz +
 		    V_pf_limits[PF_LIMIT_STATES].limit /
 		    ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) /
 		    sizeof(struct pfsync_state)),
 		    pfsync_bulk_fail, sc);
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("pfsync: received bulk update start\n");
 		break;
 
 	case PFSYNC_BUS_END:
 		/*
 		 * endtime is the peer's elapsed time for the bulk run; it
 		 * is accepted when it places the start of the run at or
 		 * after the moment our request was sent.
 		 */
 		if (time_uptime - ntohl(bus->endtime) >=
 		    sc->sc_ureq_sent) {
 			/* that's it, we're happy */
 			sc->sc_ureq_sent = 0;
 			sc->sc_bulk_tries = 0;
 			callout_stop(&sc->sc_bulkfail_tmo);
 			if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
 				(*carp_demote_adj_p)(-V_pfsync_carp_adj,
 				    "pfsync bulk done");
 			sc->sc_flags |= PFSYNCF_OK;
 			if (V_pf_status.debug >= PF_DEBUG_MISC)
 				printf("pfsync: received valid "
 				    "bulk update end\n");
 		} else {
 			if (V_pf_status.debug >= PF_DEBUG_MISC)
 				printf("pfsync: received invalid "
 				    "bulk update end: bad timestamp\n");
 		}
 		break;
 	}
 	PFSYNC_BUNLOCK(sc);
 
 	return (len);
 }
 
 /*
  * Handle PFSYNC_ACT_TDB: IPsec TDB updates from a peer.
  *
  * Without IPSEC the message is merely skipped.  With IPSEC each record is
  * handed to pfsync_update_net_tdb().
  *
  * Returns the number of message bytes consumed, or -1 when m_pulldown()
  * fails.
  */
 static int
 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	int len = count * sizeof(struct pfsync_tdb);
 
 #if defined(IPSEC)
 	struct pfsync_tdb *tp;
 	struct mbuf *mp;
 	int offp;
 	int i;
 
 	mp = m_pulldown(m, offset, len, &offp);
 	if (mp == NULL) {
 		V_pfsyncstats.pfsyncs_badlen++;
 		return (-1);
 	}
 	tp = (struct pfsync_tdb *)(mp->m_data + offp);
 
 	for (i = 0; i < count; i++)
 		pfsync_update_net_tdb(&tp[i]);
 #endif
 
 	return (len);
 }
 
 #if defined(IPSEC)
 /*
  * Update an in-kernel tdb. Silently fail if no tdb is found.
  *
  * The record's rpl and cur_bytes fields are converted to host byte order
  * in place.  A record with a reserved SPI, an unsupported address family,
  * or counters lower than the kernel's current values is rejected and
  * counted as a bad state.
  */
 static void
 pfsync_update_net_tdb(struct pfsync_tdb *pt)
 {
 	struct tdb		*tdb;
 
 	/* check for invalid values */
 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
 	    (pt->dst.sa.sa_family != AF_INET &&
 	    pt->dst.sa.sa_family != AF_INET6))
 		goto bad;
 
 	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
 	if (tdb) {
 		pt->rpl = ntohl(pt->rpl);
 		pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes);
 
 		/* Neither replay nor byte counter should ever decrease. */
 		if (pt->rpl < tdb->tdb_rpl ||
 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
 			goto bad;
 		}
 
 		tdb->tdb_rpl = pt->rpl;
 		tdb->tdb_cur_bytes = pt->cur_bytes;
 	}
 	return;
 
 bad:
 	if (V_pf_status.debug >= PF_DEBUG_MISC)
 		printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: "
 		    "invalid value\n");
 	V_pfsyncstats.pfsyncs_badstate++;
 	return;
 }
 #endif
 
 
 /*
  * Handle PFSYNC_ACT_EOF: end of the pfsync packet.
  *
  * Counts a length error when the EOF marker does not sit exactly at the
  * end of the packet.  The mbuf is always freed here and -1 is returned so
  * that the caller stops processing.
  */
 static int
 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 	/* Are we at the right place in the packet? */
 	if (m->m_pkthdr.len != offset)
 		V_pfsyncstats.pfsyncs_badlen++;
 
 	/* Done: release the packet and let the caller return. */
 	m_freem(m);
 
 	return (-1);
 }
 
 /*
  * Handler for unsupported actions: count a bad action, free the packet
  * and return -1 so that the caller stops processing.
  */
 static int
 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
 {
 
 	V_pfsyncstats.pfsyncs_badact++;
 	m_freem(m);
 
 	return (-1);
 }
 
 /*
  * Interface output routine: every packet handed to it is freed and
  * success (0) is reported.
  */
 static int
 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	struct route *rt)
 {
 
 	m_freem(m);
 
 	return (0);
 }
 
 /*
  * ioctl handler for the pfsync interface.
  *
  * SIOCSIFFLAGS   - mirror IFF_UP into IFF_DRV_RUNNING and (un)install the
  *                  pfsync hook pointers.
  * SIOCSIFMTU     - set the MTU; it must exceed PFSYNC_MINPKT and not exceed
  *                  the sync interface's MTU.  Pending data is flushed first
  *                  when the MTU shrinks.
  * SIOCGETPFSYNC  - copy the current configuration out to userland.
  * SIOCSETPFSYNC  - (privileged) set sync device, peer, maxupdates and the
  *                  defer flag, then request a bulk update.
  *
  * Returns 0 on success or an errno value; unknown commands yield ENOTTY.
  */
 /* ARGSUSED */
 static int
 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct pfsync_softc *sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct pfsyncreq pfsyncr;
 	int error;
 
 	switch (cmd) {
 	case SIOCSIFFLAGS:
 		PFSYNC_LOCK(sc);
 		if (ifp->if_flags & IFF_UP) {
 			ifp->if_drv_flags |= IFF_DRV_RUNNING;
 			PFSYNC_UNLOCK(sc);
 			pfsync_pointers_init();
 		} else {
 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 			PFSYNC_UNLOCK(sc);
 			pfsync_pointers_uninit();
 		}
 		break;
 	case SIOCSIFMTU:
 		if (!sc->sc_sync_if ||
 		    ifr->ifr_mtu <= PFSYNC_MINPKT ||
 		    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
 			return (EINVAL);
 		/* Shrinking: flush what was sized for the old MTU. */
 		if (ifr->ifr_mtu < ifp->if_mtu) {
 			PFSYNC_LOCK(sc);
 			if (sc->sc_len > PFSYNC_MINPKT)
 				pfsync_sendout(1);
 			PFSYNC_UNLOCK(sc);
 		}
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 	case SIOCGETPFSYNC:
 		bzero(&pfsyncr, sizeof(pfsyncr));
 		PFSYNC_LOCK(sc);
 		if (sc->sc_sync_if) {
 			strlcpy(pfsyncr.pfsyncr_syncdev,
 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
 		}
 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
 		pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER ==
 		    (sc->sc_flags & PFSYNCF_DEFER));
 		PFSYNC_UNLOCK(sc);
 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
 
 	case SIOCSETPFSYNC:
 	    {
 		struct ip_moptions *imo = &sc->sc_imo;
 		struct ifnet *sifp;
 		struct ip *ip;
 		void *mship = NULL;
 
 		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
 			return (error);
 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
 			return (error);
 
 		if (pfsyncr.pfsyncr_maxupdates > 255)
 			return (EINVAL);
 
 		if (pfsyncr.pfsyncr_syncdev[0] == 0)
 			sifp = NULL;
 		else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
 			return (EINVAL);
 
 		/*
 		 * Multicast peer: allocate the membership array now, with
 		 * M_WAITOK, before the pfsync lock is taken.
 		 */
 		if (sifp != NULL && (
 		    pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
 		    pfsyncr.pfsyncr_syncpeer.s_addr ==
 		    htonl(INADDR_PFSYNC_GROUP)))
 			mship = malloc((sizeof(struct in_multi *) *
 			    IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);
 
 		PFSYNC_LOCK(sc);
 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
 			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
 		else
 			sc->sc_sync_peer.s_addr =
 			    pfsyncr.pfsyncr_syncpeer.s_addr;
 
 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
 		if (pfsyncr.pfsyncr_defer) {
 			sc->sc_flags |= PFSYNCF_DEFER;
 			pfsync_defer_ptr = pfsync_defer;
 		} else {
 			sc->sc_flags &= ~PFSYNCF_DEFER;
 			pfsync_defer_ptr = NULL;
 		}
 
 		/* No sync device: detach from the old one and stop here. */
 		if (sifp == NULL) {
 			if (sc->sc_sync_if)
 				if_rele(sc->sc_sync_if);
 			sc->sc_sync_if = NULL;
 			if (imo->imo_membership)
 				pfsync_multicast_cleanup(sc);
 			PFSYNC_UNLOCK(sc);
 			break;
 		}
 
 		if (sc->sc_len > PFSYNC_MINPKT &&
 		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
 		    (sc->sc_sync_if != NULL &&
 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
 			pfsync_sendout(1);
 
 		if (imo->imo_membership)
 			pfsync_multicast_cleanup(sc);
 
 		if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
 			error = pfsync_multicast_setup(sc, sifp, mship);
 			if (error) {
 				/*
 				 * Drop the pfsync lock before bailing out;
 				 * returning with it held would leak it.
 				 */
 				PFSYNC_UNLOCK(sc);
 				if_rele(sifp);
 				free(mship, M_PFSYNC);
 				return (error);
 			}
 		}
 		if (sc->sc_sync_if)
 			if_rele(sc->sc_sync_if);
 		sc->sc_sync_if = sifp;
 
 		/* Rebuild the IP header template used for outgoing packets. */
 		ip = &sc->sc_template;
 		bzero(ip, sizeof(*ip));
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
 		ip->ip_tos = IPTOS_LOWDELAY;
 		/* len and id are set later. */
 		ip->ip_off = htons(IP_DF);
 		ip->ip_ttl = PFSYNC_DFLTTL;
 		ip->ip_p = IPPROTO_PFSYNC;
 		ip->ip_src.s_addr = INADDR_ANY;
 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
 
 		/* Request a full state table update. */
 		if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
 			(*carp_demote_adj_p)(V_pfsync_carp_adj,
 			    "pfsync bulk start");
 		sc->sc_flags &= ~PFSYNCF_OK;
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("pfsync: requesting bulk update\n");
 		pfsync_request_update(0, 0);
 		PFSYNC_UNLOCK(sc);
 		PFSYNC_BLOCK(sc);
 		sc->sc_ureq_sent = time_uptime;
 		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
 		    sc);
 		PFSYNC_BUNLOCK(sc);
 
 		break;
 	    }
 	default:
 		return (ENOTTY);
 	}
 
 	return (0);
 }
 
 /*
  * Queue writer: export the state into 'buf' as a struct pfsync_state.
  */
 static void
 pfsync_out_state(struct pf_state *st, void *buf)
 {
 
 	pfsync_state_export((struct pfsync_state *)buf, st);
 }
 
 /*
  * Queue writer: fill 'buf' with an insert acknowledgement (id and creator
  * id) for the state.
  */
 static void
 pfsync_out_iack(struct pf_state *st, void *buf)
 {
 	struct pfsync_ins_ack *ack = (struct pfsync_ins_ack *)buf;
 
 	ack->creatorid = st->creatorid;
 	ack->id = st->id;
 }
 
 /*
  * Queue writer: fill 'buf' with a compressed update for the state.  The
  * record is zeroed first; peer data is converted with
  * pf_state_peer_hton().
  */
 static void
 pfsync_out_upd_c(struct pf_state *st, void *buf)
 {
 	struct pfsync_upd_c *rec = (struct pfsync_upd_c *)buf;
 
 	bzero(rec, sizeof(*rec));
 
 	rec->id = st->id;
 	rec->creatorid = st->creatorid;
 	rec->timeout = st->timeout;
 	pf_state_peer_hton(&st->src, &rec->src);
 	pf_state_peer_hton(&st->dst, &rec->dst);
 }
 
 /*
  * Queue writer: fill 'buf' with a compressed delete record for the state
  * and flag the state PFSTATE_NOSYNC.
  */
 static void
 pfsync_out_del(struct pf_state *st, void *buf)
 {
 	struct pfsync_del_c *rec = (struct pfsync_del_c *)buf;
 
 	rec->creatorid = st->creatorid;
 	rec->id = st->id;
 
 	st->state_flags |= PFSTATE_NOSYNC;
 }
 
 /*
  * Throw away everything queued for transmission: release every state on
  * the per-action queues, free all pending update requests, forget any
  * "plus" region and reset the packet length to the bare minimum.
  */
 static void
 pfsync_drop(struct pfsync_softc *sc)
 {
 	struct pf_state *st, *st_next;
 	struct pfsync_upd_req_item *req;
 	int qid;
 
 	for (qid = 0; qid < PFSYNC_S_COUNT; qid++) {
 		if (!TAILQ_EMPTY(&sc->sc_qs[qid])) {
 			TAILQ_FOREACH_SAFE(st, &sc->sc_qs[qid], sync_list,
 			    st_next) {
 				KASSERT(st->sync_state == qid,
 					("%s: st->sync_state == q",
 						__func__));
 				st->sync_state = PFSYNC_S_NONE;
 				pf_release_state(st);
 			}
 			/* Entries were not unlinked one by one; reset. */
 			TAILQ_INIT(&sc->sc_qs[qid]);
 		}
 	}
 
 	for (;;) {
 		req = TAILQ_FIRST(&sc->sc_upd_req_list);
 		if (req == NULL)
 			break;
 		TAILQ_REMOVE(&sc->sc_upd_req_list, req, ur_entry);
 		free(req, M_PFSYNC);
 	}
 
 	sc->sc_len = PFSYNC_MINPKT;
 	sc->sc_plus = NULL;
 }
 
 /*
  * Assemble everything currently queued into one pfsync packet and hand it
  * to the interface send queue.
  *
  * The packet consists of the IP header template, the pfsync header, one
  * subheader plus records per non-empty state queue, the pending update
  * requests, an optional caller supplied "plus" region and a final EOF
  * subheader.  The queues are emptied as they are written out.
  *
  * schedswi: when non-zero the pfsync software interrupt is scheduled after
  * the packet has been queued.
  *
  * The caller must hold the pfsync lock and sc_len must exceed
  * PFSYNC_MINPKT.
  */
 static void
 pfsync_sendout(int schedswi)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct ifnet *ifp = sc->sc_ifp;
 	struct mbuf *m;
 	struct ip *ip;
 	struct pfsync_header *ph;
 	struct pfsync_subheader *subh;
 	struct pf_state *st;
 	struct pfsync_upd_req_item *ur;
 	int offset;
 	int q, count = 0;
 
 	KASSERT(sc != NULL, ("%s: null sc", __func__));
 	KASSERT(sc->sc_len > PFSYNC_MINPKT,
 	    ("%s: sc_len %zu", __func__, sc->sc_len));
 	PFSYNC_LOCK_ASSERT(sc);
 
 	/* Nobody is listening: neither bpf nor a sync interface. */
 	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
 		pfsync_drop(sc);
 		return;
 	}
 
 	/*
 	 * On allocation failure the queues are left untouched, so the data
 	 * stays pending.
 	 */
 	m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL) {
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
 		V_pfsyncstats.pfsyncs_onomem++;
 		return;
 	}
 	m->m_data += max_linkhdr;
 	m->m_len = m->m_pkthdr.len = sc->sc_len;
 
 	/* build the ip header */
 	ip = (struct ip *)m->m_data;
 	bcopy(&sc->sc_template, ip, sizeof(*ip));
 	offset = sizeof(*ip);
 
 	ip->ip_len = htons(m->m_pkthdr.len);
 	ip_fillid(ip);
 
 	/* build the pfsync header */
 	ph = (struct pfsync_header *)(m->m_data + offset);
 	bzero(ph, sizeof(*ph));
 	offset += sizeof(*ph);
 
 	ph->version = PFSYNC_VERSION;
 	ph->len = htons(sc->sc_len - sizeof(*ip));
 	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
 
 	/* walk the queues */
 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
 			continue;
 
 		/* Reserve the subheader; it is filled in once count is known. */
 		subh = (struct pfsync_subheader *)(m->m_data + offset);
 		offset += sizeof(*subh);
 
 		count = 0;
 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
 			KASSERT(st->sync_state == q,
 				("%s: st->sync_state == q",
 					__func__));
 			/*
 			 * XXXGL: some of write methods do unlocked reads
 			 * of state data :(
 			 */
 			pfsync_qs[q].write(st, m->m_data + offset);
 			offset += pfsync_qs[q].len;
 			st->sync_state = PFSYNC_S_NONE;
 			pf_release_state(st);
 			count++;
 		}
 		/* Entries were not unlinked one by one; reset the queue. */
 		TAILQ_INIT(&sc->sc_qs[q]);
 
 		bzero(subh, sizeof(*subh));
 		subh->action = pfsync_qs[q].action;
 		subh->count = htons(count);
 		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
 	}
 
 	/* Pending update requests follow the state queues. */
 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
 		subh = (struct pfsync_subheader *)(m->m_data + offset);
 		offset += sizeof(*subh);
 
 		count = 0;
 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
 
 			bcopy(&ur->ur_msg, m->m_data + offset,
 			    sizeof(ur->ur_msg));
 			offset += sizeof(ur->ur_msg);
 			free(ur, M_PFSYNC);
 			count++;
 		}
 
 		bzero(subh, sizeof(*subh));
 		subh->action = PFSYNC_ACT_UPD_REQ;
 		subh->count = htons(count);
 		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
 	}
 
 	/* has someone built a custom region for us to add? */
 	if (sc->sc_plus != NULL) {
 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
 		offset += sc->sc_pluslen;
 
 		sc->sc_plus = NULL;
 	}
 
 	/* Terminate the packet with an EOF subheader. */
 	subh = (struct pfsync_subheader *)(m->m_data + offset);
 	offset += sizeof(*subh);
 
 	bzero(subh, sizeof(*subh));
 	subh->action = PFSYNC_ACT_EOF;
 	subh->count = htons(1);
 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;
 
 	/* we're done, let's put it on the wire */
 	if (ifp->if_bpf) {
 		/* Temporarily hide the IP header from bpf listeners. */
 		m->m_data += sizeof(*ip);
 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
 		BPF_MTAP(ifp, m);
 		m->m_data -= sizeof(*ip);
 		m->m_len = m->m_pkthdr.len = sc->sc_len;
 	}
 
 	/* bpf-only operation: nothing to transmit. */
 	if (sc->sc_sync_if == NULL) {
 		sc->sc_len = PFSYNC_MINPKT;
 		m_freem(m);
 		return;
 	}
 
 	if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
 	if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
 	sc->sc_len = PFSYNC_MINPKT;
 
 	/* Queue for the software interrupt; drop when the queue is full. */
 	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
 		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
 	else {
 		m_freem(m);
 		if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
 	}
 	if (schedswi)
 		swi_sched(V_pfsync_swi_cookie, 0);
 }
 
 /*
  * Announce a newly created state to the peers by placing it on the insert
  * queue.  States already flagged PFSTATE_NOSYNC are ignored; states
  * created by a no-sync rule, or carrying pfsync traffic itself, are
  * flagged PFSTATE_NOSYNC and ignored as well.
  */
 static void
 pfsync_insert_state(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	int nosync;
 
 	if (st->state_flags & PFSTATE_NOSYNC)
 		return;
 
 	nosync = (st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC;
 	if (nosync) {
 		st->state_flags |= PFSTATE_NOSYNC;
 		return;
 	}
 
 	KASSERT(st->sync_state == PFSYNC_S_NONE,
 		("%s: st->sync_state %u", __func__, st->sync_state));
 
 	PFSYNC_LOCK(sc);
 	/* First item of an otherwise empty packet: arm the flush timer. */
 	if (sc->sc_len == PFSYNC_MINPKT)
 		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
 	pfsync_q_ins(st, PFSYNC_S_INS);
 	PFSYNC_UNLOCK(sc);
 
 	st->sync_updates = 0;
 }
 
 static int
 pfsync_defer(struct pf_state *st, struct mbuf *m)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_deferral *pd;
 
 	if (m->m_flags & (M_BCAST|M_MCAST))
 		return (0);
 
 	PFSYNC_LOCK(sc);
 
 	if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) ||
 	    !(sc->sc_flags & PFSYNCF_DEFER)) {
 		PFSYNC_UNLOCK(sc);
 		return (0);
 	}
 
 	 if (sc->sc_deferred >= 128)
 		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
 
 	pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
 	if (pd == NULL)
 		return (0);
 	sc->sc_deferred++;
 
 	m->m_flags |= M_SKIP_FIREWALL;
 	st->state_flags |= PFSTATE_ACK;
 
 	pd->pd_sc = sc;
 	pd->pd_refs = 0;
 	pd->pd_st = st;
 	pf_ref_state(st);
 	pd->pd_m = m;
 
 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
 	callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
 	callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
 
 	pfsync_push(sc);
 
 	return (1);
 }
 
 static void
 pfsync_undefer(struct pfsync_deferral *pd, int drop)
 {
 	struct pfsync_softc *sc = pd->pd_sc;
 	struct mbuf *m = pd->pd_m;
 	struct pf_state *st = pd->pd_st;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
 	sc->sc_deferred--;
 	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
 	free(pd, M_PFSYNC);
 	pf_release_state(st);
 
 	if (drop)
 		m_freem(m);
 	else {
 		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
 		pfsync_push(sc);
 	}
 }
 
 /*
  * Callout handler: a deferral timed out without being acknowledged.
  *
  * Unlinks the deferral, clears PFSTATE_ACK on its state and sends the
  * held packet with ip_output().  The function is entered with the pfsync
  * lock held (asserted below) and releases it itself; the callout is
  * initialised with CALLOUT_RETURNUNLOCKED where the deferral is created.
  */
 static void
 pfsync_defer_tmo(void *arg)
 {
 	struct pfsync_deferral *pd = arg;
 	struct pfsync_softc *sc = pd->pd_sc;
 	struct mbuf *m = pd->pd_m;
 	struct pf_state *st = pd->pd_st;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
 
 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
 	sc->sc_deferred--;
 	pd->pd_st->state_flags &= ~PFSTATE_ACK;	/* XXX: locking! */
 	/*
 	 * NOTE(review): the record is only freed when pd_refs is zero;
 	 * who frees it otherwise is not visible here.
 	 */
 	if (pd->pd_refs == 0)
 		free(pd, M_PFSYNC);
 	PFSYNC_UNLOCK(sc);
 
 	/* m and st were copied out of pd above, before it was freed. */
 	ip_output(m, NULL, NULL, 0, NULL, NULL);
 
 	pf_release_state(st);
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Find the deferral belonging to a state and undo it, either dropping
  * (drop != 0) or transmitting the held packet.  The deferral is only torn
  * down here when its timeout callout could be cancelled.
  *
  * Panics when no deferral exists for the state.  The caller must hold
  * the pfsync lock.
  */
 static void
 pfsync_undefer_state(struct pf_state *st, int drop)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_deferral *pd;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
 		 if (pd->pd_st == st) {
-			if (callout_stop(&pd->pd_tmo) & CALLOUT_RET_CANCELLED)
+			if (callout_stop(&pd->pd_tmo).bit.cancelled)
 				pfsync_undefer(pd, drop);
 			return;
 		}
 	}
 
 	panic("%s: unable to find deferred state", __func__);
 }
 
 /*
  * Note that a state has changed and make sure a compressed update for it
  * is queued.
  *
  * A pending deferral is released first.  PFSTATE_NOSYNC states are only
  * removed from whatever queue they are on.  A push is requested when a
  * TCP state has accumulated sc_maxupdates updates, or when the state was
  * last synchronised less than two seconds ago.
  *
  * The caller must hold the state lock; the pfsync lock is taken here.
  */
 static void
 pfsync_update_state(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	int sync = 0;
 
 	PF_STATE_LOCK_ASSERT(st);
 	PFSYNC_LOCK(sc);
 
 	if (st->state_flags & PFSTATE_ACK)
 		pfsync_undefer_state(st, 0);
 	if (st->state_flags & PFSTATE_NOSYNC) {
 		if (st->sync_state != PFSYNC_S_NONE)
 			pfsync_q_del(st);
 		PFSYNC_UNLOCK(sc);
 		return;
 	}
 
 	/* First item of an otherwise empty packet: arm the flush timer. */
 	if (sc->sc_len == PFSYNC_MINPKT)
 		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
 
 	switch (st->sync_state) {
 	case PFSYNC_S_UPD_C:
 	case PFSYNC_S_UPD:
 	case PFSYNC_S_INS:
 		/* we're already handling it */
 
 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
 			st->sync_updates++;
 			if (st->sync_updates >= sc->sc_maxupdates)
 				sync = 1;
 		}
 		break;
 
 	case PFSYNC_S_IACK:
 		pfsync_q_del(st);
 		/* FALLTHROUGH: requeue as a compressed update */
 	case PFSYNC_S_NONE:
 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
 		st->sync_updates = 0;
 		break;
 
 	default:
 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
 	}
 
 	if (sync || (time_uptime - st->pfsync_time) < 2)
 		pfsync_push(sc);
 
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Queue a request asking the peers to send us the state identified by
  * (creatorid, id).  A request already waiting in the current list is not
  * duplicated.  When the request would not fit into the packet being
  * assembled, that packet is sent out first.
  *
  * Allocation failure is silently ignored.  The caller must hold the
  * pfsync lock.
  */
 static void
 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	struct pfsync_upd_req_item *req;
 	size_t need;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	/*
 	 * Suppress duplicates, but only among the requests that have not
 	 * been packed into a datagram yet.
 	 */
 	TAILQ_FOREACH(req, &sc->sc_upd_req_list, ur_entry) {
 		if (item_matches(req, creatorid, id))
 			return;
 	}
 
 	req = malloc(sizeof(*req), M_PFSYNC, M_NOWAIT);
 	if (req == NULL)
 		return; /* XXX stats */
 
 	req->ur_msg.creatorid = creatorid;
 	req->ur_msg.id = id;
 
 	/* The first request of a packet also pays for the subheader. */
 	need = sizeof(struct pfsync_upd_req);
 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
 		need += sizeof(struct pfsync_subheader);
 
 	if (sc->sc_len + need > sc->sc_ifp->if_mtu) {
 		pfsync_sendout(1);
 
 		need = sizeof(struct pfsync_subheader) +
 		    sizeof(struct pfsync_upd_req);
 	}
 
 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, req, ur_entry);
 	sc->sc_len += need;
 }
 
 /*
  * Queue a full update for a state in answer to an update request and push
  * it out.  PFSTATE_NOSYNC states are only removed from whatever queue they
  * are on.  States already queued for insert, full update or delete are
  * left alone.
  *
  * The caller must hold the state lock; the pfsync lock is taken here.
  */
 static void
 pfsync_update_state_req(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	PF_STATE_LOCK_ASSERT(st);
 	PFSYNC_LOCK(sc);
 
 	if ((st->state_flags & PFSTATE_NOSYNC) != 0) {
 		if (st->sync_state != PFSYNC_S_NONE)
 			pfsync_q_del(st);
 	} else {
 		switch (st->sync_state) {
 		case PFSYNC_S_INS:
 		case PFSYNC_S_UPD:
 		case PFSYNC_S_DEL:
 			/* we're already handling it */
 			break;
 
 		case PFSYNC_S_UPD_C:
 		case PFSYNC_S_IACK:
 			pfsync_q_del(st);
 			/* FALLTHROUGH */
 		case PFSYNC_S_NONE:
 			pfsync_q_ins(st, PFSYNC_S_UPD);
 			pfsync_push(sc);
 			break;
 
 		default:
 			panic("%s: unexpected sync state %d", __func__, st->sync_state);
 		}
 	}
 
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Tell the peers that a state is going away.  A pending deferral is
  * dropped first.  PFSTATE_NOSYNC states are only removed from whatever
  * queue they are on.  A state still waiting on the insert queue is simply
  * dequeued; anything else ends up on the delete queue.
  */
 static void
 pfsync_delete_state(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	PFSYNC_LOCK(sc);
 
 	if (st->state_flags & PFSTATE_ACK)
 		pfsync_undefer_state(st, 1);
 
 	if (st->state_flags & PFSTATE_NOSYNC) {
 		if (st->sync_state != PFSYNC_S_NONE)
 			pfsync_q_del(st);
 		goto out;
 	}
 
 	/* First item of an otherwise empty packet: arm the flush timer. */
 	if (sc->sc_len == PFSYNC_MINPKT)
 		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
 
 	switch (st->sync_state) {
 	case PFSYNC_S_INS:
 		/* We never got to tell the world so just forget about it. */
 		pfsync_q_del(st);
 		break;
 
 	case PFSYNC_S_IACK:
 	case PFSYNC_S_UPD:
 	case PFSYNC_S_UPD_C:
 		pfsync_q_del(st);
 		/* FALLTHROUGH to putting it on the del list */
 
 	case PFSYNC_S_NONE:
 		pfsync_q_ins(st, PFSYNC_S_DEL);
 		break;
 
 	default:
 		panic("%s: unexpected sync state %d", __func__, st->sync_state);
 	}
 
 out:
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Send a PFSYNC_ACT_CLR message carrying one clear record for the given
  * creator id and interface name.  The message is built on the stack and
  * sent immediately as a "plus" region.
  */
 static void
 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
 {
 	struct {
 		struct pfsync_subheader subh;
 		struct pfsync_clr clr;
 	} __packed msg;
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	bzero(&msg, sizeof(msg));
 
 	/* Subheader: a single clear record follows. */
 	msg.subh.action = PFSYNC_ACT_CLR;
 	msg.subh.count = htons(1);
 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;
 
 	/* The clear record itself. */
 	msg.clr.creatorid = creatorid;
 	strlcpy(msg.clr.ifname, ifname, sizeof(msg.clr.ifname));
 
 	PFSYNC_LOCK(sc);
 	pfsync_send_plus(&msg, sizeof(msg));
 	PFSYNC_UNLOCK(sc);
 }
 
 /*
  * Append a state to queue 'q' and account for the space its record will
  * take in the outgoing packet.  When the record would not fit, the packet
  * assembled so far is sent out first.  The queue holds a reference on the
  * state.
  *
  * The state must not be on any queue; the caller must hold the pfsync
  * lock.
  */
 static void
 pfsync_q_ins(struct pf_state *st, int q)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	size_t need;
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	KASSERT(st->sync_state == PFSYNC_S_NONE,
 		("%s: st->sync_state %u", __func__, st->sync_state));
 	KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
 	    sc->sc_len));
 
 	/* The first record of a queue also pays for the subheader. */
 	need = pfsync_qs[q].len;
 	if (TAILQ_EMPTY(&sc->sc_qs[q]))
 		need += sizeof(struct pfsync_subheader);
 
 	if (sc->sc_len + need > sc->sc_ifp->if_mtu) {
 		pfsync_sendout(1);
 
 		need = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
 	}
 
 	sc->sc_len += need;
 	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
 	st->sync_state = q;
 	pf_ref_state(st);
 }
 
 /*
  * Remove a state from the queue it is currently on, give back the space
  * its record occupied in the outgoing packet and drop the queue's
  * reference on the state.  When the queue becomes empty the space of its
  * subheader is given back too.
  *
  * The state must be on a queue; the caller must hold the pfsync lock.
  */
 static void
 pfsync_q_del(struct pf_state *st)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 	int qid = st->sync_state;
 
 	PFSYNC_LOCK_ASSERT(sc);
 	KASSERT(st->sync_state != PFSYNC_S_NONE,
 		("%s: st->sync_state != PFSYNC_S_NONE", __func__));
 
 	sc->sc_len -= pfsync_qs[qid].len;
 	TAILQ_REMOVE(&sc->sc_qs[qid], st, sync_list);
 	st->sync_state = PFSYNC_S_NONE;
 	pf_release_state(st);
 
 	if (!TAILQ_EMPTY(&sc->sc_qs[qid]))
 		return;
 
 	sc->sc_len -= sizeof(struct pfsync_subheader);
 }
 
 /*
  * A peer asked for a bulk update: record when the request arrived, rewind
  * the bulk cursor, announce the start of the bulk transfer and schedule
  * the bulk update callout for the next tick.
  */
 static void
 pfsync_bulk_start(void)
 {
 	struct pfsync_softc *sc = V_pfsyncif;
 
 	if (V_pf_status.debug >= PF_DEBUG_MISC)
 		printf("pfsync: received bulk update request\n");
 
 	PFSYNC_BLOCK(sc);
 
 	/* Restart from the very beginning of the state table. */
 	sc->sc_bulk_stateid = 0;
 	sc->sc_bulk_hashid = 0;
 	sc->sc_ureq_received = time_uptime;
 
 	pfsync_bulk_status(PFSYNC_BUS_START);
 	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
 
 	PFSYNC_BUNLOCK(sc);
 }
 
 /*
  * Callout handler that performs one step of a bulk update.
  *
  * Walks the state id hash from the saved cursor and queues a full update
  * for every eligible state.  When the outgoing packet has filled up, the
  * cursor (hash row, state id, creator id) is saved and the callout is
  * re-armed for the next tick; once the whole table has been walked the end
  * of the bulk transfer is announced.
  *
  * Runs with the pfsync bulk lock held (asserted below).
  */
 static void
 pfsync_bulk_update(void *arg)
 {
 	struct pfsync_softc *sc = arg;
 	struct pf_state *s;
 	int i, sent = 0;
 
 	PFSYNC_BLOCK_ASSERT(sc);
 	CURVNET_SET(sc->sc_ifp->if_vnet);
 
 	/*
 	 * Start with last state from previous invocation.
 	 * It may had gone, in this case start from the
 	 * hash slot.
 	 */
 	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);
 
 	if (s != NULL)
 		i = PF_IDHASH(s);
 	else
 		i = sc->sc_bulk_hashid;
 
 	for (; i <= pf_hashmask; i++) {
 		struct pf_idhash *ih = &V_pf_idhash[i];
 
 		/*
 		 * On the first iteration a state found by the lookup above
 		 * is expected to have its row locked already (asserted);
 		 * otherwise lock the row and start at its head.
 		 */
 		if (s != NULL)
 			PF_HASHROW_ASSERT(ih);
 		else {
 			PF_HASHROW_LOCK(ih);
 			s = LIST_FIRST(&ih->states);
 		}
 
 		/* s is NULL when this loop ends, so the next row is locked. */
 		for (; s; s = LIST_NEXT(s, entry)) {
 
 			if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
 			    sizeof(struct pfsync_state)) {
 				/* We've filled a packet. */
 				sc->sc_bulk_hashid = i;
 				sc->sc_bulk_stateid = s->id;
 				sc->sc_bulk_creatorid = s->creatorid;
 				PF_HASHROW_UNLOCK(ih);
 				callout_reset(&sc->sc_bulk_tmo, 1,
 				    pfsync_bulk_update, sc);
 				goto full;
 			}
 
 			/*
 			 * Only states that are not queued already, have a
 			 * valid timeout and were not synchronised after the
 			 * request was received.
 			 */
 			if (s->sync_state == PFSYNC_S_NONE &&
 			    s->timeout < PFTM_MAX &&
 			    s->pfsync_time <= sc->sc_ureq_received) {
 				pfsync_update_state_req(s);
 				sent++;
 			}
 		}
 		PF_HASHROW_UNLOCK(ih);
 	}
 
 	/* We're done. */
 	pfsync_bulk_status(PFSYNC_BUS_END);
 
 full:
 	CURVNET_RESTORE();
 }
 
 /*
  * Build a one-record PFSYNC_ACT_BUS message carrying the given bulk update
  * status and pass it to pfsync_send_plus() while holding the pfsync lock.
  *
  * status: value stored verbatim in the message's status field.
  */
 static void
 pfsync_bulk_status(u_int8_t status)
 {
 	struct pfsync_softc *softc = V_pfsyncif;
 	struct {
 		struct pfsync_subheader subh;
 		struct pfsync_bus bus;
 	} __packed msg;
 
 	bzero(&msg, sizeof(msg));
 
 	/* Subheader: exactly one BUS record follows. */
 	msg.subh.count = htons(1);
 	msg.subh.action = PFSYNC_ACT_BUS;
 	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;
 
 	/* Payload: status, time since the request was received, host id. */
 	msg.bus.status = status;
 	msg.bus.endtime = htonl(time_uptime - softc->sc_ureq_received);
 	msg.bus.creatorid = V_pf_status.hostid;
 
 	PFSYNC_LOCK(softc);
 	pfsync_send_plus(&msg, sizeof(msg));
 	PFSYNC_UNLOCK(softc);
 }
 
 static void
 pfsync_bulk_fail(void *arg)
 {
 	struct pfsync_softc *sc = arg;
 
 	CURVNET_SET(sc->sc_ifp->if_vnet);
 
 	PFSYNC_BLOCK_ASSERT(sc);
 
 	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
 		/* Try again */
 		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
 		    pfsync_bulk_fail, V_pfsyncif);
 		PFSYNC_LOCK(sc);
 		pfsync_request_update(0, 0);
 		PFSYNC_UNLOCK(sc);
 	} else {
 		/* Pretend like the transfer was ok. */
 		sc->sc_ureq_sent = 0;
 		sc->sc_bulk_tries = 0;
 		PFSYNC_LOCK(sc);
 		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
 			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
 			    "pfsync bulk fail");
 		sc->sc_flags |= PFSYNCF_OK;
 		PFSYNC_UNLOCK(sc);
 		if (V_pf_status.debug >= PF_DEBUG_MISC)
 			printf("pfsync: failed to receive bulk update\n");
 	}
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Attach an extra region to the pending pfsync packet and transmit it
  * right away.
  *
  * plus/pluslen describe the region; the pointer itself is stored in the
  * softc.  The pfsync lock is asserted on entry.  If the region does not
  * fit within the interface MTU alongside what is already pending, the
  * pending data is sent out first.
  */
 static void
 pfsync_send_plus(void *plus, size_t pluslen)
 {
 	struct pfsync_softc *softc = V_pfsyncif;
 
 	PFSYNC_LOCK_ASSERT(softc);
 
 	/* Make room by flushing what is queued so far. */
 	if (softc->sc_len + pluslen > softc->sc_ifp->if_mtu) {
 		pfsync_sendout(1);
 	}
 
 	softc->sc_plus = plus;
 	softc->sc_pluslen = pluslen;
 	softc->sc_len += softc->sc_pluslen;
 
 	pfsync_sendout(1);
 }
 
 /*
  * Timer handler: with the softc's vnet set as current and the pfsync lock
  * held, call pfsync_push() on the softc passed in arg.
  */
 static void
 pfsync_timeout(void *arg)
 {
 	struct pfsync_softc *softc = arg;
 
 	CURVNET_SET(softc->sc_ifp->if_vnet);
 
 	PFSYNC_LOCK(softc);
 	pfsync_push(softc);
 	PFSYNC_UNLOCK(softc);
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Mark the softc as having data to push (PFSYNCF_PUSH) and schedule the
  * pfsync software interrupt.  The pfsync lock is asserted on entry.
  */
 static void
 pfsync_push(struct pfsync_softc *sc)
 {
 
 	PFSYNC_LOCK_ASSERT(sc);
 
 	/* pfsyncintr() checks this flag before sending. */
 	sc->sc_flags = sc->sc_flags | PFSYNCF_PUSH;
 
 	swi_sched(V_pfsync_swi_cookie, 0);
 }
 
 static void
 pfsyncintr(void *arg)
 {
 	struct pfsync_softc *sc = arg;
 	struct mbuf *m, *n;
 
 	CURVNET_SET(sc->sc_ifp->if_vnet);
 
 	PFSYNC_LOCK(sc);
 	if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) {
 		pfsync_sendout(0);
 		sc->sc_flags &= ~PFSYNCF_PUSH;
 	}
 	_IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
 	PFSYNC_UNLOCK(sc);
 
 	for (; m != NULL; m = n) {
 
 		n = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 
 		/*
 		 * We distinguish between a deferral packet and our
 		 * own pfsync packet based on M_SKIP_FIREWALL
 		 * flag. This is XXX.
 		 */
 		if (m->m_flags & M_SKIP_FIREWALL)
 			ip_output(m, NULL, NULL, 0, NULL, NULL);
 		else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
 		    NULL) == 0)
 			V_pfsyncstats.pfsyncs_opackets++;
 		else
 			V_pfsyncstats.pfsyncs_oerrors++;
 	}
 	CURVNET_RESTORE();
 }
 
 /*
  * Prepare the softc's multicast options and join the sync peer group on
  * ifp.
  *
  * mship is installed as the membership array.  Returns EADDRNOTAVAIL when
  * ifp lacks IFF_MULTICAST, the in_joingroup() error when joining fails
  * (the membership pointer is cleared again in that case), and 0 on
  * success.
  */
 static int
 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
 {
 	struct ip_moptions *mopts;
 	int rc;
 
 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
 		return (EADDRNOTAVAIL);
 
 	mopts = &sc->sc_imo;
 	mopts->imo_membership = (struct in_multi **)mship;
 	mopts->imo_max_memberships = IP_MIN_MEMBERSHIPS;
 	mopts->imo_multicast_vif = -1;
 
 	rc = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
 	    &mopts->imo_membership[0]);
 	if (rc != 0) {
 		mopts->imo_membership = NULL;
 		return (rc);
 	}
 
 	/* Joined: record the membership and the outgoing parameters. */
 	mopts->imo_num_memberships++;
 	mopts->imo_multicast_ifp = ifp;
 	mopts->imo_multicast_ttl = PFSYNC_DFLTTL;
 	mopts->imo_multicast_loop = 0;
 
 	return (0);
 }
 
 /*
  * Undo pfsync_multicast_setup(): leave the group held in the first
  * membership slot, free the membership array (M_PFSYNC) and clear the
  * pointers that referred to it and to the multicast interface.
  */
 static void
 pfsync_multicast_cleanup(struct pfsync_softc *sc)
 {
 	struct ip_moptions *mopts = &sc->sc_imo;
 
 	in_leavegroup(mopts->imo_membership[0], NULL);
 	free(mopts->imo_membership, M_PFSYNC);
 
 	mopts->imo_multicast_ifp = NULL;
 	mopts->imo_membership = NULL;
 }
 
 #ifdef INET
 extern  struct domain inetdomain;
 /*
  * Protocol switch entry for IPPROTO_PFSYNC in the inet domain, compiled
  * only when INET is defined.  It is declared as a raw socket protocol:
  * input goes to pfsync_input(), while output, socket options and user
  * requests use the rip_* (raw IP) implementations.
  */
 static struct protosw in_pfsync_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_PFSYNC,
 	.pr_flags =		PR_ATOMIC|PR_ADDR,
 	.pr_input =		pfsync_input,
 	.pr_output =		rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 #endif
 
 /*
  * Publish the pfsync entry points by storing them in the pfsync_*_ptr
  * hook variables.  All six stores happen under the pf rules write lock.
  *
  * Takes no arguments; declared with an explicit (void) parameter list so
  * that the definition is a real prototype.
  */
 static void
 pfsync_pointers_init(void)
 {
 
 	PF_RULES_WLOCK();
 	pfsync_state_import_ptr = pfsync_state_import;
 	pfsync_insert_state_ptr = pfsync_insert_state;
 	pfsync_update_state_ptr = pfsync_update_state;
 	pfsync_delete_state_ptr = pfsync_delete_state;
 	pfsync_clear_states_ptr = pfsync_clear_states;
 	pfsync_defer_ptr = pfsync_defer;
 	PF_RULES_WUNLOCK();
 }
 
 /*
  * Withdraw the pfsync entry points by resetting every pfsync_*_ptr hook
  * variable to NULL under the pf rules write lock.
  *
  * Takes no arguments; declared with an explicit (void) parameter list so
  * that the definition is a real prototype.
  */
 static void
 pfsync_pointers_uninit(void)
 {
 
 	PF_RULES_WLOCK();
 	pfsync_state_import_ptr = NULL;
 	pfsync_insert_state_ptr = NULL;
 	pfsync_update_state_ptr = NULL;
 	pfsync_delete_state_ptr = NULL;
 	pfsync_clear_states_ptr = NULL;
 	pfsync_defer_ptr = NULL;
 	PF_RULES_WUNLOCK();
 }
 
 /*
  * Per-vnet setup: create the pfsync interface cloner and register the
  * pfsync software interrupt handler.  If swi_add() fails, the cloner is
  * detached again and the failure is logged.
  *
  * NOTE(review): V_pfsyncif is passed as the handler argument at
  * registration time - confirm it already holds the intended softc here.
  */
 static void
 vnet_pfsync_init(const void *unused __unused)
 {
 	int rc;
 
 	V_pfsync_cloner = if_clone_simple(pfsyncname,
 	    pfsync_clone_create, pfsync_clone_destroy, 1);
 
 	rc = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif,
 	    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
 	if (rc == 0)
 		return;
 
 	/* Registration failed: roll back the cloner. */
 	if_clone_detach(V_pfsync_cloner);
 	log(LOG_INFO, "swi_add() failed in %s\n", __func__);
 }
 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY,
     vnet_pfsync_init, NULL);
 
 /*
  * Per-vnet teardown: detach the pfsync interface cloner, then remove the
  * software interrupt handler registered by vnet_pfsync_init().
  */
 static void
 vnet_pfsync_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_pfsync_cloner);
 	swi_remove(V_pfsync_swi_cookie);
 }
 /*
  * Detach after pf is gone; otherwise we might touch pfsync memory
  * from within pf after freeing pfsync.
  */
 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND,
     vnet_pfsync_uninit, NULL);
 
 /*
  * Module-wide initialisation.
  *
  * With INET: register the pfsync protosw with pf and then the protocol
  * number with ipproto_register(); if the second step fails the first is
  * rolled back.  In all configurations the pfsync hook pointers are
  * installed last.
  *
  * Returns 0 on success or the error from the failing registration call.
  * Declared with an explicit (void) parameter list so that the definition
  * is a real prototype.
  */
 static int
 pfsync_init(void)
 {
 #ifdef INET
 	int error;
 
 	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
 	if (error)
 		return (error);
 	error = ipproto_register(IPPROTO_PFSYNC);
 	if (error) {
 		/* Undo the protosw registration made just above. */
 		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
 		return (error);
 	}
 #endif
 	pfsync_pointers_init();
 
 	return (0);
 }
 
 /*
  * Module-wide teardown, the reverse of pfsync_init(): clear the pfsync
  * hook pointers first, then (with INET) unregister the protocol number
  * and the protosw.
  *
  * Declared with an explicit (void) parameter list so that the definition
  * is a real prototype.
  */
 static void
 pfsync_uninit(void)
 {
 
 	pfsync_pointers_uninit();
 
 #ifdef INET
 	ipproto_unregister(IPPROTO_PFSYNC);
 	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
 #endif
 }
 
 /*
  * Module event handler for pfsync.
  *
  * MOD_LOAD returns the result of pfsync_init(), MOD_QUIESCE always
  * returns EBUSY, MOD_UNLOAD runs pfsync_uninit() and returns 0, and any
  * other event type returns EINVAL.  mod and data are not used.
  */
 static int
 pfsync_modevent(module_t mod, int type, void *data)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		return (pfsync_init());
 	case MOD_QUIESCE:
 		/*
 		 * Module should not be unloaded due to race conditions.
 		 */
 		return (EBUSY);
 	case MOD_UNLOAD:
 		pfsync_uninit();
 		return (0);
 	default:
 		return (EINVAL);
 	}
 }
 
 static moduledata_t pfsync_mod = {
 	pfsyncname,
 	pfsync_modevent,
 	0
 };
 
 #define PFSYNC_MODVER 1
 
 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */
 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY);
 MODULE_VERSION(pfsync, PFSYNC_MODVER);
 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);
Index: projects/hps_head/sys/sys/callout.h
===================================================================
--- projects/hps_head/sys/sys/callout.h	(revision 309217)
+++ projects/hps_head/sys/sys/callout.h	(revision 309218)
@@ -1,123 +1,139 @@
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)callout.h	8.2 (Berkeley) 1/21/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_CALLOUT_H_
 #define _SYS_CALLOUT_H_
 
 #include <sys/_callout.h>
 
 #define	CALLOUT_LOCAL_ALLOC	0x0001 /* was allocated from callfree */
 #define	CALLOUT_ACTIVE		0x0002 /* callout is currently active */
 #define	CALLOUT_PENDING		0x0004 /* callout is waiting for timeout */
 #define	CALLOUT_MPSAFE		0x0008 /* deprecated */
 #define	CALLOUT_RETURNUNLOCKED	0x0010 /* handler returns with mtx unlocked */
 #define	CALLOUT_UNUSED_5	0x0020 /* --available-- */
 #define	CALLOUT_UNUSED_6	0x0040 /* --available-- */
 #define	CALLOUT_PROCESSED	0x0080 /* callout in wheel or processing list? */
 #define	CALLOUT_DIRECT 		0x0100 /* allow exec from hw int context */
 #define	CALLOUT_SET_LC(x)	(((x) & 7) << 16) /* set lock class */
 #define	CALLOUT_GET_LC(x)	(((x) >> 16) & 7) /* get lock class */
 
-/* return values for all callout_xxx() functions */
-#define	CALLOUT_RET_CANCELLED_AND_DRAINING (CALLOUT_RET_CANCELLED | CALLOUT_RET_DRAINING)
-#define	CALLOUT_RET_DRAINING	2 /* callout is being serviced */
-#define	CALLOUT_RET_CANCELLED	1 /* callout was successfully stopped */
-#define	CALLOUT_RET_STOPPED	0 /* callout was already stopped */
+/* return value for all callout_xxx() functions */
+typedef union callout_ret {
+	struct {
+		unsigned cancelled : 1;
+		unsigned draining : 1;
+		unsigned reserved : 30;
+	} bit;
+	unsigned value;
+} callout_ret_t;
 
+#define	CALLOUT_RET_CANCELLED_AND_DRAINING \
+    ((const callout_ret_t){.bit.cancelled = 1,.bit.draining = 1}).value
+/* callout is being serviced */
+#define	CALLOUT_RET_DRAINING \
+    ((const callout_ret_t){.bit.draining = 1}).value
+/* callout was successfully stopped */
+#define	CALLOUT_RET_CANCELLED \
+    ((const callout_ret_t){.bit.cancelled = 1}).value
+/* callout was already stopped */
+#define	CALLOUT_RET_STOPPED \
+    ((const callout_ret_t){.value = 0}).value
+
 #define	C_DIRECT_EXEC		0x0001 /* direct execution of callout */
 #define	C_PRELBITS		7
 #define	C_PRELRANGE		((1 << C_PRELBITS) - 1)
 #define	C_PREL(x)		(((x) + 1) << 1)
 #define	C_PRELGET(x)		(int)((((x) >> 1) & C_PRELRANGE) - 1)
 #define	C_HARDCLOCK		0x0100 /* align to hardclock() calls */
 #define	C_ABSOLUTE		0x0200 /* event time is absolute. */
 #define	C_PRECALC		0x0400 /* event time is pre-calculated. */
 
 struct callout_handle {
 	struct callout *callout;
 };
 
 #ifdef _KERNEL
 #define	callout_active(c)	((c)->c_flags & CALLOUT_ACTIVE)
 #define	callout_deactivate(c)	((c)->c_flags &= ~CALLOUT_ACTIVE)
-int	callout_drain(struct callout *);
-int	callout_async_drain(struct callout *, callout_func_t *);
+callout_ret_t	callout_drain(struct callout *);
+callout_ret_t	callout_async_drain(struct callout *, callout_func_t *);
 void	callout_init(struct callout *, int);
 void	callout_init_lock_function(struct callout *, callout_lock_func_t *, int);
 void	callout_init_lock_object(struct callout *, struct lock_object *, int);
 #define	callout_init_mtx(c, mtx, flags)	\
 	callout_init_lock_object((c), ((mtx) != NULL) ? &(mtx)->lock_object : \
 	    NULL, (flags))
 #define	callout_init_rm(c, rm, flags) \
 	callout_init_lock_object((c), ((rm) != NULL) ? &(rm)->lock_object : \
 	    NULL, (flags))
 #define	callout_init_rw(c, rw, flags) \
 	callout_init_lock_object((c), ((rw) != NULL) ? &(rw)->lock_object : \
 	   NULL, (flags))
 #define	callout_pending(c)	((c)->c_flags & CALLOUT_PENDING)
-int	callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
+callout_ret_t	callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
 	    callout_func_t *, void *, int, int);
 #define	callout_reset_sbt(c, sbt, pr, fn, arg, flags)			\
     callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), -1, (flags))
 #define	callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags)		\
     callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), PCPU_GET(cpuid),\
         (flags))
 #define	callout_reset_on(c, to_ticks, fn, arg, cpu)			\
     callout_reset_sbt_on((c), tick_sbt * (to_ticks), 0, (fn), (arg),	\
         (cpu), C_HARDCLOCK)
 #define	callout_reset(c, on_tick, fn, arg)				\
     callout_reset_on((c), (on_tick), (fn), (arg), -1)
 #define	callout_reset_curcpu(c, on_tick, fn, arg)			\
     callout_reset_on((c), (on_tick), (fn), (arg), PCPU_GET(cpuid))
 #define	callout_schedule_sbt_on(c, sbt, pr, cpu, flags)			\
     callout_reset_sbt_on((c), (sbt), (pr), (c)->c_func, (c)->c_arg,	\
         (cpu), (flags))
 #define	callout_schedule_sbt(c, sbt, pr, flags)				\
     callout_schedule_sbt_on((c), (sbt), (pr), -1, (flags))
 #define	callout_schedule_sbt_curcpu(c, sbt, pr, flags)			\
     callout_schedule_sbt_on((c), (sbt), (pr), PCPU_GET(cpuid), (flags))
-int	callout_schedule(struct callout *, int);
-int	callout_schedule_on(struct callout *, int, int);
+callout_ret_t	callout_schedule(struct callout *, int);
+callout_ret_t	callout_schedule_on(struct callout *, int, int);
 #define	callout_schedule_curcpu(c, on_tick)				\
     callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
-int	callout_stop(struct callout *);
+callout_ret_t	callout_stop(struct callout *);
 void	callout_when(sbintime_t, sbintime_t, int, sbintime_t *, sbintime_t *);
 void	callout_process(sbintime_t now);
 
 #endif
 
 #endif /* _SYS_CALLOUT_H_ */
Index: projects/hps_head/sys/tests/callout_test/callout_test.c
===================================================================
--- projects/hps_head/sys/tests/callout_test/callout_test.c	(revision 309217)
+++ projects/hps_head/sys/tests/callout_test/callout_test.c	(revision 309218)
@@ -1,283 +1,282 @@
 /*-
  * Copyright (c) 2015 Netflix Inc. All rights reserved.
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/ioccom.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/sched.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/libkern.h>
 #include <sys/uio.h>
 #include <sys/pcpu.h>
 #include <sys/smp.h>
 #include <sys/pmckern.h>
 #include <sys/cpuctl.h>
 #include <tests/kern_testfrwk.h>
 #include <tests/callout_test.h>
 #include <machine/cpu.h>
 
 MALLOC_DEFINE(M_CALLTMP, "Temp callout Memory", "CalloutTest");
 
 /*
  * State of one callout test run; one instance per test thread is kept in
  * comaster[].
  */
 struct callout_run {
 	struct mtx lock;	/* held around the counters and callout flag checks */
 	struct callout *co_array;	/* co_number_callouts callouts */
 	int co_test;		/* test number taken from the test options */
 	int co_number_callouts;	/* number of entries in co_array */
 	int co_return_npa;	/* handlers that bailed: pending or not active */
 	int co_completed;	/* handlers that ran to completion */
 	int callout_waiting;	/* handlers that have started and not yet finished */
 	int drain_calls;	/* number of times drainit() ran */
 	int cnt_zero;		/* async drains that reported "draining" */
 	int cnt_one;		/* async drains that did not report "draining" */
 	int index;		/* thread index this run belongs to */
 };
 
 static struct callout_run *comaster[MAXCPU];
 
 uint64_t callout_total = 0;
 
 static void execute_the_co_test(struct callout_run *rn);
 
 /*
  * Print the summary counters of a finished test run.
  * arg points to the struct callout_run to report on.
  */
 static void
 co_saydone(void *arg)
 {
 	struct callout_run *run = arg;
 
 	printf("The callout test is now complete for thread %d\n", run->index);
 	printf("number_callouts:%d\n", run->co_number_callouts);
 	printf("Callouts that bailed (Not PENDING or ACTIVE cleared):%d\n",
 	    run->co_return_npa);
 	printf("Callouts that completed:%d\n", run->co_completed);
 	printf("Drain calls:%d\n", run->drain_calls);
 	printf("Zero returns:%d non-zero:%d\n", run->cnt_zero, run->cnt_one);
 }
 
 /*
  * Drain callback passed to callout_async_drain(): count the invocation in
  * the run's drain_calls field while holding the run's lock.
  */
 static void
 drainit(void *arg)
 {
 	struct callout_run *run = arg;
 
 	mtx_lock(&run->lock);
 	run->drain_calls += 1;
 	mtx_unlock(&run->lock);
 }
 
 /*
  * Callout handler used by the test.
  *
  * arg is the struct callout_run.  The handler announces itself through
  * callout_waiting, takes the run lock and inspects the callout in the
  * slot selected by the current CPU number: if that callout is pending
  * again or no longer active the handler bails out (co_return_npa),
  * otherwise it deactivates the callout and counts a completion.
  *
  * NOTE(review): the slot is chosen by CPU id, not by which callout fired;
  * confirm co_array always has at least that many entries.
  */
 static void
 test_callout(void *arg)
 {
 	struct callout_run *run = arg;
 	struct callout *co;
 	int this_cpu;
 
 	critical_enter();
 	this_cpu = curcpu;
 	critical_exit();
 
 	atomic_add_int(&run->callout_waiting, 1);
 	mtx_lock(&run->lock);
 
 	co = &run->co_array[this_cpu];
 	if (!callout_pending(co) && callout_active(co)) {
 		/* Normal completion. */
 		callout_deactivate(co);
 		run->co_completed++;
 		mtx_unlock(&run->lock);
 		atomic_subtract_int(&run->callout_waiting, 1);
 		return;
 	}
 
 	/* Rescheduled or stopped underneath us. */
 	run->co_return_npa++;
 	atomic_subtract_int(&run->callout_waiting, 1);
 	mtx_unlock(&run->lock);
 }
 
 /*
  * Run one callout test described by rn.
  *
  * With rn->lock held, every callout in co_array is armed: spread over the
  * CPUs when co_test is 1, otherwise all on CPU rn->index.  The function
  * then spins until all handlers have started or more than 100 ticks have
  * passed, async-drains each callout while counting the results, releases
  * the lock, waits (again for at most about 100 ticks) for the outstanding
  * handlers and finally prints the summary via co_saydone().
  */
 void
 execute_the_co_test(struct callout_run *rn)
 {
-	int i, ret, cpu;
+	int i, cpu;
 	uint32_t tk_s, tk_e, tk_d;
 
 	mtx_lock(&rn->lock);
 	rn->callout_waiting = 0;
 	for (i = 0; i < rn->co_number_callouts; i++) {
 		if (rn->co_test == 1) {
 			/* start all on spread out cpu's */
 			cpu = i % mp_ncpus;
 			callout_reset_sbt_on(&rn->co_array[i], 3, 0, test_callout, rn,
 			    cpu, 0);
 		} else {
 			/* Start all on the same CPU */
 			callout_reset_sbt_on(&rn->co_array[i], 3, 0, test_callout, rn,
 			    rn->index, 0);
 		}
 	}
 	/* Spin until every handler has checked in, giving up after 100 ticks. */
 	tk_s = ticks;
 	while (rn->callout_waiting != rn->co_number_callouts) {
 		cpu_spinwait();
 		tk_e = ticks;
 		tk_d = tk_e - tk_s;
 		if (tk_d > 100) {
 			break;
 		}
 	}
 	/* OK everyone is waiting and we have the lock */
 	for (i = 0; i < rn->co_number_callouts; i++) {
-		ret = callout_async_drain(&rn->co_array[i], drainit);
-		if (!(ret & CALLOUT_RET_DRAINING)) {
+		if (!callout_async_drain(&rn->co_array[i], drainit).bit.draining) {
 			rn->cnt_one++;
 		} else {
 			rn->cnt_zero++;
 		}
 	}
 	/* Callouts counted in cnt_one are no longer waited for below. */
 	rn->callout_waiting -= rn->cnt_one;
 	mtx_unlock(&rn->lock);
 	/* Now wait until all are done */
 	tk_s = ticks;
 	while (rn->callout_waiting > 0) {
 		cpu_spinwait();
 		tk_e = ticks;
 		tk_d = tk_e - tk_s;
 		if (tk_d > 100) {
 			break;
 		}
 	}
 	co_saydone((void *)rn);
 }
 
 
 static void
 run_callout_test(struct kern_test *test)
 {
 	struct callout_test *u;
 	size_t sz;
 	int i;
 	struct callout_run *rn;
 	int index = test->tot_threads_running;
 
 	u = (struct callout_test *)test->test_options;
 	if (comaster[index] == NULL) {
 		rn = comaster[index] = malloc(sizeof(struct callout_run), M_CALLTMP, M_WAITOK);
 		memset(comaster[index], 0, sizeof(struct callout_run));
 		mtx_init(&rn->lock, "callouttest", NULL, MTX_DUPOK);
 		rn->index = index;
 	} else {
 		rn = comaster[index];
 		rn->co_number_callouts = rn->co_return_npa = 0;
 		rn->co_completed = rn->callout_waiting = 0;
 		rn->drain_calls = rn->cnt_zero = rn->cnt_one = 0;
 		if (rn->co_array) {
 			free(rn->co_array, M_CALLTMP);
 			rn->co_array = NULL;
 		}
 	}
 	rn->co_number_callouts = u->number_of_callouts;
 	rn->co_test = u->test_number;
 	sz = sizeof(struct callout) * rn->co_number_callouts;
 	rn->co_array = malloc(sz, M_CALLTMP, M_WAITOK);
 	for (i = 0; i < rn->co_number_callouts; i++) {
 		callout_init(&rn->co_array[i], CALLOUT_MPSAFE);
 	}
 	execute_the_co_test(rn);
 }
 
 int callout_test_is_loaded = 0;
 
 static void
 cocleanup(void)
 {
 	int i;
 
 	for (i = 0; i < MAXCPU; i++) {
 		if (comaster[i]) {
 			if (comaster[i]->co_array) {
 				free(comaster[i]->co_array, M_CALLTMP);
 				comaster[i]->co_array = NULL;
 			}
 			free(comaster[i], M_CALLTMP);
 			comaster[i] = NULL;
 		}
 	}
 }
 
 /*
  * Module event handler for the callout test.
  *
  * MOD_LOAD registers run_callout_test with the kernel test framework and,
  * on success, clears comaster[] and marks the module loaded.
  * MOD_QUIESCE deregisters the test and cleans up when that succeeds.
  * MOD_UNLOAD does the same, but only if the module is still marked
  * loaded.  Any other event returns EOPNOTSUPP.
  *
  * Returns 0 or the error reported by the test framework call.
  */
 static int
 callout_test_modevent(module_t mod, int type, void *data)
 {
 	int rc;
 
 	switch (type) {
 	case MOD_LOAD:
 		rc = kern_testframework_register("callout_test",
 		    run_callout_test);
 		if (rc != 0) {
 			printf("Can't load callout_test err:%d returned\n",
 			    rc);
 			return (rc);
 		}
 		memset(comaster, 0, sizeof(comaster));
 		callout_test_is_loaded = 1;
 		return (rc);
 	case MOD_QUIESCE:
 		rc = kern_testframework_deregister("callout_test");
 		if (rc == 0) {
 			callout_test_is_loaded = 0;
 			cocleanup();
 		}
 		return (rc);
 	case MOD_UNLOAD:
 		/* Nothing left to do if a quiesce already cleaned up. */
 		if (!callout_test_is_loaded)
 			return (0);
 		rc = kern_testframework_deregister("callout_test");
 		if (rc == 0) {
 			cocleanup();
 			callout_test_is_loaded = 0;
 		}
 		return (rc);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 static moduledata_t callout_test_mod = {
 	.name = "callout_test",
 	.evhand = callout_test_modevent,
 	.priv = 0
 };
 
 MODULE_DEPEND(callout_test, kern_testframework, 1, 1, 1);
 DECLARE_MODULE(callout_test, callout_test_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);