Index: head/sys/conf/files
===================================================================
--- head/sys/conf/files	(revision 277330)
+++ head/sys/conf/files	(revision 277331)
@@ -1,4011 +1,4013 @@
 # $FreeBSD$
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
 # dependency lines other than the first are silently ignored.
 #
 acpi_quirks.h			optional acpi				   \
 	dependency	"$S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \
 	compile-with	"${AWK} -f $S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"acpi_quirks.h"
 #
 # The 'fdt_dtb_file' target covers an actual DTB file name, which is derived
 # from the specified source (DTS) file: <platform>.dts -> <platform>.dtb
 #
 fdt_dtb_file			optional fdt fdt_dtb_static \
 	compile-with "sh -c 'MACHINE=${MACHINE} $S/tools/fdt/make_dtb.sh $S ${FDT_DTS_FILE} ${.CURDIR}'" \
 	no-obj no-implicit-rule before-depend	\
 	clean		"${FDT_DTS_FILE:R}.dtb"
 fdt_static_dtb.h		optional fdt fdt_dtb_static \
 	compile-with "sh -c 'MACHINE=${MACHINE} $S/tools/fdt/make_dtbh.sh ${FDT_DTS_FILE} ${.CURDIR}'" \
 	dependency	"fdt_dtb_file" \
 	no-obj no-implicit-rule before-depend \
 	clean		"fdt_static_dtb.h"
 feeder_eq_gen.h			optional sound				   \
 	dependency	"$S/tools/sound/feeder_eq_mkfilter.awk"		   \
 	compile-with	"${AWK} -f $S/tools/sound/feeder_eq_mkfilter.awk -- ${FEEDER_EQ_PRESETS} > feeder_eq_gen.h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"feeder_eq_gen.h"
 feeder_rate_gen.h		optional sound				   \
 	dependency	"$S/tools/sound/feeder_rate_mkfilter.awk"	   \
 	compile-with	"${AWK} -f $S/tools/sound/feeder_rate_mkfilter.awk -- ${FEEDER_RATE_PRESETS} > feeder_rate_gen.h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"feeder_rate_gen.h"
 snd_fxdiv_gen.h			optional sound				   \
 	dependency	"$S/tools/sound/snd_fxdiv_gen.awk"		   \
 	compile-with	"${AWK} -f $S/tools/sound/snd_fxdiv_gen.awk -- > snd_fxdiv_gen.h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"snd_fxdiv_gen.h"
 miidevs.h			optional miibus | mii			   \
 	dependency	"$S/tools/miidevs2h.awk $S/dev/mii/miidevs"	   \
 	compile-with	"${AWK} -f $S/tools/miidevs2h.awk $S/dev/mii/miidevs" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"miidevs.h"
 pccarddevs.h			standard				   \
 	dependency	"$S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \
 	compile-with	"${AWK} -f $S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"pccarddevs.h"
 teken_state.h		optional sc | vt				   \
 	dependency	"$S/teken/gensequences $S/teken/sequences" \
 	compile-with	"${AWK} -f $S/teken/gensequences $S/teken/sequences > teken_state.h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"teken_state.h"
 usbdevs.h			optional usb				   \
 	dependency	"$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \
 	compile-with	"${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"usbdevs.h"
 usbdevs_data.h			optional usb				   \
 	dependency	"$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \
 	compile-with	"${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -d" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"usbdevs_data.h"
 cam/cam.c			optional scbus
 cam/cam_compat.c		optional scbus
 cam/cam_periph.c		optional scbus
 cam/cam_queue.c			optional scbus
 cam/cam_sim.c			optional scbus
 cam/cam_xpt.c			optional scbus
 cam/ata/ata_all.c		optional scbus
 cam/ata/ata_xpt.c		optional scbus
 cam/ata/ata_pmp.c		optional scbus
 cam/scsi/scsi_xpt.c		optional scbus
 cam/scsi/scsi_all.c		optional scbus
 cam/scsi/scsi_cd.c		optional cd
 cam/scsi/scsi_ch.c		optional ch
 cam/ata/ata_da.c		optional ada | da
 cam/ctl/ctl.c			optional ctl
 cam/ctl/ctl_backend.c		optional ctl
 cam/ctl/ctl_backend_block.c	optional ctl
 cam/ctl/ctl_backend_ramdisk.c	optional ctl
 cam/ctl/ctl_cmd_table.c		optional ctl
 cam/ctl/ctl_frontend.c		optional ctl
 cam/ctl/ctl_frontend_cam_sim.c	optional ctl
 cam/ctl/ctl_frontend_internal.c	optional ctl
 cam/ctl/ctl_frontend_iscsi.c	optional ctl
 cam/ctl/ctl_scsi_all.c		optional ctl
 cam/ctl/ctl_tpc.c		optional ctl
 cam/ctl/ctl_tpc_local.c		optional ctl
 cam/ctl/ctl_error.c		optional ctl
 cam/ctl/ctl_util.c		optional ctl
 cam/ctl/scsi_ctl.c		optional ctl
 cam/scsi/scsi_da.c		optional da
 cam/scsi/scsi_low.c		optional ct | ncv | nsp | stg
 cam/scsi/scsi_pass.c		optional pass
 cam/scsi/scsi_pt.c		optional pt
 cam/scsi/scsi_sa.c		optional sa
 cam/scsi/scsi_enc.c		optional ses
 cam/scsi/scsi_enc_ses.c		optional ses
 cam/scsi/scsi_enc_safte.c	optional ses
 cam/scsi/scsi_sg.c		optional sg
 cam/scsi/scsi_targ_bh.c		optional targbh
 cam/scsi/scsi_target.c		optional targ
 cam/scsi/smp_all.c		optional scbus
 # shared between zfs and dtrace
 cddl/compat/opensolaris/kern/opensolaris.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_cmn_err.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_kmem.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_misc.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_sunddi.c			optional zfs compile-with "${ZFS_C}"
 # zfs specific
 cddl/compat/opensolaris/kern/opensolaris_acl.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_dtrace.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_kobj.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_kstat.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_lookup.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_policy.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_string.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_sysevent.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_taskq.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_uio.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_vfs.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_vm.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_zone.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/acl/acl_common.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/avl/avl.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/nvpair/fnvpair.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/nvpair/nvpair.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/nvpair/nvpair_alloc_fixed.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/unicode/u8_textprep.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfeature_common.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_comutil.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_deleg.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_prop.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zpool_prop.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zprop_common.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/gfs.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/vnode.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/blkptr.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/ddt_zap.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c			optional zfs compile-with "${ZFS_C}" \
 	warning "kernel contains CDDL licensed ZFS filesystem"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/gzip.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/lzjb.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/range_tree.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/sha256.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/space_reftree.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/unique.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_debug.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zle.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/callb.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/fm.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/list.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/adler32.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/deflate.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/inffast.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/inflate.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/inftrees.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/opensolaris_crc32.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/trees.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/zmod.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/zmod_subr.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/zutil.c			optional zfs compile-with "${ZFS_C}"
 compat/freebsd32/freebsd32_capability.c	optional compat_freebsd32
 compat/freebsd32/freebsd32_ioctl.c	optional compat_freebsd32
 compat/freebsd32/freebsd32_misc.c	optional compat_freebsd32
 compat/freebsd32/freebsd32_syscalls.c	optional compat_freebsd32
 compat/freebsd32/freebsd32_sysent.c	optional compat_freebsd32
 contrib/altq/altq/altq_cbq.c		optional altq
 contrib/altq/altq/altq_cdnr.c		optional altq
 contrib/altq/altq/altq_hfsc.c		optional altq
 contrib/altq/altq/altq_priq.c		optional altq
 contrib/altq/altq/altq_red.c		optional altq
 contrib/altq/altq/altq_rio.c		optional altq
 contrib/altq/altq/altq_rmclass.c	optional altq
 contrib/altq/altq/altq_subr.c		optional altq
 contrib/dev/acpica/common/ahids.c			optional acpi acpi_debug
 contrib/dev/acpica/common/ahuuids.c			optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbcmds.c		optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbconvert.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbdisply.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbexec.c		optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbfileio.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbhistry.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbinput.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbmethod.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbnames.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbstats.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbtest.c		optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbutils.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbxface.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmbuffer.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmdeferred.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmnames.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmopcode.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmobject.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmresrc.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmresrcl.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmresrcl2.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmresrcs.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmutils.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmwalk.c	optional acpi acpi_debug
 contrib/dev/acpica/components/dispatcher/dsargs.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dscontrol.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsfield.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsinit.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsmethod.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsmthdat.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsobject.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsopcode.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsutils.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswexec.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswload.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswload2.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswscope.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswstate.c	optional acpi
 contrib/dev/acpica/components/events/evevent.c		optional acpi
 contrib/dev/acpica/components/events/evglock.c		optional acpi
 contrib/dev/acpica/components/events/evgpe.c		optional acpi
 contrib/dev/acpica/components/events/evgpeblk.c		optional acpi
 contrib/dev/acpica/components/events/evgpeinit.c	optional acpi
 contrib/dev/acpica/components/events/evgpeutil.c	optional acpi
 contrib/dev/acpica/components/events/evhandler.c	optional acpi
 contrib/dev/acpica/components/events/evmisc.c		optional acpi
 contrib/dev/acpica/components/events/evregion.c		optional acpi
 contrib/dev/acpica/components/events/evrgnini.c		optional acpi
 contrib/dev/acpica/components/events/evsci.c		optional acpi
 contrib/dev/acpica/components/events/evxface.c		optional acpi
 contrib/dev/acpica/components/events/evxfevnt.c		optional acpi
 contrib/dev/acpica/components/events/evxfgpe.c		optional acpi
 contrib/dev/acpica/components/events/evxfregn.c		optional acpi
 contrib/dev/acpica/components/executer/exconfig.c	optional acpi
 contrib/dev/acpica/components/executer/exconvrt.c	optional acpi
 contrib/dev/acpica/components/executer/excreate.c	optional acpi
 contrib/dev/acpica/components/executer/exdebug.c	optional acpi
 contrib/dev/acpica/components/executer/exdump.c		optional acpi
 contrib/dev/acpica/components/executer/exfield.c	optional acpi
 contrib/dev/acpica/components/executer/exfldio.c	optional acpi
 contrib/dev/acpica/components/executer/exmisc.c		optional acpi
 contrib/dev/acpica/components/executer/exmutex.c	optional acpi
 contrib/dev/acpica/components/executer/exnames.c	optional acpi
 contrib/dev/acpica/components/executer/exoparg1.c	optional acpi
 contrib/dev/acpica/components/executer/exoparg2.c	optional acpi
 contrib/dev/acpica/components/executer/exoparg3.c	optional acpi
 contrib/dev/acpica/components/executer/exoparg6.c	optional acpi
 contrib/dev/acpica/components/executer/exprep.c		optional acpi
 contrib/dev/acpica/components/executer/exregion.c	optional acpi
 contrib/dev/acpica/components/executer/exresnte.c	optional acpi
 contrib/dev/acpica/components/executer/exresolv.c	optional acpi
 contrib/dev/acpica/components/executer/exresop.c	optional acpi
 contrib/dev/acpica/components/executer/exstore.c	optional acpi
 contrib/dev/acpica/components/executer/exstoren.c	optional acpi
 contrib/dev/acpica/components/executer/exstorob.c	optional acpi
 contrib/dev/acpica/components/executer/exsystem.c	optional acpi
 contrib/dev/acpica/components/executer/exutils.c	optional acpi
 contrib/dev/acpica/components/hardware/hwacpi.c		optional acpi
 contrib/dev/acpica/components/hardware/hwesleep.c	optional acpi
 contrib/dev/acpica/components/hardware/hwgpe.c		optional acpi
 contrib/dev/acpica/components/hardware/hwpci.c		optional acpi
 contrib/dev/acpica/components/hardware/hwregs.c		optional acpi
 contrib/dev/acpica/components/hardware/hwsleep.c	optional acpi
 contrib/dev/acpica/components/hardware/hwtimer.c	optional acpi
 contrib/dev/acpica/components/hardware/hwvalid.c	optional acpi
 contrib/dev/acpica/components/hardware/hwxface.c	optional acpi
 contrib/dev/acpica/components/hardware/hwxfsleep.c	optional acpi
 contrib/dev/acpica/components/namespace/nsaccess.c	optional acpi
 contrib/dev/acpica/components/namespace/nsalloc.c	optional acpi
 contrib/dev/acpica/components/namespace/nsarguments.c	optional acpi
 contrib/dev/acpica/components/namespace/nsconvert.c	optional acpi
 contrib/dev/acpica/components/namespace/nsdump.c	optional acpi
 contrib/dev/acpica/components/namespace/nseval.c	optional acpi
 contrib/dev/acpica/components/namespace/nsinit.c	optional acpi
 contrib/dev/acpica/components/namespace/nsload.c	optional acpi
 contrib/dev/acpica/components/namespace/nsnames.c	optional acpi
 contrib/dev/acpica/components/namespace/nsobject.c	optional acpi
 contrib/dev/acpica/components/namespace/nsparse.c	optional acpi
 contrib/dev/acpica/components/namespace/nspredef.c	optional acpi
 contrib/dev/acpica/components/namespace/nsprepkg.c	optional acpi
 contrib/dev/acpica/components/namespace/nsrepair.c	optional acpi
 contrib/dev/acpica/components/namespace/nsrepair2.c	optional acpi
 contrib/dev/acpica/components/namespace/nssearch.c	optional acpi
 contrib/dev/acpica/components/namespace/nsutils.c	optional acpi
 contrib/dev/acpica/components/namespace/nswalk.c	optional acpi
 contrib/dev/acpica/components/namespace/nsxfeval.c	optional acpi
 contrib/dev/acpica/components/namespace/nsxfname.c	optional acpi
 contrib/dev/acpica/components/namespace/nsxfobj.c	optional acpi
 contrib/dev/acpica/components/parser/psargs.c		optional acpi
 contrib/dev/acpica/components/parser/psloop.c		optional acpi
 contrib/dev/acpica/components/parser/psobject.c		optional acpi
 contrib/dev/acpica/components/parser/psopcode.c		optional acpi
 contrib/dev/acpica/components/parser/psopinfo.c		optional acpi
 contrib/dev/acpica/components/parser/psparse.c		optional acpi
 contrib/dev/acpica/components/parser/psscope.c		optional acpi
 contrib/dev/acpica/components/parser/pstree.c		optional acpi
 contrib/dev/acpica/components/parser/psutils.c		optional acpi
 contrib/dev/acpica/components/parser/pswalk.c		optional acpi
 contrib/dev/acpica/components/parser/psxface.c		optional acpi
 contrib/dev/acpica/components/resources/rsaddr.c	optional acpi
 contrib/dev/acpica/components/resources/rscalc.c	optional acpi
 contrib/dev/acpica/components/resources/rscreate.c	optional acpi
 contrib/dev/acpica/components/resources/rsdump.c	optional acpi
 contrib/dev/acpica/components/resources/rsdumpinfo.c	optional acpi
 contrib/dev/acpica/components/resources/rsinfo.c	optional acpi
 contrib/dev/acpica/components/resources/rsio.c		optional acpi
 contrib/dev/acpica/components/resources/rsirq.c		optional acpi
 contrib/dev/acpica/components/resources/rslist.c	optional acpi
 contrib/dev/acpica/components/resources/rsmemory.c	optional acpi
 contrib/dev/acpica/components/resources/rsmisc.c	optional acpi
 contrib/dev/acpica/components/resources/rsserial.c	optional acpi
 contrib/dev/acpica/components/resources/rsutils.c	optional acpi
 contrib/dev/acpica/components/resources/rsxface.c	optional acpi
 contrib/dev/acpica/components/tables/tbdata.c		optional acpi
 contrib/dev/acpica/components/tables/tbfadt.c		optional acpi
 contrib/dev/acpica/components/tables/tbfind.c		optional acpi
 contrib/dev/acpica/components/tables/tbinstal.c		optional acpi
 contrib/dev/acpica/components/tables/tbprint.c		optional acpi
 contrib/dev/acpica/components/tables/tbutils.c		optional acpi
 contrib/dev/acpica/components/tables/tbxface.c		optional acpi
 contrib/dev/acpica/components/tables/tbxfload.c		optional acpi
 contrib/dev/acpica/components/tables/tbxfroot.c		optional acpi
 contrib/dev/acpica/components/utilities/utaddress.c	optional acpi
 contrib/dev/acpica/components/utilities/utalloc.c	optional acpi
 contrib/dev/acpica/components/utilities/utbuffer.c	optional acpi
 contrib/dev/acpica/components/utilities/utcache.c	optional acpi
 contrib/dev/acpica/components/utilities/utcopy.c	optional acpi
 contrib/dev/acpica/components/utilities/utdebug.c	optional acpi
 contrib/dev/acpica/components/utilities/utdecode.c	optional acpi
 contrib/dev/acpica/components/utilities/utdelete.c	optional acpi
 contrib/dev/acpica/components/utilities/uterror.c	optional acpi
 contrib/dev/acpica/components/utilities/uteval.c	optional acpi
 contrib/dev/acpica/components/utilities/utexcep.c	optional acpi
 contrib/dev/acpica/components/utilities/utglobal.c	optional acpi
 contrib/dev/acpica/components/utilities/uthex.c		optional acpi
 contrib/dev/acpica/components/utilities/utids.c		optional acpi
 contrib/dev/acpica/components/utilities/utinit.c	optional acpi
 contrib/dev/acpica/components/utilities/utlock.c	optional acpi
 contrib/dev/acpica/components/utilities/utmath.c	optional acpi
 contrib/dev/acpica/components/utilities/utmisc.c	optional acpi
 contrib/dev/acpica/components/utilities/utmutex.c	optional acpi
 contrib/dev/acpica/components/utilities/utobject.c	optional acpi
 contrib/dev/acpica/components/utilities/utosi.c		optional acpi
 contrib/dev/acpica/components/utilities/utownerid.c	optional acpi
 contrib/dev/acpica/components/utilities/utpredef.c	optional acpi
 contrib/dev/acpica/components/utilities/utresrc.c	optional acpi
 contrib/dev/acpica/components/utilities/utstate.c	optional acpi
 contrib/dev/acpica/components/utilities/utstring.c	optional acpi
 contrib/dev/acpica/components/utilities/utuuid.c	optional acpi acpi_debug
 contrib/dev/acpica/components/utilities/utxface.c	optional acpi
 contrib/dev/acpica/components/utilities/utxferror.c	optional acpi
 contrib/dev/acpica/components/utilities/utxfinit.c	optional acpi
 #contrib/dev/acpica/components/utilities/utxfmutex.c	optional acpi
 contrib/ipfilter/netinet/fil.c	optional ipfilter inet \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/contrib/ipfilter"
 # IPFilter (contrib/ipfilter).  Every file below is built only when both the
 # ipfilter and inet options are configured, and is compiled with
 # -I$S/contrib/ipfilter.  Most entries also pass -Wno-unused; ip_proxy.c and
 # ip_lookup.c add ${NO_WSELF_ASSIGN}, and ip_lookup.c additionally passes
 # -Wno-error.
 contrib/ipfilter/netinet/ip_auth.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_fil_freebsd.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_frag.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_log.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_nat.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_proxy.c optional ipfilter inet \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_state.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_lookup.c optional ipfilter inet \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -Wno-error -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_pool.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_htable.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_sync.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/mlfk_ipl.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_nat6.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_rules.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_scan.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_dstlist.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/radix_ipf.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
 # libfdt (contrib/libfdt): built with the default rules whenever the fdt
 # option is configured.
 contrib/libfdt/fdt.c		optional fdt
 contrib/libfdt/fdt_ro.c		optional fdt
 contrib/libfdt/fdt_rw.c		optional fdt
 contrib/libfdt/fdt_strerror.c	optional fdt
 contrib/libfdt/fdt_sw.c		optional fdt
 contrib/libfdt/fdt_wip.c	optional fdt
 # NgATM (contrib/ngatm).  All files are compiled with -I$S/contrib/ngatm and
 # are grouped by option: ngatm_ccatm (api/), ngatm_atmbase (misc/ and msg/),
 # ngatm_sscfu and ngatm_sscop (saal/), ngatm_uni (sig/).  cc_conn.c is the
 # only entry built with ${NORMAL_C_NOWERROR} instead of ${NORMAL_C}.
 contrib/ngatm/netnatm/api/cc_conn.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C_NOWERROR} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_data.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_dump.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_port.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_sig.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_user.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/unisap.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/misc/straddr.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/misc/unimsg_common.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/msg/traffic.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/msg/uni_ie.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/msg/uni_msg.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/saal/saal_sscfu.c	optional ngatm_sscfu \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/saal/saal_sscop.c	optional ngatm_sscop \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_call.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_coord.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_party.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_print.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_reset.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_uni.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_unimsgcpy.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_verify.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 # Kernel crypto primitives.  Each entry lists the options that need the file,
 # joined with `|': configuring any one of them pulls the file in.
 crypto/blowfish/bf_ecb.c	optional ipsec
 crypto/blowfish/bf_skey.c	optional crypto | ipsec
 crypto/camellia/camellia.c	optional crypto | ipsec
 crypto/camellia/camellia-api.c	optional crypto | ipsec
 crypto/des/des_ecb.c		optional crypto | ipsec | netsmb
 crypto/des/des_setkey.c		optional crypto | ipsec | netsmb
 crypto/rc4/rc4.c		optional netgraph_mppc_encryption | kgssapi
 crypto/rijndael/rijndael-alg-fst.c optional crypto | geom_bde | \
 					 ipsec | random | wlan_ccmp
 crypto/rijndael/rijndael-api-fst.c optional geom_bde | random
 crypto/rijndael/rijndael-api.c	optional crypto | ipsec | wlan_ccmp
 crypto/sha1.c			optional carp | crypto | ipsec | \
 					 netgraph_mppc_encryption | sctp
 crypto/sha2/sha2.c		optional crypto | geom_bde | ipsec | random | \
 					 sctp | zfs
 crypto/sha2/sha256c.c		optional crypto | geom_bde | ipsec | random | \
 					 sctp | zfs
 crypto/siphash/siphash.c	optional inet | inet6
 crypto/siphash/siphash_test.c	optional inet | inet6
 ddb/db_access.c			optional ddb
 ddb/db_break.c			optional ddb
 ddb/db_capture.c		optional ddb
 ddb/db_command.c		optional ddb
 ddb/db_examine.c		optional ddb
 ddb/db_expr.c			optional ddb
 ddb/db_input.c			optional ddb
 ddb/db_lex.c			optional ddb
 ddb/db_main.c			optional ddb
 ddb/db_output.c			optional ddb
 ddb/db_print.c			optional ddb
 ddb/db_ps.c			optional ddb
 ddb/db_run.c			optional ddb
 ddb/db_script.c			optional ddb
 ddb/db_sym.c			optional ddb
 ddb/db_thread.c			optional ddb
 ddb/db_textdump.c		optional ddb
 ddb/db_variables.c		optional ddb
 ddb/db_watch.c			optional ddb
 ddb/db_write_cmd.c		optional ddb
 dev/aac/aac.c			optional aac
 dev/aac/aac_cam.c		optional aacp aac
 dev/aac/aac_debug.c		optional aac
 dev/aac/aac_disk.c		optional aac
 dev/aac/aac_linux.c		optional aac compat_linux
 dev/aac/aac_pci.c		optional aac pci
 dev/aacraid/aacraid.c		optional aacraid
 dev/aacraid/aacraid_cam.c	optional aacraid scbus
 dev/aacraid/aacraid_debug.c	optional aacraid
 dev/aacraid/aacraid_linux.c	optional aacraid compat_linux
 dev/aacraid/aacraid_pci.c	optional aacraid pci
 dev/acpi_support/acpi_wmi.c	optional acpi_wmi acpi
 dev/acpi_support/acpi_asus.c	optional acpi_asus acpi
 dev/acpi_support/acpi_asus_wmi.c	optional acpi_asus_wmi acpi
 dev/acpi_support/acpi_fujitsu.c	optional acpi_fujitsu acpi
 dev/acpi_support/acpi_hp.c	optional acpi_hp acpi
 dev/acpi_support/acpi_ibm.c	optional acpi_ibm acpi
 dev/acpi_support/acpi_panasonic.c optional acpi_panasonic acpi
 dev/acpi_support/acpi_sony.c	optional acpi_sony acpi
 dev/acpi_support/acpi_toshiba.c	optional acpi_toshiba acpi
 dev/acpi_support/atk0110.c	optional aibs acpi
 dev/acpica/Osd/OsdDebug.c	optional acpi
 dev/acpica/Osd/OsdHardware.c	optional acpi
 dev/acpica/Osd/OsdInterrupt.c	optional acpi
 dev/acpica/Osd/OsdMemory.c	optional acpi
 dev/acpica/Osd/OsdSchedule.c	optional acpi
 dev/acpica/Osd/OsdStream.c	optional acpi
 dev/acpica/Osd/OsdSynch.c	optional acpi
 dev/acpica/Osd/OsdTable.c	optional acpi
 dev/acpica/acpi.c		optional acpi
 dev/acpica/acpi_acad.c		optional acpi
 dev/acpica/acpi_battery.c	optional acpi
 dev/acpica/acpi_button.c	optional acpi
 dev/acpica/acpi_cmbat.c		optional acpi
 dev/acpica/acpi_cpu.c		optional acpi
 dev/acpica/acpi_ec.c		optional acpi
 dev/acpica/acpi_hpet.c		optional acpi
 dev/acpica/acpi_isab.c		optional acpi isa
 dev/acpica/acpi_lid.c		optional acpi
 dev/acpica/acpi_package.c	optional acpi
 dev/acpica/acpi_pci.c		optional acpi pci
 dev/acpica/acpi_pci_link.c	optional acpi pci
 dev/acpica/acpi_pcib.c		optional acpi pci
 dev/acpica/acpi_pcib_acpi.c	optional acpi pci
 dev/acpica/acpi_pcib_pci.c	optional acpi pci
 dev/acpica/acpi_perf.c		optional acpi
 dev/acpica/acpi_powerres.c	optional acpi
 dev/acpica/acpi_quirk.c		optional acpi
 dev/acpica/acpi_resource.c	optional acpi
 dev/acpica/acpi_smbat.c		optional acpi
 dev/acpica/acpi_thermal.c	optional acpi
 dev/acpica/acpi_throttle.c	optional acpi
 dev/acpica/acpi_timer.c		optional acpi
 dev/acpica/acpi_video.c		optional acpi_video acpi
 dev/acpica/acpi_dock.c		optional acpi_dock acpi
 dev/adlink/adlink.c		optional adlink
 dev/advansys/adv_eisa.c		optional adv eisa
 dev/advansys/adv_pci.c		optional adv pci
 dev/advansys/advansys.c		optional adv
 dev/advansys/advlib.c		optional adv
 dev/advansys/advmcode.c		optional adv
 dev/advansys/adw_pci.c		optional adw pci
 dev/advansys/adwcam.c		optional adw
 dev/advansys/adwlib.c		optional adw
 dev/advansys/adwmcode.c		optional adw
 dev/ae/if_ae.c			optional ae pci
 dev/age/if_age.c		optional age pci
 dev/agp/agp.c			optional agp pci
 dev/agp/agp_if.m		optional agp pci
 dev/aha/aha.c			optional aha
 dev/aha/aha_isa.c		optional aha isa
 dev/aha/aha_mca.c		optional aha mca
 dev/ahb/ahb.c			optional ahb eisa
 dev/ahci/ahci.c			optional ahci
 dev/ahci/ahciem.c		optional ahci
 dev/ahci/ahci_pci.c		optional ahci pci
 dev/aic/aic.c			optional aic
 dev/aic/aic_pccard.c		optional aic pccard
 dev/aic7xxx/ahc_eisa.c		optional ahc eisa
 dev/aic7xxx/ahc_isa.c		optional ahc isa
 dev/aic7xxx/ahc_pci.c		optional ahc pci \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/aic7xxx/ahd_pci.c		optional ahd pci \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/aic7xxx/aic7770.c		optional ahc
 dev/aic7xxx/aic79xx.c		optional ahd pci
 dev/aic7xxx/aic79xx_osm.c	optional ahd pci
 dev/aic7xxx/aic79xx_pci.c	optional ahd pci
 dev/aic7xxx/aic79xx_reg_print.c	optional ahd pci ahd_reg_pretty_print
 dev/aic7xxx/aic7xxx.c		optional ahc
 dev/aic7xxx/aic7xxx_93cx6.c	optional ahc
 dev/aic7xxx/aic7xxx_osm.c	optional ahc
 dev/aic7xxx/aic7xxx_pci.c	optional ahc pci
 dev/aic7xxx/aic7xxx_reg_print.c	optional ahc ahc_reg_pretty_print
 dev/alc/if_alc.c		optional alc pci
 dev/ale/if_ale.c		optional ale pci
 dev/alpm/alpm.c			optional alpm pci
 dev/altera/avgen/altera_avgen.c		optional altera_avgen
 dev/altera/avgen/altera_avgen_fdt.c	optional altera_avgen fdt
 dev/altera/avgen/altera_avgen_nexus.c	optional altera_avgen
 dev/altera/sdcard/altera_sdcard.c	optional altera_sdcard
 dev/altera/sdcard/altera_sdcard_disk.c	optional altera_sdcard
 dev/altera/sdcard/altera_sdcard_io.c	optional altera_sdcard
 dev/altera/sdcard/altera_sdcard_fdt.c	optional altera_sdcard fdt
 dev/altera/sdcard/altera_sdcard_nexus.c	optional altera_sdcard
 dev/altera/pio/pio.c		optional altera_pio
 dev/altera/pio/pio_if.m		optional altera_pio
 dev/amdpm/amdpm.c		optional amdpm pci | nfpm pci
 dev/amdsmb/amdsmb.c		optional amdsmb pci
 dev/amr/amr.c			optional amr
 dev/amr/amr_cam.c		optional amrp amr
 dev/amr/amr_disk.c		optional amr
 dev/amr/amr_linux.c		optional amr compat_linux
 dev/amr/amr_pci.c		optional amr pci
 dev/an/if_an.c			optional an
 dev/an/if_an_isa.c		optional an isa
 dev/an/if_an_pccard.c		optional an pccard
 dev/an/if_an_pci.c		optional an pci
 #
 dev/ata/ata_if.m		optional ata | atacore
 dev/ata/ata-all.c		optional ata | atacore
 dev/ata/ata-dma.c		optional ata | atacore
 dev/ata/ata-lowlevel.c		optional ata | atacore
 dev/ata/ata-sata.c		optional ata | atacore
 dev/ata/ata-card.c		optional ata pccard | atapccard
 dev/ata/ata-cbus.c		optional ata pc98 | atapc98
 dev/ata/ata-isa.c		optional ata isa | ataisa
 dev/ata/ata-pci.c		optional ata pci | atapci
 dev/ata/chipsets/ata-ahci.c	optional ata pci | ataahci | ataacerlabs | \
 					 ataati | ataintel | atajmicron | \
 					 atavia | atanvidia
 dev/ata/chipsets/ata-acard.c	optional ata pci | ataacard
 dev/ata/chipsets/ata-acerlabs.c	optional ata pci | ataacerlabs
 dev/ata/chipsets/ata-adaptec.c	optional ata pci | ataadaptec
 dev/ata/chipsets/ata-amd.c	optional ata pci | ataamd
 dev/ata/chipsets/ata-ati.c	optional ata pci | ataati
 dev/ata/chipsets/ata-cenatek.c	optional ata pci | atacenatek
 dev/ata/chipsets/ata-cypress.c	optional ata pci | atacypress
 dev/ata/chipsets/ata-cyrix.c	optional ata pci | atacyrix
 dev/ata/chipsets/ata-highpoint.c	optional ata pci | atahighpoint
 dev/ata/chipsets/ata-intel.c	optional ata pci | ataintel
 dev/ata/chipsets/ata-ite.c	optional ata pci | ataite
 dev/ata/chipsets/ata-jmicron.c	optional ata pci | atajmicron
 dev/ata/chipsets/ata-marvell.c	optional ata pci | atamarvell | ataadaptec
 dev/ata/chipsets/ata-micron.c	optional ata pci | atamicron
 dev/ata/chipsets/ata-national.c	optional ata pci | atanational
 dev/ata/chipsets/ata-netcell.c	optional ata pci | atanetcell
 dev/ata/chipsets/ata-nvidia.c	optional ata pci | atanvidia
 dev/ata/chipsets/ata-promise.c	optional ata pci | atapromise
 dev/ata/chipsets/ata-serverworks.c	optional ata pci | ataserverworks
 dev/ata/chipsets/ata-siliconimage.c	optional ata pci | atasiliconimage | ataati
 dev/ata/chipsets/ata-sis.c	optional ata pci | atasis
 dev/ata/chipsets/ata-via.c	optional ata pci | atavia
 #
 dev/ath/if_ath_pci.c		optional ath_pci pci \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 #
 dev/ath/if_ath_ahb.c		optional ath_ahb \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 #
 dev/ath/if_ath.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_alq.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_beacon.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_btcoex.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_debug.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_keycache.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_led.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_lna_div.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_tx.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_tx_edma.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_tx_ht.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_tdma.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_sysctl.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_rx.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_rx_edma.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_spectral.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ah_osdep.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 #
 dev/ath/ath_hal/ah.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_v1.c	optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_v3.c	optional ath_hal | ath_ar5211 | ath_ar5212 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_v14.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_v4k.c \
 	optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_9287.c \
 	optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_regdomain.c	optional ath \
 	compile-with "${NORMAL_C} ${NO_WSHIFT_COUNT_NEGATIVE} ${NO_WSHIFT_COUNT_OVERFLOW} -I$S/dev/ath"
 # ar5210
 dev/ath/ath_hal/ar5210/ar5210_attach.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_beacon.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_interrupts.c	optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_keycache.c	optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_misc.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_phy.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_power.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_recv.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_reset.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_xmit.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar5211
 dev/ath/ath_hal/ar5211/ar5211_attach.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_beacon.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_interrupts.c	optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_keycache.c	optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_misc.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_phy.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_power.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_recv.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_reset.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_xmit.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar5212.  These files are also needed by every ar5416-family chip option
 # (ath_ar5416, ath_ar9160, ath_ar9280, ath_ar9285, ath_ar9287), so each of
 # those options must appear as its own `|' alternative: options written side
 # by side without `|' are AND'ed, which would require both to be configured.
 dev/ath/ath_hal/ar5212/ar5212_ani.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_attach.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_beacon.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_eeprom.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_gpio.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_interrupts.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_keycache.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_misc.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_phy.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_power.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_recv.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_reset.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_rfgain.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_xmit.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar5416 (depends on ar5212)
 dev/ath/ath_hal/ar5416/ar5416_ani.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_attach.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_beacon.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_btcoex.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_cal.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_cal_iq.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_cal_adcgain.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_cal_adcdc.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_eeprom.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_gpio.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_interrupts.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_keycache.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_misc.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_phy.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_power.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_radar.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_recv.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_reset.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_spectral.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_xmit.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9130 (depends upon ar5416) - also requires AH_SUPPORT_AR9130
 #
 # Since this is an embedded MAC SoC, there's no need to compile it into the
 # default HAL.
 dev/ath/ath_hal/ar9001/ar9130_attach.c optional ath_ar9130 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9001/ar9130_phy.c optional ath_ar9130 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9001/ar9130_eeprom.c optional ath_ar9130 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9160 (depends on ar5416)
 dev/ath/ath_hal/ar9001/ar9160_attach.c optional ath_hal | ath_ar9160 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9280 (depends on ar5416)
 dev/ath/ath_hal/ar9002/ar9280_attach.c optional ath_hal | ath_ar9280 | \
 	ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9280_olc.c optional ath_hal | ath_ar9280 | \
 	ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9285 (depends on ar5416 and ar9280)
 dev/ath/ath_hal/ar9002/ar9285_attach.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_btcoex.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_reset.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_cal.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_phy.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_diversity.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9287 (depends on ar5416)
 dev/ath/ath_hal/ar9002/ar9287_attach.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9287_reset.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9287_cal.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9287_olc.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 
 # ar9300
 contrib/dev/ath/ath_hal/ar9300/ar9300_ani.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_attach.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_eeprom.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WCONSTANT_CONVERSION}"
 contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_interrupts.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_keycache.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_mci.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_paprd.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_phy.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_power.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_radar.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_radio.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_recv.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_recv_ds.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WSOMETIMES_UNINITIALIZED} -Wno-unused-function"
 contrib/dev/ath/ath_hal/ar9300/ar9300_stub.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_stub_funcs.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_timer.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_xmit_ds.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 
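 # rf backends (ar2316 and ar2317 are not pulled in by ath_hal; they are built
 # only when ath_rf2316 / ath_rf2317 are configured explicitly)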
 dev/ath/ath_hal/ar5212/ar2316.c	optional ath_rf2316 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar2317.c	optional ath_rf2317 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar2413.c	optional ath_hal | ath_rf2413 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar2425.c	optional ath_hal | ath_rf2425 | ath_rf2417 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5111.c	optional ath_hal | ath_rf5111 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5112.c	optional ath_hal | ath_rf5112 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5413.c	optional ath_hal | ath_rf5413 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar2133.c optional ath_hal | ath_ar5416 | \
 	ath_ar9130 | ath_ar9160 | ath_ar9280 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9280.c optional ath_hal | ath_ar9280 | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9287.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 
 # ath rate control algorithms
 dev/ath/ath_rate/amrr/amrr.c	optional ath_rate_amrr \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_rate/onoe/onoe.c	optional ath_rate_onoe \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_rate/sample/sample.c	optional ath_rate_sample \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 # ath DFS modules
 dev/ath/ath_dfs/null/dfs_null.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 #
 dev/bce/if_bce.c		optional bce
 dev/bfe/if_bfe.c		optional bfe
 dev/bge/if_bge.c		optional bge
 dev/bktr/bktr_audio.c		optional bktr pci
 dev/bktr/bktr_card.c		optional bktr pci
 dev/bktr/bktr_core.c		optional bktr pci
 dev/bktr/bktr_i2c.c		optional bktr pci smbus
 dev/bktr/bktr_os.c		optional bktr pci
 dev/bktr/bktr_tuner.c		optional bktr pci
 dev/bktr/msp34xx.c		optional bktr pci
 dev/buslogic/bt.c		optional bt
 dev/buslogic/bt_eisa.c		optional bt eisa
 dev/buslogic/bt_isa.c		optional bt isa
 dev/buslogic/bt_mca.c		optional bt mca
 dev/buslogic/bt_pci.c		optional bt pci
 dev/bwi/bwimac.c		optional bwi
 dev/bwi/bwiphy.c		optional bwi
 dev/bwi/bwirf.c			optional bwi
 dev/bwi/if_bwi.c		optional bwi
 dev/bwi/if_bwi_pci.c		optional bwi pci
 # XXX Work around clang warning, until maintainer approves fix.
 dev/bwn/if_bwn.c		optional bwn siba_bwn \
 	compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}"
 dev/cardbus/cardbus.c		optional cardbus
 dev/cardbus/cardbus_cis.c	optional cardbus
 dev/cardbus/cardbus_device.c	optional cardbus
 dev/cas/if_cas.c		optional cas
 dev/cfi/cfi_bus_fdt.c		optional cfi fdt
 dev/cfi/cfi_bus_nexus.c		optional cfi
 dev/cfi/cfi_core.c		optional cfi
 dev/cfi/cfi_dev.c		optional cfi
 dev/cfi/cfi_disk.c		optional cfid
 dev/ciss/ciss.c			optional ciss
 dev/cm/smc90cx6.c		optional cm
 dev/cmx/cmx.c			optional cmx
 dev/cmx/cmx_pccard.c		optional cmx pccard
 dev/cpufreq/ichss.c		optional cpufreq
 dev/cs/if_cs.c			optional cs
 dev/cs/if_cs_isa.c		optional cs isa
 dev/cs/if_cs_pccard.c		optional cs pccard
 dev/cxgb/cxgb_main.c		optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/cxgb_sge.c		optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_mc5.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_vsc7323.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_vsc8211.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_ael1002.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_aq100x.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_mv88e1xxx.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_xgmac.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_t3_hw.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_tn1010.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/sys/uipc_mvec.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/cxgb_t3fw.c		optional cxgb cxgb_t3fw \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgbe/t4_mp_ring.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe -fms-extensions"
 dev/cxgbe/t4_main.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_netmap.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_sge.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_l2t.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_tracer.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/common/t4_hw.c	optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 t4fw_cfg.c		optional cxgbe					\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk t4fw_cfg.fw:t4fw_cfg t4fw_cfg_uwire.fw:t4fw_cfg_uwire t4fw.fw:t4fw -mt4fw_cfg -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"t4fw_cfg.c"
 t4fw_cfg.fwo		optional cxgbe					\
 	dependency	"t4fw_cfg.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t4fw_cfg.fwo"
 t4fw_cfg.fw		optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t4fw_cfg.txt"		\
 	compile-with	"${CP} ${.ALLSRC} ${.TARGET}"			\
 	no-obj no-implicit-rule						\
 	clean		"t4fw_cfg.fw"
 t4fw_cfg_uwire.fwo	optional cxgbe					\
 	dependency	"t4fw_cfg_uwire.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t4fw_cfg_uwire.fwo"
 t4fw_cfg_uwire.fw	optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t4fw_cfg_uwire.txt"	\
 	compile-with	"${CP} ${.ALLSRC} ${.TARGET}"			\
 	no-obj no-implicit-rule						\
 	clean		"t4fw_cfg_uwire.fw"
 t4fw.fwo		optional cxgbe					\
 	dependency	"t4fw.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t4fw.fwo"
 t4fw.fw			optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t4fw-1.11.27.0.bin.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"t4fw.fw"
 t5fw_cfg.c		optional cxgbe					\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk t5fw_cfg.fw:t5fw_cfg t5fw.fw:t5fw -mt5fw_cfg -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"t5fw_cfg.c"
 t5fw_cfg.fwo		optional cxgbe					\
 	dependency	"t5fw_cfg.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t5fw_cfg.fwo"
 t5fw_cfg.fw		optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t5fw_cfg.txt"		\
 	compile-with	"${CP} ${.ALLSRC} ${.TARGET}"			\
 	no-obj no-implicit-rule						\
 	clean		"t5fw_cfg.fw"
 t5fw.fwo		optional cxgbe					\
 	dependency	"t5fw.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t5fw.fwo"
 t5fw.fw			optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t5fw-1.11.27.0.bin.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"t5fw.fw"
 dev/cy/cy.c			optional cy
 dev/cy/cy_isa.c			optional cy isa
 dev/cy/cy_pci.c			optional cy pci
 dev/dc/if_dc.c			optional dc pci
 dev/dc/dcphy.c			optional dc pci
 dev/dc/pnphy.c			optional dc pci
 dev/dcons/dcons.c		optional dcons
 dev/dcons/dcons_crom.c		optional dcons_crom
 dev/dcons/dcons_os.c		optional dcons
 dev/de/if_de.c			optional de pci
 dev/digi/CX.c			optional digi_CX
 dev/digi/CX_PCI.c		optional digi_CX_PCI
 dev/digi/EPCX.c			optional digi_EPCX
 dev/digi/EPCX_PCI.c		optional digi_EPCX_PCI
 dev/digi/Xe.c			optional digi_Xe
 dev/digi/Xem.c			optional digi_Xem
 dev/digi/Xr.c			optional digi_Xr
 dev/digi/digi.c			optional digi
 dev/digi/digi_isa.c		optional digi isa
 dev/digi/digi_pci.c		optional digi pci
 dev/dpt/dpt_eisa.c		optional dpt eisa
 dev/dpt/dpt_pci.c		optional dpt pci
 dev/dpt/dpt_scsi.c		optional dpt
 dev/drm/ati_pcigart.c		optional drm
 dev/drm/drm_agpsupport.c	optional drm
 dev/drm/drm_auth.c		optional drm
 dev/drm/drm_bufs.c		optional drm
 dev/drm/drm_context.c		optional drm
 dev/drm/drm_dma.c		optional drm
 dev/drm/drm_drawable.c		optional drm
 dev/drm/drm_drv.c		optional drm
 dev/drm/drm_fops.c		optional drm
 dev/drm/drm_hashtab.c		optional drm
 dev/drm/drm_ioctl.c		optional drm
 dev/drm/drm_irq.c		optional drm
 dev/drm/drm_lock.c		optional drm
 dev/drm/drm_memory.c		optional drm
 dev/drm/drm_mm.c		optional drm
 dev/drm/drm_pci.c		optional drm
 dev/drm/drm_scatter.c		optional drm
 dev/drm/drm_sman.c		optional drm
 dev/drm/drm_sysctl.c		optional drm
 dev/drm/drm_vm.c		optional drm
 dev/drm/i915_dma.c		optional i915drm
 dev/drm/i915_drv.c		optional i915drm
 dev/drm/i915_irq.c		optional i915drm
 dev/drm/i915_mem.c		optional i915drm
 dev/drm/i915_suspend.c		optional i915drm
 dev/drm/mach64_dma.c		optional mach64drm
 dev/drm/mach64_drv.c		optional mach64drm
 dev/drm/mach64_irq.c		optional mach64drm
 dev/drm/mach64_state.c		optional mach64drm
 dev/drm/mga_dma.c		optional mgadrm
 dev/drm/mga_drv.c		optional mgadrm
 dev/drm/mga_irq.c		optional mgadrm
 dev/drm/mga_state.c		optional mgadrm
 dev/drm/mga_warp.c		optional mgadrm
 dev/drm/r128_cce.c		optional r128drm \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/drm/r128_drv.c		optional r128drm
 dev/drm/r128_irq.c		optional r128drm
 dev/drm/r128_state.c		optional r128drm
 dev/drm/r300_cmdbuf.c		optional radeondrm
 dev/drm/r600_blit.c		optional radeondrm
 dev/drm/r600_cp.c		optional radeondrm \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/drm/radeon_cp.c		optional radeondrm \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/drm/radeon_cs.c		optional radeondrm
 dev/drm/radeon_drv.c		optional radeondrm
 dev/drm/radeon_irq.c		optional radeondrm
 dev/drm/radeon_mem.c		optional radeondrm
 dev/drm/radeon_state.c		optional radeondrm
 dev/drm/savage_bci.c		optional savagedrm
 dev/drm/savage_drv.c		optional savagedrm
 dev/drm/savage_state.c		optional savagedrm
 dev/drm/sis_drv.c		optional sisdrm
 dev/drm/sis_ds.c		optional sisdrm
 dev/drm/sis_mm.c		optional sisdrm
 dev/drm/tdfx_drv.c		optional tdfxdrm
 dev/drm/via_dma.c		optional viadrm
 dev/drm/via_dmablit.c		optional viadrm
 dev/drm/via_drv.c		optional viadrm
 dev/drm/via_irq.c		optional viadrm
 dev/drm/via_map.c		optional viadrm
 dev/drm/via_mm.c		optional viadrm
 dev/drm/via_verifier.c		optional viadrm
 dev/drm/via_video.c		optional viadrm
 dev/ed/if_ed.c			optional ed
 dev/ed/if_ed_novell.c		optional ed
 dev/ed/if_ed_rtl80x9.c		optional ed
 dev/ed/if_ed_pccard.c		optional ed pccard
 dev/ed/if_ed_pci.c		optional ed pci
 dev/eisa/eisa_if.m		standard
 dev/eisa/eisaconf.c		optional eisa
 dev/e1000/if_em.c		optional em \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/if_lem.c		optional em \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/if_igb.c		optional igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_80003es2lan.c	optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82540.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82541.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82542.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82543.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82571.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82575.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_ich8lan.c	optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_i210.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_api.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_mac.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_manage.c	optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_nvm.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_phy.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_vf.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_mbx.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_osdep.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/et/if_et.c			optional et
 dev/en/if_en_pci.c		optional en pci
 dev/en/midway.c			optional en
 dev/ep/if_ep.c			optional ep
 dev/ep/if_ep_eisa.c		optional ep eisa
 dev/ep/if_ep_isa.c		optional ep isa
 dev/ep/if_ep_mca.c		optional ep mca
 dev/ep/if_ep_pccard.c		optional ep pccard
 dev/esp/esp_pci.c		optional esp pci
 dev/esp/ncr53c9x.c		optional esp
 dev/etherswitch/arswitch/arswitch.c		optional arswitch
 dev/etherswitch/arswitch/arswitch_reg.c		optional arswitch
 dev/etherswitch/arswitch/arswitch_phy.c		optional arswitch
 dev/etherswitch/arswitch/arswitch_8216.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_8226.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_8316.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_8327.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_7240.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_9340.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_vlans.c	optional arswitch
 dev/etherswitch/etherswitch.c		optional etherswitch
 dev/etherswitch/etherswitch_if.m	optional etherswitch
 dev/etherswitch/ip17x/ip17x.c		optional ip17x
 dev/etherswitch/ip17x/ip175c.c		optional ip17x
 dev/etherswitch/ip17x/ip175d.c		optional ip17x
 dev/etherswitch/ip17x/ip17x_phy.c	optional ip17x
 dev/etherswitch/ip17x/ip17x_vlans.c	optional ip17x
 dev/etherswitch/mdio_if.m		optional miiproxy
 dev/etherswitch/mdio.c			optional miiproxy
 dev/etherswitch/miiproxy.c		optional miiproxy
 dev/etherswitch/rtl8366/rtl8366rb.c	optional rtl8366rb
 dev/etherswitch/ukswitch/ukswitch.c	optional ukswitch
 dev/ex/if_ex.c			optional ex
 dev/ex/if_ex_isa.c		optional ex isa
 dev/ex/if_ex_pccard.c		optional ex pccard
 dev/exca/exca.c			optional cbb
 dev/fatm/if_fatm.c		optional fatm pci
 dev/fb/fbd.c			optional fbd | vt
 dev/fb/fb_if.m			standard
 dev/fb/splash.c			optional sc splash
 dev/fdt/fdt_clock.c		optional fdt fdt_clock
 dev/fdt/fdt_clock_if.m		optional fdt fdt_clock
 dev/fdt/fdt_common.c		optional fdt
 dev/fdt/fdt_pinctrl.c		optional fdt fdt_pinctrl
 dev/fdt/fdt_pinctrl_if.m	optional fdt fdt_pinctrl
 dev/fdt/fdt_slicer.c		optional fdt cfi | fdt nand
 dev/fdt/fdt_static_dtb.S	optional fdt fdt_dtb_static \
 	dependency	"$S/boot/fdt/dts/${MACHINE}/${FDT_DTS_FILE}"
 dev/fdt/simplebus.c		optional fdt
 dev/fe/if_fe.c			optional fe
 dev/fe/if_fe_pccard.c		optional fe pccard
 dev/filemon/filemon.c		optional filemon
 dev/firewire/firewire.c		optional firewire
 dev/firewire/fwcrom.c		optional firewire
 dev/firewire/fwdev.c		optional firewire
 dev/firewire/fwdma.c		optional firewire
 dev/firewire/fwmem.c		optional firewire
 dev/firewire/fwohci.c		optional firewire
 dev/firewire/fwohci_pci.c	optional firewire pci
 dev/firewire/if_fwe.c		optional fwe
 dev/firewire/if_fwip.c		optional fwip
 dev/firewire/sbp.c		optional sbp
 dev/firewire/sbp_targ.c		optional sbp_targ
 dev/flash/at45d.c		optional at45d
 dev/flash/mx25l.c		optional mx25l
 dev/fxp/if_fxp.c		optional fxp
 dev/fxp/inphy.c			optional fxp
 dev/gem/if_gem.c		optional gem
 dev/gem/if_gem_pci.c		optional gem pci
 dev/gem/if_gem_sbus.c		optional gem sbus
 dev/gpio/gpiobus.c		optional gpio				\
 	dependency	"gpiobus_if.h"
 dev/gpio/gpioc.c		optional gpio				\
 	dependency	"gpio_if.h"
 dev/gpio/gpioiic.c		optional gpioiic
 dev/gpio/gpioled.c		optional gpioled
 dev/gpio/gpio_if.m		optional gpio
 dev/gpio/gpiobus_if.m		optional gpio
 dev/gpio/ofw_gpiobus.c		optional fdt gpio
 dev/hatm/if_hatm.c		optional hatm pci
 dev/hatm/if_hatm_intr.c		optional hatm pci
 dev/hatm/if_hatm_ioctl.c	optional hatm pci
 dev/hatm/if_hatm_rx.c		optional hatm pci
 dev/hatm/if_hatm_tx.c		optional hatm pci
 dev/hifn/hifn7751.c		optional hifn
 dev/hme/if_hme.c		optional hme
 dev/hme/if_hme_pci.c		optional hme pci
 dev/hme/if_hme_sbus.c		optional hme sbus
 dev/hptiop/hptiop.c		optional hptiop scbus
 dev/hwpmc/hwpmc_logging.c	optional hwpmc
 dev/hwpmc/hwpmc_mod.c		optional hwpmc
 dev/hwpmc/hwpmc_soft.c		optional hwpmc
 dev/ichsmb/ichsmb.c		optional ichsmb
 dev/ichsmb/ichsmb_pci.c		optional ichsmb pci
 dev/ida/ida.c			optional ida
 dev/ida/ida_disk.c		optional ida
 dev/ida/ida_eisa.c		optional ida eisa
 dev/ida/ida_pci.c		optional ida pci
 dev/ie/if_ie.c			optional ie isa nowerror
 dev/ie/if_ie_isa.c		optional ie isa
 dev/iicbus/ad7418.c		optional ad7418
 dev/iicbus/ds133x.c		optional ds133x
 dev/iicbus/ds1374.c		optional ds1374
 dev/iicbus/ds1672.c		optional ds1672
 dev/iicbus/icee.c		optional icee
 dev/iicbus/if_ic.c		optional ic
 dev/iicbus/iic.c		optional iic
 dev/iicbus/iicbb.c		optional iicbb
 dev/iicbus/iicbb_if.m		optional iicbb
 dev/iicbus/iicbus.c		optional iicbus
 dev/iicbus/iicbus_if.m		optional iicbus
 dev/iicbus/iiconf.c		optional iicbus
 dev/iicbus/iicsmb.c		optional iicsmb				\
 	dependency	"iicbus_if.h"
 dev/iicbus/iicoc.c		optional iicoc
 dev/iicbus/lm75.c		optional lm75
 dev/iicbus/pcf8563.c		optional pcf8563
 dev/iicbus/s35390a.c		optional s35390a
 dev/iir/iir.c			optional iir
 dev/iir/iir_ctrl.c		optional iir
 dev/iir/iir_pci.c		optional iir pci
 dev/intpm/intpm.c		optional intpm pci
 # XXX Work around clang warning, until maintainer approves fix.
 dev/ips/ips.c			optional ips \
 	compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}"
 dev/ips/ips_commands.c		optional ips
 dev/ips/ips_disk.c		optional ips
 dev/ips/ips_ioctl.c		optional ips
 dev/ips/ips_pci.c		optional ips pci
 dev/ipw/if_ipw.c		optional ipw
 ipwbssfw.c			optional ipwbssfw | ipwfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk ipw_bss.fw:ipw_bss:130 -lintel_ipw -mipw_bss -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"ipwbssfw.c"
 ipw_bss.fwo			optional ipwbssfw | ipwfw		\
 	dependency	"ipw_bss.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"ipw_bss.fwo"
 ipw_bss.fw			optional ipwbssfw | ipwfw		\
 	dependency	"$S/contrib/dev/ipw/ipw2100-1.3.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"ipw_bss.fw"
 ipwibssfw.c			optional ipwibssfw | ipwfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk ipw_ibss.fw:ipw_ibss:130 -lintel_ipw -mipw_ibss -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"ipwibssfw.c"
 ipw_ibss.fwo			optional ipwibssfw | ipwfw		\
 	dependency	"ipw_ibss.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"ipw_ibss.fwo"
 ipw_ibss.fw			optional ipwibssfw | ipwfw		\
 	dependency	"$S/contrib/dev/ipw/ipw2100-1.3-i.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"ipw_ibss.fw"
 ipwmonitorfw.c			optional ipwmonitorfw | ipwfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk ipw_monitor.fw:ipw_monitor:130 -lintel_ipw -mipw_monitor -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"ipwmonitorfw.c"
 ipw_monitor.fwo			optional ipwmonitorfw | ipwfw		\
 	dependency	"ipw_monitor.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"ipw_monitor.fwo"
 ipw_monitor.fw			optional ipwmonitorfw | ipwfw		\
 	dependency	"$S/contrib/dev/ipw/ipw2100-1.3-p.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"ipw_monitor.fw"
 dev/iscsi/icl.c			optional iscsi | ctl 
 dev/iscsi/icl_proxy.c		optional iscsi | ctl
 dev/iscsi/iscsi.c		optional iscsi scbus
 dev/iscsi_initiator/iscsi.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/iscsi_subr.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/isc_cam.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/isc_soc.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/isc_sm.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/isc_subr.c	optional iscsi_initiator scbus
 dev/ismt/ismt.c			optional ismt
 dev/isp/isp.c			optional isp
 dev/isp/isp_freebsd.c		optional isp
 dev/isp/isp_library.c		optional isp
 dev/isp/isp_pci.c		optional isp pci
 dev/isp/isp_sbus.c		optional isp sbus
 dev/isp/isp_target.c		optional isp
 dev/ispfw/ispfw.c		optional ispfw
 dev/iwi/if_iwi.c		optional iwi
 iwibssfw.c			optional iwibssfw | iwifw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwi_bss.fw:iwi_bss:300 -lintel_iwi -miwi_bss -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwibssfw.c"
 iwi_bss.fwo			optional iwibssfw | iwifw		\
 	dependency	"iwi_bss.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwi_bss.fwo"
 iwi_bss.fw			optional iwibssfw | iwifw		\
 	dependency	"$S/contrib/dev/iwi/ipw2200-bss.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwi_bss.fw"
 iwiibssfw.c			optional iwiibssfw | iwifw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwi_ibss.fw:iwi_ibss:300 -lintel_iwi -miwi_ibss -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwiibssfw.c"
 iwi_ibss.fwo			optional iwiibssfw | iwifw		\
 	dependency	"iwi_ibss.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwi_ibss.fwo"
 iwi_ibss.fw			optional iwiibssfw | iwifw		\
 	dependency	"$S/contrib/dev/iwi/ipw2200-ibss.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwi_ibss.fw"
 iwimonitorfw.c			optional iwimonitorfw | iwifw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwi_monitor.fw:iwi_monitor:300 -lintel_iwi -miwi_monitor -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwimonitorfw.c"
 iwi_monitor.fwo			optional iwimonitorfw | iwifw		\
 	dependency	"iwi_monitor.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwi_monitor.fwo"
 iwi_monitor.fw			optional iwimonitorfw | iwifw		\
 	dependency	"$S/contrib/dev/iwi/ipw2200-sniffer.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwi_monitor.fw"
 dev/iwn/if_iwn.c		optional iwn
 iwn1000fw.c			optional iwn1000fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn1000.fw:iwn1000fw -miwn1000fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn1000fw.c"
 iwn1000fw.fwo			optional iwn1000fw | iwnfw		\
 	dependency	"iwn1000.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn1000fw.fwo"
 iwn1000.fw			optional iwn1000fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-1000-39.31.5.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn1000.fw"
 iwn100fw.c			optional iwn100fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn100.fw:iwn100fw -miwn100fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn100fw.c"
 iwn100fw.fwo			optional iwn100fw | iwnfw		\
 	dependency	"iwn100.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn100fw.fwo"
 iwn100.fw			optional iwn100fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-100-39.31.5.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn100.fw"
 iwn105fw.c			optional iwn105fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn105.fw:iwn105fw -miwn105fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn105fw.c"
 iwn105fw.fwo			optional iwn105fw | iwnfw		\
 	dependency	"iwn105.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn105fw.fwo"
 iwn105.fw			optional iwn105fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-105-6-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn105.fw"
 iwn135fw.c			optional iwn135fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn135.fw:iwn135fw -miwn135fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn135fw.c"
 iwn135fw.fwo			optional iwn135fw | iwnfw		\
 	dependency	"iwn135.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn135fw.fwo"
 iwn135.fw			optional iwn135fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-135-6-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn135.fw"
 iwn2000fw.c			optional iwn2000fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn2000.fw:iwn2000fw -miwn2000fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn2000fw.c"
 iwn2000fw.fwo			optional iwn2000fw | iwnfw		\
 	dependency	"iwn2000.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn2000fw.fwo"
 iwn2000.fw			optional iwn2000fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-2000-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn2000.fw"
 iwn2030fw.c			optional iwn2030fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn2030.fw:iwn2030fw -miwn2030fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn2030fw.c"
 iwn2030fw.fwo			optional iwn2030fw | iwnfw		\
 	dependency	"iwn2030.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn2030fw.fwo"
 iwn2030.fw			optional iwn2030fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwnwifi-2030-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn2030.fw"
 iwn4965fw.c			optional iwn4965fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn4965.fw:iwn4965fw -miwn4965fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn4965fw.c"
 iwn4965fw.fwo			optional iwn4965fw | iwnfw		\
 	dependency	"iwn4965.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn4965fw.fwo"
 iwn4965.fw			optional iwn4965fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-4965-228.61.2.24.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn4965.fw"
 iwn5000fw.c			optional iwn5000fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn5000.fw:iwn5000fw -miwn5000fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn5000fw.c"
 iwn5000fw.fwo		optional iwn5000fw | iwnfw			\
 	dependency	"iwn5000.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn5000fw.fwo"
 iwn5000.fw			optional iwn5000fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-5000-8.83.5.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn5000.fw"
 iwn5150fw.c			optional iwn5150fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn5150.fw:iwn5150fw -miwn5150fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn5150fw.c"
 iwn5150fw.fwo			optional iwn5150fw | iwnfw		\
 	dependency	"iwn5150.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn5150fw.fwo"
 iwn5150.fw			optional iwn5150fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-5150-8.24.2.2.fw.uu"\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn5150.fw"
 iwn6000fw.c			optional iwn6000fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn6000.fw:iwn6000fw -miwn6000fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn6000fw.c"
 iwn6000fw.fwo			optional iwn6000fw | iwnfw		\
 	dependency	"iwn6000.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn6000fw.fwo"
 iwn6000.fw			optional iwn6000fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-6000-9.221.4.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn6000.fw"
 iwn6000g2afw.c			optional iwn6000g2afw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn6000g2a.fw:iwn6000g2afw -miwn6000g2afw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn6000g2afw.c"
 iwn6000g2afw.fwo		optional iwn6000g2afw | iwnfw		\
 	dependency	"iwn6000g2a.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn6000g2afw.fwo"
 iwn6000g2a.fw			optional iwn6000g2afw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-6000g2a-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn6000g2a.fw"
 iwn6000g2bfw.c			optional iwn6000g2bfw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn6000g2b.fw:iwn6000g2bfw -miwn6000g2bfw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn6000g2bfw.c"
 iwn6000g2bfw.fwo		optional iwn6000g2bfw | iwnfw		\
 	dependency	"iwn6000g2b.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn6000g2bfw.fwo"
 iwn6000g2b.fw			optional iwn6000g2bfw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-6000g2b-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn6000g2b.fw"
 iwn6050fw.c			optional iwn6050fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn6050.fw:iwn6050fw -miwn6050fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn6050fw.c"
 iwn6050fw.fwo			optional iwn6050fw | iwnfw		\
 	dependency	"iwn6050.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn6050fw.fwo"
 iwn6050.fw			optional iwn6050fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-6050-41.28.5.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn6050.fw"
 dev/ixgb/if_ixgb.c		optional ixgb
 dev/ixgb/ixgb_ee.c		optional ixgb
 dev/ixgb/ixgb_hw.c		optional ixgb
 dev/ixgbe/ixgbe.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP"
 dev/ixgbe/ixv.c			optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_phy.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_api.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_common.c	optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_mbx.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_vf.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_82598.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_82599.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_x540.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_dcb.c		optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_dcb_82598.c	optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_dcb_82599.c	optional ixgbe inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/jme/if_jme.c		optional jme pci
 dev/joy/joy.c			optional joy
 dev/joy/joy_isa.c		optional joy isa
 dev/kbdmux/kbdmux.c		optional kbdmux
 dev/ksyms/ksyms.c		optional ksyms
 dev/le/am7990.c			optional le
 dev/le/am79900.c		optional le
 dev/le/if_le_pci.c		optional le pci
 dev/le/lance.c			optional le
 dev/led/led.c			standard
 dev/lge/if_lge.c		optional lge
 dev/lmc/if_lmc.c		optional lmc
 dev/malo/if_malo.c		optional malo
 dev/malo/if_malohal.c		optional malo
 dev/malo/if_malo_pci.c		optional malo pci
 dev/mc146818/mc146818.c		optional mc146818
 dev/mca/mca_bus.c		optional mca
 dev/mcd/mcd.c			optional mcd isa nowerror
 dev/mcd/mcd_isa.c		optional mcd isa nowerror
 dev/md/md.c			optional md
 dev/mem/memdev.c		optional mem
 dev/mem/memutil.c		optional mem
 dev/mfi/mfi.c			optional mfi
 dev/mfi/mfi_debug.c		optional mfi
 dev/mfi/mfi_pci.c		optional mfi pci
 dev/mfi/mfi_disk.c		optional mfi
 dev/mfi/mfi_syspd.c		optional mfi
 dev/mfi/mfi_tbolt.c		optional mfi
 dev/mfi/mfi_linux.c		optional mfi compat_linux
 dev/mfi/mfi_cam.c		optional mfip scbus
 dev/mii/acphy.c			optional miibus | acphy
 dev/mii/amphy.c			optional miibus | amphy
 dev/mii/atphy.c			optional miibus | atphy
 dev/mii/axphy.c			optional miibus | axphy
 dev/mii/bmtphy.c		optional miibus | bmtphy
 dev/mii/brgphy.c		optional miibus | brgphy
 dev/mii/ciphy.c			optional miibus | ciphy
 dev/mii/e1000phy.c		optional miibus | e1000phy
 dev/mii/gentbi.c		optional miibus | gentbi
 dev/mii/icsphy.c		optional miibus | icsphy
 dev/mii/ip1000phy.c		optional miibus | ip1000phy
 dev/mii/jmphy.c			optional miibus | jmphy
 dev/mii/lxtphy.c		optional miibus | lxtphy
 dev/mii/mii.c			optional miibus | mii
 dev/mii/mii_bitbang.c		optional miibus | mii_bitbang
 dev/mii/mii_physubr.c		optional miibus | mii
 dev/mii/miibus_if.m		optional miibus | mii
 dev/mii/mlphy.c			optional miibus | mlphy
 dev/mii/nsgphy.c		optional miibus | nsgphy
 dev/mii/nsphy.c			optional miibus | nsphy
 dev/mii/nsphyter.c		optional miibus | nsphyter
 dev/mii/pnaphy.c		optional miibus | pnaphy
 dev/mii/qsphy.c			optional miibus | qsphy
 dev/mii/rdcphy.c		optional miibus | rdcphy
 dev/mii/rgephy.c		optional miibus | rgephy
 dev/mii/rlphy.c			optional miibus | rlphy
 dev/mii/rlswitch.c		optional rlswitch
 dev/mii/smcphy.c		optional miibus | smcphy
 dev/mii/smscphy.c		optional miibus | smscphy
 dev/mii/tdkphy.c		optional miibus | tdkphy
 dev/mii/tlphy.c			optional miibus | tlphy
 dev/mii/truephy.c		optional miibus | truephy
 dev/mii/ukphy.c			optional miibus | mii
 dev/mii/ukphy_subr.c		optional miibus | mii
 dev/mii/xmphy.c			optional miibus | xmphy
 dev/mk48txx/mk48txx.c		optional mk48txx
 dev/mlx/mlx.c			optional mlx
 dev/mlx/mlx_disk.c		optional mlx
 dev/mlx/mlx_pci.c		optional mlx pci
 dev/mly/mly.c			optional mly
 dev/mmc/mmc.c			optional mmc
 dev/mmc/mmcbr_if.m		standard
 dev/mmc/mmcbus_if.m		standard
 dev/mmc/mmcsd.c			optional mmcsd
 dev/mn/if_mn.c			optional mn pci
 dev/mpr/mpr.c			optional mpr
 dev/mpr/mpr_config.c		optional mpr
 # XXX Work around clang warning, until maintainer approves fix.
 dev/mpr/mpr_mapping.c		optional mpr \
 	compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}"
 dev/mpr/mpr_pci.c		optional mpr pci
 dev/mpr/mpr_sas.c		optional mpr \
 	compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}"
 dev/mpr/mpr_sas_lsi.c		optional mpr
 dev/mpr/mpr_table.c		optional mpr
 dev/mpr/mpr_user.c		optional mpr
 dev/mps/mps.c			optional mps
 dev/mps/mps_config.c		optional mps
 # XXX Work around clang warning, until maintainer approves fix.
 dev/mps/mps_mapping.c		optional mps \
 	compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}"
 dev/mps/mps_pci.c		optional mps pci
 dev/mps/mps_sas.c		optional mps \
 	compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}"
 dev/mps/mps_sas_lsi.c		optional mps
 dev/mps/mps_table.c		optional mps
 dev/mps/mps_user.c		optional mps
 dev/mpt/mpt.c			optional mpt
 dev/mpt/mpt_cam.c		optional mpt
 dev/mpt/mpt_debug.c		optional mpt
 dev/mpt/mpt_pci.c		optional mpt pci
 dev/mpt/mpt_raid.c		optional mpt
 dev/mpt/mpt_user.c		optional mpt
 dev/mrsas/mrsas.c		optional mrsas
 dev/mrsas/mrsas_cam.c		optional mrsas
 dev/mrsas/mrsas_ioctl.c		optional mrsas
 dev/mrsas/mrsas_fp.c		optional mrsas
 dev/msk/if_msk.c		optional msk
 dev/mvs/mvs.c			optional mvs
 dev/mvs/mvs_if.m		optional mvs
 dev/mvs/mvs_pci.c		optional mvs pci
 dev/mwl/if_mwl.c		optional mwl
 dev/mwl/if_mwl_pci.c		optional mwl pci
 dev/mwl/mwlhal.c		optional mwl
 mwlfw.c				optional mwlfw				\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk mw88W8363.fw:mw88W8363fw mwlboot.fw:mwlboot -mmwl -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"mwlfw.c"
 mw88W8363.fwo		optional mwlfw					\
 	dependency	"mw88W8363.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"mw88W8363.fwo"
 mw88W8363.fw		optional mwlfw					\
 	dependency	"$S/contrib/dev/mwl/mw88W8363.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"mw88W8363.fw"
 mwlboot.fwo		optional mwlfw					\
 	dependency	"mwlboot.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"mwlboot.fwo"
 mwlboot.fw		optional mwlfw					\
 	dependency	"$S/contrib/dev/mwl/mwlboot.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"mwlboot.fw"
 dev/mxge/if_mxge.c		optional mxge pci
 dev/mxge/mxge_eth_z8e.c		optional mxge pci
 dev/mxge/mxge_ethp_z8e.c	optional mxge pci
 dev/mxge/mxge_rss_eth_z8e.c	optional mxge pci
 dev/mxge/mxge_rss_ethp_z8e.c	optional mxge pci
 dev/my/if_my.c			optional my
 dev/nand/nand.c			optional nand
 dev/nand/nand_bbt.c		optional nand
 dev/nand/nand_cdev.c		optional nand
 dev/nand/nand_generic.c		optional nand
 dev/nand/nand_geom.c		optional nand
 dev/nand/nand_id.c		optional nand
 dev/nand/nandbus.c		optional nand
 dev/nand/nandbus_if.m		optional nand
 dev/nand/nand_if.m		optional nand
 dev/nand/nandsim.c		optional nandsim nand
 dev/nand/nandsim_chip.c		optional nandsim nand
 dev/nand/nandsim_ctrl.c		optional nandsim nand
 dev/nand/nandsim_log.c		optional nandsim nand
 dev/nand/nandsim_swap.c		optional nandsim nand
 dev/nand/nfc_if.m		optional nand
 dev/ncr/ncr.c			optional ncr pci
 dev/ncv/ncr53c500.c		optional ncv
 dev/ncv/ncr53c500_pccard.c	optional ncv pccard
 dev/netmap/netmap.c		optional netmap
 dev/netmap/netmap_freebsd.c	optional netmap
 dev/netmap/netmap_generic.c	optional netmap
 dev/netmap/netmap_mbq.c		optional netmap
 dev/netmap/netmap_mem2.c	optional netmap
 dev/netmap/netmap_monitor.c	optional netmap
 dev/netmap/netmap_offloadings.c	optional netmap
 dev/netmap/netmap_pipe.c	optional netmap
 dev/netmap/netmap_vale.c	optional netmap
 # compile-with "${NORMAL_C} -Wconversion -Wextra"
 dev/nfsmb/nfsmb.c		optional nfsmb pci
 dev/nge/if_nge.c		optional nge
 dev/nxge/if_nxge.c		optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-device.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-mm.c	optional nxge
 dev/nxge/xgehal/xge-queue.c	optional nxge
 dev/nxge/xgehal/xgehal-driver.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-ring.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-channel.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-fifo.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-stats.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-config.c	optional nxge
 dev/nxge/xgehal/xgehal-mgmt.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nmdm/nmdm.c			optional nmdm
 dev/nsp/nsp.c			optional nsp
 dev/nsp/nsp_pccard.c		optional nsp pccard
 dev/null/null.c			standard
 dev/oce/oce_hw.c		optional oce pci
 dev/oce/oce_if.c		optional oce pci
 dev/oce/oce_mbox.c		optional oce pci
 dev/oce/oce_queue.c		optional oce pci
 dev/oce/oce_sysctl.c		optional oce pci
 dev/oce/oce_util.c		optional oce pci
 dev/ofw/ofw_bus_if.m		optional fdt
 dev/ofw/ofw_bus_subr.c		optional fdt
 dev/ofw/ofw_fdt.c		optional fdt
 dev/ofw/ofw_if.m		optional fdt
 dev/ofw/ofw_iicbus.c		optional fdt iicbus
 dev/ofw/ofwbus.c		optional fdt
 dev/ofw/openfirm.c		optional fdt
 dev/ofw/openfirmio.c		optional fdt
 dev/patm/if_patm.c		optional patm pci
 dev/patm/if_patm_attach.c	optional patm pci
 dev/patm/if_patm_intr.c		optional patm pci
 dev/patm/if_patm_ioctl.c	optional patm pci
 dev/patm/if_patm_rtables.c	optional patm pci
 dev/patm/if_patm_rx.c		optional patm pci
 dev/patm/if_patm_tx.c		optional patm pci
 dev/pbio/pbio.c			optional pbio isa
 dev/pccard/card_if.m		standard
 dev/pccard/pccard.c		optional pccard
 dev/pccard/pccard_cis.c		optional pccard
 dev/pccard/pccard_cis_quirks.c	optional pccard
 dev/pccard/pccard_device.c	optional pccard
 dev/pccard/power_if.m		standard
 dev/pccbb/pccbb.c		optional cbb
 dev/pccbb/pccbb_isa.c		optional cbb isa
 dev/pccbb/pccbb_pci.c		optional cbb pci
 dev/pcf/pcf.c			optional pcf
 dev/pci/eisa_pci.c		optional pci eisa
 dev/pci/fixup_pci.c		optional pci
 dev/pci/hostb_pci.c		optional pci
 dev/pci/ignore_pci.c		optional pci
 dev/pci/isa_pci.c		optional pci isa
 dev/pci/pci.c			optional pci
 dev/pci/pci_if.m		standard
 dev/pci/pci_pci.c		optional pci
 dev/pci/pci_subr.c		optional pci
 dev/pci/pci_user.c		optional pci
 dev/pci/pcib_if.m		standard
 dev/pci/pcib_support.c		standard
 dev/pci/vga_pci.c		optional pci
 dev/pcn/if_pcn.c		optional pcn pci
 dev/pdq/if_fea.c		optional fea eisa
 dev/pdq/if_fpa.c		optional fpa pci
 dev/pdq/pdq.c			optional nowerror fea eisa | fpa pci
 dev/pdq/pdq_ifsubr.c		optional nowerror fea eisa | fpa pci
 dev/ppbus/if_plip.c		optional plip
 dev/ppbus/immio.c		optional vpo
 dev/ppbus/lpbb.c		optional lpbb
 dev/ppbus/lpt.c			optional lpt
 dev/ppbus/pcfclock.c		optional pcfclock
 dev/ppbus/ppb_1284.c		optional ppbus
 dev/ppbus/ppb_base.c		optional ppbus
 dev/ppbus/ppb_msq.c		optional ppbus
 dev/ppbus/ppbconf.c		optional ppbus
 dev/ppbus/ppbus_if.m		optional ppbus
 dev/ppbus/ppi.c			optional ppi
 dev/ppbus/pps.c			optional pps
 dev/ppbus/vpo.c			optional vpo
 dev/ppbus/vpoio.c		optional vpo
 dev/ppc/ppc.c			optional ppc
 dev/ppc/ppc_acpi.c		optional ppc acpi
 dev/ppc/ppc_isa.c		optional ppc isa
 dev/ppc/ppc_pci.c		optional ppc pci
 dev/ppc/ppc_puc.c		optional ppc puc
 dev/pst/pst-iop.c		optional pst
 dev/pst/pst-pci.c		optional pst pci
 dev/pst/pst-raid.c		optional pst
 dev/pty/pty.c			optional pty
 dev/puc/puc.c			optional puc
 dev/puc/puc_cfg.c		optional puc
 dev/puc/puc_pccard.c		optional puc pccard
 dev/puc/puc_pci.c		optional puc pci
 dev/puc/pucdata.c		optional puc pci
 dev/quicc/quicc_core.c		optional quicc
 dev/ral/rt2560.c		optional ral
 dev/ral/rt2661.c		optional ral
 dev/ral/rt2860.c		optional ral
 dev/ral/if_ral_pci.c		optional ral pci
 rt2561fw.c			optional rt2561fw | ralfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rt2561.fw:rt2561fw -mrt2561 -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rt2561fw.c"
 rt2561fw.fwo			optional rt2561fw | ralfw		\
 	dependency	"rt2561.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rt2561fw.fwo"
 rt2561.fw			optional rt2561fw | ralfw		\
 	dependency	"$S/contrib/dev/ral/rt2561.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rt2561.fw"
 rt2561sfw.c			optional rt2561sfw | ralfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rt2561s.fw:rt2561sfw -mrt2561s -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rt2561sfw.c"
 rt2561sfw.fwo			optional rt2561sfw | ralfw		\
 	dependency	"rt2561s.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rt2561sfw.fwo"
 rt2561s.fw			optional rt2561sfw | ralfw		\
 	dependency	"$S/contrib/dev/ral/rt2561s.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rt2561s.fw"
 rt2661fw.c			optional rt2661fw | ralfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rt2661.fw:rt2661fw -mrt2661 -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rt2661fw.c"
 rt2661fw.fwo			optional rt2661fw | ralfw		\
 	dependency	"rt2661.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rt2661fw.fwo"
 rt2661.fw			optional rt2661fw | ralfw		\
 	dependency	"$S/contrib/dev/ral/rt2661.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rt2661.fw"
 rt2860fw.c			optional rt2860fw | ralfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rt2860.fw:rt2860fw -mrt2860 -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rt2860fw.c"
 rt2860fw.fwo			optional rt2860fw | ralfw		\
 	dependency	"rt2860.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rt2860fw.fwo"
 rt2860.fw			optional rt2860fw | ralfw		\
 	dependency	"$S/contrib/dev/ral/rt2860.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rt2860.fw"
 dev/random/randomdev.c		standard
 dev/random/random_adaptors.c	standard
 dev/random/dummy_rng.c		standard
 dev/random/live_entropy_sources.c	standard
 dev/random/random_harvestq.c	standard
 dev/random/randomdev_soft.c	optional random
 dev/random/yarrow.c		optional random
 dev/random/fortuna.c		optional random
 dev/random/hash.c		optional random
 dev/rc/rc.c			optional rc
 dev/re/if_re.c			optional re
 dev/rl/if_rl.c			optional rl pci
 dev/rndtest/rndtest.c		optional rndtest
 dev/rp/rp.c			optional rp
 dev/rp/rp_isa.c			optional rp isa
 dev/rp/rp_pci.c			optional rp pci
 dev/safe/safe.c			optional safe
 dev/scc/scc_if.m		optional scc
 dev/scc/scc_bfe_ebus.c		optional scc ebus
 dev/scc/scc_bfe_quicc.c		optional scc quicc
 dev/scc/scc_bfe_sbus.c		optional scc fhc | scc sbus
 dev/scc/scc_core.c		optional scc
 dev/scc/scc_dev_quicc.c		optional scc quicc
 dev/scc/scc_dev_sab82532.c	optional scc
 dev/scc/scc_dev_z8530.c		optional scc
 dev/scd/scd.c			optional scd isa
 dev/scd/scd_isa.c		optional scd isa
 dev/sdhci/sdhci.c		optional sdhci
 dev/sdhci/sdhci_if.m		optional sdhci
 dev/sdhci/sdhci_pci.c		optional sdhci pci
 dev/sf/if_sf.c			optional sf pci
 dev/sge/if_sge.c		optional sge pci
 dev/si/si.c			optional si
 dev/si/si2_z280.c		optional si
 dev/si/si3_t225.c		optional si
 dev/si/si_eisa.c		optional si eisa
 dev/si/si_isa.c			optional si isa
 dev/si/si_pci.c			optional si pci
 dev/siba/siba.c			optional siba
 dev/siba/siba_bwn.c		optional siba_bwn pci
 dev/siba/siba_cc.c		optional siba
 dev/siba/siba_core.c		optional siba | siba_bwn pci
 dev/siba/siba_pcib.c		optional siba pci
 dev/siis/siis.c			optional siis pci
 dev/sis/if_sis.c		optional sis pci
 dev/sk/if_sk.c			optional sk pci
 dev/smbus/smb.c			optional smb
 dev/smbus/smbconf.c		optional smbus
 dev/smbus/smbus.c		optional smbus
 dev/smbus/smbus_if.m		optional smbus
 dev/smc/if_smc.c		optional smc
 dev/smc/if_smc_fdt.c		optional smc fdt
 dev/sn/if_sn.c			optional sn
 dev/sn/if_sn_isa.c		optional sn isa
 dev/sn/if_sn_pccard.c		optional sn pccard
 dev/snp/snp.c			optional snp
 dev/sound/clone.c		optional sound
 dev/sound/unit.c		optional sound
 dev/sound/isa/ad1816.c		optional snd_ad1816 isa
 dev/sound/isa/ess.c		optional snd_ess isa
 dev/sound/isa/gusc.c		optional snd_gusc isa
 dev/sound/isa/mss.c		optional snd_mss isa
 dev/sound/isa/sb16.c		optional snd_sb16 isa
 dev/sound/isa/sb8.c		optional snd_sb8 isa
 dev/sound/isa/sbc.c		optional snd_sbc isa
 dev/sound/isa/sndbuf_dma.c	optional sound isa
 dev/sound/pci/als4000.c		optional snd_als4000 pci
 dev/sound/pci/atiixp.c		optional snd_atiixp pci
 dev/sound/pci/cmi.c		optional snd_cmi pci
 dev/sound/pci/cs4281.c		optional snd_cs4281 pci
 dev/sound/pci/csa.c		optional snd_csa pci
 dev/sound/pci/csapcm.c		optional snd_csa pci
 dev/sound/pci/ds1.c		optional snd_ds1 pci
 dev/sound/pci/emu10k1.c		optional snd_emu10k1 pci
 dev/sound/pci/emu10kx.c		optional snd_emu10kx pci
 dev/sound/pci/emu10kx-pcm.c	optional snd_emu10kx pci
 dev/sound/pci/emu10kx-midi.c	optional snd_emu10kx pci
 dev/sound/pci/envy24.c		optional snd_envy24 pci
 dev/sound/pci/envy24ht.c	optional snd_envy24ht pci
 dev/sound/pci/es137x.c		optional snd_es137x pci
 dev/sound/pci/fm801.c		optional snd_fm801 pci
 dev/sound/pci/ich.c		optional snd_ich pci
 dev/sound/pci/maestro.c		optional snd_maestro pci
 dev/sound/pci/maestro3.c	optional snd_maestro3 pci
 dev/sound/pci/neomagic.c	optional snd_neomagic pci
 dev/sound/pci/solo.c		optional snd_solo pci
 dev/sound/pci/spicds.c		optional snd_spicds pci
 dev/sound/pci/t4dwave.c		optional snd_t4dwave pci
 dev/sound/pci/via8233.c		optional snd_via8233 pci
 dev/sound/pci/via82c686.c	optional snd_via82c686 pci
 dev/sound/pci/vibes.c		optional snd_vibes pci
 dev/sound/pci/hda/hdaa.c	optional snd_hda pci
 dev/sound/pci/hda/hdaa_patches.c	optional snd_hda pci
 dev/sound/pci/hda/hdac.c	optional snd_hda pci
 dev/sound/pci/hda/hdac_if.m	optional snd_hda pci
 dev/sound/pci/hda/hdacc.c	optional snd_hda pci
 dev/sound/pci/hdspe.c		optional snd_hdspe pci
 dev/sound/pci/hdspe-pcm.c	optional snd_hdspe pci
 dev/sound/pcm/ac97.c		optional sound
 dev/sound/pcm/ac97_if.m		optional sound
 dev/sound/pcm/ac97_patch.c	optional sound
 dev/sound/pcm/buffer.c		optional sound	\
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/channel.c		optional sound
 dev/sound/pcm/channel_if.m	optional sound
 dev/sound/pcm/dsp.c		optional sound
 dev/sound/pcm/feeder.c		optional sound
 dev/sound/pcm/feeder_chain.c	optional sound
 dev/sound/pcm/feeder_eq.c	optional sound	\
 	dependency	"feeder_eq_gen.h"	\
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_if.m	optional sound
 dev/sound/pcm/feeder_format.c	optional sound  \
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_matrix.c	optional sound  \
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_mixer.c	optional sound  \
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_rate.c	optional sound	\
 	dependency	"feeder_rate_gen.h"	\
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_volume.c	optional sound  \
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/mixer.c		optional sound
 dev/sound/pcm/mixer_if.m	optional sound
 dev/sound/pcm/sndstat.c		optional sound
 dev/sound/pcm/sound.c		optional sound
 dev/sound/pcm/vchan.c		optional sound
 dev/sound/usb/uaudio.c		optional snd_uaudio usb
 dev/sound/usb/uaudio_pcm.c	optional snd_uaudio usb
 dev/sound/midi/midi.c		optional sound
 dev/sound/midi/mpu401.c		optional sound
 dev/sound/midi/mpu_if.m		optional sound
 dev/sound/midi/mpufoi_if.m	optional sound
 dev/sound/midi/sequencer.c	optional sound
 dev/sound/midi/synth_if.m	optional sound
 dev/spibus/ofw_spibus.c		optional fdt spibus
 dev/spibus/spibus.c		optional spibus				\
 	dependency	"spibus_if.h"
 dev/spibus/spibus_if.m		optional spibus
 dev/ste/if_ste.c		optional ste pci
 dev/stg/tmc18c30.c		optional stg
 dev/stg/tmc18c30_isa.c		optional stg isa
 dev/stg/tmc18c30_pccard.c	optional stg pccard
 dev/stg/tmc18c30_pci.c		optional stg pci
 dev/stg/tmc18c30_subr.c		optional stg
 dev/stge/if_stge.c		optional stge
 dev/streams/streams.c		optional streams
 dev/sym/sym_hipd.c		optional sym				\
 	dependency	"$S/dev/sym/sym_{conf,defs}.h"
 dev/syscons/blank/blank_saver.c	optional blank_saver
 dev/syscons/daemon/daemon_saver.c optional daemon_saver
 dev/syscons/dragon/dragon_saver.c optional dragon_saver
 dev/syscons/fade/fade_saver.c	optional fade_saver
 dev/syscons/fire/fire_saver.c	optional fire_saver
 dev/syscons/green/green_saver.c	optional green_saver
 dev/syscons/logo/logo.c		optional logo_saver
 dev/syscons/logo/logo_saver.c	optional logo_saver
 dev/syscons/rain/rain_saver.c	optional rain_saver
 dev/syscons/schistory.c		optional sc
 dev/syscons/scmouse.c		optional sc
 dev/syscons/scterm.c		optional sc
 dev/syscons/scvidctl.c		optional sc
 dev/syscons/snake/snake_saver.c	optional snake_saver
 dev/syscons/star/star_saver.c	optional star_saver
 dev/syscons/syscons.c		optional sc
 dev/syscons/sysmouse.c		optional sc
 dev/syscons/warp/warp_saver.c	optional warp_saver
 dev/tdfx/tdfx_linux.c		optional tdfx_linux tdfx compat_linux
 dev/tdfx/tdfx_pci.c		optional tdfx pci
 dev/ti/if_ti.c			optional ti pci
 dev/tl/if_tl.c			optional tl pci
 dev/trm/trm.c			optional trm
 dev/twa/tw_cl_init.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_cl_intr.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_cl_io.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_cl_misc.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_osl_cam.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_osl_freebsd.c	optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twe/twe.c			optional twe
 dev/twe/twe_freebsd.c		optional twe
 dev/tws/tws.c			optional tws
 dev/tws/tws_cam.c		optional tws
 dev/tws/tws_hdm.c		optional tws
 dev/tws/tws_services.c		optional tws
 dev/tws/tws_user.c		optional tws
 dev/tx/if_tx.c			optional tx
 dev/txp/if_txp.c		optional txp
 dev/uart/uart_bus_acpi.c	optional uart acpi
 #dev/uart/uart_bus_cbus.c	optional uart cbus
 dev/uart/uart_bus_ebus.c	optional uart ebus
 dev/uart/uart_bus_fdt.c		optional uart fdt
 dev/uart/uart_bus_isa.c		optional uart isa
 dev/uart/uart_bus_pccard.c	optional uart pccard
 dev/uart/uart_bus_pci.c		optional uart pci
 dev/uart/uart_bus_puc.c		optional uart puc
 dev/uart/uart_bus_scc.c		optional uart scc
 dev/uart/uart_core.c		optional uart
 dev/uart/uart_dbg.c		optional uart gdb
 dev/uart/uart_dev_ns8250.c	optional uart uart_ns8250
 dev/uart/uart_dev_pl011.c	optional uart pl011
 dev/uart/uart_dev_quicc.c	optional uart quicc
 dev/uart/uart_dev_sab82532.c	optional uart uart_sab82532
 dev/uart/uart_dev_sab82532.c	optional uart scc
 dev/uart/uart_dev_z8530.c	optional uart uart_z8530
 dev/uart/uart_dev_z8530.c	optional uart scc
 dev/uart/uart_if.m		optional uart
 dev/uart/uart_subr.c		optional uart
 dev/uart/uart_tty.c		optional uart
 dev/ubsec/ubsec.c		optional ubsec
 #
 # USB controller drivers
 #
 dev/usb/controller/at91dci.c		optional at91dci
 dev/usb/controller/at91dci_atmelarm.c	optional at91dci at91rm9200
 dev/usb/controller/musb_otg.c		optional musb
 dev/usb/controller/musb_otg_atmelarm.c	optional musb at91rm9200
 dev/usb/controller/dwc_otg.c		optional dwcotg
 dev/usb/controller/dwc_otg_fdt.c	optional dwcotg fdt
 dev/usb/controller/ehci.c		optional ehci
 dev/usb/controller/ehci_pci.c		optional ehci pci
 dev/usb/controller/ohci.c		optional ohci
 dev/usb/controller/ohci_atmelarm.c	optional ohci at91rm9200
 dev/usb/controller/ohci_pci.c		optional ohci pci
 dev/usb/controller/uhci.c		optional uhci
 dev/usb/controller/uhci_pci.c		optional uhci pci
 dev/usb/controller/xhci.c		optional xhci
 dev/usb/controller/xhci_pci.c		optional xhci pci
 dev/usb/controller/saf1761_otg.c	optional saf1761otg
 dev/usb/controller/saf1761_otg_fdt.c	optional saf1761otg fdt
 dev/usb/controller/uss820dci.c		optional uss820dci
 dev/usb/controller/uss820dci_atmelarm.c	optional uss820dci at91rm9200
 dev/usb/controller/usb_controller.c	optional usb
 #
 # USB storage drivers
 #
 dev/usb/storage/umass.c		optional umass
 dev/usb/storage/urio.c		optional urio
 dev/usb/storage/ustorage_fs.c	optional usfs
 #
 # USB core
 #
 dev/usb/usb_busdma.c		optional usb
 dev/usb/usb_compat_linux.c	optional usb
 dev/usb/usb_core.c		optional usb
 dev/usb/usb_debug.c		optional usb
 dev/usb/usb_dev.c		optional usb
 dev/usb/usb_device.c		optional usb
 dev/usb/usb_dynamic.c		optional usb
 dev/usb/usb_error.c		optional usb
 dev/usb/usb_generic.c		optional usb
 dev/usb/usb_handle_request.c	optional usb
 dev/usb/usb_hid.c		optional usb
 dev/usb/usb_hub.c		optional usb
 dev/usb/usb_if.m		optional usb
 dev/usb/usb_lookup.c		optional usb
 dev/usb/usb_mbuf.c		optional usb
 dev/usb/usb_msctest.c		optional usb
 dev/usb/usb_parse.c		optional usb
 dev/usb/usb_pf.c		optional usb
 dev/usb/usb_process.c		optional usb
 dev/usb/usb_request.c		optional usb
 dev/usb/usb_transfer.c		optional usb
 dev/usb/usb_util.c		optional usb
 #
 # USB network drivers
 #
 dev/usb/net/if_aue.c		optional aue
 dev/usb/net/if_axe.c		optional axe
 dev/usb/net/if_axge.c		optional axge
 dev/usb/net/if_cdce.c		optional cdce
 dev/usb/net/if_cue.c		optional cue
 dev/usb/net/if_ipheth.c		optional ipheth
 dev/usb/net/if_kue.c		optional kue
 dev/usb/net/if_mos.c		optional mos
 dev/usb/net/if_rue.c		optional rue
 dev/usb/net/if_smsc.c		optional smsc
 dev/usb/net/if_udav.c		optional udav
 dev/usb/net/if_usie.c		optional usie
 dev/usb/net/if_urndis.c		optional urndis
 dev/usb/net/ruephy.c		optional rue
 dev/usb/net/usb_ethernet.c	optional aue | axe | axge | cdce | cue | kue | \
 					 mos | rue | smsc | udav | ipheth | \
 					 urndis
 dev/usb/net/uhso.c		optional uhso
 #
 # USB WLAN drivers
 #
 dev/usb/wlan/if_rsu.c		optional rsu
 rsu-rtl8712fw.c			optional rsu-rtl8712fw | rsufw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rsu-rtl8712fw.fw:rsu-rtl8712fw:120 -mrsu-rtl8712fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rsu-rtl8712fw.c"
 rsu-rtl8712fw.fwo		optional rsu-rtl8712fw | rsufw		\
 	dependency	"rsu-rtl8712fw.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rsu-rtl8712fw.fwo"
 rsu-rtl8712fw.fw		optional rsu-rtl8712fw | rsufw		\
 	dependency	"$S/contrib/dev/rsu/rsu-rtl8712fw.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rsu-rtl8712fw.fw"
 dev/usb/wlan/if_rum.c		optional rum
 dev/usb/wlan/if_run.c		optional run
 runfw.c				optional runfw							\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk run.fw:runfw -mrunfw -c${.TARGET}"	\
 	no-implicit-rule before-depend local							\
 	clean		"runfw.c"
 runfw.fwo			optional runfw							\
 	dependency	"run.fw"								\
 	compile-with	"${NORMAL_FWO}"								\
 	no-implicit-rule									\
 	clean		"runfw.fwo"
 run.fw				optional runfw							\
 	dependency	"$S/contrib/dev/run/rt2870.fw.uu"					\
 	compile-with	"${NORMAL_FW}"								\
 	no-obj no-implicit-rule									\
 	clean		"run.fw"
 dev/usb/wlan/if_uath.c		optional uath
 dev/usb/wlan/if_upgt.c		optional upgt
 dev/usb/wlan/if_ural.c		optional ural
 dev/usb/wlan/if_urtw.c		optional urtw
 dev/usb/wlan/if_urtwn.c		optional urtwn
 urtwn-rtl8188eufw.c		optional urtwn-rtl8188eufw | urtwnfw	\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8188eufw.fw:urtwn-rtl8188eufw:111 -murtwn-rtl8188eufw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"urtwn-rtl8188eufw.c"
 urtwn-rtl8188eufw.fwo		optional urtwn-rtl8188eufw | urtwnfw	\
 	dependency	"urtwn-rtl8188eufw.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"urtwn-rtl8188eufw.fwo"
 urtwn-rtl8188eufw.fw		optional urtwn-rtl8188eufw | urtwnfw	\
 	dependency	"$S/contrib/dev/urtwn/urtwn-rtl8188eufw.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"urtwn-rtl8188eufw.fw"
 urtwn-rtl8192cfwT.c		optional urtwn-rtl8192cfwT | urtwnfw	\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwT.fw:urtwn-rtl8192cfwT:111 -murtwn-rtl8192cfwT -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"urtwn-rtl8192cfwT.c"
 urtwn-rtl8192cfwT.fwo		optional urtwn-rtl8192cfwT | urtwnfw	\
 	dependency	"urtwn-rtl8192cfwT.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"urtwn-rtl8192cfwT.fwo"
 urtwn-rtl8192cfwT.fw		optional urtwn-rtl8192cfwT | urtwnfw	\
 	dependency	"$S/contrib/dev/urtwn/urtwn-rtl8192cfwT.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"urtwn-rtl8192cfwT.fw"
 urtwn-rtl8192cfwU.c		optional urtwn-rtl8192cfwU | urtwnfw	\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwU.fw:urtwn-rtl8192cfwU:111 -murtwn-rtl8192cfwU -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"urtwn-rtl8192cfwU.c"
 urtwn-rtl8192cfwU.fwo		optional urtwn-rtl8192cfwU | urtwnfw	\
 	dependency	"urtwn-rtl8192cfwU.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"urtwn-rtl8192cfwU.fwo"
 urtwn-rtl8192cfwU.fw		optional urtwn-rtl8192cfwU | urtwnfw	\
 	dependency	"$S/contrib/dev/urtwn/urtwn-rtl8192cfwU.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"urtwn-rtl8192cfwU.fw"
 
 dev/usb/wlan/if_zyd.c		optional zyd
 #
 # USB serial and parallel port drivers
 #
 dev/usb/serial/u3g.c		optional u3g
 dev/usb/serial/uark.c		optional uark
 dev/usb/serial/ubsa.c		optional ubsa
 dev/usb/serial/ubser.c		optional ubser
 dev/usb/serial/uchcom.c		optional uchcom
 dev/usb/serial/ucycom.c		optional ucycom
 dev/usb/serial/ufoma.c		optional ufoma
 dev/usb/serial/uftdi.c		optional uftdi
 dev/usb/serial/ugensa.c		optional ugensa
 dev/usb/serial/uipaq.c		optional uipaq
 dev/usb/serial/ulpt.c		optional ulpt
 dev/usb/serial/umcs.c		optional umcs
 dev/usb/serial/umct.c		optional umct
 dev/usb/serial/umodem.c		optional umodem
 dev/usb/serial/umoscom.c	optional umoscom
 dev/usb/serial/uplcom.c		optional uplcom
 dev/usb/serial/uslcom.c		optional uslcom
 dev/usb/serial/uvisor.c		optional uvisor
 dev/usb/serial/uvscom.c		optional uvscom
 dev/usb/serial/usb_serial.c 	optional ucom | u3g | uark | ubsa | ubser | \
 					 uchcom | ucycom | ufoma | uftdi | \
 					 ugensa | uipaq | umcs | umct | \
 					 umodem | umoscom | uplcom | usie | \
 					 uslcom | uvisor | uvscom
 #
 # USB misc drivers
 #
 dev/usb/misc/ufm.c		optional ufm
 dev/usb/misc/udbp.c		optional udbp
 dev/usb/misc/uled.c		optional uled
 #
 # USB input drivers
 #
 dev/usb/input/atp.c		optional atp
 dev/usb/input/uep.c		optional uep
 dev/usb/input/uhid.c		optional uhid
 dev/usb/input/ukbd.c		optional ukbd
 dev/usb/input/ums.c		optional ums
 dev/usb/input/wsp.c		optional wsp
 #
 # USB quirks
 #
 dev/usb/quirk/usb_quirk.c	optional usb
 #
 # USB templates
 #
 dev/usb/template/usb_template.c		optional usb_template
 dev/usb/template/usb_template_audio.c	optional usb_template
 dev/usb/template/usb_template_cdce.c	optional usb_template
 dev/usb/template/usb_template_kbd.c	optional usb_template
 dev/usb/template/usb_template_modem.c	optional usb_template
 dev/usb/template/usb_template_mouse.c	optional usb_template
 dev/usb/template/usb_template_msc.c	optional usb_template
 dev/usb/template/usb_template_mtp.c	optional usb_template
 dev/usb/template/usb_template_phone.c	optional usb_template
 dev/usb/template/usb_template_serialnet.c	optional usb_template
 #
 # USB END
 #
 dev/utopia/idtphy.c		optional utopia
 dev/utopia/suni.c		optional utopia
 dev/utopia/utopia.c		optional utopia
 dev/vge/if_vge.c		optional vge
 dev/viapm/viapm.c		optional viapm pci
 dev/virtio/virtio.c			optional	virtio
 dev/virtio/virtqueue.c			optional	virtio
 dev/virtio/virtio_bus_if.m		optional	virtio
 dev/virtio/virtio_if.m			optional	virtio
 dev/virtio/pci/virtio_pci.c		optional	virtio_pci
 dev/virtio/mmio/virtio_mmio.c		optional	virtio_mmio
 dev/virtio/mmio/virtio_mmio_if.m	optional	virtio_mmio
 dev/virtio/network/if_vtnet.c		optional	vtnet
 dev/virtio/block/virtio_blk.c		optional	virtio_blk
 dev/virtio/balloon/virtio_balloon.c	optional	virtio_balloon
 dev/virtio/scsi/virtio_scsi.c		optional	virtio_scsi
 dev/virtio/random/virtio_random.c	optional	virtio_random
 dev/virtio/console/virtio_console.c	optional	virtio_console
 dev/vkbd/vkbd.c			optional vkbd
 dev/vr/if_vr.c			optional vr pci
 dev/vt/colors/vt_termcolors.c	optional vt
 dev/vt/font/vt_font_default.c	optional vt
 dev/vt/font/vt_mouse_cursor.c	optional vt
 dev/vt/hw/efifb/efifb.c		optional vt_efifb
 dev/vt/hw/fb/vt_fb.c		optional vt
 dev/vt/hw/vga/vt_vga.c		optional vt vt_vga
 dev/vt/logo/logo_freebsd.c	optional vt splash
 dev/vt/vt_buf.c			optional vt
 dev/vt/vt_consolectl.c		optional vt
 dev/vt/vt_core.c		optional vt
 dev/vt/vt_font.c		optional vt
 dev/vt/vt_sysmouse.c		optional vt
 dev/vte/if_vte.c		optional vte pci
 dev/vx/if_vx.c			optional vx
 dev/vx/if_vx_eisa.c		optional vx eisa
 dev/vx/if_vx_pci.c		optional vx pci
 dev/vxge/vxge.c				optional vxge
 dev/vxge/vxgehal/vxgehal-ifmsg.c	optional vxge
 dev/vxge/vxgehal/vxgehal-mrpcim.c	optional vxge
 dev/vxge/vxgehal/vxge-queue.c		optional vxge
 dev/vxge/vxgehal/vxgehal-ring.c		optional vxge
 dev/vxge/vxgehal/vxgehal-swapper.c	optional vxge
 dev/vxge/vxgehal/vxgehal-mgmt.c		optional vxge
 dev/vxge/vxgehal/vxgehal-srpcim.c	optional vxge
 dev/vxge/vxgehal/vxgehal-config.c	optional vxge
 dev/vxge/vxgehal/vxgehal-blockpool.c	optional vxge
 dev/vxge/vxgehal/vxgehal-doorbells.c	optional vxge
 dev/vxge/vxgehal/vxgehal-mgmtaux.c	optional vxge
 dev/vxge/vxgehal/vxgehal-device.c	optional vxge
 dev/vxge/vxgehal/vxgehal-mm.c		optional vxge
 dev/vxge/vxgehal/vxgehal-driver.c	optional vxge
 dev/vxge/vxgehal/vxgehal-virtualpath.c	optional vxge
 dev/vxge/vxgehal/vxgehal-channel.c	optional vxge
 dev/vxge/vxgehal/vxgehal-fifo.c		optional vxge
 dev/watchdog/watchdog.c		standard
 dev/wb/if_wb.c			optional wb pci
 dev/wds/wd7000.c		optional wds isa
 dev/wi/if_wi.c			optional wi
 dev/wi/if_wi_pccard.c		optional wi pccard
 dev/wi/if_wi_pci.c		optional wi pci
 dev/wl/if_wl.c			optional wl isa
 dev/wpi/if_wpi.c		optional wpi pci
 wpifw.c			optional wpifw					\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk wpi.fw:wpifw:153229 -mwpi -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"wpifw.c"
 wpifw.fwo			optional wpifw				\
 	dependency	"wpi.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"wpifw.fwo"
 wpi.fw			optional wpifw					\
 	dependency	"$S/contrib/dev/wpi/iwlwifi-3945-15.32.2.9.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"wpi.fw"
 dev/xe/if_xe.c			optional xe
 dev/xe/if_xe_pccard.c		optional xe pccard
 dev/xen/balloon/balloon.c	optional xen | xenhvm
 dev/xen/blkfront/blkfront.c	optional xen | xenhvm
 dev/xen/blkback/blkback.c	optional xen | xenhvm
 dev/xen/console/console.c	optional xen | xenhvm
 dev/xen/console/xencons_ring.c	optional xen | xenhvm
 dev/xen/control/control.c	optional xen | xenhvm
 dev/xen/grant_table/grant_table.c	optional xen | xenhvm
 dev/xen/netback/netback.c	optional xen | xenhvm
 dev/xen/netfront/netfront.c	optional xen | xenhvm
 dev/xen/xenpci/xenpci.c		optional xenpci
 dev/xen/timer/timer.c		optional xen | xenhvm
 dev/xen/pvcpu/pvcpu.c		optional xen | xenhvm
 dev/xen/xenstore/xenstore.c	optional xen | xenhvm
 dev/xen/xenstore/xenstore_dev.c	optional xen | xenhvm
 dev/xen/xenstore/xenstored_dev.c	optional xen | xenhvm
 dev/xen/evtchn/evtchn_dev.c	optional xen | xenhvm
 dev/xen/privcmd/privcmd.c	optional xen | xenhvm
 dev/xl/if_xl.c			optional xl pci
 dev/xl/xlphy.c			optional xl pci
 fs/autofs/autofs.c		optional autofs
 fs/autofs/autofs_vfsops.c	optional autofs
 fs/autofs/autofs_vnops.c	optional autofs
 fs/deadfs/dead_vnops.c		standard
 fs/devfs/devfs_devs.c		standard
 fs/devfs/devfs_dir.c		standard
 fs/devfs/devfs_rule.c		standard
 fs/devfs/devfs_vfsops.c		standard
 fs/devfs/devfs_vnops.c		standard
 fs/fdescfs/fdesc_vfsops.c	optional fdescfs
 fs/fdescfs/fdesc_vnops.c	optional fdescfs
 fs/fifofs/fifo_vnops.c		standard
 fs/cuse/cuse.c			optional cuse
 fs/fuse/fuse_device.c		optional fuse
 fs/fuse/fuse_file.c		optional fuse
 fs/fuse/fuse_internal.c		optional fuse
 fs/fuse/fuse_io.c		optional fuse
 fs/fuse/fuse_ipc.c		optional fuse
 fs/fuse/fuse_main.c		optional fuse
 fs/fuse/fuse_node.c		optional fuse
 fs/fuse/fuse_vfsops.c		optional fuse
 fs/fuse/fuse_vnops.c		optional fuse
 fs/msdosfs/msdosfs_conv.c	optional msdosfs
 fs/msdosfs/msdosfs_denode.c	optional msdosfs
 fs/msdosfs/msdosfs_fat.c	optional msdosfs
 fs/msdosfs/msdosfs_fileno.c	optional msdosfs
 fs/msdosfs/msdosfs_iconv.c	optional msdosfs_iconv
 fs/msdosfs/msdosfs_lookup.c	optional msdosfs
 fs/msdosfs/msdosfs_vfsops.c	optional msdosfs
 fs/msdosfs/msdosfs_vnops.c	optional msdosfs
 fs/nandfs/bmap.c		optional nandfs
 fs/nandfs/nandfs_alloc.c	optional nandfs
 fs/nandfs/nandfs_bmap.c		optional nandfs
 fs/nandfs/nandfs_buffer.c	optional nandfs
 fs/nandfs/nandfs_cleaner.c	optional nandfs
 fs/nandfs/nandfs_cpfile.c	optional nandfs
 fs/nandfs/nandfs_dat.c		optional nandfs
 fs/nandfs/nandfs_dir.c		optional nandfs
 fs/nandfs/nandfs_ifile.c	optional nandfs
 fs/nandfs/nandfs_segment.c	optional nandfs
 fs/nandfs/nandfs_subr.c		optional nandfs
 fs/nandfs/nandfs_sufile.c	optional nandfs
 fs/nandfs/nandfs_vfsops.c	optional nandfs
 fs/nandfs/nandfs_vnops.c	optional nandfs
 fs/nfs/nfs_commonkrpc.c		optional nfscl | nfsd
 fs/nfs/nfs_commonsubs.c		optional nfscl | nfsd
 fs/nfs/nfs_commonport.c		optional nfscl | nfsd
 fs/nfs/nfs_commonacl.c		optional nfscl | nfsd
 fs/nfsclient/nfs_clcomsubs.c	optional nfscl
 fs/nfsclient/nfs_clsubs.c	optional nfscl
 fs/nfsclient/nfs_clstate.c	optional nfscl
 fs/nfsclient/nfs_clkrpc.c	optional nfscl
 fs/nfsclient/nfs_clrpcops.c	optional nfscl
 fs/nfsclient/nfs_clvnops.c	optional nfscl
 fs/nfsclient/nfs_clnode.c	optional nfscl
 fs/nfsclient/nfs_clvfsops.c	optional nfscl
 fs/nfsclient/nfs_clport.c	optional nfscl
 fs/nfsclient/nfs_clbio.c	optional nfscl
 fs/nfsclient/nfs_clnfsiod.c	optional nfscl
 fs/nfsserver/nfs_fha_new.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdsocket.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdsubs.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdstate.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdkrpc.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdserv.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdport.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdcache.c	optional nfsd inet
 fs/nullfs/null_subr.c		optional nullfs
 fs/nullfs/null_vfsops.c		optional nullfs
 fs/nullfs/null_vnops.c		optional nullfs
 fs/procfs/procfs.c		optional procfs
 fs/procfs/procfs_ctl.c		optional procfs
 fs/procfs/procfs_dbregs.c	optional procfs
 fs/procfs/procfs_fpregs.c	optional procfs
 fs/procfs/procfs_ioctl.c	optional procfs
 fs/procfs/procfs_map.c		optional procfs
 fs/procfs/procfs_mem.c		optional procfs
 fs/procfs/procfs_note.c		optional procfs
 fs/procfs/procfs_osrel.c	optional procfs
 fs/procfs/procfs_regs.c		optional procfs
 fs/procfs/procfs_rlimit.c	optional procfs
 fs/procfs/procfs_status.c	optional procfs
 fs/procfs/procfs_type.c		optional procfs
 fs/pseudofs/pseudofs.c		optional pseudofs
 fs/pseudofs/pseudofs_fileno.c	optional pseudofs
 fs/pseudofs/pseudofs_vncache.c	optional pseudofs
 fs/pseudofs/pseudofs_vnops.c	optional pseudofs
 fs/smbfs/smbfs_io.c		optional smbfs
 fs/smbfs/smbfs_node.c		optional smbfs
 fs/smbfs/smbfs_smb.c		optional smbfs
 fs/smbfs/smbfs_subr.c		optional smbfs
 fs/smbfs/smbfs_vfsops.c		optional smbfs
 fs/smbfs/smbfs_vnops.c		optional smbfs
 fs/udf/osta.c			optional udf
 fs/udf/udf_iconv.c		optional udf_iconv
 fs/udf/udf_vfsops.c		optional udf
 fs/udf/udf_vnops.c		optional udf
 fs/unionfs/union_subr.c		optional unionfs
 fs/unionfs/union_vfsops.c	optional unionfs
 fs/unionfs/union_vnops.c	optional unionfs
 fs/tmpfs/tmpfs_vnops.c		optional tmpfs
 fs/tmpfs/tmpfs_fifoops.c 	optional tmpfs
 fs/tmpfs/tmpfs_vfsops.c 	optional tmpfs
 fs/tmpfs/tmpfs_subr.c 		optional tmpfs
 gdb/gdb_cons.c			optional gdb
 gdb/gdb_main.c			optional gdb
 gdb/gdb_packet.c		optional gdb
 geom/bde/g_bde.c		optional geom_bde
 geom/bde/g_bde_crypt.c		optional geom_bde
 geom/bde/g_bde_lock.c		optional geom_bde
 geom/bde/g_bde_work.c		optional geom_bde
 geom/cache/g_cache.c		optional geom_cache
 geom/concat/g_concat.c		optional geom_concat
 geom/eli/g_eli.c		optional geom_eli
 geom/eli/g_eli_crypto.c		optional geom_eli
 geom/eli/g_eli_ctl.c		optional geom_eli
 geom/eli/g_eli_integrity.c	optional geom_eli
 geom/eli/g_eli_key.c		optional geom_eli
 geom/eli/g_eli_key_cache.c	optional geom_eli
 geom/eli/g_eli_privacy.c	optional geom_eli
 geom/eli/pkcs5v2.c		optional geom_eli
 geom/gate/g_gate.c		optional geom_gate
 geom/geom_aes.c			optional geom_aes
 geom/geom_bsd.c			optional geom_bsd
 geom/geom_bsd_enc.c		optional geom_bsd | geom_part_bsd
 geom/geom_ccd.c			optional ccd | geom_ccd
 geom/geom_ctl.c			standard
 geom/geom_dev.c			standard
 geom/geom_disk.c		standard
 geom/geom_dump.c		standard
 geom/geom_event.c		standard
 geom/geom_fox.c			optional geom_fox
 geom/geom_flashmap.c		optional fdt cfi | fdt nand
 geom/geom_io.c			standard
 geom/geom_kern.c		standard
 geom/geom_map.c			optional geom_map
 geom/geom_mbr.c			optional geom_mbr
 geom/geom_mbr_enc.c		optional geom_mbr
 geom/geom_pc98.c		optional geom_pc98
 geom/geom_pc98_enc.c		optional geom_pc98
 geom/geom_redboot.c		optional geom_redboot
 geom/geom_slice.c		standard
 geom/geom_subr.c		standard
 geom/geom_sunlabel.c		optional geom_sunlabel
 geom/geom_sunlabel_enc.c	optional geom_sunlabel
 geom/geom_vfs.c			standard
 geom/geom_vol_ffs.c		optional geom_vol
 geom/journal/g_journal.c	optional geom_journal
 geom/journal/g_journal_ufs.c	optional geom_journal
 geom/label/g_label.c		optional geom_label | geom_label_gpt
 geom/label/g_label_ext2fs.c	optional geom_label
 geom/label/g_label_iso9660.c	optional geom_label
 geom/label/g_label_msdosfs.c	optional geom_label
 geom/label/g_label_ntfs.c	optional geom_label
 geom/label/g_label_reiserfs.c	optional geom_label
 geom/label/g_label_ufs.c	optional geom_label
 geom/label/g_label_gpt.c	optional geom_label | geom_label_gpt
 geom/label/g_label_disk_ident.c	optional geom_label
 geom/linux_lvm/g_linux_lvm.c	optional geom_linux_lvm
 geom/mirror/g_mirror.c		optional geom_mirror
 geom/mirror/g_mirror_ctl.c	optional geom_mirror
 geom/mountver/g_mountver.c	optional geom_mountver
 geom/multipath/g_multipath.c	optional geom_multipath
 geom/nop/g_nop.c		optional geom_nop
 geom/part/g_part.c		standard
 geom/part/g_part_if.m		standard
 geom/part/g_part_apm.c		optional geom_part_apm
 geom/part/g_part_bsd.c		optional geom_part_bsd
 geom/part/g_part_bsd64.c	optional geom_part_bsd64
 geom/part/g_part_ebr.c		optional geom_part_ebr
 geom/part/g_part_gpt.c		optional geom_part_gpt
 geom/part/g_part_ldm.c		optional geom_part_ldm
 geom/part/g_part_mbr.c		optional geom_part_mbr
 geom/part/g_part_pc98.c		optional geom_part_pc98
 geom/part/g_part_vtoc8.c	optional geom_part_vtoc8
 geom/raid/g_raid.c		optional geom_raid
 geom/raid/g_raid_ctl.c		optional geom_raid
 geom/raid/g_raid_md_if.m	optional geom_raid
 geom/raid/g_raid_tr_if.m	optional geom_raid
 geom/raid/md_ddf.c		optional geom_raid
 geom/raid/md_intel.c		optional geom_raid
 geom/raid/md_jmicron.c		optional geom_raid
 geom/raid/md_nvidia.c		optional geom_raid
 geom/raid/md_promise.c		optional geom_raid
 geom/raid/md_sii.c		optional geom_raid
 geom/raid/tr_concat.c		optional geom_raid
 geom/raid/tr_raid0.c		optional geom_raid
 geom/raid/tr_raid1.c		optional geom_raid
 geom/raid/tr_raid1e.c		optional geom_raid
 geom/raid/tr_raid5.c		optional geom_raid
 geom/raid3/g_raid3.c		optional geom_raid3
 geom/raid3/g_raid3_ctl.c	optional geom_raid3
 geom/shsec/g_shsec.c		optional geom_shsec
 geom/stripe/g_stripe.c		optional geom_stripe
 geom/uncompress/g_uncompress.c	optional geom_uncompress
 contrib/xz-embedded/freebsd/xz_malloc.c	\
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 contrib/xz-embedded/linux/lib/xz/xz_crc32.c \
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 contrib/xz-embedded/linux/lib/xz/xz_dec_bcj.c \
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 contrib/xz-embedded/linux/lib/xz/xz_dec_lzma2.c \
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 contrib/xz-embedded/linux/lib/xz/xz_dec_stream.c \
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 geom/uzip/g_uzip.c		optional geom_uzip
 geom/vinum/geom_vinum.c		optional geom_vinum
 geom/vinum/geom_vinum_create.c	optional geom_vinum
 geom/vinum/geom_vinum_drive.c	optional geom_vinum
 geom/vinum/geom_vinum_plex.c	optional geom_vinum
 geom/vinum/geom_vinum_volume.c	optional geom_vinum
 geom/vinum/geom_vinum_subr.c	optional geom_vinum
 geom/vinum/geom_vinum_raid5.c	optional geom_vinum
 geom/vinum/geom_vinum_share.c	optional geom_vinum
 geom/vinum/geom_vinum_list.c	optional geom_vinum
 geom/vinum/geom_vinum_rm.c	optional geom_vinum
 geom/vinum/geom_vinum_init.c	optional geom_vinum
 geom/vinum/geom_vinum_state.c	optional geom_vinum
 geom/vinum/geom_vinum_rename.c	optional geom_vinum
 geom/vinum/geom_vinum_move.c	optional geom_vinum
 geom/vinum/geom_vinum_events.c	optional geom_vinum
 geom/virstor/binstream.c	optional geom_virstor
 geom/virstor/g_virstor.c	optional geom_virstor
 geom/virstor/g_virstor_md.c	optional geom_virstor
 geom/zero/g_zero.c		optional geom_zero
 fs/ext2fs/ext2_alloc.c		optional ext2fs
 fs/ext2fs/ext2_balloc.c		optional ext2fs
 fs/ext2fs/ext2_bmap.c		optional ext2fs
 fs/ext2fs/ext2_extents.c	optional ext2fs
 fs/ext2fs/ext2_inode.c		optional ext2fs
 fs/ext2fs/ext2_inode_cnv.c	optional ext2fs
 fs/ext2fs/ext2_hash.c		optional ext2fs
 fs/ext2fs/ext2_htree.c		optional ext2fs
 fs/ext2fs/ext2_lookup.c		optional ext2fs
 fs/ext2fs/ext2_subr.c		optional ext2fs
 fs/ext2fs/ext2_vfsops.c		optional ext2fs
 fs/ext2fs/ext2_vnops.c		optional ext2fs
 gnu/fs/reiserfs/reiserfs_hashes.c	optional reiserfs \
 	warning "kernel contains GPL contaminated ReiserFS filesystem"
 gnu/fs/reiserfs/reiserfs_inode.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_item_ops.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_namei.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_prints.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_stree.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_vfsops.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_vnops.c	optional reiserfs
 #
 isa/isa_if.m			standard
 isa/isa_common.c		optional isa
 isa/isahint.c			optional isa
 isa/pnp.c			optional isa isapnp
 isa/pnpparse.c			optional isa isapnp
 fs/cd9660/cd9660_bmap.c	optional cd9660
 fs/cd9660/cd9660_lookup.c	optional cd9660
 fs/cd9660/cd9660_node.c	optional cd9660
 fs/cd9660/cd9660_rrip.c	optional cd9660
 fs/cd9660/cd9660_util.c	optional cd9660
 fs/cd9660/cd9660_vfsops.c	optional cd9660
 fs/cd9660/cd9660_vnops.c	optional cd9660
 fs/cd9660/cd9660_iconv.c	optional cd9660_iconv
 kern/bus_if.m			standard
 kern/clock_if.m			standard
 kern/cpufreq_if.m		standard
 kern/device_if.m		standard
 kern/imgact_binmisc.c		optional	imagact_binmisc
 kern/imgact_elf.c		standard
 kern/imgact_elf32.c		optional compat_freebsd32
 kern/imgact_shell.c		standard
 kern/inflate.c			optional gzip
 kern/init_main.c		standard
 kern/init_sysent.c		standard
 kern/ksched.c			optional _kposix_priority_scheduling
 kern/kern_acct.c		standard
 kern/kern_alq.c			optional alq
 kern/kern_clock.c		standard
 kern/kern_condvar.c		standard
 kern/kern_conf.c		standard
 kern/kern_cons.c		standard
 kern/kern_cpu.c			standard
 kern/kern_cpuset.c		standard
 kern/kern_context.c		standard
 kern/kern_descrip.c		standard
 kern/kern_dtrace.c		optional kdtrace_hooks
 kern/kern_dump.c		standard
 kern/kern_environment.c		standard
 kern/kern_et.c			standard
 kern/kern_event.c		standard
 kern/kern_exec.c		standard
 kern/kern_exit.c		standard
 kern/kern_fail.c		standard
 kern/kern_ffclock.c		standard
 kern/kern_fork.c		standard
 kern/kern_gzio.c		optional gzio
 kern/kern_hhook.c		standard
 kern/kern_idle.c		standard
 kern/kern_intr.c		standard
 kern/kern_jail.c		standard
 kern/kern_khelp.c		standard
 kern/kern_kthread.c		standard
 kern/kern_ktr.c			optional ktr
 kern/kern_ktrace.c		standard
 kern/kern_linker.c		standard
 kern/kern_lock.c		standard
 kern/kern_lockf.c		standard
 kern/kern_lockstat.c		optional kdtrace_hooks
 kern/kern_loginclass.c		standard
 kern/kern_malloc.c		standard
 kern/kern_mbuf.c		standard
 kern/kern_mib.c			standard
 kern/kern_module.c		standard
 kern/kern_mtxpool.c		standard
 kern/kern_mutex.c		standard
 kern/kern_ntptime.c		standard
 kern/kern_osd.c			standard
 kern/kern_physio.c		standard
 kern/kern_pmc.c			standard
 kern/kern_poll.c		optional device_polling
 kern/kern_priv.c		standard
 kern/kern_proc.c		standard
 kern/kern_procctl.c		standard
 kern/kern_prot.c		standard
 kern/kern_racct.c		standard
 kern/kern_rangelock.c		standard
 kern/kern_rctl.c		standard
 kern/kern_resource.c		standard
 kern/kern_rmlock.c		standard
 kern/kern_rwlock.c		standard
 kern/kern_sdt.c			optional kdtrace_hooks
 kern/kern_sema.c		standard
 kern/kern_sharedpage.c		standard
 kern/kern_shutdown.c		standard
 kern/kern_sig.c			standard
 kern/kern_switch.c		standard
 kern/kern_sx.c			standard
 kern/kern_synch.c		standard
 kern/kern_syscalls.c		standard
 kern/kern_sysctl.c		standard
 kern/kern_tc.c			standard
 kern/kern_thr.c			standard
 kern/kern_thread.c		standard
 kern/kern_time.c		standard
 kern/kern_timeout.c		standard
 kern/kern_umtx.c		standard
 kern/kern_uuid.c		standard
 kern/kern_xxx.c			standard
 kern/link_elf.c			standard
 kern/linker_if.m		standard
 kern/md4c.c			optional netsmb
 kern/md5c.c			standard
 kern/p1003_1b.c			standard
 kern/posix4_mib.c		standard
 kern/sched_4bsd.c		optional sched_4bsd
 kern/sched_ule.c		optional sched_ule
 kern/serdev_if.m		standard
 kern/stack_protector.c		standard \
 	compile-with "${NORMAL_C:N-fstack-protector*}"
 kern/subr_acl_nfs4.c		optional ufs_acl | zfs
 kern/subr_acl_posix1e.c		optional ufs_acl
 kern/subr_autoconf.c		standard
 kern/subr_blist.c		standard
 kern/subr_bus.c			standard
 kern/subr_bus_dma.c		standard
 kern/subr_bufring.c		standard
 kern/subr_capability.c		standard
 kern/subr_clock.c		standard
 kern/subr_counter.c		standard
 kern/subr_devstat.c		standard
 kern/subr_disk.c		standard
 kern/subr_eventhandler.c	standard
 kern/subr_fattime.c		standard
 kern/subr_firmware.c		optional firmware
 kern/subr_hash.c		standard
 kern/subr_hints.c		standard
 kern/subr_kdb.c			standard
 kern/subr_kobj.c		standard
 kern/subr_lock.c		standard
 kern/subr_log.c			standard
 kern/subr_mbpool.c		optional libmbpool
 kern/subr_mchain.c		optional libmchain
 kern/subr_module.c		standard
 kern/subr_msgbuf.c		standard
 kern/subr_param.c		standard
 kern/subr_pcpu.c		standard
 kern/subr_pctrie.c		standard
 kern/subr_power.c		standard
 kern/subr_prf.c			standard
 kern/subr_prof.c		standard
 kern/subr_rman.c		standard
 kern/subr_rtc.c			standard
 kern/subr_sbuf.c		standard
 kern/subr_scanf.c		standard
 kern/subr_sglist.c		standard
 kern/subr_sleepqueue.c		standard
 kern/subr_smp.c			standard
 kern/subr_stack.c		optional ddb | stack | ktr
 kern/subr_taskqueue.c		standard
 kern/subr_terminal.c		optional vt
 kern/subr_trap.c		standard
 kern/subr_turnstile.c		standard
 kern/subr_uio.c			standard
 kern/subr_unit.c		standard
 kern/subr_vmem.c		standard
 kern/subr_witness.c		optional witness
 kern/sys_capability.c		standard
 kern/sys_generic.c		standard
 kern/sys_pipe.c			standard
 kern/sys_procdesc.c		standard
 kern/sys_process.c		standard
 kern/sys_socket.c		standard
 kern/syscalls.c			standard
 kern/sysv_ipc.c			standard
 kern/sysv_msg.c			optional sysvmsg
 kern/sysv_sem.c			optional sysvsem
 kern/sysv_shm.c			optional sysvshm
 kern/tty.c			standard
 kern/tty_compat.c		optional compat_43tty
 kern/tty_info.c			standard
 kern/tty_inq.c			standard
 kern/tty_outq.c			standard
 kern/tty_pts.c			standard
 kern/tty_tty.c			standard
 kern/tty_ttydisc.c		standard
 kern/uipc_accf.c		standard
 kern/uipc_debug.c		optional ddb
 kern/uipc_domain.c		standard
 kern/uipc_mbuf.c		standard
 kern/uipc_mbuf2.c		standard
 kern/uipc_mqueue.c		optional p1003_1b_mqueue
 kern/uipc_sem.c			optional p1003_1b_semaphores
 kern/uipc_shm.c			standard
 kern/uipc_sockbuf.c		standard
 kern/uipc_socket.c		standard
 kern/uipc_syscalls.c		standard
 kern/uipc_usrreq.c		standard
 kern/vfs_acl.c			standard
 kern/vfs_aio.c			optional vfs_aio
 kern/vfs_bio.c			standard
 kern/vfs_cache.c		standard
 kern/vfs_cluster.c		standard
 kern/vfs_default.c		standard
 kern/vfs_export.c		standard
 kern/vfs_extattr.c		standard
 kern/vfs_hash.c			standard
 kern/vfs_init.c			standard
 kern/vfs_lookup.c		standard
 kern/vfs_mount.c		standard
 kern/vfs_mountroot.c		standard
 kern/vfs_subr.c			standard
 kern/vfs_syscalls.c		standard
 kern/vfs_vnops.c		standard
 #
 # Kernel GSS-API
 #
 gssd.h				optional kgssapi			\
 	dependency		"$S/kgssapi/gssd.x"			\
 	compile-with		"RPCGEN_CPP='${CPP}' rpcgen -hM $S/kgssapi/gssd.x | grep -v pthread.h > gssd.h" \
 	no-obj no-implicit-rule before-depend local			\
 	clean			"gssd.h"
 gssd_xdr.c			optional kgssapi			\
 	dependency		"$S/kgssapi/gssd.x gssd.h"		\
 	compile-with		"RPCGEN_CPP='${CPP}' rpcgen -c $S/kgssapi/gssd.x -o gssd_xdr.c" \
 	no-implicit-rule before-depend local				\
 	clean			"gssd_xdr.c"
 gssd_clnt.c			optional kgssapi			\
 	dependency		"$S/kgssapi/gssd.x gssd.h"		\
 	compile-with		"RPCGEN_CPP='${CPP}' rpcgen -lM $S/kgssapi/gssd.x | grep -v string.h > gssd_clnt.c" \
 	no-implicit-rule before-depend local				\
 	clean			"gssd_clnt.c"
 kgssapi/gss_accept_sec_context.c optional kgssapi
 kgssapi/gss_add_oid_set_member.c optional kgssapi
 kgssapi/gss_acquire_cred.c	optional kgssapi
 kgssapi/gss_canonicalize_name.c	optional kgssapi
 kgssapi/gss_create_empty_oid_set.c optional kgssapi
 kgssapi/gss_delete_sec_context.c optional kgssapi
 kgssapi/gss_display_status.c	optional kgssapi
 kgssapi/gss_export_name.c	optional kgssapi
 kgssapi/gss_get_mic.c		optional kgssapi
 kgssapi/gss_init_sec_context.c	optional kgssapi
 kgssapi/gss_impl.c		optional kgssapi
 kgssapi/gss_import_name.c	optional kgssapi
 kgssapi/gss_names.c		optional kgssapi
 kgssapi/gss_pname_to_uid.c	optional kgssapi
 kgssapi/gss_release_buffer.c	optional kgssapi
 kgssapi/gss_release_cred.c	optional kgssapi
 kgssapi/gss_release_name.c	optional kgssapi
 kgssapi/gss_release_oid_set.c	optional kgssapi
 kgssapi/gss_set_cred_option.c	optional kgssapi
 kgssapi/gss_test_oid_set_member.c optional kgssapi
 kgssapi/gss_unwrap.c		optional kgssapi
 kgssapi/gss_verify_mic.c	optional kgssapi
 kgssapi/gss_wrap.c		optional kgssapi
 kgssapi/gss_wrap_size_limit.c	optional kgssapi
 kgssapi/gssd_prot.c		optional kgssapi
 kgssapi/krb5/krb5_mech.c	optional kgssapi
 kgssapi/krb5/kcrypto.c		optional kgssapi
 kgssapi/krb5/kcrypto_aes.c	optional kgssapi
 kgssapi/krb5/kcrypto_arcfour.c	optional kgssapi
 kgssapi/krb5/kcrypto_des.c	optional kgssapi
 kgssapi/krb5/kcrypto_des3.c	optional kgssapi
 kgssapi/kgss_if.m		optional kgssapi
 kgssapi/gsstest.c		optional kgssapi_debug
 # These files in libkern/ are those needed by all architectures.  Some
 # of the files in libkern/ are only needed on some architectures, e.g.,
 # libkern/divdi3.c is needed by i386 but not alpha.  Also, some of these
 # routines may be optimized for a particular platform.  In either case,
 # the file should be moved to conf/files.<arch> from here.
 #
 libkern/arc4random.c		standard
 libkern/bcd.c			standard
 libkern/bsearch.c		standard
 libkern/crc32.c			standard
 libkern/explicit_bzero.c	standard
 libkern/fnmatch.c		standard
 libkern/iconv.c			optional libiconv
 libkern/iconv_converter_if.m	optional libiconv
 libkern/iconv_ucs.c		optional libiconv
 libkern/iconv_xlat.c		optional libiconv
 libkern/iconv_xlat16.c		optional libiconv
 libkern/inet_aton.c		standard
 libkern/inet_ntoa.c		standard
 libkern/inet_ntop.c		standard
 libkern/inet_pton.c		standard
 libkern/jenkins_hash.c		standard
 libkern/murmur3_32.c		standard
 libkern/mcount.c		optional profiling-routine
 libkern/memcchr.c		standard
 libkern/memchr.c		optional fdt | gdb
 libkern/memcmp.c		standard
 libkern/memmem.c		optional gdb
 libkern/qsort.c			standard
 libkern/qsort_r.c		standard
 libkern/random.c		standard
 libkern/scanc.c			standard
 libkern/strcasecmp.c		standard
 libkern/strcat.c		standard
 libkern/strchr.c		standard
 libkern/strcmp.c		standard
 libkern/strcpy.c		standard
 libkern/strcspn.c		standard
 libkern/strdup.c		standard
 libkern/strndup.c		standard
 libkern/strlcat.c		standard
 libkern/strlcpy.c		standard
 libkern/strlen.c		standard
 libkern/strncmp.c		standard
 libkern/strncpy.c		standard
 libkern/strnlen.c		standard
 libkern/strrchr.c		standard
 libkern/strsep.c		standard
 libkern/strspn.c		standard
 libkern/strstr.c		standard
 libkern/strtol.c		standard
 libkern/strtoq.c		standard
 libkern/strtoul.c		standard
 libkern/strtouq.c		standard
 libkern/strvalid.c		standard
 libkern/timingsafe_bcmp.c	standard
 net/bpf.c			standard
 net/bpf_buffer.c		optional bpf
 net/bpf_jitter.c		optional bpf_jitter
 net/bpf_filter.c		optional bpf | netgraph_bpf
 net/bpf_zerocopy.c		optional bpf
 net/bridgestp.c			optional bridge | if_bridge
 net/flowtable.c			optional flowtable inet | flowtable inet6
 net/ieee8023ad_lacp.c		optional lagg
 net/if.c			standard
 net/if_arcsubr.c		optional arcnet
 net/if_atmsubr.c		optional atm
 net/if_bridge.c			optional bridge inet | if_bridge inet
 net/if_clone.c			standard
 net/if_dead.c			standard
 net/if_debug.c			optional ddb
 net/if_disc.c			optional disc
 net/if_edsc.c			optional edsc
 net/if_enc.c			optional enc ipsec inet | enc ipsec inet6
 net/if_epair.c			optional epair
 net/if_ethersubr.c		optional ether
 net/if_fddisubr.c		optional fddi
 net/if_fwsubr.c			optional fwip
 net/if_gif.c			optional gif inet | gif inet6 | \
 					 netgraph_gif inet | netgraph_gif inet6
 net/if_gre.c			optional gre inet | gre inet6
 net/if_iso88025subr.c		optional token
 net/if_lagg.c			optional lagg
 net/if_loop.c			optional loop
 net/if_llatbl.c			standard
 net/if_me.c			optional me inet
 net/if_media.c			standard
 net/if_mib.c			standard
 net/if_spppfr.c			optional sppp | netgraph_sppp
 net/if_spppsubr.c		optional sppp | netgraph_sppp
 net/if_stf.c			optional stf inet inet6
 net/if_tun.c			optional tun
 net/if_tap.c			optional tap
 net/if_vlan.c			optional vlan
 net/if_vxlan.c			optional vxlan inet | vxlan inet6
 net/mppcc.c			optional netgraph_mppc_compression
 net/mppcd.c			optional netgraph_mppc_compression
 net/netisr.c			standard
 net/pfil.c			optional ether | inet
 net/radix.c			standard
 net/radix_mpath.c		standard
 net/raw_cb.c			standard
 net/raw_usrreq.c		standard
 net/route.c			standard
+net/rss_config.c		optional inet rss | inet6 rss
 net/rtsock.c			standard
 net/slcompress.c		optional netgraph_vjc | sppp | \
 					 netgraph_sppp
+net/toeplitz.c			optional inet rss | inet6 rss
 net/vnet.c			optional vimage
 net/zlib.c			optional crypto | geom_uzip | ipsec | \
 					 mxge | netgraph_deflate | \
 					 ddb_ctf | gzio | geom_uncompress
 net80211/ieee80211.c		optional wlan
 net80211/ieee80211_acl.c	optional wlan wlan_acl
 net80211/ieee80211_action.c	optional wlan
 net80211/ieee80211_ageq.c	optional wlan
 net80211/ieee80211_adhoc.c	optional wlan \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_ageq.c	optional wlan
 net80211/ieee80211_amrr.c	optional wlan | wlan_amrr
 net80211/ieee80211_crypto.c	optional wlan \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_crypto_ccmp.c optional wlan wlan_ccmp
 net80211/ieee80211_crypto_none.c optional wlan
 net80211/ieee80211_crypto_tkip.c optional wlan wlan_tkip
 net80211/ieee80211_crypto_wep.c	optional wlan wlan_wep
 net80211/ieee80211_ddb.c	optional wlan ddb
 net80211/ieee80211_dfs.c	optional wlan
 net80211/ieee80211_freebsd.c	optional wlan
 net80211/ieee80211_hostap.c	optional wlan \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_ht.c		optional wlan
 net80211/ieee80211_hwmp.c	optional wlan ieee80211_support_mesh
 net80211/ieee80211_input.c	optional wlan
 net80211/ieee80211_ioctl.c	optional wlan
 net80211/ieee80211_mesh.c	optional wlan ieee80211_support_mesh \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_monitor.c	optional wlan
 net80211/ieee80211_node.c	optional wlan
 net80211/ieee80211_output.c	optional wlan
 net80211/ieee80211_phy.c	optional wlan
 net80211/ieee80211_power.c	optional wlan
 net80211/ieee80211_proto.c	optional wlan
 net80211/ieee80211_radiotap.c	optional wlan
 net80211/ieee80211_ratectl.c	optional wlan
 net80211/ieee80211_ratectl_none.c optional wlan
 net80211/ieee80211_regdomain.c	optional wlan
 net80211/ieee80211_rssadapt.c	optional wlan wlan_rssadapt
 net80211/ieee80211_scan.c	optional wlan
 net80211/ieee80211_scan_sta.c	optional wlan
 net80211/ieee80211_sta.c	optional wlan \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_superg.c	optional wlan ieee80211_support_superg
 net80211/ieee80211_scan_sw.c	optional wlan
 net80211/ieee80211_tdma.c	optional wlan ieee80211_support_tdma
 net80211/ieee80211_wds.c	optional wlan
 net80211/ieee80211_xauth.c	optional wlan wlan_xauth
 net80211/ieee80211_alq.c	optional wlan ieee80211_alq
 netgraph/atm/ccatm/ng_ccatm.c	optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/atm/ng_atm.c		optional ngatm_atm
 netgraph/atm/ngatmbase.c	optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/atm/sscfu/ng_sscfu.c	optional ngatm_sscfu \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/atm/sscop/ng_sscop.c optional ngatm_sscop \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/atm/uni/ng_uni.c	optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/bluetooth/common/ng_bluetooth.c optional netgraph_bluetooth
 netgraph/bluetooth/drivers/bt3c/ng_bt3c_pccard.c optional netgraph_bluetooth_bt3c
 netgraph/bluetooth/drivers/h4/ng_h4.c optional netgraph_bluetooth_h4
 netgraph/bluetooth/drivers/ubt/ng_ubt.c optional netgraph_bluetooth_ubt usb
 netgraph/bluetooth/drivers/ubtbcmfw/ubtbcmfw.c optional netgraph_bluetooth_ubtbcmfw usb
 netgraph/bluetooth/hci/ng_hci_cmds.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/hci/ng_hci_evnt.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/hci/ng_hci_main.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/hci/ng_hci_misc.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/hci/ng_hci_ulpi.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/l2cap/ng_l2cap_cmds.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_evnt.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_llpi.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_main.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_misc.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/socket/ng_btsocket.c optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_hci_raw.c	optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_l2cap.c optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_rfcomm.c optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_sco.c optional netgraph_bluetooth_socket
 netgraph/netflow/netflow.c	optional netgraph_netflow
 netgraph/netflow/netflow_v9.c	optional netgraph_netflow
 netgraph/netflow/ng_netflow.c	optional netgraph_netflow
 netgraph/ng_UI.c		optional netgraph_UI
 netgraph/ng_async.c		optional netgraph_async
 netgraph/ng_atmllc.c		optional netgraph_atmllc
 netgraph/ng_base.c		optional netgraph
 netgraph/ng_bpf.c		optional netgraph_bpf
 netgraph/ng_bridge.c		optional netgraph_bridge
 netgraph/ng_car.c		optional netgraph_car
 netgraph/ng_cisco.c		optional netgraph_cisco
 netgraph/ng_deflate.c		optional netgraph_deflate
 netgraph/ng_device.c		optional netgraph_device
 netgraph/ng_echo.c		optional netgraph_echo
 netgraph/ng_eiface.c		optional netgraph_eiface
 netgraph/ng_ether.c		optional netgraph_ether
 netgraph/ng_ether_echo.c	optional netgraph_ether_echo
 netgraph/ng_frame_relay.c	optional netgraph_frame_relay
 netgraph/ng_gif.c		optional netgraph_gif inet6 | netgraph_gif inet
 netgraph/ng_gif_demux.c		optional netgraph_gif_demux
 netgraph/ng_hole.c		optional netgraph_hole
 netgraph/ng_iface.c		optional netgraph_iface
 netgraph/ng_ip_input.c		optional netgraph_ip_input
 netgraph/ng_ipfw.c		optional netgraph_ipfw inet ipfirewall
 netgraph/ng_ksocket.c		optional netgraph_ksocket
 netgraph/ng_l2tp.c		optional netgraph_l2tp
 netgraph/ng_lmi.c		optional netgraph_lmi
 netgraph/ng_mppc.c		optional netgraph_mppc_compression | \
 					 netgraph_mppc_encryption
 netgraph/ng_nat.c		optional netgraph_nat inet libalias
 netgraph/ng_one2many.c		optional netgraph_one2many
 netgraph/ng_parse.c		optional netgraph
 netgraph/ng_patch.c		optional netgraph_patch
 netgraph/ng_pipe.c		optional netgraph_pipe
 netgraph/ng_ppp.c		optional netgraph_ppp
 netgraph/ng_pppoe.c		optional netgraph_pppoe
 netgraph/ng_pptpgre.c		optional netgraph_pptpgre
 netgraph/ng_pred1.c		optional netgraph_pred1
 netgraph/ng_rfc1490.c		optional netgraph_rfc1490
 netgraph/ng_socket.c		optional netgraph_socket
 netgraph/ng_split.c		optional netgraph_split
 netgraph/ng_sppp.c		optional netgraph_sppp
 netgraph/ng_tag.c		optional netgraph_tag
 netgraph/ng_tcpmss.c		optional netgraph_tcpmss
 netgraph/ng_tee.c		optional netgraph_tee
 netgraph/ng_tty.c		optional netgraph_tty
 netgraph/ng_vjc.c		optional netgraph_vjc
 netgraph/ng_vlan.c		optional netgraph_vlan
 netinet/accf_data.c		optional accept_filter_data inet
 netinet/accf_dns.c		optional accept_filter_dns inet
 netinet/accf_http.c		optional accept_filter_http inet
 netinet/if_atm.c		optional atm
 netinet/if_ether.c		optional inet ether
 netinet/igmp.c			optional inet
 netinet/in.c			optional inet
 netinet/in_debug.c		optional inet ddb
 netinet/in_kdtrace.c		optional inet | inet6
 netinet/ip_carp.c		optional inet carp | inet6 carp
 netinet/in_gif.c		optional gif inet | netgraph_gif inet
 netinet/ip_gre.c		optional gre inet
 netinet/ip_id.c			optional inet
 netinet/in_mcast.c		optional inet
 netinet/in_pcb.c		optional inet | inet6
 netinet/in_pcbgroup.c		optional inet pcbgroup | inet6 pcbgroup
 netinet/in_proto.c		optional inet | inet6
 netinet/in_rmx.c		optional inet
-netinet/in_rss.c		optional inet rss | inet6 rss
+netinet/in_rss.c		optional inet rss
 netinet/ip_divert.c		optional inet ipdivert ipfirewall
 netinet/ip_ecn.c		optional inet | inet6
 netinet/ip_encap.c		optional inet | inet6
 netinet/ip_fastfwd.c		optional inet
 netinet/ip_icmp.c		optional inet | inet6
 netinet/ip_input.c		optional inet
 netinet/ip_ipsec.c		optional inet ipsec
 netinet/ip_mroute.c		optional mrouting inet
 netinet/ip_options.c		optional inet
 netinet/ip_output.c		optional inet
 netinet/raw_ip.c		optional inet | inet6
 netinet/cc/cc.c			optional inet | inet6
 netinet/cc/cc_newreno.c		optional inet | inet6
 netinet/sctp_asconf.c		optional inet sctp | inet6 sctp
 netinet/sctp_auth.c		optional inet sctp | inet6 sctp
 netinet/sctp_bsd_addr.c		optional inet sctp | inet6 sctp
 netinet/sctp_cc_functions.c	optional inet sctp | inet6 sctp
 netinet/sctp_crc32.c		optional inet sctp | inet6 sctp
 netinet/sctp_indata.c		optional inet sctp | inet6 sctp
 netinet/sctp_input.c		optional inet sctp | inet6 sctp
 netinet/sctp_output.c		optional inet sctp | inet6 sctp
 netinet/sctp_pcb.c		optional inet sctp | inet6 sctp
 netinet/sctp_peeloff.c		optional inet sctp | inet6 sctp
 netinet/sctp_ss_functions.c	optional inet sctp | inet6 sctp
 netinet/sctp_syscalls.c		optional inet sctp | inet6 sctp
 netinet/sctp_sysctl.c		optional inet sctp | inet6 sctp
 netinet/sctp_timer.c		optional inet sctp | inet6 sctp
 netinet/sctp_usrreq.c		optional inet sctp | inet6 sctp
 netinet/sctputil.c		optional inet sctp | inet6 sctp
 netinet/tcp_debug.c		optional tcpdebug
 netinet/tcp_hostcache.c		optional inet | inet6
 netinet/tcp_input.c		optional inet | inet6
 netinet/tcp_lro.c		optional inet | inet6
 netinet/tcp_output.c		optional inet | inet6
 netinet/tcp_offload.c		optional tcp_offload inet | tcp_offload inet6
 netinet/tcp_reass.c		optional inet | inet6
 netinet/tcp_sack.c		optional inet | inet6
 netinet/tcp_subr.c		optional inet | inet6
 netinet/tcp_syncache.c		optional inet | inet6
 netinet/tcp_timer.c		optional inet | inet6
 netinet/tcp_timewait.c		optional inet | inet6
 netinet/tcp_usrreq.c		optional inet | inet6
-netinet/toeplitz.c		optional inet rss | inet6 rss
 netinet/udp_usrreq.c		optional inet | inet6
 netinet/libalias/alias.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_db.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_mod.c	optional libalias | netgraph_nat
 netinet/libalias/alias_proxy.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_util.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_sctp.c	optional libalias inet | netgraph_nat inet
 netinet6/dest6.c		optional inet6
 netinet6/frag6.c		optional inet6
 netinet6/icmp6.c		optional inet6
 netinet6/in6.c			optional inet6
 netinet6/in6_cksum.c		optional inet6
 netinet6/in6_gif.c		optional gif inet6 | netgraph_gif inet6
 netinet6/in6_ifattach.c		optional inet6
 netinet6/in6_mcast.c		optional inet6
 netinet6/in6_pcb.c		optional inet6
 netinet6/in6_pcbgroup.c		optional inet6 pcbgroup
 netinet6/in6_proto.c		optional inet6
 netinet6/in6_rmx.c		optional inet6
+netinet6/in6_rss.c		optional inet6 rss
 netinet6/in6_src.c		optional inet6
 netinet6/ip6_forward.c		optional inet6
 netinet6/ip6_gre.c		optional gre inet6
 netinet6/ip6_id.c		optional inet6
 netinet6/ip6_input.c		optional inet6
 netinet6/ip6_mroute.c		optional mrouting inet6
 netinet6/ip6_output.c		optional inet6
 netinet6/ip6_ipsec.c		optional inet6 ipsec
 netinet6/mld6.c			optional inet6
 netinet6/nd6.c			optional inet6
 netinet6/nd6_nbr.c		optional inet6
 netinet6/nd6_rtr.c		optional inet6
 netinet6/raw_ip6.c		optional inet6
 netinet6/route6.c		optional inet6
 netinet6/scope6.c		optional inet6
 netinet6/sctp6_usrreq.c		optional inet6 sctp
 netinet6/udp6_usrreq.c		optional inet6
 netipsec/ipsec.c		optional ipsec inet | ipsec inet6
 netipsec/ipsec_input.c		optional ipsec inet | ipsec inet6
 netipsec/ipsec_mbuf.c		optional ipsec inet | ipsec inet6
 netipsec/ipsec_output.c		optional ipsec inet | ipsec inet6
 netipsec/key.c			optional ipsec inet | ipsec inet6
 netipsec/key_debug.c		optional ipsec inet | ipsec inet6
 netipsec/keysock.c		optional ipsec inet | ipsec inet6
 netipsec/xform_ah.c		optional ipsec inet | ipsec inet6
 netipsec/xform_esp.c		optional ipsec inet | ipsec inet6
 netipsec/xform_ipcomp.c		optional ipsec inet | ipsec inet6
 netipsec/xform_ipip.c		optional ipsec inet | ipsec inet6
 netipsec/xform_tcp.c		optional ipsec inet tcp_signature | \
 					 ipsec inet6 tcp_signature
 netnatm/natm.c			optional natm
 netnatm/natm_pcb.c		optional natm
 netnatm/natm_proto.c		optional natm
 netpfil/ipfw/dn_heap.c		optional inet dummynet
 netpfil/ipfw/dn_sched_fifo.c	optional inet dummynet
 netpfil/ipfw/dn_sched_prio.c	optional inet dummynet
 netpfil/ipfw/dn_sched_qfq.c	optional inet dummynet
 netpfil/ipfw/dn_sched_rr.c	optional inet dummynet
 netpfil/ipfw/dn_sched_wf2q.c	optional inet dummynet
 netpfil/ipfw/ip_dummynet.c	optional inet dummynet
 netpfil/ipfw/ip_dn_io.c		optional inet dummynet
 netpfil/ipfw/ip_dn_glue.c	optional inet dummynet
 netpfil/ipfw/ip_fw2.c		optional inet ipfirewall
 netpfil/ipfw/ip_fw_dynamic.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_log.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_pfil.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_sockopt.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_table.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_table_algo.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_table_value.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_iface.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_nat.c	optional inet ipfirewall_nat
 netpfil/pf/if_pflog.c		optional pflog pf inet
 netpfil/pf/if_pfsync.c		optional pfsync pf inet
 netpfil/pf/pf.c			optional pf inet
 netpfil/pf/pf_if.c		optional pf inet
 netpfil/pf/pf_ioctl.c		optional pf inet
 netpfil/pf/pf_lb.c		optional pf inet
 netpfil/pf/pf_norm.c		optional pf inet
 netpfil/pf/pf_osfp.c		optional pf inet
 netpfil/pf/pf_ruleset.c		optional pf inet
 netpfil/pf/pf_table.c		optional pf inet
 netpfil/pf/in4_cksum.c		optional pf inet
 netsmb/smb_conn.c		optional netsmb
 netsmb/smb_crypt.c		optional netsmb
 netsmb/smb_dev.c		optional netsmb
 netsmb/smb_iod.c		optional netsmb
 netsmb/smb_rq.c			optional netsmb
 netsmb/smb_smb.c		optional netsmb
 netsmb/smb_subr.c		optional netsmb
 netsmb/smb_trantcp.c		optional netsmb
 netsmb/smb_usr.c		optional netsmb
 nfs/bootp_subr.c		optional bootp nfscl
 nfs/krpc_subr.c			optional bootp nfscl
 nfs/nfs_diskless.c		optional nfscl nfs_root
 nfs/nfs_fha.c			optional nfsd
 nfs/nfs_lock.c			optional nfscl | nfslockd | nfsd
 nfs/nfs_nfssvc.c		optional nfscl | nfsd
 nlm/nlm_advlock.c		optional nfslockd | nfsd
 nlm/nlm_prot_clnt.c		optional nfslockd | nfsd
 nlm/nlm_prot_impl.c		optional nfslockd | nfsd
 nlm/nlm_prot_server.c		optional nfslockd | nfsd
 nlm/nlm_prot_svc.c		optional nfslockd | nfsd
 nlm/nlm_prot_xdr.c		optional nfslockd | nfsd
 nlm/sm_inter_xdr.c		optional nfslockd | nfsd
 
 # Linux Kernel Compatibility API
 ofed/include/linux/linux_kmod.c			optional ofed | compat_linuxapi \
 	no-depend compile-with "${OFED_C}"
 ofed/include/linux/linux_compat.c		optional ofed | compat_linuxapi \
 	no-depend compile-with "${OFED_C}"
 ofed/include/linux/linux_pci.c			optional ofed | compat_linuxapi \
 	no-depend compile-with "${OFED_C}"
 ofed/include/linux/linux_idr.c			optional ofed | compat_linuxapi \
 	no-depend compile-with "${OFED_C}"
 ofed/include/linux/linux_radix.c		optional ofed | compat_linuxapi \
 	no-depend compile-with "${OFED_C}"
 # OpenFabrics Enterprise Distribution (Infiniband)
 ofed/drivers/infiniband/core/addr.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/agent.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/cache.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 # XXX Mad.c must be ordered before cm.c for sysinit sets to occur in
 # the correct order.
 ofed/drivers/infiniband/core/mad.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/cm.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/ -Wno-unused-function"
 ofed/drivers/infiniband/core/cma.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/device.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/fmr_pool.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/iwcm.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/local_sa.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/mad_rmpp.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/multicast.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/notice.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/packer.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/sa_query.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/smi.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/sysfs.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/ucm.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/ucma.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/ud_header.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/umem.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/user_mad.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/uverbs_cmd.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/uverbs_main.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/uverbs_marshall.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/verbs.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 
 ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c	optional ipoib		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 #ofed/drivers/infiniband/ulp/ipoib/ipoib_fs.c	optional ipoib		\
 #	no-depend							\
 #	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c	optional ipoib		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c	optional ipoib		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	optional ipoib	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	optional ipoib		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 #ofed/drivers/infiniband/ulp/ipoib/ipoib_vlan.c	optional ipoib		\
 #	no-depend							\
 #	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 
 ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c	optional sdp inet	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 ofed/drivers/infiniband/ulp/sdp/sdp_main.c	optional sdp inet 	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 ofed/drivers/infiniband/ulp/sdp/sdp_rx.c	optional sdp inet 	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 ofed/drivers/infiniband/ulp/sdp/sdp_cma.c	optional sdp inet 	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 ofed/drivers/infiniband/ulp/sdp/sdp_tx.c	optional sdp inet 	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 
 ofed/drivers/infiniband/hw/mlx4/alias_GUID.c    optional mlx4ib         \
         no-depend obj-prefix "mlx4ib_"                                  \
         compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/mcg.c           optional mlx4ib         \
         no-depend obj-prefix "mlx4ib_"                                  \
         compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/sysfs.c         optional mlx4ib         \
         no-depend obj-prefix "mlx4ib_"                                  \
         compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/cm.c            optional mlx4ib         \
         no-depend obj-prefix "mlx4ib_"                                  \
         compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/ah.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/cq.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/doorbell.c	optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/mad.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/main.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/mr.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/qp.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/srq.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/wc.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 
 ofed/drivers/net/mlx4/alloc.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/catas.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/cmd.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/cq.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/eq.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/fw.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/icm.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/intf.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/main.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/mcg.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/ -Wno-unused"
 ofed/drivers/net/mlx4/mr.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/pd.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/port.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/profile.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/qp.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/reset.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/sense.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/srq.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/resource_tracker.c        optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/sys_tune.c		optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 
 ofed/drivers/net/mlx4/en_cq.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/utils.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_main.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_netdev.c		optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_port.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_resources.c		optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_rx.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_tx.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 
 ofed/drivers/infiniband/hw/mthca/mthca_allocator.c	optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_av.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_catas.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_cmd.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_cq.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_eq.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_mad.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_main.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_mcg.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_memfree.c	optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_mr.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_pd.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_profile.c	optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_provider.c	optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_qp.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_reset.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_srq.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_uar.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 
 # crypto support
 opencrypto/cast.c		optional crypto | ipsec
 opencrypto/criov.c		optional crypto
 opencrypto/crypto.c		optional crypto
 opencrypto/cryptodev.c		optional cryptodev
 opencrypto/cryptodev_if.m	optional crypto
 opencrypto/cryptosoft.c		optional crypto
 opencrypto/cryptodeflate.c	optional crypto
 opencrypto/gmac.c		optional crypto
 opencrypto/gfmult.c		optional crypto
 opencrypto/rmd160.c		optional crypto | ipsec
 opencrypto/skipjack.c		optional crypto
 opencrypto/xform.c		optional crypto
 rpc/auth_none.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/auth_unix.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/authunix_prot.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/clnt_bck.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/clnt_dg.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/clnt_rc.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/clnt_vc.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/getnetconfig.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/replay.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/rpc_callmsg.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/rpc_generic.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/rpc_prot.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/rpcb_clnt.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/rpcb_prot.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/svc.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_auth.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_auth_unix.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_dg.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_generic.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_vc.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/rpcsec_gss/rpcsec_gss.c	optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 rpc/rpcsec_gss/rpcsec_gss_conf.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 rpc/rpcsec_gss/rpcsec_gss_misc.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 rpc/rpcsec_gss/rpcsec_gss_prot.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 rpc/rpcsec_gss/svc_rpcsec_gss.c	optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 security/audit/audit.c		optional audit
 security/audit/audit_arg.c	optional audit
 security/audit/audit_bsm.c	optional audit
 security/audit/audit_bsm_klib.c	optional audit
 security/audit/audit_pipe.c	optional audit
 security/audit/audit_syscalls.c	standard
 security/audit/audit_trigger.c	optional audit
 security/audit/audit_worker.c	optional audit
 security/audit/bsm_domain.c	optional audit
 security/audit/bsm_errno.c	optional audit
 security/audit/bsm_fcntl.c	optional audit
 security/audit/bsm_socket_type.c	optional audit
 security/audit/bsm_token.c	optional audit
 security/mac/mac_audit.c	optional mac audit
 security/mac/mac_cred.c		optional mac
 security/mac/mac_framework.c	optional mac
 security/mac/mac_inet.c		optional mac inet | mac inet6
 security/mac/mac_inet6.c	optional mac inet6
 security/mac/mac_label.c	optional mac
 security/mac/mac_net.c		optional mac
 security/mac/mac_pipe.c		optional mac
 security/mac/mac_posix_sem.c	optional mac
 security/mac/mac_posix_shm.c	optional mac
 security/mac/mac_priv.c		optional mac
 security/mac/mac_process.c	optional mac
 security/mac/mac_socket.c	optional mac
 security/mac/mac_syscalls.c	standard
 security/mac/mac_system.c	optional mac
 security/mac/mac_sysv_msg.c	optional mac
 security/mac/mac_sysv_sem.c	optional mac
 security/mac/mac_sysv_shm.c	optional mac
 security/mac/mac_vfs.c		optional mac
 security/mac_biba/mac_biba.c	optional mac_biba
 security/mac_bsdextended/mac_bsdextended.c	optional mac_bsdextended
 security/mac_bsdextended/ugidfw_system.c	optional mac_bsdextended
 security/mac_bsdextended/ugidfw_vnode.c		optional mac_bsdextended
 security/mac_ifoff/mac_ifoff.c	optional mac_ifoff
 security/mac_lomac/mac_lomac.c	optional mac_lomac
 security/mac_mls/mac_mls.c	optional mac_mls
 security/mac_none/mac_none.c	optional mac_none
 security/mac_partition/mac_partition.c optional mac_partition
 security/mac_portacl/mac_portacl.c optional mac_portacl
 security/mac_seeotheruids/mac_seeotheruids.c optional mac_seeotheruids
 security/mac_stub/mac_stub.c	optional mac_stub
 security/mac_test/mac_test.c	optional mac_test
 teken/teken.c			optional sc | vt
 ufs/ffs/ffs_alloc.c		optional ffs
 ufs/ffs/ffs_balloc.c		optional ffs
 ufs/ffs/ffs_inode.c		optional ffs
 ufs/ffs/ffs_snapshot.c		optional ffs
 ufs/ffs/ffs_softdep.c		optional ffs
 ufs/ffs/ffs_subr.c		optional ffs
 ufs/ffs/ffs_tables.c		optional ffs
 ufs/ffs/ffs_vfsops.c		optional ffs
 ufs/ffs/ffs_vnops.c		optional ffs
 ufs/ffs/ffs_rawread.c		optional ffs directio
 ufs/ffs/ffs_suspend.c		optional ffs
 ufs/ufs/ufs_acl.c		optional ffs
 ufs/ufs/ufs_bmap.c		optional ffs
 ufs/ufs/ufs_dirhash.c		optional ffs
 ufs/ufs/ufs_extattr.c		optional ffs
 ufs/ufs/ufs_gjournal.c		optional ffs UFS_GJOURNAL
 ufs/ufs/ufs_inode.c		optional ffs
 ufs/ufs/ufs_lookup.c		optional ffs
 ufs/ufs/ufs_quota.c		optional ffs
 ufs/ufs/ufs_vfsops.c		optional ffs
 ufs/ufs/ufs_vnops.c		optional ffs
 vm/default_pager.c		standard
 vm/device_pager.c		standard
 vm/phys_pager.c			standard
 vm/redzone.c			optional DEBUG_REDZONE
 vm/sg_pager.c			standard
 vm/swap_pager.c			standard
 vm/uma_core.c			standard
 vm/uma_dbg.c			standard
 vm/memguard.c			optional DEBUG_MEMGUARD
 vm/vm_fault.c			standard
 vm/vm_glue.c			standard
 vm/vm_init.c			standard
 vm/vm_kern.c			standard
 vm/vm_map.c			standard
 vm/vm_meter.c			standard
 vm/vm_mmap.c			standard
 vm/vm_object.c			standard
 vm/vm_page.c			standard
 vm/vm_pageout.c			standard
 vm/vm_pager.c			standard
 vm/vm_phys.c			standard
 vm/vm_radix.c			standard
 vm/vm_reserv.c			standard
 vm/vm_unix.c			standard
 vm/vm_zeroidle.c		standard
 vm/vnode_pager.c		standard
 xen/features.c			optional xen | xenhvm
 xen/xenbus/xenbus_if.m		optional xen | xenhvm
 xen/xenbus/xenbus.c		optional xen | xenhvm
 xen/xenbus/xenbusb_if.m		optional xen | xenhvm
 xen/xenbus/xenbusb.c		optional xen | xenhvm
 xen/xenbus/xenbusb_front.c	optional xen | xenhvm
 xen/xenbus/xenbusb_back.c	optional xen | xenhvm
 xdr/xdr.c			optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_array.c			optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_mbuf.c			optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_mem.c			optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_reference.c		optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_sizeof.c		optional krpc | nfslockd | nfscl | nfsd
Index: head/sys/dev/e1000/if_igb.c
===================================================================
--- head/sys/dev/e1000/if_igb.c	(revision 277330)
+++ head/sys/dev/e1000/if_igb.c	(revision 277331)
@@ -1,6382 +1,6382 @@
 /******************************************************************************
 
   Copyright (c) 2001-2013, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_rss.h"
 
 #ifdef HAVE_KERNEL_OPTION_HEADERS
 #include "opt_device_polling.h"
 #include "opt_altq.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #ifndef IGB_LEGACY_TX
 #include <sys/buf_ring.h>
 #endif
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/eventhandler.h>
 #include <sys/pcpu.h>
 #include <sys/smp.h>
 #include <machine/smp.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
+#ifdef	RSS
+#include <net/rss_config.h>
+#endif
 
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_lro.h>
 #include <netinet/udp.h>
-#ifdef	RSS
-#include <netinet/in_rss.h>
-#endif
 
 #include <machine/in_cksum.h>
 #include <dev/led/led.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include "e1000_api.h"
 #include "e1000_82575.h"
 #include "if_igb.h"
 
 /*********************************************************************
  *  Set this to one to display debug statistics
  *********************************************************************/
 int	igb_display_debug_stats = 0;
 
 /*********************************************************************
  *  Driver version:
  *********************************************************************/
 char igb_driver_version[] = "version - 2.4.0";
 
 
 /*********************************************************************
  *  PCI Device ID Table
  *
  *  Used by probe to select devices to load on
  *  Last field stores an index into igb_strings
  *  Last entry must be all 0s
  *
  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
  *********************************************************************/
 
 static igb_vendor_info_t igb_vendor_info_array[] =
 {
 	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_I354_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	/* required last entry */
 	{ 0, 0, 0, 0, 0}
 };
 
 /*********************************************************************
  *  Table of branding strings for all supported NICs.
  *********************************************************************/
 
 /*
  * Indexed by the "index" (last) field of an igb_vendor_info_array entry;
  * igb_probe() combines the selected string with igb_driver_version to
  * build the device description.
  */
 static char *igb_strings[] = {
 	"Intel(R) PRO/1000 Network Connection"
 };
 
 /*********************************************************************
  *  Function prototypes
  *********************************************************************/
 static int	igb_probe(device_t);
 static int	igb_attach(device_t);
 static int	igb_detach(device_t);
 static int	igb_shutdown(device_t);
 static int	igb_suspend(device_t);
 static int	igb_resume(device_t);
 #ifndef IGB_LEGACY_TX
 static int	igb_mq_start(struct ifnet *, struct mbuf *);
 static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
 static void	igb_qflush(struct ifnet *);
 static void	igb_deferred_mq_start(void *, int);
 #else
 static void	igb_start(struct ifnet *);
 static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
 #endif
 static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
 static uint64_t	igb_get_counter(if_t, ift_counter);
 static void	igb_init(void *);
 static void	igb_init_locked(struct adapter *);
 static void	igb_stop(void *);
 static void	igb_media_status(struct ifnet *, struct ifmediareq *);
 static int	igb_media_change(struct ifnet *);
 static void	igb_identify_hardware(struct adapter *);
 static int	igb_allocate_pci_resources(struct adapter *);
 static int	igb_allocate_msix(struct adapter *);
 static int	igb_allocate_legacy(struct adapter *);
 static int	igb_setup_msix(struct adapter *);
 static void	igb_free_pci_resources(struct adapter *);
 static void	igb_local_timer(void *);
 static void	igb_reset(struct adapter *);
 static int	igb_setup_interface(device_t, struct adapter *);
 static int	igb_allocate_queues(struct adapter *);
 static void	igb_configure_queues(struct adapter *);
 
 static int	igb_allocate_transmit_buffers(struct tx_ring *);
 static void	igb_setup_transmit_structures(struct adapter *);
 static void	igb_setup_transmit_ring(struct tx_ring *);
 static void	igb_initialize_transmit_units(struct adapter *);
 static void	igb_free_transmit_structures(struct adapter *);
 static void	igb_free_transmit_buffers(struct tx_ring *);
 
 static int	igb_allocate_receive_buffers(struct rx_ring *);
 static int	igb_setup_receive_structures(struct adapter *);
 static int	igb_setup_receive_ring(struct rx_ring *);
 static void	igb_initialize_receive_units(struct adapter *);
 static void	igb_free_receive_structures(struct adapter *);
 static void	igb_free_receive_buffers(struct rx_ring *);
 static void	igb_free_receive_ring(struct rx_ring *);
 
 static void	igb_enable_intr(struct adapter *);
 static void	igb_disable_intr(struct adapter *);
 static void	igb_update_stats_counters(struct adapter *);
 static bool	igb_txeof(struct tx_ring *);
 
 static __inline	void igb_rx_discard(struct rx_ring *, int);
 static __inline void igb_rx_input(struct rx_ring *,
 		    struct ifnet *, struct mbuf *, u32);
 
 static bool	igb_rxeof(struct igb_queue *, int, int *);
 static void	igb_rx_checksum(u32, struct mbuf *, u32);
 static int	igb_tx_ctx_setup(struct tx_ring *,
 		    struct mbuf *, u32 *, u32 *);
 static int	igb_tso_setup(struct tx_ring *,
 		    struct mbuf *, u32 *, u32 *);
 static void	igb_set_promisc(struct adapter *);
 static void	igb_disable_promisc(struct adapter *);
 static void	igb_set_multi(struct adapter *);
 static void	igb_update_link_status(struct adapter *);
 static void	igb_refresh_mbufs(struct rx_ring *, int);
 
 static void	igb_register_vlan(void *, struct ifnet *, u16);
 static void	igb_unregister_vlan(void *, struct ifnet *, u16);
 static void	igb_setup_vlan_hw_support(struct adapter *);
 
 static int	igb_xmit(struct tx_ring *, struct mbuf **);
 static int	igb_dma_malloc(struct adapter *, bus_size_t,
 		    struct igb_dma_alloc *, int);
 static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
 static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
 static void	igb_print_nvm_info(struct adapter *);
 static int 	igb_is_valid_ether_addr(u8 *);
 static void     igb_add_hw_stats(struct adapter *);
 
 static void	igb_vf_init_stats(struct adapter *);
 static void	igb_update_vf_stats_counters(struct adapter *);
 
 /* Management and WOL Support */
 static void	igb_init_manageability(struct adapter *);
 static void	igb_release_manageability(struct adapter *);
 static void     igb_get_hw_control(struct adapter *);
 static void     igb_release_hw_control(struct adapter *);
 static void     igb_enable_wakeup(device_t);
 static void     igb_led_func(void *, int);
 
 static int	igb_irq_fast(void *);
 static void	igb_msix_que(void *);
 static void	igb_msix_link(void *);
 static void	igb_handle_que(void *context, int pending);
 static void	igb_handle_link(void *context, int pending);
 static void	igb_handle_link_locked(struct adapter *);
 
 static void	igb_set_sysctl_value(struct adapter *, const char *,
 		    const char *, int *, int);
 static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
 static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
 static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
 
 #ifdef DEVICE_POLLING
 static poll_handler_t igb_poll;
 #endif /* DEVICE_POLLING */
 
 /*********************************************************************
  *  FreeBSD Device Interface Entry Points
  *********************************************************************/
 
 /*
  * Device method table: maps the generic device_* entry points onto the
  * igb_* handlers defined in this file.
  */
 static device_method_t igb_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, igb_probe),
 	DEVMETHOD(device_attach, igb_attach),
 	DEVMETHOD(device_detach, igb_detach),
 	DEVMETHOD(device_shutdown, igb_shutdown),
 	DEVMETHOD(device_suspend, igb_suspend),
 	DEVMETHOD(device_resume, igb_resume),
 	DEVMETHOD_END
 };
 
 /*
  * Driver description: name, method table and the size of the per-device
  * softc (struct adapter) that device_get_softc() hands back.
  */
 static driver_t igb_driver = {
 	"igb", igb_methods, sizeof(struct adapter),
 };
 
 static devclass_t igb_devclass;
 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
 MODULE_DEPEND(igb, pci, 1, 1, 1);
 MODULE_DEPEND(igb, ether, 1, 1, 1);
 
 /*********************************************************************
  *  Tunable default values.
  *********************************************************************/
 
 static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
 
 /* Descriptor defaults */
 static int igb_rxd = IGB_DEFAULT_RXD;
 static int igb_txd = IGB_DEFAULT_TXD;
 SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
     "Number of receive descriptors per queue");
 SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
     "Number of transmit descriptors per queue");
 
 /*
 ** AIM: Adaptive Interrupt Moderation
 ** which means that the interrupt rate
 ** is varied over time based on the
 ** traffic for that interrupt vector
 */
 static int igb_enable_aim = TRUE;
 SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
     "Enable adaptive interrupt moderation");
 
 /*
  * MSIX should be the default for best performance,
  * but this allows it to be forced off for testing.
  */         
 static int igb_enable_msix = 1;
 SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
     "Enable MSI-X interrupts");
 
 /*
 ** Tunable interrupt rate
 */
 static int igb_max_interrupt_rate = 8000;
 SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
     &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
 
 #ifndef IGB_LEGACY_TX
 /*
 ** Tunable number of buffers in the buf-ring (drbr_xxx)
 */
 static int igb_buf_ring_size = IGB_BR_SIZE;
 SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
     &igb_buf_ring_size, 0, "Size of the bufring");
 #endif
 
 /*
 ** Header split causes the packet header to
 ** be dma'd to a separate mbuf from the payload.
 ** This can have memory alignment benefits. But
 ** another plus is that small packets often fit
 ** into the header and thus use no cluster. It's
 ** a very workload dependent type feature.
 */
 static int igb_header_split = FALSE;
 SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
     "Enable receive mbuf header split");
 
 /*
 ** This will autoconfigure based on the
 ** number of CPUs and max supported
 ** MSIX messages if left at 0.
 */
 static int igb_num_queues = 0;
 SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
     "Number of queues to configure, 0 indicates autoconfigure");
 
 /*
 ** Global variable to store last used CPU when binding queues
 ** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
 ** queue is bound to a cpu.
 */
 static int igb_last_bind_cpu = -1;
 
 /* How many packets rxeof tries to clean at a time */
 static int igb_rx_process_limit = 100;
 SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
     &igb_rx_process_limit, 0,
     "Maximum number of received packets to process at a time, -1 means unlimited");
 
 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
 #include <dev/netmap/if_igb_netmap.h>
 #endif /* DEV_NETMAP */
 /*********************************************************************
  *  Device identification routine
  *
  *  igb_probe walks igb_vendor_info_array looking for an entry whose
  *  vendor and device IDs equal those of the adapter and whose
  *  subvendor/subdevice IDs either match or are PCI_ANY_ID.  On a
  *  match the device description is set to the branding string
  *  followed by the driver version.
  *
  *  return BUS_PROBE_DEFAULT on success, ENXIO when the vendor is not
  *  IGB_VENDOR_ID or no table entry matches
  *********************************************************************/
 
 static int
 igb_probe(device_t dev)
 {
 	char		adapter_name[60];
 	uint16_t	pci_vendor_id = 0;
 	uint16_t	pci_device_id = 0;
 	uint16_t	pci_subvendor_id = 0;
 	uint16_t	pci_subdevice_id = 0;
 	igb_vendor_info_t *ent;
 
 	INIT_DEBUGOUT("igb_probe: begin");
 
 	pci_vendor_id = pci_get_vendor(dev);
 	if (pci_vendor_id != IGB_VENDOR_ID)
 		return (ENXIO);
 
 	pci_device_id = pci_get_device(dev);
 	pci_subvendor_id = pci_get_subvendor(dev);
 	pci_subdevice_id = pci_get_subdevice(dev);
 
 	/* The table is terminated by an all-zero entry */
 	for (ent = igb_vendor_info_array; ent->vendor_id != 0; ent++) {
 		if (pci_vendor_id != ent->vendor_id ||
 		    pci_device_id != ent->device_id)
 			continue;
 		if (pci_subvendor_id != ent->subvendor_id &&
 		    ent->subvendor_id != PCI_ANY_ID)
 			continue;
 		if (pci_subdevice_id != ent->subdevice_id &&
 		    ent->subdevice_id != PCI_ANY_ID)
 			continue;
 
 		/*
 		 * Bounded formatting: a longer branding or version
 		 * string is truncated instead of overrunning the
 		 * on-stack buffer.
 		 */
 		snprintf(adapter_name, sizeof(adapter_name), "%s %s",
 		    igb_strings[ent->index], igb_driver_version);
 		device_set_desc_copy(dev, adapter_name);
 		return (BUS_PROBE_DEFAULT);
 	}
 
 	return (ENXIO);
 }
 
 /*********************************************************************
  *  Device initialization routine
  *
  *  The attach entry point is called when the driver is being loaded.
  *  This routine identifies the type of hardware, allocates all resources
  *  and initializes the hardware.
  *
  *  Failures up to and including queue allocation unwind through
  *  err_pci; later failures unwind through err_late.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 static int
 igb_attach(device_t dev)
 {
 	struct adapter	*adapter;
 	int		error = 0;
 	u16		eeprom_data;
 
 	INIT_DEBUGOUT("igb_attach: begin");
 
 	if (resource_disabled("igb", device_get_unit(dev))) {
 		device_printf(dev, "Disabled by device hint\n");
 		return (ENXIO);
 	}
 
 	adapter = device_get_softc(dev);
 	adapter->dev = adapter->osdep.dev = dev;
 	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
 
 	/* SYSCTL stuff */
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    igb_sysctl_nvm_info, "I", "NVM Information");
 
 	igb_set_sysctl_value(adapter, "enable_aim",
 	    "Interrupt Moderation", &adapter->enable_aim,
 	    igb_enable_aim);
 
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
 	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
 
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware and mac info */
 	igb_identify_hardware(adapter);
 
 	/* Setup PCI resources */
 	if (igb_allocate_pci_resources(adapter)) {
 		device_printf(dev, "Allocation of PCI resources failed\n");
 		error = ENXIO;
 		goto err_pci;
 	}
 
 	/* Do Shared Code initialization */
 	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
 		device_printf(dev, "Setup of Shared code failed\n");
 		error = ENXIO;
 		goto err_pci;
 	}
 
 	e1000_get_bus_info(&adapter->hw);
 
 	/* Sysctl for limiting the amount of work done in the taskqueue */
 	igb_set_sysctl_value(adapter, "rx_processing_limit",
 	    "max number of rx packets to process",
 	    &adapter->rx_process_limit, igb_rx_process_limit);
 
 	/*
 	 * Validate number of transmit and receive descriptors. It
 	 * must lie within [IGB_MIN_*XD, IGB_MAX_*XD] and the ring size
 	 * in bytes must be a multiple of IGB_DBA_ALIGN; otherwise the
 	 * compiled-in default is used.
 	 */
 	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
 	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
 		    IGB_DEFAULT_TXD, igb_txd);
 		adapter->num_tx_desc = IGB_DEFAULT_TXD;
 	} else
 		adapter->num_tx_desc = igb_txd;
 	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
 	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
 		    IGB_DEFAULT_RXD, igb_rxd);
 		adapter->num_rx_desc = IGB_DEFAULT_RXD;
 	} else
 		adapter->num_rx_desc = igb_rxd;
 
 	adapter->hw.mac.autoneg = DO_AUTO_NEG;
 	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
 	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
 
 	/* Copper options */
 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
 		adapter->hw.phy.mdix = AUTO_ALL_MODES;
 		adapter->hw.phy.disable_polarity_correction = FALSE;
 		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
 	}
 
 	/*
 	 * Set the frame limits assuming
 	 * standard ethernet sized frames.
 	 */
 	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
 
 	/*
 	** Allocate and Setup Queues
 	*/
 	if (igb_allocate_queues(adapter)) {
 		error = ENOMEM;
 		goto err_pci;
 	}
 
 	/*
 	 * Allocate the appropriate stats memory.
 	 * NOTE(review): igb_vf_init_stats() runs before the NULL check
 	 * below; confirm it tolerates a failed allocation.
 	 */
 	if (adapter->vf_ifp) {
 		adapter->stats = malloc(sizeof(struct e1000_vf_stats),
 		    M_DEVBUF, M_NOWAIT | M_ZERO);
 		igb_vf_init_stats(adapter);
 	} else
 		adapter->stats = malloc(sizeof(struct e1000_hw_stats),
 		    M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (adapter->stats == NULL) {
 		device_printf(dev, "Can not allocate stats memory\n");
 		error = ENOMEM;
 		goto err_late;
 	}
 
 	/* Allocate multicast array memory. */
 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
 	if (adapter->mta == NULL) {
 		device_printf(dev, "Can not allocate multicast setup array\n");
 		error = ENOMEM;
 		goto err_late;
 	}
 
 	/* Some adapter-specific advanced features */
 	if (adapter->hw.mac.type >= e1000_i350) {
 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
 		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
 		    adapter, 0, igb_sysctl_eee, "I",
 		    "Disable Energy Efficient Ethernet");
 		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
 			if (adapter->hw.mac.type == e1000_i354)
 				e1000_set_eee_i354(&adapter->hw);
 			else
 				e1000_set_eee_i350(&adapter->hw);
 		}
 	}
 
 	/*
 	** Start from a known state, this is
 	** important in reading the nvm and
 	** mac from that.
 	*/
 	e1000_reset_hw(&adapter->hw);
 
 	/* Make sure we have a good EEPROM before we read from it */
 	if (((adapter->hw.mac.type != e1000_i210) &&
 	    (adapter->hw.mac.type != e1000_i211)) &&
 	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
 		/*
 		** Some PCI-E parts fail the first check due to
 		** the link being in sleep state, call it again,
 		** if it fails a second time its a real issue.
 		*/
 		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
 			device_printf(dev,
 			    "The EEPROM Checksum Is Not Valid\n");
 			error = EIO;
 			goto err_late;
 		}
 	}
 
 	/*
 	** Copy the permanent MAC address out of the EEPROM
 	*/
 	if (e1000_read_mac_addr(&adapter->hw) < 0) {
 		device_printf(dev, "EEPROM read error while reading MAC"
 		    " address\n");
 		error = EIO;
 		goto err_late;
 	}
 	/* Check its sanity */
 	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
 		device_printf(dev, "Invalid MAC address\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	/*
 	 * Setup OS specific network interface.  A failure must leave a
 	 * non-zero error behind, otherwise attach would report success
 	 * after tearing everything down.
 	 */
 	if (igb_setup_interface(dev, adapter) != 0) {
 		error = ENXIO;
 		goto err_late;
 	}
 
 	/* Now get a good starting state */
 	igb_reset(adapter);
 
 	/* Initialize statistics */
 	igb_update_stats_counters(adapter);
 
 	adapter->hw.mac.get_link_status = 1;
 	igb_update_link_status(adapter);
 
 	/* Indicate SOL/IDER usage */
 	if (e1000_check_reset_block(&adapter->hw))
 		device_printf(dev,
 		    "PHY reset is blocked due to SOL/IDER session.\n");
 
 	/* Determine if we have to control management hardware */
 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
 
 	/*
 	 * Setup Wake-on-Lan
 	 */
 	/* APME bit in EEPROM is mapped to WUC.APME */
 	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
 	if (eeprom_data)
 		adapter->wol = E1000_WUFC_MAG;
 
 	/* Register for VLAN events */
 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 
 	igb_add_hw_stats(adapter);
 
 	/* Tell the stack that the interface is not active */
 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
 
 	adapter->led_dev = led_create(igb_led_func, adapter,
 	    device_get_nameunit(dev));
 
 	/*
 	** Configure Interrupts
 	*/
 	if ((adapter->msix > 1) && (igb_enable_msix))
 		error = igb_allocate_msix(adapter);
 	else /* MSI or Legacy */
 		error = igb_allocate_legacy(adapter);
 	if (error)
 		goto err_late;
 
 #ifdef DEV_NETMAP
 	igb_netmap_attach(adapter);
 #endif /* DEV_NETMAP */
 	INIT_DEBUGOUT("igb_attach: end");
 
 	return (0);
 
 err_late:
 	/*
 	 * igb_detach() dereferences adapter->ifp unconditionally, so it
 	 * may only be used once the ifnet exists.  When it succeeds it
 	 * has already released everything the code below releases (PCI
 	 * resources, ifnet, TX/RX structures, mta, core lock), so return
 	 * right away instead of freeing all of it a second time.
 	 */
 	if (adapter->ifp != NULL && igb_detach(dev) == 0)
 		return (error);
 	igb_free_transmit_structures(adapter);
 	igb_free_receive_structures(adapter);
 	igb_release_hw_control(adapter);
 err_pci:
 	igb_free_pci_resources(adapter);
 	if (adapter->ifp != NULL)
 		if_free(adapter->ifp);
 	free(adapter->mta, M_DEVBUF);
 	IGB_CORE_LOCK_DESTROY(adapter);
 
 	return (error);
 }
 
 /*********************************************************************
  *  Device removal routine
  *
  *  The detach entry point is called when the driver is being removed.
  *  This routine stops the adapter and deallocates all the resources
  *  that were allocated for driver operation.
  *
  *  It is also invoked from the err_late path of igb_attach().
  *
  *  return 0 on success, EBUSY while a VLAN trunk is still attached
  *********************************************************************/
 
 static int
 igb_detach(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	struct ifnet	*ifp = adapter->ifp;
 
 	INIT_DEBUGOUT("igb_detach: begin");
 
 	/*
 	 * Make sure VLANS are not using driver.
 	 * NOTE(review): adapter->ifp is dereferenced without a NULL
 	 * check; callers must only get here once the ifnet exists.
 	 */
 	if (adapter->ifp->if_vlantrunk != NULL) {
 		device_printf(dev,"Vlan in use, detach first\n");
 		return (EBUSY);
 	}
 
 	ether_ifdetach(adapter->ifp);
 
 	if (adapter->led_dev != NULL)
 		led_destroy(adapter->led_dev);
 
 #ifdef DEVICE_POLLING
 	if (ifp->if_capenable & IFCAP_POLLING)
 		ether_poll_deregister(ifp);
 #endif
 
 	/* Flag the detach and stop the adapter under the core lock */
 	IGB_CORE_LOCK(adapter);
 	adapter->in_detach = 1;
 	igb_stop(adapter);
 	IGB_CORE_UNLOCK(adapter);
 
 	e1000_phy_hw_reset(&adapter->hw);
 
 	/* Give control back to firmware */
 	igb_release_manageability(adapter);
 	igb_release_hw_control(adapter);
 
 	/* Program the wake-up registers when adapter->wol is set */
 	if (adapter->wol) {
 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
 		igb_enable_wakeup(dev);
 	}
 
 	/* Unregister VLAN events */
 	if (adapter->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
 	if (adapter->vlan_detach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
 
 	callout_drain(&adapter->timer);
 
 #ifdef DEV_NETMAP
 	netmap_detach(adapter->ifp);
 #endif /* DEV_NETMAP */
 	igb_free_pci_resources(adapter);
 	bus_generic_detach(dev);
 	/* adapter->ifp is left pointing at the freed ifnet */
 	if_free(ifp);
 
 	igb_free_transmit_structures(adapter);
 	igb_free_receive_structures(adapter);
 	if (adapter->mta != NULL)
 		free(adapter->mta, M_DEVBUF);
 
 	IGB_CORE_LOCK_DESTROY(adapter);
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Shutdown entry point
  *
  *  Shutdown is handled exactly like a suspend; the result of
  *  igb_suspend() is passed straight back to the caller.
  *
  **********************************************************************/
 
 static int
 igb_shutdown(device_t dev)
 {
 	int	rc;
 
 	rc = igb_suspend(dev);
 	return (rc);
 }
 
 /*
  * Suspend/resume device methods.
  *
  * Suspend (also used by igb_shutdown) stops the adapter under the core
  * lock, releases manageability and hardware control, programs the
  * wake-up registers when adapter->wol is set, and finally returns the
  * result of bus_generic_suspend().
  */
 static int
 igb_suspend(device_t dev)
 {
 	struct adapter	*adapter;
 
 	adapter = device_get_softc(dev);
 
 	IGB_CORE_LOCK(adapter);
 
 	igb_stop(adapter);
 	igb_release_manageability(adapter);
 	igb_release_hw_control(adapter);
 
 	if (adapter->wol) {
 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
 		igb_enable_wakeup(dev);
 	}
 
 	IGB_CORE_UNLOCK(adapter);
 
 	return (bus_generic_suspend(dev));
 }
 
 /*
  * Resume method: re-initialize the adapter and its manageability
  * settings under the core lock and, when the interface is up, running
  * and has link, restart transmission on every TX ring that has work
  * queued.  Returns the result of bus_generic_resume().
  */
 static int
 igb_resume(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	struct tx_ring	*rings = adapter->tx_rings;
 	struct ifnet	*ifp = adapter->ifp;
 	struct tx_ring	*txr;
 	int		q;
 
 	IGB_CORE_LOCK(adapter);
 	igb_init_locked(adapter);
 	igb_init_manageability(adapter);
 
 	/* Nothing to restart unless up, running and with link */
 	if (!(ifp->if_flags & IFF_UP) ||
 	    !(ifp->if_drv_flags & IFF_DRV_RUNNING) ||
 	    !adapter->link_active)
 		goto out;
 
 	for (q = 0; q < adapter->num_queues; q++) {
 		txr = &rings[q];
 		IGB_TX_LOCK(txr);
 #ifndef IGB_LEGACY_TX
 		/* Process the stack queue only if not depleted */
 		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
 		    !drbr_empty(ifp, txr->br))
 			igb_mq_start_locked(ifp, txr);
 #else
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			igb_start_locked(txr, ifp);
 #endif
 		IGB_TX_UNLOCK(txr);
 	}
 out:
 	IGB_CORE_UNLOCK(adapter);
 
 	return (bus_generic_resume(dev));
 }
 
 
 #ifdef IGB_LEGACY_TX
 
 /*********************************************************************
  *  Transmit entry point
  *
  *  igb_start is called by the stack to initiate a transmit; the
  *  locked worker below does the actual dequeueing.  It keeps going
  *  while the send queue has packets and more than IGB_MAX_SCATTER
  *  descriptors are available.  When igb_xmit fails and the mbuf
  *  survived, the packet is put back at the head of the queue.
  *
  *  The caller must hold the TX lock of txr.
  **********************************************************************/
 
 static void
 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct mbuf	*pkt;
 
 	IGB_TX_LOCK_ASSERT(txr);
 
 	/* Only transmit when RUNNING, not OACTIVE, and the link is up */
 	if (((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING) || !adapter->link_active)
 		return;
 
 	/* Call cleanup if number of TX descriptors low */
 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
 		igb_txeof(txr);
 
 	for (;;) {
 		if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			break;
 
 		/* Out of descriptors: mark the ring depleted and stop */
 		if (txr->tx_avail <= IGB_MAX_SCATTER) {
 			txr->queue_status |= IGB_QUEUE_DEPLETED;
 			break;
 		}
 
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
 		if (pkt == NULL)
 			break;
 
 		/*
 		 *  Encapsulation can modify our pointer, and or make it
 		 *  NULL on failure.  In that event, we can't requeue.
 		 */
 		if (igb_xmit(txr, &pkt)) {
 			if (pkt != NULL)
 				IFQ_DRV_PREPEND(&ifp->if_snd, pkt);
 			if (txr->tx_avail <= IGB_MAX_SCATTER)
 				txr->queue_status |= IGB_QUEUE_DEPLETED;
 			break;
 		}
 
 		/* Send a copy of the frame to the BPF listener */
 		ETHER_BPF_MTAP(ifp, pkt);
 
 		/* Set watchdog on */
 		txr->watchdog_time = ticks;
 		txr->queue_status |= IGB_QUEUE_WORKING;
 	}
 }
  
 /*
  * Legacy TX driver routine, called from the
  * stack, always uses tx[0], and spins for it.
  * Should not be used with multiqueue tx.
  * Does nothing unless the interface is marked running.
  */
 static void
 igb_start(struct ifnet *ifp)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct tx_ring	*txr = adapter->tx_rings;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	IGB_TX_LOCK(txr);
 	igb_start_locked(txr, ifp);
 	IGB_TX_UNLOCK(txr);
 }
 
 #else /* ~IGB_LEGACY_TX */
 
 /*
 ** Multiqueue Transmit Entry:
 **  quick turnaround to the stack
 **
 **  Picks a TX ring, enqueues the mbuf on that ring's buf_ring and
 **  then either drains the ring right away (TX lock obtained) or
 **  hands the work to the queue's taskqueue.  Returns the
 **  drbr_enqueue() error, or 0 once the mbuf has been queued.
 */
 static int
 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
 {
 	struct adapter		*adapter = ifp->if_softc;
 	struct igb_queue	*que;
 	struct tx_ring		*txr;
 	int 			i, err;
 #ifdef	RSS
 	uint32_t		bucket_id;
 #endif
 
 	/* Which queue to use */
 	if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE) {
 		/* No flow hash on the mbuf: spread by current CPU */
 		i = curcpu % adapter->num_queues;
 	} else {
 		/*
 		 * When doing RSS, map it to the same outbound queue
 		 * as the incoming flow would be mapped to.
 		 *
 		 * If everything is setup correctly, it should be the
 		 * same bucket that the current CPU we're on is.
 		 */
 #ifdef	RSS
 		if (rss_hash2bucket(m->m_pkthdr.flowid,
 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
 			/* XXX TODO: spit out something if bucket_id > num_queues? */
 			i = bucket_id % adapter->num_queues;
 		} else
 #endif
 		{
 			i = m->m_pkthdr.flowid % adapter->num_queues;
 		}
 	}
 	txr = &adapter->tx_rings[i];
 	que = &adapter->queues[i];
 
 	err = drbr_enqueue(ifp, txr->br, m);
 	if (err != 0)
 		return (err);
 
 	/* Lock busy: let the taskqueue drain the ring later */
 	if (!IGB_TX_TRYLOCK(txr)) {
 		taskqueue_enqueue(que->tq, &txr->txq_task);
 		return (0);
 	}
 
 	igb_mq_start_locked(ifp, txr);
 	IGB_TX_UNLOCK(txr);
 
 	return (0);
 }
 
 static int
 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
 {
 	struct adapter  *adapter = txr->adapter;
         struct mbuf     *next;
         int             err = 0, enq = 0;
 
 	IGB_TX_LOCK_ASSERT(txr);
 
 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
 	    adapter->link_active == 0)
 		return (ENETDOWN);
 
 
 	/* Process the queue */
 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
 		if ((err = igb_xmit(txr, &next)) != 0) {
 			if (next == NULL) {
 				/* It was freed, move forward */
 				drbr_advance(ifp, txr->br);
 			} else {
 				/* 
 				 * Still have one left, it may not be
 				 * the same since the transmit function
 				 * may have changed it.
 				 */
 				drbr_putback(ifp, txr->br, next);
 			}
 			break;
 		}
 		drbr_advance(ifp, txr->br);
 		enq++;
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len);
 		if (next->m_flags & M_MCAST)
 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 		ETHER_BPF_MTAP(ifp, next);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 	}
 	if (enq > 0) {
 		/* Set the watchdog */
 		txr->queue_status |= IGB_QUEUE_WORKING;
 		txr->watchdog_time = ticks;
 	}
 	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
 		igb_txeof(txr);
 	if (txr->tx_avail <= IGB_MAX_SCATTER)
 		txr->queue_status |= IGB_QUEUE_DEPLETED;
 	return (err);
 }
 
 /*
  * Taskqueue handler that drains queued transmit packets.
  * 'arg' is the tx_ring to service; 'pending' is not used.
  */
 static void
 igb_deferred_mq_start(void *arg, int pending)
 {
 	struct tx_ring	*ring = arg;
 	struct ifnet	*ifp = ring->adapter->ifp;
 
 	IGB_TX_LOCK(ring);
 	if (drbr_empty(ifp, ring->br) == 0)
 		igb_mq_start_locked(ifp, ring);
 	IGB_TX_UNLOCK(ring);
 }
 
 /*
 ** Flush all ring buffers: free every mbuf still held in each TX
 ** ring's buf_ring (under that ring's TX lock), then call if_qflush().
 */
 static void
 igb_qflush(struct ifnet *ifp)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct tx_ring	*txr;
 	struct mbuf	*m;
 	int		i;
 
 	for (i = 0; i < adapter->num_queues; i++) {
 		txr = &adapter->tx_rings[i];
 
 		IGB_TX_LOCK(txr);
 		for (;;) {
 			m = buf_ring_dequeue_sc(txr->br);
 			if (m == NULL)
 				break;
 			m_freem(m);
 		}
 		IGB_TX_UNLOCK(txr);
 	}
 	if_qflush(ifp);
 }
 #endif /* ~IGB_LEGACY_TX */
 
 /*********************************************************************
  *  Ioctl entry point
  *
  *  igb_ioctl is called when the user wants to configure the
  *  interface.
  *
  *  ifp     - interface the request is for
  *  command - SIOC* request code
  *  data    - request argument; interpreted as struct ifreq or
  *            struct ifaddr depending on the command
  *
  *  return 0 on success, positive on failure.  Requests arriving
  *  while the adapter is detaching are ignored and report success.
  *  Commands not handled here are passed to ether_ioctl().
  **********************************************************************/
 
 static int
 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct ifreq	*ifr = (struct ifreq *)data;
 #if defined(INET) || defined(INET6)
 	struct ifaddr	*ifa = (struct ifaddr *)data;
 #endif
 	bool		avoid_reset = FALSE;
 	int		error = 0;
 
 	if (adapter->in_detach)
 		return (error);
 
 	switch (command) {
 	case SIOCSIFADDR:
 #ifdef INET
 		if (ifa->ifa_addr->sa_family == AF_INET)
 			avoid_reset = TRUE;
 #endif
 #ifdef INET6
 		if (ifa->ifa_addr->sa_family == AF_INET6)
 			avoid_reset = TRUE;
 #endif
 		/*
 		** Calling init results in link renegotiation,
 		** so we avoid doing it when possible.
 		*/
 		if (avoid_reset) {
 			ifp->if_flags |= IFF_UP;
 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
 				igb_init(adapter);
 #ifdef INET
 			if (!(ifp->if_flags & IFF_NOARP))
 				arp_ifinit(ifp, ifa);
 #endif
 		} else
 			error = ether_ioctl(ifp, command, data);
 		break;
 	case SIOCSIFMTU:
 	    {
 		int max_frame_size;
 
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
 
 		IGB_CORE_LOCK(adapter);
 		max_frame_size = 9234;
 		/* Reject an MTU whose frame would exceed max_frame_size. */
 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
 		    ETHER_CRC_LEN) {
 			IGB_CORE_UNLOCK(adapter);
 			error = EINVAL;
 			break;
 		}
 
 		ifp->if_mtu = ifr->ifr_mtu;
 		adapter->max_frame_size =
 		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
 		igb_init_locked(adapter);
 		IGB_CORE_UNLOCK(adapter);
 		break;
 	    }
 	case SIOCSIFFLAGS:
 		IOCTL_DEBUGOUT("ioctl rcv'd:\
 		    SIOCSIFFLAGS (Set Interface Flags)");
 		IGB_CORE_LOCK(adapter);
 		if (ifp->if_flags & IFF_UP) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				/*
 				 * Already running: only reprogram the
 				 * filters if PROMISC/ALLMULTI changed.
 				 */
 				if ((ifp->if_flags ^ adapter->if_flags) &
 				    (IFF_PROMISC | IFF_ALLMULTI)) {
 					igb_disable_promisc(adapter);
 					igb_set_promisc(adapter);
 				}
 			} else
 				igb_init_locked(adapter);
 		} else
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				igb_stop(adapter);
 		/* Remember the flags for the next change comparison. */
 		adapter->if_flags = ifp->if_flags;
 		IGB_CORE_UNLOCK(adapter);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			IGB_CORE_LOCK(adapter);
 			igb_disable_intr(adapter);
 			igb_set_multi(adapter);
 #ifdef DEVICE_POLLING
 			if (!(ifp->if_capenable & IFCAP_POLLING))
 #endif
 				igb_enable_intr(adapter);
 			IGB_CORE_UNLOCK(adapter);
 		}
 		break;
 	case SIOCSIFMEDIA:
 		/* Check SOL/IDER usage */
 		IGB_CORE_LOCK(adapter);
 		if (e1000_check_reset_block(&adapter->hw)) {
 			IGB_CORE_UNLOCK(adapter);
 			device_printf(adapter->dev, "Media change is"
 			    " blocked due to SOL/IDER session.\n");
 			break;
 		}
 		IGB_CORE_UNLOCK(adapter);
 		/* FALLTHROUGH */
 	case SIOCGIFMEDIA:
 		IOCTL_DEBUGOUT("ioctl rcv'd: \
 		    SIOCxIFMEDIA (Get/Set Interface Media)");
 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
 		break;
 	case SIOCSIFCAP:
 	    {
 		int mask, reinit;
 
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
 		reinit = 0;
 		/* Bits set in 'mask' are the capabilities being changed. */
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 #ifdef DEVICE_POLLING
 		if (mask & IFCAP_POLLING) {
 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
 				error = ether_poll_register(igb_poll, ifp);
 				if (error)
 					return (error);
 				IGB_CORE_LOCK(adapter);
 				igb_disable_intr(adapter);
 				ifp->if_capenable |= IFCAP_POLLING;
 				IGB_CORE_UNLOCK(adapter);
 			} else {
 				error = ether_poll_deregister(ifp);
 				/* Enable interrupt even in error case */
 				IGB_CORE_LOCK(adapter);
 				igb_enable_intr(adapter);
 				ifp->if_capenable &= ~IFCAP_POLLING;
 				IGB_CORE_UNLOCK(adapter);
 			}
 		}
 #endif
 		if (mask & IFCAP_HWCSUM) {
 			/*
 			 * Flip only the checksum bits that were actually
 			 * requested to change.  XORing with the whole
 			 * IFCAP_HWCSUM mask would also toggle the
 			 * direction (RX or TX) the caller left alone.
 			 */
 			ifp->if_capenable ^= (mask & IFCAP_HWCSUM);
 			reinit = 1;
 		}
 		if (mask & IFCAP_TSO4) {
 			ifp->if_capenable ^= IFCAP_TSO4;
 			reinit = 1;
 		}
 		if (mask & IFCAP_TSO6) {
 			ifp->if_capenable ^= IFCAP_TSO6;
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWTAGGING) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWFILTER) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWTSO) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
 			reinit = 1;
 		}
 		if (mask & IFCAP_LRO) {
 			ifp->if_capenable ^= IFCAP_LRO;
 			reinit = 1;
 		}
 		/* Capability changes take effect on the next init. */
 		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
 			igb_init(adapter);
 		VLAN_CAPABILITIES(ifp);
 		break;
 	    }
 
 	default:
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 
 /*********************************************************************
  *  Init entry point
  *
  *  This routine is used in two ways. It is used by the stack as
  *  init entry point in network interface structure. It is also used
  *  by the driver as a hw/sw initialization routine to get to a
  *  consistent state.
  *
  *  The core lock must be held by the caller.  The routine returns
  *  nothing; if the receive structures cannot be set up it logs a
  *  message and returns early, without setting IFF_DRV_RUNNING,
  *  restarting the timer or re-enabling interrupts.
  **********************************************************************/
 
 static void
 igb_init_locked(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	device_t	dev = adapter->dev;
 
 	INIT_DEBUGOUT("igb_init: begin");
 
 	IGB_CORE_LOCK_ASSERT(adapter);
 
 	/* Quiesce interrupts and the periodic timer while reconfiguring */
 	igb_disable_intr(adapter);
 	callout_stop(&adapter->timer);
 
 	/* Get the latest mac address, User can use a LAA */
         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
               ETHER_ADDR_LEN);
 
 	/* Put the address into the Receive Address Array */
 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
 
 	igb_reset(adapter);
 	igb_update_link_status(adapter);
 
 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
 
 	/* Set hardware offload abilities */
 	ifp->if_hwassist = 0;
 	if (ifp->if_capenable & IFCAP_TXCSUM) {
 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
 #if __FreeBSD_version >= 800000
 		/* SCTP checksum assist is only advertised on the 82576 */
 		if (adapter->hw.mac.type == e1000_82576)
 			ifp->if_hwassist |= CSUM_SCTP;
 #endif
 	}
 
 	if (ifp->if_capenable & IFCAP_TSO)
 		ifp->if_hwassist |= CSUM_TSO;
 
 	/* Configure for OS presence */
 	igb_init_manageability(adapter);
 
 	/* Prepare transmit descriptors and buffers */
 	igb_setup_transmit_structures(adapter);
 	igb_initialize_transmit_units(adapter);
 
 	/* Setup Multicast table */
 	igb_set_multi(adapter);
 
 	/*
 	** Figure out the desired mbuf pool
 	** for doing jumbo/packetsplit
 	*/
 	if (adapter->max_frame_size <= 2048)
 		adapter->rx_mbuf_sz = MCLBYTES;
 	else if (adapter->max_frame_size <= 4096)
 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
 	else
 		adapter->rx_mbuf_sz = MJUM9BYTES;
 
 	/* Prepare receive descriptors and buffers */
 	if (igb_setup_receive_structures(adapter)) {
 		device_printf(dev, "Could not setup receive structures\n");
 		return;
 	}
 	igb_initialize_receive_units(adapter);
 
         /* Enable VLAN support */
 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
 		igb_setup_vlan_hw_support(adapter);
                                 
 	/* Don't lose promiscuous settings */
 	igb_set_promisc(adapter);
 
 	/* Mark the interface running and able to accept output */
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	/* Restart the once-a-second local timer */
 	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
 
 	if (adapter->msix > 1) /* Set up queue routing */
 		igb_configure_queues(adapter);
 
 	/* this clears any pending interrupts */
 	E1000_READ_REG(&adapter->hw, E1000_ICR);
 #ifdef DEVICE_POLLING
 	/*
 	 * Only enable interrupts if we are not polling, make sure
 	 * they are off otherwise.
 	 */
 	if (ifp->if_capenable & IFCAP_POLLING)
 		igb_disable_intr(adapter);
 	else
 #endif /* DEVICE_POLLING */
 	{
 		igb_enable_intr(adapter);
 		/* Write the link-status-change bit to the ICS register */
 		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
 	}
 
 	/* Set Energy Efficient Ethernet */
 	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
 		if (adapter->hw.mac.type == e1000_i354)
 			e1000_set_eee_i354(&adapter->hw);
 		else
 			e1000_set_eee_i350(&adapter->hw);
 	}
 }
 
 /*
  * Unlocked init wrapper: takes the core lock around igb_init_locked().
  * 'arg' is the adapter.
  */
 static void
 igb_init(void *arg)
 {
 	struct adapter *sc = (struct adapter *)arg;
 
 	IGB_CORE_LOCK(sc);
 	igb_init_locked(sc);
 	IGB_CORE_UNLOCK(sc);
 }
 
 
 static void
 igb_handle_que(void *context, int pending)
 {
 	struct igb_queue *que = context;
 	struct adapter *adapter = que->adapter;
 	struct tx_ring *txr = que->txr;
 	struct ifnet	*ifp = adapter->ifp;
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		bool	more;
 
 		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
 
 		IGB_TX_LOCK(txr);
 		igb_txeof(txr);
 #ifndef IGB_LEGACY_TX
 		/* Process the stack queue only if not depleted */
 		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
 		    !drbr_empty(ifp, txr->br))
 			igb_mq_start_locked(ifp, txr);
 #else
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			igb_start_locked(txr, ifp);
 #endif
 		IGB_TX_UNLOCK(txr);
 		/* Do we need another? */
 		if (more) {
 			taskqueue_enqueue(que->tq, &que->que_task);
 			return;
 		}
 	}
 
 #ifdef DEVICE_POLLING
 	if (ifp->if_capenable & IFCAP_POLLING)
 		return;
 #endif
 	/* Reenable this interrupt */
 	if (que->eims)
 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
 	else
 		igb_enable_intr(adapter);
 }
 
 /*
  * Deal with link in a sleepable context: take the core lock and run
  * igb_handle_link_locked().  'context' is the adapter; 'pending' is
  * unused.
  */
 static void
 igb_handle_link(void *context, int pending)
 {
 	struct adapter *sc = (struct adapter *)context;
 
 	IGB_CORE_LOCK(sc);
 	igb_handle_link_locked(sc);
 	IGB_CORE_UNLOCK(sc);
 }
 
 /*
  * Refresh the link state (core lock held) and, if the interface is
  * running with an active link, restart transmission on every TX ring.
  */
 static void
 igb_handle_link_locked(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	struct tx_ring	*txr;
 	int		i;
 
 	IGB_CORE_LOCK_ASSERT(adapter);
 
 	/* Force a real link check rather than using the cached value. */
 	adapter->hw.mac.get_link_status = 1;
 	igb_update_link_status(adapter);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    !adapter->link_active)
 		return;
 
 	for (i = 0; i < adapter->num_queues; i++) {
 		txr = &adapter->tx_rings[i];
 
 		IGB_TX_LOCK(txr);
 #ifndef IGB_LEGACY_TX
 		/* Process the stack queue only if not depleted */
 		if ((txr->queue_status & IGB_QUEUE_DEPLETED) == 0 &&
 		    !drbr_empty(ifp, txr->br))
 			igb_mq_start_locked(ifp, txr);
 #else
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			igb_start_locked(txr, ifp);
 #endif
 		IGB_TX_UNLOCK(txr);
 	}
 }
 
 /*********************************************************************
  *
  *  MSI/Legacy Deferred
  *  Interrupt Service routine
  *
  *  Reads the interrupt cause register, rejects stray interrupts and
  *  otherwise masks interrupts and defers the real work to the first
  *  queue's taskqueue.  Returns FILTER_STRAY or FILTER_HANDLED.
  *
  *********************************************************************/
 static int
 igb_irq_fast(void *arg)
 {
 	struct adapter		*adapter = arg;
 	struct igb_queue	*que = adapter->queues;
 	u32			icr;
 
 	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 
 	/*
 	 * Not ours if the register reads all-ones (hot eject?), reads
 	 * zero, or does not have the interrupt-asserted bit set.
 	 */
 	if (icr == 0xffffffff || icr == 0x0 ||
 	    (icr & E1000_ICR_INT_ASSERTED) == 0)
 		return FILTER_STRAY;
 
 	/*
 	 * Mask interrupts until the taskqueue is finished running.  This is
 	 * cheap, just assume that it is needed.  This also works around the
 	 * MSI message reordering errata on certain systems.
 	 */
 	igb_disable_intr(adapter);
 	taskqueue_enqueue(que->tq, &que->que_task);
 
 	/* Link status change */
 	if ((icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) != 0)
 		taskqueue_enqueue(que->tq, &adapter->link_task);
 
 	/* Count receiver overruns */
 	if ((icr & E1000_ICR_RXO) != 0)
 		adapter->rx_overruns++;
 
 	return FILTER_HANDLED;
 }
 
 #ifdef DEVICE_POLLING
 /*
  * DEVICE_POLLING handler.
  *
  *  ifp   - interface being polled
  *  cmd   - POLL_AND_CHECK_STATUS additionally checks the interrupt
  *          cause register for link changes and receiver overruns
  *  count - passed to igb_rxeof() as the receive limit for each queue
  *
  * On FreeBSD >= 8 the number of received packets reported by
  * igb_rxeof() is returned; older versions return nothing.
  */
 #if __FreeBSD_version >= 800000
 #define POLL_RETURN_COUNT(a) (a)
 static int
 #else
 #define POLL_RETURN_COUNT(a)
 static void
 #endif
 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
 {
 	struct adapter		*adapter = ifp->if_softc;
 	struct igb_queue	*que;
 	struct tx_ring		*txr;
 	u32			reg_icr, rx_done = 0;
 	u32			loop;
 	bool			more;
 
 	IGB_CORE_LOCK(adapter);
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		IGB_CORE_UNLOCK(adapter);
 		return POLL_RETURN_COUNT(rx_done);
 	}
 
 	if (cmd == POLL_AND_CHECK_STATUS) {
 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 		/* Link status change */
 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
 			igb_handle_link_locked(adapter);
 
 		if (reg_icr & E1000_ICR_RXO)
 			adapter->rx_overruns++;
 	}
 	IGB_CORE_UNLOCK(adapter);
 
 	for (int i = 0; i < adapter->num_queues; i++) {
 		que = &adapter->queues[i];
 		txr = que->txr;
 
 		igb_rxeof(que, count, &rx_done);
 
 		IGB_TX_LOCK(txr);
 		/*
 		 * Give every queue its own cleanup budget.  A single
 		 * budget shared by all queues could be used up by an
 		 * early queue, and the post-decrement would then wrap
 		 * the unsigned counter, leaving later queues with an
 		 * effectively unbounded loop.
 		 */
 		loop = IGB_MAX_LOOP;
 		do {
 			more = igb_txeof(txr);
 		} while (loop-- && more);
 #ifndef IGB_LEGACY_TX
 		if (!drbr_empty(ifp, txr->br))
 			igb_mq_start_locked(ifp, txr);
 #else
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			igb_start_locked(txr, ifp);
 #endif
 		IGB_TX_UNLOCK(txr);
 	}
 
 	return POLL_RETURN_COUNT(rx_done);
 }
 #endif /* DEVICE_POLLING */
 
 /*********************************************************************
  *
  *  MSIX Que Interrupt Service routine
  *
  *  Masks the queue's interrupt, cleans the TX ring and restarts
  *  transmission, processes received packets and, when adaptive
  *  interrupt moderation is enabled, computes the EITR value to be
  *  written on the next interrupt.  'arg' is the igb_queue.
  *
  **********************************************************************/
 static void
 igb_msix_que(void *arg)
 {
 	struct igb_queue *que = arg;
 	struct adapter	*adapter = que->adapter;
 	struct ifnet	*ifp = adapter->ifp;
 	struct tx_ring	*txr = que->txr;
 	struct rx_ring	*rxr = que->rxr;
 	u32		newitr = 0;
 	bool		more_rx;
 
 	/* Ignore spurious interrupts */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	/* Mask this queue's interrupt while it is being serviced */
 	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
 	++que->irqs;
 
 	IGB_TX_LOCK(txr);
 	igb_txeof(txr);
 #ifndef IGB_LEGACY_TX
 	/* Process the stack queue only if not depleted */
 	if ((txr->queue_status & IGB_QUEUE_DEPLETED) == 0 &&
 	    !drbr_empty(ifp, txr->br))
 		igb_mq_start_locked(ifp, txr);
 #else
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		igb_start_locked(txr, ifp);
 #endif
 	IGB_TX_UNLOCK(txr);
 
 	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
 
 	/*
 	** Do Adaptive Interrupt Moderation:
 	**  - Write out last calculated setting
 	**  - Calculate based on average size over
 	**    the last interval.
 	*/
 	if (adapter->enable_aim != FALSE) {
 		if (que->eitr_setting)
 			E1000_WRITE_REG(&adapter->hw,
 			    E1000_EITR(que->msix), que->eitr_setting);
 
 		que->eitr_setting = 0;
 
 		/* Idle (no bytes either way): leave the setting cleared */
 		if (txr->bytes != 0 || rxr->bytes != 0) {
 			if (adapter->link_speed != 1000) {
 				/* Used half Default if sub-gig */
 				newitr = IGB_DEFAULT_ITR / 2;
 			} else {
 				if ((txr->bytes) && (txr->packets))
 					newitr = txr->bytes/txr->packets;
 				if ((rxr->bytes) && (rxr->packets))
 					newitr = max(newitr,
 					    (rxr->bytes / rxr->packets));
 				/* account for hardware frame, crc */
 				newitr += 24;
 				/* set an upper boundary */
 				newitr = min(newitr, 3000);
 				/* Be nice to the mid range */
 				if ((newitr > 300) && (newitr < 1200))
 					newitr = (newitr / 3);
 				else
 					newitr = (newitr / 2);
 			}
 			newitr &= 0x7FFC;  /* Mask invalid bits */
 			if (adapter->hw.mac.type == e1000_82575)
 				newitr |= newitr << 16;
 			else
 				newitr |= E1000_EITR_CNT_IGNR;
 
 			/* save for next interrupt */
 			que->eitr_setting = newitr;
 
 			/* Reset state */
 			txr->bytes = 0;
 			txr->packets = 0;
 			rxr->bytes = 0;
 			rxr->packets = 0;
 		}
 	}
 
 	if (more_rx) {
 		/* Schedule a clean task if needed */
 		taskqueue_enqueue(que->tq, &que->que_task);
 	} else {
 		/* Reenable this interrupt */
 		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
 	}
 }
 
 
 /*********************************************************************
  *
  *  MSIX Link Interrupt Service routine
  *
  *  Counts the interrupt, handles a link status change if the cause
  *  register reports one, and always rearms the link interrupt.
  *  'arg' is the adapter.
  *
  **********************************************************************/
 
 static void
 igb_msix_link(void *arg)
 {
 	struct adapter	*adapter = arg;
 	u32		cause;
 
 	++adapter->link_irq;
 	cause = E1000_READ_REG(&adapter->hw, E1000_ICR);
 
 	/* Anything other than a link status change is spurious */
 	if ((cause & E1000_ICR_LSC) != 0)
 		igb_handle_link(adapter, 0);
 
 	/* Rearm */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
 	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
 }
 
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  This routine is called whenever the user queries the status of
  *  the interface using ifconfig.  It refreshes the link state under
  *  the core lock and fills in ifmr->ifm_status and ifmr->ifm_active.
  *
  **********************************************************************/
 static void
 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct adapter *adapter = ifp->if_softc;
 
 	INIT_DEBUGOUT("igb_media_status: begin");
 
 	IGB_CORE_LOCK(adapter);
 	igb_update_link_status(adapter);
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	/* Speed and duplex are only reported while the link is up */
 	if (adapter->link_active) {
 		ifmr->ifm_status |= IFM_ACTIVE;
 
 		switch (adapter->link_speed) {
 		case 10:
 			ifmr->ifm_active |= IFM_10_T;
 			break;
 		case 100:
 			/*
 			** Support for 100Mb SFP - these are Fiber
 			** but the media type appears as serdes
 			*/
 			ifmr->ifm_active |=
 			    (adapter->hw.phy.media_type ==
 			    e1000_media_type_internal_serdes) ?
 			    IFM_100_FX : IFM_100_TX;
 			break;
 		case 1000:
 			ifmr->ifm_active |= IFM_1000_T;
 			break;
 		case 2500:
 			ifmr->ifm_active |= IFM_2500_SX;
 			break;
 		default:
 			break;
 		}
 
 		ifmr->ifm_active |=
 		    (adapter->link_duplex == FULL_DUPLEX) ? IFM_FDX : IFM_HDX;
 	}
 
 	IGB_CORE_UNLOCK(adapter);
 }
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  This routine is called when the user changes speed/duplex using
  *  media/mediopt option with ifconfig.
  *
  **********************************************************************/
 static int
 igb_media_change(struct ifnet *ifp)
 {
 	struct adapter *adapter = ifp->if_softc;
 	struct ifmedia  *ifm = &adapter->media;
 
 	INIT_DEBUGOUT("igb_media_change: begin");
 
 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
 		return (EINVAL);
 
 	IGB_CORE_LOCK(adapter);
 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
 	case IFM_AUTO:
 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
 		break;
 	case IFM_1000_LX:
 	case IFM_1000_SX:
 	case IFM_1000_T:
 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
 		break;
 	case IFM_100_TX:
 		adapter->hw.mac.autoneg = FALSE;
 		adapter->hw.phy.autoneg_advertised = 0;
 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
 		else
 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
 		break;
 	case IFM_10_T:
 		adapter->hw.mac.autoneg = FALSE;
 		adapter->hw.phy.autoneg_advertised = 0;
 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
 		else
 			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
 		break;
 	default:
 		device_printf(adapter->dev, "Unsupported media type\n");
 	}
 
 	igb_init_locked(adapter);
 	IGB_CORE_UNLOCK(adapter);
 
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  This routine maps the mbufs to Advanced TX descriptors.
  *
  *  txr     - transmit ring to place the packet on
  *  m_headp - in/out pointer to the packet.  It may be replaced by a
  *            defragmented copy, and is set to NULL whenever this
  *            routine frees the packet.
  *
  *  Returns 0 once the packet has been handed to the hardware, or an
  *  errno value.  On error the caller may requeue the packet only if
  *  *m_headp is still non-NULL.
  *
  **********************************************************************/
 static int
 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
 {
 	struct adapter  *adapter = txr->adapter;
 	u32		olinfo_status = 0, cmd_type_len;
 	int             i, j, error, nsegs;
 	int		first;
 	bool		remap = TRUE;
 	struct mbuf	*m_head;
 	bus_dma_segment_t segs[IGB_MAX_SCATTER];
 	bus_dmamap_t	map;
 	struct igb_tx_buf *txbuf;
 	union e1000_adv_tx_desc *txd = NULL;
 
 	m_head = *m_headp;
 
 	/* Basic descriptor defines */
 	cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
 	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
 
 	if (m_head->m_flags & M_VLANTAG)
 		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
 
 	/*
 	 * Important to capture the first descriptor
 	 * used because it will contain the index of
 	 * the one we tell the hardware to report back
 	 */
 	first = txr->next_avail_desc;
 	txbuf = &txr->tx_buffers[first];
 	map = txbuf->map;
 
 	/*
 	 * Map the packet for DMA.
 	 */
 retry:
 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 	if (__predict_false(error)) {
 		struct mbuf *m;
 
 		switch (error) {
 		case EFBIG:
 			/* Try it again? - one try */
 			if (remap == TRUE) {
 				remap = FALSE;
 				m = m_defrag(*m_headp, M_NOWAIT);
 				if (m == NULL) {
 					adapter->mbuf_defrag_failed++;
 					m_freem(*m_headp);
 					*m_headp = NULL;
 					return (ENOBUFS);
 				}
 				*m_headp = m;
 				goto retry;
 			} else
 				return (error);
 		case ENOMEM:
 			/* Packet is left intact for the caller */
 			txr->no_tx_dma_setup++;
 			return (error);
 		default:
 			txr->no_tx_dma_setup++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (error);
 		}
 	}
 
 	/* Make certain there are enough descriptors */
 	if (nsegs > txr->tx_avail - 2) {
 		txr->no_desc_avail++;
 		bus_dmamap_unload(txr->txtag, map);
 		return (ENOBUFS);
 	}
 	m_head = *m_headp;
 
 	/*
 	** Set up the appropriate offload context
 	** this will consume the first descriptor
 	*/
 	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
 	if (__predict_false(error)) {
 		/*
 		 * The map was loaded above; unload it before freeing
 		 * the mbuf so it is not left referring to freed memory
 		 * (same as the no-descriptor path).
 		 */
 		bus_dmamap_unload(txr->txtag, map);
 		m_freem(*m_headp);
 		*m_headp = NULL;
 		return (error);
 	}
 
 	/* 82575 needs the queue index added */
 	if (adapter->hw.mac.type == e1000_82575)
 		olinfo_status |= txr->me << 4;
 
 	/* Fill one data descriptor per DMA segment */
 	i = txr->next_avail_desc;
 	for (j = 0; j < nsegs; j++) {
 		bus_size_t seglen;
 		bus_addr_t segaddr;
 
 		txbuf = &txr->tx_buffers[i];
 		txd = &txr->tx_base[i];
 		seglen = segs[j].ds_len;
 		segaddr = htole64(segs[j].ds_addr);
 
 		txd->read.buffer_addr = segaddr;
 		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
 		    cmd_type_len | seglen);
 		txd->read.olinfo_status = htole32(olinfo_status);
 
 		/* Wrap around at the end of the ring */
 		if (++i == txr->num_desc)
 			i = 0;
 	}
 
 	/* Mark the last descriptor: end of packet, report status */
 	txd->read.cmd_type_len |=
 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
 	txr->tx_avail -= nsegs;
 	txr->next_avail_desc = i;
 
 	txbuf->m_head = m_head;
 	/*
 	** Here we swap the map so the last descriptor,
 	** which gets the completion interrupt has the
 	** real map, and the first descriptor gets the
 	** unused map from this descriptor.
 	*/
 	txr->tx_buffers[first].map = txbuf->map;
 	txbuf->map = map;
 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
 
 	/* Set the EOP descriptor that will be marked done */
 	txbuf = &txr->tx_buffers[first];
 	txbuf->eop = txd;
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	/*
 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
 	 * hardware that this frame is available to transmit.
 	 */
 	++txr->total_packets;
 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
 
 	return (0);
 }
 /*
  * Apply the interface's IFF_PROMISC / IFF_ALLMULTI flags to the
  * receive control register.  For a VF interface promiscuous mode is
  * requested through e1000_promisc_set_vf() instead.  When neither
  * flag is set the register is left untouched.
  */
 static void
 igb_set_promisc(struct adapter *adapter)
 {
 	struct e1000_hw	*hw = &adapter->hw;
 	struct ifnet	*ifp = adapter->ifp;
 	u32		rctl;
 
 	if (adapter->vf_ifp) {
 		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
 		return;
 	}
 
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 	if ((ifp->if_flags & IFF_PROMISC) != 0) {
 		/* Unicast and multicast promiscuous */
 		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
 	} else if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
 		/* Multicast promiscuous only */
 		rctl |= E1000_RCTL_MPE;
 		rctl &= ~E1000_RCTL_UPE;
 	} else {
 		return;
 	}
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 }
 
 /*
  * Turn off unicast promiscuous mode.  Multicast promiscuous mode is
  * turned off as well unless IFF_ALLMULTI is set or the interface has
  * MAX_NUM_MULTICAST_ADDRESSES (or more) link-level multicast
  * addresses.  For a VF interface the request goes through
  * e1000_promisc_set_vf() instead.
  */
 static void
 igb_disable_promisc(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	struct e1000_hw	*hw = &adapter->hw;
 	struct ifmultiaddr *ifma;
 	u32		rctl;
 	int		nmcast = 0;
 
 	if (adapter->vf_ifp) {
 		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
 		return;
 	}
 
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 	rctl &= ~E1000_RCTL_UPE;
 
 	if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
 		nmcast = MAX_NUM_MULTICAST_ADDRESSES;
 	} else {
 		/* Count link-level multicast addresses, up to the max */
 #if __FreeBSD_version < 800000
 		IF_ADDR_LOCK(ifp);
 #else
 		if_maddr_rlock(ifp);
 #endif
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family == AF_LINK) {
 				if (nmcast == MAX_NUM_MULTICAST_ADDRESSES)
 					break;
 				nmcast++;
 			}
 		}
 #if __FreeBSD_version < 800000
 		IF_ADDR_UNLOCK(ifp);
 #else
 		if_maddr_runlock(ifp);
 #endif
 	}
 
 	/* Don't disable if in MAX groups */
 	if (nmcast < MAX_NUM_MULTICAST_ADDRESSES)
 		rctl &= ~E1000_RCTL_MPE;
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 }
 
 
 /*********************************************************************
  *  Multicast Update
  *
  *  This routine is called whenever multicast address list is updated.
  *  It copies the interface's link-level multicast addresses into
  *  adapter->mta and passes them to e1000_update_mc_addr_list().  When
  *  MAX_NUM_MULTICAST_ADDRESSES entries were collected it sets the
  *  multicast promiscuous bit in RCTL instead.
  *
  **********************************************************************/
 
 static void
 igb_set_multi(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	struct ifmultiaddr *ifma;
 	u8		*table = adapter->mta;
 	u32		rctl;
 	int		count = 0;
 
 	IOCTL_DEBUGOUT("igb_set_multi: begin");
 
 	bzero(table, sizeof(uint8_t) * ETH_ADDR_LEN *
 	    MAX_NUM_MULTICAST_ADDRESSES);
 
 #if __FreeBSD_version < 800000
 	IF_ADDR_LOCK(ifp);
 #else
 	if_maddr_rlock(ifp);
 #endif
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family == AF_LINK) {
 			/* Table is full, stop collecting */
 			if (count == MAX_NUM_MULTICAST_ADDRESSES)
 				break;
 
 			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 			    &table[count * ETH_ADDR_LEN], ETH_ADDR_LEN);
 			count++;
 		}
 	}
 #if __FreeBSD_version < 800000
 	IF_ADDR_UNLOCK(ifp);
 #else
 	if_maddr_runlock(ifp);
 #endif
 
 	if (count < MAX_NUM_MULTICAST_ADDRESSES) {
 		e1000_update_mc_addr_list(&adapter->hw, table, count);
 		return;
 	}
 
 	/* Table filled up: receive all multicast instead */
 	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 	rctl |= E1000_RCTL_MPE;
 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
 }
 
 
 /*********************************************************************
  *  Timer routine:
  *  	This routine checks for link status,
  *	updates statistics, and does the watchdog.
  *
  *	Called with the core lock held; 'arg' is the adapter.  Unless
  *	the watchdog fires, the routine rearms itself to run again
  *	after hz ticks.
  *
  **********************************************************************/
 
 static void
 igb_local_timer(void *arg)
 {
 	struct adapter		*adapter = arg;
 	device_t		dev = adapter->dev;
 	struct ifnet		*ifp = adapter->ifp;
 	struct tx_ring		*txr = adapter->tx_rings;
 	struct igb_queue	*que = adapter->queues;
 	int			hung = 0, busy = 0;
 
 
 	IGB_CORE_LOCK_ASSERT(adapter);
 
 	igb_update_link_status(adapter);
 	igb_update_stats_counters(adapter);
 
 	/*
 	** Check the TX queues status
 	**	- central locked handling of OACTIVE
 	**	- watchdog only if all queues show hung
 	*/
 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
 		/* A queue marked hung is not counted if pause frames were seen */
 		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
 		    (adapter->pause_frames == 0))
 			++hung;
 		if (txr->queue_status & IGB_QUEUE_DEPLETED)
 			++busy;
 		/* Kick the queue task for any queue that is not idle */
 		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
 			taskqueue_enqueue(que->tq, &que->que_task);
 	}
 	if (hung == adapter->num_queues)
 		goto timeout;
 	if (busy == adapter->num_queues)
 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
 	    (busy < adapter->num_queues))
 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	adapter->pause_frames = 0;
 	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
 #ifndef DEVICE_POLLING
 	/* Schedule all queue interrupts - deadlock protection */
 	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
 #endif
 	return;
 
 timeout:
 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
 	/*
 	 * The scan above left txr pointing one element past the end of
 	 * tx_rings, so it must not be dereferenced as is.  Every queue is
 	 * hung when we get here, so walk the rings again from the start
 	 * and report each of them.
 	 */
 	txr = adapter->tx_rings;
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
 		    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
 		    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
 		device_printf(dev,"TX(%d) desc avail = %d,"
 		    "Next TX to Clean = %d\n",
 		    txr->me, txr->tx_avail, txr->next_to_clean);
 	}
 	/* Mark the interface down and reinitialize the adapter */
 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	adapter->watchdog_events++;
 	igb_init_locked(adapter);
 }
 
 /*
  * Refresh the driver's view of the link.  The current link state is
  * obtained in a way that depends on the media type, compared with the
  * cached adapter->link_active, and on a transition the softc fields are
  * updated, the event is logged and if_link_state_change() is called.
  */
 static void
 igb_update_link_status(struct adapter *adapter)
 {
 	struct e1000_hw		*hw = &adapter->hw;
 	struct e1000_fc_info	*fc = &hw->fc;
 	struct ifnet		*ifp = adapter->ifp;
 	device_t		dev = adapter->dev;
 	struct tx_ring		*txr = adapter->tx_rings;
 	u32			link_check, thstat, ctrl;
 	char			*flowctl = NULL;
 
 	/*
 	 * thstat and ctrl stay 0 unless the MAC is an i350 (see below), so
 	 * the thermal messages can only be printed for that MAC type.
 	 */
 	link_check = thstat = ctrl = 0;
 
 	/* Get the cached link value or read for real */
         switch (hw->phy.media_type) {
         case e1000_media_type_copper:
                 if (hw->mac.get_link_status) {
 			/* Do the work to read phy */
                         e1000_check_for_link(hw);
                         link_check = !hw->mac.get_link_status;
                 } else
                         link_check = TRUE;
                 break;
         case e1000_media_type_fiber:
                 e1000_check_for_link(hw);
 		/* Link is up when the LU bit is set in the STATUS register */
                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                  E1000_STATUS_LU);
                 break;
         case e1000_media_type_internal_serdes:
                 e1000_check_for_link(hw);
                 link_check = adapter->hw.mac.serdes_has_link;
                 break;
 	/* VF device is type_unknown */
         case e1000_media_type_unknown:
                 e1000_check_for_link(hw);
 		link_check = !hw->mac.get_link_status;
 		/* Fall thru */
         default:
                 break;
         }
 
 	/* Check for thermal downshift or shutdown */
 	if (hw->mac.type == e1000_i350) {
 		thstat = E1000_READ_REG(hw, E1000_THSTAT);
 		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
 	}
 
 	/* Get the flow control for display */
 	switch (fc->current_mode) {
 	case e1000_fc_rx_pause:
 		flowctl = "RX";
 		break;	
 	case e1000_fc_tx_pause:
 		flowctl = "TX";
 		break;	
 	case e1000_fc_full:
 		flowctl = "Full";
 		break;	
 	case e1000_fc_none:
 	default:
 		flowctl = "None";
 		break;	
 	}
 
 	/* Now we check if a transition has happened */
 	if (link_check && (adapter->link_active == 0)) {
 		/* Down -> up: record speed/duplex before announcing the link */
 		e1000_get_speed_and_duplex(&adapter->hw, 
 		    &adapter->link_speed, &adapter->link_duplex);
 		if (bootverbose)
 			device_printf(dev, "Link is up %d Mbps %s,"
 			    " Flow Control: %s\n",
 			    adapter->link_speed,
 			    ((adapter->link_duplex == FULL_DUPLEX) ?
 			    "Full Duplex" : "Half Duplex"), flowctl);
 		adapter->link_active = 1;
 		/* link_speed is in Mbps (see the message above) */
 		ifp->if_baudrate = adapter->link_speed * 1000000;
 		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
 		    (thstat & E1000_THSTAT_LINK_THROTTLE))
 			device_printf(dev, "Link: thermal downshift\n");
 		/* Delay Link Up for Phy update */
 		if (((hw->mac.type == e1000_i210) ||
 		    (hw->mac.type == e1000_i211)) &&
 		    (hw->phy.id == I210_I_PHY_ID))
 			msec_delay(I210_LINK_DELAY);
 		/* Reset if the media type changed. */
 		if (hw->dev_spec._82575.media_changed) {
 			hw->dev_spec._82575.media_changed = false;
 			/* igb_reset() consumes and clears IGB_MEDIA_RESET */
 			adapter->flags |= IGB_MEDIA_RESET;
 			igb_reset(adapter);
 		}	
 		/* This can sleep */
 		if_link_state_change(ifp, LINK_STATE_UP);
 	} else if (!link_check && (adapter->link_active == 1)) {
 		/* Up -> down: clear the cached speed/duplex */
 		ifp->if_baudrate = adapter->link_speed = 0;
 		adapter->link_duplex = 0;
 		if (bootverbose)
 			device_printf(dev, "Link is Down\n");
 		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
 		    (thstat & E1000_THSTAT_PWR_DOWN))
 			device_printf(dev, "Link: thermal shutdown\n");
 		adapter->link_active = 0;
 		/* This can sleep */
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 		/* Reset queue state */
 		/* NOTE(review): done without IGB_TX_LOCK, unlike igb_stop() */
 		for (int i = 0; i < adapter->num_queues; i++, txr++)
 			txr->queue_status = IGB_QUEUE_IDLE;
 	}
 }
 
 /*********************************************************************
  *
  *  Quiesce the adapter: mask interrupts, stop the periodic timer,
  *  mark the interface as not running, idle every TX queue and issue
  *  a global reset of the MAC.  Must be called with the core lock held.
  *
  **********************************************************************/
 
 static void
 igb_stop(void *arg)
 {
 	struct adapter	*adapter = arg;
 	struct e1000_hw	*hw = &adapter->hw;
 	struct ifnet	*ifp = adapter->ifp;
 
 	IGB_CORE_LOCK_ASSERT(adapter);
 
 	INIT_DEBUGOUT("igb_stop: begin");
 
 	igb_disable_intr(adapter);
 
 	callout_stop(&adapter->timer);
 
 	/* The stack must see the interface as stopped, with output busy */
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 
 	/* Return every TX ring to idle, taking each ring's lock in turn */
 	for (int q = 0; q < adapter->num_queues; q++) {
 		struct tx_ring *ring = &adapter->tx_rings[q];
 
 		IGB_TX_LOCK(ring);
 		ring->queue_status = IGB_QUEUE_IDLE;
 		IGB_TX_UNLOCK(ring);
 	}
 
 	/* Global MAC reset, then clear the wake-up control register */
 	e1000_reset_hw(hw);
 	E1000_WRITE_REG(hw, E1000_WUC, 0);
 
 	e1000_led_off(hw);
 	e1000_cleanup_led(hw);
 }
 
 
 /*********************************************************************
  *
  *  Determine hardware revision.
  *
  **********************************************************************/
 static void
 igb_identify_hardware(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 
 	/* Make sure our PCI config space has the necessary stuff set */
 	pci_enable_busmaster(dev);
 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
 
 	/* Save off the information about this board */
 	adapter->hw.vendor_id = pci_get_vendor(dev);
 	adapter->hw.device_id = pci_get_device(dev);
 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
 	adapter->hw.subsystem_vendor_id =
 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
 	adapter->hw.subsystem_device_id =
 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
 
 	/* Set MAC type early for PCI setup */
 	e1000_set_mac_type(&adapter->hw);
 
 	/* Are we a VF device? */
 	if ((adapter->hw.mac.type == e1000_vfadapt) ||
 	    (adapter->hw.mac.type == e1000_vfadapt_i350))
 		adapter->vf_ifp = 1;
 	else
 		adapter->vf_ifp = 0;
 }
 
 /*
  * Allocate the memory resource at BAR 0, record its bus-space tag and
  * handle for register access, and select the interrupt scheme through
  * igb_setup_msix().
  *
  * Returns 0 on success, or ENXIO if the memory resource is unavailable.
  */
 static int
 igb_allocate_pci_resources(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	int		bar_rid = PCIR_BAR(0);
 
 	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &bar_rid, RF_ACTIVE);
 	if (adapter->pci_mem == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: memory\n");
 		return (ENXIO);
 	}
 
 	/* Register accessors reach the device through the osdep tag/handle */
 	adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem);
 	adapter->osdep.mem_bus_space_handle =
 	    rman_get_bushandle(adapter->pci_mem);
 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
 
 	/* One queue unless igb_setup_msix() raises it for MSI-X */
 	adapter->num_queues = 1;
 
 	/* Result is the vector count: 0 legacy, 1 MSI, more for MSI-X */
 	adapter->msix = igb_setup_msix(adapter);
 	adapter->hw.back = &adapter->osdep;
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Setup the Legacy or MSI Interrupt handler
  *
  **********************************************************************/
 static int
 igb_allocate_legacy(struct adapter *adapter)
 {
 	device_t		dev = adapter->dev;
 	struct igb_queue	*que = adapter->queues;
 #ifndef IGB_LEGACY_TX
 	struct tx_ring		*txr = adapter->tx_rings;
 #endif
 	int			error, rid = 0;
 
 	/* Turn off all interrupts */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 
 	/* MSI RID is 1 */
 	if (adapter->msix == 1)
 		rid = 1;
 
 	/* We allocate a single interrupt resource */
 	adapter->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (adapter->res == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: "
 		    "interrupt\n");
 		return (ENXIO);
 	}
 
 #ifndef IGB_LEGACY_TX
 	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
 #endif
 
 	/*
 	 * Try allocating a fast interrupt and the associated deferred
 	 * processing contexts.
 	 */
 	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
 	/* Make tasklet for deferred link handling */
 	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
 	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
 	    taskqueue_thread_enqueue, &que->tq);
 	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
 	    device_get_nameunit(adapter->dev));
 	if ((error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
 	    adapter, &adapter->tag)) != 0) {
 		device_printf(dev, "Failed to register fast interrupt "
 			    "handler: %d\n", error);
 		taskqueue_free(que->tq);
 		que->tq = NULL;
 		return (error);
 	}
 
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  Setup the MSIX Queue Interrupt handlers:
  *
  *  One vector is allocated per queue (RID = vector + 1), each with its
  *  own handler and taskqueue, followed by one more vector for link
  *  events.  Returns 0 on success, ENXIO when an IRQ resource cannot be
  *  allocated, or the error reported by bus_setup_intr().
  *
  **********************************************************************/
 static int
 igb_allocate_msix(struct adapter *adapter)
 {
 	device_t		dev = adapter->dev;
 	struct igb_queue	*que = adapter->queues;
 	int			error, rid, vector = 0;
 	int			cpu_id = 0;
 
 	/* Be sure to start with all interrupts disabled */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
 	E1000_WRITE_FLUSH(&adapter->hw);
 
 #ifdef	RSS
 	/*
 	 * If we're doing RSS, the number of queues needs to
 	 * match the number of RSS buckets that are configured.
 	 *
 	 * + If there's more queues than RSS buckets, we'll end
 	 *   up with queues that get no traffic.
 	 *
 	 * + If there's more RSS buckets than queues, we'll end
 	 *   up having multiple RSS buckets map to the same queue,
 	 *   so there'll be some contention.
 	 */
 	if (adapter->num_queues != rss_getnumbuckets()) {
 		device_printf(dev,
 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
 		    "; performance will be impacted.\n",
 		    __func__,
 		    adapter->num_queues,
 		    rss_getnumbuckets());
 	}
 #endif
 
 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
 		rid = vector +1;
 		que->res = bus_alloc_resource_any(dev,
 		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 		if (que->res == NULL) {
 			device_printf(dev,
 			    "Unable to allocate bus resource: "
 			    "MSIX Queue Interrupt\n");
 			return (ENXIO);
 		}
 		error = bus_setup_intr(dev, que->res,
 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 		    igb_msix_que, que, &que->tag);
 		if (error) {
 			/*
 			 * NOTE(review): the IRQ resource is forgotten here
 			 * without being released, which looks like a leak
 			 * on this error path -- confirm and release it.
 			 */
 			que->res = NULL;
 			/* Terminate the line so later console output is not glued on */
 			device_printf(dev,
 			    "Failed to register Queue handler\n");
 			return (error);
 		}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
 #endif
 		que->msix = vector;
 		/* 82575 uses per-queue cause bits, later MACs one bit per vector */
 		if (adapter->hw.mac.type == e1000_82575)
 			que->eims = E1000_EICR_TX_QUEUE0 << i;
 		else
 			que->eims = 1 << vector;
 
 #ifdef	RSS
 		/*
 		 * The queue ID is used as the RSS layer bucket ID.
 		 * We look up the queue ID -> RSS CPU ID and select
 		 * that.
 		 */
 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
 #else
 		/*
 		 * Bind the msix vector, and thus the
 		 * rings to the corresponding cpu.
 		 *
 		 * This just happens to match the default RSS round-robin
 		 * bucket -> queue -> CPU allocation.
 		 */
 		if (adapter->num_queues > 1) {
 			if (igb_last_bind_cpu < 0)
 				igb_last_bind_cpu = CPU_FIRST();
 			cpu_id = igb_last_bind_cpu;
 		}
 #endif
 
 		if (adapter->num_queues > 1) {
 			bus_bind_intr(dev, que->res, cpu_id);
 #ifdef	RSS
 			device_printf(dev,
 				"Bound queue %d to RSS bucket %d\n",
 				i, cpu_id);
 #else
 			device_printf(dev,
 				"Bound queue %d to cpu %d\n",
 				i, cpu_id);
 #endif
 		}
 
 #ifndef IGB_LEGACY_TX
 		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
 		    que->txr);
 #endif
 		/* Make tasklet for deferred handling */
 		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
 		/* NOTE(review): M_NOWAIT result is used without a NULL check */
 		que->tq = taskqueue_create("igb_que", M_NOWAIT,
 		    taskqueue_thread_enqueue, &que->tq);
 		if (adapter->num_queues > 1) {
 			/*
 			 * Only pin the taskqueue thread to a CPU if
 			 * RSS is in use.
 			 *
 			 * This again just happens to match the default RSS
 			 * round-robin bucket -> queue -> CPU allocation.
 			 */
 #ifdef	RSS
 			taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
 			    cpu_id,
 			    "%s que (bucket %d)",
 			    device_get_nameunit(adapter->dev),
 			    cpu_id);
 #else
 			taskqueue_start_threads(&que->tq, 1, PI_NET,
 			    "%s que (qid %d)",
 			    device_get_nameunit(adapter->dev),
 			    cpu_id);
 #endif
 		} else {
 			taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
 			    device_get_nameunit(adapter->dev));
 		}
 
 		/* Finally update the last bound CPU id */
 		if (adapter->num_queues > 1)
 			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
 	}
 
 	/* And Link: it takes the vector right after the last queue */
 	rid = vector + 1;
 	adapter->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (adapter->res == NULL) {
 		device_printf(dev,
 		    "Unable to allocate bus resource: "
 		    "MSIX Link Interrupt\n");
 		return (ENXIO);
 	}
 	if ((error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 	    igb_msix_link, adapter, &adapter->tag)) != 0) {
 		device_printf(dev, "Failed to register Link handler\n");
 		return (error);
 	}
 #if __FreeBSD_version >= 800504
 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
 #endif
 	adapter->linkvec = vector;
 
 	return (0);
 }
 
 
 /*
  * Program the interrupt routing for the queue and link vectors and set
  * the initial interrupt throttle value of every queue.  The routing
  * registers differ per MAC type, hence the switch below.  The routine
  * also accumulates adapter->que_mask and sets adapter->link_mask.
  */
 static void
 igb_configure_queues(struct adapter *adapter)
 {
 	struct	e1000_hw	*hw = &adapter->hw;
 	struct	igb_queue	*que;
 	u32			tmp, ivar = 0, newitr = 0;
 
 	/* First turn on RSS capability */
 	/* (everything except the 82575 gets GPIE programmed for MSI-X mode) */
 	if (adapter->hw.mac.type != e1000_82575)
 		E1000_WRITE_REG(hw, E1000_GPIE,
 		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
 		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
 
 	/* Turn on MSIX */
 	switch (adapter->hw.mac.type) {
 	case e1000_82580:
 	case e1000_i350:
 	case e1000_i354:
 	case e1000_i210:
 	case e1000_i211:
 	case e1000_vfadapt:
 	case e1000_vfadapt_i350:
 		/*
 		 * Here IVAR register (i >> 1) serves queues i and i+1:
 		 * even queues use byte 0 (RX) and byte 1 (TX), odd queues
 		 * byte 2 (RX) and byte 3 (TX).
 		 */
 		/* RX entries */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 index = i >> 1;
 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
 			que = &adapter->queues[i];
 			if (i & 1) {
 				ivar &= 0xFF00FFFF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
 			} else {
 				ivar &= 0xFFFFFF00;
 				ivar |= que->msix | E1000_IVAR_VALID;
 			}
 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
 		}
 		/* TX entries */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 index = i >> 1;
 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
 			que = &adapter->queues[i];
 			if (i & 1) {
 				ivar &= 0x00FFFFFF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
 			} else {
 				ivar &= 0xFFFF00FF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
 			}
 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
 			adapter->que_mask |= que->eims;
 		}
 
 		/* And for the link interrupt */
 		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
 		adapter->link_mask = 1 << adapter->linkvec;
 		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
 		break;
 	case e1000_82576:
 		/*
 		 * Here IVAR register (i & 7) serves queues i and i+8:
 		 * queues 0-7 use byte 0 (RX) and byte 1 (TX), queues 8 and
 		 * up use byte 2 (RX) and byte 3 (TX).
 		 */
 		/* RX entries */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 index = i & 0x7; /* Each IVAR has two entries */
 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
 			que = &adapter->queues[i];
 			if (i < 8) {
 				ivar &= 0xFFFFFF00;
 				ivar |= que->msix | E1000_IVAR_VALID;
 			} else {
 				ivar &= 0xFF00FFFF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
 			}
 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
 			adapter->que_mask |= que->eims;
 		}
 		/* TX entries */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 index = i & 0x7; /* Each IVAR has two entries */
 			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
 			que = &adapter->queues[i];
 			if (i < 8) {
 				ivar &= 0xFFFF00FF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
 			} else {
 				ivar &= 0x00FFFFFF;
 				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
 			}
 			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
 			adapter->que_mask |= que->eims;
 		}
 
 		/* And for the link interrupt */
 		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
 		adapter->link_mask = 1 << adapter->linkvec;
 		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
 		break;
 
 	case e1000_82575:
                 /* enable MSI-X support*/
 		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
                 tmp |= E1000_CTRL_EXT_PBA_CLR;
                 /* Auto-Mask interrupts upon ICR read. */
                 tmp |= E1000_CTRL_EXT_EIAME;
                 tmp |= E1000_CTRL_EXT_IRCA;
                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
 
 		/* Queues */
 		/* Each queue's MSIXBM entry carries its RX and TX cause bits */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			que = &adapter->queues[i];
 			tmp = E1000_EICR_RX_QUEUE0 << i;
 			tmp |= E1000_EICR_TX_QUEUE0 << i;
 			que->eims = tmp;
 			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
 			    i, que->eims);
 			adapter->que_mask |= que->eims;
 		}
 
 		/* Link */
 		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
 		    E1000_EIMS_OTHER);
 		adapter->link_mask |= E1000_EIMS_OTHER;
 		/* no break: falls into default, which only breaks */
 	default:
 		break;
 	}
 
 	/* Set the starting interrupt rate */
 	/* newitr stays 0 when igb_max_interrupt_rate is not positive */
 	if (igb_max_interrupt_rate > 0)
 		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
 
 	/* 82575 gets the value in both 16-bit halves, others a flag bit */
         if (hw->mac.type == e1000_82575)
                 newitr |= newitr << 16;
         else
                 newitr |= E1000_EITR_CNT_IGNR;
 
 	for (int i = 0; i < adapter->num_queues; i++) {
 		que = &adapter->queues[i];
 		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
 	}
 
 	return;
 }
 
 
 static void
 igb_free_pci_resources(struct adapter *adapter)
 {
 	struct		igb_queue *que = adapter->queues;
 	device_t	dev = adapter->dev;
 	int		rid;
 
 	/*
 	** There is a slight possibility of a failure mode
 	** in attach that will result in entering this function
 	** before interrupt resources have been initialized, and
 	** in that case we do not want to execute the loops below
 	** We can detect this reliably by the state of the adapter
 	** res pointer.
 	*/
 	if (adapter->res == NULL)
 		goto mem;
 
 	/*
 	 * First release all the interrupt resources:
 	 */
 	for (int i = 0; i < adapter->num_queues; i++, que++) {
 		rid = que->msix + 1;
 		if (que->tag != NULL) {
 			bus_teardown_intr(dev, que->res, que->tag);
 			que->tag = NULL;
 		}
 		if (que->res != NULL)
 			bus_release_resource(dev,
 			    SYS_RES_IRQ, rid, que->res);
 	}
 
 	/* Clean the Legacy or Link interrupt last */
 	if (adapter->linkvec) /* we are doing MSIX */
 		rid = adapter->linkvec + 1;
 	else
 		(adapter->msix != 0) ? (rid = 1):(rid = 0);
 
 	que = adapter->queues;
 	if (adapter->tag != NULL) {
 		taskqueue_drain(que->tq, &adapter->link_task);
 		bus_teardown_intr(dev, adapter->res, adapter->tag);
 		adapter->tag = NULL;
 	}
 	if (adapter->res != NULL)
 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
 
 	for (int i = 0; i < adapter->num_queues; i++, que++) {
 		if (que->tq != NULL) {
 #ifndef IGB_LEGACY_TX
 			taskqueue_drain(que->tq, &que->txr->txq_task);
 #endif
 			taskqueue_drain(que->tq, &que->que_task);
 			taskqueue_free(que->tq);
 		}
 	}
 mem:
 	if (adapter->msix)
 		pci_release_msi(dev);
 
 	if (adapter->msix_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    adapter->memrid, adapter->msix_mem);
 
 	if (adapter->pci_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(0), adapter->pci_mem);
 
 }
 
 /*
  * Setup Either MSI/X or MSI
  *
  * Tries MSI-X first (unless disabled by the igb_enable_msix tuneable),
  * asking for one vector per queue plus one for link; on any failure it
  * falls back to a single MSI message and finally to a legacy interrupt.
  * On MSI-X success adapter->num_queues is set to the queue count.
  *
  * Returns the number of vectors in use: more than 1 for MSI-X, 1 for
  * MSI, 0 for a legacy interrupt.
  */
 static int
 igb_setup_msix(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	int		bar, want, queues, msgs, maxqueues;
 
 	/* tuneable override */
 	if (igb_enable_msix == 0)
 		goto msi;
 
 	/* First try MSI/X */
 	msgs = pci_msix_count(dev);
 	if (msgs == 0)
 		goto msi;
 	/*
 	** Some new devices, as with ixgbe, now may
 	** use a different BAR, so we need to keep
 	** track of which is used.
 	*/
 	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
 	bar = pci_read_config(dev, adapter->memrid, 4);
 	if (bar == 0) /* use next bar */
 		adapter->memrid += 4;
 	adapter->msix_mem = bus_alloc_resource_any(dev,
 	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
 	if (adapter->msix_mem == NULL) {
 		/* May not be enabled */
 		device_printf(adapter->dev,
 		    "Unable to map MSIX table \n");
 		goto msi;
 	}
 
 	/* Default: one queue per CPU, leaving one vector for link */
 	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
 
 	/* Override via tuneable */
 	if (igb_num_queues != 0)
 		queues = igb_num_queues;
 
 #ifdef	RSS
 	/* If we're doing RSS, clamp at the number of RSS buckets */
 	if (queues > rss_getnumbuckets())
 		queues = rss_getnumbuckets();
 #endif
 
 
 	/* Sanity check based on HW */
 	switch (adapter->hw.mac.type) {
 		case e1000_82575:
 			maxqueues = 4;
 			break;
 		case e1000_82576:
 		case e1000_82580:
 		case e1000_i350:
 		case e1000_i354:
 			maxqueues = 8;
 			break;
 		case e1000_i210:
 			maxqueues = 4;
 			break;
 		case e1000_i211:
 			maxqueues = 2;
 			break;
 		default:  /* VF interfaces */
 			maxqueues = 1;
 			break;
 	}
 
 	/* Final clamp on the actual hardware capability */
 	if (queues > maxqueues)
 		queues = maxqueues;
 
 	/*
 	** One vector (RX/TX pair) per queue
 	** plus an additional for Link interrupt
 	*/
 	want = queues + 1;
 	if (msgs >= want)
 		msgs = want;
 	else {
 		device_printf(adapter->dev,
 		    "MSIX Configuration Problem, "
 		    "%d vectors configured, but %d queues wanted!\n",
 		    msgs, want);
 		goto msi;
 	}
 	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
 		device_printf(adapter->dev,
 		    "Using MSIX interrupts with %d vectors\n", msgs);
 		adapter->num_queues = queues;
 		return (msgs);
 	}
 	/*
 	** If MSIX alloc failed or provided us with
 	** less than needed, free and fall through to MSI
 	*/
 	pci_release_msi(dev);
 
 msi:
 	if (adapter->msix_mem != NULL) {
 		/*
 		 * Release with the RID the mapping was allocated under;
 		 * memrid may have been advanced to the next BAR above, so
 		 * PCIR_BAR(IGB_MSIX_BAR) is not necessarily the right one.
 		 */
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    adapter->memrid, adapter->msix_mem);
 		adapter->msix_mem = NULL;
 	}
 	msgs = 1;
 	if (pci_alloc_msi(dev, &msgs) == 0) {
 		device_printf(adapter->dev," Using an MSI interrupt\n");
 		return (msgs);
 	}
 	device_printf(adapter->dev," Using a Legacy interrupt\n");
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Initialize the DMA Coalescing feature
  *
  *  adapter: softc; adapter->dmac == 0 means the feature is disabled.
  *  pba:     packet buffer allocation value computed by igb_reset()
  *           (presumably in KB, as igb_reset() logs it with a "K"
  *           suffix -- confirm).
  *
  *  Nothing is done on the i211.  MAC types above the 82580 are fully
  *  programmed; the 82580 itself only has the feature switched off.
  *
  **********************************************************************/
 static void
 igb_init_dmac(struct adapter *adapter, u32 pba)
 {
 	device_t	dev = adapter->dev;
 	struct e1000_hw *hw = &adapter->hw;
 	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
 	u16		hwm;
 
 	if (hw->mac.type == e1000_i211)
 		return;
 
 	if (hw->mac.type > e1000_82580) {
 
 		if (adapter->dmac == 0) { /* Disabling it */
 			E1000_WRITE_REG(hw, E1000_DMACR, reg);
 			return;
 		} else
 			device_printf(dev, "DMA Coalescing enabled\n");
 
 		/* Set starting threshold */
 		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
 
 		/* Receive threshold, clamped from below at 64 * (pba - 6) */
 		hwm = 64 * pba - adapter->max_frame_size / 16;
 		if (hwm < 64 * (pba - 6))
 			hwm = 64 * (pba - 6);
 		reg = E1000_READ_REG(hw, E1000_FCRTC);
 		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
 		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
 		    & E1000_FCRTC_RTH_COAL_MASK);
 		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
 
 
 		/* DMA coalescing threshold, clamped from below at pba - 10 */
 		dmac = pba - adapter->max_frame_size / 512;
 		if (dmac < pba - 10)
 			dmac = pba - 10;
 		/*
 		 * Read-modify-write: replace only the threshold field.  A
 		 * plain assignment here would discard the register value
 		 * that was just read and masked.
 		 */
 		reg = E1000_READ_REG(hw, E1000_DMACR);
 		reg &= ~E1000_DMACR_DMACTHR_MASK;
 		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
 		    & E1000_DMACR_DMACTHR_MASK);
 
 		/* transition to L0x or L1 if available..*/
 		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
 
 		/* Check if status is 2.5Gb backplane connection
 		* before configuration of watchdog timer, which is
 		* in msec values in 12.8usec intervals
 		* watchdog timer= msec values in 32usec intervals
 		* for non 2.5Gb connection
 		*/
 		if (hw->mac.type == e1000_i354) {
 			int status = E1000_READ_REG(hw, E1000_STATUS);
 			if ((status & E1000_STATUS_2P5_SKU) &&
 			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
 				reg |= ((adapter->dmac * 5) >> 6);
 			else
 				reg |= (adapter->dmac >> 5);
 		} else {
 			reg |= (adapter->dmac >> 5);
 		}
 
 		E1000_WRITE_REG(hw, E1000_DMACR, reg);
 
 #ifdef I210_OBFF_SUPPORT
 		/*
 		 * Set the OBFF Rx threshold to DMA Coalescing Rx
 		 * threshold - 2KB and enable the feature in the
 		 * hardware for I210.
 		 */
 		if (hw->mac.type == e1000_i210) {
 			int obff = dmac - 2;
 			reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
 			reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
 			reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
 			    | E1000_DOBFFCTL_EXIT_ACT_MASK;
 			E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
 		}
 #endif
 		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
 
 		/* Set the interval before transition */
 		reg = E1000_READ_REG(hw, E1000_DMCTLX);
 		if (hw->mac.type == e1000_i350)
 			reg |= IGB_DMCTLX_DCFLUSH_DIS;
 		/*
 		** in 2.5Gb connection, TTLX unit is 0.4 usec
 		** which is 0x4*2 = 0xA. But delay is still 4 usec
 		*/
 		if (hw->mac.type == e1000_i354) {
 			int status = E1000_READ_REG(hw, E1000_STATUS);
 			if ((status & E1000_STATUS_2P5_SKU) &&
 			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
 				reg |= 0xA;
 			else
 				reg |= 0x4;
 		} else {
 			reg |= 0x4;
 		}
 
 		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
 
 		/* free space in tx packet buffer to wake from DMA coal */
 		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
 		    (2 * adapter->max_frame_size)) >> 6);
 
 		/* make low power state decision controlled by DMA coal */
 		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
 		reg &= ~E1000_PCIEMISC_LX_DECISION;
 		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
 
 	} else if (hw->mac.type == e1000_82580) {
 		/* 82580: clear the Lx decision bit and zero DMACR */
 		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
 		E1000_WRITE_REG(hw, E1000_PCIEMISC,
 		    reg & ~E1000_PCIEMISC_LX_DECISION);
 		E1000_WRITE_REG(hw, E1000_DMACR, 0);
 	}
 }
 
 
 /*********************************************************************
  *
  *  Set up a fresh starting state
  *
  *  Chooses the packet buffer allocation, derives the flow control
  *  water marks from it, resets and re-initializes the hardware, sets
  *  up DMA coalescing and re-checks the link.
  *
  **********************************************************************/
 static void
 igb_reset(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct e1000_hw *hw = &adapter->hw;
 	struct e1000_fc_info *fc = &hw->fc;
 	struct ifnet	*ifp = adapter->ifp;
 	u32		pba = 0;
 	u16		hwm;
 
 	INIT_DEBUGOUT("igb_reset: begin");
 
 	/* Let the firmware know the OS is in control */
 	igb_get_hw_control(adapter);
 
 	/*
 	 * Packet Buffer Allocation (PBA)
 	 * Writing PBA sets the receive portion of the buffer
 	 * the remainder is used for the transmit buffer.
 	 */
 	switch (hw->mac.type) {
 	case e1000_82575:
 		pba = E1000_PBA_32K;
 		break;
 	case e1000_82576:
 	case e1000_vfadapt:
 		pba = E1000_READ_REG(hw, E1000_RXPBS);
 		pba &= E1000_RXPBS_SIZE_MASK_82576;
 		break;
 	case e1000_82580:
 	case e1000_i350:
 	case e1000_i354:
 	case e1000_vfadapt_i350:
 		pba = E1000_READ_REG(hw, E1000_RXPBS);
 		pba = e1000_rxpbs_adjust_82580(pba);
 		break;
 	case e1000_i210:
 	case e1000_i211:
 		pba = E1000_PBA_34K;
 		/* no break: falls into default, which only breaks */
 	default:
 		break;
 	}
 
 	/* Special needs in case of Jumbo frames */
 	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
 		u32 tx_space, min_tx, min_rx;
 		/* Upper 16 bits of PBA: TX space; lower 16 bits: RX space */
 		pba = E1000_READ_REG(hw, E1000_PBA);
 		tx_space = pba >> 16;
 		pba &= 0xffff;
 		/* Minimum TX/RX needs, rounded up to 1K and expressed in K */
 		min_tx = (adapter->max_frame_size +
 		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
 		min_tx = roundup2(min_tx, 1024);
 		min_tx >>= 10;
                 min_rx = adapter->max_frame_size;
                 min_rx = roundup2(min_rx, 1024);
                 min_rx >>= 10;
 		if (tx_space < min_tx &&
 		    ((min_tx - tx_space) < pba)) {
 			pba = pba - (min_tx - tx_space);
 			/*
                          * if short on rx space, rx wins
                          * and must trump tx adjustment
 			 */
                         if (pba < min_rx)
                                 pba = min_rx;
 		}
 		E1000_WRITE_REG(hw, E1000_PBA, pba);
 	}
 
 	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
 
 	/*
 	 * These parameters control the automatic generation (Tx) and
 	 * response (Rx) to Ethernet PAUSE frames.
 	 * - High water mark should allow for at least two frames to be
 	 *   received after sending an XOFF.
 	 * - Low water mark works best when it is very near the high water mark.
 	 *   This allows the receiver to restart by sending XON when it has
 	 *   drained a bit.
 	 */
 	/*
 	 * NOTE(review): hwm is a u16, so a (pba << 10) based value above
 	 * 64K - 1 is truncated by this assignment -- confirm that this is
 	 * acceptable for the larger packet buffers.
 	 */
 	hwm = min(((pba << 10) * 9 / 10),
 	    ((pba << 10) - 2 * adapter->max_frame_size));
 
 	if (hw->mac.type < e1000_82576) {
 		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
 		fc->low_water = fc->high_water - 8;
 	} else {
 		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
 		fc->low_water = fc->high_water - 16;
 	}
 
 	fc->pause_time = IGB_FC_PAUSE_TIME;
 	fc->send_xon = TRUE;
 	/* A non-zero adapter->fc overrides the default flow control mode */
 	if (adapter->fc)
 		fc->requested_mode = adapter->fc;
 	else
 		fc->requested_mode = e1000_fc_default;
 
 	/* Issue a global reset */
 	e1000_reset_hw(hw);
 	E1000_WRITE_REG(hw, E1000_WUC, 0);
 
 	/* Reset for AutoMediaDetect */
 	if (adapter->flags & IGB_MEDIA_RESET) {
 		e1000_setup_init_funcs(hw, TRUE);
 		e1000_get_bus_info(hw);
 		adapter->flags &= ~IGB_MEDIA_RESET;
 	}
 
 	/* An init failure is only logged; the sequence continues */
 	if (e1000_init_hw(hw) < 0)
 		device_printf(dev, "Hardware Initialization Failed\n");
 
 	/* Setup DMA Coalescing */
 	igb_init_dmac(adapter, pba);
 
 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
 	e1000_get_phy_info(hw);
 	e1000_check_for_link(hw);
 	return;
 }
 
 /*********************************************************************
  *
  *  Allocate the ifnet, fill in its entry points and capabilities,
  *  attach it as an Ethernet interface and register the supported
  *  media.  Returns 0 on success, -1 if the ifnet cannot be allocated.
  *
  **********************************************************************/
 static int
 igb_setup_interface(device_t dev, struct adapter *adapter)
 {
 	struct ifnet   *ifp;
 
 	INIT_DEBUGOUT("igb_setup_interface: begin");
 
 	adapter->ifp = if_alloc(IFT_ETHER);
 	ifp = adapter->ifp;
 	if (ifp == NULL) {
 		device_printf(dev, "can not allocate ifnet structure\n");
 		return (-1);
 	}
 
 	/* Identity and driver entry points */
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_softc = adapter;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_init =  igb_init;
 	ifp->if_ioctl = igb_ioctl;
 	ifp->if_get_counter = igb_get_counter;
 #ifndef IGB_LEGACY_TX
 	ifp->if_transmit = igb_mq_start;
 	ifp->if_qflush = igb_qflush;
 #else
 	ifp->if_start = igb_start;
 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
 	IFQ_SET_READY(&ifp->if_snd);
 #endif
 
 	ether_ifattach(ifp, adapter->hw.mac.addr);
 
 	/* Offloads that are both advertised and enabled from the start */
 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM |
 	    IFCAP_TSO | IFCAP_JUMBO_MTU;
 	ifp->if_capenable = ifp->if_capabilities;
 
 	/* LRO is advertised but left disabled by default */
 	ifp->if_capabilities |= IFCAP_LRO;
 
 #ifdef DEVICE_POLLING
 	ifp->if_capabilities |= IFCAP_POLLING;
 #endif
 
 	/*
 	 * VLAN support: the header length covers the VLAN tag, and
 	 * tagging, VLAN TSO and VLAN MTU are advertised and enabled.
 	 */
 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
 	ifp->if_capabilities |=
 	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU;
 	ifp->if_capenable |=
 	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU;
 
 	/*
 	 * Hardware VLAN filtering is advertised only, not enabled.  When
 	 * vlans sit on another pseudo device (e.g. lagg) the vlan events
 	 * are not passed through and filtering would break operation.
 	 * It can be enabled when vlans are used directly on igb.
 	 */
 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
 
 	/* Media: callbacks first, then the types this adapter supports */
 	ifmedia_init(&adapter->media, IFM_IMASK,
 	    igb_media_change, igb_media_status);
 	switch (adapter->hw.phy.media_type) {
 	case e1000_media_type_fiber:
 	case e1000_media_type_internal_serdes:
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL);
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_1000_SX, 0, NULL);
 		break;
 	default:
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_10_T, 0, NULL);
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_100_TX, 0, NULL);
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL);
 		/* No 1000BASE-T entries when the PHY type is e1000_phy_ife */
 		if (adapter->hw.phy.type != e1000_phy_ife) {
 			ifmedia_add(&adapter->media,
 			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
 			ifmedia_add(&adapter->media,
 			    IFM_ETHER | IFM_1000_T, 0, NULL);
 		}
 		break;
 	}
 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
 	return (0);
 }
 
 
 /*
  * Manage DMA'able memory.
  */
 /*
  * bus_dmamap_load() callback: on success store the bus address of the
  * first segment through arg, which points at a bus_addr_t.  On error
  * the destination is left untouched.
  */
 static void
 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 	bus_addr_t *paddr = arg;
 
 	if (error == 0)
 		*paddr = segs[0].ds_addr;
 }
 
 /*
  * Allocate a single-segment DMA area of `size' bytes and load it.
  *
  * adapter:  softc, used for the parent DMA tag and for messages.
  * size:     number of bytes to allocate and map.
  * dma:      filled in with the tag, map, virtual and bus addresses.
  * mapflags: extra flags for bus_dmamap_load(); BUS_DMA_NOWAIT is
  *           always added.
  *
  * Returns 0 on success or a non-zero errno value.  On failure
  * everything obtained so far is released and dma->dma_tag is NULL.
  */
 static int
 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
         struct igb_dma_alloc *dma, int mapflags)
 {
 	int error;
 
 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
 				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR,	/* highaddr */
 				NULL, NULL,		/* filter, filterarg */
 				size,			/* maxsize */
 				1,			/* nsegments */
 				size,			/* maxsegsize */
 				0,			/* flags */
 				NULL,			/* lockfunc */
 				NULL,			/* lockarg */
 				&dma->dma_tag);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dma_tag_create failed: %d\n",
 		    __func__, error);
 		goto fail_0;
 	}
 
 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
 		    __func__, (uintmax_t)size, error);
 		goto fail_2;
 	}
 
 	/* igb_dmamap_cb() stores the bus address; 0 means it never did */
 	dma->dma_paddr = 0;
 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
 	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
 	if (error || dma->dma_paddr == 0) {
 		/*
 		 * The load can report 0 while no address was delivered.
 		 * That must not be returned as success, because the memory
 		 * is released below.
 		 */
 		if (error == 0)
 			error = ENOMEM;
 		device_printf(adapter->dev,
 		    "%s: bus_dmamap_load failed: %d\n",
 		    __func__, error);
 		goto fail_3;
 	}
 
 	return (0);
 
 fail_3:
 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 fail_2:
 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 	bus_dma_tag_destroy(dma->dma_tag);
 fail_0:
 	/* A NULL tag is what igb_dma_free() tests for */
 	dma->dma_tag = NULL;
 
 	return (error);
 }
 
 /*
  * Release what igb_dma_malloc() set up in 'dma': unload the map if a bus
  * address is recorded, free the memory if a kernel address is recorded,
  * then destroy the tag.  Each field is cleared as it is released; a NULL
  * tag makes the whole call a no-op.
  */
 static void
 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
 {
 	if (dma->dma_tag != NULL) {
 		if (dma->dma_paddr != 0) {
 			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 			dma->dma_paddr = 0;
 		}
 
 		if (dma->dma_vaddr != NULL) {
 			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
 			    dma->dma_map);
 			dma->dma_vaddr = NULL;
 		}
 
 		bus_dma_tag_destroy(dma->dma_tag);
 		dma->dma_tag = NULL;
 	}
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for the transmit and receive rings, and then
  *  the descriptors associated with each, called only once at attach.
  *
  *  Allocates the queue, TX ring and RX ring arrays (num_queues entries
  *  each), then for every ring its lock, descriptor DMA area and buffer
  *  bookkeeping, and finally links each queue to its TX/RX ring pair.
  *
  *  Returns 0 on success or ENOMEM on any failure.
  *
  **********************************************************************/
 static int
 igb_allocate_queues(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	struct igb_queue	*que = NULL;
 	struct tx_ring		*txr = NULL;
 	struct rx_ring		*rxr = NULL;
 	int rsize, tsize, error = E1000_SUCCESS;
 	int txconf = 0, rxconf = 0;
 
 	/* First allocate the top level queue structs */
 	if (!(adapter->queues =
 	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate queue memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
 	/* Next allocate the TX ring struct memory */
 	if (!(adapter->tx_rings =
 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate TX ring memory\n");
 		error = ENOMEM;
 		goto tx_fail;
 	}
 
 	/* Now allocate the RX */
 	if (!(adapter->rx_rings =
 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate RX ring memory\n");
 		error = ENOMEM;
 		goto rx_fail;
 	}
 
 	tsize = roundup2(adapter->num_tx_desc *
 	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
 	/*
 	 * Now set up the TX queues, txconf is needed to handle the
 	 * possibility that things fail midcourse and we need to
 	 * undo memory gracefully.  It counts fully configured rings.
 	 */
 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
 		/* Set up some basics */
 		txr = &adapter->tx_rings[i];
 		txr->adapter = adapter;
 		txr->me = i;
 		txr->num_desc = adapter->num_tx_desc;
 
 		/* Initialize the TX lock */
 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
 		    device_get_nameunit(dev), txr->me);
 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
 
 		if (igb_dma_malloc(adapter, tsize,
 			&txr->txdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate TX Descriptor memory\n");
 			error = ENOMEM;
 			goto err_tx_desc;
 		}
 		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
 		bzero((void *)txr->tx_base, tsize);
 
 		/* Now allocate transmit buffers for the ring */
 		if (igb_allocate_transmit_buffers(txr)) {
 			device_printf(dev,
 			    "Critical Failure setting up transmit buffers\n");
 			error = ENOMEM;
 			goto err_tx_desc;
 		}
 #ifndef IGB_LEGACY_TX
 		/* Allocate a buf ring */
 		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
 		    M_WAITOK, &txr->tx_mtx);
 #endif
 	}
 
 	/*
 	 * Next the RX queues; rxconf plays the same role as txconf.
 	 */
 	rsize = roundup2(adapter->num_rx_desc *
 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
 		rxr = &adapter->rx_rings[i];
 		rxr->adapter = adapter;
 		rxr->me = i;
 
 		/* Initialize the RX lock, named after this RX ring's index */
 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
 		    device_get_nameunit(dev), rxr->me);
 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
 
 		if (igb_dma_malloc(adapter, rsize,
 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate RxDescriptor memory\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
 		bzero((void *)rxr->rx_base, rsize);
 
 		/* Allocate receive buffers for the ring */
 		if (igb_allocate_receive_buffers(rxr)) {
 			device_printf(dev,
 			    "Critical Failure setting up receive buffers\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 	}
 
 	/*
 	** Finally set up the queue holding structs
 	*/
 	for (int i = 0; i < adapter->num_queues; i++) {
 		que = &adapter->queues[i];
 		que->adapter = adapter;
 		que->txr = &adapter->tx_rings[i];
 		que->rxr = &adapter->rx_rings[i];
 	}
 
 	return (0);
 
 	/*
 	 * NOTE(review): only fully configured rings are unwound below; the
 	 * lock (and possibly DMA area) of the ring that failed midway is not
 	 * released here -- confirm against the callees' own cleanup.
 	 */
 err_rx_desc:
 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
 		igb_dma_free(adapter, &rxr->rxdma);
 err_tx_desc:
 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
 #ifndef IGB_LEGACY_TX
 		/* Each configured ring owns its own buf ring. */
 		if (txr->br != NULL) {
 			buf_ring_free(txr->br, M_DEVBUF);
 			txr->br = NULL;
 		}
 #endif
 		igb_dma_free(adapter, &txr->txdma);
 	}
 	free(adapter->rx_rings, M_DEVBUF);
 rx_fail:
 	/*
 	 * txr is still NULL when we arrive here straight from the rx_rings
 	 * allocation failure, so it must not be dereferenced at this label.
 	 */
 	free(adapter->tx_rings, M_DEVBUF);
 tx_fail:
 	free(adapter->queues, M_DEVBUF);
 fail:
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
  *  the information needed to transmit a packet on the wire. This is
  *  called only once at attach, setup is done every reset.
  *
  **********************************************************************/
 static int
 igb_allocate_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	device_t dev = adapter->dev;
 	struct igb_tx_buf *txbuf;
 	int error, i;
 
 	/*
 	 * Setup DMA descriptor areas.
 	 */
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
 			       1, 0,			/* alignment, bounds */
 			       BUS_SPACE_MAXADDR,	/* lowaddr */
 			       BUS_SPACE_MAXADDR,	/* highaddr */
 			       NULL, NULL,		/* filter, filterarg */
 			       IGB_TSO_SIZE,		/* maxsize */
 			       IGB_MAX_SCATTER,		/* nsegments */
 			       PAGE_SIZE,		/* maxsegsize */
 			       0,			/* flags */
 			       NULL,			/* lockfunc */
 			       NULL,			/* lockfuncarg */
 			       &txr->txtag))) {
 		device_printf(dev,"Unable to allocate TX DMA tag\n");
 		goto fail;
 	}
 
 	if (!(txr->tx_buffers =
 	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
         /* Create the descriptor buffer dma maps */
 	txbuf = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
 		if (error != 0) {
 			device_printf(dev, "Unable to create TX DMA map\n");
 			goto fail;
 		}
 	}
 
 	return 0;
 fail:
 	/* We free all, it handles case where we are in the middle */
 	igb_free_transmit_structures(adapter);
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Initialize a transmit ring: with the TX lock held, zero the
  *  descriptors, reset the ring indices, drop any mbufs still attached
  *  to the tx buffers and mark every descriptor as available.
  *
  **********************************************************************/
 static void
 igb_setup_transmit_ring(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	struct igb_tx_buf *txbuf;
 	int i;
 #ifdef DEV_NETMAP
 	struct netmap_adapter *na = NA(adapter->ifp);
 	struct netmap_slot *slot;
 #endif /* DEV_NETMAP */
 
 	IGB_TX_LOCK(txr);
 #ifdef DEV_NETMAP
 	slot = netmap_reset(na, NR_TX, txr->me, 0);
 #endif /* DEV_NETMAP */
 
 	/* Wipe whatever the descriptors held before */
 	bzero((void *)txr->tx_base,
 	    (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
 
 	/* Both ring indices start over at the first descriptor */
 	txr->next_to_clean = 0;
 	txr->next_avail_desc = 0;
 
 	/* Walk every tx buffer and release what is still attached */
 	for (i = 0; i < adapter->num_tx_desc; i++) {
 		txbuf = &txr->tx_buffers[i];
 
 		if (txbuf->m_head != NULL) {
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag, txbuf->map);
 			m_freem(txbuf->m_head);
 			txbuf->m_head = NULL;
 		}
 #ifdef DEV_NETMAP
 		if (slot) {
 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
 			/* no need to set the address */
 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
 		}
 #endif /* DEV_NETMAP */
 		/* No end-of-packet descriptor is being watched any more */
 		txbuf->eop = NULL;
 	}
 
 	/* The whole ring is free again */
 	txr->tx_avail = adapter->num_tx_desc;
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	IGB_TX_UNLOCK(txr);
 }
 
 /*********************************************************************
  *
  *  Initialize all transmit rings by running
  *  igb_setup_transmit_ring() on each of the num_queues rings.
  *
  **********************************************************************/
 static void
 igb_setup_transmit_structures(struct adapter *adapter)
 {
 	int i;
 
 	for (i = 0; i < adapter->num_queues; i++)
 		igb_setup_transmit_ring(&adapter->tx_rings[i]);
 }
 
 /*********************************************************************
  *
  *  Enable transmit unit: program base address, length, head/tail and
  *  TXDCTL for every queue, then (unless adapter->vf_ifp is set)
  *  configure the collision distance and write TCTL.
  *
  **********************************************************************/
 static void
 igb_initialize_transmit_units(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	struct tx_ring	*txr;
 	u32		tctl, txdctl;
 
 	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
 
 	/* Every queue is given the same TXDCTL thresholds and enable bit */
 	txdctl = 0;
 	txdctl |= IGB_TX_PTHRESH;
 	txdctl |= IGB_TX_HTHRESH << 8;
 	txdctl |= IGB_TX_WTHRESH << 16;
 	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
 
 	/* Setup the Tx Descriptor Rings */
 	for (int i = 0; i < adapter->num_queues; i++) {
 		u64 bus_addr;
 
 		txr = &adapter->tx_rings[i];
 		bus_addr = txr->txdma.dma_paddr;
 
 		/* Ring length first, then the 64-bit base split in halves */
 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
 		    (uint32_t)(bus_addr >> 32));
 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
 		    (uint32_t)bus_addr);
 
 		/* Setup the HW Tx Head and Tail descriptor pointers */
 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
 
 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
 		    E1000_READ_REG(hw, E1000_TDBAL(i)),
 		    E1000_READ_REG(hw, E1000_TDLEN(i)));
 
 		txr->queue_status = IGB_QUEUE_IDLE;
 
 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
 	}
 
 	/* The remaining setup is skipped when vf_ifp is set */
 	if (adapter->vf_ifp)
 		return;
 
 	e1000_config_collision_dist(hw);
 
 	/* Program the Transmit Control Register */
 	tctl = E1000_READ_REG(hw, E1000_TCTL);
 	tctl &= ~E1000_TCTL_CT;
 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
 
 	/* This write will effectively turn on the transmit unit. */
 	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
 }
 
 /*********************************************************************
  *
  *  Free all transmit rings: for each ring release its buffers and
  *  descriptor DMA area under the TX lock, destroy the lock, and
  *  finally free the ring array itself.
  *
  **********************************************************************/
 static void
 igb_free_transmit_structures(struct adapter *adapter)
 {
 	struct tx_ring *txr;
 	int i;
 
 	for (i = 0; i < adapter->num_queues; i++) {
 		txr = &adapter->tx_rings[i];
 
 		IGB_TX_LOCK(txr);
 		igb_free_transmit_buffers(txr);
 		igb_dma_free(adapter, &txr->txdma);
 		IGB_TX_UNLOCK(txr);
 		IGB_TX_LOCK_DESTROY(txr);
 	}
 
 	free(adapter->tx_rings, M_DEVBUF);
 }
 
 /*********************************************************************
  *
  *  Free transmit ring related data structures.
  *
  **********************************************************************/
 static void
 igb_free_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	struct igb_tx_buf *tx_buffer;
 	int             i;
 
 	INIT_DEBUGOUT("free_transmit_ring: begin");
 
 	if (txr->tx_buffers == NULL)
 		return;
 
 	tx_buffer = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
 		if (tx_buffer->m_head != NULL) {
 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag,
 			    tx_buffer->map);
 			m_freem(tx_buffer->m_head);
 			tx_buffer->m_head = NULL;
 			if (tx_buffer->map != NULL) {
 				bus_dmamap_destroy(txr->txtag,
 				    tx_buffer->map);
 				tx_buffer->map = NULL;
 			}
 		} else if (tx_buffer->map != NULL) {
 			bus_dmamap_unload(txr->txtag,
 			    tx_buffer->map);
 			bus_dmamap_destroy(txr->txtag,
 			    tx_buffer->map);
 			tx_buffer->map = NULL;
 		}
 	}
 #ifndef IGB_LEGACY_TX
 	if (txr->br != NULL)
 		buf_ring_free(txr->br, M_DEVBUF);
 #endif
 	if (txr->tx_buffers != NULL) {
 		free(txr->tx_buffers, M_DEVBUF);
 		txr->tx_buffers = NULL;
 	}
 	if (txr->txtag != NULL) {
 		bus_dma_tag_destroy(txr->txtag);
 		txr->txtag = NULL;
 	}
 	return;
 }
 
 /**********************************************************************
  *
  *  Setup work for hardware segmentation offload (TSO) on
  *  adapters using advanced tx descriptors
  *
  *  Seeds the TCP pseudo-header checksum in the packet, fills the
  *  context descriptor at next_avail_desc and consumes it, and sets
  *  the TSE / checksum / payload-length bits for the data descriptors
  *  through cmd_type_len and olinfo_status.
  *
  *  Returns 0 on success, ENXIO when the IP payload is not TCP.
  *  Panics on an ethertype that is neither IPv4 nor IPv6.
  *
  **********************************************************************/
 static int
 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
     u32 *cmd_type_len, u32 *olinfo_status)
 {
 	struct adapter *adapter = txr->adapter;
 	struct e1000_adv_tx_context_desc *TXD;
 	struct ether_vlan_header *eh;
 	struct tcphdr *th;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 #ifdef INET
 	struct ip *ip;
 #endif
 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
 	u32 mss_l4len_idx = 0, paylen;
 	u16 vtag = 0, eh_type;
 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
 
 	/*
 	 * Determine where frame payload starts.
 	 * Jump over vlan headers if already present.
 	 * eh_type is kept in host byte order from here on.
 	 */
 	eh = mtod(mp, struct ether_vlan_header *);
 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		eh_type = ntohs(eh->evl_proto);
 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	} else {
 		eh_type = ntohs(eh->evl_encap_proto);
 		ehdrlen = ETHER_HDR_LEN;
 	}
 
 	switch (eh_type) {
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
 		if (ip6->ip6_nxt != IPPROTO_TCP)
 			return (ENXIO);
 		ip_hlen = sizeof(struct ip6_hdr);
 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
 		break;
 #endif
 #ifdef INET
 	case ETHERTYPE_IP:
 		ip = (struct ip *)(mp->m_data + ehdrlen);
 		if (ip->ip_p != IPPROTO_TCP)
 			return (ENXIO);
 		ip->ip_sum = 0;
 		ip_hlen = ip->ip_hl << 2;
 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
 		/* Tell transmit desc to also do IPv4 checksum. */
 		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
 		break;
 #endif
 	default:
 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
 		    __func__, eh_type);
 		break;
 	}
 
 	/* The context descriptor goes into the next free ring slot */
 	ctxd = txr->next_avail_desc;
 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
 
 	tcp_hlen = th->th_off << 2;
 
 	/* This is used in the transmit desc in encap */
 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
 
 	/* VLAN MACLEN IPLEN */
 	if (mp->m_flags & M_VLANTAG) {
 		vtag = htole16(mp->m_pkthdr.ether_vtag);
 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
 	}
 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= ip_hlen;
 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
 
 	/* ADV DTYPE TUCMD */
 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
 	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
 
 	/* MSS L4LEN IDX */
 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
 	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
 	/* 82575 needs the queue index added */
 	if (adapter->hw.mac.type == e1000_82575)
 		mss_l4len_idx |= txr->me << 4;
 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
 
 	TXD->seqnum_seed = htole32(0);
 
 	/* One descriptor consumed; advance with wrap-around */
 	ctxd++;
 	if (ctxd == txr->num_desc)
 		ctxd = 0;
 	txr->next_avail_desc = ctxd;
 	txr->tx_avail--;
 
 	/* Hand the offload bits back for the data descriptors */
 	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
 	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
 	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
 	++txr->tso_tx;
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
  *
  *  TSO packets are handed to igb_tso_setup().  Otherwise a context
  *  descriptor is written (and one ring slot consumed) when the mbuf
  *  carries a VLAN tag or requests checksum offload; with neither,
  *  no descriptor is used.  Always returns 0 on the non-TSO path.
  *
  **********************************************************************/
 
 static int
 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
     u32 *cmd_type_len, u32 *olinfo_status)
 {
 	struct adapter *adapter = txr->adapter;
 	struct e1000_adv_tx_context_desc *TXD;
 	struct ether_vlan_header *eh;
 	struct ip6_hdr *ip6;
 	struct ip *ip;
 	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
 	u16 etype, vtag = 0;
 	u8 ipproto = 0;
 	int ehdrlen, ip_hlen = 0;
 	int ctxd = txr->next_avail_desc;
 	int offload;
 
 	/* First check if TSO is to be used */
 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
 		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
 
 	offload = ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) ?
 	    FALSE : TRUE;
 
 	/* Indicate the whole packet as payload when not doing TSO */
 	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
 
 	/* Now ready a context descriptor */
 	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
 
 	/*
 	** In advanced descriptors the vlan tag must
 	** be placed into the context descriptor. Hence
 	** we need to make one even if not doing offloads.
 	*/
 	if (mp->m_flags & M_VLANTAG) {
 		vtag = htole16(mp->m_pkthdr.ether_vtag);
 		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
 	} else if (offload == FALSE) {
 		/* Neither tag nor offload: no context descriptor needed */
 		return (0);
 	}
 
 	/*
 	 * Determine where frame payload starts.
 	 * Jump over vlan headers if already present,
 	 * helpful for QinQ too.
 	 */
 	eh = mtod(mp, struct ether_vlan_header *);
 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 		etype = ntohs(eh->evl_proto);
 	} else {
 		ehdrlen = ETHER_HDR_LEN;
 		etype = ntohs(eh->evl_encap_proto);
 	}
 
 	/* Set the ether header length */
 	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
 
 	/* Layer 3: pick up header length and next protocol */
 	switch (etype) {
 	case ETHERTYPE_IP:
 		ip = (struct ip *)(mp->m_data + ehdrlen);
 		ip_hlen = ip->ip_hl << 2;
 		ipproto = ip->ip_p;
 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
 		break;
 	case ETHERTYPE_IPV6:
 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
 		ip_hlen = sizeof(struct ip6_hdr);
 		/* XXX-BZ this will go badly in case of ext hdrs. */
 		ipproto = ip6->ip6_nxt;
 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
 		break;
 	default:
 		offload = FALSE;
 		break;
 	}
 
 	vlan_macip_lens |= ip_hlen;
 	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
 
 	/* Layer 4: select the checksum type the stack asked for */
 	switch (ipproto) {
 	case IPPROTO_TCP:
 		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
 		break;
 	case IPPROTO_UDP:
 		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
 		break;
 
 #if __FreeBSD_version >= 800000
 	case IPPROTO_SCTP:
 		if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
 			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
 		break;
 #endif
 	default:
 		offload = FALSE;
 		break;
 	}
 
 	/* For the TX descriptor setup */
 	if (offload)
 		*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
 
 	/* 82575 needs the queue index added */
 	if (adapter->hw.mac.type == e1000_82575)
 		mss_l4len_idx = txr->me << 4;
 
 	/* Now copy bits into descriptor */
 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
 	TXD->seqnum_seed = htole32(0);
 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
 
 	/* We've consumed the first desc, adjust counters */
 	ctxd++;
 	if (ctxd == txr->num_desc)
 		ctxd = 0;
 	txr->next_avail_desc = ctxd;
 	--txr->tx_avail;
 
 	return (0);
 }
 
 /**********************************************************************
  *
  *  Examine each tx_buffer in the used queue. If the hardware is done
  *  processing the packet then free associated resources. The
  *  tx_buffer is put back on the free queue.
  *
  *  Must be called with the TX lock held.
  *
  *  TRUE return means there's work in the ring to clean, FALSE means
  *  it's empty.
  **********************************************************************/
 static bool
 igb_txeof(struct tx_ring *txr)
 {
 	struct adapter		*adapter = txr->adapter;
 	struct ifnet		*ifp = adapter->ifp;
 	struct igb_tx_buf	*txbuf;
 	union e1000_adv_tx_desc	*txd, *eop;
 	u32			idx, cleaned = 0;
 	u16			budget = txr->process_limit;
 
 	mtx_assert(&txr->tx_mtx, MA_OWNED);
 
 #ifdef DEV_NETMAP
 	if (netmap_tx_irq(ifp, txr->me))
 		return (FALSE);
 #endif /* DEV_NETMAP */
 
 	/* Every descriptor already free: nothing can be outstanding */
 	if (txr->tx_avail == txr->num_desc) {
 		txr->queue_status = IGB_QUEUE_IDLE;
 		return FALSE;
 	}
 
 	/*
 	 * Start at next_to_clean.  idx is then biased by -num_desc so that
 	 * it reaches zero exactly when the walk runs off the ring end.
 	 */
 	idx = txr->next_to_clean;
 	txbuf = &txr->tx_buffers[idx];
 	txd = &txr->tx_base[idx];
 	idx -= txr->num_desc;
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	do {
 		/* Stop at a slot with no packet, or one still in flight */
 		eop = txbuf->eop;
 		if (eop == NULL ||
 		    (eop->wb.status & E1000_TXD_STAT_DD) == 0)
 			break;
 
 		/* Release every descriptor of this packet, up to its EOP */
 		for (;;) {
 			if (txbuf->m_head) {
 				txr->bytes += txbuf->m_head->m_pkthdr.len;
 				bus_dmamap_sync(txr->txtag, txbuf->map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(txr->txtag, txbuf->map);
 				m_freem(txbuf->m_head);
 				txbuf->m_head = NULL;
 			}
 			txbuf->eop = NULL;
 			++txr->tx_avail;
 
 			if (txd == eop)
 				break;
 
 			++txd;
 			++txbuf;
 			++idx;
 			/* wrap the ring? */
 			if (__predict_false(!idx)) {
 				idx -= txr->num_desc;
 				txbuf = txr->tx_buffers;
 				txd = txr->tx_base;
 			}
 		}
 
 		++txr->packets;
 		++cleaned;
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		txr->watchdog_time = ticks;
 
 		/* Step to the first descriptor of the next packet */
 		++txd;
 		++txbuf;
 		++idx;
 		/* reset with a wrap */
 		if (__predict_false(!idx)) {
 			idx -= txr->num_desc;
 			txbuf = txr->tx_buffers;
 			txd = txr->tx_base;
 		}
 		prefetch(txd);
 	} while (__predict_true(--budget));
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/* Undo the bias to get a real ring index again */
 	idx += txr->num_desc;
 	txr->next_to_clean = idx;
 
 	/*
 	** Watchdog calculation, we know there's
 	** work outstanding or the first return
 	** would have been taken, so none processed
 	** for too long indicates a hang.
 	*/
 	if (cleaned == 0 && (ticks - txr->watchdog_time) > IGB_WATCHDOG)
 		txr->queue_status |= IGB_QUEUE_HUNG;
 
 	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
 		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
 
 	if (txr->tx_avail == txr->num_desc) {
 		txr->queue_status = IGB_QUEUE_IDLE;
 		return (FALSE);
 	}
 
 	return (TRUE);
 }
 
 /*********************************************************************
  *
  *  Refresh mbuf buffers for RX descriptor rings
  *   - now keeps its own state so discards due to resource
  *     exhaustion are unnecessary, if an mbuf cannot be obtained
  *     it just returns, keeping its placeholder, thus it can simply
  *     be recalled to try again.
  *
  *  Starts at rxr->next_to_refresh and stops once the slot after the
  *  current one equals 'limit'.  The RDT tail register is written only
  *  when at least one descriptor was refreshed.
  *
  **********************************************************************/
 static void
 igb_refresh_mbufs(struct rx_ring *rxr, int limit)
 {
 	struct adapter		*adapter = rxr->adapter;
 	struct igb_rx_buf	*rxbuf;
 	struct mbuf		*mh, *mp;
 	bus_dma_segment_t	hseg[1];
 	bus_dma_segment_t	pseg[1];
 	int			i, j, nsegs, error;
 	bool			refreshed = FALSE;
 
 	/*
 	** i is the slot being refreshed, j runs one
 	** descriptor ahead of it and controls the loop.
 	*/
 	i = j = rxr->next_to_refresh;
 	if (++j == adapter->num_rx_desc)
 		j = 0;
 
 	while (j != limit) {
 		rxbuf = &rxr->rx_buffers[i];
 
 		/* A header mbuf is only used when header split is on */
 		if (rxr->hdr_split != FALSE) {
 			mh = rxbuf->m_head;
 			if (mh == NULL) {
 				mh = m_gethdr(M_NOWAIT, MT_DATA);
 				if (mh == NULL)
 					goto update;
 			}
 
 			mh->m_pkthdr.len = mh->m_len = MHLEN;
 			mh->m_flags |= M_PKTHDR;
 			/* Get the memory mapping */
 			error = bus_dmamap_load_mbuf_sg(rxr->htag,
 			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
 			if (error != 0) {
 				printf("Refresh mbufs: hdr dmamap load"
 				    " failure - %d\n", error);
 				m_free(mh);
 				rxbuf->m_head = NULL;
 				goto update;
 			}
 			rxbuf->m_head = mh;
 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
 			    BUS_DMASYNC_PREREAD);
 			rxr->rx_base[i].read.hdr_addr =
 			    htole64(hseg[0].ds_addr);
 		}
 
 		/* Payload buffer: reuse the one in place or get a new one */
 		mp = rxbuf->m_pack;
 		if (mp == NULL) {
 			mp = m_getjcl(M_NOWAIT, MT_DATA,
 			    M_PKTHDR, adapter->rx_mbuf_sz);
 			if (mp == NULL)
 				goto update;
 		}
 
 		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
 		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			printf("Refresh mbufs: payload dmamap load"
 			    " failure - %d\n", error);
 			m_free(mp);
 			rxbuf->m_pack = NULL;
 			goto update;
 		}
 		rxbuf->m_pack = mp;
 		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
 		    BUS_DMASYNC_PREREAD);
 		rxr->rx_base[i].read.pkt_addr =
 		    htole64(pseg[0].ds_addr);
 		refreshed = TRUE;
 
 		/* Advance: the next slot was computed one step ahead */
 		i = j;
 		rxr->next_to_refresh = i;
 		if (++j == adapter->num_rx_desc)
 			j = 0;
 	}
 
 update:
 	/* Only move the hardware tail when something was refreshed */
 	if (refreshed)
 		E1000_WRITE_REG(&adapter->hw,
 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for rx_buffer structures. Since we use one
  *  rx_buffer per received packet, the maximum number of rx_buffer's
  *  that we'll need is equal to the number of receive descriptors
  *  that we've allocated.
  *
  **********************************************************************/
 static int
 igb_allocate_receive_buffers(struct rx_ring *rxr)
 {
 	struct	adapter 	*adapter = rxr->adapter;
 	device_t 		dev = adapter->dev;
 	struct igb_rx_buf	*rxbuf;
 	int             	i, bsize, error;
 
 	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
 	if (!(rxr->rx_buffers =
 	    (struct igb_rx_buf *) malloc(bsize,
 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
 				   1, 0,		/* alignment, bounds */
 				   BUS_SPACE_MAXADDR,	/* lowaddr */
 				   BUS_SPACE_MAXADDR,	/* highaddr */
 				   NULL, NULL,		/* filter, filterarg */
 				   MSIZE,		/* maxsize */
 				   1,			/* nsegments */
 				   MSIZE,		/* maxsegsize */
 				   0,			/* flags */
 				   NULL,		/* lockfunc */
 				   NULL,		/* lockfuncarg */
 				   &rxr->htag))) {
 		device_printf(dev, "Unable to create RX DMA tag\n");
 		goto fail;
 	}
 
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
 				   1, 0,		/* alignment, bounds */
 				   BUS_SPACE_MAXADDR,	/* lowaddr */
 				   BUS_SPACE_MAXADDR,	/* highaddr */
 				   NULL, NULL,		/* filter, filterarg */
 				   MJUM9BYTES,		/* maxsize */
 				   1,			/* nsegments */
 				   MJUM9BYTES,		/* maxsegsize */
 				   0,			/* flags */
 				   NULL,		/* lockfunc */
 				   NULL,		/* lockfuncarg */
 				   &rxr->ptag))) {
 		device_printf(dev, "Unable to create RX payload DMA tag\n");
 		goto fail;
 	}
 
 	for (i = 0; i < adapter->num_rx_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
 		if (error) {
 			device_printf(dev,
 			    "Unable to create RX head DMA maps\n");
 			goto fail;
 		}
 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
 		if (error) {
 			device_printf(dev,
 			    "Unable to create RX packet DMA maps\n");
 			goto fail;
 		}
 	}
 
 	return (0);
 
 fail:
 	/* Frees all, but can handle partial completion */
 	igb_free_receive_structures(adapter);
 	return (error);
 }
 
 
 static void
 igb_free_receive_ring(struct rx_ring *rxr)
 {
 	struct	adapter		*adapter = rxr->adapter;
 	struct igb_rx_buf	*rxbuf;
 
 
 	for (int i = 0; i < adapter->num_rx_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->m_head != NULL) {
 			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
 			rxbuf->m_head->m_flags |= M_PKTHDR;
 			m_freem(rxbuf->m_head);
 		}
 		if (rxbuf->m_pack != NULL) {
 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
 			rxbuf->m_pack->m_flags |= M_PKTHDR;
 			m_freem(rxbuf->m_pack);
 		}
 		rxbuf->m_head = NULL;
 		rxbuf->m_pack = NULL;
 	}
 }
 
 
 /*********************************************************************
  *
  *  Initialize a receive ring and its buffers.
  *
  *  Zeroes the descriptor area, releases the mbufs the ring currently
  *  holds, and then attaches a header mbuf (header split only) and a
  *  payload cluster to every descriptor.  When netmap returns a slot
  *  array the descriptors are pointed at netmap buffers instead.
  *  The RX lock is taken on entry and dropped on every exit path.
  *
  *  Returns 0 on success.  On failure the ring's mbufs are freed again
  *  and ENOBUFS, or the error from bus_dmamap_load_mbuf_sg() or
  *  tcp_lro_init(), is returned.
  *
  **********************************************************************/
 static int
 igb_setup_receive_ring(struct rx_ring *rxr)
 {
 	struct	adapter		*adapter;
 	struct  ifnet		*ifp;
 	device_t		dev;
 	struct igb_rx_buf	*rxbuf;
 	bus_dma_segment_t	pseg[1], hseg[1];
 	struct lro_ctrl		*lro = &rxr->lro;
 	int			rsize, nsegs, error = 0;
 #ifdef DEV_NETMAP
 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
 	struct netmap_slot *slot;
 #endif /* DEV_NETMAP */
 
 	adapter = rxr->adapter;
 	dev = adapter->dev;
 	ifp = adapter->ifp;
 
 	/* Clear the ring contents */
 	IGB_RX_LOCK(rxr);
 #ifdef DEV_NETMAP
 	/* A non-NULL slot selects the netmap branch in the loop below. */
 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
 #endif /* DEV_NETMAP */
 	rsize = roundup2(adapter->num_rx_desc *
 	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
 	bzero((void *)rxr->rx_base, rsize);
 
 	/*
 	** Free current RX buffer structures and their mbufs
 	*/
 	igb_free_receive_ring(rxr);
 
 	/* Configure for header split? (never cleared again in this function) */
 	if (igb_header_split)
 		rxr->hdr_split = TRUE;
 
         /* Now replenish the ring mbufs */
 	for (int j = 0; j < adapter->num_rx_desc; ++j) {
 		struct mbuf	*mh, *mp;
 
 		rxbuf = &rxr->rx_buffers[j];
 #ifdef DEV_NETMAP
 		if (slot) {
 			/* slot sj is mapped to the j-th NIC-ring entry */
 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
 			uint64_t paddr;
 			void *addr;
 
 			addr = PNMB(na, slot + sj, &paddr);
 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
 			/* Update descriptor */
 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
 			/* No mbufs are allocated for netmap-owned slots. */
 			continue;
 		}
 #endif /* DEV_NETMAP */
 		/* Without header split only the payload cluster is needed. */
 		if (rxr->hdr_split == FALSE)
 			goto skip_head;
 
 		/* First the header */
 		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
 		if (rxbuf->m_head == NULL) {
 			error = ENOBUFS;
                         goto fail;
 		}
 		/*
 		 * NOTE(review): m_adj() is called before m_len is set
 		 * below; confirm the ETHER_ALIGN trim takes effect on a
 		 * freshly allocated mbuf.
 		 */
 		m_adj(rxbuf->m_head, ETHER_ALIGN);
 		mh = rxbuf->m_head;
 		mh->m_len = mh->m_pkthdr.len = MHLEN;
 		mh->m_flags |= M_PKTHDR;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
 		    rxbuf->hmap, rxbuf->m_head, hseg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) /* Nothing elegant to do here */
                         goto fail;
 		bus_dmamap_sync(rxr->htag,
 		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
 		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
 
 skip_head:
 		/* Now the payload cluster */
 		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
 		    M_PKTHDR, adapter->rx_mbuf_sz);
 		if (rxbuf->m_pack == NULL) {
 			error = ENOBUFS;
                         goto fail;
 		}
 		mp = rxbuf->m_pack;
 		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
 		    rxbuf->pmap, mp, pseg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0)
                         goto fail;
 		bus_dmamap_sync(rxr->ptag,
 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
 		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
         }
 
 	/* Setup our descriptor indices */
 	rxr->next_to_check = 0;
 	rxr->next_to_refresh = adapter->num_rx_desc - 1;
 	rxr->lro_enabled = FALSE;
 	rxr->rx_split_packets = 0;
 	rxr->rx_bytes = 0;
 
 	/* No partially assembled frame is pending. */
 	rxr->fmp = NULL;
 	rxr->lmp = NULL;
 
 	/* Push the freshly written descriptors out before the ring is used. */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/*
 	** Now set up the LRO interface.  LRO is only
 	** initialized when IFCAP_LRO is enabled on the
 	** interface; header split was decided above from
 	** igb_header_split and does not depend on LRO here.
 	*/
 	if (ifp->if_capenable & IFCAP_LRO) {
 		error = tcp_lro_init(lro);
 		if (error) {
 			device_printf(dev, "LRO Initialization failed!\n");
 			goto fail;
 		}
 		INIT_DEBUGOUT("RX LRO Initialized\n");
 		rxr->lro_enabled = TRUE;
 		lro->ifp = adapter->ifp;
 	}
 
 	IGB_RX_UNLOCK(rxr);
 	return (0);
 
 fail:
 	/* Undo every buffer attached so far, then report the error. */
 	igb_free_receive_ring(rxr);
 	IGB_RX_UNLOCK(rxr);
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  Initialize all receive rings.
  *
  **********************************************************************/
 static int
 igb_setup_receive_structures(struct adapter *adapter)
 {
 	struct rx_ring *rxr = adapter->rx_rings;
 	int i;
 
 	for (i = 0; i < adapter->num_queues; i++, rxr++)
 		if (igb_setup_receive_ring(rxr))
 			goto fail;
 
 	return (0);
 fail:
 	/*
 	 * Free RX buffers allocated so far, we will only handle
 	 * the rings that completed, the failing case will have
 	 * cleaned up for itself. 'i' is the endpoint.
 	 */
 	for (int j = 0; j < i; ++j) {
 		rxr = &adapter->rx_rings[j];
 		IGB_RX_LOCK(rxr);
 		igb_free_receive_ring(rxr);
 		IGB_RX_UNLOCK(rxr);
 	}
 
 	return (ENOBUFS);
 }
 
 /*
  * Program the RSS redirection table, the RSS hash key and the MRQC
  * register for NICs that support multiple transmit/receive rings.
  */
 static void
 igb_initialise_rss_mapping(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 rss_key[10];
 	u32 reta = 0;
 	u32 mrqc;
 	u32 shift;
 	int queue_id;
 	int i;
 
 	/* XXX? */
 	shift = (adapter->hw.mac.type == e1000_82575) ? 6 : 0;
 
 	/*
 	 * Redirection table: 128 one-byte entries, packed four to a
 	 * DWORD with the first entry in the least significant byte.
 	 * Each entry names the queue a hash bucket is steered to.
 	 *
 	 * Without RSS the buckets are dealt out round-robin over the
 	 * queues.  With RSS the kernel's bucket mapping is used and
 	 * folded onto the available queues with a modulo, so the index
 	 * stays in range whether there are more queues than buckets
 	 * or the other way around.
 	 */
 	for (i = 0; i < 128; i++) {
 #ifdef	RSS
 		queue_id = rss_get_indirection_to_bucket(i);
 		queue_id = queue_id % adapter->num_queues;
 #else
 		queue_id = (i % adapter->num_queues);
 #endif
 		/* Adjust if required */
 		queue_id <<= shift;
 
 		/*
 		 * Shift the previously collected entries down one byte
 		 * and drop the new one into the top byte; after four
 		 * rounds the first entry sits in the low byte.
 		 */
 		reta = (reta >> 8) | (((uint32_t) queue_id) << 24);
 		if ((i & 3) != 3)
 			continue;
 
 		/* Four entries collected: flush the DWORD and start over. */
 		E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
 		reta = 0;
 	}
 
 	/* Now fill in hash table */
 #ifdef	RSS
 	/* XXX ew typecasting */
 	rss_getkey((uint8_t *) &rss_key);
 #else
 	arc4rand(&rss_key, sizeof(rss_key), 0);
 #endif
 	for (i = 0; i < 10; i++) {
 		E1000_WRITE_REG_ARRAY(hw,
 		    E1000_RSSRK(0), i, rss_key[i]);
 	}
 
 	/*
 	 * Enable RSS and select the packet fields to hash upon.
 	 *
 	 * XXX This means RSS enable + 8 queues for my igb (82580.)
 	 */
 	mrqc = E1000_MRQC_ENABLE_RSS_4Q |
 	    E1000_MRQC_RSS_FIELD_IPV4 |
 	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
 	    E1000_MRQC_RSS_FIELD_IPV6 |
 	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
 	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
 	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
 	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
 	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
 
 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
 }
 
 /*********************************************************************
  *
  *  Enable receive unit.
  *
  *  With receives disabled in RCTL, this builds the SRRCTL value
  *  (descriptor type and buffer sizes), programs base, length, SRRCTL
  *  and RXDCTL for every queue, sets up RSS and RXCSUM, writes the
  *  final RCTL value (which re-enables receives) and finally sets the
  *  head and tail pointers of every ring.
  *
  **********************************************************************/
 static void
 igb_initialize_receive_units(struct adapter *adapter)
 {
 	struct rx_ring	*rxr = adapter->rx_rings;
 	struct ifnet	*ifp = adapter->ifp;
 	struct e1000_hw *hw = &adapter->hw;
 	u32		rctl, rxcsum, psize, srrctl = 0;
 
 	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
 
 	/*
 	 * Make sure receives are disabled while setting
 	 * up the descriptor ring
 	 */
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
 
 	/*
 	** Set up for header split
 	*/
 	if (igb_header_split) {
 		/* Use a standard mbuf for the header */
 		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
 		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
 	} else
 		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
 	/*
 	** Set up for jumbo frames
 	*/
 	if (ifp->if_mtu > ETHERMTU) {
 		rctl |= E1000_RCTL_LPE;
 		/*
 		 * Pick the packet buffer size from the cluster size in
 		 * use.  NOTE(review): a cluster smaller than
 		 * MJUMPAGESIZE matches neither branch, leaving the
 		 * packet buffer size bits of srrctl and rctl unset
 		 * here -- confirm that cannot happen with a jumbo MTU.
 		 */
 		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
 			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
 		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
 			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
 		}
 		/* Set maximum packet len */
 		psize = adapter->max_frame_size;
 		/* are we on a vlan? */
 		if (adapter->ifp->if_vlantrunk != NULL)
 			psize += VLAN_TAG_SIZE;
 		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
 	} else {
 		rctl &= ~E1000_RCTL_LPE;
 		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 		rctl |= E1000_RCTL_SZ_2048;
 	}
 
 	/*
 	 * If TX flow control is disabled and there's >1 queue defined,
 	 * enable DROP.
 	 *
 	 * This drops frames rather than hanging the RX MAC for all queues.
 	 */
 	if ((adapter->num_queues > 1) &&
 	    (adapter->fc == e1000_fc_none ||
 	     adapter->fc == e1000_fc_rx_pause)) {
 		srrctl |= E1000_SRRCTL_DROP_EN;
 	}
 
 	/* Setup the Base and Length of the Rx Descriptor Rings */
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		u64 bus_addr = rxr->rxdma.dma_paddr;
 		u32 rxdctl;
 
 		/*
 		 * NOTE(review): the length uses struct e1000_rx_desc
 		 * while the ring is cleared elsewhere using union
 		 * e1000_adv_rx_desc; this assumes both have the same
 		 * size -- TODO confirm.
 		 */
 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
 		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
 		E1000_WRITE_REG(hw, E1000_RDBAH(i),
 		    (uint32_t)(bus_addr >> 32));
 		E1000_WRITE_REG(hw, E1000_RDBAL(i),
 		    (uint32_t)bus_addr);
 		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
 		/* Enable this Queue */
 		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
 		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
 		/*
 		 * Clear the low 20 bits, then place the three
 		 * thresholds at bit offsets 0, 8 and 16.
 		 */
 		rxdctl &= 0xFFF00000;
 		rxdctl |= IGB_RX_PTHRESH;
 		rxdctl |= IGB_RX_HTHRESH << 8;
 		rxdctl |= IGB_RX_WTHRESH << 16;
 		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
 	}
 
 	/*
 	** Setup for RX MultiQueue
 	*/
 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
 	if (adapter->num_queues >1) {
 
 		/* rss setup */
 		igb_initialise_rss_mapping(adapter);
 
 		/*
 		** NOTE: Receive Full-Packet Checksum Offload
 		** is mutually exclusive with Multiqueue. However
 		** this is not the same as TCP/IP checksums which
 		** still work.
 		*/
 		rxcsum |= E1000_RXCSUM_PCSD;
 #if __FreeBSD_version >= 800000
 		/* For SCTP Offload */
 		if ((hw->mac.type == e1000_82576)
 		    && (ifp->if_capenable & IFCAP_RXCSUM))
 			rxcsum |= E1000_RXCSUM_CRCOFL;
 #endif
 	} else {
 		/* Non RSS setup */
 		if (ifp->if_capenable & IFCAP_RXCSUM) {
 			rxcsum |= E1000_RXCSUM_IPPCSE;
 #if __FreeBSD_version >= 800000
 			if (adapter->hw.mac.type == e1000_82576)
 				rxcsum |= E1000_RXCSUM_CRCOFL;
 #endif
 		} else
 			rxcsum &= ~E1000_RXCSUM_TUOFL;
 	}
 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
 
 	/* Setup the Receive Control Register */
 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
 		   E1000_RCTL_RDMTS_HALF |
 		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
 	/* Strip CRC bytes. */
 	rctl |= E1000_RCTL_SECRC;
 	/* Make sure VLAN Filters are off */
 	rctl &= ~E1000_RCTL_VFE;
 	/* Don't store bad packets */
 	rctl &= ~E1000_RCTL_SBP;
 
 	/* Enable Receives */
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 
 	/*
 	 * Setup the HW Rx Head and Tail Descriptor Pointers
 	 *   - needs to be after enable
 	 */
 	for (int i = 0; i < adapter->num_queues; i++) {
 		/* rxr was advanced past the array above; index it afresh. */
 		rxr = &adapter->rx_rings[i];
 		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
 #ifdef DEV_NETMAP
 		/*
 		 * an init() while a netmap client is active must
 		 * preserve the rx buffers passed to userspace.
 		 * In this driver it means we adjust RDT to
 		 * something different from next_to_refresh
 		 * (which is not used in netmap mode).
 		 */
 		if (ifp->if_capenable & IFCAP_NETMAP) {
 			struct netmap_adapter *na = NA(adapter->ifp);
 			struct netmap_kring *kring = &na->rx_rings[i];
 			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
 
 			/* Wrap t back into [0, num_rx_desc). */
 			if (t >= adapter->num_rx_desc)
 				t -= adapter->num_rx_desc;
 			else if (t < 0)
 				t += adapter->num_rx_desc;
 			E1000_WRITE_REG(hw, E1000_RDT(i), t);
 		} else
 #endif /* DEV_NETMAP */
 		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
 	}
 	return;
 }
 
 /*********************************************************************
  *
  *  Free receive rings.
  *
  *  For every queue: release the ring's buffers, its LRO state and
  *  its descriptor DMA memory.  The ring array itself goes last.
  *
  **********************************************************************/
 static void
 igb_free_receive_structures(struct adapter *adapter)
 {
 	struct rx_ring *rings = adapter->rx_rings;
 	struct rx_ring *ring;
 	int q;
 
 	for (q = 0; q < adapter->num_queues; q++) {
 		ring = &rings[q];
 		igb_free_receive_buffers(ring);
 		tcp_lro_free(&ring->lro);
 		igb_dma_free(adapter, &ring->rxdma);
 	}
 
 	free(adapter->rx_rings, M_DEVBUF);
 }
 
 /*********************************************************************
  *
  *  Free receive ring data structures.
  *
  *  Releases every mbuf still attached to the ring, destroys the
  *  per-buffer DMA maps, frees the rx_buffers array and finally
  *  destroys the header and payload DMA tags.  Pointers are reset to
  *  NULL as they are released.
  *
  **********************************************************************/
 static void
 igb_free_receive_buffers(struct rx_ring *rxr)
 {
 	struct adapter		*sc = rxr->adapter;
 	struct igb_rx_buf	*buf;
 	int			n;
 
 	INIT_DEBUGOUT("free_receive_structures: begin");
 
 	/* Cleanup any existing buffers */
 	if (rxr->rx_buffers != NULL) {
 		for (n = 0; n < sc->num_rx_desc; n++) {
 			buf = &rxr->rx_buffers[n];
 
 			/* Header mbuf: sync, unmap, then free. */
 			if (buf->m_head != NULL) {
 				bus_dmamap_sync(rxr->htag, buf->hmap,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->htag, buf->hmap);
 				buf->m_head->m_flags |= M_PKTHDR;
 				m_freem(buf->m_head);
 			}
 			buf->m_head = NULL;
 
 			/* Payload mbuf: same sequence on the payload tag. */
 			if (buf->m_pack != NULL) {
 				bus_dmamap_sync(rxr->ptag, buf->pmap,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->ptag, buf->pmap);
 				buf->m_pack->m_flags |= M_PKTHDR;
 				m_freem(buf->m_pack);
 			}
 			buf->m_pack = NULL;
 
 			/* The maps themselves are no longer needed. */
 			if (buf->hmap != NULL) {
 				bus_dmamap_destroy(rxr->htag, buf->hmap);
 				buf->hmap = NULL;
 			}
 			if (buf->pmap != NULL) {
 				bus_dmamap_destroy(rxr->ptag, buf->pmap);
 				buf->pmap = NULL;
 			}
 		}
 
 		free(rxr->rx_buffers, M_DEVBUF);
 		rxr->rx_buffers = NULL;
 	}
 
 	/* Tags go last, after every map created from them is gone. */
 	if (rxr->htag != NULL) {
 		bus_dma_tag_destroy(rxr->htag);
 		rxr->htag = NULL;
 	}
 	if (rxr->ptag != NULL) {
 		bus_dma_tag_destroy(rxr->ptag);
 		rxr->ptag = NULL;
 	}
 }
 
 /*
  * Throw away the frame being received on descriptor 'i' of 'rxr'.
  *
  * Frees any partially assembled chain (fmp/lmp) and the header and
  * payload mbufs attached to the descriptor's buffer slot, leaving
  * the slot's mbuf pointers NULL.
  */
 static __inline void
 igb_rx_discard(struct rx_ring *rxr, int i)
 {
 	struct igb_rx_buf	*rbuf;
 
 	rbuf = &rxr->rx_buffers[i];
 
 	/* Partially received? Free the chain */
 	if (rxr->fmp != NULL) {
 		rxr->fmp->m_flags |= M_PKTHDR;
 		m_freem(rxr->fmp);
 		rxr->fmp = NULL;
 		rxr->lmp = NULL;
 	}
 
 	/*
 	** With advanced descriptors the writeback
 	** clobbers the buffer addrs, so its easier
 	** to just free the existing mbufs and take
 	** the normal refresh path to get new buffers
 	** and mapping.
 	**
 	** The DMA map is unloaded before the mbuf is
 	** freed, so the mapping never refers to memory
 	** that has already gone back to the allocator.
 	*/
 	if (rbuf->m_head) {
 		bus_dmamap_unload(rxr->htag, rbuf->hmap);
 		m_free(rbuf->m_head);
 		rbuf->m_head = NULL;
 	}
 
 	if (rbuf->m_pack) {
 		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
 		m_free(rbuf->m_pack);
 		rbuf->m_pack = NULL;
 	}
 
 	return;
 }
 
 /*
  * Hand a completed frame to LRO when it qualifies, otherwise to the
  * interface input routine.  The RX lock is released around the
  * if_input call and re-acquired afterwards.
  */
 static __inline void
 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
 {
 
 	/*
 	 * LRO is attempted only when all of the following hold:
 	 *  - LRO is enabled on this ring,
 	 *  - hardware VLAN tagging is enabled on the interface,
 	 *  - the packet type is not an ETQF match,
 	 *  - the packet type is IPv4 + TCP,
 	 *  - the hardware validated the TCP checksum,
 	 *  - LRO resources exist (lro_cnt != 0).
 	 * Anything else, or a failed enqueue, goes to the stack.
 	 */
 	if (!rxr->lro_enabled)
 		goto to_stack;
 	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0)
 		goto to_stack;
 	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) != 0)
 		goto to_stack;
 	if ((ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) !=
 	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP))
 		goto to_stack;
 	if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) !=
 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR))
 		goto to_stack;
 	if (rxr->lro.lro_cnt == 0)
 		goto to_stack;
 
 	/* LRO accepted the packet: nothing more to do. */
 	if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
 		return;
 
 to_stack:
 	IGB_RX_UNLOCK(rxr);
 	(*ifp->if_input)(ifp, m);
 	IGB_RX_LOCK(rxr);
 }
 
 /*********************************************************************
  *
  *  This routine executes in interrupt context. It replenishes
  *  the mbufs in the descriptor and sends data which has been
  *  dma'ed into host memory to upper layer.
  *
  *  We loop at most count times if count is > 0, or until done if
  *  count < 0.
  *
  *  que   - queue whose RX ring is cleaned
  *  count - descriptor budget, decremented once per descriptor
  *  done  - if not NULL, incremented by the number of frames that
  *          were handed to igb_rx_input()
  *
  *  Return TRUE if more to clean, FALSE otherwise.  The decision is
  *  taken from the DD bit of the last status word that was read.
  *********************************************************************/
 static bool
 igb_rxeof(struct igb_queue *que, int count, int *done)
 {
 	struct adapter		*adapter = que->adapter;
 	struct rx_ring		*rxr = que->rxr;
 	struct ifnet		*ifp = adapter->ifp;
 	struct lro_ctrl		*lro = &rxr->lro;
 	struct lro_entry	*queued;
 	int			i, processed = 0, rxdone = 0;
 	u32			ptype, staterr = 0;
 	union e1000_adv_rx_desc	*cur;
 
 	IGB_RX_LOCK(rxr);
 	/* Sync the ring. */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 #ifdef DEV_NETMAP
 	/* A non-zero return from netmap_rx_irq() ends processing here. */
 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
 		IGB_RX_UNLOCK(rxr);
 		return (FALSE);
 	}
 #endif /* DEV_NETMAP */
 
 	/* Main clean loop */
 	for (i = rxr->next_to_check; count != 0;) {
 		struct mbuf		*sendmp, *mh, *mp;
 		struct igb_rx_buf	*rxbuf;
 		u16			hlen, plen, hdr, vtag, pkt_info;
 		bool			eop = FALSE;
 
 		cur = &rxr->rx_base[i];
 		staterr = le32toh(cur->wb.upper.status_error);
 		/* Stop at the first descriptor without DD set. */
 		if ((staterr & E1000_RXD_STAT_DD) == 0)
 			break;
 		/* Stop as well once the interface is no longer running. */
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 		count--;
 		sendmp = mh = mp = NULL;
 		/* Clear the status so this descriptor is not seen as done again. */
 		cur->wb.upper.status_error = 0;
 		rxbuf = &rxr->rx_buffers[i];
 		plen = le16toh(cur->wb.upper.length);
 		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
 		/*
 		 * On i350/i354 the VLAN field is read as big-endian when
 		 * the LB status bit is set; otherwise it is little-endian.
 		 */
 		if (((adapter->hw.mac.type == e1000_i350) ||
 		    (adapter->hw.mac.type == e1000_i354)) &&
 		    (staterr & E1000_RXDEXT_STATERR_LB))
 			vtag = be16toh(cur->wb.upper.vlan);
 		else
 			vtag = le16toh(cur->wb.upper.vlan);
 		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
 		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
 
 		/*
 		 * Free the frame (all segments) if we're at EOP and
 		 * it's an error.
 		 *
 		 * The datasheet states that EOP + status is only valid for
 		 * the final segment in a multi-segment frame.
 		 */
 		if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
 			adapter->dropped_pkts++;
 			++rxr->rx_discarded;
 			igb_rx_discard(rxr, i);
 			goto next_desc;
 		}
 
 		/*
 		** The way the hardware is configured to
 		** split, it will ONLY use the header buffer
 		** when header split is enabled, otherwise we
 		** get normal behavior, ie, both header and
 		** payload are DMA'd into the payload buffer.
 		**
 		** The fmp test is to catch the case where a
 		** packet spans multiple descriptors, in that
 		** case only the first header is valid.
 		*/
 		if (rxr->hdr_split && rxr->fmp == NULL) {
 			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
 			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
 			    E1000_RXDADV_HDRBUFLEN_SHIFT;
 			/* Never trust a header length beyond the header buffer. */
 			if (hlen > IGB_HDR_BUF)
 				hlen = IGB_HDR_BUF;
 			mh = rxr->rx_buffers[i].m_head;
 			mh->m_len = hlen;
 			/* clear buf pointer for refresh */
 			rxbuf->m_head = NULL;
 			/*
 			** Get the payload length, this
 			** could be zero if its a small
 			** packet.
 			*/
 			if (plen > 0) {
 				mp = rxr->rx_buffers[i].m_pack;
 				mp->m_len = plen;
 				mh->m_next = mp;
 				/* clear buf pointer */
 				rxbuf->m_pack = NULL;
 				rxr->rx_split_packets++;
 			}
 		} else {
 			/*
 			** Either no header split, or a
 			** secondary piece of a fragmented
 			** split packet.
 			*/
 			mh = rxr->rx_buffers[i].m_pack;
 			mh->m_len = plen;
 			/* clear buf info for refresh */
 			rxbuf->m_pack = NULL;
 		}
 		/* The payload map is unloaded on both paths. */
 		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
 
 		++processed; /* So we know when to refresh */
 
 		/* Initial frame - setup */
 		if (rxr->fmp == NULL) {
 			mh->m_pkthdr.len = mh->m_len;
 			/* Save the head of the chain */
 			rxr->fmp = mh;
 			rxr->lmp = mh;
 			if (mp != NULL) {
 				/* Add payload if split */
 				mh->m_pkthdr.len += mp->m_len;
 				rxr->lmp = mh->m_next;
 			}
 		} else {
 			/* Chain mbuf's together */
 			rxr->lmp->m_next = mh;
 			rxr->lmp = rxr->lmp->m_next;
 			rxr->fmp->m_pkthdr.len += mh->m_len;
 		}
 
 		/* Last segment: finish the packet header and queue it for input. */
 		if (eop) {
 			rxr->fmp->m_pkthdr.rcvif = ifp;
 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 			rxr->rx_packets++;
 			/* capture data for AIM */
 			rxr->packets++;
 			rxr->bytes += rxr->fmp->m_pkthdr.len;
 			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
 
 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 				igb_rx_checksum(staterr, rxr->fmp, ptype);
 
 			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
 			    (staterr & E1000_RXD_STAT_VP) != 0) {
 				rxr->fmp->m_pkthdr.ether_vtag = vtag;
 				rxr->fmp->m_flags |= M_VLANTAG;
 			}
 #ifdef	RSS
 			/* XXX set flowtype once this works right */
 			rxr->fmp->m_pkthdr.flowid =
 			    le32toh(cur->wb.lower.hi_dword.rss);
 			/* Translate the descriptor's RSS type into an mbuf hash type. */
 			switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
 			case E1000_RXDADV_RSSTYPE_IPV4_TCP:
 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV4);
 				break;
 			case E1000_RXDADV_RSSTYPE_IPV4:
 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV4);
 				break;
 			case E1000_RXDADV_RSSTYPE_IPV6_TCP:
 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6);
 				break;
 			case E1000_RXDADV_RSSTYPE_IPV6_EX:
 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6_EX);
 				break;
 			case E1000_RXDADV_RSSTYPE_IPV6:
 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_IPV6);
 				break;
 			case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
 				break;
 
 			/* XXX no UDP support in RSS just yet */
 #ifdef notyet
 			case E1000_RXDADV_RSSTYPE_IPV4_UDP:
 			case E1000_RXDADV_RSSTYPE_IPV6_UDP:
 			case E1000_RXDADV_RSSTYPE_IPV6_UDP_EX:
 #endif
 
 			default:
 				/* XXX fallthrough */
 				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
 			}
 #elif !defined(IGB_LEGACY_TX)
 			rxr->fmp->m_pkthdr.flowid = que->msix;
 			M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
 #endif
 			sendmp = rxr->fmp;
 			/* Make sure to set M_PKTHDR. */
 			sendmp->m_flags |= M_PKTHDR;
 			rxr->fmp = NULL;
 			rxr->lmp = NULL;
 		}
 
 next_desc:
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 		/* Advance our pointers to the next descriptor. */
 		if (++i == adapter->num_rx_desc)
 			i = 0;
 		/*
 		** Send to the stack or LRO
 		*/
 		if (sendmp != NULL) {
 			/*
 			 * igb_rx_input() may drop and retake the RX lock,
 			 * so the position is published before the call and
 			 * read back afterwards.
 			 */
 			rxr->next_to_check = i;
 			igb_rx_input(rxr, ifp, sendmp, ptype);
 			i = rxr->next_to_check;
 			rxdone++;
 		}
 
 		/* Every 8 descriptors we go to refresh mbufs */
 		if (processed == 8) {
                         igb_refresh_mbufs(rxr, i);
                         processed = 0;
 		}
 	}
 
 	/* Catch any remainders */
 	if (igb_rx_unrefreshed(rxr))
 		igb_refresh_mbufs(rxr, i);
 
 	rxr->next_to_check = i;
 
 	/*
 	 * Flush any outstanding LRO work
 	 */
 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
 		tcp_lro_flush(lro, queued);
 	}
 
 	if (done != NULL)
 		*done += rxdone;
 
 	IGB_RX_UNLOCK(rxr);
 	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
 }
 
 /*********************************************************************
  *
  *  Translate the checksum status the hardware wrote back into mbuf
  *  csum_flags, so the stack does not have to verify the checksums
  *  itself.
  *
  *  staterr - status/error word of the descriptor
  *  mp      - first mbuf of the received frame
  *  ptype   - packet type bits of the descriptor
  *
  *********************************************************************/
 static void
 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
 {
 	const u16 stat = (u16)staterr;		/* low 16 bits: status */
 	const u8  errs = (u8) (staterr >> 24);	/* top 8 bits: errors */
 	const int is_sctp =
 	    ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
 	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0);
 	u64 l4_flags;
 
 	/* Ignore Checksum bit is set */
 	if (stat & E1000_RXD_STAT_IXSM) {
 		mp->m_pkthdr.csum_flags = 0;
 		return;
 	}
 
 	/* IP header checksum was evaluated by the hardware. */
 	if (stat & E1000_RXD_STAT_IPCS) {
 		if (errs & E1000_RXD_ERR_IPE)
 			mp->m_pkthdr.csum_flags = 0;
 		else
 			mp->m_pkthdr.csum_flags =
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 	}
 
 	/* Nothing more to do unless an L4 checksum was evaluated. */
 	if ((stat & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) == 0)
 		return;
 
 	l4_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 #if __FreeBSD_version >= 800000
 	if (is_sctp) /* reassign */
 		l4_flags = CSUM_SCTP_VALID;
 #endif
 	/* Did it pass? */
 	if (errs & E1000_RXD_ERR_TCPE)
 		return;
 
 	mp->m_pkthdr.csum_flags |= l4_flags;
 	if (!is_sctp)
 		mp->m_pkthdr.csum_data = htons(0xffff);
 	return;
 }
 
 /*
  * This routine is run via a vlan config EVENT.
  *
  * Records 'vtag' in the shadow VLAN filter table, bumps the VLAN
  * count and, when hardware VLAN filtering is enabled, reprograms the
  * hardware.  Events for other interfaces and tags outside 1..4095
  * are ignored.
  */
 static void
 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u32		index, bit;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
 		return;
 
 	IGB_CORE_LOCK(adapter);
 	/* 32 tags per table word: word index and bit within the word. */
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	/* Unsigned constant: bit may be 31, which overflows a signed 1. */
 	adapter->shadow_vfta[index] |= (1U << bit);
 	++adapter->num_vlans;
 	/* Change hw filter setting */
 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
 		igb_setup_vlan_hw_support(adapter);
 	IGB_CORE_UNLOCK(adapter);
 }
 
 /*
  * This routine is run via a vlan unconfig EVENT.
  *
  * Clears 'vtag' from the shadow VLAN filter table, drops the VLAN
  * count and, when hardware VLAN filtering is enabled, reprograms the
  * hardware.  Events for other interfaces and tags outside 1..4095
  * are ignored.
  */
 static void
 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u32		index, bit;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
 		return;
 
 	IGB_CORE_LOCK(adapter);
 	/* 32 tags per table word: word index and bit within the word. */
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	/* Unsigned constant: bit may be 31, which overflows a signed 1. */
 	adapter->shadow_vfta[index] &= ~(1U << bit);
 	--adapter->num_vlans;
 	/* Change hw filter setting */
 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
 		igb_setup_vlan_hw_support(adapter);
 	IGB_CORE_UNLOCK(adapter);
 }
 
 /*
  * Program the hardware for VLAN operation: set VME in CTRL, turn on
  * the VLAN filter when IFCAP_VLAN_HWFILTER is enabled, enlarge the
  * maximum frame length by one VLAN tag, and reload the filter table
  * from the shadow copy.  A VF interface only has its frame length
  * updated.
  */
 static void
 igb_setup_vlan_hw_support(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	struct e1000_hw *hw = &adapter->hw;
 	u32		val;
 	int		slot;
 
 	/* Virtual function: frame size only, then done. */
 	if (adapter->vf_ifp) {
 		e1000_rlpml_set_vf(hw,
 		    adapter->max_frame_size + VLAN_TAG_SIZE);
 		return;
 	}
 
 	val = E1000_READ_REG(hw, E1000_CTRL) | E1000_CTRL_VME;
 	E1000_WRITE_REG(hw, E1000_CTRL, val);
 
 	/* Enable the Filter Table */
 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
 		val = E1000_READ_REG(hw, E1000_RCTL);
 		val = (val & ~E1000_RCTL_CFIEN) | E1000_RCTL_VFE;
 		E1000_WRITE_REG(hw, E1000_RCTL, val);
 	}
 
 	/* Update the frame size */
 	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
 	    adapter->max_frame_size + VLAN_TAG_SIZE);
 
 	/* Only touch the table when filtering is on and vlans exist */
 	if (!((adapter->num_vlans != 0) &&
 	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) != 0)))
 		return;
 
 	/*
 	** A soft reset zero's out the VFTA, so
 	** we need to repopulate it now.
 	*/
 	for (slot = 0; slot < IGB_VFTA_SIZE; slot++) {
 		if (adapter->shadow_vfta[slot] == 0)
 			continue;
 		/*
 		 * NOTE(review): the vf_ifp case already returned at the
 		 * top of this function, so the first branch looks
 		 * unreachable -- confirm before relying on it.
 		 */
 		if (adapter->vf_ifp)
 			e1000_vfta_set_vf(hw,
 			    adapter->shadow_vfta[slot], TRUE);
 		else
 			e1000_write_vfta(hw,
 			    slot, adapter->shadow_vfta[slot]);
 	}
 }
 
 /*
  * Unmask the adapter's interrupts.  When msix_mem is set the queue
  * and link vectors are written to EIAC, EIAM and EIMS and only the
  * link status change cause is enabled in IMS; otherwise the full
  * IMS_ENABLE_MASK is written to IMS.
  */
 static void
 igb_enable_intr(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 
 	if (!adapter->msix_mem) {
 		E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK);
 	} else {
 		/* With RSS set up what to auto clear */
 		u32 vectors = (adapter->que_mask | adapter->link_mask);
 
 		E1000_WRITE_REG(hw, E1000_EIAC, vectors);
 		E1000_WRITE_REG(hw, E1000_EIAM, vectors);
 		E1000_WRITE_REG(hw, E1000_EIMS, vectors);
 		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
 	}
 	E1000_WRITE_FLUSH(hw);
 }
 
 /*
  * Mask all of the adapter's interrupts.  When msix_mem is set the
  * extended causes are masked through EIMC and EIAC is cleared
  * first; IMC is written in every case.
  */
 static void
 igb_disable_intr(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 
 	if (adapter->msix_mem) {
 		E1000_WRITE_REG(hw, E1000_EIMC, ~0);
 		E1000_WRITE_REG(hw, E1000_EIAC, 0);
 	}
 	E1000_WRITE_REG(hw, E1000_IMC, ~0);
 	E1000_WRITE_FLUSH(hw);
 }
 
 /*
  * Bit of a misnomer, what this really means is
  * to enable OS management of the system... aka
  * to disable special hardware management features 
  */
 static void
 igb_init_manageability(struct adapter *adapter)
 {
 	if (adapter->has_manage) {
 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
 
 		/* disable hardware interception of ARP */
 		manc &= ~(E1000_MANC_ARP_EN);
 
                 /* enable receiving management packets to the host */
 		manc |= E1000_MANC_EN_MNG2HOST;
 		manc2h |= 1 << 5;  /* Mng Port 623 */
 		manc2h |= 1 << 6;  /* Mng Port 664 */
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
 	}
 }
 
 /*
  * Give control back to hardware management
  * controller if there is one.
  */
 static void
 igb_release_manageability(struct adapter *adapter)
 {
 	if (adapter->has_manage) {
 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
 
 		/* re-enable hardware interception of ARP */
 		manc |= E1000_MANC_ARP_EN;
 		manc &= ~E1000_MANC_EN_MNG2HOST;
 
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
 	}
 }
 
 /*
  * igb_get_hw_control sets the CTRL_EXT:DRV_LOAD bit.
  * For ASF and Pass Through versions of f/w this means that
  * the driver is loaded.
  *
  * Nothing is done for a VF interface.
  */
 static void
 igb_get_hw_control(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 reg;
 
 	if (!adapter->vf_ifp) {
 		/* Let firmware know the driver has taken over */
 		reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
 		reg |= E1000_CTRL_EXT_DRV_LOAD;
 		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
 	}
 }
 
 /*
  * igb_release_hw_control clears the CTRL_EXT:DRV_LOAD bit.
  * For ASF and Pass Through versions of f/w this means that the
  * driver is no longer loaded.
  *
  * Nothing is done for a VF interface.
  */
 static void
 igb_release_hw_control(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 reg;
 
 	if (!adapter->vf_ifp) {
 		/* Let firmware take over control of h/w */
 		reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
 		reg &= ~E1000_CTRL_EXT_DRV_LOAD;
 		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
 	}
 }
 
 /*
  * Return FALSE when the low bit of the first address octet is set
  * or when the address is all zeroes; TRUE otherwise.
  */
 static int
 igb_is_valid_ether_addr(uint8_t *addr)
 {
 	char all_zero[6] = { 0, 0, 0, 0, 0, 0 };
 
 	if (addr[0] & 1)
 		return (FALSE);
 	if (bcmp(addr, all_zero, ETHER_ADDR_LEN) == 0)
 		return (FALSE);
 
 	return (TRUE);
 }
 
 
 /*
  * Enable PCI Wake On Lan capability.
  *
  * Locates the power management capability of 'dev' and sets the
  * PME and PME-enable bits in its power status register.  Returns
  * silently when the device has no such capability.
  */
 static void
 igb_enable_wakeup(device_t dev)
 {
 	int	pmc;
 	u16	status;
 
 	/*
 	 * Let the PCI layer walk the capability list: the power
 	 * management capability is not necessarily the first entry,
 	 * and the capability pointer is a single byte.
 	 */
 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
 		return;
 
 	/* Read-modify-write the power status register */
 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
 	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
 	return;
 }
 
 /*
  * LED callback: 'arg' is the adapter.  A non-zero 'onoff' sets up
  * and lights the LED; zero turns it off and restores it.  Runs
  * under the core lock.
  */
 static void
 igb_led_func(void *arg, int onoff)
 {
 	struct adapter	*sc = arg;
 	struct e1000_hw	*hw = &sc->hw;
 
 	IGB_CORE_LOCK(sc);
 	if (!onoff) {
 		e1000_led_off(hw);
 		e1000_cleanup_led(hw);
 	} else {
 		e1000_setup_led(hw);
 		e1000_led_on(hw);
 	}
 	IGB_CORE_UNLOCK(sc);
 }
 
 /*
  * Interface counter callback.  Input errors, output errors and
  * collisions are computed from the driver's accumulated hardware
  * statistics; every other counter is answered by
  * if_get_counter_default().
  */
 static uint64_t
 igb_get_counter(if_t ifp, ift_counter cnt)
 {
 	struct adapter *sc = if_getsoftc(ifp);
 	struct e1000_hw_stats *hs = (struct e1000_hw_stats *)sc->stats;
 
 	if (cnt == IFCOUNTER_IERRORS) {
 		return (sc->dropped_pkts + hs->rxerrc +
 		    hs->crcerrs + hs->algnerrc +
 		    hs->ruc + hs->roc + hs->mpc + hs->cexterr);
 	}
 	if (cnt == IFCOUNTER_OERRORS) {
 		return (hs->ecol + hs->latecol +
 		    sc->watchdog_events);
 	}
 	if (cnt == IFCOUNTER_COLLISIONS)
 		return (hs->colc);
 
 	return (if_get_counter_default(ifp, cnt));
 }
 
 /**********************************************************************
  *
  *  Update the board statistics counters.
  *
  *  Reads the MAC statistics registers and adds each value to the
  *  matching running total in adapter->stats (struct e1000_hw_stats).
  *  VF adapters are handed off to igb_update_vf_stats_counters().
  *  Also refreshes adapter->pause_frames and the snapshot of control
  *  registers (CTRL, RCTL, IMS, EIMS, PBA) kept in the softc.
  *
  **********************************************************************/
 static void
 igb_update_stats_counters(struct adapter *adapter)
 {
         struct e1000_hw		*hw = &adapter->hw;
 	struct e1000_hw_stats	*stats;
 
 	/*
 	** The virtual function adapter has only a
 	** small controlled set of stats, do only
 	** those and return.
 	*/
 	if (adapter->vf_ifp) {
 		igb_update_vf_stats_counters(adapter);
 		return;
 	}
 
 	stats = (struct e1000_hw_stats	*)adapter->stats;
 
 	/*
 	 * Symbol and sequence error counters are only sampled on copper
 	 * media, or on other media while the link-up status bit is set.
 	 */
 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
 	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
 		stats->symerrs +=
 		    E1000_READ_REG(hw,E1000_SYMERRS);
 		stats->sec += E1000_READ_REG(hw, E1000_SEC);
 	}
 
 	/* Error, collision and flow-control counters. */
 	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
 	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
 	stats->scc += E1000_READ_REG(hw, E1000_SCC);
 	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
 
 	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
 	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
 	stats->colc += E1000_READ_REG(hw, E1000_COLC);
 	stats->dc += E1000_READ_REG(hw, E1000_DC);
 	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
 	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
 	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
 	/*
 	** For watchdog management we need to know if we have been
 	** paused during the last interval, so capture that here.
 	** (pause_frames holds only this interval's XOFFRXC reading;
 	** the running total is kept in stats->xoffrxc.)
 	*/
         adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
         stats->xoffrxc += adapter->pause_frames;
 	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
 	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
 	/* Received packet counts, bucketed by frame size. */
 	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
 	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
 	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
 	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
 	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
 	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
 	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
 	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
 	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
 	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
 
 	/* For the 64-bit byte counters the low dword must be read first. */
 	/* Both registers clear on the read of the high dword */
 
 	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
 	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
 	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
 	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
 
 	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
 	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
 	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
 	stats->roc += E1000_READ_REG(hw, E1000_ROC);
 	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
 
 	/* Management packet counters. */
 	stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
 	stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
 	stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
 
 	/* Total octet counters: 64-bit, read low dword then high dword. */
 	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
 	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
 	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
 	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
 
 	/* Total packets and transmitted packet counts by frame size. */
 	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
 	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
 	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
 	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
 	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
 	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
 	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
 	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
 	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
 	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
 
 	/* Interrupt Counts */
 
 	stats->iac += E1000_READ_REG(hw, E1000_IAC);
 	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
 	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
 	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
 	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
 	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
 	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
 	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
 	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
 
 	/* Host to Card Statistics */
 
 	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
 	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
 	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
 	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
 	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
 	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
 	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
 	/* 64-bit host octet counters: low dword is read before high. */
 	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
 	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
 	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
 	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
 	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
 	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
 	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
 
 	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
 	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
 	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
 	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
 	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
 	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
 
 	/*
 	 * Driver specific counters: these are overwritten (not
 	 * accumulated) with the current register contents.
 	 */
 	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
 	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
 	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
 	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
 	/* PBA: upper 16 bits feed the tx field, lower 16 bits the rx field. */
 	adapter->packet_buf_alloc_tx =
 	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
 	adapter->packet_buf_alloc_rx =
 	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
 }
 
 
 /**********************************************************************
  *
  *  Initialize the VF board statistics counters.
  *
  *  Records the current value of each VF counter register in the
  *  corresponding last_* field of adapter->stats.  Does nothing when
  *  the statistics buffer is NULL.
  *
  **********************************************************************/
 static void
 igb_vf_init_stats(struct adapter *adapter)
 {
 	struct e1000_vf_stats *vfs = (struct e1000_vf_stats *)adapter->stats;
 	struct e1000_hw *hw;
 
 	if (vfs == NULL)
 		return;
 
 	hw = &adapter->hw;
 	vfs->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
 	vfs->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
 	vfs->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
 	vfs->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
 	vfs->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
 }
  
 /**********************************************************************
  *
  *  Update the VF board statistics counters.
  *
  *  Skipped entirely while adapter->link_speed is zero.  Otherwise
  *  each of the five VF counters is refreshed via UPDATE_VF_REG using
  *  its register, its last_* field and its running total.
  *
  **********************************************************************/
 static void
 igb_update_vf_stats_counters(struct adapter *adapter)
 {
 	/*
 	 * NOTE(review): 'hw' is not referenced directly below; it is
 	 * presumably picked up by name inside UPDATE_VF_REG, so the
 	 * identifier must stay as-is -- confirm against the macro.
 	 */
 	struct e1000_hw *hw = &adapter->hw;
 	struct e1000_vf_stats *vfs;
 
 	if (!adapter->link_speed)
 		return;
 
 	vfs = (struct e1000_vf_stats *)adapter->stats;
 
 	UPDATE_VF_REG(E1000_VFGPRC, vfs->last_gprc, vfs->gprc);
 	UPDATE_VF_REG(E1000_VFGORC, vfs->last_gorc, vfs->gorc);
 	UPDATE_VF_REG(E1000_VFGPTC, vfs->last_gptc, vfs->gptc);
 	UPDATE_VF_REG(E1000_VFGOTC, vfs->last_gotc, vfs->gotc);
 	UPDATE_VF_REG(E1000_VFMPRC, vfs->last_mprc, vfs->mprc);
 }
 
 /*
  * Sysctl handler that reports the live value of one device register.
  * oid_arg1 carries the adapter softc and oid_arg2 the register offset;
  * the register is read on every invocation and passed to
  * sysctl_handle_int().
  */
 static int
 igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = oidp->oid_arg1;
 	u_int regval = E1000_READ_REG(&sc->hw, oidp->oid_arg2);
 
 	return (sysctl_handle_int(oidp, &regval, 0, req));
 }
 
 /*
 **  Tuneable interrupt rate handler
 */
 static int
 igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
 {
 	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
 	int			error;
 	u32			reg, usec, rate;
                         
 	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
 	usec = ((reg & 0x7FFC) >> 2);
 	if (usec > 0)
 		rate = 1000000 / usec;
 	else
 		rate = 0;
 	error = sysctl_handle_int(oidp, &rate, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 /*
  * Add sysctl variables, one per statistic, to the system.
  *
  * Layout created under the device's sysctl tree:
  *   - driver counters and register snapshots directly under the device
  *   - one "queue%d" node per queue with ring/LRO counters
  *   - "mac_stats" with the MAC statistics
  *   - "interrupts" and "host" nodes (PF adapters only)
  *
  * For a VF adapter only the five VF counters are exported under
  * "mac_stats" and the function returns early.  adapter->stats is then
  * a struct e1000_vf_stats (as the VF init/update routines treat it),
  * so the VF branch must take field addresses through that type rather
  * than through struct e1000_hw_stats.
  */
 static void
 igb_add_hw_stats(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 
 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
 	struct e1000_hw_stats *stats = adapter->stats;
 
 	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
 	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
 
 #define QUEUE_NAME_LEN 32
 	char namebuf[QUEUE_NAME_LEN];
 
 	/* Driver Statistics */
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
 			CTLFLAG_RD, &adapter->link_irq,
 			"Link MSIX IRQ Handled");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
 			CTLFLAG_RD, &adapter->dropped_pkts,
 			"Driver dropped packets");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
 			"Driver tx dma failure in xmit");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
 			CTLFLAG_RD, &adapter->rx_overruns,
 			"RX overruns");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
 			CTLFLAG_RD, &adapter->watchdog_events,
 			"Watchdog timeouts");
 
 	/* Register snapshots refreshed by igb_update_stats_counters() */
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
 			CTLFLAG_RD, &adapter->device_control,
 			"Device Control Register");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
 			CTLFLAG_RD, &adapter->rx_control,
 			"Receiver Control Register");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
 			CTLFLAG_RD, &adapter->int_mask,
 			"Interrupt Mask");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
 			CTLFLAG_RD, &adapter->eint_mask,
 			"Extended Interrupt Mask");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
 			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
 			"Transmit Buffer Packet Allocation");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
 			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
 			"Receive Buffer Packet Allocation");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
 			"Flow Control High Watermark");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
 			"Flow Control Low Watermark");
 
 	/* Per-queue nodes: "queue0", "queue1", ... */
 	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
 		struct lro_ctrl *lro = &rxr->lro;
 
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 					    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
 				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
 				sizeof(&adapter->queues[i]),
 				igb_sysctl_interrupt_rate_handler,
 				"IU", "Interrupt Rate");
 
 		/* Descriptor head/tail are read live from the hardware. */
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
 				igb_sysctl_reg_handler, "IU",
 				"Transmit Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
 				igb_sysctl_reg_handler, "IU",
 				"Transmit Descriptor Tail");
 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
 				CTLFLAG_RD, &txr->no_desc_avail,
 				"Queue Descriptors Unavailable");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
 				CTLFLAG_RD, &txr->total_packets,
 				"Queue Packets Transmitted");
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
 				igb_sysctl_reg_handler, "IU",
 				"Receive Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
 				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
 				igb_sysctl_reg_handler, "IU",
 				"Receive Descriptor Tail");
 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
 				CTLFLAG_RD, &rxr->rx_packets,
 				"Queue Packets Received");
 		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
 				CTLFLAG_RD, &rxr->rx_bytes,
 				"Queue Bytes Received");
 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
 				CTLFLAG_RD, &lro->lro_queued, 0,
 				"LRO Queued");
 		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
 				CTLFLAG_RD, &lro->lro_flushed, 0,
 				"LRO Flushed");
 	}
 
 	/* MAC stats get their own sub node */
 
 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
 				    CTLFLAG_RD, NULL, "MAC Statistics");
 	stat_list = SYSCTL_CHILDREN(stat_node);
 
 	/*
 	** VF adapter has a very limited set of stats
 	** since its not managing the metal, so to speak.
 	*/
 	if (adapter->vf_ifp) {
 		/*
 		 * The VF statistics buffer is a struct e1000_vf_stats;
 		 * take the field addresses through that type so the
 		 * sysctls point at the counters the VF update routine
 		 * actually maintains.
 		 */
 		struct e1000_vf_stats *vfstats = adapter->stats;
 
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
 				CTLFLAG_RD, &vfstats->gprc,
 				"Good Packets Received");
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
 				CTLFLAG_RD, &vfstats->gptc,
 				"Good Packets Transmitted");
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
 				CTLFLAG_RD, &vfstats->gorc,
 				"Good Octets Received");
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
 				CTLFLAG_RD, &vfstats->gotc,
 				"Good Octets Transmitted");
 		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
 				CTLFLAG_RD, &vfstats->mprc,
 				"Multicast Packets Received");
 		return;
 	}
 
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
 			CTLFLAG_RD, &stats->ecol,
 			"Excessive collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
 			CTLFLAG_RD, &stats->scc,
 			"Single collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
 			CTLFLAG_RD, &stats->mcc,
 			"Multiple collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
 			CTLFLAG_RD, &stats->latecol,
 			"Late collisions");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
 			CTLFLAG_RD, &stats->colc,
 			"Collision Count");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
 			CTLFLAG_RD, &stats->symerrs,
 			"Symbol Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
 			CTLFLAG_RD, &stats->sec,
 			"Sequence Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
 			CTLFLAG_RD, &stats->dc,
 			"Defer Count");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
 			CTLFLAG_RD, &stats->mpc,
 			"Missed Packets");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
 			CTLFLAG_RD, &stats->rlec,
 			"Receive Length Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
 			CTLFLAG_RD, &stats->rnbc,
 			"Receive No Buffers");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
 			CTLFLAG_RD, &stats->ruc,
 			"Receive Undersize");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
 			CTLFLAG_RD, &stats->rfc,
 			"Fragmented Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
 			CTLFLAG_RD, &stats->roc,
 			"Oversized Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
 			CTLFLAG_RD, &stats->rjc,
 			"Received Jabber");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
 			CTLFLAG_RD, &stats->rxerrc,
 			"Receive Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
 			CTLFLAG_RD, &stats->crcerrs,
 			"CRC errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
 			CTLFLAG_RD, &stats->algnerrc,
 			"Alignment Errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
 			CTLFLAG_RD, &stats->tncrs,
 			"Transmit with No CRS");
 	/* On 82575 these are collision counts */
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
 			CTLFLAG_RD, &stats->cexterr,
 			"Collision/Carrier extension errors");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
 			CTLFLAG_RD, &stats->xonrxc,
 			"XON Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
 			CTLFLAG_RD, &stats->xontxc,
 			"XON Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
 			CTLFLAG_RD, &stats->xoffrxc,
 			"XOFF Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
 			CTLFLAG_RD, &stats->xofftxc,
 			"XOFF Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
 			CTLFLAG_RD, &stats->fcruc,
 			"Unsupported Flow Control Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
 			CTLFLAG_RD, &stats->mgprc,
 			"Management Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
 			CTLFLAG_RD, &stats->mgpdc,
 			"Management Packets Dropped");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
 			CTLFLAG_RD, &stats->mgptc,
 			"Management Packets Transmitted");
 	/* Packet Reception Stats */
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
 			CTLFLAG_RD, &stats->tpr,
 			"Total Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
 			CTLFLAG_RD, &stats->gprc,
 			"Good Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
 			CTLFLAG_RD, &stats->bprc,
 			"Broadcast Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
 			CTLFLAG_RD, &stats->mprc,
 			"Multicast Packets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
 			CTLFLAG_RD, &stats->prc64,
 			"64 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
 			CTLFLAG_RD, &stats->prc127,
 			"65-127 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
 			CTLFLAG_RD, &stats->prc255,
 			"128-255 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
 			CTLFLAG_RD, &stats->prc511,
 			"256-511 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
 			CTLFLAG_RD, &stats->prc1023,
 			"512-1023 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
 			CTLFLAG_RD, &stats->prc1522,
 			"1024-1522 byte frames received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
 			CTLFLAG_RD, &stats->gorc,
 			"Good Octets Received");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
 			CTLFLAG_RD, &stats->tor,
 			"Total Octets Received");
 
 	/* Packet Transmission Stats */
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
 			CTLFLAG_RD, &stats->gotc,
 			"Good Octets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
 			CTLFLAG_RD, &stats->tot,
 			"Total Octets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
 			CTLFLAG_RD, &stats->tpt,
 			"Total Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
 			CTLFLAG_RD, &stats->gptc,
 			"Good Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
 			CTLFLAG_RD, &stats->bptc,
 			"Broadcast Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
 			CTLFLAG_RD, &stats->mptc,
 			"Multicast Packets Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
 			CTLFLAG_RD, &stats->ptc64,
 			"64 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
 			CTLFLAG_RD, &stats->ptc127,
 			"65-127 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
 			CTLFLAG_RD, &stats->ptc255,
 			"128-255 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
 			CTLFLAG_RD, &stats->ptc511,
 			"256-511 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
 			CTLFLAG_RD, &stats->ptc1023,
 			"512-1023 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
 			CTLFLAG_RD, &stats->ptc1522,
 			"1024-1522 byte frames transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
 			CTLFLAG_RD, &stats->tsctc,
 			"TSO Contexts Transmitted");
 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
 			CTLFLAG_RD, &stats->tsctfc,
 			"TSO Contexts Failed");
 
 
 	/* Interrupt Stats */
 
 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
 	int_list = SYSCTL_CHILDREN(int_node);
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
 			CTLFLAG_RD, &stats->iac,
 			"Interrupt Assertion Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
 			CTLFLAG_RD, &stats->icrxptc,
 			"Interrupt Cause Rx Pkt Timer Expire Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
 			CTLFLAG_RD, &stats->icrxatc,
 			"Interrupt Cause Rx Abs Timer Expire Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
 			CTLFLAG_RD, &stats->ictxptc,
 			"Interrupt Cause Tx Pkt Timer Expire Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
 			CTLFLAG_RD, &stats->ictxatc,
 			"Interrupt Cause Tx Abs Timer Expire Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
 			CTLFLAG_RD, &stats->ictxqec,
 			"Interrupt Cause Tx Queue Empty Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
 			CTLFLAG_RD, &stats->ictxqmtc,
 			"Interrupt Cause Tx Queue Min Thresh Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
 			CTLFLAG_RD, &stats->icrxdmtc,
 			"Interrupt Cause Rx Desc Min Thresh Count");
 
 	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
 			CTLFLAG_RD, &stats->icrxoc,
 			"Interrupt Cause Receiver Overrun Count");
 
 	/* Host to Card Stats */
 
 	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
 				    CTLFLAG_RD, NULL,
 				    "Host to Card Statistics");
 
 	host_list = SYSCTL_CHILDREN(host_node);
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
 			CTLFLAG_RD, &stats->cbtmpc,
 			"Circuit Breaker Tx Packet Count");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
 			CTLFLAG_RD, &stats->htdpmc,
 			"Host Transmit Discarded Packets");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
 			CTLFLAG_RD, &stats->rpthc,
 			"Rx Packets To Host");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
 			CTLFLAG_RD, &stats->cbrmpc,
 			"Circuit Breaker Rx Packet Count");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
 			CTLFLAG_RD, &stats->cbrdpc,
 			"Circuit Breaker Rx Dropped Count");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
 			CTLFLAG_RD, &stats->hgptc,
 			"Host Good Packets Tx Count");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
 			CTLFLAG_RD, &stats->htcbdpc,
 			"Host Tx Circuit Breaker Dropped Count");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
 			CTLFLAG_RD, &stats->hgorc,
 			"Host Good Octets Received Count");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
 			CTLFLAG_RD, &stats->hgotc,
 			"Host Good Octets Transmit Count");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
 			CTLFLAG_RD, &stats->lenerrs,
 			"Length Errors");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
 			CTLFLAG_RD, &stats->scvpc,
 			"SerDes/SGMII Code Violation Pkt Count");
 
 	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
 			CTLFLAG_RD, &stats->hrmpc,
 			"Header Redirection Missed Packet Count");
 }
 
 
 /**********************************************************************
  *
  *  This routine provides a way to dump out the adapter eeprom,
  *  often a useful debug/service tool. This only dumps the first
  *  32 words, stuff that matters is in that extent.
  *
  **********************************************************************/
 static int
 igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter;
 	int error;
 	int result;
 
 	result = -1;
 	error = sysctl_handle_int(oidp, &result, 0, req);
 
 	if (error || !req->newptr)
 		return (error);
 
 	/*
 	 * This value will cause a hex dump of the
 	 * first 32 16-bit words of the EEPROM to
 	 * the screen.
 	 */
 	if (result == 1) {
 		adapter = (struct adapter *)arg1;
 		igb_print_nvm_info(adapter);
         }
 
 	return (error);
 }
 
 /*
  * Print a hex dump of the first 32 16-bit EEPROM words, eight words
  * per row, each row prefixed with its offset label.
  *
  * A word whose read fails is shown as "----" rather than printing the
  * contents of an uninitialised local.
  */
 static void
 igb_print_nvm_info(struct adapter *adapter)
 {
 	u16	eeprom_data;
 	int	i, j, row = 0;
 
 	/* Its a bit crude, but it gets the job done */
 	printf("\nInterface EEPROM Dump:\n");
 	printf("Offset\n0x0000  ");
 	for (i = 0, j = 0; i < 32; i++, j++) {
 		if (j == 8) { /* Make the offset block */
 			j = 0; ++row;
 			printf("\n0x00%x0  ",row);
 		}
 		/* Non-zero return means eeprom_data was not filled in. */
 		if (e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data) != 0) {
 			printf("---- ");
 			continue;
 		}
 		printf("%04x ", eeprom_data);
 	}
 	printf("\n");
 }
 
 static void
 igb_set_sysctl_value(struct adapter *adapter, const char *name,
 	const char *description, int *limit, int value)
 {
 	*limit = value;
 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
 }
 
 /*
 ** Set flow control using sysctl:
 ** Flow control values:
 ** 	0 - off
 **	1 - rx pause
 **	2 - tx pause
 **	3 - full
 **
 ** The value handed to sysctl is a per-call local seeded from this
 ** adapter's state.  A function-static variable would be shared by
 ** every igb device, would be updated without any locking, and would
 ** keep (and later report) values that were rejected below.
 **
 ** Unrecognised values are ignored and the handler still returns 0.
 */
 static int
 igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter	*adapter = (struct adapter *) arg1;
 	int		error, input;
 
 	/*
 	 * NOTE(review): current_mode is presumably kept up to date by
 	 * the shared e1000 link setup code; it is also what this
 	 * handler writes on a successful set -- confirm it holds a
 	 * sensible value before the first set.
 	 */
 	input = adapter->hw.fc.current_mode;
 	error = sysctl_handle_int(oidp, &input, 0, req);
 
 	if ((error) || (req->newptr == NULL))
 		return (error);
 
 	switch (input) {
 		case e1000_fc_rx_pause:
 		case e1000_fc_tx_pause:
 		case e1000_fc_full:
 		case e1000_fc_none:
 			adapter->hw.fc.requested_mode = input;
 			adapter->fc = input;
 			break;
 		default:
 			/* Do nothing */
 			return (error);
 	}
 
 	/* Apply the requested mode to the MAC immediately. */
 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
 	e1000_force_mac_fc(&adapter->hw);
 	/* XXX TODO: update DROP_EN on each RX queue if appropriate */
 	return (error);
 }
 
 /*
 ** Manage DMA Coalesce:
 ** Control values:
 ** 	0/1 - off/on
 **	Legal timer values are:
 **	250,500,1000-10000 in thousands
 */
 static int
 igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	int		error;
 
 	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
 
 	if ((error) || (req->newptr == NULL))
 		return (error);
 
 	switch (adapter->dmac) {
 		case 0:
 			/*Disabling */
 			break;
 		case 1: /* Just enable and use default */
 			adapter->dmac = 1000;
 			break;
 		case 250:
 		case 500:
 		case 1000:
 		case 2000:
 		case 3000:
 		case 4000:
 		case 5000:
 		case 6000:
 		case 7000:
 		case 8000:
 		case 9000:
 		case 10000:
 			/* Legal values - allow */
 			break;
 		default:
 			/* Do nothing, illegal value */
 			adapter->dmac = 0;
 			return (EINVAL);
 	}
 	/* Reinit the interface */
 	igb_init(adapter);
 	return (error);
 }
 
 /*
 ** Manage Energy Efficient Ethernet:
 ** Control values:
 **     0/1 - enabled/disabled
 */
 static int
 igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter	*adapter = (struct adapter *) arg1;
 	int		error, value;
 
 	value = adapter->hw.dev_spec._82575.eee_disable;
 	error = sysctl_handle_int(oidp, &value, 0, req);
 	if (error || req->newptr == NULL)
 		return (error);
 	IGB_CORE_LOCK(adapter);
 	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
 	igb_init_locked(adapter);
 	IGB_CORE_UNLOCK(adapter);
 	return (0);
 }
Index: head/sys/dev/ixgbe/ixgbe.c
===================================================================
--- head/sys/dev/ixgbe/ixgbe.c	(revision 277330)
+++ head/sys/dev/ixgbe/ixgbe.c	(revision 277331)
@@ -1,6058 +1,6058 @@
 /******************************************************************************
 
   Copyright (c) 2001-2013, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_rss.h"
 #include "ixgbe.h"
 
 #ifdef	RSS
-#include <netinet/in_rss.h>
+#include <net/rss_config.h>
 #endif
 
 /*********************************************************************
  *  Set this to one to display debug statistics
  *********************************************************************/
 int             ixgbe_display_debug_stats = 0;
 
 /*********************************************************************
  *  Driver version
  *********************************************************************/
 char ixgbe_driver_version[] = "2.5.15";
 
 /*********************************************************************
  *  PCI Device ID Table
  *
  *  Used by probe to select devices to load on
  *  Last field stores an index into ixgbe_strings
  *  Last entry must be all 0s
  *
  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
  *********************************************************************/
 
 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
 {
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
 	/* required last entry */
 	{0, 0, 0, 0, 0}
 };
 
 /*********************************************************************
  *  Table of branding strings
  *********************************************************************/
 
 static char    *ixgbe_strings[] = {
 	"Intel(R) PRO/10GbE PCI-Express Network Driver"
 };
 
 /*********************************************************************
  *  Function prototypes
  *********************************************************************/
 static int      ixgbe_probe(device_t);
 static int      ixgbe_attach(device_t);
 static int      ixgbe_detach(device_t);
 static int      ixgbe_shutdown(device_t);
 #ifdef IXGBE_LEGACY_TX
 static void     ixgbe_start(struct ifnet *);
 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
 #else /* ! IXGBE_LEGACY_TX */
 static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
 static int	ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
 static void	ixgbe_qflush(struct ifnet *);
 static void	ixgbe_deferred_mq_start(void *, int);
 #endif /* IXGBE_LEGACY_TX */
 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
 static void	ixgbe_init(void *);
 static void	ixgbe_init_locked(struct adapter *);
 static void     ixgbe_stop(void *);
 static uint64_t	ixgbe_get_counter(struct ifnet *, ift_counter);
 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
 static int      ixgbe_media_change(struct ifnet *);
 static void     ixgbe_identify_hardware(struct adapter *);
 static int      ixgbe_allocate_pci_resources(struct adapter *);
 static void	ixgbe_get_slot_info(struct ixgbe_hw *);
 static int      ixgbe_allocate_msix(struct adapter *);
 static int      ixgbe_allocate_legacy(struct adapter *);
 static int	ixgbe_allocate_queues(struct adapter *);
 static int	ixgbe_setup_msix(struct adapter *);
 static void	ixgbe_free_pci_resources(struct adapter *);
 static void	ixgbe_local_timer(void *);
 static int	ixgbe_setup_interface(device_t, struct adapter *);
 static void	ixgbe_config_link(struct adapter *);
 
 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
 static int	ixgbe_setup_transmit_structures(struct adapter *);
 static void	ixgbe_setup_transmit_ring(struct tx_ring *);
 static void     ixgbe_initialize_transmit_units(struct adapter *);
 static void     ixgbe_free_transmit_structures(struct adapter *);
 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
 
 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
 static int      ixgbe_setup_receive_structures(struct adapter *);
 static int	ixgbe_setup_receive_ring(struct rx_ring *);
 static void     ixgbe_initialize_receive_units(struct adapter *);
 static void     ixgbe_free_receive_structures(struct adapter *);
 static void     ixgbe_free_receive_buffers(struct rx_ring *);
 static void	ixgbe_setup_hw_rsc(struct rx_ring *);
 
 static void     ixgbe_enable_intr(struct adapter *);
 static void     ixgbe_disable_intr(struct adapter *);
 static void     ixgbe_update_stats_counters(struct adapter *);
 static void	ixgbe_txeof(struct tx_ring *);
 static bool	ixgbe_rxeof(struct ix_queue *);
 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
 static void     ixgbe_set_promisc(struct adapter *);
 static void     ixgbe_set_multi(struct adapter *);
 static void     ixgbe_update_link_status(struct adapter *);
 static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
 static int	ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
 		    struct ixgbe_dma_alloc *, int);
 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
 static int	ixgbe_tx_ctx_setup(struct tx_ring *,
 		    struct mbuf *, u32 *, u32 *);
 static int	ixgbe_tso_setup(struct tx_ring *,
 		    struct mbuf *, u32 *, u32 *);
 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
 static void	ixgbe_configure_ivars(struct adapter *);
 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
 
 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
 
 static void     ixgbe_add_hw_stats(struct adapter *adapter);
 
 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
 		    struct mbuf *, u32);
 
 static void	ixgbe_enable_rx_drop(struct adapter *);
 static void	ixgbe_disable_rx_drop(struct adapter *);
 
 /* Support for pluggable optic modules */
 static bool	ixgbe_sfp_probe(struct adapter *);
 static void	ixgbe_setup_optics(struct adapter *);
 
 /* Legacy (single vector) interrupt handler */
 static void	ixgbe_legacy_irq(void *);
 
 /* The MSI/X Interrupt handlers */
 static void	ixgbe_msix_que(void *);
 static void	ixgbe_msix_link(void *);
 
 /* Deferred interrupt tasklets */
 static void	ixgbe_handle_que(void *, int);
 static void	ixgbe_handle_link(void *, int);
 static void	ixgbe_handle_msf(void *, int);
 static void	ixgbe_handle_mod(void *, int);
 
 #ifdef IXGBE_FDIR
 static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
 static void	ixgbe_reinit_fdir(void *, int);
 #endif
 
 /* Missing shared code prototype */
 extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
 
 /*********************************************************************
  *  FreeBSD Device Interface Entry Points
  *********************************************************************/
 
 /* Maps the generic device interface methods onto this driver's handlers */
 static device_method_t ixgbe_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, ixgbe_probe),
 	DEVMETHOD(device_attach, ixgbe_attach),
 	DEVMETHOD(device_detach, ixgbe_detach),
 	DEVMETHOD(device_shutdown, ixgbe_shutdown),
 	DEVMETHOD_END
 };
 
 /*
  * Driver description: device name prefix "ix", the method table above,
  * and a softc sized for 'struct adapter' (the handlers retrieve it with
  * device_get_softc()).
  */
 static driver_t ixgbe_driver = {
 	"ix", ixgbe_methods, sizeof(struct adapter),
 };
 
 devclass_t ixgbe_devclass;
 /* Register the driver on the pci bus */
 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
 
 /* Modules this driver depends on */
 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
 
 /*
 ** TUNEABLE PARAMETERS:
 */
 
 static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
 		   "IXGBE driver parameters");
 
 /*
 ** AIM: Adaptive Interrupt Moderation
 ** which means that the interrupt rate
 ** is varied over time based on the
 ** traffic for that interrupt vector
 */
 static int ixgbe_enable_aim = TRUE;
 SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0,
     "Enable adaptive interrupt moderation");
 
 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
 SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
     &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
 
 /*
  * How many packets rxeof tries to clean at a time.
  * Read-only tunable (CTLFLAG_RDTUN); per its description, -1 means
  * unlimited.
  */
 static int ixgbe_rx_process_limit = 256;
 SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
     &ixgbe_rx_process_limit, 0,
     "Maximum number of received packets to process at a time, "
     "-1 means unlimited");
 
 /*
  * How many packets txeof tries to clean at a time.
  * Read-only tunable (CTLFLAG_RDTUN); per its description, -1 means
  * unlimited.
  */
 static int ixgbe_tx_process_limit = 256;
 SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
     &ixgbe_tx_process_limit, 0,
     "Maximum number of sent packets to process at a time, "
     "-1 means unlimited");
 
 /*
 ** Smart speed setting, defaults to on.
 ** This only works as a compile-time option
 ** right now as it is used during attach;
 ** set this to 'ixgbe_smart_speed_off' to
 ** disable.
 */
 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
 
 /*
  * MSIX should be the default for best performance,
  * but this allows it to be forced off for testing.
  */
 static int ixgbe_enable_msix = 1;
 SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
     "Enable MSI-X interrupts");
 
 /*
  * Number of Queues, can be set to 0,
  * it then autoconfigures based on the
  * number of cpus with a max of 8. This
  * can be overridden manually here.
  */
 static int ixgbe_num_queues = 0;
 SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
     "Number of queues to configure, 0 indicates autoconfigure");
 
 /*
 ** Number of TX descriptors per ring,
 ** setting higher than RX as this seems
 ** the better performing choice.
 */
 static int ixgbe_txd = PERFORM_TXD;
 SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
     "Number of transmit descriptors per queue");
 
 /* Number of RX descriptors per ring */
 static int ixgbe_rxd = PERFORM_RXD;
 SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
     "Number of receive descriptors per queue");
 
 /*
 ** Defining this on will allow the use
 ** of unsupported SFP+ modules, note that
 ** doing so you are on your own :)
 */
 static int allow_unsupported_sfp = FALSE;
 TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
 
 /*
 ** HW RSC control: 
 **  this feature only works with
 **  IPv4, and only on 82599 and later.
 **  Also this will cause IP forwarding to
 **  fail and that can't be controlled by
 **  the stack as LRO can. For all these
 **  reasons I've deemed it best to leave
 **  this off and not bother with a tuneable
 **  interface, this would need to be compiled
 **  to enable.
 */
 static bool ixgbe_rsc_enable = FALSE;
 
 /* Keep running tab on them for sanity check */
 static int ixgbe_total_ports;
 
 #ifdef IXGBE_FDIR
 /*
 ** For Flow Director: this is the
 ** number of TX packets we sample
 ** for the filter pool, this means
 ** every 20th packet will be probed.
 **
 ** This feature can be disabled by 
 ** setting this to 0.
 */
 static int atr_sample_rate = 20;
 /* 
 ** Flow Director actually 'steals'
 ** part of the packet buffer as its
 ** filter pool, this variable controls
 ** how much it uses:
 **  0 = 64K, 1 = 128K, 2 = 256K
 */
 static int fdir_pballoc = 1;
 #endif
 
 #ifdef DEV_NETMAP
 /*
  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
  * be a reference on how to implement netmap support in a driver.
  * Additional comments are in ixgbe_netmap.h .
  *
  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
  * that extend the standard driver.
  */
 #include <dev/netmap/ixgbe_netmap.h>
 #endif /* DEV_NETMAP */
 
 /*********************************************************************
  *  Device identification routine
  *
  *  ixgbe_probe determines if the driver should be loaded on
  *  adapter based on PCI vendor/device id of the adapter.
  *
  *  The device's IDs are compared against ixgbe_vendor_info_array;
  *  a subvendor or subdevice ID of 0 in a table entry acts as a
  *  wildcard.  On a match the device description is set and the
  *  running port count is bumped.
  *
  *  return BUS_PROBE_DEFAULT on success, positive on failure
  *********************************************************************/
 
 static int
 ixgbe_probe(device_t dev)
 {
 	ixgbe_vendor_info_t *ent;
 
 	u16	pci_vendor_id = 0;
 	u16	pci_device_id = 0;
 	u16	pci_subvendor_id = 0;
 	u16	pci_subdevice_id = 0;
 	char	adapter_name[256];
 
 	INIT_DEBUGOUT("ixgbe_probe: begin");
 
 	/* Cheap reject before reading the remaining IDs */
 	pci_vendor_id = pci_get_vendor(dev);
 	if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
 		return (ENXIO);
 
 	pci_device_id = pci_get_device(dev);
 	pci_subvendor_id = pci_get_subvendor(dev);
 	pci_subdevice_id = pci_get_subdevice(dev);
 
 	/* The table is terminated by an all-zero entry */
 	ent = ixgbe_vendor_info_array;
 	while (ent->vendor_id != 0) {
 		if ((pci_vendor_id == ent->vendor_id) &&
 		    (pci_device_id == ent->device_id) &&
 
 		    ((pci_subvendor_id == ent->subvendor_id) ||
 		     (ent->subvendor_id == 0)) &&
 
 		    ((pci_subdevice_id == ent->subdevice_id) ||
 		     (ent->subdevice_id == 0))) {
 			/* Bounded format: never write past adapter_name */
 			snprintf(adapter_name, sizeof(adapter_name),
 				"%s, Version - %s",
 				ixgbe_strings[ent->index],
 				ixgbe_driver_version);
 			device_set_desc_copy(dev, adapter_name);
 			++ixgbe_total_ports;
 			return (BUS_PROBE_DEFAULT);
 		}
 		ent++;
 	}
 	return (ENXIO);
 }
 
 /*********************************************************************
  *  Device initialization routine
  *
  *  The attach entry point is called when the driver is being loaded.
  *  This routine identifies the type of hardware, allocates all resources
  *  and initializes the hardware.
  *
  *  Failure paths jump to err_late (which also frees the TX/RX
  *  structures) or err_out, and return the errno held in 'error'.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 static int
 ixgbe_attach(device_t dev)
 {
 	struct adapter *adapter;
 	struct ixgbe_hw *hw;
 	int             error = 0;
 	u16		csum;
 	u32		ctrl_ext;
 
 	INIT_DEBUGOUT("ixgbe_attach: begin");
 
 	/* Allocate, clear, and link in our adapter structure */
 	adapter = device_get_softc(dev);
 	adapter->dev = adapter->osdep.dev = dev;
 	hw = &adapter->hw;
 
 	/* Core Lock Init*/
 	IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
 
 	/* SYSCTL APIs */
 
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 			OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
 			adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
 
 	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
 			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 			OID_AUTO, "enable_aim", CTLFLAG_RW,
 			&ixgbe_enable_aim, 1, "Interrupt Moderation");
 
 	/*
 	** Allow a kind of speed control by forcing the autoneg
 	** advertised speed list to only a certain value, this
 	** supports 1G on 82599 devices, and 100Mb on x540.
 	*/
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 			OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
 			adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
 
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 			OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
 			0, ixgbe_set_thermal_test, "I", "Thermal Test");
 
 	/* Set up the timer callout */
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware revision */
 	ixgbe_identify_hardware(adapter);
 
 	/* Do base PCI setup - map BAR0 */
 	if (ixgbe_allocate_pci_resources(adapter)) {
 		device_printf(dev, "Allocation of PCI resources failed\n");
 		error = ENXIO;
 		goto err_out;
 	}
 
 	/* Do descriptor calc and sanity checks */
 	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
 	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
 		device_printf(dev, "TXD config issue, using default!\n");
 		adapter->num_tx_desc = DEFAULT_TXD;
 	} else
 		adapter->num_tx_desc = ixgbe_txd;
 
 	/*
 	** With many RX rings it is easy to exceed the
 	** system mbuf allocation. Tuning nmbclusters
 	** can alleviate this.
 	*/
 	if (nmbclusters > 0 ) {
 		int s;
 		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
 		if (s > nmbclusters) {
 			device_printf(dev, "RX Descriptors exceed "
 			    "system mbuf max, using default instead!\n");
 			ixgbe_rxd = DEFAULT_RXD;
 		}
 	}
 
 	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
 		device_printf(dev, "RXD config issue, using default!\n");
 		adapter->num_rx_desc = DEFAULT_RXD;
 	} else
 		adapter->num_rx_desc = ixgbe_rxd;
 
 	/* Allocate our TX/RX Queues */
 	if (ixgbe_allocate_queues(adapter)) {
 		error = ENOMEM;
 		goto err_out;
 	}
 
 	/* Allocate multicast array memory. */
 	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
 	if (adapter->mta == NULL) {
 		device_printf(dev, "Can not allocate multicast setup array\n");
 		error = ENOMEM;
 		goto err_late;
 	}
 
 	/* Initialize the shared code */
 	hw->allow_unsupported_sfp = allow_unsupported_sfp;
 	error = ixgbe_init_shared_code(hw);
 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
 		/*
 		** No optics in this port, set up
 		** so the timer routine will probe 
 		** for later insertion.
 		*/
 		adapter->sfp_probe = TRUE;
 		error = 0;
 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 		device_printf(dev,"Unsupported SFP+ module detected!\n");
 		error = EIO;
 		goto err_late;
 	} else if (error) {
 		device_printf(dev,"Unable to initialize the shared code\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	/* Make sure we have a good EEPROM before we read from it */
 	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
 		device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	/*
 	 * Only an unsupported SFP+ module is fatal here; the other
 	 * results are reported (or ignored) and 'error' is overwritten
 	 * by the interrupt allocation below.
 	 */
 	error = ixgbe_init_hw(hw);
 	switch (error) {
 	case IXGBE_ERR_EEPROM_VERSION:
 		device_printf(dev, "This device is a pre-production adapter/"
 		    "LOM.  Please be aware there may be issues associated "
 		    "with your hardware.\n If you are experiencing problems "
 		    "please contact your Intel or hardware representative "
 		    "who provided you with this hardware.\n");
 		break;
 	case IXGBE_ERR_SFP_NOT_SUPPORTED:
 		device_printf(dev,"Unsupported SFP+ Module\n");
 		error = EIO;
 		goto err_late;
 	case IXGBE_ERR_SFP_NOT_PRESENT:
 		device_printf(dev,"No SFP+ Module found\n");
 		/* falls thru */
 	default:
 		break;
 	}
 
 	/* Detect and set physical type */
 	ixgbe_setup_optics(adapter);
 
 	if ((adapter->msix > 1) && (ixgbe_enable_msix))
 		error = ixgbe_allocate_msix(adapter); 
 	else
 		error = ixgbe_allocate_legacy(adapter); 
 	if (error) 
 		goto err_late;
 
 	/* Setup OS specific network interface */
 	if (ixgbe_setup_interface(dev, adapter) != 0) {
 		/*
 		 * 'error' is 0 at this point (the interrupt allocation
 		 * above succeeded), so set it explicitly; otherwise
 		 * attach would report success after tearing down.
 		 */
 		error = ENXIO;
 		goto err_late;
 	}
 
 	/* Initialize statistics */
 	ixgbe_update_stats_counters(adapter);
 
 	/* Register for VLAN events */
 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 	    ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 	    ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 
 	/*
 	** Check PCIE slot type/speed/width
 	*/
 	ixgbe_get_slot_info(hw);
 
 	/* Set an initial default flow control value */
 	adapter->fc =  ixgbe_fc_full;
 
 	/* let hardware know driver is loaded */
 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
 
 	ixgbe_add_hw_stats(adapter);
 
 #ifdef DEV_NETMAP
 	ixgbe_netmap_attach(adapter);
 #endif /* DEV_NETMAP */
 	INIT_DEBUGOUT("ixgbe_attach: end");
 	return (0);
 err_late:
 	/* Queues were allocated: release the ring structures first */
 	ixgbe_free_transmit_structures(adapter);
 	ixgbe_free_receive_structures(adapter);
 err_out:
 	if (adapter->ifp != NULL)
 		if_free(adapter->ifp);
 	ixgbe_free_pci_resources(adapter);
 	free(adapter->mta, M_DEVBUF);
 	return (error);
 }
 
 /*********************************************************************
  *  Device removal routine
  *
  *  The detach entry point is called when the driver is being removed.
  *  This routine stops the adapter and deallocates all the resources
  *  that were allocated for driver operation.
  *
  *  Detach is refused with EBUSY while a VLAN trunk is still attached
  *  to the interface.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 static int
 ixgbe_detach(device_t dev)
 {
 	struct adapter *adapter = device_get_softc(dev);
 	struct ix_queue *que = adapter->queues;
 	struct tx_ring *txr = adapter->tx_rings;
 	u32	ctrl_ext;
 
 	INIT_DEBUGOUT("ixgbe_detach: begin");
 
 	/* Make sure VLANS are not using driver */
 	if (adapter->ifp->if_vlantrunk != NULL) {
 		device_printf(dev,"Vlan in use, detach first\n");
 		return (EBUSY);
 	}
 
 	/* Stop the adapter under the core lock */
 	IXGBE_CORE_LOCK(adapter);
 	ixgbe_stop(adapter);
 	IXGBE_CORE_UNLOCK(adapter);
 
 	/*
 	 * Walk the queue and TX ring arrays in lockstep, draining the
 	 * tasks of each per-queue taskqueue before freeing it.
 	 */
 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
 		if (que->tq) {
 #ifndef IXGBE_LEGACY_TX
 			taskqueue_drain(que->tq, &txr->txq_task);
 #endif
 			taskqueue_drain(que->tq, &que->que_task);
 			taskqueue_free(que->tq);
 		}
 	}
 
 	/* Drain the Link queue */
 	if (adapter->tq) {
 		taskqueue_drain(adapter->tq, &adapter->link_task);
 		taskqueue_drain(adapter->tq, &adapter->mod_task);
 		taskqueue_drain(adapter->tq, &adapter->msf_task);
 #ifdef IXGBE_FDIR
 		taskqueue_drain(adapter->tq, &adapter->fdir_task);
 #endif
 		taskqueue_free(adapter->tq);
 	}
 
 	/* let hardware know driver is unloading */
 	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
 
 	/* Unregister VLAN events */
 	if (adapter->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
 	if (adapter->vlan_detach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
 
 	/* Detach from the network stack and wait out the timer callout */
 	ether_ifdetach(adapter->ifp);
 	callout_drain(&adapter->timer);
 #ifdef DEV_NETMAP
 	netmap_detach(adapter->ifp);
 #endif /* DEV_NETMAP */
 	ixgbe_free_pci_resources(adapter);
 	bus_generic_detach(dev);
 	if_free(adapter->ifp);
 
 	/* Release ring structures and the multicast array */
 	ixgbe_free_transmit_structures(adapter);
 	ixgbe_free_receive_structures(adapter);
 	free(adapter->mta, M_DEVBUF);
 
 	/* The core lock goes last */
 	IXGBE_CORE_LOCK_DESTROY(adapter);
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Shutdown entry point
  *
  *  Stops the adapter while holding the core lock; always returns 0.
  *
  **********************************************************************/
 
 static int
 ixgbe_shutdown(device_t dev)
 {
 	struct adapter *sc;
 
 	sc = device_get_softc(dev);
 
 	IXGBE_CORE_LOCK(sc);
 	ixgbe_stop(sc);
 	IXGBE_CORE_UNLOCK(sc);
 
 	return (0);
 }
 
 
 #ifdef IXGBE_LEGACY_TX
 /*********************************************************************
  *  Transmit entry point
  *
  *  ixgbe_start is called by the stack to initiate a transmit.
  *  The driver will remain in this routine as long as there are
  *  packets to transmit and transmit resources are available.
  *  In case resources are not available stack is notified and
  *  the packet is requeued.
  **********************************************************************/
 
 /*
  * Move packets from the interface send queue onto the given TX ring
  * until the queue is empty, the ring runs low on descriptors, or a
  * transmit attempt fails.  The ring's TX lock must be held (asserted).
  */
 static void
 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
 {
 	struct adapter	*sc = txr->adapter;
 	struct mbuf	*pkt;
 
 	IXGBE_TX_LOCK_ASSERT(txr);
 
 	/* Nothing to do unless the interface is running with link up */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !sc->link_active)
 		return;
 
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
 	    txr->tx_avail > IXGBE_QUEUE_MIN_FREE) {
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
 		if (pkt == NULL)
 			break;
 
 		if (ixgbe_xmit(txr, &pkt)) {
 			/* Put back whatever ixgbe_xmit left us holding */
 			if (pkt != NULL)
 				IFQ_DRV_PREPEND(&ifp->if_snd, pkt);
 			break;
 		}
 
 		/* Send a copy of the frame to the BPF listener */
 		ETHER_BPF_MTAP(ifp, pkt);
 
 		/* Set watchdog on */
 		txr->watchdog_time = ticks;
 		txr->queue_status = IXGBE_QUEUE_WORKING;
 	}
 }
 
 /*
  * Legacy TX start - called by the stack, this
  * always uses the first tx ring, and should
  * not be used with multiqueue tx enabled.
  *
  * Does nothing when the interface is not running.
  */
 static void
 ixgbe_start(struct ifnet *ifp)
 {
 	struct adapter	*sc = ifp->if_softc;
 	struct tx_ring	*ring = sc->tx_rings;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	IXGBE_TX_LOCK(ring);
 	ixgbe_start_locked(ring, ifp);
 	IXGBE_TX_UNLOCK(ring);
 }
 
 #else /* ! IXGBE_LEGACY_TX */
 
 /*
 ** Multiqueue Transmit driver
 **
 ** Picks a TX ring for the mbuf, enqueues it on that ring's buf_ring,
 ** and either transmits right away (if the ring lock can be taken
 ** without blocking) or defers the work to the queue's taskqueue.
 ** Returns the drbr_enqueue() error, or 0 once the mbuf is queued.
 */
 static int
 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
 {
 	struct adapter	*sc = ifp->if_softc;
 	struct tx_ring	*ring;
 	struct ix_queue	*queue;
 	int		idx, rc = 0;
 #ifdef	RSS
 	uint32_t	bucket;
 #endif
 
 	/*
 	 * Which queue to use:
 	 *
 	 * When doing RSS, map it to the same outbound queue
 	 * as the incoming flow would be mapped to.
 	 *
 	 * If everything is setup correctly, it should be the
 	 * same bucket that the current CPU we're on is.
 	 */
 	if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE) {
 		/* No flow hash on the packet: spread by current CPU */
 		idx = curcpu % sc->num_queues;
 	} else {
 #ifdef	RSS
 		if (rss_hash2bucket(m->m_pkthdr.flowid,
 		    M_HASHTYPE_GET(m), &bucket) == 0)
 			/* XXX TODO: spit out something if bucket > num_queues? */
 			idx = bucket % sc->num_queues;
 		else
 #endif
 			idx = m->m_pkthdr.flowid % sc->num_queues;
 	}
 
 	ring = &sc->tx_rings[idx];
 	queue = &sc->queues[idx];
 
 	rc = drbr_enqueue(ifp, ring->br, m);
 	if (rc)
 		return (rc);
 
 	if (!IXGBE_TX_TRYLOCK(ring)) {
 		/* Ring is busy: let the taskqueue drain it later */
 		taskqueue_enqueue(queue->tq, &ring->txq_task);
 		return (0);
 	}
 
 	ixgbe_mq_start_locked(ifp, ring);
 	IXGBE_TX_UNLOCK(ring);
 
 	return (0);
 }
 
 /*
  * Drain the ring's buf_ring onto the hardware TX ring.
  *
  * Returns ENETDOWN when the interface is not running or the link is
  * down; otherwise returns the last error recorded while transmitting
  * (0 if none).  The callers visible in this file invoke it with the
  * ring's TX lock held.
  *
  * Note that the opening of the loop and of the failure branch is
  * selected by the __FreeBSD_version check, while the closing braces
  * are shared by both variants.
  */
 static int
 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
 {
 	struct adapter  *adapter = txr->adapter;
         struct mbuf     *next;
         int             enqueued = 0, err = 0;
 
 	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
 	    adapter->link_active == 0)
 		return (ENETDOWN);
 
 	/* Process the queue */
 #if __FreeBSD_version < 901504
 	/*
 	 * Older API: dequeue first; on a transmit failure re-enqueue
 	 * the mbuf if ixgbe_xmit left one behind.
 	 */
 	next = drbr_dequeue(ifp, txr->br);
 	while (next != NULL) {
 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
 			if (next != NULL)
 				err = drbr_enqueue(ifp, txr->br, next);
 #else
 	/*
 	 * Newer API: peek at the head; on a transmit failure either
 	 * advance past it (ixgbe_xmit left next == NULL) or put the
 	 * returned mbuf back at the head.
 	 */
 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
 			if (next == NULL) {
 				drbr_advance(ifp, txr->br);
 			} else {
 				drbr_putback(ifp, txr->br, next);
 			}
 #endif
 			break;
 		}
 #if __FreeBSD_version >= 901504
 		/* Transmit succeeded: consume the peeked entry */
 		drbr_advance(ifp, txr->br);
 #endif
 		enqueued++;
 		/* Send a copy of the frame to the BPF listener */
 		ETHER_BPF_MTAP(ifp, next);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 #if __FreeBSD_version < 901504
 		next = drbr_dequeue(ifp, txr->br);
 #endif
 	}
 
 	if (enqueued > 0) {
 		/* Set watchdog on */
 		txr->queue_status = IXGBE_QUEUE_WORKING;
 		txr->watchdog_time = ticks;
 	}
 
 	/* Running low on descriptors: clean up completed transmits */
 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
 		ixgbe_txeof(txr);
 
 	return (err);
 }
 
 /*
  * Called from a taskqueue to drain queued transmit packets.
  *
  * 'arg' is the tx_ring to service; 'pending' is not used.
  */
 static void
 ixgbe_deferred_mq_start(void *arg, int pending)
 {
 	struct tx_ring	*ring = arg;
 	struct ifnet	*ifp = ring->adapter->ifp;
 
 	IXGBE_TX_LOCK(ring);
 	/* Only start transmitting if something is actually queued */
 	if (!drbr_empty(ifp, ring->br))
 		ixgbe_mq_start_locked(ifp, ring);
 	IXGBE_TX_UNLOCK(ring);
 }
 
 /*
 ** Flush all ring buffers
 **
 ** Every mbuf still sitting in a TX ring's buf_ring is freed under
 ** that ring's lock, then the generic interface queue is flushed.
 */
 static void
 ixgbe_qflush(struct ifnet *ifp)
 {
 	struct adapter	*sc = ifp->if_softc;
 	struct tx_ring	*ring;
 	struct mbuf	*pkt;
 	int		q;
 
 	for (q = 0; q < sc->num_queues; q++) {
 		ring = &sc->tx_rings[q];
 
 		IXGBE_TX_LOCK(ring);
 		for (;;) {
 			pkt = buf_ring_dequeue_sc(ring->br);
 			if (pkt == NULL)
 				break;
 			m_freem(pkt);
 		}
 		IXGBE_TX_UNLOCK(ring);
 	}
 
 	if_qflush(ifp);
 }
 #endif /* IXGBE_LEGACY_TX */
 
 /*********************************************************************
  *  Ioctl entry point
  *
  *  ixgbe_ioctl is called when the user wants to configure the
  *  interface.  Commands that are not handled here are handed to
  *  ether_ioctl().
  *
  *  ifp     - interface the request is for (if_softc is the adapter)
  *  command - SIOC* request code
  *  data    - request argument; viewed as a struct ifreq (and, for
  *            SIOCSIFADDR, as a struct ifaddr)
  *
  *  return 0 on success, positive on failure
  **********************************************************************/
 
 static int
 ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ifreq	*ifr = (struct ifreq *) data;
 #if defined(INET) || defined(INET6)
 	struct ifaddr *ifa = (struct ifaddr *)data;
 	bool		avoid_reset = FALSE;
 #endif
 	int             error = 0;
 
 	switch (command) {
 
 	case SIOCSIFADDR:
 #ifdef INET
 		if (ifa->ifa_addr->sa_family == AF_INET)
 			avoid_reset = TRUE;
 #endif
 #ifdef INET6
 		if (ifa->ifa_addr->sa_family == AF_INET6)
 			avoid_reset = TRUE;
 #endif
 #if defined(INET) || defined(INET6)
 		/*
 		** Calling init results in link renegotiation,
 		** so we avoid doing it when possible.
 		*/
 		if (avoid_reset) {
 			ifp->if_flags |= IFF_UP;
 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
 				ixgbe_init(adapter);
 			if (!(ifp->if_flags & IFF_NOARP))
 				arp_ifinit(ifp, ifa);
 		} else
 			error = ether_ioctl(ifp, command, data);
 #endif
 		break;
 	case SIOCSIFMTU:
 		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
 		/*
 		** max_frame_size below is MTU + Ethernet header + CRC,
 		** so both overheads have to come off the frame size
 		** limit; subtracting the header alone would accept an
 		** MTU whose frame exceeds IXGBE_MAX_FRAME_SIZE.
 		*/
 		if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE -
 		    (ETHER_HDR_LEN + ETHER_CRC_LEN)) {
 			error = EINVAL;
 		} else {
 			IXGBE_CORE_LOCK(adapter);
 			ifp->if_mtu = ifr->ifr_mtu;
 			adapter->max_frame_size =
 				ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
 			/* Reinitialize so the new frame size takes effect */
 			ixgbe_init_locked(adapter);
 			IXGBE_CORE_UNLOCK(adapter);
 		}
 		break;
 	case SIOCSIFFLAGS:
 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
 		IXGBE_CORE_LOCK(adapter);
 		if (ifp->if_flags & IFF_UP) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				/*
 				** Already running: only reprogram the
 				** filters when PROMISC/ALLMULTI changed
 				** since the last saved flags.
 				*/
 				if ((ifp->if_flags ^ adapter->if_flags) &
 				    (IFF_PROMISC | IFF_ALLMULTI)) {
 					ixgbe_set_promisc(adapter);
 				}
 			} else
 				ixgbe_init_locked(adapter);
 		} else
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				ixgbe_stop(adapter);
 		/* Remember the flags for the next comparison */
 		adapter->if_flags = ifp->if_flags;
 		IXGBE_CORE_UNLOCK(adapter);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			IXGBE_CORE_LOCK(adapter);
 			/* Interrupts are masked while the table is rewritten */
 			ixgbe_disable_intr(adapter);
 			ixgbe_set_multi(adapter);
 			ixgbe_enable_intr(adapter);
 			IXGBE_CORE_UNLOCK(adapter);
 		}
 		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
 		break;
 	case SIOCSIFCAP:
 	{
 		/* Bits that differ between the request and the current set */
 		int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
 		if (mask & IFCAP_HWCSUM)
 			ifp->if_capenable ^= IFCAP_HWCSUM;
 		if (mask & IFCAP_TSO4)
 			ifp->if_capenable ^= IFCAP_TSO4;
 		if (mask & IFCAP_TSO6)
 			ifp->if_capenable ^= IFCAP_TSO6;
 		if (mask & IFCAP_LRO)
 			ifp->if_capenable ^= IFCAP_LRO;
 		if (mask & IFCAP_VLAN_HWTAGGING)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 		if (mask & IFCAP_VLAN_HWFILTER)
 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 		if (mask & IFCAP_VLAN_HWTSO)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			IXGBE_CORE_LOCK(adapter);
 			ixgbe_init_locked(adapter);
 			IXGBE_CORE_UNLOCK(adapter);
 		}
 		VLAN_CAPABILITIES(ifp);
 		break;
 	}
 	case SIOCGI2C:
 	{
 		struct ifi2creq i2c;
 		int i;
 		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
 		if (error != 0)
 			break;
 		/* Only device addresses 0xA0 and 0xA2 are accepted */
 		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
 			error = EINVAL;
 			break;
 		}
 		/* Never read more than the request buffer can hold */
 		if (i2c.len > sizeof(i2c.data)) {
 			error = EINVAL;
 			break;
 		}
 
 		for (i = 0; i < i2c.len; i++)
 			hw->phy.ops.read_i2c_byte(hw, i2c.offset + i,
 			    i2c.dev_addr, &i2c.data[i]);
 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
 		break;
 	}
 	default:
 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 /*********************************************************************
  *  Init entry point
  *
  *  This routine is used in two ways. It is used by the stack as
  *  init entry point in network interface structure. It is also used
  *  by the driver as a hw/sw initialization routine to get to a
  *  consistent state.
  *
  *  The core mutex must be held by the caller (asserted below).
  *
  *  Nothing is returned.  If the transmit or receive structures
  *  cannot be set up the adapter is stopped with ixgbe_stop() and the
  *  routine returns early; it also returns early when the PHY identify
  *  op reports an unsupported SFP+ module.  IFF_DRV_RUNNING is only
  *  set by this routine when it runs to completion.
  **********************************************************************/
 #define IXGBE_MHADD_MFS_SHIFT 16
 
 static void
 ixgbe_init_locked(struct adapter *adapter)
 {
 	struct ifnet   *ifp = adapter->ifp;
 	device_t 	dev = adapter->dev;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32		k, txdctl, mhadd, gpie;
 	u32		rxdctl, rxctrl;
 
 	mtx_assert(&adapter->core_mtx, MA_OWNED);
 	INIT_DEBUGOUT("ixgbe_init_locked: begin");
 	/* NOTE(review): presumably cleared so ixgbe_stop_adapter() does a full stop - confirm */
 	hw->adapter_stopped = FALSE;
 	ixgbe_stop_adapter(hw);
         callout_stop(&adapter->timer);
 
         /* reprogram the RAR[0] in case user changed it. */
         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
 
 	/* Get the latest mac address, User can use a LAA */
 	bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
 	      IXGBE_ETH_LENGTH_OF_ADDRESS);
 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
 	hw->addr_ctrl.rar_used_count = 1;
 
 	/* Set the various hardware offload abilities */
 	ifp->if_hwassist = 0;
 	if (ifp->if_capenable & IFCAP_TSO)
 		ifp->if_hwassist |= CSUM_TSO;
 	if (ifp->if_capenable & IFCAP_TXCSUM) {
 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
 #if __FreeBSD_version >= 800000
 		/* SCTP checksum assist is enabled on everything but 82598 */
 		if (hw->mac.type != ixgbe_mac_82598EB)
 			ifp->if_hwassist |= CSUM_SCTP;
 #endif
 	}
 
 	/* Prepare transmit descriptors and buffers */
 	if (ixgbe_setup_transmit_structures(adapter)) {
 		device_printf(dev,"Could not setup transmit structures\n");
 		ixgbe_stop(adapter);
 		return;
 	}
 
 	ixgbe_init_hw(hw);
 	ixgbe_initialize_transmit_units(adapter);
 
 	/* Setup Multicast table */
 	ixgbe_set_multi(adapter);
 
 	/*
 	** Determine the correct mbuf pool
 	** for doing jumbo frames
 	*/
 	if (adapter->max_frame_size <= 2048)
 		adapter->rx_mbuf_sz = MCLBYTES;
 	else if (adapter->max_frame_size <= 4096)
 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
 	else if (adapter->max_frame_size <= 9216)
 		adapter->rx_mbuf_sz = MJUM9BYTES;
 	else
 		adapter->rx_mbuf_sz = MJUM16BYTES;
 
 	/* Prepare receive descriptors and buffers */
 	if (ixgbe_setup_receive_structures(adapter)) {
 		device_printf(dev,"Could not setup receive structures\n");
 		ixgbe_stop(adapter);
 		return;
 	}
 
 	/* Configure RX settings */
 	ixgbe_initialize_receive_units(adapter);
 
 	/* Build up the GPIE value; it is written back once, below */
 	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
 
 	/* Enable Fan Failure Interrupt */
 	gpie |= IXGBE_SDP1_GPIEN;
 
 	/* Add for Module detection */
 	if (hw->mac.type == ixgbe_mac_82599EB)
 		gpie |= IXGBE_SDP2_GPIEN;
 
 	/* Thermal Failure Detection */
 	if (hw->mac.type == ixgbe_mac_X540)
 		gpie |= IXGBE_SDP0_GPIEN;
 
 	if (adapter->msix > 1) {
 		/* Enable Enhanced MSIX mode */
 		gpie |= IXGBE_GPIE_MSIX_MODE;
 		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
 		    IXGBE_GPIE_OCD;
 	}
 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
 
 	/* Set MTU size (MHADD is only touched for MTUs above ETHERMTU) */
 	if (ifp->if_mtu > ETHERMTU) {
 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
 	}
 	
 	/* Now enable all the queues */
 
 	for (int i = 0; i < adapter->num_queues; i++) {
 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
 		txdctl |= IXGBE_TXDCTL_ENABLE;
 		/* Set WTHRESH to 8, burst writeback */
 		txdctl |= (8 << 16);
 		/*
 		 * When the internal queue falls below PTHRESH (32),
 		 * start prefetching as long as there are at least
 		 * HTHRESH (1) buffers ready. The values are taken
 		 * from the Intel linux driver 3.8.21.
 		 * Prefetching enables tx line rate even with 1 queue.
 		 */
 		txdctl |= (32 << 0) | (1 << 8);
 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
 	}
 
 	for (int i = 0; i < adapter->num_queues; i++) {
 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
 		if (hw->mac.type == ixgbe_mac_82598EB) {
 			/*
 			** PTHRESH = 21
 			** HTHRESH = 4
 			** WTHRESH = 8
 			*/
 			rxdctl &= ~0x3FFFFF;
 			rxdctl |= 0x080420;
 		}
 		rxdctl |= IXGBE_RXDCTL_ENABLE;
 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
 		/* Poll up to 10 times, 1ms apart, for the enable bit to read back */
 		for (k = 0; k < 10; k++) {
 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
 			    IXGBE_RXDCTL_ENABLE)
 				break;
 			else
 				msec_delay(1);
 		}
 		wmb();
 #ifdef DEV_NETMAP
 		/*
 		 * In netmap mode, we must preserve the buffers made
 		 * available to userspace before the if_init()
 		 * (this is true by default on the TX side, because
 		 * init makes all buffers available to userspace).
 		 *
 		 * netmap_reset() and the device specific routines
 		 * (e.g. ixgbe_setup_receive_rings()) map these
 		 * buffers at the end of the NIC ring, so here we
 		 * must set the RDT (tail) register to make sure
 		 * they are not overwritten.
 		 *
 		 * In this driver the NIC ring starts at RDH = 0,
 		 * RDT points to the last slot available for reception (?),
 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
 		 */
 		if (ifp->if_capenable & IFCAP_NETMAP) {
 			struct netmap_adapter *na = NA(adapter->ifp);
 			struct netmap_kring *kring = &na->rx_rings[i];
 			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
 
 			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
 		} else
 #endif /* DEV_NETMAP */
 		/* Body of the netmap "else" above when DEV_NETMAP is defined */
 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
 	}
 
 	/* Enable Receive engine */
 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
 	rxctrl |= IXGBE_RXCTRL_RXEN;
 	ixgbe_enable_rx_dma(hw, rxctrl);
 
 	/* Rearm the periodic timer that was stopped at the top */
 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
 
 	/* Set up MSI/X routing */
 	if (ixgbe_enable_msix)  {
 		ixgbe_configure_ivars(adapter);
 		/* Set up auto-mask */
 		if (hw->mac.type == ixgbe_mac_82598EB)
 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
 		else {
 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
 		}
 	} else {  /* Simple settings for Legacy/MSI */
                 ixgbe_set_ivar(adapter, 0, 0, 0);
                 ixgbe_set_ivar(adapter, 0, 0, 1);
 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
 	}
 
 #ifdef IXGBE_FDIR
 	/* Init Flow director */
 	if (hw->mac.type != ixgbe_mac_82598EB) {
 		u32 hdrm = 32 << fdir_pballoc;
 
 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
 	}
 #endif
 
 	/*
 	** Check on any SFP devices that
 	** need to be kick-started
 	*/
 	if (hw->phy.type == ixgbe_phy_none) {
 		int err = hw->phy.ops.identify(hw);
 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                 	device_printf(dev,
 			    "Unsupported SFP+ module type was detected.\n");
 			/* Bail out before link setup and interrupt enable */
 			return;
         	}
 	}
 
 	/* Set moderation on the Link interrupt */
 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
 
 	/* Config/Enable Link */
 	ixgbe_config_link(adapter);
 
 	/* Hardware Packet Buffer & Flow Control setup */
 	{
 		u32 rxpb, frame, size, tmp;
 
 		frame = adapter->max_frame_size;
 
 		/* Calculate High Water */
 		if (hw->mac.type == ixgbe_mac_X540)
 			tmp = IXGBE_DV_X540(frame, frame);
 		else
 			tmp = IXGBE_DV(frame, frame);
 		size = IXGBE_BT2KB(tmp);
 		/* NOTE(review): the >> 10 presumably converts RXPBSIZE to KB - confirm */
 		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
 		hw->fc.high_water[0] = rxpb - size;
 
 		/* Now calculate Low Water */
 		if (hw->mac.type == ixgbe_mac_X540)
 			tmp = IXGBE_LOW_DV_X540(frame);
 		else
 			tmp = IXGBE_LOW_DV(frame);
 		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
 		
 		hw->fc.requested_mode = adapter->fc;
 		hw->fc.pause_time = IXGBE_FC_PAUSE;
 		hw->fc.send_xon = TRUE;
 	}
 	/* Initialize the FC settings */
 	ixgbe_start_hw(hw);
 
 	/* Set up VLAN support and filter */
 	ixgbe_setup_vlan_hw_support(adapter);
 
 	/* And now turn on interrupts */
 	ixgbe_enable_intr(adapter);
 
 	/* Now inform the stack we're ready */
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 
 	return;
 }
 
 /*
 ** Locking wrapper: runs ixgbe_init_locked() with the
 ** adapter's core lock held.  'arg' is the struct adapter.
 */
 static void
 ixgbe_init(void *arg)
 {
 	struct adapter *sc = (struct adapter *)arg;
 
 	IXGBE_CORE_LOCK(sc);
 	ixgbe_init_locked(sc);
 	IXGBE_CORE_UNLOCK(sc);
 }
 
 
 /*
 **
 ** MSIX Interrupt Handlers and Tasklets
 **
 */
 
 /*
 ** Unmask the interrupt for one queue vector.
 **
 ** On 82598 the bit is written to EIMS (restricted to the RTX queue
 ** bits); on later MACs the 64-bit vector mask is split across
 ** EIMS_EX(0) (low 32 bits) and EIMS_EX(1) (high 32 bits).
 */
 static inline void
 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	/*
 	** Widen before shifting: shifting a plain int by 31 or more
 	** overflows, and vectors >= 32 must land in the upper half.
 	*/
 	u64	queue = (u64)1 << vector;
 	u32	mask;
 
 	if (hw->mac.type == ixgbe_mac_82598EB) {
 		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
 	} else {
 		mask = (queue & 0xFFFFFFFF);
 		if (mask)
 			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
 		mask = (queue >> 32);
 		if (mask)
 			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
 	}
 }
 
 /*
 ** Mask the interrupt for one queue vector.
 **
 ** On 82598 the bit is written to EIMC (restricted to the RTX queue
 ** bits); on later MACs the 64-bit vector mask is split across
 ** EIMC_EX(0) (low 32 bits) and EIMC_EX(1) (high 32 bits).
 */
 static inline void
 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	/*
 	** Widen before shifting: shifting a plain int by 31 or more
 	** overflows, and vectors >= 32 must land in the upper half.
 	*/
 	u64	queue = (u64)1 << vector;
 	u32	mask;
 
 	if (hw->mac.type == ixgbe_mac_82598EB) {
 		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
 		IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
 	} else {
 		mask = (queue & 0xFFFFFFFF);
 		if (mask)
 			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
 		mask = (queue >> 32);
 		if (mask)
 			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
 	}
 }
 
 /*
 ** Deferred (taskqueue) handler for one queue: while the interface
 ** is running it services RX, cleans TX and restarts transmission,
 ** then re-enables the interrupt.  'context' is the struct ix_queue;
 ** 'pending' is not used.
 */
 static void
 ixgbe_handle_que(void *context, int pending)
 {
 	struct ix_queue	*q = context;
 	struct adapter	*sc = q->adapter;
 	struct tx_ring	*ring = q->txr;
 	struct ifnet	*ifp = sc->ifp;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		/* The "more work" result is not acted upon here */
 		(void)ixgbe_rxeof(q);
 		IXGBE_TX_LOCK(ring);
 		ixgbe_txeof(ring);
 #ifdef IXGBE_LEGACY_TX
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			ixgbe_start_locked(ring, ifp);
 #else
 		if (!drbr_empty(ifp, ring->br))
 			ixgbe_mq_start_locked(ifp, ring);
 #endif
 		IXGBE_TX_UNLOCK(ring);
 	}
 
 	/*
 	** Unmask again: the whole adapter when the queue has no
 	** interrupt resource of its own, otherwise just this vector.
 	*/
 	if (q->res == NULL)
 		ixgbe_enable_intr(sc);
 	else
 		ixgbe_enable_queue(sc, q->msix);
 }
 
 
 /*********************************************************************
  *
  *  Legacy Interrupt Service routine
  *
  **********************************************************************/
 
 /*
 ** Handler for the single legacy interrupt.  'arg' is the
 ** struct ix_queue.  Reads the cause from EICR, services RX and the
 ** first TX ring, reports fan failure, schedules the link task on a
 ** link status change, and either defers remaining RX work to the
 ** queue task or re-enables interrupts.
 */
 static void
 ixgbe_legacy_irq(void *arg)
 {
 	struct ix_queue	*q = arg;
 	struct adapter	*sc = q->adapter;
 	struct ixgbe_hw	*hw = &sc->hw;
 	struct ifnet	*ifp = sc->ifp;
 	struct tx_ring	*ring = sc->tx_rings;
 	u32		cause;
 	bool		rx_pending;
 
 	cause = IXGBE_READ_REG(hw, IXGBE_EICR);
 	++q->irqs;
 
 	/* No cause bits: just unmask again and leave */
 	if (cause == 0) {
 		ixgbe_enable_intr(sc);
 		return;
 	}
 
 	rx_pending = ixgbe_rxeof(q);
 
 	IXGBE_TX_LOCK(ring);
 	ixgbe_txeof(ring);
 #ifndef IXGBE_LEGACY_TX
 	if (!drbr_empty(ifp, ring->br))
 		ixgbe_mq_start_locked(ifp, ring);
 #else
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		ixgbe_start_locked(ring, ifp);
 #endif
 	IXGBE_TX_UNLOCK(ring);
 
 	/* Fan failure is only checked on copper media */
 	if ((cause & IXGBE_EICR_GPI_SDP1) &&
 	    (hw->phy.media_type == ixgbe_media_type_copper)) {
 		device_printf(sc->dev, "\nCRITICAL: FAN FAILURE!! "
 		    "REPLACE IMMEDIATELY!!\n");
 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
 	}
 
 	/* Hand link status changes to the link task */
 	if (cause & IXGBE_EICR_LSC)
 		taskqueue_enqueue(sc->tq, &sc->link_task);
 
 	/* Done with RX: unmask; otherwise let the queue task continue */
 	if (!rx_pending) {
 		ixgbe_enable_intr(sc);
 		return;
 	}
 	taskqueue_enqueue(q->tq, &q->que_task);
 }
 
 
 /*********************************************************************
  *
  *  MSIX Queue Interrupt Service routine
  *
  **********************************************************************/
 void
 ixgbe_msix_que(void *arg)
 {
 	struct ix_queue	*que = arg;
 	struct adapter  *adapter = que->adapter;
 	struct ifnet    *ifp = adapter->ifp;
 	struct tx_ring	*txr = que->txr;
 	struct rx_ring	*rxr = que->rxr;
 	bool		more;
 	u32		newitr = 0;
 
 	/* Protect against spurious interrupts */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	ixgbe_disable_queue(adapter, que->msix);
 	++que->irqs;
 
 	more = ixgbe_rxeof(que);
 
 	IXGBE_TX_LOCK(txr);
 	ixgbe_txeof(txr);
 #ifdef IXGBE_LEGACY_TX
 	if (!IFQ_DRV_IS_EMPTY(ifp->if_snd))
 		ixgbe_start_locked(txr, ifp);
 #else
 	if (!drbr_empty(ifp, txr->br))
 		ixgbe_mq_start_locked(ifp, txr);
 #endif
 	IXGBE_TX_UNLOCK(txr);
 
 	/* Do AIM now? */
 
 	if (ixgbe_enable_aim == FALSE)
 		goto no_calc;
 	/*
 	** Do Adaptive Interrupt Moderation:
         **  - Write out last calculated setting
 	**  - Calculate based on average size over
 	**    the last interval.
 	*/
         if (que->eitr_setting)
                 IXGBE_WRITE_REG(&adapter->hw,
                     IXGBE_EITR(que->msix), que->eitr_setting);
  
         que->eitr_setting = 0;
 
         /* Idle, do nothing */
         if ((txr->bytes == 0) && (rxr->bytes == 0))
                 goto no_calc;
                                 
 	if ((txr->bytes) && (txr->packets))
                	newitr = txr->bytes/txr->packets;
 	if ((rxr->bytes) && (rxr->packets))
 		newitr = max(newitr,
 		    (rxr->bytes / rxr->packets));
 	newitr += 24; /* account for hardware frame, crc */
 
 	/* set an upper boundary */
 	newitr = min(newitr, 3000);
 
 	/* Be nice to the mid range */
 	if ((newitr > 300) && (newitr < 1200))
 		newitr = (newitr / 3);
 	else
 		newitr = (newitr / 2);
 
         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
                 newitr |= newitr << 16;
         else
                 newitr |= IXGBE_EITR_CNT_WDIS;
                  
         /* save for next interrupt */
         que->eitr_setting = newitr;
 
         /* Reset state */
         txr->bytes = 0;
         txr->packets = 0;
         rxr->bytes = 0;
         rxr->packets = 0;
 
 no_calc:
 	if (more)
 		taskqueue_enqueue(que->tq, &que->que_task);
 	else
 		ixgbe_enable_queue(adapter, que->msix);
 	return;
 }
 
 
 /*
 ** MSIX handler for the link/"other" vector.  'arg' is the
 ** struct adapter.
 **
 ** Reads the pending causes, acknowledges the non-queue ones, and
 ** dispatches: link status change -> link_task; on non-82598 MACs
 ** flow director -> fdir_task (IXGBE_FDIR only), ECC error -> console
 ** message, SDP1 -> msf_task, SDP2 -> mod_task; fan failure on the
 ** 82598AT and over-temperature on X540 -> console messages.
 ** Finally re-enables the "other" interrupt via EIMS.
 */
 static void
 ixgbe_msix_link(void *arg)
 {
 	struct adapter	*adapter = arg;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32		reg_eicr;
 
 	++adapter->link_irq;
 
 	/* First get the cause (note: read from EICS, not EICR) */
 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
 	/* Be sure the queue bits are not cleared */
 	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
 	/* Clear interrupt with write */
 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
 
 	/* Link status change */
 	if (reg_eicr & IXGBE_EICR_LSC)
 		taskqueue_enqueue(adapter->tq, &adapter->link_task);
 
 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
 #ifdef IXGBE_FDIR
 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
 			/* This is probably overkill :) */
 			/*
 			** Returns if fdir_reinit was already set.  This
 			** early return skips the EIMS_OTHER write at the
 			** end of the function.
 			*/
 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
 				return;
                 	/* Disable the interrupt */
 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
 			taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
 		} else
 #endif
 		/* With IXGBE_FDIR defined this "if" is the else-branch above */
 		if (reg_eicr & IXGBE_EICR_ECC) {
                 	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
 			    "Please Reboot!!\n");
 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
 		} else
 
 		/* Continuation of the else-chain: SDP1, then SDP2 */
 		if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
                 	/* Clear the interrupt */
                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
 			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
         	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
                 	/* Clear the interrupt */
                 	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
 			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
 		}
         } 
 
 	/* Check for fan failure */
 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
 		    "REPLACE IMMEDIATELY!!\n");
 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
 	}
 
 	/* Check for over temp condition */
 	if ((hw->mac.type == ixgbe_mac_X540) &&
 	    (reg_eicr & IXGBE_EICR_TS)) {
                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
 		    "PHY IS SHUT DOWN!!\n");
                 device_printf(adapter->dev, "System shutdown required\n");
 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
 	}
 
 	/* Re-enable the "other" (non-queue) interrupt */
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
 	return;
 }
 
 /*********************************************************************
  *
  *  Media status callback
  *
  *  Refreshes the link state and fills in 'ifmr': status is always
  *  marked valid with an Ethernet media type; when the link is active
  *  the ACTIVE bit and the subtype/duplex for the current link speed
  *  are added.  Runs with the core lock held for its duration.
  *
  **********************************************************************/
 static void
 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
 {
 	struct adapter *sc = ifp->if_softc;
 
 	INIT_DEBUGOUT("ixgbe_media_status: begin");
 	IXGBE_CORE_LOCK(sc);
 	ixgbe_update_link_status(sc);
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	if (sc->link_active) {
 		ifmr->ifm_status |= IFM_ACTIVE;
 
 		/* Any other speed adds nothing beyond IFM_ETHER */
 		if (sc->link_speed == IXGBE_LINK_SPEED_100_FULL)
 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
 		else if (sc->link_speed == IXGBE_LINK_SPEED_1GB_FULL)
 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
 		else if (sc->link_speed == IXGBE_LINK_SPEED_10GB_FULL)
 			ifmr->ifm_active |= sc->optics | IFM_FDX;
 	}
 
 	IXGBE_CORE_UNLOCK(sc);
 }
 
 /*********************************************************************
  *
  *  Media change callback
  *
  *  Only Ethernet media with the "auto" subtype is accepted; for it
  *  the PHY is told to advertise 100Mb, 1Gb and 10Gb full duplex.
  *
  *  Returns 0 on success, EINVAL for any other type or subtype.
  *
  **********************************************************************/
 static int
 ixgbe_media_change(struct ifnet * ifp)
 {
 	struct adapter *sc = ifp->if_softc;
 	struct ifmedia *media = &sc->media;
 
 	INIT_DEBUGOUT("ixgbe_media_change: begin");
 
 	if (IFM_TYPE(media->ifm_media) != IFM_ETHER)
 		return (EINVAL);
 
 	if (IFM_SUBTYPE(media->ifm_media) != IFM_AUTO) {
 		device_printf(sc->dev, "Only auto media type\n");
 		return (EINVAL);
 	}
 
 	sc->hw.phy.autoneg_advertised = IXGBE_LINK_SPEED_100_FULL |
 	    IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_10GB_FULL;
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  This routine maps the mbufs to tx descriptors, allowing the
  *  TX engine to transmit the packets. 
  *  	- return 0 on success, positive on failure
  *
  *  txr     - ring to transmit on
  *  m_headp - in/out packet pointer.  It may be replaced by a
  *            defragmented chain, and is set to NULL on the paths
  *            below that free the mbuf (or where the context setup
  *            reports ENOBUFS).  On the other error paths it is left
  *            pointing at the packet.
  *
  **********************************************************************/
 
 static int
 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
 {
 	struct adapter  *adapter = txr->adapter;
 	u32		olinfo_status = 0, cmd_type_len;
 	int             i, j, error, nsegs;
 	int		first;
 	bool		remap = TRUE;
 	struct mbuf	*m_head;
 	bus_dma_segment_t segs[adapter->num_segs];
 	bus_dmamap_t	map;
 	struct ixgbe_tx_buf *txbuf;
 	union ixgbe_adv_tx_desc *txd = NULL;
 
 	m_head = *m_headp;
 
 	/* Basic descriptor defines */
         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
 
 	if (m_head->m_flags & M_VLANTAG)
         	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
 
         /*
          * Important to capture the first descriptor
          * used because it will contain the index of
          * the one we tell the hardware to report back
          */
         first = txr->next_avail_desc;
 	txbuf = &txr->tx_buffers[first];
 	map = txbuf->map;
 
 	/*
 	 * Map the packet for DMA.
 	 */
 retry:
 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 	if (__predict_false(error)) {
 		struct mbuf *m;
 
 		switch (error) {
 		case EFBIG:
 			/* Try it again? - one try */
 			if (remap == TRUE) {
 				remap = FALSE;
 				m = m_defrag(*m_headp, M_NOWAIT);
 				if (m == NULL) {
 					/* Defrag failed: drop the packet */
 					adapter->mbuf_defrag_failed++;
 					m_freem(*m_headp);
 					*m_headp = NULL;
 					return (ENOBUFS);
 				}
 				*m_headp = m;
 				goto retry;
 			} else
 				return (error);
 		case ENOMEM:
 			/* The mbuf is not freed on this path */
 			txr->no_tx_dma_setup++;
 			return (error);
 		default:
 			txr->no_tx_dma_setup++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (error);
 		}
 	}
 
 	/* Make certain there are enough descriptors */
 	if (nsegs > txr->tx_avail - 2) {
 		txr->no_desc_avail++;
 		bus_dmamap_unload(txr->txtag, map);
 		return (ENOBUFS);
 	}
 	/* Re-read: the chain may have been replaced by m_defrag() above */
 	m_head = *m_headp;
 
 	/*
 	** Set up the appropriate offload context
 	** this will consume the first descriptor
 	*/
 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
 	if (__predict_false(error)) {
 		/* NOTE(review): presumably the mbuf was already freed on ENOBUFS - confirm */
 		if (error == ENOBUFS)
 			*m_headp = NULL;
 		return (error);
 	}
 
 #ifdef IXGBE_FDIR
 	/* Do the flow director magic */
 	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
 		++txr->atr_count;
 		if (txr->atr_count >= atr_sample_rate) {
 			ixgbe_atr(txr, m_head);
 			txr->atr_count = 0;
 		}
 	}
 #endif
 
 	/* Fill one data descriptor per DMA segment, wrapping at num_desc */
 	i = txr->next_avail_desc;
 	for (j = 0; j < nsegs; j++) {
 		bus_size_t seglen;
 		bus_addr_t segaddr;
 
 		txbuf = &txr->tx_buffers[i];
 		txd = &txr->tx_base[i];
 		seglen = segs[j].ds_len;
 		segaddr = htole64(segs[j].ds_addr);
 
 		txd->read.buffer_addr = segaddr;
 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
 		    cmd_type_len |seglen);
 		txd->read.olinfo_status = htole32(olinfo_status);
 
 		if (++i == txr->num_desc)
 			i = 0;
 	}
 
 	/* txd/txbuf now refer to the last descriptor written: mark EOP + RS */
 	txd->read.cmd_type_len |=
 	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
 	txr->tx_avail -= nsegs;
 	txr->next_avail_desc = i;
 
 	txbuf->m_head = m_head;
 	/*
 	** Here we swap the map so the last descriptor,
 	** which gets the completion interrupt has the
 	** real map, and the first descriptor gets the
 	** unused map from this descriptor.
 	*/
 	txr->tx_buffers[first].map = txbuf->map;
 	txbuf->map = map;
 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
 
         /* Set the EOP descriptor that will be marked done */
         txbuf = &txr->tx_buffers[first];
 	txbuf->eop = txd;
 
         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	/*
 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
 	 * hardware that this frame is available to transmit.
 	 */
 	++txr->total_packets;
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
 
 	return (0);
 
 }
 
 /*
 ** Program the unicast/multicast promiscuous bits (UPE/MPE) in FCTRL
 ** from the interface flags.  FCTRL is first written with UPE cleared
 ** (and MPE cleared when fewer than MAX_NUM_MULTICAST_ADDRESSES link
 ** layer multicast addresses are in use), then written again if
 ** IFF_PROMISC or IFF_ALLMULTI is set.
 */
 static void
 ixgbe_set_promisc(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct ifnet	*ifp = adapter->ifp;
 	u_int32_t	fctrl;
 	int		naddrs = 0;
 
 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
 	fctrl &= ~IXGBE_FCTRL_UPE;
 
 	if (!(ifp->if_flags & IFF_ALLMULTI)) {
 		struct ifmultiaddr *ifma;
 
 		/* Count link-layer multicast addresses, capped at the max */
 #if __FreeBSD_version >= 800000
 		if_maddr_rlock(ifp);
 #else
 		IF_ADDR_LOCK(ifp);
 #endif
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family == AF_LINK) {
 				if (naddrs == MAX_NUM_MULTICAST_ADDRESSES)
 					break;
 				naddrs++;
 			}
 		}
 #if __FreeBSD_version >= 800000
 		if_maddr_runlock(ifp);
 #else
 		IF_ADDR_UNLOCK(ifp);
 #endif
 	} else
 		naddrs = MAX_NUM_MULTICAST_ADDRESSES;
 
 	if (naddrs < MAX_NUM_MULTICAST_ADDRESSES)
 		fctrl &= ~IXGBE_FCTRL_MPE;
 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
 
 	if (ifp->if_flags & IFF_PROMISC) {
 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
 		IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
 	} else if (ifp->if_flags & IFF_ALLMULTI) {
 		fctrl |= IXGBE_FCTRL_MPE;
 		fctrl &= ~IXGBE_FCTRL_UPE;
 		IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
 	}
 }
 
 
 /*********************************************************************
  *  Multicast Update
  *
  *  This routine is called whenever multicast address list is updated.
  *
  **********************************************************************/
 #define IXGBE_RAR_ENTRIES 16
 
 /*
 ** Rebuild the multicast address table in adapter->mta from the
 ** interface's link-layer multicast list, set FCTRL UPE/MPE to match
 ** the interface flags, and push the list to the shared code unless
 ** the table filled up.
 */
 static void
 ixgbe_set_multi(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct ifnet	*ifp = adapter->ifp;
 	struct ifmultiaddr *ifma;
 	u8		*table = adapter->mta;
 	u32		fctrl;
 	int		count = 0;
 
 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
 
 	bzero(table, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
 	    MAX_NUM_MULTICAST_ADDRESSES);
 
 #if __FreeBSD_version >= 800000
 	if_maddr_rlock(ifp);
 #else
 	IF_ADDR_LOCK(ifp);
 #endif
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family == AF_LINK) {
 			if (count == MAX_NUM_MULTICAST_ADDRESSES)
 				break;
 			bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
 			    &table[count * IXGBE_ETH_LENGTH_OF_ADDRESS],
 			    IXGBE_ETH_LENGTH_OF_ADDRESS);
 			count++;
 		}
 	}
 #if __FreeBSD_version >= 800000
 	if_maddr_runlock(ifp);
 #else
 	IF_ADDR_UNLOCK(ifp);
 #endif
 
 	/*
 	** Start with both bits set (the promiscuous case) and
 	** strip what the current mode does not need.
 	*/
 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
 	if (!(ifp->if_flags & IFF_PROMISC)) {
 		if (count >= MAX_NUM_MULTICAST_ADDRESSES ||
 		    ifp->if_flags & IFF_ALLMULTI)
 			fctrl &= ~IXGBE_FCTRL_UPE;	/* MPE stays set */
 		else
 			fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
 	}
 
 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
 
 	if (count < MAX_NUM_MULTICAST_ADDRESSES)
 		ixgbe_update_mc_addr_list(hw, table, count,
 		    ixgbe_mc_array_itr, TRUE);
 }
 
 /*
  * Iterator callback handed to the multicast shared code by
  * ixgbe_set_multi(). Each call returns the address *update_ptr
  * currently points at, advances *update_ptr by one Ethernet
  * address length, and reports a vmdq value of 0. 'hw' is unused.
  */
 static u8 *
 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
 {
 	u8 *current = *update_ptr;
 
 	*vmdq = 0;
 	*update_ptr = current + IXGBE_ETH_LENGTH_OF_ADDRESS;
 	return (current);
 }
 
 
 /*********************************************************************
  *  Timer routine
  *
  *  This routine checks for link status, updates statistics,
  *  and runs the watchdog check.  It rearms itself for hz ticks later
  *  unless the watchdog fires, in which case the adapter is
  *  reinitialized through ixgbe_init_locked() (which rearms the
  *  timer itself).
  *
  *  'arg' is the struct adapter; the core mutex must be held.
  *
  **********************************************************************/
 
 static void
 ixgbe_local_timer(void *arg)
 {
 	struct adapter	*adapter = arg;
 	device_t	dev = adapter->dev;
 	struct ix_queue *que = adapter->queues;
 	struct tx_ring	*txr = adapter->tx_rings;
 	int		hung = 0, paused = 0;
 
 	mtx_assert(&adapter->core_mtx, MA_OWNED);
 
 	/* Check for pluggable optics */
 	if (adapter->sfp_probe)
 		if (!ixgbe_sfp_probe(adapter))
 			goto out; /* Nothing to do */
 
 	ixgbe_update_link_status(adapter);
 	ixgbe_update_stats_counters(adapter);
 
 	/*
 	 * If the interface has been paused
 	 * then don't do the watchdog check
 	 */
 	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
 		paused = 1;
 
 	/*
 	** Check the TX queues status
 	**      - watchdog only if all queues show hung
 	*/
 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
 		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
 		    (paused == 0))
 			++hung;
 		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
 			taskqueue_enqueue(que->tq, &txr->txq_task);
 	}
 	/* Only truly watchdog if all queues show hung */
 	if (hung == adapter->num_queues)
 		goto watchdog;
 
 out:
 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
 	return;
 
 watchdog:
 	device_printf(dev, "Watchdog timeout -- resetting\n");
 	/*
 	** The scan above leaves txr one element past the last ring,
 	** so restart from the first ring and report each of them
 	** (they are all hung when we get here).
 	*/
 	txr = adapter->tx_rings;
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
 		    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
 		    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
 		device_printf(dev,"TX(%d) desc avail = %d,"
 		    "Next TX to Clean = %d\n",
 		    txr->me, txr->tx_avail, txr->next_to_clean);
 	}
 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	adapter->watchdog_events++;
 	ixgbe_init_locked(adapter);
 }
 
 /*
 ** Propagate a change of adapter->link_up to the OS: on a
 ** down->up transition flow control is re-enabled and the stack is
 ** told the link is up; on an up->down transition the stack is told
 ** the link is down.  adapter->link_active tracks what was last
 ** reported, so repeated calls without a change do nothing.
 */
 static void
 ixgbe_update_link_status(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	device_t	dev = adapter->dev;
 
 	if (adapter->link_up && adapter->link_active == FALSE) {
 		if (bootverbose) {
 			int gbps = (adapter->link_speed == 128) ? 10 : 1;
 
 			device_printf(dev,"Link is up %d Gbps %s \n",
 			    gbps, "Full Duplex");
 		}
 		adapter->link_active = TRUE;
 		/* Update any Flow Control changes */
 		ixgbe_fc_enable(&adapter->hw);
 		if_link_state_change(ifp, LINK_STATE_UP);
 	} else if (!adapter->link_up && adapter->link_active == TRUE) {
 		if (bootverbose)
 			device_printf(dev,"Link is Down\n");
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 		adapter->link_active = FALSE;
 	}
 }
 
 
 /*********************************************************************
  *
  *  Stop the adapter: mask interrupts, stop the timer, clear
  *  IFF_DRV_RUNNING, reset and stop the MAC, turn off the TX laser
  *  and report the link as down.  'arg' is the struct adapter; the
  *  core mutex must be held.
  *
  **********************************************************************/
 
 static void
 ixgbe_stop(void *arg)
 {
 	struct adapter	*sc = arg;
 	struct ixgbe_hw	*hw = &sc->hw;
 	struct ifnet	*ifp = sc->ifp;
 
 	mtx_assert(&sc->core_mtx, MA_OWNED);
 
 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
 	ixgbe_disable_intr(sc);
 	callout_stop(&sc->timer);
 
 	/* Tell the stack we are no longer running */
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	ixgbe_reset_hw(hw);
 	hw->adapter_stopped = FALSE;
 	ixgbe_stop_adapter(hw);
 	if (hw->mac.type == ixgbe_mac_82599EB)
 		ixgbe_stop_mac_link_on_d3_82599(hw);
 	/* Laser off; does nothing when there are no optics */
 	ixgbe_disable_tx_laser(hw);
 
 	/* Push the link-down state to the stack */
 	sc->link_up = FALSE;
 	ixgbe_update_link_status(sc);
 
 	/* Restore RAR[0] in case the user changed the address */
 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
 }
 
 
 /*********************************************************************
  *
  *  Record the PCI identity of the board in the shared-code hw
  *  structure, derive the MAC type from it, and pick the scatter
  *  segment limit (and smart-speed setting) that goes with that MAC.
  *
  **********************************************************************/
 static void
 ixgbe_identify_hardware(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	device_t	pcidev = adapter->dev;
 
 	/* Capture vendor/device/revision and subsystem IDs */
 	hw->vendor_id = pci_get_vendor(pcidev);
 	hw->device_id = pci_get_device(pcidev);
 	hw->revision_id = pci_read_config(pcidev, PCIR_REVID, 1);
 	hw->subsystem_vendor_id = pci_read_config(pcidev, PCIR_SUBVEND_0, 2);
 	hw->subsystem_device_id = pci_read_config(pcidev, PCIR_SUBDEV_0, 2);
 
 	/* The MAC type must be known before num_segs can be chosen */
 	ixgbe_set_mac_type(hw);
 
 	if (hw->mac.type == ixgbe_mac_82598EB) {
 		adapter->num_segs = IXGBE_82598_SCATTER;
 		return;
 	}
 
 	/* Everything that is not an 82598 uses the 82599 settings */
 	hw->phy.smart_speed = ixgbe_smart_speed;
 	adapter->num_segs = IXGBE_82599_SCATTER;
 }
 
 /*********************************************************************
  *
  *  Map the supported physical layer bitmask reported by the shared
  *  code onto a single ifmedia subtype stored in adapter->optics.
  *  The tests are ordered; the first matching layer wins.
  *
  **********************************************************************/
 static void
 ixgbe_setup_optics(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	int		supported;
 
 	supported = ixgbe_get_supported_physical_layer(hw);
 
 	if (supported & IXGBE_PHYSICAL_LAYER_10GBASE_T)
 		adapter->optics = IFM_10G_T;
 	else if (supported & IXGBE_PHYSICAL_LAYER_1000BASE_T)
 		adapter->optics = IFM_1000_T;
 	else if (supported & IXGBE_PHYSICAL_LAYER_1000BASE_SX)
 		adapter->optics = IFM_1000_SX;
 	else if (supported & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM))
 		adapter->optics = IFM_10G_LR;
 	else if (supported & IXGBE_PHYSICAL_LAYER_10GBASE_SR)
 		adapter->optics = IFM_10G_SR;
 	else if (supported & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU)
 		adapter->optics = IFM_10G_TWINAX;
 	else if (supported & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4))
 		adapter->optics = IFM_10G_CX4;
 	else
 		/* Nothing recognised: fall back to the default */
 		adapter->optics = IFM_ETHER | IFM_AUTO;
 }
 
 /*********************************************************************
  *
  *  Set up the single interrupt used in Legacy (INTx) or MSI mode:
  *  allocate the IRQ resource, create the queue and link taskqueues,
  *  and register ixgbe_legacy_irq as the handler.
  *
  *  Returns 0 on success, ENXIO if the IRQ resource cannot be
  *  allocated, or the bus_setup_intr() error.
  *
  **********************************************************************/
 static int
 ixgbe_allocate_legacy(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct		ix_queue *que = adapter->queues;
 #ifndef IXGBE_LEGACY_TX
 	struct tx_ring		*txr = adapter->tx_rings;
 #endif
 	int		error, rid;
 
 	/* MSI uses resource id 1, a legacy interrupt uses 0 */
 	rid = (adapter->msix == 1) ? 1 : 0;
 
 	/* One interrupt resource serves everything in this mode */
 	adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 	    RF_SHAREABLE | RF_ACTIVE);
 	if (adapter->res == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: "
 		    "interrupt\n");
 		return (ENXIO);
 	}
 
 	/* Deferred processing contexts for the (only) queue */
 #ifndef IXGBE_LEGACY_TX
 	TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
 #endif
 	TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
 	que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
 	    taskqueue_thread_enqueue, &que->tq);
 	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
 	    device_get_nameunit(adapter->dev));
 
 	/* Tasklets for Link, SFP and Multispeed Fiber */
 	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
 	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
 	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
 #ifdef IXGBE_FDIR
 	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
 #endif
 	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
 	    taskqueue_thread_enqueue, &adapter->tq);
 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
 	    device_get_nameunit(adapter->dev));
 
 	error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
 	    que, &adapter->tag);
 	if (error != 0) {
 		/* Undo the taskqueues; the IRQ resource is left to the caller */
 		device_printf(dev, "Failed to register fast interrupt "
 		    "handler: %d\n", error);
 		taskqueue_free(que->tq);
 		taskqueue_free(adapter->tq);
 		que->tq = NULL;
 		adapter->tq = NULL;
 		return (error);
 	}
 
 	/* For simplicity in the handlers */
 	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
 
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  Setup MSIX Interrupt resources and handlers 
  *
  **********************************************************************/
 static int
 ixgbe_allocate_msix(struct adapter *adapter)
 {
 	device_t        dev = adapter->dev;
 	struct 		ix_queue *que = adapter->queues;
 	struct  	tx_ring *txr = adapter->tx_rings;
 	int 		error, rid, vector = 0;
 	int		cpu_id = 0;
 
 #ifdef	RSS
 	/*
 	 * If we're doing RSS, the number of queues needs to
 	 * match the number of RSS buckets that are configured.
 	 *
 	 * + If there's more queues than RSS buckets, we'll end
 	 *   up with queues that get no traffic.
 	 *
 	 * + If there's more RSS buckets than queues, we'll end
 	 *   up having multiple RSS buckets map to the same queue,
 	 *   so there'll be some contention.
 	 */
 	if (adapter->num_queues != rss_getnumbuckets()) {
 		device_printf(dev,
 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
 		    "; performance will be impacted.\n",
 		    __func__,
 		    adapter->num_queues,
 		    rss_getnumbuckets());
 	}
 #endif
 
 
 
 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
 		rid = vector + 1;
 		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 		    RF_SHAREABLE | RF_ACTIVE);
 		if (que->res == NULL) {
 			device_printf(dev,"Unable to allocate"
 		    	    " bus resource: que interrupt [%d]\n", vector);
 			return (ENXIO);
 		}
 		/* Set the handler function */
 		error = bus_setup_intr(dev, que->res,
 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 		    ixgbe_msix_que, que, &que->tag);
 		if (error) {
 			que->res = NULL;
 			device_printf(dev, "Failed to register QUE handler");
 			return (error);
 		}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
 #endif
 		que->msix = vector;
         	adapter->que_mask |= (u64)(1 << que->msix);
 #ifdef	RSS
 		/*
 		 * The queue ID is used as the RSS layer bucket ID.
 		 * We look up the queue ID -> RSS CPU ID and select
 		 * that.
 		 */
 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
 #else
 		/*
 		 * Bind the msix vector, and thus the
 		 * rings to the corresponding cpu.
 		 *
 		 * This just happens to match the default RSS round-robin
 		 * bucket -> queue -> CPU allocation.
 		 */
 		if (adapter->num_queues > 1)
 			cpu_id = i;
 #endif
 		if (adapter->num_queues > 1)
 			bus_bind_intr(dev, que->res, cpu_id);
 
 #ifdef	RSS
 		device_printf(dev,
 		    "Bound RSS bucket %d to CPU %d\n",
 		    i, cpu_id);
 #else
 		device_printf(dev,
 		    "Bound queue %d to cpu %d\n",
 		    i, cpu_id);
 #endif
 
 
 #ifndef IXGBE_LEGACY_TX
 		TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
 #endif
 		TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
 		que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
 		    taskqueue_thread_enqueue, &que->tq);
 #ifdef	RSS
 		taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
 		    cpu_id,
 		    "%s (bucket %d)",
 		    device_get_nameunit(adapter->dev),
 		    cpu_id);
 #else
 		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
 		    device_get_nameunit(adapter->dev));
 #endif
 	}
 
 	/* and Link */
 	rid = vector + 1;
 	adapter->res = bus_alloc_resource_any(dev,
     	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (!adapter->res) {
 		device_printf(dev,"Unable to allocate"
     	    " bus resource: Link interrupt [%d]\n", rid);
 		return (ENXIO);
 	}
 	/* Set the link handler function */
 	error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 	    ixgbe_msix_link, adapter, &adapter->tag);
 	if (error) {
 		adapter->res = NULL;
 		device_printf(dev, "Failed to register LINK handler");
 		return (error);
 	}
 #if __FreeBSD_version >= 800504
 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
 #endif
 	adapter->linkvec = vector;
 	/* Tasklets for Link, SFP and Multispeed Fiber */
 	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
 	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
 	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
 #ifdef IXGBE_FDIR
 	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
 #endif
 	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
 	    taskqueue_thread_enqueue, &adapter->tq);
 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
 	    device_get_nameunit(adapter->dev));
 
 	return (0);
 }
 
 /*
  * Choose the interrupt mode.  MSI/X is tried first (unless disabled
  * by tunable), then MSI, then a legacy interrupt.
  *
  * Returns the number of vectors allocated: queues + 1 for MSI/X,
  * the count granted by pci_alloc_msi() for MSI, 0 for legacy.
  * On MSI/X success adapter->num_queues is updated as well.
  */
 static int
 ixgbe_setup_msix(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	int rid, want, queues, msgs;
 
 	/* MSI/X turned off by tunable */
 	if (ixgbe_enable_msix == 0)
 		goto msi;
 
 	/* No MSI/X messages available at all */
 	msgs = pci_msix_count(dev);
 	if (msgs == 0)
 		goto msi;
 
 	/* Map the MSI/X table: try the 82598 BAR, then the next one up */
 	rid = PCIR_BAR(MSIX_82598_BAR);
 	adapter->msix_mem = bus_alloc_resource_any(dev,
 	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
 	if (adapter->msix_mem == NULL) {
 		rid += 4;	/* 82599 maps in higher BAR */
 		adapter->msix_mem = bus_alloc_resource_any(dev,
 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
 		if (adapter->msix_mem == NULL) {
 			/* May not be enabled */
 			device_printf(adapter->dev,
 			    "Unable to map MSIX table \n");
 			goto msi;
 		}
 	}
 
 	/* Auto-config: one queue per CPU, capped by available vectors */
 	queues = mp_ncpus;
 	if (queues > (msgs - 1))
 		queues = msgs - 1;
 
 	/* An explicit tunable wins over the auto value */
 	if (ixgbe_num_queues != 0)
 		queues = ixgbe_num_queues;
 
 #ifdef	RSS
 	/* If we're doing RSS, clamp at the number of RSS buckets */
 	if (queues > rss_getnumbuckets())
 		queues = rss_getnumbuckets();
 #endif
 
 	/* Make the sysctl report what is actually used */
 	ixgbe_num_queues = queues;
 
 	/*
 	** One vector per queue (RX/TX pair) plus
 	** one more for Link.
 	*/
 	want = queues + 1;
 	if (msgs < want) {
 		device_printf(adapter->dev,
 		    "MSIX Configuration Problem, "
 		    "%d vectors but %d queues wanted!\n",
 		    msgs, want);
 		goto msi;
 	}
 	msgs = want;
 
 	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
 		device_printf(adapter->dev,
 		    "Using MSIX interrupts with %d vectors\n", msgs);
 		adapter->num_queues = queues;
 		return (msgs);
 	}
 
 	/*
 	** Allocation failed or came up short:
 	** give back what we got and try MSI instead.
 	*/
 	pci_release_msi(dev);
 
 msi:
 	/* The MSI/X table mapping is of no use from here on */
 	if (adapter->msix_mem != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    rid, adapter->msix_mem);
 		adapter->msix_mem = NULL;
 	}
 
 	msgs = 1;
 	if (pci_alloc_msi(dev, &msgs) != 0) {
 		device_printf(adapter->dev,"Using a Legacy interrupt\n");
 		return (0);
 	}
 	device_printf(adapter->dev,"Using an MSI interrupt\n");
 	return (msgs);
 }
 
 
 static int
 ixgbe_allocate_pci_resources(struct adapter *adapter)
 {
 	int             rid;
 	device_t        dev = adapter->dev;
 
 	rid = PCIR_BAR(0);
 	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &rid, RF_ACTIVE);
 
 	if (!(adapter->pci_mem)) {
 		device_printf(dev,"Unable to allocate bus resource: memory\n");
 		return (ENXIO);
 	}
 
 	adapter->osdep.mem_bus_space_tag =
 		rman_get_bustag(adapter->pci_mem);
 	adapter->osdep.mem_bus_space_handle =
 		rman_get_bushandle(adapter->pci_mem);
 	adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
 
 	/* Legacy defaults */
 	adapter->num_queues = 1;
 	adapter->hw.back = &adapter->osdep;
 
 	/*
 	** Now setup MSI or MSI/X, should
 	** return us the number of supported
 	** vectors. (Will be 1 for MSI)
 	*/
 	adapter->msix = ixgbe_setup_msix(adapter);
 	return (0);
 }
 
 /*
  * Tear down interrupt handlers and release the IRQ, MSI/MSI-X and
  * memory resources held by the adapter.
  */
 static void
 ixgbe_free_pci_resources(struct adapter * adapter)
 {
 	device_t	dev = adapter->dev;
 	struct ix_queue	*que;
 	int		rid, memrid;
 
 	memrid = (adapter->hw.mac.type == ixgbe_mac_82598EB) ?
 	    PCIR_BAR(MSIX_82598_BAR) : PCIR_BAR(MSIX_82599_BAR);
 
 	/*
 	** Attach can fail before any interrupt resource was set up.
 	** A NULL adapter->res identifies that case, and the
 	** interrupt teardown is skipped entirely.
 	*/
 	if (adapter->res != NULL) {
 		/* Per-queue MSIX handlers and IRQ resources */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			que = &adapter->queues[i];
 			rid = que->msix + 1;
 			if (que->tag != NULL) {
 				bus_teardown_intr(dev, que->res, que->tag);
 				que->tag = NULL;
 			}
 			if (que->res != NULL)
 				bus_release_resource(dev, SYS_RES_IRQ,
 				    rid, que->res);
 		}
 
 		/* The Legacy or Link interrupt goes last */
 		if (adapter->linkvec)		/* we are doing MSIX */
 			rid = adapter->linkvec + 1;
 		else if (adapter->msix != 0)
 			rid = 1;
 		else
 			rid = 0;
 
 		if (adapter->tag != NULL) {
 			bus_teardown_intr(dev, adapter->res, adapter->tag);
 			adapter->tag = NULL;
 		}
 		if (adapter->res != NULL)
 			bus_release_resource(dev, SYS_RES_IRQ, rid,
 			    adapter->res);
 	}
 
 	if (adapter->msix)
 		pci_release_msi(dev);
 
 	if (adapter->msix_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    memrid, adapter->msix_mem);
 
 	if (adapter->pci_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(0), adapter->pci_mem);
 }
 
 /*********************************************************************
  *
  *  Allocate and populate the ifnet for this adapter, attach it as
  *  an Ethernet interface, and register the supported media types.
  *
  *  Returns 0 on success, -1 if the ifnet cannot be allocated.
  *
  **********************************************************************/
 static int
 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ifnet   *ifp;
 
 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
 
 	ifp = if_alloc(IFT_ETHER);
 	adapter->ifp = ifp;
 	if (ifp == NULL) {
 		device_printf(dev, "can not allocate ifnet structure\n");
 		return (-1);
 	}
 
 	/* Identity and entry points */
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_baudrate = IF_Gbps(10);
 	ifp->if_init = ixgbe_init;
 	ifp->if_softc = adapter;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = ixgbe_ioctl;
 	ifp->if_get_counter = ixgbe_get_counter;
 #ifndef IXGBE_LEGACY_TX
 	ifp->if_transmit = ixgbe_mq_start;
 	ifp->if_qflush = ixgbe_qflush;
 #else
 	ifp->if_start = ixgbe_start;
 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
 	IFQ_SET_READY(&ifp->if_snd);
 #endif
 
 	ether_ifattach(ifp, adapter->hw.mac.addr);
 
 	adapter->max_frame_size =
 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
 
 	/* Advertise room for a VLAN header so long frames are accepted */
 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
 
 	/* Capabilities that are both offered and enabled by default */
 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM
 			     |  IFCAP_JUMBO_MTU
 			     |  IFCAP_LRO
 			     |  IFCAP_VLAN_HWTAGGING
 			     |  IFCAP_VLAN_HWTSO
 			     |  IFCAP_VLAN_MTU
 			     |  IFCAP_HWSTATS;
 	ifp->if_capenable = ifp->if_capabilities;
 
 	/*
 	** Hardware VLAN filtering is offered but left disabled: it is
 	** added after if_capenable was set.  When vlans sit on another
 	** pseudo device (eg. lagg) the vlan events are not passed
 	** through and filtering breaks operation.  With vlans directly
 	** on this driver it can be enabled for full tag filtering.
 	*/
 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
 
 	/*
 	 * Register the media callbacks and the media types this
 	 * adapter supports; autoselect ends up as the active setting.
 	 */
 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
 	    ixgbe_media_status);
 	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
 	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_1000_T, 0, NULL);
 	}
 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
 
 	return (0);
 }
 
 /*
  * Kick off link configuration.  SFP parts defer the work to the
  * module or multispeed-fiber task; everything else checks the link,
  * determines the speeds to advertise, and calls the MAC's setup_link.
  * Errors from the shared code end the sequence silently.
  */
 static void
 ixgbe_config_link(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32	autoneg, err = 0;
 	bool	negotiate;
 
 	if (ixgbe_is_sfp(hw)) {
 		if (!hw->phy.multispeed_fiber) {
 			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
 			return;
 		}
 		hw->mac.ops.setup_sfp(hw);
 		ixgbe_enable_tx_laser(hw);
 		taskqueue_enqueue(adapter->tq, &adapter->msf_task);
 		return;
 	}
 
 	if (hw->mac.ops.check_link)
 		err = ixgbe_check_link(hw, &adapter->link_speed,
 		    &adapter->link_up, FALSE);
 	if (err)
 		return;
 
 	/* Nothing advertised yet: ask the MAC what it can do */
 	autoneg = hw->phy.autoneg_advertised;
 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
 		err = hw->mac.ops.get_link_capabilities(hw,
 		    &autoneg, &negotiate);
 	if (err)
 		return;
 
 	if (hw->mac.ops.setup_link)
 		err = hw->mac.ops.setup_link(hw,
 		    autoneg, adapter->link_up);
 }
 
 /********************************************************************
  * Manage DMA'able memory.
  *******************************************************************/
 /*
  * bus_dmamap_load() callback: on success, store the bus address of
  * the first segment into the bus_addr_t that arg points to.  On
  * error the destination is left untouched.
  */
 static void
 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
 {
 	if (error == 0)
 		*(bus_addr_t *) arg = segs[0].ds_addr;
 }
 
 /*
  * Allocate one DMA-able region of `size` bytes in a single segment:
  * create a tag, allocate the memory, and load the map so that
  * dma->dma_paddr holds the bus address.
  *
  * Returns 0 on success.  On failure the busdma error is returned,
  * the steps already completed are undone, and dma->dma_tag is NULL.
  */
 static int
 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
 		struct ixgbe_dma_alloc *dma, int mapflags)
 {
 	device_t dev = adapter->dev;
 	int             rc;
 
 	rc = bus_dma_tag_create(
 	    bus_get_dma_tag(adapter->dev),	/* parent */
 	    DBA_ALIGN, 0,			/* alignment, bounds */
 	    BUS_SPACE_MAXADDR,			/* lowaddr */
 	    BUS_SPACE_MAXADDR,			/* highaddr */
 	    NULL, NULL,				/* filter, filterarg */
 	    size,				/* maxsize */
 	    1,					/* nsegments */
 	    size,				/* maxsegsize */
 	    BUS_DMA_ALLOCNOW,			/* flags */
 	    NULL,				/* lockfunc */
 	    NULL,				/* lockfuncarg */
 	    &dma->dma_tag);
 	if (rc != 0) {
 		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
 		       "error %u\n", rc);
 		goto no_tag;
 	}
 
 	rc = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
 	    BUS_DMA_NOWAIT, &dma->dma_map);
 	if (rc != 0) {
 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
 		       "error %u\n", rc);
 		goto drop_tag;
 	}
 
 	rc = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
 	    size, ixgbe_dmamap_cb, &dma->dma_paddr,
 	    mapflags | BUS_DMA_NOWAIT);
 	if (rc != 0) {
 		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
 		       "error %u\n", rc);
 		goto drop_mem;
 	}
 
 	dma->dma_size = size;
 	return (0);
 
 	/* Unwind in the reverse order of setup */
 drop_mem:
 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 drop_tag:
 	bus_dma_tag_destroy(dma->dma_tag);
 no_tag:
 	dma->dma_tag = NULL;
 	return (rc);
 }
 
 /*
  * Undo ixgbe_dma_malloc(): sync and unload the map, free the
  * memory, and destroy the tag.  The adapter argument is not used.
  */
 static void
 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
 {
 	/* Complete any outstanding DMA before the mapping goes away */
 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 
 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 
 	bus_dma_tag_destroy(dma->dma_tag);
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for the transmit and receive rings, and then
  *  the descriptors associated with each, called only once at attach.
  *
  *  Three arrays of adapter->num_queues entries are allocated (queue
  *  holders, TX rings, RX rings).  Each ring then gets a mutex, a
  *  descriptor area from ixgbe_dma_malloc(), and its buffers; finally
  *  each queue holder is pointed at its TX/RX ring pair.
  *
  *  Returns 0 on success or ENOMEM on failure, in which case the
  *  labels at the bottom unwind what was allocated.
  *
  **********************************************************************/
 static int
 ixgbe_allocate_queues(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct ix_queue	*que;
 	struct tx_ring	*txr;
 	struct rx_ring	*rxr;
 	int rsize, tsize, error = IXGBE_SUCCESS;
 	/* Number of TX / RX rings whose setup completed; used for unwind */
 	int txconf = 0, rxconf = 0;
 
         /* First allocate the top level queue structs */
         if (!(adapter->queues =
             (struct ix_queue *) malloc(sizeof(struct ix_queue) *
             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
                 device_printf(dev, "Unable to allocate queue memory\n");
                 error = ENOMEM;
                 goto fail;
         }
 
 	/* First allocate the TX ring struct memory */
 	if (!(adapter->tx_rings =
 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate TX ring memory\n");
 		error = ENOMEM;
 		goto tx_fail;
 	}
 
 	/* Next allocate the RX */
 	if (!(adapter->rx_rings =
 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate RX ring memory\n");
 		error = ENOMEM;
 		goto rx_fail;
 	}
 
 	/* For the ring itself: descriptor bytes rounded up to DBA_ALIGN */
 	tsize = roundup2(adapter->num_tx_desc *
 	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
 
 	/*
 	 * Now set up the TX queues, txconf is needed to handle the
 	 * possibility that things fail midcourse and we need to
 	 * undo memory gracefully
 	 *
 	 * txconf is only advanced by the loop increment, so a goto
 	 * out of iteration i leaves txconf == i.
 	 */ 
 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
 		/* Set up some basics */
 		txr = &adapter->tx_rings[i];
 		txr->adapter = adapter;
 		txr->me = i;
 		txr->num_desc = adapter->num_tx_desc;
 
 		/* Initialize the TX side lock */
 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
 		    device_get_nameunit(dev), txr->me);
 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
 
 		if (ixgbe_dma_malloc(adapter, tsize,
 			&txr->txdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate TX Descriptor memory\n");
 			error = ENOMEM;
 			goto err_tx_desc;
 		}
 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
 		bzero((void *)txr->tx_base, tsize);
 
         	/* Now allocate transmit buffers for the ring */
 		/*
 		 * NOTE(review): a failure from here on jumps to
 		 * err_tx_desc with txconf == i, so the unwind loop does
 		 * not cover this ring's txdma.  In addition, the failure
 		 * path of ixgbe_allocate_transmit_buffers() calls
 		 * ixgbe_free_transmit_structures(), which itself frees
 		 * the ring DMA areas and adapter->tx_rings -- confirm the
 		 * unwind below does not free them a second time.
 		 */
         	if (ixgbe_allocate_transmit_buffers(txr)) {
 			device_printf(dev,
 			    "Critical Failure setting up transmit buffers\n");
 			error = ENOMEM;
 			goto err_tx_desc;
         	}
 #ifndef IXGBE_LEGACY_TX
 		/* Allocate a buf ring */
 		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
 		    M_WAITOK, &txr->tx_mtx);
 		if (txr->br == NULL) {
 			device_printf(dev,
 			    "Critical Failure setting up buf ring\n");
 			error = ENOMEM;
 			goto err_tx_desc;
         	}
 #endif
 	}
 
 	/*
 	 * Next the RX queues...
 	 * rxconf plays the same role for RX that txconf does for TX.
 	 */ 
 	rsize = roundup2(adapter->num_rx_desc *
 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
 		rxr = &adapter->rx_rings[i];
 		/* Set up some basics */
 		rxr->adapter = adapter;
 		rxr->me = i;
 		rxr->num_desc = adapter->num_rx_desc;
 
 		/* Initialize the RX side lock */
 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
 		    device_get_nameunit(dev), rxr->me);
 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
 
 		if (ixgbe_dma_malloc(adapter, rsize,
 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate RxDescriptor memory\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
 		bzero((void *)rxr->rx_base, rsize);
 
         	/* Allocate receive buffers for the ring*/
 		if (ixgbe_allocate_receive_buffers(rxr)) {
 			device_printf(dev,
 			    "Critical Failure setting up receive buffers\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 	}
 
 	/*
 	** Finally set up the queue holding structs
 	*/
 	for (int i = 0; i < adapter->num_queues; i++) {
 		que = &adapter->queues[i];
 		que->adapter = adapter;
 		que->txr = &adapter->tx_rings[i];
 		que->rxr = &adapter->rx_rings[i];
 	}
 
 	return (0);
 
 	/*
 	 * Error unwind.  Each label falls through into the next, so
 	 * entering at a given label releases everything from that point
 	 * down.  The ring mutexes initialised above are not destroyed on
 	 * these paths.
 	 */
 err_rx_desc:
 	/* Free descriptor DMA for the RX rings that completed setup */
 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
 		ixgbe_dma_free(adapter, &rxr->rxdma);
 err_tx_desc:
 	/* Free descriptor DMA for the TX rings that completed setup */
 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
 		ixgbe_dma_free(adapter, &txr->txdma);
 	free(adapter->rx_rings, M_DEVBUF);
 rx_fail:
 	/* RX array allocation failed: only the TX array and queues exist */
 	free(adapter->tx_rings, M_DEVBUF);
 tx_fail:
 	/* TX array allocation failed: only the queue array exists */
 	free(adapter->queues, M_DEVBUF);
 fail:
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
  *  the information needed to transmit a packet on the wire. This is
  *  called only once at attach, setup is done every reset.
  *
  **********************************************************************/
 static int
 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	device_t dev = adapter->dev;
 	struct ixgbe_tx_buf *txbuf;
 	int error, i;
 
 	/*
 	 * Setup DMA descriptor areas.
 	 */
 	if ((error = bus_dma_tag_create(
 			       bus_get_dma_tag(adapter->dev),	/* parent */
 			       1, 0,		/* alignment, bounds */
 			       BUS_SPACE_MAXADDR,	/* lowaddr */
 			       BUS_SPACE_MAXADDR,	/* highaddr */
 			       NULL, NULL,		/* filter, filterarg */
 			       IXGBE_TSO_SIZE,		/* maxsize */
 			       adapter->num_segs,	/* nsegments */
 			       PAGE_SIZE,		/* maxsegsize */
 			       0,			/* flags */
 			       NULL,			/* lockfunc */
 			       NULL,			/* lockfuncarg */
 			       &txr->txtag))) {
 		device_printf(dev,"Unable to allocate TX DMA tag\n");
 		goto fail;
 	}
 
 	if (!(txr->tx_buffers =
 	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
         /* Create the descriptor buffer dma maps */
 	txbuf = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
 		if (error != 0) {
 			device_printf(dev, "Unable to create TX DMA map\n");
 			goto fail;
 		}
 	}
 
 	return 0;
 fail:
 	/* We free all, it handles case where we are in the middle */
 	ixgbe_free_transmit_structures(adapter);
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Initialize a transmit ring.
  *
  *  Under the ring's TX lock: zero the descriptor area, reset the
  *  software indices, release any mbufs still attached to tx buffers,
  *  mark every descriptor available, and sync the descriptor DMA map.
  *  With DEV_NETMAP, buffer maps are additionally loaded with netmap
  *  buffers when netmap_reset() returns a slot array.
  *
  **********************************************************************/
 static void
 ixgbe_setup_transmit_ring(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	struct ixgbe_tx_buf *txbuf;
 	int i;
 #ifdef DEV_NETMAP
 	struct netmap_adapter *na = NA(adapter->ifp);
 	struct netmap_slot *slot;
 #endif /* DEV_NETMAP */
 
 	/* Clear the old ring contents */
 	IXGBE_TX_LOCK(txr);
 #ifdef DEV_NETMAP
 	/*
 	 * (under lock): if in netmap mode, do some consistency
 	 * checks and set slot to entry 0 of the netmap ring.
 	 */
 	slot = netmap_reset(na, NR_TX, txr->me, 0);
 #endif /* DEV_NETMAP */
 	bzero((void *)txr->tx_base,
 	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
 	/* Reset indices */
 	txr->next_avail_desc = 0;
 	txr->next_to_clean = 0;
 
 	/* Free any existing tx buffers. */
         txbuf = txr->tx_buffers;
 	for (i = 0; i < txr->num_desc; i++, txbuf++) {
 		/* A non-NULL m_head means a packet is still mapped here */
 		if (txbuf->m_head != NULL) {
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag, txbuf->map);
 			m_freem(txbuf->m_head);
 			txbuf->m_head = NULL;
 		}
 #ifdef DEV_NETMAP
 		/*
 		 * In netmap mode, set the map for the packet buffer.
 		 * NOTE: Some drivers (not this one) also need to set
 		 * the physical buffer address in the NIC ring.
 		 * Slots in the netmap ring (indexed by "si") are
 		 * kring->nkr_hwofs positions "ahead" wrt the
 		 * corresponding slot in the NIC ring. In some drivers
 		 * (not here) nkr_hwofs can be negative. Function
 		 * netmap_idx_n2k() handles wraparounds properly.
 		 */
 		if (slot) {
 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
 			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
 		}
 #endif /* DEV_NETMAP */
 		/* Clear the EOP descriptor pointer */
 		txbuf->eop = NULL;
         }
 
 #ifdef IXGBE_FDIR
 	/* Set the rate at which we sample packets */
 	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
 		txr->atr_sample = atr_sample_rate;
 #endif
 
 	/* Set number of descriptors available */
 	txr->tx_avail = adapter->num_tx_desc;
 
 	/* Sync the zeroed descriptor area before the lock is dropped */
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	IXGBE_TX_UNLOCK(txr);
 }
 
 /*********************************************************************
  *
  *  Run ixgbe_setup_transmit_ring() on every TX ring of the adapter.
  *  Always returns 0.
  *
  **********************************************************************/
 static int
 ixgbe_setup_transmit_structures(struct adapter *adapter)
 {
 	int ring;
 
 	for (ring = 0; ring < adapter->num_queues; ring++)
 		ixgbe_setup_transmit_ring(&adapter->tx_rings[ring]);
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Program the hardware transmit unit: per-ring descriptor base,
  *  length, head/tail and DCA TX control, then (on anything newer
  *  than the 82598) enable TX DMA and set MTQC.
  *
  **********************************************************************/
 static void
 ixgbe_initialize_transmit_units(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 
 	for (int i = 0; i < adapter->num_queues; i++) {
 		struct tx_ring	*txr = &adapter->tx_rings[i];
 		u64	tdba = txr->txdma.dma_paddr;
 		u32	txctrl;
 
 		/* Base (split into low/high halves) and length of the ring */
 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
 		    (tdba & 0x00000000ffffffffULL));
 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
 
 		/* Hardware head and tail both start at zero */
 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
 
 		/* Per-ring software defaults */
 		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
 		txr->queue_status = IXGBE_QUEUE_IDLE;
 		txr->process_limit = ixgbe_tx_process_limit;
 
 		/*
 		 * Clear DESC_WRO_EN in the DCA TX control register.  The
 		 * 82598 uses its own register offset; every other MAC
 		 * type uses the 82599 one.
 		 */
 		if (hw->mac.type == ixgbe_mac_82598EB) {
 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
 			txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
 		} else {
 			txctrl = IXGBE_READ_REG(hw,
 			    IXGBE_DCA_TXCTRL_82599(i));
 			txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i),
 			    txctrl);
 		}
 	}
 
 	/* The remaining steps do not apply to the 82598 */
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		return;
 
 	{
 		u32 dmatxctl, rttdcs;
 
 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
 		dmatxctl |= IXGBE_DMATXCTL_TE;
 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
 
 		/* The arbiter is disabled around the MTQC write */
 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
 	}
 }
 
 /*********************************************************************
  *
  *  Tear down every transmit ring: release its buffers and descriptor
  *  DMA memory under the ring lock, destroy the lock, and finally
  *  free the ring array itself.
  *
  **********************************************************************/
 static void
 ixgbe_free_transmit_structures(struct adapter *adapter)
 {
 	int q;
 
 	for (q = 0; q < adapter->num_queues; q++) {
 		struct tx_ring *ring = &adapter->tx_rings[q];
 
 		IXGBE_TX_LOCK(ring);
 		ixgbe_free_transmit_buffers(ring);
 		ixgbe_dma_free(adapter, &ring->txdma);
 		IXGBE_TX_UNLOCK(ring);
 		/* The lock is no longer needed once the ring is empty */
 		IXGBE_TX_LOCK_DESTROY(ring);
 	}
 
 	free(adapter->tx_rings, M_DEVBUF);
 }
 
 /*********************************************************************
  *
  *  Release everything hanging off one transmit ring's tx_buffers
  *  array: pending mbufs, per-slot DMA maps, the array itself and the
  *  ring's TX DMA tag.  Does nothing if the array was never allocated.
  *
  **********************************************************************/
 static void
 ixgbe_free_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter		*sc = txr->adapter;
 	struct ixgbe_tx_buf	*tb;
 
 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
 
 	/* No buffer array means there is nothing to undo */
 	if (txr->tx_buffers == NULL)
 		return;
 
 	for (int n = 0; n < sc->num_tx_desc; n++) {
 		tb = &txr->tx_buffers[n];
 
 		if (tb->m_head != NULL) {
 			/* A packet is still attached: unmap and free it */
 			bus_dmamap_sync(txr->txtag, tb->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag, tb->map);
 			m_freem(tb->m_head);
 			tb->m_head = NULL;
 		} else if (tb->map != NULL) {
 			/* No packet, but the map may still be loaded */
 			bus_dmamap_unload(txr->txtag, tb->map);
 		}
 
 		/* In either case the map itself goes away */
 		if (tb->map != NULL) {
 			bus_dmamap_destroy(txr->txtag, tb->map);
 			tb->map = NULL;
 		}
 	}
 #ifdef IXGBE_LEGACY_TX
 	if (txr->br != NULL)
 		buf_ring_free(txr->br, M_DEVBUF);
 #endif
 	/* tx_buffers is known to be non-NULL here (checked on entry) */
 	free(txr->tx_buffers, M_DEVBUF);
 	txr->tx_buffers = NULL;
 
 	if (txr->txtag != NULL) {
 		bus_dma_tag_destroy(txr->txtag);
 		txr->txtag = NULL;
 	}
 }
 
 /*********************************************************************
  *
  *  Build an advanced context descriptor for VLAN tagging and/or
  *  checksum offload.  TSO packets are handed to ixgbe_tso_setup().
  *
  *  Updates *olinfo_status with the payload length and, when a
  *  checksum is offloaded, the TXSM option.  Consumes one descriptor
  *  from the ring when a context descriptor is written.  Returns 0,
  *  or whatever ixgbe_tso_setup() returns for TSO packets.
  *
  **********************************************************************/
 
 static int
 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
     u32 *cmd_type_len, u32 *olinfo_status)
 {
 	struct ixgbe_adv_tx_context_desc *ctx_desc;
 	struct ether_vlan_header *evh;
 	u32	vlan_macip = 0, tucmd = 0;
 	int	idx = txr->next_avail_desc;
 	int	l2_len, l3_len = 0;
 	int	do_csum;
 	u16	l3_type, tag;
 	u8	l4_proto = 0;
 
 	/* TSO has its own context descriptor layout */
 	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
 
 	do_csum = ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0);
 
 	/* Without TSO the whole packet counts as payload */
 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
 
 	/*
 	** With advanced descriptors the VLAN tag travels in the
 	** context descriptor, so one is needed for a tagged packet
 	** even when no checksum is offloaded.  Untagged packets
 	** with nothing to offload need no context descriptor at all.
 	*/
 	if (mp->m_flags & M_VLANTAG) {
 		tag = htole16(mp->m_pkthdr.ether_vtag);
 		vlan_macip |= (tag << IXGBE_ADVTXD_VLAN_SHIFT);
 	} else if (!do_csum)
 		return (0);
 
 	/*
 	 * Locate the L3 header, stepping over an in-band VLAN
 	 * header if the frame carries one.
 	 */
 	evh = mtod(mp, struct ether_vlan_header *);
 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		l3_type = ntohs(evh->evl_proto);
 		l2_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	} else {
 		l3_type = ntohs(evh->evl_encap_proto);
 		l2_len = ETHER_HDR_LEN;
 	}
 	vlan_macip |= l2_len << IXGBE_ADVTXD_MACLEN_SHIFT;
 
 	if (l3_type == ETHERTYPE_IP) {
 		struct ip *ip4 = (struct ip *)(mp->m_data + l2_len);
 
 		l3_len = ip4->ip_hl << 2;
 		l4_proto = ip4->ip_p;
 		tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
 	} else if (l3_type == ETHERTYPE_IPV6) {
 		struct ip6_hdr *ip6 = (struct ip6_hdr *)(mp->m_data + l2_len);
 
 		l3_len = sizeof(struct ip6_hdr);
 		/* XXX-BZ this will go badly in case of ext hdrs. */
 		l4_proto = ip6->ip6_nxt;
 		tucmd |= IXGBE_ADVTXD_TUCMD_IPV6;
 	} else {
 		/* Neither IPv4 nor IPv6: nothing to checksum */
 		do_csum = FALSE;
 	}
 
 	vlan_macip |= l3_len;
 	tucmd |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
 
 	/* Select the L4 type only if the stack asked for that checksum */
 	switch (l4_proto) {
 	case IPPROTO_TCP:
 		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
 			tucmd |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
 		break;
 	case IPPROTO_UDP:
 		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
 			tucmd |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
 		break;
 #if __FreeBSD_version >= 800000
 	case IPPROTO_SCTP:
 		if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
 			tucmd |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
 		break;
 #endif
 	default:
 		do_csum = FALSE;
 		break;
 	}
 
 	/* Tell the data descriptor that a checksum is to be inserted */
 	if (do_csum)
 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
 
 	/* Fill in the context descriptor at the next free slot */
 	ctx_desc = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[idx];
 	ctx_desc->vlan_macip_lens = htole32(vlan_macip);
 	ctx_desc->type_tucmd_mlhl = htole32(tucmd);
 	ctx_desc->seqnum_seed = htole32(0);
 	ctx_desc->mss_l4len_idx = htole32(0);
 
 	/* One descriptor has been used up; advance with wrap-around */
 	if (++idx == txr->num_desc)
 		idx = 0;
 	txr->next_avail_desc = idx;
 	--txr->tx_avail;
 
 	return (0);
 }
 
 /**********************************************************************
  *
  *  Prepare a TSO (hardware TCP segmentation) context descriptor on
  *  adapters using advanced tx descriptors.
  *
  *  Seeds the TCP checksum field with the pseudo-header sum, writes
  *  the context descriptor, consumes one ring slot and sets the TSE /
  *  TXSM / payload-length bits for the data descriptors.  Returns 0,
  *  or ENXIO when the L4 protocol is not TCP.  Panics on an ethertype
  *  that is neither (compiled-in) IPv4 nor IPv6.
  *
  **********************************************************************/
 static int
 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
     u32 *cmd_type_len, u32 *olinfo_status)
 {
 	struct ixgbe_adv_tx_context_desc *ctx_desc;
 	struct ether_vlan_header *evh;
 	struct tcphdr *tcp;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 #ifdef INET
 	struct ip *ip;
 #endif
 	u32 vlan_macip = 0, tucmd = 0, mss_l4len = 0;
 	u32 payload;
 	u16 tag = 0, net_etype;
 	int idx, l2_len, l3_len, l4_len;
 
 	/* Find the L3 header; an in-band VLAN header shifts it by 4 */
 	evh = mtod(mp, struct ether_vlan_header *);
 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		net_etype = evh->evl_proto;
 		l2_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	} else {
 		net_etype = evh->evl_encap_proto;
 		l2_len = ETHER_HDR_LEN;
 	}
 
 	switch (ntohs(net_etype)) {
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		ip6 = (struct ip6_hdr *)(mp->m_data + l2_len);
 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
 		if (ip6->ip6_nxt != IPPROTO_TCP)
 			return (ENXIO);
 		l3_len = sizeof(struct ip6_hdr);
 		tcp = (struct tcphdr *)((caddr_t)ip6 + l3_len);
 		/* Seed th_sum with the pseudo-header checksum */
 		tcp->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
 		tucmd |= IXGBE_ADVTXD_TUCMD_IPV6;
 		break;
 #endif
 #ifdef INET
 	case ETHERTYPE_IP:
 		ip = (struct ip *)(mp->m_data + l2_len);
 		if (ip->ip_p != IPPROTO_TCP)
 			return (ENXIO);
 		ip->ip_sum = 0;
 		l3_len = ip->ip_hl << 2;
 		tcp = (struct tcphdr *)((caddr_t)ip + l3_len);
 		/* Seed th_sum with the pseudo-header checksum */
 		tcp->th_sum = in_pseudo(ip->ip_src.s_addr,
 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
 		tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
 		/* The data descriptor must request the IPv4 checksum too */
 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
 		break;
 #endif
 	default:
 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
 		    __func__, ntohs(net_etype));
 		break;
 	}
 
 	l4_len = tcp->th_off << 2;
 
 	/* TCP payload size; reported in the data descriptor */
 	payload = mp->m_pkthdr.len - l2_len - l3_len - l4_len;
 
 	idx = txr->next_avail_desc;
 	ctx_desc = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[idx];
 
 	/* Word 0: VLAN tag, MAC header length, IP header length */
 	if (mp->m_flags & M_VLANTAG) {
 		tag = htole16(mp->m_pkthdr.ether_vtag);
 		vlan_macip |= (tag << IXGBE_ADVTXD_VLAN_SHIFT);
 	}
 	vlan_macip |= l2_len << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip |= l3_len;
 	ctx_desc->vlan_macip_lens = htole32(vlan_macip);
 
 	/* Descriptor type and TCP as the L4 type */
 	tucmd |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
 	tucmd |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
 	ctx_desc->type_tucmd_mlhl = htole32(tucmd);
 
 	/* Segment size and TCP header length */
 	mss_l4len |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
 	mss_l4len |= (l4_len << IXGBE_ADVTXD_L4LEN_SHIFT);
 	ctx_desc->mss_l4len_idx = htole32(mss_l4len);
 
 	ctx_desc->seqnum_seed = htole32(0);
 
 	/* The context descriptor used one slot; advance with wrap */
 	if (++idx == txr->num_desc)
 		idx = 0;
 	txr->next_avail_desc = idx;
 	txr->tx_avail--;
 
 	/* Flags consumed when the data descriptors are built */
 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
 	*olinfo_status |= payload << IXGBE_ADVTXD_PAYLEN_SHIFT;
 
 	++txr->tso_tx;
 	return (0);
 }
 
 #ifdef IXGBE_FDIR
 /*
 ** This routine parses packet headers so that Flow
 ** Director can make a hashed filter table entry 
 ** allowing traffic flows to be identified and kept
 ** on the same cpu.  This would be a performance
 ** hit, but we only do it at IXGBE_FDIR_RATE of
 ** packets.
 */
 static void
 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
 {
 	struct adapter			*adapter = txr->adapter;
 	struct ix_queue			*que;
 	struct ip			*ip;
 	struct tcphdr			*th;
 	struct udphdr			*uh;
 	struct ether_vlan_header	*eh;
 	union ixgbe_atr_hash_dword	input = {.dword = 0}; 
 	union ixgbe_atr_hash_dword	common = {.dword = 0}; 
 	int  				ehdrlen, ip_hlen;
 	u16				etype;
 
 	eh = mtod(mp, struct ether_vlan_header *);
 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 		etype = eh->evl_proto;
 	} else {
 		ehdrlen = ETHER_HDR_LEN;
 		etype = eh->evl_encap_proto;
 	}
 
 	/* Only handling IPv4 */
 	if (etype != htons(ETHERTYPE_IP))
 		return;
 
 	ip = (struct ip *)(mp->m_data + ehdrlen);
 	ip_hlen = ip->ip_hl << 2;
 
 	/* check if we're UDP or TCP */
 	switch (ip->ip_p) {
 	case IPPROTO_TCP:
 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
 		/* src and dst are inverted */
 		common.port.dst ^= th->th_sport;
 		common.port.src ^= th->th_dport;
 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
 		break;
 	case IPPROTO_UDP:
 		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
 		/* src and dst are inverted */
 		common.port.dst ^= uh->uh_sport;
 		common.port.src ^= uh->uh_dport;
 		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
 		break;
 	default:
 		return;
 	}
 
 	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
 	if (mp->m_pkthdr.ether_vtag)
 		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
 	else
 		common.flex_bytes ^= etype;
 	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
 
 	que = &adapter->queues[txr->me];
 	/*
 	** This assumes the Rx queue and Tx
 	** queue are bound to the same CPU
 	*/
 	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
 	    input, common, que->msix);
 }
 #endif /* IXGBE_FDIR */
 
 /**********************************************************************
  *
  *  Reclaim completed transmit descriptors.
  *
  *  Starting at next_to_clean, examine each tx_buffer that records an
  *  end-of-packet descriptor.  If the hardware has set the DD (done)
  *  status bit on that descriptor, free the packet's mbuf, unload its
  *  DMA map and return every descriptor of the packet to the pool of
  *  available descriptors.  At most process_limit packets are cleaned
  *  per call.  Also maintains queue_status (IDLE / HUNG).
  *
  *  Must be called with the ring's TX mutex held (asserted below).
  *
  **********************************************************************/
 static void
 ixgbe_txeof(struct tx_ring *txr)
 {
 #ifdef DEV_NETMAP
 	struct adapter		*adapter = txr->adapter;
 	struct ifnet		*ifp = adapter->ifp;
 #endif
 	u32			work, processed = 0;
 	u16			limit = txr->process_limit;
 	struct ixgbe_tx_buf	*buf;
 	union ixgbe_adv_tx_desc *txd;
 
 	mtx_assert(&txr->tx_mtx, MA_OWNED);
 
 #ifdef DEV_NETMAP
 	if (ifp->if_capenable & IFCAP_NETMAP) {
 		struct netmap_adapter *na = NA(ifp);
 		struct netmap_kring *kring = &na->tx_rings[txr->me];
 		txd = txr->tx_base;
 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 		    BUS_DMASYNC_POSTREAD);
 		/*
 		 * In netmap mode, all the work is done in the context
 		 * of the client thread. Interrupt handlers only wake up
 		 * clients, which may be sleeping on individual rings
 		 * or on a global resource for all rings.
 		 * To implement tx interrupt mitigation, we wake up the client
 		 * thread roughly every half ring, even if the NIC interrupts
 		 * more frequently. This is implemented as follows:
 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
 		 *   the slot that should wake up the thread (nkr_num_slots
 		 *   means the user thread should not be woken up);
 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
 		 *   or the slot has the DD bit set.
 		 */
 		if (!netmap_mitigate ||
 		    (kring->nr_kflags < kring->nkr_num_slots &&
 		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
 			netmap_tx_irq(ifp, txr->me);
 		}
 		/* Netmap mode never falls through to the regular cleanup */
 		return;
 	}
 #endif /* DEV_NETMAP */
 
 	/* Every descriptor is already free: nothing is outstanding */
 	if (txr->tx_avail == txr->num_desc) {
 		txr->queue_status = IXGBE_QUEUE_IDLE;
 		return;
 	}
 
 	/* Get work starting point */
 	work = txr->next_to_clean;
 	buf = &txr->tx_buffers[work];
 	txd = &txr->tx_base[work];
 	/*
 	 * Bias the index by -num_desc.  'work' is unsigned, so it wraps
 	 * to a large value that counts up towards zero; it becomes
 	 * exactly 0 when the walk steps past the last ring slot, which
 	 * is what the '!work' wrap tests below detect.  The bias is
 	 * removed again after the loop.
 	 */
 	work -= txr->num_desc; /* The distance to ring end */
         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
             BUS_DMASYNC_POSTREAD);
 
 	do {
 		/* eop is non-NULL only on the first buffer of a packet */
 		union ixgbe_adv_tx_desc *eop= buf->eop;
 		if (eop == NULL) /* No work */
 			break;
 
 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
 			break;	/* I/O not complete */
 
 		/* Release the first descriptor of the packet */
 		if (buf->m_head) {
 			txr->bytes +=
 			    buf->m_head->m_pkthdr.len;
 			bus_dmamap_sync(txr->txtag,
 			    buf->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag,
 			    buf->map);
 			m_freem(buf->m_head);
 			buf->m_head = NULL;
 			/*
 			 * NOTE(review): the map is unloaded but not
 			 * destroyed before the pointer is cleared;
 			 * confirm the transmit path re-populates
 			 * buf->map, otherwise the map is lost.
 			 */
 			buf->map = NULL;
 		}
 		buf->eop = NULL;
 		++txr->tx_avail;
 
 		/* We clean the range if multi segment */
 		while (txd != eop) {
 			++txd;
 			++buf;
 			++work;
 			/* wrap the ring? */
 			if (__predict_false(!work)) {
 				work -= txr->num_desc;
 				buf = txr->tx_buffers;
 				txd = txr->tx_base;
 			}
 			if (buf->m_head) {
 				txr->bytes +=
 				    buf->m_head->m_pkthdr.len;
 				bus_dmamap_sync(txr->txtag,
 				    buf->map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(txr->txtag,
 				    buf->map);
 				m_freem(buf->m_head);
 				buf->m_head = NULL;
 				buf->map = NULL;
 			}
 			++txr->tx_avail;
 			buf->eop = NULL;
 
 		}
 		/* One whole packet reclaimed; progress resets the watchdog */
 		++txr->packets;
 		++processed;
 		txr->watchdog_time = ticks;
 
 		/* Try the next packet */
 		++txd;
 		++buf;
 		++work;
 		/* reset with a wrap */
 		if (__predict_false(!work)) {
 			work -= txr->num_desc;
 			buf = txr->tx_buffers;
 			txd = txr->tx_base;
 		}
 		prefetch(txd);
 	} while (__predict_true(--limit));
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/* Undo the -num_desc bias to get a real ring index back */
 	work += txr->num_desc;
 	txr->next_to_clean = work;
 
 	/*
 	** Watchdog calculation, we know there's
 	** work outstanding or the first return
 	** would have been taken, so none processed
 	** for too long indicates a hang.
 	*/
 	if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
 		txr->queue_status = IXGBE_QUEUE_HUNG;
 
 	/* A fully drained ring is idle (this overrides HUNG above) */
 	if (txr->tx_avail == txr->num_desc)
 		txr->queue_status = IXGBE_QUEUE_IDLE;
 
 	return;
 }
 
 /*********************************************************************
  *
  *  Refresh mbuf buffers for RX descriptor rings
  *   - now keeps its own state so discards due to resource
  *     exhaustion are unnecessary, if an mbuf cannot be obtained
  *     it just returns, keeping its placeholder, thus it can simply
  *     be recalled to try again.
  *
  **********************************************************************/
 static void
 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
 {
 	struct adapter		*adapter = rxr->adapter;
 	bus_dma_segment_t	seg[1];
 	struct ixgbe_rx_buf	*rxbuf;
 	struct mbuf		*mp;
 	int			i, j, nsegs, error;
 	bool			refreshed = FALSE;
 
 	i = j = rxr->next_to_refresh;
 	/* Control the loop with one beyond */
 	if (++j == rxr->num_desc)
 		j = 0;
 
 	while (j != limit) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->buf == NULL) {
 			mp = m_getjcl(M_NOWAIT, MT_DATA,
 			    M_PKTHDR, rxr->mbuf_sz);
 			if (mp == NULL)
 				goto update;
 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
 				m_adj(mp, ETHER_ALIGN);
 		} else
 			mp = rxbuf->buf;
 
 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
 
 		/* If we're dealing with an mbuf that was copied rather
 		 * than replaced, there's no need to go through busdma.
 		 */
 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
 			/* Get the memory mapping */
 			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
 			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
 			if (error != 0) {
 				printf("Refresh mbufs: payload dmamap load"
 				    " failure - %d\n", error);
 				m_free(mp);
 				rxbuf->buf = NULL;
 				goto update;
 			}
 			rxbuf->buf = mp;
 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
 			    BUS_DMASYNC_PREREAD);
 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
 			    htole64(seg[0].ds_addr);
 		} else {
 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
 			rxbuf->flags &= ~IXGBE_RX_COPY;
 		}
 
 		refreshed = TRUE;
 		/* Next is precalculated */
 		i = j;
 		rxr->next_to_refresh = i;
 		if (++j == rxr->num_desc)
 			j = 0;
 	}
 update:
 	if (refreshed) /* Update hardware tail index */
 		IXGBE_WRITE_REG(&adapter->hw,
 		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
 	return;
 }
 
 /*********************************************************************
  *
  *  Allocate memory for rx_buffer structures. Since we use one
  *  rx_buffer per received packet, the maximum number of rx_buffer's
  *  that we'll need is equal to the number of receive descriptors
  *  that we've allocated.
  *
  **********************************************************************/
 static int
 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
 {
 	struct	adapter 	*adapter = rxr->adapter;
 	device_t 		dev = adapter->dev;
 	struct ixgbe_rx_buf 	*rxbuf;
 	int             	i, bsize, error;
 
 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
 	if (!(rxr->rx_buffers =
 	    (struct ixgbe_rx_buf *) malloc(bsize,
 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
 				   1, 0,	/* alignment, bounds */
 				   BUS_SPACE_MAXADDR,	/* lowaddr */
 				   BUS_SPACE_MAXADDR,	/* highaddr */
 				   NULL, NULL,		/* filter, filterarg */
 				   MJUM16BYTES,		/* maxsize */
 				   1,			/* nsegments */
 				   MJUM16BYTES,		/* maxsegsize */
 				   0,			/* flags */
 				   NULL,		/* lockfunc */
 				   NULL,		/* lockfuncarg */
 				   &rxr->ptag))) {
 		device_printf(dev, "Unable to create RX DMA tag\n");
 		goto fail;
 	}
 
 	for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
 		rxbuf = &rxr->rx_buffers[i];
 		error = bus_dmamap_create(rxr->ptag,
 		    BUS_DMA_NOWAIT, &rxbuf->pmap);
 		if (error) {
 			device_printf(dev, "Unable to create RX dma map\n");
 			goto fail;
 		}
 	}
 
 	return (0);
 
 fail:
 	/* Frees all, but can handle partial completion */
 	ixgbe_free_receive_structures(adapter);
 	return (error);
 }
 
 /*
 ** Extract the RSC count field from a written-back RX descriptor.
 ** Used to detect a descriptor that has been merged by Hardware RSC.
 */
 static inline u32
 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
 {
 	u32 lo_dword = le32toh(rx->wb.lower.lo_dword.data);
 
 	return ((lo_dword & IXGBE_RXDADV_RSCCNT_MASK) >>
 	    IXGBE_RXDADV_RSCCNT_SHIFT);
 }
 
 /*********************************************************************
  *
  *  Initialize Hardware RSC (LRO) feature on 82599
  *  for an RX ring, this is toggled by the LRO capability
  *  even though it is transparent to the stack.
  *
  *  When IFCAP_LRO is not enabled, the RSCEN bit of the ring's RSCCTL
  *  register is cleared and nothing else is touched.  Otherwise
  *  RDRXCTL, RSCCTL, PSRTYPE(0) and RSCDBU are programmed and
  *  rxr->hw_rsc is set.
  *
  *  NOTE: since this HW feature only works with IPV4 and 
  *        our testing has shown soft LRO to be as effective
  *        I have decided to disable this by default.
  *
  **********************************************************************/
 static void
 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
 {
 	struct	adapter 	*adapter = rxr->adapter;
 	struct	ixgbe_hw	*hw = &adapter->hw;
 	u32			rscctrl, rdrxctl;
 
 	/* If turning LRO/RSC off we need to disable it */
 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
 		/*
 		 * The cleared bit has to be written back; modifying
 		 * only the local copy leaves RSC enabled in hardware.
 		 */
 		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
 		return;
 	}
 
 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
 	/*
 	 * NOTE(review): with DEV_NETMAP defined, CRCSTRIP is set only
 	 * when netmap is active and ix_crcstrip is 0.  That looks
 	 * inverted relative to the HLREG0 handling in
 	 * ixgbe_initialize_receive_units() -- confirm the intent.
 	 */
 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
 	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
 #endif /* DEV_NETMAP */
 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
 
 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
 	rscctrl |= IXGBE_RSCCTL_RSCEN;
 	/*
 	** Limit the total number of descriptors that
 	** can be combined, so it does not exceed 64K
 	*/
 	if (rxr->mbuf_sz == MCLBYTES)
 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
 	else if (rxr->mbuf_sz == MJUM9BYTES)
 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
 	else  /* Using 16K cluster */
 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
 
 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
 
 	/* Enable TCP header recognition */
 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
 	    IXGBE_PSRTYPE_TCPHDR));
 
 	/* Disable RSC for ACK packets */
 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
 
 	/* Remember that this ring now runs with hardware RSC */
 	rxr->hw_rsc = TRUE;
 }
 
 
 static void     
 ixgbe_free_receive_ring(struct rx_ring *rxr)
 { 
 	struct ixgbe_rx_buf       *rxbuf;
 	int i;
 
 	for (i = 0; i < rxr->num_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->buf != NULL) {
 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
 			rxbuf->buf->m_flags |= M_PKTHDR;
 			m_freem(rxbuf->buf);
 			rxbuf->buf = NULL;
 			rxbuf->flags = 0;
 		}
 	}
 }
 
 
 /*********************************************************************
  *
  *  Initialize a receive ring and its buffers.
  *
  *  Under the RX lock: zeroes the descriptor area, releases any mbufs
  *  still attached to the ring, then attaches a freshly DMA-mapped
  *  cluster to every descriptor (or, in netmap mode, the netmap
  *  buffer for that slot), resets the ring indices and counters and
  *  finally sets up hardware RSC or software LRO.
  *
  *  Returns 0 on success or an errno value on failure; on failure
  *  the mbufs attached so far have been released again.
  *
  **********************************************************************/
 static int
 ixgbe_setup_receive_ring(struct rx_ring *rxr)
 {
 	struct	adapter 	*adapter;
 	struct ifnet		*ifp;
 	device_t		dev;
 	struct ixgbe_rx_buf	*rxbuf;
 	bus_dma_segment_t	seg[1];
 	struct lro_ctrl		*lro = &rxr->lro;
 	int			rsize, nsegs, error = 0;
 #ifdef DEV_NETMAP
 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
 	struct netmap_slot *slot;
 #endif /* DEV_NETMAP */
 
 	adapter = rxr->adapter;
 	ifp = adapter->ifp;
 	dev = adapter->dev;
 
 	/* Clear the ring contents */
 	IXGBE_RX_LOCK(rxr);
 #ifdef DEV_NETMAP
 	/* same as in ixgbe_setup_transmit_ring() */
 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
 #endif /* DEV_NETMAP */
 	rsize = roundup2(adapter->num_rx_desc *
 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
 	bzero((void *)rxr->rx_base, rsize);
 	/* Cache the size */
 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
 
 	/* Free current RX buffer structs and their mbufs */
 	ixgbe_free_receive_ring(rxr);
 
 	/* Now replenish the mbufs */
 	for (int j = 0; j != rxr->num_desc; ++j) {
 		struct mbuf	*mp;
 
 		rxbuf = &rxr->rx_buffers[j];
 #ifdef DEV_NETMAP
 		/*
 		 * In netmap mode, fill the map and set the buffer
 		 * address in the NIC ring, considering the offset
 		 * between the netmap and NIC rings (see comment in
 		 * ixgbe_setup_transmit_ring() ). No need to allocate
 		 * an mbuf, so end the block with a continue;
 		 */
 		if (slot) {
 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
 			uint64_t paddr;
 			void *addr;
 
 			addr = PNMB(na, slot + sj, &paddr);
 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
 			/* Update descriptor and the cached value */
 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
 			rxbuf->addr = htole64(paddr);
 			continue;
 		}
 #endif /* DEV_NETMAP */
 		rxbuf->flags = 0;
 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
 		    M_PKTHDR, adapter->rx_mbuf_sz);
 		if (rxbuf->buf == NULL) {
 			error = ENOBUFS;
 			goto fail;
 		}
 		mp = rxbuf->buf;
 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
 		    rxbuf->pmap, mp, seg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0)
 			goto fail;
 		bus_dmamap_sync(rxr->ptag,
 		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
 		/* Update the descriptor and the cached value */
 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
 		rxbuf->addr = htole64(seg[0].ds_addr);
 	}
 
 
 	/* Setup our descriptor indices */
 	rxr->next_to_check = 0;
 	rxr->next_to_refresh = 0;
 	rxr->lro_enabled = FALSE;
 	rxr->rx_copies = 0;
 	rxr->rx_bytes = 0;
 	rxr->vtag_strip = FALSE;
 
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/*
 	** Now set up the LRO interface:
 	*/
 	if (ixgbe_rsc_enable)
 		ixgbe_setup_hw_rsc(rxr);
 	else if (ifp->if_capenable & IFCAP_LRO) {
 		/*
 		 * Keep the result in 'error' so that the fail path
 		 * reports it.  A separate local here would leave
 		 * 'error' at 0 and the caller would see success even
 		 * though the ring has just been emptied again.
 		 */
 		error = tcp_lro_init(lro);
 		if (error) {
 			device_printf(dev, "LRO Initialization failed!\n");
 			goto fail;
 		}
 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
 		rxr->lro_enabled = TRUE;
 		lro->ifp = adapter->ifp;
 	}
 
 	IXGBE_RX_UNLOCK(rxr);
 	return (0);
 
 fail:
 	/* Drop whatever mbufs were attached before the failure */
 	ixgbe_free_receive_ring(rxr);
 	IXGBE_RX_UNLOCK(rxr);
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Initialize all receive rings.
  *
  *  Returns 0 when every ring was set up.  If one ring fails, the
  *  rings set up before it are emptied again and ENOBUFS is returned
  *  regardless of the error the failing ring reported.
  *
  **********************************************************************/
 static int
 ixgbe_setup_receive_structures(struct adapter *adapter)
 {
 	int q;
 
 	for (q = 0; q < adapter->num_queues; q++) {
 		if (ixgbe_setup_receive_ring(&adapter->rx_rings[q]) == 0)
 			continue;
 
 		/*
 		 * Ring 'q' failed and has already cleaned up after
 		 * itself, so only the rings in front of it still
 		 * hold RX buffers that need to be released.
 		 */
 		for (int done = 0; done < q; ++done)
 			ixgbe_free_receive_ring(&adapter->rx_rings[done]);
 
 		return (ENOBUFS);
 	}
 
 	return (0);
 }
 
 /*
  * Program the RSS state of the adapter: the 128-entry redirection
  * table (RETA), the 10-word hash key (RSSRK) and the set of packet
  * types to hash on (MRQC).  With the kernel RSS option the key,
  * bucket mapping and hash types come from the RSS framework;
  * otherwise a random key and a round-robin table are used.
  */
 static void
 ixgbe_initialise_rss_mapping(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	uint32_t rss_key[10];
 	uint32_t reta = 0;
 	uint32_t mrqc;
 	int entry, rr, queue_id;
 #ifdef	RSS
 	uint32_t rss_hash_config;
 #endif
 
 #ifdef	RSS
 	/* Fetch the configured RSS key */
 	rss_getkey((uint8_t *) &rss_key);
 #else
 	/* set up random bits */
 	arc4rand(&rss_key, sizeof(rss_key), 0);
 #endif
 
 	/*
 	 * Redirection table: four 8-bit entries are packed into each
 	 * 32-bit register.  'rr' is a round-robin counter over the
 	 * queues that restarts at num_queues.
 	 */
 	rr = 0;
 	for (entry = 0; entry < 128; entry++, rr++) {
 		if (rr == adapter->num_queues)
 			rr = 0;
 #ifdef	RSS
 		/*
 		 * Fetch the RSS bucket id for the given indirection entry.
 		 * Cap it at the number of configured buckets (which is
 		 * num_queues.)
 		 */
 		queue_id = rss_get_indirection_to_bucket(entry);
 		queue_id = queue_id % adapter->num_queues;
 #else
 		queue_id = (rr * 0x11);
 #endif
 		/*
 		 * Shift the older entries down and place the new one
 		 * in the top byte, so that after four rounds the low
 		 * 8 bits hold entry n+0, the next 8 bits n+1, etc.
 		 */
 		reta = (reta >> 8) | (((uint32_t) queue_id) << 24);
 
 		/* Every fourth entry completes one register */
 		if ((entry & 3) == 3) {
 			IXGBE_WRITE_REG(hw, IXGBE_RETA(entry >> 2), reta);
 			reta = 0;
 		}
 	}
 
 	/* Now fill our hash function seeds */
 	for (int k = 0; k < 10; k++)
 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(k), rss_key[k]);
 
 	/* Perform hash on these packet types */
 	mrqc = IXGBE_MRQC_RSSEN;
 #ifdef	RSS
 	rss_hash_config = rss_gethashconfig();
 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
 		device_printf(adapter->dev,
 		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
 		    "but not supported\n", __func__);
 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
 #else
 	/*
 	 * Disable UDP - IP fragments aren't currently being handled
 	 * and so we end up with a mix of 2-tuple and 4-tuple
 	 * traffic.  The IPV4_UDP, IPV6_UDP and IPV6_EX_UDP fields
 	 * are therefore deliberately left out.
 	 */
 	mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
 	mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
 	mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
 	mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
 	mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
 	mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
 #endif /* RSS */
 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
 }
 
 
 /*********************************************************************
  *
  *  Program the receive side of the adapter.
  *
  *  With receives disabled: sets the filter control bits, the jumbo
  *  frame (and, for netmap, CRC strip) configuration, describes every
  *  RX descriptor ring to the hardware, then sets up PSRTYPE, the RSS
  *  mapping and the RX checksum control register.
  *
  **********************************************************************/
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
 
 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
 
 static void
 ixgbe_initialize_receive_units(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct ifnet	*ifp = adapter->ifp;
 	struct rx_ring	*ring;
 	u32		pkt_bufsz, reg, rxcsum;
 	int		q;
 
 	/*
 	 * Make sure receives are disabled while
 	 * setting up the descriptor ring
 	 */
 	reg = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
 	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg & ~IXGBE_RXCTRL_RXEN);
 
 	/* Filter control: set the BAM (broadcast), DPF and PMCF bits */
 	reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
 	reg |= IXGBE_FCTRL_BAM | IXGBE_FCTRL_DPF | IXGBE_FCTRL_PMCF;
 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg);
 
 	/* Jumbo frames are enabled only for an MTU above the default */
 	reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
 	if (ifp->if_mtu > ETHERMTU)
 		reg |= IXGBE_HLREG0_JUMBOEN;
 	else
 		reg &= ~IXGBE_HLREG0_JUMBOEN;
 #ifdef DEV_NETMAP
 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
 		reg &= ~IXGBE_HLREG0_RXCRCSTRP;
 	else
 		reg |= IXGBE_HLREG0_RXCRCSTRP;
 #endif /* DEV_NETMAP */
 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
 
 	/* Packet buffer size in SRRCTL units, rounded up */
 	pkt_bufsz = (adapter->rx_mbuf_sz +
 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
 
 	for (q = 0; q < adapter->num_queues; q++) {
 		u64 ring_pa;
 		u32 srrctl;
 
 		ring = &adapter->rx_rings[q];
 		ring_pa = ring->rxdma.dma_paddr;
 
 		/* Setup the Base and Length of the Rx Descriptor Ring */
 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(q),
 		    (ring_pa & 0x00000000ffffffffULL));
 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(q), (ring_pa >> 32));
 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(q),
 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
 
 		/* SRRCTL: buffer sizes and one-buffer advanced descriptors */
 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(q));
 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
 		srrctl |= pkt_bufsz;
 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
 		/*
 		 * Set DROP_EN iff we have no flow control and >1 queue.
 		 * Note that srrctl was cleared shortly before during reset,
 		 * so we do not need to clear the bit, but do it just in case
 		 * this code is moved elsewhere.
 		 */
 		if (adapter->num_queues > 1 && adapter->fc == ixgbe_fc_none)
 			srrctl |= IXGBE_SRRCTL_DROP_EN;
 		else
 			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
 
 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(q), srrctl);
 
 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
 		IXGBE_WRITE_REG(hw, IXGBE_RDH(q), 0);
 		IXGBE_WRITE_REG(hw, IXGBE_RDT(q), 0);
 
 		/* Set the processing limit */
 		ring->process_limit = ixgbe_rx_process_limit;
 	}
 
 	/* PSRTYPE is not written on the 82598EB */
 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
 		u32 psrtype;
 
 		psrtype = IXGBE_PSRTYPE_TCPHDR;
 		psrtype |= IXGBE_PSRTYPE_UDPHDR;
 		psrtype |= IXGBE_PSRTYPE_IPV4HDR;
 		psrtype |= IXGBE_PSRTYPE_IPV6HDR;
 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
 	}
 
 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
 
 	ixgbe_initialise_rss_mapping(adapter);
 
 	/*
 	 * PCSD is set for multi-queue operation (RSS and RX IPP
 	 * Checksum are mutually exclusive) and whenever RX checksum
 	 * offload is enabled on the interface.
 	 */
 	if (adapter->num_queues > 1 ||
 	    (ifp->if_capenable & IFCAP_RXCSUM))
 		rxcsum |= IXGBE_RXCSUM_PCSD;
 
 	/* IPPCSE is used only when PCSD ended up clear */
 	if ((rxcsum & IXGBE_RXCSUM_PCSD) == 0)
 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
 
 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
 }
 
 /*********************************************************************
  *
  *  Free all receive rings.
  *
  **********************************************************************/
 static void
 ixgbe_free_receive_structures(struct adapter *adapter)
 {
 	struct rx_ring	*ring;
 	int		q;
 
 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
 
 	/*
 	 * Tear down every queue's ring in turn: its receive buffers,
 	 * then its LRO state, then the descriptor ring DMA memory.
 	 */
 	for (q = 0; q < adapter->num_queues; q++) {
 		ring = &adapter->rx_rings[q];
 
 		ixgbe_free_receive_buffers(ring);
 		tcp_lro_free(&ring->lro);
 		ixgbe_dma_free(adapter, &ring->rxdma);
 	}
 
 	/* Finally release the array of ring structures itself */
 	free(adapter->rx_rings, M_DEVBUF);
 }
 
 
 /*********************************************************************
  *
  *  Free receive ring data structures
  *
  **********************************************************************/
 static void
 ixgbe_free_receive_buffers(struct rx_ring *rxr)
 {
 	struct adapter		*adapter = rxr->adapter;
 	struct ixgbe_rx_buf	*slot;
 	int			idx;
 
 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
 
 	if (rxr->rx_buffers != NULL) {
 		/* Release the mbuf and the DMA map held by every slot */
 		for (idx = 0; idx < adapter->num_rx_desc; idx++) {
 			slot = rxr->rx_buffers + idx;
 
 			if (slot->buf != NULL) {
 				bus_dmamap_sync(rxr->ptag, slot->pmap,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->ptag, slot->pmap);
 				slot->buf->m_flags |= M_PKTHDR;
 				m_freem(slot->buf);
 			}
 			slot->buf = NULL;
 
 			if (slot->pmap == NULL)
 				continue;
 			bus_dmamap_destroy(rxr->ptag, slot->pmap);
 			slot->pmap = NULL;
 		}
 
 		/* The slot array itself goes last */
 		free(rxr->rx_buffers, M_DEVBUF);
 		rxr->rx_buffers = NULL;
 	}
 
 	/* Drop the DMA tag used for the packet buffers, if any */
 	if (rxr->ptag != NULL) {
 		bus_dma_tag_destroy(rxr->ptag);
 		rxr->ptag = NULL;
 	}
 }
 
 /*
  * Hand a completed frame either to software LRO or to the stack.
  * Called with the RX lock held; the lock is dropped around if_input.
  */
 static __inline void
 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
 {
 	const u32	tcp4 =
 	    IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP;
 	const u32	tcp6 =
 	    IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP;
 	bool		lro_candidate;
 
 	/*
 	 * LRO is attempted only for IPv4/IPv6 TCP packets (no ETQF match)
 	 * whose TCP checksum was validated by hardware, and only while
 	 * hardware VLAN tagging is enabled so no tag is left in the
 	 * ethernet header.  IPv6 extension headers are not supported yet.
 	 */
 	lro_candidate = rxr->lro_enabled &&
 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
 	    ((ptype & tcp4) == tcp4 || (ptype & tcp6) == tcp6) &&
 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 
 	/*
 	 * The frame is consumed here only when LRO resources exist and
 	 * the enqueue succeeds; in every other case it goes to the stack.
 	 */
 	if (lro_candidate && rxr->lro.lro_cnt != 0 &&
 	    tcp_lro_rx(&rxr->lro, m, 0) == 0)
 		return;
 
 	IXGBE_RX_UNLOCK(rxr);
 	(*ifp->if_input)(ifp, m);
 	IXGBE_RX_LOCK(rxr);
 }
 
 static __inline void
 ixgbe_rx_discard(struct rx_ring *rxr, int i)
 {
 	struct ixgbe_rx_buf	*rbuf;
 
 	rbuf = &rxr->rx_buffers[i];
 
 
 	/*
 	** With advanced descriptors the writeback
 	** clobbers the buffer addrs, so its easier
 	** to just free the existing mbufs and take
 	** the normal refresh path to get new buffers
 	** and mapping.
 	*/
 
 	if (rbuf->fmp != NULL) {/* Partial chain ? */
 		rbuf->fmp->m_flags |= M_PKTHDR;
 		m_freem(rbuf->fmp);
 		rbuf->fmp = NULL;
 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
 	} else if (rbuf->buf) {
 		m_free(rbuf->buf);
 		rbuf->buf = NULL;
 	}
 
 	rbuf->flags = 0;
  
 	return;
 }
 
 
 /*********************************************************************
  *
  *  This routine executes in interrupt context. It replenishes
  *  the mbufs in the descriptor and sends data which has been
  *  dma'ed into host memory to upper layer.
  *
  *  We process at most rxr->process_limit descriptors per call, and
  *  stop early if a descriptor is not done or the interface is not running.
  *
  *  Return TRUE for more work, FALSE for all clean.
  *********************************************************************/
 static bool
 ixgbe_rxeof(struct ix_queue *que)
 {
 	struct adapter		*adapter = que->adapter;
 	struct rx_ring		*rxr = que->rxr;
 	struct ifnet		*ifp = adapter->ifp;
 	struct lro_ctrl		*lro = &rxr->lro;
 	struct lro_entry	*queued;
 	int			i, nextp, processed = 0;
 	u32			staterr = 0;
 	/* Per-call budget: decremented once for every descriptor consumed */
 	u16			count = rxr->process_limit;
 	union ixgbe_adv_rx_desc	*cur;
 	struct ixgbe_rx_buf	*rbuf, *nbuf;
 	u16			pkt_info;
 
 	IXGBE_RX_LOCK(rxr);
 
 #ifdef DEV_NETMAP
 	/* Same as the txeof routine: wakeup clients on intr. */
 	/* A non-zero return from netmap_rx_irq() ends our processing here */
 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
 		IXGBE_RX_UNLOCK(rxr);
 		return (FALSE);
 	}
 #endif /* DEV_NETMAP */
 
 	/*
 	 * Walk the ring starting at next_to_check until the budget is
 	 * used up or one of the break conditions below is hit.
 	 */
 	for (i = rxr->next_to_check; count != 0;) {
 		struct mbuf	*sendmp, *mp;
 		u32		rsc, ptype;
 		u16		len;
 		u16		vtag = 0;
 		bool		eop;
  
 		/* Sync the ring. */
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 		cur = &rxr->rx_base[i];
 		staterr = le32toh(cur->wb.upper.status_error);
 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
 
 		/* Stop at the first descriptor without the DD status bit */
 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
 			break;
 		/* Stop as well if the interface is no longer marked running */
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 
 		count--;
 		sendmp = NULL;
 		nbuf = NULL;
 		rsc = 0;
 		/* Clear the status word in the ring entry now that it is latched */
 		cur->wb.upper.status_error = 0;
 		rbuf = &rxr->rx_buffers[i];
 		mp = rbuf->buf;
 
 		len = le16toh(cur->wb.upper.length);
 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
 		    IXGBE_RXDADV_PKTTYPE_MASK;
 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
 
 		/* Make sure bad packets are discarded */
 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
 			rxr->rx_discarded++;
 			ixgbe_rx_discard(rxr, i);
 			goto next_desc;
 		}
 
 		/*
 		** On 82599 which supports a hardware
 		** LRO (called HW RSC), packets need
 		** not be fragmented across sequential
 		** descriptors, rather the next descriptor
 		** is indicated in bits of the descriptor.
 		** This also means that we might process
 		** more than one packet at a time, something
 		** that has never been true before, it
 		** required eliminating global chain pointers
 		** in favor of what we are doing here.  -jfv
 		*/
 		if (!eop) {
 			/*
 			** Figure out the next descriptor
 			** of this frame.
 			*/
 			if (rxr->hw_rsc == TRUE) {
 				rsc = ixgbe_rsc_count(cur);
 				rxr->rsc_num += (rsc - 1);
 			}
 			if (rsc) { /* Get hardware index */
 				nextp = ((staterr &
 				    IXGBE_RXDADV_NEXTP_MASK) >>
 				    IXGBE_RXDADV_NEXTP_SHIFT);
 			} else { /* Just sequential */
 				nextp = i + 1;
 				/*
 				 * NOTE(review): the wrap here uses
 				 * adapter->num_rx_desc while the ring advance
 				 * below uses rxr->num_desc; presumably these
 				 * are equal, confirm.
 				 */
 				if (nextp == adapter->num_rx_desc)
 					nextp = 0;
 			}
 			nbuf = &rxr->rx_buffers[nextp];
 			prefetch(nbuf);
 		}
 		/*
 		** Rather than using the fmp/lmp global pointers
 		** we now keep the head of a packet chain in the
 		** buffer struct and pass this along from one
 		** descriptor to the next, until we get EOP.
 		*/
 		mp->m_len = len;
 		/*
 		** See if there is a stored head
 		** that determines what we are
 		*/
 		sendmp = rbuf->fmp;
 		if (sendmp != NULL) {  /* secondary frag */
 			rbuf->buf = rbuf->fmp = NULL;
 			mp->m_flags &= ~M_PKTHDR;
 			sendmp->m_pkthdr.len += mp->m_len;
 		} else {
 			/*
 			 * Optimize.  This might be a small packet,
 			 * maybe just a TCP ACK.  Do a fast copy that
 			 * is cache aligned into a new mbuf, and
 			 * leave the old mbuf+cluster for re-use.
 			 */
 			if (eop && len <= IXGBE_RX_COPY_LEN) {
 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
 				if (sendmp != NULL) {
 					sendmp->m_data +=
 					    IXGBE_RX_COPY_ALIGN;
 					ixgbe_bcopy(mp->m_data,
 					    sendmp->m_data, len);
 					sendmp->m_len = len;
 					rxr->rx_copies++;
 					rbuf->flags |= IXGBE_RX_COPY;
 				}
 			}
 			/* No copy was made: pass the original mbuf up and detach it */
 			if (sendmp == NULL) {
 				rbuf->buf = rbuf->fmp = NULL;
 				sendmp = mp;
 			}
 
 			/* first desc of a non-ps chain */
 			sendmp->m_flags |= M_PKTHDR;
 			sendmp->m_pkthdr.len = mp->m_len;
 		}
 		++processed;
 
 		/* Pass the head pointer on */
 		if (eop == 0) {
 			/* Stash the chain head in the next slot and link its buffer */
 			nbuf->fmp = sendmp;
 			sendmp = NULL;
 			mp->m_next = nbuf->buf;
 		} else { /* Sending this frame */
 			sendmp->m_pkthdr.rcvif = ifp;
 			rxr->rx_packets++;
 			/* capture data for AIM */
 			rxr->bytes += sendmp->m_pkthdr.len;
 			rxr->rx_bytes += sendmp->m_pkthdr.len;
 			/* Process vlan info */
 			if ((rxr->vtag_strip) &&
 			    (staterr & IXGBE_RXD_STAT_VP))
 				vtag = le16toh(cur->wb.upper.vlan);
 			if (vtag) {
 				sendmp->m_pkthdr.ether_vtag = vtag;
 				sendmp->m_flags |= M_VLANTAG;
 			}
 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 				ixgbe_rx_checksum(staterr, sendmp, ptype);
 #if __FreeBSD_version >= 800000
 #ifdef RSS
 			/* Use the RSS hash from the descriptor as the flow id */
 			sendmp->m_pkthdr.flowid =
 			    le32toh(cur->wb.lower.hi_dword.rss);
 			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
 			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
 				break;
 			case IXGBE_RXDADV_RSSTYPE_IPV4:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
 				break;
 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
 				break;
 			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
 				break;
 			case IXGBE_RXDADV_RSSTYPE_IPV6:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
 				break;
 			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
 				break;
 			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
 				break;
 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
 				break;
 			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
 				break;
 			default:
 				/* XXX fallthrough */
 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
 				break;
 			}
 #else /* RSS */
 			/* Without RSS the queue's MSIX vector serves as the flow id */
 			sendmp->m_pkthdr.flowid = que->msix;
 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
 #endif /* RSS */
 #endif /* FreeBSD_version */
 		}
 next_desc:
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 		/* Advance our pointers to the next descriptor. */
 		if (++i == rxr->num_desc)
 			i = 0;
 
 		/* Now send to the stack or do LRO */
 		if (sendmp != NULL) {
 			/*
 			 * ixgbe_rx_input() releases the RX lock around
 			 * if_input, so publish our position first and
 			 * re-read it afterwards.
 			 */
 			rxr->next_to_check = i;
 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
 			i = rxr->next_to_check;
 		}
 
 		/* Every 8 descriptors we go to refresh mbufs */
 		if (processed == 8) {
 			ixgbe_refresh_mbufs(rxr, i);
 			processed = 0;
 		}
 	}
 
 	/* Refresh any remaining buf structs */
 	if (ixgbe_rx_unrefreshed(rxr))
 		ixgbe_refresh_mbufs(rxr, i);
 
 	rxr->next_to_check = i;
 
 	/*
 	 * Flush any outstanding LRO work
 	 */
 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
 		tcp_lro_flush(lro, queued);
 	}
 
 	IXGBE_RX_UNLOCK(rxr);
 
 	/*
 	** Still have cleaning to do?
 	** staterr holds the status of the last descriptor examined; if it
 	** had DD set we left the loop for another reason (budget exhausted
 	** or interface not running).
 	*/
 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
 		return (TRUE);
 	else
 		return (FALSE);
 }
 
 
 /*********************************************************************
  *
  *  Verify that the hardware indicated that the checksum is valid.
  *  Inform the stack about the status of checksum so that stack
  *  doesn't spend time verifying the checksum.
  *
  *********************************************************************/
 static void
 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
 {
 	/* Status lives in the low 16 bits, errors in the top byte */
 	const u16	stat_bits = (u16)staterr;
 	const u8	err_bits = (u8)(staterr >> 24);
 	/* SCTP is recognised only when the packet is not an ETQF match */
 	const bool	is_sctp =
 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0;
 
 	/* IP header checksum: report it as good, or wipe the flags */
 	if (stat_bits & IXGBE_RXD_STAT_IPCS) {
 		if (err_bits & IXGBE_RXD_ERR_IPE)
 			mp->m_pkthdr.csum_flags = 0;
 		else
 			mp->m_pkthdr.csum_flags =
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 	}
 
 	/* L4 checksum: only a hardware-verified, error-free sum is flagged */
 	if ((stat_bits & IXGBE_RXD_STAT_L4CS) &&
 	    !(err_bits & IXGBE_RXD_ERR_TCPE)) {
 		u64 l4_flags = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 #if __FreeBSD_version >= 800000
 		if (is_sctp)
 			l4_flags = CSUM_SCTP_VALID;
 #endif
 		mp->m_pkthdr.csum_flags |= l4_flags;
 		if (!is_sctp)
 			mp->m_pkthdr.csum_data = htons(0xffff);
 	}
 }
 
 
 /*
 ** This routine is run via a vlan config EVENT,
 ** it enables us to use the HW Filter table since
 ** we can get the vlan id. This just creates the
 ** entry in the soft version of the VFTA, init will
 ** repopulate the real table.
 */
 static void
 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u16		index, bit;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
 		return;
 
 	IXGBE_CORE_LOCK(adapter);
 	/* Each shadow VFTA word covers 32 vlan ids: word index, then bit */
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	/*
 	 * Use an unsigned constant: bit can be 31, and shifting a
 	 * signed 1 into the sign bit is undefined behavior.
 	 */
 	adapter->shadow_vfta[index] |= (1U << bit);
 	++adapter->num_vlans;
 	/* Push the updated soft table out to the hardware */
 	ixgbe_setup_vlan_hw_support(adapter);
 	IXGBE_CORE_UNLOCK(adapter);
 }
 
 /*
 ** This routine is run via a vlan
 ** unconfig EVENT, remove our entry
 ** in the soft vfta.
 */
 static void
 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u16		index, bit;
 
 	if (ifp->if_softc !=  arg)	/* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
 		return;
 
 	IXGBE_CORE_LOCK(adapter);
 	/* Each shadow VFTA word covers 32 vlan ids: word index, then bit */
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	/*
 	 * Use an unsigned constant: bit can be 31, and shifting a
 	 * signed 1 into the sign bit is undefined behavior.
 	 */
 	adapter->shadow_vfta[index] &= ~(1U << bit);
 	--adapter->num_vlans;
 	/* Re-init to load the changes */
 	ixgbe_setup_vlan_hw_support(adapter);
 	IXGBE_CORE_UNLOCK(adapter);
 }
 
 static void
 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
 {
 	struct ifnet 	*ifp = adapter->ifp;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32		reg;
 	int		n;
 
 	/*
 	 * This is reached through init_locked, i.e. after a soft reset
 	 * that already cleared the VFTA and related state, so with no
 	 * vlans registered there is nothing to program.
 	 */
 	if (adapter->num_vlans == 0)
 		return;
 
 	/* Turn on tag stripping for every receive queue */
 	for (n = 0; n < adapter->num_queues; n++) {
 		/* On 82599 the VLAN enable is per/queue in RXDCTL */
 		if (hw->mac.type != ixgbe_mac_82598EB) {
 			reg = IXGBE_READ_REG(hw, IXGBE_RXDCTL(n));
 			reg |= IXGBE_RXDCTL_VME;
 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(n), reg);
 		}
 		adapter->rx_rings[n].vtag_strip = TRUE;
 	}
 
 	/* Everything below applies only when hardware filtering is enabled */
 	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
 		return;
 
 	/* The soft reset zeroed the VFTA: reload it from the shadow copy */
 	for (n = 0; n < IXGBE_VFTA_SIZE; n++) {
 		if (adapter->shadow_vfta[n] == 0)
 			continue;
 		IXGBE_WRITE_REG(hw, IXGBE_VFTA(n), adapter->shadow_vfta[n]);
 	}
 
 	reg = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
 	/* Enable the Filter Table if enabled */
 	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) != 0) {
 		reg &= ~IXGBE_VLNCTRL_CFIEN;
 		reg |= IXGBE_VLNCTRL_VFE;
 	}
 	/* 82598 has a single VME bit in VLNCTRL rather than per queue */
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		reg |= IXGBE_VLNCTRL_VME;
 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, reg);
 }
 
 static void
 ixgbe_enable_intr(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	u32		eims, eiac;
 
 	/* Start from every cause except the RX/TX queue bits */
 	eims = IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE;
 
 	/* Enable Fan Failure detection */
 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
 		eims |= IXGBE_EIMS_GPI_SDP1;
 
 	/* MAC specific causes */
 	if (hw->mac.type == ixgbe_mac_82599EB) {
 		eims |= IXGBE_EIMS_ECC | IXGBE_EIMS_GPI_SDP0 |
 		    IXGBE_EIMS_GPI_SDP1 | IXGBE_EIMS_GPI_SDP2;
 #ifdef IXGBE_FDIR
 		eims |= IXGBE_EIMS_FLOW_DIR;
 #endif
 	} else if (hw->mac.type == ixgbe_mac_X540) {
 		eims |= IXGBE_EIMS_ECC;
 		/* Detect if Thermal Sensor is enabled */
 		if (IXGBE_READ_REG(hw, IXGBE_FWSM) & IXGBE_FWSM_TS_ENABLED)
 			eims |= IXGBE_EIMS_TS;
 #ifdef IXGBE_FDIR
 		eims |= IXGBE_EIMS_FLOW_DIR;
 #endif
 	}
 
 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, eims);
 
 	/* With RSS we use auto clear, for everything but the Link causes */
 	if (adapter->msix_mem) {
 		eiac = IXGBE_EIMS_ENABLE_MASK &
 		    ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC);
 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, eiac);
 	}
 
 	/*
 	 * The queues are enabled one by one so that the extended
 	 * (beyond 32) MSIX vectors usable on 82599 are handled too.
 	 */
 	for (int q = 0; q < adapter->num_queues; q++)
 		ixgbe_enable_queue(adapter, adapter->queues[q].msix);
 
 	IXGBE_WRITE_FLUSH(hw);
 }
 
 static void
 ixgbe_disable_intr(struct adapter *adapter)
 {
 	if (adapter->msix_mem)
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
 	} else {
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
 	}
 	IXGBE_WRITE_FLUSH(&adapter->hw);
 	return;
 }
 
 /* Read a 16-bit value from the device's PCI configuration space. */
 u16
 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
 {
 	struct ixgbe_osdep *os = (struct ixgbe_osdep *)hw->back;
 
 	return ((u16)pci_read_config(os->dev, reg, 2));
 }
 
 /* Write a 16-bit value to the device's PCI configuration space. */
 void
 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
 {
 	struct ixgbe_osdep *os = (struct ixgbe_osdep *)hw->back;
 
 	pci_write_config(os->dev, reg, value, 2);
 }
 
 /*
 ** Get the width and transaction speed of
 ** the slot this adapter is plugged into.
 */
 static void
 ixgbe_get_slot_info(struct ixgbe_hw *hw)
 {
 	device_t		dev = ((struct ixgbe_osdep *)hw->back)->dev;
 	struct ixgbe_mac_info	*mac = &hw->mac;
 	u16			link;
 	u32			offset;
 
 	/* For most devices simply call the shared code routine */
 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
 		ixgbe_get_bus_info(hw);
 		goto display;
 	}
 
 	/*
 	** For the Quad port adapter we need to parse back
 	** up the PCI tree to find the speed of the expansion
 	** slot into which this adapter is plugged. A bit more work.
 	*/
 	dev = device_get_parent(device_get_parent(dev));
 #ifdef IXGBE_DEBUG
 	device_printf(dev, "parent pcib = %x,%x,%x\n",
 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
 #endif
 	dev = device_get_parent(device_get_parent(dev));
 #ifdef IXGBE_DEBUG
 	device_printf(dev, "slot pcib = %x,%x,%x\n",
 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
 #endif
 	/* Now get the PCI Express Capabilities offset */
 	pci_find_cap(dev, PCIY_EXPRESS, &offset);
 	/* ...and read the Link Status Register */
 	link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
 	switch (link & IXGBE_PCI_LINK_WIDTH) {
 	case IXGBE_PCI_LINK_WIDTH_1:
 		hw->bus.width = ixgbe_bus_width_pcie_x1;
 		break;
 	case IXGBE_PCI_LINK_WIDTH_2:
 		hw->bus.width = ixgbe_bus_width_pcie_x2;
 		break;
 	case IXGBE_PCI_LINK_WIDTH_4:
 		hw->bus.width = ixgbe_bus_width_pcie_x4;
 		break;
 	case IXGBE_PCI_LINK_WIDTH_8:
 		hw->bus.width = ixgbe_bus_width_pcie_x8;
 		break;
 	default:
 		hw->bus.width = ixgbe_bus_width_unknown;
 		break;
 	}
 
 	switch (link & IXGBE_PCI_LINK_SPEED) {
 	case IXGBE_PCI_LINK_SPEED_2500:
 		hw->bus.speed = ixgbe_bus_speed_2500;
 		break;
 	case IXGBE_PCI_LINK_SPEED_5000:
 		hw->bus.speed = ixgbe_bus_speed_5000;
 		break;
 	case IXGBE_PCI_LINK_SPEED_8000:
 		hw->bus.speed = ixgbe_bus_speed_8000;
 		break;
 	default:
 		hw->bus.speed = ixgbe_bus_speed_unknown;
 		break;
 	}
 
 	mac->ops.set_lan_id(hw);
 
 display:
 	device_printf(dev,"PCI Express Bus: Speed %s %s\n",
 	    ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
 	    (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
 	    ("Unknown"));
 
 	if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
 	    (hw->bus.speed == ixgbe_bus_speed_2500))) {
 		device_printf(dev, "PCI-Express bandwidth available"
 		    " for this card\n     is not sufficient for"
 		    " optimal performance.\n");
 		device_printf(dev, "For optimal performance a x8 "
 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
         }
 	if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
 	    (hw->bus.speed < ixgbe_bus_speed_8000))) {
 		device_printf(dev, "PCI-Express bandwidth available"
 		    " for this card\n     is not sufficient for"
 		    " optimal performance.\n");
 		device_printf(dev, "For optimal performance a x8 "
 		    "PCIE Gen3 slot is required.\n");
         }
 
 	return;
 }
 
 
 /*
 ** Setup the correct IVAR register for a particular MSIX interrupt
 **   (yes this is all very magic and confusing :)
 **  - entry is the register array entry
 **  - vector is the MSIX vector for this queue
 **  - type is RX/TX/MISC
 */
 static void
 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 ivar, index, shift;
 
 	vector |= IXGBE_IVAR_ALLOC_VAL;
 
 	/*
 	 * All masks and shifted values below are built from unsigned
 	 * operands: the byte lane can sit at bit 24, and shifting a
 	 * signed 0xFF (or the promoted u8 vector) that far overflows int.
 	 */
 	switch (hw->mac.type) {
 
 	case ixgbe_mac_82598EB:
 		if (type == -1)
 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
 		else
 			entry += (type * 64);
 		/* Four 8-bit entries per IVAR register */
 		index = (entry >> 2) & 0x1F;
 		shift = 8 * (entry & 0x3);
 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
 		ivar &= ~(0xFFU << shift);
 		ivar |= ((u32)vector << shift);
 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
 		break;
 
 	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
 		if (type == -1) { /* MISC IVAR */
 			index = (entry & 1) * 8;
 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
 			ivar &= ~(0xFFU << index);
 			ivar |= ((u32)vector << index);
 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
 		} else {	/* RX/TX IVARS */
 			/* Two queues per register; 'type' selects the lane */
 			index = (16 * (entry & 1)) + (8 * type);
 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
 			ivar &= ~(0xFFU << index);
 			ivar |= ((u32)vector << index);
 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
 		}
 		break;
 
 	default:
 		break;
 	}
 }
 
 /*
  * Map every queue's RX and TX causes, plus the link cause, onto
  * their MSIX vectors and seed each queue's EITR register.
  */
 static void
 ixgbe_configure_ivars(struct adapter *adapter)
 {
 	struct ix_queue	*q;
 	u32		itr = 0;
 
 	/* Initial EITR value derived from the interrupt rate tunable */
 	if (ixgbe_max_interrupt_rate > 0)
 		itr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
 
 	for (int n = 0; n < adapter->num_queues; n++) {
 		q = &adapter->queues[n];
 
 		/* RX entry (type 0), then TX entry (type 1) */
 		ixgbe_set_ivar(adapter, n, q->msix, 0);
 		ixgbe_set_ivar(adapter, n, q->msix, 1);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(q->msix), itr);
 	}
 
 	/* For the Link interrupt */
 	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
 }
 
 /*
 ** ixgbe_sfp_probe - called in the local timer to
 ** determine if a port had optics inserted.
 */  
 static bool
 ixgbe_sfp_probe(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	device_t	dev = adapter->dev;
 
 	/* Only an NL phy whose module is still marked not present is probed */
 	if (hw->phy.type != ixgbe_phy_nl ||
 	    hw->phy.sfp_type != ixgbe_sfp_type_not_present)
 		return (FALSE);
 
 	/* Identification failed: report nothing new */
 	if (hw->phy.ops.identify_sfp(hw) != 0)
 		return (FALSE);
 
 	if (hw->phy.ops.reset(hw) == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 		device_printf(dev,"Unsupported SFP+ module detected!");
 		printf(" Reload driver with supported module.\n");
 		adapter->sfp_probe = FALSE;
 		return (FALSE);
 	}
 
 	device_printf(dev,"SFP+ module detected!\n");
 	/* We now have supported optics */
 	adapter->sfp_probe = FALSE;
 	/* Set the optics type so system reports correctly */
 	ixgbe_setup_optics(adapter);
 	return (TRUE);
 }
 
 /*
 ** Tasklet handler for MSIX Link interrupts
 **  - do outside interrupt since it might sleep
 */
 static void
 ixgbe_handle_link(void *context, int pending)
 {
 	struct adapter	*adapter = context;
 	struct ixgbe_hw	*hw = &adapter->hw;
 
 	/* Refresh the cached speed/up state, then act on it */
 	ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, 0);
 	ixgbe_update_link_status(adapter);
 }
 
 /*
 ** Tasklet for handling SFP module interrupts
 */
 static void
 ixgbe_handle_mod(void *context, int pending)
 {
 	struct adapter	*adapter = context;
 	struct ixgbe_hw	*hw = &adapter->hw;
 
 	/* Identify the module; give up on an unsupported type */
 	if (hw->phy.ops.identify_sfp(hw) == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 		device_printf(adapter->dev,
 		    "Unsupported SFP+ module type was detected.\n");
 		return;
 	}
 
 	/* Same check for the MAC side setup */
 	if (hw->mac.ops.setup_sfp(hw) == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 		device_printf(adapter->dev,
 		    "Setup failure - unsupported SFP+ module type.\n");
 		return;
 	}
 
 	/* Hand over to the multispeed fiber task */
 	taskqueue_enqueue(adapter->tq, &adapter->msf_task);
 }
 
 
 /*
 ** Tasklet for handling MSF (multispeed fiber) interrupts
 */
 static void
 ixgbe_handle_msf(void *context, int pending)
 {
 	struct adapter  *adapter = context;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 autoneg;
 	bool negotiate;
 
 	autoneg = hw->phy.autoneg_advertised;
 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
 	if (hw->mac.ops.setup_link)
 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
 	return;
 }
 
 #ifdef IXGBE_FDIR
 /*
 ** Tasklet that rebuilds the Flow Director filter table
 */
 static void
 ixgbe_reinit_fdir(void *context, int pending)
 {
 	struct adapter	*adapter = context;
 
 	/* Only act when a reinit was requested (anything else shouldn't happen) */
 	if (adapter->fdir_reinit == 1) {
 		ixgbe_reinit_fdir_tables_82599(&adapter->hw);
 		adapter->fdir_reinit = 0;
 		/* re-enable flow director interrupts */
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS,
 		    IXGBE_EIMS_FLOW_DIR);
 		/* Restart the interface */
 		adapter->ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	}
 }
 #endif
 
 /**********************************************************************
  *
  *  Update the board statistics counters.
  *
  **********************************************************************/
 /*
  * Read the hardware statistics registers and add each value to the
  * matching running total in adapter->stats, applying a few
  * corrections (missed packets, broadcast and pause frame counts)
  * along the way.
  */
 static void
 ixgbe_update_stats_counters(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32  missed_rx = 0, bprc, lxon, lxoff, total;
 
 	adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
 	adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
 	adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
 	adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
 
 	/*
 	** Note: these are for the 8 possible traffic classes,
 	**	 which in current implementation is unused,
 	**	 therefore only 0 should read real data.
 	*/
 	for (int i = 0; i < 8; i++) {
 		u32 mp;
 		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
 		/* missed_rx tallies misses for the gprc workaround */
 		missed_rx += mp;
 		/* global total per queue */
         	adapter->stats.mpc[i] += mp;
 		/* 82598 uses different registers for some of these counters */
 		if (hw->mac.type == ixgbe_mac_82598EB) {
 			adapter->stats.rnbc[i] +=
 			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
 			adapter->stats.qbtc[i] +=
 			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
 			adapter->stats.qbrc[i] +=
 			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
 			adapter->stats.pxonrxc[i] +=
 		    	    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
 		} else
 			adapter->stats.pxonrxc[i] +=
 		    	    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
 		adapter->stats.pxontxc[i] +=
 		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
 		adapter->stats.pxofftxc[i] +=
 		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
 		adapter->stats.pxoffrxc[i] +=
 		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
 		adapter->stats.pxon2offc[i] +=
 		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
 	}
 	/* Per-queue packet counters, 16 sets of registers */
 	for (int i = 0; i < 16; i++) {
 		adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
 		adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
 		adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
 	}
 	adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
 	adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
 	adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
 
 	/* Hardware workaround, gprc counts missed packets */
 	adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
 	adapter->stats.gprc -= missed_rx;
 
 	if (hw->mac.type != ixgbe_mac_82598EB) {
 		/* Octet counters are split into low/high registers: combine them */
 		adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
 		adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
 		adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
 		adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
 		adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
 	} else {
 		adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
 		adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
 		/* 82598 only has a counter in the high register */
 		adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
 		adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
 		adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
 	}
 
 	/*
 	 * Workaround: mprc hardware is incorrectly counting
 	 * broadcasts, so for now we subtract those.
 	 * (The subtraction is applied on 82598 only.)
 	 */
 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
 	adapter->stats.bprc += bprc;
 	adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		adapter->stats.mprc -= bprc;
 
 	adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
 	adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
 	adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
 	adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
 	adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
 	adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
 
 	/* Link XON/XOFF frames sent in this interval */
 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
 	adapter->stats.lxontxc += lxon;
 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
 	adapter->stats.lxofftxc += lxoff;
 	total = lxon + lxoff;
 
 	/*
 	 * Subtract the XON/XOFF frames from the transmit packet counters,
 	 * and ETHER_MIN_LEN bytes for each of them from the octet counter.
 	 * Presumably the hardware includes them in these totals; confirm
 	 * against the datasheet.
 	 */
 	adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
 	adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
 	adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
 	adapter->stats.gptc -= total;
 	adapter->stats.mptc -= total;
 	adapter->stats.ptc64 -= total;
 	adapter->stats.gotc -= total * ETHER_MIN_LEN;
 
 	adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
 	adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
 	adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
 	adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
 	adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
 	adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
 	adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
 	adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
 	adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
 	adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
 	adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
 	adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
 	adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
 	adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
 	adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
 	adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
 	adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
 	adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
 	/* Only read FCOE on 82599 */
 	if (hw->mac.type != ixgbe_mac_82598EB) {
 		adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
 		adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
 		adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
 		adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
 		adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
 	}
 }
 
 /*
  * Report one interface counter from the statistics accumulated in the
  * softc.  Counter kinds not handled here are delegated to
  * if_get_counter_default().
  */
 static uint64_t
 ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt)
 {
 	struct adapter *sc = if_getsoftc(ifp);
 	uint64_t value;
 
 	switch (cnt) {
 	case IFCOUNTER_IPACKETS:
 		value = sc->stats.gprc;
 		break;
 	case IFCOUNTER_OPACKETS:
 		value = sc->stats.gptc;
 		break;
 	case IFCOUNTER_IBYTES:
 		value = sc->stats.gorc;
 		break;
 	case IFCOUNTER_OBYTES:
 		value = sc->stats.gotc;
 		break;
 	case IFCOUNTER_IMCASTS:
 		value = sc->stats.mprc;
 		break;
 	case IFCOUNTER_OMCASTS:
 		value = sc->stats.mptc;
 		break;
 	case IFCOUNTER_COLLISIONS:
 		/* Collisions are always reported as zero. */
 		value = 0;
 		break;
 	case IFCOUNTER_IQDROPS:
 		/* Sum the first eight mpc[] entries. */
 		value = 0;
 		for (int idx = 0; idx < 8; idx++)
 			value += sc->stats.mpc[idx];
 		break;
 	case IFCOUNTER_IERRORS:
 		value = sc->stats.crcerrs + sc->stats.rlec;
 		break;
 	default:
 		value = if_get_counter_default(ifp, cnt);
 		break;
 	}
 
 	return (value);
 }
 
 /** ixgbe_sysctl_tdh_handler - Handler function
  *  Retrieves the TDH value from the hardware
  */
 static int 
 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
 	if (!txr) return 0;
 
 	unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 /** ixgbe_sysctl_tdt_handler - Handler function
  *  Retrieves the TDT value from the hardware
  */
 static int 
 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
 	if (!txr) return 0;
 
 	unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 /** ixgbe_sysctl_rdh_handler - Handler function
  *  Retrieves the RDH value from the hardware
  */
 static int 
 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
 	if (!rxr) return 0;
 
 	unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 /** ixgbe_sysctl_rdt_handler - Handler function
  *  Retrieves the RDT value from the hardware
  */
 static int 
 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
 	if (!rxr) return 0;
 
 	unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 static int
 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
 	unsigned int reg, usec, rate;
 
 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
 	usec = ((reg & 0x0FF8) >> 3);
 	if (usec > 0)
 		rate = 500000 / usec;
 	else
 		rate = 0;
 	error = sysctl_handle_int(oidp, &rate, 0, req);
 	if (error || !req->newptr)
 		return error;
 	reg &= ~0xfff; /* default, no limitation */
 	ixgbe_max_interrupt_rate = 0;
 	if (rate > 0 && rate < 500000) {
 		if (rate < 1000)
 			rate = 1000;
 		ixgbe_max_interrupt_rate = rate;
 		reg |= ((4000000/rate) & 0xff8 );
 	}
 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
 	return 0;
 }
 
 /*
  * Add sysctl variables, one per statistic, to the system.
  *
  * Three groups are registered under the device's sysctl tree:
  *   - driver-level counters directly under the device node,
  *   - a "queue<N>" node per queue holding TX and RX ring values,
  *   - a "mac_stats" node exporting the accumulated hardware counters
  *     kept in adapter->stats.
  */
 static void
 ixgbe_add_hw_stats(struct adapter *adapter)
 {
 
 	device_t dev = adapter->dev;
 
 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
 	struct ixgbe_hw_stats *stats = &adapter->stats;
 
 	struct sysctl_oid *stat_node, *queue_node;
 	struct sysctl_oid_list *stat_list, *queue_list;
 
 #define QUEUE_NAME_LEN 32
 	char namebuf[QUEUE_NAME_LEN];
 
 	/* Driver Statistics */
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
 			CTLFLAG_RD, &adapter->dropped_pkts,
 			"Driver dropped packets");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
 			CTLFLAG_RD, &adapter->mbuf_defrag_failed,
 			"m_defrag() failed");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
 			CTLFLAG_RD, &adapter->watchdog_events,
 			"Watchdog timeouts");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
 			CTLFLAG_RD, &adapter->link_irq,
 			"Link MSIX IRQ Handled");
 
 	/* Per-queue interrupt and transmit ring values */
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 					    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
 				CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
 				sizeof(&adapter->queues[i]),
 				ixgbe_sysctl_interrupt_rate_handler, "IU",
 				"Interrupt Rate");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
 				CTLFLAG_RD, &(adapter->queues[i].irqs),
 				"irqs on this queue");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 				CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
 				ixgbe_sysctl_tdh_handler, "IU",
 				"Transmit Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
 				CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
 				ixgbe_sysctl_tdt_handler, "IU",
 				"Transmit Descriptor Tail");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
 				CTLFLAG_RD, &txr->tso_tx,
 				"TSO");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
 				CTLFLAG_RD, &txr->no_tx_dma_setup,
 				"Driver tx dma failure in xmit");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
 				CTLFLAG_RD, &txr->no_desc_avail,
 				"Queue No Descriptor Available");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
 				CTLFLAG_RD, &txr->total_packets,
 				"Queue Packets Transmitted");
 	}
 
 	/*
 	 * Per-queue receive ring values.  The node name matches the one
 	 * used in the TX loop above, so the RX entries land in the same
 	 * "queue<N>" node; a single lookup per queue is sufficient.
 	 */
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		struct lro_ctrl *lro = &rxr->lro;
 
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 					    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
 				CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
 				ixgbe_sysctl_rdh_handler, "IU",
 				"Receive Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
 				CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
 				ixgbe_sysctl_rdt_handler, "IU",
 				"Receive Descriptor Tail");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
 				CTLFLAG_RD, &rxr->rx_packets,
 				"Queue Packets Received");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
 				CTLFLAG_RD, &rxr->rx_bytes,
 				"Queue Bytes Received");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
 				CTLFLAG_RD, &rxr->rx_copies,
 				"Copied RX Frames");
 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
 				CTLFLAG_RD, &lro->lro_queued, 0,
 				"LRO Queued");
 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
 				CTLFLAG_RD, &lro->lro_flushed, 0,
 				"LRO Flushed");
 	}
 
 	/* MAC stats get their own sub node */
 
 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
 				    CTLFLAG_RD, NULL, "MAC Statistics");
 	stat_list = SYSCTL_CHILDREN(stat_node);
 
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
 			CTLFLAG_RD, &stats->crcerrs,
 			"CRC Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
 			CTLFLAG_RD, &stats->illerrc,
 			"Illegal Byte Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
 			CTLFLAG_RD, &stats->errbc,
 			"Byte Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
 			CTLFLAG_RD, &stats->mspdc,
 			"MAC Short Packets Discarded");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
 			CTLFLAG_RD, &stats->mlfc,
 			"MAC Local Faults");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
 			CTLFLAG_RD, &stats->mrfc,
 			"MAC Remote Faults");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
 			CTLFLAG_RD, &stats->rlec,
 			"Receive Length Errors");
 
 	/* Flow Control stats */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
 			CTLFLAG_RD, &stats->lxontxc,
 			"Link XON Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
 			CTLFLAG_RD, &stats->lxonrxc,
 			"Link XON Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
 			CTLFLAG_RD, &stats->lxofftxc,
 			"Link XOFF Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
 			CTLFLAG_RD, &stats->lxoffrxc,
 			"Link XOFF Received");
 
 	/* Packet Reception Stats */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
 			CTLFLAG_RD, &stats->tor,
 			"Total Octets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
 			CTLFLAG_RD, &stats->gorc,
 			"Good Octets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
 			CTLFLAG_RD, &stats->tpr,
 			"Total Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
 			CTLFLAG_RD, &stats->gprc,
 			"Good Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
 			CTLFLAG_RD, &stats->mprc,
 			"Multicast Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
 			CTLFLAG_RD, &stats->bprc,
 			"Broadcast Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
 			CTLFLAG_RD, &stats->prc64,
 			"64 byte frames received ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
 			CTLFLAG_RD, &stats->prc127,
 			"65-127 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
 			CTLFLAG_RD, &stats->prc255,
 			"128-255 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
 			CTLFLAG_RD, &stats->prc511,
 			"256-511 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
 			CTLFLAG_RD, &stats->prc1023,
 			"512-1023 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
 			CTLFLAG_RD, &stats->prc1522,
 			"1024-1522 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
 			CTLFLAG_RD, &stats->ruc,
 			"Receive Undersized");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
 			CTLFLAG_RD, &stats->rfc,
 			"Fragmented Packets Received ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
 			CTLFLAG_RD, &stats->roc,
 			"Oversized Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
 			CTLFLAG_RD, &stats->rjc,
 			"Received Jabber");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
 			CTLFLAG_RD, &stats->mngprc,
 			"Management Packets Received");
 	/* Dropped management packets live in mngpdc, not mngptc (transmit). */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
 			CTLFLAG_RD, &stats->mngpdc,
 			"Management Packets Dropped");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
 			CTLFLAG_RD, &stats->xec,
 			"Checksum Errors");
 
 	/* Packet Transmission Stats */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
 			CTLFLAG_RD, &stats->gotc,
 			"Good Octets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
 			CTLFLAG_RD, &stats->tpt,
 			"Total Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
 			CTLFLAG_RD, &stats->gptc,
 			"Good Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
 			CTLFLAG_RD, &stats->bptc,
 			"Broadcast Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
 			CTLFLAG_RD, &stats->mptc,
 			"Multicast Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
 			CTLFLAG_RD, &stats->mngptc,
 			"Management Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
 			CTLFLAG_RD, &stats->ptc64,
 			"64 byte frames transmitted ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
 			CTLFLAG_RD, &stats->ptc127,
 			"65-127 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
 			CTLFLAG_RD, &stats->ptc255,
 			"128-255 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
 			CTLFLAG_RD, &stats->ptc511,
 			"256-511 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
 			CTLFLAG_RD, &stats->ptc1023,
 			"512-1023 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
 			CTLFLAG_RD, &stats->ptc1522,
 			"1024-1522 byte frames transmitted");
 }
 
 /*
 ** Set flow control using sysctl:
 ** Flow control values:
 ** 	0 - off
 **	1 - rx pause
 **	2 - tx pause
 **	3 - full
 */
 static int
 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
 {
 	int error, last;
 	struct adapter *adapter = (struct adapter *) arg1;
 
 	last = adapter->fc;
 	error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
 	if ((error) || (req->newptr == NULL))
 		return (error);
 
 	/* Don't bother if it's not changed */
 	if (adapter->fc == last)
 		return (0);
 
 	switch (adapter->fc) {
 		case ixgbe_fc_rx_pause:
 		case ixgbe_fc_tx_pause:
 		case ixgbe_fc_full:
 			adapter->hw.fc.requested_mode = adapter->fc;
 			if (adapter->num_queues > 1)
 				ixgbe_disable_rx_drop(adapter);
 			break;
 		case ixgbe_fc_none:
 			adapter->hw.fc.requested_mode = ixgbe_fc_none;
 			if (adapter->num_queues > 1)
 				ixgbe_enable_rx_drop(adapter);
 			break;
 		default:
 			adapter->fc = last;
 			return (EINVAL);
 	}
 	/* Don't autoneg if forcing a value */
 	adapter->hw.fc.disable_fc_autoneg = TRUE;
 	ixgbe_fc_enable(&adapter->hw);
 	return error;
 }
 
 
 /*
 ** Control link advertise speed:
 **	1 - advertise only 1G
 **	2 - advertise 100Mb
 **	3 - advertise normal
 */
 static int
 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
 {
 	int			error = 0;
 	struct adapter		*adapter;
 	device_t		dev;
 	struct ixgbe_hw		*hw;
 	ixgbe_link_speed	speed, last;
 
 	adapter = (struct adapter *) arg1;
 	dev = adapter->dev;
 	hw = &adapter->hw;
 	last = adapter->advertise;
 
 	error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
 	if ((error) || (req->newptr == NULL))
 		return (error);
 
 	if (adapter->advertise == last) /* no change */
 		return (0);
 
 	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
             (hw->phy.multispeed_fiber)))
 		return (EINVAL);
 
 	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
 		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
 		return (EINVAL);
 	}
 
 	if (adapter->advertise == 1)
                 speed = IXGBE_LINK_SPEED_1GB_FULL;
 	else if (adapter->advertise == 2)
                 speed = IXGBE_LINK_SPEED_100_FULL;
 	else if (adapter->advertise == 3)
                 speed = IXGBE_LINK_SPEED_1GB_FULL |
 			IXGBE_LINK_SPEED_10GB_FULL;
 	else {	/* bogus value */
 		adapter->advertise = last;
 		return (EINVAL);
 	}
 
 	hw->mac.autotry_restart = TRUE;
 	hw->mac.ops.setup_link(hw, speed, TRUE);
 
 	return (error);
 }
 
 /*
 ** Thermal Shutdown Trigger
 **   - cause a Thermal Overtemp IRQ
 **   - this now requires firmware enabling
 */
 static int
 ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
 {
 	int		error, fire = 0;
 	struct adapter	*adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 
 
 	if (hw->mac.type != ixgbe_mac_X540)
 		return (0);
 
 	error = sysctl_handle_int(oidp, &fire, 0, req);
 	if ((error) || (req->newptr == NULL))
 		return (error);
 
 	if (fire) {
 		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
 		reg |= IXGBE_EICR_TS;
 		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
 	}
 
 	return (0);
 }
 
 /*
 ** Enable the hardware to drop packets when the buffer is
 ** full. This is useful with multiqueue, so that no single
 ** full queue stalls the entire RX engine. We only enable
 ** this when Multiqueue AND when Flow Control is disabled.
 **
 ** Sets IXGBE_SRRCTL_DROP_EN in the SRRCTL register of every queue.
 */
 static void
 ixgbe_enable_rx_drop(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	int q;
 
 	for (q = 0; q < adapter->num_queues; q++) {
 		u32 val;
 
 		val = IXGBE_READ_REG(hw, IXGBE_SRRCTL(q));
 		val |= IXGBE_SRRCTL_DROP_EN;
 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(q), val);
 	}
 }
 
 /*
 ** Clear IXGBE_SRRCTL_DROP_EN in the SRRCTL register of every queue.
 */
 static void
 ixgbe_disable_rx_drop(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	int q;
 
 	for (q = 0; q < adapter->num_queues; q++) {
 		u32 val;
 
 		val = IXGBE_READ_REG(hw, IXGBE_SRRCTL(q));
 		val &= ~IXGBE_SRRCTL_DROP_EN;
 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(q), val);
 	}
 }
Index: head/sys/dev/ixl/ixl_txrx.c
===================================================================
--- head/sys/dev/ixl/ixl_txrx.c	(revision 277330)
+++ head/sys/dev/ixl/ixl_txrx.c	(revision 277331)
@@ -1,1760 +1,1760 @@
 /******************************************************************************
 
   Copyright (c) 2013-2014, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 /*
 **	IXL driver TX/RX Routines:
 **	    This was separated to allow usage by
 ** 	    both the BASE and the VF drivers.
 */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_rss.h"
 #include "ixl.h"
 
 #ifdef RSS 
 #include <net/rss_config.h>
 #endif
 
 /* Local Prototypes */
 static void	ixl_rx_checksum(struct mbuf *, u32, u32, u8);
 static void	ixl_refresh_mbufs(struct ixl_queue *, int);
 static int      ixl_xmit(struct ixl_queue *, struct mbuf **);
 static int	ixl_tx_setup_offload(struct ixl_queue *,
 		    struct mbuf *, u32 *, u32 *);
 static bool	ixl_tso_setup(struct ixl_queue *, struct mbuf *);
 
 static __inline void ixl_rx_discard(struct rx_ring *, int);
 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
 		    struct mbuf *, u8);
 
 /*
 ** Multiqueue Transmit driver
 **
 ** Picks a transmit queue for the mbuf, enqueues it on that queue's
 ** buf_ring and then either drains the ring directly (if the TX lock
 ** can be taken without blocking) or defers the work to the queue's
 ** taskqueue.
 **
 ** Returns 0 on success, or the error from drbr_enqueue().
 */
 int
 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ixl_vsi		*vsi = ifp->if_softc;
 	struct ixl_queue	*que;
 	struct tx_ring		*txr;
 	int 			err, i;
 #ifdef RSS
 	u32			bucket_id;
 #endif
 
 	/*
 	** Which queue to use:
 	**
 	** When doing RSS, map it to the same outbound
 	** queue as the incoming flow would be mapped to.
 	** If everything is setup correctly, it should be
 	** the same bucket that the current CPU we're on is.
 	*/
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
 #ifdef  RSS
 		if (rss_hash2bucket(m->m_pkthdr.flowid,
 		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
 			i = bucket_id % vsi->num_queues;
 		} else
 #endif
 			i = m->m_pkthdr.flowid % vsi->num_queues;
 	} else
 		i = curcpu % vsi->num_queues;
 	/*
 	** This may not be perfect, but until something
 	** better comes along it will keep from scheduling
 	** on stalled queues.
 	**
 	** ffsl() returns a 1-based bit position (0 when no bit is
 	** set), so it must be converted to a 0-based queue index;
 	** using it directly would select the queue after the lowest
 	** active one and could index past the end of vsi->queues.
 	** With an empty mask we fall back to queue 0, as before.
 	** The mask test uses a 64-bit one so the shift stays well
 	** defined for queue indices of 31 and above.
 	*/
 	if (((1ULL << i) & vsi->active_queues) == 0) {
 		int first = ffsl(vsi->active_queues);
 
 		i = (first > 0) ? first - 1 : 0;
 	}
 
 	que = &vsi->queues[i];
 	txr = &que->txr;
 
 	err = drbr_enqueue(ifp, txr->br, m);
 	if (err)
 		return(err);
 	if (IXL_TX_TRYLOCK(txr)) {
 		ixl_mq_start_locked(ifp, txr);
 		IXL_TX_UNLOCK(txr);
 	} else
 		taskqueue_enqueue(que->tq, &que->tx_task);
 
 	return (0);
 }
 
 /*
 ** Drain the ring's buf_ring onto the hardware ring by handing each
 ** packet to ixl_xmit().
 **
 ** Returns ENETDOWN when the interface is not running or the link is
 ** down, otherwise the result of the last ixl_xmit() call (0 if the
 ** buf_ring was empty).
 */
 int
 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
 {
 	struct ixl_queue	*que = txr->que;
 	struct ixl_vsi		*vsi = que->vsi;
 	struct mbuf		*m;
 	int			rc = 0;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return (ENETDOWN);
 	if (vsi->link_active == 0)
 		return (ENETDOWN);
 
 	/* Process the transmit queue */
 	for (;;) {
 		m = drbr_peek(ifp, txr->br);
 		if (m == NULL)
 			break;
 
 		rc = ixl_xmit(que, &m);
 		if (rc != 0) {
 			/*
 			 * ixl_xmit() leaves NULL behind when it freed the
 			 * packet; otherwise put the packet back on the ring.
 			 */
 			if (m != NULL)
 				drbr_putback(ifp, txr->br, m);
 			else
 				drbr_advance(ifp, txr->br);
 			break;
 		}
 
 		drbr_advance(ifp, txr->br);
 		/* Send a copy of the frame to the BPF listener */
 		ETHER_BPF_MTAP(ifp, m);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 	}
 
 	/* Run the tx completion path when descriptors run low */
 	if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
 		ixl_txeof(que);
 
 	return (rc);
 }
 
 /*
  * Taskqueue callback: under the TX lock, drain any packets still
  * queued on the ring's buf_ring.  'arg' is the struct ixl_queue;
  * 'pending' is not used.
  */
 void
 ixl_deferred_mq_start(void *arg, int pending)
 {
 	struct ixl_queue	*que = arg;
 	struct tx_ring		*ring = &que->txr;
 	struct ifnet		*ifp = que->vsi->ifp;
 
 	IXL_TX_LOCK(ring);
 	if (!drbr_empty(ifp, ring->br))
 		ixl_mq_start_locked(ifp, ring);
 	IXL_TX_UNLOCK(ring);
 }
 
 /*
 ** Flush all queue ring buffers: every mbuf still waiting on a
 ** queue's buf_ring is dequeued and freed under that queue's TX
 ** lock, then if_qflush() is called for the interface.
 */
 void
 ixl_qflush(struct ifnet *ifp)
 {
 	struct ixl_vsi	*vsi = ifp->if_softc;
 	int		q;
 
 	for (q = 0; q < vsi->num_queues; q++) {
 		struct tx_ring	*ring = &vsi->queues[q].txr;
 		struct mbuf	*pkt;
 
 		IXL_TX_LOCK(ring);
 		for (;;) {
 			pkt = buf_ring_dequeue_sc(ring->br);
 			if (pkt == NULL)
 				break;
 			m_freem(pkt);
 		}
 		IXL_TX_UNLOCK(ring);
 	}
 	if_qflush(ifp);
 }
 
 /*
 ** Detect 'sparse' TSO chains.
 **
 ** Starting with the mbuf after the head, count how many mbufs are
 ** walked before tso_segsz bytes have been covered (or the chain
 ** ends).  The chain is sparse when that count exceeds
 ** IXL_SPARSE_CHAIN.
 */
 static inline bool
 ixl_tso_detect_sparse(struct mbuf *mp)
 {
 	struct mbuf	*cur = mp->m_next;
 	int		remaining = mp->m_pkthdr.tso_segsz;
 	int		count = 0;
 
 	while (cur != NULL) {
 		count++;
 		remaining -= cur->m_len;
 		if (remaining < 1)
 			break;
 		cur = cur->m_next;
 	}
 
 	return (count > IXL_SPARSE_CHAIN);
 }
 
 
 /*********************************************************************
  *
  *  This routine maps the mbufs to tx descriptors, allowing the
  *  TX engine to transmit the packets. 
  *  	- return 0 on success, positive on failure
  *
  *  que:     queue whose tx ring receives the packet.
  *  m_headp: in/out pointer to the packet.  It may be replaced by a
  *           defragmented or collapsed chain.  On failures where the
  *           packet was freed here it is set to NULL; on the other
  *           failures (ENOMEM from the DMA load, not enough
  *           descriptors, offload setup error) the packet is left in
  *           *m_headp for the caller.
  *
  **********************************************************************/
 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 
 static int
 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
 {
 	struct ixl_vsi		*vsi = que->vsi;
 	struct i40e_hw		*hw = vsi->hw;
 	struct tx_ring		*txr = &que->txr;
 	struct ixl_tx_buf	*buf;
 	struct i40e_tx_desc	*txd = NULL;
 	struct mbuf		*m_head, *m;
 	int             	i, j, error, nsegs, maxsegs;
 	int			first, last = 0;
 	u16			vtag = 0;
 	u32			cmd, off;
 	bus_dmamap_t		map;
 	bus_dma_tag_t		tag;
 	bus_dma_segment_t	segs[IXL_MAX_TSO_SEGS];
 
 
 	cmd = off = 0;
 	m_head = *m_headp;
 
         /*
          * Important to capture the first descriptor
          * used because it will contain the index of
          * the one we tell the hardware to report back
          */
         first = txr->next_avail;
 	buf = &txr->buffers[first];
 	map = buf->map;
 	tag = txr->tx_tag;
 	maxsegs = IXL_MAX_TX_SEGS;
 
 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 		/* Use larger mapping for TSO */
 		tag = txr->tso_tag;
 		maxsegs = IXL_MAX_TSO_SEGS;
 		/*
 		 * Sparse TSO chains are compacted with m_defrag() before
 		 * mapping; if that fails the packet is freed and the
 		 * caller is told so through *m_headp == NULL.
 		 */
 		if (ixl_tso_detect_sparse(m_head)) {
 			m = m_defrag(m_head, M_NOWAIT);
 			if (m == NULL) {
 				m_freem(*m_headp);
 				*m_headp = NULL;
 				return (ENOBUFS);
 			}
 			*m_headp = m;
 		}
 	}
 
 	/*
 	 * Map the packet for DMA.
 	 */
 	error = bus_dmamap_load_mbuf_sg(tag, map,
 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 	/*
 	 * EFBIG: collapse the chain to at most maxsegs mbufs and retry
 	 * the load once.  In both the first attempt and the retry,
 	 * ENOMEM returns with the packet left in *m_headp, while any
 	 * other error frees the packet.
 	 */
 	if (error == EFBIG) {
 		struct mbuf *m;
 
 		m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
 		if (m == NULL) {
 			que->mbuf_defrag_failed++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (ENOBUFS);
 		}
 		*m_headp = m;
 
 		/* Try it again */
 		error = bus_dmamap_load_mbuf_sg(tag, map,
 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 		if (error == ENOMEM) {
 			que->tx_dma_setup++;
 			return (error);
 		} else if (error != 0) {
 			que->tx_dma_setup++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (error);
 		}
 	} else if (error == ENOMEM) {
 		que->tx_dma_setup++;
 		return (error);
 	} else if (error != 0) {
 		que->tx_dma_setup++;
 		m_freem(*m_headp);
 		*m_headp = NULL;
 		return (error);
 	}
 
 	/* Make certain there are enough descriptors */
 	if (nsegs > txr->avail - 2) {
 		txr->no_desc++;
 		error = ENOBUFS;
 		goto xmit_fail;
 	}
 	/* Re-read the head: it may have been replaced above */
 	m_head = *m_headp;
 
 	/* Set up the TSO/CSUM offload */
 	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
 		error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
 		if (error)
 			goto xmit_fail;
 	}
 
 	cmd |= I40E_TX_DESC_CMD_ICRC;
 	/* Grab the VLAN tag */
 	if (m_head->m_flags & M_VLANTAG) {
 		cmd |= I40E_TX_DESC_CMD_IL2TAG1;
 		vtag = htole16(m_head->m_pkthdr.ether_vtag);
 	}
 
 	/* Fill one data descriptor per DMA segment, wrapping at num_desc */
 	i = txr->next_avail;
 	for (j = 0; j < nsegs; j++) {
 		bus_size_t seglen;
 
 		buf = &txr->buffers[i];
 		buf->tag = tag; /* Keep track of the type tag */
 		txd = &txr->base[i];
 		seglen = segs[j].ds_len;
 
 		txd->buffer_addr = htole64(segs[j].ds_addr);
 		txd->cmd_type_offset_bsz =
 		    htole64(I40E_TX_DESC_DTYPE_DATA
 		    | ((u64)cmd  << I40E_TXD_QW1_CMD_SHIFT)
 		    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
 		    | ((u64)seglen  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
 		    | ((u64)vtag  << I40E_TXD_QW1_L2TAG1_SHIFT));
 
 		last = i; /* descriptor that will get completion IRQ */
 
 		if (++i == que->num_desc)
 			i = 0;
 
 		buf->m_head = NULL;
 		buf->eop_index = -1;
 	}
 	/* Set the last descriptor for report */
 	txd->cmd_type_offset_bsz |=
 	    htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
 	txr->avail -= nsegs;
 	txr->next_avail = i;
 
 	/* buf is the buffer of the last descriptor used; it owns the mbuf */
 	buf->m_head = m_head;
 	/* Swap the dma map between the first and last descriptor */
 	txr->buffers[first].map = buf->map;
 	buf->map = map;
 	bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
 
         /* Set the index of the descriptor that will be marked done */
         buf = &txr->buffers[first];
 	buf->eop_index = last;
 
         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	/*
 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
 	 * hardware that this frame is available to transmit.
 	 */
 	++txr->total_packets;
 	wr32(hw, txr->tail, i);
 
 	ixl_flush(hw);
 	/* Mark outstanding work */
 	if (que->busy == 0)
 		que->busy = 1;
 	return (0);
 
 xmit_fail:
 	/*
 	 * Both jumps here happen before the descriptor loop, so buf still
 	 * refers to txr->buffers[first] and buf->map is the map that was
 	 * loaded above.  The mbuf itself is not freed on this path.
 	 */
 	bus_dmamap_unload(tag, buf->map);
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
  *  the information needed to transmit a packet on the wire. This is
  *  called only once at attach, setup is done every reset.
  *
  *  Creates the regular and the TSO DMA tags, the array of
  *  que->num_desc tx buffers, and one DMA map per buffer.  Returns 0
  *  on success or the error of the step that failed; nothing that was
  *  already allocated is released here on failure.
  *
  **********************************************************************/
 int
 ixl_allocate_tx_data(struct ixl_queue *que)
 {
 	struct tx_ring		*txr = &que->txr;
 	device_t		dev = que->vsi->dev;
 	int			rc;
 
 	/*
 	 * Setup DMA descriptor areas.
 	 */
 	rc = bus_dma_tag_create(NULL,		/* parent */
 	    1, 0,				/* alignment, bounds */
 	    BUS_SPACE_MAXADDR,			/* lowaddr */
 	    BUS_SPACE_MAXADDR,			/* highaddr */
 	    NULL, NULL,				/* filter, filterarg */
 	    IXL_TSO_SIZE,			/* maxsize */
 	    IXL_MAX_TX_SEGS,			/* nsegments */
 	    PAGE_SIZE,				/* maxsegsize */
 	    0,					/* flags */
 	    NULL,				/* lockfunc */
 	    NULL,				/* lockfuncarg */
 	    &txr->tx_tag);
 	if (rc != 0) {
 		device_printf(dev,"Unable to allocate TX DMA tag\n");
 		return (rc);
 	}
 
 	/* Make a special tag for TSO */
 	rc = bus_dma_tag_create(NULL,		/* parent */
 	    1, 0,				/* alignment, bounds */
 	    BUS_SPACE_MAXADDR,			/* lowaddr */
 	    BUS_SPACE_MAXADDR,			/* highaddr */
 	    NULL, NULL,				/* filter, filterarg */
 	    IXL_TSO_SIZE,			/* maxsize */
 	    IXL_MAX_TSO_SEGS,			/* nsegments */
 	    PAGE_SIZE,				/* maxsegsize */
 	    0,					/* flags */
 	    NULL,				/* lockfunc */
 	    NULL,				/* lockfuncarg */
 	    &txr->tso_tag);
 	if (rc != 0) {
 		device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
 		return (rc);
 	}
 
 	/* One zeroed tx buffer per descriptor */
 	txr->buffers = malloc(sizeof(struct ixl_tx_buf) * que->num_desc,
 	    M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (txr->buffers == NULL) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		return (ENOMEM);
 	}
 
 	/* Create the descriptor buffer default dma maps */
 	for (int idx = 0; idx < que->num_desc; idx++) {
 		struct ixl_tx_buf *slot = &txr->buffers[idx];
 
 		slot->tag = txr->tx_tag;
 		rc = bus_dmamap_create(slot->tag, 0, &slot->map);
 		if (rc != 0) {
 			device_printf(dev, "Unable to create TX DMA map\n");
 			return (rc);
 		}
 	}
 
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  (Re)Initialize a queue transmit ring.
  *	- called by init; under the TX lock it zeroes the descriptor
  *	  ring, rewinds the ring indices, frees any stale mbufs and
  *	  marks every descriptor as available
  *
  **********************************************************************/
 void
 ixl_init_tx_ring(struct ixl_queue *que)
 {
 	struct tx_ring *ring = &que->txr;
 
 	IXL_TX_LOCK(ring);
 
 	/* Clear the old ring contents */
 	bzero((void *)ring->base,
 	    (sizeof(struct i40e_tx_desc)) * que->num_desc);
 
 	/* Reset indices */
 	ring->next_avail = 0;
 	ring->next_to_clean = 0;
 
 #ifdef IXL_FDIR
 	/* Initialize flow director */
 	ring->atr_rate = ixl_atr_rate;
 	ring->atr_count = 0;
 #endif
 
 	/* Free any existing tx mbufs. */
 	for (int idx = 0; idx < que->num_desc; idx++) {
 		struct ixl_tx_buf *slot = &ring->buffers[idx];
 
 		if (slot->m_head != NULL) {
 			bus_dmamap_sync(slot->tag, slot->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(slot->tag, slot->map);
 			m_freem(slot->m_head);
 			slot->m_head = NULL;
 		}
 		/* Clear the EOP index */
 		slot->eop_index = -1;
 	}
 
 	/* Set number of descriptors available */
 	ring->avail = que->num_desc;
 
 	bus_dmamap_sync(ring->dma.tag, ring->dma.map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	IXL_TX_UNLOCK(ring);
 }
 
 
 /*********************************************************************
  *
  *  Free transmit ring related data structures.
  *
  **********************************************************************/
 void
 ixl_free_que_tx(struct ixl_queue *que)
 {
 	struct tx_ring *txr = &que->txr;
 	struct ixl_tx_buf *buf;
 
 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
 
 	/*
 	** The buffer array may never have been allocated (or may
 	** already have been released), so only walk it when it
 	** exists; this mirrors the guard used by ixl_free_que_rx().
 	*/
 	if (txr->buffers != NULL) {
 		for (int i = 0; i < que->num_desc; i++) {
 			buf = &txr->buffers[i];
 			if (buf->m_head != NULL) {
 				/* Slot still owns an mbuf: unmap and free it */
 				bus_dmamap_sync(buf->tag, buf->map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(buf->tag,
 				    buf->map);
 				m_freem(buf->m_head);
 				buf->m_head = NULL;
 				if (buf->map != NULL) {
 					bus_dmamap_destroy(buf->tag,
 					    buf->map);
 					buf->map = NULL;
 				}
 			} else if (buf->map != NULL) {
 				/* No mbuf, but the map itself must go */
 				bus_dmamap_unload(buf->tag,
 				    buf->map);
 				bus_dmamap_destroy(buf->tag,
 				    buf->map);
 				buf->map = NULL;
 			}
 		}
 	}
 	if (txr->br != NULL) {
 		buf_ring_free(txr->br, M_DEVBUF);
 		/* Do not leave a dangling pointer behind */
 		txr->br = NULL;
 	}
 	if (txr->buffers != NULL) {
 		free(txr->buffers, M_DEVBUF);
 		txr->buffers = NULL;
 	}
 	/* Tags are destroyed last, after every map created from them */
 	if (txr->tx_tag != NULL) {
 		bus_dma_tag_destroy(txr->tx_tag);
 		txr->tx_tag = NULL;
 	}
 	if (txr->tso_tag != NULL) {
 		bus_dma_tag_destroy(txr->tso_tag);
 		txr->tso_tag = NULL;
 	}
 
 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
 	return;
 }
 
 /*********************************************************************
  *
  *  Setup descriptor for hw offloads 
  *
  **********************************************************************/
 
 static int
 ixl_tx_setup_offload(struct ixl_queue *que,
     struct mbuf *mp, u32 *cmd, u32 *off)
 {
 	struct ether_vlan_header	*evh;
 #ifdef INET
 	struct ip			*ip4 = NULL;
 #endif
 	struct tcphdr			*tcp = NULL;
 #ifdef INET6
 	struct ip6_hdr			*ip6h;
 #endif
 	int				mac_len, l3_len = 0, l4_len;
 	u16				l3_type;
 	u8				l4_proto = 0;
 	bool				tso = FALSE;
 
 	/* A TSO request needs its context descriptor set up first */
 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
 		tso = ixl_tso_setup(que, mp);
 		if (!tso)
 			return (ENXIO);
 		++que->tso;
 	}
 
 	/*
 	 * Find the L3 header: start from a plain Ethernet header
 	 * and step over a VLAN encapsulation when one is present.
 	 */
 	evh = mtod(mp, struct ether_vlan_header *);
 	mac_len = ETHER_HDR_LEN;
 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		mac_len += ETHER_VLAN_ENCAP_LEN;
 		l3_type = ntohs(evh->evl_proto);
 	} else
 		l3_type = ntohs(evh->evl_encap_proto);
 
 	/* Record the L3 flavour and where the L4 header begins */
 	switch (l3_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		ip4 = (struct ip *)(mp->m_data + mac_len);
 		l3_len = ip4->ip_hl << 2;
 		l4_proto = ip4->ip_p;
 		tcp = (struct tcphdr *)((caddr_t)ip4 + l3_len);
 		/* With TSO the IPv4 header checksum must be recalculated */
 		*cmd |= tso ? I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
 		    I40E_TX_DESC_CMD_IIPT_IPV4;
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		ip6h = (struct ip6_hdr *)(mp->m_data + mac_len);
 		l3_len = sizeof(struct ip6_hdr);
 		l4_proto = ip6h->ip6_nxt;
 		tcp = (struct tcphdr *)((caddr_t)ip6h + l3_len);
 		*cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
 		break;
 #endif
 	default:
 		break;
 	}
 
 	/* MAC length is encoded in 2-byte units, IP length in 4-byte units */
 	*off |= (mac_len >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
 	*off |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
 
 	/* Request the L4 checksum only when the stack asked for it */
 	if (l4_proto == IPPROTO_TCP) {
 		l4_len = tcp->th_off << 2;
 		if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
 			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
 			*off |= (l4_len >> 2) <<
 			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 		}
 #ifdef IXL_FDIR
 		ixl_atr(que, tcp, l3_type);
 #endif
 	} else if (l4_proto == IPPROTO_UDP) {
 		if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
 			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
 			*off |= (sizeof(struct udphdr) >> 2) <<
 			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 		}
 	} else if (l4_proto == IPPROTO_SCTP) {
 		if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
 			*cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
 			*off |= (sizeof(struct sctphdr) >> 2) <<
 			    I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 		}
 	}
 
 	return (0);
 }
 
 
 /**********************************************************************
  *
  *  Setup context for hardware segmentation offload (TSO)
  *
  **********************************************************************/
 static bool
 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
 {
 	struct tx_ring			*txr = &que->txr;
 	struct i40e_tx_context_desc	*TXD;
 	struct ixl_tx_buf		*buf;
 	u32				cmd, mss, type, tsolen;
 	u16				etype;
 	int				idx, elen, ip_hlen, tcp_hlen;
 	struct ether_vlan_header	*eh;
 #ifdef INET
 	struct ip			*ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr			*ip6;
 #endif
 #if defined(INET6) || defined(INET)
 	struct tcphdr			*th;
 #endif
 	u64				type_cmd_tso_mss;
 
 	/*
 	 * Returns TRUE once a TSO context descriptor has been written
 	 * at txr->next_avail (consuming one ring slot), FALSE when the
 	 * packet cannot be offloaded and no descriptor was consumed.
 	 * As a side effect the TCP checksum field is seeded with the
 	 * pseudo-header checksum and, for IPv4, ip_sum is zeroed.
 	 */
 
 	/*
 	 * Determine where frame payload starts.
 	 * Jump over vlan headers if already present
 	 */
 	eh = mtod(mp, struct ether_vlan_header *);
 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 		etype = eh->evl_proto;
 	} else {
 		elen = ETHER_HDR_LEN;
 		etype = eh->evl_encap_proto;
 	}
 
 	/* etype is still in network byte order here */
 	switch (ntohs(etype)) {
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		ip6 = (struct ip6_hdr *)(mp->m_data + elen);
 		/*
 		 * This function returns bool: an errno such as ENXIO
 		 * is non-zero and would be read as TRUE by the caller.
 		 */
 		if (ip6->ip6_nxt != IPPROTO_TCP)
 			return FALSE;
 		ip_hlen = sizeof(struct ip6_hdr);
 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
 		tcp_hlen = th->th_off << 2;
 		break;
 #endif
 #ifdef INET
 	case ETHERTYPE_IP:
 		ip = (struct ip *)(mp->m_data + elen);
 		/* Same as above: report failure as FALSE, not an errno */
 		if (ip->ip_p != IPPROTO_TCP)
 			return FALSE;
 		ip->ip_sum = 0;
 		ip_hlen = ip->ip_hl << 2;
 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
 		tcp_hlen = th->th_off << 2;
 		break;
 #endif
 	default:
 		printf("%s: CSUM_TSO but no supported IP version (0x%04x)",
 		    __func__, ntohs(etype));
 		return FALSE;
 	}
 
 	/* Ensure we have at least the IP+TCP header in the first mbuf. */
 	if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
 		return FALSE;
 
 	/* The context descriptor takes the next free ring slot */
 	idx = txr->next_avail;
 	buf = &txr->buffers[idx];
 	TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
 	/* TSO length is the packet length minus all headers */
 	tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
 
 	type = I40E_TX_DESC_DTYPE_CONTEXT;
 	cmd = I40E_TX_CTX_DESC_TSO;
 	mss = mp->m_pkthdr.tso_segsz;
 
 	type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
 	    ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
 	    ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
 	    ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
 	TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
 
 	TXD->tunneling_params = htole32(0);
 	/* The context slot carries no mbuf and ends no packet */
 	buf->m_head = NULL;
 	buf->eop_index = -1;
 
 	/* Advance the producer index, wrapping at the ring size */
 	if (++idx == que->num_desc)
 		idx = 0;
 
 	txr->avail--;
 	txr->next_avail = idx;
 
 	return TRUE;
 }
 
 /*             
 ** ixl_get_tx_head - Retrieve the value from the 
 **    location the HW records its HEAD index
 */
 static inline u32
 ixl_get_tx_head(struct ixl_queue *que)
 {
 	/* The HEAD word is read from the slot just past the last descriptor */
 	void *wb_slot = &que->txr.base[que->num_desc];
 	volatile __le32 *wb_head = (volatile __le32 *)wb_slot;
 
 	return LE32_TO_CPU(*wb_head);
 }
 
 /**********************************************************************
  *
  *  Examine each tx_buffer in the used queue. If the hardware is done
  *  processing the packet then free associated resources. The
  *  tx_buffer is put back on the free queue.
  *
  **********************************************************************/
 bool
 ixl_txeof(struct ixl_queue *que)
 {
 	struct tx_ring		*txr = &que->txr;
 	u32			first, last, head, done, processed;
 	struct ixl_tx_buf	*buf;
 	struct i40e_tx_desc	*tx_desc, *eop_desc;
 
 
 	/* Caller must already hold the TX ring mutex */
 	mtx_assert(&txr->mtx, MA_OWNED);
 
 
 	/* These are not the descriptors you seek, move along :) */
 	if (txr->avail == que->num_desc) {
 		/* Ring is completely free: nothing to clean, not busy */
 		que->busy = 0;
 		return FALSE;
 	}
 
 	processed = 0;
 	first = txr->next_to_clean;
 	buf = &txr->buffers[first];
 	tx_desc = (struct i40e_tx_desc *)&txr->base[first];
 	last = buf->eop_index;
 	/*
 	** eop_index is set to -1 when a slot has no end-of-packet
 	** recorded; 'last' is u32, so the comparison below relies on
 	** -1 being converted to the all-ones unsigned value.
 	*/
 	if (last == -1)
 		return FALSE;
 	eop_desc = (struct i40e_tx_desc *)&txr->base[last];
 
 	/* Get the Head WB value */
 	head = ixl_get_tx_head(que);
 
 	/*
 	** Get the index of the first descriptor
 	** BEYOND the EOP and call that 'done'.
 	** I do this so the comparison in the
 	** inner while loop below can be simple
 	*/
 	if (++last == que->num_desc) last = 0;
 	done = last;
 
         bus_dmamap_sync(txr->dma.tag, txr->dma.map,
             BUS_DMASYNC_POSTREAD);
 	/*
 	** The HEAD index of the ring is written in a 
 	** defined location, this rather than a done bit
 	** is what is used to keep track of what must be
 	** 'cleaned'.
 	*/
 	while (first != head) {
 		/* We clean the range of the packet */
 		while (first != done) {
 			/* Each visited slot is returned to the free pool */
 			++txr->avail;
 			++processed;
 
 			/* Only slots that hold an mbuf have one to release */
 			if (buf->m_head) {
 				txr->bytes += /* for ITR adjustment */
 				    buf->m_head->m_pkthdr.len;
 				txr->tx_bytes += /* for TX stats */
 				    buf->m_head->m_pkthdr.len;
 				bus_dmamap_sync(buf->tag,
 				    buf->map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(buf->tag,
 				    buf->map);
 				m_freem(buf->m_head);
 				buf->m_head = NULL;
 				/*
 				** NOTE(review): the map is dropped without
 				** bus_dmamap_destroy(); presumably the
 				** transmit path owns and re-installs it -
 				** confirm against the xmit routine.
 				*/
 				buf->map = NULL;
 			}
 			buf->eop_index = -1;
 
 			/* Step to the next slot, wrapping at the ring size */
 			if (++first == que->num_desc)
 				first = 0;
 
 			buf = &txr->buffers[first];
 			tx_desc = &txr->base[first];
 		}
 		/* One full packet (up to its EOP) has been reclaimed */
 		++txr->packets;
 		/* See if there is more work now */
 		last = buf->eop_index;
 		if (last != -1) {
 			eop_desc = &txr->base[last];
 			/* Get next done point */
 			if (++last == que->num_desc) last = 0;
 			done = last;
 		} else
 			break;
 	}
 	bus_dmamap_sync(txr->dma.tag, txr->dma.map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/* Remember where the next cleaning pass has to start */
 	txr->next_to_clean = first;
 
 
 	/*
 	** Hang detection, we know there's
 	** work outstanding or the first return
 	** would have been taken, so indicate an
 	** unsuccessful pass, in local_timer if
 	** the value is too great the queue will
 	** be considered hung. If anything has been
 	** cleaned then reset the state.
 	*/
 	if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
 		++que->busy;
 
 	if (processed)
 		que->busy = 1; /* Note this turns off HUNG */
 
 	/*
 	 * If there are no pending descriptors, clear the timeout.
 	 */
 	if (txr->avail == que->num_desc) {
 		que->busy = 0;
 		return FALSE;
 	}
 
 	/* Descriptors are still outstanding */
 	return TRUE;
 }
 
 /*********************************************************************
  *
  *  Refresh mbuf buffers for RX descriptor rings
  *   - now keeps its own state so discards due to resource
  *     exhaustion are unnecessary, if an mbuf cannot be obtained
  *     it just returns, keeping its placeholder, thus it can simply
  *     be recalled to try again.
  *
  **********************************************************************/
 static void
 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
 {
 	struct ixl_vsi		*vsi = que->vsi;
 	struct rx_ring		*rxr = &que->rxr;
 	bus_dma_segment_t	hseg[1];
 	bus_dma_segment_t	pseg[1];
 	struct ixl_rx_buf	*buf;
 	struct mbuf		*mh, *mp;
 	int			i, j, nsegs, error;
 	bool			refreshed = FALSE;
 
 	/*
 	** 'i' is the slot being refilled, 'j' runs one slot ahead of
 	** it; the loop stops as soon as 'j' reaches 'limit'.
 	*/
 	i = j = rxr->next_refresh;
 	/* Control the loop with one beyond */
 	if (++j == que->num_desc)
 		j = 0;
 
 	while (j != limit) {
 		buf = &rxr->buffers[i];
 		if (rxr->hdr_split == FALSE)
 			goto no_split;
 
 		/* Header buffer: reuse the existing mbuf or get a new one */
 		if (buf->m_head == NULL) {
 			mh = m_gethdr(M_NOWAIT, MT_DATA);
 			if (mh == NULL)
 				goto update;
 		} else
 			mh = buf->m_head;
 
 		mh->m_pkthdr.len = mh->m_len = MHLEN;
 		/* NOTE(review): m_len was already set on the line above */
 		mh->m_len = MHLEN;
 		mh->m_flags |= M_PKTHDR;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
 		    buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			printf("Refresh mbufs: hdr dmamap load"
 			    " failure - %d\n", error);
 			m_free(mh);
 			buf->m_head = NULL;
 			goto update;
 		}
 		buf->m_head = mh;
 		bus_dmamap_sync(rxr->htag, buf->hmap,
 		    BUS_DMASYNC_PREREAD);
 		rxr->base[i].read.hdr_addr =
 		   htole64(hseg[0].ds_addr);
 
 no_split:
 		/* Payload buffer: reuse the existing cluster or get a new one */
 		if (buf->m_pack == NULL) {
 			mp = m_getjcl(M_NOWAIT, MT_DATA,
 			    M_PKTHDR, rxr->mbuf_sz);
 			if (mp == NULL)
 				goto update;
 		} else
 			mp = buf->m_pack;
 
 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
 		    buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			printf("Refresh mbufs: payload dmamap load"
 			    " failure - %d\n", error);
 			m_free(mp);
 			buf->m_pack = NULL;
 			goto update;
 		}
 		buf->m_pack = mp;
 		bus_dmamap_sync(rxr->ptag, buf->pmap,
 		    BUS_DMASYNC_PREREAD);
 		rxr->base[i].read.pkt_addr =
 		   htole64(pseg[0].ds_addr);
 		/* Used only when doing header split */
 		/*
 		** NOTE(review): this store is unconditional, so it also
 		** overwrites the hdr_addr written in the header-split
 		** path above - confirm that is intended.
 		*/
 		rxr->base[i].read.hdr_addr = 0;
 
 		refreshed = TRUE;
 		/* Next is precalculated */
 		i = j;
 		/* Progress is saved per slot, so a later call resumes here */
 		rxr->next_refresh = i;
 		if (++j == que->num_desc)
 			j = 0;
 	}
 update:
 	/* The tail register is only written if at least one slot was refilled */
 	if (refreshed) /* Update hardware tail index */
 		wr32(vsi->hw, rxr->tail, rxr->next_refresh);
 	return;
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for rx_buffer structures. Since we use one
  *  rx_buffer per descriptor, the maximum number of rx_buffer's
  *  that we'll need is equal to the number of receive descriptors
  *  that we've defined.
  *
  **********************************************************************/
 int
 ixl_allocate_rx_data(struct ixl_queue *que)
 {
 	struct rx_ring		*rxr = &que->rxr;
 	struct ixl_vsi		*vsi = que->vsi;
 	device_t 		dev = vsi->dev;
 	struct ixl_rx_buf 	*buf;
 	int             	i, bsize, error;
 
 	bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
 	if (!(rxr->buffers =
 	    (struct ixl_rx_buf *) malloc(bsize,
 	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
 		error = ENOMEM;
 		return (error);
 	}
 
 	if ((error = bus_dma_tag_create(NULL,	/* parent */
 				   1, 0,	/* alignment, bounds */
 				   BUS_SPACE_MAXADDR,	/* lowaddr */
 				   BUS_SPACE_MAXADDR,	/* highaddr */
 				   NULL, NULL,		/* filter, filterarg */
 				   MSIZE,		/* maxsize */
 				   1,			/* nsegments */
 				   MSIZE,		/* maxsegsize */
 				   0,			/* flags */
 				   NULL,		/* lockfunc */
 				   NULL,		/* lockfuncarg */
 				   &rxr->htag))) {
 		device_printf(dev, "Unable to create RX DMA htag\n");
 		return (error);
 	}
 
 	if ((error = bus_dma_tag_create(NULL,	/* parent */
 				   1, 0,	/* alignment, bounds */
 				   BUS_SPACE_MAXADDR,	/* lowaddr */
 				   BUS_SPACE_MAXADDR,	/* highaddr */
 				   NULL, NULL,		/* filter, filterarg */
 				   MJUM16BYTES,		/* maxsize */
 				   1,			/* nsegments */
 				   MJUM16BYTES,		/* maxsegsize */
 				   0,			/* flags */
 				   NULL,		/* lockfunc */
 				   NULL,		/* lockfuncarg */
 				   &rxr->ptag))) {
 		device_printf(dev, "Unable to create RX DMA ptag\n");
 		return (error);
 	}
 
 	for (i = 0; i < que->num_desc; i++) {
 		buf = &rxr->buffers[i];
 		error = bus_dmamap_create(rxr->htag,
 		    BUS_DMA_NOWAIT, &buf->hmap);
 		if (error) {
 			device_printf(dev, "Unable to create RX head map\n");
 			break;
 		}
 		error = bus_dmamap_create(rxr->ptag,
 		    BUS_DMA_NOWAIT, &buf->pmap);
 		if (error) {
 			device_printf(dev, "Unable to create RX pkt map\n");
 			break;
 		}
 	}
 
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  (Re)Initialize the queue receive ring and its buffers.
  *
  **********************************************************************/
 int
 ixl_init_rx_ring(struct ixl_queue *que)
 {
 	struct	rx_ring 	*rxr = &que->rxr;
 	struct ixl_vsi		*vsi = que->vsi;
 #if defined(INET6) || defined(INET)
 	struct ifnet		*ifp = vsi->ifp;
 	struct lro_ctrl		*lro = &rxr->lro;
 #endif
 	struct ixl_rx_buf	*buf;
 	bus_dma_segment_t	pseg[1], hseg[1];
 	int			rsize, nsegs, error = 0;
 
 	/*
 	** Returns 0 on success or an errno value on failure; the RX
 	** lock is taken here and released on every exit path.
 	*/
 	IXL_RX_LOCK(rxr);
 	/* Clear the ring contents */
 	rsize = roundup2(que->num_desc *
 	    sizeof(union i40e_rx_desc), DBA_ALIGN);
 	bzero((void *)rxr->base, rsize);
 	/* Cleanup any existing buffers */
 	for (int i = 0; i < que->num_desc; i++) {
 		buf = &rxr->buffers[i];
 		if (buf->m_head != NULL) {
 			bus_dmamap_sync(rxr->htag, buf->hmap,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->htag, buf->hmap);
 			buf->m_head->m_flags |= M_PKTHDR;
 			m_freem(buf->m_head);
 		}
 		if (buf->m_pack != NULL) {
 			bus_dmamap_sync(rxr->ptag, buf->pmap,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->ptag, buf->pmap);
 			buf->m_pack->m_flags |= M_PKTHDR;
 			m_freem(buf->m_pack);
 		}
 		buf->m_head = NULL;
 		buf->m_pack = NULL;
 	}
 
 	/* header split is off */
 	rxr->hdr_split = FALSE;
 
 	/* Now replenish the mbufs */
 	for (int j = 0; j != que->num_desc; ++j) {
 		struct mbuf	*mh, *mp;
 
 		buf = &rxr->buffers[j];
 		/*
 		** Don't allocate mbufs if not
 		** doing header split, its wasteful
 		*/
 		if (rxr->hdr_split == FALSE)
 			goto skip_head;
 
 		/* First the header */
 		buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
 		if (buf->m_head == NULL) {
 			error = ENOBUFS;
 			goto fail;
 		}
 		m_adj(buf->m_head, ETHER_ALIGN);
 		mh = buf->m_head;
 		mh->m_len = mh->m_pkthdr.len = MHLEN;
 		mh->m_flags |= M_PKTHDR;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->htag,
 		    buf->hmap, buf->m_head, hseg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) /* Nothing elegant to do here */
 			goto fail;
 		bus_dmamap_sync(rxr->htag,
 		    buf->hmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
 		rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
 
 skip_head:
 		/* Now the payload cluster */
 		buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
 		    M_PKTHDR, rxr->mbuf_sz);
 		if (buf->m_pack == NULL) {
 			error = ENOBUFS;
 			goto fail;
 		}
 		mp = buf->m_pack;
 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
 		    buf->pmap, mp, pseg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0)
 			goto fail;
 		bus_dmamap_sync(rxr->ptag,
 		    buf->pmap, BUS_DMASYNC_PREREAD);
 		/* Update descriptor */
 		rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
 		rxr->base[j].read.hdr_addr = 0;
 	}
 
 
 	/* Setup our descriptor indices */
 	rxr->next_check = 0;
 	rxr->next_refresh = 0;
 	rxr->lro_enabled = FALSE;
 	rxr->split = 0;
 	rxr->bytes = 0;
 	rxr->discard = FALSE;
 
 	/* Publish the freshly filled ring through the tail register */
 	wr32(vsi->hw, rxr->tail, que->num_desc - 1);
 	ixl_flush(vsi->hw);
 
 #if defined(INET6) || defined(INET)
 	/*
 	** Now set up the LRO interface:
 	*/
 	if (ifp->if_capenable & IFCAP_LRO) {
 		/*
 		** Store the result in 'error' itself: it is still 0
 		** at this point, so a failure recorded anywhere else
 		** would be returned to the caller as success.
 		*/
 		error = tcp_lro_init(lro);
 		if (error) {
 			if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
 			goto fail;
 		}
 		INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
 		rxr->lro_enabled = TRUE;
 		lro->ifp = vsi->ifp;
 	}
 #endif
 
 	bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 fail:
 	IXL_RX_UNLOCK(rxr);
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  Free station receive ring data structures
  *
  **********************************************************************/
 void
 ixl_free_que_rx(struct ixl_queue *que)
 {
 	struct rx_ring		*rxr = &que->rxr;
 	struct ixl_rx_buf	*slot;
 	int			idx;
 
 	INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
 
 	/* Per-slot teardown only makes sense if the array exists */
 	if (rxr->buffers != NULL) {
 		for (idx = 0; idx < que->num_desc; idx++) {
 			slot = &rxr->buffers[idx];
 
 			/* Header mbuf, if one is still attached */
 			if (slot->m_head != NULL) {
 				bus_dmamap_sync(rxr->htag, slot->hmap,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->htag, slot->hmap);
 				slot->m_head->m_flags |= M_PKTHDR;
 				m_freem(slot->m_head);
 				slot->m_head = NULL;
 			}
 			/* Payload mbuf, if one is still attached */
 			if (slot->m_pack != NULL) {
 				bus_dmamap_sync(rxr->ptag, slot->pmap,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->ptag, slot->pmap);
 				slot->m_pack->m_flags |= M_PKTHDR;
 				m_freem(slot->m_pack);
 				slot->m_pack = NULL;
 			}
 			/* Then the two DMA maps belonging to the slot */
 			if (slot->hmap != NULL) {
 				bus_dmamap_destroy(rxr->htag, slot->hmap);
 				slot->hmap = NULL;
 			}
 			if (slot->pmap != NULL) {
 				bus_dmamap_destroy(rxr->ptag, slot->pmap);
 				slot->pmap = NULL;
 			}
 		}
 		/* Finally the bookkeeping array itself */
 		free(rxr->buffers, M_DEVBUF);
 		rxr->buffers = NULL;
 	}
 
 	/* Tags go after every map created from them is gone */
 	if (rxr->htag != NULL) {
 		bus_dma_tag_destroy(rxr->htag);
 		rxr->htag = NULL;
 	}
 	if (rxr->ptag != NULL) {
 		bus_dma_tag_destroy(rxr->ptag);
 		rxr->ptag = NULL;
 	}
 
 	INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
 }
 
 static __inline void
 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
 {
 
 #if defined(INET6) || defined(INET)
 	/*
 	 * Try soft LRO first. The packet is offered to LRO only when
 	 * LRO is enabled on the ring, VLAN hardware tagging is enabled
 	 * on the interface, the mbuf carries both CSUM_DATA_VALID and
 	 * CSUM_PSEUDO_HDR, and the LRO context has resources
 	 * (lro_cnt != 0). If tcp_lro_rx() accepts the mbuf we are done;
 	 * in every other case it falls through to the stack below.
 	 */
 	if (rxr->lro_enabled &&
 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
 	    rxr->lro.lro_cnt != 0 &&
 	    tcp_lro_rx(&rxr->lro, m, 0) == 0)
 		return;
 #endif
 	/* The RX lock is released around the call into the stack */
 	IXL_RX_UNLOCK(rxr);
 	(*ifp->if_input)(ifp, m);
 	IXL_RX_LOCK(rxr);
 }
 
 
 static __inline void
 ixl_rx_discard(struct rx_ring *rxr, int i)
 {
 	struct ixl_rx_buf	*slot = &rxr->buffers[i];
 
 	/* A partially assembled chain hangs off fmp: free all of it */
 	if (slot->fmp != NULL) {
 		slot->fmp->m_flags |= M_PKTHDR;
 		m_freem(slot->fmp);
 		slot->fmp = NULL;
 	}
 
 	/*
 	** With advanced descriptors the writeback clobbers the
 	** buffer addresses, so the slot's own mbufs are simply
 	** released here and the normal refresh path is left to
 	** supply new buffers and mappings.
 	*/
 	if (slot->m_head != NULL) {
 		m_free(slot->m_head);
 		slot->m_head = NULL;
 	}
 
 	if (slot->m_pack != NULL) {
 		m_free(slot->m_pack);
 		slot->m_pack = NULL;
 	}
 }
 
 #ifdef RSS
 /*
 ** ixl_ptype_to_hash: parse the packet type
 ** to determine the appropriate hash.
 */
 static inline int
 ixl_ptype_to_hash(u8 ptype)
 {
         struct i40e_rx_ptype_decoded	decoded;
-	u8				ex = 0
+	u8				ex = 0;
 
 	/* Expand the packet type into its decoded fields */
 	decoded = decode_rx_desc_ptype(ptype);
 	/* 'ex' selects the _EX hash type variants below */
 	ex = decoded.outer_frag;
 
 	/* Unknown packet types get the opaque hash type */
 	if (!decoded.known)
 		return M_HASHTYPE_OPAQUE;
 
 	/* So do plain L2 frames */
 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2) 
 		return M_HASHTYPE_OPAQUE;
 
 	/* Note: anything that gets to this point is IP */
         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) { 
 		switch (decoded.inner_prot) {
 			case I40E_RX_PTYPE_INNER_PROT_TCP:
 				if (ex)
 					return M_HASHTYPE_RSS_TCP_IPV6_EX;
 				else
 					return M_HASHTYPE_RSS_TCP_IPV6;
 			case I40E_RX_PTYPE_INNER_PROT_UDP:
 				if (ex)
 					return M_HASHTYPE_RSS_UDP_IPV6_EX;
 				else
 					return M_HASHTYPE_RSS_UDP_IPV6;
 			default:
 				if (ex)
 					return M_HASHTYPE_RSS_IPV6_EX;
 				else
 					return M_HASHTYPE_RSS_IPV6;
 		}
 	}
         if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 
 		/* For IPv4 only the UDP case consults 'ex' */
 		switch (decoded.inner_prot) {
 			case I40E_RX_PTYPE_INNER_PROT_TCP:
 					return M_HASHTYPE_RSS_TCP_IPV4;
 			case I40E_RX_PTYPE_INNER_PROT_UDP:
 				if (ex)
 					return M_HASHTYPE_RSS_UDP_IPV4_EX;
 				else
 					return M_HASHTYPE_RSS_UDP_IPV4;
 			default:
 					return M_HASHTYPE_RSS_IPV4;
 		}
 	}
 	/* We should never get here!! */
 	return M_HASHTYPE_OPAQUE;
 }
 #endif /* RSS */
 
 /*********************************************************************
  *
  *  This routine executes in interrupt context. It replenishes
  *  the mbufs in the descriptor and sends data which has been
  *  dma'ed into host memory to upper layer.
  *
  *  We loop at most count times if count is > 0, or until done if
  *  count < 0.
  *
  *  Return TRUE for more work, FALSE for all clean.
  *********************************************************************/
 bool
 ixl_rxeof(struct ixl_queue *que, int count)
 {
 	struct ixl_vsi		*vsi = que->vsi;
 	struct rx_ring		*rxr = &que->rxr;
 	struct ifnet		*ifp = vsi->ifp;
 #if defined(INET6) || defined(INET)
 	struct lro_ctrl		*lro = &rxr->lro;
 	struct lro_entry	*queued;
 #endif
 	int			i, nextp, processed = 0;
 	union i40e_rx_desc	*cur;
 	struct ixl_rx_buf	*rbuf, *nbuf;
 
 
 	IXL_RX_LOCK(rxr);
 
 
 	/*
 	** Walk the ring from next_check. 'count' is decremented once
 	** per completed descriptor and the loop ends when it hits 0;
 	** a negative count therefore only ends on the breaks below.
 	*/
 	for (i = rxr->next_check; count != 0;) {
 		struct mbuf	*sendmp, *mh, *mp;
 		u32		rsc, status, error;
 		u16		hlen, plen, vtag;
 		u64		qword;
 		u8		ptype;
 		bool		eop;
  
 		/* Sync the ring. */
 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 		/* Unpack the fields of the write-back status qword */
 		cur = &rxr->base[i];
 		qword = le64toh(cur->wb.qword1.status_error_len);
 		status = (qword & I40E_RXD_QW1_STATUS_MASK)
 		    >> I40E_RXD_QW1_STATUS_SHIFT;
 		error = (qword & I40E_RXD_QW1_ERROR_MASK)
 		    >> I40E_RXD_QW1_ERROR_SHIFT;
 		plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
 		    >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
 		hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
 		    >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
 		ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
 		    >> I40E_RXD_QW1_PTYPE_SHIFT;
 
 		/* Stop at the first descriptor without the DD bit set */
 		if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
 			++rxr->not_done;
 			break;
 		}
 		/* Also stop if the interface is no longer running */
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 
 		count--;
 		sendmp = NULL;
 		nbuf = NULL;
 		rsc = 0;
 		/* Clear the status word now that it has been consumed */
 		cur->wb.qword1.status_error_len = 0;
 		rbuf = &rxr->buffers[i];
 		mh = rbuf->m_head;
 		mp = rbuf->m_pack;
 		eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
 		/* Pick up the VLAN tag only when the descriptor flags one */
 		if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
 			vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
 		else
 			vtag = 0;
 
 		/*
 		** Make sure bad packets are discarded,
 		** note that only EOP descriptor has valid
 		** error results.
 		*/
                 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
 			rxr->discarded++;
 			ixl_rx_discard(rxr, i);
 			goto next_desc;
 		}
 
 		/* Prefetch the next buffer */
 		if (!eop) {
 			nextp = i + 1;
 			if (nextp == que->num_desc)
 				nextp = 0;
 			nbuf = &rxr->buffers[nextp];
 			prefetch(nbuf);
 		}
 
 		/*
 		** The header mbuf is ONLY used when header 
 		** split is enabled, otherwise we get normal 
 		** behavior, ie, both header and payload
 		** are DMA'd into the payload buffer.
 		**
 		** Rather than using the fmp/lmp global pointers
 		** we now keep the head of a packet chain in the
 		** buffer struct and pass this along from one
 		** descriptor to the next, until we get EOP.
 		*/
 		if (rxr->hdr_split && (rbuf->fmp == NULL)) {
 			/* Clamp the reported header length to IXL_RX_HDR */
 			if (hlen > IXL_RX_HDR)
 				hlen = IXL_RX_HDR;
 			mh->m_len = hlen;
 			mh->m_flags |= M_PKTHDR;
 			mh->m_next = NULL;
 			mh->m_pkthdr.len = mh->m_len;
 			/* Null buf pointer so it is refreshed */
 			rbuf->m_head = NULL;
 			/*
 			** Check the payload length, this
 			** could be zero if its a small
 			** packet.
 			*/
 			if (plen > 0) {
 				mp->m_len = plen;
 				mp->m_next = NULL;
 				mp->m_flags &= ~M_PKTHDR;
 				mh->m_next = mp;
 				mh->m_pkthdr.len += mp->m_len;
 				/* Null buf pointer so it is refreshed */
 				rbuf->m_pack = NULL;
 				rxr->split++;
 			}
 			/*
 			** Now create the forward
 			** chain so when complete 
 			** we wont have to.
 			*/
                         if (eop == 0) {
 				/* stash the chain head */
                                 nbuf->fmp = mh;
 				/* Make forward chain */
                                 if (plen)
                                         mp->m_next = nbuf->m_pack;
                                 else
                                         mh->m_next = nbuf->m_pack;
                         } else {
 				/* Singlet, prepare to send */
                                 sendmp = mh;
                                 if (vtag) {
                                         sendmp->m_pkthdr.ether_vtag = vtag;
                                         sendmp->m_flags |= M_VLANTAG;
                                 }
                         }
 		} else {
 			/*
 			** Either no header split, or a
 			** secondary piece of a fragmented
 			** split packet.
 			*/
 			mp->m_len = plen;
 			/*
 			** See if there is a stored head
 			** that determines what we are
 			*/
 			sendmp = rbuf->fmp;
 			rbuf->m_pack = rbuf->fmp = NULL;
 
 			if (sendmp != NULL) /* secondary frag */
 				sendmp->m_pkthdr.len += mp->m_len;
 			else {
 				/* first desc of a non-ps chain */
 				sendmp = mp;
 				sendmp->m_flags |= M_PKTHDR;
 				sendmp->m_pkthdr.len = mp->m_len;
 				if (vtag) {
 					sendmp->m_pkthdr.ether_vtag = vtag;
 					sendmp->m_flags |= M_VLANTAG;
 				}
                         }
 			/* Pass the head pointer on */
 			if (eop == 0) {
 				nbuf->fmp = sendmp;
 				sendmp = NULL;
 				mp->m_next = nbuf->m_pack;
 			}
 		}
 		++processed;
 		/* Sending this frame? */
 		if (eop) {
 			sendmp->m_pkthdr.rcvif = ifp;
 			/* gather stats */
 			rxr->rx_packets++;
 			rxr->rx_bytes += sendmp->m_pkthdr.len;
 			/* capture data for dynamic ITR adjustment */
 			rxr->packets++;
 			rxr->bytes += sendmp->m_pkthdr.len;
 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
 				ixl_rx_checksum(sendmp, status, error, ptype);
 #ifdef RSS
 			/* With RSS the flowid is the hash from the descriptor */
 			sendmp->m_pkthdr.flowid =
 			    le32toh(cur->wb.qword0.hi_dword.rss);
 			M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
 #else
 			/* Without RSS the queue's msix value is the flowid */
 			sendmp->m_pkthdr.flowid = que->msix;
 			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
 #endif
 		}
 next_desc:
 		bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 		/* Advance our pointers to the next descriptor. */
 		if (++i == que->num_desc)
 			i = 0;
 
 		/* Now send to the stack or do LRO */
 		if (sendmp != NULL) {
 			/*
 			** ixl_rx_input() may drop and retake the RX
 			** lock, so the position is saved in the ring
 			** before the call and reloaded afterwards.
 			*/
 			rxr->next_check = i;
 			ixl_rx_input(rxr, ifp, sendmp, ptype);
 			i = rxr->next_check;
 		}
 
                /* Every 8 descriptors we go to refresh mbufs */
 		if (processed == 8) {
 			ixl_refresh_mbufs(que, i);
 			processed = 0;
 		}
 	}
 
 	/* Refresh any remaining buf structs */
 	if (ixl_rx_unrefreshed(que))
 		ixl_refresh_mbufs(que, i);
 
 	rxr->next_check = i;
 
 #if defined(INET6) || defined(INET)
 	/*
 	 * Flush any outstanding LRO work
 	 */
 	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
 		tcp_lro_flush(lro, queued);
 	}
 #endif
 
 	IXL_RX_UNLOCK(rxr);
 	/*
 	** NOTE(review): FALSE is returned unconditionally, so callers
 	** are never told that more work is pending.
 	*/
 	return (FALSE);
 }
 
 
 /*********************************************************************
  *
  *  Verify that the hardware indicated that the checksum is valid.
  *  Inform the stack about the status of checksum so that stack
  *  doesn't spend time verifying the checksum.
  *
  *********************************************************************/
 static void
 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
 {
 	struct i40e_rx_ptype_decoded info;
 
 	info = decode_rx_desc_ptype(ptype);
 
 	/* An IP or L4 checksum error means nothing can be vouched for */
 	if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
 	    (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
 		mp->m_pkthdr.csum_flags = 0;
 		return;
 	}
 
 	/* IPv6 with extension headers likely have bad csum */
 	if (info.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
 	    info.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6 &&
 	    (status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))) {
 		mp->m_pkthdr.csum_flags = 0;
 		return;
 	}
 
 	/* The IP checksum was checked and found good */
 	mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 	/* L3L4P set: also mark the L4 checksum as validated */
 	if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
 		mp->m_pkthdr.csum_flags |=
 		    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 		mp->m_pkthdr.csum_data |= htons(0xffff);
 	}
 }
 
 #if __FreeBSD_version >= 1100000
 uint64_t
 ixl_get_counter(if_t ifp, ift_counter cnt)
 {
 	struct ixl_vsi *vsi = if_getsoftc(ifp);
 	uint64_t value;
 
 	/* Map each generic counter id onto the VSI's own statistic */
 	switch (cnt) {
 	case IFCOUNTER_IPACKETS:
 		value = vsi->ipackets;
 		break;
 	case IFCOUNTER_IERRORS:
 		value = vsi->ierrors;
 		break;
 	case IFCOUNTER_OPACKETS:
 		value = vsi->opackets;
 		break;
 	case IFCOUNTER_OERRORS:
 		value = vsi->oerrors;
 		break;
 	case IFCOUNTER_COLLISIONS:
 		/* Collisions are by standard impossible in 40G/10G Ethernet */
 		value = 0;
 		break;
 	case IFCOUNTER_IBYTES:
 		value = vsi->ibytes;
 		break;
 	case IFCOUNTER_OBYTES:
 		value = vsi->obytes;
 		break;
 	case IFCOUNTER_IMCASTS:
 		value = vsi->imcasts;
 		break;
 	case IFCOUNTER_OMCASTS:
 		value = vsi->omcasts;
 		break;
 	case IFCOUNTER_IQDROPS:
 		value = vsi->iqdrops;
 		break;
 	case IFCOUNTER_OQDROPS:
 		value = vsi->oqdrops;
 		break;
 	case IFCOUNTER_NOPROTO:
 		value = vsi->noproto;
 		break;
 	default:
 		/* Anything else is answered by the generic helper */
 		value = if_get_counter_default(ifp, cnt);
 		break;
 	}
 
 	return (value);
 }
 #endif
 
Index: head/sys/net/if_ethersubr.c
===================================================================
--- head/sys/net/if_ethersubr.c	(revision 277330)
+++ head/sys/net/if_ethersubr.c	(revision 277331)
@@ -1,1162 +1,1162 @@
 /*-
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_netgraph.h"
 #include "opt_mbuf_profiling.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/uuid.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_llc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if_bridgevar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
 #include <net/pfil.h>
+#include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netpfil/pf/pf_mtag.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
-#include <netinet/in_rss.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 #include <security/mac/mac_framework.h>
 
 #ifdef CTASSERT
 CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
 #endif
 
 VNET_DEFINE(struct pfil_head, link_pfil_hook);	/* Packet filter hooks */
 
 /* netgraph node hooks for ng_ether(4) */
 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
 int	(*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_attach_p)(struct ifnet *ifp);
 void	(*ng_ether_detach_p)(struct ifnet *ifp);
 
 void	(*vlan_input_p)(struct ifnet *, struct mbuf *);
 
 /* if_bridge(4) support */
 struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); 
 int	(*bridge_output_p)(struct ifnet *, struct mbuf *, 
 		struct sockaddr *, struct rtentry *);
 void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 /* if_lagg(4) support */
 struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); 
 
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static	int ether_resolvemulti(struct ifnet *, struct sockaddr **,
 		struct sockaddr *);
 #ifdef VIMAGE
 static	void ether_reassign(struct ifnet *, struct vnet *, char *);
 #endif
 
 #define	ETHER_IS_BROADCAST(addr) \
 	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
 
 #define senderr(e) do { error = (e); goto bad;} while (0)
 
 static void
 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
 {
 	int csum_flags = 0;
 
 	if (src->m_pkthdr.csum_flags & CSUM_IP)
 		csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID);
 	if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
 		csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 	if (src->m_pkthdr.csum_flags & CSUM_SCTP)
 		csum_flags |= CSUM_SCTP_VALID;
 	dst->m_pkthdr.csum_flags |= csum_flags;
 	if (csum_flags & CSUM_DATA_VALID)
 		dst->m_pkthdr.csum_data = 0xffff;
 }
 
 /*
  * Ethernet output routine.
  * Encapsulate a packet of type family for the local net.
  * Use trailer local net encapsulation if enough data in first
  * packet leaves a multiple of 512 bytes of data in remainder.
  */
 int
 ether_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
 {
 	short type;
 	int error = 0, hdrcmplt = 0;
 	u_char edst[ETHER_ADDR_LEN];
 	struct llentry *lle = NULL;
 	struct rtentry *rt0 = NULL;
 	struct ether_header *eh;
 	struct pf_mtag *t;
 	int loop_copy = 1;
 	int hlen;	/* link layer header length */
 	int is_gw = 0;
 	uint32_t pflags = 0;
 
 	if (ro != NULL) {
 		if (!(m->m_flags & (M_BCAST | M_MCAST))) {
 			lle = ro->ro_lle;
 			if (lle != NULL)
 				pflags = lle->la_flags;
 		}
 		rt0 = ro->ro_rt;
 		if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
 			is_gw = 1;
 	}
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	M_PROFILE(m);
 	if (ifp->if_flags & IFF_MONITOR)
 		senderr(ENETDOWN);
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 
 	hlen = ETHER_HDR_LEN;
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if (lle != NULL && (pflags & LLE_VALID) != 0)
 			memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
 		else
 			error = arpresolve(ifp, is_gw, m, dst, edst, &pflags);
 		if (error)
 			return (error == EWOULDBLOCK ? 0 : error);
 		type = htons(ETHERTYPE_IP);
 		break;
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_ETHER);
 
 		loop_copy = 0; /* if this is for us, don't do it */
 
 		switch(ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			type = htons(ETHERTYPE_REVARP);
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			type = htons(ETHERTYPE_ARP);
 			break;
 		}
 
 		if (m->m_flags & M_BCAST)
 			bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
 		else
 			bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
 
 	}
 	break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if (lle != NULL && (pflags & LLE_VALID))
 			memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
 		else
 			error = nd6_storelladdr(ifp, m, dst, (u_char *)edst,
 			    &pflags);
 		if (error)
 			return error;
 		type = htons(ETHERTYPE_IPV6);
 		break;
 #endif
 	case pseudo_AF_HDRCMPLT:
 	    {
 		const struct ether_header *eh;
 
 		hdrcmplt = 1;
 		/* FALLTHROUGH */
 
 	case AF_UNSPEC:
 		loop_copy = 0; /* if this is for us, don't do it */
 		eh = (const struct ether_header *)dst->sa_data;
 		(void)memcpy(edst, eh->ether_dhost, sizeof (edst));
 		type = eh->ether_type;
 		break;
             }
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		senderr(EAFNOSUPPORT);
 	}
 
 	if ((pflags & LLE_IFADDR) != 0) {
 		update_mbuf_csumflags(m, m);
 		return (if_simloop(ifp, m, dst->sa_family, 0));
 	}
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
 	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 	if (m == NULL)
 		senderr(ENOBUFS);
 	eh = mtod(m, struct ether_header *);
 	if (hdrcmplt == 0) {
 		memcpy(&eh->ether_type, &type, sizeof(eh->ether_type));
 		memcpy(eh->ether_dhost, edst, sizeof (edst));
 		memcpy(eh->ether_shost, IF_LLADDR(ifp),sizeof(eh->ether_shost));
 	}
 
 	/*
 	 * If a simplex interface, and the packet is being sent to our
 	 * Ethernet address or a broadcast address, loopback a copy.
 	 * XXX To make a simplex device behave exactly like a duplex
 	 * device, we should copy in the case of sending to our own
 	 * ethernet address (thus letting the original actually appear
 	 * on the wire). However, we don't do that here for security
 	 * reasons and compatibility with the original behavior.
 	 */
 	if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
 	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
 		if (m->m_flags & M_BCAST) {
 			struct mbuf *n;
 
 			/*
 			 * Because if_simloop() modifies the packet, we need a
 			 * writable copy through m_dup() instead of a readonly
 			 * one as m_copy[m] would give us. The alternative would
 			 * be to modify if_simloop() to handle the readonly mbuf,
 			 * but performancewise it is mostly equivalent (trading
 			 * extra data copying vs. extra locking).
 			 *
 			 * XXX This is a local workaround.  A number of less
 			 * often used kernel parts suffer from the same bug.
 			 * See PR kern/105943 for a proposed general solution.
 			 */
 			if ((n = m_dup(m, M_NOWAIT)) != NULL) {
 				update_mbuf_csumflags(m, n);
 				(void)if_simloop(ifp, n, dst->sa_family, hlen);
 			} else
 				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 		} else if (bcmp(eh->ether_dhost, eh->ether_shost,
 				ETHER_ADDR_LEN) == 0) {
 			update_mbuf_csumflags(m, m);
 			(void) if_simloop(ifp, m, dst->sa_family, hlen);
 			return (0);	/* XXX */
 		}
 	}
 
        /*
 	* Bridges require special output handling.
 	*/
 	if (ifp->if_bridge) {
 		BRIDGE_OUTPUT(ifp, m, error);
 		return (error);
 	}
 
 #if defined(INET) || defined(INET6)
 	if (ifp->if_carp &&
 	    (error = (*carp_output_p)(ifp, m, dst)))
 		goto bad;
 #endif
 
 	/* Handle ng_ether(4) processing, if any */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_output_p != NULL,
 		    ("ng_ether_output_p is NULL"));
 		if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
 bad:			if (m != NULL)
 				m_freem(m);
 			return (error);
 		}
 		if (m == NULL)
 			return (0);
 	}
 
 	/* Continue with link-layer output */
 	return ether_output_frame(ifp, m);
 }
 
 /*
  * Ethernet link layer output routine to send a raw frame to the device.
  *
  * This assumes that the 14 byte Ethernet header is present and contiguous
  * in the first mbuf (if BRIDGE'ing).
  */
 int
 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
 {
 	int i;
 
 	if (PFIL_HOOKED(&V_link_pfil_hook)) {
 		i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, NULL);
 
 		if (i != 0)
 			return (EACCES);
 
 		if (m == NULL)
 			return (0);
 	}
 
 	/*
 	 * Queue message on interface, update output statistics if
 	 * successful, and start output if interface not yet active.
 	 */
 	return ((ifp->if_transmit)(ifp, m));
 }
 
 #if defined(INET) || defined(INET6)
 #endif
 
 /*
  * Process a received Ethernet packet; the packet is in the
  * mbuf chain m with the ethernet header at the front.
  */
 static void
 ether_input_internal(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	u_short etype;
 
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
 		m_freem(m);
 		return;
 	}
 #endif
 	/*
 	 * Do consistency checks to verify assumptions
 	 * made by code past this point.
 	 */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		if_printf(ifp, "discard frame w/o packet header\n");
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 	if (m->m_len < ETHER_HDR_LEN) {
 		/* XXX maybe should pullup? */
 		if_printf(ifp, "discard frame w/o leading ethernet "
 				"header (len %u pkt len %u)\n",
 				m->m_len, m->m_pkthdr.len);
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if (m->m_pkthdr.rcvif != ifp) {
 		if_printf(ifp, "Warning, frame marked as received on %s\n",
 			m->m_pkthdr.rcvif->if_xname);
 	}
 #endif
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
 
 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 		if (ETHER_IS_BROADCAST(eh->ether_dhost))
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Give bpf a chance at the packet.
 	 */
 	ETHER_BPF_MTAP(ifp, m);
 
 	/*
 	 * If the CRC is still on the packet, trim it off. We do this once
 	 * and once only in case we are re-entered. Nothing else on the
 	 * Ethernet receive path expects to see the FCS.
 	 */
 	if (m->m_flags & M_HASFCS) {
 		m_adj(m, -ETHER_CRC_LEN);
 		m->m_flags &= ~M_HASFCS;
 	}
 
 	if (!(ifp->if_capenable & IFCAP_HWSTATS))
 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		CURVNET_RESTORE();
 		return;
 	}
 
 	/* Handle input from a lagg(4) port */
 	if (ifp->if_type == IFT_IEEE8023ADLAG) {
 		KASSERT(lagg_input_p != NULL,
 		    ("%s: if_lagg not loaded!", __func__));
 		m = (*lagg_input_p)(ifp, m);
 		if (m != NULL)
 			ifp = m->m_pkthdr.rcvif;
 		else {
 			CURVNET_RESTORE();
 			return;
 		}
 	}
 
 	/*
 	 * If the hardware did not process an 802.1Q tag, do this now,
 	 * to allow 802.1P priority frames to be passed to the main input
 	 * path correctly.
 	 * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
 	 */
 	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
 		struct ether_vlan_header *evl;
 
 		if (m->m_len < sizeof(*evl) &&
 		    (m = m_pullup(m, sizeof(*evl))) == NULL) {
 #ifdef DIAGNOSTIC
 			if_printf(ifp, "cannot pullup VLAN header\n");
 #endif
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			m_freem(m);
 			CURVNET_RESTORE();
 			return;
 		}
 
 		evl = mtod(m, struct ether_vlan_header *);
 		m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
 		m->m_flags |= M_VLANTAG;
 
 		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
 		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 		m_adj(m, ETHER_VLAN_ENCAP_LEN);
 		eh = mtod(m, struct ether_header *);
 	}
 
 	M_SETFIB(m, ifp->if_fib);
 
 	/* Allow ng_ether(4) to claim this frame. */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_input_p != NULL,
 		    ("%s: ng_ether_input_p is NULL", __func__));
 		m->m_flags &= ~M_PROMISC;
 		(*ng_ether_input_p)(ifp, &m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		eh = mtod(m, struct ether_header *);
 	}
 
 	/*
 	 * Allow if_bridge(4) to claim this frame.
 	 * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
 	 * and the frame should be delivered locally.
 	 */
 	if (ifp->if_bridge != NULL) {
 		m->m_flags &= ~M_PROMISC;
 		BRIDGE_INPUT(ifp, m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		eh = mtod(m, struct ether_header *);
 	}
 
 #if defined(INET) || defined(INET6)
 	/*
 	 * Clear M_PROMISC on frame so that carp(4) will see it when the
 	 * mbuf flows up to Layer 3.
 	 * FreeBSD's implementation of carp(4) uses the inprotosw
 	 * to dispatch IPPROTO_CARP. carp(4) also allocates its own
 	 * Ethernet addresses of the form 00:00:5e:00:01:xx, which
 	 * is outside the scope of the M_PROMISC test below.
 	 * TODO: Maintain a hash table of ethernet addresses other than
 	 * ether_dhost which may be active on this ifp.
 	 */
 	if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
 		m->m_flags &= ~M_PROMISC;
 	} else
 #endif
 	{
 		/*
 		 * If the frame received was not for our MAC address, set the
 		 * M_PROMISC flag on the mbuf chain. The frame may need to
 		 * be seen by the rest of the Ethernet input path in case of
 		 * re-entry (e.g. bridge, vlan, netgraph) but should not be
 		 * seen by upper protocol layers.
 		 */
 		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
 		    bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
 			m->m_flags |= M_PROMISC;
 	}
 
 	random_harvest(&(m->m_data), 12, 2, RANDOM_NET_ETHER);
 
 	ether_demux(ifp, m);
 	CURVNET_RESTORE();
 }
 
 /*
  * Ethernet input dispatch; by default, direct dispatch here regardless of
  * global configuration.  However, if RSS is enabled, hook up RSS affinity
  * so that when deferred or hybrid dispatch is enabled, we can redistribute
  * load based on RSS.
  *
  * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
  * not it had already done work distribution via multi-queue.  Then we could
  * direct dispatch in the event load balancing was already complete and
  * handle the case of interfaces with different capabilities better.
  *
  * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
  * at multiple layers?
  *
  * XXXRW: For now, enable all this only if RSS is compiled in, although it
  * works fine without RSS.  Need to characterise the performance overhead
  * of the detour through the netisr code in the event the result is always
  * direct dispatch.
  */
 static void
 ether_nh_input(struct mbuf *m)
 {
 
 	ether_input_internal(m->m_pkthdr.rcvif, m);
 }
 
 static struct netisr_handler	ether_nh = {
 	.nh_name = "ether",
 	.nh_handler = ether_nh_input,
 	.nh_proto = NETISR_ETHER,
 #ifdef RSS
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_dispatch = NETISR_DISPATCH_DIRECT,
 	.nh_m2cpuid = rss_m2cpuid,
 #else
 	.nh_policy = NETISR_POLICY_SOURCE,
 	.nh_dispatch = NETISR_DISPATCH_DIRECT,
 #endif
 };
 
 static void
 ether_init(__unused void *arg)
 {
 
 	netisr_register(&ether_nh);
 }
 SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
 
 static void
 vnet_ether_init(__unused void *arg)
 {
 	int i;
 
 	/* Initialize packet filter hooks. */
 	V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
 	V_link_pfil_hook.ph_af = AF_LINK;
 	if ((i = pfil_head_register(&V_link_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to register pfil link hook, "
 			"error %d\n", __func__, i);
 }
 VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_init, NULL);
  
 static void
 vnet_ether_destroy(__unused void *arg)
 {
 	int i;
 
 	if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to unregister pfil link hook, "
 			"error %d\n", __func__, i);
 }
 VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_destroy, NULL);
 
 
 
 static void
 ether_input(struct ifnet *ifp, struct mbuf *m)
 {
 
 	struct mbuf *mn;
 
 	/*
 	 * The drivers are allowed to pass in a chain of packets linked with
 	 * m_nextpkt. We split them up into separate packets here and pass
 	 * them up. This allows the drivers to amortize the receive lock.
 	 */
 	while (m) {
 		mn = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 
 		/*
 		 * We will rely on rcvif being set properly in the deferred context,
 		 * so assert it is correct here.
 		 */
 		KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
 		netisr_dispatch(NETISR_ETHER, m);
 		m = mn;
 	}
 }
 
 /*
  * Upper layer processing for a received Ethernet packet.
  */
 void
 ether_demux(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	int i, isr;
 	u_short ether_type;
 
 	KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
 
 	/* Do not grab PROMISC frames in case we are re-entered. */
 	if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
 		i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL);
 
 		if (i != 0 || m == NULL)
 			return;
 	}
 
 	eh = mtod(m, struct ether_header *);
 	ether_type = ntohs(eh->ether_type);
 
 	/*
 	 * If this frame has a VLAN tag other than 0, call vlan_input()
 	 * if its module is loaded. Otherwise, drop.
 	 */
 	if ((m->m_flags & M_VLANTAG) &&
 	    EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
 		if (ifp->if_vlantrunk == NULL) {
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			m_freem(m);
 			return;
 		}
 		KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
 		    __func__));
 		/* Clear before possibly re-entering ether_input(). */
 		m->m_flags &= ~M_PROMISC;
 		(*vlan_input_p)(ifp, m);
 		return;
 	}
 
 	/*
 	 * Pass promiscuously received frames to the upper layer if the user
 	 * requested this by setting IFF_PPROMISC. Otherwise, drop them.
 	 */
 	if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Reset layer specific mbuf flags to avoid confusing upper layers.
 	 * Strip off Ethernet header.
 	 */
 	m->m_flags &= ~M_VLANTAG;
 	m_clrprotoflags(m);
 	m_adj(m, ETHER_HDR_LEN);
 
 	/*
 	 * Dispatch frame to upper layer.
 	 */
 	switch (ether_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		if ((m = ip_fastforward(m)) == NULL)
 			return;
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			return;
 		}
 		isr = NETISR_ARP;
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		goto discard;
 	}
 	netisr_dispatch(isr, m);
 	return;
 
 discard:
 	/*
 	 * Packet is to be discarded.  If netgraph is present,
 	 * hand the packet to it for last chance processing;
 	 * otherwise dispose of it.
 	 */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_input_orphan_p != NULL,
 		    ("ng_ether_input_orphan_p is NULL"));
 		/*
 		 * Put back the ethernet header so netgraph has a
 		 * consistent view of inbound packets.
 		 */
 		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 		(*ng_ether_input_orphan_p)(ifp, m);
 		return;
 	}
 	m_freem(m);
 }
 
 /*
  * Convert Ethernet address to printable (loggable) representation.
  * This routine is for compatibility; it's better to just use
  *
  *	printf("%6D", <pointer to address>, ":");
  *
  * since there's no static buffer involved.
  */
 char *
 ether_sprintf(const u_char *ap)
 {
 	static char etherbuf[18];
 	snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":");
 	return (etherbuf);
 }
 
 /*
  * Perform common duties while attaching to interface list
  */
 void
 ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
 {
 	int i;
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
 	ifp->if_addrlen = ETHER_ADDR_LEN;
 	ifp->if_hdrlen = ETHER_HDR_LEN;
 	if_attach(ifp);
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_output = ether_output;
 	ifp->if_input = ether_input;
 	ifp->if_resolvemulti = ether_resolvemulti;
 #ifdef VIMAGE
 	ifp->if_reassign = ether_reassign;
 #endif
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = IF_Mbps(10);		/* just a default */
 	ifp->if_broadcastaddr = etherbroadcastaddr;
 
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ETHER;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 	if (ng_ether_attach_p != NULL)
 		(*ng_ether_attach_p)(ifp);
 
 	/* Announce Ethernet MAC address if non-zero. */
 	for (i = 0; i < ifp->if_addrlen; i++)
 		if (lla[i] != 0)
 			break; 
 	if (i != ifp->if_addrlen)
 		if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
 
 	uuid_ether_add(LLADDR(sdl));
 }
 
 /*
  * Perform common duties while detaching an Ethernet interface
  */
 void
 ether_ifdetach(struct ifnet *ifp)
 {
 	struct sockaddr_dl *sdl;
 
 	sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
 	uuid_ether_del(LLADDR(sdl));
 
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 #ifdef VIMAGE
 void
 ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
 {
 
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	if (ng_ether_attach_p != NULL) {
 		CURVNET_SET_QUIET(new_vnet);
 		(*ng_ether_attach_p)(ifp);
 		CURVNET_RESTORE();
 	}
 }
 #endif
 
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
 
 #if 0
 /*
  * This is for reference.  We have a table-driven version
  * of the little-endian crc32 generator, which is faster
  * than the double-loop.
  */
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	size_t i;
 	uint32_t crc;
 	int bit;
 	uint8_t data;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 			carry = (crc ^ data) & 1;
 			crc >>= 1;
 			if (carry)
 				crc = (crc ^ ETHER_CRC_POLY_LE);
 		}
 	}
 
 	return (crc);
 }
 #else
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	static const uint32_t crctab[] = {
 		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
 		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
 		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
 		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
 	};
 	size_t i;
 	uint32_t crc;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		crc ^= buf[i];
 		crc = (crc >> 4) ^ crctab[crc & 0xf];
 		crc = (crc >> 4) ^ crctab[crc & 0xf];
 	}
 
 	return (crc);
 }
 #endif
 
 uint32_t
 ether_crc32_be(const uint8_t *buf, size_t len)
 {
 	size_t i;
 	uint32_t crc, carry;
 	int bit;
 	uint8_t data;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 			carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01);
 			crc <<= 1;
 			if (carry)
 				crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
 		}
 	}
 
 	return (crc);
 }
 
 int
 ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifaddr *ifa = (struct ifaddr *) data;
 	struct ifreq *ifr = (struct ifreq *) data;
 	int error = 0;
 
 	switch (command) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, ifa);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		break;
 
 	case SIOCGIFADDR:
 		{
 			struct sockaddr *sa;
 
 			sa = (struct sockaddr *) & ifr->ifr_data;
 			bcopy(IF_LLADDR(ifp),
 			      (caddr_t) sa->sa_data, ETHER_ADDR_LEN);
 		}
 		break;
 
 	case SIOCSIFMTU:
 		/*
 		 * Set the interface MTU.
 		 */
 		if (ifr->ifr_mtu > ETHERMTU) {
 			error = EINVAL;
 		} else {
 			ifp->if_mtu = ifr->ifr_mtu;
 		}
 		break;
 	default:
 		error = EINVAL;			/* XXX netbsd has ENOTTY??? */
 		break;
 	}
 	return (error);
 }
 
 static int
 ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
 	struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	u_char *e_addr;
 
 	switch(sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if (!ETHER_IS_MULTICAST(e_addr))
 			return EADDRNOTAVAIL;
 		*llsa = 0;
 		return 0;
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return EADDRNOTAVAIL;
 		sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 		sdl->sdl_alen = ETHER_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the Ethernet multicast address used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = 0;
 			return 0;
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return EADDRNOTAVAIL;
 		sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 		sdl->sdl_alen = ETHER_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return EAFNOSUPPORT;
 	}
 }
 
 static moduledata_t ether_mod = {
 	.name = "ether",
 };
 
 void
 ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
 {
 	struct ether_vlan_header vlan;
 	struct mbuf mv, mb;
 
 	KASSERT((m->m_flags & M_VLANTAG) != 0,
 	    ("%s: vlan information not present", __func__));
 	KASSERT(m->m_len >= sizeof(struct ether_header),
 	    ("%s: mbuf not large enough for header", __func__));
 	bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
 	vlan.evl_proto = vlan.evl_encap_proto;
 	vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
 	vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
 	m->m_len -= sizeof(struct ether_header);
 	m->m_data += sizeof(struct ether_header);
 	/*
 	 * If a data link has been supplied by the caller, then we will need to
 	 * re-create a stack allocated mbuf chain with the following structure:
 	 *
 	 * (1) mbuf #1 will contain the supplied data link
 	 * (2) mbuf #2 will contain the vlan header
 	 * (3) mbuf #3 will contain the original mbuf's packet data
 	 *
 	 * Otherwise, submit the packet and vlan header via bpf_mtap2().
 	 */
 	if (data != NULL) {
 		mv.m_next = m;
 		mv.m_data = (caddr_t)&vlan;
 		mv.m_len = sizeof(vlan);
 		mb.m_next = &mv;
 		mb.m_data = data;
 		mb.m_len = dlen;
 		bpf_mtap(bp, &mb);
 	} else
 		bpf_mtap2(bp, &vlan, sizeof(vlan), m);
 	m->m_len += sizeof(struct ether_header);
 	m->m_data -= sizeof(struct ether_header);
 }
 
 struct mbuf *
 ether_vlanencap(struct mbuf *m, uint16_t tag)
 {
 	struct ether_vlan_header *evl;
 
 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 	if (m == NULL)
 		return (NULL);
 	/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
 
 	if (m->m_len < sizeof(*evl)) {
 		m = m_pullup(m, sizeof(*evl));
 		if (m == NULL)
 			return (NULL);
 	}
 
 	/*
 	 * Transform the Ethernet header into an Ethernet header
 	 * with 802.1Q encapsulation.
 	 */
 	evl = mtod(m, struct ether_vlan_header *);
 	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
 	    (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
 	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
 	evl->evl_tag = htons(tag);
 	return (m);
 }
 
 DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(ether, 1);
Index: head/sys/net/rss_config.c
===================================================================
--- head/sys/net/rss_config.c	(nonexistent)
+++ head/sys/net/rss_config.c	(revision 277331)
@@ -0,0 +1,558 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+#include "opt_pcbgroup.h"
+
+#ifndef PCBGROUP
+#error "options RSS depends on options PCBGROUP"
+#endif
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/priv.h>
+#include <sys/kernel.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/sbuf.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/netisr.h>
+#include <net/rss_config.h>
+#include <net/toeplitz.h>
+
+#if 0
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_rss.h>
+#include <netinet/in_var.h>
+
+/* for software rss hash support */
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#endif
+
+/*-
+ * Operating system parts of receiver-side scaling (RSS), which allows
+ * network cards to direct flows to particular receive queues based on hashes
+ * of header tuples.  This implementation aligns RSS buckets with connection
+ * groups at the TCP/IP layer, so each bucket is associated with exactly one
+ * group.  As a result, the group lookup structures (and lock) should have an
+ * effective affinity with exactly one CPU.
+ *
+ * Network device drivers needing to configure RSS will query this framework
+ * for parameters, such as the current RSS key, hashing policies, number of
+ * bits, and indirection table mapping hashes to buckets and CPUs.  They may
+ * provide their own supplementary information, such as queue<->CPU bindings.
+ * It is the responsibility of the network device driver to inject packets
+ * into the stack on as close to the right CPU as possible, if playing by RSS
+ * rules.
+ *
+ * TODO:
+ *
+ * - Synchronization for rss_key and other future-configurable parameters.
+ * - Event handler drivers can register to pick up RSS configuration changes.
+ * - Should we allow rss_basecpu to be configured?
+ * - Randomize key on boot.
+ * - IPv6 support.
+ * - Statistics on how often there's a misalignment between hardware
+ *   placement and pcbgroup expectations.
+ */
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_NODE(_net_inet, OID_AUTO, rss, CTLFLAG_RW, 0, "Receive-side steering");
+
+/*
+ * Toeplitz is the only required hash function in the RSS spec, so use it by
+ * default.
+ */
+static u_int	rss_hashalgo = RSS_HASH_TOEPLITZ;
+SYSCTL_INT(_net_inet_rss, OID_AUTO, hashalgo, CTLFLAG_RDTUN, &rss_hashalgo, 0,
+    "RSS hash algorithm");
+
+/*
+ * Size of the indirection table; at most 128 entries per the RSS spec.  We
+ * size it to at least 2 times the number of CPUs by default to allow useful
+ * rebalancing.  If not set explicitly with a loader tunable, we tune based
+ * on the number of CPUs present.
+ *
+ * XXXRW: buckets might be better to use for the tunable than bits.
+ */
+static u_int	rss_bits;
+SYSCTL_INT(_net_inet_rss, OID_AUTO, bits, CTLFLAG_RDTUN, &rss_bits, 0,
+    "RSS bits");
+
+static u_int	rss_mask;
+SYSCTL_INT(_net_inet_rss, OID_AUTO, mask, CTLFLAG_RD, &rss_mask, 0,
+    "RSS mask");
+
+static const u_int	rss_maxbits = RSS_MAXBITS;
+SYSCTL_INT(_net_inet_rss, OID_AUTO, maxbits, CTLFLAG_RD,
+    __DECONST(int *, &rss_maxbits), 0, "RSS maximum bits");
+
+/*
+ * RSS's own count of the number of CPUs it could be using for processing.
+ * Bounded to 64 by RSS constants.
+ */
+static u_int	rss_ncpus;
+SYSCTL_INT(_net_inet_rss, OID_AUTO, ncpus, CTLFLAG_RD, &rss_ncpus, 0,
+    "Number of CPUs available to RSS");
+
+#define	RSS_MAXCPUS	(1 << (RSS_MAXBITS - 1))
+static const u_int	rss_maxcpus = RSS_MAXCPUS;
+SYSCTL_INT(_net_inet_rss, OID_AUTO, maxcpus, CTLFLAG_RD,
+    __DECONST(int *, &rss_maxcpus), 0, "RSS maximum CPUs that can be used");
+
+/*
+ * Variable exists just for reporting rss_bits in a user-friendly way.
+ */
+static u_int	rss_buckets;
+SYSCTL_INT(_net_inet_rss, OID_AUTO, buckets, CTLFLAG_RD, &rss_buckets, 0,
+    "RSS buckets");
+
+/*
+ * Base CPU number; devices will add this to all CPU numbers returned by the
+ * RSS indirection table.  Currently unmodifiable in FreeBSD.
+ */
+static const u_int	rss_basecpu;
+SYSCTL_INT(_net_inet_rss, OID_AUTO, basecpu, CTLFLAG_RD,
+    __DECONST(int *, &rss_basecpu), 0, "RSS base CPU");
+
+/*
+ * RSS secret key, intended to prevent attacks on load-balancing.  Its
+ * effectiveness may be limited by algorithm choice and available entropy
+ * during the boot.
+ *
+ * XXXRW: And that we don't randomize it yet!
+ *
+ * This is the default Microsoft RSS specification key which is also
+ * the Chelsio T5 firmware default key.
+ */
+static uint8_t rss_key[RSS_KEYSIZE] = {
+	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
+};
+
+/*
+ * RSS hash->CPU table, which maps hashed packet headers to particular CPUs.
+ * Drivers may supplement this table with a separate CPU<->queue table when
+ * programming devices.
+ */
+struct rss_table_entry {
+	uint8_t		rte_cpu;	/* CPU affinity of bucket. */
+};
+static struct rss_table_entry	rss_table[RSS_TABLE_MAXLEN];
+
+static void
+rss_init(__unused void *arg)
+{
+	u_int i;
+	u_int cpuid;
+
+	/*
+	 * Validate tunables, coerce to sensible values.
+	 */
+	switch (rss_hashalgo) {
+	case RSS_HASH_TOEPLITZ:
+	case RSS_HASH_NAIVE:
+		break;
+
+	default:
+		printf("%s: invalid RSS hashalgo %u, coercing to %u\n",
+		    __func__, rss_hashalgo, RSS_HASH_TOEPLITZ);
+		rss_hashalgo = RSS_HASH_TOEPLITZ;
+	}
+
+	/*
+	 * Count available CPUs.
+	 *
+	 * XXXRW: Note incorrect assumptions regarding contiguity of this set
+	 * elsewhere.
+	 */
+	rss_ncpus = 0;
+	for (i = 0; i <= mp_maxid; i++) {
+		if (CPU_ABSENT(i))
+			continue;
+		rss_ncpus++;
+	}
+	if (rss_ncpus > RSS_MAXCPUS)
+		rss_ncpus = RSS_MAXCPUS;
+
+	/*
+	 * Tune RSS table entries to be no less than 2x the number of CPUs
+	 * -- unless we're running uniprocessor, in which case there's not
+	 * much point in having buckets to rearrange for load-balancing!
+	 */
+	if (rss_ncpus > 1) {
+		if (rss_bits == 0)
+			rss_bits = fls(rss_ncpus - 1) + 1;
+
+		/*
+		 * Microsoft limits RSS table entries to 128, so apply that
+		 * limit to both auto-detected CPU counts and user-configured
+		 * ones.
+		 */
+		if (rss_bits == 0 || rss_bits > RSS_MAXBITS) {
+			printf("%s: RSS bits %u not valid, coercing to %u\n",
+			    __func__, rss_bits, RSS_MAXBITS);
+			rss_bits = RSS_MAXBITS;
+		}
+
+		/*
+		 * Figure out how many buckets to use; warn if less than the
+		 * number of configured CPUs, although this is not a fatal
+		 * problem.
+		 */
+		rss_buckets = (1 << rss_bits);
+		if (rss_buckets < rss_ncpus)
+			printf("%s: WARNING: rss_buckets (%u) less than "
+			    "rss_ncpus (%u)\n", __func__, rss_buckets,
+			    rss_ncpus);
+		rss_mask = rss_buckets - 1;
+	} else {
+		rss_bits = 0;
+		rss_buckets = 1;
+		rss_mask = 0;
+	}
+
+	/*
+	 * Set up initial CPU assignments: round-robin by default.
+	 */
+	cpuid = CPU_FIRST();
+	for (i = 0; i < rss_buckets; i++) {
+		rss_table[i].rte_cpu = cpuid;
+		cpuid = CPU_NEXT(cpuid);
+	}
+
+	/*
+	 * Randomize rss_key.
+	 *
+	 * XXXRW: Not yet.  If nothing else, will require an rss_isbadkey()
+	 * loop to check for "bad" RSS keys.
+	 */
+}
+SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL);
+
+static uint32_t
+rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen,
+    const uint8_t *data)
+{
+	uint32_t v;
+	u_int i;
+
+	v = 0;
+	for (i = 0; i < keylen; i++)
+		v += key[i];
+	for (i = 0; i < datalen; i++)
+		v += data[i];
+	return (v);
+}
+
+uint32_t
+rss_hash(u_int datalen, const uint8_t *data)
+{
+
+	switch (rss_hashalgo) {	/* hashes data[0..datalen) with rss_key */
+	case RSS_HASH_TOEPLITZ:
+		return (toeplitz_hash(sizeof(rss_key), rss_key, datalen,
+		    data));
+
+	case RSS_HASH_NAIVE:
+		return (rss_naive_hash(sizeof(rss_key), rss_key, datalen,
+		    data));
+
+	default:
+		panic("%s: unsupported/unknown hashalgo %u", __func__,
+		    rss_hashalgo);	/* rss_hashalgo is u_int, hence %u */
+	}
+}
+
+/*
+ * Query the number of RSS bits in use.
+ */
+u_int
+rss_getbits(void)
+{
+
+	return (rss_bits);
+}
+
+/*
+ * Query the RSS bucket associated with an RSS hash.
+ */
+u_int
+rss_getbucket(u_int hash)
+{
+
+	return (hash & rss_mask);
+}
+
+/*
+ * Query the RSS layer bucket associated with the given
+ * entry in the RSS hash space.
+ *
+ * The RSS indirection table is 0 .. rss_buckets-1,
+ * covering the low 'rss_bits' of the total 128 slot
+ * RSS indirection table.  So just mask off rss_bits and
+ * return that.
+ *
+ * NIC drivers can then iterate over the 128 slot RSS
+ * indirection table and fetch which RSS bucket to
+ * map it to.  This will typically be a CPU queue.
+ */
+u_int
+rss_get_indirection_to_bucket(u_int index)
+{
+
+	return (index & rss_mask);
+}
+
+/*
+ * Query the RSS CPU associated with an RSS bucket.
+ */
+u_int
+rss_getcpu(u_int bucket)
+{
+
+	return (rss_table[bucket].rte_cpu);
+}
+
+/*
+ * netisr CPU affinity lookup given just the hash and hashtype.
+ */
+u_int
+rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type)
+{
+
+	switch (hash_type) {
+	case M_HASHTYPE_RSS_IPV4:
+	case M_HASHTYPE_RSS_TCP_IPV4:
+	case M_HASHTYPE_RSS_UDP_IPV4:
+	case M_HASHTYPE_RSS_IPV6:
+	case M_HASHTYPE_RSS_TCP_IPV6:
+	case M_HASHTYPE_RSS_UDP_IPV6:
+		return (rss_getcpu(rss_getbucket(hash_val)));
+	default:
+		return (NETISR_CPUID_NONE);
+	}
+}
+
+/*
+ * Query the RSS bucket associated with the given hash value and
+ * type.
+ */
+int
+rss_hash2bucket(uint32_t hash_val, uint32_t hash_type, uint32_t *bucket_id)
+{
+
+	switch (hash_type) {
+	case M_HASHTYPE_RSS_IPV4:
+	case M_HASHTYPE_RSS_TCP_IPV4:
+	case M_HASHTYPE_RSS_UDP_IPV4:
+	case M_HASHTYPE_RSS_IPV6:
+	case M_HASHTYPE_RSS_TCP_IPV6:
+	case M_HASHTYPE_RSS_UDP_IPV6:
+		*bucket_id = rss_getbucket(hash_val);
+		return (0);
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * netisr CPU affinity lookup routine for use by protocols.
+ */
+struct mbuf *
+rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
+{
+
+	M_ASSERTPKTHDR(m);
+	*cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m));
+	return (m);
+}
+
+int
+rss_m2bucket(struct mbuf *m, uint32_t *bucket_id)
+{
+
+	M_ASSERTPKTHDR(m);
+
+	return(rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
+	    bucket_id));
+}
+
+/*
+ * Query the RSS hash algorithm.
+ */
+u_int
+rss_gethashalgo(void)
+{
+
+	return (rss_hashalgo);
+}
+
+/*
+ * Query the current RSS key; likely to be used by device drivers when
+ * configuring hardware RSS.  Caller must pass an array of size RSS_KEYSIZE.
+ *
+ * XXXRW: Perhaps we should do the accept-a-length-and-truncate thing?
+ */
+void
+rss_getkey(uint8_t *key)
+{
+
+	bcopy(rss_key, key, sizeof(rss_key));
+}
+
+/*
+ * Query the number of buckets; this may be used by both network device
+ * drivers, which will need to populate hardware shadows of the software
+ * indirection table, and the network stack itself (such as when deciding how
+ * many connection groups to allocate).
+ */
+u_int
+rss_getnumbuckets(void)
+{
+
+	return (rss_buckets);
+}
+
+/*
+ * Query the number of CPUs in use by RSS; may be useful to device drivers
+ * trying to figure out how to map a larger number of CPUs into a smaller
+ * number of receive queues.
+ */
+u_int
+rss_getnumcpus(void)
+{
+
+	return (rss_ncpus);
+}
+
+/*
+ * Return the supported RSS hash configuration.
+ *
+ * NICs should query this to determine what to configure in their redirection
+ * matching table.
+ */
+inline u_int
+rss_gethashconfig(void)
+{
+
+	/* Return 4-tuple for TCP; 2-tuple for others */
+	/*
+	 * UDP may fragment more often than TCP and thus we'll end up with
+	 * NICs returning 2-tuple fragments.
+	 * udp_init() and udplite_init() both currently initialise things
+	 * as 2-tuple.
+	 * So for now disable UDP 4-tuple hashing until all of the other
+	 * pieces are in place.
+	 */
+	return (
+	    RSS_HASHTYPE_RSS_IPV4
+	|    RSS_HASHTYPE_RSS_TCP_IPV4
+	|    RSS_HASHTYPE_RSS_IPV6
+	|    RSS_HASHTYPE_RSS_TCP_IPV6
+	|    RSS_HASHTYPE_RSS_IPV6_EX
+	|    RSS_HASHTYPE_RSS_TCP_IPV6_EX
+#if 0
+	|    RSS_HASHTYPE_RSS_UDP_IPV4
+	|    RSS_HASHTYPE_RSS_UDP_IPV4_EX
+	|    RSS_HASHTYPE_RSS_UDP_IPV6
+	|    RSS_HASHTYPE_RSS_UDP_IPV6_EX
+#endif
+	);
+}
+
+/*
+ * XXXRW: Confirm that sysctl -a won't dump this keying material, don't want
+ * it appearing in debugging output unnecessarily.
+ */
+static int
+sysctl_rss_key(SYSCTL_HANDLER_ARGS)
+{
+	uint8_t temp_rss_key[RSS_KEYSIZE];
+	int error;
+
+	error = priv_check(req->td, PRIV_NETINET_HASHKEY);
+	if (error)
+		return (error);
+
+	bcopy(rss_key, temp_rss_key, sizeof(temp_rss_key));
+	error = sysctl_handle_opaque(oidp, temp_rss_key,
+	    sizeof(temp_rss_key), req);
+	if (error)
+		return (error);
+	if (req->newptr != NULL) {
+		/* XXXRW: Not yet. */
+		return (EINVAL);
+	}
+	return (0);
+}
+SYSCTL_PROC(_net_inet_rss, OID_AUTO, key,
+    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rss_key,
+    "", "RSS keying material");
+
+static int
+sysctl_rss_bucket_mapping(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf *sb;
+	int error;
+	u_int i;	/* unsigned to match rss_buckets */
+
+	/* Output is a space-separated list of "bucket:cpu" pairs. */
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+	sb = sbuf_new_for_sysctl(NULL, NULL, 512, req);
+	if (sb == NULL)
+		return (ENOMEM);
+	for (i = 0; i < rss_buckets; i++) {
+		sbuf_printf(sb, "%s%u:%u", i == 0 ? "" : " ",
+		    i,
+		    rss_getcpu(i));
+	}
+	error = sbuf_finish(sb);
+	sbuf_delete(sb);
+
+	return (error);
+}
+SYSCTL_PROC(_net_inet_rss, OID_AUTO, bucket_mapping,
+    CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
+    sysctl_rss_bucket_mapping, "", "RSS bucket -> CPU mapping");

Property changes on: head/sys/net/rss_config.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/net/rss_config.h
===================================================================
--- head/sys/net/rss_config.h	(nonexistent)
+++ head/sys/net/rss_config.h	(revision 277331)
@@ -0,0 +1,123 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_RSS_CONFIG_H_
+#define	_NET_RSS_CONFIG_H_
+
+#include <netinet/in.h>		/* in_addr_t */
+
+/*
+ * Supported RSS hash functions.
+ */
+#define	RSS_HASH_NAIVE		0x00000001	/* Poor but fast hash. */
+#define	RSS_HASH_TOEPLITZ	0x00000002	/* Required by RSS. */
+#define	RSS_HASH_CRC32		0x00000004	/* Future; some NICs do it. */
+
+#define	RSS_HASH_MASK		(RSS_HASH_NAIVE | RSS_HASH_TOEPLITZ)
+
+/*
+ * Instances of struct inpcbinfo declare an RSS hash type indicating what
+ * header fields are covered.
+ */
+#define	RSS_HASHFIELDS_NONE		0
+#define	RSS_HASHFIELDS_4TUPLE		1
+#define	RSS_HASHFIELDS_2TUPLE		2
+
+/*
+ * Define RSS representations of the M_HASHTYPE_* values, representing
+ * which particular bits are supported.  The NICs can then use this to
+ * calculate which hash types to enable and which not to enable.
+ *
+ * The fact that these line up with M_HASHTYPE_* is not to be relied
+ * upon.
+ */
+#define	RSS_HASHTYPE_RSS_IPV4		(1 << 1)	/* IPv4 2-tuple */
+#define	RSS_HASHTYPE_RSS_TCP_IPV4	(1 << 2)	/* TCPv4 4-tuple */
+#define	RSS_HASHTYPE_RSS_IPV6		(1 << 3)	/* IPv6 2-tuple */
+#define	RSS_HASHTYPE_RSS_TCP_IPV6	(1 << 4)	/* TCPv6 4-tuple */
+#define	RSS_HASHTYPE_RSS_IPV6_EX	(1 << 5)	/* IPv6 2-tuple + ext hdrs */
+#define	RSS_HASHTYPE_RSS_TCP_IPV6_EX	(1 << 6)	/* TCPv6 4-tuple + ext hdrs */
+#define	RSS_HASHTYPE_RSS_UDP_IPV4	(1 << 7)	/* IPv4 UDP 4-tuple */
+#define	RSS_HASHTYPE_RSS_UDP_IPV4_EX	(1 << 8)	/* IPv4 UDP 4-tuple + ext hdrs */
+#define	RSS_HASHTYPE_RSS_UDP_IPV6	(1 << 9)	/* IPv6 UDP 4-tuple */
+#define	RSS_HASHTYPE_RSS_UDP_IPV6_EX	(1 << 10)	/* IPv6 UDP 4-tuple + ext hdrs */
+
+/*
+ * Compile-time limits on the size of the indirection table.
+ */
+#define	RSS_MAXBITS	7
+#define	RSS_TABLE_MAXLEN	(1 << RSS_MAXBITS)
+
+/*
+ * Maximum key size used throughout.  It's OK for hardware to use only the
+ * first 16 bytes, which is all that's required for IPv4.
+ */
+#define	RSS_KEYSIZE	40
+
+/*
+ * For RSS hash methods that do a software hash on an mbuf, the packet
+ * direction (ingress / egress) is required.
+ *
+ * The default direction (INGRESS) is the "receive into the NIC" - ie,
+ * what the hardware is hashing on.
+ */
+#define	RSS_HASH_PKT_INGRESS	0
+#define	RSS_HASH_PKT_EGRESS	1
+
+/*
+ * Device driver interfaces to query RSS properties that must be programmed
+ * into hardware.
+ */
+u_int	rss_getbits(void);
+u_int	rss_getbucket(u_int hash);
+u_int	rss_get_indirection_to_bucket(u_int index);
+u_int	rss_getcpu(u_int bucket);
+void	rss_getkey(uint8_t *key);
+u_int	rss_gethashalgo(void);
+u_int	rss_getnumbuckets(void);
+u_int	rss_getnumcpus(void);
+u_int	rss_gethashconfig(void);
+
+/*
+ * Hash calculation functions.
+ */
+uint32_t	rss_hash(u_int datalen, const uint8_t *data);
+
+/*
+ * Network stack interface to query desired CPU affinity of a packet.
+ */
+struct mbuf * rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid);
+u_int	rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type);
+int	rss_hash2bucket(uint32_t hash_val, uint32_t hash_type,
+	    uint32_t *bucket_id);
+int	rss_m2bucket(struct mbuf *m, uint32_t *bucket_id);
+
+#endif /* !_NET_RSS_CONFIG_H_ */

Property changes on: head/sys/net/rss_config.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/net/toeplitz.c
===================================================================
--- head/sys/net/toeplitz.c	(nonexistent)
+++ head/sys/net/toeplitz.c	(revision 277331)
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2010 David Malone <dwmalone@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#include <net/rss_config.h>
+#include <net/toeplitz.h>
+
+#include <sys/systm.h>
+
+uint32_t
+toeplitz_hash(u_int keylen, const uint8_t *key, u_int datalen,
+    const uint8_t *data)
+{
+	uint32_t hash = 0, v;	/* v: 32-bit window sliding over the key */
+	u_int i, b;
+
+	/* XXXRW: Perhaps an assertion about key length vs. data length? */
+	/* Needs keylen >= 4; key bits at or past keylen count as zero. */
+	v = ((uint32_t)key[0] << 24) | ((uint32_t)key[1] << 16) |
+	    ((uint32_t)key[2] << 8) | key[3];
+	for (i = 0; i < datalen; i++) {
+		for (b = 0; b < 8; b++) {	/* data bits, MSB first */
+			if (data[i] & (1 << (7 - b)))
+				hash ^= v;
+			v <<= 1;	/* advance the window by one key bit */
+			if (i + 4 < keylen && (key[i + 4] & (1 << (7 - b))))
+				v |= 1;
+		}
+	}
+	return (hash);
+}

Property changes on: head/sys/net/toeplitz.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/net/toeplitz.h
===================================================================
--- head/sys/net/toeplitz.h	(nonexistent)
+++ head/sys/net/toeplitz.h	(revision 277331)
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2010 David Malone <dwmalone@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TOEPLITZ_H_
+#define	_NETINET_TOEPLITZ_H_
+
+/*
+ * Toeplitz (RSS) hash algorithm; possibly we should cache intermediate
+ * results between runs, in which case we'll need explicit init/destroy and
+ * state management.
+ */
+uint32_t	toeplitz_hash(u_int keylen, const uint8_t *key,
+		    u_int datalen, const uint8_t *data);
+
+#endif /* !_NETINET_TOEPLITZ_H_ */

Property changes on: head/sys/net/toeplitz.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/netinet/toeplitz.c
===================================================================
--- head/sys/netinet/toeplitz.c	(revision 277330)
+++ head/sys/netinet/toeplitz.c	(nonexistent)
@@ -1,58 +0,0 @@
-/*-
- * Copyright (c) 2010 David Malone <dwmalone@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/types.h>
-
-#include <netinet/in_rss.h>
-#include <netinet/toeplitz.h>
-
-#include <sys/systm.h>
-
-uint32_t
-toeplitz_hash(u_int keylen, const uint8_t *key, u_int datalen,
-    const uint8_t *data)
-{
-	uint32_t hash = 0, v;
-	u_int i, b;
-
-	/* XXXRW: Perhaps an assertion about key length vs. data length? */
-
-	v = (key[0]<<24) + (key[1]<<16) + (key[2] <<8) + key[3];
-	for (i = 0; i < datalen; i++) {
-		for (b = 0; b < 8; b++) {
-			if (data[i] & (1<<(7-b)))
-				hash ^= v;
-			v <<= 1;
-			if ((i + 4) < RSS_KEYSIZE &&
-			    (key[i+4] & (1<<(7-b))))
-				v |= 1;
-		}
-	}
-	return (hash);
-}

Property changes on: head/sys/netinet/toeplitz.c
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: head/sys/netinet/toeplitz.h
===================================================================
--- head/sys/netinet/toeplitz.h	(revision 277330)
+++ head/sys/netinet/toeplitz.h	(nonexistent)
@@ -1,40 +0,0 @@
-/*-
- * Copyright (c) 2010 David Malone <dwmalone@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _NETINET_TOEPLITZ_H_
-#define	_NETINET_TOEPLITZ_H_
-
-/*
- * Toeplitz (RSS) hash algorithm; possibly we should cache intermediate
- * results between runs, in which case we'll need explicit init/destroy and
- * state management.
- */
-uint32_t	toeplitz_hash(u_int keylen, const uint8_t *key,
-		    u_int datalen, const uint8_t *data);
-
-#endif /* !_NETINET_TOEPLITZ_H_ */

Property changes on: head/sys/netinet/toeplitz.h
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: head/sys/netinet/in_pcb.c
===================================================================
--- head/sys/netinet/in_pcb.c	(revision 277330)
+++ head/sys/netinet/in_pcb.c	(revision 277331)
@@ -1,2618 +1,2618 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993, 1995
  *	The Regents of the University of California.
  * Copyright (c) 2007-2009 Robert N. M. Watson
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_ipsec.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_pcbgroup.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/callout.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/refcount.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
+#include <net/rss_config.h>
 #include <net/vnet.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
-#include <netinet/in_rss.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #endif
 #ifdef INET
 #include <netinet/in_var.h>
 #endif
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #endif /* INET6 */
 
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/key.h>
 #endif /* IPSEC */
 
 #include <security/mac/mac_framework.h>
 
 static struct callout	ipport_tick_callout;
 
 /*
  * These configure the range of local port addresses assigned to
  * "unspecified" outgoing connections/packets/whatever.
  */
 VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1;	/* 1023 */
 VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART;	/* 600 */
 VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST;	/* 10000 */
 VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST;	/* 65535 */
 VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO;	/* 49152 */
 VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO;	/* 65535 */
 
 /*
  * Reserved ports accessible only to root. There are significant
  * security considerations that must be accounted for when changing these,
  * but the security benefits can be great. Please be careful.
  */
 VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1;	/* 1023 */
 VNET_DEFINE(int, ipport_reservedlow);
 
 /* Variables dealing with random ephemeral port allocation. */
 VNET_DEFINE(int, ipport_randomized) = 1;	/* user controlled via sysctl */
 VNET_DEFINE(int, ipport_randomcps) = 10;	/* user controlled via sysctl */
 VNET_DEFINE(int, ipport_randomtime) = 45;	/* user controlled via sysctl */
 VNET_DEFINE(int, ipport_stoprandom);		/* toggled by ipport_tick */
 VNET_DEFINE(int, ipport_tcpallocs);
 static VNET_DEFINE(int, ipport_tcplastcount);
 
 #define	V_ipport_tcplastcount		VNET(ipport_tcplastcount)
 
 static void	in_pcbremlists(struct inpcb *inp);
 #ifdef INET
 static struct inpcb	*in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
 			    struct in_addr faddr, u_int fport_arg,
 			    struct in_addr laddr, u_int lport_arg,
 			    int lookupflags, struct ifnet *ifp);
 
 #define RANGECHK(var, min, max) \
 	if ((var) < (min)) { (var) = (min); } \
 	else if ((var) > (max)) { (var) = (max); }
 
 /*
  * Sysctl handler shared by the automatic port-range knobs: hand the
  * request to sysctl_handle_int() and, when that succeeds, clamp every
  * automatic port-range variable back into its permitted interval.
  */
 static int
 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
 {
 	int rc;
 
 	rc = sysctl_handle_int(oidp, arg1, arg2, req);
 	if (rc != 0)
 		return (rc);
 
 	/* The low ranges are confined to [1, IPPORT_RESERVED - 1]. */
 	RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
 	RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
 
 	/* All other ranges are confined to [IPPORT_RESERVED, IPPORT_MAX]. */
 	RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
 	RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
 	RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
 	RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
 
 	return (0);
 }
 
 #undef RANGECHK
 
 /*
  * net.inet.ip.portrange.*: tunables for automatic local port selection.
  *
  * Every variable exported here is defined with VNET_DEFINE(), so every
  * entry must carry CTLFLAG_VNET for the sysctl code to resolve the
  * per-vnet instance rather than the address of the vnet symbol itself.
  */
 static SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0,
     "IP Ports");
 
 /* Bounds of the automatic ranges; range-checked by the shared handler. */
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_lowfirstauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_lowlastauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_firstauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_lastauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_hifirstauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_hilastauto), 0, &sysctl_net_ipport_check, "I", "");
 
 /* Bounds of the reserved port range (both marked CTLFLAG_SECURE). */
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
 	CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
 	&VNET_NAME(ipport_reservedhigh), 0, "");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
 	CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
 	&VNET_NAME(ipport_reservedlow), 0, "");
 
 /* Random ephemeral port allocation controls. */
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized,
 	CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps,
 	CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
 	"allocations before switching to a sequental one");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
 	CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ipport_randomtime), 0,
 	"Minimum time to keep sequental port "
 	"allocation before switching to a random one");
 #endif /* INET */
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
  *
  * NOTE: It is assumed that most of these functions will be called with
  * the pcbinfo lock held, and often, the inpcb lock held, as these utility
  * functions often modify hash chains or addresses in pcbs.
  */
 
 /*
  * Initialize an inpcbinfo -- we should be able to reduce the number of
  * arguments in time.
  *
  * Sets up the info and hash locks, the (empty) pcb list, the connection
  * and port hash tables, the pcbgroup state when PCBGROUP is configured,
  * and the UMA zone from which inpcbs are allocated.
  *
  *	pcbinfo			structure to initialize
  *	name			name given to the info lock
  *	listhead		caller-provided list head for all pcbs
  *	hash_nelements		size request for the connection hash
  *	porthash_nelements	size request for the port hash
  *	inpcbzone_name		name of the UMA zone
  *	inpcbzone_init/fini	UMA init/fini hooks for zone items
  *	inpcbzone_flags		flags passed to uma_zcreate()
  *	hashfields		passed through to in_pcbgroup_init()
  */
 void
 in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
     struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
     char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
     uint32_t inpcbzone_flags, u_int hashfields)
 {
 
 	INP_INFO_LOCK_INIT(pcbinfo, name);
 	INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash");	/* XXXRW: argument? */
 #ifdef VIMAGE
 	/* Remember which vnet this pcbinfo belongs to. */
 	pcbinfo->ipi_vnet = curvnet;
 #endif
 	pcbinfo->ipi_listhead = listhead;
 	LIST_INIT(pcbinfo->ipi_listhead);
 	pcbinfo->ipi_count = 0;
 	/* hashinit() hands back the table and stores its mask. */
 	pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
 	    &pcbinfo->ipi_hashmask);
 	pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
 	    &pcbinfo->ipi_porthashmask);
 #ifdef PCBGROUP
 	in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
 #endif
 	pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
 	    NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
 	    inpcbzone_flags);
 	/* Cap the zone at maxsockets and warn when the cap is hit. */
 	uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
 	uma_zone_set_warning(pcbinfo->ipi_zone,
 	    "kern.ipc.maxsockets limit reached");
 }
 
 /*
  * Destroy an inpcbinfo.
  *
  * Releases what in_pcbinfo_init() set up: both hash tables, the pcbgroup
  * state when PCBGROUP is configured, the UMA zone, and finally the hash
  * and info locks.  The pcbinfo must no longer contain any pcbs; this is
  * asserted via ipi_count.
  */
 void
 in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
 {
 
 	KASSERT(pcbinfo->ipi_count == 0,
 	    ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));
 
 	hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
 	hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
 	    pcbinfo->ipi_porthashmask);
 #ifdef PCBGROUP
 	in_pcbgroup_destroy(pcbinfo);
 #endif
 	uma_zdestroy(pcbinfo->ipi_zone);
 	/* The locks go last, after everything they cover has been torn down. */
 	INP_HASH_LOCK_DESTROY(pcbinfo);
 	INP_INFO_LOCK_DESTROY(pcbinfo);
 }
 
 /*
  * Allocate a PCB and associate it with the socket.
  * On success return with the PCB locked.
  *
  * The caller must hold the pcbinfo write lock (asserted).  Returns 0 on
  * success, ENOBUFS if the zone allocation fails, or the error reported by
  * mac_inpcb_init() / ipsec_init_policy() when MAC / IPSEC are configured.
  * On failure the partially constructed pcb is freed and so->so_pcb is
  * left untouched.
  */
 int
 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
 {
 	struct inpcb *inp;
 	int error;
 
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	error = 0;
 	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
 	if (inp == NULL)
 		return (ENOBUFS);
 	/* Only the first inp_zero_size bytes are cleared. */
 	bzero(inp, inp_zero_size);
 	inp->inp_pcbinfo = pcbinfo;
 	inp->inp_socket = so;
 	/* Take our own reference on the socket's credential. */
 	inp->inp_cred = crhold(so->so_cred);
 	inp->inp_inc.inc_fibnum = so->so_fibnum;
 #ifdef MAC
 	error = mac_inpcb_init(inp, M_NOWAIT);
 	if (error != 0)
 		goto out;
 	mac_inpcb_create(so, inp);
 #endif
 #ifdef IPSEC
 	error = ipsec_init_policy(so, &inp->inp_sp);
 	if (error != 0) {
 #ifdef MAC
 		/* Undo the MAC label set up just above. */
 		mac_inpcb_destroy(inp);
 #endif
 		goto out;
 	}
 #endif /*IPSEC*/
 #ifdef INET6
 	if (INP_SOCKAF(so) == AF_INET6) {
 		inp->inp_vflag |= INP_IPV6PROTO;
 		if (V_ip6_v6only)
 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
 	}
 #endif
 	/* From here on nothing can fail: publish the pcb. */
 	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
 	pcbinfo->ipi_count++;
 	so->so_pcb = (caddr_t)inp;
 #ifdef INET6
 	if (V_ip6_auto_flowlabel)
 		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 #endif
 	INP_WLOCK(inp);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	refcount_init(&inp->inp_refcount, 1);	/* Reference from inpcbinfo */
 #if defined(IPSEC) || defined(MAC)
 	/* The label exists only when one of the failing paths is compiled in. */
 out:
 	if (error != 0) {
 		crfree(inp->inp_cred);
 		uma_zfree(pcbinfo->ipi_zone, inp);
 	}
 #endif
 	return (error);
 }
 
 #ifdef INET
 /*
  * Bind an unbound IPv4 pcb to the local address/port described by nam
  * (NULL requests an automatically chosen port) and enter it into the
  * hash.  The inpcb and pcbinfo hash write locks must be held.
  *
  * Returns 0 on success, EINVAL if the pcb already has a local address or
  * port, the error from in_pcbbind_setup(), or EAGAIN if the hash
  * insertion fails (in which case the local endpoint is reset).
  */
 int
 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
 {
 	int rc, wildport;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	if (inp->inp_laddr.s_addr != INADDR_ANY || inp->inp_lport != 0)
 		return (EINVAL);
 
 	/*
 	 * Record whether the caller left the port unspecified before
 	 * calling in_pcbbind_setup(), which may modify *nam.
 	 */
 	if (nam == NULL)
 		wildport = 1;
 	else
 		wildport = (((struct sockaddr_in *)nam)->sin_port == 0);
 
 	rc = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
 	    &inp->inp_lport, cred);
 	if (rc != 0)
 		return (rc);
 
 	if (in_pcbinshash(inp) != 0) {
 		/* Roll back the endpoint written by in_pcbbind_setup(). */
 		inp->inp_lport = 0;
 		inp->inp_laddr.s_addr = INADDR_ANY;
 		return (EAGAIN);
 	}
 
 	if (wildport)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 #endif
 
 /*
  * Select a local port (number) to use.
  */
 #if defined(INET) || defined(INET6)
 /*
  * Pick an unused local port for inp from the range selected by its
  * INP_HIGHPORT / INP_LOWPORT flags (or the default range otherwise).
  *
  *	inp		pcb the port is chosen for; must be locked
  *	laddrp		IPv4 local address to check against; must not be
  *			NULL for an IPv4-only pcb
  *	lportp		receives the chosen port in network byte order
  *	cred		credential used for the reserved-port privilege
  *			check and passed to the pcb lookups
  *	lookupflags	flags passed to in_pcblookup_local() /
  *			in6_pcblookup_local()
  *
  * Returns 0 on success, EADDRNOTAVAIL when every port in the range is in
  * use, or the error from priv_check_cred() for INP_LOWPORT requests.
  * On failure *lportp is not modified.
  */
 int
 in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
     struct ucred *cred, int lookupflags)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *tmpinp;
 	unsigned short *lastport;
 	int count, dorandom, error;
 	u_short aux, first, last, lport;
 #ifdef INET
 	struct in_addr laddr;
 #endif
 
 	pcbinfo = inp->inp_pcbinfo;
 
 	/*
 	 * Because no actual state changes occur here, a global write lock on
 	 * the pcbinfo isn't required.
 	 */
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	if (inp->inp_flags & INP_HIGHPORT) {
 		first = V_ipport_hifirstauto;	/* sysctl */
 		last  = V_ipport_hilastauto;
 		lastport = &pcbinfo->ipi_lasthi;
 	} else if (inp->inp_flags & INP_LOWPORT) {
 		error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
 		if (error)
 			return (error);
 		first = V_ipport_lowfirstauto;	/* 1023 */
 		last  = V_ipport_lowlastauto;	/* 600 */
 		lastport = &pcbinfo->ipi_lastlow;
 	} else {
 		first = V_ipport_firstauto;	/* sysctl */
 		last  = V_ipport_lastauto;
 		lastport = &pcbinfo->ipi_lastport;
 	}
 	/*
 	 * For UDP(-Lite), use random port allocation as long as the user
 	 * allows it.  For TCP (and as of yet unknown) connections,
 	 * use random port allocation only if the user allows it AND
 	 * ipport_tick() allows it.
 	 */
 	if (V_ipport_randomized &&
 		(!V_ipport_stoprandom || pcbinfo == &V_udbinfo ||
 		pcbinfo == &V_ulitecbinfo))
 		dorandom = 1;
 	else
 		dorandom = 0;
 	/*
 	 * It makes no sense to do random port allocation if
 	 * we have the only port available.
 	 */
 	if (first == last)
 		dorandom = 0;
 	/*
 	 * Make sure to not include UDP(-Lite) packets in the count.
 	 * Both comparisons must hold: with "||" the test is always true,
 	 * because pcbinfo cannot equal both addresses at once.
 	 */
 	if (pcbinfo != &V_udbinfo && pcbinfo != &V_ulitecbinfo)
 		V_ipport_tcpallocs++;
 	/*
 	 * Instead of having two loops further down counting up or down
 	 * make sure that first is always <= last and go with only one
 	 * code path implementing all logic.
 	 */
 	if (first > last) {
 		aux = first;
 		first = last;
 		last = aux;
 	}
 
 #ifdef INET
 	/* Make the compiler happy. */
 	laddr.s_addr = 0;
 	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) {
 		KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p",
 		    __func__, inp));
 		laddr = *laddrp;
 	}
 #endif
 	tmpinp = NULL;	/* Make compiler happy. */
 	lport = *lportp;
 
 	/* first != last whenever dorandom is set, so the modulus is nonzero. */
 	if (dorandom)
 		*lastport = first + (arc4random() % (last - first));
 
 	count = last - first;
 
 	/*
 	 * Walk the range starting after *lastport, wrapping to first, until
 	 * a port with no conflicting local binding is found or every port
 	 * in the range has been tried.
 	 */
 	do {
 		if (count-- < 0)	/* completely used? */
 			return (EADDRNOTAVAIL);
 		++*lastport;
 		if (*lastport < first || *lastport > last)
 			*lastport = first;
 		lport = htons(*lastport);
 
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV6) != 0)
 			tmpinp = in6_pcblookup_local(pcbinfo,
 			    &inp->in6p_laddr, lport, lookupflags, cred);
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 			tmpinp = in_pcblookup_local(pcbinfo, laddr,
 			    lport, lookupflags, cred);
 #endif
 	} while (tmpinp != NULL);
 
 #ifdef INET
 	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4)
 		laddrp->s_addr = laddr.s_addr;
 #endif
 	*lportp = lport;
 
 	return (0);
 }
 
 /*
  * Translate the reuse flags cached in inp_flags2 (INP_REUSEPORT,
  * INP_REUSEADDR) into the corresponding SO_REUSEPORT / SO_REUSEADDR
  * socket option bits.
  */
 short
 inp_so_options(const struct inpcb *inp)
 {
 	short opts = 0;
 
 	if (inp->inp_flags2 & INP_REUSEPORT)
 		opts |= SO_REUSEPORT;
 	if (inp->inp_flags2 & INP_REUSEADDR)
 		opts |= SO_REUSEADDR;
 
 	return (opts);
 }
 #endif /* INET || INET6 */
 
 /*
  * Decide whether a new socket may share a binding with an existing one
  * under the INP_BINDMULTI rules.
  *
  * ni points to the new inp.
  * oi points to the existing inp.
  *
  * Returns 1 when the bind is allowed and 0 when it must be refused.  A
  * new inp without INP_BINDMULTI always passes; one with INP_BINDMULTI
  * passes only if the existing inp has the same uid and also has
  * INP_BINDMULTI set.
  */
 int
 in_pcbbind_check_bindmulti(const struct inpcb *ni, const struct inpcb *oi)
 {
 
 	/* Nothing to verify unless the new inp asked for BINDMULTI. */
 	if ((ni->inp_flags2 & INP_BINDMULTI) == 0)
 		return (1);
 
 	/* Both sockets must belong to the same user. */
 	if (ni->inp_cred->cr_uid != oi->inp_cred->cr_uid)
 		return (0);
 
 	/* The existing inp must have been bound with BINDMULTI too. */
 	if ((oi->inp_flags2 & INP_BINDMULTI) == 0)
 		return (0);
 
 	return (1);
 }
 
 #ifdef INET
 /*
  * Set up a bind operation on a PCB, performing port allocation
  * as required, but do not actually modify the PCB. Callers can
  * either complete the bind by setting inp_laddr/inp_lport and
  * calling in_pcbinshash(), or they can just use the resulting
  * port and address to authorise the sending of a once-off packet.
  *
  * On entry *laddrp and *lportp hold the current local address and port;
  * on success they receive the address and port to bind to (port in
  * network byte order).  nam may be NULL, in which case only a port is
  * selected.
  *
  * Returns 0 on success, otherwise an errno value (EADDRNOTAVAIL, EINVAL,
  * EACCES, EADDRINUSE, or an error from prison_local_ip4() /
  * in_pcb_lport()).
  *
  * On error, the values of *laddrp and *lportp are not changed.
  */
 int
 in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
     u_short *lportp, struct ucred *cred)
 {
 	struct socket *so = inp->inp_socket;
 	struct sockaddr_in *sin;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct in_addr laddr;
 	u_short lport = 0;
 	int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT);
 	int error;
 
 	/*
 	 * No state changes, so read locks are sufficient here.
 	 */
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
 		return (EADDRNOTAVAIL);
 	laddr.s_addr = *laddrp;
 	/* An explicit address may not be given if one is already set. */
 	if (nam != NULL && laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
 	/* Without any reuse option, wildcard bindings conflict as well. */
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		lookupflags = INPLOOKUP_WILDCARD;
 	if (nam == NULL) {
 		if ((error = prison_local_ip4(cred, &laddr)) != 0)
 			return (error);
 	} else {
 		sin = (struct sockaddr_in *)nam;
 		if (nam->sa_len != sizeof (*sin))
 			return (EINVAL);
 #ifdef notdef
 		/*
 		 * We should check the family, but old programs
 		 * incorrectly fail to initialize it.
 		 */
 		if (sin->sin_family != AF_INET)
 			return (EAFNOSUPPORT);
 #endif
 		error = prison_local_ip4(cred, &sin->sin_addr);
 		if (error)
 			return (error);
 		if (sin->sin_port != *lportp) {
 			/* Don't allow the port to change. */
 			if (*lportp != 0)
 				return (EINVAL);
 			lport = sin->sin_port;
 		}
 		/* NB: lport is left as 0 if the port isn't being changed. */
 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 			/*
 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
 			 * allow complete duplication of binding if
 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
 			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
 			sin->sin_port = 0;		/* yech... */
 			bzero(&sin->sin_zero, sizeof(sin->sin_zero));
 			/*
 			 * Is the address a local IP address?
 			 * If INP_BINDANY is set, then the socket may be bound
 			 * to any endpoint address, local or not.
 			 */
 			if ((inp->inp_flags & INP_BINDANY) == 0 &&
 			    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) 
 				return (EADDRNOTAVAIL);
 		}
 		laddr = sin->sin_addr;
 		if (lport) {
 			struct inpcb *t;
 			struct tcptw *tw;
 
 			/* GROSS */
 			/* Ports in the reserved range require privilege. */
 			if (ntohs(lport) <= V_ipport_reservedhigh &&
 			    ntohs(lport) >= V_ipport_reservedlow &&
 			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
 			    0))
 				return (EACCES);
 			/*
 			 * Callers lacking PRIV_NETINET_REUSEPORT are checked
 			 * against any existing binding of this port,
 			 * wildcards included.
 			 */
 			if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
 			    priv_check_cred(inp->inp_cred,
 			    PRIV_NETINET_REUSEPORT, 0) != 0) {
 				t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 				    lport, INPLOOKUP_WILDCARD, cred);
 	/*
 	 * XXX
 	 * This entire block sorely needs a rewrite.
 	 */
 				if (t &&
 				    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
 				    ((t->inp_flags & INP_TIMEWAIT) == 0) &&
 				    (so->so_type != SOCK_STREAM ||
 				     ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
 				     (t->inp_flags2 & INP_REUSEPORT) == 0) &&
 				    (inp->inp_cred->cr_uid !=
 				     t->inp_cred->cr_uid))
 					return (EADDRINUSE);
 
 				/*
 				 * If the socket is a BINDMULTI socket, then
 				 * the credentials need to match and the
 				 * original socket also has to have been bound
 				 * with BINDMULTI.
 				 */
 				if (t && (! in_pcbbind_check_bindmulti(inp, t)))
 					return (EADDRINUSE);
 			}
 			/* General conflict check, honouring the reuse options. */
 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 			    lport, lookupflags, cred);
 			if (t && (t->inp_flags & INP_TIMEWAIT)) {
 				/*
 				 * XXXRW: If an inpcb has had its timewait
 				 * state recycled, we treat the address as
 				 * being in use (for now).  This is better
 				 * than a panic, but not desirable.
 				 */
 				tw = intotw(t);
 				if (tw == NULL ||
 				    (reuseport & tw->tw_so_options) == 0)
 					return (EADDRINUSE);
 			} else if (t &&
 			    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
 			    (reuseport & inp_so_options(t)) == 0) {
 				/*
 				 * NOTE: the "if" below exists only under
 				 * INET6 and then guards the following return.
 				 * Without INET6 the return is unconditional,
 				 * which makes the BINDMULTI check after it
 				 * unreachable.
 				 */
 #ifdef INET6
 				if (ntohl(sin->sin_addr.s_addr) !=
 				    INADDR_ANY ||
 				    ntohl(t->inp_laddr.s_addr) !=
 				    INADDR_ANY ||
 				    (inp->inp_vflag & INP_IPV6PROTO) == 0 ||
 				    (t->inp_vflag & INP_IPV6PROTO) == 0)
 #endif
 				return (EADDRINUSE);
 				if (t && (! in_pcbbind_check_bindmulti(inp, t)))
 					return (EADDRINUSE);
 			}
 		}
 	}
 	/* Keep a port that was already assigned on entry. */
 	if (*lportp != 0)
 		lport = *lportp;
 	/* Still no port: pick one automatically. */
 	if (lport == 0) {
 		error = in_pcb_lport(inp, &laddr, &lport, cred, lookupflags);
 		if (error != 0)
 			return (error);
 
 	}
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	return (0);
 }
 
 /*
  * Connect a pcb to the foreign address and port given in nam, choosing
  * a local address and/or port first if the pcb does not have them yet.
  * The mbuf m is handed to in_pcbrehash_mbuf() and may be NULL.
  *
  * The inpcb and pcbinfo hash write locks must be held.  Returns 0 on
  * success, the error from in_pcbconnect_setup(), or EAGAIN if the
  * initial hash insertion fails.
  */
 int
 in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
     struct ucred *cred, struct mbuf *m)
 {
 	in_addr_t new_laddr, new_faddr;
 	u_short new_lport, new_fport;
 	int rc, was_portless;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	/* Start from the pcb's current local endpoint. */
 	new_laddr = inp->inp_laddr.s_addr;
 	new_lport = inp->inp_lport;
 	was_portless = (new_lport == 0);
 
 	rc = in_pcbconnect_setup(inp, nam, &new_laddr, &new_lport,
 	    &new_faddr, &new_fport, NULL, cred);
 	if (rc != 0)
 		return (rc);
 
 	/*
 	 * A completely unbound pcb is not in the hash yet: give it its
 	 * local endpoint and insert it, undoing the change on failure.
 	 */
 	if (inp->inp_lport == 0 && inp->inp_laddr.s_addr == INADDR_ANY) {
 		inp->inp_lport = new_lport;
 		inp->inp_laddr.s_addr = new_laddr;
 		if (in_pcbinshash(inp) != 0) {
 			inp->inp_laddr.s_addr = INADDR_ANY;
 			inp->inp_lport = 0;
 			return (EAGAIN);
 		}
 	}
 
 	/* Store the full four-tuple and rehash on it. */
 	inp->inp_lport = new_lport;
 	inp->inp_laddr.s_addr = new_laddr;
 	inp->inp_faddr.s_addr = new_faddr;
 	inp->inp_fport = new_fport;
 	in_pcbrehash_mbuf(inp, m);
 
 	if (was_portless)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 
 /*
  * Convenience wrapper: in_pcbconnect_mbuf() with no mbuf supplied.
  */
 int
 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
 {
 	int rc;
 
 	rc = in_pcbconnect_mbuf(inp, nam, cred, NULL);
 	return (rc);
 }
 
 /*
  * Do proper source address selection on an unbound socket in case
  * of connect. Take jails into account as well.
  *
  *	inp	pcb whose socket options and FIB are consulted
  *	faddr	destination address
  *	laddr	receives the selected source address; must not be NULL
  *	cred	credential used for the jail checks; may be NULL
  *
  * Returns 0 on success, ENETUNREACH when no suitable interface address
  * can be found, or the error from prison_get_ip4().
  */
 int
 in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
     struct ucred *cred)
 {
 	struct ifaddr *ifa;
 	struct sockaddr *sa;
 	struct sockaddr_in *sin;
 	struct route sro;
 	int error;
 
 	KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
 
 	/*
 	 * Bypass source address selection and use the primary jail IP
 	 * if requested.
 	 */
 	if (cred != NULL && !prison_saddrsel_ip4(cred, laddr))
 		return (0);
 
 	error = 0;
 	bzero(&sro, sizeof(sro));
 
 	/* Build the route lookup key from the destination address. */
 	sin = (struct sockaddr_in *)&sro.ro_dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(struct sockaddr_in);
 	sin->sin_addr.s_addr = faddr->s_addr;
 
 	/*
 	 * If route is known our src addr is taken from the i/f,
 	 * else punt.
 	 *
 	 * Find out route to destination.
 	 */
 	if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
 		in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum);
 
 	/*
 	 * If we found a route, use the address corresponding to
 	 * the outgoing interface.
 	 *
 	 * Otherwise assume faddr is reachable on a directly connected
 	 * network and try to find a corresponding interface to take
 	 * the source address from.
 	 */
 	if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) {
 		struct in_ifaddr *ia;
 		struct ifnet *ifp;
 
 		ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin,
 					inp->inp_socket->so_fibnum));
 		if (ia == NULL)
 			ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0,
 						inp->inp_socket->so_fibnum));
 		if (ia == NULL) {
 			error = ENETUNREACH;
 			goto done;
 		}
 
 		/* Not jailed: the address found above is the answer. */
 		if (cred == NULL || !prison_flag(cred, PR_IP4)) {
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			ifa_free(&ia->ia_ifa);
 			goto done;
 		}
 
 		/*
 		 * Jailed: keep only the interface, drop the address with
 		 * ifa_free(), and search that interface for an address the
 		 * jail is allowed to use.
 		 */
 		ifp = ia->ia_ifp;
 		ifa_free(&ia->ia_ifa);
 		ia = NULL;
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)sa;
 			if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 				ia = (struct in_ifaddr *)ifa;
 				break;
 			}
 		}
 		if (ia != NULL) {
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			IF_ADDR_RUNLOCK(ifp);
 			goto done;
 		}
 		IF_ADDR_RUNLOCK(ifp);
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 	/*
 	 * If the outgoing interface on the route found is not
 	 * a loopback interface, use the address from that interface.
 	 * In case of jails do those three steps:
 	 * 1. check if the interface address belongs to the jail. If so use it.
 	 * 2. check if we have any address on the outgoing interface
 	 *    belonging to this jail. If so use it.
 	 * 3. as a last resort return the 'default' jail address.
 	 */
 	if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
 		struct in_ifaddr *ia;
 		struct ifnet *ifp;
 
 		/* If not jailed, use the default returned. */
 		if (cred == NULL || !prison_flag(cred, PR_IP4)) {
 			ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/* Jailed. */
 		/* 1. Check if the iface address belongs to the jail. */
 		sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr;
 		if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 			ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/*
 		 * 2. Check if we have any address on the outgoing interface
 		 *    belonging to this jail.
 		 */
 		ia = NULL;
 		ifp = sro.ro_rt->rt_ifp;
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)sa;
 			if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 				ia = (struct in_ifaddr *)ifa;
 				break;
 			}
 		}
 		if (ia != NULL) {
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			IF_ADDR_RUNLOCK(ifp);
 			goto done;
 		}
 		IF_ADDR_RUNLOCK(ifp);
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 	/*
 	 * The outgoing interface is marked with 'loopback net', so a route
 	 * to ourselves is here.
 	 * Try to find the interface of the destination address and then
 	 * take the address from there. That interface is not necessarily
 	 * a loopback interface.
 	 * In case of jails, check that it is an address of the jail
 	 * and if we cannot find, fall back to the 'default' jail address.
 	 */
 	if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
 		struct sockaddr_in sain;
 		struct in_ifaddr *ia;
 
 		bzero(&sain, sizeof(struct sockaddr_in));
 		sain.sin_family = AF_INET;
 		sain.sin_len = sizeof(struct sockaddr_in);
 		sain.sin_addr.s_addr = faddr->s_addr;
 
 		/* Try destination-address, then network, then exact match. */
 		ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain),
 					inp->inp_socket->so_fibnum));
 		if (ia == NULL)
 			ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0,
 						inp->inp_socket->so_fibnum));
 		if (ia == NULL)
 			ia = ifatoia(ifa_ifwithaddr(sintosa(&sain)));
 
 		/* Not jailed: use what was found, or fail if nothing was. */
 		if (cred == NULL || !prison_flag(cred, PR_IP4)) {
 			if (ia == NULL) {
 				error = ENETUNREACH;
 				goto done;
 			}
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			ifa_free(&ia->ia_ifa);
 			goto done;
 		}
 
 		/* Jailed. */
 		if (ia != NULL) {
 			struct ifnet *ifp;
 
 			ifp = ia->ia_ifp;
 			ifa_free(&ia->ia_ifa);
 			ia = NULL;
 			IF_ADDR_RLOCK(ifp);
 			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 
 				sa = ifa->ifa_addr;
 				if (sa->sa_family != AF_INET)
 					continue;
 				sin = (struct sockaddr_in *)sa;
 				if (prison_check_ip4(cred,
 				    &sin->sin_addr) == 0) {
 					ia = (struct in_ifaddr *)ifa;
 					break;
 				}
 			}
 			if (ia != NULL) {
 				laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 				IF_ADDR_RUNLOCK(ifp);
 				goto done;
 			}
 			IF_ADDR_RUNLOCK(ifp);
 		}
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 done:
 	/* Release the route obtained from in_rtalloc_ign(), if any. */
 	if (sro.ro_rt != NULL)
 		RTFREE(sro.ro_rt);
 	return (error);
 }
 
 /*
  * Set up for a connect from a socket to the specified address.
  * On entry, *laddrp and *lportp should contain the current local
  * address and port for the PCB; these are updated to the values
  * that should be placed in inp_laddr and inp_lport to complete
  * the connect.
  *
  * On success, *faddrp and *fportp will be set to the remote address
  * and port. These are not updated in the error case.
  *
  * If the operation fails because the connection already exists,
  * *oinpp will be set to the PCB of that connection so that the
  * caller can decide to override it. In all other cases, *oinpp
  * is set to NULL.
  *
  * oinpp itself may be NULL if the caller is not interested in the
  * conflicting PCB.  Returns 0 on success, otherwise an errno value
  * (EINVAL, EAFNOSUPPORT, EADDRNOTAVAIL, EADDRINUSE, or an error from
  * prison_get_ip4(), in_pcbladdr() or in_pcbbind_setup()).
  */
 int
 in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
     in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
     struct inpcb **oinpp, struct ucred *cred)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct in_ifaddr *ia;
 	struct inpcb *oinp;
 	struct in_addr laddr, faddr;
 	u_short lport, fport;
 	int error;
 
 	/*
 	 * Because a global state change doesn't actually occur here, a read
 	 * lock is sufficient.
 	 */
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);
 
 	if (oinpp != NULL)
 		*oinpp = NULL;
 	/* Validate the destination sockaddr before using any field. */
 	if (nam->sa_len != sizeof (*sin))
 		return (EINVAL);
 	if (sin->sin_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (sin->sin_port == 0)
 		return (EADDRNOTAVAIL);
 	/* Work on local copies so the outputs stay untouched on error. */
 	laddr.s_addr = *laddrp;
 	lport = *lportp;
 	faddr = sin->sin_addr;
 	fport = sin->sin_port;
 
 	if (!TAILQ_EMPTY(&V_in_ifaddrhead)) {
 		/*
 		 * If the destination address is INADDR_ANY,
 		 * use the primary local address.
 		 * If the supplied address is INADDR_BROADCAST,
 		 * and the primary interface supports broadcast,
 		 * choose the broadcast address for that interface.
 		 */
 		if (faddr.s_addr == INADDR_ANY) {
 			IN_IFADDR_RLOCK();
 			faddr =
 			    IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
 			IN_IFADDR_RUNLOCK();
 			if (cred != NULL &&
 			    (error = prison_get_ip4(cred, &faddr)) != 0)
 				return (error);
 		} else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
 			IN_IFADDR_RLOCK();
 			if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
 			    IFF_BROADCAST)
 				faddr = satosin(&TAILQ_FIRST(
 				    &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
 			IN_IFADDR_RUNLOCK();
 		}
 	}
 	/* No local address yet: select a source address. */
 	if (laddr.s_addr == INADDR_ANY) {
 		error = in_pcbladdr(inp, &faddr, &laddr, cred);
 		/*
 		 * If the destination address is multicast and an outgoing
 		 * interface has been set as a multicast option, prefer the
 		 * address of that interface as our source address.
 		 */
 		if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
 		    inp->inp_moptions != NULL) {
 			struct ip_moptions *imo;
 			struct ifnet *ifp;
 
 			imo = inp->inp_moptions;
 			if (imo->imo_multicast_ifp != NULL) {
 				ifp = imo->imo_multicast_ifp;
 				IN_IFADDR_RLOCK();
 				TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 					if ((ia->ia_ifp == ifp) &&
 					    (cred == NULL ||
 					    prison_check_ip4(cred,
 					    &ia->ia_addr.sin_addr) == 0))
 						break;
 				}
 				/*
 				 * This result replaces whatever
 				 * in_pcbladdr() reported above.
 				 */
 				if (ia == NULL)
 					error = EADDRNOTAVAIL;
 				else {
 					laddr = ia->ia_addr.sin_addr;
 					error = 0;
 				}
 				IN_IFADDR_RUNLOCK();
 			}
 		}
 		if (error)
 			return (error);
 	}
 	/* Refuse a four-tuple that is already in use. */
 	oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport,
 	    laddr, lport, 0, NULL);
 	if (oinp != NULL) {
 		if (oinpp != NULL)
 			*oinpp = oinp;
 		return (EADDRINUSE);
 	}
 	/* No local port yet: have in_pcbbind_setup() choose one. */
 	if (lport == 0) {
 		error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
 		    cred);
 		if (error)
 			return (error);
 	}
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	*faddrp = faddr.s_addr;
 	*fportp = fport;
 	return (0);
 }
 
 void
 in_pcbdisconnect(struct inpcb *inp)
 {
 
 	/* Caller holds the inpcb write lock and the pcbinfo hash write lock. */
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	/*
 	 * Clear the foreign endpoint, then move the inpcb to the hash
 	 * bucket that matches its new (unconnected) tuple.
 	 */
 	inp->inp_fport = 0;
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	in_pcbrehash(inp);
 }
 #endif /* INET */
 
 /*
  * in_pcbdetach() is responsible for disassociating a socket from an inpcb.
  * For most protocols, this will be invoked immediately prior to calling
  * in_pcbfree().  However, with TCP the inpcb may significantly outlive the
  * socket, in which case in_pcbfree() is deferred.
  */
 void
 in_pcbdetach(struct inpcb *inp)
 {
 
 	KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
 
 	inp->inp_socket->so_pcb = NULL;
 	inp->inp_socket = NULL;
 }
 
 /*
  * in_pcbref() bumps the reference count on an inpcb in order to maintain
  * stability of an inpcb pointer despite the inpcb lock being released.  This
  * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
  * but where the inpcb lock may already be held, or when acquiring a reference
  * via a pcbgroup.
  *
  * in_pcbref() should be used only to provide brief memory stability, and
  * must always be followed by a call to INP_WLOCK() and in_pcbrele() to
  * garbage collect the inpcb if it has been in_pcbfree()'d from another
  * context.  Until in_pcbrele() has returned that the inpcb is still valid,
  * lock and rele are the *only* safe operations that may be performed on the
  * inpcb.
  *
  * While the inpcb will not be freed, releasing the inpcb lock means that the
  * connection's state may change, so the caller should be careful to
  * revalidate any cached state on reacquiring the lock.  Drop the reference
  * using in_pcbrele().
  */
 void
 in_pcbref(struct inpcb *inp)
 {
 
 	/* A reference may only be added to an inpcb that already has one. */
 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
 
 	refcount_acquire(&inp->inp_refcount);
 }
 
 /*
  * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
  * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
  * return a flag indicating whether or not the inpcb remains valid.  If it is
  * valid, we return with the inpcb lock held.
  *
  * Notice that, unlike in_pcbref(), the inpcb lock must be held to drop a
  * reference on an inpcb.  Historically more work was done here (actually, in
  * in_pcbfree_internal()) but has been moved to in_pcbfree() to avoid the
  * need for the pcbinfo lock in in_pcbrele().  Deferring the free is entirely
  * about memory stability (and continued use of the write lock).
  */
 int
 in_pcbrele_rlocked(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 
 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
 
 	INP_RLOCK_ASSERT(inp);
 
 	if (refcount_release(&inp->inp_refcount) != 0) {
 		/*
 		 * That was the last reference: unlock the inpcb and return
 		 * its memory to the pcbinfo's zone.
 		 */
 		KASSERT(inp->inp_socket == NULL,
 		    ("%s: inp_socket != NULL", __func__));
 
 		INP_RUNLOCK(inp);
 		pcbinfo = inp->inp_pcbinfo;
 		uma_zfree(pcbinfo->ipi_zone, inp);
 		return (1);
 	}
 
 	/* Other references remain and the inpcb is still live. */
 	if ((inp->inp_flags2 & INP_FREED) == 0)
 		return (0);
 
 	/*
 	 * The inpcb has been freed; let the caller know, even though this
 	 * was not the last reference.  The read lock is dropped here.
 	 */
 	INP_RUNLOCK(inp);
 	return (1);
 }
 
 int
 in_pcbrele_wlocked(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 
 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
 
 	INP_WLOCK_ASSERT(inp);
 
 	if (refcount_release(&inp->inp_refcount) != 0) {
 		/*
 		 * Last reference dropped: unlock the inpcb and return its
 		 * memory to the pcbinfo's zone.
 		 */
 		KASSERT(inp->inp_socket == NULL,
 		    ("%s: inp_socket != NULL", __func__));
 
 		INP_WUNLOCK(inp);
 		pcbinfo = inp->inp_pcbinfo;
 		uma_zfree(pcbinfo->ipi_zone, inp);
 		return (1);
 	}
 
 	/* Other references remain; the write lock is still held. */
 	return (0);
 }
 
 /*
  * Temporary wrapper.
  */
 int
 in_pcbrele(struct inpcb *inp)
 {
 
 	/* Forward to the write-locked variant and pass its result through. */
 	return (in_pcbrele_wlocked(inp));
 }
 
 /*
  * Unconditionally schedule an inpcb to be freed by decrementing its
  * reference count, which should occur only after the inpcb has been detached
  * from its socket.  If another thread holds a temporary reference (acquired
  * using in_pcbref()) then the free is deferred until that reference is
  * released using in_pcbrele(), but the inpcb is still unlocked.  Almost all
  * work, including removal from global lists, is done in this context, where
  * the pcbinfo lock is held.
  */
 void
 in_pcbfree(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 
 	/* The inpcb must already have been detached from its socket. */
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* XXXRW: Do as much as possible here. */
 #ifdef IPSEC
 	if (inp->inp_sp != NULL)
 		ipsec_delete_pcbpolicy(inp);
 #endif
 	/*
 	 * NOTE(review): in_pcbremlists() also bumps ipi_gencnt and stores it
 	 * in inp_gencnt, so the generation count advances twice here.
 	 */
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	in_pcbremlists(inp);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6PROTO) {
 		ip6_freepcbopts(inp->in6p_outputopts);
 		if (inp->in6p_moptions != NULL)
 			ip6_freemoptions(inp->in6p_moptions);
 	}
 #endif
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
 #ifdef INET
 	if (inp->inp_moptions != NULL)
 		inp_freemoptions(inp->inp_moptions);
 #endif
 	/*
 	 * Clear the address-family flags and mark the inpcb as freed;
 	 * in_pcbrele_rlocked() tests INP_FREED to report a dead inpcb.
 	 */
 	inp->inp_vflag = 0;
 	inp->inp_flags2 |= INP_FREED;
 	crfree(inp->inp_cred);
 #ifdef MAC
 	mac_inpcb_destroy(inp);
 #endif
 	/*
 	 * Drop our reference.  A non-zero return means the inpcb was
 	 * unlocked and its memory released; zero means other references
 	 * remain and the write lock is still held, so release it here.
 	 */
 	if (!in_pcbrele_wlocked(inp))
 		INP_WUNLOCK(inp);
 }
 
 /*
  * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
  * port reservation, and preventing it from being returned by inpcb lookups.
  *
  * It is used by TCP to mark an inpcb as unused and avoid future packet
  * delivery or event notification when a socket remains open but TCP has
  * closed.  This might occur as a result of a shutdown()-initiated TCP close
  * or a RST on the wire, and allows the port binding to be reused while still
  * maintaining the invariant that so_pcb always points to a valid inpcb until
  * in_pcbdetach().
  *
  * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
  * in_pcbnotifyall() and in_pcbpurgeif0()?
  */
 void
 in_pcbdrop(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcbport *phd;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * XXXRW: Possibly we should protect the setting of INP_DROPPED with
 	 * the hash lock...?
 	 */
 	inp->inp_flags |= INP_DROPPED;
 
 	/* Nothing more to do if the inpcb is not on the hash lists. */
 	if ((inp->inp_flags & INP_INHASHLIST) == 0)
 		return;
 
 	phd = inp->inp_phd;
 	pcbinfo = inp->inp_pcbinfo;
 
 	INP_HASH_WLOCK(pcbinfo);
 	LIST_REMOVE(inp, inp_hash);
 	LIST_REMOVE(inp, inp_portlist);
 	/* Free the per-port head once its last inpcb is gone. */
 	if (LIST_EMPTY(&phd->phd_pcblist)) {
 		LIST_REMOVE(phd, phd_hash);
 		free(phd, M_PCB);
 	}
 	INP_HASH_WUNLOCK(pcbinfo);
 	inp->inp_flags &= ~INP_INHASHLIST;
 #ifdef PCBGROUP
 	in_pcbgroup_remove(inp);
 #endif
 }
 
 #ifdef INET
 /*
  * Common routines to return the socket addresses associated with inpcbs.
  */
 struct sockaddr *
 in_sockaddr(in_port_t port, struct in_addr *addr_p)
 {
 	struct sockaddr_in *sin;
 
 	/* Allocate a zeroed sockaddr_in (M_WAITOK) and fill in the endpoint. */
 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK | M_ZERO);
 	sin->sin_len = sizeof(*sin);
 	sin->sin_family = AF_INET;
 	sin->sin_port = port;
 	sin->sin_addr = *addr_p;
 
 	return ((struct sockaddr *)sin);
 }
 
 int
 in_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_lport;
 	addr = inp->inp_laddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 int
 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_fport;
 	addr = inp->inp_faddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 void
 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
     struct inpcb *(*notify)(struct inpcb *, int))
 {
 	struct inpcb *inp, *next_inp;
 
 	INP_INFO_WLOCK(pcbinfo);
 	LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, next_inp) {
 		INP_WLOCK(inp);
 #ifdef INET6
 		/* Skip inpcbs that are not IPv4. */
 		if ((inp->inp_vflag & INP_IPV4) == 0) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 #endif
 		/*
 		 * Notify only inpcbs connected to faddr that still have a
 		 * socket.  When the callback returns NULL the inpcb is not
 		 * unlocked here.
 		 */
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_socket != NULL) {
 			if ((*notify)(inp, errno) == NULL)
 				continue;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 void
 in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
 {
 	struct ip_moptions *imo;
 	struct inpcb *inp;
 	int idx, removed;
 
 	INP_INFO_RLOCK(pcbinfo);
 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		imo = inp->inp_moptions;
 		/* Only IPv4 inpcbs with multicast options need work. */
 		if ((inp->inp_vflag & INP_IPV4) == 0 || imo == NULL) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 
 		/*
 		 * Unselect the outgoing interface if it is being
 		 * detached.
 		 */
 		if (imo->imo_multicast_ifp == ifp)
 			imo->imo_multicast_ifp = NULL;
 
 		/*
 		 * Drop multicast group membership if we joined
 		 * through the interface being detached, sliding the
 		 * surviving entries down over the holes.
 		 */
 		removed = 0;
 		for (idx = 0; idx < imo->imo_num_memberships; idx++) {
 			if (imo->imo_membership[idx]->inm_ifp == ifp) {
 				in_delmulti(imo->imo_membership[idx]);
 				removed++;
 			} else if (removed != 0) {
 				imo->imo_membership[idx - removed] =
 				    imo->imo_membership[idx];
 			}
 		}
 		imo->imo_num_memberships -= removed;
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(pcbinfo);
 }
 
 /*
  * Lookup a PCB based on the local address and port.  Caller must hold the
  * hash lock.  No inpcb locks or references are acquired.
  */
 #define INP_LOOKUP_MAPPED_PCB_COST	3
 struct inpcb *
 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
     u_short lport, int lookupflags, struct ucred *cred)
 {
 	struct inpcb *inp;
 	/*
 	 * matchwild is the lowest wildcard cost seen so far.  It starts one
 	 * above the largest cost the loop below can compute, so the first
 	 * acceptable candidate always replaces it.
 	 */
 #ifdef INET6
 	int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
 #else
 	int matchwild = 3;
 #endif
 	int wildcard;
 
 	/* INPLOOKUP_WILDCARD is the only flag accepted here. */
 	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
 		struct inpcbhead *head;
 		/*
 		 * Look for an unconnected (wildcard foreign addr) PCB that
 		 * matches the local address and port we're looking for.
 		 */
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
 			    inp->inp_laddr.s_addr == laddr.s_addr &&
 			    inp->inp_lport == lport) {
 				/*
 				 * Found?
 				 *
 				 * Accept only if no credential was supplied
 				 * or prison_equal_ip4() accepts the two
 				 * prisons; otherwise keep scanning.
 				 */
 				if (cred == NULL ||
 				    prison_equal_ip4(cred->cr_prison,
 					inp->inp_cred->cr_prison))
 					return (inp);
 			}
 		}
 		/*
 		 * Not found.
 		 */
 		return (NULL);
 	} else {
 		struct inpcbporthead *porthash;
 		struct inpcbport *phd;
 		struct inpcb *match = NULL;
 		/*
 		 * Best fit PCB lookup.
 		 *
 		 * First see if this local port is in use by looking on the
 		 * port hash list.
 		 */
 		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
 		    pcbinfo->ipi_porthashmask)];
 		LIST_FOREACH(phd, porthash, phd_hash) {
 			if (phd->phd_port == lport)
 				break;
 		}
 		if (phd != NULL) {
 			/*
 			 * Port is in use by one or more PCBs. Look for best
 			 * fit.
 			 */
 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
 				/* Cost of this candidate; lower is better. */
 				wildcard = 0;
 				if (cred != NULL &&
 				    !prison_equal_ip4(inp->inp_cred->cr_prison,
 					cred->cr_prison))
 					continue;
 #ifdef INET6
 				/* XXX inp locking */
 				if ((inp->inp_vflag & INP_IPV4) == 0)
 					continue;
 				/*
 				 * We never select the PCB that has
 				 * INP_IPV6 flag and is bound to :: if
 				 * we have another PCB which is bound
 				 * to 0.0.0.0.  If a PCB has the
 				 * INP_IPV6 flag, then we set its cost
 				 * higher than IPv4 only PCBs.
 				 *
 				 * Note that the case only happens
 				 * when a socket is bound to ::, under
 				 * the condition that the use of the
 				 * mapped address is allowed.
 				 */
 				if ((inp->inp_vflag & INP_IPV6) != 0)
 					wildcard += INP_LOOKUP_MAPPED_PCB_COST;
 #endif
 				/* A connected PCB costs one. */
 				if (inp->inp_faddr.s_addr != INADDR_ANY)
 					wildcard++;
 				/*
 				 * Exactly one side being INADDR_ANY costs
 				 * one; two different specific local
 				 * addresses disqualify the candidate.
 				 */
 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
 					if (laddr.s_addr == INADDR_ANY)
 						wildcard++;
 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
 						continue;
 				} else {
 					if (laddr.s_addr != INADDR_ANY)
 						wildcard++;
 				}
 				if (wildcard < matchwild) {
 					match = inp;
 					matchwild = wildcard;
 					/* Zero cost cannot be beaten. */
 					if (matchwild == 0)
 						break;
 				}
 			}
 		}
 		return (match);
 	}
 }
 #undef INP_LOOKUP_MAPPED_PCB_COST
 
 #ifdef PCBGROUP
 /*
  * Lookup PCB in hash list, using pcbgroup tables.
  */
 static struct inpcb *
 in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
     struct in_addr faddr, u_int fport_arg, struct in_addr laddr,
     u_int lport_arg, int lookupflags, struct ifnet *ifp)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *tmpinp;
 	/* Ports arrive as u_int and are narrowed to u_short here. */
 	u_short fport = fport_arg, lport = lport_arg;
 
 	/*
 	 * First look for an exact match.
 	 */
 	tmpinp = NULL;
 	INP_GROUP_LOCK(pcbgroup);
 	head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
 	    pcbgroup->ipg_hashmask)];
 	LIST_FOREACH(inp, head, inp_pcbgrouphash) {
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_laddr.s_addr == laddr.s_addr &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport) {
 			/*
 			 * XXX We should be able to directly return
 			 * the inp here, without any checks.
 			 * Well unless both bound with SO_REUSEPORT?
 			 */
 			/*
 			 * A match whose credential has PR_IP4 set wins
 			 * immediately; otherwise remember the first match.
 			 */
 			if (prison_flag(inp->inp_cred, PR_IP4))
 				goto found;
 			if (tmpinp == NULL)
 				tmpinp = inp;
 		}
 	}
 	if (tmpinp != NULL) {
 		inp = tmpinp;
 		goto found;
 	}
 
 #ifdef	RSS
 	/*
 	 * For incoming connections, we may wish to do a wildcard
 	 * match for an RSS-local socket.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 #ifdef INET6
 		struct inpcb *local_wild_mapped = NULL;
 #endif
 		struct inpcb *jail_wild = NULL;
 		/* Shadows the function-scope 'head' within this block. */
 		struct inpcbhead *head;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 
 		head = &pcbgroup->ipg_hashbase[INP_PCBHASH(INADDR_ANY,
 		    lport, 0, pcbgroup->ipg_hashmask)];
 		LIST_FOREACH(inp, head, inp_pcbgrouphash) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr != INADDR_ANY ||
 			    inp->inp_lport != lport)
 				continue;
 
 			injail = prison_flag(inp->inp_cred, PR_IP4);
 			if (injail) {
 				if (prison_check_ip4(inp->inp_cred,
 				    &laddr) != 0)
 					continue;
 			} else {
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
 				if (injail)
 					goto found;
 				else
 					local_exact = inp;
 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 #ifdef INET6
 				/* XXX inp locking, NULL check */
 				if (inp->inp_vflag & INP_IPV6PROTO)
 					local_wild_mapped = inp;
 				else
 #endif
 					if (injail)
 						jail_wild = inp;
 					else
 						local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 
 		/* Pick the best remaining candidate in preference order. */
 		inp = jail_wild;
 		if (inp == NULL)
 			inp = local_exact;
 		if (inp == NULL)
 			inp = local_wild;
 #ifdef INET6
 		if (inp == NULL)
 			inp = local_wild_mapped;
 #endif
 		if (inp != NULL)
 			goto found;
 	}
 #endif
 
 	/*
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 #ifdef INET6
 		struct inpcb *local_wild_mapped = NULL;
 #endif
 		struct inpcb *jail_wild = NULL;
 		/* Shadows the function-scope 'head' within this block. */
 		struct inpcbhead *head;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 		/* This pass walks the pcbinfo-wide wildcard table. */
 		head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_wildmask)];
 		LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr != INADDR_ANY ||
 			    inp->inp_lport != lport)
 				continue;
 
 			injail = prison_flag(inp->inp_cred, PR_IP4);
 			if (injail) {
 				if (prison_check_ip4(inp->inp_cred,
 				    &laddr) != 0)
 					continue;
 			} else {
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
 				if (injail)
 					goto found;
 				else
 					local_exact = inp;
 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 #ifdef INET6
 				/* XXX inp locking, NULL check */
 				if (inp->inp_vflag & INP_IPV6PROTO)
 					local_wild_mapped = inp;
 				else
 #endif
 					if (injail)
 						jail_wild = inp;
 					else
 						local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 		/* Pick the best remaining candidate in preference order. */
 		inp = jail_wild;
 		if (inp == NULL)
 			inp = local_exact;
 		if (inp == NULL)
 			inp = local_wild;
 #ifdef INET6
 		if (inp == NULL)
 			inp = local_wild_mapped;
 #endif
 		if (inp != NULL)
 			goto found;
 	} /* if (lookupflags & INPLOOKUP_WILDCARD) */
 	INP_GROUP_UNLOCK(pcbgroup);
 	return (NULL);
 
 found:
 	/*
 	 * Take a reference before dropping the group lock, then acquire the
 	 * requested inpcb lock and drop the reference again.  A non-zero
 	 * return from in_pcbrele_*() means the inpcb is no longer usable
 	 * (and no longer locked), so report a miss.
 	 */
 	in_pcbref(inp);
 	INP_GROUP_UNLOCK(pcbgroup);
 	if (lookupflags & INPLOOKUP_WLOCKPCB) {
 		INP_WLOCK(inp);
 		if (in_pcbrele_wlocked(inp))
 			return (NULL);
 	} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
 		INP_RLOCK(inp);
 		if (in_pcbrele_rlocked(inp))
 			return (NULL);
 	} else
 		panic("%s: locking bug", __func__);
 	return (inp);
 }
 #endif /* PCBGROUP */
 
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation assumes
  * that the caller has locked the hash list, and will not perform any further
  * locking or reference operations on either the hash list or the connection.
  */
 static struct inpcb *
 in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
     struct ifnet *ifp)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *tmpinp;
 	/* Ports arrive as u_int and are narrowed to u_short here. */
 	u_short fport = fport_arg, lport = lport_arg;
 
 	/* INPLOOKUP_WILDCARD is the only flag accepted here. */
 	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	/*
 	 * First look for an exact match.
 	 */
 	tmpinp = NULL;
 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
 	    pcbinfo->ipi_hashmask)];
 	LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_laddr.s_addr == laddr.s_addr &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport) {
 			/*
 			 * XXX We should be able to directly return
 			 * the inp here, without any checks.
 			 * Well unless both bound with SO_REUSEPORT?
 			 */
 			/*
 			 * A match whose credential has PR_IP4 set wins
 			 * immediately; otherwise remember the first match.
 			 */
 			if (prison_flag(inp->inp_cred, PR_IP4))
 				return (inp);
 			if (tmpinp == NULL)
 				tmpinp = inp;
 		}
 	}
 	if (tmpinp != NULL)
 		return (tmpinp);
 
 	/*
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 #ifdef INET6
 		struct inpcb *local_wild_mapped = NULL;
 #endif
 		struct inpcb *jail_wild = NULL;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			/* Only unconnected PCBs on this local port qualify. */
 			if (inp->inp_faddr.s_addr != INADDR_ANY ||
 			    inp->inp_lport != lport)
 				continue;
 
 			injail = prison_flag(inp->inp_cred, PR_IP4);
 			if (injail) {
 				if (prison_check_ip4(inp->inp_cred,
 				    &laddr) != 0)
 					continue;
 			} else {
 				/*
 				 * Once a non-jailed exact match is held,
 				 * further non-jailed PCBs cannot improve it.
 				 */
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
 				if (injail)
 					return (inp);
 				else
 					local_exact = inp;
 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 #ifdef INET6
 				/* XXX inp locking, NULL check */
 				if (inp->inp_vflag & INP_IPV6PROTO)
 					local_wild_mapped = inp;
 				else
 #endif
 					if (injail)
 						jail_wild = inp;
 					else
 						local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 		/* Return the best remaining candidate in preference order. */
 		if (jail_wild != NULL)
 			return (jail_wild);
 		if (local_exact != NULL)
 			return (local_exact);
 		if (local_wild != NULL)
 			return (local_wild);
 #ifdef INET6
 		if (local_wild_mapped != NULL)
 			return (local_wild_mapped);
 #endif
 	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
 
 	return (NULL);
 }
 
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation locks the
  * hash list lock, and will return the inpcb locked (i.e., requires
  * INPLOOKUP_RLOCKPCB or INPLOOKUP_WLOCKPCB).
  */
 static struct inpcb *
 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
     struct ifnet *ifp)
 {
 	struct inpcb *inp;
 
 	/* The locked lookup accepts no LOCKPCB flags, so mask them off. */
 	INP_HASH_RLOCK(pcbinfo);
 	inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
 	    (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
 	if (inp == NULL) {
 		INP_HASH_RUNLOCK(pcbinfo);
 		return (NULL);
 	}
 
 	/*
 	 * Hold a reference across dropping the hash lock, take the requested
 	 * inpcb lock, then release the reference.  A non-zero return from
 	 * in_pcbrele_*() means the inpcb is no longer usable.
 	 */
 	in_pcbref(inp);
 	INP_HASH_RUNLOCK(pcbinfo);
 	if (lookupflags & INPLOOKUP_WLOCKPCB) {
 		INP_WLOCK(inp);
 		if (in_pcbrele_wlocked(inp))
 			return (NULL);
 	} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
 		INP_RLOCK(inp);
 		if (in_pcbrele_rlocked(inp))
 			return (NULL);
 	} else
 		panic("%s: locking bug", __func__);
 	return (inp);
 }
 
 /*
  * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
  * from which a pre-calculated hash value may be extracted.
  *
  * Possibly more of this logic should be in in_pcbgroup.c.
  */
 struct inpcb *
 in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
     struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
 {
 
 	/* Only known flags are allowed, and a PCB lock mode is mandatory. */
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
 	/*
 	 * When not using RSS, use connection groups in preference to the
 	 * reservation table when looking up 4-tuples.  When using RSS, just
 	 * use the reservation table, due to the cost of the Toeplitz hash
 	 * in software.
 	 *
 	 * XXXRW: This policy belongs in the pcbgroup code, as in principle
 	 * we could be doing RSS with a non-Toeplitz hash that is affordable
 	 * in software.
 	 */
 #if defined(PCBGROUP) && !defined(RSS)
 	if (in_pcbgroup_enabled(pcbinfo)) {
 		struct inpcbgroup *group;
 
 		group = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
 		    fport);
 		return (in_pcblookup_group(pcbinfo, group, faddr, fport,
 		    laddr, lport, lookupflags, ifp));
 	}
 #endif
 	/* Otherwise use the pcbinfo hash tables. */
 	return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
 
 struct inpcb *
 in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
     struct ifnet *ifp, struct mbuf *m)
 {
 #ifdef PCBGROUP
 	struct inpcbgroup *pcbgroup;
 #endif
 
 	/* Only known flags are allowed, and a PCB lock mode is mandatory. */
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
 #ifdef PCBGROUP
 	/*
 	 * If we can use a hardware-generated hash to look up the connection
 	 * group, use that connection group to find the inpcb.  Otherwise
 	 * fall back on a software hash -- or the reservation table if we're
 	 * using RSS.
 	 *
 	 * XXXRW: As above, that policy belongs in the pcbgroup code.
 	 */
 	if (in_pcbgroup_enabled(pcbinfo) &&
 	    !(M_HASHTYPE_TEST(m, M_HASHTYPE_NONE))) {
 		pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
 		    m->m_pkthdr.flowid);
 		if (pcbgroup != NULL)
 			return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
 			    fport, laddr, lport, lookupflags, ifp));
 #ifndef RSS
 		/* No group for this hash: choose one from the 4-tuple. */
 		pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
 		    fport);
 		return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
 		    laddr, lport, lookupflags, ifp));
 #endif
 		/* With RSS, a NULL group falls through to the hash lookup. */
 	}
 #endif
 	return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
 #endif /* INET */
 
 /*
  * Insert PCB onto various hash lists.
  */
 static int
 in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
 {
 	struct inpcbhead *pcbhash;
 	struct inpcbporthead *pcbporthash;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbport *phd;
 	u_int32_t hashkey_faddr;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
 	    ("in_pcbinshash: INP_INHASHLIST"));
 
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
 	else
 #endif
 	hashkey_faddr = inp->inp_faddr.s_addr;
 
 	pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
 		 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
 
 	pcbporthash = &pcbinfo->ipi_porthashbase[
 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
 
 	/*
 	 * Go through port list and look for a head for this lport.
 	 */
 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
 		if (phd->phd_port == inp->inp_lport)
 			break;
 	}
 	/*
 	 * If none exists, malloc one and tack it on.
 	 */
 	if (phd == NULL) {
 		phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT);
 		if (phd == NULL) {
 			return (ENOBUFS); /* XXX */
 		}
 		phd->phd_port = inp->inp_lport;
 		LIST_INIT(&phd->phd_pcblist);
 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
 	}
 	inp->inp_phd = phd;
 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
 	inp->inp_flags |= INP_INHASHLIST;
 #ifdef PCBGROUP
 	if (do_pcbgroup_update)
 		in_pcbgroup_update(inp);
 #endif
 	return (0);
 }
 
 /*
  * For now, there are two public interfaces to insert an inpcb into the hash
  * lists -- one that does update pcbgroups, and one that doesn't.  The latter
  * is used only in the TCP syncache, where in_pcbinshash is called before the
  * full 4-tuple is set for the inpcb, and we don't want to install in the
  * pcbgroup until later.
  *
  * XXXRW: This seems like a misfeature.  in_pcbinshash should always update
  * connection groups, and partially initialised inpcbs should not be exposed
  * to either reservation hash tables or pcbgroups.
  */
 int
 in_pcbinshash(struct inpcb *inp)
 {
 
 	/* Insert into the hash lists and update the pcbgroup as well. */
 	return (in_pcbinshash_internal(inp, 1));
 }
 
 int
 in_pcbinshash_nopcbgroup(struct inpcb *inp)
 {
 
 	/* Insert into the hash lists only; skip the pcbgroup update. */
 	return (in_pcbinshash_internal(inp, 0));
 }
 
 /*
  * Move PCB to the proper hash bucket when { faddr, fport } have  been
  * changed. NOTE: This does not handle the case of the lport changing (the
  * hashed port list would have to be updated as well), so the lport must
  * not change after in_pcbinshash() has been called.
  */
 void
 in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbhead *bucket;
 	u_int32_t faddr_key;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	KASSERT(inp->inp_flags & INP_INHASHLIST,
 	    ("in_pcbrehash: !INP_INHASHLIST"));
 
 	/* Pick the foreign-address hash key for the inpcb's address family. */
 	faddr_key = inp->inp_faddr.s_addr;
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		faddr_key = INP6_PCBHASHKEY(&inp->in6p_faddr);
 #endif
 
 	bucket = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr_key,
 	    inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
 
 	/* Unlink from the old bucket and relink at the head of the new one. */
 	LIST_REMOVE(inp, inp_hash);
 	LIST_INSERT_HEAD(bucket, inp, inp_hash);
 
 #ifdef PCBGROUP
 	/* Use the mbuf-aware pcbgroup update when an mbuf was supplied. */
 	if (m == NULL)
 		in_pcbgroup_update(inp);
 	else
 		in_pcbgroup_update_mbuf(inp, m);
 #endif
 }
 
 void
 in_pcbrehash(struct inpcb *inp)
 {
 
 	/* Rehash with no mbuf available. */
 	in_pcbrehash_mbuf(inp, NULL);
 }
 
 /*
  * Remove PCB from various lists.
  */
 static void
 in_pcbremlists(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbport *phd;
 
 	INP_INFO_WLOCK_ASSERT(pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	if ((inp->inp_flags & INP_INHASHLIST) != 0) {
 		phd = inp->inp_phd;
 
 		INP_HASH_WLOCK(pcbinfo);
 		LIST_REMOVE(inp, inp_portlist);
 		LIST_REMOVE(inp, inp_hash);
 		/* Free the per-port head once its last inpcb is gone. */
 		if (LIST_EMPTY(&phd->phd_pcblist)) {
 			LIST_REMOVE(phd, phd_hash);
 			free(phd, M_PCB);
 		}
 		INP_HASH_WUNLOCK(pcbinfo);
 		inp->inp_flags &= ~INP_INHASHLIST;
 	}
 
 	/* Unlink from the pcbinfo-wide list and adjust the count. */
 	LIST_REMOVE(inp, inp_list);
 	pcbinfo->ipi_count--;
 #ifdef PCBGROUP
 	in_pcbgroup_remove(inp);
 #endif
 }
 
 /*
  * A set label operation has occurred at the socket layer, propagate the
  * label change into the in_pcb for the socket.
  */
 void
 in_pcbsosetlabel(struct socket *so)
 {
 #ifdef MAC
 	struct inpcb *inp = sotoinpcb(so);
 
 	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));
 
 	/* Take the inpcb lock first, then the socket lock. */
 	INP_WLOCK(inp);
 	SOCK_LOCK(so);
 	mac_inpcb_sosetlabel(so, inp);
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 #endif
 }
 
 /*
  * ipport_tick runs once per second, determining if random port allocation
  * should be continued.  If more than ipport_randomcps ports have been
  * allocated in the last second, then we return to sequential port
  * allocation. We return to random allocation only once we drop below
  * ipport_randomcps for at least ipport_randomtime seconds.
  */
 static void
 ipport_tick(void *xtp)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);	/* XXX appease INVARIANTS here */
 		if (V_ipport_tcpallocs >
 		    V_ipport_tcplastcount + V_ipport_randomcps) {
 			/* Over the limit since the last tick: re-arm. */
 			V_ipport_stoprandom = V_ipport_randomtime;
 		} else if (V_ipport_stoprandom > 0) {
 			/* At or under the limit: count down toward zero. */
 			V_ipport_stoprandom--;
 		}
 		V_ipport_tcplastcount = V_ipport_tcpallocs;
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 
 	/* Re-arm the callout to fire again in hz ticks. */
 	callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
 }
 
 static void
 ip_fini(void *xtp)
 {
 
 	/* Stop the periodic ipport_tick callout. */
 	callout_stop(&ipport_tick_callout);
 }
 
 /* 
  * The ipport_callout should start running at about the time we attach the
  * inet or inet6 domains.
  */
 static void
 ipport_tick_init(const void *unused __unused)
 {
 
 	/* Start ipport_tick; the first run is one tick from now. */
 	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
 	callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
 
 	/* Register ip_fini() to stop the callout at shutdown_pre_sync. */
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
 	    SHUTDOWN_PRI_DEFAULT);
 }
 SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
     ipport_tick_init, NULL);
 
 void
 inp_wlock(struct inpcb *inp)
 {
 
 	/* Function form of the INP_WLOCK() macro. */
 	INP_WLOCK(inp);
 }
 
 void
 inp_wunlock(struct inpcb *inp)
 {
 
 	/* Function form of the INP_WUNLOCK() macro. */
 	INP_WUNLOCK(inp);
 }
 
 void
 inp_rlock(struct inpcb *inp)
 {
 
 	/* Function form of the INP_RLOCK() macro. */
 	INP_RLOCK(inp);
 }
 
 void
 inp_runlock(struct inpcb *inp)
 {
 
 	/* Function form of the INP_RUNLOCK() macro. */
 	INP_RUNLOCK(inp);
 }
 
 #ifdef INVARIANTS
 void
 inp_lock_assert(struct inpcb *inp)
 {
 
 	/* Function form of INP_WLOCK_ASSERT(); note it checks the write lock. */
 	INP_WLOCK_ASSERT(inp);
 }
 
 void
 inp_unlock_assert(struct inpcb *inp)
 {
 
 	/* Function form of the INP_UNLOCK_ASSERT() macro. */
 	INP_UNLOCK_ASSERT(inp);
 }
 #endif
 
 /*
  * Call func(inp, arg) for every inpcb on the V_tcbinfo list.  V_tcbinfo is
  * held with INP_INFO_RLOCK() for the whole walk, and each callback is
  * bracketed by INP_WLOCK()/INP_WUNLOCK() on the inpcb being visited.
  */
 void
 inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
 {
 	struct inpcb *inp;
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		func(inp, arg);
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 /*
  * Return the inp_socket pointer stored in the inpcb.  The caller is
  * expected to satisfy INP_WLOCK_ASSERT().
  */
 struct socket *
 inp_inpcbtosocket(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	return (inp->inp_socket);
 }
 
 /*
  * Return the inpcb's inp_ppcb pointer cast to a struct tcpcb pointer.  No
  * check is made here that the inpcb actually belongs to TCP.  The caller is
  * expected to satisfy INP_WLOCK_ASSERT().
  */
 struct tcpcb *
 inp_inpcbtotcpcb(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	return ((struct tcpcb *)inp->inp_ppcb);
 }
 
 /* Return the inpcb's inp_ip_tos value.  No lock assertion is made here. */
 int
 inp_ip_tos_get(const struct inpcb *inp)
 {
 
 	return (inp->inp_ip_tos);
 }
 
 /*
  * Store val in the inpcb's inp_ip_tos field.  The value is not range
  * checked and no lock assertion is made here.
  */
 void
 inp_ip_tos_set(struct inpcb *inp, int val)
 {
 
 	inp->inp_ip_tos = val;
 }
 
 /*
  * Copy the IPv4 4-tuple out of an inpcb: local/foreign address into
  * *laddr/*faddr and local/foreign port into *lp/*fp.  Values are copied as
  * stored in the inpcb; no byte-order conversion is applied here.  The
  * caller is expected to satisfy INP_LOCK_ASSERT().
  */
 void
 inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
     uint32_t *faddr, uint16_t *fp)
 {
 
 	INP_LOCK_ASSERT(inp);
 	*laddr = inp->inp_laddr.s_addr;
 	*faddr = inp->inp_faddr.s_addr;
 	*lp = inp->inp_lport;
 	*fp = inp->inp_fport;
 }
 
 /* Function-call wrapper around sotoinpcb(). */
 struct inpcb *
 so_sotoinpcb(struct socket *so)
 {
 
 	return (sotoinpcb(so));
 }
 
 /* Function-call wrapper around sototcpcb(). */
 struct tcpcb *
 so_sototcpcb(struct socket *so)
 {
 
 	return (sototcpcb(so));
 }
 
 #ifdef DDB
 /* Emit 'indent' spaces through db_printf(); nothing for indent <= 0. */
 static void
 db_print_indent(int indent)
 {
 	int remaining;
 
 	remaining = indent;
 	while (remaining > 0) {
 		db_printf(" ");
 		remaining--;
 	}
 }
 
 /*
  * Print an in_conninfo for DDB: a "<name> at <address>" heading at the
  * given indent, followed by one line each for the local and the foreign
  * address/port pair, indented two columns deeper.  Addresses are formatted
  * as IPv6 when INC_ISIPV6 is set (INET6 kernels only) and as IPv4
  * otherwise; ports are passed through ntohs() before printing.
  */
 static void
 db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
 {
 	/* One buffer per address; shared by the IPv4 and IPv6 formatters. */
 	char faddr_str[48], laddr_str[48];
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, inc);
 
 	indent += 2;
 
 #ifdef INET6
 	if (inc->inc_flags & INC_ISIPV6) {
 		/* IPv6. */
 		ip6_sprintf(laddr_str, &inc->inc6_laddr);
 		ip6_sprintf(faddr_str, &inc->inc6_faddr);
 	} else
 #endif
 	{
 		/* IPv4. */
 		inet_ntoa_r(inc->inc_laddr, laddr_str);
 		inet_ntoa_r(inc->inc_faddr, faddr_str);
 	}
 	db_print_indent(indent);
 	db_printf("inc_laddr %s   inc_lport %u\n", laddr_str,
 	    ntohs(inc->inc_lport));
 	db_print_indent(indent);
 	db_printf("inc_faddr %s   inc_fport %u\n", faddr_str,
 	    ntohs(inc->inc_fport));
 }
 
 /*
  * Print the symbolic names of all bits set in inp_flags as a ", "
  * separated list, without a trailing newline.  Names appear in the fixed
  * order of the table below; nothing is printed when no known bit is set.
  *
  * The separator is always emitted in front of every name but the first.
  * (The IN6P_MTU entry used to print its separator after the name.)
  */
 static void
 db_print_inpflags(int inp_flags)
 {
 	static const struct {
 		unsigned int	 flag;
 		const char	*name;
 	} flagnames[] = {
 		{ INP_RECVOPTS,		"INP_RECVOPTS" },
 		{ INP_RECVRETOPTS,	"INP_RECVRETOPTS" },
 		{ INP_RECVDSTADDR,	"INP_RECVDSTADDR" },
 		{ INP_HDRINCL,		"INP_HDRINCL" },
 		{ INP_HIGHPORT,		"INP_HIGHPORT" },
 		{ INP_LOWPORT,		"INP_LOWPORT" },
 		{ INP_ANONPORT,		"INP_ANONPORT" },
 		{ INP_RECVIF,		"INP_RECVIF" },
 		{ INP_MTUDISC,		"INP_MTUDISC" },
 		{ INP_RECVTTL,		"INP_RECVTTL" },
 		{ INP_DONTFRAG,		"INP_DONTFRAG" },
 		{ INP_RECVTOS,		"INP_RECVTOS" },
 		{ IN6P_IPV6_V6ONLY,	"IN6P_IPV6_V6ONLY" },
 		{ IN6P_PKTINFO,		"IN6P_PKTINFO" },
 		{ IN6P_HOPLIMIT,	"IN6P_HOPLIMIT" },
 		{ IN6P_HOPOPTS,		"IN6P_HOPOPTS" },
 		{ IN6P_DSTOPTS,		"IN6P_DSTOPTS" },
 		{ IN6P_RTHDR,		"IN6P_RTHDR" },
 		{ IN6P_RTHDRDSTOPTS,	"IN6P_RTHDRDSTOPTS" },
 		{ IN6P_TCLASS,		"IN6P_TCLASS" },
 		{ IN6P_AUTOFLOWLABEL,	"IN6P_AUTOFLOWLABEL" },
 		{ INP_TIMEWAIT,		"INP_TIMEWAIT" },
 		{ INP_ONESBCAST,	"INP_ONESBCAST" },
 		{ INP_DROPPED,		"INP_DROPPED" },
 		{ INP_SOCKREF,		"INP_SOCKREF" },
 		{ IN6P_RFC2292,		"IN6P_RFC2292" },
 		{ IN6P_MTU,		"IN6P_MTU" },
 	};
 	const char *sep;
 	unsigned int i;
 
 	sep = "";
 	for (i = 0; i < sizeof(flagnames) / sizeof(flagnames[0]); i++) {
 		if (((unsigned int)inp_flags & flagnames[i].flag) == 0)
 			continue;
 		db_printf("%s%s", sep, flagnames[i].name);
 		/* Every name after the first is preceded by a separator. */
 		sep = ", ";
 	}
 }
 
 /*
  * Print the symbolic names of the bits set in inp_vflag as a ", "
  * separated list, without a trailing newline.
  */
 static void
 db_print_inpvflag(u_char inp_vflag)
 {
 	const char *sep;
 
 	/* Empty before the first name, ", " before every later one. */
 	sep = "";
 	if ((inp_vflag & INP_IPV4) != 0) {
 		db_printf("%sINP_IPV4", sep);
 		sep = ", ";
 	}
 	if ((inp_vflag & INP_IPV6) != 0) {
 		db_printf("%sINP_IPV6", sep);
 		sep = ", ";
 	}
 	if ((inp_vflag & INP_IPV6PROTO) != 0)
 		db_printf("%sINP_IPV6PROTO", sep);
 }
 
 /*
  * Dump the fields of an inpcb for DDB under a "<name> at <address>"
  * heading printed at the given indent.  All field lines are indented two
  * columns deeper than the heading.  The IPv6-specific fields are printed
  * when INP_IPV6 is set in inp_vflag (INET6 kernels only); otherwise the
  * IPv4 option fields are printed.
  */
 static void
 db_print_inpcb(struct inpcb *inp, const char *name, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, inp);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("inp_flow: 0x%x\n", inp->inp_flow);
 
 	db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
 
 	db_print_indent(indent);
 	db_printf("inp_ppcb: %p   inp_pcbinfo: %p   inp_socket: %p\n",
 	    inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
 
 	db_print_indent(indent);
 	db_printf("inp_label: %p   inp_flags: 0x%x (",
 	   inp->inp_label, inp->inp_flags);
 	db_print_inpflags(inp->inp_flags);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("inp_sp: %p   inp_vflag: 0x%x (", inp->inp_sp,
 	    inp->inp_vflag);
 	db_print_inpvflag(inp->inp_vflag);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("inp_ip_ttl: %d   inp_ip_p: %d   inp_ip_minttl: %d\n",
 	    inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
 
 	db_print_indent(indent);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		db_printf("in6p_options: %p   in6p_outputopts: %p   "
 		    "in6p_moptions: %p\n", inp->in6p_options,
 		    inp->in6p_outputopts, inp->in6p_moptions);
 		/*
 		 * The IPv6 case prints two lines; the second one needs its
 		 * own indent to line up with the other fields.
 		 */
 		db_print_indent(indent);
 		db_printf("in6p_icmp6filt: %p   in6p_cksum %d   "
 		    "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
 		    inp->in6p_hops);
 	} else
 #endif
 	{
 		db_printf("inp_ip_tos: %d   inp_ip_options: %p   "
 		    "inp_ip_moptions: %p\n", inp->inp_ip_tos,
 		    inp->inp_options, inp->inp_moptions);
 	}
 
 	db_print_indent(indent);
 	db_printf("inp_phd: %p   inp_gencnt: %ju\n", inp->inp_phd,
 	    (uintmax_t)inp->inp_gencnt);
 }
 
 /*
  * DDB "show inpcb <addr>" command: treat the supplied address as a
  * struct inpcb and dump it; print a usage line when no address was given.
  */
 DB_SHOW_COMMAND(inpcb, db_show_inpcb)
 {
 
 	if (have_addr) {
 		db_print_inpcb((struct inpcb *)addr, "inpcb", 0);
 		return;
 	}
 	db_printf("usage: show inpcb <addr>\n");
 }
 #endif /* DDB */
Index: head/sys/netinet/in_pcbgroup.c
===================================================================
--- head/sys/netinet/in_pcbgroup.c	(revision 277330)
+++ head/sys/netinet/in_pcbgroup.c	(revision 277331)
@@ -1,560 +1,563 @@
 /*-
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * This software was developed by Robert N. M. Watson under contract
  * to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/smp.h>
 #include <sys/socketvar.h>
 
+#include <net/rss_config.h>
+
 #include <netinet/in.h>
+
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
 #endif /* INET6 */
 
 /*
  * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
  * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
  * Strategies in Modern Operating Systems".  This implementation differs
  * significantly from that described in the paper, in that it attempts to
  * introduce not just notions of affinity for connections and distribute work
  * so as to reduce lock contention, but also align those notions with
  * hardware work distribution strategies such as RSS.  In this construction,
  * connection groups supplement, rather than replace, existing reservation
  * tables for protocol 4-tuples, offering CPU-affine lookup tables with
  * minimal cache line migration and lock contention during steady state
  * operation.
  *
  * Hardware-offloaded checksums are often inefficient in software -- for
  * example, Toeplitz, specified by RSS, introduced a significant overhead if
  * performed during per-packet processing.  It is therefore desirable to fall
  * back on traditional reservation table lookups without affinity where
  * hardware-offloaded checksums aren't available, such as for traffic over
  * non-RSS interfaces.
  *
  * Internet protocols, such as UDP and TCP, register to use connection groups
  * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
  * indicates to the connection group code whether a 2-tuple or 4-tuple is
  * used as an argument to hashes that assign a connection to a particular
  * group.  This must be aligned with any hardware offloaded distribution
  * model, such as RSS or similar approaches taken in embedded network boards.
  * Wildcard sockets require special handling, as in Willman 2006, and are
  * shared between connection groups -- while being protected by group-local
  * locks.  This means that connection establishment and teardown can be
  * significantly more expensive than without connection groups, but that
  * steady-state processing can be significantly faster.
  *
  * When RSS is used, certain connection group parameters, such as the number
  * of groups, are provided by the RSS implementation, found in in_rss.c.
  * Otherwise, in_pcbgroup.c selects possible sensible parameters
  * corresponding to the degree of parallelism exposed by netisr.
  *
  * Most of the implementation of connection groups is in this file; however,
  * connection group lookup is implemented in in_pcb.c alongside reservation
  * table lookups -- see in_pcblookup_group().
  *
  * TODO:
  *
  * Implement dynamic rebalancing of buckets with connection groups; when
  * load is unevenly distributed, search for more optimal balancing on
  * demand.  This might require scaling up the number of connection groups
  * by <<1.
  *
  * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
  * groups for ip_input and ip6_input, allowing non-offloaded work
  * distribution.
  *
  * Expose effective CPU affinity of connections to userspace using socket
  * options.
  *
  * Investigate per-connection affinity overrides based on socket options; an
  * option could be set, certainly resulting in work being distributed
  * differently in software, and possibly propagated to supporting hardware
  * with TCAMs or hardware hash tables.  This might require connections to
  * exist in more than one connection group at a time.
  *
  * Hook netisr thread reconfiguration events, and propagate those to RSS so
  * that rebalancing can occur when the thread pool grows or shrinks.
  *
  * Expose per-pcbgroup statistics to userspace monitoring tools such as
  * netstat, in order to allow better debugging and profiling.
  */
 
 /*
  * Set up connection groups for a protocol's pcbinfo.
  *
  * Nothing is allocated (and pcbinfo is left untouched) when hashfields is
  * IPI_HASHFIELDS_NONE, when the machine has a single CPU, or -- with RSS --
  * when RSS reports zero buckets.  Otherwise this allocates the group array,
  * one wildcard hash table for the pcbinfo, and one hash table plus lock per
  * group, each hash table sized from hash_nelements, and records a CPU for
  * every group.
  */
 void
 in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
     int hash_nelements)
 {
 	struct inpcbgroup *pcbgroup;
 	u_int numpcbgroups, pgn;
 
 	/*
 	 * Only enable connection groups for a protocol if it has been
 	 * specifically requested.
 	 */
 	if (hashfields == IPI_HASHFIELDS_NONE)
 		return;
 
 	/*
 	 * Connection groups are about multi-processor load distribution,
 	 * lock contention, and connection CPU affinity.  As such, no point
 	 * in turning them on for a uniprocessor machine, it only wastes
 	 * memory.
 	 */
 	if (mp_ncpus == 1)
 		return;
 
 #ifdef RSS
 	/*
 	 * If we're using RSS, then RSS determines the number of connection
 	 * groups to use: one connection group per RSS bucket.  If for some
 	 * reason RSS isn't able to provide a number of buckets, disable
 	 * connection groups entirely.
 	 *
 	 * XXXRW: Can this ever happen?
 	 */
 	numpcbgroups = rss_getnumbuckets();
 	if (numpcbgroups == 0)
 		return;
 #else
 	/*
 	 * Otherwise, we'll just use one per CPU for now.  If we decide to
 	 * do dynamic rebalancing a la RSS, we'll need similar logic here.
 	 */
 	numpcbgroups = mp_ncpus;
 #endif
 
 	pcbinfo->ipi_hashfields = hashfields;
 	pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
 	    sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
 	pcbinfo->ipi_npcbgroups = numpcbgroups;
 	/* One wildcard table for the whole pcbinfo, not one per group. */
 	pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
 	    &pcbinfo->ipi_wildmask);
 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
 		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
 		pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
 		    &pcbgroup->ipg_hashmask);
 		INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
 
 		/*
 		 * Initialise notional affinity of the pcbgroup -- for RSS,
 		 * we want the same notion of affinity as NICs to be used.  In
 		 * the non-RSS case, just round robin for the time being.
 		 *
 		 * XXXRW: The notion of a bucket to CPU mapping is common at
 		 * both pcbgroup and RSS layers -- does that mean that we
 		 * should migrate it all from RSS to here, and just leave RSS
 		 * responsible only for providing hashing and mapping functions?
 		 */
 #ifdef RSS
 		pcbgroup->ipg_cpu = rss_getcpu(pgn);
 #else
 		pcbgroup->ipg_cpu = (pgn % mp_ncpus);
 #endif
 	}
 }
 
 /*
  * Tear down the connection-group state set up by in_pcbgroup_init():
  * destroy each group's lock and hash table, free the wildcard hash table
  * and the group array, and reset the pcbinfo fields so that
  * ipi_npcbgroups reads as 0 again.  A no-op when no groups exist.
  */
 void
 in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
 {
 	struct inpcbgroup *pcbgroup;
 	u_int pgn;
 
 	if (pcbinfo->ipi_npcbgroups == 0)
 		return;
 
 	/*
 	 * The inpcb list hangs off the pcbinfo rather than off a group, so
 	 * check it once up front instead of on every loop iteration.
 	 */
 	KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead),
 	    ("%s: listhead not empty", __func__));
 
 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
 		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
 		INP_GROUP_LOCK_DESTROY(pcbgroup);
 		hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
 		    pcbgroup->ipg_hashmask);
 	}
 	hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
 	free(pcbinfo->ipi_pcbgroups, M_PCB);
 	pcbinfo->ipi_pcbgroups = NULL;
 	pcbinfo->ipi_npcbgroups = 0;
 	pcbinfo->ipi_hashfields = 0;
 }
 
 /*
  * Given a hash of whatever the covered tuple might be, return a pcbgroup
  * index.  Where RSS is supported, try to align bucket selection with RSS CPU
  * affinity strategy: the index comes from rss_getbucket() and pcbinfo is
  * not consulted.  Without RSS the index is the hash modulo the number of
  * pcbgroups, so callers must not get here with ipi_npcbgroups == 0.
  */
 static __inline u_int
 in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
 {
 
 #ifdef RSS
 	return (rss_getbucket(hash));
 #else
 	return (hash % pcbinfo->ipi_npcbgroups);
 #endif
 }
 
 /*
  * Translate a (hashtype, hash) pair into a connection group.  NULL is
  * returned whenever the hash cannot be used to pick a group: the hash type
  * does not match the fields this protocol hashes on (for example a 2-tuple
  * hash arriving for a 4-tuple protocol), or the kernel was built without
  * RSS.
  */
 struct inpcbgroup *
 in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
 {
 #ifdef RSS
 	int usable;
 
 	usable = 0;
 	/* 4-tuple protocols accept the TCP and UDP over IPv4 hash types. */
 	if (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
 	    (hashtype == M_HASHTYPE_RSS_TCP_IPV4 ||
 	    hashtype == M_HASHTYPE_RSS_UDP_IPV4))
 		usable = 1;
 	/* 2-tuple protocols accept the plain IPv4 hash type. */
 	if (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE &&
 	    hashtype == M_HASHTYPE_RSS_IPV4)
 		usable = 1;
 	if (usable)
 		return (&pcbinfo->ipi_pcbgroups[
 		    in_pcbgroup_getbucket(pcbinfo, hash)]);
 #endif
 
 	return (NULL);
 }
 
 /*
  * Look up a connection group from the hash type and flowid carried in the
  * mbuf's packet header.  Returns whatever in_pcbgroup_byhash() returns,
  * which may be NULL.
  */
 static struct inpcbgroup *
 in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
 {
 
 	return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
 	    m->m_pkthdr.flowid));
 }
 
 /*
  * Pick the connection group for an IPv4 tuple.  A hash is computed from the
  * fields selected by pcbinfo->ipi_hashfields -- with the RSS hash functions
  * when RSS is compiled in, otherwise with a simple XOR -- and mapped to a
  * group by in_pcbgroup_getbucket().  For any other ipi_hashfields value the
  * hash is 0.
  */
 struct inpcbgroup *
 in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
     u_short lport, struct in_addr faddr, u_short fport)
 {
 	uint32_t hash;
 
 	/*
 	 * RSS note: we pass foreign addr/port as source, and local addr/port
 	 * as destination, as we want to align with what the hardware is
 	 * doing.
 	 */
 	switch (pcbinfo->ipi_hashfields) {
 	case IPI_HASHFIELDS_4TUPLE:
 #ifdef RSS
 		hash = rss_hash_ip4_4tuple(faddr, fport, laddr, lport);
 #else
 		/* Non-RSS: only the foreign address and port are hashed. */
 		hash = faddr.s_addr ^ fport;
 #endif
 		break;
 
 	case IPI_HASHFIELDS_2TUPLE:
 #ifdef RSS
 		hash = rss_hash_ip4_2tuple(faddr, laddr);
 #else
 		hash = faddr.s_addr ^ laddr.s_addr;
 #endif
 		break;
 
 	default:
 		hash = 0;
 	}
 	return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
 	    hash)]);
 }
 
 /*
  * Return the connection group an inpcb belongs in.  With RSS, an inpcb
  * flagged INP_RSS_BUCKET_SET maps directly to the group indexed by
  * inp_rss_listen_bucket; in every other case the group is derived from the
  * inpcb's IPv4 local/foreign addresses and ports via in_pcbgroup_bytuple().
  */
 struct inpcbgroup *
 in_pcbgroup_byinpcb(struct inpcb *inp)
 {
 #ifdef	RSS
 	/*
 	 * Listen sockets with INP_RSS_BUCKET_SET set have a pre-determined
 	 * RSS bucket and thus we should use this pcbgroup, rather than
 	 * using a tuple or hash.
 	 *
 	 * XXX should verify that there's actually pcbgroups and inp_rss_listen_bucket
 	 * fits in that!
 	 */
 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
 		return (&inp->inp_pcbinfo->ipi_pcbgroups[inp->inp_rss_listen_bucket]);
 #endif
 
 	return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
 	    inp->inp_lport, inp->inp_faddr, inp->inp_fport));
 }
 
 /*
  * Insert an inpcb into the pcbinfo-wide wildcard hash and mark it
  * INP_PCBGROUPWILD.  The insertion is done with every pcbgroup lock held.
  * The inpcb must be write-locked and must not already be marked wild.
  */
 static void
 in_pcbwild_add(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcbhead *head;
 	u_int pgn;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
 	    ("%s: is wild",__func__));
 
 	pcbinfo = inp->inp_pcbinfo;
 	/* Take all group locks, in ascending index order. */
 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
 		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
 	/* Wildcard entries are hashed on the local port only. */
 	head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
 	    0, pcbinfo->ipi_wildmask)];
 	LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
 	inp->inp_flags2 |= INP_PCBGROUPWILD;
 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
 		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
 }
 
 /*
  * Remove an inpcb from the wildcard hash and clear INP_PCBGROUPWILD.  The
  * list removal is done with every pcbgroup lock held.  The inpcb must be
  * write-locked and must currently be marked wild.
  */
 static void
 in_pcbwild_remove(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 	u_int pgn;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
 	    ("%s: not wild", __func__));
 
 	pcbinfo = inp->inp_pcbinfo;
 	/* Take all group locks, in ascending index order. */
 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
 		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
 	LIST_REMOVE(inp, inp_pcbgroup_wild);
 	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
 		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
 	/*
 	 * NOTE(review): the flag is cleared after the group locks are
 	 * dropped, whereas in_pcbwild_add() sets it while they are held.
 	 */
 	inp->inp_flags2 &= ~INP_PCBGROUPWILD;
 }
 
 /*
  * Return non-zero when the inpcb should live in the wildcard hash, i.e.
  * when its foreign address is unspecified (IPv6) or INADDR_ANY (IPv4).
  * With RSS, an inpcb flagged INP_RSS_BUCKET_SET is never treated as wild.
  */
 static __inline int
 in_pcbwild_needed(struct inpcb *inp)
 {
 #ifdef	RSS
 	/*
 	 * If it's a listen socket and INP_RSS_BUCKET_SET is set,
 	 * it's a wildcard socket _but_ it's in a specific pcbgroup.
 	 * Thus we don't treat it as a pcbwild inp.
 	 */
 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
 		return (0);
 #endif
 
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
 	else
 #endif
 		return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
 }
 
 /*
  * Bring the inpcb's wildcard-hash membership in line with what
  * in_pcbwild_needed() reports: add it when it should be wild but is not,
  * remove it when it is wild but should not be, otherwise do nothing.
  */
 static void
 in_pcbwild_update_internal(struct inpcb *inp)
 {
 	int is_wild, want_wild;
 
 	want_wild = in_pcbwild_needed(inp);
 	is_wild = (inp->inp_flags2 & INP_PCBGROUPWILD) != 0;
 
 	if (want_wild) {
 		if (!is_wild)
 			in_pcbwild_add(inp);
 	} else {
 		if (is_wild)
 			in_pcbwild_remove(inp);
 	}
 }
 
 /*
  * Update the pcbgroup of an inpcb, which might include removing an old
  * pcbgroup reference and/or adding a new one.  Wildcard processing is not
  * performed here, although ideally we'll never install a pcbgroup for a
  * wildcard inpcb (asserted below).
  */
 static void
 in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
     struct inpcbgroup *newpcbgroup, struct inpcb *inp)
 {
 	struct inpcbgroup *oldpcbgroup;
 	struct inpcbhead *pcbhash;
 	uint32_t hashkey_faddr;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Step 1: leave the old group, if there is one and it differs from
 	 * the new one.  Passing newpcbgroup == NULL therefore just removes
 	 * the inpcb from its current group.
 	 */
 	oldpcbgroup = inp->inp_pcbgroup;
 	if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
 		INP_GROUP_LOCK(oldpcbgroup);
 		LIST_REMOVE(inp, inp_pcbgrouphash);
 		inp->inp_pcbgroup = NULL;
 		INP_GROUP_UNLOCK(oldpcbgroup);
 	}
 	/*
 	 * Step 2: join the new group, if there is one and it differs from
 	 * the old one.  An inpcb that stays in the same group is not
 	 * rehashed.
 	 */
 	if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
 #ifdef INET6
 		if (inp->inp_vflag & INP_IPV6)
 			hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
 		else
 #endif
 			hashkey_faddr = inp->inp_faddr.s_addr;
 		INP_GROUP_LOCK(newpcbgroup);
 		/*
 		 * If the inp is an RSS bucket wildcard entry, ensure
 		 * that the PCB hash is calculated correctly.
 		 *
 		 * The wildcard hash calculation differs from the
 		 * non-wildcard definition.  The source address is
 		 * INADDR_ANY and the far port is 0.
 		 */
 		if (inp->inp_flags2 & INP_RSS_BUCKET_SET) {
 			pcbhash = &newpcbgroup->ipg_hashbase[
 			    INP_PCBHASH(INADDR_ANY, inp->inp_lport, 0,
 			    newpcbgroup->ipg_hashmask)];
 		} else {
 			pcbhash = &newpcbgroup->ipg_hashbase[
 			    INP_PCBHASH(hashkey_faddr, inp->inp_lport,
 			    inp->inp_fport,
 			    newpcbgroup->ipg_hashmask)];
 		}
 		LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
 		inp->inp_pcbgroup = newpcbgroup;
 		INP_GROUP_UNLOCK(newpcbgroup);
 	}
 
 	/* An inpcb that needs a wildcard entry must not be given a group. */
 	KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
 	    ("%s: pcbgroup and wildcard!", __func__));
 }
 
 /*
  * Two update paths: one in which the 4-tuple on an inpcb has been updated
  * and therefore connection groups may need to change (or a wildcard entry
  * may needed to be installed), and another in which the 4-tuple has been
  * set as a result of a packet received, in which case we may be able to use
  * the hash on the mbuf to avoid doing a software hash calculation for RSS.
  *
  * In each case: first, let the wildcard code have a go at placing it as a
  * wildcard socket.  If it was a wildcard, or if the connection has been
  * dropped, then no pcbgroup is required (so potentially clear it);
  * otherwise, calculate and update the pcbgroup for the inpcb.
  */
 void
 in_pcbgroup_update(struct inpcb *inp)
 {
 	struct inpcbinfo *info;
 	struct inpcbgroup *grp;
 
 	INP_WLOCK_ASSERT(inp);
 
 	info = inp->inp_pcbinfo;
 	if (!in_pcbgroup_enabled(info))
 		return;
 
 	/* Wildcard placement goes first; it may change INP_PCBGROUPWILD. */
 	in_pcbwild_update_internal(inp);
 
 	if ((inp->inp_flags2 & INP_PCBGROUPWILD) != 0 ||
 	    (inp->inp_flags & INP_DROPPED) != 0) {
 		/* Wildcard or dropped: no pcbgroup membership. */
 		grp = NULL;
 	} else {
 		/* Derive the group from the inpcb itself. */
 #ifdef INET6
 		if (inp->inp_vflag & INP_IPV6)
 			grp = in6_pcbgroup_byinpcb(inp);
 		else
 #endif
 			grp = in_pcbgroup_byinpcb(inp);
 	}
 	in_pcbgroup_update_internal(info, grp, inp);
 }
 
 /*
  * Variant of in_pcbgroup_update() for when a received packet is at hand:
  * the group is first looked up from the hash carried in the mbuf, and only
  * derived from the inpcb itself when that lookup yields nothing.
  */
 void
 in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
 {
 	struct inpcbinfo *info;
 	struct inpcbgroup *grp;
 
 	INP_WLOCK_ASSERT(inp);
 
 	info = inp->inp_pcbinfo;
 	if (!in_pcbgroup_enabled(info))
 		return;
 
 	/*
 	 * Possibly should assert !INP_PCBGROUPWILD rather than testing for
 	 * it; presumably this function should never be called for anything
 	 * other than non-wildcard socket?
 	 */
 	in_pcbwild_update_internal(inp);
 
 	grp = NULL;
 	if ((inp->inp_flags2 & INP_PCBGROUPWILD) == 0 &&
 	    (inp->inp_flags & INP_DROPPED) == 0) {
 		grp = in_pcbgroup_bymbuf(info, m);
 		if (grp == NULL) {
 			/* No usable hash on the mbuf; use the inpcb. */
 #ifdef INET6
 			if (inp->inp_vflag & INP_IPV6)
 				grp = in6_pcbgroup_byinpcb(inp);
 			else
 #endif
 				grp = in_pcbgroup_byinpcb(inp);
 		}
 	}
 	in_pcbgroup_update_internal(info, grp, inp);
 }
 
 /*
  * Detach an inpcb from all connection-group state: drop its wildcard entry
  * if it has one, then unhook it from its pcbgroup hash chain, if any.  Does
  * nothing when pcbgroups are not enabled for the protocol.
  */
 void
 in_pcbgroup_remove(struct inpcb *inp)
 {
 	struct inpcbgroup *grp;
 
 	INP_WLOCK_ASSERT(inp);
 
 	if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
 		return;
 
 	if ((inp->inp_flags2 & INP_PCBGROUPWILD) != 0)
 		in_pcbwild_remove(inp);
 
 	grp = inp->inp_pcbgroup;
 	if (grp == NULL)
 		return;
 
 	INP_GROUP_LOCK(grp);
 	LIST_REMOVE(inp, inp_pcbgrouphash);
 	inp->inp_pcbgroup = NULL;
 	INP_GROUP_UNLOCK(grp);
 }
 
 /*
  * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
  * for a protocol.  Returns non-zero when the pcbinfo has at least one
  * pcbgroup, which is only the case after in_pcbgroup_init() has actually
  * set groups up.
  */
 int
 in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
 {
 
 	return (pcbinfo->ipi_npcbgroups > 0);
 }
Index: head/sys/netinet/in_rss.c
===================================================================
--- head/sys/netinet/in_rss.c	(revision 277330)
+++ head/sys/netinet/in_rss.c	(revision 277331)
@@ -1,899 +1,357 @@
 /*-
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * This software was developed by Robert N. M. Watson under contract
  * to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 #include "opt_pcbgroup.h"
 
 #ifndef PCBGROUP
 #error "options RSS depends on options PCBGROUP"
 #endif
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/priv.h>
 #include <sys/kernel.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/sbuf.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
+#include <net/rss_config.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #include <netinet/in_var.h>
-#include <netinet/toeplitz.h>
 
 /* for software rss hash support */
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
-/*-
- * Operating system parts of receiver-side scaling (RSS), which allows
- * network cards to direct flows to particular receive queues based on hashes
- * of header tuples.  This implementation aligns RSS buckets with connection
- * groups at the TCP/IP layer, so each bucket is associated with exactly one
- * group.  As a result, the group lookup structures (and lock) should have an
- * effective affinity with exactly one CPU.
- *
- * Network device drivers needing to configure RSS will query this framework
- * for parameters, such as the current RSS key, hashing policies, number of
- * bits, and indirection table mapping hashes to buckets and CPUs.  They may
- * provide their own supplementary information, such as queue<->CPU bindings.
- * It is the responsibility of the network device driver to inject packets
- * into the stack on as close to the right CPU as possible, if playing by RSS
- * rules.
- *
- * TODO:
- *
- * - Synchronization for rss_key and other future-configurable parameters.
- * - Event handler drivers can register to pick up RSS configuration changes.
- * - Should we allow rss_basecpu to be configured?
- * - Randomize key on boot.
- * - IPv6 support.
- * - Statistics on how often there's a misalignment between hardware
- *   placement and pcbgroup expectations.
- */
-
-SYSCTL_NODE(_net_inet, OID_AUTO, rss, CTLFLAG_RW, 0, "Receive-side steering");
-
 /*
- * Toeplitz is the only required hash function in the RSS spec, so use it by
- * default.
- */
-static u_int	rss_hashalgo = RSS_HASH_TOEPLITZ;
-SYSCTL_INT(_net_inet_rss, OID_AUTO, hashalgo, CTLFLAG_RDTUN, &rss_hashalgo, 0,
-    "RSS hash algorithm");
-
-/*
- * Size of the indirection table; at most 128 entries per the RSS spec.  We
- * size it to at least 2 times the number of CPUs by default to allow useful
- * rebalancing.  If not set explicitly with a loader tunable, we tune based
- * on the number of CPUs present.
- *
- * XXXRW: buckets might be better to use for the tunable than bits.
- */
-static u_int	rss_bits;
-SYSCTL_INT(_net_inet_rss, OID_AUTO, bits, CTLFLAG_RDTUN, &rss_bits, 0,
-    "RSS bits");
-
-static u_int	rss_mask;
-SYSCTL_INT(_net_inet_rss, OID_AUTO, mask, CTLFLAG_RD, &rss_mask, 0,
-    "RSS mask");
-
-static const u_int	rss_maxbits = RSS_MAXBITS;
-SYSCTL_INT(_net_inet_rss, OID_AUTO, maxbits, CTLFLAG_RD,
-    __DECONST(int *, &rss_maxbits), 0, "RSS maximum bits");
-
-/*
- * RSS's own count of the number of CPUs it could be using for processing.
- * Bounded to 64 by RSS constants.
- */
-static u_int	rss_ncpus;
-SYSCTL_INT(_net_inet_rss, OID_AUTO, ncpus, CTLFLAG_RD, &rss_ncpus, 0,
-    "Number of CPUs available to RSS");
-
-#define	RSS_MAXCPUS	(1 << (RSS_MAXBITS - 1))
-static const u_int	rss_maxcpus = RSS_MAXCPUS;
-SYSCTL_INT(_net_inet_rss, OID_AUTO, maxcpus, CTLFLAG_RD,
-    __DECONST(int *, &rss_maxcpus), 0, "RSS maximum CPUs that can be used");
-
-/*
- * Variable exists just for reporting rss_bits in a user-friendly way.
- */
-static u_int	rss_buckets;
-SYSCTL_INT(_net_inet_rss, OID_AUTO, buckets, CTLFLAG_RD, &rss_buckets, 0,
-    "RSS buckets");
-
-/*
- * Base CPU number; devices will add this to all CPU numbers returned by the
- * RSS indirection table.  Currently unmodifable in FreeBSD.
- */
-static const u_int	rss_basecpu;
-SYSCTL_INT(_net_inet_rss, OID_AUTO, basecpu, CTLFLAG_RD,
-    __DECONST(int *, &rss_basecpu), 0, "RSS base CPU");
-
-/*
- * RSS secret key, intended to prevent attacks on load-balancing.  Its
- * effectiveness may be limited by algorithm choice and available entropy
- * during the boot.
- *
- * XXXRW: And that we don't randomize it yet!
- *
- * This is the default Microsoft RSS specification key which is also
- * the Chelsio T5 firmware default key.
- */
-static uint8_t rss_key[RSS_KEYSIZE] = {
-	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
-	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
-	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
-	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
-	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
-};
-
-/*
- * RSS hash->CPU table, which maps hashed packet headers to particular CPUs.
- * Drivers may supplement this table with a seperate CPU<->queue table when
- * programming devices.
- */
-struct rss_table_entry {
-	uint8_t		rte_cpu;	/* CPU affinity of bucket. */
-};
-static struct rss_table_entry	rss_table[RSS_TABLE_MAXLEN];
-
-static inline u_int rss_gethashconfig_local(void);
-
-static void
-rss_init(__unused void *arg)
-{
-	u_int i;
-	u_int cpuid;
-
-	/*
-	 * Validate tunables, coerce to sensible values.
-	 */
-	switch (rss_hashalgo) {
-	case RSS_HASH_TOEPLITZ:
-	case RSS_HASH_NAIVE:
-		break;
-
-	default:
-		printf("%s: invalid RSS hashalgo %u, coercing to %u",
-		    __func__, rss_hashalgo, RSS_HASH_TOEPLITZ);
-		rss_hashalgo = RSS_HASH_TOEPLITZ;
-	}
-
-	/*
-	 * Count available CPUs.
-	 *
-	 * XXXRW: Note incorrect assumptions regarding contiguity of this set
-	 * elsewhere.
-	 */
-	rss_ncpus = 0;
-	for (i = 0; i <= mp_maxid; i++) {
-		if (CPU_ABSENT(i))
-			continue;
-		rss_ncpus++;
-	}
-	if (rss_ncpus > RSS_MAXCPUS)
-		rss_ncpus = RSS_MAXCPUS;
-
-	/*
-	 * Tune RSS table entries to be no less than 2x the number of CPUs
-	 * -- unless we're running uniprocessor, in which case there's not
-	 * much point in having buckets to rearrange for load-balancing!
-	 */
-	if (rss_ncpus > 1) {
-		if (rss_bits == 0)
-			rss_bits = fls(rss_ncpus - 1) + 1;
-
-		/*
-		 * Microsoft limits RSS table entries to 128, so apply that
-		 * limit to both auto-detected CPU counts and user-configured
-		 * ones.
-		 */
-		if (rss_bits == 0 || rss_bits > RSS_MAXBITS) {
-			printf("%s: RSS bits %u not valid, coercing to  %u",
-			    __func__, rss_bits, RSS_MAXBITS);
-			rss_bits = RSS_MAXBITS;
-		}
-
-		/*
-		 * Figure out how many buckets to use; warn if less than the
-		 * number of configured CPUs, although this is not a fatal
-		 * problem.
-		 */
-		rss_buckets = (1 << rss_bits);
-		if (rss_buckets < rss_ncpus)
-			printf("%s: WARNING: rss_buckets (%u) less than "
-			    "rss_ncpus (%u)\n", __func__, rss_buckets,
-			    rss_ncpus);
-		rss_mask = rss_buckets - 1;
-	} else {
-		rss_bits = 0;
-		rss_buckets = 1;
-		rss_mask = 0;
-	}
-
-	/*
-	 * Set up initial CPU assignments: round-robin by default.
-	 */
-	cpuid = CPU_FIRST();
-	for (i = 0; i < rss_buckets; i++) {
-		rss_table[i].rte_cpu = cpuid;
-		cpuid = CPU_NEXT(cpuid);
-	}
-
-	/*
-	 * Randomize rrs_key.
-	 *
-	 * XXXRW: Not yet.  If nothing else, will require an rss_isbadkey()
-	 * loop to check for "bad" RSS keys.
-	 */
-}
-SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL);
-
-static uint32_t
-rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen,
-    const uint8_t *data)
-{
-	uint32_t v;
-	u_int i;
-
-	v = 0;
-	for (i = 0; i < keylen; i++)
-		v += key[i];
-	for (i = 0; i < datalen; i++)
-		v += data[i];
-	return (v);
-}
-
-static uint32_t
-rss_hash(u_int datalen, const uint8_t *data)
-{
- 
-	switch (rss_hashalgo) {
-	case RSS_HASH_TOEPLITZ:
-		return (toeplitz_hash(sizeof(rss_key), rss_key, datalen,
-		    data));
-
-	case RSS_HASH_NAIVE:
-		return (rss_naive_hash(sizeof(rss_key), rss_key, datalen,
-		    data));
-
-	default:
-		panic("%s: unsupported/unknown hashalgo %d", __func__,
-		    rss_hashalgo);
-	}
-}
-
-/*
  * Hash an IPv4 2-tuple.
  */
 uint32_t
 rss_hash_ip4_2tuple(struct in_addr src, struct in_addr dst)
 {
 	uint8_t data[sizeof(src) + sizeof(dst)];
 	u_int datalen;
 
 	datalen = 0;
 	bcopy(&src, &data[datalen], sizeof(src));
 	datalen += sizeof(src);
 	bcopy(&dst, &data[datalen], sizeof(dst));
 	datalen += sizeof(dst);
 	return (rss_hash(datalen, data));
 }
 
 /*
  * Hash an IPv4 4-tuple.
  */
 uint32_t
 rss_hash_ip4_4tuple(struct in_addr src, u_short srcport, struct in_addr dst,
     u_short dstport)
 {
 	uint8_t data[sizeof(src) + sizeof(dst) + sizeof(srcport) +
 	    sizeof(dstport)];
 	u_int datalen;
 
 	datalen = 0;
 	bcopy(&src, &data[datalen], sizeof(src));
 	datalen += sizeof(src);
 	bcopy(&dst, &data[datalen], sizeof(dst));
 	datalen += sizeof(dst);
 	bcopy(&srcport, &data[datalen], sizeof(srcport));
 	datalen += sizeof(srcport);
 	bcopy(&dstport, &data[datalen], sizeof(dstport));
 	datalen += sizeof(dstport);
 	return (rss_hash(datalen, data));
 }
 
-#ifdef INET6
 /*
- * Hash an IPv6 2-tuple.
- */
-uint32_t
-rss_hash_ip6_2tuple(const struct in6_addr *src, const struct in6_addr *dst)
-{
-	uint8_t data[sizeof(*src) + sizeof(*dst)];
-	u_int datalen;
-
-	datalen = 0;
-	bcopy(src, &data[datalen], sizeof(*src));
-	datalen += sizeof(*src);
-	bcopy(dst, &data[datalen], sizeof(*dst));
-	datalen += sizeof(*dst);
-	return (rss_hash(datalen, data));
-}
-
-/*
- * Hash an IPv6 4-tuple.
- */
-uint32_t
-rss_hash_ip6_4tuple(const struct in6_addr *src, u_short srcport,
-    const struct in6_addr *dst, u_short dstport)
-{
-	uint8_t data[sizeof(*src) + sizeof(*dst) + sizeof(srcport) +
-	    sizeof(dstport)];
-	u_int datalen;
-
-	datalen = 0;
-	bcopy(src, &data[datalen], sizeof(*src));
-	datalen += sizeof(*src);
-	bcopy(dst, &data[datalen], sizeof(*dst));
-	datalen += sizeof(*dst);
-	bcopy(&srcport, &data[datalen], sizeof(srcport));
-	datalen += sizeof(srcport);
-	bcopy(&dstport, &data[datalen], sizeof(dstport));
-	datalen += sizeof(dstport);
-	return (rss_hash(datalen, data));
-}
-#endif /* INET6 */
-
-/*
- * Query the number of RSS bits in use.
- */
-u_int
-rss_getbits(void)
-{
-
-	return (rss_bits);
-}
-
-/*
- * Query the RSS bucket associated with an RSS hash.
- */
-u_int
-rss_getbucket(u_int hash)
-{
-
-	return (hash & rss_mask);
-}
-
-/*
- * Query the RSS layer bucket associated with the given
- * entry in the RSS hash space.
- *
- * The RSS indirection table is 0 .. rss_buckets-1,
- * covering the low 'rss_bits' of the total 128 slot
- * RSS indirection table.  So just mask off rss_bits and
- * return that.
- *
- * NIC drivers can then iterate over the 128 slot RSS
- * indirection table and fetch which RSS bucket to
- * map it to.  This will typically be a CPU queue
- */
-u_int
-rss_get_indirection_to_bucket(u_int index)
-{
-
-	return (index & rss_mask);
-}
-
-/*
- * Query the RSS CPU associated with an RSS bucket.
- */
-u_int
-rss_getcpu(u_int bucket)
-{
-
-	return (rss_table[bucket].rte_cpu);
-}
-
-/*
- * netisr CPU affinity lookup given just the hash and hashtype.
- */
-u_int
-rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type)
-{
-
-	switch (hash_type) {
-	case M_HASHTYPE_RSS_IPV4:
-	case M_HASHTYPE_RSS_TCP_IPV4:
-	case M_HASHTYPE_RSS_UDP_IPV4:
-	case M_HASHTYPE_RSS_IPV6:
-	case M_HASHTYPE_RSS_TCP_IPV6:
-	case M_HASHTYPE_RSS_UDP_IPV6:
-		return (rss_getcpu(rss_getbucket(hash_val)));
-	default:
-		return (NETISR_CPUID_NONE);
-	}
-}
-
-/*
- * Query the RSS bucket associated with the given hash value and
- * type.
- */
-int
-rss_hash2bucket(uint32_t hash_val, uint32_t hash_type, uint32_t *bucket_id)
-{
-
-	switch (hash_type) {
-	case M_HASHTYPE_RSS_IPV4:
-	case M_HASHTYPE_RSS_TCP_IPV4:
-	case M_HASHTYPE_RSS_UDP_IPV4:
-	case M_HASHTYPE_RSS_IPV6:
-	case M_HASHTYPE_RSS_TCP_IPV6:
-	case M_HASHTYPE_RSS_UDP_IPV6:
-		*bucket_id = rss_getbucket(hash_val);
-		return (0);
-	default:
-		return (-1);
-	}
-}
-
-/*
- * netisr CPU affinity lookup routine for use by protocols.
- */
-struct mbuf *
-rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
-{
-
-	M_ASSERTPKTHDR(m);
-	*cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m));
-	return (m);
-}
-
-int
-rss_m2bucket(struct mbuf *m, uint32_t *bucket_id)
-{
-
-	M_ASSERTPKTHDR(m);
-
-	return(rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
-	    bucket_id));
-}
-
-/*
  * Calculate an appropriate ipv4 2-tuple or 4-tuple given the given
  * IPv4 source/destination address, UDP or TCP source/destination ports
  * and the protocol type.
  *
  * The protocol code may wish to do a software hash of the given
  * tuple.  This depends upon the currently configured RSS hash types.
  *
  * This assumes that the packet in question isn't a fragment.
  *
  * It also assumes the packet source/destination address
  * are in "incoming" packet order (ie, source is "far" address.)
  */
 int
 rss_proto_software_hash_v4(struct in_addr s, struct in_addr d,
     u_short sp, u_short dp, int proto,
     uint32_t *hashval, uint32_t *hashtype)
 {
 	uint32_t hash;
 
 	/*
 	 * Next, choose the hash type depending upon the protocol
 	 * identifier.
 	 */
 	if ((proto == IPPROTO_TCP) &&
-	    (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
+	    (rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
 		hash = rss_hash_ip4_4tuple(s, sp, d, dp);
 		*hashval = hash;
 		*hashtype = M_HASHTYPE_RSS_TCP_IPV4;
 		return (0);
 	} else if ((proto == IPPROTO_UDP) &&
-	    (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
+	    (rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
 		hash = rss_hash_ip4_4tuple(s, sp, d, dp);
 		*hashval = hash;
 		*hashtype = M_HASHTYPE_RSS_UDP_IPV4;
 		return (0);
-	} else if (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) {
+	} else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) {
 		/* RSS doesn't hash on other protocols like SCTP; so 2-tuple */
 		hash = rss_hash_ip4_2tuple(s, d);
 		*hashval = hash;
 		*hashtype = M_HASHTYPE_RSS_IPV4;
 		return (0);
 	}
 
 	/* No configured available hashtypes! */
 	printf("%s: no available hashtypes!\n", __func__);
 	return (-1);
 }
 
 /*
  * Do a software calculation of the RSS for the given mbuf.
  *
  * This is typically used by the input path to recalculate the RSS after
  * some form of packet processing (eg de-capsulation, IP fragment reassembly.)
  *
  * dir is the packet direction - RSS_HASH_PKT_INGRESS for incoming and
  * RSS_HASH_PKT_EGRESS for outgoing.
  *
  * Returns 0 if a hash was done, -1 if no hash was done, +1 if
  * the mbuf already had a valid RSS flowid.
  *
  * This function doesn't modify the mbuf.  It's up to the caller to
  * assign flowid/flowtype as appropriate.
  */
 int
 rss_mbuf_software_hash_v4(const struct mbuf *m, int dir, uint32_t *hashval,
     uint32_t *hashtype)
 {
 	const struct ip *ip;
 	const struct tcphdr *th;
 	const struct udphdr *uh;
 	uint32_t flowid;
 	uint32_t flowtype;
 	uint8_t proto;
 	int iphlen;
 	int is_frag = 0;
 
 	/*
 	 * XXX For now this only handles hashing on incoming mbufs.
 	 */
 	if (dir != RSS_HASH_PKT_INGRESS) {
 		printf("%s: called on EGRESS packet!\n", __func__);
 		return (-1);
 	}
 
 	/*
 	 * First, validate that the mbuf we have is long enough
 	 * to have an IPv4 header in it.
 	 */
 	if (m->m_pkthdr.len < (sizeof(struct ip))) {
 		printf("%s: short mbuf pkthdr\n", __func__);
 		return (-1);
 	}
 	if (m->m_len < (sizeof(struct ip))) {
 		printf("%s: short mbuf len\n", __func__);
 		return (-1);
 	}
 
 	/* Ok, let's dereference that */
 	ip = mtod(m, struct ip *);
 	proto = ip->ip_p;
 	iphlen = ip->ip_hl << 2;
 
 	/*
 	 * If this is a fragment then it shouldn't be four-tuple
 	 * hashed just yet.  Once it's reassembled into a full
 	 * frame it should be re-hashed.
 	 */
 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
 		is_frag = 1;
 
 	/*
 	 * If the mbuf flowid/flowtype matches the packet type,
 	 * and we don't support the 4-tuple version of the given protocol,
 	 * then signal to the owner that it can trust the flowid/flowtype
 	 * details.
 	 *
 	 * This is a little picky - eg, if TCPv4 / UDPv4 hashing
 	 * is supported but we got a TCP/UDP frame only 2-tuple hashed,
 	 * then we shouldn't just "trust" the 2-tuple hash.  We need
 	 * a 4-tuple hash.
 	 */
 	flowid = m->m_pkthdr.flowid;
 	flowtype = M_HASHTYPE_GET(m);
 
 	if (flowtype != M_HASHTYPE_NONE) {
 		switch (proto) {
 		case IPPROTO_UDP:
-			if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) &&
+			if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4) &&
 			    (flowtype == M_HASHTYPE_RSS_UDP_IPV4) &&
 			    (is_frag == 0)) {
 				return (1);
 			}
 			/*
 			 * Only allow 2-tuple for UDP frames if we don't also
 			 * support 4-tuple for UDP.
 			 */
-			if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) &&
-			    ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) == 0) &&
+			if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) &&
+			    ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4) == 0) &&
 			    flowtype == M_HASHTYPE_RSS_IPV4) {
 				return (1);
 			}
 			break;
 		case IPPROTO_TCP:
-			if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) &&
+			if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4) &&
 			    (flowtype == M_HASHTYPE_RSS_TCP_IPV4) &&
 			    (is_frag == 0)) {
 				return (1);
 			}
 			/*
 			 * Only allow 2-tuple for TCP frames if we don't also
 			 * support 2-tuple for TCP.
 			 */
-			if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) &&
-			    ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) == 0) &&
+			if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) &&
+			    ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4) == 0) &&
 			    flowtype == M_HASHTYPE_RSS_IPV4) {
 				return (1);
 			}
 			break;
 		default:
-			if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) &&
+			if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) &&
 			    flowtype == M_HASHTYPE_RSS_IPV4) {
 				return (1);
 			}
 			break;
 		}
 	}
 
 	/*
 	 * Decode enough information to make a hash decision.
 	 *
 	 * XXX TODO: does the hardware hash on 4-tuple if IP
 	 *    options are present?
 	 */
-	if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_TCP_IPV4) &&
+	if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4) &&
 	    (proto == IPPROTO_TCP) &&
 	    (is_frag == 0)) {
 		if (m->m_len < iphlen + sizeof(struct tcphdr)) {
 			printf("%s: short TCP frame?\n", __func__);
 			return (-1);
 		}
 		th = (struct tcphdr *)((caddr_t)ip + iphlen);
 		return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst,
 		    th->th_sport,
 		    th->th_dport,
 		    proto,
 		    hashval,
 		    hashtype);
-	} else if ((rss_gethashconfig_local() & RSS_HASHTYPE_RSS_UDP_IPV4) &&
+	} else if ((rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4) &&
 	    (proto == IPPROTO_UDP) &&
 	    (is_frag == 0)) {
 		uh = (struct udphdr *)((caddr_t)ip + iphlen);
 		if (m->m_len < iphlen + sizeof(struct udphdr)) {
 			printf("%s: short UDP frame?\n", __func__);
 			return (-1);
 		}
 		return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst,
 		    uh->uh_sport,
 		    uh->uh_dport,
 		    proto,
 		    hashval,
 		    hashtype);
-	} else if (rss_gethashconfig_local() & RSS_HASHTYPE_RSS_IPV4) {
+	} else if (rss_gethashconfig() & RSS_HASHTYPE_RSS_IPV4) {
 		/* Default to 2-tuple hash */
 		return rss_proto_software_hash_v4(ip->ip_src, ip->ip_dst,
 		    0,	/* source port */
 		    0,	/* destination port */
 		    0,	/* IPPROTO_IP */
 		    hashval,
 		    hashtype);
 	} else {
 		printf("%s: no available hashtypes!\n", __func__);
 		return (-1);
 	}
 }
 
 /*
  * Similar to rss_m2cpuid, but designed to be used by the IP NETISR
  * on incoming frames.
  *
  * If an existing RSS hash exists and it matches what the configured
  * hashing is, then use it.
  *
  * If there's an existing RSS hash but the desired hash is different,
  * or if there's no useful RSS hash, then calculate it via
  * the software path.
  *
  * XXX TODO: definitely want statistics here!
  */
 struct mbuf *
 rss_soft_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
 {
 	uint32_t hash_val, hash_type;
 	int ret;
 
 	M_ASSERTPKTHDR(m);
 
 	ret = rss_mbuf_software_hash_v4(m, RSS_HASH_PKT_INGRESS,
 	    &hash_val, &hash_type);
 	if (ret > 0) {
 		/* mbuf has a valid hash already; don't need to modify it */
 		*cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m));
 	} else if (ret == 0) {
 		/* hash was done; update */
 		m->m_pkthdr.flowid = hash_val;
 		M_HASHTYPE_SET(m, hash_type);
 		*cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m));
 	} else { /* ret < 0 */
 		/* no hash was done */
 		*cpuid = NETISR_CPUID_NONE;
 	}
 	return (m);
 }
-
-/*
- * Query the RSS hash algorithm.
- */
-u_int
-rss_gethashalgo(void)
-{
-
-	return (rss_hashalgo);
-}
-
-/*
- * Query the current RSS key; likely to be used by device drivers when
- * configuring hardware RSS.  Caller must pass an array of size RSS_KEYSIZE.
- *
- * XXXRW: Perhaps we should do the accept-a-length-and-truncate thing?
- */
-void
-rss_getkey(uint8_t *key)
-{
-
-	bcopy(rss_key, key, sizeof(rss_key));
-}
-
-/*
- * Query the number of buckets; this may be used by both network device
- * drivers, which will need to populate hardware shadows of the software
- * indirection table, and the network stack itself (such as when deciding how
- * many connection groups to allocate).
- */
-u_int
-rss_getnumbuckets(void)
-{
-
-	return (rss_buckets);
-}
-
-/*
- * Query the number of CPUs in use by RSS; may be useful to device drivers
- * trying to figure out how to map a larger number of CPUs into a smaller
- * number of receive queues.
- */
-u_int
-rss_getnumcpus(void)
-{
-
-	return (rss_ncpus);
-}
-
-static inline u_int
-rss_gethashconfig_local(void)
-{
-
-	/* Return 4-tuple for TCP; 2-tuple for others */
-	/*
-	 * UDP may fragment more often than TCP and thus we'll end up with
-	 * NICs returning 2-tuple fragments.
-	 * udp_init() and udplite_init() both currently initialise things
-	 * as 2-tuple.
-	 * So for now disable UDP 4-tuple hashing until all of the other
-	 * pieces are in place.
-	 */
-	return (
-	    RSS_HASHTYPE_RSS_IPV4
-	|    RSS_HASHTYPE_RSS_TCP_IPV4
-	|    RSS_HASHTYPE_RSS_IPV6
-	|    RSS_HASHTYPE_RSS_TCP_IPV6
-	|    RSS_HASHTYPE_RSS_IPV6_EX
-	|    RSS_HASHTYPE_RSS_TCP_IPV6_EX
-#if 0
-	|    RSS_HASHTYPE_RSS_UDP_IPV4
-	|    RSS_HASHTYPE_RSS_UDP_IPV4_EX
-	|    RSS_HASHTYPE_RSS_UDP_IPV6
-	|    RSS_HASHTYPE_RSS_UDP_IPV6_EX
-#endif
-	);
-}
-
-/*
- * Return the supported RSS hash configuration.
- *
- * NICs should query this to determine what to configure in their redirection
- * matching table.
- */
-u_int
-rss_gethashconfig(void)
-{
-
-	return (rss_gethashconfig_local());
-}
-
-/*
- * XXXRW: Confirm that sysctl -a won't dump this keying material, don't want
- * it appearing in debugging output unnecessarily.
- */
-static int
-sysctl_rss_key(SYSCTL_HANDLER_ARGS)
-{
-	uint8_t temp_rss_key[RSS_KEYSIZE];
-	int error;
-
-	error = priv_check(req->td, PRIV_NETINET_HASHKEY);
-	if (error)
-		return (error);
-
-	bcopy(rss_key, temp_rss_key, sizeof(temp_rss_key));
-	error = sysctl_handle_opaque(oidp, temp_rss_key,
-	    sizeof(temp_rss_key), req);
-	if (error)
-		return (error);
-	if (req->newptr != NULL) {
-		/* XXXRW: Not yet. */
-		return (EINVAL);
-	}
-	return (0);
-}
-SYSCTL_PROC(_net_inet_rss, OID_AUTO, key,
-    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rss_key,
-    "", "RSS keying material");
-
-static int
-sysctl_rss_bucket_mapping(SYSCTL_HANDLER_ARGS)
-{
-	struct sbuf *sb;
-	int error;
-	int i;
-
-	error = 0;
-	error = sysctl_wire_old_buffer(req, 0);
-	if (error != 0)
-		return (error);
-	sb = sbuf_new_for_sysctl(NULL, NULL, 512, req);
-	if (sb == NULL)
-		return (ENOMEM);
-	for (i = 0; i < rss_buckets; i++) {
-		sbuf_printf(sb, "%s%d:%d", i == 0 ? "" : " ",
-		    i,
-		    rss_getcpu(i));
-	}
-	error = sbuf_finish(sb);
-	sbuf_delete(sb);
-
-	return (error);
-}
-SYSCTL_PROC(_net_inet_rss, OID_AUTO, bucket_mapping,
-    CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
-    sysctl_rss_bucket_mapping, "", "RSS bucket -> CPU mapping");
Index: head/sys/netinet/in_rss.h
===================================================================
--- head/sys/netinet/in_rss.h	(revision 277330)
+++ head/sys/netinet/in_rss.h	(revision 277331)
@@ -1,142 +1,57 @@
 /*-
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * This software was developed by Robert N. M. Watson under contract
  * to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IN_RSS_H_
 #define	_NETINET_IN_RSS_H_
 
 #include <netinet/in.h>		/* in_addr_t */
 
 /*
- * Supported RSS hash functions.
- */
-#define	RSS_HASH_NAIVE		0x00000001	/* Poor but fast hash. */
-#define	RSS_HASH_TOEPLITZ	0x00000002	/* Required by RSS. */
-#define	RSS_HASH_CRC32		0x00000004	/* Future; some NICs do it. */
-
-#define	RSS_HASH_MASK		(RSS_HASH_NAIVE | RSS_HASH_TOEPLITZ)
-
-/*
- * Instances of struct inpcbinfo declare an RSS hash type indicating what
- * header fields are covered.
- */
-#define	RSS_HASHFIELDS_NONE		0
-#define	RSS_HASHFIELDS_4TUPLE		1
-#define	RSS_HASHFIELDS_2TUPLE		2
-
-/*
- * Define RSS representations of the M_HASHTYPE_* values, representing
- * which particular bits are supported.  The NICs can then use this to
- * calculate which hash types to enable and which not to enable.
- *
- * The fact that these line up with M_HASHTYPE_* is not to be relied
- * upon.
- */
-#define	RSS_HASHTYPE_RSS_IPV4		(1 << 1)	/* IPv4 2-tuple */
-#define	RSS_HASHTYPE_RSS_TCP_IPV4	(1 << 2)	/* TCPv4 4-tuple */
-#define	RSS_HASHTYPE_RSS_IPV6		(1 << 3)	/* IPv6 2-tuple */
-#define	RSS_HASHTYPE_RSS_TCP_IPV6	(1 << 4)	/* TCPv6 4-tuple */
-#define	RSS_HASHTYPE_RSS_IPV6_EX	(1 << 5)	/* IPv6 2-tuple + ext hdrs */
-#define	RSS_HASHTYPE_RSS_TCP_IPV6_EX	(1 << 6)	/* TCPv6 4-tiple + ext hdrs */
-#define	RSS_HASHTYPE_RSS_UDP_IPV4	(1 << 7)	/* IPv4 UDP 4-tuple */
-#define	RSS_HASHTYPE_RSS_UDP_IPV4_EX	(1 << 8)	/* IPv4 UDP 4-tuple + ext hdrs */
-#define	RSS_HASHTYPE_RSS_UDP_IPV6	(1 << 9)	/* IPv6 UDP 4-tuple */
-#define	RSS_HASHTYPE_RSS_UDP_IPV6_EX	(1 << 10)	/* IPv6 UDP 4-tuple + ext hdrs */
-
-/*
- * Compile-time limits on the size of the indirection table.
- */
-#define	RSS_MAXBITS	7
-#define	RSS_TABLE_MAXLEN	(1 << RSS_MAXBITS)
-
-/*
- * Maximum key size used throughout.  It's OK for hardware to use only the
- * first 16 bytes, which is all that's required for IPv4.
- */
-#define	RSS_KEYSIZE	40
-
-/*
- * For RSS hash methods that do a software hash on an mbuf, the packet
- * direction (ingress / egress) is required.
- *
- * The default direction (INGRESS) is the "receive into the NIC" - ie,
- * what the hardware is hashing on.
- */
-#define	RSS_HASH_PKT_INGRESS	0
-#define	RSS_HASH_PKT_EGRESS	1
-
-/*
- * Device driver interfaces to query RSS properties that must be programmed
- * into hardware.
- */
-u_int	rss_getbits(void);
-u_int	rss_getbucket(u_int hash);
-u_int	rss_get_indirection_to_bucket(u_int index);
-u_int	rss_getcpu(u_int bucket);
-void	rss_getkey(uint8_t *key);
-u_int	rss_gethashalgo(void);
-u_int	rss_getnumbuckets(void);
-u_int	rss_getnumcpus(void);
-u_int	rss_gethashconfig(void);
-
-/*
  * Network stack interface to generate a hash for a protocol tuple.
  */
 uint32_t	rss_hash_ip4_4tuple(struct in_addr src, u_short srcport,
 		    struct in_addr dst, u_short dstport);
 uint32_t	rss_hash_ip4_2tuple(struct in_addr src, struct in_addr dst);
-uint32_t	rss_hash_ip6_4tuple(const struct in6_addr *src, u_short srcport,
-		    const struct in6_addr *dst, u_short dstport);
-uint32_t	rss_hash_ip6_2tuple(const struct in6_addr *src,
-		    const struct in6_addr *dst);
-
-/*
- * Network stack interface to query desired CPU affinity of a packet.
- */
-struct mbuf	*rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid);
-u_int		rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type);
-int		rss_hash2bucket(uint32_t hash_val, uint32_t hash_type,
-		uint32_t *bucket_id);
-int		rss_m2bucket(struct mbuf *m, uint32_t *bucket_id);
 
 /*
  * Functions to calculate a software RSS hash for a given mbuf or
  * packet detail.
  */
 int		rss_mbuf_software_hash_v4(const struct mbuf *m, int dir,
 		    uint32_t *hashval, uint32_t *hashtype);
 int		rss_proto_software_hash_v4(struct in_addr src,
 		    struct in_addr dst, u_short src_port, u_short dst_port,
 		    int proto, uint32_t *hashval,
 		    uint32_t *hashtype);
 struct mbuf *	rss_soft_m2cpuid(struct mbuf *m, uintptr_t source,
 		    u_int *cpuid);
 
 #endif /* !_NETINET_IN_RSS_H_ */
Index: head/sys/netinet/ip_input.c
===================================================================
--- head/sys/netinet/ip_input.c	(revision 277330)
+++ head/sys/netinet/ip_input.c	(revision 277331)
@@ -1,1874 +1,1875 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bootp.h"
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
 #include "opt_ipsec.h"
 #include "opt_route.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 
 #include <net/pfil.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/netisr.h>
+#include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_options.h>
 #include <machine/in_cksum.h>
 #include <netinet/ip_carp.h>
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #endif /* IPSEC */
 #include <netinet/in_rss.h>
 
 #include <sys/socketvar.h>
 
 #include <security/mac/mac_framework.h>
 
 #ifdef CTASSERT
 /* Compile-time check that struct ip is exactly 20 bytes. */
 CTASSERT(sizeof(struct ip) == 20);
 #endif
 
 /*
  * Global read/write lock, initialized through RW_SYSINIT.
  * NOTE(review): presumably protects the in_ifaddr lists -- confirm against
  * the IN_IFADDR_*LOCK() users.
  */
 struct	rwlock in_ifaddr_lock;
 RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
 
 /* When non-zero, ip_input() delivers every RSVP packet locally. */
 VNET_DEFINE(int, rsvp_on);
 
 /* When zero, ip_input() drops packets that are not for this host. */
 VNET_DEFINE(int, ipforwarding);
 SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipforwarding), 0,
     "Enable IP forwarding between interfaces");
 
 static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
 #define	V_ipsendredirects	VNET(ipsendredirects)
 SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipsendredirects), 0,
     "Enable sending IP redirects");
 
 VNET_DEFINE(int, ip_do_randomid);
 SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_do_randomid), 0,
     "Assign random ip_id values");
 
 /*
  * XXX - Setting ip_checkinterface mostly implements the receive side of
  * the Strong ES model described in RFC 1122, but since the routing table
  * and transmit implementation do not implement the Strong ES model,
  * setting this to 1 results in an odd hybrid.
  *
  * XXX - ip_checkinterface currently must be disabled if you use ipnat
  * to translate the destination address to another local interface.
  *
  * XXX - ip_checkinterface must be disabled if you add IP aliases
  * to the loopback interface instead of the interface where the
  * packets for those addresses are received.
  */
 static VNET_DEFINE(int, ip_checkinterface);
 #define	V_ip_checkinterface	VNET(ip_checkinterface)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_checkinterface), 0,
     "Verify packet arrives on correct interface");
 
 /* Registered by ip_init() with type PFIL_TYPE_AF and family AF_INET. */
 VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
 
 /*
  * netisr handler for the NETISR_IP protocol; ip_init() registers it and
  * ip_input() is its packet handler.  When RSS is compiled in,
  * rss_soft_m2cpuid() is installed as the nh_m2cpuid callback together with
  * the CPU policy and hybrid dispatch; otherwise the flow policy is used.
  */
 static struct netisr_handler ip_nh = {
 	.nh_name = "ip",
 	.nh_handler = ip_input,
 	.nh_proto = NETISR_IP,
 #ifdef	RSS
 	.nh_m2cpuid = rss_soft_m2cpuid,
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
 #else
 	.nh_policy = NETISR_POLICY_FLOW,
 #endif
 };
 
 #ifdef	RSS
 /*
  * netisr handler for NETISR_IP_DIRECT, serviced by ip_direct_input().
  *
  * Directly dispatched frames are currently assumed
  * to have a flowid already calculated.
  *
  * There should probably be something that asserts the mbuf
  * actually carries valid flow details.
  */
 static struct netisr_handler ip_direct_nh = {
 	.nh_name = "ip_direct",
 	.nh_handler = ip_direct_input,
 	.nh_proto = NETISR_IP_DIRECT,
 	.nh_m2cpuid = rss_m2cpuid,
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
 };
 #endif
 
 extern	struct domain inetdomain;
 extern	struct protosw inetsw[];
 /*
  * Maps an IP protocol number to an index into inetsw[]; filled in by
  * ip_init() and used to pick the pr_input routine on delivery.
  */
 u_char	ip_protox[IPPROTO_MAX];
 VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
 VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
 VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
 
 /*
  * Fragment reassembly state: a per-VNET UMA zone for struct ipq and a
  * per-VNET array of IPREASS_NHASH queue heads.  ipqlock is a single
  * (non-VNET) mutex taken through IPQ_LOCK()/IPQ_UNLOCK() around queue
  * manipulation.
  */
 static VNET_DEFINE(uma_zone_t, ipq_zone);
 static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
 static struct mtx ipqlock;
 
 #define	V_ipq_zone		VNET(ipq_zone)
 #define	V_ipq			VNET(ipq)
 
 #define	IPQ_LOCK()	mtx_lock(&ipqlock)
 #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
 #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
 #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
 
 static void	maxnipq_update(void);
 static void	ipq_zone_change(void *);
 static void	ip_drain_locked(void);
 
 static VNET_DEFINE(int, maxnipq);  /* Administrative limit on # reass queues. */
 static VNET_DEFINE(int, nipq);			/* Total # of reass queues */
 #define	V_maxnipq		VNET(maxnipq)
 #define	V_nipq			VNET(nipq)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(nipq), 0,
     "Current number of IPv4 fragment reassembly queue entries");
 
 /* Set to 16 by ip_init(); a value of 0 makes ip_reass() drop all fragments. */
 static VNET_DEFINE(int, maxfragsperpacket);
 #define	V_maxfragsperpacket	VNET(maxfragsperpacket)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(maxfragsperpacket), 0,
     "Maximum number of IPv4 fragments allowed per packet");
 
 #ifdef IPCTL_DEFMTU
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
     &ip_mtu, 0, "Default MTU");
 #endif
 
 #ifdef IPSTEALTH
 VNET_DEFINE(int, ipstealth);
 SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ipstealth), 0,
     "IP stealth mode, no TTL decrementation on forwarding");
 #endif
 
 static void	ip_freef(struct ipqhead *, struct ipq *);
 
 /*
  * IP statistics are stored in the "array" of counter(9)s.
  */
 VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
 VNET_PCPUSTAT_SYSINIT(ipstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
     "IP statistics (struct ipstat, netinet/ip_var.h)");
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(ipstat);
 #endif /* VIMAGE */
 
 /*
  * Kernel module interface for updating ipstat.  The argument is an index
  * into ipstat treated as an array.
  *
  * kmod_ipstat_inc() adds 1 to the counter at index statnum.  The index is
  * not range-checked here.
  */
 void
 kmod_ipstat_inc(int statnum)
 {
 
 	counter_u64_add(VNET(ipstat)[statnum], 1);
 }
 
 /*
  * Kernel module interface: subtract 1 from the ipstat counter at index
  * statnum (ipstat treated as an array).  The index is not range-checked
  * here.
  */
 void
 kmod_ipstat_dec(int statnum)
 {
 
 	counter_u64_add(VNET(ipstat)[statnum], -1);
 }
 
 /*
  * Sysctl handler for net.inet.ip.intr_queue_maxlen.
  *
  * Reports the queue limit of the "ip" netisr handler and, when the
  * request carries a new value, installs it.  Values below 1 are rejected
  * with EINVAL.
  */
 static int
 sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
 {
 	int limit, rc;
 
 	netisr_getqlimit(&ip_nh, &limit);
 	rc = sysctl_handle_int(oidp, &limit, 0, req);
 
 	/* Stop on failure, or when the caller only wanted to read. */
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (limit >= 1)
 		return (netisr_setqlimit(&ip_nh, limit));
 	return (EINVAL);
 }
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
     "Maximum size of the IP input queue");
 
 /*
  * Sysctl handler for net.inet.ip.intr_queue_drops.
  *
  * Reports the drop count of the "ip" netisr handler.  If the request
  * carries a new value, only 0 is accepted and it clears the counter;
  * anything else yields EINVAL.
  */
 static int
 sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
 {
 	u_int64_t drops64;
 	int drops, rc;
 
 	netisr_getqdrops(&ip_nh, &drops64);
 	/* The node is exported as an int, so the 64-bit count is narrowed. */
 	drops = drops64;
 	rc = sysctl_handle_int(oidp, &drops, 0, req);
 
 	/* Stop on failure, or when the caller only wanted to read. */
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (drops == 0) {
 		netisr_clearqdrops(&ip_nh);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
     "Number of packets dropped from the IP input queue");
 
 #ifdef	RSS
 /*
  * Sysctl handler for net.inet.ip.intr_direct_queue_maxlen.
  *
  * Reports the queue limit of the "ip_direct" netisr handler and, when the
  * request carries a new value, installs it.  Values below 1 are rejected
  * with EINVAL.
  */
 static int
 sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
 {
 	int error, qlimit;
 
 	netisr_getqlimit(&ip_direct_nh, &qlimit);
 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
 	/* Stop on failure, or when the caller only wanted to read. */
 	if (error || !req->newptr)
 		return (error);
 	if (qlimit < 1)
 		return (EINVAL);
 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
 }
 /*
  * OID_AUTO is used because the fixed number IPCTL_INTRQMAXLEN is already
  * registered under _net_inet_ip by intr_queue_maxlen; reusing it would give
  * two leaves the same numeric MIB identifier.
  */
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, intr_direct_queue_maxlen,
     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
     "Maximum size of the IP direct input queue");
 
 /*
  * Sysctl handler for net.inet.ip.intr_direct_queue_drops.
  *
  * Reports the drop count of the "ip_direct" netisr handler.  If the
  * request carries a new value, only 0 is accepted and it clears the
  * counter; anything else yields EINVAL.
  */
 static int
 sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
 {
 	u_int64_t qdrops_long;
 	int error, qdrops;
 
 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
 	/* The node is exported as an int, so the 64-bit count is narrowed. */
 	qdrops = qdrops_long;
 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
 	/* Stop on failure, or when the caller only wanted to read. */
 	if (error || !req->newptr)
 		return (error);
 	if (qdrops != 0)
 		return (EINVAL);
 	netisr_clearqdrops(&ip_direct_nh);
 	return (0);
 }
 
 /*
  * OID_AUTO is used because the fixed number IPCTL_INTRQDROPS is already
  * registered under _net_inet_ip by intr_queue_drops; reusing it would give
  * two leaves the same numeric MIB identifier.
  */
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, intr_direct_queue_drops,
     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
     "Number of packets dropped from the IP direct input queue");
 #endif	/* RSS */
 
 /*
  * IP initialization: fill in IP protocol switch table.
  * All protocols not implemented in kernel go to raw IP protocol handler.
  *
  * The first part runs for every VNET: it seeds the IP ID, sets up the
  * interface address list and hash, the fragment reassembly queues and
  * their UMA zone, and registers the AF_INET pfil head.  The remainder runs
  * for the default VNET only: it builds ip_protox[], registers the
  * nmbclusters_change event handler, initializes the reassembly lock and
  * registers the netisr handler(s).
  */
 void
 ip_init(void)
 {
 	struct protosw *pr;
 	int i;
 
 	/* Seed the IP ID from the low 16 bits of the current time. */
 	V_ip_id = time_second & 0xffff;
 
 	TAILQ_INIT(&V_in_ifaddrhead);
 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
 
 	/* Initialize IP reassembly queue. */
 	for (i = 0; i < IPREASS_NHASH; i++)
 		TAILQ_INIT(&V_ipq[i]);
 	V_maxnipq = nmbclusters / 32;
 	V_maxfragsperpacket = 16;
 	V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
 	    NULL, UMA_ALIGN_PTR, 0);
 	/* Push the limit chosen above into the freshly created zone. */
 	maxnipq_update();
 
 	/* Initialize packet filter hooks. */
 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
 	V_inet_pfil_hook.ph_af = AF_INET;
 	/* A registration failure is only reported; initialization continues. */
 	if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
 		printf("%s: WARNING: unable to register pfil hook, "
 			"error %d\n", __func__, i);
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (pr == NULL)
 		panic("ip_init: PF_INET not found");
 
 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
 	for (i = 0; i < IPPROTO_MAX; i++)
 		ip_protox[i] = pr - inetsw;	/* index of the raw entry */
 	/*
 	 * Cycle through IP protocols and put them into the appropriate place
 	 * in ip_protox[].
 	 */
 	for (pr = inetdomain.dom_protosw;
 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
 		if (pr->pr_domain->dom_family == PF_INET &&
 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
 			/* Be careful to only index valid IP protocols. */
 			if (pr->pr_protocol < IPPROTO_MAX)
 				ip_protox[pr->pr_protocol] = pr - inetsw;
 		}
 
 	/* Have ipq_zone_change() called on nmbclusters_change events. */
 	EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
 		NULL, EVENTHANDLER_PRI_ANY);
 
 	/* Initialize various other remaining things. */
 	IPQ_LOCK_INIT();
 	netisr_register(&ip_nh);
 #ifdef	RSS
 	netisr_register(&ip_direct_nh);
 #endif
 }
 
 #ifdef VIMAGE
 /*
  * Per-VNET teardown of the IPv4 input state: unregister the pfil head,
  * destroy the interface address hash table, drain the reassembly queues
  * and destroy the reassembly zone.
  */
 void
 ip_destroy(void)
 {
 	int error;
 
 	/* A failure to unregister is reported but does not stop teardown. */
 	error = pfil_head_unregister(&V_inet_pfil_hook);
 	if (error != 0) {
 		printf("%s: WARNING: unable to unregister pfil hook, "
 		    "error %d\n", __func__, error);
 	}
 
 	/* The in_ifaddr hash table is expected to be empty at this point. */
 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
 
 	/* ip_drain_locked() must be called with the queue lock held. */
 	IPQ_LOCK();
 	ip_drain_locked();
 	IPQ_UNLOCK();
 
 	uma_zdestroy(V_ipq_zone);
 }
 #endif
 
 #ifdef	RSS
 /*
  * IP direct input routine.
  *
  * Used when reinjecting completed (reassembled) packets, for which all
  * of the earlier checking and book-keeping has already been performed.
  * The packet is counted as delivered and handed straight to the input
  * routine of its protocol.
  */
 void
 ip_direct_input(struct mbuf *m)
 {
 	struct ip *iph = mtod(m, struct ip *);
 	int hdrlen = iph->ip_hl << 2;	/* header length in bytes */
 
 	IPSTAT_INC(ips_delivered);
 	(*inetsw[ip_protox[iph->ip_p]].pr_input)(&m, &hdrlen, iph->ip_p);
 }
 #endif
 
 /*
  * IP input routine.  Validates the header (length, version, header
  * length, loopback addresses, checksum, total length), runs the pfil
  * input hooks, processes options and then decides whether the packet is
  * for this host.  Packets for us are reassembled if fragmented and passed
  * to the input routine of their protocol; other packets are forwarded when
  * forwarding is enabled and dropped otherwise.
  *
  * The mbuf is consumed on every path: it is freed, forwarded, or handed
  * to another routine.
  */
 void
 ip_input(struct mbuf *m)
 {
 	struct ip *ip = NULL;
 	struct in_ifaddr *ia = NULL;
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 	int    checkif, hlen = 0;
 	uint16_t sum, ip_len;
 	int dchg = 0;				/* dest changed after fw */
 	struct in_addr odst;			/* original dst address */
 
 	M_ASSERTPKTHDR(m);
 
 	/*
 	 * Packets flagged M_FASTFWD_OURS skip all validation and
 	 * filtering below and go straight to local delivery.
 	 */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		/* Set up some basics that will be used later. */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
 		ip_len = ntohs(ip->ip_len);
 		goto ours;
 	}
 
 	IPSTAT_INC(ips_total);
 
 	if (m->m_pkthdr.len < sizeof(struct ip))
 		goto tooshort;
 
 	/* Make the fixed part of the header contiguous in the first mbuf. */
 	if (m->m_len < sizeof (struct ip) &&
 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
 		IPSTAT_INC(ips_toosmall);
 		return;
 	}
 	ip = mtod(m, struct ip *);
 
 	if (ip->ip_v != IPVERSION) {
 		IPSTAT_INC(ips_badvers);
 		goto bad;
 	}
 
 	/* ip_hl counts 32-bit words; convert to bytes. */
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
 		IPSTAT_INC(ips_badhlen);
 		goto bad;
 	}
 	/* Make the whole header, options included, contiguous as well. */
 	if (hlen > m->m_len) {
 		if ((m = m_pullup(m, hlen)) == NULL) {
 			IPSTAT_INC(ips_badhlen);
 			return;
 		}
 		ip = mtod(m, struct ip *);
 	}
 
 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
 
 	/* 127/8 must not appear on wire - RFC1122 */
 	ifp = m->m_pkthdr.rcvif;
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			IPSTAT_INC(ips_badaddr);
 			goto bad;
 		}
 	}
 
 	/*
 	 * If the header checksum was already checked (CSUM_IP_CHECKED) it
 	 * is bad unless CSUM_IP_VALID is set too; otherwise compute it
 	 * here.  A non-zero sum means a bad checksum.
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
 	} else {
 		if (hlen == sizeof(struct ip)) {
 			sum = in_cksum_hdr(ip);
 		} else {
 			sum = in_cksum(m, hlen);
 		}
 	}
 	if (sum) {
 		IPSTAT_INC(ips_badsum);
 		goto bad;
 	}
 
 #ifdef ALTQ
 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
 		/* packet is dropped by traffic conditioner */
 		return;
 #endif
 
 	ip_len = ntohs(ip->ip_len);
 	if (ip_len < hlen) {
 		IPSTAT_INC(ips_badlen);
 		goto bad;
 	}
 
 	/*
 	 * Check that the amount of data in the buffers
 	 * is at least as much as the IP header would have us expect.
 	 * Trim mbufs if longer than we expect.
 	 * Drop packet if shorter than we expect.
 	 */
 	if (m->m_pkthdr.len < ip_len) {
 		/* Also reached by goto from the initial length check above. */
 tooshort:
 		IPSTAT_INC(ips_tooshort);
 		goto bad;
 	}
 	if (m->m_pkthdr.len > ip_len) {
 		if (m->m_len == m->m_pkthdr.len) {
 			/* Single mbuf: just shorten it in place. */
 			m->m_len = ip_len;
 			m->m_pkthdr.len = ip_len;
 		} else
 			/*
 			 * The length passed is negative here; see mbuf(9)
 			 * for how m_adj() treats a negative length.
 			 */
 			m_adj(m, ip_len - m->m_pkthdr.len);
 	}
 
 #ifdef IPSEC
 	/*
 	 * Bypass packet filtering for packets previously handled by IPsec.
 	 */
 	if (ip_ipsec_filtertunnel(m))
 		goto passin;
 #endif /* IPSEC */
 
 	/*
 	 * Run through list of hooks for input packets.
 	 *
 	 * NB: Beware of the destination address changing (e.g.
 	 *     by NAT rewriting).  When this happens, tell
 	 *     ip_forward to do the right thing.
 	 */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
 		goto passin;
 
 	odst = ip->ip_dst;
 	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
 		return;
 	if (m == NULL)			/* consumed by filter */
 		return;
 
 	/* The hooks were given &m, so re-derive everything read from it. */
 	ip = mtod(m, struct ip *);
 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
 	ifp = m->m_pkthdr.rcvif;
 
 	if (m->m_flags & M_FASTFWD_OURS) {
 		m->m_flags &= ~M_FASTFWD_OURS;
 		goto ours;
 	}
 	if (m->m_flags & M_IP_NEXTHOP) {
 		dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
 		if (dchg != 0) {
 			/*
 			 * Directly ship the packet on.  This allows
 			 * forwarding packets originally destined to us
 			 * to some other directly connected host.
 			 */
 			ip_forward(m, 1);
 			return;
 		}
 	}
 passin:
 
 	/*
 	 * Process options and, if not destined for us,
 	 * ship it on.  ip_dooptions returns 1 when an
 	 * error was detected (causing an icmp message
 	 * to be sent and the original packet to be freed).
 	 */
 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
 		return;
 
 	/*
 	 * Greedy RSVP: snatches any PATH packet of the RSVP protocol, no
 	 * matter whether it is destined to another node or is a multicast
 	 * one.  RSVP wants it, and this prevents it from being forwarded
 	 * anywhere else.  Also checks that the rsvp daemon is running
 	 * before grabbing the packet.
 	 */
 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 
 		goto ours;
 
 	/*
 	 * Check our list of addresses, to see if the packet is for us.
 	 * If we don't have any addresses, assume any unicast packet
 	 * we receive might be for us (and let the upper layers deal
 	 * with it).
 	 */
 	if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
 		goto ours;
 
 	/*
 	 * Enable a consistency check between the destination address
 	 * and the arrival interface for a unicast packet (the RFC 1122
 	 * strong ES model) if IP forwarding is disabled and the packet
 	 * is not locally generated and the packet is not subject to
 	 * 'ipfw fwd'.
 	 *
 	 * XXX - Checking also should be disabled if the destination
 	 * address is ipnat'ed to a different interface.
 	 *
 	 * XXX - Checking is incompatible with IP aliases added
 	 * to the loopback interface instead of the interface where
 	 * the packets are received.
 	 *
 	 * XXX - This is the case for carp vhost IPs as well so we
 	 * insert a workaround. If the packet got here, we already
 	 * checked with carp_iamatch() and carp_forus().
 	 */
 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 
 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
 	    ifp->if_carp == NULL && (dchg == 0);
 
 	/*
 	 * Check for exact addresses in the hash bucket.
 	 */
 	/* IN_IFADDR_RLOCK(); */
 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
 		/*
 		 * If the address matches, verify that the packet
 		 * arrived via the correct interface if checking is
 		 * enabled.
 		 */
 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 
 		    (!checkif || ia->ia_ifp == ifp)) {
 			/* Account the packet to the matching address. */
 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
 			    m->m_pkthdr.len);
 			/* IN_IFADDR_RUNLOCK(); */
 			goto ours;
 		}
 	}
 	/* IN_IFADDR_RUNLOCK(); */
 
 	/*
 	 * Check for broadcast addresses.
 	 *
 	 * Only accept broadcast packets that arrive via the matching
 	 * interface.  Reception of forwarded directed broadcasts would
 	 * be handled via ip_forward() and ether_output() with the loopback
 	 * into the stack for SIMPLEX interfaces handled by ether_output().
 	 */
 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
 		IF_ADDR_RLOCK(ifp);
 	        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET)
 				continue;
 			ia = ifatoia(ifa);
 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
 			    ip->ip_dst.s_addr) {
 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
 				    m->m_pkthdr.len);
 				IF_ADDR_RUNLOCK(ifp);
 				goto ours;
 			}
 #ifdef BOOTP_COMPAT
 			/* Accept if the interface address is INADDR_ANY. */
 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
 				    m->m_pkthdr.len);
 				IF_ADDR_RUNLOCK(ifp);
 				goto ours;
 			}
 #endif
 		}
 		IF_ADDR_RUNLOCK(ifp);
 		/* No match: do not keep a pointer to the last address seen. */
 		ia = NULL;
 	}
 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		if (V_ip_mrouter) {
 			/*
 			 * If we are acting as a multicast router, all
 			 * incoming multicast packets are passed to the
 			 * kernel-level multicast forwarding function.
 			 * The packet is returned (relatively) intact; if
 			 * ip_mforward() returns a non-zero value, the packet
 			 * must be discarded, else it may be accepted below.
 			 */
 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
 				IPSTAT_INC(ips_cantforward);
 				m_freem(m);
 				return;
 			}
 
 			/*
 			 * The process-level routing daemon needs to receive
 			 * all multicast IGMP packets, whether or not this
 			 * host belongs to their destination groups.
 			 */
 			if (ip->ip_p == IPPROTO_IGMP)
 				goto ours;
 			IPSTAT_INC(ips_forward);
 		}
 		/*
 		 * Assume the packet is for us, to avoid prematurely taking
 		 * a lock on the in_multi hash. Protocols must perform
 		 * their own filtering and update statistics accordingly.
 		 */
 		goto ours;
 	}
 	/* Destinations INADDR_BROADCAST and INADDR_ANY are accepted locally. */
 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
 		goto ours;
 	if (ip->ip_dst.s_addr == INADDR_ANY)
 		goto ours;
 
 	/*
 	 * Not for us; forward if possible and desirable.
 	 */
 	if (V_ipforwarding == 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 	} else {
 		ip_forward(m, dchg);
 	}
 	return;
 
 ours:
 #ifdef IPSTEALTH
 	/*
 	 * IPSTEALTH: Process non-routing options only
 	 * if the packet is destined for us.
 	 */
 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
 		return;
 #endif /* IPSTEALTH */
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 * The packet is a fragment when MF is set or the offset is non-zero.
 	 */
 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
 		/* XXXGL: shouldn't we save & set m_flags? */
 		m = ip_reass(m);
 		if (m == NULL)
 			return;
 		ip = mtod(m, struct ip *);
 		/* Get the header length of the reassembled packet */
 		hlen = ip->ip_hl << 2;
 	}
 
 #ifdef IPSEC
 	/*
 	 * enforce IPsec policy checking if we are seeing last header.
 	 * note that we do not visit this with protocols with pcb layer
 	 * code - like udp/tcp/raw ip.
 	 */
 	if (ip_ipsec_input(m, ip->ip_p) != 0)
 		goto bad;
 #endif /* IPSEC */
 
 	/*
 	 * Switch out to protocol's input routine.
 	 */
 	IPSTAT_INC(ips_delivered);
 
 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
 	return;
 bad:
 	/* Common drop path: the statistic was bumped by whoever jumped here. */
 	m_freem(m);
 }
 
 /*
  * Propagate the current value of maxnipq to the reassembly UMA zone.  The
  * zone maximum has slightly different semantics than the sysctl, for
  * historical reasons:
  *
  *   maxnipq < 0   unlimited allocation; the zone maximum is set to 0.
  *   maxnipq > 0   the zone maximum is set to that specific bound.
  *   maxnipq == 0  no further fragment queue allocation; the bound is set
  *                 very low (1), relying on the implementation elsewhere to
  *                 actually prevent allocation and reclaim current queues.
  */
 static void
 maxnipq_update(void)
 {
 
 	if (V_maxnipq < 0) {
 		uma_zone_set_max(V_ipq_zone, 0);
 	} else if (V_maxnipq > 0) {
 		uma_zone_set_max(V_ipq_zone, V_maxnipq);
 	} else {
 		uma_zone_set_max(V_ipq_zone, 1);
 	}
 }
 
 /*
  * Event handler (registered by ip_init() for nmbclusters_change).  When a
  * positive reassembly limit is below nmbclusters / 32, raise it to that
  * value and propagate the change to the UMA zone.  Non-positive limits
  * are left untouched.  The tag argument is unused.
  */
 static void
 ipq_zone_change(void *tag)
 {
 	int scaled;
 
 	scaled = nmbclusters / 32;
 	if (V_maxnipq <= 0 || V_maxnipq >= scaled)
 		return;
 
 	V_maxnipq = scaled;
 	maxnipq_update();
 }
 
 /*
  * Sysctl handler for net.inet.ip.maxfragpackets.
  *
  * Reports the current reassembly queue limit and, when the request
  * carries a new value, stores it and propagates it to the UMA zone.
  * Values below -1 are rejected with EINVAL.
  */
 static int
 sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
 {
 	int newmax, rc;
 
 	newmax = V_maxnipq;
 	rc = sysctl_handle_int(oidp, &newmax, 0, req);
 
 	/* Stop on failure, or when the caller only wanted to read. */
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	/*
 	 * XXXRW: Might be a good idea to sanity check the argument and place
 	 * an extreme upper bound.
 	 */
 	if (newmax >= -1) {
 		V_maxnipq = newmax;
 		maxnipq_update();
 		return (0);
 	}
 	return (EINVAL);
 }
 
 SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
     NULL, 0, sysctl_maxnipq, "I",
     "Maximum number of IPv4 fragment reassembly queue entries");
 
 #define	M_IP_FRAG	M_PROTO9
 
 /*
  * Take incoming datagram fragment and try to reassemble it into
  * whole datagram.  If the argument is the first fragment or one
  * in between the function will return NULL and store the mbuf
  * in the fragment chain.  If the argument is the last fragment
  * the packet will be reassembled and the pointer to the new
  * mbuf returned for further processing.  Only m_tags attached
  * to the first packet/fragment are preserved.
  * The IP header is *NOT* adjusted out of iplen.
  */
 struct mbuf *
 ip_reass(struct mbuf *m)
 {
 	struct ip *ip;
 	struct mbuf *p, *q, *nq, *t;
 	struct ipq *fp = NULL;
 	struct ipqhead *head;
 	int i, hlen, next;
 	u_int8_t ecn, ecn0;
 	u_short hash;
 #ifdef	RSS
 	uint32_t rss_hash, rss_type;
 #endif
 
 	/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
 	if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
 		IPSTAT_INC(ips_fragments);
 		IPSTAT_INC(ips_fragdropped);
 		m_freem(m);
 		return (NULL);
 	}
 
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 
 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
 	head = &V_ipq[hash];
 	IPQ_LOCK();
 
 	/*
 	 * Look for queue of fragments
 	 * of this datagram.
 	 */
 	TAILQ_FOREACH(fp, head, ipq_list)
 		if (ip->ip_id == fp->ipq_id &&
 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
 #ifdef MAC
 		    mac_ipq_match(m, fp) &&
 #endif
 		    ip->ip_p == fp->ipq_p)
 			goto found;
 
 	fp = NULL;
 
 	/*
 	 * Attempt to trim the number of allocated fragment queues if it
 	 * exceeds the administrative limit.
 	 */
 	if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) {
 		/*
 		 * drop something from the tail of the current queue
 		 * before proceeding further
 		 */
 		struct ipq *q = TAILQ_LAST(head, ipqhead);
 		if (q == NULL) {   /* gak */
 			for (i = 0; i < IPREASS_NHASH; i++) {
 				struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead);
 				if (r) {
 					IPSTAT_ADD(ips_fragtimeout,
 					    r->ipq_nfrags);
 					ip_freef(&V_ipq[i], r);
 					break;
 				}
 			}
 		} else {
 			IPSTAT_ADD(ips_fragtimeout, q->ipq_nfrags);
 			ip_freef(head, q);
 		}
 	}
 
 found:
 	/*
 	 * Adjust ip_len to not reflect header,
 	 * convert offset of this to bytes.
 	 */
 	ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
 	if (ip->ip_off & htons(IP_MF)) {
 		/*
 		 * Make sure that fragments have a data length
 		 * that's a non-zero multiple of 8 bytes.
 		 */
 		if (ip->ip_len == htons(0) || (ntohs(ip->ip_len) & 0x7) != 0) {
 			IPSTAT_INC(ips_toosmall); /* XXX */
 			goto dropfrag;
 		}
 		m->m_flags |= M_IP_FRAG;
 	} else
 		m->m_flags &= ~M_IP_FRAG;
 	ip->ip_off = htons(ntohs(ip->ip_off) << 3);
 
 	/*
 	 * Attempt reassembly; if it succeeds, proceed.
 	 * ip_reass() will return a different mbuf.
 	 */
 	IPSTAT_INC(ips_fragments);
 	m->m_pkthdr.PH_loc.ptr = ip;
 
 	/* Previous ip_reass() started here. */
 	/*
 	 * Presence of header sizes in mbufs
 	 * would confuse code below.
 	 */
 	m->m_data += hlen;
 	m->m_len -= hlen;
 
 	/*
 	 * If first fragment to arrive, create a reassembly queue.
 	 */
 	if (fp == NULL) {
 		fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
 		if (fp == NULL)
 			goto dropfrag;
 #ifdef MAC
 		if (mac_ipq_init(fp, M_NOWAIT) != 0) {
 			uma_zfree(V_ipq_zone, fp);
 			fp = NULL;
 			goto dropfrag;
 		}
 		mac_ipq_create(m, fp);
 #endif
 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
 		V_nipq++;
 		fp->ipq_nfrags = 1;
 		fp->ipq_ttl = IPFRAGTTL;
 		fp->ipq_p = ip->ip_p;
 		fp->ipq_id = ip->ip_id;
 		fp->ipq_src = ip->ip_src;
 		fp->ipq_dst = ip->ip_dst;
 		fp->ipq_frags = m;
 		m->m_nextpkt = NULL;
 		goto done;
 	} else {
 		fp->ipq_nfrags++;
 #ifdef MAC
 		mac_ipq_update(m, fp);
 #endif
 	}
 
 #define GETIP(m)	((struct ip*)((m)->m_pkthdr.PH_loc.ptr))
 
 	/*
 	 * Handle ECN by comparing this segment with the first one;
 	 * if CE is set, do not lose CE.
 	 * drop if CE and not-ECT are mixed for the same packet.
 	 */
 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
 	if (ecn == IPTOS_ECN_CE) {
 		if (ecn0 == IPTOS_ECN_NOTECT)
 			goto dropfrag;
 		if (ecn0 != IPTOS_ECN_CE)
 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
 	}
 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
 		goto dropfrag;
 
 	/*
 	 * Find a segment which begins after this one does.
 	 */
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
 		if (ntohs(GETIP(q)->ip_off) > ntohs(ip->ip_off))
 			break;
 
 	/*
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us, otherwise
 	 * stick new segment in the proper place.
 	 *
 	 * If some of the data is dropped from the preceding
 	 * segment, then its checksum is invalidated.
 	 */
 	if (p) {
 		i = ntohs(GETIP(p)->ip_off) + ntohs(GETIP(p)->ip_len) -
 		    ntohs(ip->ip_off);
 		if (i > 0) {
 			if (i >= ntohs(ip->ip_len))
 				goto dropfrag;
 			m_adj(m, i);
 			m->m_pkthdr.csum_flags = 0;
 			ip->ip_off = htons(ntohs(ip->ip_off) + i);
 			ip->ip_len = htons(ntohs(ip->ip_len) - i);
 		}
 		m->m_nextpkt = p->m_nextpkt;
 		p->m_nextpkt = m;
 	} else {
 		m->m_nextpkt = fp->ipq_frags;
 		fp->ipq_frags = m;
 	}
 
 	/*
 	 * While we overlap succeeding segments trim them or,
 	 * if they are completely covered, dequeue them.
 	 */
 	for (; q != NULL && ntohs(ip->ip_off) + ntohs(ip->ip_len) >
 	    ntohs(GETIP(q)->ip_off); q = nq) {
 		i = (ntohs(ip->ip_off) + ntohs(ip->ip_len)) -
 		    ntohs(GETIP(q)->ip_off);
 		if (i < ntohs(GETIP(q)->ip_len)) {
 			GETIP(q)->ip_len = htons(ntohs(GETIP(q)->ip_len) - i);
 			GETIP(q)->ip_off = htons(ntohs(GETIP(q)->ip_off) + i);
 			m_adj(q, i);
 			q->m_pkthdr.csum_flags = 0;
 			break;
 		}
 		nq = q->m_nextpkt;
 		m->m_nextpkt = nq;
 		IPSTAT_INC(ips_fragdropped);
 		fp->ipq_nfrags--;
 		m_freem(q);
 	}
 
 	/*
 	 * Check for complete reassembly and perform frag per packet
 	 * limiting.
 	 *
 	 * Frag limiting is performed here so that the nth frag has
 	 * a chance to complete the packet before we drop the packet.
 	 * As a result, n+1 frags are actually allowed per packet, but
 	 * only n will ever be stored. (n = maxfragsperpacket.)
 	 *
 	 */
 	next = 0;
 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
 		if (ntohs(GETIP(q)->ip_off) != next) {
 			if (fp->ipq_nfrags > V_maxfragsperpacket) {
 				IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 				ip_freef(head, fp);
 			}
 			goto done;
 		}
 		next += ntohs(GETIP(q)->ip_len);
 	}
 	/* Make sure the last packet didn't have the IP_MF flag */
 	if (p->m_flags & M_IP_FRAG) {
 		if (fp->ipq_nfrags > V_maxfragsperpacket) {
 			IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 			ip_freef(head, fp);
 		}
 		goto done;
 	}
 
 	/*
 	 * Reassembly is complete.  Make sure the packet is a sane size.
 	 */
 	q = fp->ipq_frags;
 	ip = GETIP(q);
 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
 		IPSTAT_INC(ips_toolong);
 		IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
 		ip_freef(head, fp);
 		goto done;
 	}
 
 	/*
 	 * Concatenate fragments.
 	 */
 	m = q;
 	t = m->m_next;
 	m->m_next = NULL;
 	m_cat(m, t);
 	nq = q->m_nextpkt;
 	q->m_nextpkt = NULL;
 	for (q = nq; q != NULL; q = nq) {
 		nq = q->m_nextpkt;
 		q->m_nextpkt = NULL;
 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
 		m_cat(m, q);
 	}
 	/*
 	 * In order to do checksumming faster we do 'end-around carry' here
 	 * (and not in for{} loop), though it implies we are not going to
 	 * reassemble more than 64k fragments.
 	 */
 	while (m->m_pkthdr.csum_data & 0xffff0000)
 		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
 		    (m->m_pkthdr.csum_data >> 16);
 #ifdef MAC
 	mac_ipq_reassemble(fp, m);
 	mac_ipq_destroy(fp);
 #endif
 
 	/*
 	 * Create header for new ip packet by modifying header of first
 	 * packet;  dequeue and discard fragment reassembly header.
 	 * Make header visible.
 	 */
 	ip->ip_len = htons((ip->ip_hl << 2) + next);
 	ip->ip_src = fp->ipq_src;
 	ip->ip_dst = fp->ipq_dst;
 	TAILQ_REMOVE(head, fp, ipq_list);
 	V_nipq--;
 	uma_zfree(V_ipq_zone, fp);
 	m->m_len += (ip->ip_hl << 2);
 	m->m_data -= (ip->ip_hl << 2);
 	/* some debugging cruft by sklower, below, will go away soon */
 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
 		m_fixhdr(m);
 	IPSTAT_INC(ips_reassembled);
 	IPQ_UNLOCK();
 
 #ifdef	RSS
 	/*
 	 * Query the RSS layer for the flowid / flowtype for the
 	 * mbuf payload.
 	 *
 	 * For now, just assume we have to calculate a new one.
 	 * Later on we should check to see if the assigned flowid matches
 	 * what RSS wants for the given IP protocol and if so, just keep it.
 	 *
 	 * We then queue into the relevant netisr so it can be dispatched
 	 * to the correct CPU.
 	 *
 	 * Note - this may return 1, which means the flowid in the mbuf
 	 * is correct for the configured RSS hash types and can be used.
 	 */
 	if (rss_mbuf_software_hash_v4(m, 0, &rss_hash, &rss_type) == 0) {
 		m->m_pkthdr.flowid = rss_hash;
 		M_HASHTYPE_SET(m, rss_type);
 	}
 
 	/*
 	 * Queue/dispatch for reprocessing.
 	 *
 	 * Note: this is much slower than just handling the frame in the
 	 * current receive context.  It's likely worth investigating
 	 * why this is.
 	 */
 	netisr_dispatch(NETISR_IP_DIRECT, m);
 	return (NULL);
 #endif
 
 	/* Handle in-line */
 	return (m);
 
 dropfrag:
 	IPSTAT_INC(ips_fragdropped);
 	if (fp != NULL)
 		fp->ipq_nfrags--;
 	m_freem(m);
 done:
 	IPQ_UNLOCK();
 	return (NULL);
 
 #undef GETIP
 }
 
 /*
  * Free a fragment reassembly header and all
  * associated datagrams.
  */
 static void
 ip_freef(struct ipqhead *fhp, struct ipq *fp)
 {
 	struct mbuf *q;
 
 	IPQ_LOCK_ASSERT();
 
 	while (fp->ipq_frags) {
 		q = fp->ipq_frags;
 		fp->ipq_frags = q->m_nextpkt;
 		m_freem(q);
 	}
 	TAILQ_REMOVE(fhp, fp, ipq_list);
 	uma_zfree(V_ipq_zone, fp);
 	V_nipq--;
 }
 
 /*
  * IP reassembly timer.
  *
  * For every vnet, age each reassembly queue by one tick and free the
  * queues whose TTL has reached zero.  Afterwards, if V_maxnipq is
  * non-negative and V_nipq exceeds it (e.g. because the limit was
  * lowered), free queues bucket by bucket until the count fits again.
  */
 void
 ip_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 	struct ipq *cur, *following;
 	int bucket;
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	IPQ_LOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		for (bucket = 0; bucket < IPREASS_NHASH; bucket++) {
 			cur = TAILQ_FIRST(&V_ipq[bucket]);
 			while (cur != NULL) {
 				/* Grab the successor first; cur may be freed. */
 				following = TAILQ_NEXT(cur, ipq_list);
 				cur->ipq_ttl--;
 				if (cur->ipq_ttl == 0) {
 					IPSTAT_ADD(ips_fragtimeout,
 					    cur->ipq_nfrags);
 					ip_freef(&V_ipq[bucket], cur);
 				}
 				cur = following;
 			}
 		}
 		/*
 		 * Over the configured maximum: discard queues from the
 		 * front of each bucket until we are back under the limit
 		 * or the bucket runs dry.
 		 */
 		if (V_maxnipq >= 0 && V_nipq > V_maxnipq) {
 			for (bucket = 0; bucket < IPREASS_NHASH; bucket++) {
 				for (;;) {
 					if (V_nipq <= V_maxnipq)
 						break;
 					cur = TAILQ_FIRST(&V_ipq[bucket]);
 					if (cur == NULL)
 						break;
 					IPSTAT_ADD(ips_fragdropped,
 					    cur->ipq_nfrags);
 					ip_freef(&V_ipq[bucket], cur);
 				}
 			}
 		}
 		CURVNET_RESTORE();
 	}
 	IPQ_UNLOCK();
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Drain off all datagram fragments.
  */
 static void
 ip_drain_locked(void)
 {
 	int     i;
 
 	IPQ_LOCK_ASSERT();
 
 	for (i = 0; i < IPREASS_NHASH; i++) {
 		while(!TAILQ_EMPTY(&V_ipq[i])) {
 			IPSTAT_ADD(ips_fragdropped,
 			    TAILQ_FIRST(&V_ipq[i])->ipq_nfrags);
 			ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
 		}
 	}
 }
 
 /*
  * Drain the fragment reassembly queues of every vnet.
  *
  * Takes the vnet list read lock and the IPQ lock, then switches to each
  * vnet in turn and calls ip_drain_locked() on it.
  */
 void
 ip_drain(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	IPQ_LOCK();
 	VNET_FOREACH(vnet_iter) {
 		/* ip_drain_locked() operates on the current vnet's V_ipq[]. */
 		CURVNET_SET(vnet_iter);
 		ip_drain_locked();
 		CURVNET_RESTORE();
 	}
 	IPQ_UNLOCK();
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Point ip_protox[ipproto] at the matching inetsw[] entry.
  *
  * The protocol must already be present in inetsw[], either statically
  * or through pf_proto_register().
  *
  * Returns 0 on success, EPROTONOSUPPORT if ipproto is out of range or
  * has no PF_INET entry in the protocol switch, EPFNOSUPPORT if the raw
  * IP entry cannot be found, and EEXIST if the slot is already claimed.
  */
 int
 ipproto_register(short ipproto)
 {
 	struct protosw *rawsw, *sw;
 
 	/* Only protocol numbers strictly between 0 and IPPROTO_MAX. */
 	if (!(ipproto > 0 && ipproto < IPPROTO_MAX))
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * A free slot is one that still indexes the IPPROTO_RAW entry;
 	 * anything else means another protocol already owns it.
 	 */
 	rawsw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (rawsw == NULL)
 		return (EPFNOSUPPORT);
 	if (ip_protox[ipproto] != rawsw - inetsw)
 		return (EEXIST);
 
 	/* Scan the inet protocol switch for the entry to install. */
 	for (sw = inetdomain.dom_protosw;
 	    sw < inetdomain.dom_protoswNPROTOSW; sw++) {
 		if (sw->pr_domain->dom_family != PF_INET)
 			continue;
 		if (sw->pr_protocol == 0 || sw->pr_protocol != ipproto)
 			continue;
 		ip_protox[sw->pr_protocol] = sw - inetsw;
 		return (0);
 	}
 	return (EPROTONOSUPPORT);
 }
 
 /*
  * Undo ipproto_register(): make ip_protox[ipproto] index the
  * IPPROTO_RAW entry of inetsw[] again.
  *
  * Returns 0 on success, EPROTONOSUPPORT if ipproto is out of range,
  * EPFNOSUPPORT if the raw IP entry cannot be found, and ENOENT if the
  * slot was not registered (it already indexes the raw entry).
  */
 int
 ipproto_unregister(short ipproto)
 {
 	struct protosw *rawsw;
 
 	/* Only protocol numbers strictly between 0 and IPPROTO_MAX. */
 	if (!(ipproto > 0 && ipproto < IPPROTO_MAX))
 		return (EPROTONOSUPPORT);
 
 	rawsw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
 	if (rawsw == NULL)
 		return (EPFNOSUPPORT);
 
 	/* A registered slot indexes something other than the raw entry. */
 	if (ip_protox[ipproto] != rawsw - inetsw) {
 		ip_protox[ipproto] = rawsw - inetsw;
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Look up the route towards dst (a final destination or a next hop) in
  * FIB fibnum and return the internet interface address attached to it.
  *
  * The returned in_ifaddr carries a reference taken with ifa_ref(); the
  * route itself is released before returning.  NULL is returned when the
  * lookup yields no route.
  */
 struct in_ifaddr *
 ip_rtaddr(struct in_addr dst, u_int fibnum)
 {
 	struct route lookup;
 	struct sockaddr_in *target;
 	struct in_ifaddr *found;
 
 	/* Build an AF_INET destination inside a zeroed route. */
 	bzero(&lookup, sizeof(lookup));
 	target = (struct sockaddr_in *)&lookup.ro_dst;
 	target->sin_family = AF_INET;
 	target->sin_len = sizeof(*target);
 	target->sin_addr = dst;
 	in_rtalloc_ign(&lookup, 0, fibnum);
 
 	found = NULL;
 	if (lookup.ro_rt != NULL) {
 		/* Reference the address before dropping the route. */
 		found = ifatoia(lookup.ro_rt->rt_ifa);
 		ifa_ref(&found->ia_ifa);
 		RTFREE(lookup.ro_rt);
 	}
 	return (found);
 }
 
 /*
  * Table of errno values with PRC_NCMDS slots; a 0 entry means no error
  * is associated with that slot.  Only the first 22 slots are given
  * explicit values here.
  * NOTE(review): presumably indexed by the PRC_* control command codes
  * from <sys/protosw.h> -- confirm against the users of this table.
  */
 u_char inetctlerrmap[PRC_NCMDS] = {
 	0,		0,		0,		0,
 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
 	0,		0,		EHOSTUNREACH,	0,
 	ENOPROTOOPT,	ECONNREFUSED
 };
 
 /*
  * Forward a packet.  If some error occurs return the sender
  * an icmp packet.  Note we can't always generate a meaningful
  * icmp message because icmp doesn't have a large enough repertoire
  * of codes and types.
  *
  * If not forwarding, just drop the packet.  This could be confusing
  * if ipforwarding was zero but some routing protocol was advancing
  * us as a gateway to somewhere.  However, we must let the routing
  * protocol deal with that.
  *
  * Parameters:
  *	m	the packet to forward.  It is always consumed: it is
  *		freed here, handed to icmp_error(), or handed to
  *		ip_output().
  *	srcrt	indicates whether the packet is being forwarded via a
  *		source route.
  *
  * Outline: refuse link-level broadcast/multicast and non-forwardable
  * destinations, enforce the TTL, take a referenced interface address
  * for the destination, keep a private copy of the start of the packet
  * for a possible ICMP reply, decide whether a host redirect is due,
  * call ip_output(), and finally translate the ip_output() result into
  * an ICMP message (or nothing).
  */
 void
 ip_forward(struct mbuf *m, int srcrt)
 {
 	struct ip *ip = mtod(m, struct ip *);
 	struct in_ifaddr *ia;
 	struct mbuf *mcopy;
 	struct in_addr dest;
 	struct route ro;
 	int error, type = 0, code = 0, mtu = 0;
 
 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 #ifdef IPSEC
 	if (ip_ipsec_fwd(m) != 0) {
 		IPSTAT_INC(ips_cantforward);
 		m_freem(m);
 		return;
 	}
 #endif /* IPSEC */
 #ifdef IPSTEALTH
 	if (!V_ipstealth) {
 #endif
 		if (ip->ip_ttl <= IPTTLDEC) {
 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
 			    0, 0);
 			return;
 		}
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/* ip_rtaddr() returns a referenced address; every exit below drops it. */
 	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
 #ifndef IPSEC
 	/*
 	 * 'ia' may be NULL if there is no route for this destination.
 	 * In case of IPsec, Don't discard it just yet, but pass it to
 	 * ip_output in case of outgoing IPsec policy.
 	 */
 	if (!srcrt && ia == NULL) {
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
 		return;
 	}
 #endif
 
 	/*
 	 * Save the IP header and at most 8 bytes of the payload,
 	 * in case we need to generate an ICMP message to the src.
 	 *
 	 * XXX this can be optimized a lot by saving the data in a local
 	 * buffer on the stack (72 bytes at most), and only allocating the
 	 * mbuf if really necessary. The vast majority of the packets
 	 * are forwarded without having to send an ICMP back (either
 	 * because unnecessary, or because rate limited), so we are
 	 * really wasting a lot of work here.
 	 *
 	 * We don't use m_copy() because it might return a reference
 	 * to a shared cluster. Both this function and ip_output()
 	 * assume exclusive access to the IP header in `m', so any
 	 * data in a cluster may change before we reach icmp_error().
 	 */
 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
 		/*
 		 * It's probably ok if the pkthdr dup fails (because
 		 * the deep copy of the tag chain failed), but for now
 		 * be conservative and just discard the copy since
 		 * code below may some day want the tags.
 		 */
 		m_free(mcopy);
 		mcopy = NULL;
 	}
 	if (mcopy != NULL) {
 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
 		mcopy->m_pkthdr.len = mcopy->m_len;
 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
 	}
 
 #ifdef IPSTEALTH
 	if (!V_ipstealth) {
 #endif
 		ip->ip_ttl -= IPTTLDEC;
 #ifdef IPSTEALTH
 	}
 #endif
 
 	/*
 	 * If forwarding packet using same interface that it came in on,
 	 * perhaps should send a redirect to sender to shortcut a hop.
 	 * Only send redirect if source is sending directly to us,
 	 * and if packet was not source routed (or has any options).
 	 * Also, don't send redirect if forwarding using a default route
 	 * or a route modified by a redirect.
 	 */
 	dest.s_addr = 0;
 	if (!srcrt && V_ipsendredirects &&
 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
 		struct sockaddr_in *sin;
 		struct rtentry *rt;
 
 		bzero(&ro, sizeof(ro));
 		sin = (struct sockaddr_in *)&ro.ro_dst;
 		sin->sin_family = AF_INET;
 		sin->sin_len = sizeof(*sin);
 		sin->sin_addr = ip->ip_dst;
 		in_rtalloc_ign(&ro, 0, M_GETFIB(m));
 
 		rt = ro.ro_rt;
 
 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
 #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
 			u_long src = ntohl(ip->ip_src.s_addr);
 
 			if (RTA(rt) &&
 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
 				if (rt->rt_flags & RTF_GATEWAY)
 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
 				else
 					dest.s_addr = ip->ip_dst.s_addr;
 				/* Router requirements says to only send host redirects */
 				type = ICMP_REDIRECT;
 				code = ICMP_REDIRECT_HOST;
 			}
 		}
 		if (rt)
 			RTFREE(rt);
 	}
 
 	/*
 	 * Try to cache the route MTU from ip_output so we can consider it for
 	 * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
 	 */
 	bzero(&ro, sizeof(ro));
 
 	/*
 	 * ip_output() consumes m.  From here on neither `m' nor `ip'
 	 * (which points into m) may be dereferenced; use mcopy instead.
 	 */
 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
 
 	if (error == EMSGSIZE && ro.ro_rt)
 		mtu = ro.ro_rt->rt_mtu;
 	RO_RTFREE(&ro);
 
 	if (error)
 		IPSTAT_INC(ips_cantforward);
 	else {
 		IPSTAT_INC(ips_forward);
 		if (type)
 			IPSTAT_INC(ips_redirectsent);
 		else {
 			/* Forwarded and no redirect wanted: nothing to report. */
 			if (mcopy)
 				m_freem(mcopy);
 			if (ia != NULL)
 				ifa_free(&ia->ia_ifa);
 			return;
 		}
 	}
 	/* Without the saved copy no ICMP message can be built. */
 	if (mcopy == NULL) {
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		return;
 	}
 
 	switch (error) {
 
 	case 0:				/* forwarded, but need redirect */
 		/* type, code set above */
 		break;
 
 	case ENETUNREACH:
 	case EHOSTUNREACH:
 	case ENETDOWN:
 	case EHOSTDOWN:
 	default:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_HOST;
 		break;
 
 	case EMSGSIZE:
 		type = ICMP_UNREACH;
 		code = ICMP_UNREACH_NEEDFRAG;
 
 #ifdef IPSEC
 		/* 
 		 * If IPsec is configured for this path,
 		 * override any possibly mtu value set by ip_output.
 		 */ 
 		mtu = ip_ipsec_mtu(mcopy, mtu);
 #endif /* IPSEC */
 		/*
 		 * If the MTU was set before make sure we are below the
 		 * interface MTU.
 		 * If the MTU wasn't set before use the interface mtu or
 		 * fall back to the next smaller mtu step compared to the
 		 * current packet size.
 		 */
 		if (mtu != 0) {
 			if (ia != NULL)
 				mtu = min(mtu, ia->ia_ifp->if_mtu);
 		} else {
 			if (ia != NULL)
 				mtu = ia->ia_ifp->if_mtu;
 			else {
 				/*
 				 * Take the packet length from the saved
 				 * header in mcopy (non-NULL here): the
 				 * original header lived in m, which
 				 * ip_output() has already consumed.
 				 */
 				mtu = ip_next_mtu(
 				    ntohs(mtod(mcopy, struct ip *)->ip_len), 0);
 			}
 		}
 		IPSTAT_INC(ips_cantfrag);
 		break;
 
 	case ENOBUFS:
 	case EACCES:			/* ipfw denied packet */
 		m_freem(mcopy);
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		return;
 	}
 	if (ia != NULL)
 		ifa_free(&ia->ia_ifa);
 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
 }
 
 /*
  * Build the control messages requested by the socket/pcb options of
  * inp for a received packet.
  *
  * Parameters:
  *	inp	pcb whose so_options, inp_flags and inp_flags2 select
  *		which messages are generated.
  *	mp	where to store the first control mbuf; after each
  *		successful sbcreatecontrol() the cursor advances to that
  *		mbuf's m_next so the messages form a chain.
  *	ip	IP header supplying the dst address, TTL and TOS values.
  *	m	the packet; supplies rcvif, flowid and hash type.
  *
  * A failed sbcreatecontrol() simply leaves that message out.
  */
 void
 ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
     struct mbuf *m)
 {
 
 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
 		struct bintime bt;
 
 		/* Sample the clock once so both formats agree. */
 		bintime(&bt);
 		if (inp->inp_socket->so_options & SO_BINTIME) {
 			*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
 			    SCM_BINTIME, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
 			struct timeval tv;
 
 			bintime2timeval(&bt, &tv);
 			*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
 			    SCM_TIMESTAMP, SOL_SOCKET);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 	if (inp->inp_flags & INP_RECVDSTADDR) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTTL) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #ifdef notyet
 	/* XXX
 	 * Moving these out of udp_input() made them even more broken
 	 * than they already were.
 	 */
 	/* options were tossed already */
 	if (inp->inp_flags & INP_RECVOPTS) {
 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	/* ip_srcroute doesn't do what we want here, need to fix */
 	if (inp->inp_flags & INP_RECVRETOPTS) {
 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 #endif
 	if (inp->inp_flags & INP_RECVIF) {
 		struct ifnet *ifp;
 		struct sdlbuf {
 			struct sockaddr_dl sdl;
 			u_char	pad[32];
 		} sdlbuf;
 		struct sockaddr_dl *sdp;
 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
 
 		if ((ifp = m->m_pkthdr.rcvif) &&
 		    ifp->if_index && ifp->if_index <= V_if_index) {
 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
 			/*
 			 * Change our mind and don't try copy.
 			 */
 			if (sdp->sdl_family != AF_LINK ||
 			    sdp->sdl_len > sizeof(sdlbuf)) {
 				goto makedummy;
 			}
 			bcopy(sdp, sdl2, sdp->sdl_len);
 		} else {
 makedummy:	
 			/*
 			 * Emit a header-only sockaddr_dl with index 0.
 			 * sdlbuf lives on the stack and sdl_len bytes of
 			 * it are copied into the control message, so
 			 * every header field, sdl_type included, must be
 			 * initialised here.
 			 */
 			sdl2->sdl_len =
 			    offsetof(struct sockaddr_dl, sdl_data[0]);
 			sdl2->sdl_family = AF_LINK;
 			sdl2->sdl_index = 0;
 			sdl2->sdl_type = 0;
 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
 		}
 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
 		    IP_RECVIF, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 	if (inp->inp_flags & INP_RECVTOS) {
 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 	if (inp->inp_flags2 & INP_RECVFLOWID) {
 		uint32_t flowid, flow_type;
 
 		flowid = m->m_pkthdr.flowid;
 		flow_type = M_HASHTYPE_GET(m);
 
 		/*
 		 * XXX should handle the failure of one or the
 		 * other - don't populate both?
 		 */
 		*mp = sbcreatecontrol((caddr_t) &flowid,
 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 		*mp = sbcreatecontrol((caddr_t) &flow_type,
 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
 		if (*mp)
 			mp = &(*mp)->m_next;
 	}
 
 #ifdef	RSS
 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
 		uint32_t flowid, flow_type;
 		uint32_t rss_bucketid;
 
 		flowid = m->m_pkthdr.flowid;
 		flow_type = M_HASHTYPE_GET(m);
 
 		/* Only report a bucket when the hash maps onto one. */
 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
 			if (*mp)
 				mp = &(*mp)->m_next;
 		}
 	}
 #endif
 }
 
 /*
  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
  * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
  * compiled.
  */
 /*
  * ip_rsvp_on: set to 1 by ip_rsvp_init() and back to 0 by
  * ip_rsvp_done(), so that V_rsvp_on is adjusted at most once per
  * registration.
  */
 static VNET_DEFINE(int, ip_rsvp_on);
 /* ip_rsvpd: socket stored by ip_rsvp_init(), NULL when none is set. */
 VNET_DEFINE(struct socket *, ip_rsvpd);
 
 #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
 
 /*
  * Register so as the RSVP socket of the current vnet.
  *
  * Returns 0 on success, EOPNOTSUPP unless so is a raw socket whose
  * protocol is IPPROTO_RSVP, and EADDRINUSE when a socket is already
  * registered.
  */
 int
 ip_rsvp_init(struct socket *so)
 {
 
 	if (so->so_type != SOCK_RAW)
 		return (EOPNOTSUPP);
 	if (so->so_proto->pr_protocol != IPPROTO_RSVP)
 		return (EOPNOTSUPP);
 
 	if (V_ip_rsvpd != NULL)
 		return (EADDRINUSE);
 
 	V_ip_rsvpd = so;
 	/*
 	 * The private flag guards the shared counter: bump V_rsvp_on
 	 * only on the off -> on transition so it is never incremented
 	 * twice for one registration.
 	 */
 	if (V_ip_rsvp_on == 0) {
 		V_ip_rsvp_on = 1;
 		V_rsvp_on++;
 	}
 
 	return (0);
 }
 
 /*
  * Unregister the RSVP socket of the current vnet.  Always returns 0.
  */
 int
 ip_rsvp_done(void)
 {
 
 	V_ip_rsvpd = NULL;
 	/*
 	 * Mirror of ip_rsvp_init(): drop V_rsvp_on only on the
 	 * on -> off transition so it is never decremented twice.
 	 */
 	if (V_ip_rsvp_on != 0) {
 		V_rsvp_on--;
 		V_ip_rsvp_on = 0;
 	}
 	return (0);
 }
 
 /*
  * Input routine for RSVP packets.
  *
  * Takes the packet out of *mp and then either passes it on to the
  * rsvp_input_p hook when one is installed, delivers it through
  * rip_input() when RSVP is on and a socket is registered, or frees it.
  * Always returns IPPROTO_DONE.
  */
 int
 rsvp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *pkt;
 
 	pkt = *mp;
 	*mp = NULL;
 
 	if (rsvp_input_p != NULL) {
 		/* Call the real one if loaded. */
 		*mp = pkt;
 		rsvp_input_p(mp, offp, proto);
 	} else if (V_rsvp_on && V_ip_rsvpd != NULL) {
 		*mp = pkt;
 		rip_input(mp, offp, proto);
 	} else {
 		/*
 		 * Can still get packets with rsvp_on = 0 if there is a
 		 * local member of the group to which the RSVP packet is
 		 * addressed; those, and packets arriving while no socket
 		 * is registered, are thrown away.
 		 */
 		m_freem(pkt);
 	}
 	return (IPPROTO_DONE);
 }
Index: head/sys/netinet/ip_output.c
===================================================================
--- head/sys/netinet/ip_output.c	(revision 277330)
+++ head/sys/netinet/ip_output.c	(revision 277331)
@@ -1,1373 +1,1374 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_mbuf_stress_test.h"
 #include "opt_mpath.h"
 #include "opt_route.h"
 #include "opt_sctp.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/ucred.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_llatbl.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 #include <net/route.h>
 #include <net/flowtable.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
+#include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef SCTP
 #include <netinet/sctp.h>
 #include <netinet/sctp_crc32.h>
 #endif
 
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #include <netipsec/ipsec.h>
 #endif /* IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Virtualized u_short named ip_id.
  * NOTE(review): presumably the IP identification counter behind
  * ip_newid() -- confirm against ip_var.h / ip_id.c.
  */
 VNET_DEFINE(u_short, ip_id);
 
 #ifdef MBUF_STRESS_TEST
 /* Stress-test knob, exported read-write as net.inet.ip.mbuf_frag_size. */
 static int mbuf_frag_size = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
 #endif
 
 /* Called by ip_output() to loop a multicast datagram back locally. */
 static void	ip_mloopback
 	(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
 
 
 /* Defined in other files; only declared here. */
 extern int in_mcast_loop;
 extern	struct protosw inetsw[];
 
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  * If route ro is present and has ro_rt initialized, route lookup would be
  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
  * then result of route lookup is stored in ro->ro_rt.
  *
  * In the IP forwarding case, the packet will arrive with options already
  * inserted, so must have a NULL opt pointer.
  */
 int
 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
     struct ip_moptions *imo, struct inpcb *inp)
 {
 	struct ip *ip;
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m0;
 	int hlen = sizeof (struct ip);
 	int mtu;
 	int error = 0;
 	struct sockaddr_in *dst;
 	const struct sockaddr_in *gw;
 	struct in_ifaddr *ia;
 	int isbroadcast;
 	uint16_t ip_len, ip_off;
 	struct route iproute;
 	struct rtentry *rte;	/* cache for ro->ro_rt */
 	struct in_addr odst;
 	struct m_tag *fwd_tag = NULL;
 	uint32_t fibnum;
 	int have_ia_ref;
 	int needfiblookup;
 #ifdef IPSEC
 	int no_route_but_check_spd = 0;
 #endif
 	M_ASSERTPKTHDR(m);
 
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
 		if ((flags & IP_NODEFAULTFLOWID) == 0) {
 			m->m_pkthdr.flowid = inp->inp_flowid;
 			M_HASHTYPE_SET(m, inp->inp_flowtype);
 		}
 	}
 
 	if (ro == NULL) {
 		ro = &iproute;
 		bzero(ro, sizeof (*ro));
 	}
 
 #ifdef FLOWTABLE
 	if (ro->ro_rt == NULL)
 		(void )flowtable_lookup(AF_INET, m, ro);
 #endif
 
 	if (opt) {
 		int len = 0;
 		m = ip_insertoptions(m, opt, &len);
 		if (len != 0)
 			hlen = len; /* ip->ip_hl is updated above */
 	}
 	ip = mtod(m, struct ip *);
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	/*
 	 * Fill in IP header.  If we are not allowing fragmentation,
 	 * then the ip_id field is meaningless, but we don't set it
 	 * to zero.  Doing so causes various problems when devices along
 	 * the path (routers, load balancers, firewalls, etc.) illegally
 	 * disable DF on our packet.  Note that a 16-bit counter
 	 * will wrap around in less than 10 seconds at 100 Mbit/s on a
 	 * medium with MTU 1500.  See Steven M. Bellovin, "A Technique
 	 * for Counting NATted Hosts", Proc. IMW'02, available at
 	 * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
 	 */
 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = hlen >> 2;
 		ip->ip_id = ip_newid();
 		IPSTAT_INC(ips_localout);
 	} else {
 		/* Header already set, fetch hlen from there */
 		hlen = ip->ip_hl << 2;
 	}
 
 	/*
 	 * dst/gw handling:
 	 *
 	 * dst can be rewritten but always points to &ro->ro_dst.
 	 * gw is readonly but can point either to dst OR rt_gateway,
 	 * therefore we need restore gw if we're redoing lookup.
 	 */
 	gw = dst = (struct sockaddr_in *)&ro->ro_dst;
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
 again:
 	ia = NULL;
 	have_ia_ref = 0;
 	/*
 	 * If there is a cached route, check that it is to the same
 	 * destination and is still up.  If not, free it and try again.
 	 * The address family should also be checked in case of sharing
 	 * the cache with IPv6.
 	 */
 	rte = ro->ro_rt;
 	if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
 		    rte->rt_ifp == NULL ||
 		    !RT_LINK_IS_UP(rte->rt_ifp) ||
 			  dst->sin_family != AF_INET ||
 			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
 		RO_RTFREE(ro);
 		ro->ro_lle = NULL;
 		rte = NULL;
 		gw = dst;
 	}
 	if (rte == NULL && fwd_tag == NULL) {
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
 	/*
 	 * If routing to interface only, short circuit routing lookup.
 	 * The use of an all-ones broadcast address implies this; an
 	 * interface is specified by the broadcast address of an interface,
 	 * or the destination address of a ptp interface.
 	 */
 	if (flags & IP_SENDONES) {
 		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
 						      M_GETFIB(m)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL) {
 			IPSTAT_INC(ips_noroute);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		have_ia_ref = 1;
 		ip->ip_dst.s_addr = INADDR_BROADCAST;
 		dst->sin_addr = ip->ip_dst;
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = 1;
 	} else if (flags & IP_ROUTETOIF) {
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
 						M_GETFIB(m)))) == NULL) {
 			IPSTAT_INC(ips_noroute);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		have_ia_ref = 1;
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
 		/*
 		 * Bypass the normal routing lookup for multicast
 		 * packets if the interface is specified.
 		 */
 		ifp = imo->imo_multicast_ifp;
 		IFP_TO_IA(ifp, ia);
 		if (ia)
 			have_ia_ref = 1;
 		isbroadcast = 0;	/* fool gcc */
 	} else {
 		/*
 		 * We want to do any cloning requested by the link layer,
 		 * as this is probably required in all cases for correct
 		 * operation (as it is for ARP).
 		 */
 		if (rte == NULL) {
 #ifdef RADIX_MPATH
 			rtalloc_mpath_fib(ro,
 			    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
 			    fibnum);
 #else
 			in_rtalloc_ign(ro, 0, fibnum);
 #endif
 			rte = ro->ro_rt;
 		}
 		if (rte == NULL ||
 		    rte->rt_ifp == NULL ||
 		    !RT_LINK_IS_UP(rte->rt_ifp)) {
 #ifdef IPSEC
 			/*
 			 * There is no route for this packet, but it is
 			 * possible that a matching SPD entry exists.
 			 */
 			no_route_but_check_spd = 1;
 			mtu = 0; /* Silence GCC warning. */
 			goto sendit;
 #endif
 			IPSTAT_INC(ips_noroute);
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 		ia = ifatoia(rte->rt_ifa);
 		ifp = rte->rt_ifp;
 		counter_u64_add(rte->rt_pksent, 1);
 		if (rte->rt_flags & RTF_GATEWAY)
 			gw = (struct sockaddr_in *)rte->rt_gateway;
 		if (rte->rt_flags & RTF_HOST)
 			isbroadcast = (rte->rt_flags & RTF_BROADCAST);
 		else
 			isbroadcast = in_broadcast(gw->sin_addr, ifp);
 	}
 	/*
 	 * Calculate MTU.  If we have a route that is up, use that,
 	 * otherwise use the interface's MTU.
 	 */
 	if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
 		mtu = rte->rt_mtu;
 	else
 		mtu = ifp->if_mtu;
 	/* Catch a possible divide by zero later. */
 	KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
 	    __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		m->m_flags |= M_MCAST;
 		/*
 		 * IP destination address is multicast.  Make sure "gw"
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
 		gw = dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
 			ip->ip_ttl = imo->imo_multicast_ttl;
 			if (imo->imo_multicast_vif != -1)
 				ip->ip_src.s_addr =
 				    ip_mcast_src ?
 				    ip_mcast_src(imo->imo_multicast_vif) :
 				    INADDR_ANY;
 		} else
 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 				IPSTAT_INC(ips_noroute);
 				error = ENETUNREACH;
 				goto bad;
 			}
 		}
 		/*
 		 * If source address not specified yet, use address
 		 * of outgoing interface.
 		 */
 		if (ip->ip_src.s_addr == INADDR_ANY) {
 			/* Interface may have no addresses. */
 			if (ia != NULL)
 				ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 
 		if ((imo == NULL && in_mcast_loop) ||
 		    (imo && imo->imo_multicast_loop)) {
 			/*
 			 * Loop back multicast datagram if not expressly
 			 * forbidden to do so, even if we are not a member
 			 * of the group; ip_input() will filter it later,
 			 * thus deferring a hash lookup and mutex acquisition
 			 * at the expense of a cheap copy using m_copym().
 			 */
 			ip_mloopback(ifp, m, dst, hlen);
 		} else {
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IP_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip_mloopback(),
 			 * above, will be forwarded by the ip_input() routine,
 			 * if necessary.
 			 */
 			if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
 				/*
 				 * If rsvp daemon is not running, do not
 				 * set ip_moptions. This ensures that the packet
 				 * is multicast and not just sent down one link
 				 * as prescribed by rsvpd.
 				 */
 				if (!V_rsvp_on)
 					imo = NULL;
 				if (ip_mforward &&
 				    ip_mforward(ip, ifp, m, imo) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 
 		/*
 		 * Multicasts with a time-to-live of zero may be looped-
 		 * back, above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip_mloopback() will
 		 * loop back a copy. ip_input() will drop the copy if
 		 * this host does not belong to the destination group on
 		 * the loopback interface.
 		 */
 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
 			m_freem(m);
 			goto done;
 		}
 
 		goto sendit;
 	}
 
 	/*
 	 * If the source address is not specified yet, use the address
 	 * of the outoing interface.
 	 */
 	if (ip->ip_src.s_addr == INADDR_ANY) {
 		/* Interface may have no addresses. */
 		if (ia != NULL) {
 			ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 	}
 
 	/*
 	 * Look for broadcast address and
 	 * verify user is allowed to send
 	 * such a packet.
 	 */
 	if (isbroadcast) {
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		if ((flags & IP_ALLOWBROADCAST) == 0) {
 			error = EACCES;
 			goto bad;
 		}
 		/* don't allow broadcast messages to be fragmented */
 		if (ip_len > mtu) {
 			error = EMSGSIZE;
 			goto bad;
 		}
 		m->m_flags |= M_BCAST;
 	} else {
 		m->m_flags &= ~M_BCAST;
 	}
 
 sendit:
 #ifdef IPSEC
 	switch(ip_ipsec_output(&m, inp, &error)) {
 	case 1:
 		goto bad;
 	case -1:
 		goto done;
 	case 0:
 	default:
 		break;	/* Continue with packet processing. */
 	}
 	/*
 	 * Check if there was a route for this packet; return error if not.
 	 */
 	if (no_route_but_check_spd) {
 		IPSTAT_INC(ips_noroute);
 		error = EHOSTUNREACH;
 		goto bad;
 	}
 	/* Update variables that are affected by ipsec4_output(). */
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 #endif /* IPSEC */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
 		goto passout;
 
 	/* Run through list of hooks for output packets. */
 	odst.s_addr = ip->ip_dst.s_addr;
 	error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
 	if (error != 0 || m == NULL)
 		goto done;
 
 	ip = mtod(m, struct ip *);
 	needfiblookup = 0;
 
 	/* See if destination IP address was changed by packet filter. */
 	if (odst.s_addr != ip->ip_dst.s_addr) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip_input(). */
 		if (in_localip(ip->ip_dst)) {
 			m->m_flags |= M_FASTFWD_OURS;
 			if (m->m_pkthdr.rcvif == NULL)
 				m->m_pkthdr.rcvif = V_loif;
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				m->m_pkthdr.csum_flags |=
 				    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 			m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 #ifdef SCTP
 			if (m->m_pkthdr.csum_flags & CSUM_SCTP)
 				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 			error = netisr_queue(NETISR_IP, m);
 			goto done;
 		} else {
 			if (have_ia_ref)
 				ifa_free(&ia->ia_ifa);
 			needfiblookup = 1; /* Redo the routing table lookup. */
 		}
 	}
 	/* See if fib was changed by packet filter. */
 	if (fibnum != M_GETFIB(m)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		fibnum = M_GETFIB(m);
 		RO_RTFREE(ro);
 		needfiblookup = 1;
 	}
 	if (needfiblookup)
 		goto again;
 
 	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = V_loif;
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 			m->m_pkthdr.csum_flags |=
 			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 #ifdef SCTP
 		if (m->m_pkthdr.csum_flags & CSUM_SCTP)
 			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 		m->m_pkthdr.csum_flags |=
 			    CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 		error = netisr_queue(NETISR_IP, m);
 		goto done;
 	}
 	/* Or forward to some other address? */
 	if ((m->m_flags & M_IP_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m->m_flags &= ~M_IP_NEXTHOP;
 		m_tag_delete(m, fwd_tag);
 		if (have_ia_ref)
 			ifa_free(&ia->ia_ifa);
 		goto again;
 	}
 
 passout:
 	/* 127/8 must not appear on wire - RFC1122. */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			IPSTAT_INC(ips_badaddr);
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 	}
 
 	m->m_pkthdr.csum_flags |= CSUM_IP;
 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
 	if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
 	 */
 	if (ip_len <= mtu ||
 	    (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
 		ip->ip_sum = 0;
 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
 			ip->ip_sum = in_cksum(m, hlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 
 		/*
 		 * Record statistics for this interface address.
 		 * With CSUM_TSO the byte/packet count will be slightly
 		 * incorrect because we count the IP+TCP headers only
 		 * once instead of for every generated packet.
 		 */
 		if (!(flags & IP_FORWARDING) && ia) {
 			if (m->m_pkthdr.csum_flags & CSUM_TSO)
 				counter_u64_add(ia->ia_ifa.ifa_opackets,
 				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
 			else
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 
 			counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
 		}
 #ifdef MBUF_STRESS_TEST
 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
 			m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
 #endif
 		/*
 		 * Reset layer specific mbuf flags
 		 * to avoid confusing lower layers.
 		 */
 		m_clrprotoflags(m);
 		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 		error = (*ifp->if_output)(ifp, m,
 		    (const struct sockaddr *)gw, ro);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
 	if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
 		error = EMSGSIZE;
 		IPSTAT_INC(ips_cantfrag);
 		goto bad;
 	}
 
 	/*
 	 * Too large for interface; fragment if possible. If successful,
 	 * on return, m will point to a list of packets to be sent.
 	 */
 	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
 	if (error)
 		goto bad;
 	for (; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia != NULL) {
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_obytes,
 				    m->m_pkthdr.len);
 			}
 			/*
 			 * Reset layer specific mbuf flags
 			 * to avoid confusing upper layers.
 			 */
 			m_clrprotoflags(m);
 
 			IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 			error = (*ifp->if_output)(ifp, m,
 			    (const struct sockaddr *)gw, ro);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		IPSTAT_INC(ips_fragmented);
 
 done:
 	if (ro == &iproute)
 		RO_RTFREE(ro);
 	if (have_ia_ref)
 		ifa_free(&ia->ia_ifa);
 	return (error);
 bad:
 	m_freem(m);
 	goto done;
 }
 
 /*
  * Create a chain of fragments which fit the given mtu. m_frag points to the
  * mbuf to be fragmented; on return it points to the chain with the fragments.
  * Return 0 if no error. If error, m_frag may contain a partially built
  * chain of fragments that should be freed by the caller.
  *
  * Parameters:
  *   ip                 IP header of the packet; supplies ip_hl, ip_len and
  *                      ip_off, and is rewritten in place to describe the
  *                      first fragment on success.
  *   m_frag             in/out: the packet to fragment / the resulting chain,
  *                      linked through m_nextpkt with the original mbuf as
  *                      the first fragment.
  *   mtu                upper bound on the size of each fragment, IP header
  *                      included.
  *   if_hwassist_flags  the hw offload capabilities (see
  *                      if_data.ifi_hwassist); the IP header checksum is
  *                      computed here only when CSUM_IP is requested on the
  *                      packet but absent from these flags.
  *
  * Returns 0 on success, EMSGSIZE if IP_DF is set or fewer than 8 payload
  * bytes fit in a fragment, and ENOBUFS if an mbuf allocation or copy fails.
  *
  * Delayed payload checksums (CSUM_DELAY_DATA, and CSUM_SCTP when SCTP is
  * compiled in) are always finalized in software before the packet is split.
  */
 int
 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
     u_long if_hwassist_flags)
 {
 	int error = 0;
 	int hlen = ip->ip_hl << 2;
 	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
 	int off;
 	struct mbuf *m0 = *m_frag;	/* the original packet		*/
 	int firstlen;
 	struct mbuf **mnext;
 	int nfrags;
 	uint16_t ip_len, ip_off;
 
 	/* Work on host-order copies of the length and offset/flags fields. */
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	if (ip_off & IP_DF) {	/* Fragmentation not allowed */
 		IPSTAT_INC(ips_cantfrag);
 		return EMSGSIZE;
 	}
 
 	/*
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	if (len < 8)
 		return EMSGSIZE;
 
 	/*
 	 * If the interface will not calculate checksums on
 	 * fragmented packets, then do it here.
 	 */
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
 	if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
 		sctp_delayed_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 	if (len > PAGE_SIZE) {
 		/*
 		 * Fragment large datagrams such that each segment
 		 * contains a multiple of PAGE_SIZE amount of data,
 		 * plus headers. This enables a receiver to perform
 		 * page-flipping zero-copy optimizations.
 		 *
 		 * XXX When does this help given that sender and receiver
 		 * could have different page sizes, and also mtu could
 		 * be less than the receiver's page size ?
 		 */
 		int newlen;
 		struct mbuf *m;
 
 		/*
 		 * Accumulate in 'off' the total length of the leading mbufs
 		 * of the chain that together still fit within mtu.
 		 */
 		for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
 			off += m->m_len;
 
 		/*
 		 * firstlen (off - hlen) must be aligned on an
 		 * 8-byte boundary
 		 */
 		if (off < hlen)
 			goto smart_frag_failure;
 		off = ((off - hlen) & ~7) + hlen;
 		/* Payload of later fragments: mtu rounded down to a page. */
 		newlen = (~PAGE_MASK) & mtu;
 		if ((newlen + sizeof (struct ip)) > mtu) {
 			/* we failed, go back the default */
 smart_frag_failure:
 			newlen = len;
 			off = hlen + len;
 		}
 		len = newlen;
 
 	} else {
 		off = hlen + len;
 	}
 
 	/*
 	 * From here on 'off' is the byte offset (from the start of the
 	 * original packet, header included) at which the next fragment's
 	 * payload begins; the first fragment covers [hlen, off).
 	 */
 	firstlen = off - hlen;
 	mnext = &m0->m_nextpkt;		/* pointer to next packet */
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 * Here, m0 is the original packet, m is the fragment being created.
 	 * The fragments are linked off the m_nextpkt of the original
 	 * packet, which after processing serves as the first fragment.
 	 */
 	for (nfrags = 1; off < ip_len; off += len, nfrags++) {
 		struct ip *mhip;	/* ip header on the fragment */
 		struct mbuf *m;
 		int mhlen = sizeof (struct ip);
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		/* Propagate only the multicast flag of the original. */
 		m->m_flags |= (m0->m_flags & M_MCAST);
 		/*
 		 * In the first mbuf, leave room for the link header, then
 		 * copy the original IP header including options. The payload
 		 * goes into an additional mbuf chain returned by m_copym().
 		 */
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		*mhip = *ip;
 		if (hlen > sizeof (struct ip)) {
 			/*
 			 * The original carries options: let ip_optcopy()
 			 * pick the ones for this fragment and size the
 			 * header from what it returns.
 			 */
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			mhip->ip_v = IPVERSION;
 			mhip->ip_hl = mhlen >> 2;
 		}
 		m->m_len = mhlen;
 		/* XXX do we need to add ip_off below ? */
 		/* Offset in 8-byte units, host order until htons() below. */
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		/* The last fragment takes the remainder and carries no MF. */
 		if (off + len >= ip_len)
 			len = ip_len - off;
 		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		m->m_next = m_copym(m0, off, len, M_NOWAIT);
 		if (m->m_next == NULL) {	/* copy failed */
 			m_free(m);
 			error = ENOBUFS;	/* ??? */
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 		mac_netinet_fragment(m0, m);
 #endif
 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
 		mhip->ip_off = htons(mhip->ip_off);
 		mhip->ip_sum = 0;
 		/* Checksum the header here unless the hardware will do it. */
 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 			mhip->ip_sum = in_cksum(m, mhlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 		*mnext = m;
 		mnext = &m->m_nextpkt;
 	}
 	IPSTAT_ADD(ips_ofragments, nfrags);
 
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header.
 	 *
 	 * The m_adj() count is negative whenever the loop above produced
 	 * fragments (hlen + firstlen < ip_len); see mbuf(9) for how a
 	 * negative count selects the end of the chain to trim.
 	 */
 	m_adj(m0, hlen + firstlen - ip_len);
 	m0->m_pkthdr.len = hlen + firstlen;
 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
 	ip->ip_off = htons(ip_off | IP_MF);
 	ip->ip_sum = 0;
 	if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 		ip->ip_sum = in_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_IP;
 	}
 
 done:
 	/*
 	 * Reached on success and on allocation failure alike: hand back m0
 	 * with whatever fragments have been linked onto it so far.
 	 */
 	*m_frag = m0;
 	return error;
 }
 
 /*
  * Finish a checksum that was deferred for the IPv4 packet in 'm'.
  *
  * The sum is taken over the packet from the end of the IP header up to
  * ip_len and stored at m_pkthdr.csum_data bytes past the IP header.  For
  * packets flagged CSUM_UDP a computed value of zero is stored as 0xffff.
  */
 void
 in_delayed_cksum(struct mbuf *m)
 {
 	struct ip *iph;
 	uint16_t sum, ckoff, totlen;
 
 	iph = mtod(m, struct ip *);
 	ckoff = iph->ip_hl << 2 ;
 	totlen = ntohs(iph->ip_len);
 
 	sum = in_cksum_skip(m, totlen, ckoff);
 	if ((m->m_pkthdr.csum_flags & CSUM_UDP) && sum == 0)
 		sum = 0xffff;
 
 	/* Turn the header length into the offset of the checksum field. */
 	ckoff += m->m_pkthdr.csum_data;
 
 	/* Step along the chain to the mbuf that holds the checksum field. */
 	for (; m != NULL && ckoff >= m->m_len; m = m->m_next)
 		ckoff -= m->m_len;
 
 	KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain."));
 	KASSERT(ckoff + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs."));
 	*(u_short *)(m->m_data + ckoff) = sum;
 }
 
 /*
  * IP socket option processing.
  *
  * Handles set (SOPT_SET) and get (SOPT_GET) requests at level IPPROTO_IP
  * for the inpcb attached to 'so'.  Requests at any other level fail with
  * EINVAL, except SOL_SOCKET sets of SO_REUSEADDR, SO_REUSEPORT and
  * SO_SETFIB, for which the socket's current state is mirrored into the
  * inpcb.
  *
  * so:   socket the request was issued on.
  * sopt: request descriptor (direction, level, name, value buffer).
  *
  * Returns 0 on success, otherwise an errno value; option names that are not
  * handled here yield ENOPROTOOPT.
  */
 int
 ip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 #ifdef	RSS
 	uint32_t rss_bucket;
 	int retval;
 #endif
 
 	error = optval = 0;
 	if (sopt->sopt_level != IPPROTO_IP) {
 		error = EINVAL;
 
 		if (sopt->sopt_level == SOL_SOCKET &&
 		    sopt->sopt_dir == SOPT_SET) {
 			/* Mirror selected socket-level state into the inpcb. */
 			switch (sopt->sopt_name) {
 			case SO_REUSEADDR:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEADDR) != 0)
 					inp->inp_flags2 |= INP_REUSEADDR;
 				else
 					inp->inp_flags2 &= ~INP_REUSEADDR;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_REUSEPORT:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEPORT) != 0)
 					inp->inp_flags2 |= INP_REUSEPORT;
 				else
 					inp->inp_flags2 &= ~INP_REUSEPORT;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_SETFIB:
 				INP_WLOCK(inp);
 				inp->inp_inc.inc_fibnum = so->so_fibnum;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			default:
 				break;
 			}
 		}
 		return (error);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 #ifdef notyet
 		case IP_RETOPTS:
 #endif
 		{
 			struct mbuf *m;
 			/* The options must fit in a single plain mbuf. */
 			if (sopt->sopt_valsize > MLEN) {
 				error = EMSGSIZE;
 				break;
 			}
 			m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
 			if (m == NULL) {
 				error = ENOBUFS;
 				break;
 			}
 			m->m_len = sopt->sopt_valsize;
 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 					    m->m_len);
 			if (error) {
 				m_free(m);
 				break;
 			}
 			/* The mbuf is handed to ip_pcbopts() from here on. */
 			INP_WLOCK(inp);
 			error = ip_pcbopts(inp, sopt->sopt_name, m);
 			INP_WUNLOCK(inp);
 			return (error);
 		}
 
 		case IP_BINDANY:
 			/* Requests issued by a thread need the privilege. */
 			if (sopt->sopt_td != NULL) {
 				error = priv_check(sopt->sopt_td,
 				    PRIV_NETINET_BINDANY);
 				if (error)
 					break;
 			}
 			/* FALLTHROUGH */
 		case IP_BINDMULTI:
 #ifdef	RSS
 		case IP_RSS_LISTEN_BUCKET:
 #endif
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_RECVTOS:
 		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RECVRSSBUCKETID:
 #endif
 			/* All of these take a single int argument. */
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				inp->inp_ip_tos = optval;
 				break;
 
 			case IP_TTL:
 				inp->inp_ip_ttl = optval;
 				break;
 
 			case IP_MINTTL:
 				if (optval >= 0 && optval <= MAXTTL)
 					inp->inp_ip_minttl = optval;
 				else
 					error = EINVAL;
 				break;
 
 /* Set or clear 'bit' in inp_flags according to optval, under the inp lock. */
 #define	OPTSET(bit) do {						\
 	INP_WLOCK(inp);							\
 	if (optval)							\
 		inp->inp_flags |= bit;					\
 	else								\
 		inp->inp_flags &= ~bit;					\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 /* Same for inp_flags2, with the deciding value passed explicitly. */
 #define	OPTSET2(bit, val) do {						\
 	INP_WLOCK(inp);							\
 	if (val)							\
 		inp->inp_flags2 |= bit;					\
 	else								\
 		inp->inp_flags2 &= ~bit;				\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 			case IP_RECVOPTS:
 				OPTSET(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				OPTSET(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				OPTSET(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				OPTSET(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				OPTSET(INP_RECVIF);
 				break;
 
 			case IP_ONESBCAST:
 				OPTSET(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				OPTSET(INP_DONTFRAG);
 				break;
 			case IP_BINDANY:
 				OPTSET(INP_BINDANY);
 				break;
 			case IP_RECVTOS:
 				OPTSET(INP_RECVTOS);
 				break;
 			case IP_BINDMULTI:
 				OPTSET2(INP_BINDMULTI, optval);
 				break;
 			case IP_RECVFLOWID:
 				OPTSET2(INP_RECVFLOWID, optval);
 				break;
 #ifdef	RSS
 			case IP_RSS_LISTEN_BUCKET:
 				if ((optval >= 0) &&
 				    (optval < rss_getnumbuckets())) {
 					inp->inp_rss_listen_bucket = optval;
 					OPTSET2(INP_RSS_BUCKET_SET, 1);
 				} else {
 					error = EINVAL;
 				}
 				break;
 			case IP_RECVRSSBUCKETID:
 				OPTSET2(INP_RECVRSSBUCKETID, optval);
 				break;
 #endif
 			}
 			break;
 #undef OPTSET
 #undef OPTSET2
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 		case IP_ADD_SOURCE_MEMBERSHIP:
 		case IP_DROP_SOURCE_MEMBERSHIP:
 		case IP_BLOCK_SOURCE:
 		case IP_UNBLOCK_SOURCE:
 		case IP_MSFILTER:
 		case MCAST_JOIN_GROUP:
 		case MCAST_LEAVE_GROUP:
 		case MCAST_JOIN_SOURCE_GROUP:
 		case MCAST_LEAVE_SOURCE_GROUP:
 		case MCAST_BLOCK_SOURCE:
 		case MCAST_UNBLOCK_SOURCE:
 			error = inp_setmoptions(inp, sopt);
 			break;
 
 		case IP_PORTRANGE:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			/* INP_LOWPORT and INP_HIGHPORT are mutually exclusive. */
 			INP_WLOCK(inp);
 			switch (optval) {
 			case IP_PORTRANGE_DEFAULT:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				break;
 
 			case IP_PORTRANGE_HIGH:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags |= INP_HIGHPORT;
 				break;
 
 			case IP_PORTRANGE_LOW:
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				inp->inp_flags |= INP_LOWPORT;
 				break;
 
 			default:
 				error = EINVAL;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			break;
 
 #ifdef IPSEC
 		case IP_IPSEC_POLICY:
 		{
 			caddr_t req;
 			struct mbuf *m;
 
 			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 				break;
 			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 				break;
 			req = mtod(m, caddr_t);
 			error = ipsec_set_policy(inp, sopt->sopt_name, req,
 			    m->m_len, (sopt->sopt_td != NULL) ?
 			    sopt->sopt_td->td_ucred : NULL);
 			m_freem(m);
 			break;
 		}
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 		case IP_RETOPTS:
 			/* No stored options: report a zero-length value. */
 			if (inp->inp_options)
 				error = sooptcopyout(sopt,
 						     mtod(inp->inp_options,
 							  char *),
 						     inp->inp_options->m_len);
 			else
 				sopt->sopt_valsize = 0;
 			break;
 
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_PORTRANGE:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_BINDANY:
 		case IP_RECVTOS:
 		case IP_BINDMULTI:
 		case IP_FLOWID:
 		case IP_FLOWTYPE:
 		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RSSBUCKETID:
 		case IP_RECVRSSBUCKETID:
 #endif
 			/* All of these are reported as a single int. */
 			switch (sopt->sopt_name) {
 
 			case IP_TOS:
 				optval = inp->inp_ip_tos;
 				break;
 
 			case IP_TTL:
 				optval = inp->inp_ip_ttl;
 				break;
 
 			case IP_MINTTL:
 				optval = inp->inp_ip_minttl;
 				break;
 
 /* 1 if 'bit' is set in inp_flags / inp_flags2, else 0. */
 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
 #define	OPTBIT2(bit)	(inp->inp_flags2 & bit ? 1 : 0)
 
 			case IP_RECVOPTS:
 				optval = OPTBIT(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				optval = OPTBIT(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				optval = OPTBIT(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				optval = OPTBIT(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				optval = OPTBIT(INP_RECVIF);
 				break;
 
 			case IP_PORTRANGE:
 				if (inp->inp_flags & INP_HIGHPORT)
 					optval = IP_PORTRANGE_HIGH;
 				else if (inp->inp_flags & INP_LOWPORT)
 					optval = IP_PORTRANGE_LOW;
 				else
 					optval = 0;
 				break;
 
 			case IP_ONESBCAST:
 				optval = OPTBIT(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				optval = OPTBIT(INP_DONTFRAG);
 				break;
 			case IP_BINDANY:
 				optval = OPTBIT(INP_BINDANY);
 				break;
 			case IP_RECVTOS:
 				optval = OPTBIT(INP_RECVTOS);
 				break;
 			case IP_FLOWID:
 				optval = inp->inp_flowid;
 				break;
 			case IP_FLOWTYPE:
 				optval = inp->inp_flowtype;
 				break;
 			case IP_RECVFLOWID:
 				optval = OPTBIT2(INP_RECVFLOWID);
 				break;
 #ifdef	RSS
 			case IP_RSSBUCKETID:
 				retval = rss_hash2bucket(inp->inp_flowid,
 				    inp->inp_flowtype,
 				    &rss_bucket);
 				if (retval == 0)
 					optval = rss_bucket;
 				else
 					error = EINVAL;
 				break;
 			case IP_RECVRSSBUCKETID:
 				optval = OPTBIT2(INP_RECVRSSBUCKETID);
 				break;
 #endif
 			case IP_BINDMULTI:
 				optval = OPTBIT2(INP_BINDMULTI);
 				break;
 			}
 			/*
 			 * Only copy the value out if the lookup above
 			 * succeeded; an unconditional copyout would replace
 			 * an error such as the EINVAL from IP_RSSBUCKETID
 			 * with success and hand back a meaningless value.
 			 */
 			if (error == 0)
 				error = sooptcopyout(sopt, &optval,
 				    sizeof optval);
 			break;
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_MSFILTER:
 			error = inp_getmoptions(inp, sopt);
 			break;
 
 #ifdef IPSEC
 		case IP_IPSEC_POLICY:
 		{
 			struct mbuf *m = NULL;
 			caddr_t req = NULL;
 			size_t len = 0;
 
 			/*
 			 * m is always NULL at this point, so req/len stay
 			 * NULL/0 for the call below.
 			 */
 			if (m != 0) {
 				req = mtod(m, caddr_t);
 				len = m->m_len;
 			}
 			error = ipsec_get_policy(sotoinpcb(so), req, len, &m);
 			if (error == 0)
 				error = soopt_mcopyout(sopt, m); /* XXX */
 			if (error == 0)
 				m_freem(m);
 			break;
 		}
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * Loop a copy of an outgoing IP multicast packet back to the input side of
  * the given interface on behalf of ip_output().  The copy is pushed through
  * if_simloop() together with 'ifp', even though 'ifp' need not be a
  * loopback interface -- ugly, but it avoids duplicating that code here.
  *
  * ifp:  interface the copy is looped back on.
  * m:    the packet being sent; it is duplicated, never modified.
  * dst:  destination address; only sin_family is consulted (and forced to
  *       AF_INET, with a console message, if it is anything else).
  * hlen: IP header length in bytes.
  *
  * If the copy cannot be allocated the packet is silently not looped back.
  */
 static void
 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
     int hlen)
 {
 	struct mbuf *dup;
 	struct ip *iph;
 
 	/* Checksums get written into the packet, so work on a deep copy. */
 	dup = m_dup(m, M_NOWAIT);
 	if (dup == NULL)
 		return;
 
 	/* The IP header must be writable and contiguous in the first mbuf. */
 	if (!M_WRITABLE(dup) || dup->m_len < hlen) {
 		dup = m_pullup(dup, hlen);
 		if (dup == NULL)
 			return;
 	}
 
 	/* Finish a pending data checksum and flag it as already verified. */
 	if (dup->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(dup);
 		dup->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		dup->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		dup->m_pkthdr.csum_data = 0xffff;
 	}
 
 	/*
 	 * No fragmentation is attempted even when the IP length exceeds
 	 * the interface MTU.  Can this possibly matter?
 	 */
 	iph = mtod(dup, struct ip *);
 	iph->ip_sum = 0;
 	iph->ip_sum = in_cksum(dup, hlen);
 #if 1 /* XXX */
 	if (dst->sin_family != AF_INET) {
 		printf("ip_mloopback: bad address family %d\n",
 		    dst->sin_family);
 		dst->sin_family = AF_INET;
 	}
 #endif
 	if_simloop(ifp, dup, dst->sin_family, 0);
 }
Index: head/sys/netinet/tcp_timer.c
===================================================================
--- head/sys/netinet/tcp_timer.c	(revision 277330)
+++ head/sys/netinet/tcp_timer.c	(revision 277331)
@@ -1,945 +1,946 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_tcpdebug.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/route.h>
+#include <net/rss_config.h>
 #include <net/vnet.h>
 #include <net/netisr.h>
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #include <netinet/in_systm.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/ip_var.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 
 /*
  * TCP timer tunables and their sysctl nodes under net.inet.tcp.
  *
  * NOTE(review): the SYSCTL_PROC entries all use sysctl_msec_to_ticks as
  * their handler; judging by its name the values are presented in
  * milliseconds and stored in ticks -- confirm against the handler.
  * None of these variables is given an initial value here.
  */
 int	tcp_keepinit;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
 
 int	tcp_keepidle;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
 
 int	tcp_keepintvl;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
 
 int	tcp_delacktime;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
     "Time before a delayed ACK is sent");
 
 int	tcp_msl;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
 
 int	tcp_rexmit_min;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
     "Minimum Retransmission Timeout");
 
 int	tcp_rexmit_slop;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
     "Retransmission Timer Slop");
 
 /* Plain integer knobs; the ones with an initializer default to it. */
 static int	always_keepalive = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
 
 int    tcp_fast_finwait2_recycle = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 
     &tcp_fast_finwait2_recycle, 0,
     "Recycle closed FIN_WAIT_2 connections faster");
 
 int    tcp_finwait2_timeout;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
 
 int	tcp_keepcnt = TCPTV_KEEPCNT;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
     "Number of keepalive probes to send");
 
 	/* max idle probes */
 	/* (no sysctl node is attached to this one here) */
 int	tcp_maxpersistidle;
 
 static int	tcp_rexmit_drop_options = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
     &tcp_rexmit_drop_options, 0,
     "Drop TCP options from 3rd and later retransmitted SYN");
 
 /*
  * Path MTU discovery black hole detection: one enable switch, three
  * read-only counters and the lowered MSS values.  All of these are
  * per-vnet (VNET_DEFINE / CTLFLAG_VNET).
  */
 static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
 #define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
     "Path MTU Discovery Black Hole Detection Enabled");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
 #define	V_tcp_pmtud_blackhole_activated \
     VNET(tcp_pmtud_blackhole_activated)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
     "Path MTU Discovery Black Hole Detection, Activation Count");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
 #define	V_tcp_pmtud_blackhole_activated_min_mss \
     VNET(tcp_pmtud_blackhole_activated_min_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
     "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
 #define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
     "Path MTU Discovery Black Hole Detection, Failure Count");
 
 /* Lowered MSS for IPv4; only present in kernels built with INET. */
 #ifdef INET
 static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
 #define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
     "Path MTU Discovery Black Hole Detection lowered MSS");
 #endif
 
 /* Lowered MSS for IPv6; only present in kernels built with INET6. */
 #ifdef INET6
 static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
 #define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
     "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
 #endif
 
 /*
  * Whether TCP timers are spread over all CPUs.  The default depends on the
  * build: on when the kernel is compiled with RSS, off otherwise.
  */
 #ifdef	RSS
 static int	per_cpu_timers = 1;
 #else
 static int	per_cpu_timers = 0;
 #endif
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
     &per_cpu_timers , 0, "run tcp timers on all cpus");
 
 /*
  * Compiled out.  inp_to_cpuid() below implements the same mapping for the
  * non-RSS case.
  */
 #if 0
 #define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
 		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
 #endif
 
 /*
  * Map the given inp to a CPU id.
  *
  * With per_cpu_timers disabled the answer is always CPU 0.  Otherwise:
  *  - RSS kernels ask rss_hash2cpuid() and fall back to the current CPU
  *    when it reports NETISR_CPUID_NONE;
  *  - non-RSS kernels use inp_flowid modulo (mp_maxid + 1) and fall back
  *    to the current CPU when that CPU is absent.
  */
 static inline int
 inp_to_cpuid(struct inpcb *inp)
 {
 	u_int cpuid;
 
 	/* Default for RSS and non-RSS - cpuid 0 */
 	if (!per_cpu_timers)
 		return (0);
 
 #ifdef	RSS
 	cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
 	if (cpuid == NETISR_CPUID_NONE)
 		return (curcpu);	/* XXX */
 	return (cpuid);
 #else
 	/*
 	 * Legacy, pre-RSS behaviour: there is no flowid -> cpuid mapping,
 	 * so cheat and send unknown cpuids to curcpu.  Not the best, but
 	 * apparently better than defaulting to swi 0.
 	 */
 	cpuid = inp->inp_flowid % (mp_maxid + 1);
 	if (CPU_ABSENT(cpuid))
 		return (curcpu);
 	return (cpuid);
 #endif
 }
 
 /*
  * Tcp protocol slow timeout routine (every 500 ms according to the
  * historical comment; the caller and its period are not visible here).
  *
  * Walks every vnet under the vnet list read lock and, with that vnet made
  * current, calls tcp_tw_2msl_scan(0).  The scan's return value is ignored.
  */
 void
 tcp_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		/* Switch to this vnet for the scan, then switch back. */
 		CURVNET_SET(vnet_iter);
 		(void) tcp_tw_2msl_scan(0);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Retransmit backoff multipliers, indexed by t_rxtshift.
  * tcp_timer_rexmt() scales TCPTV_RTOBASE by tcp_syn_backoff[] while in
  * SYN_SENT and TCP_REXMTVAL() by tcp_backoff[] in every other state.
  */
 int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
 
 int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
 
 /* Used by tcp_timer_persist() to bound the idle time of a persisting peer. */
 static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
 
 /*
  * Number of times a timer handler found tp->t_inpcb to be NULL and
  * returned without doing any work; read-only through sysctl.
  */
 static int tcp_timer_race;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
     0, "Count of t_inpcb races on tcp_discardcb");
 
 /*
  * TCP timer processing.
  */
 
 /*
  * Delayed-ACK timer handler; xtp is the tcpcb the callout was armed for.
  *
  * If the callout is still the current one and the connection has not
  * been dropped, mark the connection TF_ACKNOW and call tcp_output().
  */
 void
 tcp_timer_delack(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 
 	inp = tp->t_inpcb;
 	/*
 	 * XXXRW: asserting inp != NULL would be correct, but bugs in the
 	 * tcpcb tear-down mean the check below is needed as a work-around
 	 * for races between timers and tcp_discardcb().
 	 *
 	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
 	 */
 	if (inp == NULL) {
 		tcp_timer_race++;
 		goto done;
 	}
 	INP_WLOCK(inp);
 	/* Nothing to do if the callout is pending again or no longer active. */
 	if (callout_pending(&tp->t_timers->tt_delack) ||
 	    !callout_active(&tp->t_timers->tt_delack))
 		goto unlock;
 	callout_deactivate(&tp->t_timers->tt_delack);
 	if ((inp->inp_flags & INP_DROPPED) != 0)
 		goto unlock;
 
 	tp->t_flags |= TF_ACKNOW;
 	TCPSTAT_INC(tcps_delack);
 	(void) tcp_output(tp);
 unlock:
 	INP_WUNLOCK(inp);
 done:
 	CURVNET_RESTORE();
 }
 
 /*
  * 2MSL timer handler; xtp is the tcpcb the callout was armed for.
  *
  * Runs with tcbinfo write-locked and the inp write-locked.  Depending on
  * the connection state the connection is either closed or the timer is
  * re-armed with the keepalive interval.
  */
 void
 tcp_timer_2msl(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	/*
 	 * XXXRW: Does this actually happen?
 	 */
 	INP_INFO_WLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	/*
 	 * XXXRW: asserting inp != NULL would be correct, but bugs in the
 	 * tcpcb tear-down mean the check below is needed as a work-around
 	 * for races between timers and tcp_discardcb().
 	 *
 	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
 	 */
 	if (inp == NULL) {
 		tcp_timer_race++;
 		goto unlock_info;
 	}
 	INP_WLOCK(inp);
 	tcp_free_sackholes(tp);
 	/* Nothing to do if the callout is pending again or no longer active. */
 	if (callout_pending(&tp->t_timers->tt_2msl) ||
 	    !callout_active(&tp->t_timers->tt_2msl))
 		goto unlock;
 	callout_deactivate(&tp->t_timers->tt_2msl);
 	if ((inp->inp_flags & INP_DROPPED) != 0)
 		goto unlock;
 
 	/*
 	 * 2 MSL timeout in shutdown went off.  If we're closed but
 	 * still waiting for peer to close and connection has been idle
 	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
 	 * control block.  Otherwise, check again in a bit.
 	 *
 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
 	 * Ignore fact that there were recent incoming segments.
 	 */
 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
 	    tp->t_inpcb != NULL && tp->t_inpcb->inp_socket != NULL &&
 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
 		TCPSTAT_INC(tcps_finwait2_drops);
 		tp = tcp_close(tp);
 	} else if (tp->t_state != TCPS_TIME_WAIT &&
 	    ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
 		callout_reset_on(&tp->t_timers->tt_2msl, TP_KEEPINTVL(tp),
 		    tcp_timer_2msl, tp, inp_to_cpuid(inp));
 	} else {
 		tp = tcp_close(tp);
 	}
 
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	/* A NULL tp from tcp_close() means the inp must not be unlocked here. */
 	if (tp == NULL)
 		goto unlock_info;
 unlock:
 	INP_WUNLOCK(inp);
 unlock_info:
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 void
 tcp_timer_keep(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct tcptemp *t_template;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	INP_INFO_WLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	/*
 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
 	 * tear-down mean we need it as a work-around for races between
 	 * timers and tcp_discardcb().
 	 *
 	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
 	 */
 	if (inp == NULL) {
 		tcp_timer_race++;
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_keep) ||
 	    !callout_active(&tp->t_timers->tt_keep)) {
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_keep);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	/*
 	 * Keep-alive timer went off; send something
 	 * or drop connection if idle for too long.
 	 */
 	TCPSTAT_INC(tcps_keeptimeo);
 	if (tp->t_state < TCPS_ESTABLISHED)
 		goto dropit;
 	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
 	    tp->t_state <= TCPS_CLOSING) {
 		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
 			goto dropit;
 		/*
 		 * Send a packet designed to force a response
 		 * if the peer is up and reachable:
 		 * either an ACK if the connection is still alive,
 		 * or an RST if the peer has closed the connection
 		 * due to timeout or reboot.
 		 * Using sequence number tp->snd_una-1
 		 * causes the transmitted zero-length segment
 		 * to lie outside the receive window;
 		 * by the protocol spec, this requires the
 		 * correspondent TCP to respond.
 		 */
 		TCPSTAT_INC(tcps_keepprobe);
 		t_template = tcpip_maketemplate(inp);
 		if (t_template) {
 			tcp_respond(tp, t_template->tt_ipgen,
 				    &t_template->tt_t, (struct mbuf *)NULL,
 				    tp->rcv_nxt, tp->snd_una - 1, 0);
 			free(t_template, M_TEMP);
 		}
 		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
 		    tcp_timer_keep, tp, inp_to_cpuid(inp));
 	} else
 		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
 		    tcp_timer_keep, tp, inp_to_cpuid(inp));
 
 #ifdef TCPDEBUG
 	if (inp->inp_socket->so_options & SO_DEBUG)
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 	return;
 
 dropit:
 	TCPSTAT_INC(tcps_keepdrops);
 	tp = tcp_drop(tp, ETIMEDOUT);
 
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 /*
  * Persist timer handler; xtp is the tcpcb the callout was armed for.
  *
  * Runs with tcbinfo write-locked and the inp write-locked.  Either drops
  * the connection with ETIMEDOUT or re-arms the persist timer and forces
  * output with TF_FORCEDATA set for the duration of tcp_output().
  */
 void
 tcp_timer_persist(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	INP_INFO_WLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	/*
 	 * XXXRW: asserting inp != NULL would be correct, but bugs in the
 	 * tcpcb tear-down mean the check below is needed as a work-around
 	 * for races between timers and tcp_discardcb().
 	 *
 	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
 	 */
 	if (inp == NULL) {
 		tcp_timer_race++;
 		goto unlock_info;
 	}
 	INP_WLOCK(inp);
 	/* Nothing to do if the callout is pending again or no longer active. */
 	if (callout_pending(&tp->t_timers->tt_persist) ||
 	    !callout_active(&tp->t_timers->tt_persist))
 		goto unlock;
 	callout_deactivate(&tp->t_timers->tt_persist);
 	if ((inp->inp_flags & INP_DROPPED) != 0)
 		goto unlock;
 
 	/*
 	 * Persistence timer into zero window.
 	 * Force a byte to be output, if possible.
 	 */
 	TCPSTAT_INC(tcps_persisttimeo);
 	/*
 	 * Two reasons to give up on the connection instead of probing:
 	 *
 	 * Hack: if the peer is dead/unreachable, we do not
 	 * time out if the window is closed.  After a full
 	 * backoff, drop the connection if the idle time
 	 * (no responses to probes) reaches the maximum
 	 * backoff that we would use if retransmitting.
 	 *
 	 * If the user has closed the socket then drop a persisting
 	 * connection after a much reduced timeout.
 	 */
 	if ((tp->t_rxtshift == TCP_MAXRXTSHIFT &&
 	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
 	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) ||
 	    (tp->t_state > TCPS_CLOSE_WAIT &&
 	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX)) {
 		TCPSTAT_INC(tcps_persistdrop);
 		tp = tcp_drop(tp, ETIMEDOUT);
 	} else {
 		tcp_setpersist(tp);
 		tp->t_flags |= TF_FORCEDATA;
 		(void) tcp_output(tp);
 		tp->t_flags &= ~TF_FORCEDATA;
 	}
 
 #ifdef TCPDEBUG
 	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
 		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
 #endif
 	/* A NULL tp from tcp_drop() means the inp must not be unlocked here. */
 	if (tp == NULL)
 		goto unlock_info;
 unlock:
 	INP_WUNLOCK(inp);
 unlock_info:
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 /*
  * Retransmit timer handler; xtp is the tcpcb the callout was armed for.
  *
  * Entered with no locks held.  tcbinfo is read-locked and the inp
  * write-locked for the validity checks.  When the retransmit count
  * exceeds TCP_MAXRXTSHIFT the tcbinfo lock is traded for a write lock
  * and the connection is dropped; otherwise the tcbinfo lock is released
  * early and the handler backs off the retransmit timer, optionally runs
  * the Path MTU black hole heuristics, and calls tcp_output().
  */
 void
 tcp_timer_rexmt(void * xtp)
 {
 	struct tcpcb *tp = xtp;
 	CURVNET_SET(tp->t_vnet);
 	int rexmt;
 	int headlocked;		/* non-zero: tcbinfo write lock held at "out" */
 	struct inpcb *inp;
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	/*
 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
 	 * tear-down mean we need it as a work-around for races between
 	 * timers and tcp_discardcb().
 	 *
 	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
 	 */
 	if (inp == NULL) {
 		tcp_timer_race++;
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	INP_WLOCK(inp);
 	/* Nothing to do if the callout is pending again or no longer active. */
 	if (callout_pending(&tp->t_timers->tt_rexmt) ||
 	    !callout_active(&tp->t_timers->tt_rexmt)) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_rexmt);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	tcp_free_sackholes(tp);
 	/*
 	 * Retransmission timer went off.  Message has not
 	 * been acked within retransmit interval.  Back off
 	 * to a longer retransmit interval and retransmit one segment.
 	 */
 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
 		TCPSTAT_INC(tcps_timeoutdrop);
 		/*
 		 * Trade the tcbinfo read lock for a write lock before
 		 * dropping (presumably because tcp_drop() needs it
 		 * write-locked -- confirm against tcp_drop()).  Both locks
 		 * are released and re-taken in tcbinfo-then-inp order; the
 		 * reference taken here keeps the inp from being freed in
 		 * between.
 		 */
 		in_pcbref(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		INP_WUNLOCK(inp);
 		INP_INFO_WLOCK(&V_tcbinfo);
 		INP_WLOCK(inp);
 		/*
 		 * A non-zero return ends the handler without unlocking the
 		 * inp; only the tcbinfo lock is released.
 		 */
 		if (in_pcbrele_wlocked(inp)) {
 			INP_INFO_WUNLOCK(&V_tcbinfo);
 			CURVNET_RESTORE();
 			return;
 		}
 		/* Re-check: the state may have changed while unlocked. */
 		if (inp->inp_flags & INP_DROPPED) {
 			INP_WUNLOCK(inp);
 			INP_INFO_WUNLOCK(&V_tcbinfo);
 			CURVNET_RESTORE();
 			return;
 		}
 
 		tp = tcp_drop(tp, tp->t_softerror ?
 			      tp->t_softerror : ETIMEDOUT);
 		headlocked = 1;
 		goto out;
 	}
 	/* From here on only the inp lock is held. */
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	headlocked = 0;
 	if (tp->t_state == TCPS_SYN_SENT) {
 		/*
 		 * If the SYN was retransmitted, indicate CWND to be
 		 * limited to 1 segment in cc_conn_init().
 		 */
 		tp->snd_cwnd = 1;
 	} else if (tp->t_rxtshift == 1) {
 		/*
 		 * first retransmit; record ssthresh and cwnd so they can
 		 * be recovered if this turns out to be a "bad" retransmit.
 		 * A retransmit is considered "bad" if an ACK for this
 		 * segment is received within RTT/2 interval; the assumption
 		 * here is that the ACK was already in flight.  See
 		 * "On Estimating End-to-End Network Path Properties" by
 		 * Allman and Paxson for more details.
 		 */
 		tp->snd_cwnd_prev = tp->snd_cwnd;
 		tp->snd_ssthresh_prev = tp->snd_ssthresh;
 		tp->snd_recover_prev = tp->snd_recover;
 		if (IN_FASTRECOVERY(tp->t_flags))
 			tp->t_flags |= TF_WASFRECOVERY;
 		else
 			tp->t_flags &= ~TF_WASFRECOVERY;
 		if (IN_CONGRECOVERY(tp->t_flags))
 			tp->t_flags |= TF_WASCRECOVERY;
 		else
 			tp->t_flags &= ~TF_WASCRECOVERY;
 		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
 		tp->t_flags |= TF_PREVVALID;
 	} else
 		tp->t_flags &= ~TF_PREVVALID;
 	TCPSTAT_INC(tcps_rexmttimeo);
 	/* Scale the base timeout by the backoff table entry for this shift. */
 	if (tp->t_state == TCPS_SYN_SENT)
 		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
 	else
 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
 		      tp->t_rttmin, TCPTV_REXMTMAX);
 
 	/*
 	 * We enter the path for PLMTUD if connection is established or, if
 	 * connection is FIN_WAIT_1 status, reason for the last is that if
 	 * amount of data we send is very small, we could send it in couple of
 	 * packets and process straight to FIN. In that case we won't catch
 	 * ESTABLISHED state.
 	 */
 	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
 	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
 		int optlen;
 #ifdef INET6
 		int isipv6;
 #endif
 
 		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
 		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
 		    (tp->t_rxtshift <= 2)) {
 			/*
 			 * Enter Path MTU Black-hole Detection mechanism:
 			 * - Disable Path MTU Discovery (IP "DF" bit).
 			 * - Reduce MTU to lower value than what we
 			 *   negotiated with peer.
 			 */
 			/* Record that we may have found a black hole. */
 			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
 
 			/* Keep track of previous MSS. */
 			optlen = tp->t_maxopd - tp->t_maxseg;
 			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
 
 			/*
 			 * Reduce the MSS to blackhole value or to the default
 			 * in an attempt to retransmit.
 			 *
 			 * The preprocessor blocks below splice an IPv6
 			 * if/else-if chain onto the IPv4 if/else with a
 			 * bare "else" when both families are compiled in.
 			 */
 #ifdef INET6
 			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
 			if (isipv6 &&
 			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
 				/* Use the sysctl tuneable blackhole MSS. */
 				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
 				V_tcp_pmtud_blackhole_activated++;
 			} else if (isipv6) {
 				/* Use the default MSS. */
 				tp->t_maxopd = V_tcp_v6mssdflt;
 				/*
 				 * Disable Path MTU Discovery when we switch to
 				 * minmss.
 				 */
 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 				V_tcp_pmtud_blackhole_activated_min_mss++;
 			}
 #endif
 #if defined(INET6) && defined(INET)
 			else
 #endif
 #ifdef INET
 			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
 				/* Use the sysctl tuneable blackhole MSS. */
 				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
 				V_tcp_pmtud_blackhole_activated++;
 			} else {
 				/* Use the default MSS. */
 				tp->t_maxopd = V_tcp_mssdflt;
 				/*
 				 * Disable Path MTU Discovery when we switch to
 				 * minmss.
 				 */
 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 				V_tcp_pmtud_blackhole_activated_min_mss++;
 			}
 #endif
 			tp->t_maxseg = tp->t_maxopd - optlen;
 			/*
 			 * Reset the slow-start flight size
 			 * as it may depend on the new MSS.
 			 */
 			if (CC_ALGO(tp)->conn_init != NULL)
 				CC_ALGO(tp)->conn_init(tp->ccv);
 		} else {
 			/*
 			 * If further retransmissions are still unsuccessful
 			 * with a lowered MTU, maybe this isn't a blackhole and
 			 * we restore the previous MSS and blackhole detection
 			 * flags.
 			 */
 			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
 			    (tp->t_rxtshift > 4)) {
 				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
 				optlen = tp->t_maxopd - tp->t_maxseg;
 				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
 				tp->t_maxseg = tp->t_maxopd - optlen;
 				V_tcp_pmtud_blackhole_failed++;
 				/*
 				 * Reset the slow-start flight size as it
 				 * may depend on the new MSS.
 				 */
 				if (CC_ALGO(tp)->conn_init != NULL)
 					CC_ALGO(tp)->conn_init(tp->ccv);
 			}
 		}
 	}
 
 	/*
 	 * Disable RFC1323 and SACK if we haven't got any response to
 	 * our third SYN to work-around some broken terminal servers
 	 * (most of which have hopefully been retired) that have bad VJ
 	 * header compression code which trashes TCP segments containing
 	 * unknown-to-them TCP options.
 	 */
 	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
 	    (tp->t_rxtshift == 3))
 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
 	/*
 	 * If we backed off this far, our srtt estimate is probably bogus.
 	 * Clobber it so we'll take the next rtt measurement as our srtt;
 	 * move the current srtt into rttvar to keep the current
 	 * retransmit times until then.
 	 */
 	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
 #ifdef INET6
 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 			in6_losing(tp->t_inpcb);
 #endif
 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 		tp->t_srtt = 0;
 	}
 	/* Restart sending from the oldest unacknowledged byte. */
 	tp->snd_nxt = tp->snd_una;
 	tp->snd_recover = tp->snd_max;
 	/*
 	 * Force a segment to be sent.
 	 */
 	tp->t_flags |= TF_ACKNOW;
 	/*
 	 * If timing a segment in this window, stop the timer.
 	 */
 	tp->t_rtttime = 0;
 
 	cc_cong_signal(tp, NULL, CC_RTO);
 
 	(void) tcp_output(tp);
 
 out:
 	/*
 	 * Common exit.  tp is NULL when tcp_drop() returned NULL, in which
 	 * case the inp is not unlocked here.
 	 */
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
 	if (headlocked)
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 /*
  * Arm or disarm one of the per-connection TCP timers.
  *
  *	tp		connection whose timer is changed
  *	timer_type	TT_DELACK, TT_REXMT, TT_PERSIST, TT_KEEP or TT_2MSL;
  *			any other value panics
  *	delta		0 stops the timer; any other value is the timeout
  *			handed to callout_reset_on()
  *
  * When TCP_OFFLOAD is compiled in, connections flagged TF_TOE are left
  * untouched.
  */
 void
 tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
 {
 	struct callout *t_callout;
 	/*
 	 * Keep the handler in a real function pointer: converting a
 	 * function pointer to and from void * is not guaranteed by ISO C.
 	 */
 	void (*f_callout)(void *);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE)
 		return;
 #endif
 
 	switch (timer_type) {
 	case TT_DELACK:
 		t_callout = &tp->t_timers->tt_delack;
 		f_callout = tcp_timer_delack;
 		break;
 	case TT_REXMT:
 		t_callout = &tp->t_timers->tt_rexmt;
 		f_callout = tcp_timer_rexmt;
 		break;
 	case TT_PERSIST:
 		t_callout = &tp->t_timers->tt_persist;
 		f_callout = tcp_timer_persist;
 		break;
 	case TT_KEEP:
 		t_callout = &tp->t_timers->tt_keep;
 		f_callout = tcp_timer_keep;
 		break;
 	case TT_2MSL:
 		t_callout = &tp->t_timers->tt_2msl;
 		f_callout = tcp_timer_2msl;
 		break;
 	default:
 		panic("bad timer_type");
 	}
 	if (delta == 0) {
 		callout_stop(t_callout);
 	} else {
 		/* The target CPU is only needed when the timer is armed. */
 		callout_reset_on(t_callout, delta, f_callout, tp,
 		    inp_to_cpuid(tp->t_inpcb));
 	}
 }
 
 /*
  * Report the callout_active() state of one of the connection's timers.
  *
  * timer_type is TT_DELACK, TT_REXMT, TT_PERSIST, TT_KEEP or TT_2MSL; any
  * other value panics.
  */
 int
 tcp_timer_active(struct tcpcb *tp, int timer_type)
 {
 	struct callout *c;
 
 	if (timer_type == TT_DELACK)
 		c = &tp->t_timers->tt_delack;
 	else if (timer_type == TT_REXMT)
 		c = &tp->t_timers->tt_rexmt;
 	else if (timer_type == TT_PERSIST)
 		c = &tp->t_timers->tt_persist;
 	else if (timer_type == TT_KEEP)
 		c = &tp->t_timers->tt_keep;
 	else if (timer_type == TT_2MSL)
 		c = &tp->t_timers->tt_2msl;
 	else
 		panic("bad timer_type");
 	return (callout_active(c));
 }
 
 #define	ticks_to_msecs(t)	(1000*(t) / hz)
 
 /*
  * Fill in the exported timer record xtimer from the timers of tp.
  *
  * xtimer is zeroed first.  With a NULL timer nothing else is filled in.
  * Otherwise every active callout contributes the distance between its
  * c_time and the current uptime, expressed in units of SBT_1MS, and
  * t_rcvtime is set to the ticks elapsed since tp->t_rcvtime converted
  * with ticks_to_msecs().
  */
 void
 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
     struct xtcp_timer *xtimer)
 {
 	sbintime_t uptime;
 
 	bzero(xtimer, sizeof(*xtimer));
 	if (timer != NULL) {
 		uptime = getsbinuptime();
 		if (callout_active(&timer->tt_delack))
 			xtimer->tt_delack =
 			    (timer->tt_delack.c_time - uptime) / SBT_1MS;
 		if (callout_active(&timer->tt_rexmt))
 			xtimer->tt_rexmt =
 			    (timer->tt_rexmt.c_time - uptime) / SBT_1MS;
 		if (callout_active(&timer->tt_persist))
 			xtimer->tt_persist =
 			    (timer->tt_persist.c_time - uptime) / SBT_1MS;
 		if (callout_active(&timer->tt_keep))
 			xtimer->tt_keep =
 			    (timer->tt_keep.c_time - uptime) / SBT_1MS;
 		if (callout_active(&timer->tt_2msl))
 			xtimer->tt_2msl =
 			    (timer->tt_2msl.c_time - uptime) / SBT_1MS;
 		xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
 	}
 }
Index: head/sys/netinet/udp_usrreq.c
===================================================================
--- head/sys/netinet/udp_usrreq.c	(revision 277330)
+++ head/sys/netinet/udp_usrreq.c	(revision 277331)
@@ -1,1898 +1,1899 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.
  * Copyright (c) 2008 Robert N. M. Watson
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * Copyright (c) 2014 Kevin Lo
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipfw.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
+#include <net/rss_config.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef INET6
 #include <netinet6/ip6_var.h>
 #endif
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/udplite.h>
 #include <netinet/in_rss.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/esp.h>
 #endif
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * UDP and UDP-Lite protocols implementation.
  * Per RFC 768, August, 1980.
  * Per RFC 3828, July, 2004.
  */
 
 /*
  * BSD 4.2 defaulted the udp checksum to be off.  Turning off udp checksums
  * removes the only data integrity mechanism for packets and malformed
  * packets that would otherwise be discarded due to bad checksums, and may
  * cause problems (especially for NFS data blocks).
  */
 VNET_DEFINE(int, udp_cksum) = 1;
 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(udp_cksum), 0, "compute udp checksum");
 
 /* Global (not per-vnet) switch, off by default. */
 int	udp_log_in_vain = 0;
 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
     &udp_log_in_vain, 0, "Log all incoming UDP packets");
 
 /* Per-vnet switch, off by default. */
 VNET_DEFINE(int, udp_blackhole) = 0;
 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(udp_blackhole), 0,
     "Do not send port unreachables for refused connects");
 
 u_long	udp_sendspace = 9216;		/* really max datagram size */
 SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
 
 /*
  * Room for 40 1K datagrams, each accompanied by a source address of the
  * largest sockaddr type compiled in.
  */
 u_long	udp_recvspace = 40 * (1024 +
 #ifdef INET6
 				      sizeof(struct sockaddr_in6)
 #else
 				      sizeof(struct sockaddr_in)
 #endif
 				      );
 
 SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
     &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
 
 /*
  * Per-vnet PCB list heads and pcbinfo structures for UDP (udb, udbinfo)
  * and UDP-Lite (ulitecb, ulitecbinfo); udp_init() and udplite_init()
  * register them with in_pcbinfo_init().
  */
 VNET_DEFINE(struct inpcbhead, udb);		/* from udp_var.h */
 VNET_DEFINE(struct inpcbinfo, udbinfo);
 VNET_DEFINE(struct inpcbhead, ulitecb);
 VNET_DEFINE(struct inpcbinfo, ulitecbinfo);
 /* Per-vnet UMA zone that struct udpcb is allocated from. */
 static VNET_DEFINE(uma_zone_t, udpcb_zone);
 #define	V_udpcb_zone			VNET(udpcb_zone)
 
 /* Hash size arguments handed to in_pcbinfo_init(); overridable at build time. */
 #ifndef UDBHASHSIZE
 #define	UDBHASHSIZE	128
 #endif
 
 /* Per-vnet UDP statistics, exported through the UDPCTL_STATS sysctl. */
 VNET_PCPUSTAT_DEFINE(struct udpstat, udpstat);		/* from udp_var.h */
 VNET_PCPUSTAT_SYSINIT(udpstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_udp, UDPCTL_STATS, stats, struct udpstat,
     udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(udpstat);
 #endif /* VIMAGE */
 /* Forward declarations of the IPv4-only static functions. */
 #ifdef INET
 static void	udp_detach(struct socket *so);
 static int	udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
 		    struct mbuf *, struct thread *);
 #endif
 
 #ifdef IPSEC
 #ifdef IPSEC_NAT_T
 /* Either flavour of ESP-in-UDP encapsulation flag. */
 #define	UF_ESPINUDP_ALL	(UF_ESPINUDP_NON_IKE|UF_ESPINUDP)
 #ifdef INET
 static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int);
 #endif
 #endif /* IPSEC_NAT_T */
 #endif /* IPSEC */
 
 /*
  * Handler registered by udp_init() for the maxsockets_change event:
  * re-applies the current value of maxsockets as the limit of the UDP
  * inpcb zone and of the udpcb zone.  The tag argument is not used.
  */
 static void
 udp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
 	uma_zone_set_max(V_udpcb_zone, maxsockets);
 }
 
 /*
  * Initialisation callback passed to in_pcbinfo_init() for UDP inpcbs:
  * sets up the lock of the inpcb stored at mem.  The size and flags
  * arguments are not used.  Always returns 0.
  */
 static int
 udp_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_INIT(inp, "inp", "udpinp");
 	return (0);
 }
 
 /*
  * Initialisation callback passed to in_pcbinfo_init() for UDP-Lite
  * inpcbs: sets up the lock of the inpcb stored at mem.  The size and
  * flags arguments are not used.  Always returns 0.
  */
 static int
 udplite_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_INIT(inp, "inp", "udpliteinp");
 	return (0);
 }
 
 /*
  * Set up the UDP state: register the pcbinfo, create the udpcb zone
  * capped at maxsockets, and hook udp_zone_change() to the
  * maxsockets_change event so the caps follow later changes.
  */
 void
 udp_init(void)
 {
 
 	/*
 	 * For now default to 2-tuple UDP hashing - until the fragment
 	 * reassembly code can also update the flowid.
 	 *
 	 * Once we can calculate the flowid that way and re-establish
 	 * a 4-tuple, flip this to 4-tuple.
 	 */
 	in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
 	    "udp_inpcb", udp_inpcb_init, NULL, 0,
 	    IPI_HASHFIELDS_2TUPLE);
 	V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(V_udpcb_zone, maxsockets);
 	uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
 	    EVENTHANDLER_PRI_ANY);
 }
 
 /*
  * Register the UDP-Lite pcbinfo, using the same hash sizes and 2-tuple
  * hashing as UDP.  No udpcb zone is created here.
  */
 void
 udplite_init(void)
 {
 
 	in_pcbinfo_init(&V_ulitecbinfo, "udplite", &V_ulitecb, UDBHASHSIZE,
 	    UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init, NULL,
 	    0, IPI_HASHFIELDS_2TUPLE);
 }
 
 /*
  * Kernel module interface for updating udpstat.  The argument is an index
  * into udpstat treated as an array of counters; the selected counter is
  * incremented by one with counter_u64_add().  While this encodes the
  * general layout of udpstat into the caller, it doesn't encode its
  * location or storage, so that changes to how the statistics are kept
  * won't cause binary compatibility problems for kernel modules.
  */
 void
 kmod_udpstat_inc(int statnum)
 {
 
 	counter_u64_add(VNET(udpstat)[statnum], 1);
 }
 
 /*
  * Allocate a zeroed udpcb without sleeping and attach it to inp.
  *
  * Returns 0 on success.  Returns ENOBUFS when the zone has nothing to
  * give, in which case inp->inp_ppcb is left unchanged.
  */
 int
 udp_newudpcb(struct inpcb *inp)
 {
 	struct udpcb *up;
 
 	up = uma_zalloc(V_udpcb_zone, M_NOWAIT | M_ZERO);
 	if (up != NULL) {
 		inp->inp_ppcb = up;
 		return (0);
 	}
 	return (ENOBUFS);
 }
 
 /*
  * Return a udpcb to the zone it was allocated from.  Only the udpcb is
  * freed; nothing else is touched here.
  */
 void
 udp_discardcb(struct udpcb *up)
 {
 
 	uma_zfree(V_udpcb_zone, up);
 }
 
 #ifdef VIMAGE
 /*
  * Tear down the UDP state of the current vnet: the pcbinfo set up by
  * udp_init() and the udpcb zone.
  */
 void
 udp_destroy(void)
 {
 
 	in_pcbinfo_destroy(&V_udbinfo);
 	uma_zdestroy(V_udpcb_zone);
 }
 
 /*
  * Tear down the UDP-Lite pcbinfo set up by udplite_init().
  */
 void
 udplite_destroy(void)
 {
 
 	in_pcbinfo_destroy(&V_ulitecbinfo);
 }
 #endif
 
 #ifdef INET
 /*
  * Deliver the mbuf chain n to the socket behind inp.
  *
  * udp_in holds the IPv4 source address of the datagram; for an IPv6
  * socket it is converted to a v4-mapped sockaddr_in6 before it reaches
  * the socket code.  off is the offset of the UDP header within n.
  *
  * A tunneling function registered on the udpcb takes over the mbuf
  * entirely.  Otherwise the chain is consumed on every path: it is
  * either appended to the receive buffer or freed.
  */
 static void
 udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
     struct sockaddr_in *udp_in)
 {
 	struct sockaddr *append_sa;
 	struct socket *so;
 	struct mbuf *opts = NULL;
 #ifdef INET6
 	struct sockaddr_in6 udp_in6;
 #endif
 	struct udpcb *up;
 
 	INP_LOCK_ASSERT(inp);
 
 	/*
 	 * Engage the tunneling protocol.
 	 */
 	up = intoudpcb(inp);
 	if (up->u_tun_func != NULL) {
 		up->u_tun_func(n, off, inp, (struct sockaddr *)udp_in,
 		    up->u_tun_ctx);
 		return;
 	}
 
 	/* From here on off points past the UDP header. */
 	off += sizeof(struct udphdr);
 
 #ifdef IPSEC
 	/* Check AH/ESP integrity. */
 	if (ipsec4_in_reject(n, inp)) {
 		m_freem(n);
 		return;
 	}
 #ifdef IPSEC_NAT_T
 	up = intoudpcb(inp);
 	KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
 	if (up->u_flags & UF_ESPINUDP_ALL) {	/* IPSec UDP encaps. */
 		n = udp4_espdecap(inp, n, off);
 		if (n == NULL)				/* Consumed. */
 			return;
 	}
 #endif /* IPSEC_NAT_T */
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
 		m_freem(n);
 		return;
 	}
 #endif /* MAC */
 	so = inp->inp_socket;
 	/* Collect control data only when somebody asked for it. */
 	if ((inp->inp_flags & INP_CONTROLOPTS) != 0 ||
 	    (so->so_options & (SO_TIMESTAMP | SO_BINTIME)) != 0) {
 #ifdef INET6
 		if (inp->inp_vflag & INP_IPV6)
 			(void)ip6_savecontrol_v4(inp, n, &opts, NULL);
 		else
 #endif /* INET6 */
 			ip_savecontrol(inp, &opts, ip, n);
 	}
 
 	/* The IPv4 source address is used as is unless the socket is IPv6. */
 	append_sa = (struct sockaddr *)udp_in;
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		bzero(&udp_in6, sizeof(udp_in6));
 		udp_in6.sin6_len = sizeof(udp_in6);
 		udp_in6.sin6_family = AF_INET6;
 		in6_sin_2_v4mapsin6(udp_in, &udp_in6);
 		append_sa = (struct sockaddr *)&udp_in6;
 	}
 #endif /* INET6 */
 	m_adj(n, off);
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) != 0) {
 		sorwakeup_locked(so);
 		return;
 	}
 
 	/* The receive buffer refused the datagram: free it and count it. */
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	m_freem(n);
 	if (opts != NULL)
 		m_freem(opts);
 	UDPSTAT_INC(udps_fullsock);
 }
 
 /*
  * Input routine for IPv4 UDP and UDP-Lite datagrams.
  *
  * The mbuf chain is taken from *mp (which is then cleared), the IP header
  * length is read from *offp, and proto selects between UDP and UDP-Lite
  * handling (IPPROTO_UDPLITE enables partial checksum coverage).
  *
  * Processing order: strip IP options, pull the IP and UDP headers into the
  * first mbuf, validate the length fields, verify the checksum, then deliver
  * either to every matching pcb (multicast/broadcast destination) or to the
  * single pcb found by lookup.  When no pcb matches a unicast datagram an
  * ICMP port unreachable may be generated, subject to udp_blackhole and
  * rate limiting.
  *
  * Every return path in this function returns IPPROTO_DONE.
  */
 int
 udp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ip *ip;
 	struct udphdr *uh;
 	struct ifnet *ifp;
 	struct inpcb *inp;
 	uint16_t len, ip_len;
 	struct inpcbinfo *pcbinfo;
 	struct ip save_ip;
 	struct sockaddr_in udp_in;
 	struct mbuf *m;
 	struct m_tag *fwd_tag;
 	int cscov_partial, iphlen;
 
 	m = *mp;
 	iphlen = *offp;
 	ifp = m->m_pkthdr.rcvif;
 	*mp = NULL;
 	UDPSTAT_INC(udps_ipackets);
 
 	/*
 	 * Strip IP options, if any; should skip this, make available to
 	 * user, and use on returned packets, but we don't yet have a way to
 	 * check the checksum with options still present.
 	 */
 	if (iphlen > sizeof (struct ip)) {
 		ip_stripoptions(m);
 		iphlen = sizeof(struct ip);
 	}
 
 	/*
 	 * Get IP and UDP header together in first mbuf.
 	 */
 	ip = mtod(m, struct ip *);
 	if (m->m_len < iphlen + sizeof(struct udphdr)) {
 		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
 			UDPSTAT_INC(udps_hdrops);
 			return (IPPROTO_DONE);
 		}
 		/* The pullup may have replaced the first mbuf; reload ip. */
 		ip = mtod(m, struct ip *);
 	}
 	uh = (struct udphdr *)((caddr_t)ip + iphlen);
 	/* UDP-Lite starts out assuming partial coverage; revised below. */
 	cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0;
 
 	/*
 	 * Destination port of 0 is illegal, based on RFC768.
 	 */
 	if (uh->uh_dport == 0)
 		goto badunlocked;
 
 	/*
 	 * Construct sockaddr format source address.  Stuff source address
 	 * and datagram in user buffer.
 	 */
 	bzero(&udp_in, sizeof(udp_in));
 	udp_in.sin_len = sizeof(udp_in);
 	udp_in.sin_family = AF_INET;
 	udp_in.sin_port = uh->uh_sport;
 	udp_in.sin_addr = ip->ip_src;
 
 	/*
 	 * Make mbuf data length reflect UDP length.  If not enough data to
 	 * reflect UDP length, drop.
 	 */
 	len = ntohs((u_short)uh->uh_ulen);
 	ip_len = ntohs(ip->ip_len) - iphlen;
 	if (proto == IPPROTO_UDPLITE && (len == 0 || len == ip_len)) {
 		/* Zero means checksum over the complete packet. */
 		if (len == 0)
 			len = ip_len;
 		cscov_partial = 0;
 	}
 	if (ip_len != len) {
 		if (len > ip_len || len < sizeof(struct udphdr)) {
 			UDPSTAT_INC(udps_badlen);
 			goto badunlocked;
 		}
 		/*
 		 * Here len < ip_len, so len - ip_len is negative after
 		 * integer promotion; presumably a negative count makes
 		 * m_adj() trim from the tail of the chain (see mbuf(9)).
 		 * For UDP-Lite the length field is checksum coverage, not
 		 * datagram length, so nothing is trimmed.
 		 */
 		if (proto == IPPROTO_UDP)
 			m_adj(m, len - ip_len);
 	}
 
 	/*
 	 * Save a copy of the IP header in case we want restore it for
 	 * sending an ICMP error message in response.
 	 */
 	if (!V_udp_blackhole)
 		save_ip = *ip;
 	else
 		memset(&save_ip, 0, sizeof(save_ip));
 
 	/*
 	 * Checksum extended UDP header and data.
 	 */
 	if (uh->uh_sum) {
 		u_short uh_sum;
 
 		if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID) &&
 		    !cscov_partial) {
 			/* Checksum data supplied in the packet header. */
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				uh_sum = m->m_pkthdr.csum_data;
 			else
 				uh_sum = in_pseudo(ip->ip_src.s_addr,
 				    ip->ip_dst.s_addr, htonl((u_short)len +
 				    m->m_pkthdr.csum_data + proto));
 			uh_sum ^= 0xffff;
 		} else {
 			/*
 			 * Software checksum: temporarily zero the first 9
 			 * bytes of the IP header (viewed as struct ipovly),
 			 * sum, then restore the saved bytes.  Note that
 			 * ih_len is overwritten and not restored here.
 			 */
 			char b[9];
 
 			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
 			bzero(((struct ipovly *)ip)->ih_x1, 9);
 			((struct ipovly *)ip)->ih_len = (proto == IPPROTO_UDP) ?
 			    uh->uh_ulen : htons(ip_len);
 			uh_sum = in_cksum(m, len + sizeof (struct ip));
 			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
 		}
 		if (uh_sum) {
 			UDPSTAT_INC(udps_badsum);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	} else {
 		if (proto == IPPROTO_UDP) {
 			UDPSTAT_INC(udps_nosum);
 		} else {
 			/* UDPLite requires a checksum */
 			/* XXX: What is the right UDPLite MIB counter here? */
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 
 	pcbinfo = get_inpcbinfo(proto);
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 	    in_broadcast(ip->ip_dst, ifp)) {
 		/*
 		 * Multicast/broadcast: walk the whole pcb list and deliver a
 		 * copy to each match.  "last" trails one match behind so the
 		 * final match can receive the original chain without a copy.
 		 */
 		struct inpcb *last;
 		struct inpcbhead *pcblist;
 		struct ip_moptions *imo;
 
 		INP_INFO_RLOCK(pcbinfo);
 		pcblist = get_pcblist(proto);
 		last = NULL;
 		LIST_FOREACH(inp, pcblist, inp_list) {
 			if (inp->inp_lport != uh->uh_dport)
 				continue;
 #ifdef INET6
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_laddr.s_addr != INADDR_ANY &&
 			    inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
 				continue;
 			if (inp->inp_faddr.s_addr != INADDR_ANY &&
 			    inp->inp_faddr.s_addr != ip->ip_src.s_addr)
 				continue;
 			if (inp->inp_fport != 0 &&
 			    inp->inp_fport != uh->uh_sport)
 				continue;
 
 			INP_RLOCK(inp);
 
 			/*
 			 * XXXRW: Because we weren't holding either the inpcb
 			 * or the hash lock when we checked for a match
 			 * before, we should probably recheck now that the
 			 * inpcb lock is held.
 			 */
 
 			/*
 			 * Handle socket delivery policy for any-source
 			 * and source-specific multicast. [RFC3678]
 			 */
 			imo = inp->inp_moptions;
 			if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 				struct sockaddr_in	 group;
 				int			 blocked;
 				if (imo == NULL) {
 					INP_RUNLOCK(inp);
 					continue;
 				}
 				bzero(&group, sizeof(struct sockaddr_in));
 				group.sin_len = sizeof(struct sockaddr_in);
 				group.sin_family = AF_INET;
 				group.sin_addr = ip->ip_dst;
 
 				blocked = imo_multi_filter(imo, ifp,
 					(struct sockaddr *)&group,
 					(struct sockaddr *)&udp_in);
 				if (blocked != MCAST_PASS) {
 					if (blocked == MCAST_NOTGMEMBER)
 						IPSTAT_INC(ips_notmember);
 					if (blocked == MCAST_NOTSMEMBER ||
 					    blocked == MCAST_MUTED)
 						UDPSTAT_INC(udps_filtermcast);
 					INP_RUNLOCK(inp);
 					continue;
 				}
 			}
 			if (last != NULL) {
 				struct mbuf *n;
 
 				/*
 				 * Deliver a copy to the previous match; if
 				 * the copy fails that match is skipped.
 				 */
 				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
 					UDP_PROBE(receive, NULL, last, ip,
 					    last, uh);
 					udp_append(last, ip, n, iphlen,
 					    &udp_in);
 				}
 				INP_RUNLOCK(last);
 			}
 			last = inp;
 			/*
 			 * Don't look for additional matches if this one does
 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
 			 * socket options set.  This heuristic avoids
 			 * searching through all pcbs in the common case of a
 			 * non-shared port.  It assumes that an application
 			 * will never clear these options after setting them.
 			 */
 			if ((last->inp_socket->so_options &
 			    (SO_REUSEPORT|SO_REUSEADDR)) == 0)
 				break;
 		}
 
 		if (last == NULL) {
 			/*
 			 * No matching pcb found; discard datagram.  (No need
 			 * to send an ICMP Port Unreachable for a broadcast
 			 * or multicast datagram.)
 			 */
 			UDPSTAT_INC(udps_noportbcast);
 			/*
 			 * NOTE(review): the only break out of the loop above
 			 * happens after last has been set, so with last ==
 			 * NULL the loop ran to completion and inp looks like
 			 * it is always NULL here -- confirm before relying
 			 * on this unlock.
 			 */
 			if (inp)
 				INP_RUNLOCK(inp);
 			INP_INFO_RUNLOCK(pcbinfo);
 			goto badunlocked;
 		}
 		/* The final match receives the original chain. */
 		UDP_PROBE(receive, NULL, last, ip, last, uh);
 		udp_append(last, ip, m, iphlen, &udp_in);
 		INP_RUNLOCK(last);
 		INP_INFO_RUNLOCK(pcbinfo);
 		return (IPPROTO_DONE);
 	}
 
 	/*
 	 * Locate pcb for datagram.
 	 */
 
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
 	if ((m->m_flags & M_IP_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		struct sockaddr_in *next_hop;
 
 		/* The next-hop sockaddr is stored directly after the tag. */
 		next_hop = (struct sockaddr_in *)(fwd_tag + 1);
 
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
 		inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
 		    ip->ip_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB, ifp, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in_pcblookup(pcbinfo, ip->ip_src,
 			    uh->uh_sport, next_hop->sin_addr,
 			    next_hop->sin_port ? htons(next_hop->sin_port) :
 			    uh->uh_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_RLOCKPCB, ifp);
 		}
 		/* Remove the tag from the packet. We don't need it anymore. */
 		m_tag_delete(m, fwd_tag);
 		m->m_flags &= ~M_IP_NEXTHOP;
 	} else
 		inp = in_pcblookup_mbuf(pcbinfo, ip->ip_src, uh->uh_sport,
 		    ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
 		    INPLOOKUP_RLOCKPCB, ifp, m);
 	if (inp == NULL) {
 		if (udp_log_in_vain) {
 			/*
 			 * 16 bytes: exactly enough for a dotted quad such as
 			 * "255.255.255.255" plus the terminating NUL.  The
 			 * copy is needed because inet_ntoa() is called a
 			 * second time in the log() arguments.
 			 */
 			char buf[4*sizeof "123"];
 
 			strcpy(buf, inet_ntoa(ip->ip_dst));
 			log(LOG_INFO,
 			    "Connection attempt to UDP %s:%d from %s:%d\n",
 			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
 			    ntohs(uh->uh_sport));
 		}
 		UDPSTAT_INC(udps_noport);
 		if (m->m_flags & (M_BCAST | M_MCAST)) {
 			UDPSTAT_INC(udps_noportbcast);
 			goto badunlocked;
 		}
 		if (V_udp_blackhole)
 			goto badunlocked;
 		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
 			goto badunlocked;
 		/* Restore the header saved before checksumming. */
 		*ip = save_ip;
 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
 		return (IPPROTO_DONE);
 	}
 
 	/*
 	 * Check the minimum TTL for socket.
 	 */
 	INP_RLOCK_ASSERT(inp);
 	if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl) {
 		INP_RUNLOCK(inp);
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 	if (cscov_partial) {
 		struct udpcb *up;
 
 		/*
 		 * Partially covered UDP-Lite datagram: drop it when the
 		 * socket's receive coverage setting is zero or exceeds the
 		 * coverage carried in the packet.
 		 */
 		up = intoudpcb(inp);
 		if (up->u_rxcslen == 0 || up->u_rxcslen > len) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 
 	UDP_PROBE(receive, NULL, inp, ip, inp, uh);
 	udp_append(inp, ip, m, iphlen, &udp_in);
 	INP_RUNLOCK(inp);
 	return (IPPROTO_DONE);
 
 	/* Common drop path; entered with no pcb or pcbinfo lock held. */
 badunlocked:
 	m_freem(m);
 	return (IPPROTO_DONE);
 }
 #endif /* INET */
 
 /*
  * Notify a udp user of an asynchronous error; just wake up so that they can
  * collect error status.
  */
 struct inpcb *
 udp_notify(struct inpcb *inp, int errno)
 {
 
 	/*
 	 * While udp_ctlinput() always calls udp_notify() with a read lock
 	 * when invoking it directly, in_pcbnotifyall() currently uses write
 	 * locks due to sharing code with TCP.  For now, accept either a read
 	 * or a write lock, but a read lock is sufficient.
 	 */
 	INP_LOCK_ASSERT(inp);
 
 	inp->inp_socket->so_error = errno;
 	sorwakeup(inp->inp_socket);
 	sowwakeup(inp->inp_socket);
 	return (inp);
 }
 
 #ifdef INET
 /*
  * Control-input handling shared by UDP and UDP-Lite; pcbinfo selects which
  * pcb table is consulted.  sa carries the foreign address the event refers
  * to and vip, when used, points at the IP header of the offending datagram.
  */
 static void
 udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip,
     struct inpcbinfo *pcbinfo)
 {
 	struct in_addr faddr;
 	struct inpcb *inp;
 	struct udphdr *uh;
 	struct ip *ip;
 
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family != AF_INET)
 		return;
 	if (faddr.s_addr == INADDR_ANY)
 		return;
 
 	/* Redirects are not handled at this layer. */
 	if (PRC_IS_REDIRECT(cmd))
 		return;
 
 	/*
 	 * PRC_HOSTDEAD discards the header and so takes the notify-all path
 	 * below, which walks every pcb linearly.
 	 *
 	 * XXX: We never get this from ICMP, otherwise it makes an excellent
 	 * DoS attack on machines with many connections.
 	 */
 	if (cmd != PRC_HOSTDEAD) {
 		if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 			return;
 		ip = vip;
 	} else {
 		ip = NULL;
 	}
 
 	/* Without a header there is no single pcb to target. */
 	if (ip == NULL) {
 		in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
 		    udp_notify);
 		return;
 	}
 
 	/* Locate the pcb from the ports in the embedded UDP header. */
 	uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 	inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
 	    ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL);
 	if (inp == NULL)
 		return;
 
 	INP_RLOCK_ASSERT(inp);
 	if (inp->inp_socket != NULL)
 		udp_notify(inp, inetctlerrmap[cmd]);
 	INP_RUNLOCK(inp);
 }
 /*
  * Control-input entry point for UDP: forwards to udp_common_ctlinput()
  * with the UDP pcb table.
  */
 void
 udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 
 	/*
 	 * Plain call: ISO C does not allow "return expr;" in a function
 	 * returning void, even when expr itself has type void.
 	 */
 	udp_common_ctlinput(cmd, sa, vip, &V_udbinfo);
 }
 
 /*
  * Control-input entry point for UDP-Lite: forwards to
  * udp_common_ctlinput() with the UDP-Lite pcb table.
  */
 void
 udplite_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 
 	/*
 	 * Plain call: ISO C does not allow "return expr;" in a function
 	 * returning void, even when expr itself has type void.
 	 */
 	udp_common_ctlinput(cmd, sa, vip, &V_ulitecbinfo);
 }
 #endif /* INET */
 
 /*
  * Sysctl handler that exports the UDP pcb list.
  *
  * The output is a leading struct xinpgen, one struct xinpcb for each pcb
  * that is visible to the caller and not newer than the starting generation,
  * and a trailing struct xinpgen with the generation/count values read after
  * the export.  When no output buffer is supplied only a size estimate is
  * reported.  The node is read-only: supplying a new value yields EPERM.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 udp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, n;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the PCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == 0) {
 		/* Size probe: pad the current count to leave some headroom. */
 		n = V_udbinfo.ipi_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 		return (0);
 	}
 
 	if (req->newptr != 0)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_INFO_RLOCK(&V_udbinfo);
 	gencnt = V_udbinfo.ipi_gencnt;
 	n = V_udbinfo.ipi_count;
 	INP_INFO_RUNLOCK(&V_udbinfo);
 
 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 		+ n * sizeof(struct xinpcb));
 	if (error != 0)
 		return (error);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	/*
 	 * M_WAITOK allocations sleep until memory is available rather than
 	 * returning NULL, so the result needs no check.
 	 */
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 
 	/*
 	 * Pass 1: under the pcbinfo lock, take a reference on every pcb we
 	 * intend to export so it stays valid once the list lock is dropped.
 	 */
 	INP_INFO_RLOCK(&V_udbinfo);
 	for (inp = LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
 	     inp = LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt &&
 		    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 			in_pcbref(inp);
 			inp_list[i++] = inp;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&V_udbinfo);
 	n = i;
 
 	/*
 	 * Pass 2: copy each referenced pcb out.  Stop at the first failure
 	 * so that a later successful copy cannot overwrite the error and
 	 * make a truncated listing look complete.  The references taken
 	 * above are still dropped for every entry in pass 3.
 	 */
 	error = 0;
 	for (i = 0; i < n && error == 0; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xinpcb xi;
 
 			bzero(&xi, sizeof(xi));
 			xi.xi_len = sizeof xi;
 			/* XXX should avoid extra copy */
 			bcopy(inp, &xi.xi_inp, sizeof *inp);
 			if (inp->inp_socket)
 				sotoxsocket(inp->inp_socket, &xi.xi_socket);
 			xi.xi_inp.inp_gencnt = inp->inp_gencnt;
 			/* Drop the pcb lock before copying out to the user. */
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 		} else
 			INP_RUNLOCK(inp);
 	}
 
 	/*
 	 * Pass 3: release the references.  The pcb is unlocked here only
 	 * when in_pcbrele_rlocked() returns zero.
 	 */
 	INP_INFO_WLOCK(&V_udbinfo);
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (!in_pcbrele_rlocked(inp))
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(&V_udbinfo);
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		INP_INFO_RLOCK(&V_udbinfo);
 		xig.xig_gen = V_udbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_udbinfo.ipi_count;
 		INP_INFO_RUNLOCK(&V_udbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
     udp_pcblist, "S,xinpcb", "List of active UDP sockets");
 
 #ifdef INET
 /*
  * Sysctl handler returning the credentials (as a struct xucred) of the UDP
  * pcb matching a pair of addresses supplied by the caller.  The input is
  * two struct sockaddr_in values: addrs[0] is used as the local endpoint
  * and addrs[1] as the foreign endpoint of the lookup.
  *
  * Requires PRIV_NETINET_GETCRED.  Returns ENOENT when no pcb matches or
  * the matching pcb has no socket, otherwise 0 or an errno value.
  */
 static int
 udp_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct sockaddr_in addrs[2];
 	struct xucred xuc;
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error != 0)
 		return (error);
 
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error != 0)
 		return (error);
 
 	inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
 	    addrs[0].sin_addr, addrs[0].sin_port,
 	    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
 	if (inp == NULL)
 		return (ENOENT);
 
 	/* The lookup handed the pcb back read-locked. */
 	INP_RLOCK_ASSERT(inp);
 	if (inp->inp_socket == NULL)
 		error = ENOENT;
 	else
 		error = cr_canseeinpcb(req->td->td_ucred, inp);
 	if (error == 0)
 		cru2x(inp->inp_cred, &xuc);
 	INP_RUNLOCK(inp);
 
 	if (error != 0)
 		return (error);
 	return (SYSCTL_OUT(req, &xuc, sizeof(struct xucred)));
 }
 
 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
 #endif /* INET */
 
 /*
  * Socket option handler for UDP and UDP-Lite sockets.
  *
  * Options whose level is not the socket's own protocol are passed on to
  * ip6_ctloutput() or ip_ctloutput().  At the protocol level this handles:
  *
  *   UDP_ENCAP            set always; get only with IPSEC_NAT_T.  Setting
  *                        any value other than 0 without IPSEC_NAT_T
  *                        yields EINVAL.
  *   UDPLITE_SEND_CSCOV,
  *   UDPLITE_RECV_CSCOV   UDP-Lite sockets only (ENOPROTOOPT otherwise);
  *                        accepted values are 0 or 8..65535.
  *
  * Any other option name yields ENOPROTOOPT.
  *
  * The inpcb write lock is taken on entry.  On the option paths below it is
  * dropped before sooptcopyin()/sooptcopyout() are called and, for the set
  * paths, re-acquired afterwards to apply the value.
  *
  * Returns 0 or an errno value.
  */
 int
 udp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct inpcb *inp;
 	struct udpcb *up;
 	int isudplite, error, optval;
 
 	error = 0;
 	isudplite = (so->so_proto->pr_protocol == IPPROTO_UDPLITE) ? 1 : 0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 	INP_WLOCK(inp);
 	if (sopt->sopt_level != so->so_proto->pr_protocol) {
 		/* Not a UDP-level option: hand off to the IP layer. */
 #ifdef INET6
 		if (INP_CHECK_SOCKAF(so, AF_INET6)) {
 			INP_WUNLOCK(inp);
 			error = ip6_ctloutput(so, sopt);
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			INP_WUNLOCK(inp);
 			error = ip_ctloutput(so, sopt);
 		}
 #endif
 		return (error);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case UDP_ENCAP:
 			/* Unlock around the copy-in, then re-lock to apply. */
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 			inp = sotoinpcb(so);
 			KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 			INP_WLOCK(inp);
 #ifdef IPSEC_NAT_T
 			up = intoudpcb(inp);
 			KASSERT(up != NULL, ("%s: up == NULL", __func__));
 #endif
 			switch (optval) {
 			case 0:
 				/* Clear all UDP encap. */
 #ifdef IPSEC_NAT_T
 				up->u_flags &= ~UF_ESPINUDP_ALL;
 #endif
 				break;
 #ifdef IPSEC_NAT_T
 			case UDP_ENCAP_ESPINUDP:
 			case UDP_ENCAP_ESPINUDP_NON_IKE:
 				/* The two encapsulation modes are exclusive. */
 				up->u_flags &= ~UF_ESPINUDP_ALL;
 				if (optval == UDP_ENCAP_ESPINUDP)
 					up->u_flags |= UF_ESPINUDP;
 				else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE)
 					up->u_flags |= UF_ESPINUDP_NON_IKE;
 				break;
 #endif
 			default:
 				error = EINVAL;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			break;
 		case UDPLITE_SEND_CSCOV:
 		case UDPLITE_RECV_CSCOV:
 			if (!isudplite) {
 				INP_WUNLOCK(inp);
 				error = ENOPROTOOPT;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof(optval),
 			    sizeof(optval));
 			if (error != 0)
 				break;
 			inp = sotoinpcb(so);
 			KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
 			INP_WLOCK(inp);
 			up = intoudpcb(inp);
 			KASSERT(up != NULL, ("%s: up == NULL", __func__));
 			/* Valid coverage: 0, or 8 through 65535 inclusive. */
 			if ((optval != 0 && optval < 8) || (optval > 65535)) {
 				INP_WUNLOCK(inp);
 				error = EINVAL;
 				break;
 			}
 			if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
 				up->u_txcslen = optval;
 			else
 				up->u_rxcslen = optval;
 			INP_WUNLOCK(inp);
 			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 #ifdef IPSEC_NAT_T
 		case UDP_ENCAP:
 			/* Read the value under the lock, copy out unlocked. */
 			up = intoudpcb(inp);
 			KASSERT(up != NULL, ("%s: up == NULL", __func__));
 			optval = up->u_flags & UF_ESPINUDP_ALL;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 #endif
 		case UDPLITE_SEND_CSCOV:
 		case UDPLITE_RECV_CSCOV:
 			if (!isudplite) {
 				INP_WUNLOCK(inp);
 				error = ENOPROTOOPT;
 				break;
 			}
 			up = intoudpcb(inp);
 			KASSERT(up != NULL, ("%s: up == NULL", __func__));
 			if (sopt->sopt_name == UDPLITE_SEND_CSCOV)
 				optval = up->u_txcslen;
 			else
 				optval = up->u_rxcslen;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}	
 	return (error);
 }
 
 #ifdef INET
 /*
  * State of the pcbinfo hash lock held by udp_output(), recorded in
  * unlock_udbinfo so the matching unlock can be issued on exit.
  */
 #define	UH_WLOCKED	2
 #define	UH_RLOCKED	1
 #define	UH_UNLOCKED	0
 /*
  * Build and transmit one UDP or UDP-Lite datagram on behalf of inp.
  *
  * m       payload chain; freed here on error, otherwise passed to
  *         ip_output().
  * addr    optional destination (struct sockaddr_in); NULL means send to
  *         the connected peer, and ENOTCONN results if there is none.
  * control optional mbuf of IPPROTO_IP control messages (IP_SENDSRCADDR,
  *         IP_TOS, IP_FLOWID, IP_FLOWTYPE and, with RSS, IP_RSSBUCKETID);
  *         always freed here.
  * td      calling thread; its credentials are used for bind/connect
  *         setup and jail address checks.
  *
  * Returns 0 or an errno value (including whatever ip_output() returns).
  */
 static int
 udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *td)
 {
 	struct udpiphdr *ui;
 	int len = m->m_pkthdr.len;
 	struct in_addr faddr, laddr;
 	struct cmsghdr *cm;
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in *sin, src;
 	int cscov_partial = 0;
 	int error = 0;
 	int ipflags;
 	u_short fport, lport;
 	int unlock_udbinfo;
 	u_char tos;
 	uint8_t pr;
 	uint16_t cscov = 0;
 	uint32_t flowid = 0;
 	uint8_t flowtype = M_HASHTYPE_NONE;
 
 	/*
 	 * udp_output() may need to temporarily bind or connect the current
 	 * inpcb.  As such, we don't know up front whether we will need the
 	 * pcbinfo lock or not.  Do any work to decide what is needed up
 	 * front before acquiring any locks.
 	 */
 	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
 		if (control)
 			m_freem(control);
 		m_freem(m);
 		return (EMSGSIZE);
 	}
 
 	/* sin_family == AF_INET later signals that IP_SENDSRCADDR was given. */
 	src.sin_family = 0;
 	INP_RLOCK(inp);
 	tos = inp->inp_ip_tos;
 	if (control != NULL) {
 		/*
 		 * XXX: Currently, we assume all the optional information is
 		 * stored in a single mbuf.
 		 */
 		if (control->m_next) {
 			INP_RUNLOCK(inp);
 			m_freem(control);
 			m_freem(m);
 			return (EINVAL);
 		}
 		/*
 		 * Walk the control messages by advancing the mbuf's data
 		 * pointer past each (aligned) cmsg in turn.
 		 */
 		for (; control->m_len > 0;
 		    control->m_data += CMSG_ALIGN(cm->cmsg_len),
 		    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
 			cm = mtod(control, struct cmsghdr *);
 			if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
 			    || cm->cmsg_len > control->m_len) {
 				error = EINVAL;
 				break;
 			}
 			/* Messages for other levels are skipped, not rejected. */
 			if (cm->cmsg_level != IPPROTO_IP)
 				continue;
 
 			switch (cm->cmsg_type) {
 			case IP_SENDSRCADDR:
 				if (cm->cmsg_len !=
 				    CMSG_LEN(sizeof(struct in_addr))) {
 					error = EINVAL;
 					break;
 				}
 				bzero(&src, sizeof(src));
 				src.sin_family = AF_INET;
 				src.sin_len = sizeof(src);
 				src.sin_port = inp->inp_lport;
 				src.sin_addr =
 				    *(struct in_addr *)CMSG_DATA(cm);
 				break;
 
 			case IP_TOS:
 				if (cm->cmsg_len != CMSG_LEN(sizeof(u_char))) {
 					error = EINVAL;
 					break;
 				}
 				tos = *(u_char *)CMSG_DATA(cm);
 				break;
 
 			case IP_FLOWID:
 				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 					error = EINVAL;
 					break;
 				}
 				flowid = *(uint32_t *) CMSG_DATA(cm);
 				break;
 
 			case IP_FLOWTYPE:
 				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 					error = EINVAL;
 					break;
 				}
 				/*
 				 * NOTE(review): the 32-bit cmsg value is
 				 * narrowed to the uint8_t flowtype here;
 				 * confirm out-of-range values are acceptable.
 				 */
 				flowtype = *(uint32_t *) CMSG_DATA(cm);
 				break;
 
 #ifdef	RSS
 			case IP_RSSBUCKETID:
 				if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
 					error = EINVAL;
 					break;
 				}
 				/* This is just a placeholder for now */
 				break;
 #endif	/* RSS */
 			default:
 				error = ENOPROTOOPT;
 				break;
 			}
 			/* The breaks above only leave the switch; leave the loop. */
 			if (error)
 				break;
 		}
 		m_freem(control);
 	}
 	if (error) {
 		INP_RUNLOCK(inp);
 		m_freem(m);
 		return (error);
 	}
 
 	/*
 	 * Depending on whether or not the application has bound or connected
 	 * the socket, we may have to do varying levels of work.  The optimal
 	 * case is for a connected UDP socket, as a global lock isn't
 	 * required at all.
 	 *
 	 * In order to decide which we need, we require stability of the
 	 * inpcb binding, which we ensure by acquiring a read lock on the
 	 * inpcb.  This doesn't strictly follow the lock order, so we play
 	 * the trylock and retry game; note that we may end up with more
 	 * conservative locks than required the second time around, so later
 	 * assertions have to accept that.  Further analysis of the number of
 	 * misses under contention is required.
 	 *
 	 * XXXRW: Check that hash locking update here is correct.
 	 */
 	pr = inp->inp_socket->so_proto->pr_protocol;
 	pcbinfo = get_inpcbinfo(pr);
 	sin = (struct sockaddr_in *)addr;
 	if (sin != NULL &&
 	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
 		/*
 		 * Completely unbound socket with an explicit destination:
 		 * a port may be committed below, so swap the inpcb read
 		 * lock for a write lock and take the hash write lock.
 		 */
 		INP_RUNLOCK(inp);
 		INP_WLOCK(inp);
 		INP_HASH_WLOCK(pcbinfo);
 		unlock_udbinfo = UH_WLOCKED;
 	} else if ((sin != NULL && (
 	    (sin->sin_addr.s_addr == INADDR_ANY) ||
 	    (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
 	    (inp->inp_laddr.s_addr == INADDR_ANY) ||
 	    (inp->inp_lport == 0))) ||
 	    (src.sin_family == AF_INET)) {
 		/* Setup routines will be called but nothing is committed. */
 		INP_HASH_RLOCK(pcbinfo);
 		unlock_udbinfo = UH_RLOCKED;
 	} else
 		unlock_udbinfo = UH_UNLOCKED;
 
 	/*
 	 * If the IP_SENDSRCADDR control message was specified, override the
 	 * source address for this datagram.  Its use is invalidated if the
 	 * address thus specified is incomplete or clobbers other inpcbs.
 	 */
 	laddr = inp->inp_laddr;
 	lport = inp->inp_lport;
 	if (src.sin_family == AF_INET) {
 		INP_HASH_LOCK_ASSERT(pcbinfo);
 		if ((lport == 0) ||
 		    (laddr.s_addr == INADDR_ANY &&
 		     src.sin_addr.s_addr == INADDR_ANY)) {
 			error = EINVAL;
 			goto release;
 		}
 		error = in_pcbbind_setup(inp, (struct sockaddr *)&src,
 		    &laddr.s_addr, &lport, td->td_ucred);
 		if (error)
 			goto release;
 	}
 
 	/*
 	 * If a UDP socket has been connected, then a local address/port will
 	 * have been selected and bound.
 	 *
 	 * If a UDP socket has not been connected to, then an explicit
 	 * destination address must be used, in which case a local
 	 * address/port may not have been selected and bound.
 	 */
 	if (sin != NULL) {
 		INP_LOCK_ASSERT(inp);
 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
 			error = EISCONN;
 			goto release;
 		}
 
 		/*
 		 * Jail may rewrite the destination address, so let it do
 		 * that before we use it.
 		 */
 		error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
 		if (error)
 			goto release;
 
 		/*
 		 * If a local address or port hasn't yet been selected, or if
 		 * the destination address needs to be rewritten due to using
 		 * a special INADDR_ constant, invoke in_pcbconnect_setup()
 		 * to do the heavy lifting.  Once a port is selected, we
 		 * commit the binding back to the socket; we also commit the
 		 * binding of the address if in jail.
 		 *
 		 * If we already have a valid binding and we're not
 		 * requesting a destination address rewrite, use a fast path.
 		 */
 		if (inp->inp_laddr.s_addr == INADDR_ANY ||
 		    inp->inp_lport == 0 ||
 		    sin->sin_addr.s_addr == INADDR_ANY ||
 		    sin->sin_addr.s_addr == INADDR_BROADCAST) {
 			INP_HASH_LOCK_ASSERT(pcbinfo);
 			error = in_pcbconnect_setup(inp, addr, &laddr.s_addr,
 			    &lport, &faddr.s_addr, &fport, NULL,
 			    td->td_ucred);
 			if (error)
 				goto release;
 
 			/*
 			 * XXXRW: Why not commit the port if the address is
 			 * !INADDR_ANY?
 			 */
 			/* Commit the local port if newly assigned. */
 			if (inp->inp_laddr.s_addr == INADDR_ANY &&
 			    inp->inp_lport == 0) {
 				INP_WLOCK_ASSERT(inp);
 				INP_HASH_WLOCK_ASSERT(pcbinfo);
 				/*
 				 * Remember addr if jailed, to prevent
 				 * rebinding.
 				 */
 				if (prison_flag(td->td_ucred, PR_IP4))
 					inp->inp_laddr = laddr;
 				inp->inp_lport = lport;
 				if (in_pcbinshash(inp) != 0) {
 					/* Undo the port commit on failure. */
 					inp->inp_lport = 0;
 					error = EAGAIN;
 					goto release;
 				}
 				inp->inp_flags |= INP_ANONPORT;
 			}
 		} else {
 			faddr = sin->sin_addr;
 			fport = sin->sin_port;
 		}
 	} else {
 		INP_LOCK_ASSERT(inp);
 		faddr = inp->inp_faddr;
 		fport = inp->inp_fport;
 		if (faddr.s_addr == INADDR_ANY) {
 			error = ENOTCONN;
 			goto release;
 		}
 	}
 
 	/*
 	 * Calculate data length and get a mbuf for UDP, IP, and possible
 	 * link-layer headers.  Immediately slide the data pointer back
 	 * forward since we won't use that space at this layer.
 	 */
 	M_PREPEND(m, sizeof(struct udpiphdr) + max_linkhdr, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto release;
 	}
 	m->m_data += max_linkhdr;
 	m->m_len -= max_linkhdr;
 	m->m_pkthdr.len -= max_linkhdr;
 
 	/*
 	 * Fill in mbuf with extended UDP header and addresses and length put
 	 * into network format.
 	 */
 	ui = mtod(m, struct udpiphdr *);
 	bzero(ui->ui_x1, sizeof(ui->ui_x1));	/* XXX still needed? */
 	ui->ui_pr = pr;
 	ui->ui_src = laddr;
 	ui->ui_dst = faddr;
 	ui->ui_sport = lport;
 	ui->ui_dport = fport;
 	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
 	if (pr == IPPROTO_UDPLITE) {
 		struct udpcb *up;
 		uint16_t plen;
 
 		up = intoudpcb(inp);
 		cscov = up->u_txcslen;
 		plen = (u_short)len + sizeof(struct udphdr);
 		/* Coverage at or beyond the packet length means "all". */
 		if (cscov >= plen)
 			cscov = 0;
 		ui->ui_len = htons(plen);
 		/* For UDP-Lite the length field carries the coverage. */
 		ui->ui_ulen = htons(cscov);
 		/*
 		 * For UDP-Lite, checksum coverage length of zero means
 		 * the entire UDPLite packet is covered by the checksum.
 		 */
 		cscov_partial = (cscov == 0) ? 0 : 1;
 	} else
 		ui->ui_v = IPVERSION << 4;
 
 	/*
 	 * Set the Don't Fragment bit in the IP header.
 	 */
 	if (inp->inp_flags & INP_DONTFRAG) {
 		struct ip *ip;
 
 		ip = (struct ip *)&ui->ui_i;
 		ip->ip_off |= htons(IP_DF);
 	}
 
 	/* Translate socket/pcb options into ip_output() flags. */
 	ipflags = 0;
 	if (inp->inp_socket->so_options & SO_DONTROUTE)
 		ipflags |= IP_ROUTETOIF;
 	if (inp->inp_socket->so_options & SO_BROADCAST)
 		ipflags |= IP_ALLOWBROADCAST;
 	if (inp->inp_flags & INP_ONESBCAST)
 		ipflags |= IP_SENDONES;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	/*
 	 * Set up checksum and output datagram.
 	 */
 	ui->ui_sum = 0;
 	if (pr == IPPROTO_UDPLITE) {
 		if (inp->inp_flags & INP_ONESBCAST)
 			faddr.s_addr = INADDR_BROADCAST;
 		/*
 		 * UDP-Lite is checksummed in software here; a computed sum
 		 * of zero is transmitted as 0xffff.
 		 */
 		if (cscov_partial) {
 			if ((ui->ui_sum = in_cksum(m, sizeof(struct ip) + cscov)) == 0)
 				ui->ui_sum = 0xffff;
 		} else {
 			if ((ui->ui_sum = in_cksum(m, sizeof(struct udpiphdr) + len)) == 0)
 				ui->ui_sum = 0xffff;
 		}
 	} else if (V_udp_cksum) {
 		if (inp->inp_flags & INP_ONESBCAST)
 			faddr.s_addr = INADDR_BROADCAST;
 		/*
 		 * Store only the pseudo-header sum and flag the packet with
 		 * CSUM_UDP; csum_data records where the final checksum goes.
 		 */
 		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
 		    htons((u_short)len + sizeof(struct udphdr) + pr));
 		m->m_pkthdr.csum_flags = CSUM_UDP;
 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 	}
 	((struct ip *)ui)->ip_len = htons(sizeof(struct udpiphdr) + len);
 	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
 	((struct ip *)ui)->ip_tos = tos;		/* XXX */
 	UDPSTAT_INC(udps_opackets);
 
 	/*
 	 * Setup flowid / RSS information for outbound socket.
 	 *
 	 * Once the UDP code decides to set a flowid some other way,
 	 * this allows the flowid to be overridden by userland.
 	 */
 	if (flowtype != M_HASHTYPE_NONE) {
 		m->m_pkthdr.flowid = flowid;
 		M_HASHTYPE_SET(m, flowtype);
 #ifdef	RSS
 	} else {
 		uint32_t hash_val, hash_type;
 		/*
 		 * Calculate an appropriate RSS hash for UDP and
 		 * UDP Lite.
 		 *
 		 * The called function will take care of figuring out
 		 * whether a 2-tuple or 4-tuple hash is required based
 		 * on the currently configured scheme.
 		 *
 		 * Later on, connected socket values should be
 		 * cached in the inpcb and reused, rather than constantly
 		 * re-calculating it.
 		 *
 		 * UDP Lite is a different protocol number and will
 		 * likely end up being hashed as a 2-tuple until
 		 * RSS / NICs grow UDP Lite protocol awareness.
 		 */
 		if (rss_proto_software_hash_v4(faddr, laddr, fport, lport,
 		    pr, &hash_val, &hash_type) == 0) {
 			m->m_pkthdr.flowid = hash_val;
 			M_HASHTYPE_SET(m, hash_type);
 		}
 #endif
 	}
 
 #ifdef	RSS
 	/*
 	 * Don't override with the inp cached flowid value.
 	 *
 	 * Depending upon the kind of send being done, the inp
 	 * flowid/flowtype values may actually not be appropriate
 	 * for this particular socket send.
 	 *
 	 * We should either leave the flowid at zero (which is what is
 	 * currently done) or set it to some software generated
 	 * hash value based on the packet contents.
 	 */
 	ipflags |= IP_NODEFAULTFLOWID;
 #endif	/* RSS */
 
 	/*
 	 * The hash lock is released before ip_output(); the inpcb lock is
 	 * held across the call and dropped afterwards.
 	 */
 	if (unlock_udbinfo == UH_WLOCKED)
 		INP_HASH_WUNLOCK(pcbinfo);
 	else if (unlock_udbinfo == UH_RLOCKED)
 		INP_HASH_RUNLOCK(pcbinfo);
 	UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
 	error = ip_output(m, inp->inp_options, NULL, ipflags,
 	    inp->inp_moptions, inp);
 	if (unlock_udbinfo == UH_WLOCKED)
 		INP_WUNLOCK(inp);
 	else
 		INP_RUNLOCK(inp);
 	return (error);
 
 	/*
 	 * Error exit: drop whichever hash and inpcb locks are held, then
 	 * free the datagram (m is NULL here after a failed M_PREPEND).
 	 */
 release:
 	if (unlock_udbinfo == UH_WLOCKED) {
 		INP_HASH_WUNLOCK(pcbinfo);
 		INP_WUNLOCK(inp);
 	} else if (unlock_udbinfo == UH_RLOCKED) {
 		INP_HASH_RUNLOCK(pcbinfo);
 		INP_RUNLOCK(inp);
 	} else
 		INP_RUNLOCK(inp);
 	m_freem(m);
 	return (error);
 }
 
 
 #if defined(IPSEC) && defined(IPSEC_NAT_T)
 /*
  * Potentially decap ESP in UDP frame.  Check for an ESP header
  * and optional marker; if present, strip the UDP header and
  * push the result through IPSec.
  *
  * inp must be read-locked and have one of the UF_ESPINUDP_ALL flags set.
  * off is used as the offset of the UDP payload from the start of the IP
  * header; the UDP header is taken to sit at off - sizeof(struct udphdr).
  *
  * Returns mbuf to be processed (potentially re-allocated) or
  * NULL if consumed and/or processed.
  */
 static struct mbuf *
 udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
 {
 	size_t minlen, payload, skip, iphlen;
 	caddr_t data;
 	struct udpcb *up;
 	struct m_tag *tag;
 	struct udphdr *udphdr;
 	struct ip *ip;
 
 	INP_RLOCK_ASSERT(inp);
 
 	/* 
 	 * Pull up data so the longest case is contiguous:
 	 *    IP/UDP hdr + non ESP marker + ESP hdr.
 	 */
 	minlen = off + sizeof(uint64_t) + sizeof(struct esp);
 	/* Never ask for more than the packet actually contains. */
 	if (minlen > m->m_pkthdr.len)
 		minlen = m->m_pkthdr.len;
 	if ((m = m_pullup(m, minlen)) == NULL) {
 		IPSECSTAT_INC(ips_in_inval);
 		return (NULL);		/* Bypass caller processing. */
 	}
 	data = mtod(m, caddr_t);	/* Points to ip header. */
 	payload = m->m_len - off;	/* Size of payload. */
 
 	if (payload == 1 && data[off] == '\xff')
 		return (m);		/* NB: keepalive packet, no decap. */
 
 	up = intoudpcb(inp);
 	KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
 	KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
 	    ("u_flags 0x%x", up->u_flags));
 
 	/* 
 	 * Check that the payload is large enough to hold an
 	 * ESP header and compute the amount of data to remove.
 	 *
 	 * NB: the caller has already done a pullup for us.
 	 * XXX can we assume alignment and eliminate bcopys?
 	 */
 	if (up->u_flags & UF_ESPINUDP_NON_IKE) {
 		/*
 		 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
 		 * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
 		 * possible AH mode non-IKE marker+non-ESP marker
 		 * from draft-ietf-ipsec-udp-encaps-00.txt.
 		 */
 		uint64_t marker;
 
 		if (payload <= sizeof(uint64_t) + sizeof(struct esp))
 			return (m);	/* NB: no decap. */
 		bcopy(data + off, &marker, sizeof(uint64_t));
 		if (marker != 0)	/* Non-IKE marker. */
 			return (m);	/* NB: no decap. */
 		/* Strip the 8-byte marker along with the UDP header. */
 		skip = sizeof(uint64_t) + sizeof(struct udphdr);
 	} else {
 		uint32_t spi;
 
 		if (payload <= sizeof(struct esp)) {
 			IPSECSTAT_INC(ips_in_inval);
 			m_freem(m);
 			return (NULL);	/* Discard. */
 		}
 		bcopy(data + off, &spi, sizeof(uint32_t));
 		if (spi == 0)		/* Non-ESP marker. */
 			return (m);	/* NB: no decap. */
 		skip = sizeof(struct udphdr);
 	}
 
 	/*
 	 * Setup a PACKET_TAG_IPSEC_NAT_T_PORTS tag to remember
 	 * the UDP ports. This is required if we want to select
 	 * the right SPD for multiple hosts behind same NAT.
 	 *
 	 * NB: ports are maintained in network byte order everywhere
 	 *     in the NAT-T code.
 	 */
 	tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
 		2 * sizeof(uint16_t), M_NOWAIT);
 	if (tag == NULL) {
 		IPSECSTAT_INC(ips_in_nomem);
 		m_freem(m);
 		return (NULL);		/* Discard. */
 	}
 	iphlen = off - sizeof(struct udphdr);
 	udphdr = (struct udphdr *)(data + iphlen);
 	/* Tag payload layout: [0] = source port, [1] = destination port. */
 	((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
 	((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
 	m_tag_prepend(m, tag);
 
 	/*
 	 * Remove the UDP header (and possibly the non ESP marker)
 	 * IP header length is iphlen
 	 * Before:
 	 *   <--- off --->
 	 *   +----+------+-----+
 	 *   | IP |  UDP | ESP |
 	 *   +----+------+-----+
 	 *        <-skip->
 	 * After:
 	 *          +----+-----+
 	 *          | IP | ESP |
 	 *          +----+-----+
 	 *   <-skip->
 	 */
 	/* Slide the IP header forward over the bytes being removed... */
 	ovbcopy(data, data + skip, iphlen);
 	/* ...then drop the now-stale leading skip bytes. */
 	m_adj(m, skip);
 
 	ip = mtod(m, struct ip *);
 	ip->ip_len = htons(ntohs(ip->ip_len) - skip);
 	ip->ip_p = IPPROTO_ESP;
 
 	/*
 	 * We cannot yet update the cksums so clear any
 	 * h/w cksum flags as they are no longer valid.
 	 */
 	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
 		m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
 
 	(void) ipsec4_common_input(m, iphlen, ip->ip_p);
 	return (NULL);			/* NB: consumed, bypass processing. */
 }
 #endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */
 
 static void
 udp_abort(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
 		INP_HASH_WLOCK(pcbinfo);
 		in_pcbdisconnect(inp);
 		inp->inp_laddr.s_addr = INADDR_ANY;
 		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 }
 
 static int
 udp_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	int error;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
 	error = soreserve(so, udp_sendspace, udp_recvspace);
 	if (error)
 		return (error);
 	INP_INFO_WLOCK(pcbinfo);
 	error = in_pcballoc(so, pcbinfo);
 	if (error) {
 		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 
 	inp = sotoinpcb(so);
 	inp->inp_vflag |= INP_IPV4;
 	inp->inp_ip_ttl = V_ip_defttl;
 
 	error = udp_newudpcb(inp);
 	if (error) {
 		in_pcbdetach(inp);
 		in_pcbfree(inp);
 		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(pcbinfo);
 	return (0);
 }
 #endif /* INET */
 
 int
 udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, void *ctx)
 {
 	struct inpcb *inp;
 	struct udpcb *up;
 
 	KASSERT(so->so_type == SOCK_DGRAM,
 	    ("udp_set_kernel_tunneling: !dgram"));
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
 	INP_WLOCK(inp);
 	up = intoudpcb(inp);
 	if (up->u_tun_func != NULL) {
 		INP_WUNLOCK(inp);
 		return (EBUSY);
 	}
 	up->u_tun_func = f;
 	up->u_tun_ctx = ctx;
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 #ifdef INET
 static int
 udp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	int error;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(pcbinfo);
 	error = in_pcbbind(inp, nam, td->td_ucred);
 	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 static void
 udp_close(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_close: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
 		INP_HASH_WLOCK(pcbinfo);
 		in_pcbdisconnect(inp);
 		inp->inp_laddr.s_addr = INADDR_ANY;
 		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 }
 
 static int
 udp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in *sin;
 	int error;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr != INADDR_ANY) {
 		INP_WUNLOCK(inp);
 		return (EISCONN);
 	}
 	sin = (struct sockaddr_in *)nam;
 	error = prison_remote_ip4(td->td_ucred, &sin->sin_addr);
 	if (error != 0) {
 		INP_WUNLOCK(inp);
 		return (error);
 	}
 	INP_HASH_WLOCK(pcbinfo);
 	error = in_pcbconnect(inp, nam, td->td_ucred);
 	INP_HASH_WUNLOCK(pcbinfo);
 	if (error == 0)
 		soisconnected(so);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 static void
 udp_detach(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct udpcb *up;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
 	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
 	    ("udp_detach: not disconnected"));
 	INP_INFO_WLOCK(pcbinfo);
 	INP_WLOCK(inp);
 	up = intoudpcb(inp);
 	KASSERT(up != NULL, ("%s: up == NULL", __func__));
 	inp->inp_ppcb = NULL;
 	in_pcbdetach(inp);
 	in_pcbfree(inp);
 	INP_INFO_WUNLOCK(pcbinfo);
 	udp_discardcb(up);
 }
 
 static int
 udp_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_faddr.s_addr == INADDR_ANY) {
 		INP_WUNLOCK(inp);
 		return (ENOTCONN);
 	}
 	INP_HASH_WLOCK(pcbinfo);
 	in_pcbdisconnect(inp);
 	inp->inp_laddr.s_addr = INADDR_ANY;
 	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 static int
 udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_send: inp == NULL"));
 	return (udp_output(inp, m, addr, control, td));
 }
 #endif /* INET */
 
 int
 udp_shutdown(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
 	INP_WLOCK(inp);
 	socantsendmore(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 #ifdef INET
 struct pr_usrreqs udp_usrreqs = {
 	.pru_abort =		udp_abort,
 	.pru_attach =		udp_attach,
 	.pru_bind =		udp_bind,
 	.pru_connect =		udp_connect,
 	.pru_control =		in_control,
 	.pru_detach =		udp_detach,
 	.pru_disconnect =	udp_disconnect,
 	.pru_peeraddr =		in_getpeeraddr,
 	.pru_send =		udp_send,
 	.pru_soreceive =	soreceive_dgram,
 	.pru_sosend =		sosend_dgram,
 	.pru_shutdown =		udp_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		udp_close,
 };
 #endif /* INET */
Index: head/sys/netinet6/in6_pcbgroup.c
===================================================================
--- head/sys/netinet6/in6_pcbgroup.c	(revision 277330)
+++ head/sys/netinet6/in6_pcbgroup.c	(revision 277331)
@@ -1,148 +1,150 @@
 /*-
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * This software was developed by Robert N. M. Watson under contract
  * to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 
+#include <net/rss_config.h>
+
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
-#include <netinet/in_rss.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
+#include <netinet6/in6_rss.h>
 #endif /* INET6 */
 
 /*
  * Given a hash of whatever the covered tuple might be, return a pcbgroup
  * index.  Where RSS is supported, try to align bucket selection with RSS CPU
  * affinity strategy.
  */
 static __inline u_int
 in6_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
 {
 
 #ifdef RSS
 	return (rss_getbucket(hash));
 #else
 	return (hash % pcbinfo->ipi_npcbgroups);
 #endif
 }
 
 /*
  * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash 
  * information is insufficient to identify the pcbgroup.  This might occur if
  * a TCP packet turnsup with a 2-tuple hash, or if an RSS hash is present but
  * RSS is not compiled into the kernel.
  */
 struct inpcbgroup *
 in6_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
 {
 
 #ifdef RSS
 	if ((pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
 	    hashtype == M_HASHTYPE_RSS_TCP_IPV6) ||
 	    (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE &&
 	    hashtype == M_HASHTYPE_RSS_UDP_IPV6) ||
 	    (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE &&
 	    hashtype == M_HASHTYPE_RSS_IPV6))
 		return (&pcbinfo->ipi_pcbgroups[
 		    in6_pcbgroup_getbucket(pcbinfo, hash)]);
 #endif
 	return (NULL);
 }
 
 struct inpcbgroup *
 in6_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
 {
 
 	return (in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
 	    m->m_pkthdr.flowid));
 }
 
 struct inpcbgroup *
 in6_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, const struct in6_addr *laddrp,
     u_short lport, const struct in6_addr *faddrp, u_short fport)
 {
 	uint32_t hash;
 
 	/*
 	 * RSS note: we pass foreign addr/port as source, and local addr/port
 	 * as destination, as we want to align with what the hardware is
 	 * doing.
 	 */
 	switch (pcbinfo->ipi_hashfields) {
 	case IPI_HASHFIELDS_4TUPLE:
 #ifdef RSS
 		hash = rss_hash_ip6_4tuple(faddrp, fport, laddrp, lport);
 #else
 		hash = faddrp->s6_addr32[3] ^ fport;
 #endif
 		break;
 
 	case IPI_HASHFIELDS_2TUPLE:
 #ifdef RSS
 		hash = rss_hash_ip6_2tuple(faddrp, laddrp);
 #else
 		hash = faddrp->s6_addr32[3] ^ laddrp->s6_addr32[3];
 #endif
 		break;
 
 	default:
 		hash = 0;
 	}
 	return (&pcbinfo->ipi_pcbgroups[in6_pcbgroup_getbucket(pcbinfo,
 	    hash)]);
 }
 
 struct inpcbgroup *
 in6_pcbgroup_byinpcb(struct inpcb *inp)
 {
 
 #ifdef	RSS
 	/*
 	 * Listen sockets with INP_RSS_BUCKET_SET set have a pre-determined
 	 * RSS bucket and thus we should use this pcbgroup, rather than
 	 * using a tuple or hash.
 	 *
 	 * XXX should verify that there's actually pcbgroups and inp_rss_listen_bucket
 	 * fits in that!
 	 */
 	if (inp->inp_flags2 & INP_RSS_BUCKET_SET)
 		return (&inp->inp_pcbinfo->ipi_pcbgroups[inp->inp_rss_listen_bucket]);
 #endif
 
 	return (in6_pcbgroup_bytuple(inp->inp_pcbinfo, &inp->in6p_laddr,
 	    inp->inp_lport, &inp->in6p_faddr, inp->inp_fport));
 }
Index: head/sys/netinet6/in6_rss.c
===================================================================
--- head/sys/netinet6/in6_rss.c	(nonexistent)
+++ head/sys/netinet6/in6_rss.c	(revision 277331)
@@ -0,0 +1,103 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+#include "opt_pcbgroup.h"
+
+#ifndef PCBGROUP
+#error "options RSS depends on options PCBGROUP"
+#endif
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/priv.h>
+#include <sys/kernel.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+#include <sys/sbuf.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/netisr.h>
+#include <net/rss_config.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet6/in6_rss.h>
+#include <netinet/in_var.h>
+
+/* for software rss hash support */
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+/*
+ * Hash an IPv6 2-tuple: rss_hash() over src followed by dst.
+ */
+uint32_t
+rss_hash_ip6_2tuple(const struct in6_addr *src, const struct in6_addr *dst)
+{
+	uint8_t data[sizeof(*src) + sizeof(*dst)];	/* src, then dst */
+	u_int datalen;			/* bytes packed into data so far */
+
+	datalen = 0;
+	bcopy(src, &data[datalen], sizeof(*src));
+	datalen += sizeof(*src);
+	bcopy(dst, &data[datalen], sizeof(*dst));
+	datalen += sizeof(*dst);
+	return (rss_hash(datalen, data));
+}
+
+/*
+ * Hash an IPv6 4-tuple: rss_hash() over src, dst, srcport, dstport.
+ */
+uint32_t
+rss_hash_ip6_4tuple(const struct in6_addr *src, u_short srcport,
+    const struct in6_addr *dst, u_short dstport)
+{
+	uint8_t data[sizeof(*src) + sizeof(*dst) + sizeof(srcport) +
+	    sizeof(dstport)];		/* sized for all four fields */
+	u_int datalen;			/* bytes packed into data so far */
+
+	datalen = 0;
+	bcopy(src, &data[datalen], sizeof(*src));
+	datalen += sizeof(*src);
+	bcopy(dst, &data[datalen], sizeof(*dst));
+	datalen += sizeof(*dst);
+	bcopy(&srcport, &data[datalen], sizeof(srcport)); /* copied verbatim */
+	datalen += sizeof(srcport);
+	bcopy(&dstport, &data[datalen], sizeof(dstport)); /* copied verbatim */
+	datalen += sizeof(dstport);
+	return (rss_hash(datalen, data));
+}

Property changes on: head/sys/netinet6/in6_rss.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/netinet6/in6_rss.h
===================================================================
--- head/sys/netinet6/in6_rss.h	(nonexistent)
+++ head/sys/netinet6/in6_rss.h	(revision 277331)
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET6_IN6_RSS_H_
+#define	_NETINET6_IN6_RSS_H_
+
+#include <netinet/in.h>		/* struct in6_addr */
+
+/*
+ * Network stack interface to generate a hash for a protocol tuple.
+ */
+uint32_t	rss_hash_ip6_4tuple(const struct in6_addr *src, u_short srcport,
+		    const struct in6_addr *dst, u_short dstport);
+uint32_t	rss_hash_ip6_2tuple(const struct in6_addr *src,
+		    const struct in6_addr *dst);
+
+#endif /* !_NETINET6_IN6_RSS_H_ */

Property changes on: head/sys/netinet6/in6_rss.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/netinet6/ip6_output.c
===================================================================
--- head/sys/netinet6/ip6_output.c	(revision 277330)
+++ head/sys/netinet6/ip6_output.c	(revision 277331)
@@ -1,2964 +1,2965 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_sctp.h"
 #include "opt_route.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 
 #include <machine/in_cksum.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/pfil.h>
+#include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/nd6.h>
-#include <netinet/in_rss.h>
+#include <netinet6/in6_rss.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #include <netipsec/key.h>
 #include <netinet6/ip6_ipsec.h>
 #endif /* IPSEC */
 #ifdef SCTP
 #include <netinet/sctp.h>
 #include <netinet/sctp_crc32.h>
 #endif
 
 #include <netinet6/ip6protosw.h>
 #include <netinet6/scope6_var.h>
 
 #ifdef FLOWTABLE
 #include <net/flowtable.h>
 #endif
 
 extern int in6_mcast_loop;
 
 struct ip6_exthdrs {
 	struct mbuf *ip6e_ip6;
 	struct mbuf *ip6e_hbh;
 	struct mbuf *ip6e_dest1;
 	struct mbuf *ip6e_rthdr;
 	struct mbuf *ip6e_dest2;
 };
 
 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
 			   struct ucred *, int);
 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
 	struct socket *, struct sockopt *);
 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *,
 	struct ucred *, int, int, int);
 
 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
 	struct ip6_frag **);
 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
 	struct ifnet *, struct in6_addr *, u_long *, int *, u_int);
 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
 
 
 /*
  * Make an extension header from option data.  hp is the source, and
  * mp is the destination.
  */
 #define MAKE_EXTHDR(hp, mp)						\
     do {								\
 	if (hp) {							\
 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
 		error = ip6_copyexthdr((mp), (caddr_t)(hp),		\
 		    ((eh)->ip6e_len + 1) << 3);				\
 		if (error)						\
 			goto freehdrs;					\
 	}								\
     } while (/*CONSTCOND*/ 0)
 
 /*
  * Form a chain of extension headers.
  * m is the extension header mbuf
  * mp is the previous mbuf in the chain
  * p is the next header
  * i is the type of option.
  */
 #define MAKE_CHAIN(m, mp, p, i)\
     do {\
 	if (m) {\
 		if (!hdrsplit) \
 			panic("assumption failed: hdr not split"); \
 		*mtod((m), u_char *) = *(p);\
 		*(p) = (i);\
 		p = mtod((m), u_char *);\
 		(m)->m_next = (mp)->m_next;\
 		(mp)->m_next = (m);\
 		(mp) = (m);\
 	}\
     } while (/*CONSTCOND*/ 0)
 
 void
 in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset)
 {
 	u_short csum;
 
 	csum = in_cksum_skip(m, offset + plen, offset);
 	if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0)
 		csum = 0xffff;
 	offset += m->m_pkthdr.csum_data;	/* checksum offset */
 
 	if (offset + sizeof(u_short) > m->m_len) {
 		printf("%s: delayed m_pullup, m->len: %d plen %u off %u "
 		    "csum_flags=%b\n", __func__, m->m_len, plen, offset,
 		    (int)m->m_pkthdr.csum_flags, CSUM_BITS);
 		/*
 		 * XXX this should not happen, but if it does, the correct
 		 * behavior may be to insert the checksum in the appropriate
 		 * next mbuf in the chain.
 		 */
 		return;
 	}
 	*(u_short *)(m->m_data + offset) = csum;
 }
 
 /*
  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
  * header (with pri, len, nxt, hlim, src, dst).
  * This function may modify ver and hlim only.
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  * If route_in6 ro is present and has ro_rt initialized, route lookup would be
  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
  * then result of route lookup is stored in ro->ro_rt.
  *
  * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and
  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
  * which is rt_mtu.
  *
  * ifpp - XXX: just for statistics
  */
 /*
  * XXX TODO: no flowid is assigned for outbound flows?
  */
 int
 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
     struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
     struct ifnet **ifpp, struct inpcb *inp)
 {
 	struct ip6_hdr *ip6, *mhip6;
 	struct ifnet *ifp, *origifp;
 	struct mbuf *m = m0;
 	struct mbuf *mprev = NULL;
 	int hlen, tlen, len, off;
 	struct route_in6 ip6route;
 	struct rtentry *rt = NULL;
 	struct sockaddr_in6 *dst, src_sa, dst_sa;
 	struct in6_addr odst;
 	int error = 0;
 	struct in6_ifaddr *ia = NULL;
 	u_long mtu;
 	int alwaysfrag, dontfrag;
 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
 	struct ip6_exthdrs exthdrs;
 	struct in6_addr finaldst, src0, dst0;
 	u_int32_t zone;
 	struct route_in6 *ro_pmtu = NULL;
 	int hdrsplit = 0;
 	int sw_csum, tso;
 	int needfiblookup;
 	uint32_t fibnum;
 	struct m_tag *fwd_tag = NULL;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	if (ip6 == NULL) {
 		printf ("ip6 is NULL");
 		goto bad;
 	}
 
 	if (inp != NULL) {
 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
 		if ((flags & IP_NODEFAULTFLOWID) == 0) {
 			/* unconditionally set flowid */
 			m->m_pkthdr.flowid = inp->inp_flowid;
 			M_HASHTYPE_SET(m, inp->inp_flowtype);
 		}
 	}
 
 	finaldst = ip6->ip6_dst;
 	bzero(&exthdrs, sizeof(exthdrs));
 	if (opt) {
 		/* Hop-by-Hop options header */
 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
 		/* Destination options header(1st part) */
 		if (opt->ip6po_rthdr) {
 			/*
 			 * Destination options header(1st part)
 			 * This only makes sense with a routing header.
 			 * See Section 9.2 of RFC 3542.
 			 * Disabling this part just for MIP6 convenience is
 			 * a bad idea.  We need to think carefully about a
 			 * way to make the advanced API coexist with MIP6
 			 * options, which might automatically be inserted in
 			 * the kernel.
 			 */
 			MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
 		}
 		/* Routing header */
 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
 		/* Destination options header(2nd part) */
 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
 	}
 
 #ifdef IPSEC
 	/*
 	 * IPSec checking which handles several cases.
 	 * FAST IPSEC: We re-injected the packet.
 	 * XXX: need scope argument.
 	 */
 	switch(ip6_ipsec_output(&m, inp, &error))
 	{
 	case 1:                 /* Bad packet */
 		goto freehdrs;
 	case -1:                /* IPSec done */
 		goto done;
 	case 0:                 /* No IPSec */
 	default:
 		break;
 	}
 #endif /* IPSEC */
 
 	/*
 	 * Calculate the total length of the extension header chain.
 	 * Keep the length of the unfragmentable part for fragmentation.
 	 */
 	optlen = 0;
 	if (exthdrs.ip6e_hbh)
 		optlen += exthdrs.ip6e_hbh->m_len;
 	if (exthdrs.ip6e_dest1)
 		optlen += exthdrs.ip6e_dest1->m_len;
 	if (exthdrs.ip6e_rthdr)
 		optlen += exthdrs.ip6e_rthdr->m_len;
 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
 
 	/* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */
 	if (exthdrs.ip6e_dest2)
 		optlen += exthdrs.ip6e_dest2->m_len;
 
 	/*
 	 * If there is at least one extension header,
 	 * separate IP6 header from the payload.
 	 */
 	if (optlen && !hdrsplit) {
 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
 			m = NULL;
 			goto freehdrs;
 		}
 		m = exthdrs.ip6e_ip6;
 		hdrsplit++;
 	}
 
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/* adjust mbuf packet header length */
 	m->m_pkthdr.len += optlen;
 	plen = m->m_pkthdr.len - sizeof(*ip6);
 
 	/* If this is a jumbo payload, insert a jumbo payload option. */
 	if (plen > IPV6_MAXPACKET) {
 		if (!hdrsplit) {
 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
 				m = NULL;
 				goto freehdrs;
 			}
 			m = exthdrs.ip6e_ip6;
 			hdrsplit++;
 		}
 		/* adjust pointer */
 		ip6 = mtod(m, struct ip6_hdr *);
 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
 			goto freehdrs;
 		ip6->ip6_plen = 0;
 	} else
 		ip6->ip6_plen = htons(plen);
 
 	/*
 	 * Concatenate headers and fill in next header fields.
 	 * Here we have, on "m"
 	 *	IPv6 payload
 	 * and we insert headers accordingly.  Finally, we should be getting:
 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
 	 *
 	 * during the header composing process, "m" points to IPv6 header.
 	 * "mprev" points to an extension header prior to esp.
 	 */
 	u_char *nexthdrp = &ip6->ip6_nxt;
 	mprev = m;
 
 	/*
 	 * we treat dest2 specially.  this makes IPsec processing
 	 * much easier.  the goal here is to make mprev point the
 	 * mbuf prior to dest2.
 	 *
 	 * result: IPv6 dest2 payload
 	 * m and mprev will point to IPv6 header.
 	 */
 	if (exthdrs.ip6e_dest2) {
 		if (!hdrsplit)
 			panic("assumption failed: hdr not split");
 		exthdrs.ip6e_dest2->m_next = m->m_next;
 		m->m_next = exthdrs.ip6e_dest2;
 		*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
 		ip6->ip6_nxt = IPPROTO_DSTOPTS;
 	}
 
 	/*
 	 * result: IPv6 hbh dest1 rthdr dest2 payload
 	 * m will point to IPv6 header.  mprev will point to the
 	 * extension header prior to dest2 (rthdr in the above case).
 	 */
 	MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
 	MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
 		   IPPROTO_DSTOPTS);
 	MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
 		   IPPROTO_ROUTING);
 
 	/*
 	 * If there is a routing header, discard the packet.
 	 */
 	if (exthdrs.ip6e_rthdr) {
 		 error = EINVAL;
 		 goto bad;
 	}
 
 	/* Source address validation */
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
 	    (flags & IPV6_UNSPECSRC) == 0) {
 		error = EOPNOTSUPP;
 		IP6STAT_INC(ip6s_badscope);
 		goto bad;
 	}
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
 		error = EOPNOTSUPP;
 		IP6STAT_INC(ip6s_badscope);
 		goto bad;
 	}
 
 	IP6STAT_INC(ip6s_localout);
 
 	/*
 	 * Route packet.
 	 */
 	if (ro == 0) {
 		ro = &ip6route;
 		bzero((caddr_t)ro, sizeof(*ro));
 	}
 	ro_pmtu = ro;
 	if (opt && opt->ip6po_rthdr)
 		ro = &opt->ip6po_route;
 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
 #ifdef FLOWTABLE
 	if (ro->ro_rt == NULL)
 		(void )flowtable_lookup(AF_INET6, m, (struct route *)ro);
 #endif
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
 again:
 	/*
 	 * if specified, try to fill in the traffic class field.
 	 * do not override if a non-zero value is already set.
 	 * we check the diffserv field and the ecn field separately.
 	 */
 	if (opt && opt->ip6po_tclass >= 0) {
 		int mask = 0;
 
 		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
 			mask |= 0xfc;
 		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
 			mask |= 0x03;
 		if (mask != 0)
 			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
 	}
 
 	/* fill in or override the hop limit field, if necessary. */
 	if (opt && opt->ip6po_hlim != -1)
 		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
 	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		if (im6o != NULL)
 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
 		else
 			ip6->ip6_hlim = V_ip6_defmcasthlim;
 	}
 
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	if (ro->ro_rt && fwd_tag == NULL) {
 		rt = ro->ro_rt;
 		ifp = ro->ro_rt->rt_ifp;
 	} else {
 		if (fwd_tag == NULL) {
 			bzero(&dst_sa, sizeof(dst_sa));
 			dst_sa.sin6_family = AF_INET6;
 			dst_sa.sin6_len = sizeof(dst_sa);
 			dst_sa.sin6_addr = ip6->ip6_dst;
 		}
 		error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp,
 		    &rt, fibnum);
 		if (error != 0) {
 			if (ifp != NULL)
 				in6_ifstat_inc(ifp, ifs6_out_discard);
 			goto bad;
 		}
 	}
 	if (rt == NULL) {
 		/*
 		 * If in6_selectroute() does not return a route entry,
 		 * dst may not have been updated.
 		 */
 		*dst = dst_sa;	/* XXX */
 	}
 
 	/*
 	 * then rt (for unicast) and ifp must be non-NULL valid values.
 	 */
 	if ((flags & IPV6_FORWARDING) == 0) {
 		/* XXX: the FORWARDING flag can be set for mrouting. */
 		in6_ifstat_inc(ifp, ifs6_out_request);
 	}
 	if (rt != NULL) {
 		ia = (struct in6_ifaddr *)(rt->rt_ifa);
 		counter_u64_add(rt->rt_pksent, 1);
 	}
 
 
 	/*
 	 * The outgoing interface must be in the zone of source and
 	 * destination addresses.
 	 */
 	origifp = ifp;
 
 	src0 = ip6->ip6_src;
 	if (in6_setscope(&src0, origifp, &zone))
 		goto badscope;
 	bzero(&src_sa, sizeof(src_sa));
 	src_sa.sin6_family = AF_INET6;
 	src_sa.sin6_len = sizeof(src_sa);
 	src_sa.sin6_addr = ip6->ip6_src;
 	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
 		goto badscope;
 
 	dst0 = ip6->ip6_dst;
 	if (in6_setscope(&dst0, origifp, &zone))
 		goto badscope;
 	/* re-initialize to be sure */
 	bzero(&dst_sa, sizeof(dst_sa));
 	dst_sa.sin6_family = AF_INET6;
 	dst_sa.sin6_len = sizeof(dst_sa);
 	dst_sa.sin6_addr = ip6->ip6_dst;
 	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
 		goto badscope;
 	}
 
 	/* We should use ia_ifp to support the case of
 	 * sending packets to an address of our own.
 	 */
 	if (ia != NULL && ia->ia_ifp)
 		ifp = ia->ia_ifp;
 
 	/* scope check is done. */
 	goto routefound;
 
   badscope:
 	IP6STAT_INC(ip6s_badscope);
 	in6_ifstat_inc(origifp, ifs6_out_discard);
 	if (error == 0)
 		error = EHOSTUNREACH; /* XXX */
 	goto bad;
 
   routefound:
 	if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		if (opt && opt->ip6po_nextroute.ro_rt) {
 			/*
 			 * The nexthop is explicitly specified by the
 			 * application.  We assume the next hop is an IPv6
 			 * address.
 			 */
 			dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
 		}
 		else if ((rt->rt_flags & RTF_GATEWAY))
 			dst = (struct sockaddr_in6 *)rt->rt_gateway;
 	}
 
 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
 	} else {
 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
 		in6_ifstat_inc(ifp, ifs6_out_mcast);
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if (!(ifp->if_flags & IFF_MULTICAST)) {
 			IP6STAT_INC(ip6s_noroute);
 			in6_ifstat_inc(ifp, ifs6_out_discard);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		if ((im6o == NULL && in6_mcast_loop) ||
 		    (im6o && im6o->im6o_multicast_loop)) {
 			/*
 			 * Loop back multicast datagram if not expressly
 			 * forbidden to do so, even if we have not joined
 			 * the address; protocols will filter it later,
 			 * thus deferring a hash lookup and lock acquisition
 			 * at the expense of an m_copym().
 			 */
 			ip6_mloopback(ifp, m, dst);
 		} else {
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IPV6_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip6_mloopback(),
 			 * above, will be forwarded by the ip6_input() routine,
 			 * if necessary.
 			 */
 			if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
 				/*
 				 * XXX: ip6_mforward expects that rcvif is NULL
 				 * when it is called from the originating path.
 				 * However, it may not always be the case.
 				 */
 				m->m_pkthdr.rcvif = NULL;
 				if (ip6_mforward(ip6, ifp, m) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 		/*
 		 * Multicasts with a hoplimit of zero may be looped back,
 		 * above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip6_mloopback() will
 		 * loop back a copy if this host actually belongs to the
 		 * destination group on the loopback interface.
 		 */
 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
 		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
 			m_freem(m);
 			goto done;
 		}
 	}
 
 	/*
 	 * Fill the outgoing inteface to tell the upper layer
 	 * to increment per-interface statistics.
 	 */
 	if (ifpp)
 		*ifpp = ifp;
 
 	/* Determine path MTU. */
 	if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
 	    &alwaysfrag, fibnum)) != 0)
 		goto bad;
 
 	/*
 	 * The caller of this function may specify to use the minimum MTU
 	 * in some cases.
 	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
 	 * setting.  The logic is a bit complicated; by default, unicast
 	 * packets will follow path MTU while multicast packets will be sent at
 	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
 	 * including unicast ones will be sent at the minimum MTU.  Multicast
 	 * packets will always be sent at the minimum MTU unless
 	 * IP6PO_MINMTU_DISABLE is explicitly specified.
 	 * See RFC 3542 for more details.
 	 */
 	if (mtu > IPV6_MMTU) {
 		if ((flags & IPV6_MINMTU))
 			mtu = IPV6_MMTU;
 		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
 			mtu = IPV6_MMTU;
 		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
 			 (opt == NULL ||
 			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
 			mtu = IPV6_MMTU;
 		}
 	}
 
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	in6_clearscope(&ip6->ip6_src);
 	in6_clearscope(&ip6->ip6_dst);
 
 	/*
 	 * If the outgoing packet contains a hop-by-hop options header,
 	 * it must be examined and processed even by the source node.
 	 * (RFC 2460, section 4.)
 	 */
 	if (exthdrs.ip6e_hbh) {
 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
 		u_int32_t dummy; /* XXX unused */
 		u_int32_t plen = 0; /* XXX: ip6_process will check the value */
 
 #ifdef DIAGNOSTIC
 		if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
 			panic("ip6e_hbh is not contiguous");
 #endif
 		/*
 		 *  XXX: if we have to send an ICMPv6 error to the sender,
 		 *       we need the M_LOOP flag since icmp6_error() expects
 		 *       the IPv6 and the hop-by-hop options header are
 		 *       contiguous unless the flag is set.
 		 */
 		m->m_flags |= M_LOOP;
 		m->m_pkthdr.rcvif = ifp;
 		if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
 		    ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
 		    &dummy, &plen) < 0) {
 			/* m was already freed at this point */
 			error = EINVAL;/* better error? */
 			goto done;
 		}
 		m->m_flags &= ~M_LOOP; /* XXX */
 		m->m_pkthdr.rcvif = NULL;
 	}
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (!PFIL_HOOKED(&V_inet6_pfil_hook))
 		goto passout;
 
 	odst = ip6->ip6_dst;
 	/* Run through list of hooks for output packets. */
 	error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
 	if (error != 0 || m == NULL)
 		goto done;
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	needfiblookup = 0;
 	/* See if destination IP address was changed by packet filter. */
 	if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip6_input(). */
 		if (in6_localip(&ip6->ip6_dst)) {
 			m->m_flags |= M_FASTFWD_OURS;
 			if (m->m_pkthdr.rcvif == NULL)
 				m->m_pkthdr.rcvif = V_loif;
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
 				m->m_pkthdr.csum_flags |=
 				    CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 #ifdef SCTP
 			if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
 				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 			error = netisr_queue(NETISR_IPV6, m);
 			goto done;
 		} else
 			needfiblookup = 1; /* Redo the routing table lookup. */
 	}
 	/* See if fib was changed by packet filter. */
 	if (fibnum != M_GETFIB(m)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		fibnum = M_GETFIB(m);
 		RO_RTFREE(ro);
 		needfiblookup = 1;
 	}
 	if (needfiblookup)
 		goto again;
 
 	/* See if local, if yes, send it to netisr. */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = V_loif;
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
 			m->m_pkthdr.csum_flags |=
 			    CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR;
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 #ifdef SCTP
 		if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6)
 			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 		error = netisr_queue(NETISR_IPV6, m);
 		goto done;
 	}
 	/* Or forward to some other address? */
 	if ((m->m_flags & M_IP6_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		dst = (struct sockaddr_in6 *)&ro->ro_dst;
 		bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m->m_flags &= ~M_IP6_NEXTHOP;
 		m_tag_delete(m, fwd_tag);
 		goto again;
 	}
 
 passout:
 	/*
 	 * Send the packet to the outgoing interface.
 	 * If necessary, do IPv6 fragmentation before sending.
 	 *
 	 * the logic here is rather complex:
 	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
 	 * 1-a:	send as is if tlen <= path mtu
 	 * 1-b:	fragment if tlen > path mtu
 	 *
 	 * 2: if user asks us not to fragment (dontfrag == 1)
 	 * 2-a:	send as is if tlen <= interface mtu
 	 * 2-b:	error if tlen > interface mtu
 	 *
 	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
 	 *	always fragment
 	 *
 	 * 4: if dontfrag == 1 && alwaysfrag == 1
 	 *	error, as we cannot handle this conflicting request
 	 */
 	sw_csum = m->m_pkthdr.csum_flags;
 	if (!hdrsplit) {
 		tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0;
 		sw_csum &= ~ifp->if_hwassist;
 	} else
 		tso = 0;
 	/*
 	 * If we added extension headers, we will not do TSO and calculate the
 	 * checksums ourselves for now.
 	 * XXX-BZ  Need a framework to know when the NIC can handle it, even
 	 * with ext. hdrs.
 	 */
 	if (sw_csum & CSUM_DELAY_DATA_IPV6) {
 		sw_csum &= ~CSUM_DELAY_DATA_IPV6;
 		in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr));
 	}
 #ifdef SCTP
 	if (sw_csum & CSUM_SCTP_IPV6) {
 		sw_csum &= ~CSUM_SCTP_IPV6;
 		sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
 	}
 #endif
 	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
 	tlen = m->m_pkthdr.len;
 
 	if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso)
 		dontfrag = 1;
 	else
 		dontfrag = 0;
 	if (dontfrag && alwaysfrag) {	/* case 4 */
 		/* conflicting request - can't transmit */
 		error = EMSGSIZE;
 		goto bad;
 	}
 	if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) {	/* case 2-b */
 		/*
 		 * Even if the DONTFRAG option is specified, we cannot send the
 		 * packet when the data length is larger than the MTU of the
 		 * outgoing interface.
 		 * Notify the error by sending IPV6_PATHMTU ancillary data as
 		 * well as returning an error code (the latter is not described
 		 * in the API spec.)
 		 */
 		u_int32_t mtu32;
 		struct ip6ctlparam ip6cp;
 
 		mtu32 = (u_int32_t)mtu;
 		bzero(&ip6cp, sizeof(ip6cp));
 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
 		pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
 		    (void *)&ip6cp);
 
 		error = EMSGSIZE;
 		goto bad;
 	}
 
 	/*
 	 * transmit packet without fragmentation
 	 */
 	if (dontfrag || (!alwaysfrag && tlen <= mtu)) {	/* case 1-a and 2-a */
 		struct in6_ifaddr *ia6;
 
 		ip6 = mtod(m, struct ip6_hdr *);
 		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
 		if (ia6) {
 			/* Record statistics for this interface address. */
 			counter_u64_add(ia6->ia_ifa.ifa_opackets, 1);
 			counter_u64_add(ia6->ia_ifa.ifa_obytes,
 			    m->m_pkthdr.len);
 			ifa_free(&ia6->ia_ifa);
 		}
 		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 		goto done;
 	}
 
 	/*
 	 * try to fragment the packet.  case 1-b and 3
 	 */
 	if (mtu < IPV6_MMTU) {
 		/* path MTU cannot be less than IPV6_MMTU */
 		error = EMSGSIZE;
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
 	} else if (ip6->ip6_plen == 0) {
 		/* jumbo payload cannot be fragmented */
 		error = EMSGSIZE;
 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
 		goto bad;
 	} else {
 		struct mbuf **mnext, *m_frgpart;
 		struct ip6_frag *ip6f;
 		u_int32_t id = htonl(ip6_randomid());
 		u_char nextproto;
 
 		/*
 		 * Too large for the destination or interface;
 		 * fragment if possible.
 		 * Must be able to put at least 8 bytes per fragment.
 		 */
 		hlen = unfragpartlen;
 		if (mtu > IPV6_MAXPACKET)
 			mtu = IPV6_MAXPACKET;
 
 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
 		if (len < 8) {
 			error = EMSGSIZE;
 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
 			goto bad;
 		}
 
 		/*
 		 * If the interface will not calculate checksums on
 		 * fragmented packets, then do it here.
 		 * XXX-BZ handle the hw offloading case.  Need flags.
 		 */
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
 			in6_delayed_cksum(m, plen, hlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
 		}
 #ifdef SCTP
 		if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
 			sctp_delayed_cksum(m, hlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
 		}
 #endif
 		mnext = &m->m_nextpkt;
 
 		/*
 		 * Change the next header field of the last header in the
 		 * unfragmentable part.
 		 */
 		if (exthdrs.ip6e_rthdr) {
 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
 		} else if (exthdrs.ip6e_dest1) {
 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
 		} else if (exthdrs.ip6e_hbh) {
 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
 		} else {
 			nextproto = ip6->ip6_nxt;
 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
 		}
 
 		/*
 		 * Loop through length of segment after first fragment,
 		 * make new header and copy data of each part and link onto
 		 * chain.
 		 */
 		m0 = m;
 		for (off = hlen; off < tlen; off += len) {
 			m = m_gethdr(M_NOWAIT, MT_DATA);
 			if (!m) {
 				error = ENOBUFS;
 				IP6STAT_INC(ip6s_odropped);
 				goto sendorfree;
 			}
 			m->m_flags = m0->m_flags & M_COPYFLAGS;
 			*mnext = m;
 			mnext = &m->m_nextpkt;
 			m->m_data += max_linkhdr;
 			mhip6 = mtod(m, struct ip6_hdr *);
 			*mhip6 = *ip6;
 			m->m_len = sizeof(*mhip6);
 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
 			if (error) {
 				IP6STAT_INC(ip6s_odropped);
 				goto sendorfree;
 			}
 			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
 			if (off + len >= tlen)
 				len = tlen - off;
 			else
 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
 			mhip6->ip6_plen = htons((u_short)(len + hlen +
 			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
 			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
 				error = ENOBUFS;
 				IP6STAT_INC(ip6s_odropped);
 				goto sendorfree;
 			}
 			m_cat(m, m_frgpart);
 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
 			m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum;
 			m->m_pkthdr.rcvif = NULL;
 			ip6f->ip6f_reserved = 0;
 			ip6f->ip6f_ident = id;
 			ip6f->ip6f_nxt = nextproto;
 			IP6STAT_INC(ip6s_ofragments);
 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
 		}
 
 		in6_ifstat_inc(ifp, ifs6_out_fragok);
 	}
 
 	/*
 	 * Remove leading garbages.
 	 */
 sendorfree:
 	m = m0->m_nextpkt;
 	m0->m_nextpkt = 0;
 	m_freem(m0);
 	for (m0 = m; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia) {
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_obytes,
 				    m->m_pkthdr.len);
 			}
 			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		IP6STAT_INC(ip6s_fragmented);
 
 done:
 	if (ro == &ip6route)
 		RO_RTFREE(ro);
 	if (ro_pmtu == &ip6route)
 		RO_RTFREE(ro_pmtu);
 	return (error);
 
 freehdrs:
 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
 	m_freem(exthdrs.ip6e_dest1);
 	m_freem(exthdrs.ip6e_rthdr);
 	m_freem(exthdrs.ip6e_dest2);
 	/* FALLTHROUGH */
 bad:
 	if (m)
 		m_freem(m);
 	goto done;
 }
 
 /*
  * Allocate an mbuf and fill it with a copy of an extension header.
  *
  * mp:   out parameter; receives the new mbuf on success and is left
  *       untouched on failure.
  * hdr:  header bytes to copy.  When NULL, nothing is copied and only the
  *       mbuf length is set.
  * hlen: header length in bytes.
  *
  * Returns 0 on success, or ENOBUFS when hlen exceeds MCLBYTES or the
  * allocation fails.
  */
 static int
 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
 {
 	struct mbuf *n;
 
 	/* A header larger than one cluster cannot be stored. */
 	if (hlen > MCLBYTES)
 		return (ENOBUFS); /* XXX */
 
 	/* Use a cluster only when a plain mbuf is too small. */
 	n = (hlen > MLEN) ? m_getcl(M_NOWAIT, MT_DATA, 0) :
 	    m_get(M_NOWAIT, MT_DATA);
 	if (n == NULL)
 		return (ENOBUFS);
 
 	n->m_len = hlen;
 	if (hdr != NULL)
 		bcopy(hdr, mtod(n, caddr_t), hlen);
 	*mp = n;
 
 	return (0);
 }
 
 /*
  * Insert jumbo payload option.
  *
  * exthdrs: extension header set of the outgoing packet.  ip6e_hbh is
  *          created, extended in place, or replaced by a cluster-backed
  *          copy; the packet header length of ip6e_ip6 grows by the
  *          JUMBOOPTLEN bytes that are added.
  * plen:    payload length before the insertion; plen + JUMBOOPTLEN is
  *          stored in the option in network byte order.
  *
  * Returns 0 on success, or ENOBUFS when an mbuf cannot be allocated or the
  * enlarged hop-by-hop header would not fit in one mbuf cluster.
  */
 static int
 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
 {
 	struct mbuf *mopt;
 	u_char *optbuf;
 	u_int32_t v;
 
 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
 
 	/*
 	 * If there is no hop-by-hop options header, allocate new one.
 	 * If there is one but it doesn't have enough space to store the
 	 * jumbo payload option, allocate a cluster to store the whole options.
 	 * Otherwise, use it to store the options.
 	 */
 	if (exthdrs->ip6e_hbh == NULL) {
 		mopt = m_get(M_NOWAIT, MT_DATA);
 		if (mopt == NULL)
 			return (ENOBUFS);
 		mopt->m_len = JUMBOOPTLEN;
 		optbuf = mtod(mopt, u_char *);
 		/*
 		 * Byte 0 (the next header field) is not written here;
 		 * presumably it is filled in when the header chain is
 		 * built -- verify against the caller.
 		 */
 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
 		exthdrs->ip6e_hbh = mopt;
 	} else {
 		struct ip6_hbh *hbh;
 
 		mopt = exthdrs->ip6e_hbh;
 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
 			/*
 			 * XXX assumption:
 			 * - exthdrs->ip6e_hbh is not referenced from places
 			 *   other than exthdrs.
 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
 			 */
 			int oldoptlen = mopt->m_len;
 			struct mbuf *n;
 
 			/*
 			 * XXX: give up if the whole (new) hbh header does
 			 * not fit even in an mbuf cluster.
 			 */
 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
 				return (ENOBUFS);
 
 			/*
 			 * As a consequence, we must always prepare a cluster
 			 * at this point.
 			 */
 			n = m_getcl(M_NOWAIT, MT_DATA, 0);
 			if (n == NULL)
 				return (ENOBUFS);
 			n->m_len = oldoptlen + JUMBOOPTLEN;
 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
 			    oldoptlen);
 			optbuf = mtod(n, u_char *) + oldoptlen;
 			m_freem(mopt);
 			mopt = exthdrs->ip6e_hbh = n;
 		} else {
 			optbuf = mtod(mopt, u_char *) + mopt->m_len;
 			mopt->m_len += JUMBOOPTLEN;
 		}
 
 		/*
 		 * The two bytes in front of the jumbo option are a PadN
 		 * option.  It spans exactly those two octets (type and
 		 * length), so its Opt Data Len is 0 (RFC 2460 section 4.2:
 		 * N octets of padding carry a length of N - 2).  A length
 		 * of 1 would make a parser consume the jumbo option type
 		 * byte below as pad data.
 		 */
 		optbuf[0] = IP6OPT_PADN;
 		optbuf[1] = 0;
 
 		/*
 		 * Adjust the header length according to the pad and
 		 * the jumbo payload option.
 		 */
 		hbh = mtod(mopt, struct ip6_hbh *);
 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
 	}
 
 	/* fill in the option. */
 	optbuf[2] = IP6OPT_JUMBO;
 	optbuf[3] = 4;
 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
 	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
 
 	/* finally, adjust the packet header length */
 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
 
 	return (0);
 #undef JUMBOOPTLEN
 }
 
 /*
  * Insert fragment header and copy unfragmentable header portions.
  *
  * m0:      original packet; when hlen is larger than the IPv6 header, the
  *          bytes between the IPv6 header and hlen are copied from it.
  * m:       mbuf of the fragment being built; the copy is linked after it.
  * hlen:    length of the unfragmentable part.
  * frghdrp: out parameter; receives a pointer to the space reserved for
  *          the fragment header.  The header contents are not initialized
  *          here.
  *
  * Returns 0 on success or ENOBUFS when an allocation fails.  If the copy
  * succeeded but the fragment header mbuf could not be allocated, the copy
  * stays linked to m.
  */
 static int
 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
     struct ip6_frag **frghdrp)
 {
 	struct mbuf *unfrag, *tail, *fragm;
 
 	/* Duplicate the extension headers, if any, behind the IPv6 header. */
 	unfrag = m;
 	if (hlen > sizeof(struct ip6_hdr)) {
 		unfrag = m_copym(m0, sizeof(struct ip6_hdr),
 		    hlen - sizeof(struct ip6_hdr), M_NOWAIT);
 		if (unfrag == NULL)
 			return (ENOBUFS);
 		m->m_next = unfrag;
 	}
 
 	/* Walk to the last mbuf of the unfragmentable part. */
 	tail = unfrag;
 	while (tail->m_next != NULL)
 		tail = tail->m_next;
 
 	if (!M_WRITABLE(tail) ||
 	    M_TRAILINGSPACE(tail) < sizeof(struct ip6_frag)) {
 		/* No usable room at the tail: append a dedicated mbuf. */
 		fragm = m_get(M_NOWAIT, MT_DATA);
 		if (fragm == NULL)
 			return (ENOBUFS);
 		fragm->m_len = sizeof(struct ip6_frag);
 		*frghdrp = mtod(fragm, struct ip6_frag *);
 		tail->m_next = fragm;
 		return (0);
 	}
 
 	/* Carve the fragment header out of the tail's trailing space. */
 	*frghdrp = (struct ip6_frag *)(mtod(tail, caddr_t) + tail->m_len);
 	tail->m_len += sizeof(struct ip6_frag);
 	m->m_pkthdr.len += sizeof(struct ip6_frag);
 	return (0);
 }
 
 /*
  * Determine the path MTU toward dst.
  *
  * ro_pmtu:     route cache used for the MTU lookup.  When it is not the
  *              same object as ro, its cached entry is dropped if it is
  *              down or was resolved for a different address, and a new
  *              entry is looked up in fibnum.
  * ro:          route used for transmission; only compared with ro_pmtu.
  * ifp:         outgoing interface, or NULL to use the route's interface.
  * dst:         final destination address.
  * mtup:        out parameter; receives the resulting MTU (0 when none
  *              could be determined).
  * alwaysfragp: optional out parameter; set to 1 when the recorded MTU is
  *              below IPV6_MMTU, 0 otherwise.
  * fibnum:      FIB handed to in6_rtalloc().
  *
  * Returns 0, or EHOSTUNREACH when neither a route nor an interface is
  * available.
  */
 static int
 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
     struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
     int *alwaysfragp, u_int fibnum)
 {
 	u_int32_t pmtu = 0;
 	int fragalways = 0;
 	int rc = 0;
 
 	if (ro_pmtu != ro) {
 		/* The first hop and the final destination may differ. */
 		struct sockaddr_in6 *final6;
 
 		final6 = (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
 
 		/* Drop a cached route that is down or for another address. */
 		if (ro_pmtu->ro_rt != NULL &&
 		    (!(ro_pmtu->ro_rt->rt_flags & RTF_UP) ||
 		     !IN6_ARE_ADDR_EQUAL(&final6->sin6_addr, dst))) {
 			RTFREE(ro_pmtu->ro_rt);
 			ro_pmtu->ro_rt = NULL;
 		}
 
 		/* Resolve a route to the final destination if none is cached. */
 		if (ro_pmtu->ro_rt == NULL) {
 			bzero(final6, sizeof(*final6));
 			final6->sin6_len = sizeof(struct sockaddr_in6);
 			final6->sin6_family = AF_INET6;
 			final6->sin6_addr = *dst;
 
 			in6_rtalloc(ro_pmtu, fibnum);
 		}
 	}
 
 	if (ro_pmtu->ro_rt == NULL) {
 		/* No route: fall back to the link MTU when possible. */
 		if (ifp != NULL)
 			pmtu = IN6_LINKMTU(ifp);
 		else
 			rc = EHOSTUNREACH; /* XXX */
 	} else {
 		struct in_conninfo inc;
 		u_int32_t linkmtu;
 
 		bzero(&inc, sizeof(inc));
 		inc.inc_flags |= INC_ISIPV6;
 		inc.inc6_faddr = *dst;
 
 		if (ifp == NULL)
 			ifp = ro_pmtu->ro_rt->rt_ifp;
 		linkmtu = IN6_LINKMTU(ifp);
 
 		/* Prefer the smaller of the host cache and route MTUs. */
 		pmtu = tcp_hc_getmtu(&inc);
 		if (pmtu == 0)
 			pmtu = ro_pmtu->ro_rt->rt_mtu;
 		else
 			pmtu = min(pmtu, ro_pmtu->ro_rt->rt_mtu);
 
 		if (pmtu == 0) {
 			pmtu = linkmtu;
 		} else if (pmtu < IPV6_MMTU) {
 			/*
 			 * RFC2460 section 5, last paragraph:
 			 * if we record ICMPv6 too big message with
 			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
 			 * or smaller, with fragment header attached.
 			 * (fragment header is needed regardless of the
 			 * packet size, for translators to identify packets)
 			 */
 			fragalways = 1;
 			pmtu = IPV6_MMTU;
 		}
 	}
 
 	*mtup = pmtu;
 	if (alwaysfragp != NULL)
 		*alwaysfragp = fragalways;
 	return (rc);
 }
 
 /*
  * IP6 socket option processing.
  */
 int
 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	int optdatalen, uproto;
 	void *optdata;
 	struct inpcb *in6p = sotoinpcb(so);
 	int error, optval;
 	int level, op, optname;
 	int optlen;
 	struct thread *td;
 #ifdef	RSS
 	uint32_t rss_bucket;
 	int retval;
 #endif
 
 	level = sopt->sopt_level;
 	op = sopt->sopt_dir;
 	optname = sopt->sopt_name;
 	optlen = sopt->sopt_valsize;
 	td = sopt->sopt_td;
 	error = 0;
 	optval = 0;
 	uproto = (int)so->so_proto->pr_protocol;
 
 	if (level != IPPROTO_IPV6) {
 		error = EINVAL;
 
 		if (sopt->sopt_level == SOL_SOCKET &&
 		    sopt->sopt_dir == SOPT_SET) {
 			switch (sopt->sopt_name) {
 			case SO_REUSEADDR:
 				INP_WLOCK(in6p);
 				if ((so->so_options & SO_REUSEADDR) != 0)
 					in6p->inp_flags2 |= INP_REUSEADDR;
 				else
 					in6p->inp_flags2 &= ~INP_REUSEADDR;
 				INP_WUNLOCK(in6p);
 				error = 0;
 				break;
 			case SO_REUSEPORT:
 				INP_WLOCK(in6p);
 				if ((so->so_options & SO_REUSEPORT) != 0)
 					in6p->inp_flags2 |= INP_REUSEPORT;
 				else
 					in6p->inp_flags2 &= ~INP_REUSEPORT;
 				INP_WUNLOCK(in6p);
 				error = 0;
 				break;
 			case SO_SETFIB:
 				INP_WLOCK(in6p);
 				in6p->inp_inc.inc_fibnum = so->so_fibnum;
 				INP_WUNLOCK(in6p);
 				error = 0;
 				break;
 			default:
 				break;
 			}
 		}
 	} else {		/* level == IPPROTO_IPV6 */
 		switch (op) {
 
 		case SOPT_SET:
 			switch (optname) {
 			case IPV6_2292PKTOPTIONS:
 #ifdef IPV6_PKTOPTIONS
 			case IPV6_PKTOPTIONS:
 #endif
 			{
 				struct mbuf *m;
 
 				error = soopt_getm(sopt, &m); /* XXX */
 				if (error != 0)
 					break;
 				error = soopt_mcopyin(sopt, m); /* XXX */
 				if (error != 0)
 					break;
 				error = ip6_pcbopts(&in6p->in6p_outputopts,
 						    m, so, sopt);
 				m_freem(m); /* XXX */
 				break;
 			}
 
 			/*
 			 * Use of some Hop-by-Hop options or some
 			 * Destination options, might require special
 			 * privilege.  That is, normal applications
 			 * (without special privilege) might be forbidden
 			 * from setting certain options in outgoing packets,
 			 * and might never see certain options in received
 			 * packets. [RFC 2292 Section 6]
 			 * KAME specific note:
 			 *  KAME prevents non-privileged users from sending or
 			 *  receiving ANY hbh/dst options in order to avoid
 			 *  overhead of parsing options in the kernel.
 			 */
 			case IPV6_RECVHOPOPTS:
 			case IPV6_RECVDSTOPTS:
 			case IPV6_RECVRTHDRDSTOPTS:
 				if (td != NULL) {
 					error = priv_check(td,
 					    PRIV_NETINET_SETHDROPTS);
 					if (error)
 						break;
 				}
 				/* FALLTHROUGH */
 			case IPV6_UNICAST_HOPS:
 			case IPV6_HOPLIMIT:
 
 			case IPV6_RECVPKTINFO:
 			case IPV6_RECVHOPLIMIT:
 			case IPV6_RECVRTHDR:
 			case IPV6_RECVPATHMTU:
 			case IPV6_RECVTCLASS:
 			case IPV6_V6ONLY:
 			case IPV6_AUTOFLOWLABEL:
 			case IPV6_BINDANY:
 			case IPV6_BINDMULTI:
 #ifdef	RSS
 			case IPV6_RSS_LISTEN_BUCKET:
 #endif
 				if (optname == IPV6_BINDANY && td != NULL) {
 					error = priv_check(td,
 					    PRIV_NETINET_BINDANY);
 					if (error)
 						break;
 				}
 
 				if (optlen != sizeof(int)) {
 					error = EINVAL;
 					break;
 				}
 				error = sooptcopyin(sopt, &optval,
 					sizeof optval, sizeof optval);
 				if (error)
 					break;
 				switch (optname) {
 
 				case IPV6_UNICAST_HOPS:
 					if (optval < -1 || optval >= 256)
 						error = EINVAL;
 					else {
 						/* -1 = kernel default */
 						in6p->in6p_hops = optval;
 						if ((in6p->inp_vflag &
 						     INP_IPV4) != 0)
 							in6p->inp_ip_ttl = optval;
 					}
 					break;
 #define OPTSET(bit) \
 do { \
 	INP_WLOCK(in6p); \
 	if (optval) \
 		in6p->inp_flags |= (bit); \
 	else \
 		in6p->inp_flags &= ~(bit); \
 	INP_WUNLOCK(in6p); \
 } while (/*CONSTCOND*/ 0)
 #define OPTSET2292(bit) \
 do { \
 	INP_WLOCK(in6p); \
 	in6p->inp_flags |= IN6P_RFC2292; \
 	if (optval) \
 		in6p->inp_flags |= (bit); \
 	else \
 		in6p->inp_flags &= ~(bit); \
 	INP_WUNLOCK(in6p); \
 } while (/*CONSTCOND*/ 0)
 #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
 
 #define OPTSET2(bit, val) do {						\
 	INP_WLOCK(in6p);						\
 	if (val)							\
 		in6p->inp_flags2 |= bit;				\
 	else								\
 		in6p->inp_flags2 &= ~bit;				\
 	INP_WUNLOCK(in6p);						\
 } while (0)
 #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0)
 
 				case IPV6_RECVPKTINFO:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_PKTINFO);
 					break;
 
 				case IPV6_HOPLIMIT:
 				{
 					struct ip6_pktopts **optp;
 
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					optp = &in6p->in6p_outputopts;
 					error = ip6_pcbopt(IPV6_HOPLIMIT,
 					    (u_char *)&optval, sizeof(optval),
 					    optp, (td != NULL) ? td->td_ucred :
 					    NULL, uproto);
 					break;
 				}
 
 				case IPV6_RECVHOPLIMIT:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_HOPLIMIT);
 					break;
 
 				case IPV6_RECVHOPOPTS:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_HOPOPTS);
 					break;
 
 				case IPV6_RECVDSTOPTS:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_DSTOPTS);
 					break;
 
 				case IPV6_RECVRTHDRDSTOPTS:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_RTHDRDSTOPTS);
 					break;
 
 				case IPV6_RECVRTHDR:
 					/* cannot mix with RFC2292 */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_RTHDR);
 					break;
 
 				case IPV6_RECVPATHMTU:
 					/*
 					 * We ignore this option for TCP
 					 * sockets.
 					 * (RFC3542 leaves this case
 					 * unspecified.)
 					 */
 					if (uproto != IPPROTO_TCP)
 						OPTSET(IN6P_MTU);
 					break;
 
 				case IPV6_V6ONLY:
 					/*
 					 * make setsockopt(IPV6_V6ONLY)
 					 * available only prior to bind(2).
 					 * see ipng mailing list, Jun 22 2001.
 					 */
 					if (in6p->inp_lport ||
 					    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_IPV6_V6ONLY);
 					if (optval)
 						in6p->inp_vflag &= ~INP_IPV4;
 					else
 						in6p->inp_vflag |= INP_IPV4;
 					break;
 				case IPV6_RECVTCLASS:
 					/* cannot mix with RFC2292 XXX */
 					if (OPTBIT(IN6P_RFC2292)) {
 						error = EINVAL;
 						break;
 					}
 					OPTSET(IN6P_TCLASS);
 					break;
 				case IPV6_AUTOFLOWLABEL:
 					OPTSET(IN6P_AUTOFLOWLABEL);
 					break;
 
 				case IPV6_BINDANY:
 					OPTSET(INP_BINDANY);
 					break;
 
 				case IPV6_BINDMULTI:
 					OPTSET2(INP_BINDMULTI, optval);
 					break;
 #ifdef	RSS
 				case IPV6_RSS_LISTEN_BUCKET:
 					if ((optval >= 0) &&
 					    (optval < rss_getnumbuckets())) {
 						in6p->inp_rss_listen_bucket = optval;
 						OPTSET2(INP_RSS_BUCKET_SET, 1);
 					} else {
 						error = EINVAL;
 					}
 					break;
 #endif
 				}
 				break;
 
 			case IPV6_TCLASS:
 			case IPV6_DONTFRAG:
 			case IPV6_USE_MIN_MTU:
 			case IPV6_PREFER_TEMPADDR:
 				if (optlen != sizeof(optval)) {
 					error = EINVAL;
 					break;
 				}
 				error = sooptcopyin(sopt, &optval,
 					sizeof optval, sizeof optval);
 				if (error)
 					break;
 				{
 					struct ip6_pktopts **optp;
 					optp = &in6p->in6p_outputopts;
 					error = ip6_pcbopt(optname,
 					    (u_char *)&optval, sizeof(optval),
 					    optp, (td != NULL) ? td->td_ucred :
 					    NULL, uproto);
 					break;
 				}
 
 			case IPV6_2292PKTINFO:
 			case IPV6_2292HOPLIMIT:
 			case IPV6_2292HOPOPTS:
 			case IPV6_2292DSTOPTS:
 			case IPV6_2292RTHDR:
 				/* RFC 2292 */
 				if (optlen != sizeof(int)) {
 					error = EINVAL;
 					break;
 				}
 				error = sooptcopyin(sopt, &optval,
 					sizeof optval, sizeof optval);
 				if (error)
 					break;
 				switch (optname) {
 				case IPV6_2292PKTINFO:
 					OPTSET2292(IN6P_PKTINFO);
 					break;
 				case IPV6_2292HOPLIMIT:
 					OPTSET2292(IN6P_HOPLIMIT);
 					break;
 				case IPV6_2292HOPOPTS:
 					/*
 					 * Check super-user privilege.
 					 * See comments for IPV6_RECVHOPOPTS.
 					 */
 					if (td != NULL) {
 						error = priv_check(td,
 						    PRIV_NETINET_SETHDROPTS);
 						if (error)
 							return (error);
 					}
 					OPTSET2292(IN6P_HOPOPTS);
 					break;
 				case IPV6_2292DSTOPTS:
 					if (td != NULL) {
 						error = priv_check(td,
 						    PRIV_NETINET_SETHDROPTS);
 						if (error)
 							return (error);
 					}
 					OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
 					break;
 				case IPV6_2292RTHDR:
 					OPTSET2292(IN6P_RTHDR);
 					break;
 				}
 				break;
 			case IPV6_PKTINFO:
 			case IPV6_HOPOPTS:
 			case IPV6_RTHDR:
 			case IPV6_DSTOPTS:
 			case IPV6_RTHDRDSTOPTS:
 			case IPV6_NEXTHOP:
 			{
 				/* new advanced API (RFC3542) */
 				u_char *optbuf;
 				u_char optbuf_storage[MCLBYTES];
 				int optlen;
 				struct ip6_pktopts **optp;
 
 				/* cannot mix with RFC2292 */
 				if (OPTBIT(IN6P_RFC2292)) {
 					error = EINVAL;
 					break;
 				}
 
 				/*
 				 * We only ensure valsize is not too large
 				 * here.  Further validation will be done
 				 * later.
 				 */
 				error = sooptcopyin(sopt, optbuf_storage,
 				    sizeof(optbuf_storage), 0);
 				if (error)
 					break;
 				optlen = sopt->sopt_valsize;
 				optbuf = optbuf_storage;
 				optp = &in6p->in6p_outputopts;
 				error = ip6_pcbopt(optname, optbuf, optlen,
 				    optp, (td != NULL) ? td->td_ucred : NULL,
 				    uproto);
 				break;
 			}
 #undef OPTSET
 
 			case IPV6_MULTICAST_IF:
 			case IPV6_MULTICAST_HOPS:
 			case IPV6_MULTICAST_LOOP:
 			case IPV6_JOIN_GROUP:
 			case IPV6_LEAVE_GROUP:
 			case IPV6_MSFILTER:
 			case MCAST_BLOCK_SOURCE:
 			case MCAST_UNBLOCK_SOURCE:
 			case MCAST_JOIN_GROUP:
 			case MCAST_LEAVE_GROUP:
 			case MCAST_JOIN_SOURCE_GROUP:
 			case MCAST_LEAVE_SOURCE_GROUP:
 				error = ip6_setmoptions(in6p, sopt);
 				break;
 
 			case IPV6_PORTRANGE:
 				error = sooptcopyin(sopt, &optval,
 				    sizeof optval, sizeof optval);
 				if (error)
 					break;
 
 				INP_WLOCK(in6p);
 				switch (optval) {
 				case IPV6_PORTRANGE_DEFAULT:
 					in6p->inp_flags &= ~(INP_LOWPORT);
 					in6p->inp_flags &= ~(INP_HIGHPORT);
 					break;
 
 				case IPV6_PORTRANGE_HIGH:
 					in6p->inp_flags &= ~(INP_LOWPORT);
 					in6p->inp_flags |= INP_HIGHPORT;
 					break;
 
 				case IPV6_PORTRANGE_LOW:
 					in6p->inp_flags &= ~(INP_HIGHPORT);
 					in6p->inp_flags |= INP_LOWPORT;
 					break;
 
 				default:
 					error = EINVAL;
 					break;
 				}
 				INP_WUNLOCK(in6p);
 				break;
 
 #ifdef IPSEC
 			case IPV6_IPSEC_POLICY:
 			{
 				caddr_t req;
 				struct mbuf *m;
 
 				if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 					break;
 				if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 					break;
 				req = mtod(m, caddr_t);
 				error = ipsec_set_policy(in6p, optname, req,
 				    m->m_len, (sopt->sopt_td != NULL) ?
 				    sopt->sopt_td->td_ucred : NULL);
 				m_freem(m);
 				break;
 			}
 #endif /* IPSEC */
 
 			default:
 				error = ENOPROTOOPT;
 				break;
 			}
 			break;
 
 		case SOPT_GET:
 			switch (optname) {
 
 			case IPV6_2292PKTOPTIONS:
 #ifdef IPV6_PKTOPTIONS
 			case IPV6_PKTOPTIONS:
 #endif
 				/*
 				 * RFC3542 (effectively) deprecated the
 				 * semantics of the 2292-style pktoptions.
 				 * Since it was not reliable in nature (i.e.,
 				 * applications had to expect the lack of some
 				 * information after all), it would make sense
 				 * to simplify this part by always returning
 				 * empty data.
 				 */
 				sopt->sopt_valsize = 0;
 				break;
 
 			case IPV6_RECVHOPOPTS:
 			case IPV6_RECVDSTOPTS:
 			case IPV6_RECVRTHDRDSTOPTS:
 			case IPV6_UNICAST_HOPS:
 			case IPV6_RECVPKTINFO:
 			case IPV6_RECVHOPLIMIT:
 			case IPV6_RECVRTHDR:
 			case IPV6_RECVPATHMTU:
 
 			case IPV6_V6ONLY:
 			case IPV6_PORTRANGE:
 			case IPV6_RECVTCLASS:
 			case IPV6_AUTOFLOWLABEL:
 			case IPV6_BINDANY:
 			case IPV6_FLOWID:
 			case IPV6_FLOWTYPE:
 #ifdef	RSS
 			case IPV6_RSSBUCKETID:
 #endif
 				switch (optname) {
 
 				case IPV6_RECVHOPOPTS:
 					optval = OPTBIT(IN6P_HOPOPTS);
 					break;
 
 				case IPV6_RECVDSTOPTS:
 					optval = OPTBIT(IN6P_DSTOPTS);
 					break;
 
 				case IPV6_RECVRTHDRDSTOPTS:
 					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
 					break;
 
 				case IPV6_UNICAST_HOPS:
 					optval = in6p->in6p_hops;
 					break;
 
 				case IPV6_RECVPKTINFO:
 					optval = OPTBIT(IN6P_PKTINFO);
 					break;
 
 				case IPV6_RECVHOPLIMIT:
 					optval = OPTBIT(IN6P_HOPLIMIT);
 					break;
 
 				case IPV6_RECVRTHDR:
 					optval = OPTBIT(IN6P_RTHDR);
 					break;
 
 				case IPV6_RECVPATHMTU:
 					optval = OPTBIT(IN6P_MTU);
 					break;
 
 				case IPV6_V6ONLY:
 					optval = OPTBIT(IN6P_IPV6_V6ONLY);
 					break;
 
 				case IPV6_PORTRANGE:
 				    {
 					int flags;
 					flags = in6p->inp_flags;
 					if (flags & INP_HIGHPORT)
 						optval = IPV6_PORTRANGE_HIGH;
 					else if (flags & INP_LOWPORT)
 						optval = IPV6_PORTRANGE_LOW;
 					else
 						optval = 0;
 					break;
 				    }
 				case IPV6_RECVTCLASS:
 					optval = OPTBIT(IN6P_TCLASS);
 					break;
 
 				case IPV6_AUTOFLOWLABEL:
 					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
 					break;
 
 				case IPV6_BINDANY:
 					optval = OPTBIT(INP_BINDANY);
 					break;
 
 				case IPV6_FLOWID:
 					optval = in6p->inp_flowid;
 					break;
 
 				case IPV6_FLOWTYPE:
 					optval = in6p->inp_flowtype;
 					break;
 #ifdef	RSS
 				case IPV6_RSSBUCKETID:
 					retval =
 					    rss_hash2bucket(in6p->inp_flowid,
 					    in6p->inp_flowtype,
 					    &rss_bucket);
 					if (retval == 0)
 						optval = rss_bucket;
 					else
 						error = EINVAL;
 					break;
 #endif
 
 				case IPV6_BINDMULTI:
 					optval = OPTBIT2(INP_BINDMULTI);
 					break;
 
 				}
 				if (error)
 					break;
 				error = sooptcopyout(sopt, &optval,
 					sizeof optval);
 				break;
 
 			case IPV6_PATHMTU:
 			{
 				u_long pmtu = 0;
 				struct ip6_mtuinfo mtuinfo;
 				struct route_in6 sro;
 
 				bzero(&sro, sizeof(sro));
 
 				if (!(so->so_state & SS_ISCONNECTED))
 					return (ENOTCONN);
 				/*
 				 * XXX: we do not consider the case of source
 				 * routing, or optional information to specify
 				 * the outgoing interface.
 				 */
 				error = ip6_getpmtu(&sro, NULL, NULL,
 				    &in6p->in6p_faddr, &pmtu, NULL,
 				    so->so_fibnum);
 				if (sro.ro_rt)
 					RTFREE(sro.ro_rt);
 				if (error)
 					break;
 				if (pmtu > IPV6_MAXPACKET)
 					pmtu = IPV6_MAXPACKET;
 
 				bzero(&mtuinfo, sizeof(mtuinfo));
 				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
 				optdata = (void *)&mtuinfo;
 				optdatalen = sizeof(mtuinfo);
 				error = sooptcopyout(sopt, optdata,
 				    optdatalen);
 				break;
 			}
 
 			case IPV6_2292PKTINFO:
 			case IPV6_2292HOPLIMIT:
 			case IPV6_2292HOPOPTS:
 			case IPV6_2292RTHDR:
 			case IPV6_2292DSTOPTS:
 				switch (optname) {
 				case IPV6_2292PKTINFO:
 					optval = OPTBIT(IN6P_PKTINFO);
 					break;
 				case IPV6_2292HOPLIMIT:
 					optval = OPTBIT(IN6P_HOPLIMIT);
 					break;
 				case IPV6_2292HOPOPTS:
 					optval = OPTBIT(IN6P_HOPOPTS);
 					break;
 				case IPV6_2292RTHDR:
 					optval = OPTBIT(IN6P_RTHDR);
 					break;
 				case IPV6_2292DSTOPTS:
 					optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
 					break;
 				}
 				error = sooptcopyout(sopt, &optval,
 				    sizeof optval);
 				break;
 			case IPV6_PKTINFO:
 			case IPV6_HOPOPTS:
 			case IPV6_RTHDR:
 			case IPV6_DSTOPTS:
 			case IPV6_RTHDRDSTOPTS:
 			case IPV6_NEXTHOP:
 			case IPV6_TCLASS:
 			case IPV6_DONTFRAG:
 			case IPV6_USE_MIN_MTU:
 			case IPV6_PREFER_TEMPADDR:
 				error = ip6_getpcbopt(in6p->in6p_outputopts,
 				    optname, sopt);
 				break;
 
 			case IPV6_MULTICAST_IF:
 			case IPV6_MULTICAST_HOPS:
 			case IPV6_MULTICAST_LOOP:
 			case IPV6_MSFILTER:
 				error = ip6_getmoptions(in6p, sopt);
 				break;
 
 #ifdef IPSEC
 			case IPV6_IPSEC_POLICY:
 			  {
 				caddr_t req = NULL;
 				size_t len = 0;
 				struct mbuf *m = NULL;
 				struct mbuf **mp = &m;
 				size_t ovalsize = sopt->sopt_valsize;
 				caddr_t oval = (caddr_t)sopt->sopt_val;
 
 				error = soopt_getm(sopt, &m); /* XXX */
 				if (error != 0)
 					break;
 				error = soopt_mcopyin(sopt, m); /* XXX */
 				if (error != 0)
 					break;
 				sopt->sopt_valsize = ovalsize;
 				sopt->sopt_val = oval;
 				if (m) {
 					req = mtod(m, caddr_t);
 					len = m->m_len;
 				}
 				error = ipsec_get_policy(in6p, req, len, mp);
 				if (error == 0)
 					error = soopt_mcopyout(sopt, m); /* XXX */
 				if (error == 0 && m)
 					m_freem(m);
 				break;
 			  }
 #endif /* IPSEC */
 
 			default:
 				error = ENOPROTOOPT;
 				break;
 			}
 			break;
 		}
 	}
 	return (error);
 }
 
 /*
  * Socket option handler for raw IPv6 sockets.
  *
  * Only IPV6_CHECKSUM at level IPPROTO_IPV6 is handled here.  Any other
  * level yields EINVAL and any other option name yields ENOPROTOOPT.
  *
  * For ICMPv6 sockets the checksum offset cannot be modified; a set request
  * that carries the offset of icmp6_cksum (i.e. "no change") is accepted to
  * help existing applications.  RFC3542 says: "An attempt to set
  * IPV6_CHECKSUM for an ICMPv6 socket will fail."  The current behavior
  * does not meet RFC3542.
  *
  * Returns 0 on success or an errno value.
  */
 int
 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct inpcb *in6p = sotoinpcb(so);
 	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
 	int optlen = sopt->sopt_valsize;
 	int error, optval;
 
 	if (sopt->sopt_level != IPPROTO_IPV6)
 		return (EINVAL);
 	if (sopt->sopt_name != IPV6_CHECKSUM)
 		return (ENOPROTOOPT);
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		if (optlen != sizeof(int))
 			return (EINVAL);
 		error = sooptcopyin(sopt, &optval, sizeof(optval),
 		    sizeof(optval));
 		if (error)
 			return (error);
 
 		/* the API assumes even offset values */
 		if ((optval % 2) != 0)
 			return (EINVAL);
 
 		/* ICMPv6: accept only the fixed checksum offset, store nothing */
 		if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
 			return ((optval != icmp6off) ? EINVAL : 0);
 
 		in6p->in6p_cksum = optval;
 		return (0);
 
 	case SOPT_GET:
 		optval = (so->so_proto->pr_protocol == IPPROTO_ICMPV6) ?
 		    icmp6off : in6p->in6p_cksum;
 		return (sooptcopyout(sopt, &optval, sizeof(optval)));
 
 	default:
 		return (EINVAL);
 	}
 }
 
 /*
  * Set up IP6 options in pcb for insertion in output packets or
  * specifying behavior of outgoing packets.
  */
 static int
 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
     struct socket *so, struct sockopt *sopt)
 {
 	struct ip6_pktopts *opt = *pktopt;
 	int error = 0;
 	struct thread *td = sopt->sopt_td;
 
 	/* turn off any old options. */
 	if (opt) {
 #ifdef DIAGNOSTIC
 		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
 		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
 		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
 			printf("ip6_pcbopts: all specified options are cleared.\n");
 #endif
 		ip6_clearpktopts(opt, -1);
 	} else
 		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
 	*pktopt = NULL;
 
 	if (!m || m->m_len == 0) {
 		/*
 		 * Only turning off any previous options, regardless of
 		 * whether the opt is just created or given.
 		 */
 		free(opt, M_IP6OPT);
 		return (0);
 	}
 
 	/*  set options specified by user. */
 	if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ?
 	    td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) {
 		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
 		free(opt, M_IP6OPT);
 		return (error);
 	}
 	*pktopt = opt;
 	return (0);
 }
 
 /*
  * Reset an ip6_pktopts structure to its initial state.  The structure is
  * zeroed first, but note that several members have non-zero defaults,
  * which are filled in afterwards.
  */
 void
 ip6_initpktopts(struct ip6_pktopts *opt)
 {
 
 	bzero(opt, sizeof(struct ip6_pktopts));
 
 	/* -1 selects the default hop limit / traffic class */
 	opt->ip6po_tclass = -1;
 	opt->ip6po_hlim = -1;
 
 	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
 	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
 }
 
 /*
  * Set a single sticky option (sticky=1, cmsg=0) on the option structure
  * referenced by *pktopt, allocating and initializing that structure on
  * first use.  The result of ip6_setpktopt() is returned unchanged.
  */
 static int
 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
     struct ucred *cred, int uproto)
 {
 	struct ip6_pktopts *opt = *pktopt;
 
 	if (opt == NULL) {
 		*pktopt = opt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
 		    M_WAITOK);
 		ip6_initpktopts(opt);
 	}
 
 	return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
 }
 
 static int
 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
 {
 	void *optdata = NULL;
 	int optdatalen = 0;
 	struct ip6_ext *ip6e;
 	int error = 0;
 	struct in6_pktinfo null_pktinfo;
 	int deftclass = 0, on;
 	int defminmtu = IP6PO_MINMTU_MCASTONLY;
 	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
 
 	switch (optname) {
 	case IPV6_PKTINFO:
 		if (pktopt && pktopt->ip6po_pktinfo)
 			optdata = (void *)pktopt->ip6po_pktinfo;
 		else {
 			/* XXX: we don't have to do this every time... */
 			bzero(&null_pktinfo, sizeof(null_pktinfo));
 			optdata = (void *)&null_pktinfo;
 		}
 		optdatalen = sizeof(struct in6_pktinfo);
 		break;
 	case IPV6_TCLASS:
 		if (pktopt && pktopt->ip6po_tclass >= 0)
 			optdata = (void *)&pktopt->ip6po_tclass;
 		else
 			optdata = (void *)&deftclass;
 		optdatalen = sizeof(int);
 		break;
 	case IPV6_HOPOPTS:
 		if (pktopt && pktopt->ip6po_hbh) {
 			optdata = (void *)pktopt->ip6po_hbh;
 			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
 			optdatalen = (ip6e->ip6e_len + 1) << 3;
 		}
 		break;
 	case IPV6_RTHDR:
 		if (pktopt && pktopt->ip6po_rthdr) {
 			optdata = (void *)pktopt->ip6po_rthdr;
 			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
 			optdatalen = (ip6e->ip6e_len + 1) << 3;
 		}
 		break;
 	case IPV6_RTHDRDSTOPTS:
 		if (pktopt && pktopt->ip6po_dest1) {
 			optdata = (void *)pktopt->ip6po_dest1;
 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
 			optdatalen = (ip6e->ip6e_len + 1) << 3;
 		}
 		break;
 	case IPV6_DSTOPTS:
 		if (pktopt && pktopt->ip6po_dest2) {
 			optdata = (void *)pktopt->ip6po_dest2;
 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
 			optdatalen = (ip6e->ip6e_len + 1) << 3;
 		}
 		break;
 	case IPV6_NEXTHOP:
 		if (pktopt && pktopt->ip6po_nexthop) {
 			optdata = (void *)pktopt->ip6po_nexthop;
 			optdatalen = pktopt->ip6po_nexthop->sa_len;
 		}
 		break;
 	case IPV6_USE_MIN_MTU:
 		if (pktopt)
 			optdata = (void *)&pktopt->ip6po_minmtu;
 		else
 			optdata = (void *)&defminmtu;
 		optdatalen = sizeof(int);
 		break;
 	case IPV6_DONTFRAG:
 		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
 			on = 1;
 		else
 			on = 0;
 		optdata = (void *)&on;
 		optdatalen = sizeof(on);
 		break;
 	case IPV6_PREFER_TEMPADDR:
 		if (pktopt)
 			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
 		else
 			optdata = (void *)&defpreftemp;
 		optdatalen = sizeof(int);
 		break;
 	default:		/* should not happen */
 #ifdef DIAGNOSTIC
 		panic("ip6_getpcbopt: unexpected option\n");
 #endif
 		return (ENOPROTOOPT);
 	}
 
 	error = sooptcopyout(sopt, optdata, optdatalen);
 
 	return (error);
 }
 
 /*
  * Release and reset the option selected by "optname" in "pktopt"; an
  * optname of -1 selects every option.  Scalar options are set back to -1,
  * allocated option data is freed with its pointer set to NULL, and cached
  * routes held for the next-hop and routing-header options are released.
  * A NULL "pktopt" is ignored.
  */
 void
 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
 {
 	int all;
 
 	if (pktopt == NULL)
 		return;
 	all = (optname == -1);
 
 	if (all || optname == IPV6_PKTINFO) {
 		if (pktopt->ip6po_pktinfo != NULL) {
 			free(pktopt->ip6po_pktinfo, M_IP6OPT);
 			pktopt->ip6po_pktinfo = NULL;
 		}
 	}
 
 	if (all || optname == IPV6_HOPLIMIT)
 		pktopt->ip6po_hlim = -1;
 
 	if (all || optname == IPV6_TCLASS)
 		pktopt->ip6po_tclass = -1;
 
 	if (all || optname == IPV6_NEXTHOP) {
 		/* drop the cached route first, then the address itself */
 		if (pktopt->ip6po_nextroute.ro_rt != NULL) {
 			RTFREE(pktopt->ip6po_nextroute.ro_rt);
 			pktopt->ip6po_nextroute.ro_rt = NULL;
 		}
 		if (pktopt->ip6po_nexthop != NULL) {
 			free(pktopt->ip6po_nexthop, M_IP6OPT);
 			pktopt->ip6po_nexthop = NULL;
 		}
 	}
 
 	if (all || optname == IPV6_HOPOPTS) {
 		if (pktopt->ip6po_hbh != NULL) {
 			free(pktopt->ip6po_hbh, M_IP6OPT);
 			pktopt->ip6po_hbh = NULL;
 		}
 	}
 
 	if (all || optname == IPV6_RTHDRDSTOPTS) {
 		if (pktopt->ip6po_dest1 != NULL) {
 			free(pktopt->ip6po_dest1, M_IP6OPT);
 			pktopt->ip6po_dest1 = NULL;
 		}
 	}
 
 	if (all || optname == IPV6_RTHDR) {
 		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr != NULL) {
 			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
 			pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
 		}
 		if (pktopt->ip6po_route.ro_rt != NULL) {
 			RTFREE(pktopt->ip6po_route.ro_rt);
 			pktopt->ip6po_route.ro_rt = NULL;
 		}
 	}
 
 	if (all || optname == IPV6_DSTOPTS) {
 		if (pktopt->ip6po_dest2 != NULL) {
 			free(pktopt->ip6po_dest2, M_IP6OPT);
 			pktopt->ip6po_dest2 = NULL;
 		}
 	}
 }
 
 /*
  * Helper for copypktopts(): if src->type holds an extension header,
  * duplicate it into a fresh M_IP6OPT allocation stored in dst->type.  The
  * header length is taken from its ip6e_len field (8-octet units, not
  * counting the first 8 octets).  Jumps to the enclosing function's "bad"
  * label when the allocation fails.
  *
  * The failure check is a plain NULL test, matching the pktinfo/nexthop
  * copies below, so a failed allocation can never reach bcopy() no matter
  * which flag combination the caller passed in "canwait".
  */
 #define PKTOPT_EXTHDRCPY(type) \
 do {\
 	if (src->type) {\
 		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
 		dst->type = malloc(hlen, M_IP6OPT, canwait);\
 		if (dst->type == NULL)\
 			goto bad;\
 		bcopy(src->type, dst->type, hlen);\
 	}\
 } while (/*CONSTCOND*/ 0)
 
 /*
  * Deep-copy the options in "src" into "dst".
  *
  * Scalar members are copied by value; pktinfo, the next-hop address and
  * the extension headers are duplicated into new M_IP6OPT allocations made
  * with the malloc flags given in "canwait".  Cached routes are not copied.
  * Members of "dst" for which "src" has no data are left untouched, so the
  * caller is expected to hand in an initialized "dst".
  *
  * Returns 0 on success, EINVAL if either argument is NULL, or ENOBUFS if
  * an allocation fails (in which case every option in "dst" is cleared).
  */
 static int
 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
 {
 	if (dst == NULL || src == NULL)  {
 		/* report under this function's own name */
 		printf("%s: invalid argument\n", __func__);
 		return (EINVAL);
 	}
 
 	dst->ip6po_hlim = src->ip6po_hlim;
 	dst->ip6po_tclass = src->ip6po_tclass;
 	dst->ip6po_flags = src->ip6po_flags;
 	dst->ip6po_minmtu = src->ip6po_minmtu;
 	dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
 	if (src->ip6po_pktinfo) {
 		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
 		    M_IP6OPT, canwait);
 		if (dst->ip6po_pktinfo == NULL)
 			goto bad;
 		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
 	}
 	if (src->ip6po_nexthop) {
 		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
 		    M_IP6OPT, canwait);
 		if (dst->ip6po_nexthop == NULL)
 			goto bad;
 		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
 		    src->ip6po_nexthop->sa_len);
 	}
 	PKTOPT_EXTHDRCPY(ip6po_hbh);
 	PKTOPT_EXTHDRCPY(ip6po_dest1);
 	PKTOPT_EXTHDRCPY(ip6po_dest2);
 	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
 	return (0);
 
   bad:
 	/* release whatever was duplicated before the failure */
 	ip6_clearpktopts(dst, -1);
 	return (ENOBUFS);
 }
 #undef PKTOPT_EXTHDRCPY
 
 /*
  * Allocate a new option structure and fill it with a copy of "src".
  * "canwait" is passed as the malloc flags for every allocation.  Returns
  * the new structure, or NULL if the allocation or the copy fails.
  */
 struct ip6_pktopts *
 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
 {
 	struct ip6_pktopts *copy;
 
 	copy = malloc(sizeof(*copy), M_IP6OPT, canwait);
 	if (copy != NULL) {
 		ip6_initpktopts(copy);
 		if (copypktopts(copy, src, canwait) != 0) {
 			free(copy, M_IP6OPT);
 			copy = NULL;
 		}
 	}
 
 	return (copy);
 }
 
 /*
  * Clear every option held in "pktopt" and then free the structure itself.
  * A NULL pointer is ignored.
  */
 void
 ip6_freepcbopts(struct ip6_pktopts *pktopt)
 {
 	if (pktopt != NULL) {
 		ip6_clearpktopts(pktopt, -1);
 		free(pktopt, M_IP6OPT);
 	}
 }
 
 /*
  * Set IPv6 outgoing packet options based on advanced API.
  */
 int
 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
     struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
 {
 	struct cmsghdr *cm = 0;
 
 	if (control == NULL || opt == NULL)
 		return (EINVAL);
 
 	ip6_initpktopts(opt);
 	if (stickyopt) {
 		int error;
 
 		/*
 		 * If stickyopt is provided, make a local copy of the options
 		 * for this particular packet, then override them by ancillary
 		 * objects.
 		 * XXX: copypktopts() does not copy the cached route to a next
 		 * hop (if any).  This is not very good in terms of efficiency,
 		 * but we can allow this since this option should be rarely
 		 * used.
 		 */
 		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
 			return (error);
 	}
 
 	/*
 	 * XXX: Currently, we assume all the optional information is stored
 	 * in a single mbuf.
 	 */
 	if (control->m_next)
 		return (EINVAL);
 
 	for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
 	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
 		int error;
 
 		if (control->m_len < CMSG_LEN(0))
 			return (EINVAL);
 
 		cm = mtod(control, struct cmsghdr *);
 		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
 			return (EINVAL);
 		if (cm->cmsg_level != IPPROTO_IPV6)
 			continue;
 
 		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
 		    cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
 		if (error)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Set a particular packet option, as a sticky option or an ancillary data
  * item.  "len" can be 0 only when it's a sticky option.
  * We have 4 cases of combination of "sticky" and "cmsg":
  * "sticky=0, cmsg=0": impossible
  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
  * "sticky=1, cmsg=0": RFC3542 socket option
  * "sticky=1, cmsg=1": RFC2292 socket option
  *
  * Arguments:
  *   optname  option (or ancillary data type) to set
  *   buf/len  option value and its length in bytes
  *   opt      option structure that is updated in place
  *   cred     credentials used for the PRIV_NETINET_SETHDROPTS check on
  *            the next-hop, hop-by-hop and destination options; the check
  *            is skipped when cred is NULL
  *   uproto   upper-layer protocol; IPPROTO_TCP gets special treatment
  *            for IPV6_PKTINFO and IPV6_DONTFRAG
  *
  * Returns 0 on success or an errno value.  For the variable-length
  * options the previous value is released before the new one is
  * allocated, so an ENOBUFS return leaves that option cleared.
  */
 static int
 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
     struct ucred *cred, int sticky, int cmsg, int uproto)
 {
 	int minmtupolicy, preftemp;
 	int error;
 
 	if (!sticky && !cmsg) {
 #ifdef DIAGNOSTIC
 		printf("ip6_setpktopt: impossible case\n");
 #endif
 		return (EINVAL);
 	}
 
 	/*
 	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
 	 * not be specified in the context of RFC3542.  Conversely,
 	 * RFC3542 types should not be specified in the context of RFC2292.
 	 */
 	if (!cmsg) {
 		switch (optname) {
 		case IPV6_2292PKTINFO:
 		case IPV6_2292HOPLIMIT:
 		case IPV6_2292NEXTHOP:
 		case IPV6_2292HOPOPTS:
 		case IPV6_2292DSTOPTS:
 		case IPV6_2292RTHDR:
 		case IPV6_2292PKTOPTIONS:
 			return (ENOPROTOOPT);
 		}
 	}
 	if (sticky && cmsg) {
 		switch (optname) {
 		case IPV6_PKTINFO:
 		case IPV6_HOPLIMIT:
 		case IPV6_NEXTHOP:
 		case IPV6_HOPOPTS:
 		case IPV6_DSTOPTS:
 		case IPV6_RTHDRDSTOPTS:
 		case IPV6_RTHDR:
 		case IPV6_USE_MIN_MTU:
 		case IPV6_DONTFRAG:
 		case IPV6_TCLASS:
 		case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
 			return (ENOPROTOOPT);
 		}
 	}
 
 	switch (optname) {
 	case IPV6_2292PKTINFO:
 	case IPV6_PKTINFO:
 	{
 		struct ifnet *ifp = NULL;
 		struct in6_pktinfo *pktinfo;
 
 		if (len != sizeof(struct in6_pktinfo))
 			return (EINVAL);
 
 		pktinfo = (struct in6_pktinfo *)buf;
 
 		/*
 		 * An application can clear any sticky IPV6_PKTINFO option by
 		 * doing a "regular" setsockopt with ipi6_addr being
 		 * in6addr_any and ipi6_ifindex being zero.
 		 * [RFC 3542, Section 6]
 		 */
 		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
 		    pktinfo->ipi6_ifindex == 0 &&
 		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 			ip6_clearpktopts(opt, optname);
 			break;
 		}
 
 		/*
 		 * A sticky IPV6_PKTINFO on a TCP socket may not carry a
 		 * source address; only the unspecified address is accepted.
 		 */
 		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
 		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 			return (EINVAL);
 		}
 		if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr))
 			return (EINVAL);
 		/* validate the interface index if specified. */
 		if (pktinfo->ipi6_ifindex > V_if_index)
 			 return (ENXIO);
 		if (pktinfo->ipi6_ifindex) {
 			ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
 			if (ifp == NULL)
 				return (ENXIO);
 		}
 		/* refuse an interface on which IPv6 is disabled */
 		if (ifp != NULL && (
 		    ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED))
 			return (ENETDOWN);
 
 		/*
 		 * When both an interface and an address are given, the
 		 * address must be configured on that interface.  The
 		 * reference returned by the lookup is dropped right away.
 		 */
 		if (ifp != NULL &&
 		    !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
 			struct in6_ifaddr *ia;
 
 			ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr);
 			if (ia == NULL)
 				return (EADDRNOTAVAIL);
 			ifa_free(&ia->ia_ifa);
 		}
 		/*
 		 * We store the address anyway, and let in6_selectsrc()
 		 * validate the specified address.  This is because ipi6_addr
 		 * may not have enough information about its scope zone, and
 		 * we may need additional information (such as outgoing
 		 * interface or the scope zone of a destination address) to
 		 * disambiguate the scope.
 		 * XXX: the delay of the validation may confuse the
 		 * application when it is used as a sticky option.
 		 */
 		if (opt->ip6po_pktinfo == NULL) {
 			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
 			    M_IP6OPT, M_NOWAIT);
 			if (opt->ip6po_pktinfo == NULL)
 				return (ENOBUFS);
 		}
 		bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
 		break;
 	}
 
 	case IPV6_2292HOPLIMIT:
 	case IPV6_HOPLIMIT:
 	{
 		int *hlimp;
 
 		/*
 		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
 		 * to simplify the ordering among hoplimit options.
 		 */
 		if (optname == IPV6_HOPLIMIT && sticky)
 			return (ENOPROTOOPT);
 
 		if (len != sizeof(int))
 			return (EINVAL);
 		hlimp = (int *)buf;
 		/* valid range is -1 (use the default) through 255 */
 		if (*hlimp < -1 || *hlimp > 255)
 			return (EINVAL);
 
 		opt->ip6po_hlim = *hlimp;
 		break;
 	}
 
 	case IPV6_TCLASS:
 	{
 		int tclass;
 
 		if (len != sizeof(int))
 			return (EINVAL);
 		tclass = *(int *)buf;
 		/* valid range is -1 (use the default) through 255 */
 		if (tclass < -1 || tclass > 255)
 			return (EINVAL);
 
 		opt->ip6po_tclass = tclass;
 		break;
 	}
 
 	case IPV6_2292NEXTHOP:
 	case IPV6_NEXTHOP:
 		if (cred != NULL) {
 			error = priv_check_cred(cred,
 			    PRIV_NETINET_SETHDROPTS, 0);
 			if (error)
 				return (error);
 		}
 
 		if (len == 0) {	/* just remove the option */
 			ip6_clearpktopts(opt, IPV6_NEXTHOP);
 			break;
 		}
 
 		/*
 		 * check if cmsg_len is large enough for sa_len
 		 * (*buf is the first byte of the sockaddr, i.e. sa_len)
 		 */
 		if (len < sizeof(struct sockaddr) || len < *buf)
 			return (EINVAL);
 
 		switch (((struct sockaddr *)buf)->sa_family) {
 		case AF_INET6:
 		{
 			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
 			int error;
 
 			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
 				return (EINVAL);
 
 			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
 			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
 				return (EINVAL);
 			}
 			if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
 			    != 0) {
 				return (error);
 			}
 			break;
 		}
 		case AF_LINK:	/* should eventually be supported */
 		default:
 			return (EAFNOSUPPORT);
 		}
 
 		/* turn off the previous option, then set the new option. */
 		ip6_clearpktopts(opt, IPV6_NEXTHOP);
 		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
 		if (opt->ip6po_nexthop == NULL)
 			return (ENOBUFS);
 		bcopy(buf, opt->ip6po_nexthop, *buf);
 		break;
 
 	case IPV6_2292HOPOPTS:
 	case IPV6_HOPOPTS:
 	{
 		struct ip6_hbh *hbh;
 		int hbhlen;
 
 		/*
 		 * XXX: We don't allow a non-privileged user to set ANY HbH
 		 * options, since per-option restriction has too much
 		 * overhead.
 		 */
 		if (cred != NULL) {
 			error = priv_check_cred(cred,
 			    PRIV_NETINET_SETHDROPTS, 0);
 			if (error)
 				return (error);
 		}
 
 		if (len == 0) {
 			ip6_clearpktopts(opt, IPV6_HOPOPTS);
 			break;	/* just remove the option */
 		}
 
 		/*
 		 * message length validation: the supplied length must match
 		 * the length encoded in the header itself.
 		 */
 		if (len < sizeof(struct ip6_hbh))
 			return (EINVAL);
 		hbh = (struct ip6_hbh *)buf;
 		hbhlen = (hbh->ip6h_len + 1) << 3;
 		if (len != hbhlen)
 			return (EINVAL);
 
 		/* turn off the previous option, then set the new option. */
 		ip6_clearpktopts(opt, IPV6_HOPOPTS);
 		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
 		if (opt->ip6po_hbh == NULL)
 			return (ENOBUFS);
 		bcopy(hbh, opt->ip6po_hbh, hbhlen);
 
 		break;
 	}
 
 	case IPV6_2292DSTOPTS:
 	case IPV6_DSTOPTS:
 	case IPV6_RTHDRDSTOPTS:
 	{
 		struct ip6_dest *dest, **newdest = NULL;
 		int destlen;
 
 		if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
 			error = priv_check_cred(cred,
 			    PRIV_NETINET_SETHDROPTS, 0);
 			if (error)
 				return (error);
 		}
 
 		if (len == 0) {
 			ip6_clearpktopts(opt, optname);
 			break;	/* just remove the option */
 		}
 
 		/*
 		 * message length validation: the supplied length must match
 		 * the length encoded in the header itself.
 		 */
 		if (len < sizeof(struct ip6_dest))
 			return (EINVAL);
 		dest = (struct ip6_dest *)buf;
 		destlen = (dest->ip6d_len + 1) << 3;
 		if (len != destlen)
 			return (EINVAL);
 
 		/*
 		 * Determine the position that the destination options header
 		 * should be inserted; before or after the routing header.
 		 */
 		switch (optname) {
 		case IPV6_2292DSTOPTS:
 			/*
 			 * The old advanced API is ambiguous on this point.
 			 * Our approach is to determine the position
 			 * according to the existence of a routing header.
 			 * Note, however, that this depends on the order of the
 			 * extension headers in the ancillary data; the 1st
 			 * part of the destination options header must appear
 			 * before the routing header in the ancillary data,
 			 * too.
 			 * RFC3542 solved the ambiguity by introducing
 			 * separate ancillary data or option types.
 			 */
 			if (opt->ip6po_rthdr == NULL)
 				newdest = &opt->ip6po_dest1;
 			else
 				newdest = &opt->ip6po_dest2;
 			break;
 		case IPV6_RTHDRDSTOPTS:
 			newdest = &opt->ip6po_dest1;
 			break;
 		case IPV6_DSTOPTS:
 			newdest = &opt->ip6po_dest2;
 			break;
 		}
 
 		/* turn off the previous option, then set the new option. */
 		ip6_clearpktopts(opt, optname);
 		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
 		if (*newdest == NULL)
 			return (ENOBUFS);
 		bcopy(dest, *newdest, destlen);
 
 		break;
 	}
 
 	case IPV6_2292RTHDR:
 	case IPV6_RTHDR:
 	{
 		struct ip6_rthdr *rth;
 		int rthlen;
 
 		if (len == 0) {
 			ip6_clearpktopts(opt, IPV6_RTHDR);
 			break;	/* just remove the option */
 		}
 
 		/*
 		 * message length validation: the supplied length must match
 		 * the length encoded in the header itself.
 		 */
 		if (len < sizeof(struct ip6_rthdr))
 			return (EINVAL);
 		rth = (struct ip6_rthdr *)buf;
 		rthlen = (rth->ip6r_len + 1) << 3;
 		if (len != rthlen)
 			return (EINVAL);
 
 		/* only type 0 routing headers are accepted */
 		switch (rth->ip6r_type) {
 		case IPV6_RTHDR_TYPE_0:
 			if (rth->ip6r_len == 0)	/* must contain one addr */
 				return (EINVAL);
 			if (rth->ip6r_len % 2) /* length must be even */
 				return (EINVAL);
 			/* segments left must equal ip6r_len / 2 */
 			if (rth->ip6r_len / 2 != rth->ip6r_segleft)
 				return (EINVAL);
 			break;
 		default:
 			return (EINVAL);	/* not supported */
 		}
 
 		/* turn off the previous option */
 		ip6_clearpktopts(opt, IPV6_RTHDR);
 		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
 		if (opt->ip6po_rthdr == NULL)
 			return (ENOBUFS);
 		bcopy(rth, opt->ip6po_rthdr, rthlen);
 
 		break;
 	}
 
 	case IPV6_USE_MIN_MTU:
 		if (len != sizeof(int))
 			return (EINVAL);
 		minmtupolicy = *(int *)buf;
 		/* only the three defined policies are accepted */
 		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
 		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
 		    minmtupolicy != IP6PO_MINMTU_ALL) {
 			return (EINVAL);
 		}
 		opt->ip6po_minmtu = minmtupolicy;
 		break;
 
 	case IPV6_DONTFRAG:
 		if (len != sizeof(int))
 			return (EINVAL);
 
 		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
 			/*
 			 * we ignore this option for TCP sockets.
 			 * (RFC3542 leaves this case unspecified.)
 			 */
 			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
 		} else
 			opt->ip6po_flags |= IP6PO_DONTFRAG;
 		break;
 
 	case IPV6_PREFER_TEMPADDR:
 		if (len != sizeof(int))
 			return (EINVAL);
 		preftemp = *(int *)buf;
 		/* only the three defined preferences are accepted */
 		if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
 		    preftemp != IP6PO_TEMPADDR_NOTPREFER &&
 		    preftemp != IP6PO_TEMPADDR_PREFER) {
 			return (EINVAL);
 		}
 		opt->ip6po_prefer_tempaddr = preftemp;
 		break;
 
 	default:
 		return (ENOPROTOOPT);
 	} /* end of switch */
 
 	return (0);
 }
 
 /*
  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
  * packet to the input queue of a specified interface.  Note that this
  * calls the output routine of the loopback "driver", but with an interface
  * pointer that might NOT be &loif -- easier than replicating that code here.
  *
  * The original packet "m" is left alone; only a copy is handed to
  * if_simloop().  If the copy cannot be made the call silently does nothing.
  */
 void
 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
 {
 	struct ip6_hdr *hdr;
 	struct mbuf *dup;
 
 	dup = m_copy(m, 0, M_COPYALL);
 	if (dup == NULL)
 		return;
 
 	/*
 	 * The IPv6 header is about to be modified, so it must live in
 	 * storage we may write to (the data may sit in an mbuf cluster)
 	 * and must be contiguous in the first mbuf.
 	 */
 	if (dup->m_len < sizeof(struct ip6_hdr) || !M_WRITABLE(dup)) {
 		dup = m_pullup(dup, sizeof(struct ip6_hdr));
 		if (dup == NULL)
 			return;
 	}
 
 #ifdef DIAGNOSTIC
 	if (dup->m_len < sizeof(*hdr)) {
 		m_freem(dup);
 		return;
 	}
 #endif
 
 	/*
 	 * clear embedded scope identifiers if necessary.
 	 * in6_clearscope will touch the addresses only when necessary.
 	 */
 	hdr = mtod(dup, struct ip6_hdr *);
 	in6_clearscope(&hdr->ip6_src);
 	in6_clearscope(&hdr->ip6_dst);
 
 	(void)if_simloop(ifp, dup, dst->sin6_family, 0);
 }
 
 /*
  * Chop IPv6 header off from the payload.
  *
  * If the first mbuf holds more than the IPv6 header, a new packet header
  * mbuf is allocated, the header is copied into it and the remainder of
  * "m" is chained behind it.  The mbuf that now starts the packet is
  * recorded in exthdrs->ip6e_ip6.  Returns 0 on success; on allocation
  * failure "m" is freed and ENOBUFS is returned.
  */
 static int
 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct mbuf *hdrm;
 
 	/* nothing follows the header in this mbuf: no split needed */
 	if (m->m_len <= sizeof(*ip6)) {
 		exthdrs->ip6e_ip6 = m;
 		return 0;
 	}
 
 	hdrm = m_gethdr(M_NOWAIT, MT_DATA);
 	if (hdrm == NULL) {
 		m_freem(m);
 		return ENOBUFS;
 	}
 	m_move_pkthdr(hdrm, m);
 	M_ALIGN(hdrm, sizeof(*ip6));
 
 	/* strip the header from the old mbuf and chain it behind the new one */
 	m->m_len -= sizeof(*ip6);
 	m->m_data += sizeof(*ip6);
 	hdrm->m_next = m;
 
 	hdrm->m_len = sizeof(*ip6);
 	bcopy((caddr_t)ip6, mtod(hdrm, caddr_t), sizeof(*ip6));
 
 	exthdrs->ip6e_ip6 = hdrm;
 	return 0;
 }
 
 /*
  * Compute IPv6 extension header length.
  */
 int
 ip6_optlen(struct inpcb *in6p)
 {
 	int len;
 
 	if (!in6p->in6p_outputopts)
 		return 0;
 
 	len = 0;
 #define elen(x) \
     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
 
 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
 	if (in6p->in6p_outputopts->ip6po_rthdr)
 		/* dest1 is valid with rthdr only */
 		len += elen(in6p->in6p_outputopts->ip6po_dest1);
 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
 	return len;
 #undef elen
 }
Index: head/sys/netinet6/udp6_usrreq.c
===================================================================
--- head/sys/netinet6/udp6_usrreq.c	(revision 277330)
+++ head/sys/netinet6/udp6_usrreq.c	(revision 277331)
@@ -1,1249 +1,1250 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * Copyright (c) 2014 Kevin Lo
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $
  *	$KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
+#include <net/rss_config.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp_var.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/udplite.h>
-#include <netinet/in_rss.h>
 
 #include <netinet6/ip6protosw.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
+#include <netinet6/in6_rss.h>
 #include <netinet6/udp6_var.h>
 #include <netinet6/scope6_var.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /* IPSEC */
 
 #include <security/mac/mac_framework.h>
 
 /*
  * UDP protocol implementation.
  * Per RFC 768, August, 1980.
  */
 
 /*
  * Protocol switch array defined elsewhere.  The handlers below index it
  * with ip_protox[IPPROTO_UDP] to pass sockets flagged INP_IPV4 on to that
  * entry's pr_usrreqs routines.
  */
 extern struct protosw	inetsw[];
 /* Forward declaration; the definition appears further down in this file. */
 static void		udp6_detach(struct socket *so);
 
 /*
  * Hand one received datagram to the socket behind 'inp'.
  *
  * inp	  PCB of the receiving socket; its lock must be held.
  * n	  the packet; always consumed (queued, passed on, or freed).
  * off	  offset of the UDP header inside 'n'.
  * fromsa  source address reported to the receiver.
  *
  * A PCB with a tunneling callback gets the packet through that callback
  * and nothing else happens here.  Otherwise the packet goes through the
  * optional IPSEC and MAC checks, has its headers trimmed, and is appended
  * to the socket's receive buffer together with any control data.
  */
 static void
 udp6_append(struct inpcb *inp, struct mbuf *n, int off,
     struct sockaddr_in6 *fromsa)
 {
 	struct udpcb *ucb;
 	struct socket *sock;
 	struct mbuf *ctl = NULL;
 
 	INP_LOCK_ASSERT(inp);
 
 	/* A registered tunneling handler takes the packet over entirely. */
 	ucb = intoudpcb(inp);
 	if (ucb->u_tun_func != NULL) {
 		(*ucb->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa,
 		    ucb->u_tun_ctx);
 		return;
 	}
 #ifdef IPSEC
 	/* Drop packets that fail the AH/ESP policy check. */
 	if (ipsec6_in_reject(n, inp)) {
 		m_freem(n);
 		return;
 	}
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
 		m_freem(n);
 		return;
 	}
 #endif
 	sock = inp->inp_socket;
 
 	/* Collect control data while the IPv6 header is still in place. */
 	if ((inp->inp_flags & INP_CONTROLOPTS) != 0 ||
 	    (sock->so_options & SO_TIMESTAMP) != 0)
 		ip6_savecontrol(inp, n, &ctl);
 	m_adj(n, off + sizeof(struct udphdr));
 
 	SOCKBUF_LOCK(&sock->so_rcv);
 	if (sbappendaddr_locked(&sock->so_rcv, (struct sockaddr *)fromsa, n,
 	    ctl) != 0) {
 		/* Queued; this wakeup variant is entered with the lock held. */
 		sorwakeup_locked(sock);
 		return;
 	}
 
 	/* The append failed: release everything and account for it. */
 	SOCKBUF_UNLOCK(&sock->so_rcv);
 	m_freem(n);
 	if (ctl != NULL)
 		m_freem(ctl);
 	UDPSTAT_INC(udps_fullsock);
 }
 
 /*
  * Input routine for UDP and UDP-Lite datagrams carried over IPv6.
  *
  * mp	  points to the received mbuf chain.
  * offp	  points to the offset of the UDP header within that chain.
  * proto	  not referenced by this function.
  *
  * The datagram is length- and checksum-validated first.  Multicast
  * destinations are then delivered to every matching PCB on the protocol's
  * PCB list; anything else is delivered to the single PCB found by lookup,
  * or answered with an ICMPv6 port-unreachable error when none exists
  * (subject to the blackhole and rate-limit checks below).
  *
  * Every path returns IPPROTO_DONE; the packet is always consumed.
  */
 int
 udp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ifnet *ifp;
 	struct ip6_hdr *ip6;
 	struct udphdr *uh;
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct udpcb *up;
 	int off = *offp;
 	int cscov_partial;
 	int plen, ulen;
 	struct sockaddr_in6 fromsa;
 	struct m_tag *fwd_tag;
 	uint16_t uh_sum;
 	uint8_t nxt;
 
 	ifp = m->m_pkthdr.rcvif;
 	ip6 = mtod(m, struct ip6_hdr *);
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE);
 	ip6 = mtod(m, struct ip6_hdr *);
 	uh = (struct udphdr *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh));
 	if (!uh)
 		return (IPPROTO_DONE);
 #endif
 
 	UDPSTAT_INC(udps_ipackets);
 
 	/*
 	 * Destination port of 0 is illegal, based on RFC768.
 	 */
 	if (uh->uh_dport == 0)
 		goto badunlocked;
 
 	/* plen: bytes from the UDP header to the end of the IPv6 payload. */
 	plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
 	ulen = ntohs((u_short)uh->uh_ulen);
 
 	nxt = ip6->ip6_nxt;
 	cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0;
 	if (nxt == IPPROTO_UDPLITE) {
 		/* Zero means checksum over the complete packet. */
 		if (ulen == 0)
 			ulen = plen;
 		if (ulen == plen)
 			cscov_partial = 0;
 		if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) {
 			/* XXX: What is the right UDPLite MIB counter? */
 			goto badunlocked;
 		}
 		if (uh->uh_sum == 0) {
 			/* XXX: What is the right UDPLite MIB counter? */
 			goto badunlocked;
 		}
 	} else {
 		if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) {
 			UDPSTAT_INC(udps_badlen);
 			goto badunlocked;
 		}
 		if (uh->uh_sum == 0) {
 			UDPSTAT_INC(udps_nosum);
 			goto badunlocked;
 		}
 	}
 
 	/*
 	 * Use the checksum data recorded in the packet header when it is
 	 * flagged valid and coverage is complete; otherwise compute the
 	 * checksum over the covered part in software.
 	 */
 	if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) &&
 	    !cscov_partial) {
 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 			uh_sum = m->m_pkthdr.csum_data;
 		else
 			uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
 			    m->m_pkthdr.csum_data);
 		uh_sum ^= 0xffff;
 	} else
 		uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
 
 	if (uh_sum != 0) {
 		UDPSTAT_INC(udps_badsum);
 		goto badunlocked;
 	}
 
 	/*
 	 * Construct sockaddr format source address.
 	 */
 	init_sin6(&fromsa, m);
 	fromsa.sin6_port = uh->uh_sport;
 
 	pcbinfo = get_inpcbinfo(nxt);
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		struct inpcb *last;
 		struct inpcbhead *pcblist;
 		struct ip6_moptions *imo;
 
 		INP_INFO_RLOCK(pcbinfo);
 		/*
 		 * In the event that laddr should be set to the link-local
 		 * address (this happens in RIPng), the multicast address
 		 * specified in the received packet will not match laddr.  To
 		 * handle this situation, matching is relaxed if the
 		 * receiving interface is the same as one specified in the
 		 * socket and if the destination multicast address matches
 		 * one of the multicast groups specified in the socket.
 		 */
 
 		/*
 		 * KAME note: traditionally we dropped udpiphdr from mbuf
 		 * here.  We need udphdr for IPsec processing so we do that
 		 * later.
 		 */
 		pcblist = get_pcblist(nxt);
 		last = NULL;
 		LIST_FOREACH(inp, pcblist, inp_list) {
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 			if (inp->inp_lport != uh->uh_dport)
 				continue;
 			if (inp->inp_fport != 0 &&
 			    inp->inp_fport != uh->uh_sport)
 				continue;
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 				if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
 							&ip6->ip6_dst))
 					continue;
 			}
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 				if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
 							&ip6->ip6_src) ||
 				    inp->inp_fport != uh->uh_sport)
 					continue;
 			}
 
 			/*
 			 * XXXRW: Because we weren't holding either the inpcb
 			 * or the hash lock when we checked for a match
 			 * before, we should probably recheck now that the
 			 * inpcb lock is (supposed to be) held.
 			 */
 
 			/*
 			 * Handle socket delivery policy for any-source
 			 * and source-specific multicast. [RFC3678]
 			 */
 			imo = inp->in6p_moptions;
 			if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 				struct sockaddr_in6	 mcaddr;
 				int			 blocked;
 
 				INP_RLOCK(inp);
 
 				bzero(&mcaddr, sizeof(struct sockaddr_in6));
 				mcaddr.sin6_len = sizeof(struct sockaddr_in6);
 				mcaddr.sin6_family = AF_INET6;
 				mcaddr.sin6_addr = ip6->ip6_dst;
 
 				blocked = im6o_mc_filter(imo, ifp,
 					(struct sockaddr *)&mcaddr,
 					(struct sockaddr *)&fromsa);
 				if (blocked != MCAST_PASS) {
 					if (blocked == MCAST_NOTGMEMBER)
 						IP6STAT_INC(ip6s_notmember);
 					if (blocked == MCAST_NOTSMEMBER ||
 					    blocked == MCAST_MUTED)
 						UDPSTAT_INC(udps_filtermcast);
 					INP_RUNLOCK(inp); /* XXX */
 					continue;
 				}
 
 				INP_RUNLOCK(inp);
 			}
 			/*
 			 * Delivery lags one match behind: the previously
 			 * matched PCB gets a copy here, so that the final
 			 * match can be handed the original chain after the
 			 * loop.
 			 */
 			if (last != NULL) {
 				struct mbuf *n;
 
 				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
 					INP_RLOCK(last);
 					UDP_PROBE(receive, NULL, last, ip6,
 					    last, uh);
 					udp6_append(last, n, off, &fromsa);
 					INP_RUNLOCK(last);
 				}
 			}
 			last = inp;
 			/*
 			 * Don't look for additional matches if this one does
 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
 			 * socket options set.  This heuristic avoids
 			 * searching through all pcbs in the common case of a
 			 * non-shared port.  It assumes that an application
 			 * will never clear these options after setting them.
 			 */
 			if ((last->inp_socket->so_options &
 			     (SO_REUSEPORT|SO_REUSEADDR)) == 0)
 				break;
 		}
 
 		if (last == NULL) {
 			/*
 			 * No matching pcb found; discard datagram.  (No need
 			 * to send an ICMP Port Unreachable for a broadcast
 			 * or multicast datagram.)
 			 */
 			UDPSTAT_INC(udps_noport);
 			UDPSTAT_INC(udps_noportmcast);
 			goto badheadlocked;
 		}
 		INP_RLOCK(last);
 		INP_INFO_RUNLOCK(pcbinfo);
 		UDP_PROBE(receive, NULL, last, ip6, last, uh);
 		udp6_append(last, m, off, &fromsa);
 		INP_RUNLOCK(last);
 		return (IPPROTO_DONE);
 	}
 	/*
 	 * Locate pcb for datagram.
 	 */
 
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
 	if ((m->m_flags & M_IP6_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		struct sockaddr_in6 *next_hop6;
 
 		/* The next-hop address is stored right behind the tag header. */
 		next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
 
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
 		inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
 		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 		    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
 			    uh->uh_sport, &next_hop6->sin6_addr,
 			    next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
 			    uh->uh_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
 		}
 		/* Remove the tag from the packet. We don't need it anymore. */
 		m_tag_delete(m, fwd_tag);
 		m->m_flags &= ~M_IP6_NEXTHOP;
 	} else
 		inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
 		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 		    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
 		    m->m_pkthdr.rcvif, m);
 	if (inp == NULL) {
 		/* Nobody is listening: log, count, and possibly send an error. */
 		if (udp_log_in_vain) {
 			char ip6bufs[INET6_ADDRSTRLEN];
 			char ip6bufd[INET6_ADDRSTRLEN];
 
 			log(LOG_INFO,
 			    "Connection attempt to UDP [%s]:%d from [%s]:%d\n",
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    ntohs(uh->uh_dport),
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ntohs(uh->uh_sport));
 		}
 		UDPSTAT_INC(udps_noport);
 		if (m->m_flags & M_MCAST) {
 			printf("UDP6: M_MCAST is set in a unicast packet.\n");
 			UDPSTAT_INC(udps_noportmcast);
 			goto badunlocked;
 		}
 		if (V_udp_blackhole)
 			goto badunlocked;
 		if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0)
 			goto badunlocked;
 		/* The packet is handed to icmp6_error(); it is not freed here. */
 		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
 		return (IPPROTO_DONE);
 	}
 	INP_RLOCK_ASSERT(inp);
 	up = intoudpcb(inp);
 	if (cscov_partial) {
 		/*
 		 * Partially covered datagram: drop it when the socket's
 		 * receive coverage setting is zero or exceeds the coverage
 		 * the sender used.
 		 */
 		if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 	UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
 	udp6_append(inp, m, off, &fromsa);
 	INP_RUNLOCK(inp);
 	return (IPPROTO_DONE);
 
 	/* Error exits: drop the pcbinfo read lock if held, then the packet. */
 badheadlocked:
 	INP_INFO_RUNLOCK(pcbinfo);
 badunlocked:
 	if (m)
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Shared control-input handler for the UDP and UDP-Lite PCB sets.
  *
  * cmd	  PRC_* command code.
  * sa	  address the notification is about; must be a sockaddr_in6.
  * d	  optional struct ip6ctlparam describing the offending packet.
  * pcbinfo PCB set whose sockets are to be notified.
  *
  * When the offending packet is available its UDP ports are extracted so
  * that in6_pcbnotify() can be called with specific ports; otherwise it is
  * called with both ports set to zero.
  */
 static void
 udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d,
     struct inpcbinfo *pcbinfo)
 {
 	struct inpcb *(*notify)(struct inpcb *, int);
 	const struct sockaddr_in6 *src6;
 	struct ip6ctlparam *ctl;
 	struct ip6_hdr *ip6;
 	struct udphdr uh;
 	struct mbuf *m;
 	void *cmdarg;
 	int off;
 	struct udp_portonly {
 		u_int16_t uh_sport;
 		u_int16_t uh_dport;
 	} *ports;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 	if ((unsigned)cmd >= PRC_NCMDS)
 		return;
 
 	/* Choose the per-PCB callback; some commands ignore the parameter. */
 	notify = udp_notify;
 	if (PRC_IS_REDIRECT(cmd)) {
 		notify = in6_rtchange;
 		d = NULL;
 	} else if (cmd == PRC_HOSTDEAD) {
 		d = NULL;
 	} else if (inet6ctlerrmap[cmd] == 0) {
 		return;
 	}
 
 	/* Unpack the ICMPv6-supplied parameter block, if there is one. */
 	if (d == NULL) {
 		ctl = NULL;
 		m = NULL;
 		ip6 = NULL;
 		off = 0;
 		cmdarg = NULL;
 		src6 = &sa6_any;
 	} else {
 		ctl = (struct ip6ctlparam *)d;
 		m = ctl->ip6c_m;
 		ip6 = ctl->ip6c_ip6;
 		off = ctl->ip6c_off;
 		cmdarg = ctl->ip6c_cmdarg;
 		src6 = ctl->ip6c_src;
 	}
 
 	if (ip6 == NULL) {
 		/* No packet to inspect: notify with both ports set to zero. */
 		(void)in6_pcbnotify(pcbinfo, sa, 0,
 		    (const struct sockaddr *)src6, 0, cmd, cmdarg, notify);
 		return;
 	}
 
 	/*
 	 * XXX: a non-NULL ip6 is taken to imply that m and off are valid.
 	 */
 
 	/* Give up unless both port fields are contained in the packet. */
 	if (m->m_pkthdr.len < off + sizeof(*ports))
 		return;
 
 	bzero(&uh, sizeof(uh));
 	m_copydata(m, off, sizeof(*ports), (caddr_t)&uh);
 
 	(void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport,
 	    (struct sockaddr *)ctl->ip6c_src, uh.uh_sport, cmd,
 	    cmdarg, notify);
 }
 
 /*
  * Control-input entry point for UDP over IPv6: forwards the request to
  * the common handler together with the UDP PCB set (V_udbinfo).
  */
 void
 udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 
 	/* Plain call: a void function must not return an expression. */
 	udp6_common_ctlinput(cmd, sa, d, &V_udbinfo);
 }
 
 /*
  * Control-input entry point for UDP-Lite over IPv6: forwards the request
  * to the common handler together with the UDP-Lite PCB set
  * (V_ulitecbinfo).
  */
 void
 udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 
 	/* Plain call: a void function must not return an expression. */
 	udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo);
 }
 
 static int
 udp6_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in6 addrs[2];
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 
 	if (req->newlen != sizeof(addrs))
 		return (EINVAL);
 	if (req->oldlen != sizeof(struct xucred))
 		return (EINVAL);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
 	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
 		return (error);
 	}
 	inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr,
 	    addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port,
 	    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
 	if (inp != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseesocket(req->td->td_ucred,
 			    inp->inp_socket);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
     0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
 
 static int
 udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
     struct mbuf *control, struct thread *td)
 {
 	u_int32_t ulen = m->m_pkthdr.len;
 	u_int32_t plen = sizeof(struct udphdr) + ulen;
 	struct ip6_hdr *ip6;
 	struct udphdr *udp6;
 	struct in6_addr *laddr, *faddr, in6a;
 	struct sockaddr_in6 *sin6 = NULL;
 	struct ifnet *oifp = NULL;
 	int cscov_partial = 0;
 	int scope_ambiguous = 0;
 	u_short fport;
 	int error = 0;
 	uint8_t nxt;
 	uint16_t cscov = 0;
 	struct ip6_pktopts *optp, opt;
 	int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
 	int flags;
 	struct sockaddr_in6 tmp;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	if (addr6) {
 		/* addr6 has been validated in udp6_send(). */
 		sin6 = (struct sockaddr_in6 *)addr6;
 
 		/* protect *sin6 from overwrites */
 		tmp = *sin6;
 		sin6 = &tmp;
 
 		/*
 		 * Application should provide a proper zone ID or the use of
 		 * default zone IDs should be enabled.  Unfortunately, some
 		 * applications do not behave as it should, so we need a
 		 * workaround.  Even if an appropriate ID is not determined,
 		 * we'll see if we can determine the outgoing interface.  If we
 		 * can, determine the zone ID based on the interface below.
 		 */
 		if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
 			scope_ambiguous = 1;
 		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
 			return (error);
 	}
 
 	if (control) {
 		if ((error = ip6_setpktopts(control, &opt,
 		    inp->in6p_outputopts, td->td_ucred, IPPROTO_UDP)) != 0)
 			goto release;
 		optp = &opt;
 	} else
 		optp = inp->in6p_outputopts;
 
 	if (sin6) {
 		faddr = &sin6->sin6_addr;
 
 		/*
 		 * Since we saw no essential reason for calling in_pcbconnect,
 		 * we get rid of such kind of logic, and call in6_selectsrc
 		 * and in6_pcbsetport in order to fill in the local address
 		 * and the local port.
 		 */
 		if (sin6->sin6_port == 0) {
 			error = EADDRNOTAVAIL;
 			goto release;
 		}
 
 		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 			/* how about ::ffff:0.0.0.0 case? */
 			error = EISCONN;
 			goto release;
 		}
 
 		fport = sin6->sin6_port; /* allow 0 port */
 
 		if (IN6_IS_ADDR_V4MAPPED(faddr)) {
 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
 				/*
 				 * I believe we should explicitly discard the
 				 * packet when mapped addresses are disabled,
 				 * rather than send the packet as an IPv6 one.
 				 * If we chose the latter approach, the packet
 				 * might be sent out on the wire based on the
 				 * default route, the situation which we'd
 				 * probably want to avoid.
 				 * (20010421 jinmei@kame.net)
 				 */
 				error = EINVAL;
 				goto release;
 			}
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
 			    !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
 				/*
 				 * when remote addr is an IPv4-mapped address,
 				 * local addr should not be an IPv6 address,
 				 * since you cannot determine how to map IPv6
 				 * source address to IPv4.
 				 */
 				error = EINVAL;
 				goto release;
 			}
 
 			af = AF_INET;
 		}
 
 		if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
 			error = in6_selectsrc(sin6, optp, inp, NULL,
 			    td->td_ucred, &oifp, &in6a);
 			if (error)
 				goto release;
 			if (oifp && scope_ambiguous &&
 			    (error = in6_setscope(&sin6->sin6_addr,
 			    oifp, NULL))) {
 				goto release;
 			}
 			laddr = &in6a;
 		} else
 			laddr = &inp->in6p_laddr;	/* XXX */
 		if (laddr == NULL) {
 			if (error == 0)
 				error = EADDRNOTAVAIL;
 			goto release;
 		}
 		if (inp->inp_lport == 0 &&
 		    (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) {
 			/* Undo an address bind that may have occurred. */
 			inp->in6p_laddr = in6addr_any;
 			goto release;
 		}
 	} else {
 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 			error = ENOTCONN;
 			goto release;
 		}
 		if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
 				/*
 				 * XXX: this case would happen when the
 				 * application sets the V6ONLY flag after
 				 * connecting the foreign address.
 				 * Such applications should be fixed,
 				 * so we bark here.
 				 */
 				log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
 				    "option was set for a connected socket\n");
 				error = EINVAL;
 				goto release;
 			} else
 				af = AF_INET;
 		}
 		laddr = &inp->in6p_laddr;
 		faddr = &inp->in6p_faddr;
 		fport = inp->inp_fport;
 	}
 
 	if (af == AF_INET)
 		hlen = sizeof(struct ip);
 
 	/*
 	 * Calculate data length and get a mbuf
 	 * for UDP and IP6 headers.
 	 */
 	M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT);
 	if (m == 0) {
 		error = ENOBUFS;
 		goto release;
 	}
 
 	/*
 	 * Stuff checksum and output datagram.
 	 */
 	nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
 	    IPPROTO_UDP : IPPROTO_UDPLITE;
 	udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
 	udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
 	udp6->uh_dport = fport;
 	if (nxt == IPPROTO_UDPLITE) {
 		struct udpcb *up;
 
 		up = intoudpcb(inp);
 		cscov = up->u_txcslen;
 		if (cscov >= plen)
 			cscov = 0;
 		udp6->uh_ulen = htons(cscov);
 		/*
 		 * For UDP-Lite, checksum coverage length of zero means
 		 * the entire UDPLite packet is covered by the checksum.
 		 */
 		cscov_partial = (cscov == 0) ? 0 : 1;
 	} else if (plen <= 0xffff)
 		udp6->uh_ulen = htons((u_short)plen);
 	else
 		udp6->uh_ulen = 0;
 	udp6->uh_sum = 0;
 
 	switch (af) {
 	case AF_INET6:
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6->ip6_flow	= inp->inp_flow & IPV6_FLOWINFO_MASK;
 		ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
 		ip6->ip6_vfc	|= IPV6_VERSION;
 		ip6->ip6_plen	= htons((u_short)plen);
 		ip6->ip6_nxt	= nxt;
 		ip6->ip6_hlim	= in6_selecthlim(inp, NULL);
 		ip6->ip6_src	= *laddr;
 		ip6->ip6_dst	= *faddr;
 
 		if (cscov_partial) {
 			if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
 			    sizeof(struct ip6_hdr), plen, cscov)) == 0)
 				udp6->uh_sum = 0xffff;
 		} else {
 			udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
 			m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 		}
 
 		/*
 		 * XXX for now assume UDP is 2-tuple.
 		 * Later on this may become configurable as 4-tuple;
 		 * we should support that.
 		 *
 		 * XXX .. and we should likely cache this in the inpcb.
 		 */
 #ifdef	RSS
 		m->m_pkthdr.flowid = rss_hash_ip6_2tuple(faddr, laddr);
 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
 #endif
 		flags = 0;
 
 #ifdef	RSS
 		/*
 		 * Don't override with the inp cached flowid.
 		 *
 		 * Until the whole UDP path is vetted, it may actually
 		 * be incorrect.
 		 */
 		flags |= IP_NODEFAULTFLOWID;
 #endif
 
 		UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
 		UDPSTAT_INC(udps_opackets);
 		error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
 		    NULL, inp);
 		break;
 	case AF_INET:
 		error = EAFNOSUPPORT;
 		goto release;
 	}
 	goto releaseopt;
 
 release:
 	m_freem(m);
 
 releaseopt:
 	if (control) {
 		ip6_clearpktopts(&opt, -1);
 		m_freem(control);
 	}
 	return (error);
 }
 
 /*
  * pru_abort handler.  Sockets flagged INP_IPV4 are passed to the
  * pru_abort routine reached through inetsw (INET kernels only).
  * Otherwise, if the PCB has a foreign address, it is disconnected, its
  * local address is reset to the unspecified address, and the socket is
  * marked disconnected.
  */
 static void
 udp6_abort(struct socket *so)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *inp;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
 
 #ifdef INET
 	if ((inp->inp_vflag & INP_IPV4) != 0) {
 		(*inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs->pru_abort)(so);
 		return;
 	}
 #endif
 
 	INP_WLOCK(inp);
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		/* Not connected; nothing to tear down. */
 		INP_WUNLOCK(inp);
 		return;
 	}
 	INP_HASH_WLOCK(pcbinfo);
 	in6_pcbdisconnect(inp);
 	inp->in6p_laddr = in6addr_any;
 	INP_HASH_WUNLOCK(pcbinfo);
 	soisdisconnected(so);
 	INP_WUNLOCK(inp);
 }
 
 /*
  * pru_attach handler: reserve socket buffer space if none is set yet,
  * allocate the PCB and its UDP control block, and initialise the
  * address-family flags and hop-limit/TTL defaults.
  *
  * Returns 0 or the error from soreserve(), in_pcballoc() or
  * udp_newudpcb().
  */
 static int
 udp6_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *inp;
 	int error;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
 
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		error = soreserve(so, udp_sendspace, udp_recvspace);
 		if (error != 0)
 			return (error);
 	}
 
 	INP_INFO_WLOCK(pcbinfo);
 	error = in_pcballoc(so, pcbinfo);
 	if (error != 0) {
 		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 
 	inp = sotoinpcb(so);
 	inp->inp_vflag |= INP_IPV6;
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
 		inp->inp_vflag |= INP_IPV4;
 	inp->in6p_hops = -1;	/* use kernel default */
 	inp->in6p_cksum = -1;	/* just to be sure */
 	/*
 	 * XXX: ugly!!
 	 * The IPv4 TTL has to be set up for an IPv6 socket too: a socket
 	 * bound to the IPv6 wildcard address may match an IPv4-mapped
 	 * IPv6 address.
 	 */
 	inp->inp_ip_ttl = V_ip_defttl;
 
 	error = udp_newudpcb(inp);
 	if (error != 0) {
 		/* Undo the PCB allocation. */
 		in_pcbdetach(inp);
 		in_pcbfree(inp);
 	} else
 		INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(pcbinfo);
 	return (error);
 }
 
 /*
  * pru_bind handler.
  *
  * The PCB is first marked IPv6-only in inp_vflag.  Unless IPV6_V6ONLY is
  * set on the socket, binding to the unspecified address additionally
  * enables INP_IPV4, and (INET kernels only) binding to an IPv4-mapped
  * address switches the PCB to INP_IPV4 and binds through in_pcbbind().
  * All other cases bind through in6_pcbbind().
  *
  * Returns the result of the bind routine that was called.
  */
 static int
 udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in6 *sin6_p;
 	struct inpcb *inp;
 	int dualstack;
 	int error;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
 
 	sin6_p = (struct sockaddr_in6 *)nam;
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(pcbinfo);
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	dualstack = ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0);
 
 	if (dualstack && IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) {
 		/* Wildcard bind on a dual-stack socket also covers IPv4. */
 		inp->inp_vflag |= INP_IPV4;
 		error = in6_pcbbind(inp, nam, td->td_ucred);
 	}
 #ifdef INET
 	else if (dualstack && IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
 		struct sockaddr_in sin;
 
 		/* IPv4-mapped address: bind as an IPv4 socket instead. */
 		in6_sin6_2_sin(&sin, sin6_p);
 		inp->inp_vflag |= INP_IPV4;
 		inp->inp_vflag &= ~INP_IPV6;
 		error = in_pcbbind(inp, (struct sockaddr *)&sin,
 		    td->td_ucred);
 	}
 #endif
 	else
 		error = in6_pcbbind(inp, nam, td->td_ucred);
 
 	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * pru_close handler.  Sockets flagged INP_IPV4 are passed to the
  * pru_disconnect routine reached through inetsw (INET kernels only).
  * Otherwise a PCB with a foreign address is disconnected, its local
  * address reset to the unspecified address, and the socket marked
  * disconnected.
  */
 static void
 udp6_close(struct socket *so)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *inp;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
 
 #ifdef INET
 	if ((inp->inp_vflag & INP_IPV4) != 0) {
 		(*inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs->pru_disconnect)(so);
 		return;
 	}
 #endif
 	INP_WLOCK(inp);
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		/* Never connected, so there is no association to drop. */
 		INP_WUNLOCK(inp);
 		return;
 	}
 	INP_HASH_WLOCK(pcbinfo);
 	in6_pcbdisconnect(inp);
 	inp->in6p_laddr = in6addr_any;
 	INP_HASH_WUNLOCK(pcbinfo);
 	soisdisconnected(so);
 	INP_WUNLOCK(inp);
 }
 
 /*
  * pru_connect handler.
  *
  * On INET kernels an IPv4-mapped destination turns the PCB into an IPv4
  * one and connects through in_pcbconnect(); this is refused with EINVAL
  * when IPV6_V6ONLY is set.  Any other destination connects through
  * in6_pcbconnect().  EISCONN is returned when a foreign address is
  * already set, and the jail checks (prison_remote_ip4/ip6) may veto the
  * destination.
  */
 static int
 udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in6 *sin6;
 	struct inpcb *inp;
 	int error;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	sin6 = (struct sockaddr_in6 *)nam;
 	KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
 
 	/*
 	 * XXXRW: Need to clarify locking of v4/v6 flags.
 	 */
 	INP_WLOCK(inp);
 #ifdef INET
 	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 		struct sockaddr_in sin;
 
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
 			error = EINVAL;
 		else if (inp->inp_faddr.s_addr != INADDR_ANY)
 			error = EISCONN;
 		else {
 			in6_sin6_2_sin(&sin, sin6);
 			inp->inp_vflag |= INP_IPV4;
 			inp->inp_vflag &= ~INP_IPV6;
 			error = prison_remote_ip4(td->td_ucred,
 			    &sin.sin_addr);
 			if (error == 0) {
 				INP_HASH_WLOCK(pcbinfo);
 				error = in_pcbconnect(inp,
 				    (struct sockaddr *)&sin, td->td_ucred);
 				INP_HASH_WUNLOCK(pcbinfo);
 				if (error == 0)
 					soisconnected(so);
 			}
 		}
 		INP_WUNLOCK(inp);
 		return (error);
 	}
 #endif
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		INP_WUNLOCK(inp);
 		return (EISCONN);
 	}
 
 	/* Native IPv6 destination: the PCB becomes IPv6-only. */
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
 	if (error == 0) {
 		INP_HASH_WLOCK(pcbinfo);
 		error = in6_pcbconnect(inp, nam, td->td_ucred);
 		INP_HASH_WUNLOCK(pcbinfo);
 		if (error == 0)
 			soisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * pru_detach handler: detach and free the socket's PCB, then discard the
  * UDP control block that belonged to it.
  */
 static void
 udp6_detach(struct socket *so)
 {
 	struct inpcbinfo *info;
 	struct udpcb *up;
 	struct inpcb *inp;
 
 	info = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
 
 	INP_INFO_WLOCK(info);
 	INP_WLOCK(inp);
 	/* Fetch the control block before the PCB is freed. */
 	up = intoudpcb(inp);
 	KASSERT(up != NULL, ("%s: up == NULL", __func__));
 	/* No INP_WUNLOCK follows: inp is not touched after in_pcbfree(). */
 	in_pcbdetach(inp);
 	in_pcbfree(inp);
 	INP_INFO_WUNLOCK(info);
 	udp_discardcb(up);
 }
 
 /*
  * pru_disconnect handler.
  *
  * Sockets flagged INP_IPV4 are passed to the pru_disconnect routine
  * reached through inetsw (INET kernels only); its result is deliberately
  * ignored and 0 is returned.  Otherwise the PCB's association is removed,
  * its local address is reset to the unspecified address, and
  * SS_ISCONNECTED is cleared on the socket.
  *
  * Returns 0 on success, or ENOTCONN when the PCB has no foreign address.
  */
 static int
 udp6_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
 
 #ifdef INET
 	if (inp->inp_vflag & INP_IPV4) {
 		struct pr_usrreqs *pru;
 
 		pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
 		(void)(*pru->pru_disconnect)(so);
 		return (0);
 	}
 #endif
 
 	INP_WLOCK(inp);
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		/*
 		 * Report the failure.  Previously the error code was stored
 		 * in a local variable and then dropped, so callers always
 		 * saw success.
 		 */
 		INP_WUNLOCK(inp);
 		return (ENOTCONN);
 	}
 
 	INP_HASH_WLOCK(pcbinfo);
 	in6_pcbdisconnect(inp);
 	inp->in6p_laddr = in6addr_any;
 	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 /*
  * pru_send handler.
  *
  * An explicit destination must be a struct sockaddr_in6 (EINVAL for a
  * wrong length, EAFNOSUPPORT for a wrong family).  On INET kernels, when
  * IPV6_V6ONLY is not set and the destination (or, without one, the PCB)
  * is IPv4, the request is passed to the pru_send routine reached through
  * inetsw.  Everything else is sent by udp6_output() with the PCB and PCB
  * hash write locks held.
  */
 static int
 udp6_send(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *addr, struct mbuf *control, struct thread *td)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *inp;
 	int error = 0;
 
 	pcbinfo = get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
 
 	INP_WLOCK(inp);
 	if (addr != NULL) {
 		if (addr->sa_len != sizeof(struct sockaddr_in6))
 			error = EINVAL;
 		else if (addr->sa_family != AF_INET6)
 			error = EAFNOSUPPORT;
 		if (error != 0) {
 			/* Malformed destination: drop the data and bail. */
 			INP_WUNLOCK(inp);
 			m_freem(m);
 			return (error);
 		}
 	}
 
 #ifdef INET
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
 		struct sockaddr_in6 *sin6;
 		int hasv4addr;
 
 		if (addr != NULL) {
 			sin6 = (struct sockaddr_in6 *)addr;
 			hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
 			    ? 1 : 0;
 		} else
 			hasv4addr = ((inp->inp_vflag & INP_IPV4) != 0);
 
 		if (hasv4addr) {
 			struct pr_usrreqs *pru;
 
 			/*
 			 * XXXRW: The UDP-layer lock is dropped before the
 			 * IPv4 send routine is called, to avoid recursion.
 			 * That leaves a short window in which inp's fields
 			 * are unstable.  Could the conditions that made us
 			 * pick the UDPv4 output routine change in between?
 			 */
 			INP_WUNLOCK(inp);
 			if (addr != NULL)
 				in6_sin6_2_sin_in_sock(addr);
 			pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
 			/* addr will just be freed in sendit(). */
 			return ((*pru->pru_send)(so, flags, m, addr, control,
 			    td));
 		}
 	}
 #endif
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 	INP_HASH_WLOCK(pcbinfo);
 	error = udp6_output(inp, m, addr, control, td);
 	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * User-request dispatch table for UDP over IPv6 sockets.  Entries are
  * listed alphabetically by member name.
  */
 struct pr_usrreqs udp6_usrreqs = {
 	/* Handlers defined in this file. */
 	.pru_abort =		udp6_abort,
 	.pru_attach =		udp6_attach,
 	.pru_bind =		udp6_bind,
 	.pru_close =		udp6_close,
 	.pru_connect =		udp6_connect,
 	/* Handler defined outside this file. */
 	.pru_control =		in6_control,
 	.pru_detach =		udp6_detach,
 	.pru_disconnect =	udp6_disconnect,
 	.pru_peeraddr =		in6_mapped_peeraddr,
 	.pru_send =		udp6_send,
 	/* The remaining handlers are all defined outside this file. */
 	.pru_shutdown =		udp_shutdown,
 	.pru_sockaddr =		in6_mapped_sockaddr,
 	.pru_soreceive =	soreceive_dgram,
 	.pru_sosend =		sosend_dgram,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 };