Index: head/sys/dev/hyperv/include/hyperv.h =================================================================== --- head/sys/dev/hyperv/include/hyperv.h (revision 298445) +++ head/sys/dev/hyperv/include/hyperv.h (revision 298446) @@ -1,930 +1,930 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV definitions for messages that are sent between instances of the * Channel Management Library in separate partitions, or in some cases, * back to itself. */ #ifndef __HYPERV_H__ #define __HYPERV_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef uint8_t hv_bool_uint8_t; #define HV_S_OK 0x00000000 #define HV_E_FAIL 0x80004005 #define HV_ERROR_NOT_SUPPORTED 0x80070032 #define HV_ERROR_MACHINE_LOCKED 0x800704F7 /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define HV_VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define HV_VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define HV_VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define HV_VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define HV_VMBUS_VERSION_INVALID -1 #define HV_VMBUS_VERSION_CURRENT HV_VMBUS_VERSION_WIN8_1 /* * Make maximum size of pipe payload of 16K */ #define HV_MAX_PIPE_DATA_PAYLOAD (sizeof(BYTE) * 16384) /* * Define pipe_mode values */ #define HV_VMBUS_PIPE_TYPE_BYTE 0x00000000 #define HV_VMBUS_PIPE_TYPE_MESSAGE 0x00000004 /* * The size of the user defined data buffer for non-pipe offers */ #define HV_MAX_USER_DEFINED_BYTES 120 /* * The size of the user defined data buffer for pipe offers */ #define HV_MAX_PIPE_USER_DEFINED_BYTES 116 #define HV_MAX_PAGE_BUFFER_COUNT 32 #define HV_MAX_MULTIPAGE_BUFFER_COUNT 32 #define HV_ALIGN_UP(value, align) \ (((value) & (align-1)) ? 
\ (((value) + (align-1)) & ~(align-1) ) : (value)) #define HV_ALIGN_DOWN(value, align) ( (value) & ~(align-1) ) #define HV_NUM_PAGES_SPANNED(addr, len) \ ((HV_ALIGN_UP(addr+len, PAGE_SIZE) - \ HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT ) typedef struct hv_guid { unsigned char data[16]; } __packed hv_guid; int snprintf_hv_guid(char *, size_t, const hv_guid *); #define HV_NIC_GUID \ .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, \ 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} #define HV_IDE_GUID \ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \ 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} #define HV_SCSI_GUID \ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \ 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} /* * At the center of the Channel Management library is * the Channel Offer. This struct contains the * fundamental information about an offer. */ typedef struct hv_vmbus_channel_offer { hv_guid interface_type; hv_guid interface_instance; uint64_t interrupt_latency_in_100ns_units; uint32_t interface_revision; uint32_t server_context_area_size; /* in bytes */ uint16_t channel_flags; uint16_t mmio_megabytes; /* in bytes * 1024 * 1024 */ union { /* * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes. */ struct { uint8_t user_defined[HV_MAX_USER_DEFINED_BYTES]; } __packed standard; /* * Pipes: The following structure is an integrated pipe protocol, which * is implemented on top of standard user-defined data. pipe * clients have HV_MAX_PIPE_USER_DEFINED_BYTES left for their * own use. */ struct { uint32_t pipe_mode; uint8_t user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES]; } __packed pipe; } u; /* * Sub_channel_index, newly added in Win8. */ uint16_t sub_channel_index; uint16_t padding; } __packed hv_vmbus_channel_offer; typedef uint32_t hv_gpadl_handle; typedef struct { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; } __packed hv_vm_packet_descriptor; typedef uint32_t hv_previous_packet_offset; typedef struct { hv_previous_packet_offset previous_packet_start_offset; hv_vm_packet_descriptor descriptor; } __packed hv_vm_packet_header; typedef struct { uint32_t byte_count; uint32_t byte_offset; } __packed hv_vm_transfer_page; typedef struct { hv_vm_packet_descriptor d; uint16_t transfer_page_set_id; hv_bool_uint8_t sender_owns_set; uint8_t reserved; uint32_t range_count; hv_vm_transfer_page ranges[1]; } __packed hv_vm_transfer_page_packet_header; typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint32_t reserved; } __packed hv_vm_gpadl_packet_header; typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint16_t transfer_page_set_id; uint16_t reserved; } __packed hv_vm_add_remove_transfer_page_set; /* * This structure defines a range in guest * physical space that can be made * to look virtually contiguous. */ typedef struct { uint32_t byte_count; uint32_t byte_offset; uint64_t pfn_array[0]; } __packed hv_gpa_range; /* * This is the format for an Establish Gpadl packet, which contains a handle * by which this GPADL will be known and a set of GPA ranges associated with * it. This can be converted to a MDL by the guest OS. If there are multiple * GPA ranges, then the resulting MDL will be "chained," representing multiple * VA ranges. 
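* For illustration only (assuming a 4K PAGE_SIZE): a single hv_gpa_range * with byte_offset 0x800 and byte_count 0x2000 spans * HV_NUM_PAGES_SPANNED(0x800, 0x2000) == 3 pages, so its pfn_array[] * carries three PFN entries.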
*/ typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint32_t range_count; hv_gpa_range range[1]; } __packed hv_vm_establish_gpadl; /* * This is the format for a Teardown Gpadl packet, which indicates that the * GPADL handle in the Establish Gpadl packet will never be referenced again. */ typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; /* for alignment to a 8-byte boundary */ uint32_t reserved; } __packed hv_vm_teardown_gpadl; /* * This is the format for a GPA-Direct packet, which contains a set of GPA * ranges, in addition to commands and/or data. */ typedef struct { hv_vm_packet_descriptor d; uint32_t reserved; uint32_t range_count; hv_gpa_range range[1]; } __packed hv_vm_data_gpa_direct; /* * This is the format for a Additional data Packet. */ typedef struct { hv_vm_packet_descriptor d; uint64_t total_bytes; uint32_t byte_offset; uint32_t byte_count; uint8_t data[1]; } __packed hv_vm_additional_data; typedef union { hv_vm_packet_descriptor simple_header; hv_vm_transfer_page_packet_header transfer_page_header; hv_vm_gpadl_packet_header gpadl_header; hv_vm_add_remove_transfer_page_set add_remove_transfer_page_header; hv_vm_establish_gpadl establish_gpadl_header; hv_vm_teardown_gpadl teardown_gpadl_header; hv_vm_data_gpa_direct data_gpa_direct_header; } __packed hv_vm_packet_largest_possible_header; typedef enum { HV_VMBUS_PACKET_TYPE_INVALID = 0x0, HV_VMBUS_PACKET_TYPES_SYNCH = 0x1, HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET = 0x2, HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET = 0x3, HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL = 0x4, HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL = 0x5, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND = 0x6, HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES = 0x7, HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL = 0x8, HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT = 0x9, HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST = 0xa, HV_VMBUS_PACKET_TYPE_COMPLETION = 0xb, HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS = 0xc, HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA = 0xd } hv_vmbus_packet_type; #define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED 1 /* * Version 1 messages */ typedef enum { HV_CHANNEL_MESSAGE_INVALID = 0, HV_CHANNEL_MESSAGE_OFFER_CHANNEL = 1, HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER = 2, HV_CHANNEL_MESSAGE_REQUEST_OFFERS = 3, HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED = 4, HV_CHANNEL_MESSAGE_OPEN_CHANNEL = 5, HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT = 6, HV_CHANNEL_MESSAGE_CLOSE_CHANNEL = 7, HV_CHANNEL_MESSAGEL_GPADL_HEADER = 8, HV_CHANNEL_MESSAGE_GPADL_BODY = 9, HV_CHANNEL_MESSAGE_GPADL_CREATED = 10, HV_CHANNEL_MESSAGE_GPADL_TEARDOWN = 11, HV_CHANNEL_MESSAGE_GPADL_TORNDOWN = 12, HV_CHANNEL_MESSAGE_REL_ID_RELEASED = 13, HV_CHANNEL_MESSAGE_INITIATED_CONTACT = 14, HV_CHANNEL_MESSAGE_VERSION_RESPONSE = 15, HV_CHANNEL_MESSAGE_UNLOAD = 16, HV_CHANNEL_MESSAGE_COUNT } hv_vmbus_channel_msg_type; typedef struct { hv_vmbus_channel_msg_type message_type; uint32_t padding; } __packed hv_vmbus_channel_msg_header; /* * Query VMBus Version parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t version; } __packed hv_vmbus_channel_query_vmbus_version; /* * VMBus Version Supported parameters */ typedef struct { hv_vmbus_channel_msg_header header; hv_bool_uint8_t version_supported; } __packed hv_vmbus_channel_version_supported; /* * Channel Offer parameters */ typedef struct { hv_vmbus_channel_msg_header header; hv_vmbus_channel_offer offer; uint32_t child_rel_id; uint8_t monitor_id; /* * This field has been split into a bit field on Win7 * and higher. 
*/ uint8_t monitor_allocated:1; uint8_t reserved:7; /* * Following fields were added in win7 and higher. * Make sure to check the version before accessing these fields. * * If "is_dedicated_interrupt" is set, we must not set the * associated bit in the channel bitmap while sending the * interrupt to the host. * * connection_id is used in signaling the host. */ uint16_t is_dedicated_interrupt:1; uint16_t reserved1:15; uint32_t connection_id; } __packed hv_vmbus_channel_offer_channel; /* * Rescind Offer parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_rescind_offer; /* * Request Offer -- no parameters, SynIC message contains the partition ID * * Set Snoop -- no parameters, SynIC message contains the partition ID * * Clear Snoop -- no parameters, SynIC message contains the partition ID * * All Offers Delivered -- no parameters, SynIC message contains the * partition ID * * Flush Client -- no parameters, SynIC message contains the partition ID */ /* * Open Channel parameters */ typedef struct { hv_vmbus_channel_msg_header header; /* * Identifies the specific VMBus channel that is being opened. */ uint32_t child_rel_id; /* * ID making a particular open request at a channel offer unique. */ uint32_t open_id; /* * GPADL for the channel's ring buffer. */ hv_gpadl_handle ring_buffer_gpadl_handle; /* * Before win8, all incoming channel interrupts are only * delivered on cpu 0. Setting this value to 0 would * preserve the earlier behavior. */ uint32_t target_vcpu; /* * The upstream ring buffer begins at offset zero in the memory described * by ring_buffer_gpadl_handle. The downstream ring buffer follows it at * this offset (in pages). */ uint32_t downstream_ring_buffer_page_offset; /* * User-specific data to be passed along to the server endpoint. */ uint8_t user_data[HV_MAX_USER_DEFINED_BYTES]; } __packed hv_vmbus_channel_open_channel; typedef uint32_t hv_nt_status; /* * Open Channel Result parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t open_id; hv_nt_status status; } __packed hv_vmbus_channel_open_result; /* * Close channel parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_close_channel; /* * Channel Message GPADL */ #define HV_GPADL_TYPE_RING_BUFFER 1 #define HV_GPADL_TYPE_SERVER_SAVE_AREA 2 #define HV_GPADL_TYPE_TRANSACTION 8 /* * The number of PFNs in a GPADL message is defined by the number of pages * that would be spanned by byte_count and byte_offset. 
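* (That is, each range contributes HV_NUM_PAGES_SPANNED(byte_offset, * byte_count) PFN entries.)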
If the implied number * of PFNs won't fit in this packet, there will be a follow-up packet that * contains more */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint16_t range_buf_len; uint16_t range_count; hv_gpa_range range[0]; } __packed hv_vmbus_channel_gpadl_header; /* * This is the follow-up packet that contains more PFNs */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t message_number; uint32_t gpadl; uint64_t pfn[0]; } __packed hv_vmbus_channel_gpadl_body; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint32_t creation_status; } __packed hv_vmbus_channel_gpadl_created; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_teardown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_torndown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_relid_released; typedef struct { hv_vmbus_channel_msg_header header; uint32_t vmbus_version_requested; uint32_t padding2; uint64_t interrupt_page; uint64_t monitor_page_1; uint64_t monitor_page_2; } __packed hv_vmbus_channel_initiate_contact; typedef struct { hv_vmbus_channel_msg_header header; hv_bool_uint8_t version_supported; } __packed hv_vmbus_channel_version_response; typedef hv_vmbus_channel_msg_header hv_vmbus_channel_unload; #define HW_MACADDR_LEN 6 /* * Fixme: Added to quiet "typeof" errors involving hv_vmbus.h when * the including C file was compiled with "-std=c99". */ #ifndef typeof #define typeof __typeof #endif #ifndef NULL #define NULL (void *)0 #endif typedef void *hv_vmbus_handle; #ifndef CONTAINING_RECORD #define CONTAINING_RECORD(address, type, field) ((type *)( \ (uint8_t *)(address) - \ (uint8_t *)(&((type *)0)->field))) #endif /* CONTAINING_RECORD */ #define container_of(ptr, type, member) ({ \ __typeof__( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) enum { HV_VMBUS_IVAR_TYPE, HV_VMBUS_IVAR_INSTANCE, HV_VMBUS_IVAR_NODE, HV_VMBUS_IVAR_DEVCTX }; #define HV_VMBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type) HV_VMBUS_ACCESSOR(type, TYPE, const char *) HV_VMBUS_ACCESSOR(devctx, DEVCTX, struct hv_device *) /* * Common defines for Hyper-V ICs */ #define HV_ICMSGTYPE_NEGOTIATE 0 #define HV_ICMSGTYPE_HEARTBEAT 1 #define HV_ICMSGTYPE_KVPEXCHANGE 2 #define HV_ICMSGTYPE_SHUTDOWN 3 #define HV_ICMSGTYPE_TIMESYNC 4 #define HV_ICMSGTYPE_VSS 5 #define HV_ICMSGHDRFLAG_TRANSACTION 1 #define HV_ICMSGHDRFLAG_REQUEST 2 #define HV_ICMSGHDRFLAG_RESPONSE 4 typedef struct hv_vmbus_pipe_hdr { uint32_t flags; uint32_t msgsize; } __packed hv_vmbus_pipe_hdr; typedef struct hv_vmbus_ic_version { uint16_t major; uint16_t minor; } __packed hv_vmbus_ic_version; typedef struct hv_vmbus_icmsg_hdr { hv_vmbus_ic_version icverframe; uint16_t icmsgtype; hv_vmbus_ic_version icvermsg; uint16_t icmsgsize; uint32_t status; uint8_t ictransaction_id; uint8_t icflags; uint8_t reserved[2]; } __packed hv_vmbus_icmsg_hdr; typedef struct hv_vmbus_icmsg_negotiate { uint16_t icframe_vercnt; uint16_t icmsg_vercnt; uint32_t reserved; hv_vmbus_ic_version icversion_data[1]; /* any size array */ } __packed hv_vmbus_icmsg_negotiate; typedef struct hv_vmbus_shutdown_msg_data { uint32_t reason_code; uint32_t timeout_seconds; uint32_t flags; uint8_t display_message[2048]; } __packed hv_vmbus_shutdown_msg_data; typedef 
struct hv_vmbus_heartbeat_msg_data { uint64_t seq_num; uint32_t reserved[8]; } __packed hv_vmbus_heartbeat_msg_data; typedef struct { /* * offset in bytes from the start of ring data below */ volatile uint32_t write_index; /* * offset in bytes from the start of ring data below */ volatile uint32_t read_index; /* * NOTE: The interrupt_mask field is used only for channels, but * the vmbus connection also uses this data structure */ volatile uint32_t interrupt_mask; /* pad it to PAGE_SIZE so that data starts on a page */ uint8_t reserved[4084]; /* * WARNING: Ring data starts here + ring_data_start_offset * !!! DO NOT place any fields below this !!! */ uint8_t buffer[0]; /* doubles as interrupt mask */ } __packed hv_vmbus_ring_buffer; typedef struct { int length; int offset; uint64_t pfn; } __packed hv_vmbus_page_buffer; typedef struct { int length; int offset; uint64_t pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT]; } __packed hv_vmbus_multipage_buffer; typedef struct { hv_vmbus_ring_buffer* ring_buffer; uint32_t ring_size; /* Includes the shared header */ struct mtx ring_lock; uint32_t ring_data_size; /* ring_size */ uint32_t ring_data_start_offset; } hv_vmbus_ring_buffer_info; typedef void (*hv_vmbus_pfn_channel_callback)(void *context); typedef void (*hv_vmbus_sc_creation_callback)(void *context); typedef enum { HV_CHANNEL_OFFER_STATE, HV_CHANNEL_OPENING_STATE, HV_CHANNEL_OPEN_STATE, HV_CHANNEL_OPENED_STATE, HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE, } hv_vmbus_channel_state; /* * Connection identifier type */ typedef union { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u; } __packed hv_vmbus_connection_id; /* * Definition of the hv_vmbus_signal_event hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } __packed hv_vmbus_input_signal_event; typedef struct { uint64_t align8; hv_vmbus_input_signal_event event; } __packed hv_vmbus_input_signal_event_buffer; typedef struct hv_vmbus_channel { TAILQ_ENTRY(hv_vmbus_channel) list_entry; struct hv_device* device; hv_vmbus_channel_state state; hv_vmbus_channel_offer_channel offer_msg; /* * These are based on the offer_msg.monitor_id. * Save them here for easy access. */ uint8_t monitor_group; uint8_t monitor_bit; uint32_t ring_buffer_gpadl_handle; /* * Allocated memory for the ring buffer */ void* ring_buffer_pages; unsigned long ring_buffer_size; uint32_t ring_buffer_page_count; /* * send to parent */ hv_vmbus_ring_buffer_info outbound; /* * receive from parent */ hv_vmbus_ring_buffer_info inbound; struct taskqueue * rxq; struct task channel_task; hv_vmbus_pfn_channel_callback on_channel_callback; void* channel_callback_context; /* * If batched_reading is set to "true", mask the interrupt * and read until the channel is empty. * If batched_reading is set to "false", the channel is not * going to perform batched reading. * * Batched reading is enabled by default; specific * drivers that don't want this behavior can turn it off. */ boolean_t batched_reading; boolean_t is_dedicated_interrupt; /* * Used as an input param for the HV_CALL_SIGNAL_EVENT hypercall. */ hv_vmbus_input_signal_event_buffer signal_event_buffer; /* * 8-byte aligned pointer into the buffer above */ hv_vmbus_input_signal_event *signal_event_param; /* * From Win8, this field specifies the target virtual processor * on which to deliver the interrupt from the host to the guest. * Before Win8, all channel interrupts would only be * delivered on cpu 0. Setting this value to 0 would preserve * the earlier behavior.
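* (See also the identically named target_vcpu field in * hv_vmbus_channel_open_channel above and vmbus_channel_cpu_set() below.)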
*/ uint32_t target_vcpu; /* The corresponding CPU id in the guest */ uint32_t target_cpu; /* * Support for multi-channels. * The initial offer is considered the primary channel and this * offer message will indicate if the host supports multi-channels. * The guest is free to ask for multi-channels to be offered and can * open these multi-channels as a normal "primary" channel. However, * all multi-channels will have the same type and instance guids as the * primary channel. Requests sent on a given channel will result in a * response on the same channel. */ /* * Multi-channel creation callback. This callback will be called in * process context when a multi-channel offer is received from the host. * The guest can open the multi-channel in the context of this callback. */ hv_vmbus_sc_creation_callback sc_creation_callback; struct mtx sc_lock; /* * Linked list of all the multi-channels if this is a primary channel */ TAILQ_HEAD(, hv_vmbus_channel) sc_list_anchor; TAILQ_ENTRY(hv_vmbus_channel) sc_list_entry; /* * The primary channel this sub-channel belongs to. * This will be NULL for the primary channel. */ struct hv_vmbus_channel *primary_channel; /* * Driver private data */ void *hv_chan_priv1; void *hv_chan_priv2; void *hv_chan_priv3; } hv_vmbus_channel; #define HV_VMBUS_CHAN_ISPRIMARY(chan) ((chan)->primary_channel == NULL) static inline void hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state) { channel->batched_reading = state; } typedef struct hv_device { hv_guid class_id; hv_guid device_id; device_t device; hv_vmbus_channel* channel; } hv_device; int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_open( hv_vmbus_channel* channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context); void hv_vmbus_channel_close(hv_vmbus_channel *channel); int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags); int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, /* must be phys and virt contiguous */ void* contig_buffer, /* page-size multiple */ uint32_t size, uint32_t* gpadl_handle); int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle); struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary); void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu); /** * @brief Get physical address from virtual */ static inline unsigned long hv_get_phys_addr(void *virt) { unsigned long ret; ret = (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK)); return (ret); } extern uint32_t hv_vmbus_protocal_version; #endif /* __HYPERV_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.c =================================================================== --- 
head/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 298445) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 298446) @@ -1,1109 +1,1109 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV vmbus network VSC (virtual services client) module * */ #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" /* priv1 and priv2 are consumed by the main driver */ #define hv_chan_rdbuf hv_chan_priv3 MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver"); /* * Forward declarations */ static void hv_nv_on_channel_callback(void *xchan); static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device); static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device); static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev); static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev); static int hv_nv_connect_to_vsp(struct hv_device *device); static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct hv_device *device, struct hv_vmbus_channel *, hv_vm_packet_descriptor *pkt); static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan, uint64_t tid, uint32_t status); static void hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device, struct hv_vmbus_channel *chan, hv_vm_packet_descriptor *pkt); /* * Allocate the netvsc_dev instance and hang it off the softc. */ static inline netvsc_dev * hv_nv_alloc_net_device(struct hv_device *device) { netvsc_dev *net_dev; hn_softc_t *sc = device_get_softc(device->device); net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_WAITOK | M_ZERO); net_dev->dev = device; net_dev->destroy = FALSE; sc->net_dev = net_dev; return (net_dev); } /* * Return the netvsc_dev for outbound use; NULL once teardown has begun. */ static inline netvsc_dev * hv_nv_get_outbound_net_device(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev; if ((net_dev != NULL) && net_dev->destroy) { return (NULL); } return (net_dev); } /* * Return the netvsc_dev for inbound (receive) processing. */ static inline netvsc_dev * hv_nv_get_inbound_net_device(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev; if (net_dev == NULL) { return (net_dev); } 
/* * When the device is being destroyed; we only * permit incoming packets if and only if there * are outstanding sends. */ if (net_dev->destroy) { return (NULL); } return (net_dev); } int hv_nv_get_next_send_section(netvsc_dev *net_dev) { unsigned long bitsmap_words = net_dev->bitsmap_words; unsigned long *bitsmap = net_dev->send_section_bitsmap; unsigned long idx; int ret = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; int i; for (i = 0; i < bitsmap_words; i++) { idx = ffsl(~bitsmap[i]); if (0 == idx) continue; idx--; KASSERT(i * BITS_PER_LONG + idx < net_dev->send_section_count, ("invalid i %d and idx %lu", i, idx)); if (atomic_testandset_long(&bitsmap[i], idx)) continue; ret = i * BITS_PER_LONG + idx; break; } return (ret); } /* * Net VSC initialize receive buffer with net VSP * * Net VSP: Network virtual services client, also known as the * Hyper-V extensible switch and the synthetic data path. */ static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_NETVSC, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); /* * Establish the GPADL handle for this buffer on this channel. * Note: This call uses the vmbus connection rather than the * channel to establish the gpadl handle. * GPADL: Guest physical address descriptor list. */ ret = hv_vmbus_channel_establish_gpadl( device->channel, net_dev->rx_buf, net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle); if (ret != 0) { goto cleanup; } /* sema_wait(&ext->channel_init_sema); KYS CHECK */ /* Notify the NetVsp of the gpadl handle */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf; init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = net_dev->rx_buf_gpadl_handle; init_pkt->msgs.vers_1_msgs.send_rx_buf.id = NETVSC_RECEIVE_BUFFER_ID; /* Send the gpadl notification request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } sema_wait(&net_dev->channel_init_sema); /* Check the response */ if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status != nvsp_status_success) { ret = EINVAL; goto cleanup; } net_dev->rx_section_count = init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; net_dev->rx_sections = malloc(net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK); memcpy(net_dev->rx_sections, init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections, net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section)); /* * For first release, there should only be 1 section that represents * the entire receive buffer */ if (net_dev->rx_section_count != 1 || net_dev->rx_sections->offset != 0) { ret = EINVAL; goto cleanup; } goto exit; cleanup: hv_nv_destroy_rx_buffer(net_dev); exit: return (ret); } /* * Net VSC initialize send buffer with net VSP */ static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } net_dev->send_buf = contigmalloc(net_dev->send_buf_size, M_NETVSC, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); if (net_dev->send_buf == NULL) { ret = ENOMEM; goto cleanup; } /* * Establish the gpadl handle for 
this buffer on this channel. * Note: This call uses the vmbus connection rather than the * channel to establish the gpadl handle. */ ret = hv_vmbus_channel_establish_gpadl(device->channel, net_dev->send_buf, net_dev->send_buf_size, &net_dev->send_buf_gpadl_handle); if (ret != 0) { goto cleanup; } /* Notify the NetVsp of the gpadl handle */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf; init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = net_dev->send_buf_gpadl_handle; init_pkt->msgs.vers_1_msgs.send_rx_buf.id = NETVSC_SEND_BUFFER_ID; /* Send the gpadl notification request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } sema_wait(&net_dev->channel_init_sema); /* Check the response */ if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status != nvsp_status_success) { ret = EINVAL; goto cleanup; } net_dev->send_section_size = init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size; net_dev->send_section_count = net_dev->send_buf_size / net_dev->send_section_size; net_dev->bitsmap_words = howmany(net_dev->send_section_count, BITS_PER_LONG); net_dev->send_section_bitsmap = malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC, M_WAITOK | M_ZERO); goto exit; cleanup: hv_nv_destroy_send_buffer(net_dev); exit: return (ret); } /* * Net VSC destroy receive buffer */ static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) { nvsp_msg *revoke_pkt; int ret = 0; /* * If we got a section count, it means we received a * send_rx_buf_complete msg * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, * we need to send a revoke msg here */ if (net_dev->rx_section_count) { /* Send the revoke receive buffer */ revoke_pkt = &net_dev->revoke_packet; memset(revoke_pkt, 0, sizeof(nvsp_msg)); revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf; revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id = NETVSC_RECEIVE_BUFFER_ID; ret = hv_vmbus_channel_send_packet(net_dev->dev->channel, revoke_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } } /* Tear down the gpadl on the vsp end */ if (net_dev->rx_buf_gpadl_handle) { ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel, net_dev->rx_buf_gpadl_handle); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } net_dev->rx_buf_gpadl_handle = 0; } if (net_dev->rx_buf) { /* Free up the receive buffer */ contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_NETVSC); net_dev->rx_buf = NULL; } if (net_dev->rx_sections) { free(net_dev->rx_sections, M_NETVSC); net_dev->rx_sections = NULL; net_dev->rx_section_count = 0; } return (ret); } /* * Net VSC destroy send buffer */ static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev) { nvsp_msg *revoke_pkt; int ret = 0; /* * If we got a section count, it means we received a * send_rx_buf_complete msg * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, * we need to send a revoke msg here */ if (net_dev->send_section_size) { /* Send the revoke send buffer */ revoke_pkt = &net_dev->revoke_packet; memset(revoke_pkt, 0, sizeof(nvsp_msg)); revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_send_buf; 
revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id = NETVSC_SEND_BUFFER_ID; ret = hv_vmbus_channel_send_packet(net_dev->dev->channel, revoke_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } } /* Tear down the gpadl on the vsp end */ if (net_dev->send_buf_gpadl_handle) { ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel, net_dev->send_buf_gpadl_handle); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } net_dev->send_buf_gpadl_handle = 0; } if (net_dev->send_buf) { /* Free up the send buffer */ contigfree(net_dev->send_buf, net_dev->send_buf_size, M_NETVSC); net_dev->send_buf = NULL; } if (net_dev->send_section_bitsmap) { free(net_dev->send_section_bitsmap, M_NETVSC); } return (ret); } /* * Attempt to negotiate the caller-specified NVSP version * * For NVSP v2, Server 2008 R2 does not set * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers * to the negotiated version, so we cannot rely on that. */ static int hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev, uint32_t nvsp_ver) { nvsp_msg *init_pkt; int ret; init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_type_init; /* * Specify the parameter as the only acceptable protocol version */ init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver; init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver; /* Send the init request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) return (-1); sema_wait(&net_dev->channel_init_sema); if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success) return (EINVAL); return (0); } /* * Send an NDIS version 2 config packet containing the MTU. * * Not valid for NDIS version 1. */ static int hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) return (-ENODEV); /* * Set up the configuration packet, write the MTU, and * indicate that we are capable of handling VLAN tags */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config; init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu; init_pkt-> msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q = 1; /* Send the configuration packet */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (ret != 0) return (-EINVAL); return (0); } /* * Net VSC connect to VSP */ static int hv_nv_connect_to_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; uint32_t ndis_version; uint32_t protocol_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; int i; int protocol_number = nitems(protocol_list); int ret = 0; device_t dev = device->device; hn_softc_t *sc = device_get_softc(dev); struct ifnet *ifp = sc->hn_ifp; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } /* * Negotiate the NVSP version. Try the latest NVSP first. 
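* protocol_list[] is ordered oldest to newest, so walk it from the * end and stop at the first version the host accepts.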
*/ for (i = protocol_number - 1; i >= 0; i--) { if (hv_nv_negotiate_nvsp_protocol(device, net_dev, protocol_list[i]) == 0) { net_dev->nvsp_version = protocol_list[i]; if (bootverbose) device_printf(dev, "Netvsc: got version 0x%x\n", net_dev->nvsp_version); break; } } if (i < 0) { if (bootverbose) device_printf(dev, "failed to negotiate a valid " "protocol.\n"); return (EPROTO); } /* * Set the MTU if supported by this NVSP protocol version * This needs to be right after the NVSP init message per Haiyang */ if (net_dev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) ret = hv_nv_send_ndis_config(device, ifp->if_mtu); /* * Send the NDIS version */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) { ndis_version = NDIS_VERSION_6_1; } else { ndis_version = NDIS_VERSION_6_30; } init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers = (ndis_version & 0xFFFF0000) >> 16; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers = ndis_version & 0xFFFF; /* Send the init request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (ret != 0) { goto cleanup; } /* * TODO: BUGBUG - We have to wait for the above msg since the netvsp * uses KMCL which acknowledges packet (completion packet) * since our Vmbus always set the * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag */ /* sema_wait(&NetVscChannel->channel_init_sema); */ /* Post the big receive buffer to NetVSP */ if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; else net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; ret = hv_nv_init_rx_buffer_with_net_vsp(device); if (ret == 0) ret = hv_nv_init_send_buffer_with_net_vsp(device); cleanup: return (ret); } /* * Net VSC disconnect from VSP */ static void hv_nv_disconnect_from_vsp(netvsc_dev *net_dev) { hv_nv_destroy_rx_buffer(net_dev); hv_nv_destroy_send_buffer(net_dev); } /* * Callback handler for subchannel offer * @@param context new subchannel */ static void hv_nv_subchan_callback(void *xchan) { struct hv_vmbus_channel *chan = xchan; netvsc_dev *net_dev; uint16_t chn_index = chan->offer_msg.offer.sub_channel_index; struct hv_device *device = chan->device; hn_softc_t *sc = device_get_softc(device->device); int ret; net_dev = sc->net_dev; if (chn_index >= net_dev->num_channel) { /* Would this ever happen? 
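* chn_index comes from the host's subchannel offer, so it should * always be below num_channel; bail out instead of touching state * for a channel that was never allocated.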
*/ return; } netvsc_subchan_callback(sc, chan); chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); ret = hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0, hv_nv_on_channel_callback, chan); } /* * Net VSC on device add * * Callback when the device belonging to this driver is added */ netvsc_dev * hv_nv_on_device_add(struct hv_device *device, void *additional_info) { struct hv_vmbus_channel *chan = device->channel; netvsc_dev *net_dev; int ret = 0; net_dev = hv_nv_alloc_net_device(device); if (net_dev == NULL) return NULL; /* Initialize the NetVSC channel extension */ sema_init(&net_dev->channel_init_sema, 0, "netdev_sema"); chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); /* * Open the channel */ ret = hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0, hv_nv_on_channel_callback, chan); if (ret != 0) { free(chan->hv_chan_rdbuf, M_NETVSC); goto cleanup; } chan->sc_creation_callback = hv_nv_subchan_callback; /* * Connect with the NetVsp */ ret = hv_nv_connect_to_vsp(device); if (ret != 0) goto close; return (net_dev); close: /* Now, we can close the channel safely */ free(chan->hv_chan_rdbuf, M_NETVSC); hv_vmbus_channel_close(chan); cleanup: /* * Free the packet buffers on the netvsc device packet queue. * Release other resources. */ sema_destroy(&net_dev->channel_init_sema); free(net_dev, M_NETVSC); return (NULL); } /* * Net VSC on device remove */ int hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev;; /* Stop outbound traffic ie sends and receives completions */ net_dev->destroy = TRUE; hv_nv_disconnect_from_vsp(net_dev); /* At this point, no one should be accessing net_dev except in here */ /* Now, we can close the channel safely */ if (!destroy_channel) { device->channel->state = HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE; } free(device->channel->hv_chan_rdbuf, M_NETVSC); hv_vmbus_channel_close(device->channel); sema_destroy(&net_dev->channel_init_sema); free(net_dev, M_NETVSC); return (0); } /* * Net VSC on send completion */ static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct hv_device *device, struct hv_vmbus_channel *chan, hv_vm_packet_descriptor *pkt) { nvsp_msg *nvsp_msg_pkt; netvsc_packet *net_vsc_pkt; nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_rx_buf_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_send_buf_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg5_type_subchannel) { /* Copy the response back */ memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt, sizeof(nvsp_msg)); sema_post(&net_dev->channel_init_sema); } else if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_rndis_pkt_complete) { /* Get the send context */ net_vsc_pkt = (netvsc_packet *)(unsigned long)pkt->transaction_id; if (NULL != net_vsc_pkt) { if (net_vsc_pkt->send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { u_long mask; int idx; idx = net_vsc_pkt->send_buf_section_idx / BITS_PER_LONG; KASSERT(idx < net_dev->bitsmap_words, ("invalid section index %u", net_vsc_pkt->send_buf_section_idx)); mask = 1UL << (net_vsc_pkt->send_buf_section_idx % BITS_PER_LONG); KASSERT(net_dev->send_section_bitsmap[idx] & mask, ("index bitmap 0x%lx, section index %u, " "bitmap idx %d, bitmask 
0x%lx", net_dev->send_section_bitsmap[idx], net_vsc_pkt->send_buf_section_idx, idx, mask)); atomic_clear_long( &net_dev->send_section_bitsmap[idx], mask); } /* Notify the layer above us */ net_vsc_pkt->compl.send.on_send_completion(chan, net_vsc_pkt->compl.send.send_completion_context); } } } /* * Net VSC on send * Sends a packet on the specified Hyper-V device. * Returns 0 on success, non-zero on failure. */ int hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt) { nvsp_msg send_msg; int ret; send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt; if (pkt->is_data_pkt) { /* 0 is RMC_DATA */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0; } else { /* 1 is RMC_CONTROL */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1; } send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx = pkt->send_buf_section_idx; send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = pkt->send_buf_section_size; if (pkt->page_buf_count) { ret = hv_vmbus_channel_send_packet_pagebuffer(chan, pkt->page_buffers, pkt->page_buf_count, &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); } else { ret = hv_vmbus_channel_send_packet(chan, &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); } return (ret); } /* * Net VSC on receive * * In the FreeBSD Hyper-V virtual world, this function deals exclusively * with virtual addresses. */ static void hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device, struct hv_vmbus_channel *chan, hv_vm_packet_descriptor *pkt) { hv_vm_transfer_page_packet_header *vm_xfer_page_pkt; nvsp_msg *nvsp_msg_pkt; netvsc_packet vsc_pkt; netvsc_packet *net_vsc_pkt = &vsc_pkt; device_t dev = device->device; int count = 0; int i = 0; int status = nvsp_status_success; /* * All inbound packets other than send completion should be * xfer page packet. */ if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES) { device_printf(dev, "packet type %d is invalid!\n", pkt->type); return; } nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); /* Make sure this is a valid nvsp packet */ if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt) { device_printf(dev, "packet hdr type %d is invalid!\n", pkt->type); return; } vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt; if (vm_xfer_page_pkt->transfer_page_set_id != NETVSC_RECEIVE_BUFFER_ID) { device_printf(dev, "transfer_page_set_id %d is invalid!\n", vm_xfer_page_pkt->transfer_page_set_id); return; } count = vm_xfer_page_pkt->range_count; net_vsc_pkt->device = device; /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ for (i = 0; i < count; i++) { net_vsc_pkt->status = nvsp_status_success; net_vsc_pkt->data = (void *)((unsigned long)net_dev->rx_buf + vm_xfer_page_pkt->ranges[i].byte_offset); net_vsc_pkt->tot_data_buf_len = vm_xfer_page_pkt->ranges[i].byte_count; hv_rf_on_receive(net_dev, device, chan, net_vsc_pkt); if (net_vsc_pkt->status != nvsp_status_success) { status = nvsp_status_failure; } } /* * Moved completion call back here so that all received * messages (not just data messages) will trigger a response * message back to the host. 
*/ hv_nv_on_receive_completion(chan, vm_xfer_page_pkt->d.transaction_id, status); } /* * Net VSC on receive completion * * Send a receive completion packet to RNDIS device (ie NetVsp) */ static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan, uint64_t tid, uint32_t status) { nvsp_msg rx_comp_msg; int retries = 0; int ret = 0; rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete; /* Pass in the status */ rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status = status; retry_send_cmplt: /* Send the completion */ ret = hv_vmbus_channel_send_packet(chan, &rx_comp_msg, sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0); if (ret == 0) { /* success */ /* no-op */ } else if (ret == EAGAIN) { /* no more room... wait a bit and attempt to retry 3 times */ retries++; if (retries < 4) { DELAY(100); goto retry_send_cmplt; } } } /* * Net VSC receiving vRSS send table from VSP */ static void hv_nv_send_table(struct hv_device *device, hv_vm_packet_descriptor *pkt) { netvsc_dev *net_dev; nvsp_msg *nvsp_msg_pkt; int i; uint32_t count, *table; net_dev = hv_nv_get_inbound_net_device(device); if (!net_dev) return; nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg5_type_send_indirection_table) { printf("Netvsc: !Warning! receive msg type not " "send_indirection_table. type = %d\n", nvsp_msg_pkt->hdr.msg_type); return; } count = nvsp_msg_pkt->msgs.vers_5_msgs.send_table.count; if (count != VRSS_SEND_TABLE_SIZE) { printf("Netvsc: Received wrong send table size: %u\n", count); return; } table = (uint32_t *) ((unsigned long)&nvsp_msg_pkt->msgs.vers_5_msgs.send_table + nvsp_msg_pkt->msgs.vers_5_msgs.send_table.offset); for (i = 0; i < count; i++) net_dev->vrss_send_table[i] = table[i]; } /* * Net VSC on channel callback */ static void hv_nv_on_channel_callback(void *xchan) { struct hv_vmbus_channel *chan = xchan; struct hv_device *device = chan->device; netvsc_dev *net_dev; device_t dev = device->device; uint32_t bytes_rxed; uint64_t request_id; hv_vm_packet_descriptor *desc; uint8_t *buffer; int bufferlen = NETVSC_PACKET_SIZE; int ret = 0; net_dev = hv_nv_get_inbound_net_device(device); if (net_dev == NULL) return; buffer = chan->hv_chan_rdbuf; do { ret = hv_vmbus_channel_recv_packet_raw(chan, buffer, bufferlen, &bytes_rxed, &request_id); if (ret == 0) { if (bytes_rxed > 0) { desc = (hv_vm_packet_descriptor *)buffer; switch (desc->type) { case HV_VMBUS_PACKET_TYPE_COMPLETION: hv_nv_on_send_completion(net_dev, device, chan, desc); break; case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES: hv_nv_on_receive(net_dev, device, chan, desc); break; case HV_VMBUS_PACKET_TYPE_DATA_IN_BAND: hv_nv_send_table(device, desc); break; default: device_printf(dev, "hv_cb recv unknow type %d " " packet\n", desc->type); break; } } else { break; } } else if (ret == ENOBUFS) { /* Handle large packet */ if (bufferlen > NETVSC_PACKET_SIZE) { free(buffer, M_NETVSC); buffer = NULL; } /* alloc new buffer */ buffer = malloc(bytes_rxed, M_NETVSC, M_NOWAIT); if (buffer == NULL) { device_printf(dev, "hv_cb malloc buffer failed, len=%u\n", bytes_rxed); bufferlen = 0; break; } bufferlen = bytes_rxed; } } while (1); if (bufferlen > NETVSC_PACKET_SIZE) free(buffer, M_NETVSC); hv_rf_channel_rollup(chan); } Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.h =================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 298445) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 298446) 
@@ -1,1271 +1,1271 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * HyperV vmbus (virtual machine bus) network VSC (virtual services client) * header file * * (Updated from unencumbered NvspProtocol.h) */ #ifndef __HV_NET_VSC_H__ #define __HV_NET_VSC_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HN_USE_TXDESC_BUFRING MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 #define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1) #define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2) #define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2 #define VERSION_4_OFFLOAD_SIZE 22 #define NVSP_OPERATIONAL_STATUS_OK (0x00000000) #define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001) #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002) #define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003) #define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004) /* * Maximun number of transfer pages (packets) the VSP will use on a receive */ #define NVSP_MAX_PACKETS_PER_RECEIVE 375 /* vRSS stuff */ #define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 #define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 #define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 #define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 struct rndis_obj_header { uint8_t type; uint8_t rev; uint16_t size; } __packed; /* rndis_recv_scale_cap/cap_flag */ #define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 #define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000 #define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 #define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 /* RNDIS_RECEIVE_SCALE_CAPABILITIES */ struct 
rndis_recv_scale_cap { struct rndis_obj_header hdr; uint32_t cap_flag; uint32_t num_int_msg; uint32_t num_recv_que; uint16_t num_indirect_tabent; } __packed; /* rndis_recv_scale_param flags */ #define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 #define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 #define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 #define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 #define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 /* Hash info bits */ #define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001 #define RNDIS_HASH_IPV4 0x00000100 #define RNDIS_HASH_TCP_IPV4 0x00000200 #define RNDIS_HASH_IPV6 0x00000400 #define RNDIS_HASH_IPV6_EX 0x00000800 #define RNDIS_HASH_TCP_IPV6 0x00001000 #define RNDIS_HASH_TCP_IPV6_EX 0x00002000 #define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) #define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 #define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 /* RNDIS_RECEIVE_SCALE_PARAMETERS */ typedef struct rndis_recv_scale_param_ { struct rndis_obj_header hdr; /* Qualifies the rest of the information */ uint16_t flag; /* The base CPU number to do receive processing. not used */ uint16_t base_cpu_number; /* This describes the hash function and type being enabled */ uint32_t hashinfo; /* The size of indirection table array */ uint16_t indirect_tabsize; /* The offset of the indirection table from the beginning of this * structure */ uint32_t indirect_taboffset; /* The size of the hash secret key */ uint16_t hashkey_size; /* The offset of the secret key from the beginning of this structure */ uint32_t hashkey_offset; uint32_t processor_masks_offset; uint32_t num_processor_masks; uint32_t processor_masks_entry_size; } rndis_recv_scale_param; typedef enum nvsp_msg_type_ { nvsp_msg_type_none = 0, /* * Init Messages */ nvsp_msg_type_init = 1, nvsp_msg_type_init_complete = 2, nvsp_version_msg_start = 100, /* * Version 1 Messages */ nvsp_msg_1_type_send_ndis_vers = nvsp_version_msg_start, nvsp_msg_1_type_send_rx_buf, nvsp_msg_1_type_send_rx_buf_complete, nvsp_msg_1_type_revoke_rx_buf, nvsp_msg_1_type_send_send_buf, nvsp_msg_1_type_send_send_buf_complete, nvsp_msg_1_type_revoke_send_buf, nvsp_msg_1_type_send_rndis_pkt, nvsp_msg_1_type_send_rndis_pkt_complete, /* * Version 2 Messages */ nvsp_msg_2_type_send_chimney_delegated_buf, nvsp_msg_2_type_send_chimney_delegated_buf_complete, nvsp_msg_2_type_revoke_chimney_delegated_buf, nvsp_msg_2_type_resume_chimney_rx_indication, nvsp_msg_2_type_terminate_chimney, nvsp_msg_2_type_terminate_chimney_complete, nvsp_msg_2_type_indicate_chimney_event, nvsp_msg_2_type_send_chimney_packet, nvsp_msg_2_type_send_chimney_packet_complete, nvsp_msg_2_type_post_chimney_rx_request, nvsp_msg_2_type_post_chimney_rx_request_complete, nvsp_msg_2_type_alloc_rx_buf, nvsp_msg_2_type_alloc_rx_buf_complete, nvsp_msg_2_type_free_rx_buf, nvsp_msg_2_send_vmq_rndis_pkt, nvsp_msg_2_send_vmq_rndis_pkt_complete, nvsp_msg_2_type_send_ndis_config, nvsp_msg_2_type_alloc_chimney_handle, nvsp_msg_2_type_alloc_chimney_handle_complete, nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete, /* * Version 4 Messages */ nvsp_msg4_type_send_vf_association, nvsp_msg4_type_switch_data_path, nvsp_msg4_type_uplink_connect_state_deprecated, nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated, /* * Version 5 Messages */ nvsp_msg5_type_oid_query_ex, nvsp_msg5_type_oid_query_ex_comp, nvsp_msg5_type_subchannel, nvsp_msg5_type_send_indirection_table, nvsp_msg5_max = 
nvsp_msg5_type_send_indirection_table, } nvsp_msg_type; typedef enum nvsp_status_ { nvsp_status_none = 0, nvsp_status_success, nvsp_status_failure, /* Deprecated */ nvsp_status_prot_vers_range_too_new, /* Deprecated */ nvsp_status_prot_vers_range_too_old, nvsp_status_invalid_rndis_pkt, nvsp_status_busy, nvsp_status_max, } nvsp_status; typedef struct nvsp_msg_hdr_ { uint32_t msg_type; } __packed nvsp_msg_hdr; /* * Init Messages */ /* * This message is used by the VSC to initialize the channel * after the channel has been opened. This message should * never include anything other than versioning (i.e. this * message will be the same forever). * * Forever is a long time. The values have been redefined * in Win7 to indicate major and minor protocol version * number. */ typedef struct nvsp_msg_init_ { union { struct { uint16_t minor_protocol_version; uint16_t major_protocol_version; } s; /* Formerly min_protocol_version */ uint32_t protocol_version; } p1; /* Formerly max_protocol_version */ uint32_t protocol_version_2; } __packed nvsp_msg_init; /* * This message is used by the VSP to complete the initialization * of the channel. This message should never include anything other * than versioning (i.e. this message will be the same forever). */ typedef struct nvsp_msg_init_complete_ { /* Deprecated */ uint32_t negotiated_prot_vers; uint32_t max_mdl_chain_len; uint32_t status; } __packed nvsp_msg_init_complete; typedef union nvsp_msg_init_uber_ { nvsp_msg_init init; nvsp_msg_init_complete init_compl; } __packed nvsp_msg_init_uber; /* * Version 1 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_1_msg_send_ndis_version_ { uint32_t ndis_major_vers; /* Deprecated */ uint32_t ndis_minor_vers; } __packed nvsp_1_msg_send_ndis_version; /* * This message is used by the VSC to send a receive buffer * to the VSP. The VSP can then use the receive buffer to * send data to the VSC. */ typedef struct nvsp_1_msg_send_rx_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_rx_buf; typedef struct nvsp_1_rx_buf_section_ { uint32_t offset; uint32_t sub_allocation_size; uint32_t num_sub_allocations; uint32_t end_offset; } __packed nvsp_1_rx_buf_section; /* * This message is used by the VSP to acknowledge a receive * buffer sent by the VSC. This message must be sent by the * VSP before the VSP uses the receive buffer. */ typedef struct nvsp_1_msg_send_rx_buf_complete_ { uint32_t status; uint32_t num_sections; /* * The receive buffer is split into two parts, a large * suballocation section and a small suballocation * section. These sections are then suballocated by a * certain size. * * For example, the following break up of the receive * buffer has 6 large suballocations and 10 small * suballocations. * * | Large Section | | Small Section | * ------------------------------------------------------------ * | | | | | | | | | | | | | | | | | | * | | * LargeOffset SmallOffset */ nvsp_1_rx_buf_section sections[1]; } __packed nvsp_1_msg_send_rx_buf_complete; /* * This message is sent by the VSC to revoke the receive buffer. * After the VSP completes this transaction, the VSP should never * use the receive buffer again. */ typedef struct nvsp_1_msg_revoke_rx_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_rx_buf; /* * This message is used by the VSC to send a send buffer * to the VSP. The VSC can then use the send buffer to * send data to the VSP.
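 *
 * (Editor's aside, a sketch that is not part of this commit: once the
 * VSP's send_rx_buf_complete above arrives, the VSC records the
 * returned sections, roughly
 *
 *	nvsp_1_msg_send_rx_buf_complete *cmpl;
 *	uint32_t i;
 *
 *	for (i = 0; i < cmpl->num_sections; i++) {
 *		nvsp_1_rx_buf_section *sec = &cmpl->sections[i];
 *		(use sec->offset, sec->sub_allocation_size and
 *		    sec->num_sub_allocations for RX bookkeeping)
 *	}
 *
 * which is approximately what the receive-buffer setup in
 * hv_net_vsc.c has to do.)
 *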
*/ typedef struct nvsp_1_msg_send_send_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_send_buf; /* * This message is used by the VSP to acknowledge a send * buffer sent by the VSC. This message must be sent by the * VSP before the VSP uses the sent buffer. */ typedef struct nvsp_1_msg_send_send_buf_complete_ { uint32_t status; /* * The VSC gets to choose the size of the send buffer and * the VSP gets to choose the section size of the buffer. * This was done to enable dynamic reconfigurations when * the cost of GPA-direct buffers decreases. */ uint32_t section_size; } __packed nvsp_1_msg_send_send_buf_complete; /* * This message is sent by the VSC to revoke the send buffer. * After the VSP completes this transaction, the VSP should never * use the send buffer again. */ typedef struct nvsp_1_msg_revoke_send_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_send_buf; /* * This message is used by both the VSP and the VSC to send * an RNDIS message to the opposite channel endpoint. */ typedef struct nvsp_1_msg_send_rndis_pkt_ { /* * This field is specified by RNDIS. It assumes there are * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. */ uint32_t chan_type; /* * This field is used to send part or all of the data * through a send buffer. This value specifies an * index into the send buffer. If the index is * 0xFFFFFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint32_t send_buf_section_idx; uint32_t send_buf_section_size; } __packed nvsp_1_msg_send_rndis_pkt; /* * This message is used by both the VSP and the VSC to complete * an RNDIS message to the opposite channel endpoint. At this * point, the initiator of this message cannot use any resources * associated with the original RNDIS packet. */ typedef struct nvsp_1_msg_send_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_1_msg_send_rndis_pkt_complete; /* * Version 2 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_2_netvsc_capabilities_ { union { uint64_t as_uint64; struct { uint64_t vmq : 1; uint64_t chimney : 1; uint64_t sriov : 1; uint64_t ieee8021q : 1; uint64_t correlationid : 1; uint64_t teaming : 1; } u2; } u1; } __packed nvsp_2_netvsc_capabilities; typedef struct nvsp_2_msg_send_ndis_config_ { uint32_t mtu; uint32_t reserved; nvsp_2_netvsc_capabilities capabilities; } __packed nvsp_2_msg_send_ndis_config; /* * NvspMessage2TypeSendChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_send_chimney_buf_ { /* * On WIN7 beta, delegated_obj_max_size is defined as a uint32_t. * Since WIN7 RC, it was split into two uint16_t. To have the same * struct layout, delegated_obj_max_size shall be the first field. */ uint16_t delegated_obj_max_size; /* * The revision # of chimney protocol used between NVSC and NVSP. * * This revision is NOT related to the chimney revision between * NDIS protocol and miniport drivers.
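 *
 * (Editor's aside, a sketch not in this commit: the send_rndis_pkt
 * message defined above is where a transmit path either references a
 * send-buffer section or falls back to GPA-direct pages, roughly
 *
 *	if (pkt_len <= section_size and a section idx is free) {
 *		(copy RNDIS message and payload into section idx)
 *		pkt.send_buf_section_idx = idx;
 *		pkt.send_buf_section_size = pkt_len;
 *	} else {
 *		pkt.send_buf_section_idx =
 *		    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
 *		pkt.send_buf_section_size = 0;
 *	}
 *
 * hn_encap() in hv_netvsc_drv_freebsd.c below does this for real.)
 *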
*/ uint16_t revision; uint32_t gpadl_handle; } __packed nvsp_2_msg_send_chimney_buf; /* Unsupported chimney revision 0 (only present in WIN7 beta) */ #define NVSP_CHIMNEY_REVISION_0 0 /* WIN7 Beta Chimney QFE */ #define NVSP_CHIMNEY_REVISION_1 1 /* The chimney revision since WIN7 RC */ #define NVSP_CHIMNEY_REVISION_2 2 /* * NvspMessage2TypeSendChimneyDelegatedBufferComplete */ typedef struct nvsp_2_msg_send_chimney_buf_complete_ { uint32_t status; /* * Maximum number of outstanding sends and pre-posted receives. * * NVSC should not post more than SendQuota/ReceiveQuota packets. * Otherwise, it can block the non-chimney path for an indefinite * amount of time. * (since chimney sends/receives are affected by the remote peer). * * Note: NVSP enforces the quota restrictions on a per-VMBCHANNEL * basis. It doesn't enforce the restriction separately for chimney * send/receive. If NVSC doesn't voluntarily enforce "SendQuota", * it may kill its own network connectivity. */ uint32_t send_quota; uint32_t rx_quota; } __packed nvsp_2_msg_send_chimney_buf_complete; /* * NvspMessage2TypeRevokeChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_revoke_chimney_buf_ { uint32_t gpadl_handle; } __packed nvsp_2_msg_revoke_chimney_buf; #define NVSP_CHIMNEY_OBJECT_TYPE_NEIGHBOR 0 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH4 1 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH6 2 #define NVSP_CHIMNEY_OBJECT_TYPE_TCP 3 /* * NvspMessage2TypeAllocateChimneyHandle */ typedef struct nvsp_2_msg_alloc_chimney_handle_ { uint64_t vsc_context; uint32_t object_type; } __packed nvsp_2_msg_alloc_chimney_handle; /* * NvspMessage2TypeAllocateChimneyHandleComplete */ typedef struct nvsp_2_msg_alloc_chimney_handle_complete_ { uint32_t vsp_handle; } __packed nvsp_2_msg_alloc_chimney_handle_complete; /* * NvspMessage2TypeResumeChimneyRXIndication */ typedef struct nvsp_2_msg_resume_chimney_rx_indication { /* * Handle identifying the offloaded connection */ uint32_t vsp_tcp_handle; } __packed nvsp_2_msg_resume_chimney_rx_indication; #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE (0x01u) #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_RESERVED (~(0x01u)) /* * NvspMessage2TypeTerminateChimney */ typedef struct nvsp_2_msg_terminate_chimney_ { /* * Handle identifying the offloaded object */ uint32_t vsp_handle; /* * Terminate Offload Flags * Bit 0: * When set to 0, terminate the offload at the destination NIC * Bit 1-31: Reserved, shall be zero */ uint32_t flags; union { /* * This field is valid only when bit 0 of flags is clear. * It specifies the index into the premapped delegated * object buffer. The buffer was sent through the * NvspMessage2TypeSendChimneyDelegatedBuffer * message at initialization time. * * NVSP will write the delegated state into the delegated * buffer upon upload completion. */ uint32_t index; /* * This field is valid only when bit 0 of flags is set. * * The sequence number of the most recently accepted RX * indication when VSC sets its TCP context into * "terminating" state. * * This allows NVSP to determine if there are any in-flight * RX indications for which the acceptance state is still * undefined.
*/ uint64_t last_accepted_rx_seq_no; } f0; } __packed nvsp_2_msg_terminate_chimney; #define NVSP_TERMINATE_CHIMNEY_COMPLETE_FLAG_DATA_CORRUPTED 0x0000001u /* * NvspMessage2TypeTerminateChimneyComplete */ typedef struct nvsp_2_msg_terminate_chimney_complete_ { uint64_t vsc_context; uint32_t flags; } __packed nvsp_2_msg_terminate_chimney_complete; /* * NvspMessage2TypeIndicateChimneyEvent */ typedef struct nvsp_2_msg_indicate_chimney_event_ { /* * When VscTcpContext is 0, event_type is an NDIS_STATUS event code. * Otherwise, EventType is a TCP connection event (defined in * NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_type; /* * When VscTcpContext is 0, EventType is an NDIS_STATUS event code. * Otherwise, EventType is TCP connection event specific information * (defined in NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_specific_info; /* * If not 0, the event is a per-TCP connection event. This field * contains the VSC's TCP context. * If 0, the event indication is global. */ uint64_t vsc_tcp_context; } __packed nvsp_2_msg_indicate_chimney_event; #define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX 0xffffu #define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX 0xffffffff /* * NvspMessage2TypeSendChimneyPacket */ typedef struct nvsp_2_msg_send_chimney_pkt_ { /* * Identify the TCP connection for which this chimney send is intended */ uint32_t vsp_tcp_handle; /* * This field is used to send part or all of the data * through a send buffer. This value specifies an * index into the send buffer. If the index is * 0xFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint16_t send_buf_section_index; uint16_t send_buf_section_size; /* * OOB Data Index * This is an index into the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall always be 0xFFFFFFFF for now. It is reserved for * the future. */ uint16_t oob_data_index; /* * DisconnectFlags = 0 * Normal chimney send. See MiniportTcpOffloadSend for details. * * DisconnectFlags = TCP_DISCONNECT_GRACEFUL_CLOSE (0x01) * Graceful disconnect. See MiniportTcpOffloadDisconnect for details. * * DisconnectFlags = TCP_DISCONNECT_ABORTIVE_CLOSE (0x02) * Abortive disconnect. See MiniportTcpOffloadDisconnect for details. */ uint16_t disconnect_flags; uint32_t seq_no; } __packed nvsp_2_msg_send_chimney_pkt; /* * NvspMessage2TypeSendChimneyPacketComplete */ typedef struct nvsp_2_msg_send_chimney_pkt_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by the peer). */ uint32_t bytes_transferred; } __packed nvsp_2_msg_send_chimney_pkt_complete; #define NVSP_1_CHIMNEY_RECV_FLAG_NO_PUSH 0x0001u #define NVSP_1_CHIMNEY_RECV_INVALID_OOB_INDEX 0xffffu /* * NvspMessage2TypePostChimneyRecvRequest */ typedef struct nvsp_2_msg_post_chimney_rx_request_ { /* * Identify the TCP connection that this chimney receive request * is for. */ uint32_t vsp_tcp_handle; /* * OOB Data Index * This is an index into the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall always be 0xFFFFFFFF for now. It is reserved for * the future. */ uint32_t oob_data_index; /* * Bit 0 * When it is set, this is a "no-push" receive. * When it is clear, this is a "push" receive. * * Bit 1-15: Reserved and shall be zero */ uint16_t flags; /* * For debugging and diagnostic purposes. * The SeqNo is per TCP connection and starts from 0.
*/ uint32_t seq_no; } __packed nvsp_2_msg_post_chimney_rx_request; /* * NvspMessage2TypePostChimneyRecvRequestComplete */ typedef struct nvsp_2_msg_post_chimney_rx_request_complete_ { /* * The NDIS_STATUS for the chimney receive */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by * the peer). */ uint32_t bytes_xferred; } __packed nvsp_2_msg_post_chimney_rx_request_complete; /* * NvspMessage2TypeAllocateReceiveBuffer */ typedef struct nvsp_2_msg_alloc_rx_buf_ { /* * Allocation ID to match the allocation request and response */ uint32_t allocation_id; /* * Length of the VM shared memory receive buffer that needs to * be allocated */ uint32_t length; } __packed nvsp_2_msg_alloc_rx_buf; /* * NvspMessage2TypeAllocateReceiveBufferComplete */ typedef struct nvsp_2_msg_alloc_rx_buf_complete_ { /* * The NDIS_STATUS code for buffer allocation */ uint32_t status; /* * Allocation ID from NVSP_2_MESSAGE_ALLOCATE_RECEIVE_BUFFER */ uint32_t allocation_id; /* * GPADL handle for the allocated receive buffer */ uint32_t gpadl_handle; /* * Receive buffer ID that is further used in * NvspMessage2SendVmqRndisPacket */ uint64_t rx_buf_id; } __packed nvsp_2_msg_alloc_rx_buf_complete; /* * NvspMessage2TypeFreeReceiveBuffer */ typedef struct nvsp_2_msg_free_rx_buf_ { /* * Receive buffer ID previously returned in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t rx_buf_id; } __packed nvsp_2_msg_free_rx_buf; /* * This structure is used in defining the buffers in * NVSP_2_MESSAGE_SEND_VMQ_RNDIS_PACKET structure */ typedef struct nvsp_xfer_page_range_ { /* * Specifies the ID of the receive buffer that contains this range. This * ID can be the general receive buffer ID specified in * NvspMessage1TypeSendReceiveBuffer or it can be the shared memory * receive buffer ID allocated by the VSC and specified in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t xfer_page_set_id; /* * Number of bytes */ uint32_t byte_count; /* * Offset in bytes from the beginning of the buffer */ uint32_t byte_offset; } __packed nvsp_xfer_page_range; /* * NvspMessage2SendVmqRndisPacket */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_ { /* * This field is specified by RNDIS. It assumes there are * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. It must be RMC_DATA */ uint32_t channel_type; /* * Only the Range element corresponding to the RNDIS header of * the first RNDIS message in the multiple RNDIS messages sent * in one NVSP message. Information about the data portions as well * as the subsequent RNDIS messages in the same NVSP message is * embedded in the RNDIS header itself */ nvsp_xfer_page_range range; } __packed nvsp_2_msg_send_vmq_rndis_pkt; /* * This message is used by the VSC to complete * an RNDIS VMQ message to the VSP. At this point, * the initiator of this message can use any resources * associated with the original RNDIS VMQ packet.
*/ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_2_msg_send_vmq_rndis_pkt_complete; /* * Version 5 messages */ enum nvsp_subchannel_operation { NVSP_SUBCHANNEL_NONE = 0, NVSP_SUBCHANNE_ALLOCATE, NVSP_SUBCHANNE_MAX }; typedef struct nvsp_5_subchannel_request_ { uint32_t op; uint32_t num_subchannels; } __packed nvsp_5_subchannel_request; typedef struct nvsp_5_subchannel_complete_ { uint32_t status; /* Actual number of subchannels allocated */ uint32_t num_subchannels; } __packed nvsp_5_subchannel_complete; typedef struct nvsp_5_send_indirect_table_ { /* The number of entries in the send indirection table */ uint32_t count; /* * The offset of the send indirection table from the top of * this struct. The send indirection table tells which channel * to put the send traffic on. Each entry is a channel number. */ uint32_t offset; } __packed nvsp_5_send_indirect_table; typedef union nvsp_1_msg_uber_ { nvsp_1_msg_send_ndis_version send_ndis_vers; nvsp_1_msg_send_rx_buf send_rx_buf; nvsp_1_msg_send_rx_buf_complete send_rx_buf_complete; nvsp_1_msg_revoke_rx_buf revoke_rx_buf; nvsp_1_msg_send_send_buf send_send_buf; nvsp_1_msg_send_send_buf_complete send_send_buf_complete; nvsp_1_msg_revoke_send_buf revoke_send_buf; nvsp_1_msg_send_rndis_pkt send_rndis_pkt; nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete; } __packed nvsp_1_msg_uber; typedef union nvsp_2_msg_uber_ { nvsp_2_msg_send_ndis_config send_ndis_config; nvsp_2_msg_send_chimney_buf send_chimney_buf; nvsp_2_msg_send_chimney_buf_complete send_chimney_buf_complete; nvsp_2_msg_revoke_chimney_buf revoke_chimney_buf; nvsp_2_msg_resume_chimney_rx_indication resume_chimney_rx_indication; nvsp_2_msg_terminate_chimney terminate_chimney; nvsp_2_msg_terminate_chimney_complete terminate_chimney_complete; nvsp_2_msg_indicate_chimney_event indicate_chimney_event; nvsp_2_msg_send_chimney_pkt send_chimney_packet; nvsp_2_msg_send_chimney_pkt_complete send_chimney_packet_complete; nvsp_2_msg_post_chimney_rx_request post_chimney_rx_request; nvsp_2_msg_post_chimney_rx_request_complete post_chimney_rx_request_complete; nvsp_2_msg_alloc_rx_buf alloc_rx_buffer; nvsp_2_msg_alloc_rx_buf_complete alloc_rx_buffer_complete; nvsp_2_msg_free_rx_buf free_rx_buffer; nvsp_2_msg_send_vmq_rndis_pkt send_vmq_rndis_pkt; nvsp_2_msg_send_vmq_rndis_pkt_complete send_vmq_rndis_pkt_complete; nvsp_2_msg_alloc_chimney_handle alloc_chimney_handle; nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete; } __packed nvsp_2_msg_uber; typedef union nvsp_5_msg_uber_ { nvsp_5_subchannel_request subchannel_request; nvsp_5_subchannel_complete subchn_complete; nvsp_5_send_indirect_table send_table; } __packed nvsp_5_msg_uber; typedef union nvsp_all_msgs_ { nvsp_msg_init_uber init_msgs; nvsp_1_msg_uber vers_1_msgs; nvsp_2_msg_uber vers_2_msgs; nvsp_5_msg_uber vers_5_msgs; } __packed nvsp_all_msgs; /* * ALL Messages */ typedef struct nvsp_msg_ { nvsp_msg_hdr hdr; nvsp_all_msgs msgs; } __packed nvsp_msg; /* * The following arguably belongs in a separate header file */ /* * Defines */ #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15MB */ #define NETVSC_SEND_BUFFER_ID 0xface #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_RECEIVE_SG_COUNT 1 /* Preallocated receive packets */ #define NETVSC_RECEIVE_PACKETLIST_COUNT 256 /* * Maximum MTU we permit to be configured for a netvsc interface.
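 *
 * (Editor's aside, a sketch not in this commit: the
 * nvsp_5_send_indirect_table message defined earlier carries its
 * entries at `offset' bytes from the top of the structure, so a
 * receiver pulls them out along these lines, with vrss_send_table
 * declared in netvsc_dev below:
 *
 *	const nvsp_5_send_indirect_table *tbl;
 *	const uint32_t *ent;
 *	uint32_t i;
 *
 *	ent = (const uint32_t *)((const uint8_t *)tbl + tbl->offset);
 *	for (i = 0; i < tbl->count && i < VRSS_SEND_TABLE_SIZE; i++)
 *		net_dev->vrss_send_table[i] = ent[i];
 *
 * the table is then consulted to pick the TX channel per packet.)
 *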
* When the code was developed, a max MTU of 12232 was tested and * proven to work. 9K is a reasonable maximum for an Ethernet. */ #define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024) #define NETVSC_PACKET_SIZE PAGE_SIZE #define VRSS_SEND_TABLE_SIZE 16 /* * Data types */ /* * Per netvsc channel-specific */ typedef struct netvsc_dev_ { struct hv_device *dev; /* Send buffer allocated by us but manages by NetVSP */ void *send_buf; uint32_t send_buf_size; uint32_t send_buf_gpadl_handle; uint32_t send_section_size; uint32_t send_section_count; unsigned long bitsmap_words; unsigned long *send_section_bitsmap; /* Receive buffer allocated by us but managed by NetVSP */ void *rx_buf; uint32_t rx_buf_size; uint32_t rx_buf_gpadl_handle; uint32_t rx_section_count; nvsp_1_rx_buf_section *rx_sections; /* Used for NetVSP initialization protocol */ struct sema channel_init_sema; nvsp_msg channel_init_packet; nvsp_msg revoke_packet; /*uint8_t hw_mac_addr[HW_MACADDR_LEN];*/ /* Holds rndis device info */ void *extension; hv_bool_uint8_t destroy; /* Negotiated NVSP version */ uint32_t nvsp_version; uint32_t num_channel; uint32_t vrss_send_table[VRSS_SEND_TABLE_SIZE]; } netvsc_dev; struct hv_vmbus_channel; typedef void (*pfn_on_send_rx_completion)(struct hv_vmbus_channel *, void *); #define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE) #define NETVSC_PACKET_MAXPAGE 32 #define NETVSC_VLAN_PRIO_MASK 0xe000 #define NETVSC_VLAN_PRIO_SHIFT 13 #define NETVSC_VLAN_VID_MASK 0x0fff #define TYPE_IPV4 2 #define TYPE_IPV6 4 #define TYPE_TCP 2 #define TYPE_UDP 4 #define TRANSPORT_TYPE_NOT_IP 0 #define TRANSPORT_TYPE_IPV4_TCP ((TYPE_IPV4 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV4_UDP ((TYPE_IPV4 << 16) | TYPE_UDP) #define TRANSPORT_TYPE_IPV6_TCP ((TYPE_IPV6 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV6_UDP ((TYPE_IPV6 << 16) | TYPE_UDP) #ifdef __LP64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif typedef struct netvsc_packet_ { struct hv_device *device; hv_bool_uint8_t is_data_pkt; /* One byte */ uint16_t vlan_tci; uint32_t status; /* Completion */ union { struct { uint64_t rx_completion_tid; void *rx_completion_context; /* This is no longer used */ pfn_on_send_rx_completion on_rx_completion; } rx; struct { uint64_t send_completion_tid; void *send_completion_context; /* Still used in netvsc and filter code */ pfn_on_send_rx_completion on_send_completion; } send; } compl; uint32_t send_buf_section_idx; uint32_t send_buf_section_size; void *rndis_mesg; uint32_t tot_data_buf_len; void *data; uint32_t page_buf_count; hv_vmbus_page_buffer page_buffers[NETVSC_PACKET_MAXPAGE]; } netvsc_packet; typedef struct { uint8_t mac_addr[6]; /* Assumption unsigned long */ hv_bool_uint8_t link_state; } netvsc_device_info; #ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_rx_ring { struct ifnet *hn_ifp; int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 #define HN_RX_FLAG_ATTACHED 0x1 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; 
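/*
 * Editor's sketch, not in this commit, of what a send-section
 * allocator over the netvsc_dev bitmap fields above has to do; the
 * real hv_nv_get_next_send_section() must use atomic bit operations,
 * since multiple TX rings allocate sections concurrently:
 *
 *	uint32_t
 *	find_send_section(netvsc_dev *net_dev)
 *	{
 *		unsigned long i, j, idx;
 *
 *		for (i = 0; i < net_dev->bitsmap_words; i++) {
 *			for (j = 0; j < BITS_PER_LONG; j++) {
 *				idx = i * BITS_PER_LONG + j;
 *				if (idx >= net_dev->send_section_count)
 *					break;
 *				if ((net_dev->send_section_bitsmap[i] &
 *				    (1UL << j)) == 0) {
 *					net_dev->send_section_bitsmap[i] |=
 *					    1UL << j;
 *					return (idx);
 *				}
 *			}
 *		}
 *		return (NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX);
 *	}
 */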
#endif int hn_txdesc_cnt; int hn_txdesc_avail; int hn_has_txeof; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; struct mtx hn_tx_lock; struct hn_softc *hn_sc; struct hv_vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_tx_chimney_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney; u_long hn_pkts; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; int hn_tx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TX_FLAG_ATTACHED 0x1 /* * Device-specific softc structure */ typedef struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; int hn_if_flags; struct mtx hn_lock; int hn_initdone; /* See hv_netvsc_drv_freebsd.c for rules on how to use */ int temp_unusable; struct hv_device *hn_dev_obj; netvsc_dev *net_dev; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; int hn_cpu; int hn_tx_chimney_max; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; } hn_softc_t; /* * Externs */ extern int hv_promisc_mode; void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hv_device *device, void *additional_info); int hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel); int hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt); int hv_nv_get_next_send_section(netvsc_dev *net_dev); #endif /* __HV_NET_VSC_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 298445) +++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 298446) @@ -1,2900 +1,2900 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" #define hv_chan_rxr hv_chan_priv1 #define hv_chan_txr hv_chan_priv2 /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. * The packet offset is where the netvsc_packet starts in the buffer. 
#define HV_NV_SC_PTR_OFFSET_IN_BUF 0 #define HV_NV_PACKET_OFFSET_IN_BUF 16 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_LROENT_CNT_DEF 128 #define HN_RING_CNT_DEF_MAX 8 #define HN_RNDIS_MSG_LEN \ (sizeof(rndis_msg) + \ RNDIS_HASH_PPI_SIZE + \ RNDIS_VLAN_PPI_SIZE + \ RNDIS_TSO_PPI_SIZE + \ RNDIS_CSUM_PPI_SIZE) #define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE #define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE #define HN_TX_DATA_SEGCNT_MAX \ (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) #define HN_DIRECT_TX_SIZE_DEF 128 struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ netvsc_packet netvsc_pkt; /* XXX to be removed */ bus_dmamap_t data_dmap; bus_addr_t rndis_msg_paddr; rndis_msg *rndis_msg; bus_dmamap_t rndis_msg_dmap; }; #define HN_TXD_FLAG_ONLIST 0x1 #define HN_TXD_FLAG_DMAMAP 0x2 /* * Only enable UDP checksum offloading when running on 2012R2 or * later. UDP checksum offloading doesn't work on earlier * Windows releases. */ #define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when * certain TCP and ARP code paths are taken. This appears to be a * well-known condition, as all other drivers checked use a sleeping * mutex to protect their transmit paths. * Also be aware that mutexes do not play well with semaphores, and there * is a conflicting semaphore in a certain channel code path. */ #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) /* * Globals */ int hv_promisc_mode = 0; /* normal mode by default */ SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD, NULL, "Hyper-V network interface"); /* Trust tcp segment verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segment verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagram verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packet verification on host side.
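 *
 * (Editor's aside -- an assumption-laden sketch, not in this commit:
 * the HN_CSUM_ASSIST vs. HN_CSUM_ASSIST_WIN8 split above suggests the
 * attach path keys the per-ring hn_csum_assist off the host
 * generation, something like
 *
 *	if (host is 2012R2 or newer)
 *		txr->hn_csum_assist = HN_CSUM_ASSIST;
 *	else
 *		txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8;
 *
 * so CSUM_UDP is only advertised where UDP checksum offload works.)
 *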
*/ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); static struct taskqueue *hn_tx_taskq; #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); static u_int hn_cpu_index; /* * Forward declarations */ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct hn_softc *, int); static void hn_destroy_tx_ring(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_destroy_tx_data(struct hn_softc *); static void hn_start_taskfunc(void *, int); static void hn_start_txeof_taskfunc(void *, int); static void hn_stop_tx_tasks(struct hn_softc *); static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, 
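/*
 * Editor's note, not part of this commit: the CTLFLAG_RDTUN knobs
 * above are read-only sysctls initialized from boot-time tunables,
 * so they are set in loader.conf rather than with sysctl(8), e.g.:
 *
 *	hw.hn.trust_hosttcp=1
 *	hw.hn.chan_cnt=4
 *	hw.hn.tx_ring_cnt=2
 *	hw.hn.tx_chimney_size=4096
 */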
struct mbuf **); static void hn_create_rx_data(struct hn_softc *sc, int); static void hn_destroy_rx_data(struct hn_softc *sc); static void hn_set_tx_chimney_size(struct hn_softc *, int); static void hn_channel_attach(struct hn_softc *, struct hv_vmbus_channel *); static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof_taskfunc(void *, int); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hn_carrier) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const hv_guid g_net_vsc_device_type = { .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; /* * Standard probe entry point. * */ static int netvsc_probe(device_t dev) { const char *p; p = vmbus_get_type(dev); if (!memcmp(p, &g_net_vsc_device_type.data, sizeof(hv_guid))) { device_set_desc(dev, "Synthetic Network Interface"); if (bootverbose) printf("Netvsc probe... DONE \n"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } /* * Standard attach entry point. * * Called when the driver is loaded. It allocates needed resources, * and initializes the "hardware" and software. */ static int netvsc_attach(device_t dev) { struct hv_device *device_ctx = vmbus_get_devctx(dev); struct hv_vmbus_channel *chan; netvsc_device_info device_info; hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; #if __FreeBSD_version >= 1100045 int tso_maxlen; #endif sc = device_get_softc(dev); sc->hn_unit = unit; sc->hn_dev = dev; if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } NV_LOCK_INIT(sc, "NetVSCLock"); sc->hn_dev_obj = device_ctx; ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } /* * Set the leader CPU for channels. 
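 *
 * (Editor's worked example of the ring sizing above, not in the
 * source: on a 16-CPU guest with hn_chan_cnt and hn_tx_ring_cnt left
 * at 0, ring_cnt = min(mp_ncpus, HN_RING_CNT_DEF_MAX) = min(16, 8) =
 * 8 and tx_ring_cnt = 8, dropping to a single TX ring when
 * hn_use_if_start is set; hn_cpu_index then advances by ring_cnt per
 * device, so successive devices start their channels on different
 * CPUs.)
 *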
*/ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; hn_create_rx_data(sc, ring_cnt); /* * Associate the first TX/RX ring w/ the primary channel. */ chan = device_ctx->channel; KASSERT(HV_VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel")); KASSERT(chan->offer_msg.offer.sub_channel_index == 0, ("primary channel subidx %u", chan->offer_msg.offer.sub_channel_index)); hn_channel_attach(sc, chan); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_ifinit; /* needed by hv_rf_on_device_add() code */ ifp->if_mtu = ETHERMTU; if (hn_use_if_start) { ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, 512); ifp->if_snd.ifq_drv_maxlen = 511; IFQ_SET_READY(&ifp->if_snd); } else { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Tell upper layers that we support full VLAN capability. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; error = hv_rf_on_device_add(device_ctx, &device_info, ring_cnt); if (error) goto failed; KASSERT(sc->net_dev->num_channel > 0 && sc->net_dev->num_channel <= sc->hn_rx_ring_inuse, ("invalid channel count %u, should be less than %d", sc->net_dev->num_channel, sc->hn_rx_ring_inuse)); /* * Set the # of TX/RX rings that could be used according to * the # of channels that host offered. */ if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel) sc->hn_tx_ring_inuse = sc->net_dev->num_channel; sc->hn_rx_ring_inuse = sc->net_dev->num_channel; device_printf(dev, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. 
*/ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif if (device_info.link_state == 0) { sc->hn_carrier = 1; } #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); #endif ether_ifattach(ifp, device_info.mac_addr); #if __FreeBSD_version >= 1100045 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); #endif sc->hn_tx_chimney_max = sc->net_dev->send_section_size; hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, hn_tx_chimney_size); return (0); failed: hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); } /* * Standard detach entry point */ static int netvsc_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); struct hv_device *hv_device = vmbus_get_devctx(dev); if (bootverbose) printf("netvsc_detach\n"); /* * XXXKYS: Need to clean up all our * driver state; this is the driver * unloading. */ /* * XXXKYS: Need to stop outgoing traffic and unregister * the netdevice. */ hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); hn_stop_tx_tasks(sc); ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); return (0); } /* * Standard shutdown entry point */ static int netvsc_shutdown(device_t dev) { return (0); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline void hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif return 1; } static __inline 
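/*
 * Editor's note, not part of this commit: the refs protocol of
 * hn_txdesc_put() above and hn_txdesc_get()/hn_txdesc_hold() below
 * gives the send path the following lifecycle (see hn_send_pkt()):
 *
 *	txd = hn_txdesc_get(txr);		(refs 0 -> 1)
 *	hn_txdesc_hold(txd);			(refs 1 -> 2, pin for send)
 *	hv_nv_on_send(chan, &txd->netvsc_pkt);
 *	hn_txdesc_put(txr, txd);		(drop the local ref)
 *
 * and the completion, hn_tx_done(), drops the last ref, which
 * unloads the DMA map, frees the mbuf and recycles txd.
 */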
struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static void hn_tx_done(struct hv_vmbus_channel *chan, void *xpkt) { netvsc_packet *packet = xpkt; struct hn_txdesc *txd; struct hn_tx_ring *txr; txd = (struct hn_txdesc *)(uintptr_t) packet->compl.send.send_completion_tid; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on channel%u, should be channel%u", chan->offer_msg.offer.sub_channel_index, txr->hn_chan->offer_msg.offer.sub_channel_index)); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); } void netvsc_channel_rollup(struct hv_vmbus_channel *chan) { struct hn_tx_ring *txr = chan->hv_chan_txr; #if defined(INET) || defined(INET6) struct hn_rx_ring *rxr = chan->hv_chan_rxr; tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_has_txeof = 0; txr->hn_txeof(txr); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; netvsc_packet *packet; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; struct ndis_hash_info *hash_info; uint32_t rndis_msg_size; packet = &txd->netvsc_pkt; packet->is_data_pkt = TRUE; packet->tot_data_buf_len = m_head->m_pkthdr.len; /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). */ rndis_mesg = txd->rndis_msg; /* XXX not necessary */ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); rndis_pkt->data_length = packet->tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); /* * Set the hash info for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. 
*/ rndis_msg_size += RNDIS_HASH_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASH_PPI_SIZE, nbl_hash_value); hash_info = (struct ndis_hash_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); hash_info->hash = txr->hn_tx_idx; if (m_head->m_flags & M_VLANTAG) { ndis_8021q_info *rppi_vlan_info; rndis_msg_size += RNDIS_VLAN_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, ieee_8021q_info); rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); rppi_vlan_info->u1.s1.vlan_id = m_head->m_pkthdr.ether_vtag & 0xfff; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { rndis_tcp_tso_info *tso_info; struct ether_vlan_header *eh; int ether_len; /* * XXX need m_pullup and use mtodo */ eh = mtod(m_head, struct ether_vlan_header*); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ether_len = ETHER_HDR_LEN; rndis_msg_size += RNDIS_TSO_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, tcp_large_send_info); tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); tso_info->lso_v2_xmit.type = RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6 = (struct ip6_hdr *) (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif tso_info->lso_v2_xmit.tcp_header_offset = 0; tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { rndis_tcp_ip_csum_info *csum_info; rndis_msg_size += RNDIS_CSUM_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, tcpip_chksum_info); csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); csum_info->xmit.is_ipv4 = 1; if (m_head->m_pkthdr.csum_flags & CSUM_IP) csum_info->xmit.ip_header_csum = 1; if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { csum_info->xmit.tcp_csum = 1; csum_info->xmit.tcp_header_offset = 0; } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { csum_info->xmit.udp_csum = 1; } } rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; packet->tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. * * TODO: vRSS, chimney buffer should be per-channel. 
*/ if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) { netvsc_dev *net_dev = txr->hn_sc->net_dev; uint32_t send_buf_section_idx; send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { uint8_t *dest = ((uint8_t *)net_dev->send_buf + (send_buf_section_idx * net_dev->send_section_size)); memcpy(dest, rndis_mesg, rndis_msg_size); dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); packet->send_buf_section_idx = send_buf_section_idx; packet->send_buf_section_size = packet->tot_data_buf_len; packet->page_buf_count = 0; txr->hn_tx_chimney++; goto done; } } error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (error) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; packet->page_buf_count = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS; /* send packet with page buffer */ packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr); packet->page_buffers[0].offset = txd->rndis_msg_paddr & PAGE_MASK; packet->page_buffers[0].length = rndis_msg_size; /* * Fill the page buffers with mbuf info starting at index * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. */ for (i = 0; i < nsegs; ++i) { hv_vmbus_page_buffer *pb = &packet->page_buffers[ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; pb->pfn = atop(segs[i].ds_addr); pb->offset = segs[i].ds_addr & PAGE_MASK; pb->length = segs[i].ds_len; } packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; packet->send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ packet->compl.send.on_send_completion = hn_tx_done; packet->compl.send.send_completion_context = packet; packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = hv_nv_on_send(txr->hn_chan, &txd->netvsc_pkt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (!hn_use_if_start) { if_inc_counter(ifp, IFCOUNTER_OBYTES, txd->m->m_pkthdr.len); if (txd->m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } txr->hn_pkts++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. 
*/ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } return error; } /* * Start a transmit of one or more packets */ static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } return 0; } /* * Link up/down notification */ void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status) { hn_softc_t *sc = device_get_softc(device_obj->device); if (status == 1) { sc->hn_carrier = 1; } else { sc->hn_carrier = 0; } } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. */ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } /* * Called when we receive a data packet from the "wire" on the * specified device * * Note: This is no longer used as a callback */ int netvsc_recv(struct hv_vmbus_channel *chan, netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info) { struct hn_rx_ring *rxr = chan->hv_chan_rxr; struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. 
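 *
 * (Editor's aside, a sketch not in this commit: hv_m_append() above
 * is used like m_append(), with MJUMPAGESIZE clusters backing any
 * overflow, e.g.
 *
 *	struct mbuf *m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
 *
 *	if (m != NULL && !hv_m_append(m, len, data)) {
 *		m_freem(m);
 *		m = NULL;
 *	}
 *
 * which is essentially the large-packet branch of netvsc_recv()
 * below.)
 *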
*/ if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (packet->tot_data_buf_len <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), packet->data, packet->tot_data_buf_len); m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (packet->tot_data_buf_len > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, packet->tot_data_buf_len, packet->data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (csum_info != NULL) { /* IP csum offload */ if (csum_info->receive.ip_csum_succeeded && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((csum_info->receive.tcp_csum_succeeded || csum_info->receive.udp_csum_succeeded) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (csum_info->receive.tcp_csum_succeeded) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } if (csum_info->receive.ip_csum_succeeded && csum_info->receive.tcp_csum_succeeded) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } /* Rely on SW csum verification though... */ do_lro = 1; } else if (pr == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if ((packet->vlan_tci != 0) && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m_new->m_pkthdr.ether_vtag = packet->vlan_tci; m_new->m_flags |= M_VLANTAG; } m_new->m_pkthdr.flowid = rxr->hn_rx_idx; M_HASHTYPE_SET(m_new, M_HASHTYPE_OPAQUE); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (tcp_lro_rx(lro, m_new, 0) == 0) { /* DONE! 
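 * tcp_lro_rx() accepted the mbuf and queued it for aggregation;
 * it will be passed up the stack when the LRO control block is
 * flushed later.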
*/ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() * 2. code reading sc->temp_unusable under NV_LOCK(), and finding * sc->temp_unusable set, must release NV_LOCK() and exit * 3. to retain exclusive control of the interface, * sc->temp_unusable must be set by code before releasing NV_LOCK() * 4. only code setting sc->temp_unusable can clear sc->temp_unusable * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable */ /* * Standard ioctl entry point. Called when the user wants to configure * the interface. */ static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif netvsc_device_info device_info; struct hv_device *hn_dev; int mask, error = 0; int retry_cnt = 500; switch(cmd) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hn_ifinit(sc); arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, cmd, data); break; case SIOCSIFMTU: hn_dev = vmbus_get_devctx(sc->hn_dev); /* Check MTU value change */ if (ifp->if_mtu == ifr->ifr_mtu) break; if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { error = EINVAL; break; } /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; #if __FreeBSD_version >= 1100099 /* * Make sure that LRO aggregation length limit is still * valid, after the MTU change. */ NV_LOCK(sc); if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); NV_UNLOCK(sc); #endif do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } /* We must remove and add back the device to cause the new * MTU to take effect. This includes tearing down, but not * deleting the channel, then bringing it back up. */ error = hv_rf_on_device_remove(hn_dev, HV_RF_NV_RETAIN_CHANNEL); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } error = hv_rf_on_device_add(hn_dev, &device_info, sc->hn_rx_ring_inuse); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } sc->hn_tx_chimney_max = sc->net_dev->send_section_size; if (sc->hn_tx_ring[0].hn_tx_chimney_size > sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; case SIOCSIFFLAGS: do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* Fixme: Promiscuous mode? 
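 * (A future implementation would presumably toggle the host-side
 * receive filter via RNDIS_OID_GEN_CURRENT_PACKET_FILTER here,
 * instead of doing the full reinit below.)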
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->hn_if_flags & IFF_PROMISC)) { /* do something here for Hyper-V */ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->hn_if_flags & IFF_PROMISC) { /* do something here for Hyper-V */ } else #endif hn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hn_stop(sc); } } NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); sc->hn_if_flags = ifp->if_flags; error = 0; break; case SIOCSIFCAP: NV_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= sc->hn_tx_ring[0].hn_csum_assist; } else { ifp->if_hwassist &= ~sc->hn_tx_ring[0].hn_csum_assist; } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet /* Fixme: Multicast mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NV_LOCK(sc); netvsc_setmulti(sc); NV_UNLOCK(sc); error = 0; } #endif error = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * */ static void hn_stop(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); ifp = sc->hn_ifp; if (bootverbose) printf(" Closing Device ...\n"); atomic_clear_int(&ifp->if_drv_flags, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; ret = hv_rf_on_close(device_ctx); } /* * FreeBSD transmit entry point */ static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. 
*/ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } /* * */ static void hn_ifinit_locked(hn_softc_t *sc) { struct ifnet *ifp; struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); int ret, i; ifp = sc->hn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { return; } hv_promisc_mode = 1; ret = hv_rf_on_open(device_ctx); if (ret != 0) { return; } else { sc->hn_initdone = 1; } atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } /* * */ static void hn_ifinit(void *xsc) { hn_softc_t *sc = xsc; NV_LOCK(sc); if (sc->temp_unusable) { NV_UNLOCK(sc); return; } sc->temp_unusable = TRUE; NV_UNLOCK(sc); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); } #ifdef LATER /* * */ static void hn_watchdog(struct ifnet *ifp) { hn_softc_t *sc; sc = ifp->if_softc; printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); hn_ifinit(sc); /*???*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } #endif #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; NV_LOCK(sc); hn_set_lro_lenlim(sc, lenlim); NV_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; NV_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } NV_UNLOCK(sc); return 0; } static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chimney_size, error; chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size; error = sysctl_handle_int(oidp, &chimney_size, 0, req); if (error || req->newptr == NULL) return error; if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0) return EINVAL; hn_set_tx_chimney_size(sc, chimney_size); return 0; } static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
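 * Reads of this sysctl return the sum over all in-use RX rings;
 * any successful write clears the per-ring counters, which is how
 * userland resets the statistic.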
*/ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } NV_UNLOCK(sc); return 0; } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the first mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is at least * as much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet.
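 * For example, a minimal TCP/IPv4 packet needs iphlen >= 20 and
 * thoff >= 20, so iplen must be at least 40 and the first mbuf must
 * hold at least hoff + 40 bytes for this function to return
 * IPPROTO_TCP (and thus for the packet to be considered for LRO).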
*/ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static void hn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = arg; if (error) return; KASSERT(nseg == 1, ("too many segments %d!", nseg)); *paddr = segs->ds_addr; } static void hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; rxr->hn_rx_idx = i; /* * Initialize LRO. 
*/ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 0); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), hn_rx_stat_u64_sysctl, "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), hn_rx_stat_u64_sysctl, "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segment verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets for which the host's csum verification was trusted"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); } static void hn_destroy_rx_data(struct hn_softc
*sc) { #if defined(INET) || defined(INET6) int i; #endif if (sc->hn_rx_ring_cnt == 0) return; #if defined(INET) || defined(INET6) for (i = 0; i < sc->hn_rx_ring_cnt; ++i) tcp_lro_free(&sc->hn_rx_ring[i].hn_lro); #endif free(sc->hn_rx_ring, M_NETVSC); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_create_tx_ring(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; bus_dma_tag_t parent_dtag; int error, i; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else { txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); txr->hn_mbuf_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1) txr->hn_csum_assist = HN_CSUM_ASSIST; else txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(sc->hn_dev); /* DMA tag for RNDIS messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_MSG_ALIGN, /* alignment */ HN_RNDIS_MSG_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_MSG_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_MSG_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(sc->hn_dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(sc->hn_dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; /* * Allocate and load RNDIS messages. 
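 * Each txd gets its own HN_RNDIS_MSG_LEN buffer, which is loaded
 * here exactly once; hn_dma_map_paddr() records the single
 * segment's physical address in rndis_msg_paddr, so hn_encap()
 * can fill page_buffers[0] without any per-packet DMA mapping.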
*/ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_msg, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &txd->rndis_msg_dmap); if (error) { device_printf(sc->hn_dev, "failed to allocate rndis_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, hn_dma_map_paddr, &txd->rndis_msg_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(sc->hn_dev, "failed to load rndis_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* DMA map for TX data. */ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(sc->hn_dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); if (!hn_use_if_start) { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; if (txr->hn_txdesc == NULL) return; #ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { SLIST_REMOVE_HEAD(&txr->hn_txlist, link); hn_txdesc_dmamap_destroy(txd); } #else mtx_lock(&txr->hn_tx_lock); while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) hn_txdesc_dmamap_destroy(txd); mtx_unlock(&txr->hn_tx_lock); #endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_NETVSC); #endif free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_NETVSC); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct 
hn_tx_ring) * sc->hn_tx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_create_tx_ring(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_tx_chimney_max, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); return 0; } static void hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size) { int i; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size; NV_UNLOCK(sc); } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_destroy_tx_ring(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_NETVSC); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_stop_tx_tasks(struct hn_softc *sc) { int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; 
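/*
 * Wait for the scheduled transmit and txeof tasks to complete;
 * neither may be left running once the TX rings are torn down.
 */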
taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return 0; while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ drbr_advance(ifp, txr->hn_mbuf_br); continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } return 0; } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) return error; if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. 
*/ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_channel_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx; idx = chan->offer_msg.offer.sub_channel_index; KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should be >= 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; chan->hv_chan_rxr = rxr; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to channel %u\n", idx, chan->offer_msg.child_rel_id); } if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; chan->hv_chan_txr = txr; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to channel %u\n", idx, chan->offer_msg.child_rel_id); } } /* Bind the channel to a proper CPU. */ vmbus_channel_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); } void netvsc_subchan_callback(struct hn_softc *sc, struct hv_vmbus_channel *chan) { KASSERT(!HV_VMBUS_CHAN_ISPRIMARY(chan), ("subchannel callback on primary channel")); KASSERT(chan->offer_msg.offer.sub_channel_index > 0, ("invalid channel subidx %u", chan->offer_msg.offer.sub_channel_index)); hn_channel_attach(sc, chan); } static void hn_tx_taskq_create(void *arg __unused) { if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), DEVMETHOD(device_attach, netvsc_attach), DEVMETHOD(device_detach, netvsc_detach), DEVMETHOD(device_shutdown, netvsc_shutdown), { 0, 0 } }; static driver_t netvsc_driver = { NETVSC_DEVNAME, netvsc_methods, sizeof(hn_softc_t) }; static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/netvsc/hv_rndis.h =================================================================== --- head/sys/dev/hyperv/netvsc/hv_rndis.h (revision 298445) +++ head/sys/dev/hyperv/netvsc/hv_rndis.h (revision 298446) @@ -1,1081 +1,1081 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HV_RNDIS_H__ #define __HV_RNDIS_H__ /* * NDIS protocol version numbers */ #define NDIS_VERSION_5_0 0x00050000 #define NDIS_VERSION_5_1 0x00050001 #define NDIS_VERSION_6_0 0x00060000 #define NDIS_VERSION_6_1 0x00060001 #define NDIS_VERSION_6_30 0x0006001e #define NDIS_VERSION (NDIS_VERSION_5_1) /* * Status codes */ #define STATUS_SUCCESS (0x00000000L) #define STATUS_UNSUCCESSFUL (0xC0000001L) #define STATUS_PENDING (0x00000103L) #define STATUS_INSUFFICIENT_RESOURCES (0xC000009AL) #define STATUS_BUFFER_OVERFLOW (0x80000005L) #define STATUS_NOT_SUPPORTED (0xC00000BBL) #define RNDIS_STATUS_SUCCESS (STATUS_SUCCESS) #define RNDIS_STATUS_PENDING (STATUS_PENDING) #define RNDIS_STATUS_NOT_RECOGNIZED (0x00010001L) #define RNDIS_STATUS_NOT_COPIED (0x00010002L) #define RNDIS_STATUS_NOT_ACCEPTED (0x00010003L) #define RNDIS_STATUS_CALL_ACTIVE (0x00010007L) #define RNDIS_STATUS_ONLINE (0x40010003L) #define RNDIS_STATUS_RESET_START (0x40010004L) #define RNDIS_STATUS_RESET_END (0x40010005L) #define RNDIS_STATUS_RING_STATUS (0x40010006L) #define RNDIS_STATUS_CLOSED (0x40010007L) #define RNDIS_STATUS_WAN_LINE_UP (0x40010008L) #define RNDIS_STATUS_WAN_LINE_DOWN (0x40010009L) #define RNDIS_STATUS_WAN_FRAGMENT (0x4001000AL) #define RNDIS_STATUS_MEDIA_CONNECT (0x4001000BL) #define RNDIS_STATUS_MEDIA_DISCONNECT (0x4001000CL) #define RNDIS_STATUS_HARDWARE_LINE_UP (0x4001000DL) #define RNDIS_STATUS_HARDWARE_LINE_DOWN (0x4001000EL) #define RNDIS_STATUS_INTERFACE_UP (0x4001000FL) #define RNDIS_STATUS_INTERFACE_DOWN (0x40010010L) #define RNDIS_STATUS_MEDIA_BUSY (0x40010011L) #define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION (0x40010012L) #define RNDIS_STATUS_WW_INDICATION RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION #define RNDIS_STATUS_LINK_SPEED_CHANGE (0x40010013L) #define RNDIS_STATUS_NOT_RESETTABLE (0x80010001L) #define RNDIS_STATUS_SOFT_ERRORS (0x80010003L) #define RNDIS_STATUS_HARD_ERRORS (0x80010004L) #define RNDIS_STATUS_BUFFER_OVERFLOW (STATUS_BUFFER_OVERFLOW) #define RNDIS_STATUS_FAILURE (STATUS_UNSUCCESSFUL) #define RNDIS_STATUS_RESOURCES (STATUS_INSUFFICIENT_RESOURCES) #define RNDIS_STATUS_CLOSING (0xC0010002L) #define RNDIS_STATUS_BAD_VERSION (0xC0010004L) #define RNDIS_STATUS_BAD_CHARACTERISTICS (0xC0010005L) #define RNDIS_STATUS_ADAPTER_NOT_FOUND (0xC0010006L) #define 
RNDIS_STATUS_OPEN_FAILED (0xC0010007L) #define RNDIS_STATUS_DEVICE_FAILED (0xC0010008L) #define RNDIS_STATUS_MULTICAST_FULL (0xC0010009L) #define RNDIS_STATUS_MULTICAST_EXISTS (0xC001000AL) #define RNDIS_STATUS_MULTICAST_NOT_FOUND (0xC001000BL) #define RNDIS_STATUS_REQUEST_ABORTED (0xC001000CL) #define RNDIS_STATUS_RESET_IN_PROGRESS (0xC001000DL) #define RNDIS_STATUS_CLOSING_INDICATING (0xC001000EL) #define RNDIS_STATUS_NOT_SUPPORTED (STATUS_NOT_SUPPORTED) #define RNDIS_STATUS_INVALID_PACKET (0xC001000FL) #define RNDIS_STATUS_OPEN_LIST_FULL (0xC0010010L) #define RNDIS_STATUS_ADAPTER_NOT_READY (0xC0010011L) #define RNDIS_STATUS_ADAPTER_NOT_OPEN (0xC0010012L) #define RNDIS_STATUS_NOT_INDICATING (0xC0010013L) #define RNDIS_STATUS_INVALID_LENGTH (0xC0010014L) #define RNDIS_STATUS_INVALID_DATA (0xC0010015L) #define RNDIS_STATUS_BUFFER_TOO_SHORT (0xC0010016L) #define RNDIS_STATUS_INVALID_OID (0xC0010017L) #define RNDIS_STATUS_ADAPTER_REMOVED (0xC0010018L) #define RNDIS_STATUS_UNSUPPORTED_MEDIA (0xC0010019L) #define RNDIS_STATUS_GROUP_ADDRESS_IN_USE (0xC001001AL) #define RNDIS_STATUS_FILE_NOT_FOUND (0xC001001BL) #define RNDIS_STATUS_ERROR_READING_FILE (0xC001001CL) #define RNDIS_STATUS_ALREADY_MAPPED (0xC001001DL) #define RNDIS_STATUS_RESOURCE_CONFLICT (0xC001001EL) #define RNDIS_STATUS_NO_CABLE (0xC001001FL) #define RNDIS_STATUS_INVALID_SAP (0xC0010020L) #define RNDIS_STATUS_SAP_IN_USE (0xC0010021L) #define RNDIS_STATUS_INVALID_ADDRESS (0xC0010022L) #define RNDIS_STATUS_VC_NOT_ACTIVATED (0xC0010023L) #define RNDIS_STATUS_DEST_OUT_OF_ORDER (0xC0010024L) #define RNDIS_STATUS_VC_NOT_AVAILABLE (0xC0010025L) #define RNDIS_STATUS_CELLRATE_NOT_AVAILABLE (0xC0010026L) #define RNDIS_STATUS_INCOMPATABLE_QOS (0xC0010027L) #define RNDIS_STATUS_AAL_PARAMS_UNSUPPORTED (0xC0010028L) #define RNDIS_STATUS_NO_ROUTE_TO_DESTINATION (0xC0010029L) #define RNDIS_STATUS_TOKEN_RING_OPEN_ERROR (0xC0011000L) /* * Object Identifiers used by NdisRequest Query/Set Information */ /* * General Objects */ #define RNDIS_OID_GEN_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105 #define RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106 #define RNDIS_OID_GEN_LINK_SPEED 0x00010107 #define RNDIS_OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108 #define RNDIS_OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109 #define RNDIS_OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A #define RNDIS_OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B #define RNDIS_OID_GEN_VENDOR_ID 0x0001010C #define RNDIS_OID_GEN_VENDOR_DESCRIPTION 0x0001010D #define RNDIS_OID_GEN_CURRENT_PACKET_FILTER 0x0001010E #define RNDIS_OID_GEN_CURRENT_LOOKAHEAD 0x0001010F #define RNDIS_OID_GEN_DRIVER_VERSION 0x00010110 #define RNDIS_OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111 #define RNDIS_OID_GEN_PROTOCOL_OPTIONS 0x00010112 #define RNDIS_OID_GEN_MAC_OPTIONS 0x00010113 #define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS 0x00010114 #define RNDIS_OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115 #define RNDIS_OID_GEN_VENDOR_DRIVER_VERSION 0x00010116 #define RNDIS_OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118 #define RNDIS_OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119 #define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A #define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B /* * For receive side scale */ /* Query only */ #define RNDIS_OID_GEN_RSS_CAPABILITIES 0x00010203 /* Query and set */ #define RNDIS_OID_GEN_RSS_PARAMETERS 0x00010204 #define RNDIS_OID_GEN_XMIT_OK 0x00020101 #define 
RNDIS_OID_GEN_RCV_OK 0x00020102 #define RNDIS_OID_GEN_XMIT_ERROR 0x00020103 #define RNDIS_OID_GEN_RCV_ERROR 0x00020104 #define RNDIS_OID_GEN_RCV_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_DIRECTED_BYTES_XMIT 0x00020201 #define RNDIS_OID_GEN_DIRECTED_FRAMES_XMIT 0x00020202 #define RNDIS_OID_GEN_MULTICAST_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_MULTICAST_FRAMES_XMIT 0x00020204 #define RNDIS_OID_GEN_BROADCAST_BYTES_XMIT 0x00020205 #define RNDIS_OID_GEN_BROADCAST_FRAMES_XMIT 0x00020206 #define RNDIS_OID_GEN_DIRECTED_BYTES_RCV 0x00020207 #define RNDIS_OID_GEN_DIRECTED_FRAMES_RCV 0x00020208 #define RNDIS_OID_GEN_MULTICAST_BYTES_RCV 0x00020209 #define RNDIS_OID_GEN_MULTICAST_FRAMES_RCV 0x0002020A #define RNDIS_OID_GEN_BROADCAST_BYTES_RCV 0x0002020B #define RNDIS_OID_GEN_BROADCAST_FRAMES_RCV 0x0002020C #define RNDIS_OID_GEN_RCV_CRC_ERROR 0x0002020D #define RNDIS_OID_GEN_TRANSMIT_QUEUE_LENGTH 0x0002020E #define RNDIS_OID_GEN_GET_TIME_CAPS 0x0002020F #define RNDIS_OID_GEN_GET_NETCARD_TIME 0x00020210 /* * These are connection-oriented general OIDs. * These replace the above OIDs for connection-oriented media. */ #define RNDIS_OID_GEN_CO_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_CO_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_CO_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_CO_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_CO_LINK_SPEED 0x00010105 #define RNDIS_OID_GEN_CO_VENDOR_ID 0x00010106 #define RNDIS_OID_GEN_CO_VENDOR_DESCRIPTION 0x00010107 #define RNDIS_OID_GEN_CO_DRIVER_VERSION 0x00010108 #define RNDIS_OID_GEN_CO_PROTOCOL_OPTIONS 0x00010109 #define RNDIS_OID_GEN_CO_MAC_OPTIONS 0x0001010A #define RNDIS_OID_GEN_CO_MEDIA_CONNECT_STATUS 0x0001010B #define RNDIS_OID_GEN_CO_VENDOR_DRIVER_VERSION 0x0001010C #define RNDIS_OID_GEN_CO_MINIMUM_LINK_SPEED 0x0001010D #define RNDIS_OID_GEN_CO_GET_TIME_CAPS 0x00010201 #define RNDIS_OID_GEN_CO_GET_NETCARD_TIME 0x00010202 /* * These are connection-oriented statistics OIDs. */ #define RNDIS_OID_GEN_CO_XMIT_PDUS_OK 0x00020101 #define RNDIS_OID_GEN_CO_RCV_PDUS_OK 0x00020102 #define RNDIS_OID_GEN_CO_XMIT_PDUS_ERROR 0x00020103 #define RNDIS_OID_GEN_CO_RCV_PDUS_ERROR 0x00020104 #define RNDIS_OID_GEN_CO_RCV_PDUS_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_CO_RCV_CRC_ERROR 0x00020201 #define RNDIS_OID_GEN_CO_TRANSMIT_QUEUE_LENGTH 0x00020202 #define RNDIS_OID_GEN_CO_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_CO_BYTES_RCV 0x00020204 #define RNDIS_OID_GEN_CO_BYTES_XMIT_OUTSTANDING 0x00020205 #define RNDIS_OID_GEN_CO_NETCARD_LOAD 0x00020206 /* * These are objects for Connection-oriented media call-managers. 
*/ #define RNDIS_OID_CO_ADD_PVC 0xFF000001 #define RNDIS_OID_CO_DELETE_PVC 0xFF000002 #define RNDIS_OID_CO_GET_CALL_INFORMATION 0xFF000003 #define RNDIS_OID_CO_ADD_ADDRESS 0xFF000004 #define RNDIS_OID_CO_DELETE_ADDRESS 0xFF000005 #define RNDIS_OID_CO_GET_ADDRESSES 0xFF000006 #define RNDIS_OID_CO_ADDRESS_CHANGE 0xFF000007 #define RNDIS_OID_CO_SIGNALING_ENABLED 0xFF000008 #define RNDIS_OID_CO_SIGNALING_DISABLED 0xFF000009 /* * 802.3 Objects (Ethernet) */ #define RNDIS_OID_802_3_PERMANENT_ADDRESS 0x01010101 #define RNDIS_OID_802_3_CURRENT_ADDRESS 0x01010102 #define RNDIS_OID_802_3_MULTICAST_LIST 0x01010103 #define RNDIS_OID_802_3_MAXIMUM_LIST_SIZE 0x01010104 #define RNDIS_OID_802_3_MAC_OPTIONS 0x01010105 /* * */ #define NDIS_802_3_MAC_OPTION_PRIORITY 0x00000001 #define RNDIS_OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101 #define RNDIS_OID_802_3_XMIT_ONE_COLLISION 0x01020102 #define RNDIS_OID_802_3_XMIT_MORE_COLLISIONS 0x01020103 #define RNDIS_OID_802_3_XMIT_DEFERRED 0x01020201 #define RNDIS_OID_802_3_XMIT_MAX_COLLISIONS 0x01020202 #define RNDIS_OID_802_3_RCV_OVERRUN 0x01020203 #define RNDIS_OID_802_3_XMIT_UNDERRUN 0x01020204 #define RNDIS_OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205 #define RNDIS_OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206 #define RNDIS_OID_802_3_XMIT_LATE_COLLISIONS 0x01020207 /* * RNDIS MP custom OID for test */ #define OID_RNDISMP_GET_RECEIVE_BUFFERS 0xFFA0C90D // Query only /* * Remote NDIS message types */ #define REMOTE_NDIS_PACKET_MSG 0x00000001 #define REMOTE_NDIS_INITIALIZE_MSG 0x00000002 #define REMOTE_NDIS_HALT_MSG 0x00000003 #define REMOTE_NDIS_QUERY_MSG 0x00000004 #define REMOTE_NDIS_SET_MSG 0x00000005 #define REMOTE_NDIS_RESET_MSG 0x00000006 #define REMOTE_NDIS_INDICATE_STATUS_MSG 0x00000007 #define REMOTE_NDIS_KEEPALIVE_MSG 0x00000008 #define REMOTE_CONDIS_MP_CREATE_VC_MSG 0x00008001 #define REMOTE_CONDIS_MP_DELETE_VC_MSG 0x00008002 #define REMOTE_CONDIS_MP_ACTIVATE_VC_MSG 0x00008005 #define REMOTE_CONDIS_MP_DEACTIVATE_VC_MSG 0x00008006 #define REMOTE_CONDIS_INDICATE_STATUS_MSG 0x00008007 /* * Remote NDIS message completion types */ #define REMOTE_NDIS_INITIALIZE_CMPLT 0x80000002 #define REMOTE_NDIS_QUERY_CMPLT 0x80000004 #define REMOTE_NDIS_SET_CMPLT 0x80000005 #define REMOTE_NDIS_RESET_CMPLT 0x80000006 #define REMOTE_NDIS_KEEPALIVE_CMPLT 0x80000008 #define REMOTE_CONDIS_MP_CREATE_VC_CMPLT 0x80008001 #define REMOTE_CONDIS_MP_DELETE_VC_CMPLT 0x80008002 #define REMOTE_CONDIS_MP_ACTIVATE_VC_CMPLT 0x80008005 #define REMOTE_CONDIS_MP_DEACTIVATE_VC_CMPLT 0x80008006 /* * Reserved message type for private communication between lower-layer * host driver and remote device, if necessary. */ #define REMOTE_NDIS_BUS_MSG 0xff000001 /* * Defines for DeviceFlags in rndis_initialize_complete */ #define RNDIS_DF_CONNECTIONLESS 0x00000001 #define RNDIS_DF_CONNECTION_ORIENTED 0x00000002 #define RNDIS_DF_RAW_DATA 0x00000004 /* * Remote NDIS medium types. */ #define RNDIS_MEDIUM_802_3 0x00000000 #define RNDIS_MEDIUM_802_5 0x00000001 #define RNDIS_MEDIUM_FDDI 0x00000002 #define RNDIS_MEDIUM_WAN 0x00000003 #define RNDIS_MEDIUM_LOCAL_TALK 0x00000004 #define RNDIS_MEDIUM_ARCNET_RAW 0x00000006 #define RNDIS_MEDIUM_ARCNET_878_2 0x00000007 #define RNDIS_MEDIUM_ATM 0x00000008 #define RNDIS_MEDIUM_WIRELESS_WAN 0x00000009 #define RNDIS_MEDIUM_IRDA 0x0000000a #define RNDIS_MEDIUM_CO_WAN 0x0000000b /* Not a real medium, defined as an upper bound */ #define RNDIS_MEDIUM_MAX 0x0000000d /* * Remote NDIS medium connection states. 
*/ #define RNDIS_MEDIA_STATE_CONNECTED 0x00000000 #define RNDIS_MEDIA_STATE_DISCONNECTED 0x00000001 /* * Remote NDIS version numbers */ #define RNDIS_MAJOR_VERSION 0x00000001 #define RNDIS_MINOR_VERSION 0x00000000 /* * Remote NDIS offload parameters */ #define RNDIS_OBJECT_TYPE_DEFAULT 0x80 #define RNDIS_OFFLOAD_PARAMETERS_REVISION_3 3 #define RNDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 #define RNDIS_OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ #define RNDIS_OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ #define RNDIS_OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ #define RNDIS_OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ /* * NdisInitialize message */ typedef struct rndis_initialize_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t major_version; uint32_t minor_version; uint32_t max_xfer_size; } rndis_initialize_request; /* * Response to NdisInitialize */ typedef struct rndis_initialize_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t major_version; uint32_t minor_version; uint32_t device_flags; /* RNDIS medium */ uint32_t medium; uint32_t max_pkts_per_msg; uint32_t max_xfer_size; uint32_t pkt_align_factor; uint32_t af_list_offset; uint32_t af_list_size; } rndis_initialize_complete; /* * Call manager devices only: Information about an address family * supported by the device is appended to the response to NdisInitialize. 
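 */

#if 0	/* Illustrative only -- not compiled. */
/*
 * A minimal sketch of how the INITIALIZE handshake message defined
 * above could be filled in.  The function name, the request-ID source
 * and the transfer-size value are hypothetical; the real driver builds
 * this message in its RNDIS filter code.
 */
static __inline void
example_rndis_init_request(rndis_initialize_request *req, uint32_t rid)
{
	req->request_id = rid;		/* echoed back in the completion */
	req->major_version = RNDIS_MAJOR_VERSION;
	req->minor_version = RNDIS_MINOR_VERSION;
	req->max_xfer_size = 16384;	/* hypothetical example value */
}
#endif

/*
 * The address-family information mentioned above is carried in the
 * following structure: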
*/ typedef struct rndis_co_address_family_ { /* RNDIS AF */ uint32_t address_family; uint32_t major_version; uint32_t minor_version; } rndis_co_address_family; /* * NdisHalt message */ typedef struct rndis_halt_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_halt_request; /* * NdisQueryRequest message */ typedef struct rndis_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_query_request; /* * Response to NdisQueryRequest */ typedef struct rndis_query_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t info_buffer_length; uint32_t info_buffer_offset; } rndis_query_complete; /* * NdisSetRequest message */ typedef struct rndis_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_set_request; /* * Response to NdisSetRequest */ typedef struct rndis_set_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_set_complete; /* * NdisReset message */ typedef struct rndis_reset_request_ { uint32_t reserved; } rndis_reset_request; /* * Response to NdisReset */ typedef struct rndis_reset_complete_ { /* RNDIS status */ uint32_t status; uint32_t addressing_reset; } rndis_reset_complete; /* * NdisMIndicateStatus message */ typedef struct rndis_indicate_status_ { /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rndis_indicate_status; /* * Diagnostic information passed as the status buffer in * rndis_indicate_status messages signifying error conditions. */ typedef struct rndis_diagnostic_info_ { /* RNDIS status */ uint32_t diag_status; uint32_t error_offset; } rndis_diagnostic_info; /* * NdisKeepAlive message */ typedef struct rndis_keepalive_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_keepalive_request; /* * Response to NdisKeepAlive */ typedef struct rndis_keepalive_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_keepalive_complete; /* * Data message. All offset fields contain byte offsets from the beginning * of the rndis_packet structure. All length fields are in bytes. * VcHandle is set to 0 for connectionless data, otherwise it * contains the VC handle. */ typedef struct rndis_packet_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; } rndis_packet; typedef struct rndis_packet_ex_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; uint64_t data_buf_id; uint32_t data_buf_offset; uint64_t next_header_buf_id; uint32_t next_header_byte_offset; uint32_t next_header_byte_count; } rndis_packet_ex; /* * Optional Out of Band data associated with a Data message. */ typedef struct rndis_oobd_ { uint32_t size; /* RNDIS class ID */ uint32_t type; uint32_t class_info_offset; } rndis_oobd; /* * Packet extension field contents associated with a Data message. 
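 */

#if 0	/* Illustrative only -- not compiled. */
/*
 * Before the extension fields: a sketch of locating the payload of a
 * received data message.  Per the rndis_packet description above,
 * every offset is relative to the start of the rndis_packet structure
 * itself.  The helper name is hypothetical and all sanity checks are
 * omitted.
 */
static __inline const uint8_t *
example_rndis_pkt_data(const rndis_packet *pkt)
{
	return ((const uint8_t *)pkt + pkt->data_offset);
}
#endif

/*
 * Each per-packet extension element is described by: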
*/ typedef struct rndis_per_packet_info_ { uint32_t size; uint32_t type; uint32_t per_packet_info_offset; } rndis_per_packet_info; typedef enum ndis_per_pkt_infotype_ { tcpip_chksum_info, ipsec_info, tcp_large_send_info, classification_handle_info, ndis_reserved, sgl_info, ieee_8021q_info, original_pkt_info, pkt_cancel_id, original_netbuf_list, cached_netbuf_list, short_pkt_padding_info, max_perpkt_info } ndis_per_pkt_infotype; #define nbl_hash_value pkt_cancel_id typedef struct ndis_8021q_info_ { union { struct { uint32_t user_pri : 3; /* User Priority */ uint32_t cfi : 1; /* Canonical Format ID */ uint32_t vlan_id : 12; uint32_t reserved : 16; } s1; uint32_t value; } u1; } ndis_8021q_info; struct ndis_hash_info { uint32_t hash; } __packed; struct rndis_object_header { uint8_t type; uint8_t revision; uint16_t size; }; typedef struct rndis_offload_params_ { struct rndis_object_header header; uint8_t ipv4_csum; uint8_t tcp_ipv4_csum; uint8_t udp_ipv4_csum; uint8_t tcp_ipv6_csum; uint8_t udp_ipv6_csum; uint8_t lso_v1; uint8_t ip_sec_v1; uint8_t lso_v2_ipv4; uint8_t lso_v2_ipv6; uint8_t tcp_connection_ipv4; uint8_t tcp_connection_ipv6; uint32_t flags; uint8_t ip_sec_v2; uint8_t ip_sec_v2_ipv4; struct { uint8_t rsc_ipv4; uint8_t rsc_ipv6; }; struct { uint8_t encapsulated_packet_task_offload; uint8_t encapsulation_types; }; } rndis_offload_params; typedef struct rndis_tcp_ip_csum_info_ { union { struct { uint32_t is_ipv4:1; uint32_t is_ipv6:1; uint32_t tcp_csum:1; uint32_t udp_csum:1; uint32_t ip_header_csum:1; uint32_t reserved:11; uint32_t tcp_header_offset:10; } xmit; struct { uint32_t tcp_csum_failed:1; uint32_t udp_csum_failed:1; uint32_t ip_csum_failed:1; uint32_t tcp_csum_succeeded:1; uint32_t udp_csum_succeeded:1; uint32_t ip_csum_succeeded:1; uint32_t loopback:1; uint32_t tcp_csum_value_invalid:1; uint32_t ip_csum_value_invalid:1; } receive; uint32_t value; }; } rndis_tcp_ip_csum_info; typedef struct rndis_tcp_tso_info_ { union { struct { uint32_t unused:30; uint32_t type:1; uint32_t reserved2:1; } xmit; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit; struct { uint32_t tcp_payload:30; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit_complete; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t ip_version:1; } lso_v2_xmit; struct { uint32_t reserved:30; uint32_t type:1; uint32_t reserved2:1; } lso_v2_xmit_complete; uint32_t value; }; } rndis_tcp_tso_info; #define RNDIS_HASH_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(struct ndis_hash_info)) #define RNDIS_VLAN_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(ndis_8021q_info)) #define RNDIS_CSUM_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(rndis_tcp_ip_csum_info)) #define RNDIS_TSO_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(rndis_tcp_tso_info)) /* * Format of Information buffer passed in a SetRequest for the OID * OID_GEN_RNDIS_CONFIG_PARAMETER. */ typedef struct rndis_config_parameter_info_ { uint32_t parameter_name_offset; uint32_t parameter_name_length; uint32_t parameter_type; uint32_t parameter_value_offset; uint32_t parameter_value_length; } rndis_config_parameter_info; /* * Values for ParameterType in rndis_config_parameter_info */ #define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0 #define RNDIS_CONFIG_PARAM_TYPE_STRING 2 /* * CONDIS Miniport messages for connection oriented devices * that do not implement a call manager. 
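 */

/*
 * Illustrative sketch (not from the original header; the function name is
 * ours): reading the receive side of rndis_tcp_ip_csum_info.  The host may
 * report a checksum as verified, as failed, or not report on it at all
 * (neither bit set), in which case the guest must verify it in software.
 */
static __inline int
example_rx_tcp_csum_verified(const rndis_tcp_ip_csum_info *info)
{
	if (info->receive.tcp_csum_succeeded)
		return (1);	/* host verified the checksum */
	if (info->receive.tcp_csum_failed)
		return (0);	/* host saw a bad checksum */
	return (-1);		/* host did not check; fall back to software */
}

/*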
*/ /* * CoNdisMiniportCreateVc message */ typedef struct rcondis_mp_create_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t ndis_vc_handle; } rcondis_mp_create_vc; /* * Response to CoNdisMiniportCreateVc */ typedef struct rcondis_mp_create_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; /* RNDIS status */ uint32_t status; } rcondis_mp_create_vc_complete; /* * CoNdisMiniportDeleteVc message */ typedef struct rcondis_mp_delete_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_delete_vc; /* * Response to CoNdisMiniportDeleteVc */ typedef struct rcondis_mp_delete_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_delete_vc_complete; /* * CoNdisMiniportQueryRequest message */ typedef struct rcondis_mp_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_query_request; /* * CoNdisMiniportSetRequest message */ typedef struct rcondis_mp_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_set_request; /* * CoNdisIndicateStatus message */ typedef struct rcondis_indicate_status_ { /* RNDIS handle */ uint32_t ndis_vc_handle; /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rcondis_indicate_status; /* * CONDIS Call/VC parameters */ typedef struct rcondis_specific_parameters_ { uint32_t parameter_type; uint32_t parameter_length; uint32_t parameter_offset; } rcondis_specific_parameters; typedef struct rcondis_media_parameters_ { uint32_t flags; uint32_t reserved1; uint32_t reserved2; rcondis_specific_parameters media_specific; } rcondis_media_parameters; typedef struct rndis_flowspec_ { uint32_t token_rate; uint32_t token_bucket_size; uint32_t peak_bandwidth; uint32_t latency; uint32_t delay_variation; uint32_t service_type; uint32_t max_sdu_size; uint32_t minimum_policed_size; } rndis_flowspec; typedef struct rcondis_call_manager_parameters_ { rndis_flowspec transmit; rndis_flowspec receive; rcondis_specific_parameters call_mgr_specific; } rcondis_call_manager_parameters; /* * CoNdisMiniportActivateVc message */ typedef struct rcondis_mp_activate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t media_params_offset; uint32_t media_params_length; uint32_t call_mgr_params_offset; uint32_t call_mgr_params_length; } rcondis_mp_activate_vc_request; /* * Response to CoNdisMiniportActivateVc */ typedef struct rcondis_mp_activate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_activate_vc_complete; /* * CoNdisMiniportDeactivateVc message */ typedef struct rcondis_mp_deactivate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_deactivate_vc_request; /* * Response to CoNdisMiniportDeactivateVc */ typedef struct rcondis_mp_deactivate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_deactivate_vc_complete; /* * 
union with all of the RNDIS messages */ typedef union rndis_msg_container_ { rndis_packet packet; rndis_initialize_request init_request; rndis_halt_request halt_request; rndis_query_request query_request; rndis_set_request set_request; rndis_reset_request reset_request; rndis_keepalive_request keepalive_request; rndis_indicate_status indicate_status; rndis_initialize_complete init_complete; rndis_query_complete query_complete; rndis_set_complete set_complete; rndis_reset_complete reset_complete; rndis_keepalive_complete keepalive_complete; rcondis_mp_create_vc co_miniport_create_vc; rcondis_mp_delete_vc co_miniport_delete_vc; rcondis_indicate_status co_miniport_status; rcondis_mp_activate_vc_request co_miniport_activate_vc; rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc; rcondis_mp_create_vc_complete co_miniport_create_vc_complete; rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete; rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete; rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete; rndis_packet_ex packet_ex; } rndis_msg_container; /* * Remote NDIS message format */ typedef struct rndis_msg_ { uint32_t ndis_msg_type; /* * Total length of this message, from the beginning * of the rndis_msg struct, in bytes. */ uint32_t msg_len; /* Actual message */ rndis_msg_container msg; } rndis_msg; /* * Handy macros */ /* * get the size of an RNDIS message. Pass in the message type, * rndis_set_request, rndis_packet for example */ #define RNDIS_MESSAGE_SIZE(message) \ (sizeof(message) + (sizeof(rndis_msg) - sizeof(rndis_msg_container))) /* * get pointer to info buffer with message pointer */ #define MESSAGE_TO_INFO_BUFFER(message) \ (((PUCHAR)(message)) + message->InformationBufferOffset) /* * get pointer to status buffer with message pointer */ #define MESSAGE_TO_STATUS_BUFFER(message) \ (((PUCHAR)(message)) + message->StatusBufferOffset) /* * get pointer to OOBD buffer with message pointer */ #define MESSAGE_TO_OOBD_BUFFER(message) \ (((PUCHAR)(message)) + message->OOBDataOffset) /* * get pointer to data buffer with message pointer */ #define MESSAGE_TO_DATA_BUFFER(message) \ (((PUCHAR)(message)) + message->PerPacketInfoOffset) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) &rndis_message->Message) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) rndis_message) /* * Structures used in OID_RNDISMP_GET_RECEIVE_BUFFERS */ #define RNDISMP_RECEIVE_BUFFER_ELEM_FLAG_VMQ_RECEIVE_BUFFER 0x00000001 typedef struct rndismp_rx_buf_elem_ { uint32_t flags; uint32_t length; uint64_t rx_buf_id; uint32_t gpadl_handle; void *rx_buf; } rndismp_rx_buf_elem; typedef struct rndismp_rx_bufs_info_ { uint32_t num_rx_bufs; rndismp_rx_buf_elem rx_buf_elems[1]; } rndismp_rx_bufs_info; #define RNDIS_HEADER_SIZE (sizeof(rndis_msg) - sizeof(rndis_msg_container)) #define NDIS_PACKET_TYPE_DIRECTED 0x00000001 #define NDIS_PACKET_TYPE_MULTICAST 0x00000002 #define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 #define NDIS_PACKET_TYPE_BROADCAST 0x00000008 #define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 #define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 #define NDIS_PACKET_TYPE_SMT 0x00000040 #define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 #define NDIS_PACKET_TYPE_GROUP 0x00000100 #define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00000200 #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 #define NDIS_PACKET_TYPE_MAC_FRAME 
0x00000800 /* * Externs */ struct hv_vmbus_channel; int netvsc_recv(struct hv_vmbus_channel *chan, netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info); void netvsc_channel_rollup(struct hv_vmbus_channel *chan); void netvsc_subchan_callback(struct hn_softc *sc, struct hv_vmbus_channel *chan); void* hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type); void* hv_get_ppi_data(rndis_packet *rpkt, uint32_t type); #endif /* __HV_RNDIS_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_rndis_filter.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 298445) +++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 298446) @@ -1,1185 +1,1185 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" /* * Forward declarations */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type); static void hv_rf_receive_response(rndis_device *device, rndis_msg *response); static void hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response); static void hv_rf_receive_data(rndis_device *device, rndis_msg *message, struct hv_vmbus_channel *chan, netvsc_packet *pkt); static int hv_rf_query_device(rndis_device *device, uint32_t oid, void *result, uint32_t *result_size); static inline int hv_rf_query_device_mac(rndis_device *device); static inline int hv_rf_query_device_link_status(rndis_device *device); static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter); static int hv_rf_init_device(rndis_device *device); static int hv_rf_open_device(rndis_device *device); static int hv_rf_close_device(rndis_device *device); static void hv_rf_on_send_request_completion(struct hv_vmbus_channel *, void *context); static void hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *, void *context); int hv_rf_send_offload_request(struct hv_device *device, rndis_offload_params *offloads); /* * Set the Per-Packet-Info with the specified type */ void * hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type) { rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset += rppi_size; rppi = (rndis_per_packet_info *)((char *)rndis_pkt + rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_length); rppi->size = rppi_size; rppi->type = pkt_type; rppi->per_packet_info_offset = sizeof(rndis_per_packet_info); rndis_pkt->per_pkt_info_length += rppi_size; return (rppi); } /* * Get the Per-Packet-Info with the specified type * return NULL if not found. */ void * hv_get_ppi_data(rndis_packet *rpkt, uint32_t type) { rndis_per_packet_info *ppi; int len; if (rpkt->per_pkt_info_offset == 0) return (NULL); ppi = (rndis_per_packet_info *)((unsigned long)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_length; while (len > 0) { if (ppi->type == type) return (void *)((unsigned long)ppi + ppi->per_packet_info_offset); len -= ppi->size; ppi = (rndis_per_packet_info *)((unsigned long)ppi + ppi->size); } return (NULL); } /* * Allow module_param to work and override to switch to promiscuous mode. 
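 */

/*
 * Illustrative sketch (not from the original source; the function name is
 * ours): the sizing arithmetic behind hv_set_rppi_data() above.  An
 * outgoing data message grows by the fixed RNDIS header, the rndis_packet
 * body and one per-packet-info element, e.g. for an 802.1q tag:
 */
static __inline uint32_t
example_vlan_data_msg_len(uint32_t payload_len)
{
	return (RNDIS_MESSAGE_SIZE(rndis_packet) + RNDIS_VLAN_PPI_SIZE +
	    payload_len);
}

/*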
*/ static inline rndis_device * hv_get_rndis_device(void) { rndis_device *device; device = malloc(sizeof(rndis_device), M_NETVSC, M_WAITOK | M_ZERO); mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_DEF); /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */ STAILQ_INIT(&device->myrequest_list); device->state = RNDIS_DEV_UNINITIALIZED; return (device); } /* * */ static inline void hv_put_rndis_device(rndis_device *device) { mtx_destroy(&device->req_lock); free(device, M_NETVSC); } /* * */ static inline rndis_request * hv_rndis_request(rndis_device *device, uint32_t message_type, uint32_t message_length) { rndis_request *request; rndis_msg *rndis_mesg; rndis_set_request *set; request = malloc(sizeof(rndis_request), M_NETVSC, M_WAITOK | M_ZERO); sema_init(&request->wait_sema, 0, "rndis sema"); rndis_mesg = &request->request_msg; rndis_mesg->ndis_msg_type = message_type; rndis_mesg->msg_len = message_length; /* * Set the request id. This field is always after the rndis header * for request/response packet types so we just use the set_request * as a template. */ set = &rndis_mesg->msg.set_request; set->request_id = atomic_fetchadd_int(&device->new_request_id, 1); /* Increment to get the new value (call above returns old value) */ set->request_id += 1; /* Add to the request list */ mtx_lock(&device->req_lock); STAILQ_INSERT_TAIL(&device->myrequest_list, request, mylist_entry); mtx_unlock(&device->req_lock); return (request); } /* * */ static inline void hv_put_rndis_request(rndis_device *device, rndis_request *request) { mtx_lock(&device->req_lock); /* Fixme: Has O(n) performance */ /* * XXXKYS: Use Doubly linked lists. */ STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_, mylist_entry); mtx_unlock(&device->req_lock); sema_destroy(&request->wait_sema); free(request, M_NETVSC); } /* * */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type) { int ret; netvsc_packet *packet; netvsc_dev *net_dev = device->net_dev; int send_buf_section_idx; /* Set up the packet to send it */ packet = &request->pkt; packet->is_data_pkt = FALSE; packet->tot_data_buf_len = request->request_msg.msg_len; packet->page_buf_count = 1; packet->page_buffers[0].pfn = hv_get_phys_addr(&request->request_msg) >> PAGE_SHIFT; packet->page_buffers[0].length = request->request_msg.msg_len; packet->page_buffers[0].offset = (unsigned long)&request->request_msg & (PAGE_SIZE - 1); if (packet->page_buffers[0].offset + packet->page_buffers[0].length > PAGE_SIZE) { packet->page_buf_count = 2; packet->page_buffers[0].length = PAGE_SIZE - packet->page_buffers[0].offset; packet->page_buffers[1].pfn = hv_get_phys_addr((char*)&request->request_msg + packet->page_buffers[0].length) >> PAGE_SHIFT; packet->page_buffers[1].offset = 0; packet->page_buffers[1].length = request->request_msg.msg_len - packet->page_buffers[0].length; } packet->compl.send.send_completion_context = request; /* packet */ if (message_type != REMOTE_NDIS_HALT_MSG) { packet->compl.send.on_send_completion = hv_rf_on_send_request_completion; } else { packet->compl.send.on_send_completion = hv_rf_on_send_request_halt_completion; } packet->compl.send.send_completion_tid = (unsigned long)device; if (packet->tot_data_buf_len < net_dev->send_section_size) { send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { char *dest = ((char *)net_dev->send_buf + send_buf_section_idx * net_dev->send_section_size); memcpy(dest, 
&request->request_msg, request->request_msg.msg_len); packet->send_buf_section_idx = send_buf_section_idx; packet->send_buf_section_size = packet->tot_data_buf_len; packet->page_buf_count = 0; goto sendit; } /* Failed to allocate chimney send buffer; move on */ } packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; packet->send_buf_section_size = 0; sendit: ret = hv_nv_on_send(device->net_dev->dev->channel, packet); return (ret); } /* * RNDIS filter receive response */ static void hv_rf_receive_response(rndis_device *device, rndis_msg *response) { rndis_request *request = NULL; rndis_request *next_request; boolean_t found = FALSE; mtx_lock(&device->req_lock); request = STAILQ_FIRST(&device->myrequest_list); while (request != NULL) { /* * All request/response messages contain request_id as the * first field */ if (request->request_msg.msg.init_request.request_id == response->msg.init_complete.request_id) { found = TRUE; break; } next_request = STAILQ_NEXT(request, mylist_entry); request = next_request; } mtx_unlock(&device->req_lock); if (found) { if (response->msg_len <= sizeof(rndis_msg)) { memcpy(&request->response_msg, response, response->msg_len); } else { if (response->ndis_msg_type == REMOTE_NDIS_RESET_CMPLT) { /* Does not have a request id field */ request->response_msg.msg.reset_complete.status = STATUS_BUFFER_OVERFLOW; } else { request->response_msg.msg.init_complete.status = STATUS_BUFFER_OVERFLOW; } } sema_post(&request->wait_sema); } } int hv_rf_send_offload_request(struct hv_device *device, rndis_offload_params *offloads) { rndis_request *request; rndis_set_request *set; rndis_offload_params *offload_req; rndis_set_complete *set_complete; rndis_device *rndis_dev; hn_softc_t *sc = device_get_softc(device->device); device_t dev = device->device; netvsc_dev *net_dev = sc->net_dev; uint32_t vsp_version = net_dev->nvsp_version; uint32_t extlen = sizeof(rndis_offload_params); int ret; if (vsp_version <= NVSP_PROTOCOL_VERSION_4) { extlen = VERSION_4_OFFLOAD_SIZE; /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support * UDP checksum offload.
*/ offloads->udp_ipv4_csum = 0; offloads->udp_ipv6_csum = 0; } rndis_dev = net_dev->extension; request = hv_rndis_request(rndis_dev, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); if (!request) return (ENOMEM); set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_TCP_OFFLOAD_PARAMETERS; set->info_buffer_length = extlen; set->info_buffer_offset = sizeof(rndis_set_request); set->device_vc_handle = 0; offload_req = (rndis_offload_params *)((unsigned long)set + set->info_buffer_offset); *offload_req = *offloads; offload_req->header.type = RNDIS_OBJECT_TYPE_DEFAULT; offload_req->header.revision = RNDIS_OFFLOAD_PARAMETERS_REVISION_3; offload_req->header.size = extlen; ret = hv_rf_send_request(rndis_dev, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { device_printf(dev, "hv send offload request failed, ret=%d!\n", ret); goto cleanup; } ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret != 0) { device_printf(dev, "hv send offload request timeout\n"); goto cleanup; } set_complete = &request->response_msg.msg.set_complete; if (set_complete->status == RNDIS_STATUS_SUCCESS) { device_printf(dev, "hv send offload request succeeded\n"); ret = 0; } else { if (set_complete->status == STATUS_NOT_SUPPORTED) { device_printf(dev, "HV Not support offload\n"); ret = 0; } else { ret = set_complete->status; } } cleanup: hv_put_rndis_request(rndis_dev, request); return (ret); } /* * RNDIS filter receive indicate status */ static void hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response) { rndis_indicate_status *indicate = &response->msg.indicate_status; switch(indicate->status) { case RNDIS_STATUS_MEDIA_CONNECT: netvsc_linkstatus_callback(device->net_dev->dev, 1); break; case RNDIS_STATUS_MEDIA_DISCONNECT: netvsc_linkstatus_callback(device->net_dev->dev, 0); break; default: /* TODO: */ device_printf(device->net_dev->dev->device, "unknown status %d received\n", indicate->status); break; } } /* * RNDIS filter receive data */ static void hv_rf_receive_data(rndis_device *device, rndis_msg *message, struct hv_vmbus_channel *chan, netvsc_packet *pkt) { rndis_packet *rndis_pkt; ndis_8021q_info *rppi_vlan_info; uint32_t data_offset; rndis_tcp_ip_csum_info *csum_info = NULL; device_t dev = device->net_dev->dev->device; rndis_pkt = &message->msg.packet; /* * Fixme: Handle multiple rndis pkt msgs that may be enclosed in this * netvsc packet (ie tot_data_buf_len != message_length) */ /* Remove rndis header, then pass data packet up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; pkt->tot_data_buf_len -= data_offset; if (pkt->tot_data_buf_len < rndis_pkt->data_length) { pkt->status = nvsp_status_failure; device_printf(dev, "total length %u is less than data length %u\n", pkt->tot_data_buf_len, rndis_pkt->data_length); return; } pkt->tot_data_buf_len = rndis_pkt->data_length; pkt->data = (void *)((unsigned long)pkt->data + data_offset); rppi_vlan_info = hv_get_ppi_data(rndis_pkt, ieee_8021q_info); if (rppi_vlan_info) { pkt->vlan_tci = rppi_vlan_info->u1.s1.vlan_id; } else { pkt->vlan_tci = 0; } csum_info = hv_get_ppi_data(rndis_pkt, tcpip_chksum_info); netvsc_recv(chan, pkt, csum_info); } /* * RNDIS filter on receive */ int hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device *device, struct hv_vmbus_channel *chan, netvsc_packet *pkt) { rndis_device *rndis_dev; rndis_msg *rndis_hdr; /* Make sure the rndis device state is initialized */ if (net_dev->extension == NULL) { pkt->status = nvsp_status_failure; return (ENODEV); } rndis_dev = 
(rndis_device *)net_dev->extension; if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) { pkt->status = nvsp_status_failure; return (EINVAL); } rndis_hdr = pkt->data; switch (rndis_hdr->ndis_msg_type) { /* data message */ case REMOTE_NDIS_PACKET_MSG: hv_rf_receive_data(rndis_dev, rndis_hdr, chan, pkt); break; /* completion messages */ case REMOTE_NDIS_INITIALIZE_CMPLT: case REMOTE_NDIS_QUERY_CMPLT: case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_RESET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: hv_rf_receive_response(rndis_dev, rndis_hdr); break; /* notification message */ case REMOTE_NDIS_INDICATE_STATUS_MSG: hv_rf_receive_indicate_status(rndis_dev, rndis_hdr); break; default: printf("hv_rf_on_receive(): Unknown msg_type 0x%x\n", rndis_hdr->ndis_msg_type); break; } return (0); } /* * RNDIS filter query device */ static int hv_rf_query_device(rndis_device *device, uint32_t oid, void *result, uint32_t *result_size) { rndis_request *request; uint32_t in_result_size = *result_size; rndis_query_request *query; rndis_query_complete *query_complete; int ret = 0; *result_size = 0; request = hv_rndis_request(device, REMOTE_NDIS_QUERY_MSG, RNDIS_MESSAGE_SIZE(rndis_query_request)); if (request == NULL) { ret = -1; goto cleanup; } /* Set up the rndis query */ query = &request->request_msg.msg.query_request; query->oid = oid; query->info_buffer_offset = sizeof(rndis_query_request); query->info_buffer_length = 0; query->device_vc_handle = 0; if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) { struct rndis_recv_scale_cap *cap; request->request_msg.msg_len += sizeof(struct rndis_recv_scale_cap); query->info_buffer_length = sizeof(struct rndis_recv_scale_cap); cap = (struct rndis_recv_scale_cap *)((unsigned long)query + query->info_buffer_offset); cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES; cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; cap->hdr.size = sizeof(struct rndis_recv_scale_cap); } ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG); if (ret != 0) { /* Fixme: printf added */ printf("RNDISFILTER request failed to Send!\n"); goto cleanup; } sema_wait(&request->wait_sema); /* Copy the response back */ query_complete = &request->response_msg.msg.query_complete; if (query_complete->info_buffer_length > in_result_size) { ret = EINVAL; goto cleanup; } memcpy(result, (void *)((unsigned long)query_complete + query_complete->info_buffer_offset), query_complete->info_buffer_length); *result_size = query_complete->info_buffer_length; cleanup: if (request != NULL) hv_put_rndis_request(device, request); return (ret); } /* * RNDIS filter query device MAC address */ static inline int hv_rf_query_device_mac(rndis_device *device) { uint32_t size = HW_MACADDR_LEN; return (hv_rf_query_device(device, RNDIS_OID_802_3_PERMANENT_ADDRESS, device->hw_mac_addr, &size)); } /* * RNDIS filter query device link status */ static inline int hv_rf_query_device_link_status(rndis_device *device) { uint32_t size = sizeof(uint32_t); return (hv_rf_query_device(device, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size)); } static uint8_t netvsc_hash_key[HASH_KEYLEN] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; /* * RNDIS set vRSS parameters */ static int hv_rf_set_rss_param(rndis_device *device, int num_queue) { rndis_request *request; rndis_set_request *set; rndis_set_complete *set_complete; 
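	/*
	 * Side note for reference (not from the original source):
	 * netvsc_hash_key above is the usual 40-byte Toeplitz key.  A
	 * minimal sketch of the Toeplitz hash the host computes over, e.g.,
	 * the IPv4/TCP 4-tuple (be32dec() is from sys/endian.h):
	 *
	 *	static uint32_t
	 *	toeplitz_hash(const uint8_t key[40], const uint8_t *in, int len)
	 *	{
	 *		uint32_t hash = 0, window = be32dec(key);
	 *		int i, b;
	 *
	 *		for (i = 0; i < len; i++) {
	 *			for (b = 7; b >= 0; b--) {
	 *				if (in[i] & (1 << b))
	 *					hash ^= window;
	 *				window <<= 1;
	 *				window |= (key[i + 4] >> b) & 1;
	 *			}
	 *		}
	 *		return (hash);
	 *	}
	 *
	 * The low bits of that result index the indirection table which
	 * hv_rf_set_rss_param() fills in below (itab[i] = i % num_queue);
	 * that is what spreads flows across the sub-channels.
	 */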
rndis_recv_scale_param *rssp; uint32_t extlen = sizeof(rndis_recv_scale_param) + (4 * ITAB_NUM) + HASH_KEYLEN; uint32_t *itab, status; uint8_t *keyp; int i, ret; request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); if (request == NULL) { if (bootverbose) printf("Netvsc: No memory to set vRSS parameters.\n"); ret = -1; goto cleanup; } set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_GEN_RSS_PARAMETERS; set->info_buffer_length = extlen; set->info_buffer_offset = sizeof(rndis_set_request); set->device_vc_handle = 0; /* Fill out the rssp parameter structure */ rssp = (rndis_recv_scale_param *)(set + 1); rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS; rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; rssp->hdr.size = sizeof(rndis_recv_scale_param); rssp->flag = 0; rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 | RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6; rssp->indirect_tabsize = 4 * ITAB_NUM; rssp->indirect_taboffset = sizeof(rndis_recv_scale_param); rssp->hashkey_size = HASH_KEYLEN; rssp->hashkey_offset = rssp->indirect_taboffset + rssp->indirect_tabsize; /* Set indirection table entries */ itab = (uint32_t *)(rssp + 1); for (i = 0; i < ITAB_NUM; i++) itab[i] = i % num_queue; /* Set hash key values */ keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset); for (i = 0; i < HASH_KEYLEN; i++) keyp[i] = netvsc_hash_key[i]; ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { goto cleanup; } /* * Wait for the response from the host. Another thread will signal * us when the response has arrived. In the failure case, * sema_timedwait() returns a non-zero status after waiting 5 seconds. */ ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret == 0) { /* Response received, check status */ set_complete = &request->response_msg.msg.set_complete; status = set_complete->status; if (status != RNDIS_STATUS_SUCCESS) { /* Bad response status, return error */ if (bootverbose) printf("Netvsc: Failed to set vRSS " "parameters.\n"); ret = -2; } else { if (bootverbose) printf("Netvsc: Successfully set vRSS " "parameters.\n"); } } else { /* * We cannot deallocate the request since we may still * receive a send completion for it. */ printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n", request->request_msg.msg.init_request.request_id, ret); goto exit; } cleanup: if (request != NULL) { hv_put_rndis_request(device, request); } exit: return (ret); } /* * RNDIS filter set packet filter * Sends an rndis request with the new filter, then waits for a response * from the host. * Returns zero on success, non-zero on failure. */ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter) { rndis_request *request; rndis_set_request *set; rndis_set_complete *set_complete; uint32_t status; int ret; request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + sizeof(uint32_t)); if (request == NULL) { ret = -1; goto cleanup; } /* Set up the rndis set */ set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; set->info_buffer_length = sizeof(uint32_t); set->info_buffer_offset = sizeof(rndis_set_request); memcpy((void *)((unsigned long)set + sizeof(rndis_set_request)), &new_filter, sizeof(uint32_t)); ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { goto cleanup; } /* * Wait for the response from the host. 
Another thread will signal * us when the response has arrived. In the failure case, * sema_timedwait() returns a non-zero status after waiting 5 seconds. */ ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret == 0) { /* Response received, check status */ set_complete = &request->response_msg.msg.set_complete; status = set_complete->status; if (status != RNDIS_STATUS_SUCCESS) { /* Bad response status, return error */ ret = -2; } } else { /* * We cannot deallocate the request since we may still * receive a send completion for it. */ goto exit; } cleanup: if (request != NULL) { hv_put_rndis_request(device, request); } exit: return (ret); } /* * RNDIS filter init device */ static int hv_rf_init_device(rndis_device *device) { rndis_request *request; rndis_initialize_request *init; rndis_initialize_complete *init_complete; uint32_t status; int ret; request = hv_rndis_request(device, REMOTE_NDIS_INITIALIZE_MSG, RNDIS_MESSAGE_SIZE(rndis_initialize_request)); if (!request) { ret = -1; goto cleanup; } /* Set up the rndis set */ init = &request->request_msg.msg.init_request; init->major_version = RNDIS_MAJOR_VERSION; init->minor_version = RNDIS_MINOR_VERSION; /* * Per the RNDIS document, this should be set to the max MTU * plus the header size. However, 2048 works fine, so leaving * it as is. */ init->max_xfer_size = 2048; device->state = RNDIS_DEV_INITIALIZING; ret = hv_rf_send_request(device, request, REMOTE_NDIS_INITIALIZE_MSG); if (ret != 0) { device->state = RNDIS_DEV_UNINITIALIZED; goto cleanup; } sema_wait(&request->wait_sema); init_complete = &request->response_msg.msg.init_complete; status = init_complete->status; if (status == RNDIS_STATUS_SUCCESS) { device->state = RNDIS_DEV_INITIALIZED; ret = 0; } else { device->state = RNDIS_DEV_UNINITIALIZED; ret = -1; } cleanup: if (request) { hv_put_rndis_request(device, request); } return (ret); } #define HALT_COMPLETION_WAIT_COUNT 25 /* * RNDIS filter halt device */ static int hv_rf_halt_device(rndis_device *device) { rndis_request *request; rndis_halt_request *halt; int i, ret; /* Attempt to do a rndis device halt */ request = hv_rndis_request(device, REMOTE_NDIS_HALT_MSG, RNDIS_MESSAGE_SIZE(rndis_halt_request)); if (request == NULL) { return (-1); } /* initialize "poor man's semaphore" */ request->halt_complete_flag = 0; /* Set up the rndis set */ halt = &request->request_msg.msg.halt_request; halt->request_id = atomic_fetchadd_int(&device->new_request_id, 1); /* Increment to get the new value (call above returns old value) */ halt->request_id += 1; ret = hv_rf_send_request(device, request, REMOTE_NDIS_HALT_MSG); if (ret != 0) { return (-1); } /* * Wait for halt response from halt callback. We must wait for * the transaction response before freeing the request and other * resources. 
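 *
 * Note (not from the original source): REMOTE_NDIS_HALT_MSG is the one
 * request that gets no RNDIS completion message from the host, so instead
 * of sleeping on wait_sema the code polls halt_complete_flag, which the
 * send-completion callback sets.  The wait below is bounded by
 * HALT_COMPLETION_WAIT_COUNT * 400us = 25 * 400us = 10ms.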
*/ for (i=HALT_COMPLETION_WAIT_COUNT; i > 0; i--) { if (request->halt_complete_flag != 0) { break; } DELAY(400); } if (i == 0) { return (-1); } device->state = RNDIS_DEV_UNINITIALIZED; hv_put_rndis_request(device, request); return (0); } /* * RNDIS filter open device */ static int hv_rf_open_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_INITIALIZED) { return (0); } if (hv_promisc_mode != 1) { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_BROADCAST | NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_DIRECTED); } else { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_PROMISCUOUS); } if (ret == 0) { device->state = RNDIS_DEV_DATAINITIALIZED; } return (ret); } /* * RNDIS filter close device */ static int hv_rf_close_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_DATAINITIALIZED) { return (0); } ret = hv_rf_set_packet_filter(device, 0); if (ret == 0) { device->state = RNDIS_DEV_INITIALIZED; } return (ret); } /* * RNDIS filter on device add */ int hv_rf_on_device_add(struct hv_device *device, void *additl_info, int nchan) { int ret; netvsc_dev *net_dev; rndis_device *rndis_dev; nvsp_msg *init_pkt; rndis_offload_params offloads; struct rndis_recv_scale_cap rsscaps; uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap); netvsc_device_info *dev_info = (netvsc_device_info *)additl_info; device_t dev = device->device; rndis_dev = hv_get_rndis_device(); if (rndis_dev == NULL) { return (ENOMEM); } /* * Let the inner driver handle this first to create the netvsc channel * NOTE! Once the channel is created, we may get a receive callback * (hv_rf_on_receive()) before this call is completed. * Note: Earlier code used a function pointer here. */ net_dev = hv_nv_on_device_add(device, additl_info); if (!net_dev) { hv_put_rndis_device(rndis_dev); return (ENOMEM); } /* * Initialize the rndis device */ net_dev->extension = rndis_dev; rndis_dev->net_dev = net_dev; /* Send the rndis initialization message */ ret = hv_rf_init_device(rndis_dev); if (ret != 0) { /* * TODO: If rndis init failed, we will need to shut down * the channel */ } /* Get the mac address */ ret = hv_rf_query_device_mac(rndis_dev); if (ret != 0) { /* TODO: shut down rndis device and the channel */ } /* config csum offload and send request to host */ memset(&offloads, 0, sizeof(offloads)); offloads.ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.tcp_ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.udp_ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.tcp_ipv6_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.udp_ipv6_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.lso_v2_ipv4 = RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; ret = hv_rf_send_offload_request(device, &offloads); if (ret != 0) { /* TODO: shut down rndis device and the channel */ device_printf(dev, "hv_rf_send_offload_request failed, ret=%d\n", ret); } memcpy(dev_info->mac_addr, rndis_dev->hw_mac_addr, HW_MACADDR_LEN); hv_rf_query_device_link_status(rndis_dev); dev_info->link_state = rndis_dev->link_status; net_dev->num_channel = 1; if (net_dev->nvsp_version < NVSP_PROTOCOL_VERSION_5 || nchan == 1) return (0); memset(&rsscaps, 0, rsscaps_size); ret = hv_rf_query_device(rndis_dev, RNDIS_OID_GEN_RSS_CAPABILITIES, &rsscaps, &rsscaps_size); if ((ret != 0) || (rsscaps.num_recv_que < 2)) { device_printf(dev, "hv_rf_query_device failed or " "rsscaps.num_recv_que < 2 \n"); goto out; } device_printf(dev, "channel, offered %u, requested %d\n", rsscaps.num_recv_que, nchan); if 
(nchan > rsscaps.num_recv_que) nchan = rsscaps.num_recv_que; net_dev->num_channel = nchan; if (net_dev->num_channel == 1) { device_printf(dev, "net_dev->num_channel == 1 under VRSS\n"); goto out; } /* request host to create sub channels */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg5_type_subchannel; init_pkt->msgs.vers_5_msgs.subchannel_request.op = NVSP_SUBCHANNE_ALLOCATE; init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels = net_dev->num_channel - 1; ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { device_printf(dev, "Fail to allocate subchannel\n"); goto out; } sema_wait(&net_dev->channel_init_sema); if (init_pkt->msgs.vers_5_msgs.subchn_complete.status != nvsp_status_success) { ret = ENODEV; device_printf(dev, "sub channel complete error\n"); goto out; } net_dev->num_channel = 1 + init_pkt->msgs.vers_5_msgs.subchn_complete.num_subchannels; ret = hv_rf_set_rss_param(rndis_dev, net_dev->num_channel); out: if (ret) net_dev->num_channel = 1; return (ret); } /* * RNDIS filter on device remove */ int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev; rndis_device *rndis_dev = (rndis_device *)net_dev->extension; int ret; /* Halt and release the rndis device */ ret = hv_rf_halt_device(rndis_dev); hv_put_rndis_device(rndis_dev); net_dev->extension = NULL; /* Pass control to inner driver to remove the device */ ret |= hv_nv_on_device_remove(device, destroy_channel); return (ret); } /* * RNDIS filter on open */ int hv_rf_on_open(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev; return (hv_rf_open_device((rndis_device *)net_dev->extension)); } /* * RNDIS filter on close */ int hv_rf_on_close(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev; return (hv_rf_close_device((rndis_device *)net_dev->extension)); } /* * RNDIS filter on send request completion callback */ static void hv_rf_on_send_request_completion(struct hv_vmbus_channel *chan __unused, void *context __unused) { } /* * RNDIS filter on send request (halt only) completion callback */ static void hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *chan __unused, void *context) { rndis_request *request = context; /* * Notify hv_rf_halt_device() about halt completion. * The halt code must wait for completion before freeing * the transaction resources. */ request->halt_complete_flag = 1; } void hv_rf_channel_rollup(struct hv_vmbus_channel *chan) { netvsc_channel_rollup(chan); } Index: head/sys/dev/hyperv/netvsc/hv_rndis_filter.h =================================================================== --- head/sys/dev/hyperv/netvsc/hv_rndis_filter.h (revision 298445) +++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.h (revision 298446) @@ -1,125 +1,125 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HV_RNDIS_FILTER_H__ #define __HV_RNDIS_FILTER_H__ /* * Defines */ /* Destroy or preserve channel on filter/netvsc teardown */ #define HV_RF_NV_DESTROY_CHANNEL TRUE #define HV_RF_NV_RETAIN_CHANNEL FALSE /* * Number of page buffers to reserve for the RNDIS filter packet in the * transmitted message. */ #define HV_RF_NUM_TX_RESERVED_PAGE_BUFS 1 /* * Data types */ typedef enum { RNDIS_DEV_UNINITIALIZED = 0, RNDIS_DEV_INITIALIZING, RNDIS_DEV_INITIALIZED, RNDIS_DEV_DATAINITIALIZED, } rndis_device_state; typedef struct rndis_request_ { STAILQ_ENTRY(rndis_request_) mylist_entry; struct sema wait_sema; /* * The max response size is sizeof(rndis_msg) + PAGE_SIZE. * * XXX * This is ugly and should be cleaned up once we busdma-fy * RNDIS request bits. */ rndis_msg response_msg; uint8_t buf_resp[PAGE_SIZE]; /* Simplify allocation by having a netvsc packet inline */ netvsc_packet pkt; hv_vmbus_page_buffer buffer; /* * The max request size is sizeof(rndis_msg) + PAGE_SIZE. * * NOTE: * This is required for the large request like RSS settings. * * XXX * This is ugly and should be cleaned up once we busdma-fy * RNDIS request bits. */ rndis_msg request_msg; uint8_t buf_req[PAGE_SIZE]; /* Fixme: Poor man's semaphore. */ uint32_t halt_complete_flag; } rndis_request; typedef struct rndis_device_ { netvsc_dev *net_dev; rndis_device_state state; uint32_t link_status; uint32_t new_request_id; struct mtx req_lock; STAILQ_HEAD(RQ, rndis_request_) myrequest_list; uint8_t hw_mac_addr[HW_MACADDR_LEN]; } rndis_device; /* * Externs */ struct hv_vmbus_channel; int hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device *device, struct hv_vmbus_channel *chan, netvsc_packet *pkt); void hv_rf_receive_rollup(netvsc_dev *net_dev); void hv_rf_channel_rollup(struct hv_vmbus_channel *chan); int hv_rf_on_device_add(struct hv_device *device, void *additl_info, int nchan); int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel); int hv_rf_on_open(struct hv_device *device); int hv_rf_on_close(struct hv_device *device); #endif /* __HV_RNDIS_FILTER_H__ */ Index: head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 298445) +++ head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 298446) @@ -1,2135 +1,2135 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. 
+ * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface * to the Comman Access Method (CAM) layer. CAM control blocks (CCBs) are * converted into VSCSI protocol messages which are delivered to the parent * partition StorVSP driver over the Hyper-V VMBUS. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vstorage.h" #define STORVSC_RINGBUFFER_SIZE (20*PAGE_SIZE) #define STORVSC_MAX_LUNS_PER_TARGET (64) #define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2) #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1) #define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS #define STORVSC_MAX_TARGETS (2) #define STORVSC_WIN7_MAJOR 4 #define STORVSC_WIN7_MINOR 2 #define STORVSC_WIN8_MAJOR 5 #define STORVSC_WIN8_MINOR 1 #define VSTOR_PKT_SIZE (sizeof(struct vstor_packet) - vmscsi_size_delta) #define HV_ALIGN(x, a) roundup2(x, a) struct storvsc_softc; struct hv_sgl_node { LIST_ENTRY(hv_sgl_node) link; struct sglist *sgl_data; }; struct hv_sgl_page_pool{ LIST_HEAD(, hv_sgl_node) in_use_sgl_list; LIST_HEAD(, hv_sgl_node) free_sgl_list; boolean_t is_init; } g_hv_sgl_page_pool; #define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT enum storvsc_request_type { WRITE_TYPE, READ_TYPE, UNKNOWN_TYPE }; struct hv_storvsc_request { LIST_ENTRY(hv_storvsc_request) link; struct vstor_packet vstor_packet; hv_vmbus_multipage_buffer data_buf; void *sense_data; uint8_t sense_info_len; uint8_t retries; union ccb *ccb; struct storvsc_softc *softc; struct callout callout; struct sema synch_sema; /*Synchronize the request/response if needed */ struct sglist *bounce_sgl; unsigned int bounce_sgl_count; uint64_t not_aligned_seg_bits; }; struct storvsc_softc { struct hv_device *hs_dev; LIST_HEAD(, hv_storvsc_request) hs_free_list; struct mtx hs_lock; struct storvsc_driver_props *hs_drv_props; int hs_unit; uint32_t 
hs_frozen; struct cam_sim *hs_sim; struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; boolean_t hs_open_multi_channel; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; }; /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. */ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; uint8_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ static const hv_guid gStorVscDeviceType={ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ static const hv_guid gBlkVscDeviceType={ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE Storage Interface", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE}, {"storvsc", "Hyper-V SCSI Storage Interface", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE} }; /* * Sense buffer size changed in win8; have a run-time * variable to track the size we should use. */ static int sense_buffer_size; /* * The size of the vmscsi_request has changed in win8. The * additional size is for the newly added elements in the * structure. These elements are valid only when we are talking * to a win8 host. * Track the correct size we need to apply. 
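 *
 * A minimal sketch of how both run-time sizes are chosen once the host
 * version has been negotiated (the constant and structure names here are
 * illustrative, not necessarily the driver's actual identifiers):
 *
 *	if (host_is_win8_or_later) {
 *		sense_buffer_size = WIN8_SENSE_BUFFER_SIZE;
 *		vmscsi_size_delta = 0;		// full win8 vmscsi_request
 *	} else {
 *		sense_buffer_size = PRE_WIN8_SENSE_BUFFER_SIZE;
 *		vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
 *	}
 *
 * so that VSTOR_PKT_SIZE (defined above) never includes the win8-only tail
 * of struct vstor_packet when talking to an older host.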
*/ static int vmscsi_size_delta; static int storvsc_current_major; static int storvsc_current_minor; /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_rescan_target(struct storvsc_softc *sc); static void hv_storvsc_on_channel_callback(void *context); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct hv_device *device); static void storvsc_io_done(struct hv_storvsc_request *reqp); static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits); void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); /** * The host is capable of sending messages to us that are * completely unsolicited. So, we need to address the race * condition where we may be in the process of unloading the * driver when the host may send us an unsolicited message. * We address this issue by implementing a sequentially * consistent protocol: * * 1. Channel callback is invoked while holding the channel lock * and an unloading driver will reset the channel callback under * the protection of this channel lock. * * 2. To ensure bounded wait time for unloading a driver, we don't * permit outgoing traffic once the device is marked as being * destroyed. * * 3. Once the device is marked as being destroyed, we only * permit incoming traffic to properly account for * packets already sent out. */ static inline struct storvsc_softc * get_stor_device(struct hv_device *device, boolean_t outbound) { struct storvsc_softc *sc; sc = device_get_softc(device->device); if (outbound) { /* * Here we permit outgoing I/O only * if the device is not being destroyed. */ if (sc->hs_destroy) { sc = NULL; } } else { /* * Inbound case: if the device is being destroyed, only * permit traffic that accounts for * messages already sent out.
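 *
 * Usage sketch (not from the original source): callers pick the mode from
 * the direction of the traffic they are about to handle, e.g. on the send
 * path (cf. hv_storvsc_io_request() below):
 *
 *	sc = get_stor_device(device, TRUE);	// outbound
 *	if (sc == NULL)
 *		return (ENODEV);		// teardown in progress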
*/ if (sc->hs_destroy && (sc->hs_num_out_reqs == 0)) { sc = NULL; } } return sc; } /** * @brief Callback handler, invoked when a multi-channel offer is received * * @param context the new multi-channel */ static void storvsc_handle_sc_creation(void *context) { hv_vmbus_channel *new_channel; struct hv_device *device; struct storvsc_softc *sc; struct vmstor_chan_props props; int ret = 0; new_channel = (hv_vmbus_channel *)context; device = new_channel->device; sc = get_stor_device(device, TRUE); if (sc == NULL) return; if (FALSE == sc->hs_open_multi_channel) return; memset(&props, 0, sizeof(props)); ret = hv_vmbus_channel_open(new_channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, new_channel); return; } /** * @brief Send multi-channel creation request to host * * @param device a Hyper-V device pointer * @param max_chans the max channels supported by vmbus */ static void storvsc_send_multichannel_request(struct hv_device *dev, int max_chans) { struct storvsc_softc *sc; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; int request_channels_cnt = 0; int ret; /* Get the number of sub-channels to request */ request_channels_cnt = MIN(max_chans, mp_ncpus); sc = get_stor_device(dev, TRUE); if (sc == NULL) { printf("Storvsc_error: get sc failed while sending multi-channel " "request\n"); return; } request = &sc->hs_init_req; /* Establish a handler for multi-channel */ dev->channel->sc_creation_callback = storvsc_handle_sc_creation; /* request the host to create multi-channel */ memset(request, 0, sizeof(struct hv_storvsc_request)); sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet = &request->vstor_packet; vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.multi_channels_cnt = request_channels_cnt; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); /* wait for 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) { printf("Storvsc_error: create multi-channel timeout, %d\n", ret); return; } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { printf("Storvsc_error: create multi-channel invalid operation " "(%d) or status (%u)\n", vstor_packet->operation, vstor_packet->status); return; } sc->hs_open_multi_channel = TRUE; if (bootverbose) printf("Storvsc: multi-channel creation succeeded!\n"); } /** * @brief initialize channel connection to parent partition * * @param dev a Hyper-V device pointer * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_channel_init(struct hv_device *dev) { int ret = 0; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; struct storvsc_softc *sc; uint16_t max_chans = 0; boolean_t support_multichannel = FALSE; sc = get_stor_device(dev, TRUE); if (sc == NULL) return (ENODEV); request = &sc->hs_init_req; memset(request, 0, sizeof(struct hv_storvsc_request)); vstor_packet = &request->vstor_packet; request->softc = sc; /** * Initiate the vsc/vsp initialization protocol on the open channel */ sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret =
hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.version.major_minor = VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor); /* revision is only significant for Windows guests */ vstor_packet->u.version.revision = 0; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret) goto cleanup; /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) goto cleanup; /** * Query channel properties */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if ( ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* multi-channels feature is supported by WIN8 and above version */ max_chans = vstor_packet->u.chan_props.max_channel_cnt; if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) && (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) && (vstor_packet->u.chan_props.flags & HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) { support_multichannel = TRUE; } memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) goto cleanup; /* * If multi-channel is supported, send multichannel create * request to host. */ if (support_multichannel) storvsc_send_multichannel_request(dev, max_chans); cleanup: sema_destroy(&request->synch_sema); return (ret); } /** * @brief Open channel connection to paraent partition StorVSP driver * * Open and initialize channel connection to parent partition StorVSP driver. 
* * @param pointer to a Hyper-V device * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_connect_vsp(struct hv_device *dev) { int ret = 0; struct vmstor_chan_props props; struct storvsc_softc *sc; sc = device_get_softc(dev->device); memset(&props, 0, sizeof(struct vmstor_chan_props)); /* * Open the channel */ ret = hv_vmbus_channel_open( dev->channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, dev->channel); if (ret != 0) { return ret; } ret = hv_storvsc_channel_init(dev); return (ret); } #if HVS_HOST_RESET static int hv_storvsc_host_reset(struct hv_device *dev) { int ret = 0; struct storvsc_softc *sc; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; sc = get_stor_device(dev, TRUE); if (sc == NULL) { return ENODEV; } request = &sc->hs_reset_req; request->softc = sc; vstor_packet = &request->vstor_packet; sema_init(&request->synch_sema, 0, "stor synch sema"); vstor_packet->operation = VSTOR_OPERATION_RESETBUS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet(dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)&sc->hs_reset_req, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { goto cleanup; } /* * At this point, all outstanding requests in the adapter * should have been flushed out and return to us */ cleanup: sema_destroy(&request->synch_sema); return (ret); } #endif /* HVS_HOST_RESET */ /** * @brief Function to initiate an I/O request * * @param device Hyper-V device pointer * @param request pointer to a request structure * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_io_request(struct hv_device *device, struct hv_storvsc_request *request) { struct storvsc_softc *sc; struct vstor_packet *vstor_packet = &request->vstor_packet; struct hv_vmbus_channel* outgoing_channel = NULL; int ret = 0; sc = get_stor_device(device, TRUE); if (sc == NULL) { return ENODEV; } vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE; vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size; vstor_packet->u.vm_srb.transfer_len = request->data_buf.length; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; outgoing_channel = vmbus_select_outgoing_channel(device->channel); mtx_unlock(&request->softc->hs_lock); if (request->data_buf.length) { ret = hv_vmbus_channel_send_packet_multipagebuffer( outgoing_channel, &request->data_buf, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } else { ret = hv_vmbus_channel_send_packet( outgoing_channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); } /** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. 
*/ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_rescan_target(struct storvsc_softc *sc) { path_id_t pathid; target_id_t targetid; union ccb *ccb; pathid = cam_sim_path(sc->hs_sim); targetid = CAM_TARGET_WILDCARD; /* * Allocate a CCB and schedule a rescan. */ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { printf("unable to create path for rescan, pathid: %u," "targetid: %u\n", pathid, targetid); xpt_free_ccb(ccb); return; } if (targetid == CAM_TARGET_WILDCARD) ccb->ccb_h.func_code = XPT_SCAN_BUS; else ccb->ccb_h.func_code = XPT_SCAN_TGT; xpt_rescan(ccb); } static void hv_storvsc_on_channel_callback(void *context) { int ret = 0; hv_vmbus_channel *channel = (hv_vmbus_channel *)context; struct hv_device *device = NULL; struct storvsc_softc *sc; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; device = channel->device; KASSERT(device, ("device is NULL")); sc = get_stor_device(device, FALSE); if (sc == NULL) { printf("Storvsc_error: get stor device failed.\n"); return; } ret = hv_vmbus_channel_recv_packet( channel, packet, roundup2(VSTOR_PKT_SIZE, 8), &bytes_recvd, &request_id); while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: if (request == NULL) panic("VMBUS: storvsc received a " "packet with NULL request id in " "COMPLETEIO operation."); hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: printf("VMBUS: storvsc operation %d not " "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; case VSTOR_OPERATION_ENUMERATE_BUS: hv_storvsc_rescan_target(sc); break; default: break; } } ret = hv_vmbus_channel_recv_packet( channel, packet, roundup2(VSTOR_PKT_SIZE, 8), &bytes_recvd, &request_id); } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, a ENXIO is returned. If the input device is * for BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. 
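* For example, when the hw.ata.disk_enable tunable is present in the
* kernel environment (e.g. set from loader.conf), the BlkVSC probe
* below deliberately fails with ENXIO so the emulated ATA/IDE disk
* attaches instead; when the tunable is absent, the enlightened
* device is chosen.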
* * @param dev a device * @returns 0 on success, ENXIO if not a matching StorVSC device */ static int storvsc_probe(device_t dev) { int ata_disk_enable = 0; int ret = ENXIO; if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 || hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) { sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; vmscsi_size_delta = sizeof(struct vmscsi_win8_extension); storvsc_current_major = STORVSC_WIN7_MAJOR; storvsc_current_minor = STORVSC_WIN7_MINOR; } else { sense_buffer_size = POST_WIN7_STORVSC_SENSE_BUFFER_SIZE; vmscsi_size_delta = 0; storvsc_current_major = STORVSC_WIN8_MAJOR; storvsc_current_minor = STORVSC_WIN8_MINOR; } switch (storvsc_get_storage_type(dev)) { case DRIVER_BLKVSC: if (bootverbose) device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n"); if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) { if (bootverbose) device_printf(dev, "Enlightened ATA/IDE detected\n"); ret = BUS_PROBE_DEFAULT; } else if (bootverbose) device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n"); break; case DRIVER_STORVSC: if (bootverbose) device_printf(dev, "Enlightened SCSI device detected\n"); ret = BUS_PROBE_DEFAULT; break; default: ret = ENXIO; } return (ret); } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. * * @param dev a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { struct hv_device *hv_dev = vmbus_get_devctx(dev); enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; struct hv_sgl_node *sgl_node = NULL; void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls.
*/ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = hv_dev; device_set_desc(dev, g_drv_props_table[stor_type].drv_desc); LIST_INIT(&sc->hs_free_list); mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) { reqp = malloc(sizeof(struct hv_storvsc_request), M_DEVBUF, M_WAITOK|M_ZERO); reqp->softc = sc; LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /* create sg-list page pool */ if (FALSE == g_hv_sgl_page_pool.is_init) { g_hv_sgl_page_pool.is_init = TRUE; LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); /* * Pre-create SG list, each SG list with * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each * segment has one page buffer */ for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) { sgl_node = malloc(sizeof(struct hv_sgl_node), M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data = sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT, M_WAITOK|M_ZERO); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { tmp_buff = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data->sg_segs[j].ss_paddr = (vm_paddr_t)tmp_buff; } LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } } sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; sc->hs_open_multi_channel = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(hv_dev); if (ret != 0) { goto cleanup; } /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? 
*/ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_device *hv_device = vmbus_get_devctx(dev); struct hv_sgl_node *sgl_node = NULL; int j = 0; sc->hs_destroy = TRUE; /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ hv_vmbus_channel_close(hv_device->channel); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){ if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. 
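* (Note: the preprocessor guard in the code is actually spelled
* HVS_TIMEOUT_TEST; building the module with something like
* CFLAGS+=-DHVS_TIMEOUT_TEST=1, an assumed make(1) invocation, is
* what would enable it.)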
* * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc->hs_dev, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ #ifdef notyet /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } #endif /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); hv_storvsc_on_channel_callback(sc->hs_dev->channel); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. The requests are in the form of * CAM control blocks which indicate the action being performed. 
* Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? 
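/*
 * Editor's note: the XPT_RESET_BUS/XPT_RESET_DEV branch above only
 * performs a real reset when HVS_HOST_RESET is compiled in (see
 * hv_storvsc_host_reset() earlier); in the default build it falls
 * through to this "not supported" reply.
 */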
"bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bzero(reqp, sizeof(struct hv_storvsc_request)); reqp->softc = sc; ccb->ccb_h.status |= CAM_SIM_QUEUED; if ((res = create_storvsc_request(ccb, reqp)) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } #ifdef notyet if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, 1); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } #endif if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** * @brief destroy bounce buffer * * This function is responsible for destroy a Scatter/Gather list * that create by storvsc_create_bounce_buffer() * * @param sgl- the Scatter/Gather need be destroy * @param sg_count- page count of the SG list. * */ static void storvsc_destroy_bounce_buffer(struct sglist *sgl) { struct hv_sgl_node *sgl_node = NULL; if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) { printf("storvsc error: not enough in use sgl\n"); return; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_REMOVE(sgl_node, link); sgl_node->sgl_data = sgl; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } /** * @brief create bounce buffer * * This function is responsible for create a Scatter/Gather list, * which hold several pages that can be aligned with page size. * * @param seg_count- SG-list segments count * @param write - if WRITE_TYPE, set SG list page used size to 0, * otherwise set used size to page size. * * return NULL if create failed */ static struct sglist * storvsc_create_bounce_buffer(uint16_t seg_count, int write) { int i = 0; struct sglist *bounce_sgl = NULL; unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE); struct hv_sgl_node *sgl_node = NULL; /* get struct sglist from free_sgl_list */ if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { printf("storvsc error: not enough free sgl\n"); return NULL; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); bounce_sgl = sgl_node->sgl_data; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link); bounce_sgl->sg_maxseg = seg_count; if (write == WRITE_TYPE) bounce_sgl->sg_nseg = 0; else bounce_sgl->sg_nseg = seg_count; for (i = 0; i < seg_count; i++) bounce_sgl->sg_segs[i].ss_len = buf_len; return bounce_sgl; } /** * @brief copy data from SG list to bounce buffer * * This function is responsible for copy data from one SG list's segments * to another SG list which used as bounce buffer. 
* * @param bounce_sgl - the destination SG list * @param orig_sgl - the segments of the source SG list. * @param orig_sgl_count - the count of segments. * @param seg_bits - bitmask indicating which segments need the * bounce buffer; a set bit means the segment is copied. * */ static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits) { int src_sgl_idx = 0; for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) { if (seg_bits & (1 << src_sgl_idx)) { memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr, (void*)orig_sgl[src_sgl_idx].ds_addr, orig_sgl[src_sgl_idx].ds_len); bounce_sgl->sg_segs[src_sgl_idx].ss_len = orig_sgl[src_sgl_idx].ds_len; } } } /** * @brief copy data from the bounce SG list back to another SG list * * This function copies data from the bounce-buffer SG list back into * the destination SG list's segments. * * @param dest_sgl - the destination SG list's segments * @param dest_sgl_count - the count of destination SG list segments. * @param src_sgl - the source (bounce) SG list. * @param seg_bits - bitmask indicating which segments of the source * SG list used the bounce buffer. * */ void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits) { int sgl_idx = 0; for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) { if (seg_bits & (1 << sgl_idx)) { memcpy((void*)(dest_sgl[sgl_idx].ds_addr), (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr), src_sgl->sg_segs[sgl_idx].ss_len); } } } /** * @brief check whether an SG list needs a bounce buffer * * This function checks whether a bounce buffer is needed for the SG list. * * @param sgl - the SG list's segments * @param sg_count - the count of SG list segments. * @param bits - bitmask of the segments that need the bounce buffer * * @returns -1 if the SG list does not need a bounce buffer, 0 otherwise */ static int storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl, unsigned int sg_count, uint64_t *bits) { int i = 0; int offset = 0; uint64_t phys_addr = 0; uint64_t tmp_bits = 0; boolean_t found_hole = FALSE; boolean_t pre_aligned = TRUE; if (sg_count < 2){ return -1; } *bits = 0; phys_addr = vtophys(sgl[0].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset != 0) { pre_aligned = FALSE; tmp_bits |= 1; } for (i = 1; i < sg_count; i++) { phys_addr = vtophys(sgl[i].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset == 0) { if (FALSE == pre_aligned){ /* * This segment is aligned; if the previous * one was not aligned, we have found a hole. */ found_hole = TRUE; } pre_aligned = TRUE; } else { tmp_bits |= 1 << i; if (!pre_aligned) { if (phys_addr != vtophys(sgl[i-1].ds_addr + sgl[i-1].ds_len)) { /* * Check whether this segment is * contiguous with the previous one; * if not, we have found a hole. */ found_hole = TRUE; } } else { found_hole = TRUE; } pre_aligned = FALSE; } } if (!found_hole) { return (-1); } else { *bits = tmp_bits; return 0; } } /** * @brief Fill in a request structure based on a CAM control block * * Fills in a request structure based on the contents of a CAM control * block. The request structure holds the payload information for a * VSCSI protocol request.
* * @param ccb pointer to a CAM contorl block * @param reqp pointer to a request structure */ static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp) { struct ccb_scsiio *csio = &ccb->csio; uint64_t phys_addr; uint32_t bytes_to_copy = 0; uint32_t pfn_num = 0; uint32_t pfn; uint64_t not_aligned_seg_bits = 0; /* refer to struct vmscsi_req for meanings of these two fields */ reqp->vstor_packet.u.vm_srb.port = cam_sim_unit(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.path_id = cam_sim_bus(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id; reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun; reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len; if(ccb->ccb_h.flags & CAM_CDB_POINTER) { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); } else { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); } switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE; break; case CAM_DIR_IN: reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE; break; case CAM_DIR_NONE: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; default: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; } reqp->sense_data = &csio->sense_data; reqp->sense_info_len = csio->sense_len; reqp->ccb = ccb; if (0 == csio->dxfer_len) { return (0); } reqp->data_buf.length = csio->dxfer_len; switch (ccb->ccb_h.flags & CAM_DATA_MASK) { case CAM_DATA_VADDR: { bytes_to_copy = csio->dxfer_len; phys_addr = vtophys(csio->data_ptr); reqp->data_buf.offset = phys_addr & PAGE_MASK; while (bytes_to_copy != 0) { int bytes, page_offset; phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length - bytes_to_copy]); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[pfn_num] = pfn; page_offset = phys_addr & PAGE_MASK; bytes = min(PAGE_SIZE - page_offset, bytes_to_copy); bytes_to_copy -= bytes; pfn_num++; } break; } case CAM_DATA_SG: { int i = 0; int offset = 0; int ret; bus_dma_segment_t *storvsc_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt; printf("Storvsc: get SG I/O operation, %d\n", reqp->vstor_packet.u.vm_srb.data_in); if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){ printf("Storvsc: %d segments is too much, " "only support %d segments\n", storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT); return (EINVAL); } /* * We create our own bounce buffer function currently. Idealy * we should use BUS_DMA(9) framework. But with current BUS_DMA * code there is no callback API to check the page alignment of * middle segments before busdma can decide if a bounce buffer * is needed for particular segment. There is callback, * "bus_dma_filter_t *filter", but the parrameters are not * sufficient for storvsc driver. * TODO: * Add page alignment check in BUS_DMA(9) callback. Once * this is complete, switch the following code to use * BUS_DMA(9) for storvsc bounce buffer support. 
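* As a concrete illustration: the multi-page buffer sent to the host
* carries one starting byte offset plus an array of whole page frames,
* so when e.g. segment 0 ends in the middle of a page there is no way
* to express the gap before segment 1; that interior misalignment is
* what storvsc_check_bounce_buffer_sgl() detects and what the bounce
* pages paper over.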
*/ /* check if we need to create bounce buffer */ ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, storvsc_sg_count, ¬_aligned_seg_bits); if (ret != -1) { reqp->bounce_sgl = storvsc_create_bounce_buffer(storvsc_sg_count, reqp->vstor_packet.u.vm_srb.data_in); if (NULL == reqp->bounce_sgl) { printf("Storvsc_error: " "create bounce buffer failed.\n"); return (ENOMEM); } reqp->bounce_sgl_count = storvsc_sg_count; reqp->not_aligned_seg_bits = not_aligned_seg_bits; /* * if it is write, we need copy the original data *to bounce buffer */ if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_sgl_to_bounce_buf( reqp->bounce_sgl, storvsc_sglist, storvsc_sg_count, reqp->not_aligned_seg_bits); } /* transfer virtual address to physical frame number */ if (reqp->not_aligned_seg_bits & 0x1){ phys_addr = vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); }else{ phys_addr = vtophys(storvsc_sglist[0].ds_addr); } reqp->data_buf.offset = phys_addr & PAGE_MASK; pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[0] = pfn; for (i = 1; i < storvsc_sg_count; i++) { if (reqp->not_aligned_seg_bits & (1 << i)) { phys_addr = vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); } else { phys_addr = vtophys(storvsc_sglist[i].ds_addr); } pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } } else { phys_addr = vtophys(storvsc_sglist[0].ds_addr); reqp->data_buf.offset = phys_addr & PAGE_MASK; for (i = 0; i < storvsc_sg_count; i++) { phys_addr = vtophys(storvsc_sglist[i].ds_addr); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } /* check the last segment cross boundary or not */ offset = phys_addr & PAGE_MASK; if (offset) { phys_addr = vtophys(storvsc_sglist[i-1].ds_addr + PAGE_SIZE - offset); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } reqp->bounce_sgl_count = 0; } break; } default: printf("Unknow flags: %d\n", ccb->ccb_h.flags); return(EINVAL); } return(0); } /* * Modified based on scsi_print_inquiry which is responsible to * print the detail information for scsi_inquiry_data. * * Return 1 if it is valid, 0 otherwise. */ static inline int is_inquiry_valid(const struct scsi_inquiry_data *inq_data) { uint8_t type; char vendor[16], product[48], revision[16]; /* * Check device type and qualifier */ if (!(SID_QUAL_IS_VENDOR_UNIQUE(inq_data) || SID_QUAL(inq_data) == SID_QUAL_LU_CONNECTED)) return (0); type = SID_TYPE(inq_data); switch (type) { case T_DIRECT: case T_SEQUENTIAL: case T_PRINTER: case T_PROCESSOR: case T_WORM: case T_CDROM: case T_SCANNER: case T_OPTICAL: case T_CHANGER: case T_COMM: case T_STORARRAY: case T_ENCLOSURE: case T_RBC: case T_OCRW: case T_OSD: case T_ADC: break; case T_NODEVICE: default: return (0); } /* * Check vendor, product, and revision */ cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor), sizeof(vendor)); cam_strvis(product, inq_data->product, sizeof(inq_data->product), sizeof(product)); cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision), sizeof(revision)); if (strlen(vendor) == 0 || strlen(product) == 0 || strlen(revision) == 0) return (0); return (1); } /** * @brief completion function before returning to CAM * * I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. 
* * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; bus_dma_segment_t *ori_sglist = NULL; int ori_sg_count = 0; /* destroy bounce buffer if it is used */ if (reqp->bounce_sgl_count) { ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; ori_sg_count = ccb->csio.sglist_cnt; /* * If it is READ operation, we should copy back the data * to original SG list. */ if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, ori_sg_count, reqp->bounce_sgl, reqp->not_aligned_seg_bits); } storvsc_destroy_bounce_buffer(reqp->bounce_sgl); reqp->bounce_sgl_count = 0; } if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } #ifdef notyet /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed when timer * handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } #endif ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (vm_srb->scsi_status == SCSI_STATUS_OK) { const struct scsi_generic *cmd; /* * Check whether the data for INQUIRY cmd is valid or * not. Windows 10 and Windows 2016 send all zero * inquiry data to VM even for unpopulated slots. */ cmd = (const struct scsi_generic *) ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? 
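/*
 * Editor's note: CAM stores the CDB either inline in the CCB
 * (cdb_bytes) or behind a pointer (cdb_ptr) depending on the
 * CAM_CDB_POINTER flag, hence this conditional.
 */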
csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); if (cmd->opcode == INQUIRY && is_inquiry_valid( (const struct scsi_inquiry_data *)csio->data_ptr) == 0) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; if (bootverbose) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc uninstalled device\n"); mtx_unlock(&sc->hs_lock); } } else { ccb->ccb_h.status |= CAM_REQ_CMP; } } else { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc scsi_status = %d\n", vm_srb->scsi_status); mtx_unlock(&sc->hs_lock); ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); mtx_unlock(&sc->hs_lock); xpt_done_direct(ccb); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine type of storage device from GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI or BlkVSC (paravirtual IDE) device. * * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { const char *p = vmbus_get_type(dev); if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) { return DRIVER_BLKVSC; } else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) { return DRIVER_STORVSC; } return (DRIVER_UNKNOWN); } Index: head/sys/dev/hyperv/utilities/hv_heartbeat.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_heartbeat.c (revision 298445) +++ head/sys/dev/hyperv/utilities/hv_heartbeat.c (revision 298446) @@ -1,133 +1,133 @@ /*- - * Copyright (c) 2014 Microsoft Corp. + * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include "hv_util.h" /* Heartbeat Service */ static hv_guid service_guid = { .data = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d} }; /** * Process heartbeat message */ static void hv_heartbeat_cb(void *context) { uint8_t* buf; hv_vmbus_channel* channel; uint32_t recvlen; uint64_t requestid; int ret; struct hv_vmbus_heartbeat_msg_data* heartbeat_msg; struct hv_vmbus_icmsg_hdr* icmsghdrp; hv_util_sc *softc; softc = (hv_util_sc*)context; buf = softc->receive_buffer; channel = softc->hv_dev->channel; ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen, &requestid); if ((ret == 0) && recvlen > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, buf); } else { heartbeat_msg = (struct hv_vmbus_heartbeat_msg_data *) &buf[sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; heartbeat_msg->seq_num += 1; } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } } static int hv_heartbeat_probe(device_t dev) { const char *p = vmbus_get_type(dev); if (resource_disabled("hvheartbeat", 0)) return ENXIO; if (!memcmp(p, &service_guid, sizeof(hv_guid))) { device_set_desc(dev, "Hyper-V Heartbeat Service"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hv_heartbeat_attach(device_t dev) { hv_util_sc *softc = (hv_util_sc*)device_get_softc(dev); softc->callback = hv_heartbeat_cb; return hv_util_attach(dev); } static device_method_t heartbeat_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_heartbeat_probe), DEVMETHOD(device_attach, hv_heartbeat_attach), DEVMETHOD(device_detach, hv_util_detach), { 0, 0 } }; static driver_t heartbeat_driver = { "hvheartbeat", heartbeat_methods, sizeof(hv_util_sc)}; static devclass_t heartbeat_devclass; DRIVER_MODULE(hv_heartbeat, vmbus, heartbeat_driver, heartbeat_devclass, NULL, NULL); MODULE_VERSION(hv_heartbeat, 1); MODULE_DEPEND(hv_heartbeat, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/utilities/hv_kvp.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_kvp.c (revision 298445) +++ head/sys/dev/hyperv/utilities/hv_kvp.c (revision 298446) @@ -1,946 +1,946 @@ /*- - * Copyright (c) 2014 Microsoft Corp. + * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Author: Sainath Varanasi. * Date: 4/2012 * Email: bsdic@microsoft.com */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_util.h" #include "unicode.h" #include "hv_kvp.h" /* hv_kvp defines */ #define BUFFERSIZE sizeof(struct hv_kvp_msg) #define KVP_SUCCESS 0 #define KVP_ERROR 1 #define kvp_hdr hdr.kvp_hdr /* hv_kvp debug control */ static int hv_kvp_log = 0; #define hv_kvp_log_error(...) do { \ if (hv_kvp_log > 0) \ log(LOG_ERR, "hv_kvp: " __VA_ARGS__); \ } while (0) #define hv_kvp_log_info(...) do { \ if (hv_kvp_log > 1) \ log(LOG_INFO, "hv_kvp: " __VA_ARGS__); \ } while (0) static hv_guid service_guid = { .data = {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6} }; /* character device prototypes */ static d_open_t hv_kvp_dev_open; static d_close_t hv_kvp_dev_close; static d_read_t hv_kvp_dev_daemon_read; static d_write_t hv_kvp_dev_daemon_write; static d_poll_t hv_kvp_dev_daemon_poll; /* hv_kvp character device structure */ static struct cdevsw hv_kvp_cdevsw = { .d_version = D_VERSION, .d_open = hv_kvp_dev_open, .d_close = hv_kvp_dev_close, .d_read = hv_kvp_dev_daemon_read, .d_write = hv_kvp_dev_daemon_write, .d_poll = hv_kvp_dev_daemon_poll, .d_name = "hv_kvp_dev", }; /* * Global state to track and synchronize multiple * KVP transaction requests from the host. */ typedef struct hv_kvp_sc { struct hv_util_sc util_sc; /* Unless specified the pending mutex should be * used to alter the values of the following paramters: * 1. req_in_progress * 2. 
req_timed_out */ struct mtx pending_mutex; struct task task; /* To track if transaction is active or not */ boolean_t req_in_progress; /* Tracks if daemon did not reply back in time */ boolean_t req_timed_out; /* Tracks if daemon is serving a request currently */ boolean_t daemon_busy; /* Length of host message */ uint32_t host_msg_len; /* Host message id */ uint64_t host_msg_id; /* Current kvp message from the host */ struct hv_kvp_msg *host_kvp_msg; /* Current kvp message for daemon */ struct hv_kvp_msg daemon_kvp_msg; /* Rcv buffer for communicating with the host*/ uint8_t *rcv_buf; /* Device semaphore to control communication */ struct sema dev_sema; /* Indicates if daemon registered with driver */ boolean_t register_done; /* Character device status */ boolean_t dev_accessed; struct cdev *hv_kvp_dev; struct proc *daemon_task; struct selinfo hv_kvp_selinfo; } hv_kvp_sc; /* hv_kvp prototypes */ static int hv_kvp_req_in_progress(hv_kvp_sc *sc); static void hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t, uint64_t, uint8_t *); static void hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc); static void hv_kvp_process_request(void *context, int pending); /* * hv_kvp low level functions */ /* * Check if kvp transaction is in progres */ static int hv_kvp_req_in_progress(hv_kvp_sc *sc) { return (sc->req_in_progress); } /* * This routine is called whenever a message is received from the host */ static void hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t rcv_len, uint64_t request_id, uint8_t *rcv_buf) { /* Store all the relevant message details in the global structure */ /* Do not need to use mutex for req_in_progress here */ sc->req_in_progress = true; sc->host_msg_len = rcv_len; sc->host_msg_id = request_id; sc->rcv_buf = rcv_buf; sc->host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; } /* * hv_kvp - version neogtiation function */ static void hv_kvp_negotiate_version(struct hv_vmbus_icmsg_hdr *icmsghdrp, struct hv_vmbus_icmsg_negotiate *negop, uint8_t *buf) { int icframe_vercnt; int icmsg_vercnt; icmsghdrp->icmsgsize = 0x10; negop = (struct hv_vmbus_icmsg_negotiate *)&buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; icframe_vercnt = negop->icframe_vercnt; icmsg_vercnt = negop->icmsg_vercnt; /* * Select the framework version number we will support */ if ((icframe_vercnt >= 2) && (negop->icversion_data[1].major == 3)) { icframe_vercnt = 3; if (icmsg_vercnt > 2) icmsg_vercnt = 4; else icmsg_vercnt = 3; } else { icframe_vercnt = 1; icmsg_vercnt = 1; } negop->icframe_vercnt = 1; negop->icmsg_vercnt = 1; negop->icversion_data[0].major = icframe_vercnt; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = icmsg_vercnt; negop->icversion_data[1].minor = 0; } /* * Convert ip related info in umsg from utf8 to utf16 and store in hmsg */ static int hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg, struct hv_kvp_ip_msg *host_ip_msg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.ip_addr, strlen((char *)umsg->body.kvp_ip_val.ip_addr), UNUSED_FLAG, &err_ip); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.sub_net, strlen((char *)umsg->body.kvp_ip_val.sub_net), UNUSED_FLAG, &err_subnet); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (char *)umsg->body.kvp_ip_val.gate_way, 
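/*
 * Editor's note: every address field here follows the same call
 * pattern, utf8_to_utf16(dst, dst_max, src, strlen(src), flag, &err),
 * and the per-field errors are OR'ed into the return value, so any
 * non-zero bit reports a failed conversion.
 */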
strlen((char *)umsg->body.kvp_ip_val.gate_way), UNUSED_FLAG, &err_gway); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.dns_addr, strlen((char *)umsg->body.kvp_ip_val.dns_addr), UNUSED_FLAG, &err_dns); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.adapter_id, strlen((char *)umsg->body.kvp_ip_val.adapter_id), UNUSED_FLAG, &err_adap); host_ip_msg->kvp_ip_val.dhcp_enabled = umsg->body.kvp_ip_val.dhcp_enabled; host_ip_msg->kvp_ip_val.addr_family = umsg->body.kvp_ip_val.addr_family; return (err_ip | err_subnet | err_gway | err_dns | err_adap); } /* * Convert ip related info in hmsg from utf16 to utf8 and store in umsg */ static int hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg, struct hv_kvp_msg *umsg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; struct hv_device *hv_dev; /* GUID Data Structure */ hn_softc_t *sc; /* hn softc structure */ char if_name[4]; char buf[39]; device_t *devs; int devcnt; /* IP Address */ utf16_to_utf8((char *)umsg->body.kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_ip); /* Adapter ID : GUID */ utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, UNUSED_FLAG, &err_adap); if (devclass_get_devices(devclass_find("hn"), &devs, &devcnt) == 0) { for (devcnt = devcnt - 1; devcnt >= 0; devcnt--) { sc = device_get_softc(devs[devcnt]); /* Trying to find GUID of Network Device */ hv_dev = sc->hn_dev_obj; snprintf_hv_guid(buf, sizeof(buf), &hv_dev->device_id); sprintf(if_name, "%s%d", "hn", device_get_unit(devs[devcnt])); if (strncmp(buf, (char *)umsg->body.kvp_ip_val.adapter_id, 39) == 0) { strcpy((char *)umsg->body.kvp_ip_val.adapter_id, if_name); break; } } free(devs, M_TEMP); } /* Address Family , DHCP , SUBNET, Gateway, DNS */ umsg->kvp_hdr.operation = host_ip_msg->operation; umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family; umsg->body.kvp_ip_val.dhcp_enabled = host_ip_msg->kvp_ip_val.dhcp_enabled; utf16_to_utf8((char *)umsg->body.kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_subnet); utf16_to_utf8((char *)umsg->body.kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, UNUSED_FLAG, &err_gway); utf16_to_utf8((char *)umsg->body.kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_dns); return (err_ip | err_subnet | err_gway | err_dns | err_adap); } /* * Prepare a user kvp msg based on host kvp msg (utf16 to utf8) * Ensure utf16_utf8 takes care of the additional string terminating char!! 
*/ static void hv_kvp_convert_hostmsg_to_usermsg(struct hv_kvp_msg *hmsg, struct hv_kvp_msg *umsg) { int utf_err = 0; uint32_t value_type; struct hv_kvp_ip_msg *host_ip_msg; host_ip_msg = (struct hv_kvp_ip_msg*)hmsg; memset(umsg, 0, sizeof(struct hv_kvp_msg)); umsg->kvp_hdr.operation = hmsg->kvp_hdr.operation; umsg->kvp_hdr.pool = hmsg->kvp_hdr.pool; switch (umsg->kvp_hdr.operation) { case HV_KVP_OP_SET_IP_INFO: hv_kvp_convert_utf16_ipinfo_to_utf8(host_ip_msg, umsg); break; case HV_KVP_OP_GET_IP_INFO: utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, 1, &utf_err); umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family; break; case HV_KVP_OP_SET: value_type = hmsg->body.kvp_set.data.value_type; switch (value_type) { case HV_REG_SZ: umsg->body.kvp_set.data.value_size = utf16_to_utf8( (char *)umsg->body.kvp_set.data.msg_value.value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE - 1, (uint16_t *)hmsg->body.kvp_set.data.msg_value.value, hmsg->body.kvp_set.data.value_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_set.data.value_size = umsg->body.kvp_set.data.value_size / 2; break; case HV_REG_U32: umsg->body.kvp_set.data.value_size = sprintf(umsg->body.kvp_set.data.msg_value.value, "%d", hmsg->body.kvp_set.data.msg_value.value_u32) + 1; break; case HV_REG_U64: umsg->body.kvp_set.data.value_size = sprintf(umsg->body.kvp_set.data.msg_value.value, "%llu", (unsigned long long) hmsg->body.kvp_set.data.msg_value.value_u64) + 1; break; } umsg->body.kvp_set.data.key_size = utf16_to_utf8( umsg->body.kvp_set.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_set.data.key, hmsg->body.kvp_set.data.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_set.data.key_size = umsg->body.kvp_set.data.key_size / 2; break; case HV_KVP_OP_GET: umsg->body.kvp_get.data.key_size = utf16_to_utf8(umsg->body.kvp_get.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_get.data.key, hmsg->body.kvp_get.data.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_get.data.key_size = umsg->body.kvp_get.data.key_size / 2; break; case HV_KVP_OP_DELETE: umsg->body.kvp_delete.key_size = utf16_to_utf8(umsg->body.kvp_delete.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_delete.key, hmsg->body.kvp_delete.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_delete.key_size = umsg->body.kvp_delete.key_size / 2; break; case HV_KVP_OP_ENUMERATE: umsg->body.kvp_enum_data.index = hmsg->body.kvp_enum_data.index; break; default: hv_kvp_log_info("%s: daemon_kvp_msg: Invalid operation : %d\n", __func__, umsg->kvp_hdr.operation); } } /* * Prepare a host kvp msg based on user kvp msg (utf8 to utf16) */ static int hv_kvp_convert_usermsg_to_hostmsg(struct hv_kvp_msg *umsg, struct hv_kvp_msg *hmsg) { int hkey_len = 0, hvalue_len = 0, utf_err = 0; struct hv_kvp_exchg_msg_value *host_exchg_data; char *key_name, *value; struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *)hmsg; switch (hmsg->kvp_hdr.operation) { case HV_KVP_OP_GET_IP_INFO: return (hv_kvp_convert_utf8_ipinfo_to_utf16(umsg, host_ip_msg)); case HV_KVP_OP_SET_IP_INFO: case HV_KVP_OP_SET: case HV_KVP_OP_DELETE: return (KVP_SUCCESS); case HV_KVP_OP_ENUMERATE: host_exchg_data = &hmsg->body.kvp_enum_data.data; key_name = umsg->body.kvp_enum_data.data.key; hkey_len = utf8_to_utf16((uint16_t *)host_exchg_data->key, ((HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2) - 2), key_name, strlen(key_name), 1, &utf_err); /* utf16 
encoding */ host_exchg_data->key_size = 2 * (hkey_len + 1); value = umsg->body.kvp_enum_data.data.msg_value.value; hvalue_len = utf8_to_utf16( (uint16_t *)host_exchg_data->msg_value.value, ((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2), value, strlen(value), 1, &utf_err); host_exchg_data->value_size = 2 * (hvalue_len + 1); host_exchg_data->value_type = HV_REG_SZ; if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); return (KVP_SUCCESS); case HV_KVP_OP_GET: host_exchg_data = &hmsg->body.kvp_get.data; value = umsg->body.kvp_get.data.msg_value.value; hvalue_len = utf8_to_utf16( (uint16_t *)host_exchg_data->msg_value.value, ((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2), value, strlen(value), 1, &utf_err); /* Convert value size to utf16 */ host_exchg_data->value_size = 2 * (hvalue_len + 1); /* Values are passed as strings */ host_exchg_data->value_type = HV_REG_SZ; if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); return (KVP_SUCCESS); default: return (HV_KVP_E_FAIL); } } /* * Send the response back to the host. */ static void hv_kvp_respond_host(hv_kvp_sc *sc, int error) { struct hv_vmbus_icmsg_hdr *hv_icmsg_hdrp; hv_icmsg_hdrp = (struct hv_vmbus_icmsg_hdr *) &sc->rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (error) error = HV_KVP_E_FAIL; hv_icmsg_hdrp->status = error; hv_icmsg_hdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; error = hv_vmbus_channel_send_packet(sc->util_sc.hv_dev->channel, sc->rcv_buf, sc->host_msg_len, sc->host_msg_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (error) hv_kvp_log_info("%s: hv_kvp_respond_host: sendpacket error:%d\n", __func__, error); } /* * This is the main kvp kernel process that interacts with both user daemon * and the host */ static void hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc) { struct hv_kvp_msg *hmsg = sc->host_kvp_msg; struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg; /* Prepare kvp_msg to be sent to user */ hv_kvp_convert_hostmsg_to_usermsg(hmsg, umsg); /* Send the msg to user via function daemon_read - setting sema */ sema_post(&sc->dev_sema); /* We should wake up the daemon, in case it's doing poll() */ selwakeup(&sc->hv_kvp_selinfo); } /* * Function to read the kvp request buffer from host * and interact with daemon */ static void hv_kvp_process_request(void *context, int pending) { uint8_t *kvp_buf; hv_vmbus_channel *channel; uint32_t recvlen = 0; uint64_t requestid; struct hv_vmbus_icmsg_hdr *icmsghdrp; int ret = 0; hv_kvp_sc *sc; hv_kvp_log_info("%s: entering hv_kvp_process_request\n", __func__); sc = (hv_kvp_sc*)context; kvp_buf = sc->util_sc.receive_buffer; channel = sc->util_sc.hv_dev->channel; ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); while ((ret == 0) && (recvlen > 0)) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)]; hv_kvp_transaction_init(sc, recvlen, requestid, kvp_buf); if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_kvp_negotiate_version(icmsghdrp, NULL, kvp_buf); hv_kvp_respond_host(sc, ret); /* * It is ok to not acquire the mutex before setting * req_in_progress here because negotiation is the * first thing that happens and hence there is no * chance of a race condition.
*/ sc->req_in_progress = false; hv_kvp_log_info("%s: version negotiated\n", __func__); } else { if (!sc->daemon_busy) { hv_kvp_log_info("%s: issuing query to daemon\n", __func__); mtx_lock(&sc->pending_mutex); sc->req_timed_out = false; sc->daemon_busy = true; mtx_unlock(&sc->pending_mutex); hv_kvp_send_msg_to_daemon(sc); hv_kvp_log_info("%s: waiting for daemon\n", __func__); } /* Wait 5 seconds for daemon to respond back */ tsleep(sc, 0, "kvpworkitem", 5 * hz); hv_kvp_log_info("%s: came out of wait\n", __func__); } mtx_lock(&sc->pending_mutex); /* Notice that once req_timed_out is set to true * it will remain true until the next request is * sent to the daemon. The response from daemon * is forwarded to host only when this flag is * false. */ sc->req_timed_out = true; /* * Cancel the request if need be. */ if (hv_kvp_req_in_progress(sc)) { hv_kvp_log_info("%s: request was still active after wait so failing\n", __func__); hv_kvp_respond_host(sc, HV_KVP_E_FAIL); sc->req_in_progress = false; } mtx_unlock(&sc->pending_mutex); /* * Try reading next buffer */ recvlen = 0; ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); hv_kvp_log_info("%s: read: context %p, ret =%d, recvlen=%d\n", __func__, context, ret, recvlen); } } /* * Callback routine that gets called whenever there is a message from host */ static void hv_kvp_callback(void *context) { hv_kvp_sc *sc = (hv_kvp_sc*)context; /* The first request from the host will not be handled until the daemon is registered. When the callback is triggered without a registered daemon, the callback just returns. When a new daemon gets registered, this callback is triggered from the _write op. */ if (sc->register_done) { hv_kvp_log_info("%s: Queuing work item\n", __func__); taskqueue_enqueue(taskqueue_thread, &sc->task); } } static int hv_kvp_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; hv_kvp_log_info("%s: Opened device \"hv_kvp_device\" successfully.\n", __func__); if (sc->dev_accessed) return (-EBUSY); sc->daemon_task = curproc; sc->dev_accessed = true; sc->daemon_busy = false; return (0); } static int hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, struct thread *td __unused) { hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; hv_kvp_log_info("%s: Closing device \"hv_kvp_device\".\n", __func__); sc->dev_accessed = false; sc->register_done = false; return (0); } /* * hv_kvp_daemon read invokes this function * acts as a send to daemon */ static int hv_kvp_dev_daemon_read(struct cdev *dev, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; struct hv_kvp_msg *hv_kvp_dev_buf; hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; /* Check hv_kvp daemon registration status */ if (!sc->register_done) return (KVP_ERROR); sema_wait(&sc->dev_sema); hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK); memcpy(hv_kvp_dev_buf, &sc->daemon_kvp_msg, sizeof(struct hv_kvp_msg)); amt = MIN(uio->uio_resid, uio->uio_offset >= BUFFERSIZE + 1 ?
0 : BUFFERSIZE + 1 - uio->uio_offset); if ((error = uiomove(hv_kvp_dev_buf, amt, uio)) != 0) hv_kvp_log_info("%s: hv_kvp uiomove read failed!\n", __func__); free(hv_kvp_dev_buf, M_TEMP); return (error); } /* * hv_kvp_daemon write invokes this function * acts as a receive from daemon */ static int hv_kvp_dev_daemon_write(struct cdev *dev, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; struct hv_kvp_msg *hv_kvp_dev_buf; hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; uio->uio_offset = 0; hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK); amt = MIN(uio->uio_resid, BUFFERSIZE); error = uiomove(hv_kvp_dev_buf, amt, uio); if (error != 0) { free(hv_kvp_dev_buf, M_TEMP); return (error); } memcpy(&sc->daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg)); free(hv_kvp_dev_buf, M_TEMP); if (sc->register_done == false) { if (sc->daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) { sc->register_done = true; hv_kvp_callback(dev->si_drv1); } else { hv_kvp_log_info("%s, KVP Registration Failed\n", __func__); return (KVP_ERROR); } } else { mtx_lock(&sc->pending_mutex); if(!sc->req_timed_out) { struct hv_kvp_msg *hmsg = sc->host_kvp_msg; struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg; hv_kvp_convert_usermsg_to_hostmsg(umsg, hmsg); hv_kvp_respond_host(sc, KVP_SUCCESS); wakeup(sc); sc->req_in_progress = false; } sc->daemon_busy = false; mtx_unlock(&sc->pending_mutex); } return (error); } /* * hv_kvp_daemon poll invokes this function to check if data is available * for daemon to read. */ static int hv_kvp_dev_daemon_poll(struct cdev *dev, int events, struct thread *td) { int revents = 0; hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; mtx_lock(&sc->pending_mutex); /* * We check the global flag daemon_busy for data availability for * userland to read. daemon_busy is set to true before the driver has * data for the daemon to read, and set to false after the daemon * sends the response back to the driver.
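Taken together, the read, write, and poll handlers here define the whole userland contract: one write(2) to register, then a loop of poll(2)/read(2) for requests and write(2) for answers. A skeletal daemon loop under those assumptions (the message is treated as an opaque blob with the operation byte at offset 0, per struct hv_kvp_hdr; the buffer size is an assumption):

	#include <fcntl.h>
	#include <poll.h>
	#include <unistd.h>

	#define KVP_BUF_SZ	8192	/* assumed >= sizeof(struct hv_kvp_msg) */
	#define KVP_OP_REGISTER	4	/* HV_KVP_OP_REGISTER from hv_kvp.h */

	int
	main(void)
	{
		char msg[KVP_BUF_SZ] = { 0 };
		struct pollfd pfd;
		int fd;

		if ((fd = open("/dev/hv_kvp_dev", O_RDWR)) < 0)
			return (1);

		/* The first write must carry HV_KVP_OP_REGISTER; anything
		 * else is rejected until registration has succeeded. */
		msg[0] = KVP_OP_REGISTER;	/* kvp_hdr.operation */
		if (write(fd, msg, sizeof(msg)) < 0)
			return (1);

		for (;;) {
			pfd.fd = fd;
			pfd.events = POLLIN;	/* raised while daemon_busy */
			if (poll(&pfd, 1, -1) <= 0)
				continue;
			if (read(fd, msg, sizeof(msg)) < 0)	/* blocks on dev_sema */
				continue;
			/* ... fill in the answer for the request in msg ... */
			(void)write(fd, msg, sizeof(msg));	/* wakes the kernel side */
		}
	}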
*/ if (sc->daemon_busy == true) revents = POLLIN; else selrecord(td, &sc->hv_kvp_selinfo); mtx_unlock(&sc->pending_mutex); return (revents); } static int hv_kvp_probe(device_t dev) { const char *p = vmbus_get_type(dev); if (resource_disabled("hvkvp", 0)) return ENXIO; if (!memcmp(p, &service_guid, sizeof(hv_guid))) { device_set_desc(dev, "Hyper-V KVP Service"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hv_kvp_attach(device_t dev) { int error; struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev); sc->util_sc.callback = hv_kvp_callback; sema_init(&sc->dev_sema, 0, "hv_kvp device semaphore"); mtx_init(&sc->pending_mutex, "hv-kvp pending mutex", NULL, MTX_DEF); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_kvp_log", CTLFLAG_RW, &hv_kvp_log, 0, "Hyperv KVP service log level"); TASK_INIT(&sc->task, 0, hv_kvp_process_request, sc); /* create character device */ error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &sc->hv_kvp_dev, &hv_kvp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0640, "hv_kvp_dev"); if (error != 0) return (error); sc->hv_kvp_dev->si_drv1 = sc; return hv_util_attach(dev); } static int hv_kvp_detach(device_t dev) { hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev); if (sc->daemon_task != NULL) { PROC_LOCK(sc->daemon_task); kern_psignal(sc->daemon_task, SIGKILL); PROC_UNLOCK(sc->daemon_task); } destroy_dev(sc->hv_kvp_dev); return hv_util_detach(dev); } static device_method_t kvp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_kvp_probe), DEVMETHOD(device_attach, hv_kvp_attach), DEVMETHOD(device_detach, hv_kvp_detach), { 0, 0 } }; static driver_t kvp_driver = { "hvkvp", kvp_methods, sizeof(hv_kvp_sc)}; static devclass_t kvp_devclass; DRIVER_MODULE(hv_kvp, vmbus, kvp_driver, kvp_devclass, NULL, NULL); MODULE_VERSION(hv_kvp, 1); MODULE_DEPEND(hv_kvp, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/utilities/hv_kvp.h =================================================================== --- head/sys/dev/hyperv/utilities/hv_kvp.h (revision 298445) +++ head/sys/dev/hyperv/utilities/hv_kvp.h (revision 298446) @@ -1,241 +1,241 @@ /*- - * Copyright (c) 2014 Microsoft Corp. + * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _KVP_H #define _KVP_H /* * An implementation of HyperV key value pair (KVP) functionality for FreeBSD * */ /* * Maximum value size - used for both key names and value data, and includes * any applicable NULL terminators. * * Note: This limit is somewhat arbitrary, but falls easily within what is * supported for all native guests (back to Win 2000) and what is reasonable * for the IC KVP exchange functionality. Note that Windows Me/98/95 are * limited to 255 character key names. * * MSDN recommends not storing data values larger than 2048 bytes in the * registry. * * Note: This value is used in defining the KVP exchange message - this value * cannot be modified without affecting the message size and compatibility. */ /* * bytes, including any null terminators */ #define HV_KVP_EXCHANGE_MAX_VALUE_SIZE (2048) /* * Maximum key size - the registry limit for the length of an entry name * is 256 characters, including the null terminator */ #define HV_KVP_EXCHANGE_MAX_KEY_SIZE (512) /* * In FreeBSD, we implement the KVP functionality in two components: * 1) The kernel component which is packaged as part of the hv_utils driver * is responsible for communicating with the host and for * implementing the host/guest protocol. 2) A user level daemon that is * responsible for data gathering. * * Host/Guest Protocol: The host iterates over an index and expects the guest * to assign a key name to the index and also return the value corresponding to * the key. The host will have at most one KVP transaction outstanding at any * given point in time. The host side iteration stops when the guest returns * an error. Microsoft has specified the following mapping of key names to * host-specified index: * * Index Key Name * 0 FullyQualifiedDomainName * 1 IntegrationServicesVersion * 2 NetworkAddressIPv4 * 3 NetworkAddressIPv6 * 4 OSBuildNumber * 5 OSName * 6 OSMajorVersion * 7 OSMinorVersion * 8 OSVersion * 9 ProcessorArchitecture * * The Windows host expects the Key Name and Key Value to be encoded in utf16. * * Guest Kernel/KVP Daemon Protocol: As noted earlier, we implement all of the * data gathering functionality in a user mode daemon. The user level daemon * is also responsible for binding the key name to the index. The * kernel and user-level daemon communicate using a connector channel. * * The user mode component first registers with * the kernel component. Subsequently, the kernel component requests data * for the specified keys. In response to this message the user mode component * fills in the value corresponding to the specified key. We overload the * sequence field in the cn_msg header to define our KVP message types. * * * The kernel component simply acts as a conduit for communication between the * Windows host and the user-level daemon. The kernel component passes up the * index received from the Host to the user-level daemon. If the index is * valid (supported), the corresponding key as well as its * value (both are strings) is returned.
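Since the host drives enumeration purely by index, a daemon answering HV_KVP_OP_ENUMERATE only needs the fixed table above; answering with nothing past the end is what stops the host's iteration. A minimal lookup over that documented mapping:

	#include <stddef.h>

	/* Host-specified enumeration order, from the mapping documented
	 * above.  Past the end the daemon returns NULL (an empty key),
	 * which the host treats as end-of-enumeration. */
	static const char *kvp_auto_keys[] = {
		"FullyQualifiedDomainName",
		"IntegrationServicesVersion",
		"NetworkAddressIPv4",
		"NetworkAddressIPv6",
		"OSBuildNumber",
		"OSName",
		"OSMajorVersion",
		"OSMinorVersion",
		"OSVersion",
		"ProcessorArchitecture",
	};

	static const char *
	kvp_key_for_index(size_t idx)
	{
		if (idx >= sizeof(kvp_auto_keys) / sizeof(kvp_auto_keys[0]))
			return (NULL);	/* stops the host's iteration */
		return (kvp_auto_keys[idx]);
	}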
If the index is invalid * (not supported), a NULL key string is returned. */ /* * Registry value types. */ #define HV_REG_SZ 1 #define HV_REG_U32 4 #define HV_REG_U64 8 /* * Daemon code supporting IP injection. */ #define HV_KVP_OP_REGISTER 4 enum hv_kvp_exchg_op { HV_KVP_OP_GET = 0, HV_KVP_OP_SET, HV_KVP_OP_DELETE, HV_KVP_OP_ENUMERATE, HV_KVP_OP_GET_IP_INFO, HV_KVP_OP_SET_IP_INFO, HV_KVP_OP_COUNT /* Number of operations, must be last. */ }; enum hv_kvp_exchg_pool { HV_KVP_POOL_EXTERNAL = 0, HV_KVP_POOL_GUEST, HV_KVP_POOL_AUTO, HV_KVP_POOL_AUTO_EXTERNAL, HV_KVP_POOL_AUTO_INTERNAL, HV_KVP_POOL_COUNT /* Number of pools, must be last. */ }; /* * Some Hyper-V status codes. */ #define HV_KVP_S_OK 0x00000000 #define HV_KVP_E_FAIL 0x80004005 #define HV_KVP_S_CONT 0x80070103 #define HV_ERROR_NOT_SUPPORTED 0x80070032 #define HV_ERROR_MACHINE_LOCKED 0x800704F7 #define HV_ERROR_DEVICE_NOT_CONNECTED 0x8007048F #define HV_INVALIDARG 0x80070057 #define HV_KVP_GUID_NOTFOUND 0x80041002 #define ADDR_FAMILY_NONE 0x00 #define ADDR_FAMILY_IPV4 0x01 #define ADDR_FAMILY_IPV6 0x02 #define MAX_ADAPTER_ID_SIZE 128 #define MAX_IP_ADDR_SIZE 1024 #define MAX_GATEWAY_SIZE 512 struct hv_kvp_ipaddr_value { uint16_t adapter_id[MAX_ADAPTER_ID_SIZE]; uint8_t addr_family; uint8_t dhcp_enabled; uint16_t ip_addr[MAX_IP_ADDR_SIZE]; uint16_t sub_net[MAX_IP_ADDR_SIZE]; uint16_t gate_way[MAX_GATEWAY_SIZE]; uint16_t dns_addr[MAX_IP_ADDR_SIZE]; }__attribute__((packed)); struct hv_kvp_hdr { uint8_t operation; uint8_t pool; uint16_t pad; } __attribute__((packed)); struct hv_kvp_exchg_msg_value { uint32_t value_type; uint32_t key_size; uint32_t value_size; uint8_t key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; union { uint8_t value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; uint32_t value_u32; uint64_t value_u64; } msg_value; } __attribute__((packed)); struct hv_kvp_msg_enumerate { uint32_t index; struct hv_kvp_exchg_msg_value data; } __attribute__((packed)); struct hv_kvp_msg_get { struct hv_kvp_exchg_msg_value data; } __attribute__((packed)); struct hv_kvp_msg_set { struct hv_kvp_exchg_msg_value data; } __attribute__((packed)); struct hv_kvp_msg_delete { uint32_t key_size; uint8_t key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; } __attribute__((packed)); struct hv_kvp_register { uint8_t version[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; } __attribute__((packed)); struct hv_kvp_msg { union { struct hv_kvp_hdr kvp_hdr; uint32_t error; } hdr; union { struct hv_kvp_msg_get kvp_get; struct hv_kvp_msg_set kvp_set; struct hv_kvp_msg_delete kvp_delete; struct hv_kvp_msg_enumerate kvp_enum_data; struct hv_kvp_ipaddr_value kvp_ip_val; struct hv_kvp_register kvp_register; } body; } __attribute__((packed)); struct hv_kvp_ip_msg { uint8_t operation; uint8_t pool; struct hv_kvp_ipaddr_value kvp_ip_val; } __attribute__((packed)); #endif /* _KVP_H */ Index: head/sys/dev/hyperv/utilities/hv_shutdown.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_shutdown.c (revision 298445) +++ head/sys/dev/hyperv/utilities/hv_shutdown.c (revision 298446) @@ -1,155 +1,155 @@ /*- - * Copyright (c) 2014 Microsoft Corp. + * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * A common driver for all hyper-V util services. */ #include #include #include #include #include #include #include #include #include #include "hv_util.h" static hv_guid service_guid = { .data = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49, 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB} }; /** * Shutdown */ static void hv_shutdown_cb(void *context) { uint8_t* buf; hv_vmbus_channel* channel; uint8_t execute_shutdown = 0; hv_vmbus_icmsg_hdr* icmsghdrp; uint32_t recv_len; uint64_t request_id; int ret; hv_vmbus_shutdown_msg_data* shutdown_msg; hv_util_sc *softc; softc = (hv_util_sc*)context; buf = softc->receive_buffer; channel = softc->hv_dev->channel; ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recv_len, &request_id); if ((ret == 0) && recv_len > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, buf); } else { shutdown_msg = (struct hv_vmbus_shutdown_msg_data *) &buf[sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; switch (shutdown_msg->flags) { case 0: case 1: icmsghdrp->status = HV_S_OK; execute_shutdown = 1; if(bootverbose) printf("Shutdown request received -" " graceful shutdown initiated\n"); break; default: icmsghdrp->status = HV_E_FAIL; execute_shutdown = 0; printf("Shutdown request received -" " Invalid request\n"); break; } } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, buf, recv_len, request_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } if (execute_shutdown) shutdown_nice(RB_POWEROFF); } static int hv_shutdown_probe(device_t dev) { const char *p = vmbus_get_type(dev); if (resource_disabled("hvshutdown", 0)) return ENXIO; if (!memcmp(p, &service_guid, sizeof(hv_guid))) { device_set_desc(dev, "Hyper-V Shutdown Service"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hv_shutdown_attach(device_t dev) { hv_util_sc *softc = (hv_util_sc*)device_get_softc(dev); softc->callback = hv_shutdown_cb; return hv_util_attach(dev); } static device_method_t shutdown_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_shutdown_probe), DEVMETHOD(device_attach, hv_shutdown_attach), DEVMETHOD(device_detach, hv_util_detach), { 0, 0 } }; static driver_t shutdown_driver = { "hvshutdown", shutdown_methods, sizeof(hv_util_sc)}; static devclass_t shutdown_devclass; DRIVER_MODULE(hv_shutdown, vmbus, shutdown_driver, shutdown_devclass, NULL, NULL); MODULE_VERSION(hv_shutdown, 1); MODULE_DEPEND(hv_shutdown, vmbus, 1, 1, 
1); Index: head/sys/dev/hyperv/utilities/hv_timesync.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_timesync.c (revision 298445) +++ head/sys/dev/hyperv/utilities/hv_timesync.c (revision 298446) @@ -1,220 +1,220 @@ /*- - * Copyright (c) 2014 Microsoft Corp. + * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * A common driver for all hyper-V util services. */ #include #include #include #include #include #include #include #include #include #include "hv_util.h" #define HV_WLTIMEDELTA 116444736000000000L /* in 100ns unit */ #define HV_ICTIMESYNCFLAG_PROBE 0 #define HV_ICTIMESYNCFLAG_SYNC 1 #define HV_ICTIMESYNCFLAG_SAMPLE 2 #define HV_NANO_SEC_PER_SEC 1000000000 /* Time Sync data */ typedef struct { uint64_t data; } time_sync_data; /* Time Synch Service */ static hv_guid service_guid = {.data = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf } }; struct hv_ictimesync_data { uint64_t parenttime; uint64_t childtime; uint64_t roundtriptime; uint8_t flags; } __packed; typedef struct hv_timesync_sc { hv_util_sc util_sc; struct task task; time_sync_data time_msg; } hv_timesync_sc; /** * Set host time based on time sync message from host */ static void hv_set_host_time(void *context, int pending) { hv_timesync_sc *softc = (hv_timesync_sc*)context; uint64_t hosttime = softc->time_msg.data; struct timespec guest_ts, host_ts; uint64_t host_tns; int64_t diff; int error; host_tns = (hosttime - HV_WLTIMEDELTA) * 100; host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC); host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC); nanotime(&guest_ts); diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec; /* * If host differs by 5 seconds then make the guest catch up */ if (diff > 5 || diff < -5) { error = kern_clock_settime(curthread, CLOCK_REALTIME, &host_ts); } } /** * @brief Synchronize time with host after reboot, restore, etc. * * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM. * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time * message after the timesync channel is opened. Since the hv_utils module is * loaded after hv_vmbus, the first message is usually missed. 
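hv_set_host_time() above turns the host's timestamp, 100 ns ticks since January 1, 1601 per HV_WLTIMEDELTA, into a Unix struct timespec before comparing it against nanotime() and, on a drift of more than 5 seconds, calling kern_clock_settime(). The same arithmetic as a standalone sketch:

	#include <stdint.h>
	#include <stdio.h>
	#include <time.h>

	#define HV_WLTIMEDELTA		116444736000000000ULL	/* 100ns units, 1601 -> 1970 */
	#define HV_NANO_SEC_PER_SEC	1000000000ULL

	/* Convert a Hyper-V host timestamp (100ns ticks since 1601) to a
	 * Unix timespec, mirroring the math in hv_set_host_time(). */
	static struct timespec
	hv_time_to_timespec(uint64_t hosttime)
	{
		uint64_t host_tns = (hosttime - HV_WLTIMEDELTA) * 100;
		struct timespec ts;

		ts.tv_sec = (time_t)(host_tns / HV_NANO_SEC_PER_SEC);
		ts.tv_nsec = (long)(host_tns % HV_NANO_SEC_PER_SEC);
		return (ts);
	}

	int
	main(void)
	{
		/* The 1601 -> 1970 delta alone maps to the Unix epoch. */
		struct timespec ts = hv_time_to_timespec(HV_WLTIMEDELTA);

		printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
		return (0);
	}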
The other * thing is, systime is automatically set to emulated hardware clock which may * not be UTC time or in the same time zone. So, to override these effects, we * use the first 50 time samples for initial system time setting. */ static inline void hv_adj_guesttime(hv_timesync_sc *sc, uint64_t hosttime, uint8_t flags) { sc->time_msg.data = hosttime; if (((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) || ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0)) { taskqueue_enqueue(taskqueue_thread, &sc->task); } } /** * Time Sync Channel message handler */ static void hv_timesync_cb(void *context) { hv_vmbus_channel* channel; hv_vmbus_icmsg_hdr* icmsghdrp; uint32_t recvlen; uint64_t requestId; int ret; uint8_t* time_buf; struct hv_ictimesync_data* timedatap; hv_timesync_sc *softc; softc = (hv_timesync_sc*)context; channel = softc->util_sc.hv_dev->channel; time_buf = softc->util_sc.receive_buffer; ret = hv_vmbus_channel_recv_packet(channel, time_buf, PAGE_SIZE, &recvlen, &requestId); if ((ret == 0) && recvlen > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[ sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, time_buf); } else { timedatap = (struct hv_ictimesync_data *) &time_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; hv_adj_guesttime(softc, timedatap->parenttime, timedatap->flags); } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, time_buf, recvlen, requestId, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } } static int hv_timesync_probe(device_t dev) { const char *p = vmbus_get_type(dev); if (resource_disabled("hvtimesync", 0)) return ENXIO; if (!memcmp(p, &service_guid, sizeof(hv_guid))) { device_set_desc(dev, "Hyper-V Time Synch Service"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hv_timesync_attach(device_t dev) { hv_timesync_sc *softc = device_get_softc(dev); softc->util_sc.callback = hv_timesync_cb; TASK_INIT(&softc->task, 1, hv_set_host_time, softc); return hv_util_attach(dev); } static int hv_timesync_detach(device_t dev) { hv_timesync_sc *softc = device_get_softc(dev); taskqueue_drain(taskqueue_thread, &softc->task); return hv_util_detach(dev); } static device_method_t timesync_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_timesync_probe), DEVMETHOD(device_attach, hv_timesync_attach), DEVMETHOD(device_detach, hv_timesync_detach), { 0, 0 } }; static driver_t timesync_driver = { "hvtimesync", timesync_methods, sizeof(hv_timesync_sc)}; static devclass_t timesync_devclass; DRIVER_MODULE(hv_timesync, vmbus, timesync_driver, timesync_devclass, NULL, NULL); MODULE_VERSION(hv_timesync, 1); MODULE_DEPEND(hv_timesync, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/utilities/hv_util.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_util.c (revision 298445) +++ head/sys/dev/hyperv/utilities/hv_util.c (revision 298446) @@ -1,123 +1,123 @@ /*- - * Copyright (c) 2014 Microsoft Corp. + * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * A common driver for all hyper-V util services. */ #include #include #include #include #include #include #include #include #include #include "hv_util.h" void hv_negotiate_version( struct hv_vmbus_icmsg_hdr* icmsghdrp, struct hv_vmbus_icmsg_negotiate* negop, uint8_t* buf) { icmsghdrp->icmsgsize = 0x10; negop = (struct hv_vmbus_icmsg_negotiate *)&buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; if (negop->icframe_vercnt >= 2 && negop->icversion_data[1].major == 3) { negop->icversion_data[0].major = 3; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = 3; negop->icversion_data[1].minor = 0; } else { negop->icversion_data[0].major = 1; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = 1; negop->icversion_data[1].minor = 0; } negop->icframe_vercnt = 1; negop->icmsg_vercnt = 1; } int hv_util_attach(device_t dev) { struct hv_device* hv_dev; struct hv_util_sc* softc; int ret; hv_dev = vmbus_get_devctx(dev); softc = device_get_softc(dev); softc->hv_dev = hv_dev; softc->receive_buffer = malloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); /* * These services are not performance critical and do not need * batched reading. Furthermore, some services such as KVP can * only handle one message from the host at a time. * Turn off batched reading for all util drivers before we open the * channel. */ hv_set_channel_read_state(hv_dev->channel, FALSE); ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0, softc->callback, softc); if (ret) goto error0; return (0); error0: free(softc->receive_buffer, M_DEVBUF); return (ret); } int hv_util_detach(device_t dev) { struct hv_device* hv_dev; struct hv_util_sc* softc; hv_dev = vmbus_get_devctx(dev); hv_vmbus_channel_close(hv_dev->channel); softc = device_get_softc(dev); free(softc->receive_buffer, M_DEVBUF); return (0); } Index: head/sys/dev/hyperv/utilities/hv_util.h =================================================================== --- head/sys/dev/hyperv/utilities/hv_util.h (revision 298445) +++ head/sys/dev/hyperv/utilities/hv_util.h (revision 298446) @@ -1,55 +1,55 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _HVUTIL_H_ #define _HVUTIL_H_ /** * hv_util related structures * */ typedef struct hv_util_sc { /* * function to process Hyper-V messages */ void (*callback)(void *); struct hv_device* hv_dev; uint8_t *receive_buffer; } hv_util_sc; void hv_negotiate_version( struct hv_vmbus_icmsg_hdr* icmsghdrp, struct hv_vmbus_icmsg_negotiate* negop, uint8_t* buf); int hv_util_attach(device_t dev); int hv_util_detach(device_t dev); #endif Index: head/sys/dev/hyperv/vmbus/hv_channel.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel.c (revision 298445) +++ head/sys/dev/hyperv/vmbus/hv_channel.c (revision 298446) @@ -1,1028 +1,1028 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
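The hv_util_sc structure above is the entire contract between the common util driver and each service: embed it at the start of the service softc, point callback at a channel handler, and let hv_util_attach()/hv_util_detach() manage the channel and receive buffer, as hv_shutdown and hv_timesync do. A skeleton of a service following that pattern (the hv_example names are hypothetical):

	/* Minimal Hyper-V util service skeleton, following the pattern used
	 * by hv_shutdown.c and hv_timesync.c above. */
	typedef struct hv_example_sc {
		hv_util_sc	util_sc;	/* must be first: cast both ways */
		int		example_state;
	} hv_example_sc;

	static void
	hv_example_cb(void *context)
	{
		hv_util_sc *softc = (hv_util_sc *)context;

		/* recv from softc->hv_dev->channel into
		 * softc->receive_buffer, negotiate on
		 * HV_ICMSGTYPE_NEGOTIATE, otherwise handle the payload,
		 * then echo the buffer back with the RESPONSE flag set. */
	}

	static int
	hv_example_attach(device_t dev)
	{
		hv_example_sc *sc = device_get_softc(dev);

		sc->util_sc.callback = hv_example_cb;
		return (hv_util_attach(dev));	/* opens the channel */
	}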
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" static int vmbus_channel_create_gpadl_header( /* must be phys and virt contiguous*/ void* contig_buffer, /* page-size multiple */ uint32_t size, hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count); static void vmbus_channel_set_event(hv_vmbus_channel* channel); static void VmbusProcessChannelEvent(void* channel, int pending); /** * @brief Trigger an event notification on the specified channel */ static void vmbus_channel_set_event(hv_vmbus_channel *channel) { hv_vmbus_monitor_page *monitor_page; if (channel->offer_msg.monitor_allocated) { /* Each uint32_t represents 32 channels */ synch_set_bit((channel->offer_msg.child_rel_id & 31), ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + ((channel->offer_msg.child_rel_id >> 5)))); monitor_page = (hv_vmbus_monitor_page *) hv_vmbus_g_connection.monitor_page_2; synch_set_bit(channel->monitor_bit, (uint32_t *)&monitor_page-> trigger_group[channel->monitor_group].u.pending); } else { hv_vmbus_set_event(channel); } } static int vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS) { struct hv_vmbus_channel *chan = arg1; int alloc = 0; if (chan->offer_msg.monitor_allocated) alloc = 1; return sysctl_handle_int(oidp, &alloc, 0, req); } static void vmbus_channel_sysctl_create(hv_vmbus_channel* channel) { device_t dev; struct sysctl_oid *devch_sysctl; struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl; struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl; struct sysctl_ctx_list *ctx; uint32_t ch_id; uint16_t sub_ch_id; char name[16]; hv_vmbus_channel* primary_ch = channel->primary_channel; if (primary_ch == NULL) { dev = channel->device->device; ch_id = channel->offer_msg.child_rel_id; } else { dev = primary_ch->device->device; ch_id = primary_ch->offer_msg.child_rel_id; sub_ch_id = channel->offer_msg.offer.sub_channel_index; } ctx = device_get_sysctl_ctx(dev); /* This creates dev.DEVNAME.DEVUNIT.channel tree */ devch_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "channel", CTLFLAG_RD, 0, ""); /* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */ snprintf(name, sizeof(name), "%d", ch_id); devch_id_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_sysctl), OID_AUTO, name, CTLFLAG_RD, 0, ""); if (primary_ch != NULL) { devch_sub_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "sub", CTLFLAG_RD, 0, ""); snprintf(name, sizeof(name), "%d", sub_ch_id); devch_id_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_sub_sysctl), OID_AUTO, name, CTLFLAG_RD, 0, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "chanid", CTLFLAG_RD, &channel->offer_msg.child_rel_id, 0, "channel id"); } SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "cpu", CTLFLAG_RD, &channel->target_cpu, 0, "owner CPU id"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD, channel, 0, vmbus_channel_sysctl_monalloc, "I", "is monitor allocated to this channel"); devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "in", CTLFLAG_RD, 0, ""); devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "out", CTLFLAG_RD, 0, ""); hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(devch_id_in_sysctl), &(channel->inbound), "inbound ring buffer stats"); hv_ring_buffer_stat(ctx, 
SYSCTL_CHILDREN(devch_id_out_sysctl), &(channel->outbound), "outbound ring buffer stats"); } /** * @brief Open the specified channel */ int hv_vmbus_channel_open( hv_vmbus_channel* new_channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context) { int ret = 0; void *in, *out; hv_vmbus_channel_open_channel* open_msg; hv_vmbus_channel_msg_info* open_info; mtx_lock(&new_channel->sc_lock); if (new_channel->state == HV_CHANNEL_OPEN_STATE) { new_channel->state = HV_CHANNEL_OPENING_STATE; } else { mtx_unlock(&new_channel->sc_lock); if(bootverbose) printf("VMBUS: Trying to open channel <%p> which in " "%d state.\n", new_channel, new_channel->state); return (EINVAL); } mtx_unlock(&new_channel->sc_lock); new_channel->on_channel_callback = pfn_on_channel_callback; new_channel->channel_callback_context = context; new_channel->rxq = hv_vmbus_g_context.hv_event_queue[new_channel->target_cpu]; TASK_INIT(&new_channel->channel_task, 0, VmbusProcessChannelEvent, new_channel); /* Allocate the ring buffer */ out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size), M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(out != NULL, ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!")); if (out == NULL) return (ENOMEM); in = ((uint8_t *) out + send_ring_buffer_size); new_channel->ring_buffer_pages = out; new_channel->ring_buffer_page_count = (send_ring_buffer_size + recv_ring_buffer_size) >> PAGE_SHIFT; new_channel->ring_buffer_size = send_ring_buffer_size + recv_ring_buffer_size; hv_vmbus_ring_buffer_init( &new_channel->outbound, out, send_ring_buffer_size); hv_vmbus_ring_buffer_init( &new_channel->inbound, in, recv_ring_buffer_size); /* Create sysctl tree for this channel */ vmbus_channel_sysctl_create(new_channel); /** * Establish the gpadl for the ring buffer */ new_channel->ring_buffer_gpadl_handle = 0; ret = hv_vmbus_channel_establish_gpadl(new_channel, new_channel->outbound.ring_buffer, send_ring_buffer_size + recv_ring_buffer_size, &new_channel->ring_buffer_gpadl_handle); /** * Create and init the channel open message */ open_info = (hv_vmbus_channel_msg_info*) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_open_channel), M_DEVBUF, M_NOWAIT); KASSERT(open_info != NULL, ("Error VMBUS: malloc failed to allocate Open Channel message!")); if (open_info == NULL) return (ENOMEM); sema_init(&open_info->wait_sema, 0, "Open Info Sema"); open_msg = (hv_vmbus_channel_open_channel*) open_info->msg; open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL; open_msg->open_id = new_channel->offer_msg.child_rel_id; open_msg->child_rel_id = new_channel->offer_msg.child_rel_id; open_msg->ring_buffer_gpadl_handle = new_channel->ring_buffer_gpadl_handle; open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size >> PAGE_SHIFT; open_msg->target_vcpu = new_channel->target_vcpu; if (user_data_len) memcpy(open_msg->user_data, user_data, user_data_len); mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( open_msg, sizeof(hv_vmbus_channel_open_channel)); if (ret != 0) goto cleanup; ret = sema_timedwait(&open_info->wait_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { if(bootverbose) printf("VMBUS: channel <%p> open timeout.\n", new_channel); goto cleanup; } if 
(open_info->response.open_result.status == 0) { new_channel->state = HV_CHANNEL_OPENED_STATE; if(bootverbose) printf("VMBUS: channel <%p> open success.\n", new_channel); } else { if(bootverbose) printf("Error VMBUS: channel <%p> open failed - %d!\n", new_channel, open_info->response.open_result.status); } cleanup: mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&open_info->wait_sema); free(open_info, M_DEVBUF); return (ret); } /** * @brief Create a gpadl for the specified buffer */ static int vmbus_channel_create_gpadl_header( void* contig_buffer, uint32_t size, /* page-size multiple */ hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count) { int i; int page_count; unsigned long long pfn; uint32_t msg_size; hv_vmbus_channel_gpadl_header* gpa_header; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_header; hv_vmbus_channel_msg_info* msg_body; int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize; page_count = size >> PAGE_SHIFT; pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT; /*do we need a gpadl body msg */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_header) - sizeof(hv_gpa_range); pfnCount = pfnSize / sizeof(uint64_t); if (page_count > pfnCount) { /* if(we need a gpadl body) */ /* fill in the header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + pfnCount * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); TAILQ_INIT(&msg_header->sub_msg_list_anchor); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < pfnCount; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; pfnSum = pfnCount; pfnLeft = page_count - pfnCount; /* * figure out how many pfns we can fit */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_body); pfnCount = pfnSize / sizeof(uint64_t); /* * fill in the body */ while (pfnLeft) { if (pfnLeft > pfnCount) { pfnCurr = pfnCount; } else { pfnCurr = pfnLeft; } msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_body) + pfnCurr * sizeof(uint64_t); msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_body != NULL, ("Error VMBUS: malloc failed to allocate Gpadl msg_body!")); if (msg_body == NULL) return (ENOMEM); msg_body->message_size = msg_size; (*message_count)++; gpadl_body = (hv_vmbus_channel_gpadl_body*) msg_body->msg; /* * gpadl_body->gpadl = kbuffer; */ for (i = 0; i < pfnCurr; i++) { gpadl_body->pfn[i] = pfn + pfnSum + i; } TAILQ_INSERT_TAIL( &msg_header->sub_msg_list_anchor, msg_body, msg_list_entry); pfnSum += pfnCurr; pfnLeft -= pfnCurr; } } else { /* else everything fits in a header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == 
NULL) return (ENOMEM); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < page_count; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; } return (0); } /** * @brief Establish a GPADL for the specified buffer */ int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, void* contig_buffer, uint32_t size, /* page-size multiple */ uint32_t* gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_header* gpadl_msg; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_info* sub_msg_info; uint32_t msg_count; hv_vmbus_channel_msg_info* curr; uint32_t next_gpadl_handle; next_gpadl_handle = atomic_fetchadd_int( &hv_vmbus_g_connection.next_gpadl_handle, 1); ret = vmbus_channel_create_gpadl_header( contig_buffer, size, &msg_info, &msg_count); if(ret != 0) { /* * XXX * We can _not_ even revert the above increment; * if multiple GPADL establishments are running * in parallel, decrementing the global next_gpadl_handle * is calling for _big_ trouble. A better solution * is to have a 0-based GPADL id bitmap ... */ return ret; } sema_init(&msg_info->wait_sema, 0, "Open Info Sema"); gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg; gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER; gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id; gpadl_msg->gpadl = next_gpadl_handle; mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( gpadl_msg, msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); if (ret != 0) goto cleanup; if (msg_count > 1) { TAILQ_FOREACH(curr, &msg_info->sub_msg_list_anchor, msg_list_entry) { sub_msg_info = curr; gpadl_body = (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg; gpadl_body->header.message_type = HV_CHANNEL_MESSAGE_GPADL_BODY; gpadl_body->gpadl = next_gpadl_handle; ret = hv_vmbus_post_message( gpadl_body, sub_msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); /* if (the post message failed) give up and clean up */ if(ret != 0) goto cleanup; } } ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */ if (ret != 0) goto cleanup; *gpadl_handle = gpadl_msg->gpadl; cleanup: mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (ret); } /** * @brief Teardown the specified GPADL handle */ int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_teardown* msg; hv_vmbus_channel_msg_info* info; info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_teardown), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!")); if (info == NULL) { ret = ENOMEM; goto cleanup; } sema_init(&info->wait_sema, 0, "Open Info Sema"); msg = (hv_vmbus_channel_gpadl_teardown*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN;
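/*
 * The teardown handshake mirrors GPADL establishment above: queue
 * this request on channel_msg_anchor, post the teardown message to
 * the host, then sema_timedwait() up to 5 seconds for the host's
 * GPADL_TORNDOWN response, which the channel-management code
 * completes by posting wait_sema on the queued request.
 */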
msg->child_rel_id = channel->offer_msg.child_rel_id; msg->gpadl = gpadl_handle; mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_gpadl_teardown)); if (ret != 0) goto cleanup; ret = sema_timedwait(&info->wait_sema, 5 * hz); /* KYS 5 seconds */ cleanup: /* * Received a torndown response */ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&info->wait_sema); free(info, M_DEVBUF); return (ret); } static void hv_vmbus_channel_close_internal(hv_vmbus_channel *channel) { int ret = 0; struct taskqueue *rxq = channel->rxq; hv_vmbus_channel_close_channel* msg; hv_vmbus_channel_msg_info* info; channel->state = HV_CHANNEL_OPEN_STATE; channel->sc_creation_callback = NULL; /* * set rxq to NULL to avoid more requests be scheduled */ channel->rxq = NULL; taskqueue_drain(rxq, &channel->channel_task); channel->on_channel_callback = NULL; /** * Send a closing message */ info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_close_channel), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!")); if(info == NULL) return; msg = (hv_vmbus_channel_close_channel*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL; msg->child_rel_id = channel->offer_msg.child_rel_id; ret = hv_vmbus_post_message( msg, sizeof(hv_vmbus_channel_close_channel)); /* Tear down the gpadl for the channel's ring buffer */ if (channel->ring_buffer_gpadl_handle) { hv_vmbus_channel_teardown_gpdal(channel, channel->ring_buffer_gpadl_handle); } /* TODO: Send a msg to release the childRelId */ /* cleanup the ring buffers for this channel */ hv_ring_buffer_cleanup(&channel->outbound); hv_ring_buffer_cleanup(&channel->inbound); contigfree(channel->ring_buffer_pages, channel->ring_buffer_size, M_DEVBUF); free(info, M_DEVBUF); } /** * @brief Close the specified channel */ void hv_vmbus_channel_close(hv_vmbus_channel *channel) { hv_vmbus_channel* sub_channel; if (channel->primary_channel != NULL) { /* * We only close multi-channels when the primary is * closed. */ return; } /* * Close all multi-channels first. */ TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor, sc_list_entry) { if (sub_channel->state != HV_CHANNEL_OPENED_STATE) continue; hv_vmbus_channel_close_internal(sub_channel); } /* * Then close the primary channel. 
*/ hv_vmbus_channel_close_internal(channel); } /** * @brief Send the specified buffer on the given channel */ int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags) { int ret = 0; hv_vm_packet_descriptor desc; uint32_t packet_len; uint64_t aligned_data; uint32_t packet_len_aligned; boolean_t need_sig; hv_vmbus_sg_buffer_list buffer_list[3]; packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); aligned_data = 0; /* Setup the descriptor */ desc.type = type; /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */ desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */ /* in 8-bytes granularity */ desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3; desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; buffer_list[0].data = &desc; buffer_list[0].length = sizeof(hv_vm_packet_descriptor); buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a range of single-page buffer packets using * a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; boolean_t need_sig; uint32_t packet_len; uint32_t page_buflen; uint32_t packetLen_aligned; hv_vmbus_sg_buffer_list buffer_list[4]; hv_vmbus_channel_packet_page_buffer desc; uint32_t descSize; uint64_t alignedData = 0; if (page_count > HV_MAX_PAGE_BUFFER_COUNT) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_page_buffer * is the largest size we support */ descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range); page_buflen = sizeof(hv_vmbus_page_buffer) * page_count; packet_len = descSize + page_buflen + buffer_len; packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; /* in 8-bytes granularity */ desc.data_offset8 = (descSize + page_buflen) >> 3; desc.length8 = (uint16_t) (packetLen_aligned >> 3); desc.transaction_id = request_id; desc.range_count = page_count; buffer_list[0].data = &desc; buffer_list[0].length = descSize; buffer_list[1].data = page_buffers; buffer_list[1].length = page_buflen; buffer_list[2].data = buffer; buffer_list[2].length = buffer_len; buffer_list[3].data = &alignedData; buffer_list[3].length = packetLen_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a multi-page buffer packet using a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; uint32_t desc_size; boolean_t need_sig; uint32_t packet_len; uint32_t packet_len_aligned; uint32_t pfn_count; uint64_t aligned_data = 0; 
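/*
 * As in the in-band and page-buffer send paths above, the packet
 * goes out as a 3-element scatter list: the descriptor, the
 * caller's buffer, and zero padding that rounds the total up to an
 * 8-byte boundary -- the ring buffer works in 8-byte granules,
 * which is what the length8/data_offset8 fields count.
 */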
hv_vmbus_sg_buffer_list buffer_list[3]; hv_vmbus_channel_packet_multipage_buffer desc; pfn_count = HV_NUM_PAGES_SPANNED( multi_page_buffer->offset, multi_page_buffer->length); if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT)) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer * is the largest size we support */ desc_size = sizeof(hv_vmbus_channel_packet_multipage_buffer) - ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) * sizeof(uint64_t)); packet_len = desc_size + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* * Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */ desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; desc.range_count = 1; desc.range.length = multi_page_buffer->length; desc.range.offset = multi_page_buffer->offset; memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array, pfn_count * sizeof(uint64_t)); buffer_list[0].data = &desc; buffer_list[0].length = desc_size; buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Retrieve the user packet on the specified channel */ int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* Buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t user_len; uint32_t packet_len; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek(&channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packet_len = desc.length8 << 3; user_len = packet_len - (desc.data_offset8 << 3); *buffer_actual_len = user_len; if (user_len > buffer_len) return (EINVAL); *request_id = desc.transaction_id; /* Copy over the packet to the user buffer */ ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len, (desc.data_offset8 << 3)); return (0); } /** * @brief Retrieve the raw packet on the specified channel */ int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t packetLen; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek( &channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packetLen = desc.length8 << 3; *buffer_actual_len = packetLen; if (packetLen > buffer_len) return (ENOBUFS); *request_id = desc.transaction_id; /* Copy over the entire packet to the user buffer */ ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0); return (0); } /** * Process a channel event notification */ static void VmbusProcessChannelEvent(void* context, int pending) { void* arg; uint32_t bytes_to_read; hv_vmbus_channel* channel = (hv_vmbus_channel*)context; boolean_t is_batched_reading; /** * Find the channel based on this relid and invokes * the channel callback to process the event */ if (channel == NULL) { return; } /** * To deal with the race condition where we might * receive a packet while the relevant driver is * being unloaded, dispatch the callback while * 
holding the channel lock. The unloading driver * will acquire the same channel lock to set the * callback to NULL. This closes the window. */ if (channel->on_channel_callback != NULL) { arg = channel->channel_callback_context; is_batched_reading = channel->batched_reading; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. */ do { if (is_batched_reading) hv_ring_buffer_read_begin(&channel->inbound); channel->on_channel_callback(arg); if (is_batched_reading) bytes_to_read = hv_ring_buffer_read_end(&channel->inbound); else bytes_to_read = 0; } while (is_batched_reading && (bytes_to_read != 0)); } } Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 298445) +++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 298446) @@ -1,788 +1,788 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include "hv_vmbus_priv.h" /* * Internal functions */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_internal(void* context); static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind_internal(void* context); static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); /** * Channel message dispatch table */ hv_vmbus_channel_msg_table_entry g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { { HV_CHANNEL_MESSAGE_INVALID, NULL }, { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer }, { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER, vmbus_channel_on_offer_rescind }, { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL }, { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED, vmbus_channel_on_offers_delivered }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT, vmbus_channel_on_open_result }, { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL }, { HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL }, { HV_CHANNEL_MESSAGE_GPADL_BODY, NULL }, { HV_CHANNEL_MESSAGE_GPADL_CREATED, vmbus_channel_on_gpadl_created }, { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL }, { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN, vmbus_channel_on_gpadl_torndown }, { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL }, { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL }, { HV_CHANNEL_MESSAGE_VERSION_RESPONSE, vmbus_channel_on_version_response }, { HV_CHANNEL_MESSAGE_UNLOAD, NULL } }; typedef struct hv_work_item { struct task work; void (*callback)(void *); void* context; } hv_work_item; static struct mtx vmbus_chwait_lock; MTX_SYSINIT(vmbus_chwait_lk, &vmbus_chwait_lock, "vmbus primarych wait lock", MTX_DEF); static uint32_t vmbus_chancnt; static uint32_t vmbus_devcnt; #define VMBUS_CHANCNT_DONE 0x80000000 /** * Implementation of the work abstraction. 
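 *
 * A minimal usage sketch (my_handler, my_softc and do_deferred_work
 * are hypothetical, not part of this file):
 *
 *	static void
 *	my_handler(void *context)
 *	{
 *		struct my_softc *sc = context;
 *
 *		do_deferred_work(sc);
 *	}
 *
 *	error = hv_queue_work_item(my_handler, sc);
 *
 * The callback later runs on taskqueue_thread; work_item_callback()
 * below frees only the hv_work_item wrapper, so the context itself
 * remains the caller's responsibility.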
*/ static void work_item_callback(void *work, int pending) { struct hv_work_item *w = (struct hv_work_item *)work; w->callback(w->context); free(w, M_DEVBUF); } /** * @brief Create work item */ static int hv_queue_work_item( void (*callback)(void *), void *context) { struct hv_work_item *w = malloc(sizeof(struct hv_work_item), M_DEVBUF, M_NOWAIT); KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n")); if (w == NULL) return (ENOMEM); w->callback = callback; w->context = context; TASK_INIT(&w->work, 0, work_item_callback, w); return (taskqueue_enqueue(taskqueue_thread, &w->work)); } /** * @brief Allocate and initialize a vmbus channel object */ hv_vmbus_channel* hv_vmbus_allocate_channel(void) { hv_vmbus_channel* channel; channel = (hv_vmbus_channel*) malloc( sizeof(hv_vmbus_channel), M_DEVBUF, M_WAITOK | M_ZERO); mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); TAILQ_INIT(&channel->sc_list_anchor); return (channel); } /** * @brief Release the resources used by the vmbus channel object */ void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { mtx_destroy(&channel->sc_lock); free(channel, M_DEVBUF); } /** * @brief Process the offer by creating a channel/device * associated with this offer */ static void vmbus_channel_process_offer(hv_vmbus_channel *new_channel) { boolean_t f_new; hv_vmbus_channel* channel; int ret; uint32_t relid; f_new = TRUE; channel = NULL; relid = new_channel->offer_msg.child_rel_id; /* * Make sure this is a new offer */ mtx_lock(&hv_vmbus_g_connection.channel_lock); hv_vmbus_g_connection.channels[relid] = new_channel; TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { if (memcmp(&channel->offer_msg.offer.interface_type, &new_channel->offer_msg.offer.interface_type, sizeof(hv_guid)) == 0 && memcmp(&channel->offer_msg.offer.interface_instance, &new_channel->offer_msg.offer.interface_instance, sizeof(hv_guid)) == 0) { f_new = FALSE; break; } } if (f_new) { /* Insert at tail */ TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); } mtx_unlock(&hv_vmbus_g_connection.channel_lock); /*XXX add new channel to percpu_list */ if (!f_new) { /* * Check if this is a sub channel. */ if (new_channel->offer_msg.offer.sub_channel_index != 0) { /* * It is a sub channel offer, process it. */ new_channel->primary_channel = channel; new_channel->device = channel->device; mtx_lock(&channel->sc_lock); TAILQ_INSERT_TAIL( &channel->sc_list_anchor, new_channel, sc_list_entry); mtx_unlock(&channel->sc_lock); if (bootverbose) { printf("VMBUS get multi-channel offer, " "rel=%u, sub=%u\n", new_channel->offer_msg.child_rel_id, new_channel->offer_msg.offer.sub_channel_index); } /* Insert new channel into channel_anchor. 
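 * Sub-channels are linked on the global list as well, so the final
 * sweep in hv_vmbus_release_unattached_channels() frees them along
 * with the primary channels.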
*/ mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_lock); if(bootverbose) printf("VMBUS: new multi-channel offer <%p>, " "its primary channel is <%p>.\n", new_channel, new_channel->primary_channel); /*XXX add it to percpu_list */ new_channel->state = HV_CHANNEL_OPEN_STATE; if (channel->sc_creation_callback != NULL) { channel->sc_creation_callback(new_channel); } return; } hv_vmbus_free_vmbus_channel(new_channel); return; } new_channel->state = HV_CHANNEL_OPEN_STATE; /* * Start the process of binding this offer to the driver * (We need to set the device field before calling * hv_vmbus_child_device_add()) */ new_channel->device = hv_vmbus_child_device_create( new_channel->offer_msg.offer.interface_type, new_channel->offer_msg.offer.interface_instance, new_channel); /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. */ ret = hv_vmbus_child_device_register(new_channel->device); if (ret != 0) { mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_lock); hv_vmbus_free_vmbus_channel(new_channel); } mtx_lock(&vmbus_chwait_lock); vmbus_devcnt++; mtx_unlock(&vmbus_chwait_lock); wakeup(&vmbus_devcnt); } void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); chan->target_cpu = cpu; chan->target_vcpu = hv_vmbus_g_context.hv_vcpu_index[cpu]; if (bootverbose) { printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n", chan->offer_msg.child_rel_id, chan->target_cpu, chan->target_vcpu); } } /** * Array of device guids that are performance critical. We try to distribute * the interrupt load for these devices across all online cpus. */ static const hv_guid high_perf_devices[] = { {HV_NIC_GUID, }, {HV_IDE_GUID, }, {HV_SCSI_GUID, }, }; enum { PERF_CHN_NIC = 0, PERF_CHN_IDE, PERF_CHN_SCSI, MAX_PERF_CHN, }; /* * We use this static number to distribute the channel interrupt load. */ static uint32_t next_vcpu; /** * Starting with Win8, we can statically distribute the incoming * channel interrupt load by binding a channel to VCPU. We * implement here a simple round robin scheme for distributing * the interrupt load. * We will bind channels that are not performance critical to cpu 0 and * performance critical channels (IDE, SCSI and Network) will be uniformly * distributed across all available CPUs. */ static void vmbus_channel_select_defcpu(struct hv_vmbus_channel *channel) { uint32_t current_cpu; int i; boolean_t is_perf_channel = FALSE; const hv_guid *guid = &channel->offer_msg.offer.interface_type; for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) { if (memcmp(guid->data, high_perf_devices[i].data, sizeof(hv_guid)) == 0) { is_perf_channel = TRUE; break; } } if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) || (!is_perf_channel)) { /* Stick to cpu0 */ vmbus_channel_cpu_set(channel, 0); return; } /* mp_ncpus should have the number cpus currently online */ current_cpu = (++next_vcpu % mp_ncpus); vmbus_channel_cpu_set(channel, current_cpu); } /** * @brief Handler for channel offers from Hyper-V/Azure * * Handler for channel offers from vmbus in parent partition. We ignore * all offers except network and storage offers. 
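 * A service is recognized by the interface_type GUID carried in the
 * offer; the test is a plain memcmp() against a well-known GUID, in
 * the same way the high_perf_devices[] scan above works.  An
 * illustrative sketch (not code from this change):
 *
 *	static const hv_guid nic_guid = { HV_NIC_GUID };
 *
 *	if (memcmp(&offer->offer.interface_type, &nic_guid,
 *	    sizeof(hv_guid)) == 0)
 *		printf("network offer\n");
 *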
 * For each network and storage offer, we create a channel object and
 * queue a work item to the channel object to process the offer
 * synchronously.
 */
static void
vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel_offer_channel* offer;
	hv_vmbus_channel_offer_channel* copied;

	offer = (hv_vmbus_channel_offer_channel*) hdr;

	/* Make a local copy; the offer lives in the shared message page. */
	copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
	if (copied == NULL) {
		printf("VMBUS: failed to allocate memory for offer\n");
		return;
	}

	memcpy(copied, hdr, sizeof(*copied));
	hv_queue_work_item(vmbus_channel_on_offer_internal, copied);

	mtx_lock(&vmbus_chwait_lock);
	if ((vmbus_chancnt & VMBUS_CHANCNT_DONE) == 0)
		vmbus_chancnt++;
	mtx_unlock(&vmbus_chwait_lock);
}

static void
vmbus_channel_on_offer_internal(void* context)
{
	hv_vmbus_channel* new_channel;
	hv_vmbus_channel_offer_channel* offer =
	    (hv_vmbus_channel_offer_channel*)context;

	/* Allocate the channel object and save this offer */
	new_channel = hv_vmbus_allocate_channel();

	/*
	 * By default we set up state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	new_channel->batched_reading = TRUE;

	new_channel->signal_event_param =
	    (hv_vmbus_input_signal_event *)
	    (HV_ALIGN_UP((unsigned long)
	    &new_channel->signal_event_buffer,
	    HV_HYPERCALL_PARAM_ALIGN));

	new_channel->signal_event_param->connection_id.as_uint32_t = 0;
	new_channel->signal_event_param->connection_id.u.id =
	    HV_VMBUS_EVENT_CONNECTION_ID;
	new_channel->signal_event_param->flag_number = 0;
	new_channel->signal_event_param->rsvd_z = 0;

	if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
		new_channel->is_dedicated_interrupt =
		    (offer->is_dedicated_interrupt != 0);
		new_channel->signal_event_param->connection_id.u.id =
		    offer->connection_id;
	}

	memcpy(&new_channel->offer_msg, offer,
	    sizeof(hv_vmbus_channel_offer_channel));
	new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
	new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;

	/* Select default cpu for this channel. */
	vmbus_channel_select_defcpu(new_channel);

	vmbus_channel_process_offer(new_channel);

	free(offer, M_DEVBUF);
}

/**
 * @brief Rescind offer handler.
 *
 * We queue a work item to process this offer
 * synchronously.
 */
static void
vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel_rescind_offer*	rescind;
	hv_vmbus_channel*		channel;

	rescind = (hv_vmbus_channel_rescind_offer*) hdr;

	channel = hv_vmbus_g_connection.channels[rescind->child_rel_id];
	if (channel == NULL)
		return;

	hv_queue_work_item(vmbus_channel_on_offer_rescind_internal, channel);
	hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL;
}

static void
vmbus_channel_on_offer_rescind_internal(void *context)
{
	hv_vmbus_channel* channel;

	channel = (hv_vmbus_channel*)context;
	if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
		/* Only primary channel owns the hv_device */
		hv_vmbus_child_device_unregister(channel->device);
	}
}

/**
 * @brief Invoked when all offers have been delivered.
 */
static void
vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
{
	mtx_lock(&vmbus_chwait_lock);
	vmbus_chancnt |= VMBUS_CHANCNT_DONE;
	mtx_unlock(&vmbus_chwait_lock);
	wakeup(&vmbus_chancnt);
}

/**
 * @brief Open result handler.
 *
 * This is invoked when we receive a response
 * to our channel open request. Find the matching request, copy the
 * response and signal the requesting thread.
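 *
 * This is the completion half of the driver's post-and-wait pattern:
 * the requester links an hv_vmbus_channel_msg_info onto
 * channel_msg_anchor, posts its request, and blocks on
 * msg_info->wait_sema; handlers such as this one copy the result
 * into msg_info->response and sema_post() the waiter.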
*/ static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_open_result* result; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_open_channel* openMsg; result = (hv_vmbus_channel_open_result*) hdr; /* * Find the open msg, copy the result and signal/unblock the wait event */ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_OPEN_CHANNEL) { openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg; if (openMsg->child_rel_id == result->child_rel_id && openMsg->open_id == result->open_id) { memcpy(&msg_info->response.open_result, result, sizeof(hv_vmbus_channel_open_result)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL created handler. * * This is invoked when we received a response * to our gpadl create request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_created* gpadl_created; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* request_header; hv_vmbus_channel_gpadl_header* gpadl_header; gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr; /* Find the establish msg, copy the result and signal/unblock * the wait event */ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { request_header = (hv_vmbus_channel_msg_header*) msg_info->msg; if (request_header->message_type == HV_CHANNEL_MESSAGEL_GPADL_HEADER) { gpadl_header = (hv_vmbus_channel_gpadl_header*) request_header; if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id) && (gpadl_created->gpadl == gpadl_header->gpadl)) { memcpy(&msg_info->response.gpadl_created, gpadl_created, sizeof(hv_vmbus_channel_gpadl_created)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL torndown handler. * * This is invoked when we received a respons * to our gpadl teardown request. Find the matching request, copy the * response and signal the requesting thread */ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_torndown* gpadl_torndown; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_gpadl_teardown* gpadlTeardown; gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr; /* * Find the open msg, copy the result and signal/unblock the * wait event. */ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) { gpadlTeardown = (hv_vmbus_channel_gpadl_teardown*) requestHeader; if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) { memcpy(&msg_info->response.gpadl_torndown, gpadl_torndown, sizeof(hv_vmbus_channel_gpadl_torndown)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Version response handler. * * This is invoked when we received a response * to our initiate contact request. 
 * Find the matching request, copy the
 * response and signal the requesting thread.
 */
static void
vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel_msg_info*		msg_info;
	hv_vmbus_channel_msg_header*		requestHeader;
	hv_vmbus_channel_initiate_contact*	initiate;
	hv_vmbus_channel_version_response*	versionResponse;

	versionResponse = (hv_vmbus_channel_version_response*)hdr;

	mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
	    msg_list_entry) {
		requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
		if (requestHeader->message_type ==
		    HV_CHANNEL_MESSAGE_INITIATED_CONTACT) {
			initiate =
			    (hv_vmbus_channel_initiate_contact*) requestHeader;
			memcpy(&msg_info->response.version_response,
			    versionResponse,
			    sizeof(hv_vmbus_channel_version_response));
			sema_post(&msg_info->wait_sema);
		}
	}
	mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
}

/**
 * @brief Send a request to get all our pending offers.
 */
int
hv_vmbus_request_channel_offers(void)
{
	int				ret;
	hv_vmbus_channel_msg_header*	msg;
	hv_vmbus_channel_msg_info*	msg_info;

	msg_info = (hv_vmbus_channel_msg_info *)
	    malloc(sizeof(hv_vmbus_channel_msg_info) +
	    sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT);
	if (msg_info == NULL) {
		if (bootverbose)
			printf("Error VMBUS: malloc failed for Request Offers\n");
		return (ENOMEM);
	}

	msg = (hv_vmbus_channel_msg_header*) msg_info->msg;
	msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS;

	ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));

	free(msg_info, M_DEVBUF);
	return (ret);
}

/**
 * @brief Release channels that are unattached/unconnected
 * (i.e., no drivers associated)
 */
void
hv_vmbus_release_unattached_channels(void)
{
	hv_vmbus_channel *channel;

	mtx_lock(&hv_vmbus_g_connection.channel_lock);

	while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
		channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
		TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
		    channel, list_entry);

		if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
			/* Only primary channel owns the hv_device */
			hv_vmbus_child_device_unregister(channel->device);
		}
		hv_vmbus_free_vmbus_channel(channel);
	}
	bzero(hv_vmbus_g_connection.channels,
	    sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT);

	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
}

/**
 * @brief Select the best outgoing channel.
 *
 * The channel whose vcpu binding is closest to the current vcpu will
 * be selected.
 * If there are no sub-channels, the primary channel is always selected.
 *
 * @param primary - primary channel
 */
struct hv_vmbus_channel *
vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
{
	hv_vmbus_channel *new_channel = NULL;
	hv_vmbus_channel *outgoing_channel = primary;
	int old_cpu_distance = 0;
	int new_cpu_distance = 0;
	int cur_vcpu = 0;
	int smp_pro_id = PCPU_GET(cpuid);

	if (TAILQ_EMPTY(&primary->sc_list_anchor)) {
		return outgoing_channel;
	}

	if (smp_pro_id >= MAXCPU) {
		return outgoing_channel;
	}

	cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id];

	TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) {
		if (new_channel->state != HV_CHANNEL_OPENED_STATE) {
			continue;
		}

		if (new_channel->target_vcpu == cur_vcpu) {
			return new_channel;
		}

		old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
		    (outgoing_channel->target_vcpu - cur_vcpu) :
		    (cur_vcpu - outgoing_channel->target_vcpu));
		new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
(new_channel->target_vcpu - cur_vcpu) : (cur_vcpu - new_channel->target_vcpu)); if (old_cpu_distance < new_cpu_distance) { continue; } outgoing_channel = new_channel; } return(outgoing_channel); } void vmbus_scan(void) { uint32_t chancnt; mtx_lock(&vmbus_chwait_lock); while ((vmbus_chancnt & VMBUS_CHANCNT_DONE) == 0) mtx_sleep(&vmbus_chancnt, &vmbus_chwait_lock, 0, "waitch", 0); chancnt = vmbus_chancnt & ~VMBUS_CHANCNT_DONE; while (vmbus_devcnt != chancnt) mtx_sleep(&vmbus_devcnt, &vmbus_chwait_lock, 0, "waitdev", 0); mtx_unlock(&vmbus_chwait_lock); } Index: head/sys/dev/hyperv/vmbus/hv_connection.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_connection.c (revision 298445) +++ head/sys/dev/hyperv/vmbus/hv_connection.c (revision 298446) @@ -1,415 +1,415 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" /* * Globals */ hv_vmbus_connection hv_vmbus_g_connection = { .connect_state = HV_DISCONNECTED, .next_gpadl_handle = 0xE1E10, }; uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008; static uint32_t hv_vmbus_get_next_version(uint32_t current_ver) { switch (current_ver) { case (HV_VMBUS_VERSION_WIN7): return(HV_VMBUS_VERSION_WS2008); case (HV_VMBUS_VERSION_WIN8): return(HV_VMBUS_VERSION_WIN7); case (HV_VMBUS_VERSION_WIN8_1): return(HV_VMBUS_VERSION_WIN8); case (HV_VMBUS_VERSION_WS2008): default: return(HV_VMBUS_VERSION_INVALID); } } /** * Negotiate the highest supported hypervisor version. 
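 *
 * hv_vmbus_connect() starts at HV_VMBUS_VERSION_CURRENT and, each
 * time the host rejects the request, retries with the next older
 * protocol from hv_vmbus_get_next_version() above:
 * WIN8_1 -> WIN8 -> WIN7 -> WS2008, giving up once
 * HV_VMBUS_VERSION_INVALID is reached.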
*/ static int hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, uint32_t version) { int ret = 0; hv_vmbus_channel_initiate_contact *msg; sema_init(&msg_info->wait_sema, 0, "Msg Info Sema"); msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; msg->vmbus_version_requested = version; msg->interrupt_page = hv_get_phys_addr( hv_vmbus_g_connection.interrupt_page); msg->monitor_page_1 = hv_get_phys_addr( hv_vmbus_g_connection.monitor_page_1); msg->monitor_page_2 = hv_get_phys_addr( hv_vmbus_g_connection.monitor_page_2); /** * Add to list before we send the request since we may receive the * response before returning from this routine */ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( msg, sizeof(hv_vmbus_channel_initiate_contact)); if (ret != 0) { mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); return (ret); } /** * Wait for the connection response */ ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock); /** * Check if successful */ if (msg_info->response.version_response.version_supported) { hv_vmbus_g_connection.connect_state = HV_CONNECTED; } else { ret = ECONNREFUSED; } return (ret); } /** * Send a connect request on the partition service connection */ int hv_vmbus_connect(void) { int ret = 0; uint32_t version; hv_vmbus_channel_msg_info* msg_info = NULL; /** * Make sure we are not connecting or connected */ if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) { return (-1); } /** * Initialize the vmbus connection */ hv_vmbus_g_connection.connect_state = HV_CONNECTING; TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor); mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg", NULL, MTX_DEF); TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor); mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel", NULL, MTX_DEF); /** * Setup the vmbus event connection for channel interrupt abstraction * stuff */ hv_vmbus_g_connection.interrupt_page = malloc( PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); hv_vmbus_g_connection.recv_interrupt_page = hv_vmbus_g_connection.interrupt_page; hv_vmbus_g_connection.send_interrupt_page = ((uint8_t *) hv_vmbus_g_connection.interrupt_page + (PAGE_SIZE >> 1)); /** * Set up the monitor notification facility. The 1st page for * parent->child and the 2nd page for child->parent */ hv_vmbus_g_connection.monitor_page_1 = malloc( PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); hv_vmbus_g_connection.monitor_page_2 = malloc( PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); msg_info = (hv_vmbus_channel_msg_info*) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_initiate_contact), M_DEVBUF, M_WAITOK | M_ZERO); hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT, M_DEVBUF, M_WAITOK | M_ZERO); /* * Find the highest vmbus version number we can support. */ version = HV_VMBUS_VERSION_CURRENT; do { ret = hv_vmbus_negotiate_version(msg_info, version); if (ret == EWOULDBLOCK) { /* * We timed out. 
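 * (sema_timedwait() returned EWOULDBLOCK after the 5 second wait);
 * treat this as fatal rather than probing for an older protocol
 * version.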
*/ goto cleanup; } if (hv_vmbus_g_connection.connect_state == HV_CONNECTED) break; version = hv_vmbus_get_next_version(version); } while (version != HV_VMBUS_VERSION_INVALID); hv_vmbus_protocal_version = version; if (bootverbose) printf("VMBUS: Protocol Version: %d.%d\n", version >> 16, version & 0xFFFF); sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (0); /* * Cleanup after failure! */ cleanup: hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; mtx_destroy(&hv_vmbus_g_connection.channel_lock); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); if (hv_vmbus_g_connection.interrupt_page != NULL) { free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); hv_vmbus_g_connection.interrupt_page = NULL; } free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF); free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF); if (msg_info) { sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); } free(hv_vmbus_g_connection.channels, M_DEVBUF); return (ret); } /** * Send a disconnect request on the partition service connection */ int hv_vmbus_disconnect(void) { int ret = 0; hv_vmbus_channel_unload msg; msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD; ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload)); free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); free(hv_vmbus_g_connection.channels, M_DEVBUF); hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; return (ret); } /** * Handler for events */ void hv_vmbus_on_events(int cpu) { int bit; int dword; void *page_addr; uint32_t* recv_interrupt_page = NULL; int rel_id; int maxdword; hv_vmbus_synic_event_flags *event; /* int maxdword = PAGE_SIZE >> 3; */ KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " "cpu out of range!")); page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; event = (hv_vmbus_synic_event_flags *) page_addr + HV_VMBUS_MESSAGE_SINT; if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; /* * receive size is 1/2 page and divide that by 4 bytes */ if (synch_test_and_clear_bit(0, &event->flags32[0])) recv_interrupt_page = hv_vmbus_g_connection.recv_interrupt_page; } else { /* * On Host with Win8 or above, the event page can be * checked directly to get the id of the channel * that has the pending interrupt. */ maxdword = HV_EVENT_FLAGS_DWORD_COUNT; recv_interrupt_page = event->flags32; } /* * Check events */ if (recv_interrupt_page != NULL) { for (dword = 0; dword < maxdword; dword++) { if (recv_interrupt_page[dword]) { for (bit = 0; bit < HV_CHANNEL_DWORD_LEN; bit++) { if (synch_test_and_clear_bit(bit, (uint32_t *) &recv_interrupt_page[dword])) { rel_id = (dword << 5) + bit; if (rel_id == 0) { /* * Special case - * vmbus channel protocol msg. */ continue; } else { hv_vmbus_channel * channel = hv_vmbus_g_connection.channels[rel_id]; /* if channel is closed or closing */ if (channel == NULL || channel->rxq == NULL) continue; if (channel->batched_reading) hv_ring_buffer_read_begin(&channel->inbound); taskqueue_enqueue(channel->rxq, &channel->channel_task); } } } } } } return; } /** * Send a msg on the vmbus's message connection */ int hv_vmbus_post_message(void *buffer, size_t bufferLen) { hv_vmbus_connection_id connId; sbintime_t time = SBT_1MS; int retries; int ret; connId.as_uint32_t = 0; connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID; /* * We retry to cope with transient failures caused by host side's * insufficient resources. 
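 * The delay between attempts starts at SBT_1MS and doubles each
 * round until it is capped at roughly two seconds, i.e.
 * 1ms, 2ms, 4ms, ..., 1s, 2s, 2s.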
20 times should suffice in practice. */ for (retries = 0; retries < 20; retries++) { ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen); if (ret == HV_STATUS_SUCCESS) return (0); pause_sbt("pstmsg", time, 0, C_HARDCLOCK); if (time < SBT_1S * 2) time *= 2; } KASSERT(ret == HV_STATUS_SUCCESS, ("Error VMBUS: Message Post Failed, ret=%d\n", ret)); return (EAGAIN); } /** * Send an event notification to the parent */ int hv_vmbus_set_event(hv_vmbus_channel *channel) { int ret = 0; uint32_t child_rel_id = channel->offer_msg.child_rel_id; /* Each uint32_t represents 32 channels */ synch_set_bit(child_rel_id & 31, (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + (child_rel_id >> 5)))); ret = hv_vmbus_signal_event(channel->signal_event_param); return (ret); } Index: head/sys/dev/hyperv/vmbus/hv_et.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_et.c (revision 298445) +++ head/sys/dev/hyperv/vmbus/hv_et.c (revision 298446) @@ -1,131 +1,131 @@ /*- - * Copyright (c) 2015 Microsoft Corp. + * Copyright (c) 2015,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "hv_vmbus_priv.h" #define HV_TIMER_FREQUENCY (10 * 1000 * 1000LL) /* 100ns period */ #define HV_MAX_DELTA_TICKS 0xffffffffLL #define HV_MIN_DELTA_TICKS 1LL static struct eventtimer et; static uint64_t periodticks[MAXCPU]; static inline uint64_t sbintime2tick(sbintime_t time) { struct timespec val; val = sbttots(time); return val.tv_sec * HV_TIMER_FREQUENCY + val.tv_nsec / 100; } static int hv_et_start(struct eventtimer *et, sbintime_t firsttime, sbintime_t periodtime) { union hv_timer_config timer_cfg; uint64_t current; timer_cfg.as_uint64 = 0; timer_cfg.auto_enable = 1; timer_cfg.sintx = HV_VMBUS_TIMER_SINT; periodticks[curcpu] = sbintime2tick(periodtime); if (firsttime == 0) firsttime = periodtime; current = rdmsr(HV_X64_MSR_TIME_REF_COUNT); current += sbintime2tick(firsttime); wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); wrmsr(HV_X64_MSR_STIMER0_COUNT, current); return (0); } static int hv_et_stop(struct eventtimer *et) { wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); wrmsr(HV_X64_MSR_STIMER0_COUNT, 0); return (0); } void hv_et_intr(struct trapframe *frame) { union hv_timer_config timer_cfg; struct trapframe *oldframe; struct thread *td; if (periodticks[curcpu] != 0) { uint64_t tick = sbintime2tick(periodticks[curcpu]); timer_cfg.as_uint64 = rdmsr(HV_X64_MSR_STIMER0_CONFIG); timer_cfg.enable = 0; timer_cfg.auto_enable = 1; timer_cfg.periodic = 1; periodticks[curcpu] = 0; wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); wrmsr(HV_X64_MSR_STIMER0_COUNT, tick); } if (et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; et.et_event_cb(&et, et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } } void hv_et_init(void) { et.et_name = "HyperV"; et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU | ET_FLAGS_PERIODIC; et.et_quality = 1000; et.et_frequency = HV_TIMER_FREQUENCY; et.et_min_period = (1LL << 32) / HV_TIMER_FREQUENCY; et.et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY); et.et_start = hv_et_start; et.et_stop = hv_et_stop; et.et_priv = &et; et_register(&et); } Index: head/sys/dev/hyperv/vmbus/hv_hv.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_hv.c (revision 298445) +++ head/sys/dev/hyperv/vmbus/hv_hv.c (revision 298446) @@ -1,515 +1,515 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * Implements low-level interactions with Hyper-V/Azure
 */

#include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h"

#define HV_NANOSECONDS_PER_SEC	1000000000L
#define HYPERV_INTERFACE	0x31237648	/* HV#1 */

static u_int hv_get_timecount(struct timecounter *tc);

u_int	hyperv_features;
u_int	hyperv_recommends;
static u_int	hyperv_pm_features;
static u_int	hyperv_features3;

/**
 * Globals
 */
hv_vmbus_context hv_vmbus_g_context = {
	.syn_ic_initialized = FALSE,
	.hypercall_page = NULL,
};

static struct timecounter hv_timecounter = {
	hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V",
	HV_NANOSECONDS_PER_SEC/100
};

static u_int
hv_get_timecount(struct timecounter *tc)
{
	u_int now = rdmsr(HV_X64_MSR_TIME_REF_COUNT);

	return (now);
}

/**
 * @brief Invoke the specified hypercall
 */
static uint64_t
hv_vmbus_do_hypercall(uint64_t control, void* input, void* output)
{
#ifdef __x86_64__
	uint64_t hv_status = 0;
	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
	volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;

	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8");
	__asm__ __volatile__ ("call *%3" : "=a"(hv_status):
	    "c" (control), "d" (input_address), "m" (hypercall_page));

	return (hv_status);
#else
	uint32_t control_high = control >> 32;
	uint32_t control_low = control & 0xFFFFFFFF;
	uint32_t hv_status_high = 1;
	uint32_t hv_status_low = 1;
	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
	uint32_t input_address_high = input_address >> 32;
	uint32_t input_address_low = input_address & 0xFFFFFFFF;
	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
	uint32_t output_address_high = output_address >> 32;
	uint32_t output_address_low = output_address & 0xFFFFFFFF;
	volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;

	__asm__ __volatile__ ("call *%8" : "=d"(hv_status_high),
	    "=a"(hv_status_low) : "d" (control_high),
	    "a" (control_low), "b" (input_address_high),
	    "c" (input_address_low), "D"(output_address_high),
	    "S"(output_address_low), "m" (hypercall_page));

	return (hv_status_low | ((uint64_t)hv_status_high << 32));
#endif /* __x86_64__ */
}

/**
 * @brief Main initialization routine.
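 *
 * The bring-up below is the standard Hyper-V handshake: publish the
 * guest ID via HV_X64_MSR_GUEST_OS_ID, point HV_X64_MSR_HYPERCALL at
 * a zeroed guest page with the enable bit set, and read the MSR back
 * to verify that the hypervisor accepted the page before any
 * hypercall is issued.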
* * This routine must be called * before any other routines in here are called */ int hv_vmbus_init(void) { hv_vmbus_x64_msr_hypercall_contents hypercall_msr; void* virt_addr = NULL; memset( hv_vmbus_g_context.syn_ic_event_page, 0, sizeof(hv_vmbus_handle) * MAXCPU); memset( hv_vmbus_g_context.syn_ic_msg_page, 0, sizeof(hv_vmbus_handle) * MAXCPU); if (vm_guest != VM_GUEST_HV) goto cleanup; /* * Write our OS info */ uint64_t os_guest_info = HV_FREEBSD_GUEST_ID; wrmsr(HV_X64_MSR_GUEST_OS_ID, os_guest_info); hv_vmbus_g_context.guest_id = os_guest_info; /* * See if the hypercall page is already set */ hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); hypercall_msr.u.enable = 1; hypercall_msr.u.guest_physical_address = (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); /* * Confirm that hypercall page did get set up */ hypercall_msr.as_uint64_t = 0; hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); if (!hypercall_msr.u.enable) goto cleanup; hv_vmbus_g_context.hypercall_page = virt_addr; hv_et_init(); return (0); cleanup: if (virt_addr != NULL) { if (hypercall_msr.u.enable) { hypercall_msr.as_uint64_t = 0; wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); } free(virt_addr, M_DEVBUF); } return (ENOTSUP); } /** * @brief Cleanup routine, called normally during driver unloading or exiting */ void hv_vmbus_cleanup(void) { hv_vmbus_x64_msr_hypercall_contents hypercall_msr; if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) { if (hv_vmbus_g_context.hypercall_page != NULL) { hypercall_msr.as_uint64_t = 0; wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); free(hv_vmbus_g_context.hypercall_page, M_DEVBUF); hv_vmbus_g_context.hypercall_page = NULL; } } } /** * @brief Post a message using the hypervisor message IPC. * (This involves a hypercall.) */ hv_vmbus_status hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void* payload, size_t payload_size) { struct alignedinput { uint64_t alignment8; hv_vmbus_input_post_message msg; }; hv_vmbus_input_post_message* aligned_msg; hv_vmbus_status status; size_t addr; if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) return (EMSGSIZE); addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF, M_ZERO | M_NOWAIT); KASSERT(addr != 0, ("Error VMBUS: malloc failed to allocate message buffer!")); if (addr == 0) return (ENOMEM); aligned_msg = (hv_vmbus_input_post_message*) (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN)); aligned_msg->connection_id = connection_id; aligned_msg->message_type = message_type; aligned_msg->payload_size = payload_size; memcpy((void*) aligned_msg->payload, payload, payload_size); status = hv_vmbus_do_hypercall( HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF; free((void *) addr, M_DEVBUF); return (status); } /** * @brief Signal an event on the specified connection using the hypervisor * event IPC. (This involves a hypercall.) 
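 *
 * The con_id passed in is typically the channel's pre-formatted
 * signal_event_param, which vmbus_channel_on_offer_internal()
 * aligned to HV_HYPERCALL_PARAM_ALIGN and filled in once at offer
 * time, so nothing needs to be formatted on the hot signalling path.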
*/ hv_vmbus_status hv_vmbus_signal_event(void *con_id) { hv_vmbus_status status; status = hv_vmbus_do_hypercall( HV_CALL_SIGNAL_EVENT, con_id, 0) & 0xFFFF; return (status); } /** * @brief hv_vmbus_synic_init */ void hv_vmbus_synic_init(void *arg) { int cpu; uint64_t hv_vcpu_index; hv_vmbus_synic_simp simp; hv_vmbus_synic_siefp siefp; hv_vmbus_synic_scontrol sctrl; hv_vmbus_synic_sint shared_sint; uint64_t version; hv_setup_args* setup_args = (hv_setup_args *)arg; cpu = PCPU_GET(cpuid); if (hv_vmbus_g_context.hypercall_page == NULL) return; /* * TODO: Check the version */ version = rdmsr(HV_X64_MSR_SVERSION); hv_vmbus_g_context.syn_ic_msg_page[cpu] = setup_args->page_buffers[2 * cpu]; hv_vmbus_g_context.syn_ic_event_page[cpu] = setup_args->page_buffers[2 * cpu + 1]; /* * Setup the Synic's message page */ simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); simp.u.simp_enabled = 1; simp.u.base_simp_gpa = ((hv_get_phys_addr( hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); /* * Setup the Synic's event page */ siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); siefp.u.siefp_enabled = 1; siefp.u.base_siefp_gpa = ((hv_get_phys_addr( hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */ shared_sint.as_uint64_t = 0; shared_sint.u.vector = setup_args->vector; shared_sint.u.masked = FALSE; shared_sint.u.auto_eoi = TRUE; wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, shared_sint.as_uint64_t); wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT, shared_sint.as_uint64_t); /* Enable the global synic bit */ sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL); sctrl.u.enable = 1; wrmsr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t); hv_vmbus_g_context.syn_ic_initialized = TRUE; /* * Set up the cpuid mapping from Hyper-V to FreeBSD. * The array is indexed using FreeBSD cpuid. */ hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX); hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index; return; } /** * @brief Cleanup routine for hv_vmbus_synic_init() */ void hv_vmbus_synic_cleanup(void *arg) { hv_vmbus_synic_sint shared_sint; hv_vmbus_synic_simp simp; hv_vmbus_synic_siefp siefp; if (!hv_vmbus_g_context.syn_ic_initialized) return; shared_sint.as_uint64_t = rdmsr( HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT); shared_sint.u.masked = 1; /* * Disable the interrupt 0 */ wrmsr( HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, shared_sint.as_uint64_t); shared_sint.as_uint64_t = rdmsr( HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT); shared_sint.u.masked = 1; /* * Disable the interrupt 1 */ wrmsr( HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT, shared_sint.as_uint64_t); simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); simp.u.simp_enabled = 0; simp.u.base_simp_gpa = 0; wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); siefp.u.siefp_enabled = 0; siefp.u.base_siefp_gpa = 0; wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); } static bool hyperv_identify(void) { u_int regs[4]; unsigned int maxLeaf; unsigned int op; if (vm_guest != VM_GUEST_HV) return (false); op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION; do_cpuid(op, regs); maxLeaf = regs[0]; if (maxLeaf < HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS) return (false); op = HV_CPU_ID_FUNCTION_HV_INTERFACE; do_cpuid(op, regs); if (regs[0] != HYPERV_INTERFACE) return (false); op = HV_CPU_ID_FUNCTION_MS_HV_FEATURES; do_cpuid(op, regs); if ((regs[0] & HV_FEATURE_MSR_HYPERCALL) == 0) { /* * Hyper-V w/o Hypercall is impossible; someone * is faking Hyper-V. 
*/ return (false); } hyperv_features = regs[0]; hyperv_pm_features = regs[2]; hyperv_features3 = regs[3]; op = HV_CPU_ID_FUNCTION_MS_HV_VERSION; do_cpuid(op, regs); printf("Hyper-V Version: %d.%d.%d [SP%d]\n", regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]); printf(" Features=0x%b\n", hyperv_features, "\020" "\001VPRUNTIME" /* MSR_VP_RUNTIME */ "\002TMREFCNT" /* MSR_TIME_REF_COUNT */ "\003SYNIC" /* MSRs for SynIC */ "\004SYNTM" /* MSRs for SynTimer */ "\005APIC" /* MSR_{EOI,ICR,TPR} */ "\006HYPERCALL" /* MSR_{GUEST_OS_ID,HYPERCALL} */ "\007VPINDEX" /* MSR_VP_INDEX */ "\010RESET" /* MSR_RESET */ "\011STATS" /* MSR_STATS_ */ "\012REFTSC" /* MSR_REFERENCE_TSC */ "\013IDLE" /* MSR_GUEST_IDLE */ "\014TMFREQ" /* MSR_{TSC,APIC}_FREQUENCY */ "\015DEBUG"); /* MSR_SYNTH_DEBUG_ */ printf(" PM Features=max C%u, 0x%b\n", HV_PM_FEATURE_CSTATE(hyperv_pm_features), (hyperv_pm_features & ~HV_PM_FEATURE_CSTATE_MASK), "\020" "\005C3HPET"); /* HPET is required for C3 state */ printf(" Features3=0x%b\n", hyperv_features3, "\020" "\001MWAIT" /* MWAIT */ "\002DEBUG" /* guest debug support */ "\003PERFMON" /* performance monitor */ "\004PCPUDPE" /* physical CPU dynamic partition event */ "\005XMMHC" /* hypercall input through XMM regs */ "\006IDLE" /* guest idle support */ "\007SLEEP" /* hypervisor sleep support */ "\010NUMA" /* NUMA distance query support */ "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */ "\012SYNCMC" /* inject synthetic machine checks */ "\013CRASH" /* MSRs for guest crash */ "\014DEBUGMSR" /* MSRs for guest debug */ "\015NPIEP" /* NPIEP */ "\016HVDIS"); /* disabling hypervisor */ op = HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION; do_cpuid(op, regs); hyperv_recommends = regs[0]; if (bootverbose) printf(" Recommends: %08x %08x\n", regs[0], regs[1]); op = HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS; do_cpuid(op, regs); if (bootverbose) { printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n", regs[0], regs[1], regs[2]); } if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE) { op = HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE; do_cpuid(op, regs); if (bootverbose) { printf(" HW Features: %08x AMD: %08x\n", regs[0], regs[3]); } } return (true); } static void hyperv_init(void *dummy __unused) { if (!hyperv_identify()) return; if (hyperv_features & HV_FEATURE_MSR_TIME_REFCNT) { /* Register virtual timecount */ tc_init(&hv_timecounter); } } SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, NULL); Index: head/sys/dev/hyperv/vmbus/hv_ring_buffer.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 298445) +++ head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 298446) @@ -1,552 +1,552 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include "hv_vmbus_priv.h" /* Amount of space to write to */ #define HV_BYTES_AVAIL_TO_WRITE(r, w, z) ((w) >= (r))? \ ((z) - ((w) - (r))):((r) - (w)) static int hv_rbi_sysctl_stats(SYSCTL_HANDLER_ARGS) { hv_vmbus_ring_buffer_info* rbi; uint32_t read_index, write_index, interrupt_mask, sz; uint32_t read_avail, write_avail; char rbi_stats[256]; rbi = (hv_vmbus_ring_buffer_info*)arg1; read_index = rbi->ring_buffer->read_index; write_index = rbi->ring_buffer->write_index; interrupt_mask = rbi->ring_buffer->interrupt_mask; sz = rbi->ring_data_size; write_avail = HV_BYTES_AVAIL_TO_WRITE(read_index, write_index, sz); read_avail = sz - write_avail; snprintf(rbi_stats, sizeof(rbi_stats), "r_idx:%d " "w_idx:%d " "int_mask:%d " "r_avail:%d " "w_avail:%d", read_index, write_index, interrupt_mask, read_avail, write_avail); return (sysctl_handle_string(oidp, rbi_stats, sizeof(rbi_stats), req)); } void hv_ring_buffer_stat( struct sysctl_ctx_list *ctx, struct sysctl_oid_list *tree_node, hv_vmbus_ring_buffer_info *rbi, const char *desc) { SYSCTL_ADD_PROC(ctx, tree_node, OID_AUTO, "ring_buffer_stats", CTLTYPE_STRING|CTLFLAG_RD, rbi, 0, hv_rbi_sysctl_stats, "A", desc); } /** * @brief Get number of bytes available to read and to write to * for the specified ring buffer */ static inline void get_ring_buffer_avail_bytes( hv_vmbus_ring_buffer_info* rbi, uint32_t* read, uint32_t* write) { uint32_t read_loc, write_loc; /* * Capture the read/write indices before they changed */ read_loc = rbi->ring_buffer->read_index; write_loc = rbi->ring_buffer->write_index; *write = HV_BYTES_AVAIL_TO_WRITE( read_loc, write_loc, rbi->ring_data_size); *read = rbi->ring_data_size - *write; } /** * @brief Get the next write location for the specified ring buffer */ static inline uint32_t get_next_write_location(hv_vmbus_ring_buffer_info* ring_info) { uint32_t next = ring_info->ring_buffer->write_index; return (next); } /** * @brief Set the next write location for the specified ring buffer */ static inline void set_next_write_location( hv_vmbus_ring_buffer_info* ring_info, uint32_t next_write_location) { ring_info->ring_buffer->write_index = next_write_location; } /** * @brief Get the next read location for the specified ring buffer */ static inline uint32_t get_next_read_location(hv_vmbus_ring_buffer_info* ring_info) { uint32_t next = ring_info->ring_buffer->read_index; return (next); } /** * @brief Get the next read location + offset for the specified ring buffer. * This allows the caller to skip. 
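 *
 * hv_vmbus_channel_recv_packet() relies on this: it peeks at the
 * packet descriptor first and then reads the payload starting
 * (desc.data_offset8 << 3) bytes past the current read index,
 * skipping the descriptor it has already consumed.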
*/ static inline uint32_t get_next_read_location_with_offset( hv_vmbus_ring_buffer_info* ring_info, uint32_t offset) { uint32_t next = ring_info->ring_buffer->read_index; next += offset; next %= ring_info->ring_data_size; return (next); } /** * @brief Set the next read location for the specified ring buffer */ static inline void set_next_read_location( hv_vmbus_ring_buffer_info* ring_info, uint32_t next_read_location) { ring_info->ring_buffer->read_index = next_read_location; } /** * @brief Get the start of the ring buffer */ static inline void * get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info) { return (void *) ring_info->ring_buffer->buffer; } /** * @brief Get the size of the ring buffer. */ static inline uint32_t get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info) { return ring_info->ring_data_size; } /** * Get the read and write indices as uint64_t of the specified ring buffer. */ static inline uint64_t get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info) { return (uint64_t) ring_info->ring_buffer->write_index << 32; } void hv_ring_buffer_read_begin( hv_vmbus_ring_buffer_info* ring_info) { ring_info->ring_buffer->interrupt_mask = 1; mb(); } uint32_t hv_ring_buffer_read_end( hv_vmbus_ring_buffer_info* ring_info) { uint32_t read, write; ring_info->ring_buffer->interrupt_mask = 0; mb(); /* * Now check to see if the ring buffer is still empty. * If it is not, we raced and we need to process new * incoming messages. */ get_ring_buffer_avail_bytes(ring_info, &read, &write); return (read); } /* * When we write to the ring buffer, check if the host needs to * be signaled. Here is the details of this protocol: * * 1. The host guarantees that while it is draining the * ring buffer, it will set the interrupt_mask to * indicate it does not need to be interrupted when * new data is placed. * * 2. The host guarantees that it will completely drain * the ring buffer before exiting the read loop. Further, * once the ring buffer is empty, it will clear the * interrupt_mask and re-check to see if new data has * arrived. */ static boolean_t hv_ring_buffer_needsig_on_write( uint32_t old_write_location, hv_vmbus_ring_buffer_info* rbi) { mb(); if (rbi->ring_buffer->interrupt_mask) return (FALSE); /* Read memory barrier */ rmb(); /* * This is the only case we need to signal when the * ring transitions from being empty to non-empty. */ if (old_write_location == rbi->ring_buffer->read_index) return (TRUE); return (FALSE); } static uint32_t copy_to_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, uint32_t start_write_offset, char* src, uint32_t src_len); static uint32_t copy_from_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, char* dest, uint32_t dest_len, uint32_t start_read_offset); /** * @brief Get the interrupt mask for the specified ring buffer. */ uint32_t hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi) { return rbi->ring_buffer->interrupt_mask; } /** * @brief Initialize the ring buffer. */ int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info* ring_info, void* buffer, uint32_t buffer_len) { memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info)); ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer; ring_info->ring_buffer->read_index = ring_info->ring_buffer->write_index = 0; ring_info->ring_size = buffer_len; ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer); mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN); return (0); } /** * @brief Cleanup the ring buffer. 
 */
void
hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info)
{
	mtx_destroy(&ring_info->ring_lock);
}

/**
 * @brief Write to the ring buffer.
 */
int
hv_ring_buffer_write(
	hv_vmbus_ring_buffer_info*	out_ring_info,
	hv_vmbus_sg_buffer_list		sg_buffers[],
	uint32_t			sg_buffer_count,
	boolean_t			*need_sig)
{
	int i = 0;
	uint32_t byte_avail_to_write;
	uint32_t byte_avail_to_read;
	uint32_t old_write_location;
	uint32_t total_bytes_to_write = 0;
	volatile uint32_t next_write_location;
	uint64_t prev_indices = 0;

	for (i = 0; i < sg_buffer_count; i++) {
		total_bytes_to_write += sg_buffers[i].length;
	}

	total_bytes_to_write += sizeof(uint64_t);

	mtx_lock_spin(&out_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read,
	    &byte_avail_to_write);

	/*
	 * If there is only room for the packet, assume it is full.
	 * Otherwise, the next time around, we think the ring buffer
	 * is empty since the read index == write index.
	 */
	if (byte_avail_to_write <= total_bytes_to_write) {
		mtx_unlock_spin(&out_ring_info->ring_lock);
		return (EAGAIN);
	}

	/*
	 * Write to the ring buffer
	 */
	next_write_location = get_next_write_location(out_ring_info);

	old_write_location = next_write_location;

	for (i = 0; i < sg_buffer_count; i++) {
		next_write_location = copy_to_ring_buffer(out_ring_info,
		    next_write_location, (char *) sg_buffers[i].data,
		    sg_buffers[i].length);
	}

	/*
	 * Set previous packet start
	 */
	prev_indices = get_ring_buffer_indices(out_ring_info);

	next_write_location = copy_to_ring_buffer(
	    out_ring_info, next_write_location,
	    (char *) &prev_indices, sizeof(uint64_t));

	/*
	 * Full memory barrier before updating the write index.
	 */
	mb();

	/*
	 * Now, update the write location
	 */
	set_next_write_location(out_ring_info, next_write_location);

	mtx_unlock_spin(&out_ring_info->ring_lock);

	*need_sig = hv_ring_buffer_needsig_on_write(old_write_location,
	    out_ring_info);

	return (0);
}

/**
 * @brief Read without advancing the read index.
 */
int
hv_ring_buffer_peek(
	hv_vmbus_ring_buffer_info*	in_ring_info,
	void*				buffer,
	uint32_t			buffer_len)
{
	uint32_t bytesAvailToWrite;
	uint32_t bytesAvailToRead;
	uint32_t nextReadLocation = 0;

	mtx_lock_spin(&in_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead,
	    &bytesAvailToWrite);

	/*
	 * Make sure there is something to read
	 */
	if (bytesAvailToRead < buffer_len) {
		mtx_unlock_spin(&in_ring_info->ring_lock);
		return (EAGAIN);
	}

	/*
	 * Convert to byte offset
	 */
	nextReadLocation = get_next_read_location(in_ring_info);

	nextReadLocation = copy_from_ring_buffer(
	    in_ring_info, (char *)buffer, buffer_len, nextReadLocation);

	mtx_unlock_spin(&in_ring_info->ring_lock);

	return (0);
}

/**
 * @brief Read and advance the read index.
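 *
 * Every packet written by hv_ring_buffer_write() is followed by an
 * 8-byte prev_indices trailer, so a read consumes buffer_len bytes
 * of payload plus that trailer before publishing the new read index.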
/** * @brief Read and advance the read index. */ int hv_ring_buffer_read( hv_vmbus_ring_buffer_info* in_ring_info, void* buffer, uint32_t buffer_len, uint32_t offset) { uint32_t bytes_avail_to_write; uint32_t bytes_avail_to_read; uint32_t next_read_location = 0; uint64_t prev_indices = 0; if (buffer_len <= 0) return (EINVAL); mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes( in_ring_info, &bytes_avail_to_read, &bytes_avail_to_write); /* * Make sure there is something to read */ if (bytes_avail_to_read < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } next_read_location = get_next_read_location_with_offset( in_ring_info, offset); next_read_location = copy_from_ring_buffer( in_ring_info, (char *) buffer, buffer_len, next_read_location); next_read_location = copy_from_ring_buffer( in_ring_info, (char *) &prev_indices, sizeof(uint64_t), next_read_location); /* * Make sure all reads are done before we update the read index since * the writer may start writing to the read area once the read index * is updated. */ wmb(); /* * Update the read index */ set_next_read_location(in_ring_info, next_read_location); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Helper routine to copy from source to ring buffer. * * Assume there is enough room. Handles wrap-around in dest case only! */ uint32_t copy_to_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, uint32_t start_write_offset, char* src, uint32_t src_len) { char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); uint32_t fragLen; if (src_len > ring_buffer_size - start_write_offset) { /* wrap-around detected! */ fragLen = ring_buffer_size - start_write_offset; memcpy(ring_buffer + start_write_offset, src, fragLen); memcpy(ring_buffer, src + fragLen, src_len - fragLen); } else { memcpy(ring_buffer + start_write_offset, src, src_len); } start_write_offset += src_len; start_write_offset %= ring_buffer_size; return (start_write_offset); } /** * @brief Helper routine to copy from the ring buffer to the destination * buffer. * * Assume there is enough room. Handles wrap-around in src case only! */ uint32_t copy_from_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, char* dest, uint32_t dest_len, uint32_t start_read_offset) { uint32_t fragLen; char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); if (dest_len > ring_buffer_size - start_read_offset) { /* wrap-around detected at the src */ fragLen = ring_buffer_size - start_read_offset; memcpy(dest, ring_buffer + start_read_offset, fragLen); memcpy(dest + fragLen, ring_buffer, dest_len - fragLen); } else { memcpy(dest, ring_buffer + start_read_offset, dest_len); } start_read_offset += dest_len; start_read_offset %= ring_buffer_size; return (start_read_offset); }
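The wrap-around split used by both helpers is easy to exercise in isolation. This is a self-contained userspace sketch on a hypothetical 8-byte ring, not the kernel code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_SIZE 8	/* hypothetical tiny ring, for illustration */

/* Same wrap handling as copy_to_ring_buffer(), on a plain array. */
static uint32_t
ring_copy_in(char *ring, uint32_t off, const char *src, uint32_t len)
{
	if (len > RING_SIZE - off) {
		/* Wrap-around: split the copy at the end of the ring. */
		uint32_t frag = RING_SIZE - off;

		memcpy(ring + off, src, frag);
		memcpy(ring, src + frag, len - frag);
	} else
		memcpy(ring + off, src, len);
	return ((off + len) % RING_SIZE);
}

int
main(void)
{
	char ring[RING_SIZE + 1] = "........";
	uint32_t next;

	/* 5 bytes at offset 6: "AB" fills the tail, "CDE" wraps to the
	 * front, and the next write offset becomes (6 + 5) % 8 = 3. */
	next = ring_copy_in(ring, 6, "ABCDE", 5);
	printf("next=%u ring=%s\n", next, ring);   /* next=3 ring=CDE...AB */
	return (0);
}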
Index: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 298445) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 298446) @@ -1,650 +1,650 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" #include #include "acpi_if.h" static device_t vmbus_devp; static int vmbus_inited; static hv_setup_args setup_args; /* only CPU 0 supported at this time */ static char *vmbus_ids[] = { "VMBUS", NULL }; /** * @brief Software interrupt thread routine to handle channel messages from * the hypervisor. */ static void vmbus_msg_swintr(void *arg, int pending __unused) { int cpu; void* page_addr; hv_vmbus_channel_msg_header *hdr; hv_vmbus_channel_msg_table_entry *entry; hv_vmbus_channel_msg_type msg_type; hv_vmbus_message* msg; cpu = (int)(long)arg; KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " "cpu out of range!")); page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; for (;;) { if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) break; /* no message */ hdr = (hv_vmbus_channel_msg_header *)msg->u.payload; msg_type = hdr->message_type; if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) { printf("VMBUS: unknown message type = %d\n", msg_type); goto handled; } entry = &g_channel_message_table[msg_type]; if (entry->messageHandler) entry->messageHandler(hdr); handled: msg->header.message_type = HV_MESSAGE_TYPE_NONE; /* * Make sure the write to message_type (i.e., set to * HV_MESSAGE_TYPE_NONE) happens before we read the * message_pending and EOMing. Otherwise, the EOMing will * not deliver any more messages, since there is no empty slot. * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become a compiler fence on a UP kernel. */ mb(); if (msg->header.message_flags.u.message_pending) { /* * This will cause a message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(HV_X64_MSR_EOM, 0); } } } /** * @brief Interrupt filter routine for VMBUS. * * The purpose of this routine is to determine the type of VMBUS protocol * message to process - an event or a channel message. */ static inline int hv_vmbus_isr(struct trapframe *frame) { int cpu; hv_vmbus_message* msg; void* page_addr; cpu = PCPU_GET(cpuid); /* * The Windows team has advised that we check for events * before checking for messages.
This is the way they do it * in Windows when running as a guest in Hyper-V. */ hv_vmbus_on_events(cpu); /* Check if there are actual msgs to be processed */ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_TIMER_SINT; /* let the event timer code process the message */ if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) { msg->header.message_type = HV_MESSAGE_TYPE_NONE; /* call the interrupt handler of the event timer */ hv_et_intr(frame); /* * Make sure the write to message_type (i.e., set to * HV_MESSAGE_TYPE_NONE) happens before we read the * message_pending and EOMing. Otherwise, the EOMing will * not deliver any more messages, since there is no empty slot. * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become a compiler fence on a UP kernel. */ mb(); if (msg->header.message_flags.u.message_pending) { /* * This will cause a message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(HV_X64_MSR_EOM, 0); } } msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu], &hv_vmbus_g_context.hv_msg_task[cpu]); } return (FILTER_HANDLED); } u_long *hv_vmbus_intr_cpu[MAXCPU]; void hv_vector_handler(struct trapframe *trap_frame) { int cpu; /* * Disable preemption. */ critical_enter(); /* * Do a little interrupt counting. */ cpu = PCPU_GET(cpuid); (*hv_vmbus_intr_cpu[cpu])++; hv_vmbus_isr(trap_frame); /* * Enable preemption. */ critical_exit(); } static int vmbus_read_ivar( device_t dev, device_t child, int index, uintptr_t* result) { struct hv_device *child_dev_ctx = device_get_ivars(child); switch (index) { case HV_VMBUS_IVAR_TYPE: *result = (uintptr_t) &child_dev_ctx->class_id; return (0); case HV_VMBUS_IVAR_INSTANCE: *result = (uintptr_t) &child_dev_ctx->device_id; return (0); case HV_VMBUS_IVAR_DEVCTX: *result = (uintptr_t) child_dev_ctx; return (0); case HV_VMBUS_IVAR_NODE: *result = (uintptr_t) child_dev_ctx->device; return (0); } return (ENOENT); } static int vmbus_write_ivar( device_t dev, device_t child, int index, uintptr_t value) { switch (index) { case HV_VMBUS_IVAR_TYPE: case HV_VMBUS_IVAR_INSTANCE: case HV_VMBUS_IVAR_DEVCTX: case HV_VMBUS_IVAR_NODE: /* read-only */ return (EINVAL); } return (ENOENT); } static int vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) { char guidbuf[40]; struct hv_device *dev_ctx = device_get_ivars(child); strlcat(buf, "classid=", buflen); snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id); strlcat(buf, guidbuf, buflen); strlcat(buf, " deviceid=", buflen); snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id); strlcat(buf, guidbuf, buflen); return (0); } struct hv_device* hv_vmbus_child_device_create( hv_guid type, hv_guid instance, hv_vmbus_channel* channel) { hv_device* child_dev; /* * Allocate the new child device */ child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_WAITOK | M_ZERO); child_dev->channel = channel; memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); return (child_dev); } int snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid) { int cnt; const unsigned char *d = guid->data; cnt = snprintf(buf, sz, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); return (cnt); }
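snprintf_hv_guid() prints the GUID in canonical mixed-endian text form: the first three groups are byte-swapped (little-endian), the last two are printed in storage order. A quick standalone check, reusing only the HV_NIC_GUID byte layout from hyperv.h (userspace sketch):

#include <stdio.h>

int
main(void)
{
	/* HV_NIC_GUID byte layout from hyperv.h */
	const unsigned char d[16] = {
		0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E
	};

	/* Same ordering as snprintf_hv_guid(): d[3..0], d[5..4],
	 * d[7..6], then d[8..15] as stored. */
	printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
	    "%02x%02x%02x%02x%02x%02x\n",
	    d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6],
	    d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
	/* prints f8615163-df3e-46c5-913f-f2d2f965ed0e */
	return (0);
}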
int hv_vmbus_child_device_register(struct hv_device *child_dev) { device_t child; if (bootverbose) { char name[40]; snprintf_hv_guid(name, sizeof(name), &child_dev->class_id); printf("VMBUS: Class ID: %s\n", name); } child = device_add_child(vmbus_devp, NULL, -1); child_dev->device = child; device_set_ivars(child, child_dev); return (0); } int hv_vmbus_child_device_unregister(struct hv_device *child_dev) { int ret = 0; /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); ret = device_delete_child(vmbus_devp, child_dev->device); mtx_unlock(&Giant); return (ret); } static int vmbus_probe(device_t dev) { if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL || device_get_unit(dev) != 0) return (ENXIO); device_set_desc(dev, "Vmbus Devices"); return (BUS_PROBE_DEFAULT); } extern inthand_t IDTVEC(hv_vmbus_callback); /** * @brief Main vmbus driver initialization routine. * * Here, we * - initialize the vmbus driver context * - set up various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - set up the vmbus root device * - retrieve the channel offers */ static int vmbus_bus_init(void) { int i, j, n, ret; char buf[MAXCOMLEN + 1]; cpuset_t cpu_mask; if (vmbus_inited) return (0); vmbus_inited = 1; ret = hv_vmbus_init(); if (ret) { if(bootverbose) printf("Error VMBUS: Hypervisor Initialization Failed!\n"); return (ret); } /* * Find a free IDT slot for the vmbus callback. */ hv_vmbus_g_context.hv_cb_vector = lapic_ipi_alloc(IDTVEC(hv_vmbus_callback)); if (hv_vmbus_g_context.hv_cb_vector < 0) { if(bootverbose) printf("Error VMBUS: Cannot find free IDT slot for " "vmbus callback!\n"); goto cleanup; } if(bootverbose) printf("VMBUS: vmbus callback vector %d\n", hv_vmbus_g_context.hv_cb_vector); /* * Notify the hypervisor of our vector. */ setup_args.vector = hv_vmbus_g_context.hv_cb_vector; CPU_FOREACH(j) { snprintf(buf, sizeof(buf), "cpu%d:hyperv", j); intrcnt_add(buf, &hv_vmbus_intr_cpu[j]); for (i = 0; i < 2; i++) setup_args.page_buffers[2 * j + i] = NULL; } /* * Per cpu setup. */ CPU_FOREACH(j) { /* * Setup taskqueue to handle events */ hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK, taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]); CPU_SETOF(j, &cpu_mask); taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET, &cpu_mask, "hvevent%d", j); /* * Setup per-cpu tasks and taskqueues to handle msg. */ hv_vmbus_g_context.hv_msg_tq[j] = taskqueue_create_fast( "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_msg_tq[j]); CPU_SETOF(j, &cpu_mask); taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_msg_tq[j], 1, PI_NET, &cpu_mask, "hvmsg%d", j); TASK_INIT(&hv_vmbus_g_context.hv_msg_task[j], 0, vmbus_msg_swintr, (void *)(long)j); /* * Prepare the per-cpu msg and event pages that will be used on each cpu.
*/ for(i = 0; i < 2; i++) { setup_args.page_buffers[2 * j + i] = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); } } if (bootverbose) printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n", smp_started); smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); /* * Connect to VMBus in the root partition */ ret = hv_vmbus_connect(); if (ret != 0) goto cleanup1; hv_vmbus_request_channel_offers(); vmbus_scan(); bus_generic_attach(vmbus_devp); device_printf(vmbus_devp, "device scan, probe and attach done\n"); return (ret); cleanup1: /* * Free the pages alloc'ed above */ for (n = 0; n < 2 * MAXCPU; n++) if (setup_args.page_buffers[n] != NULL) free(setup_args.page_buffers[n], M_DEVBUF); /* * Remove the swi and the vmbus callback vector */ CPU_FOREACH(j) { if (hv_vmbus_g_context.hv_event_queue[j] != NULL) { taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]); hv_vmbus_g_context.hv_event_queue[j] = NULL; } } lapic_ipi_free(hv_vmbus_g_context.hv_cb_vector); cleanup: hv_vmbus_cleanup(); return (ret); } static int vmbus_attach(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: attach dev: %p\n", dev); vmbus_devp = dev; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); return (0); } static void vmbus_init(void) { if (vm_guest != VM_GUEST_HV) return; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); } static void vmbus_bus_exit(void) { int i; hv_vmbus_release_unattached_channels(); hv_vmbus_disconnect(); smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); for(i = 0; i < 2 * MAXCPU; i++) { if (setup_args.page_buffers[i] != NULL) free(setup_args.page_buffers[i], M_DEVBUF); } hv_vmbus_cleanup(); /* Remove the swi */ CPU_FOREACH(i) { if (hv_vmbus_g_context.hv_event_queue[i] != NULL) { taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]); hv_vmbus_g_context.hv_event_queue[i] = NULL; } } lapic_ipi_free(hv_vmbus_g_context.hv_cb_vector); return; } static void vmbus_exit(void) { vmbus_bus_exit(); } static int vmbus_detach(device_t dev) { vmbus_exit(); return (0); } static void vmbus_mod_load(void) { if(bootverbose) printf("VMBUS: load\n"); } static void vmbus_mod_unload(void) { if(bootverbose) printf("VMBUS: unload\n"); } static int vmbus_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: vmbus_mod_load(); break; case MOD_UNLOAD: vmbus_mod_unload(); break; } return (0); } static device_method_t vmbus_methods[] = { /** Device interface */ DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /** Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_write_ivar, vmbus_write_ivar), DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), { 0, 0 } }; static char driver_name[] = "vmbus"; static driver_t vmbus_driver = { driver_name, vmbus_methods, 0, }; devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); MODULE_DEPEND(vmbus, acpi, 1, 1, 1); MODULE_VERSION(vmbus, 1); /* We want to be started after SMP is
initialized */ SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); Index: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 298445) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 298446) @@ -1,800 +1,800 @@ /*- - * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HYPERV_PRIV_H__ #define __HYPERV_PRIV_H__ #include #include #include #include #include /* * Status codes for hypervisor operations. */ typedef uint16_t hv_vmbus_status; #define HV_MESSAGE_SIZE (256) #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* * Synthetic interrupt controller flag constants. */ #define HV_EVENT_FLAGS_COUNT (256 * 8) #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t)) /** * max channel count <== event_flags_dword_count * bit_of_dword */ #define HV_CHANNEL_DWORD_LEN (32) #define HV_CHANNEL_MAX_COUNT \ ((HV_EVENT_FLAGS_DWORD_COUNT) * HV_CHANNEL_DWORD_LEN) /* * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS * MessageText: * You did not supply enough message buffers to send a message. 
*/ #define HV_STATUS_SUCCESS ((uint16_t)0) #define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013) typedef void (*hv_vmbus_channel_callback)(void *context); typedef struct { void* data; uint32_t length; } hv_vmbus_sg_buffer_list; typedef struct { uint32_t current_interrupt_mask; uint32_t current_read_index; uint32_t current_write_index; uint32_t bytes_avail_to_read; uint32_t bytes_avail_to_write; } hv_vmbus_ring_buffer_debug_info; typedef struct { uint32_t rel_id; hv_vmbus_channel_state state; hv_guid interface_type; hv_guid interface_instance; uint32_t monitor_id; uint32_t server_monitor_pending; uint32_t server_monitor_latency; uint32_t server_monitor_connection_id; uint32_t client_monitor_pending; uint32_t client_monitor_latency; uint32_t client_monitor_connection_id; hv_vmbus_ring_buffer_debug_info inbound; hv_vmbus_ring_buffer_debug_info outbound; } hv_vmbus_channel_debug_info; typedef union { hv_vmbus_channel_version_supported version_supported; hv_vmbus_channel_open_result open_result; hv_vmbus_channel_gpadl_torndown gpadl_torndown; hv_vmbus_channel_gpadl_created gpadl_created; hv_vmbus_channel_version_response version_response; } hv_vmbus_channel_msg_response; /* * Represents each channel msg on the vmbus connection * This is a variable-size data structure depending on * the msg type itself */ typedef struct hv_vmbus_channel_msg_info { /* * Bookkeeping stuff */ TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry; /* * So far, this is only used to handle * gpadl body message */ TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor; /* * Synchronize the request/response if * needed. * KYS: Use a semaphore for now. * Not perf critical. */ struct sema wait_sema; hv_vmbus_channel_msg_response response; uint32_t message_size; /** * The channel message that goes out on * the "wire". It will contain at * minimum the * hv_vmbus_channel_msg_header * header. */ unsigned char msg[0]; } hv_vmbus_channel_msg_info; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_page_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT]; } __packed hv_vmbus_channel_packet_page_buffer; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_multipage_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; /* Always 1 in this case */ hv_vmbus_multipage_buffer range; } __packed hv_vmbus_channel_packet_multipage_buffer; enum { HV_VMBUS_MESSAGE_CONNECTION_ID = 1, HV_VMBUS_MESSAGE_PORT_ID = 1, HV_VMBUS_EVENT_CONNECTION_ID = 2, HV_VMBUS_EVENT_PORT_ID = 2, HV_VMBUS_MONITOR_CONNECTION_ID = 3, HV_VMBUS_MONITOR_PORT_ID = 3, HV_VMBUS_MESSAGE_SINT = 2, HV_VMBUS_TIMER_SINT = 4, }; #define HV_PRESENT_BIT 0x80000000 #define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t) typedef struct { uint64_t guest_id; void* hypercall_page; hv_bool_uint8_t syn_ic_initialized; hv_vmbus_handle syn_ic_msg_page[MAXCPU]; hv_vmbus_handle syn_ic_event_page[MAXCPU]; /* * For FreeBSD cpuid to Hyper-V vcpuid mapping. */ uint32_t hv_vcpu_index[MAXCPU]; /* * Each cpu has its own software interrupt handler for channel * event and msg handling. 
*/ struct taskqueue *hv_event_queue[MAXCPU]; struct taskqueue *hv_msg_tq[MAXCPU]; struct task hv_msg_task[MAXCPU]; /* * The host uses this vector to interrupt the guest for vmbus channel * events and msgs. */ int hv_cb_vector; } hv_vmbus_context; /* * Define hypervisor message types */ typedef enum { HV_MESSAGE_TYPE_NONE = 0x00000000, /* * Memory access messages */ HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000, HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001, /* * Timer notification messages */ HV_MESSAGE_TIMER_EXPIRED = 0x80000010, /* * Error messages */ HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020, HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021, HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022, /* * Trace buffer complete messages */ HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040, /* * Platform-specific processor intercept messages */ HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000, HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001, HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002, HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003, HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004, HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005 } hv_vmbus_msg_type; /* * Define port identifier type */ typedef union _hv_vmbus_port_id { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u ; } hv_vmbus_port_id; /* * Define synthetic interrupt controller message flag */ typedef union { uint8_t as_uint8_t; struct { uint8_t message_pending:1; uint8_t reserved:7; } u; } hv_vmbus_msg_flags; typedef uint64_t hv_vmbus_partition_id; /* * Define synthetic interrupt controller message header */ typedef struct { hv_vmbus_msg_type message_type; uint8_t payload_size; hv_vmbus_msg_flags message_flags; uint8_t reserved[2]; union { hv_vmbus_partition_id sender; hv_vmbus_port_id port; } u; } hv_vmbus_msg_header; /* * Define synthetic interrupt controller message format */ typedef struct { hv_vmbus_msg_header header; union { uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } u ; } hv_vmbus_message; /* * The maximum number of channels is determined by the size of the * interrupt page, which is PAGE_SIZE. One half of PAGE_SIZE is for * the send endpoint interrupt and the other half is for the receive * endpoint interrupt. * * Note: with a 4K page, (PAGE_SIZE >> 1) << 3 = 2048 bytes * 8 bits/byte * allocates 16384 channels */ #define HV_MAX_NUM_CHANNELS (PAGE_SIZE >> 1) << 3 /* * (The value here must be a multiple of 32) */ #define HV_MAX_NUM_CHANNELS_SUPPORTED 256 /* * VM Bus connection states */ typedef enum { HV_DISCONNECTED, HV_CONNECTING, HV_CONNECTED, HV_DISCONNECTING } hv_vmbus_connect_state; #define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT typedef struct { hv_vmbus_connect_state connect_state; uint32_t next_gpadl_handle; /** * Represents channel interrupts. Each bit position * represents a channel. When a channel sends an * interrupt via VMBUS, it finds its bit in the * send_interrupt_page, sets it and calls Hv to * generate a port event. The other end receives the * port event and parses the recv_interrupt_page to * see which bit is set */ void *interrupt_page; void *send_interrupt_page; void *recv_interrupt_page; /* * 2 pages - 1st page for parent->child * notification and 2nd is child->parent * notification */ void *monitor_page_1; void *monitor_page_2; TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** * List of primary channels. Sub channels will be linked * under their primary channel. */ TAILQ_HEAD(, hv_vmbus_channel) channel_anchor; struct mtx channel_lock; /** * channel table for fast lookup through id.
*/ hv_vmbus_channel **channels; } hv_vmbus_connection; typedef union { uint64_t as_uint64_t; struct { uint64_t build_number : 16; uint64_t service_version : 8; /* Service Pack, etc. */ uint64_t minor_version : 8; uint64_t major_version : 8; /* * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS) * HV_GUEST_OS_VENDOR */ uint64_t os_id : 8; uint64_t vendor_id : 16; } u; } hv_vmbus_x64_msr_guest_os_id_contents; typedef union { uint64_t as_uint64_t; struct { uint64_t enable :1; uint64_t reserved :11; uint64_t guest_physical_address :52; } u; } hv_vmbus_x64_msr_hypercall_contents; typedef union { uint32_t as_uint32_t; struct { uint32_t group_enable :4; uint32_t rsvd_z :28; } u; } hv_vmbus_monitor_trigger_state; typedef union { uint64_t as_uint64_t; struct { uint32_t pending; uint32_t armed; } u; } hv_vmbus_monitor_trigger_group; typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } hv_vmbus_monitor_parameter; /* * hv_vmbus_monitor_page Layout * ------------------------------------------------------ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) | * | 8 | trigger_group[0] | * | 10 | trigger_group[1] | * | 18 | trigger_group[2] | * | 20 | trigger_group[3] | * | 28 | Rsvd2[0] | * | 30 | Rsvd2[1] | * | 38 | Rsvd2[2] | * | 40 | next_check_time[0][0] | next_check_time[0][1] | * | ... | * | 240 | latency[0][0..3] | * | 340 | Rsvz3[0] | * | 440 | parameter[0][0] | * | 448 | parameter[0][1] | * | ... | * | 840 | Rsvd4[0] | * ------------------------------------------------------ */ typedef struct { hv_vmbus_monitor_trigger_state trigger_state; uint32_t rsvd_z1; hv_vmbus_monitor_trigger_group trigger_group[4]; uint64_t rsvd_z2[3]; int32_t next_check_time[4][32]; uint16_t latency[4][32]; uint64_t rsvd_z3[32]; hv_vmbus_monitor_parameter parameter[4][32]; uint8_t rsvd_z4[1984]; } hv_vmbus_monitor_page; /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES). 
*/ typedef enum { HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001, HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000, HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001, /* * The remaining functions depend on the value * of hv_cpu_id_function_interface */ HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002, HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003, HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004, HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005, HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE = 0x40000006 } hv_vmbus_cpuid_function; #define HV_FEATURE_MSR_TIME_REFCNT 0x0002 /* MSR_TIME_REF_COUNT */ #define HV_FEATURE_MSR_SYNIC 0x0004 /* MSRs for SynIC */ #define HV_FEATURE_MSR_SYNTIMER 0x0008 /* MSRs for SynTimer */ #define HV_FEATURE_MSR_APIC 0x0010 /* MSR_{EOI,ICR,TPR} */ #define HV_FEATURE_MSR_HYPERCALL 0x0020 /* MSR_{GUEST_OS_ID,HYPERCALL} */ #define HV_FEATURE_MSR_GUEST_IDLE 0x0400 /* MSR_GUEST_IDLE */ #define HV_PM_FEATURE_CSTATE_MASK 0x000f #define HV_PM_FEATURE_C3_HPET 0x0010 /* C3 requires HPET */ #define HV_PM_FEATURE_CSTATE(f) ((f) & HV_PM_FEATURE_CSTATE_MASK) #define HV_FEATURE3_MWAIT 0x0001 /* MWAIT */ #define HV_FEATURE3_XMM_HYPERCALL 0x0010 /* hypercall input through XMM regs */ #define HV_FEATURE3_GUEST_IDLE 0x0020 /* guest idle support */ #define HV_FEATURE3_NUMA 0x0080 /* NUMA distance query support */ #define HV_FEATURE3_TIME_FREQ 0x0100 /* timer frequency query (TSC, LAPIC) */ #define HV_FEATURE3_MSR_CRASH 0x0400 /* MSRs for guest crash */ /* * Define the format of the SIMP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t simp_enabled : 1; uint64_t preserved : 11; uint64_t base_simp_gpa : 52; } u; } hv_vmbus_synic_simp; /* * Define the format of the SIEFP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t siefp_enabled : 1; uint64_t preserved : 11; uint64_t base_siefp_gpa : 52; } u; } hv_vmbus_synic_siefp; /* * Define synthetic interrupt source */ typedef union { uint64_t as_uint64_t; struct { uint64_t vector : 8; uint64_t reserved1 : 8; uint64_t masked : 1; uint64_t auto_eoi : 1; uint64_t reserved2 : 46; } u; } hv_vmbus_synic_sint; /* * Timer configuration register. 
*/ union hv_timer_config { uint64_t as_uint64; struct { uint64_t enable:1; uint64_t periodic:1; uint64_t lazy:1; uint64_t auto_enable:1; uint64_t reserved_z0:12; uint64_t sintx:4; uint64_t reserved_z1:44; }; }; /* * Define syn_ic control register */ typedef union _hv_vmbus_synic_scontrol { uint64_t as_uint64_t; struct { uint64_t enable : 1; uint64_t reserved : 63; } u; } hv_vmbus_synic_scontrol; /* * Define the hv_vmbus_post_message hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint32_t reserved; hv_vmbus_msg_type message_type; uint32_t payload_size; uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } hv_vmbus_input_post_message; /* * Define the synthetic interrupt controller event flags format */ typedef union { uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT]; uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; } hv_vmbus_synic_event_flags; #define HV_X64_CPUID_MIN (0x40000005) #define HV_X64_CPUID_MAX (0x4000ffff) /* * Declare the MSR used to identify the guest OS */ #define HV_X64_MSR_GUEST_OS_ID (0x40000000) /* * Declare the MSR used to set up the pages used to communicate with the * hypervisor */ #define HV_X64_MSR_HYPERCALL (0x40000001) /* MSR used to provide vcpu index */ #define HV_X64_MSR_VP_INDEX (0x40000002) #define HV_X64_MSR_TIME_REF_COUNT (0x40000020) /* * Define synthetic interrupt controller model specific registers */ #define HV_X64_MSR_SCONTROL (0x40000080) #define HV_X64_MSR_SVERSION (0x40000081) #define HV_X64_MSR_SIEFP (0x40000082) #define HV_X64_MSR_SIMP (0x40000083) #define HV_X64_MSR_EOM (0x40000084) #define HV_X64_MSR_SINT0 (0x40000090) #define HV_X64_MSR_SINT1 (0x40000091) #define HV_X64_MSR_SINT2 (0x40000092) #define HV_X64_MSR_SINT3 (0x40000093) #define HV_X64_MSR_SINT4 (0x40000094) #define HV_X64_MSR_SINT5 (0x40000095) #define HV_X64_MSR_SINT6 (0x40000096) #define HV_X64_MSR_SINT7 (0x40000097) #define HV_X64_MSR_SINT8 (0x40000098) #define HV_X64_MSR_SINT9 (0x40000099) #define HV_X64_MSR_SINT10 (0x4000009A) #define HV_X64_MSR_SINT11 (0x4000009B) #define HV_X64_MSR_SINT12 (0x4000009C) #define HV_X64_MSR_SINT13 (0x4000009D) #define HV_X64_MSR_SINT14 (0x4000009E) #define HV_X64_MSR_SINT15 (0x4000009F)
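A SINT MSR value is just the hv_vmbus_synic_sint bitfield laid over a uint64_t. The sketch below is illustrative userspace code with a local copy of the layout and a hypothetical vector (the kernel-side programming lives in hv_vmbus_synic_init()); it builds the value that would route a SINT to IDT vector 0x90 with the source unmasked:

#include <stdint.h>
#include <stdio.h>

/* Local copy of the hv_vmbus_synic_sint layout, for illustration. */
typedef union {
	uint64_t as_uint64_t;
	struct {
		uint64_t vector    : 8;
		uint64_t reserved1 : 8;
		uint64_t masked    : 1;
		uint64_t auto_eoi  : 1;
		uint64_t reserved2 : 46;
	} u;
} synic_sint;

int
main(void)
{
	synic_sint sint = { .as_uint64_t = 0 };

	sint.u.vector = 0x90;	/* hypothetical callback IDT vector */
	sint.u.masked = 0;	/* interrupt source enabled */
	sint.u.auto_eoi = 0;

	/* The driver would wrmsr() a value like this to HV_X64_MSR_SINT0
	 * plus the SINT number, e.g. HV_X64_MSR_SINT2 for
	 * HV_VMBUS_MESSAGE_SINT. */
	printf("SINT value: 0x%016llx\n",
	    (unsigned long long)sint.as_uint64_t);	/* 0x0000000000000090 */
	return (0);
}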
/* * Synthetic Timer MSRs. Four timers per vcpu. */ #define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 #define HV_X64_MSR_STIMER0_COUNT 0x400000B1 #define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 #define HV_X64_MSR_STIMER1_COUNT 0x400000B3 #define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 #define HV_X64_MSR_STIMER2_COUNT 0x400000B5 #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 /* * Declare the various hypercall operations */ typedef enum { HV_CALL_POST_MESSAGE = 0x005c, HV_CALL_SIGNAL_EVENT = 0x005d, } hv_vmbus_call_code; /** * Global variables */ extern hv_vmbus_context hv_vmbus_g_context; extern hv_vmbus_connection hv_vmbus_g_connection; extern u_int hyperv_features; extern u_int hyperv_recommends; typedef void (*vmbus_msg_handler)(hv_vmbus_channel_msg_header *msg); typedef struct hv_vmbus_channel_msg_table_entry { hv_vmbus_channel_msg_type messageType; vmbus_msg_handler messageHandler; } hv_vmbus_channel_msg_table_entry; extern hv_vmbus_channel_msg_table_entry g_channel_message_table[]; /* * Private, VM Bus functions */ struct sysctl_ctx_list; struct sysctl_oid_list; void hv_ring_buffer_stat( struct sysctl_ctx_list *ctx, struct sysctl_oid_list *tree_node, hv_vmbus_ring_buffer_info *rbi, const char *desc); int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); void hv_ring_buffer_cleanup( hv_vmbus_ring_buffer_info *ring_info); int hv_ring_buffer_write( hv_vmbus_ring_buffer_info *ring_info, hv_vmbus_sg_buffer_list sg_buffers[], uint32_t sg_buff_count, boolean_t *need_sig); int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); int hv_ring_buffer_read( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len, uint32_t offset); uint32_t hv_vmbus_get_ring_buffer_interrupt_mask( hv_vmbus_ring_buffer_info *ring_info); void hv_vmbus_dump_ring_info( hv_vmbus_ring_buffer_info *ring_info, char *prefix); void hv_ring_buffer_read_begin( hv_vmbus_ring_buffer_info *ring_info); uint32_t hv_ring_buffer_read_end( hv_vmbus_ring_buffer_info *ring_info); hv_vmbus_channel* hv_vmbus_allocate_channel(void); void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); int hv_vmbus_request_channel_offers(void); void hv_vmbus_release_unattached_channels(void); int hv_vmbus_init(void); void hv_vmbus_cleanup(void); uint16_t hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void *payload, size_t payload_size); uint16_t hv_vmbus_signal_event(void *con_id); void hv_vmbus_synic_init(void *irq_arg); void hv_vmbus_synic_cleanup(void *arg); struct hv_device* hv_vmbus_child_device_create( hv_guid device_type, hv_guid device_instance, hv_vmbus_channel *channel); int hv_vmbus_child_device_register( struct hv_device *child_dev); int hv_vmbus_child_device_unregister( struct hv_device *child_dev); /** * Connection interfaces */ int hv_vmbus_connect(void); int hv_vmbus_disconnect(void); int hv_vmbus_post_message(void *buffer, size_t buf_size); int hv_vmbus_set_event(hv_vmbus_channel *channel); void hv_vmbus_on_events(int cpu); /** * Event Timer interfaces */ void hv_et_init(void); void hv_et_intr(struct trapframe*); /* Wait for device creation */ void vmbus_scan(void);
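Tying union hv_timer_config to the STIMER MSRs above: a periodic synthetic timer is armed by writing a config word that selects the delivery SINT, then a count. A userspace sketch of building the config value only (local copy of the bitfield; the actual period and wrmsr() writes belong to the kernel event-timer code):

#include <stdint.h>
#include <stdio.h>

/* Local copy of union hv_timer_config, for illustration. */
union timer_config {
	uint64_t as_uint64;
	struct {
		uint64_t enable:1;
		uint64_t periodic:1;
		uint64_t lazy:1;
		uint64_t auto_enable:1;
		uint64_t reserved_z0:12;
		uint64_t sintx:4;
		uint64_t reserved_z1:44;
	};
};

int
main(void)
{
	union timer_config cfg = { .as_uint64 = 0 };

	cfg.enable = 1;
	cfg.periodic = 1;
	cfg.sintx = 4;		/* HV_VMBUS_TIMER_SINT */

	/* enable (bit 0) + periodic (bit 1) + sintx = 4 (bits 16..19):
	 * the kind of value written to HV_X64_MSR_STIMER0_CONFIG. */
	printf("STIMER0 config: 0x%016llx\n",
	    (unsigned long long)cfg.as_uint64);	/* 0x0000000000040003 */
	return (0);
}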
/* * The guest OS needs to register the guest ID with the hypervisor. * The guest ID is a 64 bit entity and the structure of this ID is * specified in the Hyper-V specification: * * http://msdn.microsoft.com/en-us/library/windows/ * hardware/ff542653%28v=vs.85%29.aspx * * While the current guideline does not specify how FreeBSD guest ID(s) * need to be generated, our plan is to publish the guidelines for * FreeBSD and other guest operating systems that currently are hosted * on Hyper-V. The implementation here conforms to these as yet * unpublished guidelines. * * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source * 62:56 - Os Type; Linux is 0x100, FreeBSD is 0x200 * 55:48 - Distro specific identification * 47:16 - FreeBSD kernel version number * 15:0 - Distro specific identification * */ #define HV_FREEBSD_VENDOR_ID 0x8200 #define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0) static inline uint64_t hv_generate_guest_id( uint8_t distro_id_part1, uint16_t distro_id_part2) { uint64_t guest_id; guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48); guest_id |= (((uint64_t)(distro_id_part1)) << 48); guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */ guest_id |= ((uint64_t)(distro_id_part2)); return (guest_id); } typedef struct { unsigned int vector; void *page_buffers[2 * MAXCPU]; } hv_setup_args; #endif /* __HYPERV_PRIV_H__ */
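As a worked example of the guest ID layout (standalone sketch; the __FreeBSD_version value below is hypothetical): shifting HV_FREEBSD_VENDOR_ID 0x8200 into bits 63:48 sets the open-source bit (63) plus the OS-type field, and the kernel version lands in bits 47:16.

#include <stdint.h>
#include <stdio.h>

#define VENDOR_ID	0x8200ULL	/* HV_FREEBSD_VENDOR_ID */
#define KERN_VERSION	1100000ULL	/* hypothetical __FreeBSD_version */

int
main(void)
{
	/* Same construction as hv_generate_guest_id(0, 0). */
	uint64_t guest_id = (VENDOR_ID << 48) | (KERN_VERSION << 16);

	/* 0x8200 << 48 = 0x8200000000000000; 1100000 = 0x10c8e0, so
	 * 0x10c8e0 << 16 = 0x10c8e00000; ORed: 0x82000010c8e00000. */
	printf("guest id: 0x%016llx\n", (unsigned long long)guest_id);
	return (0);
}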