Index: head/sys/dev/hyperv/include/hyperv.h
===================================================================
--- head/sys/dev/hyperv/include/hyperv.h	(revision 296082)
+++ head/sys/dev/hyperv/include/hyperv.h	(revision 296083)
@@ -1,923 +1,921 @@
 /*-
  * Copyright (c) 2009-2012 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /**
  * HyperV definitions for messages that are sent between instances of the
  * Channel Management Library in separate partitions, or in some cases,
  * back to itself.
  */
 
 #ifndef __HYPERV_H__
 #define __HYPERV_H__
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <sys/kthread.h>
 #include <sys/taskqueue.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/sema.h>
 #include <sys/smp.h>
 #include <sys/mutex.h>
 #include <sys/bus.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 
 #include <amd64/include/xen/synch_bitops.h>
 #include <amd64/include/atomic.h>
 
 typedef uint8_t	hv_bool_uint8_t;
 
 #define HV_S_OK			0x00000000
 #define HV_E_FAIL		0x80004005
 #define HV_ERROR_NOT_SUPPORTED	0x80070032
 #define HV_ERROR_MACHINE_LOCKED	0x800704F7
 
 /*
  * VMBUS version is 32 bit, upper 16 bit for major_number and lower
  * 16 bit for minor_number.
  *
  * 0.13  --  Windows Server 2008
  * 1.1   --  Windows 7
  * 2.4   --  Windows 8
  * 3.0   --  Windows 8.1
  */
 #define HV_VMBUS_VERSION_WS2008		((0 << 16) | (13))
 #define HV_VMBUS_VERSION_WIN7		((1 << 16) | (1))
 #define HV_VMBUS_VERSION_WIN8		((2 << 16) | (4))
 #define HV_VMBUS_VERSION_WIN8_1		((3 << 16) | (0))
 
 #define HV_VMBUS_VERSION_INVALID	-1
 
 #define HV_VMBUS_VERSION_CURRENT	HV_VMBUS_VERSION_WIN8_1
 
 /*
  * Make maximum size of pipe payload of 16K
  */
 
 #define HV_MAX_PIPE_DATA_PAYLOAD	(sizeof(BYTE) * 16384)
 
 /*
  * Define pipe_mode values
  */
 
 #define HV_VMBUS_PIPE_TYPE_BYTE		0x00000000
 #define HV_VMBUS_PIPE_TYPE_MESSAGE	0x00000004
 
 /*
  * The size of the user defined data buffer for non-pipe offers
  */
 
 #define HV_MAX_USER_DEFINED_BYTES	120
 
 /*
  *  The size of the user defined data buffer for pipe offers
  */
 
 #define HV_MAX_PIPE_USER_DEFINED_BYTES	116
 
 
 #define HV_MAX_PAGE_BUFFER_COUNT	32
 #define HV_MAX_MULTIPAGE_BUFFER_COUNT	32
 
 #define HV_ALIGN_UP(value, align)					\
 		(((value) & (align-1)) ?				\
 		    (((value) + (align-1)) & ~(align-1) ) : (value))
 
 #define HV_ALIGN_DOWN(value, align) ( (value) & ~(align-1) )
 
 #define HV_NUM_PAGES_SPANNED(addr, len)					\
 		((HV_ALIGN_UP(addr+len, PAGE_SIZE) -			\
 		    HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT )
 
 typedef struct hv_guid {
 	 unsigned char data[16];
 } __packed hv_guid;
 
 #define HV_NIC_GUID							\
 	.data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,	\
 		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
 
 #define HV_IDE_GUID							\
 	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,	\
 		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
 
 #define HV_SCSI_GUID							\
 	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,	\
 		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
 
 /*
  * At the center of the Channel Management library is
  * the Channel Offer. This struct contains the
  * fundamental information about an offer.
  */
 
 typedef struct hv_vmbus_channel_offer {
 	hv_guid		interface_type;
 	hv_guid		interface_instance;
 	uint64_t	interrupt_latency_in_100ns_units;
 	uint32_t	interface_revision;
 	uint32_t	server_context_area_size; /* in bytes */
 	uint16_t	channel_flags;
 	uint16_t	mmio_megabytes;		  /* in bytes * 1024 * 1024 */
 	union
 	{
         /*
          * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes.
          */
 		struct {
 			uint8_t	user_defined[HV_MAX_USER_DEFINED_BYTES];
 		} __packed standard;
 
         /*
          * Pipes: The following structure is an integrated pipe protocol, which
          *        is implemented on top of standard user-defined data. pipe
          *        clients  have HV_MAX_PIPE_USER_DEFINED_BYTES left for their
          *        own use.
          */
 		struct {
 			uint32_t	pipe_mode;
 			uint8_t	user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES];
 		} __packed pipe;
 	} u;
 
 	/*
 	 * Sub_channel_index, newly added in Win8.
 	 */
 	uint16_t	sub_channel_index;
 	uint16_t	padding;
 
 } __packed hv_vmbus_channel_offer;
 
 typedef uint32_t hv_gpadl_handle;
 
 typedef struct {
 	uint16_t type;
 	uint16_t data_offset8;
 	uint16_t length8;
 	uint16_t flags;
 	uint64_t transaction_id;
 } __packed hv_vm_packet_descriptor;
 
 typedef uint32_t hv_previous_packet_offset;
 
 typedef struct {
 	hv_previous_packet_offset	previous_packet_start_offset;
 	hv_vm_packet_descriptor		descriptor;
 } __packed hv_vm_packet_header;
 
 typedef struct {
 	uint32_t byte_count;
 	uint32_t byte_offset;
 } __packed hv_vm_transfer_page;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint16_t		transfer_page_set_id;
 	hv_bool_uint8_t		sender_owns_set;
 	uint8_t			reserved;
 	uint32_t		range_count;
 	hv_vm_transfer_page	ranges[1];
 } __packed hv_vm_transfer_page_packet_header;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint32_t		reserved;
 } __packed hv_vm_gpadl_packet_header;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint16_t		transfer_page_set_id;
 	uint16_t		reserved;
 } __packed hv_vm_add_remove_transfer_page_set;
 
 /*
  * This structure defines a range in guest
  * physical space that can be made
  * to look virtually contiguous.
  */
 
 typedef struct {
 	uint32_t byte_count;
 	uint32_t byte_offset;
 	uint64_t pfn_array[0];
 } __packed hv_gpa_range;
 
 /*
  * This is the format for an Establish Gpadl packet, which contains a handle
  * by which this GPADL will be known and a set of GPA ranges associated with
  * it.  This can be converted to a MDL by the guest OS.  If there are multiple
  * GPA ranges, then the resulting MDL will be "chained," representing multiple
  * VA ranges.
  */
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint32_t		range_count;
 	hv_gpa_range		range[1];
 } __packed hv_vm_establish_gpadl;
 
 /*
  * This is the format for a Teardown Gpadl packet, which indicates that the
  * GPADL handle in the Establish Gpadl packet will never be referenced again.
  */
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 				/* for alignment to a 8-byte boundary */
 	uint32_t		reserved;
 } __packed hv_vm_teardown_gpadl;
 
 /*
  * This is the format for a GPA-Direct packet, which contains a set of GPA
  * ranges, in addition to commands and/or data.
  */
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		reserved;
 	uint32_t		range_count;
 	hv_gpa_range		range[1];
 } __packed hv_vm_data_gpa_direct;
 
 /*
  * This is the format for a Additional data Packet.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint64_t		total_bytes;
 	uint32_t		byte_offset;
 	uint32_t		byte_count;
 	uint8_t			data[1];
 } __packed hv_vm_additional_data;
 
 typedef union {
 	hv_vm_packet_descriptor             simple_header;
 	hv_vm_transfer_page_packet_header   transfer_page_header;
 	hv_vm_gpadl_packet_header           gpadl_header;
 	hv_vm_add_remove_transfer_page_set  add_remove_transfer_page_header;
 	hv_vm_establish_gpadl               establish_gpadl_header;
 	hv_vm_teardown_gpadl                teardown_gpadl_header;
 	hv_vm_data_gpa_direct               data_gpa_direct_header;
 } __packed hv_vm_packet_largest_possible_header;
 
 typedef enum {
 	HV_VMBUS_PACKET_TYPE_INVALID				= 0x0,
 	HV_VMBUS_PACKET_TYPES_SYNCH				= 0x1,
 	HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET		= 0x2,
 	HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET		= 0x3,
 	HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL			= 0x4,
 	HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL			= 0x5,
 	HV_VMBUS_PACKET_TYPE_DATA_IN_BAND			= 0x6,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES		= 0x7,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL			= 0x8,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT		= 0x9,
 	HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST			= 0xa,
 	HV_VMBUS_PACKET_TYPE_COMPLETION				= 0xb,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS	= 0xc,
 	HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA = 0xd
 } hv_vmbus_packet_type;
 
 #define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED    1
 
 /*
  * Version 1 messages
  */
 typedef enum {
 	HV_CHANNEL_MESSAGE_INVALID			= 0,
 	HV_CHANNEL_MESSAGE_OFFER_CHANNEL		= 1,
 	HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER	= 2,
 	HV_CHANNEL_MESSAGE_REQUEST_OFFERS		= 3,
 	HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED		= 4,
 	HV_CHANNEL_MESSAGE_OPEN_CHANNEL			= 5,
 	HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT		= 6,
 	HV_CHANNEL_MESSAGE_CLOSE_CHANNEL		= 7,
 	HV_CHANNEL_MESSAGEL_GPADL_HEADER		= 8,
 	HV_CHANNEL_MESSAGE_GPADL_BODY			= 9,
 	HV_CHANNEL_MESSAGE_GPADL_CREATED		= 10,
 	HV_CHANNEL_MESSAGE_GPADL_TEARDOWN		= 11,
 	HV_CHANNEL_MESSAGE_GPADL_TORNDOWN		= 12,
 	HV_CHANNEL_MESSAGE_REL_ID_RELEASED		= 13,
 	HV_CHANNEL_MESSAGE_INITIATED_CONTACT		= 14,
 	HV_CHANNEL_MESSAGE_VERSION_RESPONSE		= 15,
 	HV_CHANNEL_MESSAGE_UNLOAD			= 16,
 	HV_CHANNEL_MESSAGE_COUNT
 } hv_vmbus_channel_msg_type;
 
 typedef struct {
 	hv_vmbus_channel_msg_type	message_type;
 	uint32_t			padding;
 } __packed hv_vmbus_channel_msg_header;
 
 /*
  * Query VMBus Version parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			version;
 } __packed hv_vmbus_channel_query_vmbus_version;
 
 /*
  * VMBus Version Supported parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	hv_bool_uint8_t			version_supported;
 } __packed hv_vmbus_channel_version_supported;
 
 /*
  * Channel Offer parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	hv_vmbus_channel_offer		offer;
 	uint32_t			child_rel_id;
 	uint8_t				monitor_id;
 	/*
 	 * This field has been split into a bit field on Win7
 	 * and higher.
 	 */
 	uint8_t				monitor_allocated:1;
 	uint8_t				reserved:7;
 	/*
 	 * Following fields were added in win7 and higher.
 	 * Make sure to check the version before accessing these fields.
 	 *
 	 * If "is_dedicated_interrupt" is set, we must not set the
 	 * associated bit in the channel bitmap while sending the
 	 * interrupt to the host.
 	 *
 	 * connection_id is used in signaling the host.
 	 */
 	uint16_t			is_dedicated_interrupt:1;
 	uint16_t			reserved1:15;
 	uint32_t			connection_id;
 } __packed hv_vmbus_channel_offer_channel;
 
 /*
  * Rescind Offer parameters
  */
 typedef struct
 {
     hv_vmbus_channel_msg_header	header;
     uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_rescind_offer;
 
 
 /*
  * Request Offer -- no parameters, SynIC message contains the partition ID
  *
  * Set Snoop -- no parameters, SynIC message contains the partition ID
  *
  * Clear Snoop -- no parameters, SynIC message contains the partition ID
  *
  * All Offers Delivered -- no parameters, SynIC message contains the
  * partition ID
  *
  * Flush Client -- no parameters, SynIC message contains the partition ID
  */
 
 
 /*
  * Open Channel parameters
  */
 typedef struct
 {
     hv_vmbus_channel_msg_header header;
 
     /*
      * Identifies the specific VMBus channel that is being opened.
      */
     uint32_t		child_rel_id;
 
     /*
      * ID making a particular open request at a channel offer unique.
      */
     uint32_t		open_id;
 
     /*
      * GPADL for the channel's ring buffer.
      */
     hv_gpadl_handle	ring_buffer_gpadl_handle;
 
     /*
      * Before win8, all incoming channel interrupts are only
      * delivered on cpu 0. Setting this value to 0 would
      * preserve the earlier behavior.
      */
     uint32_t		target_vcpu;
 
     /*
      * The upstream ring buffer begins at offset zero in the memory described
      * by ring_buffer_gpadl_handle. The downstream ring buffer follows it at
      * this offset (in pages).
      */
     uint32_t		downstream_ring_buffer_page_offset;
 
     /*
      * User-specific data to be passed along to the server endpoint.
      */
     uint8_t		user_data[HV_MAX_USER_DEFINED_BYTES];
 
 } __packed hv_vmbus_channel_open_channel;
 
 typedef uint32_t hv_nt_status;
 
 /*
  * Open Channel Result parameters
  */
 typedef struct
 {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 	uint32_t			open_id;
 	hv_nt_status			status;
 } __packed hv_vmbus_channel_open_result;
 
 /*
  * Close channel parameters
  */
 typedef struct
 {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_close_channel;
 
 /*
  * Channel Message GPADL
  */
 #define HV_GPADL_TYPE_RING_BUFFER	1
 #define HV_GPADL_TYPE_SERVER_SAVE_AREA	2
 #define HV_GPADL_TYPE_TRANSACTION	8
 
 /*
  * The number of PFNs in a GPADL message is defined by the number of pages
  * that would be spanned by byte_count and byte_offset.  If the implied number
  * of PFNs won't fit in this packet, there will be a follow-up packet that
  * contains more
  */
 
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 	uint32_t			gpadl;
 	uint16_t			range_buf_len;
 	uint16_t			range_count;
 	hv_gpa_range			range[0];
 } __packed hv_vmbus_channel_gpadl_header;
 
 /*
  * This is the follow-up packet that contains more PFNs
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			message_number;
 	uint32_t 			gpadl;
 	uint64_t 			pfn[0];
 } __packed hv_vmbus_channel_gpadl_body;
 
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 	uint32_t			gpadl;
 	uint32_t			creation_status;
 } __packed hv_vmbus_channel_gpadl_created;
 
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 	uint32_t			gpadl;
 } __packed hv_vmbus_channel_gpadl_teardown;
 
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			gpadl;
 } __packed hv_vmbus_channel_gpadl_torndown;
 
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_relid_released;
 
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			vmbus_version_requested;
 	uint32_t			padding2;
 	uint64_t			interrupt_page;
 	uint64_t			monitor_page_1;
 	uint64_t			monitor_page_2;
 } __packed hv_vmbus_channel_initiate_contact;
 
 typedef struct {
 	hv_vmbus_channel_msg_header header;
 	hv_bool_uint8_t		version_supported;
 } __packed hv_vmbus_channel_version_response;
 
 typedef hv_vmbus_channel_msg_header hv_vmbus_channel_unload;
 
 #define HW_MACADDR_LEN	6
 
 /*
  * Fixme:  Added to quiet "typeof" errors involving hv_vmbus.h when
  * the including C file was compiled with "-std=c99".
  */
 #ifndef typeof
 #define typeof __typeof
 #endif
 
 #ifndef NULL
 #define NULL  (void *)0
 #endif
 
 typedef void *hv_vmbus_handle;
 
 #ifndef CONTAINING_RECORD
 #define CONTAINING_RECORD(address, type, field) ((type *)(	\
 		(uint8_t *)(address) -				\
 		(uint8_t *)(&((type *)0)->field)))
 #endif /* CONTAINING_RECORD */
 
 
 #define container_of(ptr, type, member) ({				\
 		__typeof__( ((type *)0)->member ) *__mptr = (ptr);	\
 		(type *)( (char *)__mptr - offsetof(type,member) );})
 
 enum {
 	HV_VMBUS_IVAR_TYPE,
 	HV_VMBUS_IVAR_INSTANCE,
 	HV_VMBUS_IVAR_NODE,
 	HV_VMBUS_IVAR_DEVCTX
 };
 
 #define HV_VMBUS_ACCESSOR(var, ivar, type) \
 		__BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type)
 
 HV_VMBUS_ACCESSOR(type, TYPE,  const char *)
 HV_VMBUS_ACCESSOR(devctx, DEVCTX,  struct hv_device *)
 
 
 /*
  * Common defines for Hyper-V ICs
  */
 #define HV_ICMSGTYPE_NEGOTIATE		0
 #define HV_ICMSGTYPE_HEARTBEAT		1
 #define HV_ICMSGTYPE_KVPEXCHANGE	2
 #define HV_ICMSGTYPE_SHUTDOWN		3
 #define HV_ICMSGTYPE_TIMESYNC		4
 #define HV_ICMSGTYPE_VSS		5
 
 #define HV_ICMSGHDRFLAG_TRANSACTION	1
 #define HV_ICMSGHDRFLAG_REQUEST		2
 #define HV_ICMSGHDRFLAG_RESPONSE	4
 
 typedef struct hv_vmbus_pipe_hdr {
 	uint32_t flags;
 	uint32_t msgsize;
 } __packed hv_vmbus_pipe_hdr;
 
 typedef struct hv_vmbus_ic_version {
 	uint16_t major;
 	uint16_t minor;
 } __packed hv_vmbus_ic_version;
 
 typedef struct hv_vmbus_icmsg_hdr {
 	hv_vmbus_ic_version	icverframe;
 	uint16_t		icmsgtype;
 	hv_vmbus_ic_version	icvermsg;
 	uint16_t		icmsgsize;
 	uint32_t		status;
 	uint8_t			ictransaction_id;
 	uint8_t			icflags;
 	uint8_t			reserved[2];
 } __packed hv_vmbus_icmsg_hdr;
 
 typedef struct hv_vmbus_icmsg_negotiate {
 	uint16_t		icframe_vercnt;
 	uint16_t		icmsg_vercnt;
 	uint32_t		reserved;
 	hv_vmbus_ic_version	icversion_data[1]; /* any size array */
 } __packed hv_vmbus_icmsg_negotiate;
 
 typedef struct hv_vmbus_shutdown_msg_data {
 	uint32_t		reason_code;
 	uint32_t		timeout_seconds;
 	uint32_t 		flags;
 	uint8_t			display_message[2048];
 } __packed hv_vmbus_shutdown_msg_data;
 
 typedef struct hv_vmbus_heartbeat_msg_data {
 	uint64_t 		seq_num;
 	uint32_t 		reserved[8];
 } __packed hv_vmbus_heartbeat_msg_data;
 
 typedef struct {
 	/*
 	 * offset in bytes from the start of ring data below
 	 */
 	volatile uint32_t       write_index;
 	/*
 	 * offset in bytes from the start of ring data below
 	 */
 	volatile uint32_t       read_index;
 	/*
 	 * NOTE: The interrupt_mask field is used only for channels, but
 	 * vmbus connection also uses this data structure
 	 */
 	volatile uint32_t       interrupt_mask;
 	/* pad it to PAGE_SIZE so that data starts on a page */
 	uint8_t                 reserved[4084];
 
 	/*
 	 * WARNING: Ring data starts here + ring_data_start_offset
 	 *  !!! DO NOT place any fields below this !!!
 	 */
 	uint8_t			buffer[0];	/* doubles as interrupt mask */
 } __packed hv_vmbus_ring_buffer;
 
 typedef struct {
 	int		length;
 	int		offset;
 	uint64_t	pfn;
 } __packed hv_vmbus_page_buffer;
 
 typedef struct {
 	int		length;
 	int		offset;
 	uint64_t	pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT];
 } __packed hv_vmbus_multipage_buffer;
 
 typedef struct {
 	hv_vmbus_ring_buffer*	ring_buffer;
 	uint32_t		ring_size;	/* Include the shared header */
 	struct mtx		ring_lock;
 	uint32_t		ring_data_size;	/* ring_size */
 	uint32_t		ring_data_start_offset;
 } hv_vmbus_ring_buffer_info;
 
 typedef void (*hv_vmbus_pfn_channel_callback)(void *context);
 typedef void (*hv_vmbus_sc_creation_callback)(void *context);
 
 typedef enum {
 	HV_CHANNEL_OFFER_STATE,
 	HV_CHANNEL_OPENING_STATE,
 	HV_CHANNEL_OPEN_STATE,
 	HV_CHANNEL_OPENED_STATE,
 	HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE,
 } hv_vmbus_channel_state;
 
 /*
  *  Connection identifier type
  */
 typedef union {
 	uint32_t		as_uint32_t;
 	struct {
 		uint32_t	id:24;
 		uint32_t	reserved:8;
 	} u;
 
 } __packed hv_vmbus_connection_id;
 
 /*
  * Definition of the hv_vmbus_signal_event hypercall input structure
  */
 typedef struct {
 	hv_vmbus_connection_id	connection_id;
 	uint16_t		flag_number;
 	uint16_t		rsvd_z;
 } __packed hv_vmbus_input_signal_event;
 
 typedef struct {
 	uint64_t			align8;
 	hv_vmbus_input_signal_event	event;
 } __packed hv_vmbus_input_signal_event_buffer;
 
 typedef struct hv_vmbus_channel {
 	TAILQ_ENTRY(hv_vmbus_channel)	list_entry;
 	struct hv_device*		device;
 	hv_vmbus_channel_state		state;
 	hv_vmbus_channel_offer_channel	offer_msg;
 	/*
 	 * These are based on the offer_msg.monitor_id.
 	 * Save it here for easy access.
 	 */
 	uint8_t				monitor_group;
 	uint8_t				monitor_bit;
 
 	uint32_t			ring_buffer_gpadl_handle;
 	/*
 	 * Allocated memory for ring buffer
 	 */
 	void*				ring_buffer_pages;
 	unsigned long			ring_buffer_size;
 	uint32_t			ring_buffer_page_count;
 	/*
 	 * send to parent
 	 */
 	hv_vmbus_ring_buffer_info	outbound;
 	/*
 	 * receive from parent
 	 */
 	hv_vmbus_ring_buffer_info	inbound;
 
-	struct mtx			inbound_lock;
-
 	struct taskqueue *		rxq;
 	struct task			channel_task;
 	hv_vmbus_pfn_channel_callback	on_channel_callback;
 	void*				channel_callback_context;
 
 	/*
 	 * If batched_reading is set to "true", mask the interrupt
 	 * and read until the channel is empty.
 	 * If batched_reading is set to "false", the channel is not
 	 * going to perform batched reading.
 	 *
 	 * Batched reading is enabled by default; specific
 	 * drivers that don't want this behavior can turn it off.
 	 */
 	boolean_t			batched_reading;
 
 	boolean_t			is_dedicated_interrupt;
 
 	/*
 	 * Used as an input param for HV_CALL_SIGNAL_EVENT hypercall.
 	 */
 	hv_vmbus_input_signal_event_buffer	signal_event_buffer;
 	/*
 	 * 8-bytes aligned of the buffer above
 	 */
 	hv_vmbus_input_signal_event	*signal_event_param;
 
 	/*
 	 * From Win8, this field specifies the target virtual process
 	 * on which to deliver the interupt from the host to guest.
 	 * Before Win8, all channel interrupts would only be
 	 * delivered on cpu 0. Setting this value to 0 would preserve
 	 * the earlier behavior.
 	 */
 	uint32_t			target_vcpu;
 	/* The corresponding CPUID in the guest */
 	uint32_t			target_cpu;
 
 	/*
 	 * Support for multi-channels.
 	 * The initial offer is considered the primary channel and this
 	 * offer message will indicate if the host supports multi-channels.
 	 * The guest is free to ask for multi-channels to be offerred and can
 	 * open these multi-channels as a normal "primary" channel. However,
 	 * all multi-channels will have the same type and instance guids as the
 	 * primary channel. Requests sent on a given channel will result in a
 	 * response on the same channel.
 	 */
 
 	/*
 	 * Multi-channel creation callback. This callback will be called in
 	 * process context when a Multi-channel offer is received from the host.
 	 * The guest can open the Multi-channel in the context of this callback.
 	 */
 	hv_vmbus_sc_creation_callback	sc_creation_callback;
 
 	struct mtx			sc_lock;
 
 	/*
 	 * Link list of all the multi-channels if this is a primary channel
 	 */
 	TAILQ_HEAD(, hv_vmbus_channel)	sc_list_anchor;
 	TAILQ_ENTRY(hv_vmbus_channel)	sc_list_entry;
 
 	/*
 	 * The primary channel this sub-channle belongs to.
 	 * This will be NULL for the primary channel.
 	 */
 	struct hv_vmbus_channel		*primary_channel;
 	/*
 	 * Support per channel state for use by vmbus drivers.
 	 */
 	void				*per_channel_state;
 } hv_vmbus_channel;
 
 static inline void
 hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state)
 {
 	channel->batched_reading = state;
 }
 
 typedef struct hv_device {
 	hv_guid		    class_id;
 	hv_guid		    device_id;
 	device_t	    device;
 	hv_vmbus_channel*   channel;
 } hv_device;
 
 
 
 int		hv_vmbus_channel_recv_packet(
 				hv_vmbus_channel*	channel,
 				void*			buffer,
 				uint32_t		buffer_len,
 				uint32_t*		buffer_actual_len,
 				uint64_t*		request_id);
 
 int		hv_vmbus_channel_recv_packet_raw(
 				hv_vmbus_channel*	channel,
 				void*			buffer,
 				uint32_t		buffer_len,
 				uint32_t*		buffer_actual_len,
 				uint64_t*		request_id);
 
 int		hv_vmbus_channel_open(
 				hv_vmbus_channel*	channel,
 				uint32_t		send_ring_buffer_size,
 				uint32_t		recv_ring_buffer_size,
 				void*			user_data,
 				uint32_t		user_data_len,
 				hv_vmbus_pfn_channel_callback
 							pfn_on_channel_callback,
 				void*			context);
 
 void		hv_vmbus_channel_close(hv_vmbus_channel *channel);
 
 int		hv_vmbus_channel_send_packet(
 				hv_vmbus_channel*	channel,
 				void*			buffer,
 				uint32_t		buffer_len,
 				uint64_t		request_id,
 				hv_vmbus_packet_type	type,
 				uint32_t		flags);
 
 int		hv_vmbus_channel_send_packet_pagebuffer(
 				hv_vmbus_channel*	channel,
 				hv_vmbus_page_buffer	page_buffers[],
 				uint32_t		page_count,
 				void*			buffer,
 				uint32_t		buffer_len,
 				uint64_t		request_id);
 
 int		hv_vmbus_channel_send_packet_multipagebuffer(
 				hv_vmbus_channel*	    channel,
 				hv_vmbus_multipage_buffer*  multi_page_buffer,
 				void*			    buffer,
 				uint32_t		    buffer_len,
 				uint64_t		    request_id);
 
 int		hv_vmbus_channel_establish_gpadl(
 				hv_vmbus_channel*	channel,
 				/* must be phys and virt contiguous */
 				void*			contig_buffer,
 				/*  page-size multiple	*/
 				uint32_t		size,
 				uint32_t*		gpadl_handle);
 
 int		hv_vmbus_channel_teardown_gpdal(
 				hv_vmbus_channel*	channel,
 				uint32_t		gpadl_handle);
 
 struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
 
 /**
  * @brief Get physical address from virtual
  */
 static inline unsigned long
 hv_get_phys_addr(void *virt)
 {
 	unsigned long ret;
 	ret = (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK));
 	return (ret);
 }
 
 extern uint32_t hv_vmbus_protocal_version;
 #endif  /* __HYPERV_H__ */
Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.c
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.c	(revision 296082)
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.c	(revision 296083)
@@ -1,1041 +1,1039 @@
 /*-
  * Copyright (c) 2009-2012 Microsoft Corp.
  * Copyright (c) 2010-2012 Citrix Inc.
  * Copyright (c) 2012 NetApp Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /**
  * HyperV vmbus network VSC (virtual services client) module
  *
  */
 
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/lock.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <machine/bus.h>
 #include <machine/atomic.h>
 
 #include <dev/hyperv/include/hyperv.h>
 #include "hv_net_vsc.h"
 #include "hv_rndis.h"
 #include "hv_rndis_filter.h"
 
 MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver");
 
 /*
  * Forward declarations
  */
 static void hv_nv_on_channel_callback(void *context);
 static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
 static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
 static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
 static int  hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
 static int  hv_nv_connect_to_vsp(struct hv_device *device);
 static void hv_nv_on_send_completion(netvsc_dev *net_dev,
     struct hv_device *device, hv_vm_packet_descriptor *pkt);
 static void hv_nv_on_receive(netvsc_dev *net_dev,
     struct hv_device *device, hv_vm_packet_descriptor *pkt);
 
 /*
  *
  */
 static inline netvsc_dev *
 hv_nv_alloc_net_device(struct hv_device *device)
 {
 	netvsc_dev *net_dev;
 	hn_softc_t *sc = device_get_softc(device->device);
 
 	net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_WAITOK | M_ZERO);
 
 	net_dev->dev = device;
 	net_dev->destroy = FALSE;
 	sc->net_dev = net_dev;
 
 	return (net_dev);
 }
 
 /*
  *
  */
 static inline netvsc_dev *
 hv_nv_get_outbound_net_device(struct hv_device *device)
 {
 	hn_softc_t *sc = device_get_softc(device->device);
 	netvsc_dev *net_dev = sc->net_dev;;
 
 	if ((net_dev != NULL) && net_dev->destroy) {
 		return (NULL);
 	}
 
 	return (net_dev);
 }
 
 /*
  *
  */
 static inline netvsc_dev *
 hv_nv_get_inbound_net_device(struct hv_device *device)
 {
 	hn_softc_t *sc = device_get_softc(device->device);
 	netvsc_dev *net_dev = sc->net_dev;;
 
 	if (net_dev == NULL) {
 		return (net_dev);
 	}
 	/*
 	 * When the device is being destroyed; we only
 	 * permit incoming packets if and only if there
 	 * are outstanding sends.
 	 */
 	if (net_dev->destroy && net_dev->num_outstanding_sends == 0) {
 		return (NULL);
 	}
 
 	return (net_dev);
 }
 
 int
 hv_nv_get_next_send_section(netvsc_dev *net_dev)
 {
 	unsigned long bitsmap_words = net_dev->bitsmap_words;
 	unsigned long *bitsmap = net_dev->send_section_bitsmap;
 	unsigned long idx;
 	int ret = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
 	int i;
 
 	for (i = 0; i < bitsmap_words; i++) {
 		idx = ffsl(~bitsmap[i]);
 		if (0 == idx)
 			continue;
 
 		idx--;
 		KASSERT(i * BITS_PER_LONG + idx < net_dev->send_section_count,
 		    ("invalid i %d and idx %lu", i, idx));
 
 		if (atomic_testandset_long(&bitsmap[i], idx))
 			continue;
 
 		ret = i * BITS_PER_LONG + idx;
 		break;
 	}
 
 	return (ret);
 }
 
 /*
  * Net VSC initialize receive buffer with net VSP
  * 
  * Net VSP:  Network virtual services client, also known as the
  *     Hyper-V extensible switch and the synthetic data path.
  */
 static int 
 hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
 {
 	netvsc_dev *net_dev;
 	nvsp_msg *init_pkt;
 	int ret = 0;
 
 	net_dev = hv_nv_get_outbound_net_device(device);
 	if (!net_dev) {
 		return (ENODEV);
 	}
 
 	net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_NETVSC,
 	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
 
 	/*
 	 * Establish the GPADL handle for this buffer on this channel.
 	 * Note:  This call uses the vmbus connection rather than the
 	 * channel to establish the gpadl handle. 
 	 * GPADL:  Guest physical address descriptor list.
 	 */
 	ret = hv_vmbus_channel_establish_gpadl(
 		device->channel, net_dev->rx_buf,
 		net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle);
 	if (ret != 0) {
 		goto cleanup;
 	}
 	
 	/* sema_wait(&ext->channel_init_sema); KYS CHECK */
 
 	/* Notify the NetVsp of the gpadl handle */
 	init_pkt = &net_dev->channel_init_packet;
 
 	memset(init_pkt, 0, sizeof(nvsp_msg));
 
 	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf;
 	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
 	    net_dev->rx_buf_gpadl_handle;
 	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
 	    NETVSC_RECEIVE_BUFFER_ID;
 
 	/* Send the gpadl notification request */
 
 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret != 0) {
 		goto cleanup;
 	}
 
 	sema_wait(&net_dev->channel_init_sema);
 
 	/* Check the response */
 	if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status
 	    != nvsp_status_success) {
 		ret = EINVAL;
 		goto cleanup;
 	}
 
 	net_dev->rx_section_count =
 	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;
 
 	net_dev->rx_sections = malloc(net_dev->rx_section_count *
 	    sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK);
 	memcpy(net_dev->rx_sections, 
 	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
 	    net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));
 
 
 	/*
 	 * For first release, there should only be 1 section that represents
 	 * the entire receive buffer
 	 */
 	if (net_dev->rx_section_count != 1
 	    || net_dev->rx_sections->offset != 0) {
 		ret = EINVAL;
 		goto cleanup;
 	}
 
 	goto exit;
 
 cleanup:
 	hv_nv_destroy_rx_buffer(net_dev);
 	
 exit:
 	return (ret);
 }
 
 /*
  * Net VSC initialize send buffer with net VSP
  */
 static int 
 hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
 {
 	netvsc_dev *net_dev;
 	nvsp_msg *init_pkt;
 	int ret = 0;
 
 	net_dev = hv_nv_get_outbound_net_device(device);
 	if (!net_dev) {
 		return (ENODEV);
 	}
 
 	net_dev->send_buf  = contigmalloc(net_dev->send_buf_size, M_NETVSC,
 	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
 	if (net_dev->send_buf == NULL) {
 		ret = ENOMEM;
 		goto cleanup;
 	}
 
 	/*
 	 * Establish the gpadl handle for this buffer on this channel.
 	 * Note:  This call uses the vmbus connection rather than the
 	 * channel to establish the gpadl handle. 
 	 */
 	ret = hv_vmbus_channel_establish_gpadl(device->channel,
   	    net_dev->send_buf, net_dev->send_buf_size,
 	    &net_dev->send_buf_gpadl_handle);
 	if (ret != 0) {
 		goto cleanup;
 	}
 
 	/* Notify the NetVsp of the gpadl handle */
 
 	init_pkt = &net_dev->channel_init_packet;
 
 	memset(init_pkt, 0, sizeof(nvsp_msg));
 
 	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf;
 	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
 	    net_dev->send_buf_gpadl_handle;
 	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
 	    NETVSC_SEND_BUFFER_ID;
 
 	/* Send the gpadl notification request */
 
 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
   	    sizeof(nvsp_msg), (uint64_t)init_pkt,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret != 0) {
 		goto cleanup;
 	}
 
 	sema_wait(&net_dev->channel_init_sema);
 
 	/* Check the response */
 	if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status
 	    != nvsp_status_success) {
 		ret = EINVAL;
 		goto cleanup;
 	}
 
 	net_dev->send_section_size =
 	    init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size;
 	net_dev->send_section_count =
 	    net_dev->send_buf_size / net_dev->send_section_size;
 	net_dev->bitsmap_words = howmany(net_dev->send_section_count,
 	    BITS_PER_LONG);
 	net_dev->send_section_bitsmap =
 	    malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC,
 	    M_WAITOK | M_ZERO);
 
 	goto exit;
 
 cleanup:
 	hv_nv_destroy_send_buffer(net_dev);
 	
 exit:
 	return (ret);
 }
 
 /*
  * Net VSC destroy receive buffer
  */
 static int
 hv_nv_destroy_rx_buffer(netvsc_dev *net_dev)
 {
 	nvsp_msg *revoke_pkt;
 	int ret = 0;
 
 	/*
 	 * If we got a section count, it means we received a
 	 * send_rx_buf_complete msg 
 	 * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
 	 * we need to send a revoke msg here
 	 */
 	if (net_dev->rx_section_count) {
 		/* Send the revoke receive buffer */
 		revoke_pkt = &net_dev->revoke_packet;
 		memset(revoke_pkt, 0, sizeof(nvsp_msg));
 
 		revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf;
 		revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id =
 		    NETVSC_RECEIVE_BUFFER_ID;
 
 		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
 		    revoke_pkt, sizeof(nvsp_msg),
 		    (uint64_t)(uintptr_t)revoke_pkt,
 		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
 
 		/*
 		 * If we failed here, we might as well return and have a leak 
 		 * rather than continue and a bugchk
 		 */
 		if (ret != 0) {
 			return (ret);
 		}
 	}
 		
 	/* Tear down the gpadl on the vsp end */
 	if (net_dev->rx_buf_gpadl_handle) {
 		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
 		    net_dev->rx_buf_gpadl_handle);
 		/*
 		 * If we failed here, we might as well return and have a leak 
 		 * rather than continue and a bugchk
 		 */
 		if (ret != 0) {
 			return (ret);
 		}
 		net_dev->rx_buf_gpadl_handle = 0;
 	}
 
 	if (net_dev->rx_buf) {
 		/* Free up the receive buffer */
 		contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_NETVSC);
 		net_dev->rx_buf = NULL;
 	}
 
 	if (net_dev->rx_sections) {
 		free(net_dev->rx_sections, M_NETVSC);
 		net_dev->rx_sections = NULL;
 		net_dev->rx_section_count = 0;
 	}
 
 	return (ret);
 }
 
 /*
  * Net VSC destroy send buffer
  */
 static int
 hv_nv_destroy_send_buffer(netvsc_dev *net_dev)
 {
 	nvsp_msg *revoke_pkt;
 	int ret = 0;
 
 	/*
 	 * If we got a section count, it means we received a
 	 * send_rx_buf_complete msg 
 	 * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
 	 * we need to send a revoke msg here
 	 */
 	if (net_dev->send_section_size) {
 		/* Send the revoke send buffer */
 		revoke_pkt = &net_dev->revoke_packet;
 		memset(revoke_pkt, 0, sizeof(nvsp_msg));
 
 		revoke_pkt->hdr.msg_type =
 		    nvsp_msg_1_type_revoke_send_buf;
 		revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id =
 		    NETVSC_SEND_BUFFER_ID;
 
 		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
 		    revoke_pkt, sizeof(nvsp_msg),
 		    (uint64_t)(uintptr_t)revoke_pkt,
 		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
 		/*
 		 * If we failed here, we might as well return and have a leak 
 		 * rather than continue and a bugchk
 		 */
 		if (ret != 0) {
 			return (ret);
 		}
 	}
 		
 	/* Tear down the gpadl on the vsp end */
 	if (net_dev->send_buf_gpadl_handle) {
 		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
 		    net_dev->send_buf_gpadl_handle);
 
 		/*
 		 * If we failed here, we might as well return and have a leak 
 		 * rather than continue and a bugchk
 		 */
 		if (ret != 0) {
 			return (ret);
 		}
 		net_dev->send_buf_gpadl_handle = 0;
 	}
 
 	if (net_dev->send_buf) {
 		/* Free up the receive buffer */
 		contigfree(net_dev->send_buf, net_dev->send_buf_size, M_NETVSC);
 		net_dev->send_buf = NULL;
 	}
 
 	if (net_dev->send_section_bitsmap) {
 		free(net_dev->send_section_bitsmap, M_NETVSC);
 	}
 
 	return (ret);
 }
 
 
 /*
  * Attempt to negotiate the caller-specified NVSP version
  *
  * For NVSP v2, Server 2008 R2 does not set
  * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
  * to the negotiated version, so we cannot rely on that.
  */
 static int
 hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev,
     uint32_t nvsp_ver)
 {
 	nvsp_msg *init_pkt;
 	int ret;
 
 	init_pkt = &net_dev->channel_init_packet;
 	memset(init_pkt, 0, sizeof(nvsp_msg));
 	init_pkt->hdr.msg_type = nvsp_msg_type_init;
 
 	/*
 	 * Specify parameter as the only acceptable protocol version
 	 */
 	init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver;
 	init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver;
 
 	/* Send the init request */
 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret != 0)
 		return (-1);
 
 	sema_wait(&net_dev->channel_init_sema);
 
 	if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Send NDIS version 2 config packet containing MTU.
  *
  * Not valid for NDIS version 1.
  */
 static int
 hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu)
 {
 	netvsc_dev *net_dev;
 	nvsp_msg *init_pkt;
 	int ret;
 
 	net_dev = hv_nv_get_outbound_net_device(device);
 	if (!net_dev)
 		return (-ENODEV);
 
 	/*
 	 * Set up configuration packet, write MTU
 	 * Indicate we are capable of handling VLAN tags
 	 */
 	init_pkt = &net_dev->channel_init_packet;
 	memset(init_pkt, 0, sizeof(nvsp_msg));
 	init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config;
 	init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu;
 	init_pkt->
 		msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q
 		= 1;
 
 	/* Send the configuration packet */
 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
 	if (ret != 0)
 		return (-EINVAL);
 
 	return (0);
 }
 
 /*
  * Net VSC connect to VSP
  */
 static int
 hv_nv_connect_to_vsp(struct hv_device *device)
 {
 	netvsc_dev *net_dev;
 	nvsp_msg *init_pkt;
 	uint32_t ndis_version;
 	uint32_t protocol_list[] = { NVSP_PROTOCOL_VERSION_1,
 	    NVSP_PROTOCOL_VERSION_2,
 	    NVSP_PROTOCOL_VERSION_4,
 	    NVSP_PROTOCOL_VERSION_5 };
 	int i;
 	int protocol_number = nitems(protocol_list);
 	int ret = 0;
 	device_t dev = device->device;
 	hn_softc_t *sc = device_get_softc(dev);
 	struct ifnet *ifp = sc->hn_ifp;
 
 	net_dev = hv_nv_get_outbound_net_device(device);
 	if (!net_dev) {
 		return (ENODEV);
 	}
 
 	/*
 	 * Negotiate the NVSP version.  Try the latest NVSP first.
 	 */
 	for (i = protocol_number - 1; i >= 0; i--) {
 		if (hv_nv_negotiate_nvsp_protocol(device, net_dev,
 		    protocol_list[i]) == 0) {
 			net_dev->nvsp_version = protocol_list[i];
 			if (bootverbose)
 				device_printf(dev, "Netvsc: got version 0x%x\n",
 				    net_dev->nvsp_version);
 			break;
 		}
 	}
 
 	if (i < 0) {
 		if (bootverbose)
 			device_printf(dev, "failed to negotiate a valid "
 			    "protocol.\n");
 		return (EPROTO);
 	}
 
 	/*
 	 * Set the MTU if supported by this NVSP protocol version
 	 * This needs to be right after the NVSP init message per Haiyang
 	 */
 	if (net_dev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
 		ret = hv_nv_send_ndis_config(device, ifp->if_mtu);
 
 	/*
 	 * Send the NDIS version
 	 */
 	init_pkt = &net_dev->channel_init_packet;
 
 	memset(init_pkt, 0, sizeof(nvsp_msg));
 
 	if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) {
 		ndis_version = NDIS_VERSION_6_1;
 	} else {
 		ndis_version = NDIS_VERSION_6_30;
 	}
 
 	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers;
 	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers =
 	    (ndis_version & 0xFFFF0000) >> 16;
 	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers =
 	    ndis_version & 0xFFFF;
 
 	/* Send the init request */
 
 	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
 	    sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
 	if (ret != 0) {
 		goto cleanup;
 	}
 	/*
 	 * TODO:  BUGBUG - We have to wait for the above msg since the netvsp
 	 * uses KMCL which acknowledges packet (completion packet) 
 	 * since our Vmbus always set the
 	 * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag
 	 */
 	/* sema_wait(&NetVscChannel->channel_init_sema); */
 
 	/* Post the big receive buffer to NetVSP */
 	if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
 		net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
 	else
 		net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
 	net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
 
 	ret = hv_nv_init_rx_buffer_with_net_vsp(device);
 	if (ret == 0)
 		ret = hv_nv_init_send_buffer_with_net_vsp(device);
 
 cleanup:
 	return (ret);
 }
 
 /*
  * Net VSC disconnect from VSP
  */
 static void
 hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
 {
 	hv_nv_destroy_rx_buffer(net_dev);
 	hv_nv_destroy_send_buffer(net_dev);
 }
 
 /*
  * Net VSC on device add
  * 
  * Callback when the device belonging to this driver is added
  */
 netvsc_dev *
 hv_nv_on_device_add(struct hv_device *device, void *additional_info)
 {
 	netvsc_dev *net_dev;
 	int ret = 0;
 
 	net_dev = hv_nv_alloc_net_device(device);
 	if (!net_dev)
 		goto cleanup;
 
 	/* Initialize the NetVSC channel extension */
 
 	sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");
 
 	/*
 	 * Open the channel
 	 */
 	ret = hv_vmbus_channel_open(device->channel,
 	    NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
 	    NULL, 0, hv_nv_on_channel_callback, device);
 	if (ret != 0)
 		goto cleanup;
 
 	/*
 	 * Connect with the NetVsp
 	 */
 	ret = hv_nv_connect_to_vsp(device);
 	if (ret != 0)
 		goto close;
 
 	return (net_dev);
 
 close:
 	/* Now, we can close the channel safely */
 
 	hv_vmbus_channel_close(device->channel);
 
 cleanup:
 	/*
 	 * Free the packet buffers on the netvsc device packet queue.
 	 * Release other resources.
 	 */
 	if (net_dev) {
 		sema_destroy(&net_dev->channel_init_sema);
 		free(net_dev, M_NETVSC);
 	}
 
 	return (NULL);
 }
 
 /*
  * Net VSC on device remove
  */
 int
 hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
 {
 	hn_softc_t *sc = device_get_softc(device->device);
 	netvsc_dev *net_dev = sc->net_dev;;
 	
 	/* Stop outbound traffic ie sends and receives completions */
-	mtx_lock(&device->channel->inbound_lock);
 	net_dev->destroy = TRUE;
-	mtx_unlock(&device->channel->inbound_lock);
 
 	/* Wait for all send completions */
 	while (net_dev->num_outstanding_sends) {
 		DELAY(100);
 	}
 
 	hv_nv_disconnect_from_vsp(net_dev);
 
 	/* At this point, no one should be accessing net_dev except in here */
 
 	/* Now, we can close the channel safely */
 
 	if (!destroy_channel) {
 		device->channel->state =
 		    HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
 	}
 
 	hv_vmbus_channel_close(device->channel);
 
 	sema_destroy(&net_dev->channel_init_sema);
 	free(net_dev, M_NETVSC);
 
 	return (0);
 }
 
 /*
  * Net VSC on send completion
  */
 static void
 hv_nv_on_send_completion(netvsc_dev *net_dev,
     struct hv_device *device, hv_vm_packet_descriptor *pkt)
 {
 	nvsp_msg *nvsp_msg_pkt;
 	netvsc_packet *net_vsc_pkt;
 
 	nvsp_msg_pkt =
 	    (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
 
 	if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete
 		|| nvsp_msg_pkt->hdr.msg_type
 			== nvsp_msg_1_type_send_rx_buf_complete
 		|| nvsp_msg_pkt->hdr.msg_type
 			== nvsp_msg_1_type_send_send_buf_complete) {
 		/* Copy the response back */
 		memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
 		    sizeof(nvsp_msg));
 		sema_post(&net_dev->channel_init_sema);
 	} else if (nvsp_msg_pkt->hdr.msg_type ==
 		    nvsp_msg_1_type_send_rndis_pkt_complete) {
 		/* Get the send context */
 		net_vsc_pkt =
 		    (netvsc_packet *)(unsigned long)pkt->transaction_id;
 		if (NULL != net_vsc_pkt) {
 			if (net_vsc_pkt->send_buf_section_idx !=
 			    NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
 				u_long mask;
 				int idx;
 
 				idx = net_vsc_pkt->send_buf_section_idx /
 				    BITS_PER_LONG;
 				KASSERT(idx < net_dev->bitsmap_words,
 				    ("invalid section index %u",
 				     net_vsc_pkt->send_buf_section_idx));
 				mask = 1UL <<
 				    (net_vsc_pkt->send_buf_section_idx %
 				     BITS_PER_LONG);
 
 				KASSERT(net_dev->send_section_bitsmap[idx] &
 				    mask,
 				    ("index bitmap 0x%lx, section index %u, "
 				     "bitmap idx %d, bitmask 0x%lx",
 				     net_dev->send_section_bitsmap[idx],
 				     net_vsc_pkt->send_buf_section_idx,
 				     idx, mask));
 				atomic_clear_long(
 				    &net_dev->send_section_bitsmap[idx], mask);
 			}
 			
 			/* Notify the layer above us */
 			net_vsc_pkt->compl.send.on_send_completion(
 			    net_vsc_pkt->compl.send.send_completion_context);
 
 		}
 
 		atomic_subtract_int(&net_dev->num_outstanding_sends, 1);
 	}
 }
 
 /*
  * Net VSC on send
  * Sends a packet on the specified Hyper-V device.
  * Returns 0 on success, non-zero on failure.
  */
 int
 hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
 {
 	netvsc_dev *net_dev;
 	nvsp_msg send_msg;
 	int ret;
 
 	net_dev = hv_nv_get_outbound_net_device(device);
 	if (!net_dev)
 		return (ENODEV);
 
 	send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
 	if (pkt->is_data_pkt) {
 		/* 0 is RMC_DATA */
 		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0;
 	} else {
 		/* 1 is RMC_CONTROL */
 		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1;
 	}
 
 	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx =
 	    pkt->send_buf_section_idx;
 	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size =
 	    pkt->send_buf_section_size;
 
 	if (pkt->page_buf_count) {
 		ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel,
 		    pkt->page_buffers, pkt->page_buf_count,
 		    &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt);
 	} else {
 		ret = hv_vmbus_channel_send_packet(device->channel,
 		    &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt,
 		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 		    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	}
 
 	/* Record outstanding send only if send_packet() succeeded */
 	if (ret == 0)
 		atomic_add_int(&net_dev->num_outstanding_sends, 1);
 
 	return (ret);
 }
 
 /*
  * Net VSC on receive
  *
  * In the FreeBSD Hyper-V virtual world, this function deals exclusively
  * with virtual addresses.
  */
 static void
 hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device,
     hv_vm_packet_descriptor *pkt)
 {
 	hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
 	nvsp_msg *nvsp_msg_pkt;
 	netvsc_packet vsc_pkt;
 	netvsc_packet *net_vsc_pkt = &vsc_pkt;
 	device_t dev = device->device;
 	int count = 0;
 	int i = 0;
 	int status = nvsp_status_success;
 
 	/*
 	 * All inbound packets other than send completion should be
 	 * xfer page packet.
 	 */
 	if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES) {
 		device_printf(dev, "packet type %d is invalid!\n", pkt->type);
 		return;
 	}
 
 	nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt
 		+ (pkt->data_offset8 << 3));
 
 	/* Make sure this is a valid nvsp packet */
 	if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt) {
 		device_printf(dev, "packet hdr type %d is invalid!\n",
 		    pkt->type);
 		return;
 	}
 	
 	vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt;
 
 	if (vm_xfer_page_pkt->transfer_page_set_id !=
 	    NETVSC_RECEIVE_BUFFER_ID) {
 		device_printf(dev, "transfer_page_set_id %d is invalid!\n",
 		    vm_xfer_page_pkt->transfer_page_set_id);
 		return;
 	}
 
 	count = vm_xfer_page_pkt->range_count;
 	net_vsc_pkt->device = device;
 
 	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
 	for (i = 0; i < count; i++) {
 		net_vsc_pkt->status = nvsp_status_success;
 		net_vsc_pkt->data = (void *)((unsigned long)net_dev->rx_buf +
 		    vm_xfer_page_pkt->ranges[i].byte_offset);
 		net_vsc_pkt->tot_data_buf_len = 
 		    vm_xfer_page_pkt->ranges[i].byte_count;
 
 		hv_rf_on_receive(net_dev, device, net_vsc_pkt);
 		if (net_vsc_pkt->status != nvsp_status_success) {
 			status = nvsp_status_failure;
 		}
 	}
 	
 	/*
 	 * Moved completion call back here so that all received 
 	 * messages (not just data messages) will trigger a response
 	 * message back to the host.
 	 */
 	hv_nv_on_receive_completion(device, vm_xfer_page_pkt->d.transaction_id,
 	    status);
 	hv_rf_receive_rollup(net_dev);
 }
 
 /*
  * Net VSC on receive completion
  *
  * Send a receive completion packet to RNDIS device (ie NetVsp)
  */
 void
 hv_nv_on_receive_completion(struct hv_device *device, uint64_t tid,
     uint32_t status)
 {
 	nvsp_msg rx_comp_msg;
 	int retries = 0;
 	int ret = 0;
 	
 	rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
 
 	/* Pass in the status */
 	rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status =
 	    status;
 
 retry_send_cmplt:
 	/* Send the completion */
 	ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg,
 	    sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
 	if (ret == 0) {
 		/* success */
 		/* no-op */
 	} else if (ret == EAGAIN) {
 		/* no more room... wait a bit and attempt to retry 3 times */
 		retries++;
 
 		if (retries < 4) {
 			DELAY(100);
 			goto retry_send_cmplt;
 		}
 	}
 }
 
 /*
  * Net VSC on channel callback
  */
 static void
 hv_nv_on_channel_callback(void *context)
 {
 	struct hv_device *device = (struct hv_device *)context;
 	netvsc_dev *net_dev;
 	device_t dev = device->device;
 	uint32_t bytes_rxed;
 	uint64_t request_id;
  	hv_vm_packet_descriptor *desc;
 	uint8_t *buffer;
 	int bufferlen = NETVSC_PACKET_SIZE;
 	int ret = 0;
 
 	net_dev = hv_nv_get_inbound_net_device(device);
 	if (net_dev == NULL)
 		return;
 
 	buffer = net_dev->callback_buf;
 
 	do {
 		ret = hv_vmbus_channel_recv_packet_raw(device->channel,
 		    buffer, bufferlen, &bytes_rxed, &request_id);
 		if (ret == 0) {
 			if (bytes_rxed > 0) {
 				desc = (hv_vm_packet_descriptor *)buffer;
 				switch (desc->type) {
 				case HV_VMBUS_PACKET_TYPE_COMPLETION:
 					hv_nv_on_send_completion(net_dev, device, desc);
 					break;
 				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
 					hv_nv_on_receive(net_dev, device, desc);
 					break;
 				default:
 					device_printf(dev,
 					    "hv_cb recv unknow type %d "
 					    " packet\n", desc->type);
 					break;
 				}
 			} else {
 				break;
 			}
 		} else if (ret == ENOBUFS) {
 			/* Handle large packet */
 			if (bufferlen > NETVSC_PACKET_SIZE) {
 				free(buffer, M_NETVSC);
 				buffer = NULL;
 			}
 
 			/* alloc new buffer */
 			buffer = malloc(bytes_rxed, M_NETVSC, M_NOWAIT);
 			if (buffer == NULL) {
 				device_printf(dev,
 				    "hv_cb malloc buffer failed, len=%u\n",
 				    bytes_rxed);
 				bufferlen = 0;
 				break;
 			}
 			bufferlen = bytes_rxed;
 		}
 	} while (1);
 
 	if (bufferlen > NETVSC_PACKET_SIZE)
 		free(buffer, M_NETVSC);
 
 	hv_rf_channel_rollup(net_dev);
 }
Index: head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
===================================================================
--- head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	(revision 296082)
+++ head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	(revision 296083)
@@ -1,2144 +1,2142 @@
 /*-
  * Copyright (c) 2009-2012 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /**
  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
  * to the Comman Access Method (CAM) layer.  CAM control blocks (CCBs) are
  * converted into VSCSI protocol messages which are delivered to the parent
  * partition StorVSP driver over the Hyper-V VMBUS.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/condvar.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
 #include <sys/taskqueue.h>
 #include <sys/bus.h>
 #include <sys/mutex.h>
 #include <sys/callout.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/uma.h>
 #include <sys/lock.h>
 #include <sys/sema.h>
 #include <sys/sglist.h>
 #include <machine/bus.h>
 #include <sys/bus_dma.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_periph.h>
 #include <cam/cam_sim.h>
 #include <cam/cam_xpt_sim.h>
 #include <cam/cam_xpt_internal.h>
 #include <cam/cam_debug.h>
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_message.h>
 
 #include <dev/hyperv/include/hyperv.h>
 #include "hv_vstorage.h"
 
 #define STORVSC_RINGBUFFER_SIZE		(20*PAGE_SIZE)
 #define STORVSC_MAX_LUNS_PER_TARGET	(64)
 #define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
 #define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
 #define STORVSC_MAX_TARGETS		(2)
 
 #define STORVSC_WIN7_MAJOR 4
 #define STORVSC_WIN7_MINOR 2
 
 #define STORVSC_WIN8_MAJOR 5
 #define STORVSC_WIN8_MINOR 1
 
 #define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
 
 #define HV_ALIGN(x, a) roundup2(x, a)
 
 struct storvsc_softc;
 
 struct hv_sgl_node {
 	LIST_ENTRY(hv_sgl_node) link;
 	struct sglist *sgl_data;
 };
 
 struct hv_sgl_page_pool{
 	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
 	LIST_HEAD(, hv_sgl_node) free_sgl_list;
 	boolean_t                is_init;
 } g_hv_sgl_page_pool;
 
 #define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT
 
 enum storvsc_request_type {
 	WRITE_TYPE,
 	READ_TYPE,
 	UNKNOWN_TYPE
 };
 
 struct hv_storvsc_request {
 	LIST_ENTRY(hv_storvsc_request) link;
 	struct vstor_packet	vstor_packet;
 	hv_vmbus_multipage_buffer data_buf;
 	void *sense_data;
 	uint8_t sense_info_len;
 	uint8_t retries;
 	union ccb *ccb;
 	struct storvsc_softc *softc;
 	struct callout callout;
 	struct sema synch_sema; /*Synchronize the request/response if needed */
 	struct sglist *bounce_sgl;
 	unsigned int bounce_sgl_count;
 	uint64_t not_aligned_seg_bits;
 };
 
 struct storvsc_softc {
 	struct hv_device		*hs_dev;
 	LIST_HEAD(, hv_storvsc_request)	hs_free_list;
 	struct mtx			hs_lock;
 	struct storvsc_driver_props	*hs_drv_props;
 	int 				hs_unit;
 	uint32_t			hs_frozen;
 	struct cam_sim			*hs_sim;
 	struct cam_path 		*hs_path;
 	uint32_t			hs_num_out_reqs;
 	boolean_t			hs_destroy;
 	boolean_t			hs_drain_notify;
 	boolean_t			hs_open_multi_channel;
 	struct sema 			hs_drain_sema;	
 	struct hv_storvsc_request	hs_init_req;
 	struct hv_storvsc_request	hs_reset_req;
 };
 
 
 /**
  * HyperV storvsc timeout testing cases:
  * a. IO returned after first timeout;
  * b. IO returned after second timeout and queue freeze;
  * c. IO returned while timer handler is running
  * The first can be tested by "sg_senddiag -vv /dev/daX",
  * and the second and third can be done by
  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
  */
 #define HVS_TIMEOUT_TEST 0
 
 /*
  * Bus/adapter reset functionality on the Hyper-V host is
  * buggy and it will be disabled until
  * it can be further tested.
  */
 #define HVS_HOST_RESET 0
 
 struct storvsc_driver_props {
 	char		*drv_name;
 	char		*drv_desc;
 	uint8_t		drv_max_luns_per_target;
 	uint8_t		drv_max_ios_per_target;
 	uint32_t	drv_ringbuffer_size;
 };
 
 enum hv_storage_type {
 	DRIVER_BLKVSC,
 	DRIVER_STORVSC,
 	DRIVER_UNKNOWN
 };
 
 #define HS_MAX_ADAPTERS 10
 
 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
 
 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
 static const hv_guid gStorVscDeviceType={
 	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
 		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
 };
 
 /* {32412632-86cb-44a2-9b5c-50d1417354f5} */
 static const hv_guid gBlkVscDeviceType={
 	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
 		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
 };
 
 static struct storvsc_driver_props g_drv_props_table[] = {
 	{"blkvsc", "Hyper-V IDE Storage Interface",
 	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
 	 STORVSC_RINGBUFFER_SIZE},
 	{"storvsc", "Hyper-V SCSI Storage Interface",
 	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
 	 STORVSC_RINGBUFFER_SIZE}
 };
 
 /*
  * Sense buffer size changed in win8; have a run-time
  * variable to track the size we should use.
  */
 static int sense_buffer_size;
 
 /*
  * The size of the vmscsi_request has changed in win8. The
  * additional size is for the newly added elements in the
  * structure. These elements are valid only when we are talking
  * to a win8 host.
  * Track the correct size we need to apply.
  */
 static int vmscsi_size_delta;
 
 static int storvsc_current_major;
 static int storvsc_current_minor;
 
 /* static functions */
 static int storvsc_probe(device_t dev);
 static int storvsc_attach(device_t dev);
 static int storvsc_detach(device_t dev);
 static void storvsc_poll(struct cam_sim * sim);
 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
 static void hv_storvsc_on_channel_callback(void *context);
 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
 					struct vstor_packet *vstor_packet,
 					struct hv_storvsc_request *request);
 static int hv_storvsc_connect_vsp(struct hv_device *device);
 static void storvsc_io_done(struct hv_storvsc_request *reqp);
 static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
 				bus_dma_segment_t *orig_sgl,
 				unsigned int orig_sgl_count,
 				uint64_t seg_bits);
 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
 				unsigned int dest_sgl_count,
 				struct sglist* src_sgl,
 				uint64_t seg_bits);
 
 static device_method_t storvsc_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		storvsc_probe),
 	DEVMETHOD(device_attach,	storvsc_attach),
 	DEVMETHOD(device_detach,	storvsc_detach),
 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
 	DEVMETHOD_END
 };
 
 static driver_t storvsc_driver = {
 	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
 };
 
 static devclass_t storvsc_devclass;
 DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
 MODULE_VERSION(storvsc, 1);
 MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
 
 
 /**
  * The host is capable of sending messages to us that are
  * completely unsolicited. So, we need to address the race
  * condition where we may be in the process of unloading the
  * driver when the host may send us an unsolicited message.
  * We address this issue by implementing a sequentially
  * consistent protocol:
  *
  * 1. Channel callback is invoked while holding the the channel lock
  *    and an unloading driver will reset the channel callback under
  *    the protection of this channel lock.
  *
  * 2. To ensure bounded wait time for unloading a driver, we don't
  *    permit outgoing traffic once the device is marked as being
  *    destroyed.
  *
  * 3. Once the device is marked as being destroyed, we only
  *    permit incoming traffic to properly account for
  *    packets already sent out.
  */
 static inline struct storvsc_softc *
 get_stor_device(struct hv_device *device,
 				boolean_t outbound)
 {
 	struct storvsc_softc *sc;
 
 	sc = device_get_softc(device->device);
 	if (sc == NULL) {
 		return NULL;
 	}
 
 	if (outbound) {
 		/*
 		 * Here we permit outgoing I/O only
 		 * if the device is not being destroyed.
 		 */
 
 		if (sc->hs_destroy) {
 			sc = NULL;
 		}
 	} else {
 		/*
 		 * inbound case; if being destroyed
 		 * only permit to account for
 		 * messages already sent out.
 		 */
 		if (sc->hs_destroy && (sc->hs_num_out_reqs == 0)) {
 			sc = NULL;
 		}
 	}
 	return sc;
 }
 
 /**
  * @brief Callback handler, will be invoked when receive mutil-channel offer
  *
  * @param context  new multi-channel
  */
 static void
 storvsc_handle_sc_creation(void *context)
 {
 	hv_vmbus_channel *new_channel;
 	struct hv_device *device;
 	struct storvsc_softc *sc;
 	struct vmstor_chan_props props;
 	int ret = 0;
 
 	new_channel = (hv_vmbus_channel *)context;
 	device = new_channel->primary_channel->device;
 	sc = get_stor_device(device, TRUE);
 	if (sc == NULL)
 		return;
 
 	if (FALSE == sc->hs_open_multi_channel)
 		return;
 	
 	memset(&props, 0, sizeof(props));
 
 	ret = hv_vmbus_channel_open(new_channel,
 	    sc->hs_drv_props->drv_ringbuffer_size,
   	    sc->hs_drv_props->drv_ringbuffer_size,
 	    (void *)&props,
 	    sizeof(struct vmstor_chan_props),
 	    hv_storvsc_on_channel_callback,
 	    new_channel);
 
 	return;
 }
 
 /**
  * @brief Send multi-channel creation request to host
  *
  * @param device  a Hyper-V device pointer
  * @param max_chans  the max channels supported by vmbus
  */
 static void
 storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
 {
 	struct storvsc_softc *sc;
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;	
 	int request_channels_cnt = 0;
 	int ret;
 
 	/* get multichannels count that need to create */
 	request_channels_cnt = MIN(max_chans, mp_ncpus);
 
 	sc = get_stor_device(dev, TRUE);
 	if (sc == NULL) {
 		printf("Storvsc_error: get sc failed while send mutilchannel "
 		    "request\n");
 		return;
 	}
 
 	request = &sc->hs_init_req;
 
 	/* Establish a handler for multi-channel */
 	dev->channel->sc_creation_callback = storvsc_handle_sc_creation;
 
 	/* request the host to create multi-channel */
 	memset(request, 0, sizeof(struct hv_storvsc_request));
 	
 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
 
 	vstor_packet = &request->vstor_packet;
 	
 	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 	vstor_packet->u.multi_channels_cnt = request_channels_cnt;
 
 	ret = hv_vmbus_channel_send_packet(
 	    dev->channel,
 	    vstor_packet,
 	    VSTOR_PKT_SIZE,
 	    (uint64_t)(uintptr_t)request,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
 	/* wait for 5 seconds */
 	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 	if (ret != 0) {		
 		printf("Storvsc_error: create multi-channel timeout, %d\n",
 		    ret);
 		return;
 	}
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 	    vstor_packet->status != 0) {		
 		printf("Storvsc_error: create multi-channel invalid operation "
 		    "(%d) or statue (%u)\n",
 		    vstor_packet->operation, vstor_packet->status);
 		return;
 	}
 
 	sc->hs_open_multi_channel = TRUE;
 
 	if (bootverbose)
 		printf("Storvsc create multi-channel success!\n");
 }
 
 /**
  * @brief initialize channel connection to parent partition
  *
  * @param dev  a Hyper-V device pointer
  * @returns  0 on success, non-zero error on failure
  */
 static int
 hv_storvsc_channel_init(struct hv_device *dev)
 {
 	int ret = 0;
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 	struct storvsc_softc *sc;
 	uint16_t max_chans = 0;
 	boolean_t support_multichannel = FALSE;
 
 	max_chans = 0;
 	support_multichannel = FALSE;
 
 	sc = get_stor_device(dev, TRUE);
 	if (sc == NULL)
 		return (ENODEV);
 
 	request = &sc->hs_init_req;
 	memset(request, 0, sizeof(struct hv_storvsc_request));
 	vstor_packet = &request->vstor_packet;
 	request->softc = sc;
 
 	/**
 	 * Initiate the vsc/vsp initialization protocol on the open channel
 	 */
 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
 
 	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 
 	ret = hv_vmbus_channel_send_packet(
 			dev->channel,
 			vstor_packet,
 			VSTOR_PKT_SIZE,
 			(uint64_t)(uintptr_t)request,
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
 	if (ret != 0)
 		goto cleanup;
 
 	/* wait 5 seconds */
 	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 	if (ret != 0)
 		goto cleanup;
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 		vstor_packet->status != 0) {
 		goto cleanup;
 	}
 
 	/* reuse the packet for version range supported */
 
 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 	vstor_packet->u.version.major_minor =
 	    VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor);
 
 	/* revision is only significant for Windows guests */
 	vstor_packet->u.version.revision = 0;
 
 	ret = hv_vmbus_channel_send_packet(
 			dev->channel,
 			vstor_packet,
 			VSTOR_PKT_SIZE,
 			(uint64_t)(uintptr_t)request,
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
 	if (ret != 0)
 		goto cleanup;
 
 	/* wait 5 seconds */
 	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
 	if (ret)
 		goto cleanup;
 
 	/* TODO: Check returned version */
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 		vstor_packet->status != 0)
 		goto cleanup;
 
 	/**
 	 * Query channel properties
 	 */
 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 	ret = hv_vmbus_channel_send_packet(
 				dev->channel,
 				vstor_packet,
 				VSTOR_PKT_SIZE,
 				(uint64_t)(uintptr_t)request,
 				HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 				HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
 	if ( ret != 0)
 		goto cleanup;
 
 	/* wait 5 seconds */
 	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
 	if (ret != 0)
 		goto cleanup;
 
 	/* TODO: Check returned version */
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 	    vstor_packet->status != 0) {
 		goto cleanup;
 	}
 
 	/* multi-channels feature is supported by WIN8 and above version */
 	max_chans = vstor_packet->u.chan_props.max_channel_cnt;
 	if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) &&
 	    (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) &&
 	    (vstor_packet->u.chan_props.flags &
 	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
 		support_multichannel = TRUE;
 	}
 
 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 	ret = hv_vmbus_channel_send_packet(
 			dev->channel,
 			vstor_packet,
 			VSTOR_PKT_SIZE,
 			(uint64_t)(uintptr_t)request,
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
 	if (ret != 0) {
 		goto cleanup;
 	}
 
 	/* wait 5 seconds */
 	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
 	if (ret != 0)
 		goto cleanup;
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 	    vstor_packet->status != 0)
 		goto cleanup;
 
 	/*
 	 * If multi-channel is supported, send multichannel create
 	 * request to host.
 	 */
 	if (support_multichannel)
 		storvsc_send_multichannel_request(dev, max_chans);
 
 cleanup:
 	sema_destroy(&request->synch_sema);
 	return (ret);
 }
 
 /**
  * @brief Open channel connection to paraent partition StorVSP driver
  *
  * Open and initialize channel connection to parent partition StorVSP driver.
  *
  * @param pointer to a Hyper-V device
  * @returns 0 on success, non-zero error on failure
  */
 static int
 hv_storvsc_connect_vsp(struct hv_device *dev)
 {	
 	int ret = 0;
 	struct vmstor_chan_props props;
 	struct storvsc_softc *sc;
 
 	sc = device_get_softc(dev->device);
 		
 	memset(&props, 0, sizeof(struct vmstor_chan_props));
 
 	/*
 	 * Open the channel
 	 */
 
 	ret = hv_vmbus_channel_open(
 		dev->channel,
 		sc->hs_drv_props->drv_ringbuffer_size,
 		sc->hs_drv_props->drv_ringbuffer_size,
 		(void *)&props,
 		sizeof(struct vmstor_chan_props),
 		hv_storvsc_on_channel_callback,
 		dev->channel);
 
 	if (ret != 0) {
 		return ret;
 	}
 
 	ret = hv_storvsc_channel_init(dev);
 
 	return (ret);
 }
 
 #if HVS_HOST_RESET
 static int
 hv_storvsc_host_reset(struct hv_device *dev)
 {
 	int ret = 0;
 	struct storvsc_softc *sc;
 
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 
 	sc = get_stor_device(dev, TRUE);
 	if (sc == NULL) {
 		return ENODEV;
 	}
 
 	request = &sc->hs_reset_req;
 	request->softc = sc;
 	vstor_packet = &request->vstor_packet;
 
 	sema_init(&request->synch_sema, 0, "stor synch sema");
 
 	vstor_packet->operation = VSTOR_OPERATION_RESETBUS;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 	ret = hv_vmbus_channel_send_packet(dev->channel,
 			vstor_packet,
 			VSTOR_PKT_SIZE,
 			(uint64_t)(uintptr_t)&sc->hs_reset_req,
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
 	if (ret != 0) {
 		goto cleanup;
 	}
 
 	ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */
 
 	if (ret) {
 		goto cleanup;
 	}
 
 
 	/*
 	 * At this point, all outstanding requests in the adapter
 	 * should have been flushed out and return to us
 	 */
 
 cleanup:
 	sema_destroy(&request->synch_sema);
 	return (ret);
 }
 #endif /* HVS_HOST_RESET */
 
 /**
  * @brief Function to initiate an I/O request
  *
  * @param device Hyper-V device pointer
  * @param request pointer to a request structure
  * @returns 0 on success, non-zero error on failure
  */
 static int
 hv_storvsc_io_request(struct hv_device *device,
 					  struct hv_storvsc_request *request)
 {
 	struct storvsc_softc *sc;
 	struct vstor_packet *vstor_packet = &request->vstor_packet;
 	struct hv_vmbus_channel* outgoing_channel = NULL;
 	int ret = 0;
 
 	sc = get_stor_device(device, TRUE);
 
 	if (sc == NULL) {
 		return ENODEV;
 	}
 
 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
 
 	vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE;
 	
 	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
 
 	vstor_packet->u.vm_srb.transfer_len = request->data_buf.length;
 
 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
 
 	outgoing_channel = vmbus_select_outgoing_channel(device->channel);
 
 	mtx_unlock(&request->softc->hs_lock);
 	if (request->data_buf.length) {
 		ret = hv_vmbus_channel_send_packet_multipagebuffer(
 				outgoing_channel,
 				&request->data_buf,
 				vstor_packet,
 				VSTOR_PKT_SIZE,
 				(uint64_t)(uintptr_t)request);
 
 	} else {
 		ret = hv_vmbus_channel_send_packet(
 			outgoing_channel,
 			vstor_packet,
 			VSTOR_PKT_SIZE,
 			(uint64_t)(uintptr_t)request,
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	}
 	mtx_lock(&request->softc->hs_lock);
 
 	if (ret != 0) {
 		printf("Unable to send packet %p ret %d", vstor_packet, ret);
 	} else {
 		atomic_add_int(&sc->hs_num_out_reqs, 1);
 	}
 
 	return (ret);
 }
 
 
 /**
  * Process IO_COMPLETION_OPERATION and ready
  * the result to be completed for upper layer
  * processing by the CAM layer.
  */
 static void
 hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
 			   struct vstor_packet *vstor_packet,
 			   struct hv_storvsc_request *request)
 {
 	struct vmscsi_req *vm_srb;
 
 	vm_srb = &vstor_packet->u.vm_srb;
 
 	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
 			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
 		/* Autosense data available */
 
 		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
 				("vm_srb->sense_info_len <= "
 				 "request->sense_info_len"));
 
 		memcpy(request->sense_data, vm_srb->u.sense_data,
 			vm_srb->sense_info_len);
 
 		request->sense_info_len = vm_srb->sense_info_len;
 	}
 
 	/* Complete request by passing to the CAM layer */
 	storvsc_io_done(request);
 	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
 	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
 		sema_post(&sc->hs_drain_sema);
 	}
 }
 
 static void
 hv_storvsc_rescan_target(struct storvsc_softc *sc)
 {
 	path_id_t pathid;
 	target_id_t targetid;
 	union ccb *ccb;
 
 	pathid = cam_sim_path(sc->hs_sim);
 	targetid = CAM_TARGET_WILDCARD;
 
 	/*
 	 * Allocate a CCB and schedule a rescan.
 	 */
 	ccb = xpt_alloc_ccb_nowait();
 	if (ccb == NULL) {
 		printf("unable to alloc CCB for rescan\n");
 		return;
 	}
 
 	if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
 	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
 		printf("unable to create path for rescan, pathid: %u,"
 		    "targetid: %u\n", pathid, targetid);
 		xpt_free_ccb(ccb);
 		return;
 	}
 
 	if (targetid == CAM_TARGET_WILDCARD)
 		ccb->ccb_h.func_code = XPT_SCAN_BUS;
 	else
 		ccb->ccb_h.func_code = XPT_SCAN_TGT;
 
 	xpt_rescan(ccb);
 }
 
 static void
 hv_storvsc_on_channel_callback(void *context)
 {
 	int ret = 0;
 	hv_vmbus_channel *channel = (hv_vmbus_channel *)context;
 	struct hv_device *device = NULL;
 	struct storvsc_softc *sc;
 	uint32_t bytes_recvd;
 	uint64_t request_id;
 	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 
 	if (channel->primary_channel != NULL){
 		device = channel->primary_channel->device;
 	} else {
 		device = channel->device;
 	}
 
 	KASSERT(device, ("device is NULL"));
 
 	sc = get_stor_device(device, FALSE);
 	if (sc == NULL) {
 		printf("Storvsc_error: get stor device failed.\n");
 		return;
 	}
 
 	ret = hv_vmbus_channel_recv_packet(
 			channel,
 			packet,
 			roundup2(VSTOR_PKT_SIZE, 8),
 			&bytes_recvd,
 			&request_id);
 
 	while ((ret == 0) && (bytes_recvd > 0)) {
 		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
 
 		if ((request == &sc->hs_init_req) ||
 			(request == &sc->hs_reset_req)) {
 			memcpy(&request->vstor_packet, packet,
 				   sizeof(struct vstor_packet));
 			sema_post(&request->synch_sema);
 		} else {
 			vstor_packet = (struct vstor_packet *)packet;
 			switch(vstor_packet->operation) {
 			case VSTOR_OPERATION_COMPLETEIO:
 				if (request == NULL)
 					panic("VMBUS: storvsc received a "
 					    "packet with NULL request id in "
 					    "COMPLETEIO operation.");
 
 				hv_storvsc_on_iocompletion(sc,
 							vstor_packet, request);
 				break;
 			case VSTOR_OPERATION_REMOVEDEVICE:
 				printf("VMBUS: storvsc operation %d not "
 				    "implemented.\n", vstor_packet->operation);
 				/* TODO: implement */
 				break;
 			case VSTOR_OPERATION_ENUMERATE_BUS:
 				hv_storvsc_rescan_target(sc);
 				break;
 			default:
 				break;
 			}			
 		}
 		ret = hv_vmbus_channel_recv_packet(
 				channel,
 				packet,
 				roundup2(VSTOR_PKT_SIZE, 8),
 				&bytes_recvd,
 				&request_id);
 	}
 }
 
 /**
  * @brief StorVSC probe function
  *
  * Device probe function.  Returns 0 if the input device is a StorVSC
  * device.  Otherwise, a ENXIO is returned.  If the input device is
  * for BlkVSC (paravirtual IDE) device and this support is disabled in
  * favor of the emulated ATA/IDE device, return ENXIO.
  *
  * @param a device
  * @returns 0 on success, ENXIO if not a matcing StorVSC device
  */
 static int
 storvsc_probe(device_t dev)
 {
 	int ata_disk_enable = 0;
 	int ret	= ENXIO;
 	
 	if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
 	    hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) {
 		sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
 		vmscsi_size_delta = sizeof(struct vmscsi_win8_extension);
 		storvsc_current_major = STORVSC_WIN7_MAJOR;
 		storvsc_current_minor = STORVSC_WIN7_MINOR;
 	} else {
 		sense_buffer_size = POST_WIN7_STORVSC_SENSE_BUFFER_SIZE;
 		vmscsi_size_delta = 0;
 		storvsc_current_major = STORVSC_WIN8_MAJOR;
 		storvsc_current_minor = STORVSC_WIN8_MINOR;
 	}
 	
 	switch (storvsc_get_storage_type(dev)) {
 	case DRIVER_BLKVSC:
 		if(bootverbose)
 			device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n");
 		if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) {
 			if(bootverbose)
 				device_printf(dev,
 					"Enlightened ATA/IDE detected\n");
 			ret = BUS_PROBE_DEFAULT;
 		} else if(bootverbose)
 			device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n");
 		break;
 	case DRIVER_STORVSC:
 		if(bootverbose)
 			device_printf(dev, "Enlightened SCSI device detected\n");
 		ret = BUS_PROBE_DEFAULT;
 		break;
 	default:
 		ret = ENXIO;
 	}
 	return (ret);
 }
 
 /**
  * @brief StorVSC attach function
  *
  * Function responsible for allocating per-device structures,
  * setting up CAM interfaces and scanning for available LUNs to
  * be used for SCSI device peripherals.
  *
  * @param a device
  * @returns 0 on success or an error on failure
  */
 static int
 storvsc_attach(device_t dev)
 {
 	struct hv_device *hv_dev = vmbus_get_devctx(dev);
 	enum hv_storage_type stor_type;
 	struct storvsc_softc *sc;
 	struct cam_devq *devq;
 	int ret, i, j;
 	struct hv_storvsc_request *reqp;
 	struct root_hold_token *root_mount_token = NULL;
 	struct hv_sgl_node *sgl_node = NULL;
 	void *tmp_buff = NULL;
 
 	/*
 	 * We need to serialize storvsc attach calls.
 	 */
 	root_mount_token = root_mount_hold("storvsc");
 
 	sc = device_get_softc(dev);
 	if (sc == NULL) {
 		ret = ENOMEM;
 		goto cleanup;
 	}
 
 	stor_type = storvsc_get_storage_type(dev);
 
 	if (stor_type == DRIVER_UNKNOWN) {
 		ret = ENODEV;
 		goto cleanup;
 	}
 
 	bzero(sc, sizeof(struct storvsc_softc));
 
 	/* fill in driver specific properties */
 	sc->hs_drv_props = &g_drv_props_table[stor_type];
 
 	/* fill in device specific properties */
 	sc->hs_unit	= device_get_unit(dev);
 	sc->hs_dev	= hv_dev;
 	device_set_desc(dev, g_drv_props_table[stor_type].drv_desc);
 
 	LIST_INIT(&sc->hs_free_list);
 	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
 
 	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
 		reqp = malloc(sizeof(struct hv_storvsc_request),
 				 M_DEVBUF, M_WAITOK|M_ZERO);
 		reqp->softc = sc;
 
 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
 	}
 
 	/* create sg-list page pool */
 	if (FALSE == g_hv_sgl_page_pool.is_init) {
 		g_hv_sgl_page_pool.is_init = TRUE;
 		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
 		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
 
 		/*
 		 * Pre-create SG list, each SG list with
 		 * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each
 		 * segment has one page buffer
 		 */
 		for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
 	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
 			    M_DEVBUF, M_WAITOK|M_ZERO);
 
 			sgl_node->sgl_data =
 			    sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT,
 			    M_WAITOK|M_ZERO);
 
 			for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
 				tmp_buff = malloc(PAGE_SIZE,
 				    M_DEVBUF, M_WAITOK|M_ZERO);
 
 				sgl_node->sgl_data->sg_segs[j].ss_paddr =
 				    (vm_paddr_t)tmp_buff;
 			}
 
 			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
 			    sgl_node, link);
 		}
 	}
 
 	sc->hs_destroy = FALSE;
 	sc->hs_drain_notify = FALSE;
 	sc->hs_open_multi_channel = FALSE;
 	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
 
 	ret = hv_storvsc_connect_vsp(hv_dev);
 	if (ret != 0) {
 		goto cleanup;
 	}
 
 	/*
 	 * Create the device queue.
 	 * Hyper-V maps each target to one SCSI HBA
 	 */
 	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
 	if (devq == NULL) {
 		device_printf(dev, "Failed to alloc device queue\n");
 		ret = ENOMEM;
 		goto cleanup;
 	}
 
 	sc->hs_sim = cam_sim_alloc(storvsc_action,
 				storvsc_poll,
 				sc->hs_drv_props->drv_name,
 				sc,
 				sc->hs_unit,
 				&sc->hs_lock, 1,
 				sc->hs_drv_props->drv_max_ios_per_target,
 				devq);
 
 	if (sc->hs_sim == NULL) {
 		device_printf(dev, "Failed to alloc sim\n");
 		cam_simq_free(devq);
 		ret = ENOMEM;
 		goto cleanup;
 	}
 
 	mtx_lock(&sc->hs_lock);
 	/* bus_id is set to 0, need to get it from VMBUS channel query? */
 	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
 		mtx_unlock(&sc->hs_lock);
 		device_printf(dev, "Unable to register SCSI bus\n");
 		ret = ENXIO;
 		goto cleanup;
 	}
 
 	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
 		 cam_sim_path(sc->hs_sim),
 		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
 		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
 		mtx_unlock(&sc->hs_lock);
 		device_printf(dev, "Unable to create path\n");
 		ret = ENXIO;
 		goto cleanup;
 	}
 
 	mtx_unlock(&sc->hs_lock);
 
 	root_mount_rel(root_mount_token);
 	return (0);
 
 
 cleanup:
 	root_mount_rel(root_mount_token);
 	while (!LIST_EMPTY(&sc->hs_free_list)) {
 		reqp = LIST_FIRST(&sc->hs_free_list);
 		LIST_REMOVE(reqp, link);
 		free(reqp, M_DEVBUF);
 	}
 
 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
 		LIST_REMOVE(sgl_node, link);
 		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
 			if (NULL !=
 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
 			}
 		}
 		sglist_free(sgl_node->sgl_data);
 		free(sgl_node, M_DEVBUF);
 	}
 
 	return (ret);
 }
 
 /**
  * @brief StorVSC device detach function
  *
  * This function is responsible for safely detaching a
  * StorVSC device.  This includes waiting for inbound responses
  * to complete and freeing associated per-device structures.
  *
  * @param dev a device
  * returns 0 on success
  */
 static int
 storvsc_detach(device_t dev)
 {
 	struct storvsc_softc *sc = device_get_softc(dev);
 	struct hv_storvsc_request *reqp = NULL;
 	struct hv_device *hv_device = vmbus_get_devctx(dev);
 	struct hv_sgl_node *sgl_node = NULL;
 	int j = 0;
 
-	mtx_lock(&hv_device->channel->inbound_lock);
 	sc->hs_destroy = TRUE;
-	mtx_unlock(&hv_device->channel->inbound_lock);
 
 	/*
 	 * At this point, all outbound traffic should be disabled. We
 	 * only allow inbound traffic (responses) to proceed so that
 	 * outstanding requests can be completed.
 	 */
 
 	sc->hs_drain_notify = TRUE;
 	sema_wait(&sc->hs_drain_sema);
 	sc->hs_drain_notify = FALSE;
 
 	/*
 	 * Since we have already drained, we don't need to busy wait.
 	 * The call to close the channel will reset the callback
 	 * under the protection of the incoming channel lock.
 	 */
 
 	hv_vmbus_channel_close(hv_device->channel);
 
 	mtx_lock(&sc->hs_lock);
 	while (!LIST_EMPTY(&sc->hs_free_list)) {
 		reqp = LIST_FIRST(&sc->hs_free_list);
 		LIST_REMOVE(reqp, link);
 
 		free(reqp, M_DEVBUF);
 	}
 	mtx_unlock(&sc->hs_lock);
 
 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
 		LIST_REMOVE(sgl_node, link);
 		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){
 			if (NULL !=
 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
 			}
 		}
 		sglist_free(sgl_node->sgl_data);
 		free(sgl_node, M_DEVBUF);
 	}
 	
 	return (0);
 }
 
 #if HVS_TIMEOUT_TEST
 /**
  * @brief unit test for timed out operations
  *
  * This function provides unit testing capability to simulate
  * timed out operations.  Recompilation with HV_TIMEOUT_TEST=1
  * is required.
  *
  * @param reqp pointer to a request structure
  * @param opcode SCSI operation being performed
  * @param wait if 1, wait for I/O to complete
  */
 static void
 storvsc_timeout_test(struct hv_storvsc_request *reqp,
 		uint8_t opcode, int wait)
 {
 	int ret;
 	union ccb *ccb = reqp->ccb;
 	struct storvsc_softc *sc = reqp->softc;
 
 	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
 		return;
 	}
 
 	if (wait) {
 		mtx_lock(&reqp->event.mtx);
 	}
 	ret = hv_storvsc_io_request(sc->hs_dev, reqp);
 	if (ret != 0) {
 		if (wait) {
 			mtx_unlock(&reqp->event.mtx);
 		}
 		printf("%s: io_request failed with %d.\n",
 				__func__, ret);
 		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
 		mtx_lock(&sc->hs_lock);
 		storvsc_free_request(sc, reqp);
 		xpt_done(ccb);
 		mtx_unlock(&sc->hs_lock);
 		return;
 	}
 
 	if (wait) {
 		xpt_print(ccb->ccb_h.path,
 				"%u: %s: waiting for IO return.\n",
 				ticks, __func__);
 		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
 		mtx_unlock(&reqp->event.mtx);
 		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
 				ticks, __func__, (ret == 0)?
 				"IO return detected" :
 				"IO return not detected");
 		/*
 		 * Now both the timer handler and io done are running
 		 * simultaneously. We want to confirm the io done always
 		 * finishes after the timer handler exits. So reqp used by
 		 * timer handler is not freed or stale. Do busy loop for
 		 * another 1/10 second to make sure io done does
 		 * wait for the timer handler to complete.
 		 */
 		DELAY(100*1000);
 		mtx_lock(&sc->hs_lock);
 		xpt_print(ccb->ccb_h.path,
 				"%u: %s: finishing, queue frozen %d, "
 				"ccb status 0x%x scsi_status 0x%x.\n",
 				ticks, __func__, sc->hs_frozen,
 				ccb->ccb_h.status,
 				ccb->csio.scsi_status);
 		mtx_unlock(&sc->hs_lock);
 	}
 }
 #endif /* HVS_TIMEOUT_TEST */
 
 /**
  * @brief timeout handler for requests
  *
  * This function is called as a result of a callout expiring.
  *
  * @param arg pointer to a request
  */
 static void
 storvsc_timeout(void *arg)
 {
 	struct hv_storvsc_request *reqp = arg;
 	struct storvsc_softc *sc = reqp->softc;
 	union ccb *ccb = reqp->ccb;
 
 	if (reqp->retries == 0) {
 		mtx_lock(&sc->hs_lock);
 		xpt_print(ccb->ccb_h.path,
 		    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
 		    ticks, reqp, ccb->ccb_h.timeout / 1000);
 		cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
 		mtx_unlock(&sc->hs_lock);
 
 		reqp->retries++;
 		callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout,
 		    0, storvsc_timeout, reqp, 0);
 #if HVS_TIMEOUT_TEST
 		storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0);
 #endif
 		return;
 	}
 
 	mtx_lock(&sc->hs_lock);
 	xpt_print(ccb->ccb_h.path,
 		"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
 		ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000,
 		(sc->hs_frozen == 0)?
 		"freezing the queue" : "the queue is already frozen");
 	if (sc->hs_frozen == 0) {
 		sc->hs_frozen = 1;
 		xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
 	}
 	mtx_unlock(&sc->hs_lock);
 	
 #if HVS_TIMEOUT_TEST
 	storvsc_timeout_test(reqp, MODE_SELECT_10, 1);
 #endif
 }
 
 /**
  * @brief StorVSC device poll function
  *
  * This function is responsible for servicing requests when
  * interrupts are disabled (i.e when we are dumping core.)
  *
  * @param sim a pointer to a CAM SCSI interface module
  */
 static void
 storvsc_poll(struct cam_sim *sim)
 {
 	struct storvsc_softc *sc = cam_sim_softc(sim);
 
 	mtx_assert(&sc->hs_lock, MA_OWNED);
 	mtx_unlock(&sc->hs_lock);
 	hv_storvsc_on_channel_callback(sc->hs_dev->channel);
 	mtx_lock(&sc->hs_lock);
 }
 
 /**
  * @brief StorVSC device action function
  *
  * This function is responsible for handling SCSI operations which
  * are passed from the CAM layer.  The requests are in the form of
  * CAM control blocks which indicate the action being performed.
  * Not all actions require converting the request to a VSCSI protocol
  * message - these actions can be responded to by this driver.
  * Requests which are destined for a backend storage device are converted
  * to a VSCSI protocol message and sent on the channel connection associated
  * with this device.
  *
  * @param sim pointer to a CAM SCSI interface module
  * @param ccb pointer to a CAM control block
  */
 static void
 storvsc_action(struct cam_sim *sim, union ccb *ccb)
 {
 	struct storvsc_softc *sc = cam_sim_softc(sim);
 	int res;
 
 	mtx_assert(&sc->hs_lock, MA_OWNED);
 	switch (ccb->ccb_h.func_code) {
 	case XPT_PATH_INQ: {
 		struct ccb_pathinq *cpi = &ccb->cpi;
 
 		cpi->version_num = 1;
 		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
 		cpi->target_sprt = 0;
 		cpi->hba_misc = PIM_NOBUSRESET;
 		cpi->hba_eng_cnt = 0;
 		cpi->max_target = STORVSC_MAX_TARGETS;
 		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
 		cpi->initiator_id = cpi->max_target;
 		cpi->bus_id = cam_sim_bus(sim);
 		cpi->base_transfer_speed = 300000;
 		cpi->transport = XPORT_SAS;
 		cpi->transport_version = 0;
 		cpi->protocol = PROTO_SCSI;
 		cpi->protocol_version = SCSI_REV_SPC2;
 		strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
 		strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
 		strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
 		cpi->unit_number = cam_sim_unit(sim);
 
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(ccb);
 		return;
 	}
 	case XPT_GET_TRAN_SETTINGS: {
 		struct  ccb_trans_settings *cts = &ccb->cts;
 
 		cts->transport = XPORT_SAS;
 		cts->transport_version = 0;
 		cts->protocol = PROTO_SCSI;
 		cts->protocol_version = SCSI_REV_SPC2;
 
 		/* enable tag queuing and disconnected mode */
 		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
 		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
 		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
 			
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(ccb);
 		return;
 	}
 	case XPT_SET_TRAN_SETTINGS:	{
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(ccb);
 		return;
 	}
 	case XPT_CALC_GEOMETRY:{
 		cam_calc_geometry(&ccb->ccg, 1);
 		xpt_done(ccb);
 		return;
 	}
 	case  XPT_RESET_BUS:
 	case  XPT_RESET_DEV:{
 #if HVS_HOST_RESET
 		if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) {
 			xpt_print(ccb->ccb_h.path,
 				"hv_storvsc_host_reset failed with %d\n", res);
 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
 			xpt_done(ccb);
 			return;
 		}
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(ccb);
 		return;
 #else
 		xpt_print(ccb->ccb_h.path,
 				  "%s reset not supported.\n",
 				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
 				  "bus" : "dev");
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		xpt_done(ccb);
 		return;
 #endif	/* HVS_HOST_RESET */
 	}
 	case XPT_SCSI_IO:
 	case XPT_IMMED_NOTIFY: {
 		struct hv_storvsc_request *reqp = NULL;
 
 		if (ccb->csio.cdb_len == 0) {
 			panic("cdl_len is 0\n");
 		}
 
 		if (LIST_EMPTY(&sc->hs_free_list)) {
 			ccb->ccb_h.status = CAM_REQUEUE_REQ;
 			if (sc->hs_frozen == 0) {
 				sc->hs_frozen = 1;
 				xpt_freeze_simq(sim, /* count*/1);
 			}
 			xpt_done(ccb);
 			return;
 		}
 
 		reqp = LIST_FIRST(&sc->hs_free_list);
 		LIST_REMOVE(reqp, link);
 
 		bzero(reqp, sizeof(struct hv_storvsc_request));
 		reqp->softc = sc;
 		
 		ccb->ccb_h.status |= CAM_SIM_QUEUED;
 		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
 			ccb->ccb_h.status = CAM_REQ_INVALID;
 			xpt_done(ccb);
 			return;
 		}
 
 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
 			callout_init(&reqp->callout, 1);
 			callout_reset_sbt(&reqp->callout,
 			    SBT_1MS * ccb->ccb_h.timeout, 0,
 			    storvsc_timeout, reqp, 0);
 #if HVS_TIMEOUT_TEST
 			cv_init(&reqp->event.cv, "storvsc timeout cv");
 			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
 					NULL, MTX_DEF);
 			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
 				case MODE_SELECT_10:
 				case SEND_DIAGNOSTIC:
 					/* To have timer send the request. */
 					return;
 				default:
 					break;
 			}
 #endif /* HVS_TIMEOUT_TEST */
 		}
 
 		if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) {
 			xpt_print(ccb->ccb_h.path,
 				"hv_storvsc_io_request failed with %d\n", res);
 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
 			storvsc_free_request(sc, reqp);
 			xpt_done(ccb);
 			return;
 		}
 		return;
 	}
 
 	default:
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		xpt_done(ccb);
 		return;
 	}
 }
 
 /**
  * @brief destroy bounce buffer
  *
  * This function is responsible for destroy a Scatter/Gather list
  * that create by storvsc_create_bounce_buffer()
  *
  * @param sgl- the Scatter/Gather need be destroy
  * @param sg_count- page count of the SG list.
  *
  */
 static void
 storvsc_destroy_bounce_buffer(struct sglist *sgl)
 {
 	struct hv_sgl_node *sgl_node = NULL;
 	if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
 		printf("storvsc error: not enough in use sgl\n");
 		return;
 	}
 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
 	LIST_REMOVE(sgl_node, link);
 	sgl_node->sgl_data = sgl;
 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
 }
 
 /**
  * @brief create bounce buffer
  *
  * This function is responsible for create a Scatter/Gather list,
  * which hold several pages that can be aligned with page size.
  *
  * @param seg_count- SG-list segments count
  * @param write - if WRITE_TYPE, set SG list page used size to 0,
  * otherwise set used size to page size.
  *
  * return NULL if create failed
  */
 static struct sglist *
 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
 {
 	int i = 0;
 	struct sglist *bounce_sgl = NULL;
 	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
 	struct hv_sgl_node *sgl_node = NULL;	
 
 	/* get struct sglist from free_sgl_list */
 	if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
 		printf("storvsc error: not enough free sgl\n");
 		return NULL;
 	}
 	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
 	LIST_REMOVE(sgl_node, link);
 	bounce_sgl = sgl_node->sgl_data;
 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
 
 	bounce_sgl->sg_maxseg = seg_count;
 
 	if (write == WRITE_TYPE)
 		bounce_sgl->sg_nseg = 0;
 	else
 		bounce_sgl->sg_nseg = seg_count;
 
 	for (i = 0; i < seg_count; i++)
 	        bounce_sgl->sg_segs[i].ss_len = buf_len;
 
 	return bounce_sgl;
 }
 
 /**
  * @brief copy data from SG list to bounce buffer
  *
  * This function is responsible for copy data from one SG list's segments
  * to another SG list which used as bounce buffer.
  *
  * @param bounce_sgl - the destination SG list
  * @param orig_sgl - the segment of the source SG list.
  * @param orig_sgl_count - the count of segments.
  * @param orig_sgl_count - indicate which segment need bounce buffer,
  *  set 1 means need.
  *
  */
 static void
 storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
 			       bus_dma_segment_t *orig_sgl,
 			       unsigned int orig_sgl_count,
 			       uint64_t seg_bits)
 {
 	int src_sgl_idx = 0;
 
 	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
 		if (seg_bits & (1 << src_sgl_idx)) {
 			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
 			    (void*)orig_sgl[src_sgl_idx].ds_addr,
 			    orig_sgl[src_sgl_idx].ds_len);
 
 			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
 			    orig_sgl[src_sgl_idx].ds_len;
 		}
 	}
 }
 
 /**
  * @brief copy data from SG list which used as bounce to another SG list
  *
  * This function is responsible for copy data from one SG list with bounce
  * buffer to another SG list's segments.
  *
  * @param dest_sgl - the destination SG list's segments
  * @param dest_sgl_count - the count of destination SG list's segment.
  * @param src_sgl - the source SG list.
  * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
  *
  */
 void
 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
 				    unsigned int dest_sgl_count,
 				    struct sglist* src_sgl,
 				    uint64_t seg_bits)
 {
 	int sgl_idx = 0;
 	
 	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
 		if (seg_bits & (1 << sgl_idx)) {
 			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
 			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
 			    src_sgl->sg_segs[sgl_idx].ss_len);
 		}
 	}
 }
 
 /**
  * @brief check SG list with bounce buffer or not
  *
  * This function is responsible for check if need bounce buffer for SG list.
  *
  * @param sgl - the SG list's segments
  * @param sg_count - the count of SG list's segment.
  * @param bits - segmengs number that need bounce buffer
  *
  * return -1 if SG list needless bounce buffer
  */
 static int
 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
 				unsigned int sg_count,
 				uint64_t *bits)
 {
 	int i = 0;
 	int offset = 0;
 	uint64_t phys_addr = 0;
 	uint64_t tmp_bits = 0;
 	boolean_t found_hole = FALSE;
 	boolean_t pre_aligned = TRUE;
 
 	if (sg_count < 2){
 		return -1;
 	}
 
 	*bits = 0;
 	
 	phys_addr = vtophys(sgl[0].ds_addr);
 	offset =  phys_addr - trunc_page(phys_addr);
 
 	if (offset != 0) {
 		pre_aligned = FALSE;
 		tmp_bits |= 1;
 	}
 
 	for (i = 1; i < sg_count; i++) {
 		phys_addr = vtophys(sgl[i].ds_addr);
 		offset =  phys_addr - trunc_page(phys_addr);
 
 		if (offset == 0) {
 			if (FALSE == pre_aligned){
 				/*
 				 * This segment is aligned, if the previous
 				 * one is not aligned, find a hole
 				 */
 				found_hole = TRUE;
 			}
 			pre_aligned = TRUE;
 		} else {
 			tmp_bits |= 1 << i;
 			if (!pre_aligned) {
 				if (phys_addr != vtophys(sgl[i-1].ds_addr +
 				    sgl[i-1].ds_len)) {
 					/*
 					 * Check whether connect to previous
 					 * segment,if not, find the hole
 					 */
 					found_hole = TRUE;
 				}
 			} else {
 				found_hole = TRUE;
 			}
 			pre_aligned = FALSE;
 		}
 	}
 
 	if (!found_hole) {
 		return (-1);
 	} else {
 		*bits = tmp_bits;
 		return 0;
 	}
 }
 
 /**
  * @brief Fill in a request structure based on a CAM control block
  *
  * Fills in a request structure based on the contents of a CAM control
  * block.  The request structure holds the payload information for
  * VSCSI protocol request.
  *
  * @param ccb pointer to a CAM contorl block
  * @param reqp pointer to a request structure
  */
 static int
 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 {
 	struct ccb_scsiio *csio = &ccb->csio;
 	uint64_t phys_addr;
 	uint32_t bytes_to_copy = 0;
 	uint32_t pfn_num = 0;
 	uint32_t pfn;
 	uint64_t not_aligned_seg_bits = 0;
 	
 	/* refer to struct vmscsi_req for meanings of these two fields */
 	reqp->vstor_packet.u.vm_srb.port =
 		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
 	reqp->vstor_packet.u.vm_srb.path_id =
 		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
 
 	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
 	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
 
 	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
 	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
 			csio->cdb_len);
 	} else {
 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
 			csio->cdb_len);
 	}
 
 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
 	case CAM_DIR_OUT:
 		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;	
 		break;
 	case CAM_DIR_IN:
 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
 		break;
 	case CAM_DIR_NONE:
 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
 		break;
 	default:
 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
 		break;
 	}
 
 	reqp->sense_data     = &csio->sense_data;
 	reqp->sense_info_len = csio->sense_len;
 
 	reqp->ccb = ccb;
 
 	if (0 == csio->dxfer_len) {
 		return (0);
 	}
 
 	reqp->data_buf.length = csio->dxfer_len;
 
 	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
 	case CAM_DATA_VADDR:
 	{
 		bytes_to_copy = csio->dxfer_len;
 		phys_addr = vtophys(csio->data_ptr);
 		reqp->data_buf.offset = phys_addr & PAGE_MASK;
 		
 		while (bytes_to_copy != 0) {
 			int bytes, page_offset;
 			phys_addr =
 			    vtophys(&csio->data_ptr[reqp->data_buf.length -
 			    bytes_to_copy]);
 			pfn = phys_addr >> PAGE_SHIFT;
 			reqp->data_buf.pfn_array[pfn_num] = pfn;
 			page_offset = phys_addr & PAGE_MASK;
 
 			bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
 
 			bytes_to_copy -= bytes;
 			pfn_num++;
 		}
 		break;
 	}
 
 	case CAM_DATA_SG:
 	{
 		int i = 0;
 		int offset = 0;
 		int ret;
 
 		bus_dma_segment_t *storvsc_sglist =
 		    (bus_dma_segment_t *)ccb->csio.data_ptr;
 		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
 
 		printf("Storvsc: get SG I/O operation, %d\n",
 		    reqp->vstor_packet.u.vm_srb.data_in);
 
 		if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){
 			printf("Storvsc: %d segments is too much, "
 			    "only support %d segments\n",
 			    storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT);
 			return (EINVAL);
 		}
 
 		/*
 		 * We create our own bounce buffer function currently. Idealy
 		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
 		 * code there is no callback API to check the page alignment of
 		 * middle segments before busdma can decide if a bounce buffer
 		 * is needed for particular segment. There is callback,
 		 * "bus_dma_filter_t *filter", but the parrameters are not
 		 * sufficient for storvsc driver.
 		 * TODO:
 		 *	Add page alignment check in BUS_DMA(9) callback. Once
 		 *	this is complete, switch the following code to use
 		 *	BUS_DMA(9) for storvsc bounce buffer support.
 		 */
 		/* check if we need to create bounce buffer */
 		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
 		    storvsc_sg_count, &not_aligned_seg_bits);
 		if (ret != -1) {
 			reqp->bounce_sgl =
 			    storvsc_create_bounce_buffer(storvsc_sg_count,
 			    reqp->vstor_packet.u.vm_srb.data_in);
 			if (NULL == reqp->bounce_sgl) {
 				printf("Storvsc_error: "
 				    "create bounce buffer failed.\n");
 				return (ENOMEM);
 			}
 
 			reqp->bounce_sgl_count = storvsc_sg_count;
 			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
 
 			/*
 			 * if it is write, we need copy the original data
 			 *to bounce buffer
 			 */
 			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
 				storvsc_copy_sgl_to_bounce_buf(
 				    reqp->bounce_sgl,
 				    storvsc_sglist,
 				    storvsc_sg_count,
 				    reqp->not_aligned_seg_bits);
 			}
 
 			/* transfer virtual address to physical frame number */
 			if (reqp->not_aligned_seg_bits & 0x1){
  				phys_addr =
 				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
 			}else{
  				phys_addr =
 					vtophys(storvsc_sglist[0].ds_addr);
 			}
 			reqp->data_buf.offset = phys_addr & PAGE_MASK;
 
 			pfn = phys_addr >> PAGE_SHIFT;
 			reqp->data_buf.pfn_array[0] = pfn;
 			
 			for (i = 1; i < storvsc_sg_count; i++) {
 				if (reqp->not_aligned_seg_bits & (1 << i)) {
 					phys_addr =
 					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
 				} else {
 					phys_addr =
 					    vtophys(storvsc_sglist[i].ds_addr);
 				}
 
 				pfn = phys_addr >> PAGE_SHIFT;
 				reqp->data_buf.pfn_array[i] = pfn;
 			}
 		} else {
 			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
 
 			reqp->data_buf.offset = phys_addr & PAGE_MASK;
 
 			for (i = 0; i < storvsc_sg_count; i++) {
 				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
 				pfn = phys_addr >> PAGE_SHIFT;
 				reqp->data_buf.pfn_array[i] = pfn;
 			}
 
 			/* check the last segment cross boundary or not */
 			offset = phys_addr & PAGE_MASK;
 			if (offset) {
 				phys_addr =
 				    vtophys(storvsc_sglist[i-1].ds_addr +
 				    PAGE_SIZE - offset);
 				pfn = phys_addr >> PAGE_SHIFT;
 				reqp->data_buf.pfn_array[i] = pfn;
 			}
 			
 			reqp->bounce_sgl_count = 0;
 		}
 		break;
 	}
 	default:
 		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
 		return(EINVAL);
 	}
 
 	return(0);
 }
 
 /*
  * Modified based on scsi_print_inquiry which is responsible to
  * print the detail information for scsi_inquiry_data.
  *
  * Return 1 if it is valid, 0 otherwise.
  */
 static inline int
 is_inquiry_valid(const struct scsi_inquiry_data *inq_data)
 {
 	uint8_t type;
 	char vendor[16], product[48], revision[16];
 
 	/*
 	 * Check device type and qualifier
 	 */
 	if (!(SID_QUAL_IS_VENDOR_UNIQUE(inq_data) ||
 	    SID_QUAL(inq_data) == SID_QUAL_LU_CONNECTED))
 		return (0);
 
 	type = SID_TYPE(inq_data);
 	switch (type) {
 	case T_DIRECT:
 	case T_SEQUENTIAL:
 	case T_PRINTER:
 	case T_PROCESSOR:
 	case T_WORM:
 	case T_CDROM:
 	case T_SCANNER:
 	case T_OPTICAL:
 	case T_CHANGER:
 	case T_COMM:
 	case T_STORARRAY:
 	case T_ENCLOSURE:
 	case T_RBC:
 	case T_OCRW:
 	case T_OSD:
 	case T_ADC:
 		break;
 	case T_NODEVICE:
 	default:
 		return (0);
 	}
 
 	/*
 	 * Check vendor, product, and revision
 	 */
 	cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor),
 	    sizeof(vendor));
 	cam_strvis(product, inq_data->product, sizeof(inq_data->product),
 	    sizeof(product));
 	cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision),
 	    sizeof(revision));
 	if (strlen(vendor) == 0  ||
 	    strlen(product) == 0 ||
 	    strlen(revision) == 0)
 		return (0);
 
 	return (1);
 }
 
 /**
  * @brief completion function before returning to CAM
  *
  * I/O process has been completed and the result needs
  * to be passed to the CAM layer.
  * Free resources related to this request.
  *
  * @param reqp pointer to a request structure
  */
 static void
 storvsc_io_done(struct hv_storvsc_request *reqp)
 {
 	union ccb *ccb = reqp->ccb;
 	struct ccb_scsiio *csio = &ccb->csio;
 	struct storvsc_softc *sc = reqp->softc;
 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
 	bus_dma_segment_t *ori_sglist = NULL;
 	int ori_sg_count = 0;
 
 	/* destroy bounce buffer if it is used */
 	if (reqp->bounce_sgl_count) {
 		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
 		ori_sg_count = ccb->csio.sglist_cnt;
 
 		/*
 		 * If it is READ operation, we should copy back the data
 		 * to original SG list.
 		 */
 		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
 			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
 			    ori_sg_count,
 			    reqp->bounce_sgl,
 			    reqp->not_aligned_seg_bits);
 		}
 
 		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
 		reqp->bounce_sgl_count = 0;
 	}
 		
 	if (reqp->retries > 0) {
 		mtx_lock(&sc->hs_lock);
 #if HVS_TIMEOUT_TEST
 		xpt_print(ccb->ccb_h.path,
 			"%u: IO returned after timeout, "
 			"waking up timer handler if any.\n", ticks);
 		mtx_lock(&reqp->event.mtx);
 		cv_signal(&reqp->event.cv);
 		mtx_unlock(&reqp->event.mtx);
 #endif
 		reqp->retries = 0;
 		xpt_print(ccb->ccb_h.path,
 			"%u: IO returned after timeout, "
 			"stopping timer if any.\n", ticks);
 		mtx_unlock(&sc->hs_lock);
 	}
 
 	/*
 	 * callout_drain() will wait for the timer handler to finish
 	 * if it is running. So we don't need any lock to synchronize
 	 * between this routine and the timer handler.
 	 * Note that we need to make sure reqp is not freed when timer
 	 * handler is using or will use it.
 	 */
 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
 		callout_drain(&reqp->callout);
 	}
 
 	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
 	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
 	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
 		const struct scsi_generic *cmd;
 
 		/*
 		 * Check whether the data for INQUIRY cmd is valid or
 		 * not.  Windows 10 and Windows 2016 send all zero
 		 * inquiry data to VM even for unpopulated slots.
 		 */
 		cmd = (const struct scsi_generic *)
 		    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
 		     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
 		if (cmd->opcode == INQUIRY &&
 		    is_inquiry_valid(
 		    (const struct scsi_inquiry_data *)csio->data_ptr) == 0) {
 			ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
 			if (bootverbose) {
 				mtx_lock(&sc->hs_lock);
 				xpt_print(ccb->ccb_h.path,
 				    "storvsc uninstalled device\n");
 				mtx_unlock(&sc->hs_lock);
 			}
 		} else {
 			ccb->ccb_h.status |= CAM_REQ_CMP;
 		}
 	} else {
 		mtx_lock(&sc->hs_lock);
 		xpt_print(ccb->ccb_h.path,
 			"storvsc scsi_status = %d\n",
 			vm_srb->scsi_status);
 		mtx_unlock(&sc->hs_lock);
 		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
 	}
 
 	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
 	ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
 
 	if (reqp->sense_info_len != 0) {
 		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
 		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
 	}
 
 	mtx_lock(&sc->hs_lock);
 	if (reqp->softc->hs_frozen == 1) {
 		xpt_print(ccb->ccb_h.path,
 			"%u: storvsc unfreezing softc 0x%p.\n",
 			ticks, reqp->softc);
 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
 		reqp->softc->hs_frozen = 0;
 	}
 	storvsc_free_request(sc, reqp);
 	xpt_done(ccb);
 	mtx_unlock(&sc->hs_lock);
 }
 
 /**
  * @brief Free a request structure
  *
  * Free a request structure by returning it to the free list
  *
  * @param sc pointer to a softc
  * @param reqp pointer to a request structure
  */	
 static void
 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
 {
 
 	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
 }
 
 /**
  * @brief Determine type of storage device from GUID
  *
  * Using the type GUID, determine if this is a StorVSC (paravirtual
  * SCSI or BlkVSC (paravirtual IDE) device.
  *
  * @param dev a device
  * returns an enum
  */
 static enum hv_storage_type
 storvsc_get_storage_type(device_t dev)
 {
 	const char *p = vmbus_get_type(dev);
 
 	if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) {
 		return DRIVER_BLKVSC;
 	} else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) {
 		return DRIVER_STORVSC;
 	}
 	return (DRIVER_UNKNOWN);
 }
 
Index: head/sys/dev/hyperv/vmbus/hv_channel.c
===================================================================
--- head/sys/dev/hyperv/vmbus/hv_channel.c	(revision 296082)
+++ head/sys/dev/hyperv/vmbus/hv_channel.c	(revision 296083)
@@ -1,956 +1,943 @@
 /*-
  * Copyright (c) 2009-2012 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <machine/bus.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 
 #include "hv_vmbus_priv.h"
 
 static int 	vmbus_channel_create_gpadl_header(
 			/* must be phys and virt contiguous*/
 			void*				contig_buffer,
 			/* page-size multiple */
 			uint32_t 			size,
 			hv_vmbus_channel_msg_info**	msg_info,
 			uint32_t*			message_count);
 
 static void 	vmbus_channel_set_event(hv_vmbus_channel* channel);
 static void	VmbusProcessChannelEvent(void* channel, int pending);
 
 /**
  *  @brief Trigger an event notification on the specified channel
  */
 static void
 vmbus_channel_set_event(hv_vmbus_channel *channel)
 {
 	hv_vmbus_monitor_page *monitor_page;
 
 	if (channel->offer_msg.monitor_allocated) {
 		/* Each uint32_t represents 32 channels */
 		synch_set_bit((channel->offer_msg.child_rel_id & 31),
 			((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
 				+ ((channel->offer_msg.child_rel_id >> 5))));
 
 		monitor_page = (hv_vmbus_monitor_page *)
 			hv_vmbus_g_connection.monitor_page_2;
 
 		synch_set_bit(channel->monitor_bit,
 			(uint32_t *)&monitor_page->
 				trigger_group[channel->monitor_group].u.pending);
 	} else {
 		hv_vmbus_set_event(channel);
 	}
 
 }
 
 /**
  * @brief Open the specified channel
  */
 int
 hv_vmbus_channel_open(
 	hv_vmbus_channel*		new_channel,
 	uint32_t			send_ring_buffer_size,
 	uint32_t			recv_ring_buffer_size,
 	void*				user_data,
 	uint32_t			user_data_len,
 	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
 	void* 				context)
 {
 
 	int ret = 0;
 	void *in, *out;
 	hv_vmbus_channel_open_channel*	open_msg;
 	hv_vmbus_channel_msg_info* 	open_info;
 
 	mtx_lock(&new_channel->sc_lock);
 	if (new_channel->state == HV_CHANNEL_OPEN_STATE) {
 	    new_channel->state = HV_CHANNEL_OPENING_STATE;
 	} else {
 	    mtx_unlock(&new_channel->sc_lock);
 	    if(bootverbose)
 		printf("VMBUS: Trying to open channel <%p> which in "
 		    "%d state.\n", new_channel, new_channel->state);
 	    return (EINVAL);
 	}
 	mtx_unlock(&new_channel->sc_lock);
 
 	new_channel->on_channel_callback = pfn_on_channel_callback;
 	new_channel->channel_callback_context = context;
 
 	new_channel->rxq = hv_vmbus_g_context.hv_event_queue[new_channel->target_cpu];
 	TASK_INIT(&new_channel->channel_task, 0, VmbusProcessChannelEvent, new_channel);
 
 	/* Allocate the ring buffer */
 	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
 	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
 	KASSERT(out != NULL,
 	    ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
 	if (out == NULL)
 		return (ENOMEM);
 
 	in = ((uint8_t *) out + send_ring_buffer_size);
 
 	new_channel->ring_buffer_pages = out;
 	new_channel->ring_buffer_page_count = (send_ring_buffer_size +
 	    recv_ring_buffer_size) >> PAGE_SHIFT;
 	new_channel->ring_buffer_size = send_ring_buffer_size +
 	    recv_ring_buffer_size;
 
 	hv_vmbus_ring_buffer_init(
 		&new_channel->outbound,
 		out,
 		send_ring_buffer_size);
 
 	hv_vmbus_ring_buffer_init(
 		&new_channel->inbound,
 		in,
 		recv_ring_buffer_size);
 
 	/**
 	 * Establish the gpadl for the ring buffer
 	 */
 	new_channel->ring_buffer_gpadl_handle = 0;
 
 	ret = hv_vmbus_channel_establish_gpadl(new_channel,
 		new_channel->outbound.ring_buffer,
 		send_ring_buffer_size + recv_ring_buffer_size,
 		&new_channel->ring_buffer_gpadl_handle);
 
 	/**
 	 * Create and init the channel open message
 	 */
 	open_info = (hv_vmbus_channel_msg_info*) malloc(
 		sizeof(hv_vmbus_channel_msg_info) +
 			sizeof(hv_vmbus_channel_open_channel),
 		M_DEVBUF,
 		M_NOWAIT);
 	KASSERT(open_info != NULL,
 	    ("Error VMBUS: malloc failed to allocate Open Channel message!"));
 
 	if (open_info == NULL)
 		return (ENOMEM);
 
 	sema_init(&open_info->wait_sema, 0, "Open Info Sema");
 
 	open_msg = (hv_vmbus_channel_open_channel*) open_info->msg;
 	open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL;
 	open_msg->open_id = new_channel->offer_msg.child_rel_id;
 	open_msg->child_rel_id = new_channel->offer_msg.child_rel_id;
 	open_msg->ring_buffer_gpadl_handle =
 		new_channel->ring_buffer_gpadl_handle;
 	open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size
 		>> PAGE_SHIFT;
 	open_msg->target_vcpu = new_channel->target_vcpu;
 
 	if (user_data_len)
 		memcpy(open_msg->user_data, user_data, user_data_len);
 
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	TAILQ_INSERT_TAIL(
 		&hv_vmbus_g_connection.channel_msg_anchor,
 		open_info,
 		msg_list_entry);
 	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 
 	ret = hv_vmbus_post_message(
 		open_msg, sizeof(hv_vmbus_channel_open_channel));
 
 	if (ret != 0)
 	    goto cleanup;
 
 	ret = sema_timedwait(&open_info->wait_sema, 5 * hz); /* KYS 5 seconds */
 
 	if (ret) {
 	    if(bootverbose)
 		printf("VMBUS: channel <%p> open timeout.\n", new_channel);
 	    goto cleanup;
 	}
 
 	if (open_info->response.open_result.status == 0) {
 	    new_channel->state = HV_CHANNEL_OPENED_STATE;
 	    if(bootverbose)
 		printf("VMBUS: channel <%p> open success.\n", new_channel);
 	} else {
 	    if(bootverbose)
 		printf("Error VMBUS: channel <%p> open failed - %d!\n",
 			new_channel, open_info->response.open_result.status);
 	}
 
 	cleanup:
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	TAILQ_REMOVE(
 		&hv_vmbus_g_connection.channel_msg_anchor,
 		open_info,
 		msg_list_entry);
 	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	sema_destroy(&open_info->wait_sema);
 	free(open_info, M_DEVBUF);
 
 	return (ret);
 }
 
 /**
  * @brief Create a gpadl for the specified buffer
  */
 static int
 vmbus_channel_create_gpadl_header(
 	void*				contig_buffer,
 	uint32_t			size,	/* page-size multiple */
 	hv_vmbus_channel_msg_info**	msg_info,
 	uint32_t*			message_count)
 {
 	int				i;
 	int				page_count;
 	unsigned long long 		pfn;
 	uint32_t			msg_size;
 	hv_vmbus_channel_gpadl_header*	gpa_header;
 	hv_vmbus_channel_gpadl_body*	gpadl_body;
 	hv_vmbus_channel_msg_info*	msg_header;
 	hv_vmbus_channel_msg_info*	msg_body;
 
 	int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize;
 
 	page_count = size >> PAGE_SHIFT;
 	pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT;
 
 	/*do we need a gpadl body msg */
 	pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE
 	    - sizeof(hv_vmbus_channel_gpadl_header)
 	    - sizeof(hv_gpa_range);
 	pfnCount = pfnSize / sizeof(uint64_t);
 
 	if (page_count > pfnCount) { /* if(we need a gpadl body)	*/
 	    /* fill in the header		*/
 	    msg_size = sizeof(hv_vmbus_channel_msg_info)
 		+ sizeof(hv_vmbus_channel_gpadl_header)
 		+ sizeof(hv_gpa_range)
 		+ pfnCount * sizeof(uint64_t);
 	    msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
 	    KASSERT(
 		msg_header != NULL,
 		("Error VMBUS: malloc failed to allocate Gpadl Message!"));
 	    if (msg_header == NULL)
 		return (ENOMEM);
 
 	    TAILQ_INIT(&msg_header->sub_msg_list_anchor);
 	    msg_header->message_size = msg_size;
 
 	    gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
 	    gpa_header->range_count = 1;
 	    gpa_header->range_buf_len = sizeof(hv_gpa_range)
 		+ page_count * sizeof(uint64_t);
 	    gpa_header->range[0].byte_offset = 0;
 	    gpa_header->range[0].byte_count = size;
 	    for (i = 0; i < pfnCount; i++) {
 		gpa_header->range[0].pfn_array[i] = pfn + i;
 	    }
 	    *msg_info = msg_header;
 	    *message_count = 1;
 
 	    pfnSum = pfnCount;
 	    pfnLeft = page_count - pfnCount;
 
 	    /*
 	     *  figure out how many pfns we can fit
 	     */
 	    pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE
 		- sizeof(hv_vmbus_channel_gpadl_body);
 	    pfnCount = pfnSize / sizeof(uint64_t);
 
 	    /*
 	     * fill in the body
 	     */
 	    while (pfnLeft) {
 		if (pfnLeft > pfnCount) {
 		    pfnCurr = pfnCount;
 		} else {
 		    pfnCurr = pfnLeft;
 		}
 
 		msg_size = sizeof(hv_vmbus_channel_msg_info) +
 		    sizeof(hv_vmbus_channel_gpadl_body) +
 		    pfnCurr * sizeof(uint64_t);
 		msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
 		KASSERT(
 		    msg_body != NULL,
 		    ("Error VMBUS: malloc failed to allocate Gpadl msg_body!"));
 		if (msg_body == NULL)
 		    return (ENOMEM);
 
 		msg_body->message_size = msg_size;
 		(*message_count)++;
 		gpadl_body =
 		    (hv_vmbus_channel_gpadl_body*) msg_body->msg;
 		/*
 		 * gpadl_body->gpadl = kbuffer;
 		 */
 		for (i = 0; i < pfnCurr; i++) {
 		    gpadl_body->pfn[i] = pfn + pfnSum + i;
 		}
 
 		TAILQ_INSERT_TAIL(
 		    &msg_header->sub_msg_list_anchor,
 		    msg_body,
 		    msg_list_entry);
 		pfnSum += pfnCurr;
 		pfnLeft -= pfnCurr;
 	    }
 	} else { /* else everything fits in a header */
 
 	    msg_size = sizeof(hv_vmbus_channel_msg_info) +
 		sizeof(hv_vmbus_channel_gpadl_header) +
 		sizeof(hv_gpa_range) +
 		page_count * sizeof(uint64_t);
 	    msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
 	    KASSERT(
 		msg_header != NULL,
 		("Error VMBUS: malloc failed to allocate Gpadl Message!"));
 	    if (msg_header == NULL)
 		return (ENOMEM);
 
 	    msg_header->message_size = msg_size;
 
 	    gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
 	    gpa_header->range_count = 1;
 	    gpa_header->range_buf_len = sizeof(hv_gpa_range) +
 		page_count * sizeof(uint64_t);
 	    gpa_header->range[0].byte_offset = 0;
 	    gpa_header->range[0].byte_count = size;
 	    for (i = 0; i < page_count; i++) {
 		gpa_header->range[0].pfn_array[i] = pfn + i;
 	    }
 
 	    *msg_info = msg_header;
 	    *message_count = 1;
 	}
 
 	return (0);
 }
 
 /**
  * @brief Establish a GPADL for the specified buffer
  */
 int
 hv_vmbus_channel_establish_gpadl(
 	hv_vmbus_channel*	channel,
 	void*			contig_buffer,
 	uint32_t		size, /* page-size multiple */
 	uint32_t*		gpadl_handle)
 
 {
 	int ret = 0;
 	hv_vmbus_channel_gpadl_header*	gpadl_msg;
 	hv_vmbus_channel_gpadl_body*	gpadl_body;
 	hv_vmbus_channel_msg_info*	msg_info;
 	hv_vmbus_channel_msg_info*	sub_msg_info;
 	uint32_t			msg_count;
 	hv_vmbus_channel_msg_info*	curr;
 	uint32_t			next_gpadl_handle;
 
 	next_gpadl_handle = atomic_fetchadd_int(
 	    &hv_vmbus_g_connection.next_gpadl_handle, 1);
 
 	ret = vmbus_channel_create_gpadl_header(
 		contig_buffer, size, &msg_info, &msg_count);
 
 	if(ret != 0) {
 		/*
 		 * XXX
 		 * We can _not_ even revert the above incremental,
 		 * if multiple GPADL establishments are running
 		 * parallelly, decrement the global next_gpadl_handle
 		 * is calling for _big_ trouble.  A better solution
 		 * is to have a 0-based GPADL id bitmap ...
 		 */
 		return ret;
 	}
 
 	sema_init(&msg_info->wait_sema, 0, "Open Info Sema");
 	gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg;
 	gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER;
 	gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id;
 	gpadl_msg->gpadl = next_gpadl_handle;
 
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	TAILQ_INSERT_TAIL(
 		&hv_vmbus_g_connection.channel_msg_anchor,
 		msg_info,
 		msg_list_entry);
 
 	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 
 	ret = hv_vmbus_post_message(
 		gpadl_msg,
 		msg_info->message_size -
 		    (uint32_t) sizeof(hv_vmbus_channel_msg_info));
 
 	if (ret != 0)
 	    goto cleanup;
 
 	if (msg_count > 1) {
 	    TAILQ_FOREACH(curr,
 		    &msg_info->sub_msg_list_anchor, msg_list_entry) {
 		sub_msg_info = curr;
 		gpadl_body =
 		    (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg;
 
 		gpadl_body->header.message_type =
 		    HV_CHANNEL_MESSAGE_GPADL_BODY;
 		gpadl_body->gpadl = next_gpadl_handle;
 
 		ret = hv_vmbus_post_message(
 			gpadl_body,
 			sub_msg_info->message_size
 			    - (uint32_t) sizeof(hv_vmbus_channel_msg_info));
 		 /* if (the post message failed) give up and clean up */
 		if(ret != 0)
 		    goto cleanup;
 	    }
 	}
 
 	ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds*/
 	if (ret != 0)
 	    goto cleanup;
 
 	*gpadl_handle = gpadl_msg->gpadl;
 
 cleanup:
 
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
 		msg_info, msg_list_entry);
 	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 
 	sema_destroy(&msg_info->wait_sema);
 	free(msg_info, M_DEVBUF);
 
 	return (ret);
 }
 
 /**
  * @brief Teardown the specified GPADL handle
  */
 int
 hv_vmbus_channel_teardown_gpdal(
 	hv_vmbus_channel*	channel,
 	uint32_t		gpadl_handle)
 {
 	int					ret = 0;
 	hv_vmbus_channel_gpadl_teardown*	msg;
 	hv_vmbus_channel_msg_info*		info;
 
 	info = (hv_vmbus_channel_msg_info *)
 		malloc(	sizeof(hv_vmbus_channel_msg_info) +
 			sizeof(hv_vmbus_channel_gpadl_teardown),
 				M_DEVBUF, M_NOWAIT);
 	KASSERT(info != NULL,
 	    ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!"));
 	if (info == NULL) {
 	    ret = ENOMEM;
 	    goto cleanup;
 	}
 
 	sema_init(&info->wait_sema, 0, "Open Info Sema");
 
 	msg = (hv_vmbus_channel_gpadl_teardown*) info->msg;
 
 	msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN;
 	msg->child_rel_id = channel->offer_msg.child_rel_id;
 	msg->gpadl = gpadl_handle;
 
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor,
 			info, msg_list_entry);
 	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 
 	ret = hv_vmbus_post_message(msg,
 			sizeof(hv_vmbus_channel_gpadl_teardown));
 	if (ret != 0) 
 	    goto cleanup;
 	
 	ret = sema_timedwait(&info->wait_sema, 5 * hz); /* KYS 5 seconds */
 
 cleanup:
 	/*
 	 * Received a torndown response
 	 */
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
 			info, msg_list_entry);
 	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	sema_destroy(&info->wait_sema);
 	free(info, M_DEVBUF);
 
 	return (ret);
 }
 
 static void
 hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
 {
 	int ret = 0;
 	struct taskqueue *rxq = channel->rxq;
 	hv_vmbus_channel_close_channel* msg;
 	hv_vmbus_channel_msg_info* info;
 
 	channel->state = HV_CHANNEL_OPEN_STATE;
 	channel->sc_creation_callback = NULL;
 
 	/*
 	 * set rxq to NULL to avoid more requests be scheduled
 	 */
 	channel->rxq = NULL;
 	taskqueue_drain(rxq, &channel->channel_task);
-	/*
-	 * Grab the lock to prevent race condition when a packet received
-	 * and unloading driver is in the process.
-	 */
-	mtx_lock(&channel->inbound_lock);
 	channel->on_channel_callback = NULL;
-	mtx_unlock(&channel->inbound_lock);
 
 	/**
 	 * Send a closing message
 	 */
 	info = (hv_vmbus_channel_msg_info *)
 		malloc(	sizeof(hv_vmbus_channel_msg_info) +
 			sizeof(hv_vmbus_channel_close_channel),
 				M_DEVBUF, M_NOWAIT);
 	KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!"));
 	if(info == NULL)
 	    return;
 
 	msg = (hv_vmbus_channel_close_channel*) info->msg;
 	msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL;
 	msg->child_rel_id = channel->offer_msg.child_rel_id;
 
 	ret = hv_vmbus_post_message(
 		msg, sizeof(hv_vmbus_channel_close_channel));
 
 	/* Tear down the gpadl for the channel's ring buffer */
 	if (channel->ring_buffer_gpadl_handle) {
 		hv_vmbus_channel_teardown_gpdal(channel,
 			channel->ring_buffer_gpadl_handle);
 	}
 
 	/* TODO: Send a msg to release the childRelId */
 
 	/* cleanup the ring buffers for this channel */
 	hv_ring_buffer_cleanup(&channel->outbound);
 	hv_ring_buffer_cleanup(&channel->inbound);
 
 	contigfree(channel->ring_buffer_pages, channel->ring_buffer_size,
 	    M_DEVBUF);
 
 	free(info, M_DEVBUF);
 }
 
 /**
  * @brief Close the specified channel
  */
 void
 hv_vmbus_channel_close(hv_vmbus_channel *channel)
 {
 	hv_vmbus_channel*	sub_channel;
 
 	if (channel->primary_channel != NULL) {
 		/*
 		 * We only close multi-channels when the primary is
 		 * closed.
 		 */
 		return;
 	}
 
 	/*
 	 * Close all multi-channels first.
 	 */
 	TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor,
 	    sc_list_entry) {
 		if (sub_channel->state != HV_CHANNEL_OPENED_STATE)
 			continue;
 		hv_vmbus_channel_close_internal(sub_channel);
 	}
 	/*
 	 * Then close the primary channel.
 	 */
 	hv_vmbus_channel_close_internal(channel);
 }
 
 /**
  * @brief Send the specified buffer on the given channel
  */
 int
 hv_vmbus_channel_send_packet(
 	hv_vmbus_channel*	channel,
 	void*			buffer,
 	uint32_t		buffer_len,
 	uint64_t		request_id,
 	hv_vmbus_packet_type	type,
 	uint32_t		flags)
 {
 	int			ret = 0;
 	hv_vm_packet_descriptor	desc;
 	uint32_t		packet_len;
 	uint64_t		aligned_data;
 	uint32_t		packet_len_aligned;
 	boolean_t		need_sig;
 	hv_vmbus_sg_buffer_list	buffer_list[3];
 
 	packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
 	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
 	aligned_data = 0;
 
 	/* Setup the descriptor */
 	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
 	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
 			    /* in 8-bytes granularity */
 	desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3;
 	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
 	desc.transaction_id = request_id;
 
 	buffer_list[0].data = &desc;
 	buffer_list[0].length = sizeof(hv_vm_packet_descriptor);
 
 	buffer_list[1].data = buffer;
 	buffer_list[1].length = buffer_len;
 
 	buffer_list[2].data = &aligned_data;
 	buffer_list[2].length = packet_len_aligned - packet_len;
 
 	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
 	    &need_sig);
 
 	/* TODO: We should determine if this is optional */
 	if (ret == 0 && need_sig) {
 		vmbus_channel_set_event(channel);
 	}
 
 	return (ret);
 }
 
 /**
  * @brief Send a range of single-page buffer packets using
  * a GPADL Direct packet type
  */
 int
 hv_vmbus_channel_send_packet_pagebuffer(
 	hv_vmbus_channel*	channel,
 	hv_vmbus_page_buffer	page_buffers[],
 	uint32_t		page_count,
 	void*			buffer,
 	uint32_t		buffer_len,
 	uint64_t		request_id)
 {
 
 	int					ret = 0;
 	boolean_t				need_sig;
 	uint32_t				packet_len;
 	uint32_t				page_buflen;
 	uint32_t				packetLen_aligned;
 	hv_vmbus_sg_buffer_list			buffer_list[4];
 	hv_vmbus_channel_packet_page_buffer	desc;
 	uint32_t				descSize;
 	uint64_t				alignedData = 0;
 
 	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
 		return (EINVAL);
 
 	/*
 	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
 	 *  is the largest size we support
 	 */
 	descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
 	page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
 	packet_len = descSize + page_buflen + buffer_len;
 	packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
 
 	/* Setup the descriptor */
 	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
 	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
 	/* in 8-bytes granularity */
 	desc.data_offset8 = (descSize + page_buflen) >> 3;
 	desc.length8 = (uint16_t) (packetLen_aligned >> 3);
 	desc.transaction_id = request_id;
 	desc.range_count = page_count;
 
 	buffer_list[0].data = &desc;
 	buffer_list[0].length = descSize;
 
 	buffer_list[1].data = page_buffers;
 	buffer_list[1].length = page_buflen;
 
 	buffer_list[2].data = buffer;
 	buffer_list[2].length = buffer_len;
 
 	buffer_list[3].data = &alignedData;
 	buffer_list[3].length = packetLen_aligned - packet_len;
 
 	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4,
 	    &need_sig);
 
 	/* TODO: We should determine if this is optional */
 	if (ret == 0 && need_sig) {
 		vmbus_channel_set_event(channel);
 	}
 
 	return (ret);
 }
 
 /**
  * @brief Send a multi-page buffer packet using a GPADL Direct packet type
  */
 int
 hv_vmbus_channel_send_packet_multipagebuffer(
 	hv_vmbus_channel*		channel,
 	hv_vmbus_multipage_buffer*	multi_page_buffer,
 	void*				buffer,
 	uint32_t			buffer_len,
 	uint64_t			request_id)
 {
 
 	int			ret = 0;
 	uint32_t		desc_size;
 	boolean_t		need_sig;
 	uint32_t		packet_len;
 	uint32_t		packet_len_aligned;
 	uint32_t		pfn_count;
 	uint64_t		aligned_data = 0;
 	hv_vmbus_sg_buffer_list	buffer_list[3];
 	hv_vmbus_channel_packet_multipage_buffer desc;
 
 	pfn_count =
 	    HV_NUM_PAGES_SPANNED(
 		    multi_page_buffer->offset,
 		    multi_page_buffer->length);
 
 	if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
 	    return (EINVAL);
 	/*
 	 * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer
 	 * is the largest size we support
 	 */
 	desc_size =
 	    sizeof(hv_vmbus_channel_packet_multipage_buffer) -
 		    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
 			sizeof(uint64_t));
 	packet_len = desc_size + buffer_len;
 	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
 
 	/*
 	 * Setup the descriptor
 	 */
 	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
 	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
 	desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
 	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
 	desc.transaction_id = request_id;
 	desc.range_count = 1;
 
 	desc.range.length = multi_page_buffer->length;
 	desc.range.offset = multi_page_buffer->offset;
 
 	memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
 		pfn_count * sizeof(uint64_t));
 
 	buffer_list[0].data = &desc;
 	buffer_list[0].length = desc_size;
 
 	buffer_list[1].data = buffer;
 	buffer_list[1].length = buffer_len;
 
 	buffer_list[2].data = &aligned_data;
 	buffer_list[2].length = packet_len_aligned - packet_len;
 
 	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
 	    &need_sig);
 
 	/* TODO: We should determine if this is optional */
 	if (ret == 0 && need_sig) {
 	    vmbus_channel_set_event(channel);
 	}
 
 	return (ret);
 }
 
 /**
  * @brief Retrieve the user packet on the specified channel
  */
 int
 hv_vmbus_channel_recv_packet(
 	hv_vmbus_channel*	channel,
 	void*			Buffer,
 	uint32_t		buffer_len,
 	uint32_t*		buffer_actual_len,
 	uint64_t*		request_id)
 {
 	int			ret;
 	uint32_t		user_len;
 	uint32_t		packet_len;
 	hv_vm_packet_descriptor	desc;
 
 	*buffer_actual_len = 0;
 	*request_id = 0;
 
 	ret = hv_ring_buffer_peek(&channel->inbound, &desc,
 		sizeof(hv_vm_packet_descriptor));
 	if (ret != 0)
 		return (0);
 
 	packet_len = desc.length8 << 3;
 	user_len = packet_len - (desc.data_offset8 << 3);
 
 	*buffer_actual_len = user_len;
 
 	if (user_len > buffer_len)
 		return (EINVAL);
 
 	*request_id = desc.transaction_id;
 
 	/* Copy over the packet to the user buffer */
 	ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
 		(desc.data_offset8 << 3));
 
 	return (0);
 }
 
 /**
  * @brief Retrieve the raw packet on the specified channel
  */
 int
 hv_vmbus_channel_recv_packet_raw(
 	hv_vmbus_channel*	channel,
 	void*			buffer,
 	uint32_t		buffer_len,
 	uint32_t*		buffer_actual_len,
 	uint64_t*		request_id)
 {
 	int		ret;
 	uint32_t	packetLen;
 	uint32_t	userLen;
 	hv_vm_packet_descriptor	desc;
 
 	*buffer_actual_len = 0;
 	*request_id = 0;
 
 	ret = hv_ring_buffer_peek(
 		&channel->inbound, &desc,
 		sizeof(hv_vm_packet_descriptor));
 
 	if (ret != 0)
 	    return (0);
 
 	packetLen = desc.length8 << 3;
 	userLen = packetLen - (desc.data_offset8 << 3);
 
 	*buffer_actual_len = packetLen;
 
 	if (packetLen > buffer_len)
 	    return (ENOBUFS);
 
 	*request_id = desc.transaction_id;
 
 	/* Copy over the entire packet to the user buffer */
 	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);
 
 	return (0);
 }
 
 
 /**
  * Process a channel event notification
  */
 static void
 VmbusProcessChannelEvent(void* context, int pending)
 {
 	void* arg;
 	uint32_t bytes_to_read;
 	hv_vmbus_channel* channel = (hv_vmbus_channel*)context;
 	boolean_t is_batched_reading;
 
 	/**
 	 * Find the channel based on this relid and invokes
 	 * the channel callback to process the event
 	 */
 
 	if (channel == NULL) {
 		return;
 	}
 	/**
 	 * To deal with the race condition where we might
 	 * receive a packet while the relevant driver is
 	 * being unloaded, dispatch the callback while
 	 * holding the channel lock. The unloading driver
 	 * will acquire the same channel lock to set the
 	 * callback to NULL. This closes the window.
 	 */
 
-	/*
-	 * Disable the lock due to newly added WITNESS check in r277723.
-	 * Will seek other way to avoid race condition.
-	 * -- whu
-	 */
-	// mtx_lock(&channel->inbound_lock);
 	if (channel->on_channel_callback != NULL) {
 		arg = channel->channel_callback_context;
 		is_batched_reading = channel->batched_reading;
 		/*
 		 * Optimize host to guest signaling by ensuring:
 		 * 1. While reading the channel, we disable interrupts from
 		 *    host.
 		 * 2. Ensure that we process all posted messages from the host
 		 *    before returning from this callback.
 		 * 3. Once we return, enable signaling from the host. Once this
 		 *    state is set we check to see if additional packets are
 		 *    available to read. In this case we repeat the process.
 		 */
 		do {
 			if (is_batched_reading)
 				hv_ring_buffer_read_begin(&channel->inbound);
 
 			channel->on_channel_callback(arg);
 
 			if (is_batched_reading)
 				bytes_to_read =
 				    hv_ring_buffer_read_end(&channel->inbound);
 			else
 				bytes_to_read = 0;
 		} while (is_batched_reading && (bytes_to_read != 0));
 	}
-	// mtx_unlock(&channel->inbound_lock);
 }
Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
===================================================================
--- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c	(revision 296082)
+++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c	(revision 296083)
@@ -1,741 +1,737 @@
 /*-
  * Copyright (c) 2009-2012 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 
 #include "hv_vmbus_priv.h"
 
 /*
  * Internal functions
  */
 
 static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_offer_internal(void* context);
 static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_offer_rescind_internal(void* context);
 static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
 
 /**
  * Channel message dispatch table
  */
 hv_vmbus_channel_msg_table_entry
     g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = {
 	{ HV_CHANNEL_MESSAGE_INVALID,
 		NULL },
 	{ HV_CHANNEL_MESSAGE_OFFER_CHANNEL,
 		vmbus_channel_on_offer },
 	{ HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER,
 		vmbus_channel_on_offer_rescind },
 	{ HV_CHANNEL_MESSAGE_REQUEST_OFFERS,
 		NULL },
 	{ HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED,
 		vmbus_channel_on_offers_delivered },
 	{ HV_CHANNEL_MESSAGE_OPEN_CHANNEL,
 		NULL },
 	{ HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT,
 		vmbus_channel_on_open_result },
 	{ HV_CHANNEL_MESSAGE_CLOSE_CHANNEL,
 		NULL },
 	{ HV_CHANNEL_MESSAGEL_GPADL_HEADER,
 		NULL },
 	{ HV_CHANNEL_MESSAGE_GPADL_BODY,
 		NULL },
 	{ HV_CHANNEL_MESSAGE_GPADL_CREATED,
 		vmbus_channel_on_gpadl_created },
 	{ HV_CHANNEL_MESSAGE_GPADL_TEARDOWN,
 		NULL },
 	{ HV_CHANNEL_MESSAGE_GPADL_TORNDOWN,
 		vmbus_channel_on_gpadl_torndown },
 	{ HV_CHANNEL_MESSAGE_REL_ID_RELEASED,
 		NULL },
 	{ HV_CHANNEL_MESSAGE_INITIATED_CONTACT,
 		NULL },
 	{ HV_CHANNEL_MESSAGE_VERSION_RESPONSE,
 		vmbus_channel_on_version_response },
 	{ HV_CHANNEL_MESSAGE_UNLOAD,
 		NULL }
 };
 
 typedef struct hv_work_item {
 	struct task	work;
 	void		(*callback)(void *);
 	void*		context;
 } hv_work_item;
 
 /**
  * Implementation of the work abstraction.
  */
 static void
 work_item_callback(void *work, int pending)
 {
 	struct hv_work_item *w = (struct hv_work_item *)work;
 
 	w->callback(w->context);
 
 	free(w, M_DEVBUF);
 }
 
 /**
  * @brief Create work item
  */
 static int
 hv_queue_work_item(
 	void (*callback)(void *), void *context)
 {
 	struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
 					M_DEVBUF, M_NOWAIT);
 	KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
 	if (w == NULL)
 	    return (ENOMEM);
 
 	w->callback = callback;
 	w->context = context;
 
 	TASK_INIT(&w->work, 0, work_item_callback, w);
 
 	return (taskqueue_enqueue(taskqueue_thread, &w->work));
 }
 
 
 /**
  * @brief Allocate and initialize a vmbus channel object
  */
 hv_vmbus_channel*
 hv_vmbus_allocate_channel(void)
 {
 	hv_vmbus_channel* channel;
 
 	channel = (hv_vmbus_channel*) malloc(
 					sizeof(hv_vmbus_channel),
 					M_DEVBUF,
 					M_WAITOK | M_ZERO);
 
-	mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
 	mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
-
 	TAILQ_INIT(&channel->sc_list_anchor);
 
 	return (channel);
 }
 
 /**
  * @brief Release the resources used by the vmbus channel object
  */
 void
 hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
 {
 	mtx_destroy(&channel->sc_lock);
-	mtx_destroy(&channel->inbound_lock);
-
 	free(channel, M_DEVBUF);
 }
 
 /**
  * @brief Process the offer by creating a channel/device
  * associated with this offer
  */
 static void
 vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
 {
 	boolean_t		f_new;
 	hv_vmbus_channel*	channel;
 	int			ret;
 	uint32_t                relid;
 
 	f_new = TRUE;
 	channel = NULL;
 	relid = new_channel->offer_msg.child_rel_id;
 	/*
 	 * Make sure this is a new offer
 	 */
 	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 	hv_vmbus_g_connection.channels[relid] = new_channel;
 
 	TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
 	    list_entry)
 	{
 		if (memcmp(&channel->offer_msg.offer.interface_type,
 		    &new_channel->offer_msg.offer.interface_type,
 		    sizeof(hv_guid)) == 0 &&
 		    memcmp(&channel->offer_msg.offer.interface_instance,
 		    &new_channel->offer_msg.offer.interface_instance,
 		    sizeof(hv_guid)) == 0) {
 			f_new = FALSE;
 			break;
 		}
 	}
 
 	if (f_new) {
 		/* Insert at tail */
 		TAILQ_INSERT_TAIL(
 		    &hv_vmbus_g_connection.channel_anchor,
 		    new_channel,
 		    list_entry);
 	}
 	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
 
 	/*XXX add new channel to percpu_list */
 
 	if (!f_new) {
 		/*
 		 * Check if this is a sub channel.
 		 */
 		if (new_channel->offer_msg.offer.sub_channel_index != 0) {
 			/*
 			 * It is a sub channel offer, process it.
 			 */
 			new_channel->primary_channel = channel;
 			mtx_lock(&channel->sc_lock);
 			TAILQ_INSERT_TAIL(
 			    &channel->sc_list_anchor,
 			    new_channel,
 			    sc_list_entry);
 			mtx_unlock(&channel->sc_lock);
 
 			/* Insert new channel into channel_anchor. */
 			printf("VMBUS get multi-channel offer, rel=%u,sub=%u\n",
 			    new_channel->offer_msg.child_rel_id,
 			    new_channel->offer_msg.offer.sub_channel_index);	
 			mtx_lock(&hv_vmbus_g_connection.channel_lock);
 			TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
 			    new_channel, list_entry);				
 			mtx_unlock(&hv_vmbus_g_connection.channel_lock);
 
 			if(bootverbose)
 				printf("VMBUS: new multi-channel offer <%p>, "
 				    "its primary channel is <%p>.\n",
 				    new_channel, new_channel->primary_channel);
 
 			/*XXX add it to percpu_list */
 
 			new_channel->state = HV_CHANNEL_OPEN_STATE;
 			if (channel->sc_creation_callback != NULL) {
 				channel->sc_creation_callback(new_channel);
 			}
 			return;
 		}
 
 	    hv_vmbus_free_vmbus_channel(new_channel);
 	    return;
 	}
 
 	new_channel->state = HV_CHANNEL_OPEN_STATE;
 
 	/*
 	 * Start the process of binding this offer to the driver
 	 * (We need to set the device field before calling
 	 * hv_vmbus_child_device_add())
 	 */
 	new_channel->device = hv_vmbus_child_device_create(
 	    new_channel->offer_msg.offer.interface_type,
 	    new_channel->offer_msg.offer.interface_instance, new_channel);
 
 	/*
 	 * Add the new device to the bus. This will kick off device-driver
 	 * binding which eventually invokes the device driver's AddDevice()
 	 * method.
 	 */
 	ret = hv_vmbus_child_device_register(new_channel->device);
 	if (ret != 0) {
 		mtx_lock(&hv_vmbus_g_connection.channel_lock);
 		TAILQ_REMOVE(
 		    &hv_vmbus_g_connection.channel_anchor,
 		    new_channel,
 		    list_entry);
 		mtx_unlock(&hv_vmbus_g_connection.channel_lock);
 		hv_vmbus_free_vmbus_channel(new_channel);
 	}
 }
 
 /**
  * Array of device guids that are performance critical. We try to distribute
  * the interrupt load for these devices across all online cpus. 
  */
 static const hv_guid high_perf_devices[] = {
 	{HV_NIC_GUID, },
 	{HV_IDE_GUID, },
 	{HV_SCSI_GUID, },
 };
 
 enum {
 	PERF_CHN_NIC = 0,
 	PERF_CHN_IDE,
 	PERF_CHN_SCSI,
 	MAX_PERF_CHN,
 };
 
 /*
  * We use this static number to distribute the channel interrupt load.
  */
 static uint32_t next_vcpu;
 
 /**
  * Starting with Win8, we can statically distribute the incoming
  * channel interrupt load by binding a channel to VCPU. We
  * implement here a simple round robin scheme for distributing
  * the interrupt load.
  * We will bind channels that are not performance critical to cpu 0 and
  * performance critical channels (IDE, SCSI and Network) will be uniformly
  * distributed across all available CPUs.
  */
 static void
 vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid)
 {
 	uint32_t current_cpu;
 	int i;
 	boolean_t is_perf_channel = FALSE;
 
 	for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
 		if (memcmp(guid->data, high_perf_devices[i].data,
 		    sizeof(hv_guid)) == 0) {
 			is_perf_channel = TRUE;
 			break;
 		}
 	}
 
 	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
 	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) ||
 	    (!is_perf_channel)) {
 		/* Host's view of guest cpu */
 		channel->target_vcpu = 0;
 		/* Guest's own view of cpu */
 		channel->target_cpu = 0;
 		return;
 	}
 	/* mp_ncpus should have the number cpus currently online */
 	current_cpu = (++next_vcpu % mp_ncpus);
 	channel->target_cpu = current_cpu;
 	channel->target_vcpu =
 	    hv_vmbus_g_context.hv_vcpu_index[current_cpu];
 	if (bootverbose)
 		printf("VMBUS: Total online cpus %d, assign perf channel %d "
 		    "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu,
 		    current_cpu);
 }
 
 /**
  * @brief Handler for channel offers from Hyper-V/Azure
  *
  * Handler for channel offers from vmbus in parent partition. We ignore
  * all offers except network and storage offers. For each network and storage
  * offers, we create a channel object and queue a work item to the channel
  * object to process the offer synchronously
  */
 static void
 vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
 {
 	hv_vmbus_channel_offer_channel* offer;
 	hv_vmbus_channel_offer_channel* copied;
 
 	offer = (hv_vmbus_channel_offer_channel*) hdr;
 
 	hv_guid *guidType;
 	hv_guid *guidInstance;
 
 	guidType = &offer->offer.interface_type;
 	guidInstance = &offer->offer.interface_instance;
 
 	// copy offer data
 	copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
 	if (copied == NULL) {
 		printf("fail to allocate memory\n");
 		return;
 	}
 
 	memcpy(copied, hdr, sizeof(*copied));
 	hv_queue_work_item(vmbus_channel_on_offer_internal, copied);
 }
 
 static void
 vmbus_channel_on_offer_internal(void* context)
 {
 	hv_vmbus_channel* new_channel;
 
 	hv_vmbus_channel_offer_channel* offer = (hv_vmbus_channel_offer_channel*)context;
 	/* Allocate the channel object and save this offer */
 	new_channel = hv_vmbus_allocate_channel();
 
 	/*
 	 * By default we setup state to enable batched
 	 * reading. A specific service can choose to
 	 * disable this prior to opening the channel.
 	 */
 	new_channel->batched_reading = TRUE;
 
 	new_channel->signal_event_param =
 	    (hv_vmbus_input_signal_event *)
 	    (HV_ALIGN_UP((unsigned long)
 		&new_channel->signal_event_buffer,
 		HV_HYPERCALL_PARAM_ALIGN));
 
  	new_channel->signal_event_param->connection_id.as_uint32_t = 0;	
 	new_channel->signal_event_param->connection_id.u.id =
 	    HV_VMBUS_EVENT_CONNECTION_ID;
 	new_channel->signal_event_param->flag_number = 0;
 	new_channel->signal_event_param->rsvd_z = 0;
 
 	if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
 		new_channel->is_dedicated_interrupt =
 		    (offer->is_dedicated_interrupt != 0);
 		new_channel->signal_event_param->connection_id.u.id =
 		    offer->connection_id;
 	}
 
 	/*
 	 * Bind the channel to a chosen cpu.
 	 */
 	vmbus_channel_select_cpu(new_channel,
 	    &offer->offer.interface_type);
 
 	memcpy(&new_channel->offer_msg, offer,
 	    sizeof(hv_vmbus_channel_offer_channel));
 	new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
 	new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
 
 	vmbus_channel_process_offer(new_channel);
 
 	free(offer, M_DEVBUF);
 }
 
 /**
  * @brief Rescind offer handler.
  *
  * We queue a work item to process this offer
  * synchronously
  */
 static void
 vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
 {
 	hv_vmbus_channel_rescind_offer*	rescind;
 	hv_vmbus_channel*		channel;
 
 	rescind = (hv_vmbus_channel_rescind_offer*) hdr;
 
 	channel = hv_vmbus_g_connection.channels[rescind->child_rel_id];
 	if (channel == NULL)
 	    return;
 
 	hv_queue_work_item(vmbus_channel_on_offer_rescind_internal, channel);
 	hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL;
 }
 
 static void
 vmbus_channel_on_offer_rescind_internal(void *context)
 {
 	hv_vmbus_channel*               channel;
 
 	channel = (hv_vmbus_channel*)context;
 	hv_vmbus_child_device_unregister(channel->device);
 }
 
 /**
  *
  * @brief Invoked when all offers have been delivered.
  */
 static void
 vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
 {
 }
 
 /**
  * @brief Open result handler.
  *
  * This is invoked when we received a response
  * to our channel open request. Find the matching request, copy the
  * response and signal the requesting thread.
  */
 static void
 vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
 {
 	hv_vmbus_channel_open_result*	result;
 	hv_vmbus_channel_msg_info*	msg_info;
 	hv_vmbus_channel_msg_header*	requestHeader;
 	hv_vmbus_channel_open_channel*	openMsg;
 
 	result = (hv_vmbus_channel_open_result*) hdr;
 
 	/*
 	 * Find the open msg, copy the result and signal/unblock the wait event
 	 */
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 
 	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
 	    msg_list_entry) {
 	    requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
 
 	    if (requestHeader->message_type ==
 		    HV_CHANNEL_MESSAGE_OPEN_CHANNEL) {
 		openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg;
 		if (openMsg->child_rel_id == result->child_rel_id
 		    && openMsg->open_id == result->open_id) {
 		    memcpy(&msg_info->response.open_result, result,
 			sizeof(hv_vmbus_channel_open_result));
 		    sema_post(&msg_info->wait_sema);
 		    break;
 		}
 	    }
 	}
 	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 
 }
 
 /**
  * @brief GPADL created handler.
  *
  * This is invoked when we received a response
  * to our gpadl create request. Find the matching request, copy the
  * response and signal the requesting thread.
  */
 static void
 vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
 {
 	hv_vmbus_channel_gpadl_created*		gpadl_created;
 	hv_vmbus_channel_msg_info*		msg_info;
 	hv_vmbus_channel_msg_header*		request_header;
 	hv_vmbus_channel_gpadl_header*		gpadl_header;
 
 	gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr;
 
 	/* Find the establish msg, copy the result and signal/unblock
 	 * the wait event
 	 */
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
 		msg_list_entry) {
 	    request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;
 	    if (request_header->message_type ==
 		    HV_CHANNEL_MESSAGEL_GPADL_HEADER) {
 		gpadl_header =
 		    (hv_vmbus_channel_gpadl_header*) request_header;
 
 		if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id)
 		    && (gpadl_created->gpadl == gpadl_header->gpadl)) {
 		    memcpy(&msg_info->response.gpadl_created,
 			gpadl_created,
 			sizeof(hv_vmbus_channel_gpadl_created));
 		    sema_post(&msg_info->wait_sema);
 		    break;
 		}
 	    }
 	}
 	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 }
 
 /**
  * @brief GPADL torndown handler.
  *
  * This is invoked when we received a respons
  * to our gpadl teardown request. Find the matching request, copy the
  * response and signal the requesting thread
  */
 static void
 vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
 {
 	hv_vmbus_channel_gpadl_torndown*	gpadl_torndown;
 	hv_vmbus_channel_msg_info*		msg_info;
 	hv_vmbus_channel_msg_header*		requestHeader;
 	hv_vmbus_channel_gpadl_teardown*	gpadlTeardown;
 
 	gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr;
 
 	/*
 	 * Find the open msg, copy the result and signal/unblock the
 	 * wait event.
 	 */
 
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 
 	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
 		msg_list_entry) {
 	    requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
 
 	    if (requestHeader->message_type
 		    == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) {
 		gpadlTeardown =
 		    (hv_vmbus_channel_gpadl_teardown*) requestHeader;
 
 		if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) {
 		    memcpy(&msg_info->response.gpadl_torndown,
 			gpadl_torndown,
 			sizeof(hv_vmbus_channel_gpadl_torndown));
 		    sema_post(&msg_info->wait_sema);
 		    break;
 		}
 	    }
 	}
     mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 }
 
 /**
  * @brief Version response handler.
  *
  * This is invoked when we received a response
  * to our initiate contact request. Find the matching request, copy th
  * response and signal the requesting thread.
  */
 static void
 vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
 {
 	hv_vmbus_channel_msg_info*		msg_info;
 	hv_vmbus_channel_msg_header*		requestHeader;
 	hv_vmbus_channel_initiate_contact*	initiate;
 	hv_vmbus_channel_version_response*	versionResponse;
 
 	versionResponse = (hv_vmbus_channel_version_response*)hdr;
 
 	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
 	    msg_list_entry) {
 	    requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
 	    if (requestHeader->message_type
 		== HV_CHANNEL_MESSAGE_INITIATED_CONTACT) {
 		initiate =
 		    (hv_vmbus_channel_initiate_contact*) requestHeader;
 		memcpy(&msg_info->response.version_response,
 		    versionResponse,
 		    sizeof(hv_vmbus_channel_version_response));
 		sema_post(&msg_info->wait_sema);
 	    }
 	}
     mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
 
 }
 
 /**
  *  @brief Send a request to get all our pending offers.
  */
 int
 hv_vmbus_request_channel_offers(void)
 {
 	int				ret;
 	hv_vmbus_channel_msg_header*	msg;
 	hv_vmbus_channel_msg_info*	msg_info;
 
 	msg_info = (hv_vmbus_channel_msg_info *)
 	    malloc(sizeof(hv_vmbus_channel_msg_info)
 		    + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT);
 
 	if (msg_info == NULL) {
 	    if(bootverbose)
 		printf("Error VMBUS: malloc failed for Request Offers\n");
 	    return (ENOMEM);
 	}
 
 	msg = (hv_vmbus_channel_msg_header*) msg_info->msg;
 	msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS;
 
 	ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));
 
 	free(msg_info, M_DEVBUF);
 
 	return (ret);
 }
 
 /**
  * @brief Release channels that are unattached/unconnected (i.e., no drivers associated)
  */
 void
 hv_vmbus_release_unattached_channels(void) 
 {
 	hv_vmbus_channel *channel;
 
 	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 
 	while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
 	    channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
 	    TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
 			    channel, list_entry);
 
 	    hv_vmbus_child_device_unregister(channel->device);
 	    hv_vmbus_free_vmbus_channel(channel);
 	}
 	bzero(hv_vmbus_g_connection.channels, 
 		sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT);
 	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
 }
 
 /**
  * @brief Select the best outgoing channel
  * 
  * The channel whose vcpu binding is closest to the currect vcpu will
  * be selected.
  * If no multi-channel, always select primary channel
  * 
  * @param primary - primary channel
  */
 struct hv_vmbus_channel *
 vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
 {
 	hv_vmbus_channel *new_channel = NULL;
 	hv_vmbus_channel *outgoing_channel = primary;
 	int old_cpu_distance = 0;
 	int new_cpu_distance = 0;
 	int cur_vcpu = 0;
 	int smp_pro_id = PCPU_GET(cpuid);
 
 	if (TAILQ_EMPTY(&primary->sc_list_anchor)) {
 		return outgoing_channel;
 	}
 
 	if (smp_pro_id >= MAXCPU) {
 		return outgoing_channel;
 	}
 
 	cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id];
 	
 	TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) {
 		if (new_channel->state != HV_CHANNEL_OPENED_STATE){
 			continue;
 		}
 
 		if (new_channel->target_vcpu == cur_vcpu){
 			return new_channel;
 		}
 
 		old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ?
 		    (outgoing_channel->target_vcpu - cur_vcpu) :
 		    (cur_vcpu - outgoing_channel->target_vcpu));
 
 		new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ?
 		    (new_channel->target_vcpu - cur_vcpu) :
 		    (cur_vcpu - new_channel->target_vcpu));
 
 		if (old_cpu_distance < new_cpu_distance) {
 			continue;
 		}
 
 		outgoing_channel = new_channel;
 	}
 
 	return(outgoing_channel);
 }