Index: head/sys/dev/hyperv/include/hyperv.h
===================================================================
--- head/sys/dev/hyperv/include/hyperv.h	(revision 302620)
+++ head/sys/dev/hyperv/include/hyperv.h	(revision 302621)
@@ -1,726 +1,727 @@
 /*-
  * Copyright (c) 2009-2012,2016 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /**
  * HyperV definitions for messages that are sent between instances of the
  * Channel Management Library in separate partitions, or in some cases,
  * back to itself.
  */
 
 #ifndef __HYPERV_H__
 #define __HYPERV_H__
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <sys/kthread.h>
 #include <sys/taskqueue.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/sema.h>
 #include <sys/smp.h>
 #include <sys/mutex.h>
 #include <sys/bus.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 
 #include <amd64/include/xen/synch_bitops.h>
 #include <amd64/include/atomic.h>
 #include <dev/hyperv/include/hyperv_busdma.h>
 
 typedef uint8_t	hv_bool_uint8_t;
 
 #define HV_S_OK			0x00000000
 #define HV_E_FAIL		0x80004005
 #define HV_ERROR_NOT_SUPPORTED	0x80070032
 #define HV_ERROR_MACHINE_LOCKED	0x800704F7
 
 /*
  * VMBUS version is 32 bit, upper 16 bit for major_number and lower
  * 16 bit for minor_number.
  *
  * 0.13  --  Windows Server 2008
  * 1.1   --  Windows 7
  * 2.4   --  Windows 8
  * 3.0   --  Windows 8.1
  */
 #define HV_VMBUS_VERSION_WS2008		((0 << 16) | (13))
 #define HV_VMBUS_VERSION_WIN7		((1 << 16) | (1))
 #define HV_VMBUS_VERSION_WIN8		((2 << 16) | (4))
 #define HV_VMBUS_VERSION_WIN8_1		((3 << 16) | (0))
 
 /*
  * Maximum size of a pipe payload: 16K bytes.
  *
  * Expressed in terms of uint8_t, the byte type used throughout this
  * header (no BYTE type is declared here).
  */
 
 #define HV_MAX_PIPE_DATA_PAYLOAD	(sizeof(uint8_t) * 16384)
 
 /*
  * Define pipe_mode values
  */
 
 #define HV_VMBUS_PIPE_TYPE_BYTE		0x00000000
 #define HV_VMBUS_PIPE_TYPE_MESSAGE	0x00000004
 
 /*
  * The size of the user defined data buffer for non-pipe offers
  */
 
 #define HV_MAX_USER_DEFINED_BYTES	120
 
 /*
  *  The size of the user defined data buffer for pipe offers
  */
 
 #define HV_MAX_PIPE_USER_DEFINED_BYTES	116
 
 
 #define HV_MAX_PAGE_BUFFER_COUNT	32
 #define HV_MAX_MULTIPAGE_BUFFER_COUNT	32
 
 /*
  * Alignment helpers.  They mask with (align - 1), so the result is only
  * meaningful when 'align' is a power of two.  Every macro argument is
  * fully parenthesized so that expressions such as (1 << n) may be passed;
  * arguments are still evaluated more than once and must therefore be free
  * of side effects.
  */
 #define HV_ALIGN_UP(value, align)					\
 		(((value) & ((align) - 1)) ?				\
 		    (((value) + ((align) - 1)) & ~((align) - 1)) : (value))
 
 #define HV_ALIGN_DOWN(value, align)	((value) & ~((align) - 1))
 
 /* Number of pages touched by the byte range [addr, addr + len). */
 #define HV_NUM_PAGES_SPANNED(addr, len)					\
 		((HV_ALIGN_UP((addr) + (len), PAGE_SIZE) -		\
 		    HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT)
 
 /*
  * 16-byte GUID, stored as a raw byte array.
  */
 typedef struct hv_guid {
 	uint8_t data[16];
 } __packed hv_guid;
 
 /*
  * NOTE(review): presumably the buffer size callers are expected to pass
  * to hyperv_guid2str() -- confirm against its definition.
  */
 #define HYPERV_GUID_STRLEN	40
 
 /*
  * Defined elsewhere.  Judging by name and signature it formats a GUID
  * into a caller-supplied buffer of the given size -- TODO confirm the
  * meaning of the return value.
  */
 int	hyperv_guid2str(const struct hv_guid *, char *, size_t);
 
 #define HV_NIC_GUID							\
 	.data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,	\
 		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
 
 #define HV_IDE_GUID							\
 	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,	\
 		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
 
 #define HV_SCSI_GUID							\
 	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,	\
 		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
 
 /*
  * At the center of the Channel Management library is
  * the Channel Offer. This struct contains the
  * fundamental information about an offer.
  *
  * The structure is packed.  Both arms of the union 'u' occupy
  * HV_MAX_USER_DEFINED_BYTES (120) bytes: the pipe arm is a 4-byte
  * pipe_mode followed by HV_MAX_PIPE_USER_DEFINED_BYTES (116) bytes.
  */
 
 typedef struct hv_vmbus_channel_offer {
 	hv_guid		interface_type;
 	hv_guid		interface_instance;
 	uint64_t	interrupt_latency_in_100ns_units;
 	uint32_t	interface_revision;
 	uint32_t	server_context_area_size; /* in bytes */
 	uint16_t	channel_flags;
 	uint16_t	mmio_megabytes;		  /* in bytes * 1024 * 1024 */
 	union
 	{
 		/*
 		 * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes.
 		 */
 		struct {
 			uint8_t	user_defined[HV_MAX_USER_DEFINED_BYTES];
 		} __packed standard;
 
 		/*
 		 * Pipes: The following structure is an integrated pipe
 		 *        protocol, which is implemented on top of standard
 		 *        user-defined data.  Pipe clients have
 		 *        HV_MAX_PIPE_USER_DEFINED_BYTES left for their
 		 *        own use.
 		 *
 		 * NOTE(review): pipe_mode presumably holds one of the
 		 * HV_VMBUS_PIPE_TYPE_* values -- confirm.
 		 */
 		struct {
 			uint32_t	pipe_mode;
 			uint8_t	user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES];
 		} __packed pipe;
 	} u;
 
 	/*
 	 * Sub_channel_index, newly added in Win8.
 	 */
 	uint16_t	sub_channel_index;
 	uint16_t	padding;
 
 } __packed hv_vmbus_channel_offer;
 
 /*
  * Common 16-byte packet descriptor.  It is embedded as the first member
  * ('d') of the typed packet headers declared after it in this file.
  *
  * NOTE(review): the "8" suffix of data_offset8/length8 presumably means
  * the values are counted in 8-byte units, and 'type' presumably carries
  * an hv_vmbus_packet_type value -- confirm against the users.
  */
 typedef struct {
 	uint16_t type;
 	uint16_t data_offset8;
 	uint16_t length8;
 	uint16_t flags;
 	uint64_t transaction_id;
 } __packed hv_vm_packet_descriptor;
 
 typedef uint32_t hv_previous_packet_offset;
 
 typedef struct {
 	hv_previous_packet_offset	previous_packet_start_offset;
 	hv_vm_packet_descriptor		descriptor;
 } __packed hv_vm_packet_header;
 
 typedef struct {
 	uint32_t byte_count;
 	uint32_t byte_offset;
 } __packed hv_vm_transfer_page;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint16_t		transfer_page_set_id;
 	hv_bool_uint8_t		sender_owns_set;
 	uint8_t			reserved;
 	uint32_t		range_count;
 	hv_vm_transfer_page	ranges[1];
 } __packed hv_vm_transfer_page_packet_header;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint32_t		reserved;
 } __packed hv_vm_gpadl_packet_header;
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint16_t		transfer_page_set_id;
 	uint16_t		reserved;
 } __packed hv_vm_add_remove_transfer_page_set;
 
 /*
  * This structure defines a range in guest
  * physical space that can be made
  * to look virtually contiguous.
  */
 
 typedef struct {
 	uint32_t byte_count;
 	uint32_t byte_offset;
 	uint64_t pfn_array[0];
 } __packed hv_gpa_range;
 
 /*
  * This is the format for an Establish Gpadl packet, which contains a handle
  * by which this GPADL will be known and a set of GPA ranges associated with
  * it.  This can be converted to a MDL by the guest OS.  If there are multiple
  * GPA ranges, then the resulting MDL will be "chained," representing multiple
  * VA ranges.
  */
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 	uint32_t		range_count;
 	hv_gpa_range		range[1];
 } __packed hv_vm_establish_gpadl;
 
 /*
  * This is the format for a Teardown Gpadl packet, which indicates that the
  * GPADL handle in the Establish Gpadl packet will never be referenced again.
  */
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		gpadl;
 				/* for alignment to an 8-byte boundary */
 	uint32_t		reserved;
 } __packed hv_vm_teardown_gpadl;
 
 /*
  * This is the format for a GPA-Direct packet, which contains a set of GPA
  * ranges, in addition to commands and/or data.
  */
 
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint32_t		reserved;
 	uint32_t		range_count;
 	hv_gpa_range		range[1];
 } __packed hv_vm_data_gpa_direct;
 
 /*
  * This is the format for an Additional Data packet.
  */
 typedef struct {
 	hv_vm_packet_descriptor	d;
 	uint64_t		total_bytes;
 	uint32_t		byte_offset;
 	uint32_t		byte_count;
 	uint8_t			data[1];
 } __packed hv_vm_additional_data;
 
 typedef union {
 	hv_vm_packet_descriptor             simple_header;
 	hv_vm_transfer_page_packet_header   transfer_page_header;
 	hv_vm_gpadl_packet_header           gpadl_header;
 	hv_vm_add_remove_transfer_page_set  add_remove_transfer_page_header;
 	hv_vm_establish_gpadl               establish_gpadl_header;
 	hv_vm_teardown_gpadl                teardown_gpadl_header;
 	hv_vm_data_gpa_direct               data_gpa_direct_header;
 } __packed hv_vm_packet_largest_possible_header;
 
 /*
  * VMBus packet type codes (0x0 - 0xd).  This type is what
  * hv_vmbus_channel_send_packet() takes as its 'type' argument.
  *
  * NOTE(review): HV_VMBUS_PACKET_TYPES_SYNCH is the only enumerator spelt
  * "TYPES"; it is kept as-is because other files may reference it.
  */
 typedef enum {
 	HV_VMBUS_PACKET_TYPE_INVALID				= 0x0,
 	HV_VMBUS_PACKET_TYPES_SYNCH				= 0x1,
 	HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET		= 0x2,
 	HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET		= 0x3,
 	HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL			= 0x4,
 	HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL			= 0x5,
 	HV_VMBUS_PACKET_TYPE_DATA_IN_BAND			= 0x6,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES		= 0x7,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL			= 0x8,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT		= 0x9,
 	HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST			= 0xa,
 	HV_VMBUS_PACKET_TYPE_COMPLETION				= 0xb,
 	HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS	= 0xc,
 	HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA = 0xd
 } hv_vmbus_packet_type;
 
 #define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED    1
 
 /*
  * Version 1 messages
  *
  * Channel message type codes; this is the type of the message_type
  * member of hv_vmbus_channel_msg_header.  HV_CHANNEL_MESSAGE_COUNT
  * follows the last real message (16) and therefore evaluates to 17,
  * the number of codes.
  *
  * NOTE(review): HV_CHANNEL_MESSAGEL_GPADL_HEADER carries a stray 'L'
  * ("MESSAGEL"); the spelling is kept because other files may use it.
  */
 typedef enum {
 	HV_CHANNEL_MESSAGE_INVALID			= 0,
 	HV_CHANNEL_MESSAGE_OFFER_CHANNEL		= 1,
 	HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER	= 2,
 	HV_CHANNEL_MESSAGE_REQUEST_OFFERS		= 3,
 	HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED		= 4,
 	HV_CHANNEL_MESSAGE_OPEN_CHANNEL			= 5,
 	HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT		= 6,
 	HV_CHANNEL_MESSAGE_CLOSE_CHANNEL		= 7,
 	HV_CHANNEL_MESSAGEL_GPADL_HEADER		= 8,
 	HV_CHANNEL_MESSAGE_GPADL_BODY			= 9,
 	HV_CHANNEL_MESSAGE_GPADL_CREATED		= 10,
 	HV_CHANNEL_MESSAGE_GPADL_TEARDOWN		= 11,
 	HV_CHANNEL_MESSAGE_GPADL_TORNDOWN		= 12,
 	HV_CHANNEL_MESSAGE_REL_ID_RELEASED		= 13,
 	HV_CHANNEL_MESSAGE_INITIATED_CONTACT		= 14,
 	HV_CHANNEL_MESSAGE_VERSION_RESPONSE		= 15,
 	HV_CHANNEL_MESSAGE_UNLOAD			= 16,
 	HV_CHANNEL_MESSAGE_COUNT
 } hv_vmbus_channel_msg_type;
 
 typedef struct {
 	hv_vmbus_channel_msg_type	message_type;
 	uint32_t			padding;
 } __packed hv_vmbus_channel_msg_header;
 
 /*
  * Query VMBus Version parameters
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			version;
 } __packed hv_vmbus_channel_query_vmbus_version;
 
 /*
  * Channel Offer parameters
  *
  * Packed message made of the common message header, the channel offer
  * itself and per-channel identifiers.  A copy of this message is kept in
  * hv_vmbus_channel.offer_msg.
  */
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	hv_vmbus_channel_offer		offer;
 	uint32_t			child_rel_id;
 	uint8_t				monitor_id;
 	/*
 	 * This field has been split into a bit field on Win7
 	 * and higher.
 	 */
 	uint8_t				monitor_allocated:1;
 	uint8_t				reserved:7;
 	/*
 	 * Following fields were added in win7 and higher.
 	 * Make sure to check the version before accessing these fields.
 	 *
 	 * If "is_dedicated_interrupt" is set, we must not set the
 	 * associated bit in the channel bitmap while sending the
 	 * interrupt to the host.
 	 *
 	 * connection_id is used in signaling the host.
 	 */
 	uint16_t			is_dedicated_interrupt:1;
 	uint16_t			reserved1:15;
 	uint32_t			connection_id;
 } __packed hv_vmbus_channel_offer_channel;
 
 /*
  * Rescind Offer parameters
  */
 typedef struct
 {
     hv_vmbus_channel_msg_header	header;
     uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_rescind_offer;
 
 typedef struct {
 	hv_vmbus_channel_msg_header	header;
 	uint32_t			child_rel_id;
 } __packed hv_vmbus_channel_relid_released;
 
 #define HW_MACADDR_LEN	6
 
 enum {
 	HV_VMBUS_IVAR_TYPE,
 	HV_VMBUS_IVAR_INSTANCE,
 	HV_VMBUS_IVAR_NODE,
 	HV_VMBUS_IVAR_DEVCTX
 };
 
 #define HV_VMBUS_ACCESSOR(var, ivar, type) \
 		__BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type)
 
 HV_VMBUS_ACCESSOR(type, TYPE,  const char *)
 HV_VMBUS_ACCESSOR(devctx, DEVCTX,  struct hv_device *)
 
 
 /*
  * Common defines for Hyper-V ICs
  */
 #define HV_ICMSGTYPE_NEGOTIATE		0
 #define HV_ICMSGTYPE_HEARTBEAT		1
 #define HV_ICMSGTYPE_KVPEXCHANGE	2
 #define HV_ICMSGTYPE_SHUTDOWN		3
 #define HV_ICMSGTYPE_TIMESYNC		4
 #define HV_ICMSGTYPE_VSS		5
 
 #define HV_ICMSGHDRFLAG_TRANSACTION	1
 #define HV_ICMSGHDRFLAG_REQUEST		2
 #define HV_ICMSGHDRFLAG_RESPONSE	4
 
 typedef struct hv_vmbus_pipe_hdr {
 	uint32_t flags;
 	uint32_t msgsize;
 } __packed hv_vmbus_pipe_hdr;
 
 typedef struct hv_vmbus_ic_version {
 	uint16_t major;
 	uint16_t minor;
 } __packed hv_vmbus_ic_version;
 
 typedef struct hv_vmbus_icmsg_hdr {
 	hv_vmbus_ic_version	icverframe;
 	uint16_t		icmsgtype;
 	hv_vmbus_ic_version	icvermsg;
 	uint16_t		icmsgsize;
 	uint32_t		status;
 	uint8_t			ictransaction_id;
 	uint8_t			icflags;
 	uint8_t			reserved[2];
 } __packed hv_vmbus_icmsg_hdr;
 
 typedef struct hv_vmbus_icmsg_negotiate {
 	uint16_t		icframe_vercnt;
 	uint16_t		icmsg_vercnt;
 	uint32_t		reserved;
 	hv_vmbus_ic_version	icversion_data[1]; /* any size array */
 } __packed hv_vmbus_icmsg_negotiate;
 
 typedef struct hv_vmbus_shutdown_msg_data {
 	uint32_t		reason_code;
 	uint32_t		timeout_seconds;
 	uint32_t 		flags;
 	uint8_t			display_message[2048];
 } __packed hv_vmbus_shutdown_msg_data;
 
 typedef struct hv_vmbus_heartbeat_msg_data {
 	uint64_t 		seq_num;
 	uint32_t 		reserved[8];
 } __packed hv_vmbus_heartbeat_msg_data;
 
 /*
  * Ring buffer header followed by the ring data.
  *
  * The three 32-bit control words plus reserved[4084] add up to 4096
  * bytes, so 'buffer' begins 4096 bytes into the structure (one page,
  * assuming PAGE_SIZE is 4096).
  */
 typedef struct {
 	/*
 	 * offset in bytes from the start of ring data below
 	 */
 	volatile uint32_t       write_index;
 	/*
 	 * offset in bytes from the start of ring data below
 	 */
 	volatile uint32_t       read_index;
 	/*
 	 * NOTE: The interrupt_mask field is used only for channels, but
 	 * vmbus connection also uses this data structure
 	 */
 	volatile uint32_t       interrupt_mask;
 	/* pad it to PAGE_SIZE so that data starts on a page */
 	uint8_t                 reserved[4084];
 
 	/*
 	 * WARNING: Ring data starts here + ring_data_start_offset
 	 *  !!! DO NOT place any fields below this !!!
 	 *
 	 * Zero-length trailing array: it adds nothing to sizeof() and
 	 * only names the first byte after the header.
 	 */
 	uint8_t			buffer[0];	/* doubles as interrupt mask */
 } __packed hv_vmbus_ring_buffer;
 
 typedef struct {
 	int		length;
 	int		offset;
 	uint64_t	pfn;
 } __packed hv_vmbus_page_buffer;
 
 typedef struct {
 	int		length;
 	int		offset;
 	uint64_t	pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT];
 } __packed hv_vmbus_multipage_buffer;
 
 typedef struct {
 	hv_vmbus_ring_buffer*	ring_buffer;
 	uint32_t		ring_size;	/* Include the shared header */
 	struct mtx		ring_lock;
 	uint32_t		ring_data_size;	/* ring_size */
 	uint32_t		ring_data_start_offset;
 } hv_vmbus_ring_buffer_info;
 
 typedef void (*hv_vmbus_pfn_channel_callback)(void *context);
 
 typedef enum {
 	HV_CHANNEL_OFFER_STATE,
 	HV_CHANNEL_OPENING_STATE,
 	HV_CHANNEL_OPEN_STATE,
 	HV_CHANNEL_OPENED_STATE,
 	HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE,
 } hv_vmbus_channel_state;
 
 /*
  *  Connection identifier type
  */
 typedef union {
 	uint32_t		as_uint32_t;
 	struct {
 		uint32_t	id:24;
 		uint32_t	reserved:8;
 	} u;
 
 } __packed hv_vmbus_connection_id;
 
 /*
  * Per-channel state.  A channel is either a primary channel or a
  * sub-channel of one; primary_channel is NULL for a primary channel
  * (this is what HV_VMBUS_CHAN_ISPRIMARY() tests).
  */
 typedef struct hv_vmbus_channel {
 	TAILQ_ENTRY(hv_vmbus_channel)	list_entry;
 	struct hv_device*		device;
 	struct vmbus_softc		*vmbus_sc;
 	hv_vmbus_channel_state		state;
 	/* Copy of the offer message describing this channel. */
 	hv_vmbus_channel_offer_channel	offer_msg;
 	/*
 	 * These are based on the offer_msg.monitor_id.
 	 * Save it here for easy access.
 	 */
 	uint8_t				monitor_group;
 	uint8_t				monitor_bit;
 
 	uint32_t			ring_buffer_gpadl_handle;
 	/*
 	 * Allocated memory for ring buffer
 	 */
 	void*				ring_buffer_pages;
 	unsigned long			ring_buffer_size;
 	uint32_t			ring_buffer_page_count;
 	/*
 	 * send to parent
 	 */
 	hv_vmbus_ring_buffer_info	outbound;
 	/*
 	 * receive from parent
 	 */
 	hv_vmbus_ring_buffer_info	inbound;
 
 	struct taskqueue *		rxq;
 	struct task			channel_task;
 	/* Callback and its argument, as passed to hv_vmbus_channel_open(). */
 	hv_vmbus_pfn_channel_callback	on_channel_callback;
 	void*				channel_callback_context;
 
 	/*
 	 * If batched_reading is set to "true", mask the interrupt
 	 * and read until the channel is empty.
 	 * If batched_reading is set to "false", the channel is not
 	 * going to perform batched reading.
 	 *
 	 * Batched reading is enabled by default; specific
 	 * drivers that don't want this behavior can turn it off
 	 * (see hv_set_channel_read_state()).
 	 */
 	boolean_t			batched_reading;
 
 	boolean_t			is_dedicated_interrupt;
 
 	struct hypercall_sigevt_in	*ch_sigevt;
 	struct hyperv_dma		ch_sigevt_dma;
 
 	/*
 	 * From Win8, this field specifies the target virtual processor
 	 * on which to deliver the interrupt from the host to guest.
 	 * Before Win8, all channel interrupts would only be
 	 * delivered on cpu 0. Setting this value to 0 would preserve
 	 * the earlier behavior.
 	 */
 	uint32_t			target_vcpu;
 	/* The corresponding CPUID in the guest */
 	uint32_t			target_cpu;
 
 	/*
 	 * Support for multi-channels.
 	 * The initial offer is considered the primary channel and this
 	 * offer message will indicate if the host supports multi-channels.
 	 * The guest is free to ask for multi-channels to be offered and can
 	 * open these multi-channels as a normal "primary" channel. However,
 	 * all multi-channels will have the same type and instance guids as the
 	 * primary channel. Requests sent on a given channel will result in a
 	 * response on the same channel.
 	 */
 
 	struct mtx			sc_lock;
 
 	/*
 	 * Linked list of all the multi-channels if this is a primary channel
 	 */
 	TAILQ_HEAD(, hv_vmbus_channel)	sc_list_anchor;
 	TAILQ_ENTRY(hv_vmbus_channel)	sc_list_entry;
 	int				subchan_cnt;
 
 	/*
 	 * The primary channel this sub-channel belongs to.
 	 * This will be NULL for the primary channel.
 	 */
 	struct hv_vmbus_channel		*primary_channel;
 
 	/*
 	 * Driver private data
 	 */
 	void				*hv_chan_priv1;
 	void				*hv_chan_priv2;
 	void				*hv_chan_priv3;
 
 	struct task			ch_detach_task;
 } hv_vmbus_channel;
 
 #define HV_VMBUS_CHAN_ISPRIMARY(chan)	((chan)->primary_channel == NULL)
 
 /*
  * Set the channel's batched_reading flag to 'state'.
  * No locking is performed here and 'channel' must not be NULL.
  */
 static inline void
 hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state)
 {
 	channel->batched_reading = state;
 }
 
 /*
  * Ties a newbus device_t to its VMBus channel and carries two GUIDs.
  *
  * NOTE(review): class_id/device_id presumably mirror the offer's
  * interface_type/interface_instance -- confirm where they are filled in.
  */
 typedef struct hv_device {
 	hv_guid		    class_id;
 	hv_guid		    device_id;
 	device_t	    device;
 	hv_vmbus_channel*   channel;
 } hv_device;
 
 
 
 int		hv_vmbus_channel_recv_packet(
 				hv_vmbus_channel*	channel,
 				void*			buffer,
 				uint32_t		buffer_len,
 				uint32_t*		buffer_actual_len,
 				uint64_t*		request_id);
 
 int		hv_vmbus_channel_recv_packet_raw(
 				hv_vmbus_channel*	channel,
 				void*			buffer,
 				uint32_t		buffer_len,
 				uint32_t*		buffer_actual_len,
 				uint64_t*		request_id);
 
 int		hv_vmbus_channel_open(
 				hv_vmbus_channel*	channel,
 				uint32_t		send_ring_buffer_size,
 				uint32_t		recv_ring_buffer_size,
 				void*			user_data,
 				uint32_t		user_data_len,
 				hv_vmbus_pfn_channel_callback
 							pfn_on_channel_callback,
 				void*			context);
 
 void		hv_vmbus_channel_close(hv_vmbus_channel *channel);
 
 int		hv_vmbus_channel_send_packet(
 				hv_vmbus_channel*	channel,
 				void*			buffer,
 				uint32_t		buffer_len,
 				uint64_t		request_id,
 				hv_vmbus_packet_type	type,
 				uint32_t		flags);
 
 int		hv_vmbus_channel_send_packet_pagebuffer(
 				hv_vmbus_channel*	channel,
 				hv_vmbus_page_buffer	page_buffers[],
 				uint32_t		page_count,
 				void*			buffer,
 				uint32_t		buffer_len,
 				uint64_t		request_id);
 
 int		hv_vmbus_channel_send_packet_multipagebuffer(
 				hv_vmbus_channel*	    channel,
 				hv_vmbus_multipage_buffer*  multi_page_buffer,
 				void*			    buffer,
 				uint32_t		    buffer_len,
 				uint64_t		    request_id);
 
 int		hv_vmbus_channel_establish_gpadl(
 				hv_vmbus_channel*	channel,
 				/* must be phys and virt contiguous */
 				void*			contig_buffer,
 				/*  page-size multiple	*/
 				uint32_t		size,
 				uint32_t*		gpadl_handle);
 
 int		hv_vmbus_channel_teardown_gpdal(
 				hv_vmbus_channel*	channel,
 				uint32_t		gpadl_handle);
 
 /* Defined elsewhere; takes a channel and returns the channel to send on. */
 struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary);
 
 void		vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu);
+void		vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan);
 struct hv_vmbus_channel **
 		vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt);
 void		vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt);
 
 /**
  * @brief Translate a virtual address into a physical address
  *
  * The value of vtophys(virt) is OR-ed with the bits of 'virt' selected
  * by PAGE_MASK and returned as an unsigned long.
  */
 static inline unsigned long
 hv_get_phys_addr(void *virt)
 {
 	return (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK));
 }
 
 extern uint32_t hv_vmbus_protocal_version;
 #endif  /* __HYPERV_H__ */
Index: head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
===================================================================
--- head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	(revision 302620)
+++ head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c	(revision 302621)
@@ -1,2174 +1,2175 @@
 /*-
  * Copyright (c) 2009-2012,2016 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /**
  * StorVSC driver for Hyper-V.  This driver presents a SCSI HBA interface
  * to the Common Access Method (CAM) layer.  CAM control blocks (CCBs) are
  * converted into VSCSI protocol messages which are delivered to the parent
  * partition StorVSP driver over the Hyper-V VMBUS.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/condvar.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
 #include <sys/taskqueue.h>
 #include <sys/bus.h>
 #include <sys/mutex.h>
 #include <sys/callout.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/uma.h>
 #include <sys/lock.h>
 #include <sys/sema.h>
 #include <sys/sglist.h>
 #include <machine/bus.h>
 #include <sys/bus_dma.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_periph.h>
 #include <cam/cam_sim.h>
 #include <cam/cam_xpt_sim.h>
 #include <cam/cam_xpt_internal.h>
 #include <cam/cam_debug.h>
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_message.h>
 
 #include <dev/hyperv/include/hyperv.h>
 #include "hv_vstorage.h"
 
 #define STORVSC_RINGBUFFER_SIZE		(20*PAGE_SIZE)
 #define STORVSC_MAX_LUNS_PER_TARGET	(64)
 #define STORVSC_MAX_IO_REQUESTS		(STORVSC_MAX_LUNS_PER_TARGET * 2)
 #define BLKVSC_MAX_IDE_DISKS_PER_TARGET	(1)
 #define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
 #define STORVSC_MAX_TARGETS		(2)
 
 #define VSTOR_PKT_SIZE	(sizeof(struct vstor_packet) - vmscsi_size_delta)
 
 #define HV_ALIGN(x, a) roundup2(x, a)
 
 struct storvsc_softc;
 
 struct hv_sgl_node {
 	LIST_ENTRY(hv_sgl_node) link;
 	struct sglist *sgl_data;
 };
 
 struct hv_sgl_page_pool{
 	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
 	LIST_HEAD(, hv_sgl_node) free_sgl_list;
 	boolean_t                is_init;
 } g_hv_sgl_page_pool;
 
 /*
  * Product of the maximum number of I/O requests and the maximum number
  * of pages in a multipage buffer.  Parenthesized so the macro can be
  * used safely inside larger expressions.
  */
 #define STORVSC_MAX_SG_PAGE_CNT						\
 	(STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT)
 
 enum storvsc_request_type {
 	WRITE_TYPE,
 	READ_TYPE,
 	UNKNOWN_TYPE
 };
 
 /*
  * Driver-side bookkeeping for one vstor_packet exchanged with the host.
  * Instances can be linked into storvsc_softc.hs_free_list through 'link';
  * two more are embedded directly in the softc (hs_init_req, hs_reset_req).
  *
  * NOTE(review): the per-field notes below are inferred from names and
  * types only -- confirm against the request setup/completion code.
  */
 struct hv_storvsc_request {
 	LIST_ENTRY(hv_storvsc_request) link;
 	struct vstor_packet	vstor_packet;
 	hv_vmbus_multipage_buffer data_buf;
 	void *sense_data;
 	uint8_t sense_info_len;
 	uint8_t retries;
 	union ccb *ccb;		/* presumably the CAM CCB being serviced */
 	struct storvsc_softc *softc;
 	struct callout callout;
 	struct sema synch_sema; /*Synchronize the request/response if needed */
 	struct sglist *bounce_sgl;
 	unsigned int bounce_sgl_count;
 	/* 64-bit mask; presumably one bit per unaligned segment */
 	uint64_t not_aligned_seg_bits;
 };
 
 /*
  * Per-device softc; its size is handed to newbus through storvsc_driver.
  */
 struct storvsc_softc {
 	struct hv_device		*hs_dev;
 	LIST_HEAD(, hv_storvsc_request)	hs_free_list;
 	struct mtx			hs_lock;
 	/* Ring-buffer size for channel opens is read through this. */
 	struct storvsc_driver_props	*hs_drv_props;
 	int 				hs_unit;
 	uint32_t			hs_frozen;
 	struct cam_sim			*hs_sim;
 	struct cam_path 		*hs_path;
 	/*
 	 * get_stor_device() keeps returning the softc for inbound
 	 * traffic during destroy while this is non-zero.
 	 */
 	uint32_t			hs_num_out_reqs;
 	/* Non-zero once the device is being destroyed; see get_stor_device(). */
 	boolean_t			hs_destroy;
 	boolean_t			hs_drain_notify;
 	struct sema 			hs_drain_sema;	
 	/* Request storage used by storvsc_send_multichannel_request(). */
 	struct hv_storvsc_request	hs_init_req;
 	struct hv_storvsc_request	hs_reset_req;
 };
 
 
 /**
  * HyperV storvsc timeout testing cases:
  * a. IO returned after first timeout;
  * b. IO returned after second timeout and queue freeze;
  * c. IO returned while timer handler is running
  * The first can be tested by "sg_senddiag -vv /dev/daX",
  * and the second and third can be done by
  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
  */
 #define HVS_TIMEOUT_TEST 0
 
 /*
  * Bus/adapter reset functionality on the Hyper-V host is
  * buggy and it will be disabled until
  * it can be further tested.
  */
 #define HVS_HOST_RESET 0
 
 struct storvsc_driver_props {
 	char		*drv_name;
 	char		*drv_desc;
 	uint8_t		drv_max_luns_per_target;
 	uint8_t		drv_max_ios_per_target;
 	uint32_t	drv_ringbuffer_size;
 };
 
 enum hv_storage_type {
 	DRIVER_BLKVSC,
 	DRIVER_STORVSC,
 	DRIVER_UNKNOWN
 };
 
 #define HS_MAX_ADAPTERS 10
 
 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
 
 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
 static const hv_guid gStorVscDeviceType={
 	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
 		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
 };
 
 /* {32412632-86cb-44a2-9b5c-50d1417354f5} */
 static const hv_guid gBlkVscDeviceType={
 	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
 		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
 };
 
 static struct storvsc_driver_props g_drv_props_table[] = {
 	{"blkvsc", "Hyper-V IDE Storage Interface",
 	 BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS,
 	 STORVSC_RINGBUFFER_SIZE},
 	{"storvsc", "Hyper-V SCSI Storage Interface",
 	 STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS,
 	 STORVSC_RINGBUFFER_SIZE}
 };
 
 /*
  * Sense buffer size changed in win8; have a run-time
  * variable to track the size we should use.
  */
 static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE;
 
 /*
  * The size of the vmscsi_request has changed in win8. The
  * additional size is for the newly added elements in the
  * structure. These elements are valid only when we are talking
  * to a win8 host.
  * Track the correct size we need to apply.
  */
 static int vmscsi_size_delta;
 /*
  * The storage protocol version is determined during the
  * initial exchange with the host.  It will indicate which
  * storage functionality is available in the host.
 */
 static int vmstor_proto_version;
 
 /*
  * One supported storage protocol version together with the sense buffer
  * size and vmscsi size delta that go with it.
  */
 struct vmstor_proto {
         int proto_version;
         int sense_buffer_size;
         int vmscsi_size_delta;
 };
 
 /*
  * Table of known protocol versions, listed from the WIN10 entry down to
  * the WIN6 entry.  The WIN8 and later entries use the post-Win7 sense
  * buffer size with a zero size delta; the WIN7 and WIN6 entries use the
  * pre-Win8 sense buffer size and a delta of
  * sizeof(struct vmscsi_win8_extension).
  *
  * NOTE(review): presumably walked in order during version negotiation so
  * that the first entry the host accepts wins -- confirm in the
  * connect/negotiation code.
  */
 static const struct vmstor_proto vmstor_proto_list[] = {
         {
                 VMSTOR_PROTOCOL_VERSION_WIN10,
                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                 0
         },
         {
                 VMSTOR_PROTOCOL_VERSION_WIN8_1,
                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                 0
         },
         {
                 VMSTOR_PROTOCOL_VERSION_WIN8,
                 POST_WIN7_STORVSC_SENSE_BUFFER_SIZE,
                 0
         },
         {
                 VMSTOR_PROTOCOL_VERSION_WIN7,
                 PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
                 sizeof(struct vmscsi_win8_extension),
         },
         {
                 VMSTOR_PROTOCOL_VERSION_WIN6,
                 PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE,
                 sizeof(struct vmscsi_win8_extension),
         }
 };
 
 /* static functions */
 static int storvsc_probe(device_t dev);
 static int storvsc_attach(device_t dev);
 static int storvsc_detach(device_t dev);
 static void storvsc_poll(struct cam_sim * sim);
 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
 static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
 static void hv_storvsc_rescan_target(struct storvsc_softc *sc);
 static void hv_storvsc_on_channel_callback(void *context);
 static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
 					struct vstor_packet *vstor_packet,
 					struct hv_storvsc_request *request);
 static int hv_storvsc_connect_vsp(struct hv_device *device);
 static void storvsc_io_done(struct hv_storvsc_request *reqp);
 static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
 				bus_dma_segment_t *orig_sgl,
 				unsigned int orig_sgl_count,
 				uint64_t seg_bits);
 void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
 				unsigned int dest_sgl_count,
 				struct sglist* src_sgl,
 				uint64_t seg_bits);
 
 static device_method_t storvsc_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		storvsc_probe),
 	DEVMETHOD(device_attach,	storvsc_attach),
 	DEVMETHOD(device_detach,	storvsc_detach),
 	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
 	DEVMETHOD_END
 };
 
 static driver_t storvsc_driver = {
 	"storvsc", storvsc_methods, sizeof(struct storvsc_softc),
 };
 
 static devclass_t storvsc_devclass;
 DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0);
 MODULE_VERSION(storvsc, 1);
 MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
 
 
 /**
  * The host is capable of sending messages to us that are
  * completely unsolicited. So, we need to address the race
  * condition where we may be in the process of unloading the
  * driver when the host may send us an unsolicited message.
  * We address this issue by implementing a sequentially
  * consistent protocol:
  *
  * 1. Channel callback is invoked while holding the channel lock
  *    and an unloading driver will reset the channel callback under
  *    the protection of this channel lock.
  *
  * 2. To ensure bounded wait time for unloading a driver, we don't
  *    permit outgoing traffic once the device is marked as being
  *    destroyed.
  *
  * 3. Once the device is marked as being destroyed, we only
  *    permit incoming traffic to properly account for
  *    packets already sent out.
  */
 /*
  * Look up the softc behind 'device' and decide whether traffic in the
  * requested direction is still allowed.
  *
  * Returns the softc, or NULL when
  *  - outbound is set and the device is being destroyed, or
  *  - outbound is clear, the device is being destroyed and no outgoing
  *    requests remain to be accounted for.
  */
 static inline struct storvsc_softc *
 get_stor_device(struct hv_device *device,
 				boolean_t outbound)
 {
 	struct storvsc_softc *softc = device_get_softc(device->device);
 
 	/* Not being destroyed: usable in either direction. */
 	if (!softc->hs_destroy)
 		return softc;
 
 	/* Being destroyed: no new outgoing I/O is permitted. */
 	if (outbound)
 		return NULL;
 
 	/*
 	 * Inbound while being destroyed: only permitted to account for
 	 * messages already sent out.
 	 */
 	if (softc->hs_num_out_reqs == 0)
 		return NULL;
 
 	return softc;
 }
 
 /*
  * Open one sub-channel of a storvsc device.
  *
  * The sub-channel is opened with the same ring buffer size for both
  * directions (drv_ringbuffer_size) and with
  * hv_storvsc_on_channel_callback as its callback, receiving the sub-channel
  * itself as context.  Nothing is done if the owning device is already being
  * destroyed.
  */
 static void
 storvsc_subchan_attach(struct hv_vmbus_channel *new_channel)
 {
 	struct hv_device *device;
 	struct storvsc_softc *sc;
 	struct vmstor_chan_props props;
 	int ret = 0;
 
 	device = new_channel->device;
 	/* Outbound lookup: yields NULL once hs_destroy has been set. */
 	sc = get_stor_device(device, TRUE);
 	if (sc == NULL)
 		return;
 
 	memset(&props, 0, sizeof(props));
 
 	/*
 	 * NOTE(review): vmbus_channel_cpu_rr() is defined elsewhere; by its
 	 * name it presumably picks the channel's CPU round-robin -- confirm.
 	 */
+	vmbus_channel_cpu_rr(new_channel);
 	/*
 	 * NOTE(review): ret is assigned but never examined, so a failed open
 	 * goes unreported here.
 	 */
 	ret = hv_vmbus_channel_open(new_channel,
 	    sc->hs_drv_props->drv_ringbuffer_size,
   	    sc->hs_drv_props->drv_ringbuffer_size,
 	    (void *)&props,
 	    sizeof(struct vmstor_chan_props),
 	    hv_storvsc_on_channel_callback,
 	    new_channel);
 
 	return;
 }
 
 /**
  * @brief Send multi-channel creation request to host
  *
  * Asks the host to create MIN(max_chans, mp_ncpus) sub-channels using the
  * softc's hs_init_req, waits up to five seconds for the completion and then
  * opens every sub-channel through storvsc_subchan_attach().  Failures are
  * only reported with printf(); nothing is returned to the caller.
  *
  * The request's semaphore is (re)initialized here but not destroyed; the
  * visible caller, hv_storvsc_channel_init(), destroys it on its way out.
  *
  * @param dev  a Hyper-V device pointer
  * @param max_chans  the max channels supported by vmbus
  */
 static void
 storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
 {
 	struct hv_vmbus_channel **subchan;
 	struct storvsc_softc *sc;
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 	int request_channels_cnt;
 	int ret, i;
 
 	/* get multichannels count that need to create */
 	request_channels_cnt = MIN(max_chans, mp_ncpus);
 
 	sc = get_stor_device(dev, TRUE);
 	if (sc == NULL) {
 		printf("Storvsc_error: get sc failed while send mutilchannel "
 		    "request\n");
 		return;
 	}
 
 	request = &sc->hs_init_req;
 
 	/* request the host to create multi-channel */
 	memset(request, 0, sizeof(struct hv_storvsc_request));
 
 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
 
 	vstor_packet = &request->vstor_packet;
 
 	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 	vstor_packet->u.multi_channels_cnt = request_channels_cnt;
 
 	ret = hv_vmbus_channel_send_packet(
 	    dev->channel,
 	    vstor_packet,
 	    VSTOR_PKT_SIZE,
 	    (uint64_t)(uintptr_t)request,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
 	/*
 	 * If the packet never left, no completion can arrive: report the
 	 * send error right away instead of sleeping for the full timeout
 	 * and then blaming a timeout.
 	 */
 	if (ret != 0) {
 		printf("Storvsc_error: create multi-channel send failed, %d\n",
 		    ret);
 		return;
 	}
 
 	/* wait for 5 seconds */
 	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 	if (ret != 0) {
 		printf("Storvsc_error: create multi-channel timeout, %d\n",
 		    ret);
 		return;
 	}
 
 	/* The channel callback copied the host's reply into the request. */
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 	    vstor_packet->status != 0) {
 		printf("Storvsc_error: create multi-channel invalid operation "
 		    "(%d) or statue (%u)\n",
 		    vstor_packet->operation, vstor_packet->status);
 		return;
 	}
 
 	/* Wait for sub-channels setup to complete. */
 	subchan = vmbus_get_subchan(dev->channel, request_channels_cnt);
 
 	/* Attach the sub-channels. */
 	for (i = 0; i < request_channels_cnt; ++i)
 		storvsc_subchan_attach(subchan[i]);
 
 	/* Release the sub-channels. */
 	vmbus_rel_subchan(subchan, request_channels_cnt);
 
 	if (bootverbose)
 		printf("Storvsc create multi-channel success!\n");
 }
 
 /*
  * Send the packet currently staged in 'request' on the device's primary
  * channel with a completion requested, then wait up to five seconds for the
  * channel callback to copy the reply into the request and post its
  * semaphore.
  *
  * Returns 0 once the reply has arrived, otherwise the send or wait error.
  */
 static int
 hv_storvsc_init_xact(struct hv_device *dev, struct hv_storvsc_request *request)
 {
 	int ret;
 
 	ret = hv_vmbus_channel_send_packet(
 	    dev->channel,
 	    &request->vstor_packet,
 	    VSTOR_PKT_SIZE,
 	    (uint64_t)(uintptr_t)request,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret != 0)
 		return (ret);
 
 	/* wait 5 seconds */
 	return (sema_timedwait(&request->synch_sema, 5 * hz));
 }
 
 /**
  * @brief initialize channel connection to parent partition
  *
  * Runs the vsc/vsp initialization handshake on the already opened primary
  * channel: begin-initialization, protocol version negotiation (first entry
  * of vmstor_proto_list the host accepts), channel property query and
  * end-initialization.  When the host advertises multi-channel support the
  * sub-channels are requested afterwards.
  *
  * Every step must be answered with VSTOR_OPERATION_COMPLETEIO and, except
  * for rejected protocol versions, a zero status; anything else fails the
  * initialization with EINVAL.
  *
  * @param dev  a Hyper-V device pointer
  * @returns  0 on success, non-zero error on failure
  */
 static int
 hv_storvsc_channel_init(struct hv_device *dev)
 {
 	int ret = 0, i;
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 	struct storvsc_softc *sc;
 	uint16_t max_chans = 0;
 	boolean_t support_multichannel = FALSE;
 
 	sc = get_stor_device(dev, TRUE);
 	if (sc == NULL)
 		return (ENODEV);
 
 	request = &sc->hs_init_req;
 	memset(request, 0, sizeof(struct hv_storvsc_request));
 	vstor_packet = &request->vstor_packet;
 	request->softc = sc;
 
 	/**
 	 * Initiate the vsc/vsp initialization protocol on the open channel
 	 */
 	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
 
 	vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 	ret = hv_storvsc_init_xact(dev, request);
 	if (ret != 0)
 		goto cleanup;
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 	    vstor_packet->status != 0) {
 		/* Host rejected the request: do not report success. */
 		ret = EINVAL;
 		goto cleanup;
 	}
 
 	/* Offer the protocol versions in list order until one is accepted. */
 	for (i = 0; i < nitems(vmstor_proto_list); i++) {
 		/* reuse the packet for version range supported */
 
 		memset(vstor_packet, 0, sizeof(struct vstor_packet));
 		vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
 		vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 		vstor_packet->u.version.major_minor =
 		    vmstor_proto_list[i].proto_version;
 
 		/* revision is only significant for Windows guests */
 		vstor_packet->u.version.revision = 0;
 
 		ret = hv_storvsc_init_xact(dev, request);
 		if (ret != 0)
 			goto cleanup;
 
 		if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) {
 			ret = EINVAL;
 			goto cleanup;
 		}
 		if (vstor_packet->status == 0) {
 			/* Accepted: remember the per-version parameters. */
 			vmstor_proto_version =
 			    vmstor_proto_list[i].proto_version;
 			sense_buffer_size =
 			    vmstor_proto_list[i].sense_buffer_size;
 			vmscsi_size_delta =
 			    vmstor_proto_list[i].vmscsi_size_delta;
 			break;
 		}
 	}
 
 	/* A non-zero status here means no offered version was accepted. */
 	if (vstor_packet->status != 0) {
 		ret = EINVAL;
 		goto cleanup;
 	}
 
 	/**
 	 * Query channel properties
 	 */
 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 	ret = hv_storvsc_init_xact(dev, request);
 	if (ret != 0)
 		goto cleanup;
 
 	/* TODO: Check returned version */
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 	    vstor_packet->status != 0) {
 		ret = EINVAL;
 		goto cleanup;
 	}
 
 	/* multi-channels feature is supported by WIN8 and above version */
 	max_chans = vstor_packet->u.chan_props.max_channel_cnt;
 	if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) &&
 	    (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) &&
 	    (vstor_packet->u.chan_props.flags &
 	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
 		support_multichannel = TRUE;
 	}
 
 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
 	ret = hv_storvsc_init_xact(dev, request);
 	if (ret != 0)
 		goto cleanup;
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 	    vstor_packet->status != 0) {
 		ret = EINVAL;
 		goto cleanup;
 	}
 
 	/*
 	 * If multi-channel is supported, send multichannel create
 	 * request to host.
 	 */
 	if (support_multichannel)
 		storvsc_send_multichannel_request(dev, max_chans);
 
 cleanup:
 	sema_destroy(&request->synch_sema);
 	return (ret);
 }
 
 /**
  * @brief Open channel connection to parent partition StorVSP driver
  *
  * Open and initialize channel connection to parent partition StorVSP driver.
  * The primary channel is opened first; on success the initialization
  * handshake in hv_storvsc_channel_init() is run and its result returned.
  *
  * @param dev pointer to a Hyper-V device
  * @returns 0 on success, non-zero error on failure
  */
 static int
 hv_storvsc_connect_vsp(struct hv_device *dev)
 {	
 	int ret = 0;
 	struct vmstor_chan_props props;
 	struct storvsc_softc *sc;
 
 	sc = device_get_softc(dev->device);
 		
 	memset(&props, 0, sizeof(struct vmstor_chan_props));
 
 	/*
 	 * Open the channel.  Both ring buffers use drv_ringbuffer_size and
 	 * hv_storvsc_on_channel_callback is installed with the channel
 	 * itself as its context.
 	 */
-
+	vmbus_channel_cpu_rr(dev->channel);
 	ret = hv_vmbus_channel_open(
 		dev->channel,
 		sc->hs_drv_props->drv_ringbuffer_size,
 		sc->hs_drv_props->drv_ringbuffer_size,
 		(void *)&props,
 		sizeof(struct vmstor_chan_props),
 		hv_storvsc_on_channel_callback,
 		dev->channel);
 
 	if (ret != 0) {
 		return ret;
 	}
 
 	/*
 	 * NOTE(review): if the handshake below fails, the channel opened
 	 * above is not closed in this function.
 	 */
 	ret = hv_storvsc_channel_init(dev);
 
 	return (ret);
 }
 
 #if HVS_HOST_RESET
 /*
  * Ask the host to reset the bus.
  *
  * Sends VSTOR_OPERATION_RESETBUS using the softc's dedicated hs_reset_req
  * and waits up to five seconds for the completion posted by the channel
  * callback.  Returns ENODEV when the device is being destroyed, otherwise
  * the result of the send or of the timed wait.
  */
 static int
 hv_storvsc_host_reset(struct hv_device *dev)
 {
 	struct storvsc_softc *softc;
 	struct hv_storvsc_request *reset_req;
 	struct vstor_packet *pkt;
 	int status;
 
 	softc = get_stor_device(dev, TRUE);
 	if (softc == NULL)
 		return (ENODEV);
 
 	reset_req = &softc->hs_reset_req;
 	reset_req->softc = softc;
 	pkt = &reset_req->vstor_packet;
 
 	sema_init(&reset_req->synch_sema, 0, "stor synch sema");
 
 	pkt->operation = VSTOR_OPERATION_RESETBUS;
 	pkt->flags = REQUEST_COMPLETION_FLAG;
 
 	status = hv_vmbus_channel_send_packet(dev->channel,
 	    pkt,
 	    VSTOR_PKT_SIZE,
 	    (uint64_t)(uintptr_t)&softc->hs_reset_req,
 	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
 	/*
 	 * Only wait when the packet was actually sent.  Once the wait
 	 * succeeds, all outstanding requests in the adapter should have
 	 * been flushed out and returned to us.
 	 */
 	if (status == 0)
 		status = sema_timedwait(&reset_req->synch_sema, 5 * hz);
 
 	sema_destroy(&reset_req->synch_sema);
 	return (status);
 }
 #endif /* HVS_HOST_RESET */
 
 /**
  * @brief Function to initiate an I/O request
  *
  * @param device Hyper-V device pointer
  * @param request pointer to a request structure
  * @returns 0 on success, non-zero error on failure
  */
 static int
 hv_storvsc_io_request(struct hv_device *device,
 					  struct hv_storvsc_request *request)
 {
 	struct storvsc_softc *sc;
 	struct vstor_packet *vstor_packet = &request->vstor_packet;
 	struct hv_vmbus_channel* outgoing_channel = NULL;
 	int ret = 0;
 
 	sc = get_stor_device(device, TRUE);
 
 	if (sc == NULL) {
 		return ENODEV;
 	}
 
 	vstor_packet->flags |= REQUEST_COMPLETION_FLAG;
 
 	vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE;
 	
 	vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size;
 
 	vstor_packet->u.vm_srb.transfer_len = request->data_buf.length;
 
 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
 
 	outgoing_channel = vmbus_select_outgoing_channel(device->channel);
 
 	mtx_unlock(&request->softc->hs_lock);
 	if (request->data_buf.length) {
 		ret = hv_vmbus_channel_send_packet_multipagebuffer(
 				outgoing_channel,
 				&request->data_buf,
 				vstor_packet,
 				VSTOR_PKT_SIZE,
 				(uint64_t)(uintptr_t)request);
 
 	} else {
 		ret = hv_vmbus_channel_send_packet(
 			outgoing_channel,
 			vstor_packet,
 			VSTOR_PKT_SIZE,
 			(uint64_t)(uintptr_t)request,
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	}
 	mtx_lock(&request->softc->hs_lock);
 
 	if (ret != 0) {
 		printf("Unable to send packet %p ret %d", vstor_packet, ret);
 	} else {
 		atomic_add_int(&sc->hs_num_out_reqs, 1);
 	}
 
 	return (ret);
 }
 
 
 /**
  * Process IO_COMPLETION_OPERATION and ready
  * the result to be completed for upper layer
  * processing by the CAM layer.
  */
 static void
 hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
 			   struct vstor_packet *vstor_packet,
 			   struct hv_storvsc_request *request)
 {
 	struct vmscsi_req *vm_srb;
 
 	vm_srb = &vstor_packet->u.vm_srb;
 
 	/*
 	 * Copy some fields of the host's response into the request structure,
 	 * because the fields will be used later in storvsc_io_done().
 	 */
 	request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
 	request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
 
 	if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
 			(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
 		/* Autosense data available */
 
 		KASSERT(vm_srb->sense_info_len <= request->sense_info_len,
 				("vm_srb->sense_info_len <= "
 				 "request->sense_info_len"));
 
 		memcpy(request->sense_data, vm_srb->u.sense_data,
 			vm_srb->sense_info_len);
 
 		request->sense_info_len = vm_srb->sense_info_len;
 	}
 
 	/* Complete request by passing to the CAM layer */
 	storvsc_io_done(request);
 	atomic_subtract_int(&sc->hs_num_out_reqs, 1);
 	if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) {
 		sema_post(&sc->hs_drain_sema);
 	}
 }
 
 /*
  * Schedule a CAM rescan on the SIM's path.
  *
  * A CCB is allocated without sleeping and a wildcard path is created for
  * it; on either failure a message is printed and the rescan is dropped.
  * The target is currently always the wildcard, which selects a bus scan.
  */
 static void
 hv_storvsc_rescan_target(struct storvsc_softc *sc)
 {
 	path_id_t bus_path = cam_sim_path(sc->hs_sim);
 	target_id_t target = CAM_TARGET_WILDCARD;
 	union ccb *scan_ccb;
 
 	/*
 	 * Allocate a CCB and schedule a rescan.
 	 */
 	scan_ccb = xpt_alloc_ccb_nowait();
 	if (scan_ccb == NULL) {
 		printf("unable to alloc CCB for rescan\n");
 		return;
 	}
 
 	if (xpt_create_path(&scan_ccb->ccb_h.path, NULL, bus_path, target,
 	    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
 		printf("unable to create path for rescan, pathid: %u,"
 		    "targetid: %u\n", bus_path, target);
 		xpt_free_ccb(scan_ccb);
 		return;
 	}
 
 	scan_ccb->ccb_h.func_code =
 	    (target == CAM_TARGET_WILDCARD) ? XPT_SCAN_BUS : XPT_SCAN_TGT;
 
 	xpt_rescan(scan_ccb);
 }
 
 /*
  * Channel callback: drain every packet pending on the channel.
  *
  * 'context' is the channel the callback was registered for.  Replies to the
  * softc's init and reset requests are copied into the request and its
  * semaphore is posted; all other packets are dispatched on their operation
  * code.  Receiving stops on the first receive error or empty read.
  */
 static void
 hv_storvsc_on_channel_callback(void *context)
 {
 	hv_vmbus_channel *chan = (hv_vmbus_channel *)context;
 	struct hv_device *device;
 	struct storvsc_softc *sc;
 	struct hv_storvsc_request *request;
 	struct vstor_packet *reply;
 	uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)];
 	uint64_t request_id;
 	uint32_t bytes_recvd;
 	int ret;
 
 	device = chan->device;
 	KASSERT(device, ("device is NULL"));
 
 	/* Inbound lookup: still succeeds during teardown while I/O is pending. */
 	sc = get_stor_device(device, FALSE);
 	if (sc == NULL) {
 		printf("Storvsc_error: get stor device failed.\n");
 		return;
 	}
 
 	for (;;) {
 		ret = hv_vmbus_channel_recv_packet(
 		    chan,
 		    packet,
 		    roundup2(VSTOR_PKT_SIZE, 8),
 		    &bytes_recvd,
 		    &request_id);
 		if (ret != 0 || bytes_recvd == 0)
 			break;
 
 		/* The request id carries the request pointer given at send. */
 		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
 
 		if (request == &sc->hs_init_req ||
 		    request == &sc->hs_reset_req) {
 			/* Synchronous control request: wake the waiter. */
 			memcpy(&request->vstor_packet, packet,
 			    sizeof(struct vstor_packet));
 			sema_post(&request->synch_sema);
 			continue;
 		}
 
 		reply = (struct vstor_packet *)packet;
 		switch (reply->operation) {
 		case VSTOR_OPERATION_COMPLETEIO:
 			if (request == NULL)
 				panic("VMBUS: storvsc received a "
 				    "packet with NULL request id in "
 				    "COMPLETEIO operation.");
 
 			hv_storvsc_on_iocompletion(sc, reply, request);
 			break;
 		case VSTOR_OPERATION_REMOVEDEVICE:
 			printf("VMBUS: storvsc operation %d not "
 			    "implemented.\n", reply->operation);
 			/* TODO: implement */
 			break;
 		case VSTOR_OPERATION_ENUMERATE_BUS:
 			hv_storvsc_rescan_target(sc);
 			break;
 		default:
 			break;
 		}
 	}
 }
 
 /**
  * @brief StorVSC probe function
  *
  * Device probe function.  Returns BUS_PROBE_DEFAULT if the input device is
  * a StorVSC device.  Otherwise, ENXIO is returned.  If the input device is
  * a BlkVSC (paravirtual IDE) device, it is claimed only when the
  * hw.ata.disk_enable environment variable cannot be read as an integer;
  * when that variable is set, ENXIO is returned in favor of the emulated
  * ATA/IDE device.
  *
  * @param dev a device
  * @returns BUS_PROBE_DEFAULT on a match, ENXIO if not a matching StorVSC
  * device
  */
 static int
 storvsc_probe(device_t dev)
 {
 	int disk_enable = 0;
 
 	switch (storvsc_get_storage_type(dev)) {
 	case DRIVER_BLKVSC:
 		if (bootverbose)
 			device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n");
 
 		/* Tunable present: leave the disk to the emulated driver. */
 		if (getenv_int("hw.ata.disk_enable", &disk_enable)) {
 			if (bootverbose)
 				device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n");
 			return (ENXIO);
 		}
 
 		if (bootverbose)
 			device_printf(dev,
 			    "Enlightened ATA/IDE detected\n");
 		device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
 		return (BUS_PROBE_DEFAULT);
 
 	case DRIVER_STORVSC:
 		if (bootverbose)
 			device_printf(dev, "Enlightened SCSI device detected\n");
 		device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
 		return (BUS_PROBE_DEFAULT);
 
 	default:
 		return (ENXIO);
 	}
 }
 
 /**
  * @brief StorVSC attach function
  *
  * Function responsible for allocating per-device structures,
  * setting up CAM interfaces and scanning for available LUNs to
  * be used for SCSI device peripherals.
  *
  * @param a device
  * @returns 0 on success or an error on failure
  */
 static int
 storvsc_attach(device_t dev)
 {
 	struct hv_device *hv_dev = vmbus_get_devctx(dev);
 	enum hv_storage_type stor_type;
 	struct storvsc_softc *sc;
 	struct cam_devq *devq;
 	int ret, i, j;
 	struct hv_storvsc_request *reqp;
 	struct root_hold_token *root_mount_token = NULL;
 	struct hv_sgl_node *sgl_node = NULL;
 	void *tmp_buff = NULL;
 
 	/*
 	 * We need to serialize storvsc attach calls.
 	 */
 	root_mount_token = root_mount_hold("storvsc");
 
 	sc = device_get_softc(dev);
 
 	stor_type = storvsc_get_storage_type(dev);
 
 	if (stor_type == DRIVER_UNKNOWN) {
 		ret = ENODEV;
 		goto cleanup;
 	}
 
 	/* fill in driver specific properties */
 	sc->hs_drv_props = &g_drv_props_table[stor_type];
 
 	/* fill in device specific properties */
 	sc->hs_unit	= device_get_unit(dev);
 	sc->hs_dev	= hv_dev;
 
 	LIST_INIT(&sc->hs_free_list);
 	mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
 
 	/*
 	 * Pre-allocate the per-device request pool: one zeroed request per
 	 * I/O slot (drv_max_ios_per_target), all parked on hs_free_list.
 	 */
 	for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) {
 		reqp = malloc(sizeof(struct hv_storvsc_request),
 				 M_DEVBUF, M_WAITOK|M_ZERO);
 		reqp->softc = sc;
 
 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
 	}
 
 	/*
 	 * create sg-list page pool
 	 *
 	 * The pool is global (shared by all storvsc devices) and is built
 	 * only by the first attach that finds is_init still FALSE.
 	 */
 	if (FALSE == g_hv_sgl_page_pool.is_init) {
 		g_hv_sgl_page_pool.is_init = TRUE;
 		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
 		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
 
 		/*
 		 * Pre-create SG list, each SG list with
 		 * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each
 		 * segment has one page buffer
 		 */
 		for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
 	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
 			    M_DEVBUF, M_WAITOK|M_ZERO);
 
 			sgl_node->sgl_data =
 			    sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT,
 			    M_WAITOK|M_ZERO);
 
 			for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
 				tmp_buff = malloc(PAGE_SIZE,
 				    M_DEVBUF, M_WAITOK|M_ZERO);
 
 				/*
 				 * The malloc()ed pointer itself is stored in
 				 * ss_paddr; the cleanup code casts it back
 				 * to a pointer before free().
 				 */
 				sgl_node->sgl_data->sg_segs[j].ss_paddr =
 				    (vm_paddr_t)tmp_buff;
 			}
 
 			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
 			    sgl_node, link);
 		}
 	}
 
 	sc->hs_destroy = FALSE;
 	sc->hs_drain_notify = FALSE;
 	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
 
 	/* Open the channel and run the initialization handshake. */
 	ret = hv_storvsc_connect_vsp(hv_dev);
 	if (ret != 0) {
 		goto cleanup;
 	}
 
 	/*
 	 * Create the device queue.
 	 * Hyper-V maps each target to one SCSI HBA
 	 */
 	devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target);
 	if (devq == NULL) {
 		device_printf(dev, "Failed to alloc device queue\n");
 		ret = ENOMEM;
 		goto cleanup;
 	}
 
 	/* The SIM uses hs_lock as its lock; action/poll run with it held. */
 	sc->hs_sim = cam_sim_alloc(storvsc_action,
 				storvsc_poll,
 				sc->hs_drv_props->drv_name,
 				sc,
 				sc->hs_unit,
 				&sc->hs_lock, 1,
 				sc->hs_drv_props->drv_max_ios_per_target,
 				devq);
 
 	if (sc->hs_sim == NULL) {
 		device_printf(dev, "Failed to alloc sim\n");
 		cam_simq_free(devq);
 		ret = ENOMEM;
 		goto cleanup;
 	}
 
 	mtx_lock(&sc->hs_lock);
 	/* bus_id is set to 0, need to get it from VMBUS channel query? */
 	if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) {
 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
 		mtx_unlock(&sc->hs_lock);
 		device_printf(dev, "Unable to register SCSI bus\n");
 		ret = ENXIO;
 		goto cleanup;
 	}
 
 	if (xpt_create_path(&sc->hs_path, /*periph*/NULL,
 		 cam_sim_path(sc->hs_sim),
 		CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
 		xpt_bus_deregister(cam_sim_path(sc->hs_sim));
 		cam_sim_free(sc->hs_sim, /*free_devq*/TRUE);
 		mtx_unlock(&sc->hs_lock);
 		device_printf(dev, "Unable to create path\n");
 		ret = ENXIO;
 		goto cleanup;
 	}
 
 	mtx_unlock(&sc->hs_lock);
 
 	root_mount_rel(root_mount_token);
 	return (0);
 
 
 	/*
 	 * Failure path: release the root mount hold, then free the request
 	 * pool and whatever is on the global SG pool's free list.
 	 *
 	 * NOTE(review): this path neither closes the channel opened by
 	 * hv_storvsc_connect_vsp() nor destroys hs_lock / hs_drain_sema.
 	 * NOTE(review): the global SG pool is emptied here but is_init is
 	 * left TRUE, so in the visible code a later attach would not rebuild
 	 * it; other attached devices may also still rely on the pool.
 	 */
 cleanup:
 	root_mount_rel(root_mount_token);
 	while (!LIST_EMPTY(&sc->hs_free_list)) {
 		reqp = LIST_FIRST(&sc->hs_free_list);
 		LIST_REMOVE(reqp, link);
 		free(reqp, M_DEVBUF);
 	}
 
 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
 		LIST_REMOVE(sgl_node, link);
 		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
 			if (NULL !=
 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
 			}
 		}
 		sglist_free(sgl_node->sgl_data);
 		free(sgl_node, M_DEVBUF);
 	}
 
 	return (ret);
 }
 
 /**
  * @brief StorVSC device detach function
  *
  * This function is responsible for safely detaching a
  * StorVSC device.  This includes waiting for inbound responses
  * to complete and freeing associated per-device structures.
  *
  * @param dev a device
  * returns 0 on success
  */
 static int
 storvsc_detach(device_t dev)
 {
 	struct storvsc_softc *sc = device_get_softc(dev);
 	struct hv_storvsc_request *reqp = NULL;
 	struct hv_device *hv_device = vmbus_get_devctx(dev);
 	struct hv_sgl_node *sgl_node = NULL;
 	int j = 0;
 
 	sc->hs_destroy = TRUE;
 
 	/*
 	 * At this point, all outbound traffic should be disabled. We
 	 * only allow inbound traffic (responses) to proceed so that
 	 * outstanding requests can be completed.
 	 *
 	 * The drain semaphore is only posted when an outstanding request
 	 * finishes, so sleep on it only if something is actually
 	 * outstanding; otherwise detach would block indefinitely.
 	 * hs_drain_notify is published before the count is sampled (with a
 	 * full fence in between) so that a completion racing with this check
 	 * either is seen in the count or sees the flag and posts.
 	 */
 
 	sc->hs_drain_notify = TRUE;
 	atomic_thread_fence_seq_cst();
 	if (sc->hs_num_out_reqs != 0)
 		sema_wait(&sc->hs_drain_sema);
 	sc->hs_drain_notify = FALSE;
 
 	/*
 	 * Since we have already drained, we don't need to busy wait.
 	 * The call to close the channel will reset the callback
 	 * under the protection of the incoming channel lock.
 	 */
 
 	hv_vmbus_channel_close(hv_device->channel);
 
 	/* Free the per-device request pool. */
 	mtx_lock(&sc->hs_lock);
 	while (!LIST_EMPTY(&sc->hs_free_list)) {
 		reqp = LIST_FIRST(&sc->hs_free_list);
 		LIST_REMOVE(reqp, link);
 
 		free(reqp, M_DEVBUF);
 	}
 	mtx_unlock(&sc->hs_lock);
 
 	/*
 	 * Free whatever sits on the global SG pool's free list; each segment
 	 * holds a malloc()ed page pointer stored in ss_paddr.
 	 * NOTE(review): the pool is shared and is_init is not reset here --
 	 * confirm this is intended when several devices are attached.
 	 */
 	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
 		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
 		LIST_REMOVE(sgl_node, link);
 		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){
 			if (NULL !=
 			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
 				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
 			}
 		}
 		sglist_free(sgl_node->sgl_data);
 		free(sgl_node, M_DEVBUF);
 	}
 
 	return (0);
 }
 
 #if HVS_TIMEOUT_TEST
 /**
  * @brief unit test for timed out operations
  *
  * This function provides unit testing capability to simulate
  * timed out operations.  Recompilation with HVS_TIMEOUT_TEST=1
  * is required.
  *
  * The request is only sent when the first CDB byte equals 'opcode'.  When
  * the send fails the CCB is completed with CAM_PROVIDE_FAIL and the request
  * is released.
  *
  * @param reqp pointer to a request structure
  * @param opcode SCSI operation being performed
  * @param wait if 1, wait for I/O to complete
  */
 static void
 storvsc_timeout_test(struct hv_storvsc_request *reqp,
 		uint8_t opcode, int wait)
 {
 	int ret;
 	union ccb *ccb = reqp->ccb;
 	struct storvsc_softc *sc = reqp->softc;
 
 	/*
 	 * NOTE(review): the rest of this file reaches the SRB through
 	 * vstor_packet.u.vm_srb; this access omits the 'u' member -- confirm
 	 * it still compiles when HVS_TIMEOUT_TEST is enabled.
 	 */
 	if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) {
 		return;
 	}
 
 	/* Take the event mutex first so the wakeup cannot be missed. */
 	if (wait) {
 		mtx_lock(&reqp->event.mtx);
 	}
 	ret = hv_storvsc_io_request(sc->hs_dev, reqp);
 	if (ret != 0) {
 		if (wait) {
 			mtx_unlock(&reqp->event.mtx);
 		}
 		printf("%s: io_request failed with %d.\n",
 				__func__, ret);
 		ccb->ccb_h.status = CAM_PROVIDE_FAIL;
 		mtx_lock(&sc->hs_lock);
 		storvsc_free_request(sc, reqp);
 		xpt_done(ccb);
 		mtx_unlock(&sc->hs_lock);
 		return;
 	}
 
 	if (wait) {
 		xpt_print(ccb->ccb_h.path,
 				"%u: %s: waiting for IO return.\n",
 				ticks, __func__);
 		/* Wait up to 60 seconds for the completion to signal the cv. */
 		ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz);
 		mtx_unlock(&reqp->event.mtx);
 		xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n",
 				ticks, __func__, (ret == 0)?
 				"IO return detected" :
 				"IO return not detected");
 		/*
 		 * Now both the timer handler and io done are running
 		 * simultaneously. We want to confirm the io done always
 		 * finishes after the timer handler exits. So reqp used by
 		 * timer handler is not freed or stale. Do busy loop for
 		 * another 1/10 second to make sure io done does
 		 * wait for the timer handler to complete.
 		 */
 		DELAY(100*1000);
 		mtx_lock(&sc->hs_lock);
 		xpt_print(ccb->ccb_h.path,
 				"%u: %s: finishing, queue frozen %d, "
 				"ccb status 0x%x scsi_status 0x%x.\n",
 				ticks, __func__, sc->hs_frozen,
 				ccb->ccb_h.status,
 				ccb->csio.scsi_status);
 		mtx_unlock(&sc->hs_lock);
 	}
 }
 #endif /* HVS_TIMEOUT_TEST */
 
 #ifdef notyet
 /**
  * @brief timeout handler for requests
  *
  * This function is called as a result of a callout expiring.  The first
  * expiry of a request is only logged and the callout is re-armed for
  * another timeout period; any later expiry freezes the SIM queue (unless it
  * is already frozen).
  *
  * @param arg pointer to a request
  */
 static void
 storvsc_timeout(void *arg)
 {
 	struct hv_storvsc_request *req = arg;
 	struct storvsc_softc *softc = req->softc;
 	union ccb *ccb = req->ccb;
 
 	if (req->retries != 0) {
 		/* Not the first expiry: stop feeding the queue. */
 		mtx_lock(&softc->hs_lock);
 		xpt_print(ccb->ccb_h.path,
 			"%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n",
 			ticks, req, ccb->ccb_h.timeout * (req->retries+1) / 1000,
 			(softc->hs_frozen == 0)?
 			"freezing the queue" : "the queue is already frozen");
 		if (softc->hs_frozen == 0) {
 			softc->hs_frozen = 1;
 			xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1);
 		}
 		mtx_unlock(&softc->hs_lock);
 
 #if HVS_TIMEOUT_TEST
 		storvsc_timeout_test(req, MODE_SELECT_10, 1);
 #endif
 		return;
 	}
 
 	/* First expiry: report it and give the request one more period. */
 	mtx_lock(&softc->hs_lock);
 	xpt_print(ccb->ccb_h.path,
 	    "%u: IO timed out (req=0x%p), wait for another %u secs.\n",
 	    ticks, req, ccb->ccb_h.timeout / 1000);
 	cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
 	mtx_unlock(&softc->hs_lock);
 
 	req->retries++;
 	callout_reset_sbt(&req->callout, SBT_1MS * ccb->ccb_h.timeout,
 	    0, storvsc_timeout, req, 0);
 #if HVS_TIMEOUT_TEST
 	storvsc_timeout_test(req, SEND_DIAGNOSTIC, 0);
 #endif
 }
 #endif
 
 /**
  * @brief StorVSC device poll function
  *
  * This function is responsible for servicing requests when
  * interrupts are disabled (i.e when we are dumping core.)
  *
  * @param sim a pointer to a CAM SCSI interface module
  */
 static void
 storvsc_poll(struct cam_sim *sim)
 {
 	struct storvsc_softc *softc;
 
 	softc = cam_sim_softc(sim);
 
 	/* Entered with the softc lock held; it is held again on return. */
 	mtx_assert(&softc->hs_lock, MA_OWNED);
 
 	/* Run the channel callback by hand with the lock dropped. */
 	mtx_unlock(&softc->hs_lock);
 	hv_storvsc_on_channel_callback(softc->hs_dev->channel);
 	mtx_lock(&softc->hs_lock);
 }
 
 /**
  * @brief StorVSC device action function
  *
  * This function is responsible for handling SCSI operations which
  * are passed from the CAM layer.  The requests are in the form of
  * CAM control blocks which indicate the action being performed.
  * Not all actions require converting the request to a VSCSI protocol
  * message - these actions can be responded to by this driver.
  * Requests which are destined for a backend storage device are converted
  * to a VSCSI protocol message and sent on the channel connection associated
  * with this device.
  *
  * @param sim pointer to a CAM SCSI interface module
  * @param ccb pointer to a CAM control block
  */
 static void
 storvsc_action(struct cam_sim *sim, union ccb *ccb)
 {
 	struct storvsc_softc *sc = cam_sim_softc(sim);
 	int res;
 
 	/* Every path below runs with the softc lock held. */
 	mtx_assert(&sc->hs_lock, MA_OWNED);
 	switch (ccb->ccb_h.func_code) {
 	case XPT_PATH_INQ: {
 		/* Describe this SIM/HBA to CAM; answered locally. */
 		struct ccb_pathinq *cpi = &ccb->cpi;
 
 		cpi->version_num = 1;
 		cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE;
 		cpi->target_sprt = 0;
 		cpi->hba_misc = PIM_NOBUSRESET;
 		cpi->hba_eng_cnt = 0;
 		cpi->max_target = STORVSC_MAX_TARGETS;
 		cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target;
 		cpi->initiator_id = cpi->max_target;
 		cpi->bus_id = cam_sim_bus(sim);
 		cpi->base_transfer_speed = 300000;
 		cpi->transport = XPORT_SAS;
 		cpi->transport_version = 0;
 		cpi->protocol = PROTO_SCSI;
 		cpi->protocol_version = SCSI_REV_SPC2;
 		strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
 		strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN);
 		strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
 		cpi->unit_number = cam_sim_unit(sim);
 
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(ccb);
 		return;
 	}
 	case XPT_GET_TRAN_SETTINGS: {
 		/* Report fixed transport settings; answered locally. */
 		struct  ccb_trans_settings *cts = &ccb->cts;
 
 		cts->transport = XPORT_SAS;
 		cts->transport_version = 0;
 		cts->protocol = PROTO_SCSI;
 		cts->protocol_version = SCSI_REV_SPC2;
 
 		/* enable tag queuing and disconnected mode */
 		cts->proto_specific.valid = CTS_SCSI_VALID_TQ;
 		cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
 		cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
 		cts->xport_specific.valid = CTS_SPI_VALID_DISC;
 		cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
 
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(ccb);
 		return;
 	}
 	case XPT_SET_TRAN_SETTINGS:	{
 		/* Accepted without changing anything. */
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(ccb);
 		return;
 	}
 	case XPT_CALC_GEOMETRY:{
 		cam_calc_geometry(&ccb->ccg, 1);
 		xpt_done(ccb);
 		return;
 	}
 	case  XPT_RESET_BUS:
 	case  XPT_RESET_DEV:{
 #if HVS_HOST_RESET
 		if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) {
 			xpt_print(ccb->ccb_h.path,
 				"hv_storvsc_host_reset failed with %d\n", res);
 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
 			xpt_done(ccb);
 			return;
 		}
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(ccb);
 		return;
 #else
 		xpt_print(ccb->ccb_h.path,
 				  "%s reset not supported.\n",
 				  (ccb->ccb_h.func_code == XPT_RESET_BUS)?
 				  "bus" : "dev");
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		xpt_done(ccb);
 		return;
 #endif	/* HVS_HOST_RESET */
 	}
 	case XPT_SCSI_IO:
 	case XPT_IMMED_NOTIFY: {
 		struct hv_storvsc_request *reqp = NULL;
 
 		if (ccb->csio.cdb_len == 0) {
 			panic("cdl_len is 0\n");
 		}
 
 		/*
 		 * No free request slot: ask CAM to requeue the CCB and
 		 * freeze the SIM queue once.
 		 */
 		if (LIST_EMPTY(&sc->hs_free_list)) {
 			ccb->ccb_h.status = CAM_REQUEUE_REQ;
 			if (sc->hs_frozen == 0) {
 				sc->hs_frozen = 1;
 				xpt_freeze_simq(sim, /* count*/1);
 			}
 			xpt_done(ccb);
 			return;
 		}
 
 		reqp = LIST_FIRST(&sc->hs_free_list);
 		LIST_REMOVE(reqp, link);
 
 		bzero(reqp, sizeof(struct hv_storvsc_request));
 		reqp->softc = sc;
 
 		ccb->ccb_h.status |= CAM_SIM_QUEUED;
 		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
 			ccb->ccb_h.status = CAM_REQ_INVALID;
 			/*
 			 * Hand the request back, as the send-failure path
 			 * below does; otherwise every rejected CCB would
 			 * permanently shrink the request pool.
 			 */
 			storvsc_free_request(sc, reqp);
 			xpt_done(ccb);
 			return;
 		}
 
 #ifdef notyet
 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
 			callout_init(&reqp->callout, 1);
 			callout_reset_sbt(&reqp->callout,
 			    SBT_1MS * ccb->ccb_h.timeout, 0,
 			    storvsc_timeout, reqp, 0);
 #if HVS_TIMEOUT_TEST
 			cv_init(&reqp->event.cv, "storvsc timeout cv");
 			mtx_init(&reqp->event.mtx, "storvsc timeout mutex",
 					NULL, MTX_DEF);
 			switch (reqp->vstor_packet.vm_srb.cdb[0]) {
 				case MODE_SELECT_10:
 				case SEND_DIAGNOSTIC:
 					/* To have timer send the request. */
 					return;
 				default:
 					break;
 			}
 #endif /* HVS_TIMEOUT_TEST */
 		}
 #endif
 
 		/* On success the CCB is completed later, from the callback. */
 		if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) {
 			xpt_print(ccb->ccb_h.path,
 				"hv_storvsc_io_request failed with %d\n", res);
 			ccb->ccb_h.status = CAM_PROVIDE_FAIL;
 			storvsc_free_request(sc, reqp);
 			xpt_done(ccb);
 			return;
 		}
 		return;
 	}
 
 	default:
 		/* Any other function code is rejected. */
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		xpt_done(ccb);
 		return;
 	}
 }
 
 /**
  * @brief destroy bounce buffer
  *
  * Gives a Scatter/Gather list obtained from storvsc_create_bounce_buffer()
  * back to the global pool: the first node of the in-use list is taken,
  * pointed at the list and moved to the free list.  If the in-use list is
  * empty an error is printed and nothing else happens.
  *
  * @param sgl - the Scatter/Gather list to release
  *
  */
 static void
 storvsc_destroy_bounce_buffer(struct sglist *sgl)
 {
 	struct hv_sgl_node *node;
 
 	if (!LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
 		node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
 		LIST_REMOVE(node, link);
 		node->sgl_data = sgl;
 		LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, node,
 		    link);
 		return;
 	}
 
 	printf("storvsc error: not enough in use sgl\n");
 }
 
 /**
  * @brief Take a bounce-buffer SG list from the global pool
  *
  * Removes the first node from the pool's free list, parks it on the
  * in-use list and prepares the Scatter/Gather list it carries for a
  * transfer made of seg_count segments.
  *
  * @param seg_count - number of segments the SG list has to describe
  * @param write - if WRITE_TYPE, no segment is marked as used and every
  * segment length is set to 0; otherwise all seg_count segments are marked
  * as used and every segment length is set to PAGE_SIZE.
  *
  * return NULL if the free list is empty
  */
 static struct sglist *
 storvsc_create_bounce_buffer(uint16_t seg_count, int write)
 {
 	struct hv_sgl_node *node;
 	struct sglist *sgl;
 	unsigned int seg_len;
 	int idx;
 
 	node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
 	if (node == NULL) {
 		printf("storvsc error: not enough free sgl\n");
 		return NULL;
 	}
 
 	/* Move the node from the free list to the in-use list. */
 	LIST_REMOVE(node, link);
 	sgl = node->sgl_data;
 	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, node, link);
 
 	sgl->sg_maxseg = seg_count;
 	if (write == WRITE_TYPE) {
 		seg_len = 0;
 		sgl->sg_nseg = 0;
 	} else {
 		seg_len = PAGE_SIZE;
 		sgl->sg_nseg = seg_count;
 	}
 
 	for (idx = 0; idx < seg_count; idx++)
 		sgl->sg_segs[idx].ss_len = seg_len;
 
 	return sgl;
 }
 
 /**
  * @brief copy data from SG list to bounce buffer
  *
  * Copies the contents of selected segments of the original SG list into
  * the segment with the same index of the bounce SG list, and records the
  * copied length in the bounce segment.
  *
  * @param bounce_sgl - the destination SG list (bounce buffer)
  * @param orig_sgl - the segments of the source SG list.
  * @param orig_sgl_count - the count of source segments.
  * @param seg_bits - bitmap indicating which segments need the bounce
  *  buffer; bit N set means segment N is copied.  Being a 64-bit map it
  *  can only describe segments 0..63.
  *
  */
 static void
 storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
 			       bus_dma_segment_t *orig_sgl,
 			       unsigned int orig_sgl_count,
 			       uint64_t seg_bits)
 {
 	unsigned int src_sgl_idx;
 
 	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
 		/*
 		 * Build the mask in 64 bits: an int-typed "1 << idx" is
 		 * undefined for bit 31 and above and would sign-extend
 		 * when combined with the 64-bit bitmap.
 		 */
 		if (seg_bits & (1ULL << src_sgl_idx)) {
 			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
 			    (void*)orig_sgl[src_sgl_idx].ds_addr,
 			    orig_sgl[src_sgl_idx].ds_len);
 
 			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
 			    orig_sgl[src_sgl_idx].ds_len;
 		}
 	}
 }
 
 /**
  * @brief copy data from SG list which used as bounce to another SG list
  *
  * Copies the contents of selected bounce-buffer segments back into the
  * segment with the same index of the destination SG list.  The number of
  * bytes copied is the length recorded in the bounce segment.
  *
  * @param dest_sgl - the destination SG list's segments
  * @param dest_sgl_count - the count of destination SG list's segment.
  * @param src_sgl - the source SG list (bounce buffer).
  * @param seg_bits - bitmap indicating which segments used the bounce
  *  buffer; bit N set means segment N is copied back.  Being a 64-bit map
  *  it can only describe segments 0..63.
  *
  */
 void
 storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
 				    unsigned int dest_sgl_count,
 				    struct sglist* src_sgl,
 				    uint64_t seg_bits)
 {
 	unsigned int sgl_idx;
 
 	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
 		/*
 		 * Build the mask in 64 bits: an int-typed "1 << idx" is
 		 * undefined for bit 31 and above and would sign-extend
 		 * when combined with the 64-bit bitmap.
 		 */
 		if (seg_bits & (1ULL << sgl_idx)) {
 			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
 			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
 			    src_sgl->sg_segs[sgl_idx].ss_len);
 		}
 	}
 }
 
 /**
  * @brief check SG list with bounce buffer or not
  *
  * Walks the SG list and decides whether it contains a "hole", i.e. a
  * segment boundary that is neither page aligned nor physically contiguous
  * with the end of the previous segment.  Lists with fewer than two
  * segments never need a bounce buffer.
  *
  * @param sgl - the SG list's segments
  * @param sg_count - the count of SG list's segment.
  * @param bits - out: bitmap of the segments that do not start on a page
  *  boundary (bit N for segment N).  It is only written when sg_count >= 2
  *  and is left at 0 when no hole is found.
  *
  * return -1 if SG list needless bounce buffer, 0 if it needs one
  */
 static int
 storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
 				unsigned int sg_count,
 				uint64_t *bits)
 {
 	unsigned int i = 0;
 	int offset = 0;
 	uint64_t phys_addr = 0;
 	uint64_t tmp_bits = 0;
 	boolean_t found_hole = FALSE;
 	boolean_t pre_aligned = TRUE;
 
 	if (sg_count < 2){
 		return -1;
 	}
 
 	*bits = 0;
 	
 	phys_addr = vtophys(sgl[0].ds_addr);
 	offset =  phys_addr - trunc_page(phys_addr);
 
 	if (offset != 0) {
 		pre_aligned = FALSE;
 		tmp_bits |= 1;
 	}
 
 	for (i = 1; i < sg_count; i++) {
 		phys_addr = vtophys(sgl[i].ds_addr);
 		offset =  phys_addr - trunc_page(phys_addr);
 
 		if (offset == 0) {
 			if (FALSE == pre_aligned){
 				/*
 				 * This segment is aligned, if the previous
 				 * one is not aligned, find a hole
 				 */
 				found_hole = TRUE;
 			}
 			pre_aligned = TRUE;
 		} else {
 			/*
 			 * Set the bit with a 64-bit shift; an int-typed
 			 * "1 << i" sign-extends into bits 32..63 for
 			 * i == 31 and is undefined beyond that.
 			 */
 			tmp_bits |= 1ULL << i;
 			if (!pre_aligned) {
 				if (phys_addr != vtophys(sgl[i-1].ds_addr +
 				    sgl[i-1].ds_len)) {
 					/*
 					 * Check whether connect to previous
 					 * segment,if not, find the hole
 					 */
 					found_hole = TRUE;
 				}
 			} else {
 				/*
 				 * Previous segment started aligned but this
 				 * one does not: treated as a hole.
 				 */
 				found_hole = TRUE;
 			}
 			pre_aligned = FALSE;
 		}
 	}
 
 	if (!found_hole) {
 		return (-1);
 	} else {
 		*bits = tmp_bits;
 		return 0;
 	}
 }
 
 /**
  * @brief Fill in a request structure based on a CAM control block
  *
  * Fills in a request structure based on the contents of a CAM control
  * block.  The request structure holds the payload information for
  * VSCSI protocol request.
  *
  * Besides the addressing and CDB fields, the data buffer of the CCB is
  * translated into a page frame number array.  For scatter/gather
  * requests whose segments leave holes (see
  * storvsc_check_bounce_buffer_sgl()), a bounce buffer is taken from the
  * pool and the unaligned segments are redirected to it.
  *
  * @param ccb pointer to a CAM control block
  * @param reqp pointer to a request structure
  *
  * @return 0 on success, EINVAL if the SG list has more than
  * HV_MAX_MULTIPAGE_BUFFER_COUNT segments or the data flags are not
  * handled, ENOMEM if no bounce buffer could be obtained.
  */
 static int
 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 {
 	struct ccb_scsiio *csio = &ccb->csio;
 	uint64_t phys_addr;
 	uint32_t bytes_to_copy = 0;
 	uint32_t pfn_num = 0;
 	uint32_t pfn;
 	uint64_t not_aligned_seg_bits = 0;
 	
 	/* refer to struct vmscsi_req for meanings of these two fields */
 	reqp->vstor_packet.u.vm_srb.port =
 		cam_sim_unit(xpt_path_sim(ccb->ccb_h.path));
 	reqp->vstor_packet.u.vm_srb.path_id =
 		cam_sim_bus(xpt_path_sim(ccb->ccb_h.path));
 
 	reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id;
 	reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun;
 
 	/* The CDB lives either behind a pointer or inline in the CCB. */
 	reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len;
 	if(ccb->ccb_h.flags & CAM_CDB_POINTER) {
 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr,
 			csio->cdb_len);
 	} else {
 		memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes,
 			csio->cdb_len);
 	}
 
 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
 	case CAM_DIR_OUT:
 		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;	
 		break;
 	case CAM_DIR_IN:
 		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
 		break;
 	case CAM_DIR_NONE:
 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
 		break;
 	default:
 		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
 		break;
 	}
 
 	reqp->sense_data     = &csio->sense_data;
 	reqp->sense_info_len = csio->sense_len;
 
 	reqp->ccb = ccb;
 
 	/* Nothing to map when the command carries no data. */
 	if (0 == csio->dxfer_len) {
 		return (0);
 	}
 
 	reqp->data_buf.length = csio->dxfer_len;
 
 	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
 	case CAM_DATA_VADDR:
 	{
 		bytes_to_copy = csio->dxfer_len;
 		phys_addr = vtophys(csio->data_ptr);
 		reqp->data_buf.offset = phys_addr & PAGE_MASK;
 		
 		/*
 		 * Walk the virtual buffer page by page and record the
 		 * frame number of every page it touches.
 		 */
 		while (bytes_to_copy != 0) {
 			int bytes, page_offset;
 			phys_addr =
 			    vtophys(&csio->data_ptr[reqp->data_buf.length -
 			    bytes_to_copy]);
 			pfn = phys_addr >> PAGE_SHIFT;
 			reqp->data_buf.pfn_array[pfn_num] = pfn;
 			page_offset = phys_addr & PAGE_MASK;
 
 			bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
 
 			bytes_to_copy -= bytes;
 			pfn_num++;
 		}
 		break;
 	}
 
 	case CAM_DATA_SG:
 	{
 		int i = 0;
 		int offset = 0;
 		int ret;
 
 		bus_dma_segment_t *storvsc_sglist =
 		    (bus_dma_segment_t *)ccb->csio.data_ptr;
 		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
 
 		printf("Storvsc: get SG I/O operation, %d\n",
 		    reqp->vstor_packet.u.vm_srb.data_in);
 
 		if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){
 			printf("Storvsc: %d segments is too much, "
 			    "only support %d segments\n",
 			    storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT);
 			return (EINVAL);
 		}
 
 		/*
 		 * We create our own bounce buffer function currently. Ideally
 		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
 		 * code there is no callback API to check the page alignment of
 		 * middle segments before busdma can decide if a bounce buffer
 		 * is needed for particular segment. There is callback,
 		 * "bus_dma_filter_t *filter", but the parameters are not
 		 * sufficient for storvsc driver.
 		 * TODO:
 		 *	Add page alignment check in BUS_DMA(9) callback. Once
 		 *	this is complete, switch the following code to use
 		 *	BUS_DMA(9) for storvsc bounce buffer support.
 		 */
 		/* check if we need to create bounce buffer */
 		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
 		    storvsc_sg_count, &not_aligned_seg_bits);
 		if (ret != -1) {
 			reqp->bounce_sgl =
 			    storvsc_create_bounce_buffer(storvsc_sg_count,
 			    reqp->vstor_packet.u.vm_srb.data_in);
 			if (NULL == reqp->bounce_sgl) {
 				printf("Storvsc_error: "
 				    "create bounce buffer failed.\n");
 				return (ENOMEM);
 			}
 
 			reqp->bounce_sgl_count = storvsc_sg_count;
 			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
 
 			/*
 			 * if it is write, we need copy the original data
 			 * to bounce buffer
 			 */
 			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
 				storvsc_copy_sgl_to_bounce_buf(
 				    reqp->bounce_sgl,
 				    storvsc_sglist,
 				    storvsc_sg_count,
 				    reqp->not_aligned_seg_bits);
 			}
 
 			/* transfer virtual address to physical frame number */
 			if (reqp->not_aligned_seg_bits & 0x1){
 				phys_addr =
 				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
 			}else{
 				phys_addr =
 					vtophys(storvsc_sglist[0].ds_addr);
 			}
 			reqp->data_buf.offset = phys_addr & PAGE_MASK;
 
 			pfn = phys_addr >> PAGE_SHIFT;
 			reqp->data_buf.pfn_array[0] = pfn;
 			
 			for (i = 1; i < storvsc_sg_count; i++) {
 				/*
 				 * Test the bitmap with a 64-bit mask; an
 				 * int-typed "1 << i" is undefined for bit 31
 				 * and above and sign-extends against the
 				 * 64-bit bitmap.
 				 */
 				if (reqp->not_aligned_seg_bits & (1ULL << i)) {
 					phys_addr =
 					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
 				} else {
 					phys_addr =
 					    vtophys(storvsc_sglist[i].ds_addr);
 				}
 
 				pfn = phys_addr >> PAGE_SHIFT;
 				reqp->data_buf.pfn_array[i] = pfn;
 			}
 		} else {
 			/* No bounce buffer: map the caller's segments as is. */
 			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
 
 			reqp->data_buf.offset = phys_addr & PAGE_MASK;
 
 			for (i = 0; i < storvsc_sg_count; i++) {
 				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
 				pfn = phys_addr >> PAGE_SHIFT;
 				reqp->data_buf.pfn_array[i] = pfn;
 			}
 
 			/*
 			 * check the last segment cross boundary or not
 			 *
 			 * NOTE(review): this stores into pfn_array[sg_count];
 			 * confirm pfn_array has room for one entry more than
 			 * HV_MAX_MULTIPAGE_BUFFER_COUNT.
 			 */
 			offset = phys_addr & PAGE_MASK;
 			if (offset) {
 				phys_addr =
 				    vtophys(storvsc_sglist[i-1].ds_addr +
 				    PAGE_SIZE - offset);
 				pfn = phys_addr >> PAGE_SHIFT;
 				reqp->data_buf.pfn_array[i] = pfn;
 			}
 			
 			reqp->bounce_sgl_count = 0;
 		}
 		break;
 	}
 	default:
 		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
 		return(EINVAL);
 	}
 
 	return(0);
 }
 
 /*
  * Validate the peripheral qualifier and device type of SCSI INQUIRY data.
  *
  * The data is accepted only when the qualifier reports a connected
  * logical unit (SID_QUAL_LU_CONNECTED) and the device type is not
  * T_NODEVICE.
  *
  * Return 1 if it is valid, 0 otherwise.
  */
 static inline int
 is_inquiry_valid(const struct scsi_inquiry_data *inq_data)
 {
 	if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED)
 		return (0);
 
 	return ((uint8_t)SID_TYPE(inq_data) != T_NODEVICE);
 }
 
 /**
  * @brief completion function before returning to CAM
  *
  * I/O process has been completed and the result needs
  * to be passed to the CAM layer.
  * Free resources related to this request.
  *
  * In order, this routine: releases the bounce buffer (copying data back
  * for reads), clears the retry state, translates the SCSI status of the
  * request into a CAM status (with extra validation of INQUIRY replies),
  * fills in residual and sense information, releases the SIM queue if it
  * was frozen, returns the request to the free list and finally completes
  * the CCB with xpt_done_direct().
  *
  * @param reqp pointer to a request structure
  */
 static void
 storvsc_io_done(struct hv_storvsc_request *reqp)
 {
 	union ccb *ccb = reqp->ccb;
 	struct ccb_scsiio *csio = &ccb->csio;
 	struct storvsc_softc *sc = reqp->softc;
 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
 	bus_dma_segment_t *ori_sglist = NULL;
 	int ori_sg_count = 0;
 
 	/* destroy bounce buffer if it is used */
 	if (reqp->bounce_sgl_count) {
 		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
 		ori_sg_count = ccb->csio.sglist_cnt;
 
 		/*
 		 * If it is READ operation, we should copy back the data
 		 * to original SG list.
 		 */
 		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
 			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
 			    ori_sg_count,
 			    reqp->bounce_sgl,
 			    reqp->not_aligned_seg_bits);
 		}
 
 		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
 		reqp->bounce_sgl_count = 0;
 	}
 		
 	/* A non-zero retry count means this I/O came back after a timeout. */
 	if (reqp->retries > 0) {
 		mtx_lock(&sc->hs_lock);
 #if HVS_TIMEOUT_TEST
 		xpt_print(ccb->ccb_h.path,
 			"%u: IO returned after timeout, "
 			"waking up timer handler if any.\n", ticks);
 		mtx_lock(&reqp->event.mtx);
 		cv_signal(&reqp->event.cv);
 		mtx_unlock(&reqp->event.mtx);
 #endif
 		reqp->retries = 0;
 		xpt_print(ccb->ccb_h.path,
 			"%u: IO returned after timeout, "
 			"stopping timer if any.\n", ticks);
 		mtx_unlock(&sc->hs_lock);
 	}
 
 #ifdef notyet
 	/*
 	 * callout_drain() will wait for the timer handler to finish
 	 * if it is running. So we don't need any lock to synchronize
 	 * between this routine and the timer handler.
 	 * Note that we need to make sure reqp is not freed when timer
 	 * handler is using or will use it.
 	 */
 	if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
 		callout_drain(&reqp->callout);
 	}
 #endif
 
 	/* Drop the queued flag and the old status before setting the result. */
 	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
 	ccb->ccb_h.status &= ~CAM_STATUS_MASK;
 	if (vm_srb->scsi_status == SCSI_STATUS_OK) {
 		const struct scsi_generic *cmd;
 		/*
 		 * Check whether the data for INQUIRY cmd is valid or
 		 * not.  Windows 10 and Windows 2016 send all zero
 		 * inquiry data to VM even for unpopulated slots.
 		 */
 		cmd = (const struct scsi_generic *)
 		    ((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
 		     csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
 		if (cmd->opcode == INQUIRY) {
 		    /*
 		     * A Windows 10 or Windows Server 2016 host answers the
 		     * inquiry request for a nonexistent device with invalid
 		     * data:
 		     *	[0x7f 0x0 0x5 0x2 0x1f ... ]
 		     * But on Windows 2012 R2, the response is:
 		     *	[0x7f 0x0 0x0 0x0 0x0 ]
 		     * That is why the inquiry response is validated here.
 		     * The validation is skipped for short INQUIRY responses,
 		     * i.e. those shorter than SHORT_INQUIRY_LENGTH (36).
 		     *
 		     * For more information about INQUIRY, please refer to:
 		     *  ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf
 		     */
 		    const struct scsi_inquiry_data *inq_data =
 			(const struct scsi_inquiry_data *)csio->data_ptr;
 		    uint8_t* resp_buf = (uint8_t*)csio->data_ptr;
 		    /* Get the buffer length reported by host */
 		    int resp_xfer_len = vm_srb->transfer_len;
 		    /*
 		     * Get the available buffer length: byte 4 of the response
 		     * plus 5, or 0 if fewer than 5 bytes were transferred.
 		     */
 		    int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
 		    /* The usable length is the smaller of the two. */
 		    int data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len;
 		    if (data_len < SHORT_INQUIRY_LENGTH) {
 			/* Too short to validate: report success as is. */
 			ccb->ccb_h.status |= CAM_REQ_CMP;
 			if (bootverbose && data_len >= 5) {
 				mtx_lock(&sc->hs_lock);
 				xpt_print(ccb->ccb_h.path,
 				    "storvsc skips the validation for short inquiry (%d)"
 				    " [%x %x %x %x %x]\n",
 				    data_len,resp_buf[0],resp_buf[1],resp_buf[2],
 				    resp_buf[3],resp_buf[4]);
 				mtx_unlock(&sc->hs_lock);
 			}
 		    } else if (is_inquiry_valid(inq_data) == 0) {
 			/* Invalid inquiry data: tell CAM the device is absent. */
 			ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
 			if (bootverbose && data_len >= 5) {
 				mtx_lock(&sc->hs_lock);
 				xpt_print(ccb->ccb_h.path,
 				    "storvsc uninstalled invalid device"
 				    " [%x %x %x %x %x]\n",
 				resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]);
 				mtx_unlock(&sc->hs_lock);
 			}
 		    } else {
 			ccb->ccb_h.status |= CAM_REQ_CMP;
 			if (bootverbose) {
 				mtx_lock(&sc->hs_lock);
 				xpt_print(ccb->ccb_h.path,
 				    "storvsc has passed inquiry response (%d) validation\n",
 				    data_len);
 				mtx_unlock(&sc->hs_lock);
 			}
 		    }
 		} else {
 			ccb->ccb_h.status |= CAM_REQ_CMP;
 		}
 	} else {
 		/* Any other SCSI status is reported to CAM as an error. */
 		mtx_lock(&sc->hs_lock);
 		xpt_print(ccb->ccb_h.path,
 			"storvsc scsi_status = %d\n",
 			vm_srb->scsi_status);
 		mtx_unlock(&sc->hs_lock);
 		ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
 	}
 
 	/* Residual is the requested length minus what the host transferred. */
 	ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF);
 	ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len;
 
 	if (reqp->sense_info_len != 0) {
 		csio->sense_resid = csio->sense_len - reqp->sense_info_len;
 		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
 	}
 
 	/*
 	 * If the SIM queue was frozen (hs_frozen set), ask CAM to release
 	 * it, then return the request to the free list under hs_lock.
 	 */
 	mtx_lock(&sc->hs_lock);
 	if (reqp->softc->hs_frozen == 1) {
 		xpt_print(ccb->ccb_h.path,
 			"%u: storvsc unfreezing softc 0x%p.\n",
 			ticks, reqp->softc);
 		ccb->ccb_h.status |= CAM_RELEASE_SIMQ;
 		reqp->softc->hs_frozen = 0;
 	}
 	storvsc_free_request(sc, reqp);
 	mtx_unlock(&sc->hs_lock);
 
 	xpt_done_direct(ccb);
 }
 
 /**
  * @brief Release a request structure
  *
  * Puts the request back at the head of the softc's list of free
  * requests so that it can be reused for a later I/O.
  *
  * @param sc pointer to the softc owning the free list
  * @param reqp pointer to the request structure being released
  */
 static void
 storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp)
 {
 	/* The request becomes the first candidate for reuse. */
 	LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
 }
 
 /**
  * @brief Determine type of storage device from GUID
  *
  * Compares the type GUID reported by vmbus for the device against the
  * BlkVSC (paravirtual IDE) and StorVSC (paravirtual SCSI) type GUIDs.
  *
  * @param dev a device
  * returns DRIVER_BLKVSC, DRIVER_STORVSC, or DRIVER_UNKNOWN if the GUID
  * matches neither
  */
 static enum hv_storage_type
 storvsc_get_storage_type(device_t dev)
 {
 	const char *type_guid = vmbus_get_type(dev);
 	enum hv_storage_type type = DRIVER_UNKNOWN;
 
 	if (memcmp(type_guid, &gBlkVscDeviceType, sizeof(hv_guid)) == 0)
 		type = DRIVER_BLKVSC;
 	else if (memcmp(type_guid, &gStorVscDeviceType, sizeof(hv_guid)) == 0)
 		type = DRIVER_STORVSC;
 
 	return (type);
 }
 
Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
===================================================================
--- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c	(revision 302620)
+++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c	(revision 302621)
@@ -1,546 +1,510 @@
 /*-
  * Copyright (c) 2009-2012,2016 Microsoft Corp.
  * Copyright (c) 2012 NetApp Inc.
  * Copyright (c) 2012 Citrix Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 
 #include <dev/hyperv/include/hyperv_busdma.h>
 #include <dev/hyperv/vmbus/hv_vmbus_priv.h>
 #include <dev/hyperv/vmbus/vmbus_reg.h>
 #include <dev/hyperv/vmbus/vmbus_var.h>
 
 /*
  * Signature shared by all channel message handlers: they receive the
  * vmbus softc and the message to process.
  */
 typedef void	(*vmbus_chanmsg_proc_t)
 		(struct vmbus_softc *, const struct vmbus_message *);
 
 /* Channel object allocation, offer processing and detach task. */
 static struct hv_vmbus_channel *hv_vmbus_allocate_channel(struct vmbus_softc *);
 static void	vmbus_channel_on_offer_internal(struct vmbus_softc *,
 		    const hv_vmbus_channel_offer_channel *offer);
 static void	vmbus_chan_detach_task(void *, int);
 
 /* Handlers installed in the channel message dispatch table. */
 static void	vmbus_channel_on_offer(struct vmbus_softc *,
 		    const struct vmbus_message *);
 static void	vmbus_channel_on_offer_rescind(struct vmbus_softc *,
 		    const struct vmbus_message *);
 static void	vmbus_channel_on_offers_delivered(struct vmbus_softc *,
 		    const struct vmbus_message *);
 
 /**
  * Channel message dispatch table
  *
  * Indexed by channel message type.  Offer, rescind and "all offers
  * delivered" messages have dedicated handlers; the four response
  * messages (open result, GPADL created, GPADL torndown, version
  * response) are all routed to vmbus_msghc_wakeup.  Message types that
  * are not listed here have a NULL entry.
  */
 static const vmbus_chanmsg_proc_t
 vmbus_chanmsg_process[HV_CHANNEL_MESSAGE_COUNT] = {
 	[HV_CHANNEL_MESSAGE_OFFER_CHANNEL] =
 		vmbus_channel_on_offer,
 	[HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER] =
 		vmbus_channel_on_offer_rescind,
 	[HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED] =
 		vmbus_channel_on_offers_delivered,
 	[HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT] =
 		vmbus_msghc_wakeup,
 	[HV_CHANNEL_MESSAGE_GPADL_CREATED] =
 		vmbus_msghc_wakeup,
 	[HV_CHANNEL_MESSAGE_GPADL_TORNDOWN] =
 		vmbus_msghc_wakeup,
 	[HV_CHANNEL_MESSAGE_VERSION_RESPONSE] =
 		vmbus_msghc_wakeup
 };
 
 /**
  * @brief Allocate and initialize a vmbus channel object
  *
  * The object is allocated zeroed with M_WAITOK.  Its sub-channel lock,
  * sub-channel list and detach task are initialized and it is tied to
  * the given vmbus softc.
  *
  * @param sc - vmbus softc the new channel belongs to
  * @return the new channel object
  */
 static struct hv_vmbus_channel *
 hv_vmbus_allocate_channel(struct vmbus_softc *sc)
 {
 	struct hv_vmbus_channel *chan;
 
 	chan = malloc(sizeof(struct hv_vmbus_channel), M_DEVBUF,
 	    M_WAITOK | M_ZERO);
 
 	/* Sub-channel bookkeeping and deferred detach handling. */
 	mtx_init(&chan->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
 	TAILQ_INIT(&chan->sc_list_anchor);
 	TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan);
 
 	chan->vmbus_sc = sc;
 
 	return (chan);
 }
 
 /**
  * @brief Release the resources used by the vmbus channel object
  *
  * Frees the signal-event hypercall parameter memory if it was allocated
  * (see vmbus_channel_on_offer_internal()), destroys the sub-channel lock
  * and frees the channel object itself.  The channel must not be used
  * after this call.
  *
  * @param channel - channel object to release
  */
 void
 hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
 {
 	/*
 	 * ch_sigevt is DMA memory obtained with hyperv_dmamem_alloc();
 	 * it is not covered by free() of the channel and would leak.
 	 */
 	if (channel->ch_sigevt != NULL) {
 		hyperv_dmamem_free(&channel->ch_sigevt_dma,
 		    channel->ch_sigevt);
 		channel->ch_sigevt = NULL;
 	}
 	mtx_destroy(&channel->sc_lock);
 	free(channel, M_DEVBUF);
 }
 
 /**
  * @brief Process the offer by creating a channel/device
  * associated with this offer
  *
  * The new channel is matched against the known channels by interface
  * type and instance GUID:
  *  - no match: it becomes a new primary channel and a child device is
  *    created and registered for it;
  *  - match and non-zero sub-channel index: it is attached to the matching
  *    primary channel as a sub-channel and waiters on the primary are
  *    woken up;
  *  - match and zero sub-channel index: it is a duplicated primary
  *    channel and is freed.
  *
  * A channel is only published in the relid-indexed channel table when it
  * is kept, so the table never points at a freed duplicate.  Offers whose
  * relid does not fit in the table are dropped.
  *
  * @param new_channel - channel object built from the offer; ownership is
  * taken by this function
  */
 static void
 vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
 {
 	hv_vmbus_channel*	channel;
 	uint32_t                relid;
 	int			dup_primary;
 
 	relid = new_channel->offer_msg.child_rel_id;
 	if (relid >= VMBUS_CHAN_MAX) {
 		/* Never index the channel table out of bounds. */
 		printf("VMBUS: invalid channel%u offer\n", relid);
 		hv_vmbus_free_vmbus_channel(new_channel);
 		return;
 	}
 
 	/*
 	 * Make sure this is a new offer
 	 */
 	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 
 	TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
 	    list_entry) {
 		if (memcmp(&channel->offer_msg.offer.interface_type,
 		    &new_channel->offer_msg.offer.interface_type,
 		    sizeof(hv_guid)) == 0 &&
 		    memcmp(&channel->offer_msg.offer.interface_instance,
 		    &new_channel->offer_msg.offer.interface_instance,
 		    sizeof(hv_guid)) == 0)
 			break;
 	}
 
 	/* A match that is not a sub-channel is a duplicated primary. */
 	dup_primary = (channel != NULL &&
 	    new_channel->offer_msg.offer.sub_channel_index == 0);
 
 	if (relid == 0) {
 		/*
 		 * XXX channel0 will not be processed; skip it.
 		 */
 		printf("VMBUS: got channel0 offer\n");
 	} else if (!dup_primary) {
 		/*
 		 * Only publish channels that are kept; a duplicate is
 		 * freed below and must not be reachable from the table.
 		 */
 		hv_vmbus_g_connection.channels[relid] = new_channel;
 	}
 
 	if (channel == NULL) {
 		/* Install the new primary channel */
 		TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
 		    new_channel, list_entry);
 	}
 	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
 
 	if (channel != NULL) {
 		/*
 		 * Check if this is a sub channel.
 		 */
 		if (new_channel->offer_msg.offer.sub_channel_index != 0) {
 			/*
 			 * It is a sub channel offer, process it.
 			 */
 			new_channel->primary_channel = channel;
 			new_channel->device = channel->device;
 			mtx_lock(&channel->sc_lock);
 			TAILQ_INSERT_TAIL(&channel->sc_list_anchor,
 			    new_channel, sc_list_entry);
 			mtx_unlock(&channel->sc_lock);
 
 			if (bootverbose) {
 				printf("VMBUS get multi-channel offer, "
 				    "rel=%u, sub=%u\n",
 				    new_channel->offer_msg.child_rel_id,
 				    new_channel->offer_msg.offer.sub_channel_index);
 			}
 
 			/* Insert new channel into channel_anchor. */
 			mtx_lock(&hv_vmbus_g_connection.channel_lock);
 			TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
 			    new_channel, list_entry);
 			mtx_unlock(&hv_vmbus_g_connection.channel_lock);
 
 			if(bootverbose)
 				printf("VMBUS: new multi-channel offer <%p>, "
 				    "its primary channel is <%p>.\n",
 				    new_channel, new_channel->primary_channel);
 
 			new_channel->state = HV_CHANNEL_OPEN_STATE;
 
 			/*
 			 * Bump up sub-channel count and notify anyone that is
 			 * interested in this sub-channel, after this sub-channel
 			 * is setup.
 			 */
 			mtx_lock(&channel->sc_lock);
 			channel->subchan_cnt++;
 			mtx_unlock(&channel->sc_lock);
 			wakeup(channel);
 
 			return;
 		}
 
 		printf("VMBUS: duplicated primary channel%u\n",
 		    new_channel->offer_msg.child_rel_id);
 		hv_vmbus_free_vmbus_channel(new_channel);
 		return;
 	}
 
 	new_channel->state = HV_CHANNEL_OPEN_STATE;
 
 	/*
 	 * Start the process of binding this offer to the driver
 	 * (We need to set the device field before calling
 	 * hv_vmbus_child_device_add())
 	 */
 	new_channel->device = hv_vmbus_child_device_create(
 	    new_channel->offer_msg.offer.interface_type,
 	    new_channel->offer_msg.offer.interface_instance, new_channel);
 
 	/*
 	 * Add the new device to the bus. This will kick off device-driver
 	 * binding which eventually invokes the device driver's AddDevice()
 	 * method.
 	 */
 	hv_vmbus_child_device_register(new_channel->vmbus_sc,
 	    new_channel->device);
 }
 
 /*
  * Bind a channel to the given cpu and record the matching virtual cpu id.
  * With the WS2008 and WIN7 vmbus protocol versions only cpu0 is
  * supported, so the request is silently redirected to cpu0.
  */
 void
 vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
 {
 	int cpu0_only;
 
 	KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
 
 	cpu0_only = (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
 	    hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7);
 	if (cpu0_only) {
 		/* Only cpu0 is supported */
 		cpu = 0;
 	}
 
 	chan->target_vcpu = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu);
 	chan->target_cpu = cpu;
 
 	if (!bootverbose)
 		return;
 
 	printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
 	    chan->offer_msg.child_rel_id,
 	    chan->target_cpu, chan->target_vcpu);
 }
 
-/**
- * Array of device guids that are performance critical. We try to distribute
- * the interrupt load for these devices across all online cpus. 
- */
-static const hv_guid high_perf_devices[] = {
-	{HV_NIC_GUID, },
-	{HV_IDE_GUID, },
-	{HV_SCSI_GUID, },
-};
+void
+vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan)
+{
+	static uint32_t vmbus_chan_nextcpu;
+	int cpu;
 
-enum {
-	PERF_CHN_NIC = 0,
-	PERF_CHN_IDE,
-	PERF_CHN_SCSI,
-	MAX_PERF_CHN,
-};
+	cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus;
+	vmbus_channel_cpu_set(chan, cpu);
+}
 
-/*
- * We use this static number to distribute the channel interrupt load.
- */
-static uint32_t next_vcpu;
-
-/**
- * Starting with Win8, we can statically distribute the incoming
- * channel interrupt load by binding a channel to VCPU. We
- * implement here a simple round robin scheme for distributing
- * the interrupt load.
- * We will bind channels that are not performance critical to cpu 0 and
- * performance critical channels (IDE, SCSI and Network) will be uniformly
- * distributed across all available CPUs.
- */
 static void
-vmbus_channel_select_defcpu(struct hv_vmbus_channel *channel)
+vmbus_channel_select_defcpu(struct hv_vmbus_channel *chan)
 {
-	uint32_t current_cpu;
-	int i;
-	boolean_t is_perf_channel = FALSE;
-	const hv_guid *guid = &channel->offer_msg.offer.interface_type;
-
-	for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
-		if (memcmp(guid->data, high_perf_devices[i].data,
-		    sizeof(hv_guid)) == 0) {
-			is_perf_channel = TRUE;
-			break;
-		}
-	}
-
-	if (!is_perf_channel) {
-		/* Stick to cpu0 */
-		vmbus_channel_cpu_set(channel, 0);
-		return;
-	}
-	/* mp_ncpus should have the number cpus currently online */
-	current_cpu = (++next_vcpu % mp_ncpus);
-	vmbus_channel_cpu_set(channel, current_cpu);
+	/*
+	 * By default, pin the channel to cpu0.  Devices having
+	 * special channel-cpu mapping requirement should call
+	 * vmbus_channel_cpu_{set,rr}().
+	 */
+	vmbus_channel_cpu_set(chan, 0);
 }
 
 /**
  * @brief Handler for channel offers from Hyper-V/Azure
  *
  * Notes the newly offered channel with vmbus_scan_newchan() and then
  * processes the offer carried in the message payload.
  */
 static void
 vmbus_channel_on_offer(struct vmbus_softc *sc, const struct vmbus_message *msg)
 {
 	/* New channel is offered by vmbus */
 	vmbus_scan_newchan(sc);
 
 	vmbus_channel_on_offer_internal(sc,
 	    (const hv_vmbus_channel_offer_channel *)msg->msg_data);
 }
 
 static void
 vmbus_channel_on_offer_internal(struct vmbus_softc *sc,
     const hv_vmbus_channel_offer_channel *offer)
 {
 	hv_vmbus_channel* new_channel;
 
 	/* Allocate the channel object and save this offer */
 	new_channel = hv_vmbus_allocate_channel(sc);
 
 	/*
 	 * By default we setup state to enable batched
 	 * reading. A specific service can choose to
 	 * disable this prior to opening the channel.
 	 */
 	new_channel->batched_reading = TRUE;
 
 	new_channel->ch_sigevt = hyperv_dmamem_alloc(
 	    bus_get_dma_tag(sc->vmbus_dev),
 	    HYPERCALL_SIGEVTIN_ALIGN, 0, sizeof(struct hypercall_sigevt_in),
 	    &new_channel->ch_sigevt_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO);
 	if (new_channel->ch_sigevt == NULL) {
 		device_printf(sc->vmbus_dev, "sigevt alloc failed\n");
 		/* XXX */
 		mtx_destroy(&new_channel->sc_lock);
 		free(new_channel, M_DEVBUF);
 		return;
 	}
 	new_channel->ch_sigevt->hc_connid = VMBUS_CONNID_EVENT;
 
 	if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
 		new_channel->is_dedicated_interrupt =
 		    (offer->is_dedicated_interrupt != 0);
 		new_channel->ch_sigevt->hc_connid = offer->connection_id;
 	}
 
 	memcpy(&new_channel->offer_msg, offer,
 	    sizeof(hv_vmbus_channel_offer_channel));
 	new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
 	new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
 
 	/* Select default cpu for this channel. */
 	vmbus_channel_select_defcpu(new_channel);
 
 	vmbus_channel_process_offer(new_channel);
 }
 
 /**
  * @brief Rescind offer handler.
  *
  * We queue a work item to process this offer
  * synchronously.
  *
  * XXX pretty broken; need rework.
  */
 static void
 vmbus_channel_on_offer_rescind(struct vmbus_softc *sc,
     const struct vmbus_message *msg)
 {
 	const hv_vmbus_channel_rescind_offer *rescind;
 	hv_vmbus_channel*		channel;
 
 	rescind = (const hv_vmbus_channel_rescind_offer *)msg->msg_data;
 
 	channel = hv_vmbus_g_connection.channels[rescind->child_rel_id];
 	if (channel == NULL)
 	    return;
 	hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL;
 
 	taskqueue_enqueue(taskqueue_thread, &channel->ch_detach_task);
 }
 
 /*
  * Task handler queued when a channel offer is rescinded.  Unregisters
  * the child device of a primary channel; sub-channels are left alone.
  */
 static void
 vmbus_chan_detach_task(void *xchan, int pending __unused)
 {
 	struct hv_vmbus_channel *chan = xchan;
 
 	/* Only primary channel owns the hv_device */
 	if (!HV_VMBUS_CHAN_ISPRIMARY(chan))
 		return;
 
 	hv_vmbus_child_device_unregister(chan->device);
 }
 
 /**
  *
  * @brief Invoked when all offers have been delivered.
  *
  * The message payload is not used; the handler only reports the end of
  * the channel scan through vmbus_scan_done().
  */
 static void
 vmbus_channel_on_offers_delivered(struct vmbus_softc *sc,
     const struct vmbus_message *msg __unused)
 {
 	/* The channel request will not produce any more new channels. */
 	vmbus_scan_done(sc);
 }
 
 /**
  * @brief Release channels that are unattached/unconnected (i.e., no drivers associated)
  *
  * Empties the global channel list under the channel lock: the child
  * device of each primary channel is unregistered and every channel
  * object is freed.  The relid-indexed channel table is cleared afterwards.
  */
 void
 hv_vmbus_release_unattached_channels(void) 
 {
 	hv_vmbus_channel *chan;
 
 	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 
 	for (;;) {
 		chan = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
 		if (chan == NULL)
 			break;
 		TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
 		    chan, list_entry);
 
 		/* Only primary channel owns the hv_device */
 		if (HV_VMBUS_CHAN_ISPRIMARY(chan))
 			hv_vmbus_child_device_unregister(chan->device);
 
 		hv_vmbus_free_vmbus_channel(chan);
 	}
 
 	/* No channel is left: wipe the whole lookup table. */
 	bzero(hv_vmbus_g_connection.channels,
 	    sizeof(hv_vmbus_channel*) * VMBUS_CHAN_MAX);
 
 	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
 }
 
 /**
  * @brief Select the best outgoing channel
  *
  * Among the primary channel and its opened sub-channels, pick the one
  * whose target vcpu is closest to the vcpu of the current cpu.  A
  * sub-channel bound to exactly the current vcpu is returned at once.
  * On equal distance the sub-channel visited later wins.
  * If there are no sub-channels, or the current cpu id is not below
  * MAXCPU, the primary channel is returned.
  *
  * @param primary - primary channel
  */
 struct hv_vmbus_channel *
 vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
 {
 	hv_vmbus_channel *cand = NULL;
 	hv_vmbus_channel *best = primary;
 	int best_dist = 0;
 	int cand_dist = 0;
 	int cur_vcpu = 0;
 	int this_cpu = PCPU_GET(cpuid);
 
 	if (TAILQ_EMPTY(&primary->sc_list_anchor) || this_cpu >= MAXCPU)
 		return best;
 
 	cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, this_cpu);
 
 	TAILQ_FOREACH(cand, &primary->sc_list_anchor, sc_list_entry) {
 		/* Sub-channels that are not opened cannot be used. */
 		if (cand->state != HV_CHANNEL_OPENED_STATE)
 			continue;
 
 		/* Perfect match, no need to look any further. */
 		if (cand->target_vcpu == cur_vcpu)
 			return cand;
 
 		best_dist = ((best->target_vcpu > cur_vcpu) ?
 		    (best->target_vcpu - cur_vcpu) :
 		    (cur_vcpu - best->target_vcpu));
 
 		cand_dist = ((cand->target_vcpu > cur_vcpu) ?
 		    (cand->target_vcpu - cur_vcpu) :
 		    (cur_vcpu - cand->target_vcpu));
 
 		/* Keep the current choice only if it is strictly closer. */
 		if (best_dist >= cand_dist)
 			best = cand;
 	}
 
 	return(best);
 }
 
 /*
  * Collect subchan_cnt sub-channels of the given primary channel.
  *
  * Sleeps on pri_chan (under sc_lock) until the primary channel's
  * subchan_cnt has reached the requested count, then copies the first
  * subchan_cnt entries of the sub-channel list into a freshly allocated
  * M_TEMP array, which is returned to the caller.
  */
 struct hv_vmbus_channel **
 vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
 {
 	struct hv_vmbus_channel **subchans, *subchan;
 	int idx = 0;
 
 	subchans = malloc(subchan_cnt * sizeof(*subchans), M_TEMP, M_WAITOK);
 
 	mtx_lock(&pri_chan->sc_lock);
 
 	/* Wait for enough sub-channels to show up. */
 	while (pri_chan->subchan_cnt < subchan_cnt)
 		mtx_sleep(pri_chan, &pri_chan->sc_lock, 0, "subch", 0);
 
 	TAILQ_FOREACH(subchan, &pri_chan->sc_list_anchor, sc_list_entry) {
 		/* TODO: refcnt chan */
 		subchans[idx++] = subchan;
 		if (idx == subchan_cnt)
 			break;
 	}
 	KASSERT(idx == subchan_cnt, ("invalid subchan count %d, should be %d",
 	    pri_chan->subchan_cnt, subchan_cnt));
 
 	mtx_unlock(&pri_chan->sc_lock);
 
 	return (subchans);
 }
 
 /*
  * Free a sub-channel pointer array allocated from M_TEMP.
  * Only the array itself is freed; the channels it points to are not
  * touched.  subchan_cnt is unused.
  */
 void
 vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
 {
 
 	free(subchan, M_TEMP);
 }
 
 void
 vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg)
 {
 	vmbus_chanmsg_proc_t msg_proc;
 	uint32_t msg_type;
 
 	msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type;
 	if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) {
 		device_printf(sc->vmbus_dev, "unknown message type 0x%x\n",
 		    msg_type);
 		return;
 	}
 
 	msg_proc = vmbus_chanmsg_process[msg_type];
 	if (msg_proc != NULL)
 		msg_proc(sc, msg);
 }