Index: head/sys/dev/hyperv/netvsc/hv_rndis_filter.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 303282) +++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 303283) @@ -1,1273 +1,1272 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" struct hv_rf_recvinfo { const ndis_8021q_info *vlan_info; const rndis_tcp_ip_csum_info *csum_info; const struct rndis_hash_info *hash_info; const struct rndis_hash_value *hash_value; }; #define HV_RF_RECVINFO_VLAN 0x1 #define HV_RF_RECVINFO_CSUM 0x2 #define HV_RF_RECVINFO_HASHINF 0x4 #define HV_RF_RECVINFO_HASHVAL 0x8 #define HV_RF_RECVINFO_ALL \ (HV_RF_RECVINFO_VLAN | \ HV_RF_RECVINFO_CSUM | \ HV_RF_RECVINFO_HASHINF | \ HV_RF_RECVINFO_HASHVAL) /* * Forward declarations */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type); static void hv_rf_receive_response(rndis_device *device, rndis_msg *response); static void hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response); static void hv_rf_receive_data(struct hn_rx_ring *rxr, rndis_msg *message, netvsc_packet *pkt); static int hv_rf_query_device(rndis_device *device, uint32_t oid, void *result, uint32_t *result_size); static inline int hv_rf_query_device_mac(rndis_device *device); static inline int hv_rf_query_device_link_status(rndis_device *device); static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter); static int hv_rf_init_device(rndis_device *device); static int hv_rf_open_device(rndis_device *device); static int hv_rf_close_device(rndis_device *device); static void hv_rf_on_send_request_completion(struct vmbus_channel *, void *context); static void hv_rf_on_send_request_halt_completion(struct vmbus_channel *, void *context); int hv_rf_send_offload_request(struct hn_softc *sc, rndis_offload_params *offloads); /* * Set the Per-Packet-Info with the specified type */ 
void * hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type) { rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset += rppi_size; rppi = (rndis_per_packet_info *)((char *)rndis_pkt + rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_length); rppi->size = rppi_size; rppi->type = pkt_type; rppi->per_packet_info_offset = sizeof(rndis_per_packet_info); rndis_pkt->per_pkt_info_length += rppi_size; return (rppi); } /* * Get the Per-Packet-Info with the specified type * return NULL if not found. */ void * hv_get_ppi_data(rndis_packet *rpkt, uint32_t type) { rndis_per_packet_info *ppi; int len; if (rpkt->per_pkt_info_offset == 0) return (NULL); ppi = (rndis_per_packet_info *)((unsigned long)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_length; while (len > 0) { if (ppi->type == type) return (void *)((unsigned long)ppi + ppi->per_packet_info_offset); len -= ppi->size; ppi = (rndis_per_packet_info *)((unsigned long)ppi + ppi->size); } return (NULL); } /* * Allocate and initialize an rndis_device instance. */ static inline rndis_device * hv_get_rndis_device(void) { rndis_device *device; device = malloc(sizeof(rndis_device), M_NETVSC, M_WAITOK | M_ZERO); mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_DEF); /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */ STAILQ_INIT(&device->myrequest_list); device->state = RNDIS_DEV_UNINITIALIZED; return (device); } /* * */ static inline void hv_put_rndis_device(rndis_device *device) { mtx_destroy(&device->req_lock); free(device, M_NETVSC); } /* * */ static inline rndis_request * hv_rndis_request(rndis_device *device, uint32_t message_type, uint32_t message_length) { rndis_request *request; rndis_msg *rndis_mesg; rndis_set_request *set; request = malloc(sizeof(rndis_request), M_NETVSC, M_WAITOK | M_ZERO); sema_init(&request->wait_sema, 0, "rndis sema"); rndis_mesg = &request->request_msg; rndis_mesg->ndis_msg_type = message_type; rndis_mesg->msg_len = message_length; /* * Set the request id. This field is always after the rndis header * for request/response packet types so we just use the set_request * as a template. */ set = &rndis_mesg->msg.set_request; set->request_id = atomic_fetchadd_int(&device->new_request_id, 1); /* Increment to get the new value (call above returns old value) */ set->request_id += 1; /* Add to the request list */ mtx_lock(&device->req_lock); STAILQ_INSERT_TAIL(&device->myrequest_list, request, mylist_entry); mtx_unlock(&device->req_lock); return (request); } /* * */ static inline void hv_put_rndis_request(rndis_device *device, rndis_request *request) { mtx_lock(&device->req_lock); /* Fixme: Has O(n) performance */ /* * XXXKYS: Use Doubly linked lists.
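 *
 * Illustrative sketch only, not part of this revision: STAILQ_REMOVE()
 * must walk the list from the head to find the predecessor of the node
 * being removed, hence the O(n) cost noted above. Converting
 * myrequest_list/mylist_entry to TAILQ_HEAD()/TAILQ_ENTRY() would make
 * the removal O(1):
 *
 *	TAILQ_REMOVE(&device->myrequest_list, request, mylist_entry);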
*/ STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_, mylist_entry); mtx_unlock(&device->req_lock); sema_destroy(&request->wait_sema); free(request, M_NETVSC); } /* * */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type) { int ret; netvsc_packet *packet; netvsc_dev *net_dev = device->net_dev; int send_buf_section_idx; /* Set up the packet to send it */ packet = &request->pkt; packet->is_data_pkt = FALSE; packet->tot_data_buf_len = request->request_msg.msg_len; packet->gpa_cnt = 1; packet->gpa[0].gpa_page = hv_get_phys_addr(&request->request_msg) >> PAGE_SHIFT; packet->gpa[0].gpa_len = request->request_msg.msg_len; packet->gpa[0].gpa_ofs = (unsigned long)&request->request_msg & (PAGE_SIZE - 1); if (packet->gpa[0].gpa_ofs + packet->gpa[0].gpa_len > PAGE_SIZE) { packet->gpa_cnt = 2; packet->gpa[0].gpa_len = PAGE_SIZE - packet->gpa[0].gpa_ofs; packet->gpa[1].gpa_page = hv_get_phys_addr((char*)&request->request_msg + packet->gpa[0].gpa_len) >> PAGE_SHIFT; packet->gpa[1].gpa_ofs = 0; packet->gpa[1].gpa_len = request->request_msg.msg_len - packet->gpa[0].gpa_len; } packet->compl.send.send_completion_context = request; /* packet */ if (message_type != REMOTE_NDIS_HALT_MSG) { packet->compl.send.on_send_completion = hv_rf_on_send_request_completion; } else { packet->compl.send.on_send_completion = hv_rf_on_send_request_halt_completion; } packet->compl.send.send_completion_tid = (unsigned long)device; if (packet->tot_data_buf_len < net_dev->send_section_size) { send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { char *dest = ((char *)net_dev->send_buf + send_buf_section_idx * net_dev->send_section_size); memcpy(dest, &request->request_msg, request->request_msg.msg_len); packet->send_buf_section_idx = send_buf_section_idx; packet->send_buf_section_size = packet->tot_data_buf_len; packet->gpa_cnt = 0; goto sendit; } /* Failed to allocate chimney send buffer; move on */ } packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; packet->send_buf_section_size = 0; sendit: ret = hv_nv_on_send(device->net_dev->sc->hn_prichan, packet); return (ret); } /* * RNDIS filter receive response */ static void hv_rf_receive_response(rndis_device *device, rndis_msg *response) { rndis_request *request = NULL; rndis_request *next_request; boolean_t found = FALSE; mtx_lock(&device->req_lock); request = STAILQ_FIRST(&device->myrequest_list); while (request != NULL) { /* * All request/response messages contain request_id as the * first field */ if (request->request_msg.msg.init_request.request_id == response->msg.init_complete.request_id) { found = TRUE; break; } next_request = STAILQ_NEXT(request, mylist_entry); request = next_request; } mtx_unlock(&device->req_lock); if (found) { if (response->msg_len <= sizeof(rndis_msg)) { memcpy(&request->response_msg, response, response->msg_len); } else { if (response->ndis_msg_type == REMOTE_NDIS_RESET_CMPLT) { /* Does not have a request id field */ request->response_msg.msg.reset_complete.status = STATUS_BUFFER_OVERFLOW; } else { request->response_msg.msg.init_complete.status = STATUS_BUFFER_OVERFLOW; } } sema_post(&request->wait_sema); } } int hv_rf_send_offload_request(struct hn_softc *sc, rndis_offload_params *offloads) { rndis_request *request; rndis_set_request *set; rndis_offload_params *offload_req; rndis_set_complete *set_complete; rndis_device *rndis_dev; device_t dev = sc->hn_dev; netvsc_dev *net_dev = sc->net_dev;
uint32_t vsp_version = net_dev->nvsp_version; uint32_t extlen = sizeof(rndis_offload_params); int ret; if (vsp_version <= NVSP_PROTOCOL_VERSION_4) { extlen = VERSION_4_OFFLOAD_SIZE; /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support * UDP checksum offload. */ offloads->udp_ipv4_csum = 0; offloads->udp_ipv6_csum = 0; } rndis_dev = net_dev->extension; request = hv_rndis_request(rndis_dev, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); if (!request) return (ENOMEM); set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_TCP_OFFLOAD_PARAMETERS; set->info_buffer_length = extlen; set->info_buffer_offset = sizeof(rndis_set_request); set->device_vc_handle = 0; offload_req = (rndis_offload_params *)((unsigned long)set + set->info_buffer_offset); *offload_req = *offloads; offload_req->header.type = RNDIS_OBJECT_TYPE_DEFAULT; offload_req->header.revision = RNDIS_OFFLOAD_PARAMETERS_REVISION_3; offload_req->header.size = extlen; ret = hv_rf_send_request(rndis_dev, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { device_printf(dev, "hv send offload request failed, ret=%d!\n", ret); goto cleanup; } ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret != 0) { device_printf(dev, "hv send offload request timeout\n"); goto cleanup; } set_complete = &request->response_msg.msg.set_complete; if (set_complete->status == RNDIS_STATUS_SUCCESS) { device_printf(dev, "hv send offload request succeeded\n"); ret = 0; } else { if (set_complete->status == STATUS_NOT_SUPPORTED) { device_printf(dev, "HV does not support offload\n"); ret = 0; } else { ret = set_complete->status; } } cleanup: hv_put_rndis_request(rndis_dev, request); return (ret); } /* * RNDIS filter receive indicate status */ static void hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response) { rndis_indicate_status *indicate = &response->msg.indicate_status; switch(indicate->status) { case RNDIS_STATUS_MEDIA_CONNECT: netvsc_linkstatus_callback(device->net_dev->sc, 1); break; case RNDIS_STATUS_MEDIA_DISCONNECT: netvsc_linkstatus_callback(device->net_dev->sc, 0); break; default: /* TODO: */ device_printf(device->net_dev->sc->hn_dev, "unknown status %d received\n", indicate->status); break; } } static int hv_rf_find_recvinfo(const rndis_packet *rpkt, struct hv_rf_recvinfo *info) { const rndis_per_packet_info *ppi; uint32_t mask, len; info->vlan_info = NULL; info->csum_info = NULL; info->hash_info = NULL; info->hash_value = NULL; if (rpkt->per_pkt_info_offset == 0) return 0; ppi = (const rndis_per_packet_info *) ((const uint8_t *)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_length; mask = 0; while (len != 0) { const void *ppi_dptr; uint32_t ppi_dlen; if (__predict_false(ppi->size < ppi->per_packet_info_offset)) return EINVAL; ppi_dlen = ppi->size - ppi->per_packet_info_offset; ppi_dptr = (const uint8_t *)ppi + ppi->per_packet_info_offset; switch (ppi->type) { case ieee_8021q_info: if (__predict_false(ppi_dlen < sizeof(ndis_8021q_info))) return EINVAL; info->vlan_info = ppi_dptr; mask |= HV_RF_RECVINFO_VLAN; break; case tcpip_chksum_info: if (__predict_false(ppi_dlen < sizeof(rndis_tcp_ip_csum_info))) return EINVAL; info->csum_info = ppi_dptr; mask |= HV_RF_RECVINFO_CSUM; break; case nbl_hash_value: if (__predict_false(ppi_dlen < sizeof(struct rndis_hash_value))) return EINVAL; info->hash_value = ppi_dptr; mask |= HV_RF_RECVINFO_HASHVAL; break; case nbl_hash_info: if (__predict_false(ppi_dlen < sizeof(struct rndis_hash_info))) return EINVAL; info->hash_info = ppi_dptr; mask |=
HV_RF_RECVINFO_HASHINF; break; default: goto skip; } if (mask == HV_RF_RECVINFO_ALL) { /* All found; done */ break; } skip: if (__predict_false(len < ppi->size)) return EINVAL; len -= ppi->size; ppi = (const rndis_per_packet_info *) ((const uint8_t *)ppi + ppi->size); } return 0; } /* * RNDIS filter receive data */ static void hv_rf_receive_data(struct hn_rx_ring *rxr, rndis_msg *message, netvsc_packet *pkt) { rndis_packet *rndis_pkt; uint32_t data_offset; struct hv_rf_recvinfo info; rndis_pkt = &message->msg.packet; /* * Fixme: Handle multiple rndis pkt msgs that may be enclosed in this * netvsc packet (ie tot_data_buf_len != message_length) */ /* Remove rndis header, then pass data packet up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; pkt->tot_data_buf_len -= data_offset; if (pkt->tot_data_buf_len < rndis_pkt->data_length) { pkt->status = nvsp_status_failure; if_printf(rxr->hn_ifp, "total length %u is less than data length %u\n", pkt->tot_data_buf_len, rndis_pkt->data_length); return; } pkt->tot_data_buf_len = rndis_pkt->data_length; pkt->data = (void *)((unsigned long)pkt->data + data_offset); if (hv_rf_find_recvinfo(rndis_pkt, &info)) { pkt->status = nvsp_status_failure; if_printf(rxr->hn_ifp, "recvinfo parsing failed\n"); return; } if (info.vlan_info != NULL) pkt->vlan_tci = info.vlan_info->u1.s1.vlan_id; else pkt->vlan_tci = 0; netvsc_recv(rxr, pkt, info.csum_info, info.hash_info, info.hash_value); } /* * RNDIS filter on receive */ int hv_rf_on_receive(netvsc_dev *net_dev, struct hn_rx_ring *rxr, netvsc_packet *pkt) { rndis_device *rndis_dev; rndis_msg *rndis_hdr; /* Make sure the rndis device state is initialized */ if (net_dev->extension == NULL) { pkt->status = nvsp_status_failure; return (ENODEV); } rndis_dev = (rndis_device *)net_dev->extension; if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) { pkt->status = nvsp_status_failure; return (EINVAL); } rndis_hdr = pkt->data; switch (rndis_hdr->ndis_msg_type) { /* data message */ case REMOTE_NDIS_PACKET_MSG: hv_rf_receive_data(rxr, rndis_hdr, pkt); break; /* completion messages */ case REMOTE_NDIS_INITIALIZE_CMPLT: case REMOTE_NDIS_QUERY_CMPLT: case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_RESET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: hv_rf_receive_response(rndis_dev, rndis_hdr); break; /* notification message */ case REMOTE_NDIS_INDICATE_STATUS_MSG: hv_rf_receive_indicate_status(rndis_dev, rndis_hdr); break; default: printf("hv_rf_on_receive(): Unknown msg_type 0x%x\n", rndis_hdr->ndis_msg_type); break; } return (0); } /* * RNDIS filter query device */ static int hv_rf_query_device(rndis_device *device, uint32_t oid, void *result, uint32_t *result_size) { rndis_request *request; uint32_t in_result_size = *result_size; rndis_query_request *query; rndis_query_complete *query_complete; int ret = 0; *result_size = 0; request = hv_rndis_request(device, REMOTE_NDIS_QUERY_MSG, RNDIS_MESSAGE_SIZE(rndis_query_request)); if (request == NULL) { ret = -1; goto cleanup; } /* Set up the rndis query */ query = &request->request_msg.msg.query_request; query->oid = oid; query->info_buffer_offset = sizeof(rndis_query_request); query->info_buffer_length = 0; query->device_vc_handle = 0; if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) { struct rndis_recv_scale_cap *cap; request->request_msg.msg_len += sizeof(struct rndis_recv_scale_cap); query->info_buffer_length = sizeof(struct rndis_recv_scale_cap); cap = (struct rndis_recv_scale_cap *)((unsigned long)query + query->info_buffer_offset); cap->hdr.type = 
RNDIS_OBJECT_TYPE_RSS_CAPABILITIES; cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; cap->hdr.size = sizeof(struct rndis_recv_scale_cap); } ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG); if (ret != 0) { /* Fixme: printf added */ printf("RNDISFILTER request failed to send!\n"); goto cleanup; } sema_wait(&request->wait_sema); /* Copy the response back */ query_complete = &request->response_msg.msg.query_complete; if (query_complete->info_buffer_length > in_result_size) { ret = EINVAL; goto cleanup; } memcpy(result, (void *)((unsigned long)query_complete + query_complete->info_buffer_offset), query_complete->info_buffer_length); *result_size = query_complete->info_buffer_length; cleanup: if (request != NULL) hv_put_rndis_request(device, request); return (ret); } /* * RNDIS filter query device MAC address */ static inline int hv_rf_query_device_mac(rndis_device *device) { uint32_t size = ETHER_ADDR_LEN; return (hv_rf_query_device(device, RNDIS_OID_802_3_PERMANENT_ADDRESS, device->hw_mac_addr, &size)); } /* * RNDIS filter query device link status */ static inline int hv_rf_query_device_link_status(rndis_device *device) { uint32_t size = sizeof(uint32_t); return (hv_rf_query_device(device, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size)); } static uint8_t netvsc_hash_key[HASH_KEYLEN] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; /* * RNDIS set vRSS parameters */ static int hv_rf_set_rss_param(rndis_device *device, int num_queue) { rndis_request *request; rndis_set_request *set; rndis_set_complete *set_complete; rndis_recv_scale_param *rssp; uint32_t extlen = sizeof(rndis_recv_scale_param) + (4 * ITAB_NUM) + HASH_KEYLEN; uint32_t *itab, status; uint8_t *keyp; int i, ret; request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); if (request == NULL) { if (bootverbose) printf("Netvsc: No memory to set vRSS parameters.\n"); ret = -1; goto cleanup; } set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_GEN_RSS_PARAMETERS; set->info_buffer_length = extlen; set->info_buffer_offset = sizeof(rndis_set_request); set->device_vc_handle = 0; /* Fill out the rssp parameter structure */ rssp = (rndis_recv_scale_param *)(set + 1); rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS; rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; rssp->hdr.size = sizeof(rndis_recv_scale_param); rssp->flag = 0; rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 | RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6; rssp->indirect_tabsize = 4 * ITAB_NUM; rssp->indirect_taboffset = sizeof(rndis_recv_scale_param); rssp->hashkey_size = HASH_KEYLEN; rssp->hashkey_offset = rssp->indirect_taboffset + rssp->indirect_tabsize; /* Set indirection table entries */ itab = (uint32_t *)(rssp + 1); for (i = 0; i < ITAB_NUM; i++) itab[i] = i % num_queue; /* Set hash key values */ keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset); for (i = 0; i < HASH_KEYLEN; i++) keyp[i] = netvsc_hash_key[i]; ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { goto cleanup; } /* * Wait for the response from the host. Another thread will signal * us when the response has arrived. In the failure case, * sema_timedwait() returns a non-zero status after waiting 5 seconds.
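 *
 * (For reference, a worked example of the indirection table filled in
 * above, assuming num_queue = 4: itab[] round-robins as
 * 0,1,2,3,0,1,2,3,... across all ITAB_NUM entries, so RX hash values
 * spread evenly over the 4 queues.)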
*/ ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret == 0) { /* Response received, check status */ set_complete = &request->response_msg.msg.set_complete; status = set_complete->status; if (status != RNDIS_STATUS_SUCCESS) { /* Bad response status, return error */ if (bootverbose) printf("Netvsc: Failed to set vRSS " "parameters.\n"); ret = -2; } else { if (bootverbose) printf("Netvsc: Successfully set vRSS " "parameters.\n"); } } else { /* * We cannot deallocate the request since we may still * receive a send completion for it. */ printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n", request->request_msg.msg.init_request.request_id, ret); goto exit; } cleanup: if (request != NULL) { hv_put_rndis_request(device, request); } exit: return (ret); } /* * RNDIS filter set packet filter * Sends an rndis request with the new filter, then waits for a response * from the host. * Returns zero on success, non-zero on failure. */ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter) { rndis_request *request; rndis_set_request *set; rndis_set_complete *set_complete; uint32_t status; int ret; request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + sizeof(uint32_t)); if (request == NULL) { ret = -1; goto cleanup; } /* Set up the rndis set */ set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; set->info_buffer_length = sizeof(uint32_t); set->info_buffer_offset = sizeof(rndis_set_request); memcpy((void *)((unsigned long)set + sizeof(rndis_set_request)), &new_filter, sizeof(uint32_t)); ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { goto cleanup; } /* * Wait for the response from the host. Another thread will signal * us when the response has arrived. In the failure case, * sema_timedwait() returns a non-zero status after waiting 5 seconds. */ ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret == 0) { /* Response received, check status */ set_complete = &request->response_msg.msg.set_complete; status = set_complete->status; if (status != RNDIS_STATUS_SUCCESS) { /* Bad response status, return error */ ret = -2; } } else { /* * We cannot deallocate the request since we may still * receive a send completion for it. */ goto exit; } cleanup: if (request != NULL) { hv_put_rndis_request(device, request); } exit: return (ret); } /* * RNDIS filter init device */ static int hv_rf_init_device(rndis_device *device) { rndis_request *request; rndis_initialize_request *init; rndis_initialize_complete *init_complete; uint32_t status; int ret; request = hv_rndis_request(device, REMOTE_NDIS_INITIALIZE_MSG, RNDIS_MESSAGE_SIZE(rndis_initialize_request)); if (!request) { ret = -1; goto cleanup; } /* Set up the rndis set */ init = &request->request_msg.msg.init_request; init->major_version = RNDIS_MAJOR_VERSION; init->minor_version = RNDIS_MINOR_VERSION; /* * Per the RNDIS document, this should be set to the max MTU * plus the header size. However, 2048 works fine, so leaving * it as is. 
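 *
 * (Illustrative arithmetic, ours rather than the RNDIS spec's: a
 * standard 1500 byte MTU plus a 14 byte Ethernet header plus the RNDIS
 * packet message header comes to well under 2048 bytes, so the value
 * below comfortably covers an ordinary frame.)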
*/ init->max_xfer_size = 2048; device->state = RNDIS_DEV_INITIALIZING; ret = hv_rf_send_request(device, request, REMOTE_NDIS_INITIALIZE_MSG); if (ret != 0) { device->state = RNDIS_DEV_UNINITIALIZED; goto cleanup; } sema_wait(&request->wait_sema); init_complete = &request->response_msg.msg.init_complete; status = init_complete->status; if (status == RNDIS_STATUS_SUCCESS) { device->state = RNDIS_DEV_INITIALIZED; ret = 0; } else { device->state = RNDIS_DEV_UNINITIALIZED; ret = -1; } cleanup: if (request) { hv_put_rndis_request(device, request); } return (ret); } #define HALT_COMPLETION_WAIT_COUNT 25 /* * RNDIS filter halt device */ static int hv_rf_halt_device(rndis_device *device) { rndis_request *request; rndis_halt_request *halt; int i, ret; /* Attempt to do a rndis device halt */ request = hv_rndis_request(device, REMOTE_NDIS_HALT_MSG, RNDIS_MESSAGE_SIZE(rndis_halt_request)); if (request == NULL) { return (-1); } /* initialize "poor man's semaphore" */ request->halt_complete_flag = 0; /* Set up the rndis set */ halt = &request->request_msg.msg.halt_request; halt->request_id = atomic_fetchadd_int(&device->new_request_id, 1); /* Increment to get the new value (call above returns old value) */ halt->request_id += 1; ret = hv_rf_send_request(device, request, REMOTE_NDIS_HALT_MSG); if (ret != 0) { return (-1); } /* * Wait for halt response from halt callback. We must wait for * the transaction response before freeing the request and other * resources. */ for (i=HALT_COMPLETION_WAIT_COUNT; i > 0; i--) { if (request->halt_complete_flag != 0) { break; } DELAY(400); } if (i == 0) { return (-1); } device->state = RNDIS_DEV_UNINITIALIZED; hv_put_rndis_request(device, request); return (0); } /* * RNDIS filter open device */ static int hv_rf_open_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_INITIALIZED) { return (0); } if (hv_promisc_mode != 1) { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_BROADCAST | NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_DIRECTED); } else { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_PROMISCUOUS); } if (ret == 0) { device->state = RNDIS_DEV_DATAINITIALIZED; } return (ret); } /* * RNDIS filter close device */ static int hv_rf_close_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_DATAINITIALIZED) { return (0); } ret = hv_rf_set_packet_filter(device, 0); if (ret == 0) { device->state = RNDIS_DEV_INITIALIZED; } return (ret); } /* * RNDIS filter on device add */ int hv_rf_on_device_add(struct hn_softc *sc, void *additl_info, int nchan, struct hn_rx_ring *rxr) { int ret; netvsc_dev *net_dev; rndis_device *rndis_dev; nvsp_msg *init_pkt; rndis_offload_params offloads; struct rndis_recv_scale_cap rsscaps; uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap); netvsc_device_info *dev_info = (netvsc_device_info *)additl_info; device_t dev = sc->hn_dev; rndis_dev = hv_get_rndis_device(); if (rndis_dev == NULL) { return (ENOMEM); } /* * Let the inner driver handle this first to create the netvsc channel * NOTE! Once the channel is created, we may get a receive callback * (hv_rf_on_receive()) before this call is completed. * Note: Earlier code used a function pointer here. 
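 *
 * (That early callback is why hv_rf_on_receive() above checks
 * net_dev->extension for NULL and rejects packets while the rndis
 * device is still RNDIS_DEV_UNINITIALIZED.)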
*/ net_dev = hv_nv_on_device_add(sc, additl_info, rxr); if (!net_dev) { hv_put_rndis_device(rndis_dev); return (ENOMEM); } /* * Initialize the rndis device */ net_dev->extension = rndis_dev; rndis_dev->net_dev = net_dev; /* Send the rndis initialization message */ ret = hv_rf_init_device(rndis_dev); if (ret != 0) { /* * TODO: If rndis init failed, we will need to shut down * the channel */ } /* Get the mac address */ ret = hv_rf_query_device_mac(rndis_dev); if (ret != 0) { /* TODO: shut down rndis device and the channel */ } /* config csum offload and send request to host */ memset(&offloads, 0, sizeof(offloads)); offloads.ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.tcp_ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.udp_ipv4_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.tcp_ipv6_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.udp_ipv6_csum = RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.lso_v2_ipv4 = RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; ret = hv_rf_send_offload_request(sc, &offloads); if (ret != 0) { /* TODO: shut down rndis device and the channel */ device_printf(dev, "hv_rf_send_offload_request failed, ret=%d\n", ret); } memcpy(dev_info->mac_addr, rndis_dev->hw_mac_addr, ETHER_ADDR_LEN); hv_rf_query_device_link_status(rndis_dev); dev_info->link_state = rndis_dev->link_status; net_dev->num_channel = 1; if (net_dev->nvsp_version < NVSP_PROTOCOL_VERSION_5 || nchan == 1) return (0); memset(&rsscaps, 0, rsscaps_size); ret = hv_rf_query_device(rndis_dev, RNDIS_OID_GEN_RSS_CAPABILITIES, &rsscaps, &rsscaps_size); if ((ret != 0) || (rsscaps.num_recv_que < 2)) { device_printf(dev, "hv_rf_query_device failed or " "rsscaps.num_recv_que < 2\n"); goto out; } device_printf(dev, "channel, offered %u, requested %d\n", rsscaps.num_recv_que, nchan); if (nchan > rsscaps.num_recv_que) nchan = rsscaps.num_recv_que; net_dev->num_channel = nchan; if (net_dev->num_channel == 1) { device_printf(dev, "net_dev->num_channel == 1 under VRSS\n"); goto out; } /* request host to create sub channels */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg5_type_subchannel; init_pkt->msgs.vers_5_msgs.subchannel_request.op = NVSP_SUBCHANNE_ALLOCATE; init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels = net_dev->num_channel - 1; ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); if (ret != 0) { device_printf(dev, "Failed to allocate subchannel\n"); goto out; } sema_wait(&net_dev->channel_init_sema); if (init_pkt->msgs.vers_5_msgs.subchn_complete.status != nvsp_status_success) { ret = ENODEV; device_printf(dev, "sub channel complete error\n"); goto out; } net_dev->num_channel = 1 + init_pkt->msgs.vers_5_msgs.subchn_complete.num_subchannels; ret = hv_rf_set_rss_param(rndis_dev, net_dev->num_channel); out: if (ret) net_dev->num_channel = 1; return (ret); } /* * RNDIS filter on device remove */ int hv_rf_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel) { netvsc_dev *net_dev = sc->net_dev; rndis_device *rndis_dev = (rndis_device *)net_dev->extension; int ret; /* Halt and release the rndis device */ ret = hv_rf_halt_device(rndis_dev); hv_put_rndis_device(rndis_dev); net_dev->extension = NULL; /* Pass control to inner driver to remove the device */ ret |= hv_nv_on_device_remove(sc, destroy_channel); return (ret); } /* * RNDIS filter on open */ int hv_rf_on_open(struct hn_softc *sc) { netvsc_dev
*net_dev = sc->net_dev; return (hv_rf_open_device((rndis_device *)net_dev->extension)); } /* * RNDIS filter on close */ int hv_rf_on_close(struct hn_softc *sc) { netvsc_dev *net_dev = sc->net_dev; return (hv_rf_close_device((rndis_device *)net_dev->extension)); } /* * RNDIS filter on send request completion callback */ static void hv_rf_on_send_request_completion(struct vmbus_channel *chan __unused, void *context __unused) { } /* * RNDIS filter on send request (halt only) completion callback */ static void hv_rf_on_send_request_halt_completion(struct vmbus_channel *chan __unused, void *context) { rndis_request *request = context; /* * Notify hv_rf_halt_device() about halt completion. * The halt code must wait for completion before freeing * the transaction resources. */ request->halt_complete_flag = 1; } void hv_rf_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { netvsc_channel_rollup(rxr, txr); } Index: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 303282) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (nonexistent) @@ -1,85 +0,0 @@ -/*- - * Copyright (c) 2009-2012,2016 Microsoft Corp. - * Copyright (c) 2012 NetApp Inc. - * Copyright (c) 2012 Citrix Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef __HYPERV_PRIV_H__ -#define __HYPERV_PRIV_H__ - -#include -#include -#include -#include -#include - -#include - -struct vmbus_softc; - -/* - * Private, VM Bus functions - */ -struct sysctl_ctx_list; -struct sysctl_oid; - -void vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, - struct sysctl_oid *br_tree, hv_vmbus_ring_buffer_info *br, - const char *name); - -int hv_vmbus_ring_buffer_init( - hv_vmbus_ring_buffer_info *ring_info, - void *buffer, - uint32_t buffer_len); - -void hv_ring_buffer_cleanup( - hv_vmbus_ring_buffer_info *ring_info); - -int hv_ring_buffer_write( - hv_vmbus_ring_buffer_info *ring_info, - const struct iovec iov[], - uint32_t iovlen, - boolean_t *need_sig); - -int hv_ring_buffer_peek( - hv_vmbus_ring_buffer_info *ring_info, - void *buffer, - uint32_t buffer_len); - -int hv_ring_buffer_read( - hv_vmbus_ring_buffer_info *ring_info, - void *buffer, - uint32_t buffer_len, - uint32_t offset); - -void hv_ring_buffer_read_begin( - hv_vmbus_ring_buffer_info *ring_info); - -uint32_t hv_ring_buffer_read_end( - hv_vmbus_ring_buffer_info *ring_info); - -#endif /* __HYPERV_PRIV_H__ */ Property changes on: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/dev/hyperv/vmbus/hv_ring_buffer.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 303282) +++ head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 303283) @@ -1,524 +1,524 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include -#include "hv_vmbus_priv.h" #include +#include /* Amount of space to write to */ #define HV_BYTES_AVAIL_TO_WRITE(r, w, z) \ ((w) >= (r)) ? 
((z) - ((w) - (r))) : ((r) - (w)) static uint32_t copy_to_ring_buffer(hv_vmbus_ring_buffer_info *ring_info, uint32_t start_write_offset, const uint8_t *src, uint32_t src_len); static uint32_t copy_from_ring_buffer(hv_vmbus_ring_buffer_info *ring_info, char *dest, uint32_t dest_len, uint32_t start_read_offset); static int vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS) { const hv_vmbus_ring_buffer_info *br = arg1; uint32_t rindex, windex, intr_mask, ravail, wavail; char state[256]; rindex = br->ring_buffer->br_rindex; windex = br->ring_buffer->br_windex; intr_mask = br->ring_buffer->br_imask; wavail = HV_BYTES_AVAIL_TO_WRITE(rindex, windex, br->ring_data_size); ravail = br->ring_data_size - wavail; snprintf(state, sizeof(state), "rindex:%u windex:%u intr_mask:%u ravail:%u wavail:%u", rindex, windex, intr_mask, ravail, wavail); return sysctl_handle_string(oidp, state, sizeof(state), req); } /* * Binary bufring states. */ static int vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS) { #define BR_STATE_RIDX 0 #define BR_STATE_WIDX 1 #define BR_STATE_IMSK 2 #define BR_STATE_RSPC 3 #define BR_STATE_WSPC 4 #define BR_STATE_MAX 5 const hv_vmbus_ring_buffer_info *br = arg1; uint32_t rindex, windex, wavail, state[BR_STATE_MAX]; rindex = br->ring_buffer->br_rindex; windex = br->ring_buffer->br_windex; wavail = HV_BYTES_AVAIL_TO_WRITE(rindex, windex, br->ring_data_size); state[BR_STATE_RIDX] = rindex; state[BR_STATE_WIDX] = windex; state[BR_STATE_IMSK] = br->ring_buffer->br_imask; state[BR_STATE_WSPC] = wavail; state[BR_STATE_RSPC] = br->ring_data_size - wavail; return sysctl_handle_opaque(oidp, state, sizeof(state), req); } void vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree, hv_vmbus_ring_buffer_info *br, const char *name) { struct sysctl_oid *tree; char desc[64]; tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(br_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (tree == NULL) return; snprintf(desc, sizeof(desc), "%s state", name); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, br, 0, vmbus_br_sysctl_state, "A", desc); snprintf(desc, sizeof(desc), "%s binary state", name); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state_bin", CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, br, 0, vmbus_br_sysctl_state_bin, "IU", desc); } /** * @brief Get number of bytes available to read and to write to * for the specified ring buffer */ static __inline void get_ring_buffer_avail_bytes(hv_vmbus_ring_buffer_info *rbi, uint32_t *read, uint32_t *write) { uint32_t read_loc, write_loc; /* * Capture the read/write indices before they changed */ read_loc = rbi->ring_buffer->br_rindex; write_loc = rbi->ring_buffer->br_windex; *write = HV_BYTES_AVAIL_TO_WRITE(read_loc, write_loc, rbi->ring_data_size); *read = rbi->ring_data_size - *write; } /** * @brief Get the next write location for the specified ring buffer */ static __inline uint32_t get_next_write_location(hv_vmbus_ring_buffer_info *ring_info) { return ring_info->ring_buffer->br_windex; } /** * @brief Set the next write location for the specified ring buffer */ static __inline void set_next_write_location(hv_vmbus_ring_buffer_info *ring_info, uint32_t next_write_location) { ring_info->ring_buffer->br_windex = next_write_location; } /** * @brief Get the next read location for the specified ring buffer */ static __inline uint32_t get_next_read_location(hv_vmbus_ring_buffer_info *ring_info) { return ring_info->ring_buffer->br_rindex; } /** * @brief Get the next read 
location + offset for the specified ring buffer. * This allows the caller to skip ahead. */ static __inline uint32_t get_next_read_location_with_offset(hv_vmbus_ring_buffer_info *ring_info, uint32_t offset) { uint32_t next = ring_info->ring_buffer->br_rindex; next += offset; next %= ring_info->ring_data_size; return (next); } /** * @brief Set the next read location for the specified ring buffer */ static __inline void set_next_read_location(hv_vmbus_ring_buffer_info *ring_info, uint32_t next_read_location) { ring_info->ring_buffer->br_rindex = next_read_location; } /** * @brief Get the start of the ring buffer */ static __inline void * get_ring_buffer(hv_vmbus_ring_buffer_info *ring_info) { return ring_info->ring_buffer->br_data; } /** * @brief Get the size of the ring buffer. */ static __inline uint32_t get_ring_buffer_size(hv_vmbus_ring_buffer_info *ring_info) { return ring_info->ring_data_size; } /** * Get the read and write indices as uint64_t of the specified ring buffer. */ static __inline uint64_t get_ring_buffer_indices(hv_vmbus_ring_buffer_info *ring_info) { return ((uint64_t)ring_info->ring_buffer->br_windex) << 32; } void hv_ring_buffer_read_begin(hv_vmbus_ring_buffer_info *ring_info) { ring_info->ring_buffer->br_imask = 1; mb(); } uint32_t hv_ring_buffer_read_end(hv_vmbus_ring_buffer_info *ring_info) { uint32_t read, write; ring_info->ring_buffer->br_imask = 0; mb(); /* * Now check to see if the ring buffer is still empty. * If it is not, we raced and we need to process new * incoming messages. */ get_ring_buffer_avail_bytes(ring_info, &read, &write); return (read); } /* * When we write to the ring buffer, check if the host needs to * be signaled. Here are the details of this protocol: * * 1. The host guarantees that while it is draining the * ring buffer, it will set the interrupt_mask to * indicate it does not need to be interrupted when * new data is placed. * * 2. The host guarantees that it will completely drain * the ring buffer before exiting the read loop. Further, * once the ring buffer is empty, it will clear the * interrupt_mask and re-check to see if new data has * arrived. */ static boolean_t hv_ring_buffer_needsig_on_write(uint32_t old_write_location, hv_vmbus_ring_buffer_info *rbi) { mb(); if (rbi->ring_buffer->br_imask) return (FALSE); /* Read memory barrier */ rmb(); /* * This is the only case we need to signal: when the * ring transitions from being empty to non-empty. */ if (old_write_location == rbi->ring_buffer->br_rindex) return (TRUE); return (FALSE); } /** * @brief Initialize the ring buffer. */ int hv_vmbus_ring_buffer_init(hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len) { memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info)); ring_info->ring_buffer = buffer; ring_info->ring_buffer->br_rindex = 0; ring_info->ring_buffer->br_windex = 0; ring_info->ring_data_size = buffer_len - sizeof(struct vmbus_bufring); mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN); return (0); } /** * @brief Clean up the ring buffer. */ void hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info *ring_info) { mtx_destroy(&ring_info->ring_lock); } /** * @brief Write to the ring buffer.
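 *
 * Illustrative caller sketch (hypothetical names, error handling
 * elided): the channel code gathers the packet header and payload
 * into an iovec array and signals the host only when asked to:
 *
 *	struct iovec iov[2];
 *	boolean_t need_sig;
 *
 *	iov[0].iov_base = &chan_pkt_hdr;
 *	iov[0].iov_len = sizeof(chan_pkt_hdr);
 *	iov[1].iov_base = payload;
 *	iov[1].iov_len = payload_len;
 *	if (hv_ring_buffer_write(&txbr_info, iov, 2, &need_sig) == 0 &&
 *	    need_sig)
 *		hypercall_signal_event(monprm_paddr);
 *
 * chan_pkt_hdr, payload, payload_len, txbr_info and monprm_paddr are
 * assumptions for the sketch, not names from this file.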
*/ int hv_ring_buffer_write(hv_vmbus_ring_buffer_info *out_ring_info, const struct iovec iov[], uint32_t iovlen, boolean_t *need_sig) { int i = 0; uint32_t byte_avail_to_write; uint32_t byte_avail_to_read; uint32_t old_write_location; uint32_t total_bytes_to_write = 0; volatile uint32_t next_write_location; uint64_t prev_indices = 0; for (i = 0; i < iovlen; i++) total_bytes_to_write += iov[i].iov_len; total_bytes_to_write += sizeof(uint64_t); mtx_lock_spin(&out_ring_info->ring_lock); get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read, &byte_avail_to_write); /* * If there is only room for the packet, assume it is full. * Otherwise, the next time around, we would think the ring buffer * is empty since the read index == write index */ if (byte_avail_to_write <= total_bytes_to_write) { mtx_unlock_spin(&out_ring_info->ring_lock); return (EAGAIN); } /* * Write to the ring buffer */ next_write_location = get_next_write_location(out_ring_info); old_write_location = next_write_location; for (i = 0; i < iovlen; i++) { next_write_location = copy_to_ring_buffer(out_ring_info, next_write_location, iov[i].iov_base, iov[i].iov_len); } /* * Set previous packet start */ prev_indices = get_ring_buffer_indices(out_ring_info); next_write_location = copy_to_ring_buffer(out_ring_info, next_write_location, (char *)&prev_indices, sizeof(uint64_t)); /* * Full memory barrier before updating the write index. */ mb(); /* * Now, update the write location */ set_next_write_location(out_ring_info, next_write_location); mtx_unlock_spin(&out_ring_info->ring_lock); *need_sig = hv_ring_buffer_needsig_on_write(old_write_location, out_ring_info); return (0); } /** * @brief Read without advancing the read index. */ int hv_ring_buffer_peek(hv_vmbus_ring_buffer_info *in_ring_info, void *buffer, uint32_t buffer_len) { uint32_t bytesAvailToWrite; uint32_t bytesAvailToRead; uint32_t nextReadLocation = 0; mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead, &bytesAvailToWrite); /* * Make sure there is something to read */ if (bytesAvailToRead < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } /* * Convert to byte offset */ nextReadLocation = get_next_read_location(in_ring_info); nextReadLocation = copy_from_ring_buffer(in_ring_info, (char *)buffer, buffer_len, nextReadLocation); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Read and advance the read index. */ int hv_ring_buffer_read(hv_vmbus_ring_buffer_info *in_ring_info, void *buffer, uint32_t buffer_len, uint32_t offset) { uint32_t bytes_avail_to_write; uint32_t bytes_avail_to_read; uint32_t next_read_location = 0; uint64_t prev_indices = 0; if (buffer_len <= 0) return (EINVAL); mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes(in_ring_info, &bytes_avail_to_read, &bytes_avail_to_write); /* * Make sure there is something to read */ if (bytes_avail_to_read < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } next_read_location = get_next_read_location_with_offset(in_ring_info, offset); next_read_location = copy_from_ring_buffer(in_ring_info, (char *)buffer, buffer_len, next_read_location); next_read_location = copy_from_ring_buffer(in_ring_info, (char *)&prev_indices, sizeof(uint64_t), next_read_location); /* * Make sure all reads are done before we update the read index since * the writer may start writing to the read area once the read index * is updated.
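 *
 * For reference, the on-ring layout consumed above is (illustrative):
 *
 *	| payload (buffer_len bytes) | prev_indices (8 bytes) |
 *
 * which mirrors the extra sizeof(uint64_t) the write path adds to
 * total_bytes_to_write.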
*/ wmb(); /* * Update the read index */ set_next_read_location(in_ring_info, next_read_location); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Helper routine to copy from source to ring buffer. * * Assume there is enough room. Handles wrap-around in dest case only! */ static uint32_t copy_to_ring_buffer(hv_vmbus_ring_buffer_info *ring_info, uint32_t start_write_offset, const uint8_t *src, uint32_t src_len) { char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); uint32_t fragLen; if (src_len > ring_buffer_size - start_write_offset) { /* wrap-around detected! */ fragLen = ring_buffer_size - start_write_offset; memcpy(ring_buffer + start_write_offset, src, fragLen); memcpy(ring_buffer, src + fragLen, src_len - fragLen); } else { memcpy(ring_buffer + start_write_offset, src, src_len); } start_write_offset += src_len; start_write_offset %= ring_buffer_size; return (start_write_offset); } /** * @brief Helper routine to copy from the ring buffer to the destination. * * Assume there is enough room. Handles wrap-around in src case only! */ static uint32_t copy_from_ring_buffer(hv_vmbus_ring_buffer_info *ring_info, char *dest, uint32_t dest_len, uint32_t start_read_offset) { uint32_t fragLen; char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); if (dest_len > ring_buffer_size - start_read_offset) { /* wrap-around detected at the src */ fragLen = ring_buffer_size - start_read_offset; memcpy(dest, ring_buffer + start_read_offset, fragLen); memcpy(dest + fragLen, ring_buffer, dest_len - fragLen); } else { memcpy(dest, ring_buffer + start_read_offset, dest_len); } start_read_offset += dest_len; start_read_offset %= ring_buffer_size; return (start_read_offset); } Index: head/sys/dev/hyperv/vmbus/hyperv.c =================================================================== --- head/sys/dev/hyperv/vmbus/hyperv.c (revision 303282) +++ head/sys/dev/hyperv/vmbus/hyperv.c (revision 303283) @@ -1,317 +1,319 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ /** * Implements low-level interactions with Hyper-V/Azure */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include +#include #include -#include #include #include #include +#if 0 #include +#endif #define HYPERV_FREEBSD_BUILD 0ULL #define HYPERV_FREEBSD_VERSION ((uint64_t)__FreeBSD_version) #define HYPERV_FREEBSD_OSID 0ULL #define MSR_HV_GUESTID_BUILD_FREEBSD \ (HYPERV_FREEBSD_BUILD & MSR_HV_GUESTID_BUILD_MASK) #define MSR_HV_GUESTID_VERSION_FREEBSD \ ((HYPERV_FREEBSD_VERSION << MSR_HV_GUESTID_VERSION_SHIFT) & \ MSR_HV_GUESTID_VERSION_MASK) #define MSR_HV_GUESTID_OSID_FREEBSD \ ((HYPERV_FREEBSD_OSID << MSR_HV_GUESTID_OSID_SHIFT) & \ MSR_HV_GUESTID_OSID_MASK) #define MSR_HV_GUESTID_FREEBSD \ (MSR_HV_GUESTID_BUILD_FREEBSD | \ MSR_HV_GUESTID_VERSION_FREEBSD | \ MSR_HV_GUESTID_OSID_FREEBSD | \ MSR_HV_GUESTID_OSTYPE_FREEBSD) struct hypercall_ctx { void *hc_addr; struct hyperv_dma hc_dma; }; static u_int hyperv_get_timecount(struct timecounter *tc); u_int hyperv_features; u_int hyperv_recommends; static u_int hyperv_pm_features; static u_int hyperv_features3; static struct timecounter hyperv_timecounter = { .tc_get_timecount = hyperv_get_timecount, .tc_poll_pps = NULL, .tc_counter_mask = 0xffffffff, .tc_frequency = HYPERV_TIMER_FREQ, .tc_name = "Hyper-V", .tc_quality = 2000, .tc_flags = 0, .tc_priv = NULL }; static struct hypercall_ctx hypercall_context; static u_int hyperv_get_timecount(struct timecounter *tc __unused) { return rdmsr(MSR_HV_TIME_REF_COUNT); } uint64_t hypercall_post_message(bus_addr_t msg_paddr) { return hypercall_md(hypercall_context.hc_addr, HYPERCALL_POST_MESSAGE, msg_paddr, 0); } uint64_t hypercall_signal_event(bus_addr_t monprm_paddr) { return hypercall_md(hypercall_context.hc_addr, HYPERCALL_SIGNAL_EVENT, monprm_paddr, 0); } int hyperv_guid2str(const struct hyperv_guid *guid, char *buf, size_t sz) { const uint8_t *d = guid->hv_guid; return snprintf(buf, sz, "%02x%02x%02x%02x-" "%02x%02x-%02x%02x-%02x%02x-" "%02x%02x%02x%02x%02x%02x", d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); } static bool hyperv_identify(void) { u_int regs[4]; unsigned int maxleaf; if (vm_guest != VM_GUEST_HV) return (false); do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); maxleaf = regs[0]; if (maxleaf < CPUID_LEAF_HV_LIMITS) return (false); do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); if (regs[0] != CPUID_HV_IFACE_HYPERV) return (false); do_cpuid(CPUID_LEAF_HV_FEATURES, regs); if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) { /* * Hyper-V w/o Hypercall is impossible; someone * is faking Hyper-V.
*/ return (false); } hyperv_features = regs[0]; hyperv_pm_features = regs[2]; hyperv_features3 = regs[3]; do_cpuid(CPUID_LEAF_HV_IDENTITY, regs); printf("Hyper-V Version: %d.%d.%d [SP%d]\n", regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]); printf(" Features=0x%b\n", hyperv_features, "\020" "\001VPRUNTIME" /* MSR_HV_VP_RUNTIME */ "\002TMREFCNT" /* MSR_HV_TIME_REF_COUNT */ "\003SYNIC" /* MSRs for SynIC */ "\004SYNTM" /* MSRs for SynTimer */ "\005APIC" /* MSR_HV_{EOI,ICR,TPR} */ "\006HYPERCALL" /* MSR_HV_{GUEST_OS_ID,HYPERCALL} */ "\007VPINDEX" /* MSR_HV_VP_INDEX */ "\010RESET" /* MSR_HV_RESET */ "\011STATS" /* MSR_HV_STATS_ */ "\012REFTSC" /* MSR_HV_REFERENCE_TSC */ "\013IDLE" /* MSR_HV_GUEST_IDLE */ "\014TMFREQ" /* MSR_HV_{TSC,APIC}_FREQUENCY */ "\015DEBUG"); /* MSR_HV_SYNTH_DEBUG_ */ printf(" PM Features=0x%b [C%u]\n", (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK), "\020" "\005C3HPET", /* HPET is required for C3 state */ CPUPM_HV_CSTATE(hyperv_pm_features)); printf(" Features3=0x%b\n", hyperv_features3, "\020" "\001MWAIT" /* MWAIT */ "\002DEBUG" /* guest debug support */ "\003PERFMON" /* performance monitor */ "\004PCPUDPE" /* physical CPU dynamic partition event */ "\005XMMHC" /* hypercall input through XMM regs */ "\006IDLE" /* guest idle support */ "\007SLEEP" /* hypervisor sleep support */ "\010NUMA" /* NUMA distance query support */ "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */ "\012SYNCMC" /* inject synthetic machine checks */ "\013CRASH" /* MSRs for guest crash */ "\014DEBUGMSR" /* MSRs for guest debug */ "\015NPIEP" /* NPIEP */ "\016HVDIS"); /* disabling hypervisor */ do_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs); hyperv_recommends = regs[0]; if (bootverbose) printf(" Recommends: %08x %08x\n", regs[0], regs[1]); do_cpuid(CPUID_LEAF_HV_LIMITS, regs); if (bootverbose) { printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n", regs[0], regs[1], regs[2]); } if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) { do_cpuid(CPUID_LEAF_HV_HWFEATURES, regs); if (bootverbose) { printf(" HW Features: %08x, AMD: %08x\n", regs[0], regs[3]); } } return (true); } static void hyperv_init(void *dummy __unused) { if (!hyperv_identify()) { /* Not Hyper-V; reset guest id to the generic one. */ if (vm_guest == VM_GUEST_HV) vm_guest = VM_GUEST_VM; return; } /* Set guest id */ wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_FREEBSD); if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) { /* Register Hyper-V timecounter */ tc_init(&hyperv_timecounter); } } SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, NULL); static void hypercall_memfree(void) { hyperv_dmamem_free(&hypercall_context.hc_dma, hypercall_context.hc_addr); hypercall_context.hc_addr = NULL; } static void hypercall_create(void *arg __unused) { uint64_t hc, hc_orig; if (vm_guest != VM_GUEST_HV) return; hypercall_context.hc_addr = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0, PAGE_SIZE, &hypercall_context.hc_dma, BUS_DMA_WAITOK); if (hypercall_context.hc_addr == NULL) { printf("hyperv: Hypercall page allocation failed\n"); /* Can't perform any Hyper-V specific actions */ vm_guest = VM_GUEST_VM; return; } /* Get the 'reserved' bits, which require preservation. */ hc_orig = rdmsr(MSR_HV_HYPERCALL); /* * Set up the Hypercall page. * * NOTE: 'reserved' bits MUST be preserved. */ hc = ((hypercall_context.hc_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_HYPERCALL_PGSHIFT) | (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) | MSR_HV_HYPERCALL_ENABLE; wrmsr(MSR_HV_HYPERCALL, hc); /* * Confirm that the Hypercall page did get set up.
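 *
 * (MSR_HV_HYPERCALL bit layout relied upon here, per the hyperv_reg.h
 * definitions below: bit 0 is MSR_HV_HYPERCALL_ENABLE, bits 1-11 are
 * the 'reserved' bits covered by MSR_HV_HYPERCALL_RSVD_MASK (0x0ffe),
 * and the hypercall page PFN starts at bit MSR_HV_HYPERCALL_PGSHIFT
 * (12).)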
*/ hc = rdmsr(MSR_HV_HYPERCALL); if ((hc & MSR_HV_HYPERCALL_ENABLE) == 0) { printf("hyperv: Hypercall setup failed\n"); hypercall_memfree(); /* Can't perform any Hyper-V specific actions */ vm_guest = VM_GUEST_VM; return; } if (bootverbose) printf("hyperv: Hypercall created\n"); } SYSINIT(hypercall_ctor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_create, NULL); static void hypercall_destroy(void *arg __unused) { uint64_t hc; if (hypercall_context.hc_addr == NULL) return; /* Disable Hypercall */ hc = rdmsr(MSR_HV_HYPERCALL); wrmsr(MSR_HV_HYPERCALL, (hc & MSR_HV_HYPERCALL_RSVD_MASK)); hypercall_memfree(); if (bootverbose) printf("hyperv: Hypercall destroyed\n"); } SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy, NULL); Index: head/sys/dev/hyperv/vmbus/hyperv_reg.h =================================================================== --- head/sys/dev/hyperv/vmbus/hyperv_reg.h (revision 303282) +++ head/sys/dev/hyperv/vmbus/hyperv_reg.h (revision 303283) @@ -1,197 +1,198 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _HYPERV_REG_H_ #define _HYPERV_REG_H_ #include +#include /* * Hyper-V Synthetic MSRs */ #define MSR_HV_GUEST_OS_ID 0x40000000 #define MSR_HV_GUESTID_BUILD_MASK 0xffffULL #define MSR_HV_GUESTID_VERSION_MASK 0x0000ffffffff0000ULL #define MSR_HV_GUESTID_VERSION_SHIFT 16 #define MSR_HV_GUESTID_OSID_MASK 0x00ff000000000000ULL #define MSR_HV_GUESTID_OSID_SHIFT 48 #define MSR_HV_GUESTID_OSTYPE_MASK 0x7f00000000000000ULL #define MSR_HV_GUESTID_OSTYPE_SHIFT 56 #define MSR_HV_GUESTID_OPENSRC 0x8000000000000000ULL #define MSR_HV_GUESTID_OSTYPE_LINUX \ ((0x01ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC) #define MSR_HV_GUESTID_OSTYPE_FREEBSD \ ((0x02ULL << MSR_HV_GUESTID_OSTYPE_SHIFT) | MSR_HV_GUESTID_OPENSRC) #define MSR_HV_HYPERCALL 0x40000001 #define MSR_HV_HYPERCALL_ENABLE 0x0001ULL #define MSR_HV_HYPERCALL_RSVD_MASK 0x0ffeULL #define MSR_HV_HYPERCALL_PGSHIFT 12 #define MSR_HV_VP_INDEX 0x40000002 #define MSR_HV_TIME_REF_COUNT 0x40000020 #define MSR_HV_SCONTROL 0x40000080 #define MSR_HV_SCTRL_ENABLE 0x0001ULL #define MSR_HV_SCTRL_RSVD_MASK 0xfffffffffffffffeULL #define MSR_HV_SIEFP 0x40000082 #define MSR_HV_SIEFP_ENABLE 0x0001ULL #define MSR_HV_SIEFP_RSVD_MASK 0x0ffeULL #define MSR_HV_SIEFP_PGSHIFT 12 #define MSR_HV_SIMP 0x40000083 #define MSR_HV_SIMP_ENABLE 0x0001ULL #define MSR_HV_SIMP_RSVD_MASK 0x0ffeULL #define MSR_HV_SIMP_PGSHIFT 12 #define MSR_HV_EOM 0x40000084 #define MSR_HV_SINT0 0x40000090 #define MSR_HV_SINT_VECTOR_MASK 0x00ffULL #define MSR_HV_SINT_RSVD1_MASK 0xff00ULL #define MSR_HV_SINT_MASKED 0x00010000ULL #define MSR_HV_SINT_AUTOEOI 0x00020000ULL #define MSR_HV_SINT_RSVD2_MASK 0xfffffffffffc0000ULL #define MSR_HV_SINT_RSVD_MASK (MSR_HV_SINT_RSVD1_MASK | \ MSR_HV_SINT_RSVD2_MASK) #define MSR_HV_STIMER0_CONFIG 0x400000b0 #define MSR_HV_STIMER_CFG_ENABLE 0x0001ULL #define MSR_HV_STIMER_CFG_PERIODIC 0x0002ULL #define MSR_HV_STIMER_CFG_LAZY 0x0004ULL #define MSR_HV_STIMER_CFG_AUTOEN 0x0008ULL #define MSR_HV_STIMER_CFG_SINT_MASK 0x000f0000ULL #define MSR_HV_STIMER_CFG_SINT_SHIFT 16 #define MSR_HV_STIMER0_COUNT 0x400000b1 /* * CPUID leaves */ #define CPUID_LEAF_HV_MAXLEAF 0x40000000 #define CPUID_LEAF_HV_INTERFACE 0x40000001 #define CPUID_HV_IFACE_HYPERV 0x31237648 /* HV#1 */ #define CPUID_LEAF_HV_IDENTITY 0x40000002 #define CPUID_LEAF_HV_FEATURES 0x40000003 /* EAX: features */ #define CPUID_HV_MSR_TIME_REFCNT 0x0002 /* MSR_HV_TIME_REF_COUNT */ #define CPUID_HV_MSR_SYNIC 0x0004 /* MSRs for SynIC */ #define CPUID_HV_MSR_SYNTIMER 0x0008 /* MSRs for SynTimer */ #define CPUID_HV_MSR_APIC 0x0010 /* MSR_HV_{EOI,ICR,TPR} */ #define CPUID_HV_MSR_HYPERCALL 0x0020 /* MSR_HV_GUEST_OS_ID * MSR_HV_HYPERCALL */ #define CPUID_HV_MSR_VP_INDEX 0x0040 /* MSR_HV_VP_INDEX */ #define CPUID_HV_MSR_GUEST_IDLE 0x0400 /* MSR_HV_GUEST_IDLE */ /* ECX: power management features */ #define CPUPM_HV_CSTATE_MASK 0x000f /* deepest C-state */ #define CPUPM_HV_C3_HPET 0x0010 /* C3 requires HPET */ #define CPUPM_HV_CSTATE(f) ((f) & CPUPM_HV_CSTATE_MASK) /* EDX: features3 */ #define CPUID3_HV_MWAIT 0x0001 /* MWAIT */ #define CPUID3_HV_XMM_HYPERCALL 0x0010 /* Hypercall input through * XMM regs */ #define CPUID3_HV_GUEST_IDLE 0x0020 /* guest idle */ #define CPUID3_HV_NUMA 0x0080 /* NUMA distance query */ #define CPUID3_HV_TIME_FREQ 0x0100 /* timer frequency query * (TSC, LAPIC) */ #define CPUID3_HV_MSR_CRASH 0x0400 /* MSRs for guest crash */ #define CPUID_LEAF_HV_RECOMMENDS 0x40000004 #define CPUID_LEAF_HV_LIMITS 0x40000005 #define CPUID_LEAF_HV_HWFEATURES 0x40000006 /* * Hyper-V Monitor 
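A minimal sketch of taking a SINT register apart with the masks just defined (the SINT index here is arbitrary):

	uint64_t sint = rdmsr(MSR_HV_SINT0 + 2);		/* hypothetical SINT2 */
	int vector  = sint & MSR_HV_SINT_VECTOR_MASK;		/* IDT vector, bits 0-7 */
	int masked  = (sint & MSR_HV_SINT_MASKED) != 0;		/* delivery suppressed? */
	int autoeoi = (sint & MSR_HV_SINT_AUTOEOI) != 0;	/* hypervisor EOIs for us */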
Notification Facility */ struct hyperv_mon_param { uint32_t mp_connid; uint16_t mp_evtflag_ofs; uint16_t mp_rsvd; } __packed; /* * Hyper-V message types */ #define HYPERV_MSGTYPE_NONE 0 #define HYPERV_MSGTYPE_CHANNEL 1 #define HYPERV_MSGTYPE_TIMER_EXPIRED 0x80000010 /* * Hypercall status codes */ #define HYPERCALL_STATUS_SUCCESS 0x0000 /* * Hypercall input values */ #define HYPERCALL_POST_MESSAGE 0x005c #define HYPERCALL_SIGNAL_EVENT 0x005d /* * Hypercall input parameters */ #define HYPERCALL_PARAM_ALIGN 8 #if 0 /* * XXX * <> requires * input parameters size to be multiple of 8, however, many post * message input parameters do _not_ meet this requirement. */ #define HYPERCALL_PARAM_SIZE_ALIGN 8 #endif /* * HYPERCALL_POST_MESSAGE */ #define HYPERCALL_POSTMSGIN_DSIZE_MAX 240 #define HYPERCALL_POSTMSGIN_SIZE 256 struct hypercall_postmsg_in { uint32_t hc_connid; uint32_t hc_rsvd; uint32_t hc_msgtype; /* HYPERV_MSGTYPE_ */ uint32_t hc_dsize; uint8_t hc_data[HYPERCALL_POSTMSGIN_DSIZE_MAX]; } __packed; CTASSERT(sizeof(struct hypercall_postmsg_in) == HYPERCALL_POSTMSGIN_SIZE); /* * HYPERCALL_SIGNAL_EVENT * * struct hyperv_mon_param. */ #endif /* !_HYPERV_REG_H_ */ Index: head/sys/dev/hyperv/vmbus/vmbus.c =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus.c (revision 303282) +++ head/sys/dev/hyperv/vmbus/vmbus.c (revision 303283) @@ -1,1332 +1,1332 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
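Filling the post-message input defined in hyperv_reg.h above by hand would look roughly like the sketch below; dsize and payload are assumptions, and vmbus_msghc_reset() later in this file does the same thing for the cached per-context buffer:

	struct hypercall_postmsg_in in;

	memset(&in, 0, HYPERCALL_POSTMSGIN_SIZE);
	in.hc_connid  = VMBUS_CONNID_MESSAGE;	/* message connection id */
	in.hc_msgtype = HYPERV_MSGTYPE_CHANNEL;
	in.hc_dsize   = dsize;			/* <= HYPERCALL_POSTMSGIN_DSIZE_MAX */
	memcpy(in.hc_data, payload, dsize);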
*/ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include +#include #include #include "acpi_if.h" #include "vmbus_if.h" #define VMBUS_GPADL_START 0xe1e10 struct vmbus_msghc { struct hypercall_postmsg_in *mh_inprm; struct hypercall_postmsg_in mh_inprm_save; struct hyperv_dma mh_inprm_dma; struct vmbus_message *mh_resp; struct vmbus_message mh_resp0; }; struct vmbus_msghc_ctx { struct vmbus_msghc *mhc_free; struct mtx mhc_free_lock; uint32_t mhc_flags; struct vmbus_msghc *mhc_active; struct mtx mhc_active_lock; }; #define VMBUS_MSGHC_CTXF_DESTROY 0x0001 static int vmbus_init(struct vmbus_softc *); static int vmbus_connect(struct vmbus_softc *, uint32_t); static int vmbus_req_channels(struct vmbus_softc *sc); static void vmbus_disconnect(struct vmbus_softc *); static int vmbus_scan(struct vmbus_softc *); static void vmbus_scan_wait(struct vmbus_softc *); static void vmbus_scan_newchan(struct vmbus_softc *); static void vmbus_scan_newdev(struct vmbus_softc *); static void vmbus_scan_done(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chanmsg_handle(struct vmbus_softc *, const struct vmbus_message *); static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS); static struct vmbus_msghc_ctx *vmbus_msghc_ctx_create(bus_dma_tag_t); static void vmbus_msghc_ctx_destroy( struct vmbus_msghc_ctx *); static void vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *); static struct vmbus_msghc *vmbus_msghc_alloc(bus_dma_tag_t); static void vmbus_msghc_free(struct vmbus_msghc *); static struct vmbus_msghc *vmbus_msghc_get1(struct vmbus_msghc_ctx *, uint32_t); struct vmbus_softc *vmbus_sc; extern inthand_t IDTVEC(vmbus_isr); static const uint32_t vmbus_version[] = { VMBUS_VERSION_WIN8_1, VMBUS_VERSION_WIN8, VMBUS_VERSION_WIN7, VMBUS_VERSION_WS2008 }; static const vmbus_chanmsg_proc_t vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done), VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP) }; static struct vmbus_msghc * vmbus_msghc_alloc(bus_dma_tag_t parent_dtag) { struct vmbus_msghc *mh; mh = malloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO); mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag, HYPERCALL_PARAM_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE, &mh->mh_inprm_dma, BUS_DMA_WAITOK); if (mh->mh_inprm == NULL) { free(mh, M_DEVBUF); return NULL; } return mh; } static void vmbus_msghc_free(struct vmbus_msghc *mh) { hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm); free(mh, M_DEVBUF); } static void vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc) { KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall")); KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg")); mtx_destroy(&mhc->mhc_free_lock); mtx_destroy(&mhc->mhc_active_lock); free(mhc, M_DEVBUF); } static struct vmbus_msghc_ctx * vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag) { struct vmbus_msghc_ctx *mhc; mhc = malloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO); mtx_init(&mhc->mhc_free_lock, "vmbus msghc free", NULL, MTX_DEF); mtx_init(&mhc->mhc_active_lock, "vmbus msghc act", NULL, MTX_DEF); mhc->mhc_free = vmbus_msghc_alloc(parent_dtag); if (mhc->mhc_free == NULL) { vmbus_msghc_ctx_free(mhc); return NULL; } return mhc; } static struct vmbus_msghc * vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag) 
{ struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_free_lock); while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL) { mtx_sleep(&mhc->mhc_free, &mhc->mhc_free_lock, 0, "gmsghc", 0); } if (mhc->mhc_flags & dtor_flag) { /* Being destroyed */ mh = NULL; } else { mh = mhc->mhc_free; KASSERT(mh != NULL, ("no free hypercall msg")); KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response")); mhc->mhc_free = NULL; } mtx_unlock(&mhc->mhc_free_lock); return mh; } void vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) { struct hypercall_postmsg_in *inprm; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); inprm = mh->mh_inprm; memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE); inprm->hc_connid = VMBUS_CONNID_MESSAGE; inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL; inprm->hc_dsize = dsize; } struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize) { struct vmbus_msghc *mh; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY); if (mh == NULL) return NULL; vmbus_msghc_reset(mh, dsize); return mh; } void vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active")); mh->mh_resp = NULL; mtx_lock(&mhc->mhc_free_lock); KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg")); mhc->mhc_free = mh; mtx_unlock(&mhc->mhc_free_lock); wakeup(&mhc->mhc_free); } void * vmbus_msghc_dataptr(struct vmbus_msghc *mh) { return mh->mh_inprm->hc_data; } static void vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc) { struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_free_lock); mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY; mtx_unlock(&mhc->mhc_free_lock); wakeup(&mhc->mhc_free); mh = vmbus_msghc_get1(mhc, 0); if (mh == NULL) panic("can't get msghc"); vmbus_msghc_free(mh); vmbus_msghc_ctx_free(mhc); } int vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) { sbintime_t time = SBT_1MS; int i; /* * Save the input parameter so that we could restore the input * parameter if the Hypercall failed. * * XXX * Is this really necessary?! i.e. Will the Hypercall ever * overwrite the input parameter? */ memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE); /* * In order to cope with transient failures, e.g. insufficient * resources on host side, we retry the post message Hypercall * several times. 20 retries seem sufficient. 
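To make the retry policy of the loop that follows concrete: the pause doubles from 1 ms while it stays below 2 s, then holds, which bounds the worst case over the 20 attempts:

	/*
	 * Delays: 1, 2, 4, ... 1024, 2048 ms, then 2048 ms for the rest.
	 * Worst case: (1 + 2 + ... + 1024) + 9 * 2048 ms
	 *           = 2047 + 18432 ms ~= 20.5 seconds.
	 */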
*/ #define HC_RETRY_MAX 20 for (i = 0; i < HC_RETRY_MAX; ++i) { uint64_t status; status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr); if (status == HYPERCALL_STATUS_SUCCESS) return 0; pause_sbt("hcpmsg", time, 0, C_HARDCLOCK); if (time < SBT_1S * 2) time *= 2; /* Restore input parameter and try again */ memcpy(mh->mh_inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE); } #undef HC_RETRY_MAX return EIO; } int vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; int error; KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response")); mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall")); mhc->mhc_active = mh; mtx_unlock(&mhc->mhc_active_lock); error = vmbus_msghc_exec_noresult(mh); if (error) { mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); mhc->mhc_active = NULL; mtx_unlock(&mhc->mhc_active_lock); } return error; } const struct vmbus_message * vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); while (mh->mh_resp == NULL) { mtx_sleep(&mhc->mhc_active, &mhc->mhc_active_lock, 0, "wmsghc", 0); } mhc->mhc_active = NULL; mtx_unlock(&mhc->mhc_active_lock); return mh->mh_resp; } void vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_active_lock); mh = mhc->mhc_active; KASSERT(mh != NULL, ("no pending msg hypercall")); memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0)); mh->mh_resp = &mh->mh_resp0; mtx_unlock(&mhc->mhc_active_lock); wakeup(&mhc->mhc_active); } uint32_t vmbus_gpadl_alloc(struct vmbus_softc *sc) { return atomic_fetchadd_int(&sc->vmbus_gpadl, 1); } static int vmbus_connect(struct vmbus_softc *sc, uint32_t version) { struct vmbus_chanmsg_connect *req; const struct vmbus_message *msg; struct vmbus_msghc *mh; int error, done = 0; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT; req->chm_ver = version; req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr; req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr; req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_msghc_put(sc, mh); return error; } msg = vmbus_msghc_wait_result(sc, mh); done = ((const struct vmbus_chanmsg_connect_resp *) msg->msg_data)->chm_done; vmbus_msghc_put(sc, mh); return (done ? 
0 : EOPNOTSUPP); } static int vmbus_init(struct vmbus_softc *sc) { int i; for (i = 0; i < nitems(vmbus_version); ++i) { int error; error = vmbus_connect(sc, vmbus_version[i]); if (!error) { sc->vmbus_version = vmbus_version[i]; device_printf(sc->vmbus_dev, "version %u.%u\n", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return 0; } } return ENXIO; } static void vmbus_disconnect(struct vmbus_softc *sc) { struct vmbus_chanmsg_disconnect *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for disconnect\n"); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "disconnect msg hypercall failed\n"); } } static int vmbus_req_channels(struct vmbus_softc *sc) { struct vmbus_chanmsg_chrequest *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); return error; } static void vmbus_scan_newchan(struct vmbus_softc *sc) { mtx_lock(&sc->vmbus_scan_lock); if ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0) sc->vmbus_scan_chcnt++; mtx_unlock(&sc->vmbus_scan_lock); } static void vmbus_scan_done(struct vmbus_softc *sc, const struct vmbus_message *msg __unused) { mtx_lock(&sc->vmbus_scan_lock); sc->vmbus_scan_chcnt |= VMBUS_SCAN_CHCNT_DONE; mtx_unlock(&sc->vmbus_scan_lock); wakeup(&sc->vmbus_scan_chcnt); } static void vmbus_scan_newdev(struct vmbus_softc *sc) { mtx_lock(&sc->vmbus_scan_lock); sc->vmbus_scan_devcnt++; mtx_unlock(&sc->vmbus_scan_lock); wakeup(&sc->vmbus_scan_devcnt); } static void vmbus_scan_wait(struct vmbus_softc *sc) { uint32_t chancnt; mtx_lock(&sc->vmbus_scan_lock); while ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0) { mtx_sleep(&sc->vmbus_scan_chcnt, &sc->vmbus_scan_lock, 0, "waitch", 0); } chancnt = sc->vmbus_scan_chcnt & ~VMBUS_SCAN_CHCNT_DONE; while (sc->vmbus_scan_devcnt != chancnt) { mtx_sleep(&sc->vmbus_scan_devcnt, &sc->vmbus_scan_lock, 0, "waitdev", 0); } mtx_unlock(&sc->vmbus_scan_lock); } static int vmbus_scan(struct vmbus_softc *sc) { int error; /* * Start vmbus scanning. */ error = vmbus_req_channels(sc); if (error) { device_printf(sc->vmbus_dev, "channel request failed: %d\n", error); return error; } /* * Wait until all devices have been added to vmbus. */ vmbus_scan_wait(sc); /* * Identify, probe and attach.
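The scan bookkeeping above packs a channel counter and a completion bit into a single word; an illustrative sketch (the real VMBUS_SCAN_CHCNT_DONE value lives in the vmbus headers, the constant below is made up):

	#define SCAN_DONE	0x80000000u	/* illustrative only */
	uint32_t scan    = 5 | SCAN_DONE;	/* 5 offers seen, offer list done */
	uint32_t chancnt = scan & ~SCAN_DONE;	/* -> 5 channels */
	int done = (scan & SCAN_DONE) != 0;	/* -> true */

vmbus_scan_wait() then sleeps until the done bit is set and vmbus_scan_devcnt has caught up with that channel count.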
*/ bus_generic_probe(sc->vmbus_dev); bus_generic_attach(sc->vmbus_dev); if (bootverbose) { device_printf(sc->vmbus_dev, "device scan, probe and attach " "done\n"); } return 0; } static void vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) { device_printf(sc->vmbus_dev, "unknown message type 0x%x\n", msg_type); return; } msg_proc = vmbus_chanmsg_handlers[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); /* Channel specific processing */ vmbus_chan_msgproc(sc, msg); } static void vmbus_msg_task(void *xsc, int pending __unused) { struct vmbus_softc *sc = xsc; volatile struct vmbus_message *msg; msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE; for (;;) { if (msg->msg_type == HYPERV_MSGTYPE_NONE) { /* No message */ break; } else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) { /* Channel message */ vmbus_chanmsg_handle(sc, __DEVOLATILE(const struct vmbus_message *, msg)); } msg->msg_type = HYPERV_MSGTYPE_NONE; /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } } static __inline int vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu) { volatile struct vmbus_message *msg; struct vmbus_message *msg_base; msg_base = VMBUS_PCPU_GET(sc, message, cpu); /* * Check event timer. * * TODO: move this to independent IDT vector. */ msg = msg_base + VMBUS_SINT_TIMER; if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) { msg->msg_type = HYPERV_MSGTYPE_NONE; vmbus_et_intr(frame); /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } /* * Check events. Hot path for network and storage I/O data; high rate. * * NOTE: * As recommended by the Windows guest fellows, we check events before * checking messages. */ sc->vmbus_event_proc(sc, cpu); /* * Check messages. Mainly management stuffs; ultra low rate. */ msg = msg_base + VMBUS_SINT_MESSAGE; if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) { taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); } return (FILTER_HANDLED); } void vmbus_handle_intr(struct trapframe *trap_frame) { struct vmbus_softc *sc = vmbus_get_softc(); int cpu = curcpu; /* * Disable preemption. */ critical_enter(); /* * Do a little interrupt counting. */ (*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++; vmbus_handle_intr1(sc, trap_frame, cpu); /* * Enable preemption. 
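Both message loops above follow the same three-step slot handoff; condensed, with the ordering requirement made explicit:

	msg->msg_type = HYPERV_MSGTYPE_NONE;	/* 1. release the message slot */
	mb();					/* 2. order release before reading flags */
	if (msg->msg_flags & VMBUS_MSGFLAG_PENDING)
		wrmsr(MSR_HV_EOM, 0);		/* 3. ask the host to redeliver */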
*/ critical_exit(); } static void vmbus_synic_setup(void *xsc) { struct vmbus_softc *sc = xsc; int cpu = curcpu; uint64_t val, orig; uint32_t sint; if (hyperv_features & CPUID_HV_MSR_VP_INDEX) { /* Save virtual processor id. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX); } else { /* Set virtual processor id to 0 for compatibility. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0; } /* * Setup the SynIC message. */ orig = rdmsr(MSR_HV_SIMP); val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT); wrmsr(MSR_HV_SIMP, val); /* * Setup the SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT); wrmsr(MSR_HV_SIEFP, val); /* * Configure and unmask SINT for message and event flags. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * Configure and unmask SINT for timer. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * All done; enable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK); wrmsr(MSR_HV_SCONTROL, val); } static void vmbus_synic_teardown(void *arg) { uint64_t orig; uint32_t sint; /* * Disable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK)); /* * Mask message and event flags SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Mask timer SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Teardown SynIC message. */ orig = rdmsr(MSR_HV_SIMP); wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK)); /* * Teardown SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK)); } static int vmbus_dma_alloc(struct vmbus_softc *sc) { bus_dma_tag_t parent_dtag; uint8_t *evtflags; int cpu; parent_dtag = bus_get_dma_tag(sc->vmbus_dev); CPU_FOREACH(cpu) { void *ptr; /* * Per-cpu messages and event flags. 
*/ ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, message, cpu) = ptr; ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr; } evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (evtflags == NULL) return ENOMEM; sc->vmbus_rx_evtflags = (u_long *)evtflags; sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2)); sc->vmbus_evtflags = evtflags; sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf1 == NULL) return ENOMEM; sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf2 == NULL) return ENOMEM; return 0; } static void vmbus_dma_free(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_evtflags != NULL) { hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags); sc->vmbus_evtflags = NULL; sc->vmbus_rx_evtflags = NULL; sc->vmbus_tx_evtflags = NULL; } if (sc->vmbus_mnf1 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1); sc->vmbus_mnf1 = NULL; } if (sc->vmbus_mnf2 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2); sc->vmbus_mnf2 = NULL; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, message_dma, cpu), VMBUS_PCPU_GET(sc, message, cpu)); VMBUS_PCPU_GET(sc, message, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), VMBUS_PCPU_GET(sc, event_flags, cpu)); VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL; } } } static int vmbus_intr_setup(struct vmbus_softc *sc) { int cpu; CPU_FOREACH(cpu) { char buf[MAXCOMLEN + 1]; cpuset_t cpu_mask; /* Allocate an interrupt counter for Hyper-V interrupt */ snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu); intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu)); /* * Setup taskqueue to handle events. Task will be per- * channel. */ VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast( "hyperv event", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, event_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask, "hvevent%d", cpu); /* * Setup tasks and taskqueues to handle messages. */ VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast( "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, message_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask, "hvmsg%d", cpu); TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0, vmbus_msg_task, sc); } /* * All Hyper-V ISR required resources are setup, now let's find a * free IDT vector for Hyper-V ISR and set it up. 
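The event and message taskqueue setup above repeats one CPU-pinning idiom; reduced to its core (tq and cpu are assumed to be in scope):

	cpuset_t cpu_mask;

	CPU_SETOF(cpu, &cpu_mask);		/* mask containing only 'cpu' */
	taskqueue_start_threads_cpuset(&tq, 1, PI_NET,
	    &cpu_mask, "worker%d", cpu);	/* one thread, bound to 'cpu' */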
*/ sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr)); if (sc->vmbus_idtvec < 0) { device_printf(sc->vmbus_dev, "cannot find free IDT vector\n"); return ENXIO; } if(bootverbose) { device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n", sc->vmbus_idtvec); } return 0; } static void vmbus_intr_teardown(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_idtvec >= 0) { lapic_ipi_free(sc->vmbus_idtvec); sc->vmbus_idtvec = -1; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) { taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu)); VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) { taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu)); VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL; } } } static int vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { return (ENOENT); } static int vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) { const struct vmbus_channel *chan; char guidbuf[HYPERV_GUID_STRLEN]; chan = vmbus_get_channel(child); if (chan == NULL) { /* Event timer device, which does not belong to a channel */ return (0); } strlcat(buf, "classid=", buflen); hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); strlcat(buf, " deviceid=", buflen); hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); return (0); } int vmbus_add_child(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; device_t parent = sc->vmbus_dev; int error = 0; /* New channel has been offered */ vmbus_scan_newchan(sc); chan->ch_dev = device_add_child(parent, NULL, -1); if (chan->ch_dev == NULL) { device_printf(parent, "device_add_child for chan%u failed\n", chan->ch_id); error = ENXIO; goto done; } device_set_ivars(chan->ch_dev, chan); done: /* New device has been/should be added to vmbus. */ vmbus_scan_newdev(sc); return error; } int vmbus_delete_child(struct vmbus_channel *chan) { int error; if (chan->ch_dev == NULL) { /* Failed to add a device. */ return 0; } /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); error = device_delete_child(chan->ch_vmbus->vmbus_dev, chan->ch_dev); mtx_unlock(&Giant); return error; } static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS) { struct vmbus_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } static uint32_t vmbus_get_version_method(device_t bus, device_t dev) { struct vmbus_softc *sc = device_get_softc(bus); return sc->vmbus_version; } static int vmbus_probe_guid_method(device_t bus, device_t dev, const struct hyperv_guid *guid) { const struct vmbus_channel *chan = vmbus_get_channel(dev); if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0) return 0; return ENXIO; } static int vmbus_probe(device_t dev) { char *id[] = { "VMBUS", NULL }; if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL || device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV || (hyperv_features & CPUID_HV_MSR_SYNIC) == 0) return (ENXIO); device_set_desc(dev, "Hyper-V Vmbus"); return (BUS_PROBE_DEFAULT); } /** * @brief Main vmbus driver initialization routine. 
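With the pnpinfo method above, a channel-backed child reports its type and instance GUIDs, so the buffer ends up containing a string of this shape (both GUIDs below are made up):

	classid=f8615163-df3e-46c5-913f-f2d2f965ed0e deviceid=7a6ad359-dead-beef-9a36-96b8d7c5f302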
* * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_doattach(struct vmbus_softc *sc) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int ret; if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED) return (0); sc->vmbus_flags |= VMBUS_FLAG_ATTACHED; mtx_init(&sc->vmbus_scan_lock, "vmbus scan", NULL, MTX_DEF); sc->vmbus_gpadl = VMBUS_GPADL_START; mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF); TAILQ_INIT(&sc->vmbus_prichans); sc->vmbus_chmap = malloc( sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF, M_WAITOK | M_ZERO); /* * Create context for "post message" Hypercalls */ sc->vmbus_msg_hc = vmbus_msghc_ctx_create( bus_get_dma_tag(sc->vmbus_dev)); if (sc->vmbus_msg_hc == NULL) { ret = ENXIO; goto cleanup; } /* * Allocate DMA stuffs. */ ret = vmbus_dma_alloc(sc); if (ret != 0) goto cleanup; /* * Setup interrupt. */ ret = vmbus_intr_setup(sc); if (ret != 0) goto cleanup; /* * Setup SynIC. */ if (bootverbose) device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started); smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc); sc->vmbus_flags |= VMBUS_FLAG_SYNIC; /* * Initialize vmbus, e.g. connect to Hypervisor. */ ret = vmbus_init(sc); if (ret != 0) goto cleanup; if (sc->vmbus_version == VMBUS_VERSION_WS2008 || sc->vmbus_version == VMBUS_VERSION_WIN7) sc->vmbus_event_proc = vmbus_event_proc_compat; else sc->vmbus_event_proc = vmbus_event_proc; ret = vmbus_scan(sc); if (ret != 0) goto cleanup; ctx = device_get_sysctl_ctx(sc->vmbus_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev)); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, vmbus_sysctl_version, "A", "vmbus version"); return (ret); cleanup: vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_msg_hc != NULL) { vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); sc->vmbus_msg_hc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_scan_lock); mtx_destroy(&sc->vmbus_prichan_lock); return (ret); } static void vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused) { } static int vmbus_attach(device_t dev) { vmbus_sc = device_get_softc(dev); vmbus_sc->vmbus_dev = dev; vmbus_sc->vmbus_idtvec = -1; /* * Event processing logic will be configured: * - After the vmbus protocol version negotiation. * - Before we request channel offers. */ vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy; #ifndef EARLY_AP_STARTUP /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * initialization directly. */ if (!cold) #endif vmbus_doattach(vmbus_sc); return (0); } static void vmbus_sysinit(void *arg __unused) { struct vmbus_softc *sc = vmbus_get_softc(); if (vm_guest != VM_GUEST_HV || sc == NULL) return; #ifndef EARLY_AP_STARTUP /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. 
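Once attach completes, the negotiated protocol version is visible from userland through the sysctl registered above; for example (the value depends on the host, e.g. 3.0 after negotiating VMBUS_VERSION_WIN8_1):

	$ sysctl dev.vmbus.0.version
	dev.vmbus.0.version: 3.0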
*/ if (!cold) #endif vmbus_doattach(sc); } static int vmbus_detach(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); vmbus_chan_destroy_all(sc); vmbus_disconnect(sc); if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) { sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC; smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL); } vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_msg_hc != NULL) { vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); sc->vmbus_msg_hc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_scan_lock); mtx_destroy(&sc->vmbus_prichan_lock); return (0); } static device_method_t vmbus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), /* Vmbus interface */ DEVMETHOD(vmbus_get_version, vmbus_get_version_method), DEVMETHOD(vmbus_probe_guid, vmbus_probe_guid_method), DEVMETHOD_END }; static driver_t vmbus_driver = { "vmbus", vmbus_methods, sizeof(struct vmbus_softc) }; static devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL); MODULE_DEPEND(vmbus, acpi, 1, 1, 1); MODULE_VERSION(vmbus, 1); #ifndef EARLY_AP_STARTUP /* * NOTE: * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is * initialized. */ SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL); #endif Index: head/sys/dev/hyperv/vmbus/vmbus_brvar.h =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus_brvar.h (nonexistent) +++ head/sys/dev/hyperv/vmbus/vmbus_brvar.h (revision 303283) @@ -0,0 +1,85 @@ +/*- + * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef __HYPERV_PRIV_H__ +#define __HYPERV_PRIV_H__ + +#include +#include +#include +#include +#include + +#include + +struct vmbus_softc; + +/* + * Private, VM Bus functions + */ +struct sysctl_ctx_list; +struct sysctl_oid; + +void vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, + struct sysctl_oid *br_tree, hv_vmbus_ring_buffer_info *br, + const char *name); + +int hv_vmbus_ring_buffer_init( + hv_vmbus_ring_buffer_info *ring_info, + void *buffer, + uint32_t buffer_len); + +void hv_ring_buffer_cleanup( + hv_vmbus_ring_buffer_info *ring_info); + +int hv_ring_buffer_write( + hv_vmbus_ring_buffer_info *ring_info, + const struct iovec iov[], + uint32_t iovlen, + boolean_t *need_sig); + +int hv_ring_buffer_peek( + hv_vmbus_ring_buffer_info *ring_info, + void *buffer, + uint32_t buffer_len); + +int hv_ring_buffer_read( + hv_vmbus_ring_buffer_info *ring_info, + void *buffer, + uint32_t buffer_len, + uint32_t offset); + +void hv_ring_buffer_read_begin( + hv_vmbus_ring_buffer_info *ring_info); + +uint32_t hv_ring_buffer_read_end( + hv_vmbus_ring_buffer_info *ring_info); + +#endif /* __HYPERV_PRIV_H__ */ Property changes on: head/sys/dev/hyperv/vmbus/vmbus_brvar.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/dev/hyperv/vmbus/vmbus_chan.c =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 303282) +++ head/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 303283) @@ -1,1404 +1,1405 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include +#include +#include static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *, const struct vmbus_channel *); static void vmbus_chan_task(void *, int); static void vmbus_chan_task_nobatch(void *, int); static void vmbus_chan_detach_task(void *, int); static void vmbus_chan_msgproc_choffer(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *, const struct vmbus_message *); /* * Vmbus channel message processing. */ static const vmbus_chanmsg_proc_t vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER, vmbus_chan_msgproc_choffer), VMBUS_CHANMSG_PROC(CHRESCIND, vmbus_chan_msgproc_chrescind), VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP) }; /* * Notify host that there are data pending on our TX bufring. */ static __inline void vmbus_chan_signal_tx(const struct vmbus_channel *chan) { atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask); if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask); else hypercall_signal_event(chan->ch_monprm_dma.hv_paddr); } static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS) { struct vmbus_channel *chan = arg1; int mnf = 0; if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) mnf = 1; return sysctl_handle_int(oidp, &mnf, 0, req); } static void vmbus_chan_sysctl_create(struct vmbus_channel *chan) { struct sysctl_oid *ch_tree, *chid_tree, *br_tree; struct sysctl_ctx_list *ctx; uint32_t ch_id; char name[16]; /* * Add sysctl nodes related to this channel to this * channel's sysctl ctx, so that they can be destroyed * independently upon close of this channel, which can * happen even if the device is not detached. */ ctx = &chan->ch_sysctl_ctx; sysctl_ctx_init(ctx); /* * Create dev.NAME.UNIT.channel tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)), OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID tree. */ if (VMBUS_CHAN_ISPRIMARY(chan)) ch_id = chan->ch_id; else ch_id = chan->ch_prichan->ch_id; snprintf(name, sizeof(name), "%d", ch_id); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Create dev.NAME.UNIT.channel.CHANID.sub tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree. * * NOTE: * chid_tree is changed to this new sysctl tree. 
*/ snprintf(name, sizeof(name), "%d", chan->ch_subidx); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id"); } SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, chan, 0, vmbus_chan_sysctl_mnf, "I", "has monitor notification facilities"); br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "br", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (br_tree != NULL) { /* * Create sysctl tree for RX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_rxbr, "rx"); /* * Create sysctl tree for TX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_txbr, "tx"); } } int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_softc *sc = chan->ch_vmbus; const struct vmbus_chanmsg_chopen_resp *resp; const struct vmbus_message *msg; struct vmbus_chanmsg_chopen *req; struct vmbus_msghc *mh; uint32_t status; int error; uint8_t *br; if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) { device_printf(sc->vmbus_dev, "invalid udata len %d for chan%u\n", udlen, chan->ch_id); return EINVAL; } KASSERT((txbr_size & PAGE_MASK) == 0, ("send bufring size is not multiple page")); KASSERT((rxbr_size & PAGE_MASK) == 0, ("recv bufring size is not multiple page")); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED_SHIFT)) panic("double-open chan%u", chan->ch_id); chan->ch_cb = cb; chan->ch_cbarg = cbarg; vmbus_chan_update_evtflagcnt(sc, chan); chan->ch_tq = VMBUS_PCPU_GET(chan->ch_vmbus, event_tq, chan->ch_cpuid); if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) TASK_INIT(&chan->ch_task, 0, vmbus_chan_task, chan); else TASK_INIT(&chan->ch_task, 0, vmbus_chan_task_nobatch, chan); /* * Allocate the TX+RX bufrings. * XXX should use ch_dev dtag */ br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (br == NULL) { device_printf(sc->vmbus_dev, "bufring allocation failed\n"); error = ENOMEM; goto failed; } chan->ch_bufring = br; /* TX bufring comes first */ hv_vmbus_ring_buffer_init(&chan->ch_txbr, br, txbr_size); /* RX bufring immediately follows TX bufring */ hv_vmbus_ring_buffer_init(&chan->ch_rxbr, br + txbr_size, rxbr_size); /* Create sysctl tree for this channel */ vmbus_chan_sysctl_create(chan); /* * Connect the bufrings, both RX and TX, to this channel. */ error = vmbus_chan_gpadl_connect(chan, chan->ch_bufring_dma.hv_paddr, txbr_size + rxbr_size, &chan->ch_bufring_gpadl); if (error) { device_printf(sc->vmbus_dev, "failed to connect bufring GPADL to chan%u\n", chan->ch_id); goto failed; } /* * Open channel w/ the bufring GPADL on the target CPU. 
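The single DMA block allocated above carries both rings back to back, and one GPADL then spans the whole allocation:

	/*
	 *   br + 0         : TX bufring, txbr_size bytes (guest -> host)
	 *   br + txbr_size : RX bufring, rxbr_size bytes (host -> guest)
	 *
	 * Both sizes are page multiples; the GPADL covers
	 * txbr_size + rxbr_size.
	 */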
*/ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chopen(chan%u)\n", chan->ch_id); error = ENXIO; goto failed; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN; req->chm_chanid = chan->ch_id; req->chm_openid = chan->ch_id; req->chm_gpadl = chan->ch_bufring_gpadl; req->chm_vcpuid = chan->ch_vcpuid; req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT; if (udlen > 0) memcpy(req->chm_udata, udata, udlen); error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chopen(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); goto failed; } msg = vmbus_msghc_wait_result(sc, mh); resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data; status = resp->chm_status; vmbus_msghc_put(sc, mh); if (status == 0) { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u opened\n", chan->ch_id); } return 0; } device_printf(sc->vmbus_dev, "failed to open chan%u\n", chan->ch_id); error = ENXIO; failed: if (chan->ch_bufring_gpadl) { vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); chan->ch_bufring_gpadl = 0; } if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); return error; } int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl0) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_conn *req; const struct vmbus_message *msg; size_t reqsz; uint32_t gpadl, status; int page_count, range_len, i, cnt, error; uint64_t page_id; /* * Preliminary checks. */ KASSERT((size & PAGE_MASK) == 0, ("invalid GPA size %d, not multiple page size", size)); page_count = size >> PAGE_SHIFT; KASSERT((paddr & PAGE_MASK) == 0, ("GPA is not page aligned %jx", (uintmax_t)paddr)); page_id = paddr >> PAGE_SHIFT; range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]); /* * We don't support multiple GPA ranges. */ if (range_len > UINT16_MAX) { device_printf(sc->vmbus_dev, "GPA too large, %d pages\n", page_count); return EOPNOTSUPP; } /* * Allocate GPADL id. */ gpadl = vmbus_gpadl_alloc(sc); *gpadl0 = gpadl; /* * Connect this GPADL to the target channel. * * NOTE: * Since each message can only hold small set of page * addresses, several messages may be required to * complete the connection. 
*/ if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn, chm_range.gpa_page[cnt]); mh = vmbus_msghc_get(sc, reqsz); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpadl->chan%u\n", chan->ch_id); return EIO; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; req->chm_range_len = range_len; req->chm_range_cnt = 1; req->chm_range.gpa_len = size; req->chm_range.gpa_ofs = 0; for (i = 0; i < cnt; ++i) req->chm_range.gpa_page[i] = page_id++; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpadl->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } while (page_count > 0) { struct vmbus_chanmsg_gpadl_subconn *subreq; if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn, chm_gpa_page[cnt]); vmbus_msghc_reset(mh, reqsz); subreq = vmbus_msghc_dataptr(mh); subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN; subreq->chm_gpadl = gpadl; for (i = 0; i < cnt; ++i) subreq->chm_gpa_page[i] = page_id++; vmbus_msghc_exec_noresult(mh); } KASSERT(page_count == 0, ("invalid page count %d", page_count)); msg = vmbus_msghc_wait_result(sc, mh); status = ((const struct vmbus_chanmsg_gpadl_connresp *) msg->msg_data)->chm_status; vmbus_msghc_put(sc, mh); if (status != 0) { device_printf(sc->vmbus_dev, "gpadl->chan%u failed: " "status %u\n", chan->ch_id, status); return EIO; } else { if (bootverbose) { device_printf(sc->vmbus_dev, "gpadl->chan%u " "succeeded\n", chan->ch_id); } } return 0; } /* * Disconnect the GPA from the target channel */ int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_disconn *req; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpa x->chan%u\n", chan->ch_id); return EBUSY; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpa x->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } vmbus_msghc_wait_result(sc, mh); /* Discard result; no useful information */ vmbus_msghc_put(sc, mh); return 0; } static void vmbus_chan_close_internal(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_chclose *req; struct taskqueue *tq = chan->ch_tq; int error; /* TODO: stringent check */ atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); /* * Free this channel's sysctl tree attached to its device's * sysctl tree. */ sysctl_ctx_free(&chan->ch_sysctl_ctx); /* * Set ch_tq to NULL to avoid more requests be scheduled. * XXX pretty broken; need rework. */ chan->ch_tq = NULL; taskqueue_drain(tq, &chan->ch_task); chan->ch_cb = NULL; /* * Close this channel. 
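The multi-message GPADL connection above can be summarized as follows (the per-message page maxima are protocol constants from the vmbus headers; their concrete values are not shown here):

	/*
	 * msg 1 : GPADL_CONN carries the gpa_range header plus up to
	 *         VMBUS_CHANMSG_GPADL_CONN_PGMAX pages.
	 * msg 2+: GPADL_SUBCONN carries up to
	 *         VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX pages each,
	 *         fire-and-forget (exec_noresult).
	 *
	 * A single GPADL_CONNRESP then acknowledges the whole GPADL.
	 */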
*/ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chclose(chan%u)\n", chan->ch_id); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chclose(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); return; } else if (bootverbose) { device_printf(sc->vmbus_dev, "close chan%u\n", chan->ch_id); } /* * Disconnect the TX+RX bufrings from this channel. */ if (chan->ch_bufring_gpadl) { vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); chan->ch_bufring_gpadl = 0; } /* * Destroy the TX+RX bufrings. */ hv_ring_buffer_cleanup(&chan->ch_txbr); hv_ring_buffer_cleanup(&chan->ch_rxbr); if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } } /* * Caller should make sure that all sub-channels have * been added to 'chan' and all to-be-closed channels * are not being opened. */ void vmbus_chan_close(struct vmbus_channel *chan) { int subchan_cnt; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Sub-channel is closed when its primary channel * is closed; done. */ return; } /* * Close all sub-channels, if any. */ subchan_cnt = chan->ch_subchan_cnt; if (subchan_cnt > 0) { struct vmbus_channel **subchan; int i; subchan = vmbus_subchan_get(chan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) vmbus_chan_close_internal(subchan[i]); vmbus_subchan_rel(subchan, subchan_cnt); } /* Then close the primary channel. */ vmbus_chan_close_internal(chan); } int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt pkt; int pktlen, pad_pktlen, hlen, error; uint64_t pad = 0; struct iovec iov[3]; boolean_t send_evt; hlen = sizeof(pkt); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = type; pkt.cp_hdr.cph_flags = flags; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = hv_ring_buffer_write(&chan->ch_txbr, iov, 3, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_sglist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX, ("invalid sglist len %d", sglen)); hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_gpa_cnt = sglen; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = sg; iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen; iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = hv_ring_buffer_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) 
vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_prplist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX, ("invalid prplist entry count %d", prp_cnt)); hlen = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prp_cnt]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_range_cnt = 1; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = prp; iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]); iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = hv_ring_buffer_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen0, uint64_t *xactid) { struct vmbus_chanpkt_hdr pkt; int error, dlen, hlen; error = hv_ring_buffer_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) return error; hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen); dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen; if (*dlen0 < dlen) { /* Return the size of this packet's data. */ *dlen0 = dlen; return ENOBUFS; } *xactid = pkt.cph_xactid; *dlen0 = dlen; /* Skip packet header */ error = hv_ring_buffer_read(&chan->ch_rxbr, data, dlen, hlen); KASSERT(!error, ("hv_ring_buffer_read failed")); return 0; } int vmbus_chan_recv_pkt(struct vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt0, int *pktlen0) { struct vmbus_chanpkt_hdr pkt; int error, pktlen; error = hv_ring_buffer_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) return error; pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen); if (*pktlen0 < pktlen) { /* Return the size of this packet. */ *pktlen0 = pktlen; return ENOBUFS; } *pktlen0 = pktlen; /* Include packet header */ error = hv_ring_buffer_read(&chan->ch_rxbr, pkt0, pktlen, 0); KASSERT(!error, ("hv_ring_buffer_read failed")); return 0; } static void vmbus_chan_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; vmbus_chan_callback_t cb = chan->ch_cb; void *cbarg = chan->ch_cbarg; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. * * NOTE: Interrupt has been disabled in the ISR. 
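vmbus_chan_recv() above signals a too-small buffer by returning ENOBUFS with *dlen0 rewritten to the required data size, so callers can grow and retry; a minimal sketch (buffer ownership and the malloc type are assumptions):

	int dlen = buflen;
	uint64_t xactid;
	int error;

	error = vmbus_chan_recv(chan, buf, &dlen, &xactid);
	if (error == ENOBUFS) {
		/* dlen now holds the required size; grow and retry. */
		buf = realloc(buf, dlen, M_DEVBUF, M_WAITOK);
		buflen = dlen;
		error = vmbus_chan_recv(chan, buf, &dlen, &xactid);
	}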
*/ for (;;) { uint32_t left; cb(chan, cbarg); left = hv_ring_buffer_read_end(&chan->ch_rxbr); if (left == 0) { /* No more data in RX bufring; done */ break; } hv_ring_buffer_read_begin(&chan->ch_rxbr); } } static void vmbus_chan_task_nobatch(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; chan->ch_cb(chan, chan->ch_cbarg); } static __inline void vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags, int flag_cnt) { int f; for (f = 0; f < flag_cnt; ++f) { uint32_t chid_base; u_long flags; int chid_ofs; if (event_flags[f] == 0) continue; flags = atomic_swap_long(&event_flags[f], 0); chid_base = f << VMBUS_EVTFLAG_SHIFT; while ((chid_ofs = ffsl(flags)) != 0) { struct vmbus_channel *chan; --chid_ofs; /* NOTE: ffsl is 1-based */ flags &= ~(1UL << chid_ofs); chan = sc->vmbus_chmap[chid_base + chid_ofs]; /* Channel is closed or closing; skip it. */ if (chan == NULL || chan->ch_tq == NULL) continue; if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) hv_ring_buffer_read_begin(&chan->ch_rxbr); taskqueue_enqueue(chan->ch_tq, &chan->ch_task); } } } void vmbus_event_proc(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; /* * On hosts running Win8 or above, the event page can be checked * directly to get the id of the channel that has the pending * interrupt. */ eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; vmbus_event_flags_proc(sc, eventf->evt_flags, VMBUS_PCPU_GET(sc, event_flags_cnt, cpu)); } void vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags, VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); } } static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc, const struct vmbus_channel *chan) { volatile int *flag_cnt_ptr; int flag_cnt; flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1; flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid); for (;;) { int old_flag_cnt; old_flag_cnt = *flag_cnt_ptr; if (old_flag_cnt >= flag_cnt) break; if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { if (bootverbose) { device_printf(sc->vmbus_dev, "channel%u update cpu%d flag_cnt to %d\n", chan->ch_id, chan->ch_cpuid, flag_cnt); } break; } } } static struct vmbus_channel * vmbus_chan_alloc(struct vmbus_softc *sc) { struct vmbus_channel *chan; chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO); chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param), &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (chan->ch_monprm == NULL) { device_printf(sc->vmbus_dev, "monprm alloc failed\n"); free(chan, M_DEVBUF); return NULL; } chan->ch_vmbus = sc; mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF); TAILQ_INIT(&chan->ch_subchans); TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan); return chan; } static void vmbus_chan_free(struct vmbus_channel *chan) { /* TODO: assert sub-channel list is empty */ /* TODO: assert no longer on the primary channel's sub-channel list */ /* TODO: assert no longer on the vmbus channel list */ hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm); mtx_destroy(&chan->ch_subchan_lock); free(chan, M_DEVBUF); } static int vmbus_chan_add(struct vmbus_channel *newchan) { struct vmbus_softc *sc = newchan->ch_vmbus; struct vmbus_channel *prichan; if (newchan->ch_id == 0) { /* * XXX * Chan0
will neither be processed nor offered; * skip it. */ device_printf(sc->vmbus_dev, "got chan0 offer, discard\n"); return EINVAL; } else if (newchan->ch_id >= VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid chan%u offer\n", newchan->ch_id); return EINVAL; } sc->vmbus_chmap[newchan->ch_id] = newchan; if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n", newchan->ch_id, newchan->ch_subidx); } mtx_lock(&sc->vmbus_prichan_lock); TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) { /* * Sub-channel will have the same type GUID and instance * GUID as its primary channel. */ if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type, sizeof(struct hyperv_guid)) == 0 && memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst, sizeof(struct hyperv_guid)) == 0) break; } if (VMBUS_CHAN_ISPRIMARY(newchan)) { if (prichan == NULL) { /* Install the new primary channel */ TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); return 0; } else { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "duplicated primary " "chan%u\n", newchan->ch_id); return EINVAL; } } else { /* Sub-channel */ if (prichan == NULL) { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "no primary chan for " "chan%u\n", newchan->ch_id); return EINVAL; } /* * Found the primary channel for this sub-channel; * move on. * * XXX refcnt prichan */ } mtx_unlock(&sc->vmbus_prichan_lock); /* * This is a sub-channel; link it with the primary channel. */ KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan), ("new channel is not sub-channel")); KASSERT(prichan != NULL, ("no primary channel")); newchan->ch_prichan = prichan; newchan->ch_dev = prichan->ch_dev; mtx_lock(&prichan->ch_subchan_lock); TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink); /* * Bump up sub-channel count and notify anyone that is * interested in this sub-channel, after this sub-channel * is set up. */ prichan->ch_subchan_cnt++; mtx_unlock(&prichan->ch_subchan_lock); wakeup(prichan); return 0; } void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); if (chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WS2008 || chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WIN7) { /* Only cpu0 is supported */ cpu = 0; } chan->ch_cpuid = cpu; chan->ch_vcpuid = VMBUS_PCPU_GET(chan->ch_vmbus, vcpuid, cpu); if (bootverbose) { printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n", chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid); } } void vmbus_chan_cpu_rr(struct vmbus_channel *chan) { static uint32_t vmbus_chan_nextcpu; int cpu; cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus; vmbus_chan_cpu_set(chan, cpu); } static void vmbus_chan_cpu_default(struct vmbus_channel *chan) { /* * By default, pin the channel to cpu0. Devices with * special channel-cpu mapping requirements should call * vmbus_chan_cpu_{set,rr}().
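 * * For example (a sketch only; nsubchan and subchan[] are hypothetical), a multi-queue driver could instead spread its sub-channels across CPUs round-robin: * * for (i = 0; i < nsubchan; ++i) * vmbus_chan_cpu_rr(subchan[i]); * * or pin each one to a specific cpu with vmbus_chan_cpu_set().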
*/ vmbus_chan_cpu_set(chan, 0); } static void vmbus_chan_msgproc_choffer(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_choffer *offer; struct vmbus_channel *chan; int error; offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data; chan = vmbus_chan_alloc(sc); if (chan == NULL) { device_printf(sc->vmbus_dev, "allocate chan%u failed\n", offer->chm_chanid); return; } chan->ch_id = offer->chm_chanid; chan->ch_subidx = offer->chm_subidx; chan->ch_guid_type = offer->chm_chtype; chan->ch_guid_inst = offer->chm_chinst; /* Batch reading is on by default */ chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT; if (sc->vmbus_version != VMBUS_VERSION_WS2008) chan->ch_monprm->mp_connid = offer->chm_connid; if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) { int trig_idx; /* * Set up the MNF stuff. */ chan->ch_txflags |= VMBUS_CHAN_TXF_HASMNF; trig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN; if (trig_idx >= VMBUS_MONTRIGS_MAX) panic("invalid monitor trigger %u", offer->chm_montrig); chan->ch_montrig = &sc->vmbus_mnf2->mnf_trigs[trig_idx].mt_pending; chan->ch_montrig_mask = 1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN); } /* * Set up the event flag. */ chan->ch_evtflag = &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT]; chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK); /* Select default cpu for this channel. */ vmbus_chan_cpu_default(chan); error = vmbus_chan_add(chan); if (error) { device_printf(sc->vmbus_dev, "add chan%u failed: %d\n", chan->ch_id, error); vmbus_chan_free(chan); return; } if (VMBUS_CHAN_ISPRIMARY(chan)) { /* * Add device for this primary channel. * * NOTE: * Error is ignored here; there is not much to do if an error * really happens. */ vmbus_add_child(chan); } } /* * XXX pretty broken; needs rework. */ static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_chrescind *note; struct vmbus_channel *chan; note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data; if (note->chm_chanid >= VMBUS_CHAN_MAX) { /* NOTE: vmbus_chmap has VMBUS_CHAN_MAX entries; >= keeps the lookup below in bounds. */ device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n", note->chm_chanid); return; } if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u rescinded\n", note->chm_chanid); } chan = sc->vmbus_chmap[note->chm_chanid]; if (chan == NULL) return; sc->vmbus_chmap[note->chm_chanid] = NULL; taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task); } static void vmbus_chan_detach_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; if (VMBUS_CHAN_ISPRIMARY(chan)) { /* Only primary channel owns the device */ vmbus_delete_child(chan); /* NOTE: DO NOT free primary channel for now */ } else { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_channel *pri_chan = chan->ch_prichan; struct vmbus_chanmsg_chfree *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chfree(chan%u)\n", chan->ch_id); goto remove; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chfree(chan%u) failed: %d\n", chan->ch_id, error); /* NOTE: Move on!
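 * Whether or not the host ever processed the CHFREE message, the sub-channel is still unlinked from its primary channel and freed below; there is no retry path for this hypercall.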
*/ } else { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u freed\n", chan->ch_id); } } remove: mtx_lock(&pri_chan->ch_subchan_lock); TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink); KASSERT(pri_chan->ch_subchan_cnt > 0, ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt)); pri_chan->ch_subchan_cnt--; mtx_unlock(&pri_chan->ch_subchan_lock); wakeup(pri_chan); vmbus_chan_free(chan); } } /* * Detach all devices and destroy the corresponding primary channels. */ void vmbus_chan_destroy_all(struct vmbus_softc *sc) { struct vmbus_channel *chan; mtx_lock(&sc->vmbus_prichan_lock); while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) { KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel")); TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); vmbus_delete_child(chan); vmbus_chan_free(chan); mtx_lock(&sc->vmbus_prichan_lock); } bzero(sc->vmbus_chmap, sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX); mtx_unlock(&sc->vmbus_prichan_lock); } /* * The channel whose vcpu binding is closest to the current vcpu will * be selected. * If there are no sub-channels, the primary channel is always * selected. */ struct vmbus_channel * vmbus_chan_cpu2chan(struct vmbus_channel *prichan, int cpu) { struct vmbus_channel *sel, *chan; uint32_t vcpu, sel_dist; KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpuid %d", cpu)); if (TAILQ_EMPTY(&prichan->ch_subchans)) return prichan; vcpu = VMBUS_PCPU_GET(prichan->ch_vmbus, vcpuid, cpu); #define CHAN_VCPU_DIST(ch, vcpu) \ (((ch)->ch_vcpuid > (vcpu)) ? \ ((ch)->ch_vcpuid - (vcpu)) : ((vcpu) - (ch)->ch_vcpuid)) #define CHAN_SELECT(ch) \ do { \ sel = ch; \ sel_dist = CHAN_VCPU_DIST(ch, vcpu); \ } while (0) CHAN_SELECT(prichan); mtx_lock(&prichan->ch_subchan_lock); TAILQ_FOREACH(chan, &prichan->ch_subchans, ch_sublink) { uint32_t dist; KASSERT(chan->ch_stflags & VMBUS_CHAN_ST_OPENED, ("chan%u is not opened", chan->ch_id)); if (chan->ch_vcpuid == vcpu) { /* Exact match; done */ CHAN_SELECT(chan); break; } dist = CHAN_VCPU_DIST(chan, vcpu); if (sel_dist <= dist) { /* Farther away or same distance; skip it. */ continue; } /* Select the closer channel.
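 * For instance, with a target vcpu of 3, a current selection bound to vcpu 1 (distance 2) and a sub-channel bound to vcpu 4 (distance 1), the sub-channel on vcpu 4 becomes the new selection.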
*/ CHAN_SELECT(chan); } mtx_unlock(&prichan->ch_subchan_lock); #undef CHAN_SELECT #undef CHAN_VCPU_DIST return sel; } struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt) { struct vmbus_channel **ret, *chan; int i; ret = malloc(subchan_cnt * sizeof(struct vmbus_channel *), M_TEMP, M_WAITOK); mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt < subchan_cnt) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0); i = 0; TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) { /* TODO: refcnt chan */ ret[i] = chan; ++i; if (i == subchan_cnt) break; } KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d", pri_chan->ch_subchan_cnt, subchan_cnt)); mtx_unlock(&pri_chan->ch_subchan_lock); return ret; } void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt __unused) { free(subchan, M_TEMP); } void vmbus_subchan_drain(struct vmbus_channel *pri_chan) { mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt > 0) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0); mtx_unlock(&pri_chan->ch_subchan_lock); } void vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX, ("invalid message type %u", msg_type)); msg_proc = vmbus_chan_msgprocs[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); } void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on) { if (!on) chan->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD; else chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; } uint32_t vmbus_chan_id(const struct vmbus_channel *chan) { return chan->ch_id; } uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan) { return chan->ch_subidx; } bool vmbus_chan_is_primary(const struct vmbus_channel *chan) { if (VMBUS_CHAN_ISPRIMARY(chan)) return true; else return false; } const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan) { return &chan->ch_guid_inst; }
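/*
 * A minimal consumer sketch (not part of this file) showing the receive
 * contract implemented above: vmbus_chan_recv() returns ENOBUFS and
 * reports the packet's data size through *dlen0 when the caller's buffer
 * is too small, leaving the packet in the RX bufring so that it can be
 * re-read once the buffer has been grown.  With batched reading left on
 * (the default set in vmbus_chan_msgproc_choffer()), vmbus_chan_task()
 * keeps re-running such a callback until the bufring is drained.  The
 * xdrv_* helpers and the softc layout are hypothetical, for illustration
 * only.
 */
static void
xdrv_chan_callback(struct vmbus_channel *chan, void *xsc)
{
	struct xdrv_softc *sc = xsc;	/* hypothetical driver softc */
	uint64_t xactid;
	int dlen, error;

	for (;;) {
		dlen = sc->sc_buflen;
		error = vmbus_chan_recv(chan, sc->sc_buf, &dlen, &xactid);
		if (error == ENOBUFS) {
			/* Buffer too small; grow it and re-read the packet. */
			xdrv_buf_grow(sc, dlen);	/* hypothetical */
			continue;
		} else if (error) {
			/* Nothing left to peek at; wait for the next event. */
			break;
		}
		/* Consume one packet worth of data. */
		xdrv_pkt_input(sc, sc->sc_buf, dlen, xactid);
	}
}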