Index: head/sys/dev/hyperv/vmbus/hv_channel.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel.c (revision 296180) +++ head/sys/dev/hyperv/vmbus/hv_channel.c (revision 296181) @@ -1,943 +1,1009 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include +#include <sys/sysctl.h> #include #include #include #include #include #include "hv_vmbus_priv.h" static int vmbus_channel_create_gpadl_header( /* must be phys and virt contiguous */ void* contig_buffer, /* page-size multiple */ uint32_t size, hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count); static void vmbus_channel_set_event(hv_vmbus_channel* channel); static void VmbusProcessChannelEvent(void* channel, int pending); /** * @brief Trigger an event notification on the specified channel */ static void vmbus_channel_set_event(hv_vmbus_channel *channel) { hv_vmbus_monitor_page *monitor_page; if (channel->offer_msg.monitor_allocated) { /* Each uint32_t represents 32 channels */ synch_set_bit((channel->offer_msg.child_rel_id & 31), ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + ((channel->offer_msg.child_rel_id >> 5)))); monitor_page = (hv_vmbus_monitor_page *) hv_vmbus_g_connection.monitor_page_2; synch_set_bit(channel->monitor_bit, (uint32_t *)&monitor_page-> trigger_group[channel->monitor_group].u.pending); } else { hv_vmbus_set_event(channel); } } +static void +hv_vmbus_channel_stat(hv_vmbus_channel* channel) +{ + device_t dev; + struct sysctl_oid *devch_sysctl; + struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl; + struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl; + struct sysctl_ctx_list *ctx; + uint32_t ch_id; + uint16_t sub_ch_id; + char name[16]; + + hv_vmbus_channel* primary_ch = channel->primary_channel; + + if (primary_ch == NULL) { + dev = channel->device->device; + ch_id = channel->offer_msg.child_rel_id; + } else { + dev = primary_ch->device->device; + ch_id = primary_ch->offer_msg.child_rel_id; + sub_ch_id = channel->offer_msg.offer.sub_channel_index; + } + ctx = device_get_sysctl_ctx(dev); + /* This creates dev.DEVNAME.DEVUNIT.channel tree */ + devch_sysctl =
SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "channel", CTLFLAG_RD, 0, ""); + /* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */ + snprintf(name, sizeof(name), "%d", ch_id); + devch_id_sysctl = SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(devch_sysctl), + OID_AUTO, name, CTLFLAG_RD, 0, ""); + + if (primary_ch != NULL) { + devch_sub_sysctl = SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(devch_id_sysctl), + OID_AUTO, "sub", CTLFLAG_RD, 0, ""); + snprintf(name, sizeof(name), "%d", sub_ch_id); + devch_id_sysctl = SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(devch_sub_sysctl), + OID_AUTO, name, CTLFLAG_RD, 0, ""); + } + + devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(devch_id_sysctl), + OID_AUTO, + "in", + CTLFLAG_RD, 0, ""); + devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(devch_id_sysctl), + OID_AUTO, + "out", + CTLFLAG_RD, 0, ""); + hv_ring_buffer_stat(ctx, + SYSCTL_CHILDREN(devch_id_in_sysctl), + &(channel->inbound), + "inbound ring buffer stats"); + hv_ring_buffer_stat(ctx, + SYSCTL_CHILDREN(devch_id_out_sysctl), + &(channel->outbound), + "outbound ring buffer stats"); +} /** * @brief Open the specified channel */ int hv_vmbus_channel_open( hv_vmbus_channel* new_channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context) { int ret = 0; void *in, *out; hv_vmbus_channel_open_channel* open_msg; hv_vmbus_channel_msg_info* open_info; mtx_lock(&new_channel->sc_lock); if (new_channel->state == HV_CHANNEL_OPEN_STATE) { new_channel->state = HV_CHANNEL_OPENING_STATE; } else { mtx_unlock(&new_channel->sc_lock); if(bootverbose) printf("VMBUS: Trying to open channel <%p> which is in " "%d state.\n", new_channel, new_channel->state); return (EINVAL); } mtx_unlock(&new_channel->sc_lock); new_channel->on_channel_callback = pfn_on_channel_callback; new_channel->channel_callback_context = context; new_channel->rxq = hv_vmbus_g_context.hv_event_queue[new_channel->target_cpu]; TASK_INIT(&new_channel->channel_task, 0, VmbusProcessChannelEvent, new_channel); /* Allocate the ring buffer */ out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size), M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(out != NULL, ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!")); if (out == NULL) return (ENOMEM); in = ((uint8_t *) out + send_ring_buffer_size); new_channel->ring_buffer_pages = out; new_channel->ring_buffer_page_count = (send_ring_buffer_size + recv_ring_buffer_size) >> PAGE_SHIFT; new_channel->ring_buffer_size = send_ring_buffer_size + recv_ring_buffer_size; hv_vmbus_ring_buffer_init( &new_channel->outbound, out, send_ring_buffer_size); hv_vmbus_ring_buffer_init( &new_channel->inbound, in, recv_ring_buffer_size); + + /* set up statistics tracking for this channel */ + hv_vmbus_channel_stat(new_channel); /** * Establish the gpadl for the ring buffer */ new_channel->ring_buffer_gpadl_handle = 0; ret = hv_vmbus_channel_establish_gpadl(new_channel, new_channel->outbound.ring_buffer, send_ring_buffer_size + recv_ring_buffer_size, &new_channel->ring_buffer_gpadl_handle); /** * Create and init the channel open message */ open_info = (hv_vmbus_channel_msg_info*) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_open_channel), M_DEVBUF, M_NOWAIT); KASSERT(open_info != NULL, ("Error VMBUS: malloc failed to allocate Open Channel message!")); if (open_info == NULL) return (ENOMEM);
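/* * What follows is the open handshake: the request is queued on * channel_msg_anchor so the response dispatcher can match the host's * reply to it, posted via hv_vmbus_post_message(), and the caller then * blocks in sema_timedwait() for up to five seconds for the open result. */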
sema_init(&open_info->wait_sema, 0, "Open Info Sema"); open_msg = (hv_vmbus_channel_open_channel*) open_info->msg; open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL; open_msg->open_id = new_channel->offer_msg.child_rel_id; open_msg->child_rel_id = new_channel->offer_msg.child_rel_id; open_msg->ring_buffer_gpadl_handle = new_channel->ring_buffer_gpadl_handle; open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size >> PAGE_SHIFT; open_msg->target_vcpu = new_channel->target_vcpu; if (user_data_len) memcpy(open_msg->user_data, user_data, user_data_len); mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( open_msg, sizeof(hv_vmbus_channel_open_channel)); if (ret != 0) goto cleanup; ret = sema_timedwait(&open_info->wait_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { if(bootverbose) printf("VMBUS: channel <%p> open timeout.\n", new_channel); goto cleanup; } if (open_info->response.open_result.status == 0) { new_channel->state = HV_CHANNEL_OPENED_STATE; if(bootverbose) printf("VMBUS: channel <%p> open success.\n", new_channel); } else { if(bootverbose) printf("Error VMBUS: channel <%p> open failed - %d!\n", new_channel, open_info->response.open_result.status); } cleanup: mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&open_info->wait_sema); free(open_info, M_DEVBUF); return (ret); } /** * @brief Create a gpadl for the specified buffer */ static int vmbus_channel_create_gpadl_header( void* contig_buffer, uint32_t size, /* page-size multiple */ hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count) { int i; int page_count; unsigned long long pfn; uint32_t msg_size; hv_vmbus_channel_gpadl_header* gpa_header; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_header; hv_vmbus_channel_msg_info* msg_body; int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize; page_count = size >> PAGE_SHIFT; pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT; /*do we need a gpadl body msg */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_header) - sizeof(hv_gpa_range); pfnCount = pfnSize / sizeof(uint64_t); if (page_count > pfnCount) { /* if(we need a gpadl body) */ /* fill in the header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + pfnCount * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); TAILQ_INIT(&msg_header->sub_msg_list_anchor); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < pfnCount; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; pfnSum = pfnCount; pfnLeft = page_count - pfnCount; /* * figure out how many pfns we can fit */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_body); pfnCount = pfnSize / sizeof(uint64_t); /* * fill in the body */ while 
(pfnLeft) { if (pfnLeft > pfnCount) { pfnCurr = pfnCount; } else { pfnCurr = pfnLeft; } msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_body) + pfnCurr * sizeof(uint64_t); msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_body != NULL, ("Error VMBUS: malloc failed to allocate Gpadl msg_body!")); if (msg_body == NULL) return (ENOMEM); msg_body->message_size = msg_size; (*message_count)++; gpadl_body = (hv_vmbus_channel_gpadl_body*) msg_body->msg; /* * gpadl_body->gpadl = kbuffer; */ for (i = 0; i < pfnCurr; i++) { gpadl_body->pfn[i] = pfn + pfnSum + i; } TAILQ_INSERT_TAIL( &msg_header->sub_msg_list_anchor, msg_body, msg_list_entry); pfnSum += pfnCurr; pfnLeft -= pfnCurr; } } else { /* else everything fits in a header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < page_count; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; } return (0); } /** * @brief Establish a GPADL for the specified buffer */ int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, void* contig_buffer, uint32_t size, /* page-size multiple */ uint32_t* gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_header* gpadl_msg; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_info* sub_msg_info; uint32_t msg_count; hv_vmbus_channel_msg_info* curr; uint32_t next_gpadl_handle; next_gpadl_handle = atomic_fetchadd_int( &hv_vmbus_g_connection.next_gpadl_handle, 1); ret = vmbus_channel_create_gpadl_header( contig_buffer, size, &msg_info, &msg_count); if(ret != 0) { /* * XXX * We can _not_ even revert the above increment; * if multiple GPADL establishments are running * in parallel, decrementing the global * next_gpadl_handle is asking for _big_ trouble. * A better solution is to have a 0-based GPADL id * bitmap ...
*/ return ret; } sema_init(&msg_info->wait_sema, 0, "Open Info Sema"); gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg; gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER; gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id; gpadl_msg->gpadl = next_gpadl_handle; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( gpadl_msg, msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); if (ret != 0) goto cleanup; if (msg_count > 1) { TAILQ_FOREACH(curr, &msg_info->sub_msg_list_anchor, msg_list_entry) { sub_msg_info = curr; gpadl_body = (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg; gpadl_body->header.message_type = HV_CHANNEL_MESSAGE_GPADL_BODY; gpadl_body->gpadl = next_gpadl_handle; ret = hv_vmbus_post_message( gpadl_body, sub_msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); /* if (the post message failed) give up and clean up */ if(ret != 0) goto cleanup; } } ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds*/ if (ret != 0) goto cleanup; *gpadl_handle = gpadl_msg->gpadl; cleanup: mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (ret); } /** * @brief Teardown the specified GPADL handle */ int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_teardown* msg; hv_vmbus_channel_msg_info* info; info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_teardown), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!")); if (info == NULL) { ret = ENOMEM; goto cleanup; } sema_init(&info->wait_sema, 0, "Open Info Sema"); msg = (hv_vmbus_channel_gpadl_teardown*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN; msg->child_rel_id = channel->offer_msg.child_rel_id; msg->gpadl = gpadl_handle; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_gpadl_teardown)); if (ret != 0) goto cleanup; ret = sema_timedwait(&info->wait_sema, 5 * hz); /* KYS 5 seconds */ cleanup: /* * Received a torndown response */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&info->wait_sema); free(info, M_DEVBUF); return (ret); } static void hv_vmbus_channel_close_internal(hv_vmbus_channel *channel) { int ret = 0; struct taskqueue *rxq = channel->rxq; hv_vmbus_channel_close_channel* msg; hv_vmbus_channel_msg_info* info; channel->state = HV_CHANNEL_OPEN_STATE; channel->sc_creation_callback = NULL; /* * set rxq to NULL to avoid more requests be scheduled */ channel->rxq = NULL; taskqueue_drain(rxq, &channel->channel_task); channel->on_channel_callback = NULL; /** * Send a closing message */ info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_close_channel), 
M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!")); if(info == NULL) return; msg = (hv_vmbus_channel_close_channel*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL; msg->child_rel_id = channel->offer_msg.child_rel_id; ret = hv_vmbus_post_message( msg, sizeof(hv_vmbus_channel_close_channel)); /* Tear down the gpadl for the channel's ring buffer */ if (channel->ring_buffer_gpadl_handle) { hv_vmbus_channel_teardown_gpdal(channel, channel->ring_buffer_gpadl_handle); } /* TODO: Send a msg to release the childRelId */ /* cleanup the ring buffers for this channel */ hv_ring_buffer_cleanup(&channel->outbound); hv_ring_buffer_cleanup(&channel->inbound); contigfree(channel->ring_buffer_pages, channel->ring_buffer_size, M_DEVBUF); free(info, M_DEVBUF); } /** * @brief Close the specified channel */ void hv_vmbus_channel_close(hv_vmbus_channel *channel) { hv_vmbus_channel* sub_channel; if (channel->primary_channel != NULL) { /* * We only close multi-channels when the primary is * closed. */ return; } /* * Close all multi-channels first. */ TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor, sc_list_entry) { if (sub_channel->state != HV_CHANNEL_OPENED_STATE) continue; hv_vmbus_channel_close_internal(sub_channel); } /* * Then close the primary channel. */ hv_vmbus_channel_close_internal(channel); } /** * @brief Send the specified buffer on the given channel */ int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags) { int ret = 0; hv_vm_packet_descriptor desc; uint32_t packet_len; uint64_t aligned_data; uint32_t packet_len_aligned; boolean_t need_sig; hv_vmbus_sg_buffer_list buffer_list[3]; packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); aligned_data = 0; /* Setup the descriptor */ desc.type = type; /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */ desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */ /* in 8-bytes granularity */ desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3; desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; buffer_list[0].data = &desc; buffer_list[0].length = sizeof(hv_vm_packet_descriptor); buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a range of single-page buffer packets using * a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; boolean_t need_sig; uint32_t packet_len; uint32_t page_buflen; uint32_t packetLen_aligned; hv_vmbus_sg_buffer_list buffer_list[4]; hv_vmbus_channel_packet_page_buffer desc; uint32_t descSize; uint64_t alignedData = 0; if (page_count > HV_MAX_PAGE_BUFFER_COUNT) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_page_buffer * is the largest size we support */ descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range); page_buflen = sizeof(hv_vmbus_page_buffer) * page_count; packet_len = descSize + page_buflen + buffer_len; 
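/* * Worked example of the length math below (sizes hypothetical): descSize = 24 and page_buflen = 16 with buffer_len = 50 gives packet_len = 90, which HV_ALIGN_UP() rounds to packetLen_aligned = 96; so desc.data_offset8 = (24 + 16) >> 3 = 5, desc.length8 = 96 >> 3 = 12, and the 6 pad bytes are supplied by alignedData. */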
packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; /* in 8-bytes granularity */ desc.data_offset8 = (descSize + page_buflen) >> 3; desc.length8 = (uint16_t) (packetLen_aligned >> 3); desc.transaction_id = request_id; desc.range_count = page_count; buffer_list[0].data = &desc; buffer_list[0].length = descSize; buffer_list[1].data = page_buffers; buffer_list[1].length = page_buflen; buffer_list[2].data = buffer; buffer_list[2].length = buffer_len; buffer_list[3].data = &alignedData; buffer_list[3].length = packetLen_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a multi-page buffer packet using a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; uint32_t desc_size; boolean_t need_sig; uint32_t packet_len; uint32_t packet_len_aligned; uint32_t pfn_count; uint64_t aligned_data = 0; hv_vmbus_sg_buffer_list buffer_list[3]; hv_vmbus_channel_packet_multipage_buffer desc; pfn_count = HV_NUM_PAGES_SPANNED( multi_page_buffer->offset, multi_page_buffer->length); if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT)) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer * is the largest size we support */ desc_size = sizeof(hv_vmbus_channel_packet_multipage_buffer) - ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) * sizeof(uint64_t)); packet_len = desc_size + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* * Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */ desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; desc.range_count = 1; desc.range.length = multi_page_buffer->length; desc.range.offset = multi_page_buffer->offset; memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array, pfn_count * sizeof(uint64_t)); buffer_list[0].data = &desc; buffer_list[0].length = desc_size; buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Retrieve the user packet on the specified channel */ int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* Buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t user_len; uint32_t packet_len; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek(&channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packet_len = desc.length8 << 3; user_len = packet_len - (desc.data_offset8 << 3); *buffer_actual_len = user_len; if (user_len > buffer_len) return (EINVAL); *request_id = desc.transaction_id; /* Copy over the packet to the user buffer */ ret = 
hv_ring_buffer_read(&channel->inbound, Buffer, user_len, (desc.data_offset8 << 3)); return (0); } /** * @brief Retrieve the raw packet on the specified channel */ int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t packetLen; uint32_t userLen; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek( &channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packetLen = desc.length8 << 3; userLen = packetLen - (desc.data_offset8 << 3); *buffer_actual_len = packetLen; if (packetLen > buffer_len) return (ENOBUFS); *request_id = desc.transaction_id; /* Copy over the entire packet to the user buffer */ ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0); return (0); } /** * Process a channel event notification */ static void VmbusProcessChannelEvent(void* context, int pending) { void* arg; uint32_t bytes_to_read; hv_vmbus_channel* channel = (hv_vmbus_channel*)context; boolean_t is_batched_reading; /** * Find the channel based on this relid and invokes * the channel callback to process the event */ if (channel == NULL) { return; } /** * To deal with the race condition where we might * receive a packet while the relevant driver is * being unloaded, dispatch the callback while * holding the channel lock. The unloading driver * will acquire the same channel lock to set the * callback to NULL. This closes the window. */ if (channel->on_channel_callback != NULL) { arg = channel->channel_callback_context; is_batched_reading = channel->batched_reading; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. */ do { if (is_batched_reading) hv_ring_buffer_read_begin(&channel->inbound); channel->on_channel_callback(arg); if (is_batched_reading) bytes_to_read = hv_ring_buffer_read_end(&channel->inbound); else bytes_to_read = 0; } while (is_batched_reading && (bytes_to_read != 0)); } } Index: head/sys/dev/hyperv/vmbus/hv_ring_buffer.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 296180) +++ head/sys/dev/hyperv/vmbus/hv_ring_buffer.c (revision 296181) @@ -1,510 +1,552 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include +#include <sys/sysctl.h> #include "hv_vmbus_priv.h" /* Amount of space to write to */ #define HV_BYTES_AVAIL_TO_WRITE(r, w, z) ((w) >= (r))? \ ((z) - ((w) - (r))):((r) - (w)) +static int +hv_rbi_sysctl_stats(SYSCTL_HANDLER_ARGS) +{ + hv_vmbus_ring_buffer_info* rbi; + uint32_t read_index, write_index, interrupt_mask, sz; + uint32_t read_avail, write_avail; + char rbi_stats[256]; + + rbi = (hv_vmbus_ring_buffer_info*)arg1; + read_index = rbi->ring_buffer->read_index; + write_index = rbi->ring_buffer->write_index; + interrupt_mask = rbi->ring_buffer->interrupt_mask; + sz = rbi->ring_data_size; + write_avail = HV_BYTES_AVAIL_TO_WRITE(read_index, + write_index, sz); + read_avail = sz - write_avail; + snprintf(rbi_stats, sizeof(rbi_stats), + "r_idx:%d " + "w_idx:%d " + "int_mask:%d " + "r_avail:%d " + "w_avail:%d", + read_index, write_index, interrupt_mask, + read_avail, write_avail); + + return (sysctl_handle_string(oidp, rbi_stats, + sizeof(rbi_stats), req)); +} + +void +hv_ring_buffer_stat( + struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *tree_node, + hv_vmbus_ring_buffer_info *rbi, + const char *desc) +{ + SYSCTL_ADD_PROC(ctx, tree_node, OID_AUTO, + "ring_buffer_stats", + CTLTYPE_STRING|CTLFLAG_RD, rbi, 0, + hv_rbi_sysctl_stats, "A", desc); +} /** * @brief Get number of bytes available to read and to write to * for the specified ring buffer */ static inline void get_ring_buffer_avail_bytes( hv_vmbus_ring_buffer_info* rbi, uint32_t* read, uint32_t* write) { uint32_t read_loc, write_loc; /* * Capture the read/write indices before they change */ read_loc = rbi->ring_buffer->read_index; write_loc = rbi->ring_buffer->write_index; *write = HV_BYTES_AVAIL_TO_WRITE( read_loc, write_loc, rbi->ring_data_size); *read = rbi->ring_data_size - *write; } /** * @brief Get the next write location for the specified ring buffer */ static inline uint32_t get_next_write_location(hv_vmbus_ring_buffer_info* ring_info) { uint32_t next = ring_info->ring_buffer->write_index; return (next); } /** * @brief Set the next write location for the specified ring buffer */ static inline void set_next_write_location( hv_vmbus_ring_buffer_info* ring_info, uint32_t next_write_location) { ring_info->ring_buffer->write_index = next_write_location; } /** * @brief Get the next read location for the specified ring buffer */ static inline uint32_t get_next_read_location(hv_vmbus_ring_buffer_info* ring_info) { uint32_t next = ring_info->ring_buffer->read_index; return (next); } /** * @brief Get the next read location + offset for the specified ring buffer. * This allows the caller to skip
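* past data already consumed in place: hv_vmbus_channel_recv_packet(), * for example, passes the descriptor length as the offset so that only * the payload is copied out.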
*/ static inline uint32_t get_next_read_location_with_offset( hv_vmbus_ring_buffer_info* ring_info, uint32_t offset) { uint32_t next = ring_info->ring_buffer->read_index; next += offset; next %= ring_info->ring_data_size; return (next); } /** * @brief Set the next read location for the specified ring buffer */ static inline void set_next_read_location( hv_vmbus_ring_buffer_info* ring_info, uint32_t next_read_location) { ring_info->ring_buffer->read_index = next_read_location; } /** * @brief Get the start of the ring buffer */ static inline void * get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info) { return (void *) ring_info->ring_buffer->buffer; } /** * @brief Get the size of the ring buffer. */ static inline uint32_t get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info) { return ring_info->ring_data_size; } /** * Get the read and write indices as uint64_t of the specified ring buffer. */ static inline uint64_t get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info) { return (uint64_t) ring_info->ring_buffer->write_index << 32; } void hv_ring_buffer_read_begin( hv_vmbus_ring_buffer_info* ring_info) { ring_info->ring_buffer->interrupt_mask = 1; mb(); } uint32_t hv_ring_buffer_read_end( hv_vmbus_ring_buffer_info* ring_info) { uint32_t read, write; ring_info->ring_buffer->interrupt_mask = 0; mb(); /* * Now check to see if the ring buffer is still empty. * If it is not, we raced and we need to process new * incoming messages. */ get_ring_buffer_avail_bytes(ring_info, &read, &write); return (read); } /* * When we write to the ring buffer, check if the host needs to * be signaled. Here is the details of this protocol: * * 1. The host guarantees that while it is draining the * ring buffer, it will set the interrupt_mask to * indicate it does not need to be interrupted when * new data is placed. * * 2. The host guarantees that it will completely drain * the ring buffer before exiting the read loop. Further, * once the ring buffer is empty, it will clear the * interrupt_mask and re-check to see if new data has * arrived. */ static boolean_t hv_ring_buffer_needsig_on_write( uint32_t old_write_location, hv_vmbus_ring_buffer_info* rbi) { mb(); if (rbi->ring_buffer->interrupt_mask) return (FALSE); /* Read memory barrier */ rmb(); /* * This is the only case we need to signal when the * ring transitions from being empty to non-empty. */ if (old_write_location == rbi->ring_buffer->read_index) return (TRUE); return (FALSE); } static uint32_t copy_to_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, uint32_t start_write_offset, char* src, uint32_t src_len); static uint32_t copy_from_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, char* dest, uint32_t dest_len, uint32_t start_read_offset); /** * @brief Get the interrupt mask for the specified ring buffer. */ uint32_t hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi) { return rbi->ring_buffer->interrupt_mask; } /** * @brief Initialize the ring buffer. */ int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info* ring_info, void* buffer, uint32_t buffer_len) { memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info)); ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer; ring_info->ring_buffer->read_index = ring_info->ring_buffer->write_index = 0; ring_info->ring_size = buffer_len; ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer); mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN); return (0); } /** * @brief Cleanup the ring buffer. 
*/ void hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info) { mtx_destroy(&ring_info->ring_lock); } /** * @brief Write to the ring buffer. */ int hv_ring_buffer_write( hv_vmbus_ring_buffer_info* out_ring_info, hv_vmbus_sg_buffer_list sg_buffers[], uint32_t sg_buffer_count, boolean_t *need_sig) { int i = 0; uint32_t byte_avail_to_write; uint32_t byte_avail_to_read; uint32_t old_write_location; uint32_t total_bytes_to_write = 0; volatile uint32_t next_write_location; uint64_t prev_indices = 0; for (i = 0; i < sg_buffer_count; i++) { total_bytes_to_write += sg_buffers[i].length; } total_bytes_to_write += sizeof(uint64_t); mtx_lock_spin(&out_ring_info->ring_lock); get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read, &byte_avail_to_write); /* * If there is only room for the packet, assume it is full; * otherwise, the next time around, we would think the ring buffer * is empty since the read index == write index */ if (byte_avail_to_write <= total_bytes_to_write) { mtx_unlock_spin(&out_ring_info->ring_lock); return (EAGAIN); } /* * Write to the ring buffer */ next_write_location = get_next_write_location(out_ring_info); old_write_location = next_write_location; for (i = 0; i < sg_buffer_count; i++) { next_write_location = copy_to_ring_buffer(out_ring_info, next_write_location, (char *) sg_buffers[i].data, sg_buffers[i].length); } /* * Set previous packet start */ prev_indices = get_ring_buffer_indices(out_ring_info); next_write_location = copy_to_ring_buffer( out_ring_info, next_write_location, (char *) &prev_indices, sizeof(uint64_t)); /* * Full memory barrier before updating the write index. */ mb(); /* * Now, update the write location */ set_next_write_location(out_ring_info, next_write_location); mtx_unlock_spin(&out_ring_info->ring_lock); *need_sig = hv_ring_buffer_needsig_on_write(old_write_location, out_ring_info); return (0); } /** * @brief Read without advancing the read index. */ int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info* in_ring_info, void* buffer, uint32_t buffer_len) { uint32_t bytesAvailToWrite; uint32_t bytesAvailToRead; uint32_t nextReadLocation = 0; mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes(in_ring_info, &bytesAvailToRead, &bytesAvailToWrite); /* * Make sure there is something to read */ if (bytesAvailToRead < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } /* * Convert to byte offset */ nextReadLocation = get_next_read_location(in_ring_info); nextReadLocation = copy_from_ring_buffer( in_ring_info, (char *)buffer, buffer_len, nextReadLocation); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Read and advance the read index.
*/ int hv_ring_buffer_read( hv_vmbus_ring_buffer_info* in_ring_info, void* buffer, uint32_t buffer_len, uint32_t offset) { uint32_t bytes_avail_to_write; uint32_t bytes_avail_to_read; uint32_t next_read_location = 0; uint64_t prev_indices = 0; if (buffer_len <= 0) return (EINVAL); mtx_lock_spin(&in_ring_info->ring_lock); get_ring_buffer_avail_bytes( in_ring_info, &bytes_avail_to_read, &bytes_avail_to_write); /* * Make sure there is something to read */ if (bytes_avail_to_read < buffer_len) { mtx_unlock_spin(&in_ring_info->ring_lock); return (EAGAIN); } next_read_location = get_next_read_location_with_offset( in_ring_info, offset); next_read_location = copy_from_ring_buffer( in_ring_info, (char *) buffer, buffer_len, next_read_location); next_read_location = copy_from_ring_buffer( in_ring_info, (char *) &prev_indices, sizeof(uint64_t), next_read_location); /* * Make sure all reads are done before we update the read index since * the writer may start writing to the read area once the read index * is updated. */ wmb(); /* * Update the read index */ set_next_read_location(in_ring_info, next_read_location); mtx_unlock_spin(&in_ring_info->ring_lock); return (0); } /** * @brief Helper routine to copy from source to ring buffer. * * Assume there is enough room. Handles wrap-around in dest case only! */ uint32_t copy_to_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, uint32_t start_write_offset, char* src, uint32_t src_len) { char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); uint32_t fragLen; if (src_len > ring_buffer_size - start_write_offset) { /* wrap-around detected! */ fragLen = ring_buffer_size - start_write_offset; memcpy(ring_buffer + start_write_offset, src, fragLen); memcpy(ring_buffer, src + fragLen, src_len - fragLen); } else { memcpy(ring_buffer + start_write_offset, src, src_len); } start_write_offset += src_len; start_write_offset %= ring_buffer_size; return (start_write_offset); } /** * @brief Helper routine to copy to source from ring buffer. * * Assume there is enough room. Handles wrap-around in src case only! */ uint32_t copy_from_ring_buffer( hv_vmbus_ring_buffer_info* ring_info, char* dest, uint32_t dest_len, uint32_t start_read_offset) { uint32_t fragLen; char *ring_buffer = get_ring_buffer(ring_info); uint32_t ring_buffer_size = get_ring_buffer_size(ring_info); if (dest_len > ring_buffer_size - start_read_offset) { /* wrap-around detected at the src */ fragLen = ring_buffer_size - start_read_offset; memcpy(dest, ring_buffer + start_read_offset, fragLen); memcpy(dest + fragLen, ring_buffer, dest_len - fragLen); } else { memcpy(dest, ring_buffer + start_read_offset, dest_len); } start_read_offset += dest_len; start_read_offset %= ring_buffer_size; return (start_read_offset); } Index: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 296180) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 296181) @@ -1,767 +1,775 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HYPERV_PRIV_H__ #define __HYPERV_PRIV_H__ #include #include #include #include #include /* * Status codes for hypervisor operations. */ typedef uint16_t hv_vmbus_status; #define HV_MESSAGE_SIZE (256) #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* * Synthetic interrupt controller flag constants. */ #define HV_EVENT_FLAGS_COUNT (256 * 8) #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t)) /** * max channel count <== event_flags_dword_count * bit_of_dword */ #define HV_CHANNEL_DWORD_LEN (32) #define HV_CHANNEL_MAX_COUNT \ ((HV_EVENT_FLAGS_DWORD_COUNT) * HV_CHANNEL_DWORD_LEN) /* * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS * MessageText: * You did not supply enough message buffers to send a message. */ #define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013) typedef void (*hv_vmbus_channel_callback)(void *context); typedef struct { void* data; uint32_t length; } hv_vmbus_sg_buffer_list; typedef struct { uint32_t current_interrupt_mask; uint32_t current_read_index; uint32_t current_write_index; uint32_t bytes_avail_to_read; uint32_t bytes_avail_to_write; } hv_vmbus_ring_buffer_debug_info; typedef struct { uint32_t rel_id; hv_vmbus_channel_state state; hv_guid interface_type; hv_guid interface_instance; uint32_t monitor_id; uint32_t server_monitor_pending; uint32_t server_monitor_latency; uint32_t server_monitor_connection_id; uint32_t client_monitor_pending; uint32_t client_monitor_latency; uint32_t client_monitor_connection_id; hv_vmbus_ring_buffer_debug_info inbound; hv_vmbus_ring_buffer_debug_info outbound; } hv_vmbus_channel_debug_info; typedef union { hv_vmbus_channel_version_supported version_supported; hv_vmbus_channel_open_result open_result; hv_vmbus_channel_gpadl_torndown gpadl_torndown; hv_vmbus_channel_gpadl_created gpadl_created; hv_vmbus_channel_version_response version_response; } hv_vmbus_channel_msg_response; /* * Represents each channel msg on the vmbus connection * This is a variable-size data structure depending on * the msg type itself */ typedef struct hv_vmbus_channel_msg_info { /* * Bookkeeping stuff */ TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry; /* * So far, this is only used to handle * gpadl body message */ TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor; /* * Synchronize the request/response if * needed. * KYS: Use a semaphore for now. * Not perf critical. 
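* The sender blocks in sema_timedwait() (5 * hz in hv_channel.c) and * the message-response path posts the semaphore once the host's reply * has been matched to this request.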
*/ struct sema wait_sema; hv_vmbus_channel_msg_response response; uint32_t message_size; /** * The channel message that goes out on * the "wire". It will contain at * minimum the * hv_vmbus_channel_msg_header * header. */ unsigned char msg[0]; } hv_vmbus_channel_msg_info; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_page_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT]; } __packed hv_vmbus_channel_packet_page_buffer; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_multipage_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; /* Always 1 in this case */ hv_vmbus_multipage_buffer range; } __packed hv_vmbus_channel_packet_multipage_buffer; enum { HV_VMBUS_MESSAGE_CONNECTION_ID = 1, HV_VMBUS_MESSAGE_PORT_ID = 1, HV_VMBUS_EVENT_CONNECTION_ID = 2, HV_VMBUS_EVENT_PORT_ID = 2, HV_VMBUS_MONITOR_CONNECTION_ID = 3, HV_VMBUS_MONITOR_PORT_ID = 3, HV_VMBUS_MESSAGE_SINT = 2 }; #define HV_PRESENT_BIT 0x80000000 #define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t) typedef struct { uint64_t guest_id; void* hypercall_page; hv_bool_uint8_t syn_ic_initialized; hv_vmbus_handle syn_ic_msg_page[MAXCPU]; hv_vmbus_handle syn_ic_event_page[MAXCPU]; /* * For FreeBSD cpuid to Hyper-V vcpuid mapping. */ uint32_t hv_vcpu_index[MAXCPU]; /* * Each cpu has its own software interrupt handler for channel * event and msg handling. */ struct taskqueue *hv_event_queue[MAXCPU]; struct intr_event *hv_msg_intr_event[MAXCPU]; void *msg_swintr[MAXCPU]; /* * Host uses this vector to interrupt the guest for vmbus channel * events and msgs. */
unsigned int hv_cb_vector; } hv_vmbus_context; /* * Define hypervisor message types */ typedef enum { HV_MESSAGE_TYPE_NONE = 0x00000000, /* * Memory access messages */ HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000, HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001, /* * Timer notification messages */ HV_MESSAGE_TIMER_EXPIRED = 0x80000010, /* * Error messages */ HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020, HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021, HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022, /* * Trace buffer complete messages */ HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040, /* * Platform-specific processor intercept messages */ HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000, HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001, HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002, HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003, HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004, HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005 } hv_vmbus_msg_type; /* * Define port identifier type */ typedef union _hv_vmbus_port_id { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u ; } hv_vmbus_port_id; /* * Define synthetic interrupt controller message flag */ typedef union { uint8_t as_uint8_t; struct { uint8_t message_pending:1; uint8_t reserved:7; } u; } hv_vmbus_msg_flags; typedef uint64_t hv_vmbus_partition_id; /* * Define synthetic interrupt controller message header */ typedef struct { hv_vmbus_msg_type message_type; uint8_t payload_size; hv_vmbus_msg_flags message_flags; uint8_t reserved[2]; union { hv_vmbus_partition_id sender; hv_vmbus_port_id port; } u; } hv_vmbus_msg_header; /* * Define synthetic interrupt controller message format */ typedef struct { hv_vmbus_msg_header header; union { uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } u ; } hv_vmbus_message; /* * The maximum number of channels is determined by the size of the * interrupt page, which is PAGE_SIZE. 1/2 of PAGE_SIZE is for the * send endpoint interrupt and the other half is for the receive * endpoint interrupt. * * Note: (PAGE_SIZE >> 1) << 3 allocates 16384 channels */ #define HV_MAX_NUM_CHANNELS (PAGE_SIZE >> 1) << 3 /* * (The value here must be a multiple of 32) */ #define HV_MAX_NUM_CHANNELS_SUPPORTED 256 /* * VM Bus connection states */ typedef enum { HV_DISCONNECTED, HV_CONNECTING, HV_CONNECTED, HV_DISCONNECTING } hv_vmbus_connect_state; #define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT typedef struct { hv_vmbus_connect_state connect_state; uint32_t next_gpadl_handle; /** * Represents channel interrupts. Each bit position * represents a channel. * When a channel sends an interrupt via VMBUS, it * finds its bit in the send_interrupt_page, sets it and * calls Hv to generate a port event. The other end * receives the port event and parses the * recv_interrupt_page to see which bit is set */ void *interrupt_page; void *send_interrupt_page; void *recv_interrupt_page; /* * 2 pages - 1st page for parent->child * notification and 2nd is child->parent * notification */ void *monitor_page_1; void *monitor_page_2; TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** * List of primary channels. Sub channels will be linked * under their primary channel. */ TAILQ_HEAD(, hv_vmbus_channel) channel_anchor; struct mtx channel_lock; /** * channel table for fast lookup through id. */ hv_vmbus_channel **channels; } hv_vmbus_connection; typedef union { uint64_t as_uint64_t; struct { uint64_t build_number : 16; uint64_t service_version : 8; /* Service Pack, etc.
*/ uint64_t minor_version : 8; uint64_t major_version : 8; /* * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS) * HV_GUEST_OS_VENDOR */ uint64_t os_id : 8; uint64_t vendor_id : 16; } u; } hv_vmbus_x64_msr_guest_os_id_contents; typedef union { uint64_t as_uint64_t; struct { uint64_t enable :1; uint64_t reserved :11; uint64_t guest_physical_address :52; } u; } hv_vmbus_x64_msr_hypercall_contents; typedef union { uint32_t as_uint32_t; struct { uint32_t group_enable :4; uint32_t rsvd_z :28; } u; } hv_vmbus_monitor_trigger_state; typedef union { uint64_t as_uint64_t; struct { uint32_t pending; uint32_t armed; } u; } hv_vmbus_monitor_trigger_group; typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } hv_vmbus_monitor_parameter; /* * hv_vmbus_monitor_page Layout * ------------------------------------------------------ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) | * | 8 | trigger_group[0] | * | 10 | trigger_group[1] | * | 18 | trigger_group[2] | * | 20 | trigger_group[3] | * | 28 | Rsvd2[0] | * | 30 | Rsvd2[1] | * | 38 | Rsvd2[2] | * | 40 | next_check_time[0][0] | next_check_time[0][1] | * | ... | * | 240 | latency[0][0..3] | * | 340 | Rsvz3[0] | * | 440 | parameter[0][0] | * | 448 | parameter[0][1] | * | ... | * | 840 | Rsvd4[0] | * ------------------------------------------------------ */ typedef struct { hv_vmbus_monitor_trigger_state trigger_state; uint32_t rsvd_z1; hv_vmbus_monitor_trigger_group trigger_group[4]; uint64_t rsvd_z2[3]; int32_t next_check_time[4][32]; uint16_t latency[4][32]; uint64_t rsvd_z3[32]; hv_vmbus_monitor_parameter parameter[4][32]; uint8_t rsvd_z4[1984]; } hv_vmbus_monitor_page; /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES). */ typedef enum { HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001, HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000, HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001, /* * The remaining functions depend on the value * of hv_cpu_id_function_interface */ HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002, HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003, HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004, HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005 } hv_vmbus_cpuid_function; /* * Define the format of the SIMP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t simp_enabled : 1; uint64_t preserved : 11; uint64_t base_simp_gpa : 52; } u; } hv_vmbus_synic_simp; /* * Define the format of the SIEFP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t siefp_enabled : 1; uint64_t preserved : 11; uint64_t base_siefp_gpa : 52; } u; } hv_vmbus_synic_siefp; /* * Define synthetic interrupt source */ typedef union { uint64_t as_uint64_t; struct { uint64_t vector : 8; uint64_t reserved1 : 8; uint64_t masked : 1; uint64_t auto_eoi : 1; uint64_t reserved2 : 46; } u; } hv_vmbus_synic_sint; /* * Timer configuration register. 
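* Written through the HV_X64_MSR_STIMERn_CONFIG MSRs defined below; * the sintx field selects which SINT is signaled when the timer fires.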
*/ union hv_timer_config { uint64_t as_uint64; struct { uint64_t enable:1; uint64_t periodic:1; uint64_t lazy:1; uint64_t auto_enable:1; uint64_t reserved_z0:12; uint64_t sintx:4; uint64_t reserved_z1:44; }; }; /* * Define syn_ic control register */ typedef union _hv_vmbus_synic_scontrol { uint64_t as_uint64_t; struct { uint64_t enable : 1; uint64_t reserved : 63; } u; } hv_vmbus_synic_scontrol; /* * Define the hv_vmbus_post_message hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint32_t reserved; hv_vmbus_msg_type message_type; uint32_t payload_size; uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } hv_vmbus_input_post_message; /* * Define the synthetic interrupt controller event flags format */ typedef union { uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT]; uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; } hv_vmbus_synic_event_flags; #define HV_X64_CPUID_MIN (0x40000005) #define HV_X64_CPUID_MAX (0x4000ffff) /* * Declare the MSR used to identify the guest OS */ #define HV_X64_MSR_GUEST_OS_ID (0x40000000) /* * Declare the MSR used to setup pages used to communicate with the hypervisor */ #define HV_X64_MSR_HYPERCALL (0x40000001) /* MSR used to provide vcpu index */ #define HV_X64_MSR_VP_INDEX (0x40000002) #define HV_X64_MSR_TIME_REF_COUNT (0x40000020) /* * Define synthetic interrupt controller model specific registers */ #define HV_X64_MSR_SCONTROL (0x40000080) #define HV_X64_MSR_SVERSION (0x40000081) #define HV_X64_MSR_SIEFP (0x40000082) #define HV_X64_MSR_SIMP (0x40000083) #define HV_X64_MSR_EOM (0x40000084) #define HV_X64_MSR_SINT0 (0x40000090) #define HV_X64_MSR_SINT1 (0x40000091) #define HV_X64_MSR_SINT2 (0x40000092) #define HV_X64_MSR_SINT3 (0x40000093) #define HV_X64_MSR_SINT4 (0x40000094) #define HV_X64_MSR_SINT5 (0x40000095) #define HV_X64_MSR_SINT6 (0x40000096) #define HV_X64_MSR_SINT7 (0x40000097) #define HV_X64_MSR_SINT8 (0x40000098) #define HV_X64_MSR_SINT9 (0x40000099) #define HV_X64_MSR_SINT10 (0x4000009A) #define HV_X64_MSR_SINT11 (0x4000009B) #define HV_X64_MSR_SINT12 (0x4000009C) #define HV_X64_MSR_SINT13 (0x4000009D) #define HV_X64_MSR_SINT14 (0x4000009E) #define HV_X64_MSR_SINT15 (0x4000009F) /* * Synthetic Timer MSRs. Four timers per vcpu. 
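* Timer N is programmed through the CONFIG/COUNT MSR pair at * 0x400000B0 + 2 * N.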
*/ #define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 #define HV_X64_MSR_STIMER0_COUNT 0x400000B1 #define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 #define HV_X64_MSR_STIMER1_COUNT 0x400000B3 #define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 #define HV_X64_MSR_STIMER2_COUNT 0x400000B5 #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 /* * Declare the various hypercall operations */ typedef enum { HV_CALL_POST_MESSAGE = 0x005c, HV_CALL_SIGNAL_EVENT = 0x005d, } hv_vmbus_call_code; /** * Global variables */ extern hv_vmbus_context hv_vmbus_g_context; extern hv_vmbus_connection hv_vmbus_g_connection; typedef void (*vmbus_msg_handler)(hv_vmbus_channel_msg_header *msg); typedef struct hv_vmbus_channel_msg_table_entry { hv_vmbus_channel_msg_type messageType; vmbus_msg_handler messageHandler; } hv_vmbus_channel_msg_table_entry; extern hv_vmbus_channel_msg_table_entry g_channel_message_table[]; /* * Private, VM Bus functions */ +struct sysctl_ctx_list; +struct sysctl_oid_list; + +void hv_ring_buffer_stat( + struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *tree_node, + hv_vmbus_ring_buffer_info *rbi, + const char *desc); int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); void hv_ring_buffer_cleanup( hv_vmbus_ring_buffer_info *ring_info); int hv_ring_buffer_write( hv_vmbus_ring_buffer_info *ring_info, hv_vmbus_sg_buffer_list sg_buffers[], uint32_t sg_buff_count, boolean_t *need_sig); int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); int hv_ring_buffer_read( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len, uint32_t offset); uint32_t hv_vmbus_get_ring_buffer_interrupt_mask( hv_vmbus_ring_buffer_info *ring_info); void hv_vmbus_dump_ring_info( hv_vmbus_ring_buffer_info *ring_info, char *prefix); void hv_ring_buffer_read_begin( hv_vmbus_ring_buffer_info *ring_info); uint32_t hv_ring_buffer_read_end( hv_vmbus_ring_buffer_info *ring_info); hv_vmbus_channel* hv_vmbus_allocate_channel(void); void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); int hv_vmbus_request_channel_offers(void); void hv_vmbus_release_unattached_channels(void); int hv_vmbus_init(void); void hv_vmbus_cleanup(void); uint16_t hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void *payload, size_t payload_size); uint16_t hv_vmbus_signal_event(void *con_id); void hv_vmbus_synic_init(void *irq_arg); void hv_vmbus_synic_cleanup(void *arg); int hv_vmbus_query_hypervisor_presence(void); struct hv_device* hv_vmbus_child_device_create( hv_guid device_type, hv_guid device_instance, hv_vmbus_channel *channel); int hv_vmbus_child_device_register( struct hv_device *child_dev); int hv_vmbus_child_device_unregister( struct hv_device *child_dev); /** * Connection interfaces */ int hv_vmbus_connect(void); int hv_vmbus_disconnect(void); int hv_vmbus_post_message(void *buffer, size_t buf_size); int hv_vmbus_set_event(hv_vmbus_channel *channel); void hv_vmbus_on_events(int cpu); /** * Event Timer interfaces */ void hv_et_init(void); void hv_et_intr(struct trapframe*); /* * The guest OS needs to register the guest ID with the hypervisor. 
* The guest ID is a 64 bit entity and the structure of this ID is * specified in the Hyper-V specification: * * http://msdn.microsoft.com/en-us/library/windows/ * hardware/ff542653%28v=vs.85%29.aspx * * While the current guideline does not specify how FreeBSD guest ID(s) * need to be generated, our plan is to publish the guidelines for * FreeBSD and other guest operating systems that currently are hosted * on Hyper-V. The implementation here conforms to these yet * unpublished guidelines. * * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source * 62:56 - OS Type; Linux is 0x100, FreeBSD is 0x200 * 55:48 - Distro specific identification * 47:16 - FreeBSD kernel version number * 15:0 - Distro specific identification * */ #define HV_FREEBSD_VENDOR_ID 0x8200 #define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0) static inline uint64_t hv_generate_guest_id( uint8_t distro_id_part1, uint16_t distro_id_part2) { uint64_t guest_id; guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48); guest_id |= (((uint64_t)(distro_id_part1)) << 48); guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */ guest_id |= ((uint64_t)(distro_id_part2)); return guest_id; } typedef struct { unsigned int vector; void *page_buffers[2 * MAXCPU]; } hv_setup_args; #endif /* __HYPERV_PRIV_H__ */
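For reference, the statistics this change exports can be read back through the new sysctl nodes, which hv_vmbus_channel_stat() hangs off each device's tree as dev.DEVNAME.DEVUNIT.channel.CHANID.{in,out}.ring_buffer_stats (with an extra sub.SUBID level for sub-channels). A minimal userland reader sketch, assuming a hypothetical hn(4) device at unit 0 with channel id 7; substitute a real device name and channel id:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	/* hv_rbi_sysctl_stats() formats "r_idx:... w_idx:... int_mask:..." */
	char buf[256];
	size_t len = sizeof(buf);

	/* The device name and channel id below are assumptions. */
	if (sysctlbyname("dev.hn.0.channel.7.in.ring_buffer_stats",
	    buf, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("%s\n", buf);
	return (0);
}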
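The guest ID bit layout documented above can be sanity-checked with plain arithmetic. A small sketch mirroring hv_generate_guest_id(0, 0), i.e. HV_FREEBSD_GUEST_ID; the __FreeBSD_version stand-in value is arbitrary:

#include <stdio.h>
#include <stdint.h>

#define HV_FREEBSD_VENDOR_ID	0x8200

int
main(void)
{
	uint64_t version = 1100097;	/* stand-in for __FreeBSD_version */
	uint64_t guest_id;

	/* Vendor/OS type lands in bits 63:48, kernel version in 47:16;
	 * both distro id parts are zero here. */
	guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48) |
	    (version << 16);
	printf("guest id: 0x%016jx\n", (uintmax_t)guest_id);
	return (0);
}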