Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 294552) +++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 294553) @@ -1,839 +1,848 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "hv_vmbus_priv.h" /* * Internal functions */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); /** * Channel message dispatch table */ hv_vmbus_channel_msg_table_entry g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { { HV_CHANNEL_MESSAGE_INVALID, 0, NULL }, { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, 0, vmbus_channel_on_offer }, { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER, 0, vmbus_channel_on_offer_rescind }, { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, 0, NULL }, { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED, 1, vmbus_channel_on_offers_delivered }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, 0, NULL }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT, 1, vmbus_channel_on_open_result }, { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, 0, NULL }, { HV_CHANNEL_MESSAGEL_GPADL_HEADER, 0, NULL }, { HV_CHANNEL_MESSAGE_GPADL_BODY, 0, NULL }, { HV_CHANNEL_MESSAGE_GPADL_CREATED, 1, vmbus_channel_on_gpadl_created }, { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, 0, NULL }, { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN, 1, vmbus_channel_on_gpadl_torndown }, { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, 0, NULL }, { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, 0, NULL }, { HV_CHANNEL_MESSAGE_VERSION_RESPONSE, 1, vmbus_channel_on_version_response }, { HV_CHANNEL_MESSAGE_UNLOAD, 0, NULL } }; /** * Implementation of the work abstraction. */ static void work_item_callback(void *work, int pending) { struct hv_work_item *w = (struct hv_work_item *)work; /* * Serialize work execution. */ if (w->wq->work_sema != NULL) { sema_wait(w->wq->work_sema); } w->callback(w->context); if (w->wq->work_sema != NULL) { sema_post(w->wq->work_sema); } free(w, M_DEVBUF); } struct hv_work_queue* hv_work_queue_create(char* name) { static unsigned int qid = 0; char qname[64]; int pri; struct hv_work_queue* wq; wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n")); if (wq == NULL) return (NULL); /* * We use work abstraction to handle messages * coming from the host and these are typically offers. * Some FreeBsd drivers appear to have a concurrency issue * where probe/attach needs to be serialized. We ensure that * by having only one thread process work elements in a * specific queue by serializing work execution. * */ if (strcmp(name, "vmbusQ") == 0) { pri = PI_DISK; } else { /* control */ pri = PI_NET; /* * Initialize semaphore for this queue by pointing * to the globale semaphore used for synchronizing all * control messages. */ wq->work_sema = &hv_vmbus_g_connection.control_sema; } sprintf(qname, "hv_%s_%u", name, qid); /* * Fixme: FreeBSD 8.2 has a different prototype for * taskqueue_create(), and for certain other taskqueue functions. * We need to research the implications of these changes. * Fixme: Not sure when the changes were introduced. */ wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue, &wq->queue #if __FreeBSD_version < 800000 , &wq->proc #endif ); if (wq->queue == NULL) { free(wq, M_DEVBUF); return (NULL); } if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) { taskqueue_free(wq->queue); free(wq, M_DEVBUF); return (NULL); } qid++; return (wq); } void hv_work_queue_close(struct hv_work_queue *wq) { /* * KYS: Need to drain the taskqueue * before we close the hv_work_queue. */ /*KYS: taskqueue_drain(wq->tq, ); */ taskqueue_free(wq->queue); free(wq, M_DEVBUF); } /** * @brief Create work item */ int hv_queue_work_item( struct hv_work_queue *wq, void (*callback)(void *), void *context) { struct hv_work_item *w = malloc(sizeof(struct hv_work_item), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n")); if (w == NULL) return (ENOMEM); w->callback = callback; w->context = context; w->wq = wq; TASK_INIT(&w->work, 0, work_item_callback, w); return (taskqueue_enqueue(wq->queue, &w->work)); } /** * @brief Allocate and initialize a vmbus channel object */ hv_vmbus_channel* hv_vmbus_allocate_channel(void) { hv_vmbus_channel* channel; channel = (hv_vmbus_channel*) malloc( sizeof(hv_vmbus_channel), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!")); if (channel == NULL) return (NULL); mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF); mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); TAILQ_INIT(&channel->sc_list_anchor); return (channel); } /** * @brief Release the vmbus channel object itself */ static inline void ReleaseVmbusChannel(void *context) { hv_vmbus_channel* channel = (hv_vmbus_channel*) context; free(channel, M_DEVBUF); } /** * @brief Release the resources used by the vmbus channel object */ void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { mtx_destroy(&channel->sc_lock); mtx_destroy(&channel->inbound_lock); /* * We have to release the channel's workqueue/thread in * the vmbus's workqueue/thread context * ie we can't destroy ourselves */ hv_queue_work_item(hv_vmbus_g_connection.work_queue, ReleaseVmbusChannel, (void *) channel); } /** * @brief Process the offer by creating a channel/device * associated with this offer */ static void vmbus_channel_process_offer(hv_vmbus_channel *new_channel) { boolean_t f_new; hv_vmbus_channel* channel; int ret; + uint32_t relid; f_new = TRUE; channel = NULL; - + relid = new_channel->offer_msg.child_rel_id; /* * Make sure this is a new offer */ mtx_lock(&hv_vmbus_g_connection.channel_lock); + hv_vmbus_g_connection.channels[relid] = new_channel; TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { if (memcmp(&channel->offer_msg.offer.interface_type, &new_channel->offer_msg.offer.interface_type, sizeof(hv_guid)) == 0 && memcmp(&channel->offer_msg.offer.interface_instance, &new_channel->offer_msg.offer.interface_instance, sizeof(hv_guid)) == 0) { f_new = FALSE; break; } } if (f_new) { /* Insert at tail */ TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); } mtx_unlock(&hv_vmbus_g_connection.channel_lock); /*XXX add new channel to percpu_list */ if (!f_new) { /* * Check if this is a sub channel. */ if (new_channel->offer_msg.offer.sub_channel_index != 0) { /* * It is a sub channel offer, process it. */ new_channel->primary_channel = channel; mtx_lock(&channel->sc_lock); TAILQ_INSERT_TAIL( &channel->sc_list_anchor, new_channel, sc_list_entry); mtx_unlock(&channel->sc_lock); /* Insert new channel into channel_anchor. */ - printf("Storvsc get multi-channel offer, rel=%u.\n", - new_channel->offer_msg.child_rel_id); + printf("VMBUS get multi-channel offer, rel=%u,sub=%u\n", + new_channel->offer_msg.child_rel_id, + new_channel->offer_msg.offer.sub_channel_index); mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_lock); if(bootverbose) - printf("VMBUS: new multi-channel offer <%p>.\n", - new_channel); + printf("VMBUS: new multi-channel offer <%p>, " + "its primary channel is <%p>.\n", + new_channel, new_channel->primary_channel); /*XXX add it to percpu_list */ new_channel->state = HV_CHANNEL_OPEN_STATE; if (channel->sc_creation_callback != NULL) { channel->sc_creation_callback(new_channel); } return; } hv_vmbus_free_vmbus_channel(new_channel); return; } new_channel->state = HV_CHANNEL_OPEN_STATE; /* * Start the process of binding this offer to the driver * (We need to set the device field before calling * hv_vmbus_child_device_add()) */ new_channel->device = hv_vmbus_child_device_create( new_channel->offer_msg.offer.interface_type, new_channel->offer_msg.offer.interface_instance, new_channel); /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. */ ret = hv_vmbus_child_device_register(new_channel->device); if (ret != 0) { mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_lock); hv_vmbus_free_vmbus_channel(new_channel); } } /** * Array of device guids that are performance critical. We try to distribute * the interrupt load for these devices across all online cpus. */ static const hv_guid high_perf_devices[] = { {HV_NIC_GUID, }, {HV_IDE_GUID, }, {HV_SCSI_GUID, }, }; enum { PERF_CHN_NIC = 0, PERF_CHN_IDE, PERF_CHN_SCSI, MAX_PERF_CHN, }; /* * We use this static number to distribute the channel interrupt load. */ static uint32_t next_vcpu; /** * Starting with Win8, we can statically distribute the incoming * channel interrupt load by binding a channel to VCPU. We * implement here a simple round robin scheme for distributing * the interrupt load. * We will bind channels that are not performance critical to cpu 0 and * performance critical channels (IDE, SCSI and Network) will be uniformly * distributed across all available CPUs. */ static void vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid) { uint32_t current_cpu; int i; boolean_t is_perf_channel = FALSE; for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) { if (memcmp(guid->data, high_perf_devices[i].data, sizeof(hv_guid)) == 0) { is_perf_channel = TRUE; break; } } if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) || (!is_perf_channel)) { /* Host's view of guest cpu */ channel->target_vcpu = 0; /* Guest's own view of cpu */ channel->target_cpu = 0; return; } /* mp_ncpus should have the number cpus currently online */ current_cpu = (++next_vcpu % mp_ncpus); channel->target_cpu = current_cpu; channel->target_vcpu = hv_vmbus_g_context.hv_vcpu_index[current_cpu]; if (bootverbose) printf("VMBUS: Total online cpus %d, assign perf channel %d " "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu, current_cpu); } /** * @brief Handler for channel offers from Hyper-V/Azure * * Handler for channel offers from vmbus in parent partition. We ignore * all offers except network and storage offers. For each network and storage * offers, we create a channel object and queue a work item to the channel * object to process the offer synchronously */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_offer_channel* offer; hv_vmbus_channel* new_channel; offer = (hv_vmbus_channel_offer_channel*) hdr; hv_guid *guidType; hv_guid *guidInstance; guidType = &offer->offer.interface_type; guidInstance = &offer->offer.interface_instance; /* Allocate the channel object and save this offer */ new_channel = hv_vmbus_allocate_channel(); if (new_channel == NULL) return; /* * By default we setup state to enable batched * reading. A specific service can choose to * disable this prior to opening the channel. */ new_channel->batched_reading = TRUE; new_channel->signal_event_param = (hv_vmbus_input_signal_event *) (HV_ALIGN_UP((unsigned long) &new_channel->signal_event_buffer, HV_HYPERCALL_PARAM_ALIGN)); new_channel->signal_event_param->connection_id.as_uint32_t = 0; new_channel->signal_event_param->connection_id.u.id = HV_VMBUS_EVENT_CONNECTION_ID; new_channel->signal_event_param->flag_number = 0; new_channel->signal_event_param->rsvd_z = 0; if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) { new_channel->is_dedicated_interrupt = (offer->is_dedicated_interrupt != 0); new_channel->signal_event_param->connection_id.u.id = offer->connection_id; } /* * Bind the channel to a chosen cpu. */ vmbus_channel_select_cpu(new_channel, &offer->offer.interface_type); memcpy(&new_channel->offer_msg, offer, sizeof(hv_vmbus_channel_offer_channel)); new_channel->monitor_group = (uint8_t) offer->monitor_id / 32; new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32; vmbus_channel_process_offer(new_channel); } /** * @brief Rescind offer handler. * * We queue a work item to process this offer * synchronously */ static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_rescind_offer* rescind; hv_vmbus_channel* channel; rescind = (hv_vmbus_channel_rescind_offer*) hdr; - channel = hv_vmbus_get_channel_from_rel_id(rescind->child_rel_id); + channel = hv_vmbus_g_connection.channels[rescind->child_rel_id]; if (channel == NULL) return; hv_vmbus_child_device_unregister(channel->device); + mtx_lock(&hv_vmbus_g_connection.channel_lock); + hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL; + mtx_unlock(&hv_vmbus_g_connection.channel_lock); } /** * * @brief Invoked when all offers have been delivered. */ static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr) { } /** * @brief Open result handler. * * This is invoked when we received a response * to our channel open request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_open_result* result; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_open_channel* openMsg; result = (hv_vmbus_channel_open_result*) hdr; /* * Find the open msg, copy the result and signal/unblock the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_OPEN_CHANNEL) { openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg; if (openMsg->child_rel_id == result->child_rel_id && openMsg->open_id == result->open_id) { memcpy(&msg_info->response.open_result, result, sizeof(hv_vmbus_channel_open_result)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL created handler. * * This is invoked when we received a response * to our gpadl create request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_created* gpadl_created; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* request_header; hv_vmbus_channel_gpadl_header* gpadl_header; gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr; /* Find the establish msg, copy the result and signal/unblock * the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { request_header = (hv_vmbus_channel_msg_header*) msg_info->msg; if (request_header->message_type == HV_CHANNEL_MESSAGEL_GPADL_HEADER) { gpadl_header = (hv_vmbus_channel_gpadl_header*) request_header; if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id) && (gpadl_created->gpadl == gpadl_header->gpadl)) { memcpy(&msg_info->response.gpadl_created, gpadl_created, sizeof(hv_vmbus_channel_gpadl_created)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL torndown handler. * * This is invoked when we received a respons * to our gpadl teardown request. Find the matching request, copy the * response and signal the requesting thread */ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_torndown* gpadl_torndown; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_gpadl_teardown* gpadlTeardown; gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr; /* * Find the open msg, copy the result and signal/unblock the * wait event. */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) { gpadlTeardown = (hv_vmbus_channel_gpadl_teardown*) requestHeader; if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) { memcpy(&msg_info->response.gpadl_torndown, gpadl_torndown, sizeof(hv_vmbus_channel_gpadl_torndown)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Version response handler. * * This is invoked when we received a response * to our initiate contact request. Find the matching request, copy th * response and signal the requesting thread. */ static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_initiate_contact* initiate; hv_vmbus_channel_version_response* versionResponse; versionResponse = (hv_vmbus_channel_version_response*)hdr; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) { initiate = (hv_vmbus_channel_initiate_contact*) requestHeader; memcpy(&msg_info->response.version_response, versionResponse, sizeof(hv_vmbus_channel_version_response)); sema_post(&msg_info->wait_sema); } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Handler for channel protocol messages. * * This is invoked in the vmbus worker thread context. */ void hv_vmbus_on_channel_message(void *context) { hv_vmbus_message* msg; hv_vmbus_channel_msg_header* hdr; int size; msg = (hv_vmbus_message*) context; hdr = (hv_vmbus_channel_msg_header*) msg->u.payload; size = msg->header.payload_size; if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) { free(msg, M_DEVBUF); return; } if (g_channel_message_table[hdr->message_type].messageHandler) { g_channel_message_table[hdr->message_type].messageHandler(hdr); } /* Free the msg that was allocated in VmbusOnMsgDPC() */ free(msg, M_DEVBUF); } /** * @brief Send a request to get all our pending offers. */ int hv_vmbus_request_channel_offers(void) { int ret; hv_vmbus_channel_msg_header* msg; hv_vmbus_channel_msg_info* msg_info; msg_info = (hv_vmbus_channel_msg_info *) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT); if (msg_info == NULL) { if(bootverbose) printf("Error VMBUS: malloc failed for Request Offers\n"); return (ENOMEM); } msg = (hv_vmbus_channel_msg_header*) msg_info->msg; msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS; ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header)); if (msg_info) free(msg_info, M_DEVBUF); return (ret); } /** * @brief Release channels that are unattached/unconnected (i.e., no drivers associated) */ void hv_vmbus_release_unattached_channels(void) { hv_vmbus_channel *channel; mtx_lock(&hv_vmbus_g_connection.channel_lock); while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) { channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor, channel, list_entry); hv_vmbus_child_device_unregister(channel->device); hv_vmbus_free_vmbus_channel(channel); } + bzero(hv_vmbus_g_connection.channels, + sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT); mtx_unlock(&hv_vmbus_g_connection.channel_lock); } /** * @brief Select the best outgoing channel * * The channel whose vcpu binding is closest to the currect vcpu will * be selected. * If no multi-channel, always select primary channel * * @param primary - primary channel */ struct hv_vmbus_channel * vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) { hv_vmbus_channel *new_channel = NULL; hv_vmbus_channel *outgoing_channel = primary; int old_cpu_distance = 0; int new_cpu_distance = 0; int cur_vcpu = 0; int smp_pro_id = PCPU_GET(cpuid); if (TAILQ_EMPTY(&primary->sc_list_anchor)) { return outgoing_channel; } if (smp_pro_id >= MAXCPU) { return outgoing_channel; } cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id]; TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { if (new_channel->state != HV_CHANNEL_OPENED_STATE){ continue; } if (new_channel->target_vcpu == cur_vcpu){ return new_channel; } old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ? (outgoing_channel->target_vcpu - cur_vcpu) : (cur_vcpu - outgoing_channel->target_vcpu)); new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ? (new_channel->target_vcpu - cur_vcpu) : (cur_vcpu - new_channel->target_vcpu)); if (old_cpu_distance < new_cpu_distance) { continue; } outgoing_channel = new_channel; } return(outgoing_channel); } Index: head/sys/dev/hyperv/vmbus/hv_connection.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_connection.c (revision 294552) +++ head/sys/dev/hyperv/vmbus/hv_connection.c (revision 294553) @@ -1,542 +1,518 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" /* * Globals */ hv_vmbus_connection hv_vmbus_g_connection = { .connect_state = HV_DISCONNECTED, .next_gpadl_handle = 0xE1E10, }; uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008; static uint32_t hv_vmbus_get_next_version(uint32_t current_ver) { switch (current_ver) { case (HV_VMBUS_VERSION_WIN7): return(HV_VMBUS_VERSION_WS2008); case (HV_VMBUS_VERSION_WIN8): return(HV_VMBUS_VERSION_WIN7); case (HV_VMBUS_VERSION_WIN8_1): return(HV_VMBUS_VERSION_WIN8); case (HV_VMBUS_VERSION_WS2008): default: return(HV_VMBUS_VERSION_INVALID); } } /** * Negotiate the highest supported hypervisor version. */ static int hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, uint32_t version) { int ret = 0; hv_vmbus_channel_initiate_contact *msg; sema_init(&msg_info->wait_sema, 0, "Msg Info Sema"); msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT; msg->vmbus_version_requested = version; msg->interrupt_page = hv_get_phys_addr( hv_vmbus_g_connection.interrupt_page); msg->monitor_page_1 = hv_get_phys_addr( hv_vmbus_g_connection.monitor_pages); msg->monitor_page_2 = hv_get_phys_addr( ((uint8_t *) hv_vmbus_g_connection.monitor_pages + PAGE_SIZE)); /** * Add to list before we send the request since we may receive the * response before returning from this routine */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( msg, sizeof(hv_vmbus_channel_initiate_contact)); if (ret != 0) { mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); return (ret); } /** * Wait for the connection response */ ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); /** * Check if successful */ if (msg_info->response.version_response.version_supported) { hv_vmbus_g_connection.connect_state = HV_CONNECTED; } else { ret = ECONNREFUSED; } return (ret); } /** * Send a connect request on the partition service connection */ int hv_vmbus_connect(void) { int ret = 0; uint32_t version; hv_vmbus_channel_msg_info* msg_info = NULL; /** * Make sure we are not connecting or connected */ if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) { return (-1); } /** * Initialize the vmbus connection */ hv_vmbus_g_connection.connect_state = HV_CONNECTING; hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ"); sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema"); TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor); mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg", NULL, MTX_SPIN); TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor); mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel", NULL, MTX_DEF); /** * Setup the vmbus event connection for channel interrupt abstraction * stuff */ hv_vmbus_g_connection.interrupt_page = contigmalloc( PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(hv_vmbus_g_connection.interrupt_page != NULL, ("Error VMBUS: malloc failed to allocate Channel" " Request Event message!")); if (hv_vmbus_g_connection.interrupt_page == NULL) { ret = ENOMEM; goto cleanup; } hv_vmbus_g_connection.recv_interrupt_page = hv_vmbus_g_connection.interrupt_page; hv_vmbus_g_connection.send_interrupt_page = ((uint8_t *) hv_vmbus_g_connection.interrupt_page + (PAGE_SIZE >> 1)); /** * Set up the monitor notification facility. The 1st page for * parent->child and the 2nd page for child->parent */ hv_vmbus_g_connection.monitor_pages = contigmalloc( 2 * PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(hv_vmbus_g_connection.monitor_pages != NULL, ("Error VMBUS: malloc failed to allocate Monitor Pages!")); if (hv_vmbus_g_connection.monitor_pages == NULL) { ret = ENOMEM; goto cleanup; } msg_info = (hv_vmbus_channel_msg_info*) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_initiate_contact), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(msg_info != NULL, ("Error VMBUS: malloc failed for Initiate Contact message!")); if (msg_info == NULL) { ret = ENOMEM; goto cleanup; } + hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) * + HV_CHANNEL_MAX_COUNT, + M_DEVBUF, M_WAITOK | M_ZERO); /* * Find the highest vmbus version number we can support. */ version = HV_VMBUS_VERSION_CURRENT; do { ret = hv_vmbus_negotiate_version(msg_info, version); if (ret == EWOULDBLOCK) { /* * We timed out. */ goto cleanup; } if (hv_vmbus_g_connection.connect_state == HV_CONNECTED) break; version = hv_vmbus_get_next_version(version); } while (version != HV_VMBUS_VERSION_INVALID); hv_vmbus_protocal_version = version; if (bootverbose) printf("VMBUS: Protocol Version: %d.%d\n", version >> 16, version & 0xFFFF); sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (0); /* * Cleanup after failure! */ cleanup: hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; hv_work_queue_close(hv_vmbus_g_connection.work_queue); sema_destroy(&hv_vmbus_g_connection.control_sema); mtx_destroy(&hv_vmbus_g_connection.channel_lock); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); if (hv_vmbus_g_connection.interrupt_page != NULL) { contigfree( hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF); hv_vmbus_g_connection.interrupt_page = NULL; } if (hv_vmbus_g_connection.monitor_pages != NULL) { contigfree( hv_vmbus_g_connection.monitor_pages, 2 * PAGE_SIZE, M_DEVBUF); hv_vmbus_g_connection.monitor_pages = NULL; } if (msg_info) { sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); } + free(hv_vmbus_g_connection.channels, M_DEVBUF); return (ret); } /** * Send a disconnect request on the partition service connection */ int hv_vmbus_disconnect(void) { int ret = 0; hv_vmbus_channel_unload* msg; msg = malloc(sizeof(hv_vmbus_channel_unload), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(msg != NULL, ("Error VMBUS: malloc failed to allocate Channel Unload Msg!")); if (msg == NULL) return (ENOMEM); msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD; ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload)); contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); hv_work_queue_close(hv_vmbus_g_connection.work_queue); sema_destroy(&hv_vmbus_g_connection.control_sema); + free(hv_vmbus_g_connection.channels, M_DEVBUF); hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; free(msg, M_DEVBUF); return (ret); } /** - * Get the channel object given its child relative id (ie channel id) - */ -hv_vmbus_channel* -hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) { - - hv_vmbus_channel* channel; - hv_vmbus_channel* foundChannel = NULL; - - /* - * TODO: - * Consider optimization where relids are stored in a fixed size array - * and channels are accessed without the need to take this lock or search - * the list. - */ - mtx_lock(&hv_vmbus_g_connection.channel_lock); - TAILQ_FOREACH(channel, - &hv_vmbus_g_connection.channel_anchor, list_entry) { - - if (channel->offer_msg.child_rel_id == rel_id) { - foundChannel = channel; - break; - } - } - mtx_unlock(&hv_vmbus_g_connection.channel_lock); - - return (foundChannel); -} - -/** * Process a channel event notification */ static void VmbusProcessChannelEvent(uint32_t relid) { void* arg; uint32_t bytes_to_read; hv_vmbus_channel* channel; boolean_t is_batched_reading; /** * Find the channel based on this relid and invokes * the channel callback to process the event */ - channel = hv_vmbus_get_channel_from_rel_id(relid); + channel = hv_vmbus_g_connection.channels[relid]; if (channel == NULL) { return; } /** * To deal with the race condition where we might * receive a packet while the relevant driver is * being unloaded, dispatch the callback while * holding the channel lock. The unloading driver * will acquire the same channel lock to set the * callback to NULL. This closes the window. */ /* * Disable the lock due to newly added WITNESS check in r277723. * Will seek other way to avoid race condition. * -- whu */ // mtx_lock(&channel->inbound_lock); if (channel->on_channel_callback != NULL) { arg = channel->channel_callback_context; is_batched_reading = channel->batched_reading; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. */ do { if (is_batched_reading) hv_ring_buffer_read_begin(&channel->inbound); channel->on_channel_callback(arg); if (is_batched_reading) bytes_to_read = hv_ring_buffer_read_end(&channel->inbound); else bytes_to_read = 0; } while (is_batched_reading && (bytes_to_read != 0)); } // mtx_unlock(&channel->inbound_lock); } /** * Handler for events */ void hv_vmbus_on_events(void *arg) { int bit; int cpu; int dword; void *page_addr; uint32_t* recv_interrupt_page = NULL; int rel_id; int maxdword; hv_vmbus_synic_event_flags *event; /* int maxdword = PAGE_SIZE >> 3; */ cpu = (int)(long)arg; KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " "cpu out of range!")); if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; /* * receive size is 1/2 page and divide that by 4 bytes */ recv_interrupt_page = hv_vmbus_g_connection.recv_interrupt_page; } else { /* * On Host with Win8 or above, the event page can be * checked directly to get the id of the channel * that has the pending interrupt. */ maxdword = HV_EVENT_FLAGS_DWORD_COUNT; page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; event = (hv_vmbus_synic_event_flags *) page_addr + HV_VMBUS_MESSAGE_SINT; recv_interrupt_page = event->flags32; } /* * Check events */ if (recv_interrupt_page != NULL) { for (dword = 0; dword < maxdword; dword++) { if (recv_interrupt_page[dword]) { - for (bit = 0; bit < 32; bit++) { + for (bit = 0; bit < HV_CHANNEL_DWORD_LEN; bit++) { if (synch_test_and_clear_bit(bit, (uint32_t *) &recv_interrupt_page[dword])) { rel_id = (dword << 5) + bit; if (rel_id == 0) { /* * Special case - * vmbus channel protocol msg. */ continue; } else { VmbusProcessChannelEvent(rel_id); } } } } } } return; } /** * Send a msg on the vmbus's message connection */ int hv_vmbus_post_message(void *buffer, size_t bufferLen) { int ret = 0; hv_vmbus_connection_id connId; unsigned retries = 0; /* NetScaler delays from previous code were consolidated here */ static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000}; /* for(each entry in delayAmount) try to post message, * delay a little bit before retrying */ for (retries = 0; retries < sizeof(delayAmount)/sizeof(delayAmount[0]); retries++) { connId.as_uint32_t = 0; connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID; ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen); if (ret != HV_STATUS_INSUFFICIENT_BUFFERS) break; /* TODO: KYS We should use a blocking wait call */ DELAY(delayAmount[retries]); } KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n")); return (ret); } /** * Send an event notification to the parent */ int hv_vmbus_set_event(hv_vmbus_channel *channel) { int ret = 0; uint32_t child_rel_id = channel->offer_msg.child_rel_id; /* Each uint32_t represents 32 channels */ synch_set_bit(child_rel_id & 31, (((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + (child_rel_id >> 5)))); ret = hv_vmbus_signal_event(channel->signal_event_param); return (ret); } Index: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 294552) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 294553) @@ -1,762 +1,771 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HYPERV_PRIV_H__ #define __HYPERV_PRIV_H__ #include #include #include #include #include /* * Status codes for hypervisor operations. */ typedef uint16_t hv_vmbus_status; #define HV_MESSAGE_SIZE (256) #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* * Synthetic interrupt controller flag constants. */ #define HV_EVENT_FLAGS_COUNT (256 * 8) #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t)) +/** + * max channel count <== event_flags_dword_count * bit_of_dword + */ +#define HV_CHANNEL_DWORD_LEN (32) +#define HV_CHANNEL_MAX_COUNT \ + ((HV_EVENT_FLAGS_DWORD_COUNT) * HV_CHANNEL_DWORD_LEN) /* * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS * MessageText: * You did not supply enough message buffers to send a message. */ #define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013) typedef void (*hv_vmbus_channel_callback)(void *context); typedef struct { void* data; uint32_t length; } hv_vmbus_sg_buffer_list; typedef struct { uint32_t current_interrupt_mask; uint32_t current_read_index; uint32_t current_write_index; uint32_t bytes_avail_to_read; uint32_t bytes_avail_to_write; } hv_vmbus_ring_buffer_debug_info; typedef struct { uint32_t rel_id; hv_vmbus_channel_state state; hv_guid interface_type; hv_guid interface_instance; uint32_t monitor_id; uint32_t server_monitor_pending; uint32_t server_monitor_latency; uint32_t server_monitor_connection_id; uint32_t client_monitor_pending; uint32_t client_monitor_latency; uint32_t client_monitor_connection_id; hv_vmbus_ring_buffer_debug_info inbound; hv_vmbus_ring_buffer_debug_info outbound; } hv_vmbus_channel_debug_info; typedef union { hv_vmbus_channel_version_supported version_supported; hv_vmbus_channel_open_result open_result; hv_vmbus_channel_gpadl_torndown gpadl_torndown; hv_vmbus_channel_gpadl_created gpadl_created; hv_vmbus_channel_version_response version_response; } hv_vmbus_channel_msg_response; /* * Represents each channel msg on the vmbus connection * This is a variable-size data structure depending on * the msg type itself */ typedef struct hv_vmbus_channel_msg_info { /* * Bookkeeping stuff */ TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry; /* * So far, this is only used to handle * gpadl body message */ TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor; /* * Synchronize the request/response if * needed. * KYS: Use a semaphore for now. * Not perf critical. */ struct sema wait_sema; hv_vmbus_channel_msg_response response; uint32_t message_size; /** * The channel message that goes out on * the "wire". It will contain at * minimum the * hv_vmbus_channel_msg_header * header. */ unsigned char msg[0]; } hv_vmbus_channel_msg_info; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_page_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT]; } __packed hv_vmbus_channel_packet_page_buffer; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_multipage_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; /* Always 1 in this case */ hv_vmbus_multipage_buffer range; } __packed hv_vmbus_channel_packet_multipage_buffer; enum { HV_VMBUS_MESSAGE_CONNECTION_ID = 1, HV_VMBUS_MESSAGE_PORT_ID = 1, HV_VMBUS_EVENT_CONNECTION_ID = 2, HV_VMBUS_EVENT_PORT_ID = 2, HV_VMBUS_MONITOR_CONNECTION_ID = 3, HV_VMBUS_MONITOR_PORT_ID = 3, HV_VMBUS_MESSAGE_SINT = 2 }; #define HV_PRESENT_BIT 0x80000000 #define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t) typedef struct { uint64_t guest_id; void* hypercall_page; hv_bool_uint8_t syn_ic_initialized; hv_vmbus_handle syn_ic_msg_page[MAXCPU]; hv_vmbus_handle syn_ic_event_page[MAXCPU]; /* * For FreeBSD cpuid to Hyper-V vcpuid mapping. */ uint32_t hv_vcpu_index[MAXCPU]; /* * Each cpu has its own software interrupt handler for channel * event and msg handling. */ struct intr_event *hv_event_intr_event[MAXCPU]; struct intr_event *hv_msg_intr_event[MAXCPU]; void *event_swintr[MAXCPU]; void *msg_swintr[MAXCPU]; /* * Host use this vector to intrrupt guest for vmbus channel * event and msg. */ unsigned int hv_cb_vector; } hv_vmbus_context; /* * Define hypervisor message types */ typedef enum { HV_MESSAGE_TYPE_NONE = 0x00000000, /* * Memory access messages */ HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000, HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001, /* * Timer notification messages */ HV_MESSAGE_TIMER_EXPIRED = 0x80000010, /* * Error messages */ HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020, HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021, HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022, /* * Trace buffer complete messages */ HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040, /* * Platform-specific processor intercept messages */ HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000, HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001, HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002, HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003, HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004, HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005 } hv_vmbus_msg_type; /* * Define port identifier type */ typedef union _hv_vmbus_port_id { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u ; } hv_vmbus_port_id; /* * Define synthetic interrupt controller message flag */ typedef union { uint8_t as_uint8_t; struct { uint8_t message_pending:1; uint8_t reserved:7; } u; } hv_vmbus_msg_flags; typedef uint64_t hv_vmbus_partition_id; /* * Define synthetic interrupt controller message header */ typedef struct { hv_vmbus_msg_type message_type; uint8_t payload_size; hv_vmbus_msg_flags message_flags; uint8_t reserved[2]; union { hv_vmbus_partition_id sender; hv_vmbus_port_id port; } u; } hv_vmbus_msg_header; /* * Define synthetic interrupt controller message format */ typedef struct { hv_vmbus_msg_header header; union { uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } u ; } hv_vmbus_message; /* * Maximum channels is determined by the size of the interrupt * page which is PAGE_SIZE. 1/2 of PAGE_SIZE is for * send endpoint interrupt and the other is receive * endpoint interrupt. * * Note: (PAGE_SIZE >> 1) << 3 allocates 16348 channels */ #define HV_MAX_NUM_CHANNELS (PAGE_SIZE >> 1) << 3 /* * (The value here must be in multiple of 32) */ #define HV_MAX_NUM_CHANNELS_SUPPORTED 256 /* * VM Bus connection states */ typedef enum { HV_DISCONNECTED, HV_CONNECTING, HV_CONNECTED, HV_DISCONNECTING } hv_vmbus_connect_state; #define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT typedef struct { hv_vmbus_connect_state connect_state; uint32_t next_gpadl_handle; /** * Represents channel interrupts. Each bit position * represents a channel. * When a channel sends an interrupt via VMBUS, it * finds its bit in the send_interrupt_page, set it and * calls Hv to generate a port event. The other end * receives the port event and parse the * recv_interrupt_page to see which bit is set */ void *interrupt_page; void *send_interrupt_page; void *recv_interrupt_page; /* * 2 pages - 1st page for parent->child * notification and 2nd is child->parent * notification */ void *monitor_pages; TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** * List of primary channels. Sub channels will be linked * under their primary channel. */ TAILQ_HEAD(, hv_vmbus_channel) channel_anchor; struct mtx channel_lock; + /** + * channel table for fast lookup through id. + */ + hv_vmbus_channel **channels; hv_vmbus_handle work_queue; struct sema control_sema; } hv_vmbus_connection; typedef union { uint64_t as_uint64_t; struct { uint64_t build_number : 16; uint64_t service_version : 8; /* Service Pack, etc. */ uint64_t minor_version : 8; uint64_t major_version : 8; /* * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS) * HV_GUEST_OS_VENDOR */ uint64_t os_id : 8; uint64_t vendor_id : 16; } u; } hv_vmbus_x64_msr_guest_os_id_contents; typedef union { uint64_t as_uint64_t; struct { uint64_t enable :1; uint64_t reserved :11; uint64_t guest_physical_address :52; } u; } hv_vmbus_x64_msr_hypercall_contents; typedef union { uint32_t as_uint32_t; struct { uint32_t group_enable :4; uint32_t rsvd_z :28; } u; } hv_vmbus_monitor_trigger_state; typedef union { uint64_t as_uint64_t; struct { uint32_t pending; uint32_t armed; } u; } hv_vmbus_monitor_trigger_group; typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } hv_vmbus_monitor_parameter; /* * hv_vmbus_monitor_page Layout * ------------------------------------------------------ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) | * | 8 | trigger_group[0] | * | 10 | trigger_group[1] | * | 18 | trigger_group[2] | * | 20 | trigger_group[3] | * | 28 | Rsvd2[0] | * | 30 | Rsvd2[1] | * | 38 | Rsvd2[2] | * | 40 | next_check_time[0][0] | next_check_time[0][1] | * | ... | * | 240 | latency[0][0..3] | * | 340 | Rsvz3[0] | * | 440 | parameter[0][0] | * | 448 | parameter[0][1] | * | ... | * | 840 | Rsvd4[0] | * ------------------------------------------------------ */ typedef struct { hv_vmbus_monitor_trigger_state trigger_state; uint32_t rsvd_z1; hv_vmbus_monitor_trigger_group trigger_group[4]; uint64_t rsvd_z2[3]; int32_t next_check_time[4][32]; uint16_t latency[4][32]; uint64_t rsvd_z3[32]; hv_vmbus_monitor_parameter parameter[4][32]; uint8_t rsvd_z4[1984]; } hv_vmbus_monitor_page; /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES). */ typedef enum { HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001, HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000, HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001, /* * The remaining functions depend on the value * of hv_cpu_id_function_interface */ HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002, HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003, HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004, HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005 } hv_vmbus_cpuid_function; /* * Define the format of the SIMP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t simp_enabled : 1; uint64_t preserved : 11; uint64_t base_simp_gpa : 52; } u; } hv_vmbus_synic_simp; /* * Define the format of the SIEFP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t siefp_enabled : 1; uint64_t preserved : 11; uint64_t base_siefp_gpa : 52; } u; } hv_vmbus_synic_siefp; /* * Define synthetic interrupt source */ typedef union { uint64_t as_uint64_t; struct { uint64_t vector : 8; uint64_t reserved1 : 8; uint64_t masked : 1; uint64_t auto_eoi : 1; uint64_t reserved2 : 46; } u; } hv_vmbus_synic_sint; /* * Timer configuration register. */ union hv_timer_config { uint64_t as_uint64; struct { uint64_t enable:1; uint64_t periodic:1; uint64_t lazy:1; uint64_t auto_enable:1; uint64_t reserved_z0:12; uint64_t sintx:4; uint64_t reserved_z1:44; }; }; /* * Define syn_ic control register */ typedef union _hv_vmbus_synic_scontrol { uint64_t as_uint64_t; struct { uint64_t enable : 1; uint64_t reserved : 63; } u; } hv_vmbus_synic_scontrol; /* * Define the hv_vmbus_post_message hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint32_t reserved; hv_vmbus_msg_type message_type; uint32_t payload_size; uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } hv_vmbus_input_post_message; /* * Define the synthetic interrupt controller event flags format */ typedef union { uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT]; uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; } hv_vmbus_synic_event_flags; #define HV_X64_CPUID_MIN (0x40000005) #define HV_X64_CPUID_MAX (0x4000ffff) /* * Declare the MSR used to identify the guest OS */ #define HV_X64_MSR_GUEST_OS_ID (0x40000000) /* * Declare the MSR used to setup pages used to communicate with the hypervisor */ #define HV_X64_MSR_HYPERCALL (0x40000001) /* MSR used to provide vcpu index */ #define HV_X64_MSR_VP_INDEX (0x40000002) #define HV_X64_MSR_TIME_REF_COUNT (0x40000020) /* * Define synthetic interrupt controller model specific registers */ #define HV_X64_MSR_SCONTROL (0x40000080) #define HV_X64_MSR_SVERSION (0x40000081) #define HV_X64_MSR_SIEFP (0x40000082) #define HV_X64_MSR_SIMP (0x40000083) #define HV_X64_MSR_EOM (0x40000084) #define HV_X64_MSR_SINT0 (0x40000090) #define HV_X64_MSR_SINT1 (0x40000091) #define HV_X64_MSR_SINT2 (0x40000092) #define HV_X64_MSR_SINT3 (0x40000093) #define HV_X64_MSR_SINT4 (0x40000094) #define HV_X64_MSR_SINT5 (0x40000095) #define HV_X64_MSR_SINT6 (0x40000096) #define HV_X64_MSR_SINT7 (0x40000097) #define HV_X64_MSR_SINT8 (0x40000098) #define HV_X64_MSR_SINT9 (0x40000099) #define HV_X64_MSR_SINT10 (0x4000009A) #define HV_X64_MSR_SINT11 (0x4000009B) #define HV_X64_MSR_SINT12 (0x4000009C) #define HV_X64_MSR_SINT13 (0x4000009D) #define HV_X64_MSR_SINT14 (0x4000009E) #define HV_X64_MSR_SINT15 (0x4000009F) /* * Synthetic Timer MSRs. Four timers per vcpu. */ #define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 #define HV_X64_MSR_STIMER0_COUNT 0x400000B1 #define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 #define HV_X64_MSR_STIMER1_COUNT 0x400000B3 #define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 #define HV_X64_MSR_STIMER2_COUNT 0x400000B5 #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 /* * Declare the various hypercall operations */ typedef enum { HV_CALL_POST_MESSAGE = 0x005c, HV_CALL_SIGNAL_EVENT = 0x005d, } hv_vmbus_call_code; /** * Global variables */ extern hv_vmbus_context hv_vmbus_g_context; extern hv_vmbus_connection hv_vmbus_g_connection; typedef void (*vmbus_msg_handler)(hv_vmbus_channel_msg_header *msg); typedef struct hv_vmbus_channel_msg_table_entry { hv_vmbus_channel_msg_type messageType; bool handler_no_sleep; /* true: the handler doesn't sleep */ vmbus_msg_handler messageHandler; } hv_vmbus_channel_msg_table_entry; extern hv_vmbus_channel_msg_table_entry g_channel_message_table[]; /* * Private, VM Bus functions */ int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); void hv_ring_buffer_cleanup( hv_vmbus_ring_buffer_info *ring_info); int hv_ring_buffer_write( hv_vmbus_ring_buffer_info *ring_info, hv_vmbus_sg_buffer_list sg_buffers[], uint32_t sg_buff_count, boolean_t *need_sig); int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); int hv_ring_buffer_read( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len, uint32_t offset); uint32_t hv_vmbus_get_ring_buffer_interrupt_mask( hv_vmbus_ring_buffer_info *ring_info); void hv_vmbus_dump_ring_info( hv_vmbus_ring_buffer_info *ring_info, char *prefix); void hv_ring_buffer_read_begin( hv_vmbus_ring_buffer_info *ring_info); uint32_t hv_ring_buffer_read_end( hv_vmbus_ring_buffer_info *ring_info); hv_vmbus_channel* hv_vmbus_allocate_channel(void); void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); void hv_vmbus_on_channel_message(void *context); int hv_vmbus_request_channel_offers(void); void hv_vmbus_release_unattached_channels(void); int hv_vmbus_init(void); void hv_vmbus_cleanup(void); uint16_t hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void *payload, size_t payload_size); uint16_t hv_vmbus_signal_event(void *con_id); void hv_vmbus_synic_init(void *irq_arg); void hv_vmbus_synic_cleanup(void *arg); int hv_vmbus_query_hypervisor_presence(void); struct hv_device* hv_vmbus_child_device_create( hv_guid device_type, hv_guid device_instance, hv_vmbus_channel *channel); int hv_vmbus_child_device_register( struct hv_device *child_dev); int hv_vmbus_child_device_unregister( struct hv_device *child_dev); -hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id); /** * Connection interfaces */ int hv_vmbus_connect(void); int hv_vmbus_disconnect(void); int hv_vmbus_post_message(void *buffer, size_t buf_size); int hv_vmbus_set_event(hv_vmbus_channel *channel); void hv_vmbus_on_events(void *); /** * Event Timer interfaces */ void hv_et_init(void); void hv_et_intr(struct trapframe*); /* * The guest OS needs to register the guest ID with the hypervisor. * The guest ID is a 64 bit entity and the structure of this ID is * specified in the Hyper-V specification: * * http://msdn.microsoft.com/en-us/library/windows/ * hardware/ff542653%28v=vs.85%29.aspx * * While the current guideline does not specify how FreeBSD guest ID(s) * need to be generated, our plan is to publish the guidelines for * FreeBSD and other guest operating systems that currently are hosted * on Hyper-V. The implementation here conforms to this yet * unpublished guidelines. * * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source * 62:56 - Os Type; Linux is 0x100, FreeBSD is 0x200 * 55:48 - Distro specific identification * 47:16 - FreeBSD kernel version number * 15:0 - Distro specific identification * */ #define HV_FREEBSD_VENDOR_ID 0x8200 #define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0) static inline uint64_t hv_generate_guest_id( uint8_t distro_id_part1, uint16_t distro_id_part2) { uint64_t guest_id; guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48); guest_id |= (((uint64_t)(distro_id_part1)) << 48); guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */ guest_id |= ((uint64_t)(distro_id_part2)); return guest_id; } typedef struct { unsigned int vector; void *page_buffers[2 * MAXCPU]; } hv_setup_args; #endif /* __HYPERV_PRIV_H__ */