Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 292860) +++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 292861) @@ -1,835 +1,839 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "hv_vmbus_priv.h" -typedef void (*hv_pfn_channel_msg_handler)(hv_vmbus_channel_msg_header* msg); - -typedef struct hv_vmbus_channel_msg_table_entry { - hv_vmbus_channel_msg_type messageType; - hv_pfn_channel_msg_handler messageHandler; -} hv_vmbus_channel_msg_table_entry; - /* * Internal functions */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); /** * Channel message dispatch table */ hv_vmbus_channel_msg_table_entry g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { - { HV_CHANNEL_MESSAGE_INVALID, NULL }, - { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer }, + { HV_CHANNEL_MESSAGE_INVALID, + 0, NULL }, + { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, + 0, vmbus_channel_on_offer }, { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER, - vmbus_channel_on_offer_rescind }, - { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL }, + 0, vmbus_channel_on_offer_rescind }, + { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, + 0, NULL }, { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED, - vmbus_channel_on_offers_delivered }, - { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL }, + 1, vmbus_channel_on_offers_delivered }, + { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, + 0, NULL }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT, - vmbus_channel_on_open_result }, - { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL }, - { HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL }, - { HV_CHANNEL_MESSAGE_GPADL_BODY, NULL }, + 1, vmbus_channel_on_open_result }, + { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, + 0, NULL }, + { HV_CHANNEL_MESSAGEL_GPADL_HEADER, + 0, NULL }, + { HV_CHANNEL_MESSAGE_GPADL_BODY, + 0, NULL }, { HV_CHANNEL_MESSAGE_GPADL_CREATED, - vmbus_channel_on_gpadl_created }, - { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL }, + 1, vmbus_channel_on_gpadl_created }, + { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, + 0, NULL }, { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN, - vmbus_channel_on_gpadl_torndown }, - { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL }, - { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL }, + 1, vmbus_channel_on_gpadl_torndown }, + { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, + 0, NULL }, + { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, + 0, NULL }, { HV_CHANNEL_MESSAGE_VERSION_RESPONSE, - vmbus_channel_on_version_response }, - { HV_CHANNEL_MESSAGE_UNLOAD, NULL } + 1, vmbus_channel_on_version_response }, + { HV_CHANNEL_MESSAGE_UNLOAD, + 0, NULL } }; /** * Implementation of the work abstraction. */ static void work_item_callback(void *work, int pending) { struct hv_work_item *w = (struct hv_work_item *)work; /* * Serialize work execution. */ if (w->wq->work_sema != NULL) { sema_wait(w->wq->work_sema); } w->callback(w->context); if (w->wq->work_sema != NULL) { sema_post(w->wq->work_sema); } free(w, M_DEVBUF); } struct hv_work_queue* hv_work_queue_create(char* name) { static unsigned int qid = 0; char qname[64]; int pri; struct hv_work_queue* wq; wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n")); if (wq == NULL) return (NULL); /* * We use work abstraction to handle messages * coming from the host and these are typically offers. * Some FreeBsd drivers appear to have a concurrency issue * where probe/attach needs to be serialized. We ensure that * by having only one thread process work elements in a * specific queue by serializing work execution. * */ if (strcmp(name, "vmbusQ") == 0) { pri = PI_DISK; } else { /* control */ pri = PI_NET; /* * Initialize semaphore for this queue by pointing * to the globale semaphore used for synchronizing all * control messages. */ wq->work_sema = &hv_vmbus_g_connection.control_sema; } sprintf(qname, "hv_%s_%u", name, qid); /* * Fixme: FreeBSD 8.2 has a different prototype for * taskqueue_create(), and for certain other taskqueue functions. * We need to research the implications of these changes. * Fixme: Not sure when the changes were introduced. */ wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue, &wq->queue #if __FreeBSD_version < 800000 , &wq->proc #endif ); if (wq->queue == NULL) { free(wq, M_DEVBUF); return (NULL); } if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) { taskqueue_free(wq->queue); free(wq, M_DEVBUF); return (NULL); } qid++; return (wq); } void hv_work_queue_close(struct hv_work_queue *wq) { /* * KYS: Need to drain the taskqueue * before we close the hv_work_queue. */ /*KYS: taskqueue_drain(wq->tq, ); */ taskqueue_free(wq->queue); free(wq, M_DEVBUF); } /** * @brief Create work item */ int hv_queue_work_item( struct hv_work_queue *wq, void (*callback)(void *), void *context) { struct hv_work_item *w = malloc(sizeof(struct hv_work_item), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n")); if (w == NULL) return (ENOMEM); w->callback = callback; w->context = context; w->wq = wq; TASK_INIT(&w->work, 0, work_item_callback, w); return (taskqueue_enqueue(wq->queue, &w->work)); } /** * @brief Allocate and initialize a vmbus channel object */ hv_vmbus_channel* hv_vmbus_allocate_channel(void) { hv_vmbus_channel* channel; channel = (hv_vmbus_channel*) malloc( sizeof(hv_vmbus_channel), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!")); if (channel == NULL) return (NULL); mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF); mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); TAILQ_INIT(&channel->sc_list_anchor); return (channel); } /** * @brief Release the vmbus channel object itself */ static inline void ReleaseVmbusChannel(void *context) { hv_vmbus_channel* channel = (hv_vmbus_channel*) context; free(channel, M_DEVBUF); } /** * @brief Release the resources used by the vmbus channel object */ void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { mtx_destroy(&channel->sc_lock); mtx_destroy(&channel->inbound_lock); /* * We have to release the channel's workqueue/thread in * the vmbus's workqueue/thread context * ie we can't destroy ourselves */ hv_queue_work_item(hv_vmbus_g_connection.work_queue, ReleaseVmbusChannel, (void *) channel); } /** * @brief Process the offer by creating a channel/device * associated with this offer */ static void vmbus_channel_process_offer(hv_vmbus_channel *new_channel) { boolean_t f_new; hv_vmbus_channel* channel; int ret; f_new = TRUE; channel = NULL; /* * Make sure this is a new offer */ mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { if (memcmp(&channel->offer_msg.offer.interface_type, &new_channel->offer_msg.offer.interface_type, sizeof(hv_guid)) == 0 && memcmp(&channel->offer_msg.offer.interface_instance, &new_channel->offer_msg.offer.interface_instance, sizeof(hv_guid)) == 0) { f_new = FALSE; break; } } if (f_new) { /* Insert at tail */ TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); } mtx_unlock(&hv_vmbus_g_connection.channel_lock); /*XXX add new channel to percpu_list */ if (!f_new) { /* * Check if this is a sub channel. */ if (new_channel->offer_msg.offer.sub_channel_index != 0) { /* * It is a sub channel offer, process it. */ new_channel->primary_channel = channel; mtx_lock(&channel->sc_lock); TAILQ_INSERT_TAIL( &channel->sc_list_anchor, new_channel, sc_list_entry); mtx_unlock(&channel->sc_lock); /* Insert new channel into channel_anchor. */ printf("Storvsc get multi-channel offer, rel=%u.\n", new_channel->offer_msg.child_rel_id); mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_lock); if(bootverbose) printf("VMBUS: new multi-channel offer <%p>.\n", new_channel); /*XXX add it to percpu_list */ new_channel->state = HV_CHANNEL_OPEN_STATE; if (channel->sc_creation_callback != NULL) { channel->sc_creation_callback(new_channel); } return; } hv_vmbus_free_vmbus_channel(new_channel); return; } new_channel->state = HV_CHANNEL_OPEN_STATE; /* * Start the process of binding this offer to the driver * (We need to set the device field before calling * hv_vmbus_child_device_add()) */ new_channel->device = hv_vmbus_child_device_create( new_channel->offer_msg.offer.interface_type, new_channel->offer_msg.offer.interface_instance, new_channel); /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. */ ret = hv_vmbus_child_device_register(new_channel->device); if (ret != 0) { mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_lock); hv_vmbus_free_vmbus_channel(new_channel); } } /** * Array of device guids that are performance critical. We try to distribute * the interrupt load for these devices across all online cpus. */ static const hv_guid high_perf_devices[] = { {HV_NIC_GUID, }, {HV_IDE_GUID, }, {HV_SCSI_GUID, }, }; enum { PERF_CHN_NIC = 0, PERF_CHN_IDE, PERF_CHN_SCSI, MAX_PERF_CHN, }; /* * We use this static number to distribute the channel interrupt load. */ static uint32_t next_vcpu; /** * Starting with Win8, we can statically distribute the incoming * channel interrupt load by binding a channel to VCPU. We * implement here a simple round robin scheme for distributing * the interrupt load. * We will bind channels that are not performance critical to cpu 0 and * performance critical channels (IDE, SCSI and Network) will be uniformly * distributed across all available CPUs. */ static void vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid) { uint32_t current_cpu; int i; boolean_t is_perf_channel = FALSE; for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) { if (memcmp(guid->data, high_perf_devices[i].data, sizeof(hv_guid)) == 0) { is_perf_channel = TRUE; break; } } if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) || (!is_perf_channel)) { /* Host's view of guest cpu */ channel->target_vcpu = 0; /* Guest's own view of cpu */ channel->target_cpu = 0; return; } /* mp_ncpus should have the number cpus currently online */ current_cpu = (++next_vcpu % mp_ncpus); channel->target_cpu = current_cpu; channel->target_vcpu = hv_vmbus_g_context.hv_vcpu_index[current_cpu]; if (bootverbose) printf("VMBUS: Total online cpus %d, assign perf channel %d " "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu, current_cpu); } /** * @brief Handler for channel offers from Hyper-V/Azure * * Handler for channel offers from vmbus in parent partition. We ignore * all offers except network and storage offers. For each network and storage * offers, we create a channel object and queue a work item to the channel * object to process the offer synchronously */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_offer_channel* offer; hv_vmbus_channel* new_channel; offer = (hv_vmbus_channel_offer_channel*) hdr; hv_guid *guidType; hv_guid *guidInstance; guidType = &offer->offer.interface_type; guidInstance = &offer->offer.interface_instance; /* Allocate the channel object and save this offer */ new_channel = hv_vmbus_allocate_channel(); if (new_channel == NULL) return; /* * By default we setup state to enable batched * reading. A specific service can choose to * disable this prior to opening the channel. */ new_channel->batched_reading = TRUE; new_channel->signal_event_param = (hv_vmbus_input_signal_event *) (HV_ALIGN_UP((unsigned long) &new_channel->signal_event_buffer, HV_HYPERCALL_PARAM_ALIGN)); new_channel->signal_event_param->connection_id.as_uint32_t = 0; new_channel->signal_event_param->connection_id.u.id = HV_VMBUS_EVENT_CONNECTION_ID; new_channel->signal_event_param->flag_number = 0; new_channel->signal_event_param->rsvd_z = 0; if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) { new_channel->is_dedicated_interrupt = (offer->is_dedicated_interrupt != 0); new_channel->signal_event_param->connection_id.u.id = offer->connection_id; } /* * Bind the channel to a chosen cpu. */ vmbus_channel_select_cpu(new_channel, &offer->offer.interface_type); memcpy(&new_channel->offer_msg, offer, sizeof(hv_vmbus_channel_offer_channel)); new_channel->monitor_group = (uint8_t) offer->monitor_id / 32; new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32; vmbus_channel_process_offer(new_channel); } /** * @brief Rescind offer handler. * * We queue a work item to process this offer * synchronously */ static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_rescind_offer* rescind; hv_vmbus_channel* channel; rescind = (hv_vmbus_channel_rescind_offer*) hdr; channel = hv_vmbus_get_channel_from_rel_id(rescind->child_rel_id); if (channel == NULL) return; hv_vmbus_child_device_unregister(channel->device); } /** * * @brief Invoked when all offers have been delivered. */ static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr) { } /** * @brief Open result handler. * * This is invoked when we received a response * to our channel open request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_open_result* result; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_open_channel* openMsg; result = (hv_vmbus_channel_open_result*) hdr; /* * Find the open msg, copy the result and signal/unblock the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_OPEN_CHANNEL) { openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg; if (openMsg->child_rel_id == result->child_rel_id && openMsg->open_id == result->open_id) { memcpy(&msg_info->response.open_result, result, sizeof(hv_vmbus_channel_open_result)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL created handler. * * This is invoked when we received a response * to our gpadl create request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_created* gpadl_created; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* request_header; hv_vmbus_channel_gpadl_header* gpadl_header; gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr; /* Find the establish msg, copy the result and signal/unblock * the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { request_header = (hv_vmbus_channel_msg_header*) msg_info->msg; if (request_header->message_type == HV_CHANNEL_MESSAGEL_GPADL_HEADER) { gpadl_header = (hv_vmbus_channel_gpadl_header*) request_header; if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id) && (gpadl_created->gpadl == gpadl_header->gpadl)) { memcpy(&msg_info->response.gpadl_created, gpadl_created, sizeof(hv_vmbus_channel_gpadl_created)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL torndown handler. * * This is invoked when we received a respons * to our gpadl teardown request. Find the matching request, copy the * response and signal the requesting thread */ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_torndown* gpadl_torndown; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_gpadl_teardown* gpadlTeardown; gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr; /* * Find the open msg, copy the result and signal/unblock the * wait event. */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) { gpadlTeardown = (hv_vmbus_channel_gpadl_teardown*) requestHeader; if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) { memcpy(&msg_info->response.gpadl_torndown, gpadl_torndown, sizeof(hv_vmbus_channel_gpadl_torndown)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Version response handler. * * This is invoked when we received a response * to our initiate contact request. Find the matching request, copy th * response and signal the requesting thread. */ static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_initiate_contact* initiate; hv_vmbus_channel_version_response* versionResponse; versionResponse = (hv_vmbus_channel_version_response*)hdr; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) { initiate = (hv_vmbus_channel_initiate_contact*) requestHeader; memcpy(&msg_info->response.version_response, versionResponse, sizeof(hv_vmbus_channel_version_response)); sema_post(&msg_info->wait_sema); } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Handler for channel protocol messages. * * This is invoked in the vmbus worker thread context. */ void hv_vmbus_on_channel_message(void *context) { hv_vmbus_message* msg; hv_vmbus_channel_msg_header* hdr; int size; msg = (hv_vmbus_message*) context; hdr = (hv_vmbus_channel_msg_header*) msg->u.payload; size = msg->header.payload_size; if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) { free(msg, M_DEVBUF); return; } if (g_channel_message_table[hdr->message_type].messageHandler) { g_channel_message_table[hdr->message_type].messageHandler(hdr); } /* Free the msg that was allocated in VmbusOnMsgDPC() */ free(msg, M_DEVBUF); } /** * @brief Send a request to get all our pending offers. */ int hv_vmbus_request_channel_offers(void) { int ret; hv_vmbus_channel_msg_header* msg; hv_vmbus_channel_msg_info* msg_info; msg_info = (hv_vmbus_channel_msg_info *) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT); if (msg_info == NULL) { if(bootverbose) printf("Error VMBUS: malloc failed for Request Offers\n"); return (ENOMEM); } msg = (hv_vmbus_channel_msg_header*) msg_info->msg; msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS; ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header)); if (msg_info) free(msg_info, M_DEVBUF); return (ret); } /** * @brief Release channels that are unattached/unconnected (i.e., no drivers associated) */ void hv_vmbus_release_unattached_channels(void) { hv_vmbus_channel *channel; mtx_lock(&hv_vmbus_g_connection.channel_lock); while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) { channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor, channel, list_entry); hv_vmbus_child_device_unregister(channel->device); hv_vmbus_free_vmbus_channel(channel); } mtx_unlock(&hv_vmbus_g_connection.channel_lock); } /** * @brief Select the best outgoing channel * * The channel whose vcpu binding is closest to the currect vcpu will * be selected. * If no multi-channel, always select primary channel * * @param primary - primary channel */ struct hv_vmbus_channel * vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) { hv_vmbus_channel *new_channel = NULL; hv_vmbus_channel *outgoing_channel = primary; int old_cpu_distance = 0; int new_cpu_distance = 0; int cur_vcpu = 0; int smp_pro_id = PCPU_GET(cpuid); if (TAILQ_EMPTY(&primary->sc_list_anchor)) { return outgoing_channel; } if (smp_pro_id >= MAXCPU) { return outgoing_channel; } cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id]; TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { if (new_channel->state != HV_CHANNEL_OPENED_STATE){ continue; } if (new_channel->target_vcpu == cur_vcpu){ return new_channel; } old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ? (outgoing_channel->target_vcpu - cur_vcpu) : (cur_vcpu - outgoing_channel->target_vcpu)); new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ? (new_channel->target_vcpu - cur_vcpu) : (cur_vcpu - new_channel->target_vcpu)); if (old_cpu_distance < new_cpu_distance) { continue; } outgoing_channel = new_channel; } return(outgoing_channel); } Index: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 292860) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 292861) @@ -1,732 +1,753 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" #define VMBUS_IRQ 0x5 static device_t vmbus_devp; static int vmbus_inited; static hv_setup_args setup_args; /* only CPU 0 supported at this time */ /** * @brief Software interrupt thread routine to handle channel messages from * the hypervisor. */ static void vmbus_msg_swintr(void *arg) { int cpu; void* page_addr; + hv_vmbus_channel_msg_header *hdr; + hv_vmbus_channel_msg_table_entry *entry; + hv_vmbus_channel_msg_type msg_type; hv_vmbus_message* msg; hv_vmbus_message* copied; + static bool warned = false; cpu = (int)(long)arg; KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " "cpu out of range!")); page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; for (;;) { - if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) { + if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) break; /* no message */ - } else { + + hdr = (hv_vmbus_channel_msg_header *)msg->u.payload; + msg_type = hdr->message_type; + + if (msg_type >= HV_CHANNEL_MESSAGE_COUNT && !warned) { + warned = true; + printf("VMBUS: unknown message type = %d\n", msg_type); + goto handled; + } + + entry = &g_channel_message_table[msg_type]; + + if (entry->handler_no_sleep) + entry->messageHandler(hdr); + else { + copied = malloc(sizeof(hv_vmbus_message), M_DEVBUF, M_NOWAIT); KASSERT(copied != NULL, ("Error VMBUS: malloc failed to allocate" " hv_vmbus_message!")); if (copied == NULL) continue; + memcpy(copied, msg, sizeof(hv_vmbus_message)); hv_queue_work_item(hv_vmbus_g_connection.work_queue, - hv_vmbus_on_channel_message, copied); - } - + hv_vmbus_on_channel_message, + copied); + } +handled: msg->header.message_type = HV_MESSAGE_TYPE_NONE; /* * Make sure the write to message_type (ie set to * HV_MESSAGE_TYPE_NONE) happens before we read the * message_pending and EOMing. Otherwise, the EOMing will * not deliver any more messages * since there is no empty slot */ wmb(); if (msg->header.message_flags.u.message_pending) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(HV_X64_MSR_EOM, 0); } } } /** * @brief Interrupt filter routine for VMBUS. * * The purpose of this routine is to determine the type of VMBUS protocol * message to process - an event or a channel message. */ static inline int hv_vmbus_isr(void *unused) { int cpu; hv_vmbus_message* msg; hv_vmbus_synic_event_flags* event; void* page_addr; cpu = PCPU_GET(cpuid); /* * The Windows team has advised that we check for events * before checking for messages. This is the way they do it * in Windows when running as a guest in Hyper-V */ page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu]; event = (hv_vmbus_synic_event_flags*) page_addr + HV_VMBUS_MESSAGE_SINT; if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { /* Since we are a child, we only need to check bit 0 */ if (synch_test_and_clear_bit(0, &event->flags32[0])) { swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); } } else { /* * On host with Win8 or above, we can directly look at * the event page. If bit n is set, we have an interrupt * on the channel with id n. * Directly schedule the event software interrupt on * current cpu. */ swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); } /* Check if there are actual msgs to be process */ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0); } return FILTER_HANDLED; } #ifdef HV_DEBUG_INTR uint32_t hv_intr_count = 0; #endif uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; uint32_t hv_vmbus_intr_cpu[MAXCPU]; void hv_vector_handler(struct trapframe *trap_frame) { #ifdef HV_DEBUG_INTR int cpu; #endif /* * Disable preemption. */ critical_enter(); #ifdef HV_DEBUG_INTR /* * Do a little interrupt counting. */ cpu = PCPU_GET(cpuid); hv_vmbus_intr_cpu[cpu]++; hv_intr_count++; #endif hv_vmbus_isr(NULL); /* * Enable preemption. */ critical_exit(); } static int vmbus_read_ivar( device_t dev, device_t child, int index, uintptr_t* result) { struct hv_device *child_dev_ctx = device_get_ivars(child); switch (index) { case HV_VMBUS_IVAR_TYPE: *result = (uintptr_t) &child_dev_ctx->class_id; return (0); case HV_VMBUS_IVAR_INSTANCE: *result = (uintptr_t) &child_dev_ctx->device_id; return (0); case HV_VMBUS_IVAR_DEVCTX: *result = (uintptr_t) child_dev_ctx; return (0); case HV_VMBUS_IVAR_NODE: *result = (uintptr_t) child_dev_ctx->device; return (0); } return (ENOENT); } static int vmbus_write_ivar( device_t dev, device_t child, int index, uintptr_t value) { switch (index) { case HV_VMBUS_IVAR_TYPE: case HV_VMBUS_IVAR_INSTANCE: case HV_VMBUS_IVAR_DEVCTX: case HV_VMBUS_IVAR_NODE: /* read-only */ return (EINVAL); } return (ENOENT); } struct hv_device* hv_vmbus_child_device_create( hv_guid type, hv_guid instance, hv_vmbus_channel* channel) { hv_device* child_dev; /* * Allocate the new child device */ child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT(child_dev != NULL, ("Error VMBUS: malloc failed to allocate hv_device!")); if (child_dev == NULL) return (NULL); child_dev->channel = channel; memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); return (child_dev); } static void print_dev_guid(struct hv_device *dev) { int i; unsigned char guid_name[100]; for (i = 0; i < 32; i += 2) sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]); if(bootverbose) printf("VMBUS: Class ID: %s\n", guid_name); } int hv_vmbus_child_device_register(struct hv_device *child_dev) { device_t child; int ret = 0; print_dev_guid(child_dev); child = device_add_child(vmbus_devp, NULL, -1); child_dev->device = child; device_set_ivars(child, child_dev); mtx_lock(&Giant); ret = device_probe_and_attach(child); mtx_unlock(&Giant); return (0); } int hv_vmbus_child_device_unregister(struct hv_device *child_dev) { int ret = 0; /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); ret = device_delete_child(vmbus_devp, child_dev->device); mtx_unlock(&Giant); return(ret); } static void vmbus_identify(driver_t *driver, device_t parent) { if (!hv_vmbus_query_hypervisor_presence()) return; vm_guest = VM_GUEST_HV; BUS_ADD_CHILD(parent, 0, "vmbus", 0); } static int vmbus_probe(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: probe\n"); device_set_desc(dev, "Vmbus Devices"); return (BUS_PROBE_NOWILDCARD); } #ifdef HYPERV extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback); /** * @brief Find a free IDT slot and setup the interrupt handler. */ static int vmbus_vector_alloc(void) { int vector; uintptr_t func; struct gate_descriptor *ip; /* * Search backwards form the highest IDT vector available for use * as vmbus channel callback vector. We install 'hv_vmbus_callback' * handler at that vector and use it to interrupt vcpus. */ vector = APIC_SPURIOUS_INT; while (--vector >= APIC_IPI_INTS) { ip = &idt[vector]; func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); if (func == (uintptr_t)&IDTVEC(rsvd)) { #ifdef __i386__ setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #else setidt(vector , IDTVEC(hv_vmbus_callback), SDT_SYSIGT, SEL_KPL, 0); #endif return (vector); } } return (0); } /** * @brief Restore the IDT slot to rsvd. */ static void vmbus_vector_free(int vector) { uintptr_t func; struct gate_descriptor *ip; if (vector == 0) return; KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT, ("invalid vector %d", vector)); ip = &idt[vector]; func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback), ("invalid vector %d", vector)); setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); } #else /* HYPERV */ static int vmbus_vector_alloc(void) { return(0); } static void vmbus_vector_free(int vector) { } #endif /* HYPERV */ /** * @brief Main vmbus driver initialization routine. * * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_bus_init(void) { int i, j, n, ret; if (vmbus_inited) return (0); vmbus_inited = 1; ret = hv_vmbus_init(); if (ret) { if(bootverbose) printf("Error VMBUS: Hypervisor Initialization Failed!\n"); return (ret); } /* * Find a free IDT slot for vmbus callback. */ hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc(); if (hv_vmbus_g_context.hv_cb_vector == 0) { if(bootverbose) printf("Error VMBUS: Cannot find free IDT slot for " "vmbus callback!\n"); goto cleanup; } if(bootverbose) printf("VMBUS: vmbus callback vector %d\n", hv_vmbus_g_context.hv_cb_vector); /* * Notify the hypervisor of our vector. */ setup_args.vector = hv_vmbus_g_context.hv_cb_vector; CPU_FOREACH(j) { hv_vmbus_intr_cpu[j] = 0; hv_vmbus_swintr_event_cpu[j] = 0; hv_vmbus_g_context.hv_event_intr_event[j] = NULL; hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; hv_vmbus_g_context.event_swintr[j] = NULL; hv_vmbus_g_context.msg_swintr[j] = NULL; for (i = 0; i < 2; i++) setup_args.page_buffers[2 * j + i] = NULL; } /* * Per cpu setup. */ CPU_FOREACH(j) { /* * Setup software interrupt thread and handler for msg handling. */ ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j], "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0, &hv_vmbus_g_context.msg_swintr[j]); if (ret) { if(bootverbose) printf("VMBUS: failed to setup msg swi for " "cpu %d\n", j); goto cleanup1; } /* * Bind the swi thread to the cpu. */ ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j], j); if (ret) { if(bootverbose) printf("VMBUS: failed to bind msg swi thread " "to cpu %d\n", j); goto cleanup1; } /* * Setup software interrupt thread and handler for * event handling. */ ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j], "hv_event", hv_vmbus_on_events, (void *)(long)j, SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]); if (ret) { if(bootverbose) printf("VMBUS: failed to setup event swi for " "cpu %d\n", j); goto cleanup1; } /* * Prepare the per cpu msg and event pages to be called on each cpu. */ for(i = 0; i < 2; i++) { setup_args.page_buffers[2 * j + i] = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); if (setup_args.page_buffers[2 * j + i] == NULL) { KASSERT(setup_args.page_buffers[2 * j + i] != NULL, ("Error VMBUS: malloc failed!")); goto cleanup1; } } } if (bootverbose) printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n", smp_started); smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); /* * Connect to VMBus in the root partition */ ret = hv_vmbus_connect(); if (ret != 0) goto cleanup1; hv_vmbus_request_channel_offers(); return (ret); cleanup1: /* * Free pages alloc'ed */ for (n = 0; n < 2 * MAXCPU; n++) if (setup_args.page_buffers[n] != NULL) free(setup_args.page_buffers[n], M_DEVBUF); /* * remove swi and vmbus callback vector; */ CPU_FOREACH(j) { if (hv_vmbus_g_context.msg_swintr[j] != NULL) swi_remove(hv_vmbus_g_context.msg_swintr[j]); if (hv_vmbus_g_context.event_swintr[j] != NULL) swi_remove(hv_vmbus_g_context.event_swintr[j]); hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; hv_vmbus_g_context.hv_event_intr_event[j] = NULL; } vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); cleanup: hv_vmbus_cleanup(); return (ret); } static int vmbus_attach(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: attach dev: %p\n", dev); vmbus_devp = dev; /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); return (0); } static void vmbus_init(void) { if (vm_guest != VM_GUEST_HV) return; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); } static void vmbus_bus_exit(void) { int i; hv_vmbus_release_unattached_channels(); hv_vmbus_disconnect(); smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); for(i = 0; i < 2 * MAXCPU; i++) { if (setup_args.page_buffers[i] != 0) free(setup_args.page_buffers[i], M_DEVBUF); } hv_vmbus_cleanup(); /* remove swi */ CPU_FOREACH(i) { if (hv_vmbus_g_context.msg_swintr[i] != NULL) swi_remove(hv_vmbus_g_context.msg_swintr[i]); if (hv_vmbus_g_context.event_swintr[i] != NULL) swi_remove(hv_vmbus_g_context.event_swintr[i]); hv_vmbus_g_context.hv_msg_intr_event[i] = NULL; hv_vmbus_g_context.hv_event_intr_event[i] = NULL; } vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); return; } static void vmbus_exit(void) { vmbus_bus_exit(); } static int vmbus_detach(device_t dev) { vmbus_exit(); return (0); } static void vmbus_mod_load(void) { if(bootverbose) printf("VMBUS: load\n"); } static void vmbus_mod_unload(void) { if(bootverbose) printf("VMBUS: unload\n"); } static int vmbus_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: vmbus_mod_load(); break; case MOD_UNLOAD: vmbus_mod_unload(); break; } return (0); } static device_method_t vmbus_methods[] = { /** Device interface */ DEVMETHOD(device_identify, vmbus_identify), DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /** Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_write_ivar, vmbus_write_ivar), { 0, 0 } }; static char driver_name[] = "vmbus"; static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); MODULE_VERSION(vmbus,1); /* We want to be started after SMP is initialized */ SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); Index: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 292860) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 292861) @@ -1,715 +1,725 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HYPERV_PRIV_H__ #define __HYPERV_PRIV_H__ #include #include #include #include #include /* * Status codes for hypervisor operations. */ typedef uint16_t hv_vmbus_status; #define HV_MESSAGE_SIZE (256) #define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) #define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* * Synthetic interrupt controller flag constants. */ #define HV_EVENT_FLAGS_COUNT (256 * 8) #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t)) /* * MessageId: HV_STATUS_INSUFFICIENT_BUFFERS * MessageText: * You did not supply enough message buffers to send a message. */ #define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013) typedef void (*hv_vmbus_channel_callback)(void *context); typedef struct { void* data; uint32_t length; } hv_vmbus_sg_buffer_list; typedef struct { uint32_t current_interrupt_mask; uint32_t current_read_index; uint32_t current_write_index; uint32_t bytes_avail_to_read; uint32_t bytes_avail_to_write; } hv_vmbus_ring_buffer_debug_info; typedef struct { uint32_t rel_id; hv_vmbus_channel_state state; hv_guid interface_type; hv_guid interface_instance; uint32_t monitor_id; uint32_t server_monitor_pending; uint32_t server_monitor_latency; uint32_t server_monitor_connection_id; uint32_t client_monitor_pending; uint32_t client_monitor_latency; uint32_t client_monitor_connection_id; hv_vmbus_ring_buffer_debug_info inbound; hv_vmbus_ring_buffer_debug_info outbound; } hv_vmbus_channel_debug_info; typedef union { hv_vmbus_channel_version_supported version_supported; hv_vmbus_channel_open_result open_result; hv_vmbus_channel_gpadl_torndown gpadl_torndown; hv_vmbus_channel_gpadl_created gpadl_created; hv_vmbus_channel_version_response version_response; } hv_vmbus_channel_msg_response; /* * Represents each channel msg on the vmbus connection * This is a variable-size data structure depending on * the msg type itself */ typedef struct hv_vmbus_channel_msg_info { /* * Bookkeeping stuff */ TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry; /* * So far, this is only used to handle * gpadl body message */ TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor; /* * Synchronize the request/response if * needed. * KYS: Use a semaphore for now. * Not perf critical. */ struct sema wait_sema; hv_vmbus_channel_msg_response response; uint32_t message_size; /** * The channel message that goes out on * the "wire". It will contain at * minimum the * hv_vmbus_channel_msg_header * header. */ unsigned char msg[0]; } hv_vmbus_channel_msg_info; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_page_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT]; } __packed hv_vmbus_channel_packet_page_buffer; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_multipage_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; /* Always 1 in this case */ hv_vmbus_multipage_buffer range; } __packed hv_vmbus_channel_packet_multipage_buffer; enum { HV_VMBUS_MESSAGE_CONNECTION_ID = 1, HV_VMBUS_MESSAGE_PORT_ID = 1, HV_VMBUS_EVENT_CONNECTION_ID = 2, HV_VMBUS_EVENT_PORT_ID = 2, HV_VMBUS_MONITOR_CONNECTION_ID = 3, HV_VMBUS_MONITOR_PORT_ID = 3, HV_VMBUS_MESSAGE_SINT = 2 }; #define HV_PRESENT_BIT 0x80000000 #define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t) typedef struct { uint64_t guest_id; void* hypercall_page; hv_bool_uint8_t syn_ic_initialized; hv_vmbus_handle syn_ic_msg_page[MAXCPU]; hv_vmbus_handle syn_ic_event_page[MAXCPU]; /* * For FreeBSD cpuid to Hyper-V vcpuid mapping. */ uint32_t hv_vcpu_index[MAXCPU]; /* * Each cpu has its own software interrupt handler for channel * event and msg handling. */ struct intr_event *hv_event_intr_event[MAXCPU]; struct intr_event *hv_msg_intr_event[MAXCPU]; void *event_swintr[MAXCPU]; void *msg_swintr[MAXCPU]; /* * Host use this vector to intrrupt guest for vmbus channel * event and msg. */ unsigned int hv_cb_vector; } hv_vmbus_context; /* * Define hypervisor message types */ typedef enum { HV_MESSAGE_TYPE_NONE = 0x00000000, /* * Memory access messages */ HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000, HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001, /* * Timer notification messages */ HV_MESSAGE_TIMER_EXPIRED = 0x80000010, /* * Error messages */ HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020, HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021, HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022, /* * Trace buffer complete messages */ HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040, /* * Platform-specific processor intercept messages */ HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000, HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001, HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002, HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003, HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004, HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005 } hv_vmbus_msg_type; /* * Define port identifier type */ typedef union _hv_vmbus_port_id { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u ; } hv_vmbus_port_id; /* * Define synthetic interrupt controller message flag */ typedef union { uint8_t as_uint8_t; struct { uint8_t message_pending:1; uint8_t reserved:7; } u; } hv_vmbus_msg_flags; typedef uint64_t hv_vmbus_partition_id; /* * Define synthetic interrupt controller message header */ typedef struct { hv_vmbus_msg_type message_type; uint8_t payload_size; hv_vmbus_msg_flags message_flags; uint8_t reserved[2]; union { hv_vmbus_partition_id sender; hv_vmbus_port_id port; } u; } hv_vmbus_msg_header; /* * Define synthetic interrupt controller message format */ typedef struct { hv_vmbus_msg_header header; union { uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } u ; } hv_vmbus_message; /* * Maximum channels is determined by the size of the interrupt * page which is PAGE_SIZE. 1/2 of PAGE_SIZE is for * send endpoint interrupt and the other is receive * endpoint interrupt. * * Note: (PAGE_SIZE >> 1) << 3 allocates 16348 channels */ #define HV_MAX_NUM_CHANNELS (PAGE_SIZE >> 1) << 3 /* * (The value here must be in multiple of 32) */ #define HV_MAX_NUM_CHANNELS_SUPPORTED 256 /* * VM Bus connection states */ typedef enum { HV_DISCONNECTED, HV_CONNECTING, HV_CONNECTED, HV_DISCONNECTING } hv_vmbus_connect_state; #define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT typedef struct { hv_vmbus_connect_state connect_state; uint32_t next_gpadl_handle; /** * Represents channel interrupts. Each bit position * represents a channel. * When a channel sends an interrupt via VMBUS, it * finds its bit in the send_interrupt_page, set it and * calls Hv to generate a port event. The other end * receives the port event and parse the * recv_interrupt_page to see which bit is set */ void *interrupt_page; void *send_interrupt_page; void *recv_interrupt_page; /* * 2 pages - 1st page for parent->child * notification and 2nd is child->parent * notification */ void *monitor_pages; TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** * List of primary channels. Sub channels will be linked * under their primary channel. */ TAILQ_HEAD(, hv_vmbus_channel) channel_anchor; struct mtx channel_lock; hv_vmbus_handle work_queue; struct sema control_sema; } hv_vmbus_connection; /* * Declare the MSR used to identify the guest OS */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 typedef union { uint64_t as_uint64_t; struct { uint64_t build_number : 16; uint64_t service_version : 8; /* Service Pack, etc. */ uint64_t minor_version : 8; uint64_t major_version : 8; /* * HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS) * HV_GUEST_OS_VENDOR */ uint64_t os_id : 8; uint64_t vendor_id : 16; } u; } hv_vmbus_x64_msr_guest_os_id_contents; /* * Declare the MSR used to setup pages used to communicate with the hypervisor */ #define HV_X64_MSR_HYPERCALL 0x40000001 typedef union { uint64_t as_uint64_t; struct { uint64_t enable :1; uint64_t reserved :11; uint64_t guest_physical_address :52; } u; } hv_vmbus_x64_msr_hypercall_contents; typedef union { uint32_t as_uint32_t; struct { uint32_t group_enable :4; uint32_t rsvd_z :28; } u; } hv_vmbus_monitor_trigger_state; typedef union { uint64_t as_uint64_t; struct { uint32_t pending; uint32_t armed; } u; } hv_vmbus_monitor_trigger_group; typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } hv_vmbus_monitor_parameter; /* * hv_vmbus_monitor_page Layout * ------------------------------------------------------ * | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) | * | 8 | trigger_group[0] | * | 10 | trigger_group[1] | * | 18 | trigger_group[2] | * | 20 | trigger_group[3] | * | 28 | Rsvd2[0] | * | 30 | Rsvd2[1] | * | 38 | Rsvd2[2] | * | 40 | next_check_time[0][0] | next_check_time[0][1] | * | ... | * | 240 | latency[0][0..3] | * | 340 | Rsvz3[0] | * | 440 | parameter[0][0] | * | 448 | parameter[0][1] | * | ... | * | 840 | Rsvd4[0] | * ------------------------------------------------------ */ typedef struct { hv_vmbus_monitor_trigger_state trigger_state; uint32_t rsvd_z1; hv_vmbus_monitor_trigger_group trigger_group[4]; uint64_t rsvd_z2[3]; int32_t next_check_time[4][32]; uint16_t latency[4][32]; uint64_t rsvd_z3[32]; hv_vmbus_monitor_parameter parameter[4][32]; uint8_t rsvd_z4[1984]; } hv_vmbus_monitor_page; /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES). */ typedef enum { HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001, HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000, HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001, /* * The remaining functions depend on the value * of hv_cpu_id_function_interface */ HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002, HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003, HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004, HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005 } hv_vmbus_cpuid_function; /* * Define the format of the SIMP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t simp_enabled : 1; uint64_t preserved : 11; uint64_t base_simp_gpa : 52; } u; } hv_vmbus_synic_simp; /* * Define the format of the SIEFP register */ typedef union { uint64_t as_uint64_t; struct { uint64_t siefp_enabled : 1; uint64_t preserved : 11; uint64_t base_siefp_gpa : 52; } u; } hv_vmbus_synic_siefp; /* * Define synthetic interrupt source */ typedef union { uint64_t as_uint64_t; struct { uint64_t vector : 8; uint64_t reserved1 : 8; uint64_t masked : 1; uint64_t auto_eoi : 1; uint64_t reserved2 : 46; } u; } hv_vmbus_synic_sint; /* * Define syn_ic control register */ typedef union _hv_vmbus_synic_scontrol { uint64_t as_uint64_t; struct { uint64_t enable : 1; uint64_t reserved : 63; } u; } hv_vmbus_synic_scontrol; /* * Define the hv_vmbus_post_message hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint32_t reserved; hv_vmbus_msg_type message_type; uint32_t payload_size; uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; } hv_vmbus_input_post_message; /* * Define the synthetic interrupt controller event flags format */ typedef union { uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT]; uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; } hv_vmbus_synic_event_flags; /* MSR used to provide vcpu index */ #define HV_X64_MSR_VP_INDEX (0x40000002) /* * Define synthetic interrupt controller model specific registers */ #define HV_X64_MSR_SCONTROL (0x40000080) #define HV_X64_MSR_SVERSION (0x40000081) #define HV_X64_MSR_SIEFP (0x40000082) #define HV_X64_MSR_SIMP (0x40000083) #define HV_X64_MSR_EOM (0x40000084) #define HV_X64_MSR_SINT0 (0x40000090) #define HV_X64_MSR_SINT1 (0x40000091) #define HV_X64_MSR_SINT2 (0x40000092) #define HV_X64_MSR_SINT3 (0x40000093) #define HV_X64_MSR_SINT4 (0x40000094) #define HV_X64_MSR_SINT5 (0x40000095) #define HV_X64_MSR_SINT6 (0x40000096) #define HV_X64_MSR_SINT7 (0x40000097) #define HV_X64_MSR_SINT8 (0x40000098) #define HV_X64_MSR_SINT9 (0x40000099) #define HV_X64_MSR_SINT10 (0x4000009A) #define HV_X64_MSR_SINT11 (0x4000009B) #define HV_X64_MSR_SINT12 (0x4000009C) #define HV_X64_MSR_SINT13 (0x4000009D) #define HV_X64_MSR_SINT14 (0x4000009E) #define HV_X64_MSR_SINT15 (0x4000009F) /* * Declare the various hypercall operations */ typedef enum { HV_CALL_POST_MESSAGE = 0x005c, HV_CALL_SIGNAL_EVENT = 0x005d, } hv_vmbus_call_code; /** * Global variables */ extern hv_vmbus_context hv_vmbus_g_context; extern hv_vmbus_connection hv_vmbus_g_connection; +typedef void (*vmbus_msg_handler)(hv_vmbus_channel_msg_header *msg); + +typedef struct hv_vmbus_channel_msg_table_entry { + hv_vmbus_channel_msg_type messageType; + + bool handler_no_sleep; /* true: the handler doesn't sleep */ + vmbus_msg_handler messageHandler; +} hv_vmbus_channel_msg_table_entry; + +extern hv_vmbus_channel_msg_table_entry g_channel_message_table[]; /* * Private, VM Bus functions */ int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); void hv_ring_buffer_cleanup( hv_vmbus_ring_buffer_info *ring_info); int hv_ring_buffer_write( hv_vmbus_ring_buffer_info *ring_info, hv_vmbus_sg_buffer_list sg_buffers[], uint32_t sg_buff_count, boolean_t *need_sig); int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); int hv_ring_buffer_read( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len, uint32_t offset); uint32_t hv_vmbus_get_ring_buffer_interrupt_mask( hv_vmbus_ring_buffer_info *ring_info); void hv_vmbus_dump_ring_info( hv_vmbus_ring_buffer_info *ring_info, char *prefix); void hv_ring_buffer_read_begin( hv_vmbus_ring_buffer_info *ring_info); uint32_t hv_ring_buffer_read_end( hv_vmbus_ring_buffer_info *ring_info); hv_vmbus_channel* hv_vmbus_allocate_channel(void); void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); void hv_vmbus_on_channel_message(void *context); int hv_vmbus_request_channel_offers(void); void hv_vmbus_release_unattached_channels(void); int hv_vmbus_init(void); void hv_vmbus_cleanup(void); uint16_t hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void *payload, size_t payload_size); uint16_t hv_vmbus_signal_event(void *con_id); void hv_vmbus_synic_init(void *irq_arg); void hv_vmbus_synic_cleanup(void *arg); int hv_vmbus_query_hypervisor_presence(void); struct hv_device* hv_vmbus_child_device_create( hv_guid device_type, hv_guid device_instance, hv_vmbus_channel *channel); int hv_vmbus_child_device_register( struct hv_device *child_dev); int hv_vmbus_child_device_unregister( struct hv_device *child_dev); hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id); /** * Connection interfaces */ int hv_vmbus_connect(void); int hv_vmbus_disconnect(void); int hv_vmbus_post_message(void *buffer, size_t buf_size); int hv_vmbus_set_event(hv_vmbus_channel *channel); void hv_vmbus_on_events(void *); /* * The guest OS needs to register the guest ID with the hypervisor. * The guest ID is a 64 bit entity and the structure of this ID is * specified in the Hyper-V specification: * * http://msdn.microsoft.com/en-us/library/windows/ * hardware/ff542653%28v=vs.85%29.aspx * * While the current guideline does not specify how FreeBSD guest ID(s) * need to be generated, our plan is to publish the guidelines for * FreeBSD and other guest operating systems that currently are hosted * on Hyper-V. The implementation here conforms to this yet * unpublished guidelines. * * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source * 62:56 - Os Type; Linux is 0x100, FreeBSD is 0x200 * 55:48 - Distro specific identification * 47:16 - FreeBSD kernel version number * 15:0 - Distro specific identification * */ #define HV_FREEBSD_VENDOR_ID 0x8200 #define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0) static inline uint64_t hv_generate_guest_id( uint8_t distro_id_part1, uint16_t distro_id_part2) { uint64_t guest_id; guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48); guest_id |= (((uint64_t)(distro_id_part1)) << 48); guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */ guest_id |= ((uint64_t)(distro_id_part2)); return guest_id; } typedef struct { unsigned int vector; void *page_buffers[2 * MAXCPU]; } hv_setup_args; #endif /* __HYPERV_PRIV_H__ */