Index: head/sys/dev/hyperv/include/hyperv.h =================================================================== --- head/sys/dev/hyperv/include/hyperv.h (revision 296082) +++ head/sys/dev/hyperv/include/hyperv.h (revision 296083) @@ -1,923 +1,921 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV definitions for messages that are sent between instances of the * Channel Management Library in separate partitions, or in some cases, * back to itself. */ #ifndef __HYPERV_H__ #define __HYPERV_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef uint8_t hv_bool_uint8_t; #define HV_S_OK 0x00000000 #define HV_E_FAIL 0x80004005 #define HV_ERROR_NOT_SUPPORTED 0x80070032 #define HV_ERROR_MACHINE_LOCKED 0x800704F7 /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define HV_VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define HV_VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define HV_VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define HV_VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define HV_VMBUS_VERSION_INVALID -1 #define HV_VMBUS_VERSION_CURRENT HV_VMBUS_VERSION_WIN8_1 /* * Make maximum size of pipe payload of 16K */ #define HV_MAX_PIPE_DATA_PAYLOAD (sizeof(BYTE) * 16384) /* * Define pipe_mode values */ #define HV_VMBUS_PIPE_TYPE_BYTE 0x00000000 #define HV_VMBUS_PIPE_TYPE_MESSAGE 0x00000004 /* * The size of the user defined data buffer for non-pipe offers */ #define HV_MAX_USER_DEFINED_BYTES 120 /* * The size of the user defined data buffer for pipe offers */ #define HV_MAX_PIPE_USER_DEFINED_BYTES 116 #define HV_MAX_PAGE_BUFFER_COUNT 32 #define HV_MAX_MULTIPAGE_BUFFER_COUNT 32 #define HV_ALIGN_UP(value, align) \ (((value) & (align-1)) ? \ (((value) + (align-1)) & ~(align-1) ) : (value)) #define HV_ALIGN_DOWN(value, align) ( (value) & ~(align-1) ) #define HV_NUM_PAGES_SPANNED(addr, len) \ ((HV_ALIGN_UP(addr+len, PAGE_SIZE) - \ HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT ) typedef struct hv_guid { unsigned char data[16]; } __packed hv_guid; #define HV_NIC_GUID \ .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, \ 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} #define HV_IDE_GUID \ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \ 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} #define HV_SCSI_GUID \ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \ 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} /* * At the center of the Channel Management library is * the Channel Offer. This struct contains the * fundamental information about an offer. */ typedef struct hv_vmbus_channel_offer { hv_guid interface_type; hv_guid interface_instance; uint64_t interrupt_latency_in_100ns_units; uint32_t interface_revision; uint32_t server_context_area_size; /* in bytes */ uint16_t channel_flags; uint16_t mmio_megabytes; /* in bytes * 1024 * 1024 */ union { /* * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes. */ struct { uint8_t user_defined[HV_MAX_USER_DEFINED_BYTES]; } __packed standard; /* * Pipes: The following structure is an integrated pipe protocol, which * is implemented on top of standard user-defined data. pipe * clients have HV_MAX_PIPE_USER_DEFINED_BYTES left for their * own use. */ struct { uint32_t pipe_mode; uint8_t user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES]; } __packed pipe; } u; /* * Sub_channel_index, newly added in Win8. */ uint16_t sub_channel_index; uint16_t padding; } __packed hv_vmbus_channel_offer; typedef uint32_t hv_gpadl_handle; typedef struct { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; } __packed hv_vm_packet_descriptor; typedef uint32_t hv_previous_packet_offset; typedef struct { hv_previous_packet_offset previous_packet_start_offset; hv_vm_packet_descriptor descriptor; } __packed hv_vm_packet_header; typedef struct { uint32_t byte_count; uint32_t byte_offset; } __packed hv_vm_transfer_page; typedef struct { hv_vm_packet_descriptor d; uint16_t transfer_page_set_id; hv_bool_uint8_t sender_owns_set; uint8_t reserved; uint32_t range_count; hv_vm_transfer_page ranges[1]; } __packed hv_vm_transfer_page_packet_header; typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint32_t reserved; } __packed hv_vm_gpadl_packet_header; typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint16_t transfer_page_set_id; uint16_t reserved; } __packed hv_vm_add_remove_transfer_page_set; /* * This structure defines a range in guest * physical space that can be made * to look virtually contiguous. */ typedef struct { uint32_t byte_count; uint32_t byte_offset; uint64_t pfn_array[0]; } __packed hv_gpa_range; /* * This is the format for an Establish Gpadl packet, which contains a handle * by which this GPADL will be known and a set of GPA ranges associated with * it. This can be converted to a MDL by the guest OS. If there are multiple * GPA ranges, then the resulting MDL will be "chained," representing multiple * VA ranges. */ typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint32_t range_count; hv_gpa_range range[1]; } __packed hv_vm_establish_gpadl; /* * This is the format for a Teardown Gpadl packet, which indicates that the * GPADL handle in the Establish Gpadl packet will never be referenced again. */ typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; /* for alignment to a 8-byte boundary */ uint32_t reserved; } __packed hv_vm_teardown_gpadl; /* * This is the format for a GPA-Direct packet, which contains a set of GPA * ranges, in addition to commands and/or data. */ typedef struct { hv_vm_packet_descriptor d; uint32_t reserved; uint32_t range_count; hv_gpa_range range[1]; } __packed hv_vm_data_gpa_direct; /* * This is the format for a Additional data Packet. */ typedef struct { hv_vm_packet_descriptor d; uint64_t total_bytes; uint32_t byte_offset; uint32_t byte_count; uint8_t data[1]; } __packed hv_vm_additional_data; typedef union { hv_vm_packet_descriptor simple_header; hv_vm_transfer_page_packet_header transfer_page_header; hv_vm_gpadl_packet_header gpadl_header; hv_vm_add_remove_transfer_page_set add_remove_transfer_page_header; hv_vm_establish_gpadl establish_gpadl_header; hv_vm_teardown_gpadl teardown_gpadl_header; hv_vm_data_gpa_direct data_gpa_direct_header; } __packed hv_vm_packet_largest_possible_header; typedef enum { HV_VMBUS_PACKET_TYPE_INVALID = 0x0, HV_VMBUS_PACKET_TYPES_SYNCH = 0x1, HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET = 0x2, HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET = 0x3, HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL = 0x4, HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL = 0x5, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND = 0x6, HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES = 0x7, HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL = 0x8, HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT = 0x9, HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST = 0xa, HV_VMBUS_PACKET_TYPE_COMPLETION = 0xb, HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS = 0xc, HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA = 0xd } hv_vmbus_packet_type; #define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED 1 /* * Version 1 messages */ typedef enum { HV_CHANNEL_MESSAGE_INVALID = 0, HV_CHANNEL_MESSAGE_OFFER_CHANNEL = 1, HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER = 2, HV_CHANNEL_MESSAGE_REQUEST_OFFERS = 3, HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED = 4, HV_CHANNEL_MESSAGE_OPEN_CHANNEL = 5, HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT = 6, HV_CHANNEL_MESSAGE_CLOSE_CHANNEL = 7, HV_CHANNEL_MESSAGEL_GPADL_HEADER = 8, HV_CHANNEL_MESSAGE_GPADL_BODY = 9, HV_CHANNEL_MESSAGE_GPADL_CREATED = 10, HV_CHANNEL_MESSAGE_GPADL_TEARDOWN = 11, HV_CHANNEL_MESSAGE_GPADL_TORNDOWN = 12, HV_CHANNEL_MESSAGE_REL_ID_RELEASED = 13, HV_CHANNEL_MESSAGE_INITIATED_CONTACT = 14, HV_CHANNEL_MESSAGE_VERSION_RESPONSE = 15, HV_CHANNEL_MESSAGE_UNLOAD = 16, HV_CHANNEL_MESSAGE_COUNT } hv_vmbus_channel_msg_type; typedef struct { hv_vmbus_channel_msg_type message_type; uint32_t padding; } __packed hv_vmbus_channel_msg_header; /* * Query VMBus Version parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t version; } __packed hv_vmbus_channel_query_vmbus_version; /* * VMBus Version Supported parameters */ typedef struct { hv_vmbus_channel_msg_header header; hv_bool_uint8_t version_supported; } __packed hv_vmbus_channel_version_supported; /* * Channel Offer parameters */ typedef struct { hv_vmbus_channel_msg_header header; hv_vmbus_channel_offer offer; uint32_t child_rel_id; uint8_t monitor_id; /* * This field has been split into a bit field on Win7 * and higher. */ uint8_t monitor_allocated:1; uint8_t reserved:7; /* * Following fields were added in win7 and higher. * Make sure to check the version before accessing these fields. * * If "is_dedicated_interrupt" is set, we must not set the * associated bit in the channel bitmap while sending the * interrupt to the host. * * connection_id is used in signaling the host. */ uint16_t is_dedicated_interrupt:1; uint16_t reserved1:15; uint32_t connection_id; } __packed hv_vmbus_channel_offer_channel; /* * Rescind Offer parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_rescind_offer; /* * Request Offer -- no parameters, SynIC message contains the partition ID * * Set Snoop -- no parameters, SynIC message contains the partition ID * * Clear Snoop -- no parameters, SynIC message contains the partition ID * * All Offers Delivered -- no parameters, SynIC message contains the * partition ID * * Flush Client -- no parameters, SynIC message contains the partition ID */ /* * Open Channel parameters */ typedef struct { hv_vmbus_channel_msg_header header; /* * Identifies the specific VMBus channel that is being opened. */ uint32_t child_rel_id; /* * ID making a particular open request at a channel offer unique. */ uint32_t open_id; /* * GPADL for the channel's ring buffer. */ hv_gpadl_handle ring_buffer_gpadl_handle; /* * Before win8, all incoming channel interrupts are only * delivered on cpu 0. Setting this value to 0 would * preserve the earlier behavior. */ uint32_t target_vcpu; /* * The upstream ring buffer begins at offset zero in the memory described * by ring_buffer_gpadl_handle. The downstream ring buffer follows it at * this offset (in pages). */ uint32_t downstream_ring_buffer_page_offset; /* * User-specific data to be passed along to the server endpoint. */ uint8_t user_data[HV_MAX_USER_DEFINED_BYTES]; } __packed hv_vmbus_channel_open_channel; typedef uint32_t hv_nt_status; /* * Open Channel Result parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t open_id; hv_nt_status status; } __packed hv_vmbus_channel_open_result; /* * Close channel parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_close_channel; /* * Channel Message GPADL */ #define HV_GPADL_TYPE_RING_BUFFER 1 #define HV_GPADL_TYPE_SERVER_SAVE_AREA 2 #define HV_GPADL_TYPE_TRANSACTION 8 /* * The number of PFNs in a GPADL message is defined by the number of pages * that would be spanned by byte_count and byte_offset. If the implied number * of PFNs won't fit in this packet, there will be a follow-up packet that * contains more */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint16_t range_buf_len; uint16_t range_count; hv_gpa_range range[0]; } __packed hv_vmbus_channel_gpadl_header; /* * This is the follow-up packet that contains more PFNs */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t message_number; uint32_t gpadl; uint64_t pfn[0]; } __packed hv_vmbus_channel_gpadl_body; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint32_t creation_status; } __packed hv_vmbus_channel_gpadl_created; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_teardown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_torndown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_relid_released; typedef struct { hv_vmbus_channel_msg_header header; uint32_t vmbus_version_requested; uint32_t padding2; uint64_t interrupt_page; uint64_t monitor_page_1; uint64_t monitor_page_2; } __packed hv_vmbus_channel_initiate_contact; typedef struct { hv_vmbus_channel_msg_header header; hv_bool_uint8_t version_supported; } __packed hv_vmbus_channel_version_response; typedef hv_vmbus_channel_msg_header hv_vmbus_channel_unload; #define HW_MACADDR_LEN 6 /* * Fixme: Added to quiet "typeof" errors involving hv_vmbus.h when * the including C file was compiled with "-std=c99". */ #ifndef typeof #define typeof __typeof #endif #ifndef NULL #define NULL (void *)0 #endif typedef void *hv_vmbus_handle; #ifndef CONTAINING_RECORD #define CONTAINING_RECORD(address, type, field) ((type *)( \ (uint8_t *)(address) - \ (uint8_t *)(&((type *)0)->field))) #endif /* CONTAINING_RECORD */ #define container_of(ptr, type, member) ({ \ __typeof__( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) enum { HV_VMBUS_IVAR_TYPE, HV_VMBUS_IVAR_INSTANCE, HV_VMBUS_IVAR_NODE, HV_VMBUS_IVAR_DEVCTX }; #define HV_VMBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type) HV_VMBUS_ACCESSOR(type, TYPE, const char *) HV_VMBUS_ACCESSOR(devctx, DEVCTX, struct hv_device *) /* * Common defines for Hyper-V ICs */ #define HV_ICMSGTYPE_NEGOTIATE 0 #define HV_ICMSGTYPE_HEARTBEAT 1 #define HV_ICMSGTYPE_KVPEXCHANGE 2 #define HV_ICMSGTYPE_SHUTDOWN 3 #define HV_ICMSGTYPE_TIMESYNC 4 #define HV_ICMSGTYPE_VSS 5 #define HV_ICMSGHDRFLAG_TRANSACTION 1 #define HV_ICMSGHDRFLAG_REQUEST 2 #define HV_ICMSGHDRFLAG_RESPONSE 4 typedef struct hv_vmbus_pipe_hdr { uint32_t flags; uint32_t msgsize; } __packed hv_vmbus_pipe_hdr; typedef struct hv_vmbus_ic_version { uint16_t major; uint16_t minor; } __packed hv_vmbus_ic_version; typedef struct hv_vmbus_icmsg_hdr { hv_vmbus_ic_version icverframe; uint16_t icmsgtype; hv_vmbus_ic_version icvermsg; uint16_t icmsgsize; uint32_t status; uint8_t ictransaction_id; uint8_t icflags; uint8_t reserved[2]; } __packed hv_vmbus_icmsg_hdr; typedef struct hv_vmbus_icmsg_negotiate { uint16_t icframe_vercnt; uint16_t icmsg_vercnt; uint32_t reserved; hv_vmbus_ic_version icversion_data[1]; /* any size array */ } __packed hv_vmbus_icmsg_negotiate; typedef struct hv_vmbus_shutdown_msg_data { uint32_t reason_code; uint32_t timeout_seconds; uint32_t flags; uint8_t display_message[2048]; } __packed hv_vmbus_shutdown_msg_data; typedef struct hv_vmbus_heartbeat_msg_data { uint64_t seq_num; uint32_t reserved[8]; } __packed hv_vmbus_heartbeat_msg_data; typedef struct { /* * offset in bytes from the start of ring data below */ volatile uint32_t write_index; /* * offset in bytes from the start of ring data below */ volatile uint32_t read_index; /* * NOTE: The interrupt_mask field is used only for channels, but * vmbus connection also uses this data structure */ volatile uint32_t interrupt_mask; /* pad it to PAGE_SIZE so that data starts on a page */ uint8_t reserved[4084]; /* * WARNING: Ring data starts here + ring_data_start_offset * !!! DO NOT place any fields below this !!! */ uint8_t buffer[0]; /* doubles as interrupt mask */ } __packed hv_vmbus_ring_buffer; typedef struct { int length; int offset; uint64_t pfn; } __packed hv_vmbus_page_buffer; typedef struct { int length; int offset; uint64_t pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT]; } __packed hv_vmbus_multipage_buffer; typedef struct { hv_vmbus_ring_buffer* ring_buffer; uint32_t ring_size; /* Include the shared header */ struct mtx ring_lock; uint32_t ring_data_size; /* ring_size */ uint32_t ring_data_start_offset; } hv_vmbus_ring_buffer_info; typedef void (*hv_vmbus_pfn_channel_callback)(void *context); typedef void (*hv_vmbus_sc_creation_callback)(void *context); typedef enum { HV_CHANNEL_OFFER_STATE, HV_CHANNEL_OPENING_STATE, HV_CHANNEL_OPEN_STATE, HV_CHANNEL_OPENED_STATE, HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE, } hv_vmbus_channel_state; /* * Connection identifier type */ typedef union { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u; } __packed hv_vmbus_connection_id; /* * Definition of the hv_vmbus_signal_event hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } __packed hv_vmbus_input_signal_event; typedef struct { uint64_t align8; hv_vmbus_input_signal_event event; } __packed hv_vmbus_input_signal_event_buffer; typedef struct hv_vmbus_channel { TAILQ_ENTRY(hv_vmbus_channel) list_entry; struct hv_device* device; hv_vmbus_channel_state state; hv_vmbus_channel_offer_channel offer_msg; /* * These are based on the offer_msg.monitor_id. * Save it here for easy access. */ uint8_t monitor_group; uint8_t monitor_bit; uint32_t ring_buffer_gpadl_handle; /* * Allocated memory for ring buffer */ void* ring_buffer_pages; unsigned long ring_buffer_size; uint32_t ring_buffer_page_count; /* * send to parent */ hv_vmbus_ring_buffer_info outbound; /* * receive from parent */ hv_vmbus_ring_buffer_info inbound; - struct mtx inbound_lock; - struct taskqueue * rxq; struct task channel_task; hv_vmbus_pfn_channel_callback on_channel_callback; void* channel_callback_context; /* * If batched_reading is set to "true", mask the interrupt * and read until the channel is empty. * If batched_reading is set to "false", the channel is not * going to perform batched reading. * * Batched reading is enabled by default; specific * drivers that don't want this behavior can turn it off. */ boolean_t batched_reading; boolean_t is_dedicated_interrupt; /* * Used as an input param for HV_CALL_SIGNAL_EVENT hypercall. */ hv_vmbus_input_signal_event_buffer signal_event_buffer; /* * 8-bytes aligned of the buffer above */ hv_vmbus_input_signal_event *signal_event_param; /* * From Win8, this field specifies the target virtual process * on which to deliver the interupt from the host to guest. * Before Win8, all channel interrupts would only be * delivered on cpu 0. Setting this value to 0 would preserve * the earlier behavior. */ uint32_t target_vcpu; /* The corresponding CPUID in the guest */ uint32_t target_cpu; /* * Support for multi-channels. * The initial offer is considered the primary channel and this * offer message will indicate if the host supports multi-channels. * The guest is free to ask for multi-channels to be offerred and can * open these multi-channels as a normal "primary" channel. However, * all multi-channels will have the same type and instance guids as the * primary channel. Requests sent on a given channel will result in a * response on the same channel. */ /* * Multi-channel creation callback. This callback will be called in * process context when a Multi-channel offer is received from the host. * The guest can open the Multi-channel in the context of this callback. */ hv_vmbus_sc_creation_callback sc_creation_callback; struct mtx sc_lock; /* * Link list of all the multi-channels if this is a primary channel */ TAILQ_HEAD(, hv_vmbus_channel) sc_list_anchor; TAILQ_ENTRY(hv_vmbus_channel) sc_list_entry; /* * The primary channel this sub-channle belongs to. * This will be NULL for the primary channel. */ struct hv_vmbus_channel *primary_channel; /* * Support per channel state for use by vmbus drivers. */ void *per_channel_state; } hv_vmbus_channel; static inline void hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state) { channel->batched_reading = state; } typedef struct hv_device { hv_guid class_id; hv_guid device_id; device_t device; hv_vmbus_channel* channel; } hv_device; int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_open( hv_vmbus_channel* channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context); void hv_vmbus_channel_close(hv_vmbus_channel *channel); int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags); int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, /* must be phys and virt contiguous */ void* contig_buffer, /* page-size multiple */ uint32_t size, uint32_t* gpadl_handle); int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle); struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); /** * @brief Get physical address from virtual */ static inline unsigned long hv_get_phys_addr(void *virt) { unsigned long ret; ret = (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK)); return (ret); } extern uint32_t hv_vmbus_protocal_version; #endif /* __HYPERV_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 296082) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 296083) @@ -1,1041 +1,1039 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV vmbus network VSC (virtual services client) module * */ #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver"); /* * Forward declarations */ static void hv_nv_on_channel_callback(void *context); static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device); static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device); static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev); static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev); static int hv_nv_connect_to_vsp(struct hv_device *device); static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct hv_device *device, hv_vm_packet_descriptor *pkt); static void hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device, hv_vm_packet_descriptor *pkt); /* * */ static inline netvsc_dev * hv_nv_alloc_net_device(struct hv_device *device) { netvsc_dev *net_dev; hn_softc_t *sc = device_get_softc(device->device); net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_WAITOK | M_ZERO); net_dev->dev = device; net_dev->destroy = FALSE; sc->net_dev = net_dev; return (net_dev); } /* * */ static inline netvsc_dev * hv_nv_get_outbound_net_device(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev;; if ((net_dev != NULL) && net_dev->destroy) { return (NULL); } return (net_dev); } /* * */ static inline netvsc_dev * hv_nv_get_inbound_net_device(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev;; if (net_dev == NULL) { return (net_dev); } /* * When the device is being destroyed; we only * permit incoming packets if and only if there * are outstanding sends. */ if (net_dev->destroy && net_dev->num_outstanding_sends == 0) { return (NULL); } return (net_dev); } int hv_nv_get_next_send_section(netvsc_dev *net_dev) { unsigned long bitsmap_words = net_dev->bitsmap_words; unsigned long *bitsmap = net_dev->send_section_bitsmap; unsigned long idx; int ret = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; int i; for (i = 0; i < bitsmap_words; i++) { idx = ffsl(~bitsmap[i]); if (0 == idx) continue; idx--; KASSERT(i * BITS_PER_LONG + idx < net_dev->send_section_count, ("invalid i %d and idx %lu", i, idx)); if (atomic_testandset_long(&bitsmap[i], idx)) continue; ret = i * BITS_PER_LONG + idx; break; } return (ret); } /* * Net VSC initialize receive buffer with net VSP * * Net VSP: Network virtual services client, also known as the * Hyper-V extensible switch and the synthetic data path. */ static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_NETVSC, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); /* * Establish the GPADL handle for this buffer on this channel. * Note: This call uses the vmbus connection rather than the * channel to establish the gpadl handle. * GPADL: Guest physical address descriptor list. */ ret = hv_vmbus_channel_establish_gpadl( device->channel, net_dev->rx_buf, net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle); if (ret != 0) { goto cleanup; } /* sema_wait(&ext->channel_init_sema); KYS CHECK */ /* Notify the NetVsp of the gpadl handle */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf; init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = net_dev->rx_buf_gpadl_handle; init_pkt->msgs.vers_1_msgs.send_rx_buf.id = NETVSC_RECEIVE_BUFFER_ID; /* Send the gpadl notification request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } sema_wait(&net_dev->channel_init_sema); /* Check the response */ if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status != nvsp_status_success) { ret = EINVAL; goto cleanup; } net_dev->rx_section_count = init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; net_dev->rx_sections = malloc(net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK); memcpy(net_dev->rx_sections, init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections, net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section)); /* * For first release, there should only be 1 section that represents * the entire receive buffer */ if (net_dev->rx_section_count != 1 || net_dev->rx_sections->offset != 0) { ret = EINVAL; goto cleanup; } goto exit; cleanup: hv_nv_destroy_rx_buffer(net_dev); exit: return (ret); } /* * Net VSC initialize send buffer with net VSP */ static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } net_dev->send_buf = contigmalloc(net_dev->send_buf_size, M_NETVSC, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); if (net_dev->send_buf == NULL) { ret = ENOMEM; goto cleanup; } /* * Establish the gpadl handle for this buffer on this channel. * Note: This call uses the vmbus connection rather than the * channel to establish the gpadl handle. */ ret = hv_vmbus_channel_establish_gpadl(device->channel, net_dev->send_buf, net_dev->send_buf_size, &net_dev->send_buf_gpadl_handle); if (ret != 0) { goto cleanup; } /* Notify the NetVsp of the gpadl handle */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf; init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = net_dev->send_buf_gpadl_handle; init_pkt->msgs.vers_1_msgs.send_rx_buf.id = NETVSC_SEND_BUFFER_ID; /* Send the gpadl notification request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } sema_wait(&net_dev->channel_init_sema); /* Check the response */ if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status != nvsp_status_success) { ret = EINVAL; goto cleanup; } net_dev->send_section_size = init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size; net_dev->send_section_count = net_dev->send_buf_size / net_dev->send_section_size; net_dev->bitsmap_words = howmany(net_dev->send_section_count, BITS_PER_LONG); net_dev->send_section_bitsmap = malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC, M_WAITOK | M_ZERO); goto exit; cleanup: hv_nv_destroy_send_buffer(net_dev); exit: return (ret); } /* * Net VSC destroy receive buffer */ static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) { nvsp_msg *revoke_pkt; int ret = 0; /* * If we got a section count, it means we received a * send_rx_buf_complete msg * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, * we need to send a revoke msg here */ if (net_dev->rx_section_count) { /* Send the revoke receive buffer */ revoke_pkt = &net_dev->revoke_packet; memset(revoke_pkt, 0, sizeof(nvsp_msg)); revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf; revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id = NETVSC_RECEIVE_BUFFER_ID; ret = hv_vmbus_channel_send_packet(net_dev->dev->channel, revoke_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } } /* Tear down the gpadl on the vsp end */ if (net_dev->rx_buf_gpadl_handle) { ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel, net_dev->rx_buf_gpadl_handle); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } net_dev->rx_buf_gpadl_handle = 0; } if (net_dev->rx_buf) { /* Free up the receive buffer */ contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_NETVSC); net_dev->rx_buf = NULL; } if (net_dev->rx_sections) { free(net_dev->rx_sections, M_NETVSC); net_dev->rx_sections = NULL; net_dev->rx_section_count = 0; } return (ret); } /* * Net VSC destroy send buffer */ static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev) { nvsp_msg *revoke_pkt; int ret = 0; /* * If we got a section count, it means we received a * send_rx_buf_complete msg * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, * we need to send a revoke msg here */ if (net_dev->send_section_size) { /* Send the revoke send buffer */ revoke_pkt = &net_dev->revoke_packet; memset(revoke_pkt, 0, sizeof(nvsp_msg)); revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_send_buf; revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id = NETVSC_SEND_BUFFER_ID; ret = hv_vmbus_channel_send_packet(net_dev->dev->channel, revoke_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } } /* Tear down the gpadl on the vsp end */ if (net_dev->send_buf_gpadl_handle) { ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel, net_dev->send_buf_gpadl_handle); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } net_dev->send_buf_gpadl_handle = 0; } if (net_dev->send_buf) { /* Free up the receive buffer */ contigfree(net_dev->send_buf, net_dev->send_buf_size, M_NETVSC); net_dev->send_buf = NULL; } if (net_dev->send_section_bitsmap) { free(net_dev->send_section_bitsmap, M_NETVSC); } return (ret); } /* * Attempt to negotiate the caller-specified NVSP version * * For NVSP v2, Server 2008 R2 does not set * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers * to the negotiated version, so we cannot rely on that. */ static int hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev, uint32_t nvsp_ver) { nvsp_msg *init_pkt; int ret; init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_type_init; /* * Specify parameter as the only acceptable protocol version */ init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver; init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver; /* Send the init request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) return (-1); sema_wait(&net_dev->channel_init_sema); if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success) return (EINVAL); return (0); } /* * Send NDIS version 2 config packet containing MTU. * * Not valid for NDIS version 1. */ static int hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) return (-ENODEV); /* * Set up configuration packet, write MTU * Indicate we are capable of handling VLAN tags */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config; init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu; init_pkt-> msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q = 1; /* Send the configuration packet */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (ret != 0) return (-EINVAL); return (0); } /* * Net VSC connect to VSP */ static int hv_nv_connect_to_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; uint32_t ndis_version; uint32_t protocol_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; int i; int protocol_number = nitems(protocol_list); int ret = 0; device_t dev = device->device; hn_softc_t *sc = device_get_softc(dev); struct ifnet *ifp = sc->hn_ifp; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } /* * Negotiate the NVSP version. Try the latest NVSP first. */ for (i = protocol_number - 1; i >= 0; i--) { if (hv_nv_negotiate_nvsp_protocol(device, net_dev, protocol_list[i]) == 0) { net_dev->nvsp_version = protocol_list[i]; if (bootverbose) device_printf(dev, "Netvsc: got version 0x%x\n", net_dev->nvsp_version); break; } } if (i < 0) { if (bootverbose) device_printf(dev, "failed to negotiate a valid " "protocol.\n"); return (EPROTO); } /* * Set the MTU if supported by this NVSP protocol version * This needs to be right after the NVSP init message per Haiyang */ if (net_dev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) ret = hv_nv_send_ndis_config(device, ifp->if_mtu); /* * Send the NDIS version */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) { ndis_version = NDIS_VERSION_6_1; } else { ndis_version = NDIS_VERSION_6_30; } init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers = (ndis_version & 0xFFFF0000) >> 16; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers = ndis_version & 0xFFFF; /* Send the init request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (ret != 0) { goto cleanup; } /* * TODO: BUGBUG - We have to wait for the above msg since the netvsp * uses KMCL which acknowledges packet (completion packet) * since our Vmbus always set the * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag */ /* sema_wait(&NetVscChannel->channel_init_sema); */ /* Post the big receive buffer to NetVSP */ if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; else net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; ret = hv_nv_init_rx_buffer_with_net_vsp(device); if (ret == 0) ret = hv_nv_init_send_buffer_with_net_vsp(device); cleanup: return (ret); } /* * Net VSC disconnect from VSP */ static void hv_nv_disconnect_from_vsp(netvsc_dev *net_dev) { hv_nv_destroy_rx_buffer(net_dev); hv_nv_destroy_send_buffer(net_dev); } /* * Net VSC on device add * * Callback when the device belonging to this driver is added */ netvsc_dev * hv_nv_on_device_add(struct hv_device *device, void *additional_info) { netvsc_dev *net_dev; int ret = 0; net_dev = hv_nv_alloc_net_device(device); if (!net_dev) goto cleanup; /* Initialize the NetVSC channel extension */ sema_init(&net_dev->channel_init_sema, 0, "netdev_sema"); /* * Open the channel */ ret = hv_vmbus_channel_open(device->channel, NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0, hv_nv_on_channel_callback, device); if (ret != 0) goto cleanup; /* * Connect with the NetVsp */ ret = hv_nv_connect_to_vsp(device); if (ret != 0) goto close; return (net_dev); close: /* Now, we can close the channel safely */ hv_vmbus_channel_close(device->channel); cleanup: /* * Free the packet buffers on the netvsc device packet queue. * Release other resources. */ if (net_dev) { sema_destroy(&net_dev->channel_init_sema); free(net_dev, M_NETVSC); } return (NULL); } /* * Net VSC on device remove */ int hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev;; /* Stop outbound traffic ie sends and receives completions */ - mtx_lock(&device->channel->inbound_lock); net_dev->destroy = TRUE; - mtx_unlock(&device->channel->inbound_lock); /* Wait for all send completions */ while (net_dev->num_outstanding_sends) { DELAY(100); } hv_nv_disconnect_from_vsp(net_dev); /* At this point, no one should be accessing net_dev except in here */ /* Now, we can close the channel safely */ if (!destroy_channel) { device->channel->state = HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE; } hv_vmbus_channel_close(device->channel); sema_destroy(&net_dev->channel_init_sema); free(net_dev, M_NETVSC); return (0); } /* * Net VSC on send completion */ static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct hv_device *device, hv_vm_packet_descriptor *pkt) { nvsp_msg *nvsp_msg_pkt; netvsc_packet *net_vsc_pkt; nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_rx_buf_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_send_buf_complete) { /* Copy the response back */ memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt, sizeof(nvsp_msg)); sema_post(&net_dev->channel_init_sema); } else if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_rndis_pkt_complete) { /* Get the send context */ net_vsc_pkt = (netvsc_packet *)(unsigned long)pkt->transaction_id; if (NULL != net_vsc_pkt) { if (net_vsc_pkt->send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { u_long mask; int idx; idx = net_vsc_pkt->send_buf_section_idx / BITS_PER_LONG; KASSERT(idx < net_dev->bitsmap_words, ("invalid section index %u", net_vsc_pkt->send_buf_section_idx)); mask = 1UL << (net_vsc_pkt->send_buf_section_idx % BITS_PER_LONG); KASSERT(net_dev->send_section_bitsmap[idx] & mask, ("index bitmap 0x%lx, section index %u, " "bitmap idx %d, bitmask 0x%lx", net_dev->send_section_bitsmap[idx], net_vsc_pkt->send_buf_section_idx, idx, mask)); atomic_clear_long( &net_dev->send_section_bitsmap[idx], mask); } /* Notify the layer above us */ net_vsc_pkt->compl.send.on_send_completion( net_vsc_pkt->compl.send.send_completion_context); } atomic_subtract_int(&net_dev->num_outstanding_sends, 1); } } /* * Net VSC on send * Sends a packet on the specified Hyper-V device. * Returns 0 on success, non-zero on failure. */ int hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt) { netvsc_dev *net_dev; nvsp_msg send_msg; int ret; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) return (ENODEV); send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt; if (pkt->is_data_pkt) { /* 0 is RMC_DATA */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0; } else { /* 1 is RMC_CONTROL */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1; } send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx = pkt->send_buf_section_idx; send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = pkt->send_buf_section_size; if (pkt->page_buf_count) { ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel, pkt->page_buffers, pkt->page_buf_count, &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); } else { ret = hv_vmbus_channel_send_packet(device->channel, &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); } /* Record outstanding send only if send_packet() succeeded */ if (ret == 0) atomic_add_int(&net_dev->num_outstanding_sends, 1); return (ret); } /* * Net VSC on receive * * In the FreeBSD Hyper-V virtual world, this function deals exclusively * with virtual addresses. */ static void hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device, hv_vm_packet_descriptor *pkt) { hv_vm_transfer_page_packet_header *vm_xfer_page_pkt; nvsp_msg *nvsp_msg_pkt; netvsc_packet vsc_pkt; netvsc_packet *net_vsc_pkt = &vsc_pkt; device_t dev = device->device; int count = 0; int i = 0; int status = nvsp_status_success; /* * All inbound packets other than send completion should be * xfer page packet. */ if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES) { device_printf(dev, "packet type %d is invalid!\n", pkt->type); return; } nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); /* Make sure this is a valid nvsp packet */ if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt) { device_printf(dev, "packet hdr type %d is invalid!\n", pkt->type); return; } vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt; if (vm_xfer_page_pkt->transfer_page_set_id != NETVSC_RECEIVE_BUFFER_ID) { device_printf(dev, "transfer_page_set_id %d is invalid!\n", vm_xfer_page_pkt->transfer_page_set_id); return; } count = vm_xfer_page_pkt->range_count; net_vsc_pkt->device = device; /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ for (i = 0; i < count; i++) { net_vsc_pkt->status = nvsp_status_success; net_vsc_pkt->data = (void *)((unsigned long)net_dev->rx_buf + vm_xfer_page_pkt->ranges[i].byte_offset); net_vsc_pkt->tot_data_buf_len = vm_xfer_page_pkt->ranges[i].byte_count; hv_rf_on_receive(net_dev, device, net_vsc_pkt); if (net_vsc_pkt->status != nvsp_status_success) { status = nvsp_status_failure; } } /* * Moved completion call back here so that all received * messages (not just data messages) will trigger a response * message back to the host. */ hv_nv_on_receive_completion(device, vm_xfer_page_pkt->d.transaction_id, status); hv_rf_receive_rollup(net_dev); } /* * Net VSC on receive completion * * Send a receive completion packet to RNDIS device (ie NetVsp) */ void hv_nv_on_receive_completion(struct hv_device *device, uint64_t tid, uint32_t status) { nvsp_msg rx_comp_msg; int retries = 0; int ret = 0; rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete; /* Pass in the status */ rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status = status; retry_send_cmplt: /* Send the completion */ ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg, sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0); if (ret == 0) { /* success */ /* no-op */ } else if (ret == EAGAIN) { /* no more room... wait a bit and attempt to retry 3 times */ retries++; if (retries < 4) { DELAY(100); goto retry_send_cmplt; } } } /* * Net VSC on channel callback */ static void hv_nv_on_channel_callback(void *context) { struct hv_device *device = (struct hv_device *)context; netvsc_dev *net_dev; device_t dev = device->device; uint32_t bytes_rxed; uint64_t request_id; hv_vm_packet_descriptor *desc; uint8_t *buffer; int bufferlen = NETVSC_PACKET_SIZE; int ret = 0; net_dev = hv_nv_get_inbound_net_device(device); if (net_dev == NULL) return; buffer = net_dev->callback_buf; do { ret = hv_vmbus_channel_recv_packet_raw(device->channel, buffer, bufferlen, &bytes_rxed, &request_id); if (ret == 0) { if (bytes_rxed > 0) { desc = (hv_vm_packet_descriptor *)buffer; switch (desc->type) { case HV_VMBUS_PACKET_TYPE_COMPLETION: hv_nv_on_send_completion(net_dev, device, desc); break; case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES: hv_nv_on_receive(net_dev, device, desc); break; default: device_printf(dev, "hv_cb recv unknow type %d " " packet\n", desc->type); break; } } else { break; } } else if (ret == ENOBUFS) { /* Handle large packet */ if (bufferlen > NETVSC_PACKET_SIZE) { free(buffer, M_NETVSC); buffer = NULL; } /* alloc new buffer */ buffer = malloc(bytes_rxed, M_NETVSC, M_NOWAIT); if (buffer == NULL) { device_printf(dev, "hv_cb malloc buffer failed, len=%u\n", bytes_rxed); bufferlen = 0; break; } bufferlen = bytes_rxed; } } while (1); if (bufferlen > NETVSC_PACKET_SIZE) free(buffer, M_NETVSC); hv_rf_channel_rollup(net_dev); } Index: head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 296082) +++ head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 296083) @@ -1,2144 +1,2142 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface * to the Comman Access Method (CAM) layer. CAM control blocks (CCBs) are * converted into VSCSI protocol messages which are delivered to the parent * partition StorVSP driver over the Hyper-V VMBUS. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vstorage.h" #define STORVSC_RINGBUFFER_SIZE (20*PAGE_SIZE) #define STORVSC_MAX_LUNS_PER_TARGET (64) #define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2) #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1) #define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS #define STORVSC_MAX_TARGETS (2) #define STORVSC_WIN7_MAJOR 4 #define STORVSC_WIN7_MINOR 2 #define STORVSC_WIN8_MAJOR 5 #define STORVSC_WIN8_MINOR 1 #define VSTOR_PKT_SIZE (sizeof(struct vstor_packet) - vmscsi_size_delta) #define HV_ALIGN(x, a) roundup2(x, a) struct storvsc_softc; struct hv_sgl_node { LIST_ENTRY(hv_sgl_node) link; struct sglist *sgl_data; }; struct hv_sgl_page_pool{ LIST_HEAD(, hv_sgl_node) in_use_sgl_list; LIST_HEAD(, hv_sgl_node) free_sgl_list; boolean_t is_init; } g_hv_sgl_page_pool; #define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT enum storvsc_request_type { WRITE_TYPE, READ_TYPE, UNKNOWN_TYPE }; struct hv_storvsc_request { LIST_ENTRY(hv_storvsc_request) link; struct vstor_packet vstor_packet; hv_vmbus_multipage_buffer data_buf; void *sense_data; uint8_t sense_info_len; uint8_t retries; union ccb *ccb; struct storvsc_softc *softc; struct callout callout; struct sema synch_sema; /*Synchronize the request/response if needed */ struct sglist *bounce_sgl; unsigned int bounce_sgl_count; uint64_t not_aligned_seg_bits; }; struct storvsc_softc { struct hv_device *hs_dev; LIST_HEAD(, hv_storvsc_request) hs_free_list; struct mtx hs_lock; struct storvsc_driver_props *hs_drv_props; int hs_unit; uint32_t hs_frozen; struct cam_sim *hs_sim; struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; boolean_t hs_open_multi_channel; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; }; /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. */ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; uint8_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ static const hv_guid gStorVscDeviceType={ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ static const hv_guid gBlkVscDeviceType={ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE Storage Interface", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE}, {"storvsc", "Hyper-V SCSI Storage Interface", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE} }; /* * Sense buffer size changed in win8; have a run-time * variable to track the size we should use. */ static int sense_buffer_size; /* * The size of the vmscsi_request has changed in win8. The * additional size is for the newly added elements in the * structure. These elements are valid only when we are talking * to a win8 host. * Track the correct size we need to apply. */ static int vmscsi_size_delta; static int storvsc_current_major; static int storvsc_current_minor; /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_rescan_target(struct storvsc_softc *sc); static void hv_storvsc_on_channel_callback(void *context); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct hv_device *device); static void storvsc_io_done(struct hv_storvsc_request *reqp); static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits); void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); /** * The host is capable of sending messages to us that are * completely unsolicited. So, we need to address the race * condition where we may be in the process of unloading the * driver when the host may send us an unsolicited message. * We address this issue by implementing a sequentially * consistent protocol: * * 1. Channel callback is invoked while holding the the channel lock * and an unloading driver will reset the channel callback under * the protection of this channel lock. * * 2. To ensure bounded wait time for unloading a driver, we don't * permit outgoing traffic once the device is marked as being * destroyed. * * 3. Once the device is marked as being destroyed, we only * permit incoming traffic to properly account for * packets already sent out. */ static inline struct storvsc_softc * get_stor_device(struct hv_device *device, boolean_t outbound) { struct storvsc_softc *sc; sc = device_get_softc(device->device); if (sc == NULL) { return NULL; } if (outbound) { /* * Here we permit outgoing I/O only * if the device is not being destroyed. */ if (sc->hs_destroy) { sc = NULL; } } else { /* * inbound case; if being destroyed * only permit to account for * messages already sent out. */ if (sc->hs_destroy && (sc->hs_num_out_reqs == 0)) { sc = NULL; } } return sc; } /** * @brief Callback handler, will be invoked when receive mutil-channel offer * * @param context new multi-channel */ static void storvsc_handle_sc_creation(void *context) { hv_vmbus_channel *new_channel; struct hv_device *device; struct storvsc_softc *sc; struct vmstor_chan_props props; int ret = 0; new_channel = (hv_vmbus_channel *)context; device = new_channel->primary_channel->device; sc = get_stor_device(device, TRUE); if (sc == NULL) return; if (FALSE == sc->hs_open_multi_channel) return; memset(&props, 0, sizeof(props)); ret = hv_vmbus_channel_open(new_channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, new_channel); return; } /** * @brief Send multi-channel creation request to host * * @param device a Hyper-V device pointer * @param max_chans the max channels supported by vmbus */ static void storvsc_send_multichannel_request(struct hv_device *dev, int max_chans) { struct storvsc_softc *sc; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; int request_channels_cnt = 0; int ret; /* get multichannels count that need to create */ request_channels_cnt = MIN(max_chans, mp_ncpus); sc = get_stor_device(dev, TRUE); if (sc == NULL) { printf("Storvsc_error: get sc failed while send mutilchannel " "request\n"); return; } request = &sc->hs_init_req; /* Establish a handler for multi-channel */ dev->channel->sc_creation_callback = storvsc_handle_sc_creation; /* request the host to create multi-channel */ memset(request, 0, sizeof(struct hv_storvsc_request)); sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet = &request->vstor_packet; vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.multi_channels_cnt = request_channels_cnt; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); /* wait for 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) { printf("Storvsc_error: create multi-channel timeout, %d\n", ret); return; } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { printf("Storvsc_error: create multi-channel invalid operation " "(%d) or statue (%u)\n", vstor_packet->operation, vstor_packet->status); return; } sc->hs_open_multi_channel = TRUE; if (bootverbose) printf("Storvsc create multi-channel success!\n"); } /** * @brief initialize channel connection to parent partition * * @param dev a Hyper-V device pointer * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_channel_init(struct hv_device *dev) { int ret = 0; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; struct storvsc_softc *sc; uint16_t max_chans = 0; boolean_t support_multichannel = FALSE; max_chans = 0; support_multichannel = FALSE; sc = get_stor_device(dev, TRUE); if (sc == NULL) return (ENODEV); request = &sc->hs_init_req; memset(request, 0, sizeof(struct hv_storvsc_request)); vstor_packet = &request->vstor_packet; request->softc = sc; /** * Initiate the vsc/vsp initialization protocol on the open channel */ sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.version.major_minor = VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor); /* revision is only significant for Windows guests */ vstor_packet->u.version.revision = 0; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret) goto cleanup; /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) goto cleanup; /** * Query channel properties */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if ( ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* multi-channels feature is supported by WIN8 and above version */ max_chans = vstor_packet->u.chan_props.max_channel_cnt; if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) && (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) && (vstor_packet->u.chan_props.flags & HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) { support_multichannel = TRUE; } memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) goto cleanup; /* * If multi-channel is supported, send multichannel create * request to host. */ if (support_multichannel) storvsc_send_multichannel_request(dev, max_chans); cleanup: sema_destroy(&request->synch_sema); return (ret); } /** * @brief Open channel connection to paraent partition StorVSP driver * * Open and initialize channel connection to parent partition StorVSP driver. * * @param pointer to a Hyper-V device * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_connect_vsp(struct hv_device *dev) { int ret = 0; struct vmstor_chan_props props; struct storvsc_softc *sc; sc = device_get_softc(dev->device); memset(&props, 0, sizeof(struct vmstor_chan_props)); /* * Open the channel */ ret = hv_vmbus_channel_open( dev->channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, dev->channel); if (ret != 0) { return ret; } ret = hv_storvsc_channel_init(dev); return (ret); } #if HVS_HOST_RESET static int hv_storvsc_host_reset(struct hv_device *dev) { int ret = 0; struct storvsc_softc *sc; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; sc = get_stor_device(dev, TRUE); if (sc == NULL) { return ENODEV; } request = &sc->hs_reset_req; request->softc = sc; vstor_packet = &request->vstor_packet; sema_init(&request->synch_sema, 0, "stor synch sema"); vstor_packet->operation = VSTOR_OPERATION_RESETBUS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet(dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)&sc->hs_reset_req, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { goto cleanup; } /* * At this point, all outstanding requests in the adapter * should have been flushed out and return to us */ cleanup: sema_destroy(&request->synch_sema); return (ret); } #endif /* HVS_HOST_RESET */ /** * @brief Function to initiate an I/O request * * @param device Hyper-V device pointer * @param request pointer to a request structure * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_io_request(struct hv_device *device, struct hv_storvsc_request *request) { struct storvsc_softc *sc; struct vstor_packet *vstor_packet = &request->vstor_packet; struct hv_vmbus_channel* outgoing_channel = NULL; int ret = 0; sc = get_stor_device(device, TRUE); if (sc == NULL) { return ENODEV; } vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE; vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size; vstor_packet->u.vm_srb.transfer_len = request->data_buf.length; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; outgoing_channel = vmbus_select_outgoing_channel(device->channel); mtx_unlock(&request->softc->hs_lock); if (request->data_buf.length) { ret = hv_vmbus_channel_send_packet_multipagebuffer( outgoing_channel, &request->data_buf, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } else { ret = hv_vmbus_channel_send_packet( outgoing_channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); } /** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. */ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_rescan_target(struct storvsc_softc *sc) { path_id_t pathid; target_id_t targetid; union ccb *ccb; pathid = cam_sim_path(sc->hs_sim); targetid = CAM_TARGET_WILDCARD; /* * Allocate a CCB and schedule a rescan. */ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { printf("unable to create path for rescan, pathid: %u," "targetid: %u\n", pathid, targetid); xpt_free_ccb(ccb); return; } if (targetid == CAM_TARGET_WILDCARD) ccb->ccb_h.func_code = XPT_SCAN_BUS; else ccb->ccb_h.func_code = XPT_SCAN_TGT; xpt_rescan(ccb); } static void hv_storvsc_on_channel_callback(void *context) { int ret = 0; hv_vmbus_channel *channel = (hv_vmbus_channel *)context; struct hv_device *device = NULL; struct storvsc_softc *sc; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; if (channel->primary_channel != NULL){ device = channel->primary_channel->device; } else { device = channel->device; } KASSERT(device, ("device is NULL")); sc = get_stor_device(device, FALSE); if (sc == NULL) { printf("Storvsc_error: get stor device failed.\n"); return; } ret = hv_vmbus_channel_recv_packet( channel, packet, roundup2(VSTOR_PKT_SIZE, 8), &bytes_recvd, &request_id); while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: if (request == NULL) panic("VMBUS: storvsc received a " "packet with NULL request id in " "COMPLETEIO operation."); hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: printf("VMBUS: storvsc operation %d not " "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; case VSTOR_OPERATION_ENUMERATE_BUS: hv_storvsc_rescan_target(sc); break; default: break; } } ret = hv_vmbus_channel_recv_packet( channel, packet, roundup2(VSTOR_PKT_SIZE, 8), &bytes_recvd, &request_id); } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, a ENXIO is returned. If the input device is * for BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. * * @param a device * @returns 0 on success, ENXIO if not a matcing StorVSC device */ static int storvsc_probe(device_t dev) { int ata_disk_enable = 0; int ret = ENXIO; if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 || hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) { sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; vmscsi_size_delta = sizeof(struct vmscsi_win8_extension); storvsc_current_major = STORVSC_WIN7_MAJOR; storvsc_current_minor = STORVSC_WIN7_MINOR; } else { sense_buffer_size = POST_WIN7_STORVSC_SENSE_BUFFER_SIZE; vmscsi_size_delta = 0; storvsc_current_major = STORVSC_WIN8_MAJOR; storvsc_current_minor = STORVSC_WIN8_MINOR; } switch (storvsc_get_storage_type(dev)) { case DRIVER_BLKVSC: if(bootverbose) device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n"); if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) { if(bootverbose) device_printf(dev, "Enlightened ATA/IDE detected\n"); ret = BUS_PROBE_DEFAULT; } else if(bootverbose) device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n"); break; case DRIVER_STORVSC: if(bootverbose) device_printf(dev, "Enlightened SCSI device detected\n"); ret = BUS_PROBE_DEFAULT; break; default: ret = ENXIO; } return (ret); } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. * * @param a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { struct hv_device *hv_dev = vmbus_get_devctx(dev); enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; struct hv_sgl_node *sgl_node = NULL; void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls. */ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); if (sc == NULL) { ret = ENOMEM; goto cleanup; } stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } bzero(sc, sizeof(struct storvsc_softc)); /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = hv_dev; device_set_desc(dev, g_drv_props_table[stor_type].drv_desc); LIST_INIT(&sc->hs_free_list); mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) { reqp = malloc(sizeof(struct hv_storvsc_request), M_DEVBUF, M_WAITOK|M_ZERO); reqp->softc = sc; LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /* create sg-list page pool */ if (FALSE == g_hv_sgl_page_pool.is_init) { g_hv_sgl_page_pool.is_init = TRUE; LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); /* * Pre-create SG list, each SG list with * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each * segment has one page buffer */ for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) { sgl_node = malloc(sizeof(struct hv_sgl_node), M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data = sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT, M_WAITOK|M_ZERO); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { tmp_buff = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data->sg_segs[j].ss_paddr = (vm_paddr_t)tmp_buff; } LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } } sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; sc->hs_open_multi_channel = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(hv_dev); if (ret != 0) { goto cleanup; } /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? */ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_device *hv_device = vmbus_get_devctx(dev); struct hv_sgl_node *sgl_node = NULL; int j = 0; - mtx_lock(&hv_device->channel->inbound_lock); sc->hs_destroy = TRUE; - mtx_unlock(&hv_device->channel->inbound_lock); /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ hv_vmbus_channel_close(hv_device->channel); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){ if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. * * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc->hs_dev, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); hv_storvsc_on_channel_callback(sc->hs_dev->channel); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. The requests are in the form of * CAM control blocks which indicate the action being performed. * Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc->hs_dev)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? "bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bzero(reqp, sizeof(struct hv_storvsc_request)); reqp->softc = sc; ccb->ccb_h.status |= CAM_SIM_QUEUED; if ((res = create_storvsc_request(ccb, reqp)) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, 1); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } if ((res = hv_storvsc_io_request(sc->hs_dev, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** * @brief destroy bounce buffer * * This function is responsible for destroy a Scatter/Gather list * that create by storvsc_create_bounce_buffer() * * @param sgl- the Scatter/Gather need be destroy * @param sg_count- page count of the SG list. * */ static void storvsc_destroy_bounce_buffer(struct sglist *sgl) { struct hv_sgl_node *sgl_node = NULL; if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) { printf("storvsc error: not enough in use sgl\n"); return; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_REMOVE(sgl_node, link); sgl_node->sgl_data = sgl; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } /** * @brief create bounce buffer * * This function is responsible for create a Scatter/Gather list, * which hold several pages that can be aligned with page size. * * @param seg_count- SG-list segments count * @param write - if WRITE_TYPE, set SG list page used size to 0, * otherwise set used size to page size. * * return NULL if create failed */ static struct sglist * storvsc_create_bounce_buffer(uint16_t seg_count, int write) { int i = 0; struct sglist *bounce_sgl = NULL; unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE); struct hv_sgl_node *sgl_node = NULL; /* get struct sglist from free_sgl_list */ if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { printf("storvsc error: not enough free sgl\n"); return NULL; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); bounce_sgl = sgl_node->sgl_data; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link); bounce_sgl->sg_maxseg = seg_count; if (write == WRITE_TYPE) bounce_sgl->sg_nseg = 0; else bounce_sgl->sg_nseg = seg_count; for (i = 0; i < seg_count; i++) bounce_sgl->sg_segs[i].ss_len = buf_len; return bounce_sgl; } /** * @brief copy data from SG list to bounce buffer * * This function is responsible for copy data from one SG list's segments * to another SG list which used as bounce buffer. * * @param bounce_sgl - the destination SG list * @param orig_sgl - the segment of the source SG list. * @param orig_sgl_count - the count of segments. * @param orig_sgl_count - indicate which segment need bounce buffer, * set 1 means need. * */ static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits) { int src_sgl_idx = 0; for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) { if (seg_bits & (1 << src_sgl_idx)) { memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr, (void*)orig_sgl[src_sgl_idx].ds_addr, orig_sgl[src_sgl_idx].ds_len); bounce_sgl->sg_segs[src_sgl_idx].ss_len = orig_sgl[src_sgl_idx].ds_len; } } } /** * @brief copy data from SG list which used as bounce to another SG list * * This function is responsible for copy data from one SG list with bounce * buffer to another SG list's segments. * * @param dest_sgl - the destination SG list's segments * @param dest_sgl_count - the count of destination SG list's segment. * @param src_sgl - the source SG list. * @param seg_bits - indicate which segment used bounce buffer of src SG-list. * */ void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits) { int sgl_idx = 0; for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) { if (seg_bits & (1 << sgl_idx)) { memcpy((void*)(dest_sgl[sgl_idx].ds_addr), (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr), src_sgl->sg_segs[sgl_idx].ss_len); } } } /** * @brief check SG list with bounce buffer or not * * This function is responsible for check if need bounce buffer for SG list. * * @param sgl - the SG list's segments * @param sg_count - the count of SG list's segment. * @param bits - segmengs number that need bounce buffer * * return -1 if SG list needless bounce buffer */ static int storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl, unsigned int sg_count, uint64_t *bits) { int i = 0; int offset = 0; uint64_t phys_addr = 0; uint64_t tmp_bits = 0; boolean_t found_hole = FALSE; boolean_t pre_aligned = TRUE; if (sg_count < 2){ return -1; } *bits = 0; phys_addr = vtophys(sgl[0].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset != 0) { pre_aligned = FALSE; tmp_bits |= 1; } for (i = 1; i < sg_count; i++) { phys_addr = vtophys(sgl[i].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset == 0) { if (FALSE == pre_aligned){ /* * This segment is aligned, if the previous * one is not aligned, find a hole */ found_hole = TRUE; } pre_aligned = TRUE; } else { tmp_bits |= 1 << i; if (!pre_aligned) { if (phys_addr != vtophys(sgl[i-1].ds_addr + sgl[i-1].ds_len)) { /* * Check whether connect to previous * segment,if not, find the hole */ found_hole = TRUE; } } else { found_hole = TRUE; } pre_aligned = FALSE; } } if (!found_hole) { return (-1); } else { *bits = tmp_bits; return 0; } } /** * @brief Fill in a request structure based on a CAM control block * * Fills in a request structure based on the contents of a CAM control * block. The request structure holds the payload information for * VSCSI protocol request. * * @param ccb pointer to a CAM contorl block * @param reqp pointer to a request structure */ static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp) { struct ccb_scsiio *csio = &ccb->csio; uint64_t phys_addr; uint32_t bytes_to_copy = 0; uint32_t pfn_num = 0; uint32_t pfn; uint64_t not_aligned_seg_bits = 0; /* refer to struct vmscsi_req for meanings of these two fields */ reqp->vstor_packet.u.vm_srb.port = cam_sim_unit(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.path_id = cam_sim_bus(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id; reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun; reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len; if(ccb->ccb_h.flags & CAM_CDB_POINTER) { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); } else { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); } switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE; break; case CAM_DIR_IN: reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE; break; case CAM_DIR_NONE: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; default: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; } reqp->sense_data = &csio->sense_data; reqp->sense_info_len = csio->sense_len; reqp->ccb = ccb; if (0 == csio->dxfer_len) { return (0); } reqp->data_buf.length = csio->dxfer_len; switch (ccb->ccb_h.flags & CAM_DATA_MASK) { case CAM_DATA_VADDR: { bytes_to_copy = csio->dxfer_len; phys_addr = vtophys(csio->data_ptr); reqp->data_buf.offset = phys_addr & PAGE_MASK; while (bytes_to_copy != 0) { int bytes, page_offset; phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length - bytes_to_copy]); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[pfn_num] = pfn; page_offset = phys_addr & PAGE_MASK; bytes = min(PAGE_SIZE - page_offset, bytes_to_copy); bytes_to_copy -= bytes; pfn_num++; } break; } case CAM_DATA_SG: { int i = 0; int offset = 0; int ret; bus_dma_segment_t *storvsc_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt; printf("Storvsc: get SG I/O operation, %d\n", reqp->vstor_packet.u.vm_srb.data_in); if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){ printf("Storvsc: %d segments is too much, " "only support %d segments\n", storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT); return (EINVAL); } /* * We create our own bounce buffer function currently. Idealy * we should use BUS_DMA(9) framework. But with current BUS_DMA * code there is no callback API to check the page alignment of * middle segments before busdma can decide if a bounce buffer * is needed for particular segment. There is callback, * "bus_dma_filter_t *filter", but the parrameters are not * sufficient for storvsc driver. * TODO: * Add page alignment check in BUS_DMA(9) callback. Once * this is complete, switch the following code to use * BUS_DMA(9) for storvsc bounce buffer support. */ /* check if we need to create bounce buffer */ ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, storvsc_sg_count, ¬_aligned_seg_bits); if (ret != -1) { reqp->bounce_sgl = storvsc_create_bounce_buffer(storvsc_sg_count, reqp->vstor_packet.u.vm_srb.data_in); if (NULL == reqp->bounce_sgl) { printf("Storvsc_error: " "create bounce buffer failed.\n"); return (ENOMEM); } reqp->bounce_sgl_count = storvsc_sg_count; reqp->not_aligned_seg_bits = not_aligned_seg_bits; /* * if it is write, we need copy the original data *to bounce buffer */ if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_sgl_to_bounce_buf( reqp->bounce_sgl, storvsc_sglist, storvsc_sg_count, reqp->not_aligned_seg_bits); } /* transfer virtual address to physical frame number */ if (reqp->not_aligned_seg_bits & 0x1){ phys_addr = vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); }else{ phys_addr = vtophys(storvsc_sglist[0].ds_addr); } reqp->data_buf.offset = phys_addr & PAGE_MASK; pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[0] = pfn; for (i = 1; i < storvsc_sg_count; i++) { if (reqp->not_aligned_seg_bits & (1 << i)) { phys_addr = vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); } else { phys_addr = vtophys(storvsc_sglist[i].ds_addr); } pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } } else { phys_addr = vtophys(storvsc_sglist[0].ds_addr); reqp->data_buf.offset = phys_addr & PAGE_MASK; for (i = 0; i < storvsc_sg_count; i++) { phys_addr = vtophys(storvsc_sglist[i].ds_addr); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } /* check the last segment cross boundary or not */ offset = phys_addr & PAGE_MASK; if (offset) { phys_addr = vtophys(storvsc_sglist[i-1].ds_addr + PAGE_SIZE - offset); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } reqp->bounce_sgl_count = 0; } break; } default: printf("Unknow flags: %d\n", ccb->ccb_h.flags); return(EINVAL); } return(0); } /* * Modified based on scsi_print_inquiry which is responsible to * print the detail information for scsi_inquiry_data. * * Return 1 if it is valid, 0 otherwise. */ static inline int is_inquiry_valid(const struct scsi_inquiry_data *inq_data) { uint8_t type; char vendor[16], product[48], revision[16]; /* * Check device type and qualifier */ if (!(SID_QUAL_IS_VENDOR_UNIQUE(inq_data) || SID_QUAL(inq_data) == SID_QUAL_LU_CONNECTED)) return (0); type = SID_TYPE(inq_data); switch (type) { case T_DIRECT: case T_SEQUENTIAL: case T_PRINTER: case T_PROCESSOR: case T_WORM: case T_CDROM: case T_SCANNER: case T_OPTICAL: case T_CHANGER: case T_COMM: case T_STORARRAY: case T_ENCLOSURE: case T_RBC: case T_OCRW: case T_OSD: case T_ADC: break; case T_NODEVICE: default: return (0); } /* * Check vendor, product, and revision */ cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor), sizeof(vendor)); cam_strvis(product, inq_data->product, sizeof(inq_data->product), sizeof(product)); cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision), sizeof(revision)); if (strlen(vendor) == 0 || strlen(product) == 0 || strlen(revision) == 0) return (0); return (1); } /** * @brief completion function before returning to CAM * * I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. * * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; bus_dma_segment_t *ori_sglist = NULL; int ori_sg_count = 0; /* destroy bounce buffer if it is used */ if (reqp->bounce_sgl_count) { ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; ori_sg_count = ccb->csio.sglist_cnt; /* * If it is READ operation, we should copy back the data * to original SG list. */ if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, ori_sg_count, reqp->bounce_sgl, reqp->not_aligned_seg_bits); } storvsc_destroy_bounce_buffer(reqp->bounce_sgl); reqp->bounce_sgl_count = 0; } if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed when timer * handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (vm_srb->scsi_status == SCSI_STATUS_OK) { const struct scsi_generic *cmd; /* * Check whether the data for INQUIRY cmd is valid or * not. Windows 10 and Windows 2016 send all zero * inquiry data to VM even for unpopulated slots. */ cmd = (const struct scsi_generic *) ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); if (cmd->opcode == INQUIRY && is_inquiry_valid( (const struct scsi_inquiry_data *)csio->data_ptr) == 0) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; if (bootverbose) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc uninstalled device\n"); mtx_unlock(&sc->hs_lock); } } else { ccb->ccb_h.status |= CAM_REQ_CMP; } } else { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc scsi_status = %d\n", vm_srb->scsi_status); mtx_unlock(&sc->hs_lock); ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine type of storage device from GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI or BlkVSC (paravirtual IDE) device. * * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { const char *p = vmbus_get_type(dev); if (!memcmp(p, &gBlkVscDeviceType, sizeof(hv_guid))) { return DRIVER_BLKVSC; } else if (!memcmp(p, &gStorVscDeviceType, sizeof(hv_guid))) { return DRIVER_STORVSC; } return (DRIVER_UNKNOWN); } Index: head/sys/dev/hyperv/vmbus/hv_channel.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel.c (revision 296082) +++ head/sys/dev/hyperv/vmbus/hv_channel.c (revision 296083) @@ -1,956 +1,943 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" static int vmbus_channel_create_gpadl_header( /* must be phys and virt contiguous*/ void* contig_buffer, /* page-size multiple */ uint32_t size, hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count); static void vmbus_channel_set_event(hv_vmbus_channel* channel); static void VmbusProcessChannelEvent(void* channel, int pending); /** * @brief Trigger an event notification on the specified channel */ static void vmbus_channel_set_event(hv_vmbus_channel *channel) { hv_vmbus_monitor_page *monitor_page; if (channel->offer_msg.monitor_allocated) { /* Each uint32_t represents 32 channels */ synch_set_bit((channel->offer_msg.child_rel_id & 31), ((uint32_t *)hv_vmbus_g_connection.send_interrupt_page + ((channel->offer_msg.child_rel_id >> 5)))); monitor_page = (hv_vmbus_monitor_page *) hv_vmbus_g_connection.monitor_page_2; synch_set_bit(channel->monitor_bit, (uint32_t *)&monitor_page-> trigger_group[channel->monitor_group].u.pending); } else { hv_vmbus_set_event(channel); } } /** * @brief Open the specified channel */ int hv_vmbus_channel_open( hv_vmbus_channel* new_channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context) { int ret = 0; void *in, *out; hv_vmbus_channel_open_channel* open_msg; hv_vmbus_channel_msg_info* open_info; mtx_lock(&new_channel->sc_lock); if (new_channel->state == HV_CHANNEL_OPEN_STATE) { new_channel->state = HV_CHANNEL_OPENING_STATE; } else { mtx_unlock(&new_channel->sc_lock); if(bootverbose) printf("VMBUS: Trying to open channel <%p> which in " "%d state.\n", new_channel, new_channel->state); return (EINVAL); } mtx_unlock(&new_channel->sc_lock); new_channel->on_channel_callback = pfn_on_channel_callback; new_channel->channel_callback_context = context; new_channel->rxq = hv_vmbus_g_context.hv_event_queue[new_channel->target_cpu]; TASK_INIT(&new_channel->channel_task, 0, VmbusProcessChannelEvent, new_channel); /* Allocate the ring buffer */ out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size), M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); KASSERT(out != NULL, ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!")); if (out == NULL) return (ENOMEM); in = ((uint8_t *) out + send_ring_buffer_size); new_channel->ring_buffer_pages = out; new_channel->ring_buffer_page_count = (send_ring_buffer_size + recv_ring_buffer_size) >> PAGE_SHIFT; new_channel->ring_buffer_size = send_ring_buffer_size + recv_ring_buffer_size; hv_vmbus_ring_buffer_init( &new_channel->outbound, out, send_ring_buffer_size); hv_vmbus_ring_buffer_init( &new_channel->inbound, in, recv_ring_buffer_size); /** * Establish the gpadl for the ring buffer */ new_channel->ring_buffer_gpadl_handle = 0; ret = hv_vmbus_channel_establish_gpadl(new_channel, new_channel->outbound.ring_buffer, send_ring_buffer_size + recv_ring_buffer_size, &new_channel->ring_buffer_gpadl_handle); /** * Create and init the channel open message */ open_info = (hv_vmbus_channel_msg_info*) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_open_channel), M_DEVBUF, M_NOWAIT); KASSERT(open_info != NULL, ("Error VMBUS: malloc failed to allocate Open Channel message!")); if (open_info == NULL) return (ENOMEM); sema_init(&open_info->wait_sema, 0, "Open Info Sema"); open_msg = (hv_vmbus_channel_open_channel*) open_info->msg; open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL; open_msg->open_id = new_channel->offer_msg.child_rel_id; open_msg->child_rel_id = new_channel->offer_msg.child_rel_id; open_msg->ring_buffer_gpadl_handle = new_channel->ring_buffer_gpadl_handle; open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size >> PAGE_SHIFT; open_msg->target_vcpu = new_channel->target_vcpu; if (user_data_len) memcpy(open_msg->user_data, user_data, user_data_len); mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( open_msg, sizeof(hv_vmbus_channel_open_channel)); if (ret != 0) goto cleanup; ret = sema_timedwait(&open_info->wait_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { if(bootverbose) printf("VMBUS: channel <%p> open timeout.\n", new_channel); goto cleanup; } if (open_info->response.open_result.status == 0) { new_channel->state = HV_CHANNEL_OPENED_STATE; if(bootverbose) printf("VMBUS: channel <%p> open success.\n", new_channel); } else { if(bootverbose) printf("Error VMBUS: channel <%p> open failed - %d!\n", new_channel, open_info->response.open_result.status); } cleanup: mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_msg_anchor, open_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&open_info->wait_sema); free(open_info, M_DEVBUF); return (ret); } /** * @brief Create a gpadl for the specified buffer */ static int vmbus_channel_create_gpadl_header( void* contig_buffer, uint32_t size, /* page-size multiple */ hv_vmbus_channel_msg_info** msg_info, uint32_t* message_count) { int i; int page_count; unsigned long long pfn; uint32_t msg_size; hv_vmbus_channel_gpadl_header* gpa_header; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_header; hv_vmbus_channel_msg_info* msg_body; int pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize; page_count = size >> PAGE_SHIFT; pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT; /*do we need a gpadl body msg */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_header) - sizeof(hv_gpa_range); pfnCount = pfnSize / sizeof(uint64_t); if (page_count > pfnCount) { /* if(we need a gpadl body) */ /* fill in the header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + pfnCount * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); TAILQ_INIT(&msg_header->sub_msg_list_anchor); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < pfnCount; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; pfnSum = pfnCount; pfnLeft = page_count - pfnCount; /* * figure out how many pfns we can fit */ pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE - sizeof(hv_vmbus_channel_gpadl_body); pfnCount = pfnSize / sizeof(uint64_t); /* * fill in the body */ while (pfnLeft) { if (pfnLeft > pfnCount) { pfnCurr = pfnCount; } else { pfnCurr = pfnLeft; } msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_body) + pfnCurr * sizeof(uint64_t); msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_body != NULL, ("Error VMBUS: malloc failed to allocate Gpadl msg_body!")); if (msg_body == NULL) return (ENOMEM); msg_body->message_size = msg_size; (*message_count)++; gpadl_body = (hv_vmbus_channel_gpadl_body*) msg_body->msg; /* * gpadl_body->gpadl = kbuffer; */ for (i = 0; i < pfnCurr; i++) { gpadl_body->pfn[i] = pfn + pfnSum + i; } TAILQ_INSERT_TAIL( &msg_header->sub_msg_list_anchor, msg_body, msg_list_entry); pfnSum += pfnCurr; pfnLeft -= pfnCurr; } } else { /* else everything fits in a header */ msg_size = sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_header) + sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO); KASSERT( msg_header != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Message!")); if (msg_header == NULL) return (ENOMEM); msg_header->message_size = msg_size; gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg; gpa_header->range_count = 1; gpa_header->range_buf_len = sizeof(hv_gpa_range) + page_count * sizeof(uint64_t); gpa_header->range[0].byte_offset = 0; gpa_header->range[0].byte_count = size; for (i = 0; i < page_count; i++) { gpa_header->range[0].pfn_array[i] = pfn + i; } *msg_info = msg_header; *message_count = 1; } return (0); } /** * @brief Establish a GPADL for the specified buffer */ int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, void* contig_buffer, uint32_t size, /* page-size multiple */ uint32_t* gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_header* gpadl_msg; hv_vmbus_channel_gpadl_body* gpadl_body; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_info* sub_msg_info; uint32_t msg_count; hv_vmbus_channel_msg_info* curr; uint32_t next_gpadl_handle; next_gpadl_handle = atomic_fetchadd_int( &hv_vmbus_g_connection.next_gpadl_handle, 1); ret = vmbus_channel_create_gpadl_header( contig_buffer, size, &msg_info, &msg_count); if(ret != 0) { /* * XXX * We can _not_ even revert the above incremental, * if multiple GPADL establishments are running * parallelly, decrement the global next_gpadl_handle * is calling for _big_ trouble. A better solution * is to have a 0-based GPADL id bitmap ... */ return ret; } sema_init(&msg_info->wait_sema, 0, "Open Info Sema"); gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg; gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER; gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id; gpadl_msg->gpadl = next_gpadl_handle; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message( gpadl_msg, msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); if (ret != 0) goto cleanup; if (msg_count > 1) { TAILQ_FOREACH(curr, &msg_info->sub_msg_list_anchor, msg_list_entry) { sub_msg_info = curr; gpadl_body = (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg; gpadl_body->header.message_type = HV_CHANNEL_MESSAGE_GPADL_BODY; gpadl_body->gpadl = next_gpadl_handle; ret = hv_vmbus_post_message( gpadl_body, sub_msg_info->message_size - (uint32_t) sizeof(hv_vmbus_channel_msg_info)); /* if (the post message failed) give up and clean up */ if(ret != 0) goto cleanup; } } ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds*/ if (ret != 0) goto cleanup; *gpadl_handle = gpadl_msg->gpadl; cleanup: mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, msg_info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&msg_info->wait_sema); free(msg_info, M_DEVBUF); return (ret); } /** * @brief Teardown the specified GPADL handle */ int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle) { int ret = 0; hv_vmbus_channel_gpadl_teardown* msg; hv_vmbus_channel_msg_info* info; info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_gpadl_teardown), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!")); if (info == NULL) { ret = ENOMEM; goto cleanup; } sema_init(&info->wait_sema, 0, "Open Info Sema"); msg = (hv_vmbus_channel_gpadl_teardown*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN; msg->child_rel_id = channel->offer_msg.child_rel_id; msg->gpadl = gpadl_handle; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_gpadl_teardown)); if (ret != 0) goto cleanup; ret = sema_timedwait(&info->wait_sema, 5 * hz); /* KYS 5 seconds */ cleanup: /* * Received a torndown response */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor, info, msg_list_entry); mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); sema_destroy(&info->wait_sema); free(info, M_DEVBUF); return (ret); } static void hv_vmbus_channel_close_internal(hv_vmbus_channel *channel) { int ret = 0; struct taskqueue *rxq = channel->rxq; hv_vmbus_channel_close_channel* msg; hv_vmbus_channel_msg_info* info; channel->state = HV_CHANNEL_OPEN_STATE; channel->sc_creation_callback = NULL; /* * set rxq to NULL to avoid more requests be scheduled */ channel->rxq = NULL; taskqueue_drain(rxq, &channel->channel_task); - /* - * Grab the lock to prevent race condition when a packet received - * and unloading driver is in the process. - */ - mtx_lock(&channel->inbound_lock); channel->on_channel_callback = NULL; - mtx_unlock(&channel->inbound_lock); /** * Send a closing message */ info = (hv_vmbus_channel_msg_info *) malloc( sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_close_channel), M_DEVBUF, M_NOWAIT); KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!")); if(info == NULL) return; msg = (hv_vmbus_channel_close_channel*) info->msg; msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL; msg->child_rel_id = channel->offer_msg.child_rel_id; ret = hv_vmbus_post_message( msg, sizeof(hv_vmbus_channel_close_channel)); /* Tear down the gpadl for the channel's ring buffer */ if (channel->ring_buffer_gpadl_handle) { hv_vmbus_channel_teardown_gpdal(channel, channel->ring_buffer_gpadl_handle); } /* TODO: Send a msg to release the childRelId */ /* cleanup the ring buffers for this channel */ hv_ring_buffer_cleanup(&channel->outbound); hv_ring_buffer_cleanup(&channel->inbound); contigfree(channel->ring_buffer_pages, channel->ring_buffer_size, M_DEVBUF); free(info, M_DEVBUF); } /** * @brief Close the specified channel */ void hv_vmbus_channel_close(hv_vmbus_channel *channel) { hv_vmbus_channel* sub_channel; if (channel->primary_channel != NULL) { /* * We only close multi-channels when the primary is * closed. */ return; } /* * Close all multi-channels first. */ TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor, sc_list_entry) { if (sub_channel->state != HV_CHANNEL_OPENED_STATE) continue; hv_vmbus_channel_close_internal(sub_channel); } /* * Then close the primary channel. */ hv_vmbus_channel_close_internal(channel); } /** * @brief Send the specified buffer on the given channel */ int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags) { int ret = 0; hv_vm_packet_descriptor desc; uint32_t packet_len; uint64_t aligned_data; uint32_t packet_len_aligned; boolean_t need_sig; hv_vmbus_sg_buffer_list buffer_list[3]; packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); aligned_data = 0; /* Setup the descriptor */ desc.type = type; /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */ desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */ /* in 8-bytes granularity */ desc.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3; desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; buffer_list[0].data = &desc; buffer_list[0].length = sizeof(hv_vm_packet_descriptor); buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a range of single-page buffer packets using * a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; boolean_t need_sig; uint32_t packet_len; uint32_t page_buflen; uint32_t packetLen_aligned; hv_vmbus_sg_buffer_list buffer_list[4]; hv_vmbus_channel_packet_page_buffer desc; uint32_t descSize; uint64_t alignedData = 0; if (page_count > HV_MAX_PAGE_BUFFER_COUNT) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_page_buffer * is the largest size we support */ descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range); page_buflen = sizeof(hv_vmbus_page_buffer) * page_count; packet_len = descSize + page_buflen + buffer_len; packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; /* in 8-bytes granularity */ desc.data_offset8 = (descSize + page_buflen) >> 3; desc.length8 = (uint16_t) (packetLen_aligned >> 3); desc.transaction_id = request_id; desc.range_count = page_count; buffer_list[0].data = &desc; buffer_list[0].length = descSize; buffer_list[1].data = page_buffers; buffer_list[1].length = page_buflen; buffer_list[2].data = buffer; buffer_list[2].length = buffer_len; buffer_list[3].data = &alignedData; buffer_list[3].length = packetLen_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Send a multi-page buffer packet using a GPADL Direct packet type */ int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id) { int ret = 0; uint32_t desc_size; boolean_t need_sig; uint32_t packet_len; uint32_t packet_len_aligned; uint32_t pfn_count; uint64_t aligned_data = 0; hv_vmbus_sg_buffer_list buffer_list[3]; hv_vmbus_channel_packet_multipage_buffer desc; pfn_count = HV_NUM_PAGES_SPANNED( multi_page_buffer->offset, multi_page_buffer->length); if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT)) return (EINVAL); /* * Adjust the size down since hv_vmbus_channel_packet_multipage_buffer * is the largest size we support */ desc_size = sizeof(hv_vmbus_channel_packet_multipage_buffer) - ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) * sizeof(uint64_t)); packet_len = desc_size + buffer_len; packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* * Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */ desc.length8 = (uint16_t) (packet_len_aligned >> 3); desc.transaction_id = request_id; desc.range_count = 1; desc.range.length = multi_page_buffer->length; desc.range.offset = multi_page_buffer->offset; memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array, pfn_count * sizeof(uint64_t)); buffer_list[0].data = &desc; buffer_list[0].length = desc_size; buffer_list[1].data = buffer; buffer_list[1].length = buffer_len; buffer_list[2].data = &aligned_data; buffer_list[2].length = packet_len_aligned - packet_len; ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, &need_sig); /* TODO: We should determine if this is optional */ if (ret == 0 && need_sig) { vmbus_channel_set_event(channel); } return (ret); } /** * @brief Retrieve the user packet on the specified channel */ int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* Buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t user_len; uint32_t packet_len; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek(&channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packet_len = desc.length8 << 3; user_len = packet_len - (desc.data_offset8 << 3); *buffer_actual_len = user_len; if (user_len > buffer_len) return (EINVAL); *request_id = desc.transaction_id; /* Copy over the packet to the user buffer */ ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len, (desc.data_offset8 << 3)); return (0); } /** * @brief Retrieve the raw packet on the specified channel */ int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id) { int ret; uint32_t packetLen; uint32_t userLen; hv_vm_packet_descriptor desc; *buffer_actual_len = 0; *request_id = 0; ret = hv_ring_buffer_peek( &channel->inbound, &desc, sizeof(hv_vm_packet_descriptor)); if (ret != 0) return (0); packetLen = desc.length8 << 3; userLen = packetLen - (desc.data_offset8 << 3); *buffer_actual_len = packetLen; if (packetLen > buffer_len) return (ENOBUFS); *request_id = desc.transaction_id; /* Copy over the entire packet to the user buffer */ ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0); return (0); } /** * Process a channel event notification */ static void VmbusProcessChannelEvent(void* context, int pending) { void* arg; uint32_t bytes_to_read; hv_vmbus_channel* channel = (hv_vmbus_channel*)context; boolean_t is_batched_reading; /** * Find the channel based on this relid and invokes * the channel callback to process the event */ if (channel == NULL) { return; } /** * To deal with the race condition where we might * receive a packet while the relevant driver is * being unloaded, dispatch the callback while * holding the channel lock. The unloading driver * will acquire the same channel lock to set the * callback to NULL. This closes the window. */ - /* - * Disable the lock due to newly added WITNESS check in r277723. - * Will seek other way to avoid race condition. - * -- whu - */ - // mtx_lock(&channel->inbound_lock); if (channel->on_channel_callback != NULL) { arg = channel->channel_callback_context; is_batched_reading = channel->batched_reading; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. */ do { if (is_batched_reading) hv_ring_buffer_read_begin(&channel->inbound); channel->on_channel_callback(arg); if (is_batched_reading) bytes_to_read = hv_ring_buffer_read_end(&channel->inbound); else bytes_to_read = 0; } while (is_batched_reading && (bytes_to_read != 0)); } - // mtx_unlock(&channel->inbound_lock); } Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 296082) +++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 296083) @@ -1,741 +1,737 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "hv_vmbus_priv.h" /* * Internal functions */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_internal(void* context); static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind_internal(void* context); static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); /** * Channel message dispatch table */ hv_vmbus_channel_msg_table_entry g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { { HV_CHANNEL_MESSAGE_INVALID, NULL }, { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer }, { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER, vmbus_channel_on_offer_rescind }, { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL }, { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED, vmbus_channel_on_offers_delivered }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT, vmbus_channel_on_open_result }, { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL }, { HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL }, { HV_CHANNEL_MESSAGE_GPADL_BODY, NULL }, { HV_CHANNEL_MESSAGE_GPADL_CREATED, vmbus_channel_on_gpadl_created }, { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL }, { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN, vmbus_channel_on_gpadl_torndown }, { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL }, { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL }, { HV_CHANNEL_MESSAGE_VERSION_RESPONSE, vmbus_channel_on_version_response }, { HV_CHANNEL_MESSAGE_UNLOAD, NULL } }; typedef struct hv_work_item { struct task work; void (*callback)(void *); void* context; } hv_work_item; /** * Implementation of the work abstraction. */ static void work_item_callback(void *work, int pending) { struct hv_work_item *w = (struct hv_work_item *)work; w->callback(w->context); free(w, M_DEVBUF); } /** * @brief Create work item */ static int hv_queue_work_item( void (*callback)(void *), void *context) { struct hv_work_item *w = malloc(sizeof(struct hv_work_item), M_DEVBUF, M_NOWAIT); KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n")); if (w == NULL) return (ENOMEM); w->callback = callback; w->context = context; TASK_INIT(&w->work, 0, work_item_callback, w); return (taskqueue_enqueue(taskqueue_thread, &w->work)); } /** * @brief Allocate and initialize a vmbus channel object */ hv_vmbus_channel* hv_vmbus_allocate_channel(void) { hv_vmbus_channel* channel; channel = (hv_vmbus_channel*) malloc( sizeof(hv_vmbus_channel), M_DEVBUF, M_WAITOK | M_ZERO); - mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF); mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); - TAILQ_INIT(&channel->sc_list_anchor); return (channel); } /** * @brief Release the resources used by the vmbus channel object */ void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { mtx_destroy(&channel->sc_lock); - mtx_destroy(&channel->inbound_lock); - free(channel, M_DEVBUF); } /** * @brief Process the offer by creating a channel/device * associated with this offer */ static void vmbus_channel_process_offer(hv_vmbus_channel *new_channel) { boolean_t f_new; hv_vmbus_channel* channel; int ret; uint32_t relid; f_new = TRUE; channel = NULL; relid = new_channel->offer_msg.child_rel_id; /* * Make sure this is a new offer */ mtx_lock(&hv_vmbus_g_connection.channel_lock); hv_vmbus_g_connection.channels[relid] = new_channel; TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor, list_entry) { if (memcmp(&channel->offer_msg.offer.interface_type, &new_channel->offer_msg.offer.interface_type, sizeof(hv_guid)) == 0 && memcmp(&channel->offer_msg.offer.interface_instance, &new_channel->offer_msg.offer.interface_instance, sizeof(hv_guid)) == 0) { f_new = FALSE; break; } } if (f_new) { /* Insert at tail */ TAILQ_INSERT_TAIL( &hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); } mtx_unlock(&hv_vmbus_g_connection.channel_lock); /*XXX add new channel to percpu_list */ if (!f_new) { /* * Check if this is a sub channel. */ if (new_channel->offer_msg.offer.sub_channel_index != 0) { /* * It is a sub channel offer, process it. */ new_channel->primary_channel = channel; mtx_lock(&channel->sc_lock); TAILQ_INSERT_TAIL( &channel->sc_list_anchor, new_channel, sc_list_entry); mtx_unlock(&channel->sc_lock); /* Insert new channel into channel_anchor. */ printf("VMBUS get multi-channel offer, rel=%u,sub=%u\n", new_channel->offer_msg.child_rel_id, new_channel->offer_msg.offer.sub_channel_index); mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_lock); if(bootverbose) printf("VMBUS: new multi-channel offer <%p>, " "its primary channel is <%p>.\n", new_channel, new_channel->primary_channel); /*XXX add it to percpu_list */ new_channel->state = HV_CHANNEL_OPEN_STATE; if (channel->sc_creation_callback != NULL) { channel->sc_creation_callback(new_channel); } return; } hv_vmbus_free_vmbus_channel(new_channel); return; } new_channel->state = HV_CHANNEL_OPEN_STATE; /* * Start the process of binding this offer to the driver * (We need to set the device field before calling * hv_vmbus_child_device_add()) */ new_channel->device = hv_vmbus_child_device_create( new_channel->offer_msg.offer.interface_type, new_channel->offer_msg.offer.interface_instance, new_channel); /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. */ ret = hv_vmbus_child_device_register(new_channel->device); if (ret != 0) { mtx_lock(&hv_vmbus_g_connection.channel_lock); TAILQ_REMOVE( &hv_vmbus_g_connection.channel_anchor, new_channel, list_entry); mtx_unlock(&hv_vmbus_g_connection.channel_lock); hv_vmbus_free_vmbus_channel(new_channel); } } /** * Array of device guids that are performance critical. We try to distribute * the interrupt load for these devices across all online cpus. */ static const hv_guid high_perf_devices[] = { {HV_NIC_GUID, }, {HV_IDE_GUID, }, {HV_SCSI_GUID, }, }; enum { PERF_CHN_NIC = 0, PERF_CHN_IDE, PERF_CHN_SCSI, MAX_PERF_CHN, }; /* * We use this static number to distribute the channel interrupt load. */ static uint32_t next_vcpu; /** * Starting with Win8, we can statically distribute the incoming * channel interrupt load by binding a channel to VCPU. We * implement here a simple round robin scheme for distributing * the interrupt load. * We will bind channels that are not performance critical to cpu 0 and * performance critical channels (IDE, SCSI and Network) will be uniformly * distributed across all available CPUs. */ static void vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid) { uint32_t current_cpu; int i; boolean_t is_perf_channel = FALSE; for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) { if (memcmp(guid->data, high_perf_devices[i].data, sizeof(hv_guid)) == 0) { is_perf_channel = TRUE; break; } } if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) || (!is_perf_channel)) { /* Host's view of guest cpu */ channel->target_vcpu = 0; /* Guest's own view of cpu */ channel->target_cpu = 0; return; } /* mp_ncpus should have the number cpus currently online */ current_cpu = (++next_vcpu % mp_ncpus); channel->target_cpu = current_cpu; channel->target_vcpu = hv_vmbus_g_context.hv_vcpu_index[current_cpu]; if (bootverbose) printf("VMBUS: Total online cpus %d, assign perf channel %d " "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu, current_cpu); } /** * @brief Handler for channel offers from Hyper-V/Azure * * Handler for channel offers from vmbus in parent partition. We ignore * all offers except network and storage offers. For each network and storage * offers, we create a channel object and queue a work item to the channel * object to process the offer synchronously */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_offer_channel* offer; hv_vmbus_channel_offer_channel* copied; offer = (hv_vmbus_channel_offer_channel*) hdr; hv_guid *guidType; hv_guid *guidInstance; guidType = &offer->offer.interface_type; guidInstance = &offer->offer.interface_instance; // copy offer data copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT); if (copied == NULL) { printf("fail to allocate memory\n"); return; } memcpy(copied, hdr, sizeof(*copied)); hv_queue_work_item(vmbus_channel_on_offer_internal, copied); } static void vmbus_channel_on_offer_internal(void* context) { hv_vmbus_channel* new_channel; hv_vmbus_channel_offer_channel* offer = (hv_vmbus_channel_offer_channel*)context; /* Allocate the channel object and save this offer */ new_channel = hv_vmbus_allocate_channel(); /* * By default we setup state to enable batched * reading. A specific service can choose to * disable this prior to opening the channel. */ new_channel->batched_reading = TRUE; new_channel->signal_event_param = (hv_vmbus_input_signal_event *) (HV_ALIGN_UP((unsigned long) &new_channel->signal_event_buffer, HV_HYPERCALL_PARAM_ALIGN)); new_channel->signal_event_param->connection_id.as_uint32_t = 0; new_channel->signal_event_param->connection_id.u.id = HV_VMBUS_EVENT_CONNECTION_ID; new_channel->signal_event_param->flag_number = 0; new_channel->signal_event_param->rsvd_z = 0; if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) { new_channel->is_dedicated_interrupt = (offer->is_dedicated_interrupt != 0); new_channel->signal_event_param->connection_id.u.id = offer->connection_id; } /* * Bind the channel to a chosen cpu. */ vmbus_channel_select_cpu(new_channel, &offer->offer.interface_type); memcpy(&new_channel->offer_msg, offer, sizeof(hv_vmbus_channel_offer_channel)); new_channel->monitor_group = (uint8_t) offer->monitor_id / 32; new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32; vmbus_channel_process_offer(new_channel); free(offer, M_DEVBUF); } /** * @brief Rescind offer handler. * * We queue a work item to process this offer * synchronously */ static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_rescind_offer* rescind; hv_vmbus_channel* channel; rescind = (hv_vmbus_channel_rescind_offer*) hdr; channel = hv_vmbus_g_connection.channels[rescind->child_rel_id]; if (channel == NULL) return; hv_queue_work_item(vmbus_channel_on_offer_rescind_internal, channel); hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL; } static void vmbus_channel_on_offer_rescind_internal(void *context) { hv_vmbus_channel* channel; channel = (hv_vmbus_channel*)context; hv_vmbus_child_device_unregister(channel->device); } /** * * @brief Invoked when all offers have been delivered. */ static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr) { } /** * @brief Open result handler. * * This is invoked when we received a response * to our channel open request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_open_result* result; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_open_channel* openMsg; result = (hv_vmbus_channel_open_result*) hdr; /* * Find the open msg, copy the result and signal/unblock the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_OPEN_CHANNEL) { openMsg = (hv_vmbus_channel_open_channel*) msg_info->msg; if (openMsg->child_rel_id == result->child_rel_id && openMsg->open_id == result->open_id) { memcpy(&msg_info->response.open_result, result, sizeof(hv_vmbus_channel_open_result)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL created handler. * * This is invoked when we received a response * to our gpadl create request. Find the matching request, copy the * response and signal the requesting thread. */ static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_created* gpadl_created; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* request_header; hv_vmbus_channel_gpadl_header* gpadl_header; gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr; /* Find the establish msg, copy the result and signal/unblock * the wait event */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { request_header = (hv_vmbus_channel_msg_header*) msg_info->msg; if (request_header->message_type == HV_CHANNEL_MESSAGEL_GPADL_HEADER) { gpadl_header = (hv_vmbus_channel_gpadl_header*) request_header; if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id) && (gpadl_created->gpadl == gpadl_header->gpadl)) { memcpy(&msg_info->response.gpadl_created, gpadl_created, sizeof(hv_vmbus_channel_gpadl_created)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief GPADL torndown handler. * * This is invoked when we received a respons * to our gpadl teardown request. Find the matching request, copy the * response and signal the requesting thread */ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_gpadl_torndown* gpadl_torndown; hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_gpadl_teardown* gpadlTeardown; gpadl_torndown = (hv_vmbus_channel_gpadl_torndown*)hdr; /* * Find the open msg, copy the result and signal/unblock the * wait event. */ mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_GPADL_TEARDOWN) { gpadlTeardown = (hv_vmbus_channel_gpadl_teardown*) requestHeader; if (gpadl_torndown->gpadl == gpadlTeardown->gpadl) { memcpy(&msg_info->response.gpadl_torndown, gpadl_torndown, sizeof(hv_vmbus_channel_gpadl_torndown)); sema_post(&msg_info->wait_sema); break; } } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Version response handler. * * This is invoked when we received a response * to our initiate contact request. Find the matching request, copy th * response and signal the requesting thread. */ static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr) { hv_vmbus_channel_msg_info* msg_info; hv_vmbus_channel_msg_header* requestHeader; hv_vmbus_channel_initiate_contact* initiate; hv_vmbus_channel_version_response* versionResponse; versionResponse = (hv_vmbus_channel_version_response*)hdr; mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock); TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor, msg_list_entry) { requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg; if (requestHeader->message_type == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) { initiate = (hv_vmbus_channel_initiate_contact*) requestHeader; memcpy(&msg_info->response.version_response, versionResponse, sizeof(hv_vmbus_channel_version_response)); sema_post(&msg_info->wait_sema); } } mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock); } /** * @brief Send a request to get all our pending offers. */ int hv_vmbus_request_channel_offers(void) { int ret; hv_vmbus_channel_msg_header* msg; hv_vmbus_channel_msg_info* msg_info; msg_info = (hv_vmbus_channel_msg_info *) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT); if (msg_info == NULL) { if(bootverbose) printf("Error VMBUS: malloc failed for Request Offers\n"); return (ENOMEM); } msg = (hv_vmbus_channel_msg_header*) msg_info->msg; msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS; ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header)); free(msg_info, M_DEVBUF); return (ret); } /** * @brief Release channels that are unattached/unconnected (i.e., no drivers associated) */ void hv_vmbus_release_unattached_channels(void) { hv_vmbus_channel *channel; mtx_lock(&hv_vmbus_g_connection.channel_lock); while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) { channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor); TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor, channel, list_entry); hv_vmbus_child_device_unregister(channel->device); hv_vmbus_free_vmbus_channel(channel); } bzero(hv_vmbus_g_connection.channels, sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT); mtx_unlock(&hv_vmbus_g_connection.channel_lock); } /** * @brief Select the best outgoing channel * * The channel whose vcpu binding is closest to the currect vcpu will * be selected. * If no multi-channel, always select primary channel * * @param primary - primary channel */ struct hv_vmbus_channel * vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) { hv_vmbus_channel *new_channel = NULL; hv_vmbus_channel *outgoing_channel = primary; int old_cpu_distance = 0; int new_cpu_distance = 0; int cur_vcpu = 0; int smp_pro_id = PCPU_GET(cpuid); if (TAILQ_EMPTY(&primary->sc_list_anchor)) { return outgoing_channel; } if (smp_pro_id >= MAXCPU) { return outgoing_channel; } cur_vcpu = hv_vmbus_g_context.hv_vcpu_index[smp_pro_id]; TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { if (new_channel->state != HV_CHANNEL_OPENED_STATE){ continue; } if (new_channel->target_vcpu == cur_vcpu){ return new_channel; } old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ? (outgoing_channel->target_vcpu - cur_vcpu) : (cur_vcpu - outgoing_channel->target_vcpu)); new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ? (new_channel->target_vcpu - cur_vcpu) : (cur_vcpu - new_channel->target_vcpu)); if (old_cpu_distance < new_cpu_distance) { continue; } outgoing_channel = new_channel; } return(outgoing_channel); }