Index: head/sys/dev/hyperv/include/hyperv.h =================================================================== --- head/sys/dev/hyperv/include/hyperv.h (revision 296295) +++ head/sys/dev/hyperv/include/hyperv.h (revision 296296) @@ -1,925 +1,926 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV definitions for messages that are sent between instances of the * Channel Management Library in separate partitions, or in some cases, * back to itself. */ #ifndef __HYPERV_H__ #define __HYPERV_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef uint8_t hv_bool_uint8_t; #define HV_S_OK 0x00000000 #define HV_E_FAIL 0x80004005 #define HV_ERROR_NOT_SUPPORTED 0x80070032 #define HV_ERROR_MACHINE_LOCKED 0x800704F7 /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define HV_VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define HV_VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define HV_VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define HV_VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define HV_VMBUS_VERSION_INVALID -1 #define HV_VMBUS_VERSION_CURRENT HV_VMBUS_VERSION_WIN8_1 /* * Make maximum size of pipe payload of 16K */ #define HV_MAX_PIPE_DATA_PAYLOAD (sizeof(BYTE) * 16384) /* * Define pipe_mode values */ #define HV_VMBUS_PIPE_TYPE_BYTE 0x00000000 #define HV_VMBUS_PIPE_TYPE_MESSAGE 0x00000004 /* * The size of the user defined data buffer for non-pipe offers */ #define HV_MAX_USER_DEFINED_BYTES 120 /* * The size of the user defined data buffer for pipe offers */ #define HV_MAX_PIPE_USER_DEFINED_BYTES 116 #define HV_MAX_PAGE_BUFFER_COUNT 32 #define HV_MAX_MULTIPAGE_BUFFER_COUNT 32 #define HV_ALIGN_UP(value, align) \ (((value) & (align-1)) ? \ (((value) + (align-1)) & ~(align-1) ) : (value)) #define HV_ALIGN_DOWN(value, align) ( (value) & ~(align-1) ) #define HV_NUM_PAGES_SPANNED(addr, len) \ ((HV_ALIGN_UP(addr+len, PAGE_SIZE) - \ HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT ) typedef struct hv_guid { unsigned char data[16]; } __packed hv_guid; #define HV_NIC_GUID \ .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, \ 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} #define HV_IDE_GUID \ .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \ 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} #define HV_SCSI_GUID \ .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \ 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} /* * At the center of the Channel Management library is * the Channel Offer. This struct contains the * fundamental information about an offer. */ typedef struct hv_vmbus_channel_offer { hv_guid interface_type; hv_guid interface_instance; uint64_t interrupt_latency_in_100ns_units; uint32_t interface_revision; uint32_t server_context_area_size; /* in bytes */ uint16_t channel_flags; uint16_t mmio_megabytes; /* in bytes * 1024 * 1024 */ union { /* * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes. */ struct { uint8_t user_defined[HV_MAX_USER_DEFINED_BYTES]; } __packed standard; /* * Pipes: The following structure is an integrated pipe protocol, which * is implemented on top of standard user-defined data. pipe * clients have HV_MAX_PIPE_USER_DEFINED_BYTES left for their * own use. */ struct { uint32_t pipe_mode; uint8_t user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES]; } __packed pipe; } u; /* * Sub_channel_index, newly added in Win8. */ uint16_t sub_channel_index; uint16_t padding; } __packed hv_vmbus_channel_offer; typedef uint32_t hv_gpadl_handle; typedef struct { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; } __packed hv_vm_packet_descriptor; typedef uint32_t hv_previous_packet_offset; typedef struct { hv_previous_packet_offset previous_packet_start_offset; hv_vm_packet_descriptor descriptor; } __packed hv_vm_packet_header; typedef struct { uint32_t byte_count; uint32_t byte_offset; } __packed hv_vm_transfer_page; typedef struct { hv_vm_packet_descriptor d; uint16_t transfer_page_set_id; hv_bool_uint8_t sender_owns_set; uint8_t reserved; uint32_t range_count; hv_vm_transfer_page ranges[1]; } __packed hv_vm_transfer_page_packet_header; typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint32_t reserved; } __packed hv_vm_gpadl_packet_header; typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint16_t transfer_page_set_id; uint16_t reserved; } __packed hv_vm_add_remove_transfer_page_set; /* * This structure defines a range in guest * physical space that can be made * to look virtually contiguous. */ typedef struct { uint32_t byte_count; uint32_t byte_offset; uint64_t pfn_array[0]; } __packed hv_gpa_range; /* * This is the format for an Establish Gpadl packet, which contains a handle * by which this GPADL will be known and a set of GPA ranges associated with * it. This can be converted to a MDL by the guest OS. If there are multiple * GPA ranges, then the resulting MDL will be "chained," representing multiple * VA ranges. */ typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; uint32_t range_count; hv_gpa_range range[1]; } __packed hv_vm_establish_gpadl; /* * This is the format for a Teardown Gpadl packet, which indicates that the * GPADL handle in the Establish Gpadl packet will never be referenced again. */ typedef struct { hv_vm_packet_descriptor d; uint32_t gpadl; /* for alignment to a 8-byte boundary */ uint32_t reserved; } __packed hv_vm_teardown_gpadl; /* * This is the format for a GPA-Direct packet, which contains a set of GPA * ranges, in addition to commands and/or data. */ typedef struct { hv_vm_packet_descriptor d; uint32_t reserved; uint32_t range_count; hv_gpa_range range[1]; } __packed hv_vm_data_gpa_direct; /* * This is the format for a Additional data Packet. */ typedef struct { hv_vm_packet_descriptor d; uint64_t total_bytes; uint32_t byte_offset; uint32_t byte_count; uint8_t data[1]; } __packed hv_vm_additional_data; typedef union { hv_vm_packet_descriptor simple_header; hv_vm_transfer_page_packet_header transfer_page_header; hv_vm_gpadl_packet_header gpadl_header; hv_vm_add_remove_transfer_page_set add_remove_transfer_page_header; hv_vm_establish_gpadl establish_gpadl_header; hv_vm_teardown_gpadl teardown_gpadl_header; hv_vm_data_gpa_direct data_gpa_direct_header; } __packed hv_vm_packet_largest_possible_header; typedef enum { HV_VMBUS_PACKET_TYPE_INVALID = 0x0, HV_VMBUS_PACKET_TYPES_SYNCH = 0x1, HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET = 0x2, HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET = 0x3, HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL = 0x4, HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL = 0x5, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND = 0x6, HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES = 0x7, HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL = 0x8, HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT = 0x9, HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST = 0xa, HV_VMBUS_PACKET_TYPE_COMPLETION = 0xb, HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS = 0xc, HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA = 0xd } hv_vmbus_packet_type; #define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED 1 /* * Version 1 messages */ typedef enum { HV_CHANNEL_MESSAGE_INVALID = 0, HV_CHANNEL_MESSAGE_OFFER_CHANNEL = 1, HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER = 2, HV_CHANNEL_MESSAGE_REQUEST_OFFERS = 3, HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED = 4, HV_CHANNEL_MESSAGE_OPEN_CHANNEL = 5, HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT = 6, HV_CHANNEL_MESSAGE_CLOSE_CHANNEL = 7, HV_CHANNEL_MESSAGEL_GPADL_HEADER = 8, HV_CHANNEL_MESSAGE_GPADL_BODY = 9, HV_CHANNEL_MESSAGE_GPADL_CREATED = 10, HV_CHANNEL_MESSAGE_GPADL_TEARDOWN = 11, HV_CHANNEL_MESSAGE_GPADL_TORNDOWN = 12, HV_CHANNEL_MESSAGE_REL_ID_RELEASED = 13, HV_CHANNEL_MESSAGE_INITIATED_CONTACT = 14, HV_CHANNEL_MESSAGE_VERSION_RESPONSE = 15, HV_CHANNEL_MESSAGE_UNLOAD = 16, HV_CHANNEL_MESSAGE_COUNT } hv_vmbus_channel_msg_type; typedef struct { hv_vmbus_channel_msg_type message_type; uint32_t padding; } __packed hv_vmbus_channel_msg_header; /* * Query VMBus Version parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t version; } __packed hv_vmbus_channel_query_vmbus_version; /* * VMBus Version Supported parameters */ typedef struct { hv_vmbus_channel_msg_header header; hv_bool_uint8_t version_supported; } __packed hv_vmbus_channel_version_supported; /* * Channel Offer parameters */ typedef struct { hv_vmbus_channel_msg_header header; hv_vmbus_channel_offer offer; uint32_t child_rel_id; uint8_t monitor_id; /* * This field has been split into a bit field on Win7 * and higher. */ uint8_t monitor_allocated:1; uint8_t reserved:7; /* * Following fields were added in win7 and higher. * Make sure to check the version before accessing these fields. * * If "is_dedicated_interrupt" is set, we must not set the * associated bit in the channel bitmap while sending the * interrupt to the host. * * connection_id is used in signaling the host. */ uint16_t is_dedicated_interrupt:1; uint16_t reserved1:15; uint32_t connection_id; } __packed hv_vmbus_channel_offer_channel; /* * Rescind Offer parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_rescind_offer; /* * Request Offer -- no parameters, SynIC message contains the partition ID * * Set Snoop -- no parameters, SynIC message contains the partition ID * * Clear Snoop -- no parameters, SynIC message contains the partition ID * * All Offers Delivered -- no parameters, SynIC message contains the * partition ID * * Flush Client -- no parameters, SynIC message contains the partition ID */ /* * Open Channel parameters */ typedef struct { hv_vmbus_channel_msg_header header; /* * Identifies the specific VMBus channel that is being opened. */ uint32_t child_rel_id; /* * ID making a particular open request at a channel offer unique. */ uint32_t open_id; /* * GPADL for the channel's ring buffer. */ hv_gpadl_handle ring_buffer_gpadl_handle; /* * Before win8, all incoming channel interrupts are only * delivered on cpu 0. Setting this value to 0 would * preserve the earlier behavior. */ uint32_t target_vcpu; /* * The upstream ring buffer begins at offset zero in the memory described * by ring_buffer_gpadl_handle. The downstream ring buffer follows it at * this offset (in pages). */ uint32_t downstream_ring_buffer_page_offset; /* * User-specific data to be passed along to the server endpoint. */ uint8_t user_data[HV_MAX_USER_DEFINED_BYTES]; } __packed hv_vmbus_channel_open_channel; typedef uint32_t hv_nt_status; /* * Open Channel Result parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t open_id; hv_nt_status status; } __packed hv_vmbus_channel_open_result; /* * Close channel parameters */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_close_channel; /* * Channel Message GPADL */ #define HV_GPADL_TYPE_RING_BUFFER 1 #define HV_GPADL_TYPE_SERVER_SAVE_AREA 2 #define HV_GPADL_TYPE_TRANSACTION 8 /* * The number of PFNs in a GPADL message is defined by the number of pages * that would be spanned by byte_count and byte_offset. If the implied number * of PFNs won't fit in this packet, there will be a follow-up packet that * contains more */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint16_t range_buf_len; uint16_t range_count; hv_gpa_range range[0]; } __packed hv_vmbus_channel_gpadl_header; /* * This is the follow-up packet that contains more PFNs */ typedef struct { hv_vmbus_channel_msg_header header; uint32_t message_number; uint32_t gpadl; uint64_t pfn[0]; } __packed hv_vmbus_channel_gpadl_body; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; uint32_t creation_status; } __packed hv_vmbus_channel_gpadl_created; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_teardown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t gpadl; } __packed hv_vmbus_channel_gpadl_torndown; typedef struct { hv_vmbus_channel_msg_header header; uint32_t child_rel_id; } __packed hv_vmbus_channel_relid_released; typedef struct { hv_vmbus_channel_msg_header header; uint32_t vmbus_version_requested; uint32_t padding2; uint64_t interrupt_page; uint64_t monitor_page_1; uint64_t monitor_page_2; } __packed hv_vmbus_channel_initiate_contact; typedef struct { hv_vmbus_channel_msg_header header; hv_bool_uint8_t version_supported; } __packed hv_vmbus_channel_version_response; typedef hv_vmbus_channel_msg_header hv_vmbus_channel_unload; #define HW_MACADDR_LEN 6 /* * Fixme: Added to quiet "typeof" errors involving hv_vmbus.h when * the including C file was compiled with "-std=c99". */ #ifndef typeof #define typeof __typeof #endif #ifndef NULL #define NULL (void *)0 #endif typedef void *hv_vmbus_handle; #ifndef CONTAINING_RECORD #define CONTAINING_RECORD(address, type, field) ((type *)( \ (uint8_t *)(address) - \ (uint8_t *)(&((type *)0)->field))) #endif /* CONTAINING_RECORD */ #define container_of(ptr, type, member) ({ \ __typeof__( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) enum { HV_VMBUS_IVAR_TYPE, HV_VMBUS_IVAR_INSTANCE, HV_VMBUS_IVAR_NODE, HV_VMBUS_IVAR_DEVCTX }; #define HV_VMBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type) HV_VMBUS_ACCESSOR(type, TYPE, const char *) HV_VMBUS_ACCESSOR(devctx, DEVCTX, struct hv_device *) /* * Common defines for Hyper-V ICs */ #define HV_ICMSGTYPE_NEGOTIATE 0 #define HV_ICMSGTYPE_HEARTBEAT 1 #define HV_ICMSGTYPE_KVPEXCHANGE 2 #define HV_ICMSGTYPE_SHUTDOWN 3 #define HV_ICMSGTYPE_TIMESYNC 4 #define HV_ICMSGTYPE_VSS 5 #define HV_ICMSGHDRFLAG_TRANSACTION 1 #define HV_ICMSGHDRFLAG_REQUEST 2 #define HV_ICMSGHDRFLAG_RESPONSE 4 typedef struct hv_vmbus_pipe_hdr { uint32_t flags; uint32_t msgsize; } __packed hv_vmbus_pipe_hdr; typedef struct hv_vmbus_ic_version { uint16_t major; uint16_t minor; } __packed hv_vmbus_ic_version; typedef struct hv_vmbus_icmsg_hdr { hv_vmbus_ic_version icverframe; uint16_t icmsgtype; hv_vmbus_ic_version icvermsg; uint16_t icmsgsize; uint32_t status; uint8_t ictransaction_id; uint8_t icflags; uint8_t reserved[2]; } __packed hv_vmbus_icmsg_hdr; typedef struct hv_vmbus_icmsg_negotiate { uint16_t icframe_vercnt; uint16_t icmsg_vercnt; uint32_t reserved; hv_vmbus_ic_version icversion_data[1]; /* any size array */ } __packed hv_vmbus_icmsg_negotiate; typedef struct hv_vmbus_shutdown_msg_data { uint32_t reason_code; uint32_t timeout_seconds; uint32_t flags; uint8_t display_message[2048]; } __packed hv_vmbus_shutdown_msg_data; typedef struct hv_vmbus_heartbeat_msg_data { uint64_t seq_num; uint32_t reserved[8]; } __packed hv_vmbus_heartbeat_msg_data; typedef struct { /* * offset in bytes from the start of ring data below */ volatile uint32_t write_index; /* * offset in bytes from the start of ring data below */ volatile uint32_t read_index; /* * NOTE: The interrupt_mask field is used only for channels, but * vmbus connection also uses this data structure */ volatile uint32_t interrupt_mask; /* pad it to PAGE_SIZE so that data starts on a page */ uint8_t reserved[4084]; /* * WARNING: Ring data starts here + ring_data_start_offset * !!! DO NOT place any fields below this !!! */ uint8_t buffer[0]; /* doubles as interrupt mask */ } __packed hv_vmbus_ring_buffer; typedef struct { int length; int offset; uint64_t pfn; } __packed hv_vmbus_page_buffer; typedef struct { int length; int offset; uint64_t pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT]; } __packed hv_vmbus_multipage_buffer; typedef struct { hv_vmbus_ring_buffer* ring_buffer; uint32_t ring_size; /* Include the shared header */ struct mtx ring_lock; uint32_t ring_data_size; /* ring_size */ uint32_t ring_data_start_offset; } hv_vmbus_ring_buffer_info; typedef void (*hv_vmbus_pfn_channel_callback)(void *context); typedef void (*hv_vmbus_sc_creation_callback)(void *context); typedef enum { HV_CHANNEL_OFFER_STATE, HV_CHANNEL_OPENING_STATE, HV_CHANNEL_OPEN_STATE, HV_CHANNEL_OPENED_STATE, HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE, } hv_vmbus_channel_state; /* * Connection identifier type */ typedef union { uint32_t as_uint32_t; struct { uint32_t id:24; uint32_t reserved:8; } u; } __packed hv_vmbus_connection_id; /* * Definition of the hv_vmbus_signal_event hypercall input structure */ typedef struct { hv_vmbus_connection_id connection_id; uint16_t flag_number; uint16_t rsvd_z; } __packed hv_vmbus_input_signal_event; typedef struct { uint64_t align8; hv_vmbus_input_signal_event event; } __packed hv_vmbus_input_signal_event_buffer; typedef struct hv_vmbus_channel { TAILQ_ENTRY(hv_vmbus_channel) list_entry; struct hv_device* device; hv_vmbus_channel_state state; hv_vmbus_channel_offer_channel offer_msg; /* * These are based on the offer_msg.monitor_id. * Save it here for easy access. */ uint8_t monitor_group; uint8_t monitor_bit; uint32_t ring_buffer_gpadl_handle; /* * Allocated memory for ring buffer */ void* ring_buffer_pages; unsigned long ring_buffer_size; uint32_t ring_buffer_page_count; /* * send to parent */ hv_vmbus_ring_buffer_info outbound; /* * receive from parent */ hv_vmbus_ring_buffer_info inbound; struct taskqueue * rxq; struct task channel_task; hv_vmbus_pfn_channel_callback on_channel_callback; void* channel_callback_context; /* * If batched_reading is set to "true", mask the interrupt * and read until the channel is empty. * If batched_reading is set to "false", the channel is not * going to perform batched reading. * * Batched reading is enabled by default; specific * drivers that don't want this behavior can turn it off. */ boolean_t batched_reading; boolean_t is_dedicated_interrupt; /* * Used as an input param for HV_CALL_SIGNAL_EVENT hypercall. */ hv_vmbus_input_signal_event_buffer signal_event_buffer; /* * 8-bytes aligned of the buffer above */ hv_vmbus_input_signal_event *signal_event_param; /* * From Win8, this field specifies the target virtual process * on which to deliver the interupt from the host to guest. * Before Win8, all channel interrupts would only be * delivered on cpu 0. Setting this value to 0 would preserve * the earlier behavior. */ uint32_t target_vcpu; /* The corresponding CPUID in the guest */ uint32_t target_cpu; /* * Support for multi-channels. * The initial offer is considered the primary channel and this * offer message will indicate if the host supports multi-channels. * The guest is free to ask for multi-channels to be offerred and can * open these multi-channels as a normal "primary" channel. However, * all multi-channels will have the same type and instance guids as the * primary channel. Requests sent on a given channel will result in a * response on the same channel. */ /* * Multi-channel creation callback. This callback will be called in * process context when a Multi-channel offer is received from the host. * The guest can open the Multi-channel in the context of this callback. */ hv_vmbus_sc_creation_callback sc_creation_callback; struct mtx sc_lock; /* * Link list of all the multi-channels if this is a primary channel */ TAILQ_HEAD(, hv_vmbus_channel) sc_list_anchor; TAILQ_ENTRY(hv_vmbus_channel) sc_list_entry; /* * The primary channel this sub-channle belongs to. * This will be NULL for the primary channel. */ struct hv_vmbus_channel *primary_channel; /* * Driver private data */ void *hv_chan_priv1; void *hv_chan_priv2; + void *hv_chan_priv3; } hv_vmbus_channel; #define HV_VMBUS_CHAN_ISPRIMARY(chan) ((chan)->primary_channel == NULL) static inline void hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state) { channel->batched_reading = state; } typedef struct hv_device { hv_guid class_id; hv_guid device_id; device_t device; hv_vmbus_channel* channel; } hv_device; int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_open( hv_vmbus_channel* channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context); void hv_vmbus_channel_close(hv_vmbus_channel *channel); int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags); int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, /* must be phys and virt contiguous */ void* contig_buffer, /* page-size multiple */ uint32_t size, uint32_t* gpadl_handle); int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle); struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); /** * @brief Get physical address from virtual */ static inline unsigned long hv_get_phys_addr(void *virt) { unsigned long ret; ret = (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK)); return (ret); } extern uint32_t hv_vmbus_protocal_version; #endif /* __HYPERV_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 296295) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 296296) @@ -1,1026 +1,1035 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV vmbus network VSC (virtual services client) module * */ #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" +/* priv1 and priv1 are consumed by the main driver */ +#define hv_chan_rdbuf hv_chan_priv3 + MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver"); /* * Forward declarations */ static void hv_nv_on_channel_callback(void *context); static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device); static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device); static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev); static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev); static int hv_nv_connect_to_vsp(struct hv_device *device); static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct hv_device *device, hv_vm_packet_descriptor *pkt); static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan, uint64_t tid, uint32_t status); static void hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device, struct hv_vmbus_channel *chan, hv_vm_packet_descriptor *pkt); /* * */ static inline netvsc_dev * hv_nv_alloc_net_device(struct hv_device *device) { netvsc_dev *net_dev; hn_softc_t *sc = device_get_softc(device->device); net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_WAITOK | M_ZERO); net_dev->dev = device; net_dev->destroy = FALSE; sc->net_dev = net_dev; return (net_dev); } /* * */ static inline netvsc_dev * hv_nv_get_outbound_net_device(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev;; if ((net_dev != NULL) && net_dev->destroy) { return (NULL); } return (net_dev); } /* * */ static inline netvsc_dev * hv_nv_get_inbound_net_device(struct hv_device *device) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev;; if (net_dev == NULL) { return (net_dev); } /* * When the device is being destroyed; we only * permit incoming packets if and only if there * are outstanding sends. */ if (net_dev->destroy) { return (NULL); } return (net_dev); } int hv_nv_get_next_send_section(netvsc_dev *net_dev) { unsigned long bitsmap_words = net_dev->bitsmap_words; unsigned long *bitsmap = net_dev->send_section_bitsmap; unsigned long idx; int ret = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; int i; for (i = 0; i < bitsmap_words; i++) { idx = ffsl(~bitsmap[i]); if (0 == idx) continue; idx--; KASSERT(i * BITS_PER_LONG + idx < net_dev->send_section_count, ("invalid i %d and idx %lu", i, idx)); if (atomic_testandset_long(&bitsmap[i], idx)) continue; ret = i * BITS_PER_LONG + idx; break; } return (ret); } /* * Net VSC initialize receive buffer with net VSP * * Net VSP: Network virtual services client, also known as the * Hyper-V extensible switch and the synthetic data path. */ static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_NETVSC, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); /* * Establish the GPADL handle for this buffer on this channel. * Note: This call uses the vmbus connection rather than the * channel to establish the gpadl handle. * GPADL: Guest physical address descriptor list. */ ret = hv_vmbus_channel_establish_gpadl( device->channel, net_dev->rx_buf, net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle); if (ret != 0) { goto cleanup; } /* sema_wait(&ext->channel_init_sema); KYS CHECK */ /* Notify the NetVsp of the gpadl handle */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf; init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = net_dev->rx_buf_gpadl_handle; init_pkt->msgs.vers_1_msgs.send_rx_buf.id = NETVSC_RECEIVE_BUFFER_ID; /* Send the gpadl notification request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } sema_wait(&net_dev->channel_init_sema); /* Check the response */ if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status != nvsp_status_success) { ret = EINVAL; goto cleanup; } net_dev->rx_section_count = init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; net_dev->rx_sections = malloc(net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK); memcpy(net_dev->rx_sections, init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections, net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section)); /* * For first release, there should only be 1 section that represents * the entire receive buffer */ if (net_dev->rx_section_count != 1 || net_dev->rx_sections->offset != 0) { ret = EINVAL; goto cleanup; } goto exit; cleanup: hv_nv_destroy_rx_buffer(net_dev); exit: return (ret); } /* * Net VSC initialize send buffer with net VSP */ static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } net_dev->send_buf = contigmalloc(net_dev->send_buf_size, M_NETVSC, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); if (net_dev->send_buf == NULL) { ret = ENOMEM; goto cleanup; } /* * Establish the gpadl handle for this buffer on this channel. * Note: This call uses the vmbus connection rather than the * channel to establish the gpadl handle. */ ret = hv_vmbus_channel_establish_gpadl(device->channel, net_dev->send_buf, net_dev->send_buf_size, &net_dev->send_buf_gpadl_handle); if (ret != 0) { goto cleanup; } /* Notify the NetVsp of the gpadl handle */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf; init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = net_dev->send_buf_gpadl_handle; init_pkt->msgs.vers_1_msgs.send_rx_buf.id = NETVSC_SEND_BUFFER_ID; /* Send the gpadl notification request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } sema_wait(&net_dev->channel_init_sema); /* Check the response */ if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status != nvsp_status_success) { ret = EINVAL; goto cleanup; } net_dev->send_section_size = init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size; net_dev->send_section_count = net_dev->send_buf_size / net_dev->send_section_size; net_dev->bitsmap_words = howmany(net_dev->send_section_count, BITS_PER_LONG); net_dev->send_section_bitsmap = malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC, M_WAITOK | M_ZERO); goto exit; cleanup: hv_nv_destroy_send_buffer(net_dev); exit: return (ret); } /* * Net VSC destroy receive buffer */ static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) { nvsp_msg *revoke_pkt; int ret = 0; /* * If we got a section count, it means we received a * send_rx_buf_complete msg * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, * we need to send a revoke msg here */ if (net_dev->rx_section_count) { /* Send the revoke receive buffer */ revoke_pkt = &net_dev->revoke_packet; memset(revoke_pkt, 0, sizeof(nvsp_msg)); revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf; revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id = NETVSC_RECEIVE_BUFFER_ID; ret = hv_vmbus_channel_send_packet(net_dev->dev->channel, revoke_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } } /* Tear down the gpadl on the vsp end */ if (net_dev->rx_buf_gpadl_handle) { ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel, net_dev->rx_buf_gpadl_handle); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } net_dev->rx_buf_gpadl_handle = 0; } if (net_dev->rx_buf) { /* Free up the receive buffer */ contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_NETVSC); net_dev->rx_buf = NULL; } if (net_dev->rx_sections) { free(net_dev->rx_sections, M_NETVSC); net_dev->rx_sections = NULL; net_dev->rx_section_count = 0; } return (ret); } /* * Net VSC destroy send buffer */ static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev) { nvsp_msg *revoke_pkt; int ret = 0; /* * If we got a section count, it means we received a * send_rx_buf_complete msg * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, * we need to send a revoke msg here */ if (net_dev->send_section_size) { /* Send the revoke send buffer */ revoke_pkt = &net_dev->revoke_packet; memset(revoke_pkt, 0, sizeof(nvsp_msg)); revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_send_buf; revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id = NETVSC_SEND_BUFFER_ID; ret = hv_vmbus_channel_send_packet(net_dev->dev->channel, revoke_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } } /* Tear down the gpadl on the vsp end */ if (net_dev->send_buf_gpadl_handle) { ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel, net_dev->send_buf_gpadl_handle); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } net_dev->send_buf_gpadl_handle = 0; } if (net_dev->send_buf) { /* Free up the receive buffer */ contigfree(net_dev->send_buf, net_dev->send_buf_size, M_NETVSC); net_dev->send_buf = NULL; } if (net_dev->send_section_bitsmap) { free(net_dev->send_section_bitsmap, M_NETVSC); } return (ret); } /* * Attempt to negotiate the caller-specified NVSP version * * For NVSP v2, Server 2008 R2 does not set * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers * to the negotiated version, so we cannot rely on that. */ static int hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev, uint32_t nvsp_ver) { nvsp_msg *init_pkt; int ret; init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_type_init; /* * Specify parameter as the only acceptable protocol version */ init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver; init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver; /* Send the init request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) return (-1); sema_wait(&net_dev->channel_init_sema); if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success) return (EINVAL); return (0); } /* * Send NDIS version 2 config packet containing MTU. * * Not valid for NDIS version 1. */ static int hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) return (-ENODEV); /* * Set up configuration packet, write MTU * Indicate we are capable of handling VLAN tags */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config; init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu; init_pkt-> msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q = 1; /* Send the configuration packet */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (ret != 0) return (-EINVAL); return (0); } /* * Net VSC connect to VSP */ static int hv_nv_connect_to_vsp(struct hv_device *device) { netvsc_dev *net_dev; nvsp_msg *init_pkt; uint32_t ndis_version; uint32_t protocol_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; int i; int protocol_number = nitems(protocol_list); int ret = 0; device_t dev = device->device; hn_softc_t *sc = device_get_softc(dev); struct ifnet *ifp = sc->hn_ifp; net_dev = hv_nv_get_outbound_net_device(device); if (!net_dev) { return (ENODEV); } /* * Negotiate the NVSP version. Try the latest NVSP first. */ for (i = protocol_number - 1; i >= 0; i--) { if (hv_nv_negotiate_nvsp_protocol(device, net_dev, protocol_list[i]) == 0) { net_dev->nvsp_version = protocol_list[i]; if (bootverbose) device_printf(dev, "Netvsc: got version 0x%x\n", net_dev->nvsp_version); break; } } if (i < 0) { if (bootverbose) device_printf(dev, "failed to negotiate a valid " "protocol.\n"); return (EPROTO); } /* * Set the MTU if supported by this NVSP protocol version * This needs to be right after the NVSP init message per Haiyang */ if (net_dev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) ret = hv_nv_send_ndis_config(device, ifp->if_mtu); /* * Send the NDIS version */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) { ndis_version = NDIS_VERSION_6_1; } else { ndis_version = NDIS_VERSION_6_30; } init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers = (ndis_version & 0xFFFF0000) >> 16; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers = ndis_version & 0xFFFF; /* Send the init request */ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (ret != 0) { goto cleanup; } /* * TODO: BUGBUG - We have to wait for the above msg since the netvsp * uses KMCL which acknowledges packet (completion packet) * since our Vmbus always set the * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag */ /* sema_wait(&NetVscChannel->channel_init_sema); */ /* Post the big receive buffer to NetVSP */ if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; else net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; ret = hv_nv_init_rx_buffer_with_net_vsp(device); if (ret == 0) ret = hv_nv_init_send_buffer_with_net_vsp(device); cleanup: return (ret); } /* * Net VSC disconnect from VSP */ static void hv_nv_disconnect_from_vsp(netvsc_dev *net_dev) { hv_nv_destroy_rx_buffer(net_dev); hv_nv_destroy_send_buffer(net_dev); } /* * Net VSC on device add * * Callback when the device belonging to this driver is added */ netvsc_dev * hv_nv_on_device_add(struct hv_device *device, void *additional_info) { + struct hv_vmbus_channel *chan = device->channel; netvsc_dev *net_dev; int ret = 0; net_dev = hv_nv_alloc_net_device(device); - if (!net_dev) - goto cleanup; + if (net_dev == NULL) + return NULL; /* Initialize the NetVSC channel extension */ sema_init(&net_dev->channel_init_sema, 0, "netdev_sema"); + chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); + /* * Open the channel */ - ret = hv_vmbus_channel_open(device->channel, + ret = hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE, - NULL, 0, hv_nv_on_channel_callback, device->channel); - if (ret != 0) + NULL, 0, hv_nv_on_channel_callback, chan); + if (ret != 0) { + free(chan->hv_chan_rdbuf, M_NETVSC); goto cleanup; + } /* * Connect with the NetVsp */ ret = hv_nv_connect_to_vsp(device); if (ret != 0) goto close; return (net_dev); close: /* Now, we can close the channel safely */ + free(chan->hv_chan_rdbuf, M_NETVSC); + hv_vmbus_channel_close(chan); - hv_vmbus_channel_close(device->channel); - cleanup: /* * Free the packet buffers on the netvsc device packet queue. * Release other resources. */ if (net_dev) { sema_destroy(&net_dev->channel_init_sema); free(net_dev, M_NETVSC); } return (NULL); } /* * Net VSC on device remove */ int hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel) { hn_softc_t *sc = device_get_softc(device->device); netvsc_dev *net_dev = sc->net_dev;; /* Stop outbound traffic ie sends and receives completions */ net_dev->destroy = TRUE; hv_nv_disconnect_from_vsp(net_dev); /* At this point, no one should be accessing net_dev except in here */ /* Now, we can close the channel safely */ if (!destroy_channel) { device->channel->state = HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE; } + free(device->channel->hv_chan_rdbuf, M_NETVSC); hv_vmbus_channel_close(device->channel); sema_destroy(&net_dev->channel_init_sema); free(net_dev, M_NETVSC); return (0); } /* * Net VSC on send completion */ static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct hv_device *device, hv_vm_packet_descriptor *pkt) { nvsp_msg *nvsp_msg_pkt; netvsc_packet *net_vsc_pkt; nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_rx_buf_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_send_buf_complete) { /* Copy the response back */ memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt, sizeof(nvsp_msg)); sema_post(&net_dev->channel_init_sema); } else if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_rndis_pkt_complete) { /* Get the send context */ net_vsc_pkt = (netvsc_packet *)(unsigned long)pkt->transaction_id; if (NULL != net_vsc_pkt) { if (net_vsc_pkt->send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { u_long mask; int idx; idx = net_vsc_pkt->send_buf_section_idx / BITS_PER_LONG; KASSERT(idx < net_dev->bitsmap_words, ("invalid section index %u", net_vsc_pkt->send_buf_section_idx)); mask = 1UL << (net_vsc_pkt->send_buf_section_idx % BITS_PER_LONG); KASSERT(net_dev->send_section_bitsmap[idx] & mask, ("index bitmap 0x%lx, section index %u, " "bitmap idx %d, bitmask 0x%lx", net_dev->send_section_bitsmap[idx], net_vsc_pkt->send_buf_section_idx, idx, mask)); atomic_clear_long( &net_dev->send_section_bitsmap[idx], mask); } /* Notify the layer above us */ net_vsc_pkt->compl.send.on_send_completion( net_vsc_pkt->compl.send.send_completion_context); } } } /* * Net VSC on send * Sends a packet on the specified Hyper-V device. * Returns 0 on success, non-zero on failure. */ int hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt) { nvsp_msg send_msg; int ret; send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt; if (pkt->is_data_pkt) { /* 0 is RMC_DATA */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0; } else { /* 1 is RMC_CONTROL */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1; } send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx = pkt->send_buf_section_idx; send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = pkt->send_buf_section_size; if (pkt->page_buf_count) { ret = hv_vmbus_channel_send_packet_pagebuffer(chan, pkt->page_buffers, pkt->page_buf_count, &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); } else { ret = hv_vmbus_channel_send_packet(chan, &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); } return (ret); } /* * Net VSC on receive * * In the FreeBSD Hyper-V virtual world, this function deals exclusively * with virtual addresses. */ static void hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device, struct hv_vmbus_channel *chan, hv_vm_packet_descriptor *pkt) { hv_vm_transfer_page_packet_header *vm_xfer_page_pkt; nvsp_msg *nvsp_msg_pkt; netvsc_packet vsc_pkt; netvsc_packet *net_vsc_pkt = &vsc_pkt; device_t dev = device->device; int count = 0; int i = 0; int status = nvsp_status_success; /* * All inbound packets other than send completion should be * xfer page packet. */ if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES) { device_printf(dev, "packet type %d is invalid!\n", pkt->type); return; } nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); /* Make sure this is a valid nvsp packet */ if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt) { device_printf(dev, "packet hdr type %d is invalid!\n", pkt->type); return; } vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt; if (vm_xfer_page_pkt->transfer_page_set_id != NETVSC_RECEIVE_BUFFER_ID) { device_printf(dev, "transfer_page_set_id %d is invalid!\n", vm_xfer_page_pkt->transfer_page_set_id); return; } count = vm_xfer_page_pkt->range_count; net_vsc_pkt->device = device; /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ for (i = 0; i < count; i++) { net_vsc_pkt->status = nvsp_status_success; net_vsc_pkt->data = (void *)((unsigned long)net_dev->rx_buf + vm_xfer_page_pkt->ranges[i].byte_offset); net_vsc_pkt->tot_data_buf_len = vm_xfer_page_pkt->ranges[i].byte_count; hv_rf_on_receive(net_dev, device, chan, net_vsc_pkt); if (net_vsc_pkt->status != nvsp_status_success) { status = nvsp_status_failure; } } /* * Moved completion call back here so that all received * messages (not just data messages) will trigger a response * message back to the host. */ hv_nv_on_receive_completion(chan, vm_xfer_page_pkt->d.transaction_id, status); } /* * Net VSC on receive completion * * Send a receive completion packet to RNDIS device (ie NetVsp) */ static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan, uint64_t tid, uint32_t status) { nvsp_msg rx_comp_msg; int retries = 0; int ret = 0; rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete; /* Pass in the status */ rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status = status; retry_send_cmplt: /* Send the completion */ ret = hv_vmbus_channel_send_packet(chan, &rx_comp_msg, sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0); if (ret == 0) { /* success */ /* no-op */ } else if (ret == EAGAIN) { /* no more room... wait a bit and attempt to retry 3 times */ retries++; if (retries < 4) { DELAY(100); goto retry_send_cmplt; } } } /* * Net VSC on channel callback */ static void hv_nv_on_channel_callback(void *xchan) { struct hv_vmbus_channel *chan = xchan; struct hv_device *device = chan->device; netvsc_dev *net_dev; device_t dev = device->device; uint32_t bytes_rxed; uint64_t request_id; hv_vm_packet_descriptor *desc; uint8_t *buffer; int bufferlen = NETVSC_PACKET_SIZE; int ret = 0; net_dev = hv_nv_get_inbound_net_device(device); if (net_dev == NULL) return; - buffer = net_dev->callback_buf; + buffer = chan->hv_chan_rdbuf; do { ret = hv_vmbus_channel_recv_packet_raw(chan, buffer, bufferlen, &bytes_rxed, &request_id); if (ret == 0) { if (bytes_rxed > 0) { desc = (hv_vm_packet_descriptor *)buffer; switch (desc->type) { case HV_VMBUS_PACKET_TYPE_COMPLETION: hv_nv_on_send_completion(net_dev, device, desc); break; case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES: hv_nv_on_receive(net_dev, device, chan, desc); break; default: device_printf(dev, "hv_cb recv unknow type %d " " packet\n", desc->type); break; } } else { break; } } else if (ret == ENOBUFS) { /* Handle large packet */ if (bufferlen > NETVSC_PACKET_SIZE) { free(buffer, M_NETVSC); buffer = NULL; } /* alloc new buffer */ buffer = malloc(bytes_rxed, M_NETVSC, M_NOWAIT); if (buffer == NULL) { device_printf(dev, "hv_cb malloc buffer failed, len=%u\n", bytes_rxed); bufferlen = 0; break; } bufferlen = bytes_rxed; } } while (1); if (bufferlen > NETVSC_PACKET_SIZE) free(buffer, M_NETVSC); hv_rf_channel_rollup(chan); } Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.h =================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 296295) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 296296) @@ -1,1106 +1,1104 @@ /*- * Copyright (c) 2009-2012 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * HyperV vmbus (virtual machine bus) network VSC (virtual services client) * header file * * (Updated from unencumbered NvspProtocol.h) */ #ifndef __HV_NET_VSC_H__ #define __HV_NET_VSC_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HN_USE_TXDESC_BUFRING MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 #define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1) #define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2) #define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2 #define VERSION_4_OFFLOAD_SIZE 22 #define NVSP_OPERATIONAL_STATUS_OK (0x00000000) #define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001) #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002) #define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003) #define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004) /* * Maximun number of transfer pages (packets) the VSP will use on a receive */ #define NVSP_MAX_PACKETS_PER_RECEIVE 375 typedef enum nvsp_msg_type_ { nvsp_msg_type_none = 0, /* * Init Messages */ nvsp_msg_type_init = 1, nvsp_msg_type_init_complete = 2, nvsp_version_msg_start = 100, /* * Version 1 Messages */ nvsp_msg_1_type_send_ndis_vers = nvsp_version_msg_start, nvsp_msg_1_type_send_rx_buf, nvsp_msg_1_type_send_rx_buf_complete, nvsp_msg_1_type_revoke_rx_buf, nvsp_msg_1_type_send_send_buf, nvsp_msg_1_type_send_send_buf_complete, nvsp_msg_1_type_revoke_send_buf, nvsp_msg_1_type_send_rndis_pkt, nvsp_msg_1_type_send_rndis_pkt_complete, /* * Version 2 Messages */ nvsp_msg_2_type_send_chimney_delegated_buf, nvsp_msg_2_type_send_chimney_delegated_buf_complete, nvsp_msg_2_type_revoke_chimney_delegated_buf, nvsp_msg_2_type_resume_chimney_rx_indication, nvsp_msg_2_type_terminate_chimney, nvsp_msg_2_type_terminate_chimney_complete, nvsp_msg_2_type_indicate_chimney_event, nvsp_msg_2_type_send_chimney_packet, nvsp_msg_2_type_send_chimney_packet_complete, nvsp_msg_2_type_post_chimney_rx_request, nvsp_msg_2_type_post_chimney_rx_request_complete, nvsp_msg_2_type_alloc_rx_buf, nvsp_msg_2_type_alloc_rx_buf_complete, nvsp_msg_2_type_free_rx_buf, nvsp_msg_2_send_vmq_rndis_pkt, nvsp_msg_2_send_vmq_rndis_pkt_complete, nvsp_msg_2_type_send_ndis_config, nvsp_msg_2_type_alloc_chimney_handle, nvsp_msg_2_type_alloc_chimney_handle_complete, } nvsp_msg_type; typedef enum nvsp_status_ { nvsp_status_none = 0, nvsp_status_success, nvsp_status_failure, /* Deprecated */ nvsp_status_prot_vers_range_too_new, /* Deprecated */ nvsp_status_prot_vers_range_too_old, nvsp_status_invalid_rndis_pkt, nvsp_status_busy, nvsp_status_max, } nvsp_status; typedef struct nvsp_msg_hdr_ { uint32_t msg_type; } __packed nvsp_msg_hdr; /* * Init Messages */ /* * This message is used by the VSC to initialize the channel * after the channels has been opened. This message should * never include anything other then versioning (i.e. this * message will be the same for ever). * * Forever is a long time. The values have been redefined * in Win7 to indicate major and minor protocol version * number. */ typedef struct nvsp_msg_init_ { union { struct { uint16_t minor_protocol_version; uint16_t major_protocol_version; } s; /* Formerly min_protocol_version */ uint32_t protocol_version; } p1; /* Formerly max_protocol_version */ uint32_t protocol_version_2; } __packed nvsp_msg_init; /* * This message is used by the VSP to complete the initialization * of the channel. This message should never include anything other * then versioning (i.e. this message will be the same forever). */ typedef struct nvsp_msg_init_complete_ { /* Deprecated */ uint32_t negotiated_prot_vers; uint32_t max_mdl_chain_len; uint32_t status; } __packed nvsp_msg_init_complete; typedef union nvsp_msg_init_uber_ { nvsp_msg_init init; nvsp_msg_init_complete init_compl; } __packed nvsp_msg_init_uber; /* * Version 1 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_1_msg_send_ndis_version_ { uint32_t ndis_major_vers; /* Deprecated */ uint32_t ndis_minor_vers; } __packed nvsp_1_msg_send_ndis_version; /* * This message is used by the VSC to send a receive buffer * to the VSP. The VSP can then use the receive buffer to * send data to the VSC. */ typedef struct nvsp_1_msg_send_rx_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_rx_buf; typedef struct nvsp_1_rx_buf_section_ { uint32_t offset; uint32_t sub_allocation_size; uint32_t num_sub_allocations; uint32_t end_offset; } __packed nvsp_1_rx_buf_section; /* * This message is used by the VSP to acknowledge a receive * buffer send by the VSC. This message must be sent by the * VSP before the VSP uses the receive buffer. */ typedef struct nvsp_1_msg_send_rx_buf_complete_ { uint32_t status; uint32_t num_sections; /* * The receive buffer is split into two parts, a large * suballocation section and a small suballocation * section. These sections are then suballocated by a * certain size. * * For example, the following break up of the receive * buffer has 6 large suballocations and 10 small * suballocations. * * | Large Section | | Small Section | * ------------------------------------------------------------ * | | | | | | | | | | | | | | | | | | * | | * LargeOffset SmallOffset */ nvsp_1_rx_buf_section sections[1]; } __packed nvsp_1_msg_send_rx_buf_complete; /* * This message is sent by the VSC to revoke the receive buffer. * After the VSP completes this transaction, the VSP should never * use the receive buffer again. */ typedef struct nvsp_1_msg_revoke_rx_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_rx_buf; /* * This message is used by the VSC to send a send buffer * to the VSP. The VSC can then use the send buffer to * send data to the VSP. */ typedef struct nvsp_1_msg_send_send_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_send_buf; /* * This message is used by the VSP to acknowledge a send * buffer sent by the VSC. This message must be sent by the * VSP before the VSP uses the sent buffer. */ typedef struct nvsp_1_msg_send_send_buf_complete_ { uint32_t status; /* * The VSC gets to choose the size of the send buffer and * the VSP gets to choose the sections size of the buffer. * This was done to enable dynamic reconfigurations when * the cost of GPA-direct buffers decreases. */ uint32_t section_size; } __packed nvsp_1_msg_send_send_buf_complete; /* * This message is sent by the VSC to revoke the send buffer. * After the VSP completes this transaction, the vsp should never * use the send buffer again. */ typedef struct nvsp_1_msg_revoke_send_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_send_buf; /* * This message is used by both the VSP and the VSC to send * an RNDIS message to the opposite channel endpoint. */ typedef struct nvsp_1_msg_send_rndis_pkt_ { /* * This field is specified by RNIDS. They assume there's * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. */ uint32_t chan_type; /* * This field is used to send part or all of the data * through a send buffer. This values specifies an * index into the send buffer. If the index is * 0xFFFFFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint32_t send_buf_section_idx; uint32_t send_buf_section_size; } __packed nvsp_1_msg_send_rndis_pkt; /* * This message is used by both the VSP and the VSC to complete * a RNDIS message to the opposite channel endpoint. At this * point, the initiator of this message cannot use any resources * associated with the original RNDIS packet. */ typedef struct nvsp_1_msg_send_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_1_msg_send_rndis_pkt_complete; /* * Version 2 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_2_netvsc_capabilities_ { union { uint64_t as_uint64; struct { uint64_t vmq : 1; uint64_t chimney : 1; uint64_t sriov : 1; uint64_t ieee8021q : 1; uint64_t correlationid : 1; uint64_t teaming : 1; } u2; } u1; } __packed nvsp_2_netvsc_capabilities; typedef struct nvsp_2_msg_send_ndis_config_ { uint32_t mtu; uint32_t reserved; nvsp_2_netvsc_capabilities capabilities; } __packed nvsp_2_msg_send_ndis_config; /* * NvspMessage2TypeSendChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_send_chimney_buf_ { /* * On WIN7 beta, delegated_obj_max_size is defined as a uint32_t * Since WIN7 RC, it was split into two uint16_t. To have the same * struct layout, delegated_obj_max_size shall be the first field. */ uint16_t delegated_obj_max_size; /* * The revision # of chimney protocol used between NVSC and NVSP. * * This revision is NOT related to the chimney revision between * NDIS protocol and miniport drivers. */ uint16_t revision; uint32_t gpadl_handle; } __packed nvsp_2_msg_send_chimney_buf; /* Unsupported chimney revision 0 (only present in WIN7 beta) */ #define NVSP_CHIMNEY_REVISION_0 0 /* WIN7 Beta Chimney QFE */ #define NVSP_CHIMNEY_REVISION_1 1 /* The chimney revision since WIN7 RC */ #define NVSP_CHIMNEY_REVISION_2 2 /* * NvspMessage2TypeSendChimneyDelegatedBufferComplete */ typedef struct nvsp_2_msg_send_chimney_buf_complete_ { uint32_t status; /* * Maximum number outstanding sends and pre-posted receives. * * NVSC should not post more than SendQuota/ReceiveQuota packets. * Otherwise, it can block the non-chimney path for an indefinite * amount of time. * (since chimney sends/receives are affected by the remote peer). * * Note: NVSP enforces the quota restrictions on a per-VMBCHANNEL * basis. It doesn't enforce the restriction separately for chimney * send/receive. If NVSC doesn't voluntarily enforce "SendQuota", * it may kill its own network connectivity. */ uint32_t send_quota; uint32_t rx_quota; } __packed nvsp_2_msg_send_chimney_buf_complete; /* * NvspMessage2TypeRevokeChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_revoke_chimney_buf_ { uint32_t gpadl_handle; } __packed nvsp_2_msg_revoke_chimney_buf; #define NVSP_CHIMNEY_OBJECT_TYPE_NEIGHBOR 0 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH4 1 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH6 2 #define NVSP_CHIMNEY_OBJECT_TYPE_TCP 3 /* * NvspMessage2TypeAllocateChimneyHandle */ typedef struct nvsp_2_msg_alloc_chimney_handle_ { uint64_t vsc_context; uint32_t object_type; } __packed nvsp_2_msg_alloc_chimney_handle; /* * NvspMessage2TypeAllocateChimneyHandleComplete */ typedef struct nvsp_2_msg_alloc_chimney_handle_complete_ { uint32_t vsp_handle; } __packed nvsp_2_msg_alloc_chimney_handle_complete; /* * NvspMessage2TypeResumeChimneyRXIndication */ typedef struct nvsp_2_msg_resume_chimney_rx_indication { /* * Handle identifying the offloaded connection */ uint32_t vsp_tcp_handle; } __packed nvsp_2_msg_resume_chimney_rx_indication; #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE (0x01u) #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_RESERVED (~(0x01u)) /* * NvspMessage2TypeTerminateChimney */ typedef struct nvsp_2_msg_terminate_chimney_ { /* * Handle identifying the offloaded object */ uint32_t vsp_handle; /* * Terminate Offload Flags * Bit 0: * When set to 0, terminate the offload at the destination NIC * Bit 1-31: Reserved, shall be zero */ uint32_t flags; union { /* * This field is valid only when bit 0 of flags is clear. * It specifies the index into the premapped delegated * object buffer. The buffer was sent through the * NvspMessage2TypeSendChimneyDelegatedBuffer * message at initialization time. * * NVSP will write the delegated state into the delegated * buffer upon upload completion. */ uint32_t index; /* * This field is valid only when bit 0 of flags is set. * * The seqence number of the most recently accepted RX * indication when VSC sets its TCP context into * "terminating" state. * * This allows NVSP to determines if there are any in-flight * RX indications for which the acceptance state is still * undefined. */ uint64_t last_accepted_rx_seq_no; } f0; } __packed nvsp_2_msg_terminate_chimney; #define NVSP_TERMINATE_CHIMNEY_COMPLETE_FLAG_DATA_CORRUPTED 0x0000001u /* * NvspMessage2TypeTerminateChimneyComplete */ typedef struct nvsp_2_msg_terminate_chimney_complete_ { uint64_t vsc_context; uint32_t flags; } __packed nvsp_2_msg_terminate_chimney_complete; /* * NvspMessage2TypeIndicateChimneyEvent */ typedef struct nvsp_2_msg_indicate_chimney_event_ { /* * When VscTcpContext is 0, event_type is an NDIS_STATUS event code * Otherwise, EventType is an TCP connection event (defined in * NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_type; /* * When VscTcpContext is 0, EventType is an NDIS_STATUS event code * Otherwise, EventType is an TCP connection event specific information * (defined in NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_specific_info; /* * If not 0, the event is per-TCP connection event. This field * contains the VSC's TCP context. * If 0, the event indication is global. */ uint64_t vsc_tcp_context; } __packed nvsp_2_msg_indicate_chimney_event; #define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX 0xffffu #define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX 0xffffffff /* * NvspMessage2TypeSendChimneyPacket */ typedef struct nvsp_2_msg_send_chimney_pkt_ { /* * Identify the TCP connection for which this chimney send is */ uint32_t vsp_tcp_handle; /* * This field is used to send part or all of the data * through a send buffer. This values specifies an * index into the send buffer. If the index is * 0xFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint16_t send_buf_section_index; uint16_t send_buf_section_size; /* * OOB Data Index * This an index to the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall be always 0xFFFFFFFF for now. It is reserved for * the future. */ uint16_t oob_data_index; /* * DisconnectFlags = 0 * Normal chimney send. See MiniportTcpOffloadSend for details. * * DisconnectFlags = TCP_DISCONNECT_GRACEFUL_CLOSE (0x01) * Graceful disconnect. See MiniportTcpOffloadDisconnect for details. * * DisconnectFlags = TCP_DISCONNECT_ABORTIVE_CLOSE (0x02) * Abortive disconnect. See MiniportTcpOffloadDisconnect for details. */ uint16_t disconnect_flags; uint32_t seq_no; } __packed nvsp_2_msg_send_chimney_pkt; /* * NvspMessage2TypeSendChimneyPacketComplete */ typedef struct nvsp_2_msg_send_chimney_pkt_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by the peer). */ uint32_t bytes_transferred; } __packed nvsp_2_msg_send_chimney_pkt_complete; #define NVSP_1_CHIMNEY_RECV_FLAG_NO_PUSH 0x0001u #define NVSP_1_CHIMNEY_RECV_INVALID_OOB_INDEX 0xffffu /* * NvspMessage2TypePostChimneyRecvRequest */ typedef struct nvsp_2_msg_post_chimney_rx_request_ { /* * Identify the TCP connection which this chimney receive request * is for. */ uint32_t vsp_tcp_handle; /* * OOB Data Index * This an index to the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall be always 0xFFFFFFFF for now. It is reserved for * the future. */ uint32_t oob_data_index; /* * Bit 0 * When it is set, this is a "no-push" receive. * When it is clear, this is a "push" receive. * * Bit 1-15: Reserved and shall be zero */ uint16_t flags; /* * For debugging and diagnoses purpose. * The SeqNo is per TCP connection and starts from 0. */ uint32_t seq_no; } __packed nvsp_2_msg_post_chimney_rx_request; /* * NvspMessage2TypePostChimneyRecvRequestComplete */ typedef struct nvsp_2_msg_post_chimney_rx_request_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by * the peer). */ uint32_t bytes_xferred; } __packed nvsp_2_msg_post_chimney_rx_request_complete; /* * NvspMessage2TypeAllocateReceiveBuffer */ typedef struct nvsp_2_msg_alloc_rx_buf_ { /* * Allocation ID to match the allocation request and response */ uint32_t allocation_id; /* * Length of the VM shared memory receive buffer that needs to * be allocated */ uint32_t length; } __packed nvsp_2_msg_alloc_rx_buf; /* * NvspMessage2TypeAllocateReceiveBufferComplete */ typedef struct nvsp_2_msg_alloc_rx_buf_complete_ { /* * The NDIS_STATUS code for buffer allocation */ uint32_t status; /* * Allocation ID from NVSP_2_MESSAGE_ALLOCATE_RECEIVE_BUFFER */ uint32_t allocation_id; /* * GPADL handle for the allocated receive buffer */ uint32_t gpadl_handle; /* * Receive buffer ID that is further used in * NvspMessage2SendVmqRndisPacket */ uint64_t rx_buf_id; } __packed nvsp_2_msg_alloc_rx_buf_complete; /* * NvspMessage2TypeFreeReceiveBuffer */ typedef struct nvsp_2_msg_free_rx_buf_ { /* * Receive buffer ID previous returned in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t rx_buf_id; } __packed nvsp_2_msg_free_rx_buf; /* * This structure is used in defining the buffers in * NVSP_2_MESSAGE_SEND_VMQ_RNDIS_PACKET structure */ typedef struct nvsp_xfer_page_range_ { /* * Specifies the ID of the receive buffer that has the buffer. This * ID can be the general receive buffer ID specified in * NvspMessage1TypeSendReceiveBuffer or it can be the shared memory * receive buffer ID allocated by the VSC and specified in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t xfer_page_set_id; /* * Number of bytes */ uint32_t byte_count; /* * Offset in bytes from the beginning of the buffer */ uint32_t byte_offset; } __packed nvsp_xfer_page_range; /* * NvspMessage2SendVmqRndisPacket */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_ { /* * This field is specified by RNIDS. They assume there's * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. It must be RMC_DATA */ uint32_t channel_type; /* * Only the Range element corresponding to the RNDIS header of * the first RNDIS message in the multiple RNDIS messages sent * in one NVSP message. Information about the data portions as well * as the subsequent RNDIS messages in the same NVSP message are * embedded in the RNDIS header itself */ nvsp_xfer_page_range range; } __packed nvsp_2_msg_send_vmq_rndis_pkt; /* * This message is used by the VSC to complete * a RNDIS VMQ message to the VSP. At this point, * the initiator of this message can use any resources * associated with the original RNDIS VMQ packet. */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_2_msg_send_vmq_rndis_pkt_complete; typedef union nvsp_1_msg_uber_ { nvsp_1_msg_send_ndis_version send_ndis_vers; nvsp_1_msg_send_rx_buf send_rx_buf; nvsp_1_msg_send_rx_buf_complete send_rx_buf_complete; nvsp_1_msg_revoke_rx_buf revoke_rx_buf; nvsp_1_msg_send_send_buf send_send_buf; nvsp_1_msg_send_send_buf_complete send_send_buf_complete; nvsp_1_msg_revoke_send_buf revoke_send_buf; nvsp_1_msg_send_rndis_pkt send_rndis_pkt; nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete; } __packed nvsp_1_msg_uber; typedef union nvsp_2_msg_uber_ { nvsp_2_msg_send_ndis_config send_ndis_config; nvsp_2_msg_send_chimney_buf send_chimney_buf; nvsp_2_msg_send_chimney_buf_complete send_chimney_buf_complete; nvsp_2_msg_revoke_chimney_buf revoke_chimney_buf; nvsp_2_msg_resume_chimney_rx_indication resume_chimney_rx_indication; nvsp_2_msg_terminate_chimney terminate_chimney; nvsp_2_msg_terminate_chimney_complete terminate_chimney_complete; nvsp_2_msg_indicate_chimney_event indicate_chimney_event; nvsp_2_msg_send_chimney_pkt send_chimney_packet; nvsp_2_msg_send_chimney_pkt_complete send_chimney_packet_complete; nvsp_2_msg_post_chimney_rx_request post_chimney_rx_request; nvsp_2_msg_post_chimney_rx_request_complete post_chimney_rx_request_complete; nvsp_2_msg_alloc_rx_buf alloc_rx_buffer; nvsp_2_msg_alloc_rx_buf_complete alloc_rx_buffer_complete; nvsp_2_msg_free_rx_buf free_rx_buffer; nvsp_2_msg_send_vmq_rndis_pkt send_vmq_rndis_pkt; nvsp_2_msg_send_vmq_rndis_pkt_complete send_vmq_rndis_pkt_complete; nvsp_2_msg_alloc_chimney_handle alloc_chimney_handle; nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete; } __packed nvsp_2_msg_uber; typedef union nvsp_all_msgs_ { nvsp_msg_init_uber init_msgs; nvsp_1_msg_uber vers_1_msgs; nvsp_2_msg_uber vers_2_msgs; } __packed nvsp_all_msgs; /* * ALL Messages */ typedef struct nvsp_msg_ { nvsp_msg_hdr hdr; nvsp_all_msgs msgs; } __packed nvsp_msg; /* * The following arguably belongs in a separate header file */ /* * Defines */ #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */ #define NETVSC_SEND_BUFFER_ID 0xface #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_RECEIVE_SG_COUNT 1 /* Preallocated receive packets */ #define NETVSC_RECEIVE_PACKETLIST_COUNT 256 /* * Maximum MTU we permit to be configured for a netvsc interface. * When the code was developed, a max MTU of 12232 was tested and * proven to work. 9K is a reasonable maximum for an Ethernet. */ #define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024) #define NETVSC_PACKET_SIZE PAGE_SIZE /* * Data types */ /* * Per netvsc channel-specific */ typedef struct netvsc_dev_ { struct hv_device *dev; /* Send buffer allocated by us but manages by NetVSP */ void *send_buf; uint32_t send_buf_size; uint32_t send_buf_gpadl_handle; uint32_t send_section_size; uint32_t send_section_count; unsigned long bitsmap_words; unsigned long *send_section_bitsmap; /* Receive buffer allocated by us but managed by NetVSP */ void *rx_buf; uint32_t rx_buf_size; uint32_t rx_buf_gpadl_handle; uint32_t rx_section_count; nvsp_1_rx_buf_section *rx_sections; /* Used for NetVSP initialization protocol */ struct sema channel_init_sema; nvsp_msg channel_init_packet; nvsp_msg revoke_packet; /*uint8_t hw_mac_addr[HW_MACADDR_LEN];*/ /* Holds rndis device info */ void *extension; hv_bool_uint8_t destroy; /* Negotiated NVSP version */ uint32_t nvsp_version; - - uint8_t callback_buf[NETVSC_PACKET_SIZE]; } netvsc_dev; typedef void (*pfn_on_send_rx_completion)(void *); #define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE) #define NETVSC_PACKET_MAXPAGE 32 #define NETVSC_VLAN_PRIO_MASK 0xe000 #define NETVSC_VLAN_PRIO_SHIFT 13 #define NETVSC_VLAN_VID_MASK 0x0fff #define TYPE_IPV4 2 #define TYPE_IPV6 4 #define TYPE_TCP 2 #define TYPE_UDP 4 #define TRANSPORT_TYPE_NOT_IP 0 #define TRANSPORT_TYPE_IPV4_TCP ((TYPE_IPV4 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV4_UDP ((TYPE_IPV4 << 16) | TYPE_UDP) #define TRANSPORT_TYPE_IPV6_TCP ((TYPE_IPV6 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV6_UDP ((TYPE_IPV6 << 16) | TYPE_UDP) #ifdef __LP64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif typedef struct netvsc_packet_ { struct hv_device *device; hv_bool_uint8_t is_data_pkt; /* One byte */ uint16_t vlan_tci; uint32_t status; /* Completion */ union { struct { uint64_t rx_completion_tid; void *rx_completion_context; /* This is no longer used */ pfn_on_send_rx_completion on_rx_completion; } rx; struct { uint64_t send_completion_tid; void *send_completion_context; /* Still used in netvsc and filter code */ pfn_on_send_rx_completion on_send_completion; } send; } compl; uint32_t send_buf_section_idx; uint32_t send_buf_section_size; void *rndis_mesg; uint32_t tot_data_buf_len; void *data; uint32_t page_buf_count; hv_vmbus_page_buffer page_buffers[NETVSC_PACKET_MAXPAGE]; } netvsc_packet; typedef struct { uint8_t mac_addr[6]; /* Assumption unsigned long */ hv_bool_uint8_t link_state; } netvsc_device_info; #ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_rx_ring { struct ifnet *hn_ifp; int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 struct hv_vmbus_channel; struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; #endif int hn_txdesc_cnt; int hn_txdesc_avail; int hn_has_txeof; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; struct mtx hn_tx_lock; struct hn_softc *hn_sc; struct hv_vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_tx_chimney_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; } __aligned(CACHE_LINE_SIZE); /* * Device-specific softc structure */ typedef struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; int hn_if_flags; struct mtx hn_lock; int hn_initdone; /* See hv_netvsc_drv_freebsd.c for rules on how to use */ int temp_unusable; struct hv_device *hn_dev_obj; netvsc_dev *net_dev; int hn_rx_ring_cnt; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; struct hn_tx_ring *hn_tx_ring; int hn_tx_chimney_max; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; } hn_softc_t; /* * Externs */ extern int hv_promisc_mode; void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hv_device *device, void *additional_info); int hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel); int hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt); int hv_nv_get_next_send_section(netvsc_dev *net_dev); #endif /* __HV_NET_VSC_H__ */