Index: head/sys/dev/hyperv/include/hyperv.h =================================================================== --- head/sys/dev/hyperv/include/hyperv.h (revision 302801) +++ head/sys/dev/hyperv/include/hyperv.h (revision 302802) @@ -1,595 +1,583 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV definitions for messages that are sent between instances of the * Channel Management Library in separate partitions, or in some cases, * back to itself. */ #ifndef __HYPERV_H__ #define __HYPERV_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef uint8_t hv_bool_uint8_t; #define HV_S_OK 0x00000000 #define HV_E_FAIL 0x80004005 #define HV_ERROR_NOT_SUPPORTED 0x80070032 #define HV_ERROR_MACHINE_LOCKED 0x800704F7 /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define VMBUS_VERSION_MAJOR(ver) (((uint32_t)(ver)) >> 16) #define VMBUS_VERSION_MINOR(ver) (((uint32_t)(ver)) & 0xffff) /* * Make maximum size of pipe payload of 16K */ #define HV_MAX_PIPE_DATA_PAYLOAD (sizeof(BYTE) * 16384) /* * Define pipe_mode values */ #define HV_VMBUS_PIPE_TYPE_BYTE 0x00000000 #define HV_VMBUS_PIPE_TYPE_MESSAGE 0x00000004 /* * The size of the user defined data buffer for non-pipe offers */ #define HV_MAX_USER_DEFINED_BYTES 120 /* * The size of the user defined data buffer for pipe offers */ #define HV_MAX_PIPE_USER_DEFINED_BYTES 116 #define HV_MAX_PAGE_BUFFER_COUNT 32 #define HV_MAX_MULTIPAGE_BUFFER_COUNT 32 #define HV_ALIGN_UP(value, align) \ (((value) & (align-1)) ? \ (((value) + (align-1)) & ~(align-1) ) : (value)) #define HV_ALIGN_DOWN(value, align) ( (value) & ~(align-1) ) #define HV_NUM_PAGES_SPANNED(addr, len) \ ((HV_ALIGN_UP(addr+len, PAGE_SIZE) - \ HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT ) -typedef struct hv_guid { - uint8_t data[16]; -} __packed hv_guid; +struct hyperv_guid { + uint8_t hv_guid[16]; +} __packed; #define HYPERV_GUID_STRLEN 40 -int hyperv_guid2str(const struct hv_guid *, char *, size_t); +int hyperv_guid2str(const struct hyperv_guid *, char *, size_t); -#define HV_NIC_GUID \ - .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, \ - 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} - -#define HV_IDE_GUID \ - .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \ - 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} - -#define HV_SCSI_GUID \ - .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \ - 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} - /* * At the center of the Channel Management library is * the Channel Offer. This struct contains the * fundamental information about an offer. */ typedef struct hv_vmbus_channel_offer { - hv_guid interface_type; - hv_guid interface_instance; + struct hyperv_guid interface_type; + struct hyperv_guid interface_instance; uint64_t interrupt_latency_in_100ns_units; uint32_t interface_revision; uint32_t server_context_area_size; /* in bytes */ uint16_t channel_flags; uint16_t mmio_megabytes; /* in bytes * 1024 * 1024 */ union { /* * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes. */ struct { uint8_t user_defined[HV_MAX_USER_DEFINED_BYTES]; } __packed standard; /* * Pipes: The following structure is an integrated pipe protocol, which * is implemented on top of standard user-defined data. pipe * clients have HV_MAX_PIPE_USER_DEFINED_BYTES left for their * own use. */ struct { uint32_t pipe_mode; uint8_t user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES]; } __packed pipe; } u; /* * Sub_channel_index, newly added in Win8. */ uint16_t sub_channel_index; uint16_t padding; } __packed hv_vmbus_channel_offer; typedef struct { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; } __packed hv_vm_packet_descriptor; typedef struct { uint32_t byte_count; uint32_t byte_offset; } __packed hv_vm_transfer_page; typedef struct { hv_vm_packet_descriptor d; uint16_t transfer_page_set_id; hv_bool_uint8_t sender_owns_set; uint8_t reserved; uint32_t range_count; hv_vm_transfer_page ranges[1]; } __packed hv_vm_transfer_page_packet_header; typedef enum { HV_VMBUS_PACKET_TYPE_INVALID = 0x0, HV_VMBUS_PACKET_TYPES_SYNCH = 0x1, HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET = 0x2, HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET = 0x3, HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL = 0x4, HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL = 0x5, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND = 0x6, HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES = 0x7, HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL = 0x8, HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT = 0x9, HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST = 0xa, HV_VMBUS_PACKET_TYPE_COMPLETION = 0xb, HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS = 0xc, HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA = 0xd } hv_vmbus_packet_type; #define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED 1 /* * Version 1 messages */ typedef enum { HV_CHANNEL_MESSAGE_INVALID = 0, HV_CHANNEL_MESSAGE_OFFER_CHANNEL = 1, HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER = 2, HV_CHANNEL_MESSAGE_REQUEST_OFFERS = 3, HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED = 4, HV_CHANNEL_MESSAGE_OPEN_CHANNEL = 5, HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT = 6, HV_CHANNEL_MESSAGE_CLOSE_CHANNEL = 7, HV_CHANNEL_MESSAGEL_GPADL_HEADER = 8, HV_CHANNEL_MESSAGE_GPADL_BODY = 9, HV_CHANNEL_MESSAGE_GPADL_CREATED = 10, HV_CHANNEL_MESSAGE_GPADL_TEARDOWN = 11, HV_CHANNEL_MESSAGE_GPADL_TORNDOWN = 12, HV_CHANNEL_MESSAGE_REL_ID_RELEASED = 13, HV_CHANNEL_MESSAGE_INITIATED_CONTACT = 14, HV_CHANNEL_MESSAGE_VERSION_RESPONSE = 15, HV_CHANNEL_MESSAGE_UNLOAD = 16, HV_CHANNEL_MESSAGE_COUNT } hv_vmbus_channel_msg_type; typedef struct { hv_vmbus_channel_msg_type message_type; uint32_t padding; } __packed hv_vmbus_channel_msg_header; /* * Channel Offer parameters */ typedef struct { hv_vmbus_channel_msg_header header; hv_vmbus_channel_offer offer; uint32_t child_rel_id; uint8_t monitor_id; /* * This field has been split into a bit field on Win7 * and higher. */ uint8_t monitor_allocated:1; uint8_t reserved:7; /* * Following fields were added in win7 and higher. * Make sure to check the version before accessing these fields. * * If "is_dedicated_interrupt" is set, we must not set the * associated bit in the channel bitmap while sending the * interrupt to the host. * * connection_id is used in signaling the host. */ uint16_t is_dedicated_interrupt:1; uint16_t reserved1:15; uint32_t connection_id; } __packed hv_vmbus_channel_offer_channel; #define HW_MACADDR_LEN 6 /* * Common defines for Hyper-V ICs */ #define HV_ICMSGTYPE_NEGOTIATE 0 #define HV_ICMSGTYPE_HEARTBEAT 1 #define HV_ICMSGTYPE_KVPEXCHANGE 2 #define HV_ICMSGTYPE_SHUTDOWN 3 #define HV_ICMSGTYPE_TIMESYNC 4 #define HV_ICMSGTYPE_VSS 5 #define HV_ICMSGHDRFLAG_TRANSACTION 1 #define HV_ICMSGHDRFLAG_REQUEST 2 #define HV_ICMSGHDRFLAG_RESPONSE 4 typedef struct hv_vmbus_pipe_hdr { uint32_t flags; uint32_t msgsize; } __packed hv_vmbus_pipe_hdr; typedef struct hv_vmbus_ic_version { uint16_t major; uint16_t minor; } __packed hv_vmbus_ic_version; typedef struct hv_vmbus_icmsg_hdr { hv_vmbus_ic_version icverframe; uint16_t icmsgtype; hv_vmbus_ic_version icvermsg; uint16_t icmsgsize; uint32_t status; uint8_t ictransaction_id; uint8_t icflags; uint8_t reserved[2]; } __packed hv_vmbus_icmsg_hdr; typedef struct hv_vmbus_icmsg_negotiate { uint16_t icframe_vercnt; uint16_t icmsg_vercnt; uint32_t reserved; hv_vmbus_ic_version icversion_data[1]; /* any size array */ } __packed hv_vmbus_icmsg_negotiate; typedef struct hv_vmbus_shutdown_msg_data { uint32_t reason_code; uint32_t timeout_seconds; uint32_t flags; uint8_t display_message[2048]; } __packed hv_vmbus_shutdown_msg_data; typedef struct hv_vmbus_heartbeat_msg_data { uint64_t seq_num; uint32_t reserved[8]; } __packed hv_vmbus_heartbeat_msg_data; typedef struct { /* * offset in bytes from the start of ring data below */ volatile uint32_t write_index; /* * offset in bytes from the start of ring data below */ volatile uint32_t read_index; /* * NOTE: The interrupt_mask field is used only for channels, but * vmbus connection also uses this data structure */ volatile uint32_t interrupt_mask; /* pad it to PAGE_SIZE so that data starts on a page */ uint8_t reserved[4084]; /* * WARNING: Ring data starts here * !!! DO NOT place any fields below this !!! */ uint8_t buffer[0]; /* doubles as interrupt mask */ } __packed hv_vmbus_ring_buffer; typedef struct { int length; int offset; uint64_t pfn; } __packed hv_vmbus_page_buffer; typedef struct { int length; int offset; uint64_t pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT]; } __packed hv_vmbus_multipage_buffer; typedef struct { hv_vmbus_ring_buffer* ring_buffer; struct mtx ring_lock; uint32_t ring_data_size; /* ring_size */ } hv_vmbus_ring_buffer_info; typedef void (*hv_vmbus_pfn_channel_callback)(void *context); typedef enum { HV_CHANNEL_OFFER_STATE, HV_CHANNEL_OPENING_STATE, HV_CHANNEL_OPEN_STATE, HV_CHANNEL_OPENED_STATE, HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE, } hv_vmbus_channel_state; typedef struct hv_vmbus_channel { device_t ch_dev; struct vmbus_softc *vmbus_sc; hv_vmbus_channel_state state; uint32_t ch_flags; /* VMBUS_CHAN_FLAG_ */ uint32_t ch_id; /* channel id */ /* * These are based on the offer_msg.monitor_id. * Save it here for easy access. */ int ch_montrig_idx; /* MNF trig index */ uint32_t ch_montrig_mask;/* MNF trig mask */ uint32_t ring_buffer_gpadl_handle; /* * Allocated memory for ring buffer */ void* ring_buffer_pages; unsigned long ring_buffer_size; uint32_t ring_buffer_page_count; /* * send to parent */ hv_vmbus_ring_buffer_info outbound; /* * receive from parent */ hv_vmbus_ring_buffer_info inbound; struct taskqueue * rxq; struct task channel_task; hv_vmbus_pfn_channel_callback on_channel_callback; void* channel_callback_context; struct hyperv_mon_param *ch_monprm; struct hyperv_dma ch_monprm_dma; /* * From Win8, this field specifies the target virtual process * on which to deliver the interrupt from the host to guest. * Before Win8, all channel interrupts would only be * delivered on cpu 0. Setting this value to 0 would preserve * the earlier behavior. */ uint32_t target_vcpu; /* The corresponding CPUID in the guest */ uint32_t target_cpu; /* * Support for multi-channels. * The initial offer is considered the primary channel and this * offer message will indicate if the host supports multi-channels. * The guest is free to ask for multi-channels to be offerred and can * open these multi-channels as a normal "primary" channel. However, * all multi-channels will have the same type and instance guids as the * primary channel. Requests sent on a given channel will result in a * response on the same channel. */ struct mtx sc_lock; /* * Link list of all the multi-channels if this is a primary channel */ TAILQ_HEAD(, hv_vmbus_channel) sc_list_anchor; TAILQ_ENTRY(hv_vmbus_channel) sc_list_entry; int subchan_cnt; /* * The primary channel this sub-channle belongs to. * This will be NULL for the primary channel. */ struct hv_vmbus_channel *primary_channel; /* * Driver private data */ void *hv_chan_priv1; void *hv_chan_priv2; void *hv_chan_priv3; struct task ch_detach_task; TAILQ_ENTRY(hv_vmbus_channel) ch_link; uint32_t ch_subidx; /* subchan index */ - struct hv_guid ch_guid_type; - struct hv_guid ch_guid_inst; + struct hyperv_guid ch_guid_type; + struct hyperv_guid ch_guid_inst; struct sysctl_ctx_list ch_sysctl_ctx; } hv_vmbus_channel; #define HV_VMBUS_CHAN_ISPRIMARY(chan) ((chan)->primary_channel == NULL) #define VMBUS_CHAN_FLAG_HASMNF 0x0001 /* * If this flag is set, this channel's interrupt will be masked in ISR, * and the RX bufring will be drained before this channel's interrupt is * unmasked. * * This flag is turned on by default. Drivers can turn it off according * to their own requirement. */ #define VMBUS_CHAN_FLAG_BATCHREAD 0x0002 static inline void hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t on) { if (!on) channel->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD; else channel->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; } int hv_vmbus_channel_recv_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_recv_packet_raw( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint32_t* buffer_actual_len, uint64_t* request_id); int hv_vmbus_channel_open( hv_vmbus_channel* channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, hv_vmbus_pfn_channel_callback pfn_on_channel_callback, void* context); void hv_vmbus_channel_close(hv_vmbus_channel *channel); int hv_vmbus_channel_send_packet( hv_vmbus_channel* channel, void* buffer, uint32_t buffer_len, uint64_t request_id, hv_vmbus_packet_type type, uint32_t flags); int hv_vmbus_channel_send_packet_pagebuffer( hv_vmbus_channel* channel, hv_vmbus_page_buffer page_buffers[], uint32_t page_count, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_send_packet_multipagebuffer( hv_vmbus_channel* channel, hv_vmbus_multipage_buffer* multi_page_buffer, void* buffer, uint32_t buffer_len, uint64_t request_id); int hv_vmbus_channel_establish_gpadl( hv_vmbus_channel* channel, /* must be phys and virt contiguous */ void* contig_buffer, /* page-size multiple */ uint32_t size, uint32_t* gpadl_handle); int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle); struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu); void vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan); struct hv_vmbus_channel ** vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt); void vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt); void vmbus_drain_subchan(struct hv_vmbus_channel *pri_chan); /** * @brief Get physical address from virtual */ static inline unsigned long hv_get_phys_addr(void *virt) { unsigned long ret; ret = (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK)); return (ret); } static __inline struct hv_vmbus_channel * vmbus_get_channel(device_t dev) { return device_get_ivars(dev); } #endif /* __HYPERV_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 302801) +++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 302802) @@ -1,3043 +1,3043 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" #include "vmbus_if.h" #define hv_chan_rxr hv_chan_priv1 #define hv_chan_txr hv_chan_priv2 /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. * The packet offset is where the netvsc_packet starts in the buffer. */ #define HV_NV_SC_PTR_OFFSET_IN_BUF 0 #define HV_NV_PACKET_OFFSET_IN_BUF 16 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_LROENT_CNT_DEF 128 #define HN_RING_CNT_DEF_MAX 8 #define HN_RNDIS_MSG_LEN \ (sizeof(rndis_msg) + \ RNDIS_HASHVAL_PPI_SIZE + \ RNDIS_VLAN_PPI_SIZE + \ RNDIS_TSO_PPI_SIZE + \ RNDIS_CSUM_PPI_SIZE) #define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE #define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE #define HN_TX_DATA_SEGCNT_MAX \ (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ netvsc_packet netvsc_pkt; /* XXX to be removed */ bus_dmamap_t data_dmap; bus_addr_t rndis_msg_paddr; rndis_msg *rndis_msg; bus_dmamap_t rndis_msg_dmap; }; #define HN_TXD_FLAG_ONLIST 0x1 #define HN_TXD_FLAG_DMAMAP 0x2 /* * Only enable UDP checksum offloading when it is on 2012R2 or * later. UDP checksum offloading doesn't work on earlier * Windows releases. */ #define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when * certain TCP and ARP code paths are taken. This appears to be a * well-known condition, as all other drivers checked use a sleeping * mutex to protect their transmit paths. * Also Be aware that mutexes do not play well with semaphores, and there * is a conflicting semaphore in a certain channel code path. */ #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) /* * Globals */ int hv_promisc_mode = 0; /* normal mode by default */ SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segements verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segement verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagrams verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packets verification on host side. */ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); static struct taskqueue *hn_tx_taskq; #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif static u_int hn_cpu_index; /* * Forward declarations */ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct hn_softc *, int); static void hn_destroy_tx_ring(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_destroy_tx_data(struct hn_softc *); static void hn_start_taskfunc(void *, int); static void hn_start_txeof_taskfunc(void *, int); static void hn_stop_tx_tasks(struct hn_softc *); static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static void hn_create_rx_data(struct hn_softc *sc, int); static void hn_destroy_rx_data(struct hn_softc *sc); static void hn_set_tx_chimney_size(struct hn_softc *, int); static void hn_channel_attach(struct hn_softc *, struct hv_vmbus_channel *); static void hn_subchan_attach(struct hn_softc *, struct hv_vmbus_channel *); static void hn_subchan_setup(struct hn_softc *); static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof_taskfunc(void *, int); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hn_carrier) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ -static const hv_guid g_net_vsc_device_type = { - .data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, +static const struct hyperv_guid g_net_vsc_device_type = { + .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; /* * Standard probe entry point. * */ static int netvsc_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_net_vsc_device_type) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } /* * Standard attach entry point. * * Called when the driver is loaded. It allocates needed resources, * and initializes the "hardware" and software. */ static int netvsc_attach(device_t dev) { netvsc_device_info device_info; hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; #if __FreeBSD_version >= 1100045 int tso_maxlen; #endif sc = device_get_softc(dev); sc->hn_unit = unit; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } NV_LOCK_INIT(sc, "NetVSCLock"); ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; hn_create_rx_data(sc, ring_cnt); /* * Associate the first TX/RX ring w/ the primary channel. */ hn_channel_attach(sc, sc->hn_prichan); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_ifinit; /* needed by hv_rf_on_device_add() code */ ifp->if_mtu = ETHERMTU; if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Tell upper layers that we support full VLAN capability. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; error = hv_rf_on_device_add(sc, &device_info, ring_cnt); if (error) goto failed; KASSERT(sc->net_dev->num_channel > 0 && sc->net_dev->num_channel <= sc->hn_rx_ring_inuse, ("invalid channel count %u, should be less than %d", sc->net_dev->num_channel, sc->hn_rx_ring_inuse)); /* * Set the # of TX/RX rings that could be used according to * the # of channels that host offered. */ if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel) sc->hn_tx_ring_inuse = sc->net_dev->num_channel; sc->hn_rx_ring_inuse = sc->net_dev->num_channel; device_printf(dev, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); if (sc->net_dev->num_channel > 1) hn_subchan_setup(sc); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif if (device_info.link_state == 0) { sc->hn_carrier = 1; } #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); #endif ether_ifattach(ifp, device_info.mac_addr); #if __FreeBSD_version >= 1100045 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); #endif sc->hn_tx_chimney_max = sc->net_dev->send_section_size; hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, hn_tx_chimney_size); return (0); failed: hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); } /* * Standard detach entry point */ static int netvsc_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); if (bootverbose) printf("netvsc_detach\n"); /* * XXXKYS: Need to clean up all our * driver state; this is the driver * unloading. */ /* * XXXKYS: Need to stop outgoing traffic and unregister * the netdevice. */ hv_rf_on_device_remove(sc, HV_RF_NV_DESTROY_CHANNEL); hn_stop_tx_tasks(sc); ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); return (0); } /* * Standard shutdown entry point */ static int netvsc_shutdown(device_t dev) { return (0); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline void hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_tx_done(struct hv_vmbus_channel *chan, void *xpkt) { netvsc_packet *packet = xpkt; struct hn_txdesc *txd; struct hn_tx_ring *txr; txd = (struct hn_txdesc *)(uintptr_t) packet->compl.send.send_completion_tid; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on channel%u, should be channel%u", chan->ch_subidx, txr->hn_chan->ch_subidx)); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } void netvsc_channel_rollup(struct hv_vmbus_channel *chan) { struct hn_tx_ring *txr = chan->hv_chan_txr; #if defined(INET) || defined(INET6) struct hn_rx_ring *rxr = chan->hv_chan_rxr; tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; netvsc_packet *packet; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; struct rndis_hash_value *hash_value; uint32_t rndis_msg_size; packet = &txd->netvsc_pkt; packet->is_data_pkt = TRUE; packet->tot_data_buf_len = m_head->m_pkthdr.len; /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). */ rndis_mesg = txd->rndis_msg; /* XXX not necessary */ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); rndis_pkt->data_length = packet->tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. */ rndis_msg_size += RNDIS_HASHVAL_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASHVAL_PPI_SIZE, nbl_hash_value); hash_value = (struct rndis_hash_value *)((uint8_t *)rppi + rppi->per_packet_info_offset); hash_value->hash_value = txr->hn_tx_idx; if (m_head->m_flags & M_VLANTAG) { ndis_8021q_info *rppi_vlan_info; rndis_msg_size += RNDIS_VLAN_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, ieee_8021q_info); rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); rppi_vlan_info->u1.s1.vlan_id = m_head->m_pkthdr.ether_vtag & 0xfff; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { rndis_tcp_tso_info *tso_info; struct ether_vlan_header *eh; int ether_len; /* * XXX need m_pullup and use mtodo */ eh = mtod(m_head, struct ether_vlan_header*); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ether_len = ETHER_HDR_LEN; rndis_msg_size += RNDIS_TSO_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, tcp_large_send_info); tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); tso_info->lso_v2_xmit.type = RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6 = (struct ip6_hdr *) (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif tso_info->lso_v2_xmit.tcp_header_offset = 0; tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { rndis_tcp_ip_csum_info *csum_info; rndis_msg_size += RNDIS_CSUM_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, tcpip_chksum_info); csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); csum_info->xmit.is_ipv4 = 1; if (m_head->m_pkthdr.csum_flags & CSUM_IP) csum_info->xmit.ip_header_csum = 1; if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { csum_info->xmit.tcp_csum = 1; csum_info->xmit.tcp_header_offset = 0; } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { csum_info->xmit.udp_csum = 1; } } rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; packet->tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. */ if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) { netvsc_dev *net_dev = txr->hn_sc->net_dev; uint32_t send_buf_section_idx; txr->hn_tx_chimney_tried++; send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { uint8_t *dest = ((uint8_t *)net_dev->send_buf + (send_buf_section_idx * net_dev->send_section_size)); memcpy(dest, rndis_mesg, rndis_msg_size); dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); packet->send_buf_section_idx = send_buf_section_idx; packet->send_buf_section_size = packet->tot_data_buf_len; packet->page_buf_count = 0; txr->hn_tx_chimney++; goto done; } } error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (error) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; packet->page_buf_count = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS; /* send packet with page buffer */ packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr); packet->page_buffers[0].offset = txd->rndis_msg_paddr & PAGE_MASK; packet->page_buffers[0].length = rndis_msg_size; /* * Fill the page buffers with mbuf info starting at index * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. */ for (i = 0; i < nsegs; ++i) { hv_vmbus_page_buffer *pb = &packet->page_buffers[ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; pb->pfn = atop(segs[i].ds_addr); pb->offset = segs[i].ds_addr & PAGE_MASK; pb->length = segs[i].ds_len; } packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; packet->send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ packet->compl.send.on_send_completion = hn_tx_done; packet->compl.send.send_completion_context = packet; packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = hv_nv_on_send(txr->hn_chan, &txd->netvsc_pkt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (!hn_use_if_start) { if_inc_counter(ifp, IFCOUNTER_OBYTES, txd->m->m_pkthdr.len); if (txd->m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } txr->hn_pkts++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. */ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } return error; } /* * Start a transmit of one or more packets */ static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } return 0; } /* * Link up/down notification */ void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status) { if (status == 1) { sc->hn_carrier = 1; } else { sc->hn_carrier = 0; } } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. */ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif /* * Called when we receive a data packet from the "wire" on the * specified device * * Note: This is no longer used as a callback */ int netvsc_recv(struct hv_vmbus_channel *chan, netvsc_packet *packet, const rndis_tcp_ip_csum_info *csum_info, const struct rndis_hash_info *hash_info, const struct rndis_hash_value *hash_value) { struct hn_rx_ring *rxr = chan->hv_chan_rxr; struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; int hash_type = M_HASHTYPE_OPAQUE_HASH; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (packet->tot_data_buf_len <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), packet->data, packet->tot_data_buf_len); m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (packet->tot_data_buf_len > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, packet->tot_data_buf_len, packet->data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (csum_info != NULL) { /* IP csum offload */ if (csum_info->receive.ip_csum_succeeded && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((csum_info->receive.tcp_csum_succeeded || csum_info->receive.udp_csum_succeeded) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (csum_info->receive.tcp_csum_succeeded) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } if (csum_info->receive.ip_csum_succeeded && csum_info->receive.tcp_csum_succeeded) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (pr == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if ((packet->vlan_tci != 0) && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m_new->m_pkthdr.ether_vtag = packet->vlan_tci; m_new->m_flags |= M_VLANTAG; } if (hash_info != NULL && hash_value != NULL) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = hash_value->hash_value; if ((hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (hash_info->hash_info & NDIS_HASH_TYPE_MASK); switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else { if (hash_value != NULL) { m_new->m_pkthdr.flowid = hash_value->hash_value; } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } } M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE! */ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() * 2. code reading sc->temp_unusable under NV_LOCK(), and finding * sc->temp_unusable set, must release NV_LOCK() and exit * 3. to retain exclusive control of the interface, * sc->temp_unusable must be set by code before releasing NV_LOCK() * 4. only code setting sc->temp_unusable can clear sc->temp_unusable * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable */ /* * Standard ioctl entry point. Called when the user wants to configure * the interface. */ static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif netvsc_device_info device_info; int mask, error = 0; int retry_cnt = 500; switch(cmd) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hn_ifinit(sc); arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, cmd, data); break; case SIOCSIFMTU: /* Check MTU value change */ if (ifp->if_mtu == ifr->ifr_mtu) break; if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { error = EINVAL; break; } /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; #if __FreeBSD_version >= 1100099 /* * Make sure that LRO aggregation length limit is still * valid, after the MTU change. */ NV_LOCK(sc); if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); NV_UNLOCK(sc); #endif do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } /* We must remove and add back the device to cause the new * MTU to take effect. This includes tearing down, but not * deleting the channel, then bringing it back up. */ error = hv_rf_on_device_remove(sc, HV_RF_NV_RETAIN_CHANNEL); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* Wait for subchannels to be destroyed */ vmbus_drain_subchan(sc->hn_prichan); error = hv_rf_on_device_add(sc, &device_info, sc->hn_rx_ring_inuse); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } KASSERT(sc->hn_rx_ring_cnt == sc->net_dev->num_channel, ("RX ring count %d and channel count %u mismatch", sc->hn_rx_ring_cnt, sc->net_dev->num_channel)); if (sc->net_dev->num_channel > 1) { int r; /* * Skip the rings on primary channel; they are * handled by the hv_rf_on_device_add() above. */ for (r = 1; r < sc->hn_rx_ring_cnt; ++r) { sc->hn_rx_ring[r].hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; } for (r = 1; r < sc->hn_tx_ring_cnt; ++r) { sc->hn_tx_ring[r].hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } hn_subchan_setup(sc); } sc->hn_tx_chimney_max = sc->net_dev->send_section_size; if (sc->hn_tx_ring[0].hn_tx_chimney_size > sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; case SIOCSIFFLAGS: do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* Fixme: Promiscuous mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->hn_if_flags & IFF_PROMISC)) { /* do something here for Hyper-V */ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->hn_if_flags & IFF_PROMISC) { /* do something here for Hyper-V */ } else #endif hn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hn_stop(sc); } } NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); sc->hn_if_flags = ifp->if_flags; error = 0; break; case SIOCSIFCAP: NV_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= sc->hn_tx_ring[0].hn_csum_assist; } else { ifp->if_hwassist &= ~sc->hn_tx_ring[0].hn_csum_assist; } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet /* Fixme: Multicast mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NV_LOCK(sc); netvsc_setmulti(sc); NV_UNLOCK(sc); error = 0; } #endif error = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * */ static void hn_stop(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (bootverbose) printf(" Closing Device ...\n"); atomic_clear_int(&ifp->if_drv_flags, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; ret = hv_rf_on_close(sc); } /* * FreeBSD transmit entry point */ static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } /* * */ static void hn_ifinit_locked(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { return; } hv_promisc_mode = 1; ret = hv_rf_on_open(sc); if (ret != 0) { return; } else { sc->hn_initdone = 1; } atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } /* * */ static void hn_ifinit(void *xsc) { hn_softc_t *sc = xsc; NV_LOCK(sc); if (sc->temp_unusable) { NV_UNLOCK(sc); return; } sc->temp_unusable = TRUE; NV_UNLOCK(sc); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); } #ifdef LATER /* * */ static void hn_watchdog(struct ifnet *ifp) { hn_softc_t *sc; sc = ifp->if_softc; printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); hn_ifinit(sc); /*???*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } #endif #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; NV_LOCK(sc); hn_set_lro_lenlim(sc, lenlim); NV_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; NV_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } NV_UNLOCK(sc); return 0; } static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chimney_size, error; chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size; error = sysctl_handle_int(oidp, &chimney_size, 0, req); if (error || req->newptr == NULL) return error; if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0) return EINVAL; hn_set_tx_chimney_size(sc, chimney_size); return 0; } static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } NV_UNLOCK(sc); return 0; } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is as * at least much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static void hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; rxr->hn_rx_idx = i; /* * Initialize LRO. */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), hn_rx_stat_u64_sysctl, "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), hn_rx_stat_u64_sysctl, "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segement verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); } static void hn_destroy_rx_data(struct hn_softc *sc) { #if defined(INET) || defined(INET6) int i; #endif if (sc->hn_rx_ring_cnt == 0) return; #if defined(INET) || defined(INET6) for (i = 0; i < sc->hn_rx_ring_cnt; ++i) tcp_lro_free(&sc->hn_rx_ring[i].hn_lro); #endif free(sc->hn_rx_ring, M_NETVSC); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_create_tx_ring(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; uint32_t version; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; version = VMBUS_GET_VERSION(device_get_parent(dev), dev); if (version >= VMBUS_VERSION_WIN8_1) { txr->hn_csum_assist = HN_CSUM_ASSIST; } else { txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; if (id == 0) { device_printf(dev, "bus version %u.%u, " "no UDP checksum offloading\n", VMBUS_VERSION_MAJOR(version), VMBUS_VERSION_MINOR(version)); } } /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_MSG_ALIGN, /* alignment */ HN_RNDIS_MSG_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_MSG_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_MSG_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; /* * Allocate and load RNDIS messages. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_msg, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &txd->rndis_msg_dmap); if (error) { device_printf(dev, "failed to allocate rndis_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, hyperv_dma_map_paddr, &txd->rndis_msg_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* DMA map for TX data. */ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); if (!hn_use_if_start) { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; if (txr->hn_txdesc == NULL) return; #ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { SLIST_REMOVE_HEAD(&txr->hn_txlist, link); hn_txdesc_dmamap_destroy(txd); } #else mtx_lock(&txr->hn_tx_lock); while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) hn_txdesc_dmamap_destroy(txd); mtx_unlock(&txr->hn_tx_lock); #endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_NETVSC); #endif free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_NETVSC); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_create_tx_ring(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_tx_chimney_max, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_tx_chimney_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); return 0; } static void hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size) { int i; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size; NV_UNLOCK(sc); } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_destroy_tx_ring(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_NETVSC); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_stop_tx_tasks(struct hn_softc *sc) { int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return 0; while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ drbr_advance(ifp, txr->hn_mbuf_br); continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } return 0; } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_channel_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx; idx = chan->ch_subidx; KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; chan->hv_chan_rxr = rxr; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n", idx, chan->ch_id); } if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; chan->hv_chan_txr = txr; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n", idx, chan->ch_id); } } /* Bind channel to a proper CPU */ vmbus_channel_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); } static void hn_subchan_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan) { KASSERT(!HV_VMBUS_CHAN_ISPRIMARY(chan), ("subchannel callback on primary channel")); KASSERT(chan->ch_subidx > 0, ("invalid channel subidx %u", chan->ch_subidx)); hn_channel_attach(sc, chan); } static void hn_subchan_setup(struct hn_softc *sc) { struct hv_vmbus_channel **subchan; int subchan_cnt = sc->net_dev->num_channel - 1; int i; /* Wait for sub-channels setup to complete. */ subchan = vmbus_get_subchan(sc->hn_prichan, subchan_cnt); /* Attach the sub-channels. */ for (i = 0; i < subchan_cnt; ++i) { /* NOTE: Calling order is critical. */ hn_subchan_attach(sc, subchan[i]); hv_nv_subchan_attach(subchan[i]); } /* Release the sub-channels */ vmbus_rel_subchan(subchan, subchan_cnt); if_printf(sc->hn_ifp, "%d sub-channels setup done\n", subchan_cnt); } static void hn_tx_taskq_create(void *arg __unused) { if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), DEVMETHOD(device_attach, netvsc_attach), DEVMETHOD(device_detach, netvsc_detach), DEVMETHOD(device_shutdown, netvsc_shutdown), { 0, 0 } }; static driver_t netvsc_driver = { NETVSC_DEVNAME, netvsc_methods, sizeof(hn_softc_t) }; static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 302801) +++ head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 302802) @@ -1,2081 +1,2081 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface * to the Comman Access Method (CAM) layer. CAM control blocks (CCBs) are * converted into VSCSI protocol messages which are delivered to the parent * partition StorVSP driver over the Hyper-V VMBUS. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vstorage.h" #include "vmbus_if.h" #define STORVSC_RINGBUFFER_SIZE (20*PAGE_SIZE) #define STORVSC_MAX_LUNS_PER_TARGET (64) #define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2) #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1) #define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS #define STORVSC_MAX_TARGETS (2) #define VSTOR_PKT_SIZE (sizeof(struct vstor_packet) - vmscsi_size_delta) #define HV_ALIGN(x, a) roundup2(x, a) struct storvsc_softc; struct hv_sgl_node { LIST_ENTRY(hv_sgl_node) link; struct sglist *sgl_data; }; struct hv_sgl_page_pool{ LIST_HEAD(, hv_sgl_node) in_use_sgl_list; LIST_HEAD(, hv_sgl_node) free_sgl_list; boolean_t is_init; } g_hv_sgl_page_pool; #define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT enum storvsc_request_type { WRITE_TYPE, READ_TYPE, UNKNOWN_TYPE }; struct hv_storvsc_request { LIST_ENTRY(hv_storvsc_request) link; struct vstor_packet vstor_packet; hv_vmbus_multipage_buffer data_buf; void *sense_data; uint8_t sense_info_len; uint8_t retries; union ccb *ccb; struct storvsc_softc *softc; struct callout callout; struct sema synch_sema; /*Synchronize the request/response if needed */ struct sglist *bounce_sgl; unsigned int bounce_sgl_count; uint64_t not_aligned_seg_bits; }; struct storvsc_softc { struct hv_vmbus_channel *hs_chan; LIST_HEAD(, hv_storvsc_request) hs_free_list; struct mtx hs_lock; struct storvsc_driver_props *hs_drv_props; int hs_unit; uint32_t hs_frozen; struct cam_sim *hs_sim; struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; device_t hs_dev; }; /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. */ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; uint8_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ -static const hv_guid gStorVscDeviceType={ - .data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, +static const struct hyperv_guid gStorVscDeviceType={ + .hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ -static const hv_guid gBlkVscDeviceType={ - .data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, +static const struct hyperv_guid gBlkVscDeviceType={ + .hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE Storage Interface", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE}, {"storvsc", "Hyper-V SCSI Storage Interface", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE} }; /* * Sense buffer size changed in win8; have a run-time * variable to track the size we should use. */ static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; /* * The size of the vmscsi_request has changed in win8. The * additional size is for the newly added elements in the * structure. These elements are valid only when we are talking * to a win8 host. * Track the correct size we need to apply. */ static int vmscsi_size_delta; /* * The storage protocol version is determined during the * initial exchange with the host. It will indicate which * storage functionality is available in the host. */ static int vmstor_proto_version; struct vmstor_proto { int proto_version; int sense_buffer_size; int vmscsi_size_delta; }; static const struct vmstor_proto vmstor_proto_list[] = { { VMSTOR_PROTOCOL_VERSION_WIN10, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8_1, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN7, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), }, { VMSTOR_PROTOCOL_VERSION_WIN6, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), } }; /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_rescan_target(struct storvsc_softc *sc); static void hv_storvsc_on_channel_callback(void *xchan); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct storvsc_softc *); static void storvsc_io_done(struct hv_storvsc_request *reqp); static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits); void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); static void storvsc_subchan_attach(struct storvsc_softc *sc, struct hv_vmbus_channel *new_channel) { struct vmstor_chan_props props; int ret = 0; memset(&props, 0, sizeof(props)); new_channel->hv_chan_priv1 = sc; vmbus_channel_cpu_rr(new_channel); ret = hv_vmbus_channel_open(new_channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, new_channel); } /** * @brief Send multi-channel creation request to host * * @param device a Hyper-V device pointer * @param max_chans the max channels supported by vmbus */ static void storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_chans) { struct hv_vmbus_channel **subchan; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; int request_channels_cnt = 0; int ret, i; /* get multichannels count that need to create */ request_channels_cnt = MIN(max_chans, mp_ncpus); request = &sc->hs_init_req; /* request the host to create multi-channel */ memset(request, 0, sizeof(struct hv_storvsc_request)); sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet = &request->vstor_packet; vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.multi_channels_cnt = request_channels_cnt; ret = hv_vmbus_channel_send_packet( sc->hs_chan, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); /* wait for 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) { printf("Storvsc_error: create multi-channel timeout, %d\n", ret); return; } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { printf("Storvsc_error: create multi-channel invalid operation " "(%d) or statue (%u)\n", vstor_packet->operation, vstor_packet->status); return; } /* Wait for sub-channels setup to complete. */ subchan = vmbus_get_subchan(sc->hs_chan, request_channels_cnt); /* Attach the sub-channels. */ for (i = 0; i < request_channels_cnt; ++i) storvsc_subchan_attach(sc, subchan[i]); /* Release the sub-channels. */ vmbus_rel_subchan(subchan, request_channels_cnt); if (bootverbose) printf("Storvsc create multi-channel success!\n"); } /** * @brief initialize channel connection to parent partition * * @param dev a Hyper-V device pointer * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_channel_init(struct storvsc_softc *sc) { int ret = 0, i; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; uint16_t max_chans = 0; boolean_t support_multichannel = FALSE; uint32_t version; max_chans = 0; support_multichannel = FALSE; request = &sc->hs_init_req; memset(request, 0, sizeof(struct hv_storvsc_request)); vstor_packet = &request->vstor_packet; request->softc = sc; /** * Initiate the vsc/vsp initialization protocol on the open channel */ sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( sc->hs_chan, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } for (i = 0; i < nitems(vmstor_proto_list); i++) { /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.version.major_minor = vmstor_proto_list[i].proto_version; /* revision is only significant for Windows guests */ vstor_packet->u.version.revision = 0; ret = hv_vmbus_channel_send_packet( sc->hs_chan, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) { ret = EINVAL; goto cleanup; } if (vstor_packet->status == 0) { vmstor_proto_version = vmstor_proto_list[i].proto_version; sense_buffer_size = vmstor_proto_list[i].sense_buffer_size; vmscsi_size_delta = vmstor_proto_list[i].vmscsi_size_delta; break; } } if (vstor_packet->status != 0) { ret = EINVAL; goto cleanup; } /** * Query channel properties */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( sc->hs_chan, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if ( ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* multi-channels feature is supported by WIN8 and above version */ max_chans = vstor_packet->u.chan_props.max_channel_cnt; version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev); if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 && (vstor_packet->u.chan_props.flags & HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) { support_multichannel = TRUE; } memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet( sc->hs_chan, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) goto cleanup; /* * If multi-channel is supported, send multichannel create * request to host. */ if (support_multichannel) storvsc_send_multichannel_request(sc, max_chans); cleanup: sema_destroy(&request->synch_sema); return (ret); } /** * @brief Open channel connection to paraent partition StorVSP driver * * Open and initialize channel connection to parent partition StorVSP driver. * * @param pointer to a Hyper-V device * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_connect_vsp(struct storvsc_softc *sc) { int ret = 0; struct vmstor_chan_props props; memset(&props, 0, sizeof(struct vmstor_chan_props)); /* * Open the channel */ KASSERT(sc->hs_chan->hv_chan_priv1 == sc, ("invalid chan priv1")); vmbus_channel_cpu_rr(sc->hs_chan); ret = hv_vmbus_channel_open( sc->hs_chan, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, sc->hs_chan); if (ret != 0) { return ret; } ret = hv_storvsc_channel_init(sc); return (ret); } #if HVS_HOST_RESET static int hv_storvsc_host_reset(struct storvsc_softc *sc) { int ret = 0; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; request = &sc->hs_reset_req; request->softc = sc; vstor_packet = &request->vstor_packet; sema_init(&request->synch_sema, 0, "stor synch sema"); vstor_packet->operation = VSTOR_OPERATION_RESETBUS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = hv_vmbus_channel_send_packet(dev->channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)&sc->hs_reset_req, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret != 0) { goto cleanup; } ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { goto cleanup; } /* * At this point, all outstanding requests in the adapter * should have been flushed out and return to us */ cleanup: sema_destroy(&request->synch_sema); return (ret); } #endif /* HVS_HOST_RESET */ /** * @brief Function to initiate an I/O request * * @param device Hyper-V device pointer * @param request pointer to a request structure * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_io_request(struct storvsc_softc *sc, struct hv_storvsc_request *request) { struct vstor_packet *vstor_packet = &request->vstor_packet; struct hv_vmbus_channel* outgoing_channel = NULL; int ret = 0; vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE; vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size; vstor_packet->u.vm_srb.transfer_len = request->data_buf.length; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; outgoing_channel = vmbus_select_outgoing_channel(sc->hs_chan); mtx_unlock(&request->softc->hs_lock); if (request->data_buf.length) { ret = hv_vmbus_channel_send_packet_multipagebuffer( outgoing_channel, &request->data_buf, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } else { ret = hv_vmbus_channel_send_packet( outgoing_channel, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); } /** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. */ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; /* * Copy some fields of the host's response into the request structure, * because the fields will be used later in storvsc_io_done(). */ request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status; request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_rescan_target(struct storvsc_softc *sc) { path_id_t pathid; target_id_t targetid; union ccb *ccb; pathid = cam_sim_path(sc->hs_sim); targetid = CAM_TARGET_WILDCARD; /* * Allocate a CCB and schedule a rescan. */ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { printf("unable to create path for rescan, pathid: %u," "targetid: %u\n", pathid, targetid); xpt_free_ccb(ccb); return; } if (targetid == CAM_TARGET_WILDCARD) ccb->ccb_h.func_code = XPT_SCAN_BUS; else ccb->ccb_h.func_code = XPT_SCAN_TGT; xpt_rescan(ccb); } static void hv_storvsc_on_channel_callback(void *xchan) { int ret = 0; hv_vmbus_channel *channel = xchan; struct storvsc_softc *sc = channel->hv_chan_priv1; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; ret = hv_vmbus_channel_recv_packet( channel, packet, roundup2(VSTOR_PKT_SIZE, 8), &bytes_recvd, &request_id); while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: if (request == NULL) panic("VMBUS: storvsc received a " "packet with NULL request id in " "COMPLETEIO operation."); hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: printf("VMBUS: storvsc operation %d not " "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; case VSTOR_OPERATION_ENUMERATE_BUS: hv_storvsc_rescan_target(sc); break; default: break; } } ret = hv_vmbus_channel_recv_packet( channel, packet, roundup2(VSTOR_PKT_SIZE, 8), &bytes_recvd, &request_id); } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, a ENXIO is returned. If the input device is * for BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. * * @param a device * @returns 0 on success, ENXIO if not a matcing StorVSC device */ static int storvsc_probe(device_t dev) { int ata_disk_enable = 0; int ret = ENXIO; switch (storvsc_get_storage_type(dev)) { case DRIVER_BLKVSC: if(bootverbose) device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n"); if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) { if(bootverbose) device_printf(dev, "Enlightened ATA/IDE detected\n"); device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc); ret = BUS_PROBE_DEFAULT; } else if(bootverbose) device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n"); break; case DRIVER_STORVSC: if(bootverbose) device_printf(dev, "Enlightened SCSI device detected\n"); device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc); ret = BUS_PROBE_DEFAULT; break; default: ret = ENXIO; } return (ret); } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. * * @param a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; struct hv_sgl_node *sgl_node = NULL; void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls. */ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); sc->hs_chan = vmbus_get_channel(dev); sc->hs_chan->hv_chan_priv1 = sc; stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = dev; LIST_INIT(&sc->hs_free_list); mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) { reqp = malloc(sizeof(struct hv_storvsc_request), M_DEVBUF, M_WAITOK|M_ZERO); reqp->softc = sc; LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /* create sg-list page pool */ if (FALSE == g_hv_sgl_page_pool.is_init) { g_hv_sgl_page_pool.is_init = TRUE; LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); /* * Pre-create SG list, each SG list with * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each * segment has one page buffer */ for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) { sgl_node = malloc(sizeof(struct hv_sgl_node), M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data = sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT, M_WAITOK|M_ZERO); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { tmp_buff = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data->sg_segs[j].ss_paddr = (vm_paddr_t)tmp_buff; } LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } } sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(sc); if (ret != 0) { goto cleanup; } /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? */ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) { if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_sgl_node *sgl_node = NULL; int j = 0; sc->hs_destroy = TRUE; /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ hv_vmbus_channel_close(sc->hs_chan); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){ if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. * * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ #ifdef notyet /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } #endif /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); hv_storvsc_on_channel_callback(sc->hs_chan); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. The requests are in the form of * CAM control blocks which indicate the action being performed. * Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? "bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bzero(reqp, sizeof(struct hv_storvsc_request)); reqp->softc = sc; ccb->ccb_h.status |= CAM_SIM_QUEUED; if ((res = create_storvsc_request(ccb, reqp)) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } #ifdef notyet if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, 1); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } #endif if ((res = hv_storvsc_io_request(sc, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** * @brief destroy bounce buffer * * This function is responsible for destroy a Scatter/Gather list * that create by storvsc_create_bounce_buffer() * * @param sgl- the Scatter/Gather need be destroy * @param sg_count- page count of the SG list. * */ static void storvsc_destroy_bounce_buffer(struct sglist *sgl) { struct hv_sgl_node *sgl_node = NULL; if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) { printf("storvsc error: not enough in use sgl\n"); return; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_REMOVE(sgl_node, link); sgl_node->sgl_data = sgl; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } /** * @brief create bounce buffer * * This function is responsible for create a Scatter/Gather list, * which hold several pages that can be aligned with page size. * * @param seg_count- SG-list segments count * @param write - if WRITE_TYPE, set SG list page used size to 0, * otherwise set used size to page size. * * return NULL if create failed */ static struct sglist * storvsc_create_bounce_buffer(uint16_t seg_count, int write) { int i = 0; struct sglist *bounce_sgl = NULL; unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE); struct hv_sgl_node *sgl_node = NULL; /* get struct sglist from free_sgl_list */ if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { printf("storvsc error: not enough free sgl\n"); return NULL; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); bounce_sgl = sgl_node->sgl_data; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link); bounce_sgl->sg_maxseg = seg_count; if (write == WRITE_TYPE) bounce_sgl->sg_nseg = 0; else bounce_sgl->sg_nseg = seg_count; for (i = 0; i < seg_count; i++) bounce_sgl->sg_segs[i].ss_len = buf_len; return bounce_sgl; } /** * @brief copy data from SG list to bounce buffer * * This function is responsible for copy data from one SG list's segments * to another SG list which used as bounce buffer. * * @param bounce_sgl - the destination SG list * @param orig_sgl - the segment of the source SG list. * @param orig_sgl_count - the count of segments. * @param orig_sgl_count - indicate which segment need bounce buffer, * set 1 means need. * */ static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits) { int src_sgl_idx = 0; for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) { if (seg_bits & (1 << src_sgl_idx)) { memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr, (void*)orig_sgl[src_sgl_idx].ds_addr, orig_sgl[src_sgl_idx].ds_len); bounce_sgl->sg_segs[src_sgl_idx].ss_len = orig_sgl[src_sgl_idx].ds_len; } } } /** * @brief copy data from SG list which used as bounce to another SG list * * This function is responsible for copy data from one SG list with bounce * buffer to another SG list's segments. * * @param dest_sgl - the destination SG list's segments * @param dest_sgl_count - the count of destination SG list's segment. * @param src_sgl - the source SG list. * @param seg_bits - indicate which segment used bounce buffer of src SG-list. * */ void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits) { int sgl_idx = 0; for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) { if (seg_bits & (1 << sgl_idx)) { memcpy((void*)(dest_sgl[sgl_idx].ds_addr), (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr), src_sgl->sg_segs[sgl_idx].ss_len); } } } /** * @brief check SG list with bounce buffer or not * * This function is responsible for check if need bounce buffer for SG list. * * @param sgl - the SG list's segments * @param sg_count - the count of SG list's segment. * @param bits - segmengs number that need bounce buffer * * return -1 if SG list needless bounce buffer */ static int storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl, unsigned int sg_count, uint64_t *bits) { int i = 0; int offset = 0; uint64_t phys_addr = 0; uint64_t tmp_bits = 0; boolean_t found_hole = FALSE; boolean_t pre_aligned = TRUE; if (sg_count < 2){ return -1; } *bits = 0; phys_addr = vtophys(sgl[0].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset != 0) { pre_aligned = FALSE; tmp_bits |= 1; } for (i = 1; i < sg_count; i++) { phys_addr = vtophys(sgl[i].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset == 0) { if (FALSE == pre_aligned){ /* * This segment is aligned, if the previous * one is not aligned, find a hole */ found_hole = TRUE; } pre_aligned = TRUE; } else { tmp_bits |= 1 << i; if (!pre_aligned) { if (phys_addr != vtophys(sgl[i-1].ds_addr + sgl[i-1].ds_len)) { /* * Check whether connect to previous * segment,if not, find the hole */ found_hole = TRUE; } } else { found_hole = TRUE; } pre_aligned = FALSE; } } if (!found_hole) { return (-1); } else { *bits = tmp_bits; return 0; } } /** * @brief Fill in a request structure based on a CAM control block * * Fills in a request structure based on the contents of a CAM control * block. The request structure holds the payload information for * VSCSI protocol request. * * @param ccb pointer to a CAM contorl block * @param reqp pointer to a request structure */ static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp) { struct ccb_scsiio *csio = &ccb->csio; uint64_t phys_addr; uint32_t bytes_to_copy = 0; uint32_t pfn_num = 0; uint32_t pfn; uint64_t not_aligned_seg_bits = 0; /* refer to struct vmscsi_req for meanings of these two fields */ reqp->vstor_packet.u.vm_srb.port = cam_sim_unit(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.path_id = cam_sim_bus(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id; reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun; reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len; if(ccb->ccb_h.flags & CAM_CDB_POINTER) { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); } else { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); } switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE; break; case CAM_DIR_IN: reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE; break; case CAM_DIR_NONE: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; default: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; } reqp->sense_data = &csio->sense_data; reqp->sense_info_len = csio->sense_len; reqp->ccb = ccb; if (0 == csio->dxfer_len) { return (0); } reqp->data_buf.length = csio->dxfer_len; switch (ccb->ccb_h.flags & CAM_DATA_MASK) { case CAM_DATA_VADDR: { bytes_to_copy = csio->dxfer_len; phys_addr = vtophys(csio->data_ptr); reqp->data_buf.offset = phys_addr & PAGE_MASK; while (bytes_to_copy != 0) { int bytes, page_offset; phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length - bytes_to_copy]); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[pfn_num] = pfn; page_offset = phys_addr & PAGE_MASK; bytes = min(PAGE_SIZE - page_offset, bytes_to_copy); bytes_to_copy -= bytes; pfn_num++; } break; } case CAM_DATA_SG: { int i = 0; int offset = 0; int ret; bus_dma_segment_t *storvsc_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt; printf("Storvsc: get SG I/O operation, %d\n", reqp->vstor_packet.u.vm_srb.data_in); if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){ printf("Storvsc: %d segments is too much, " "only support %d segments\n", storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT); return (EINVAL); } /* * We create our own bounce buffer function currently. Idealy * we should use BUS_DMA(9) framework. But with current BUS_DMA * code there is no callback API to check the page alignment of * middle segments before busdma can decide if a bounce buffer * is needed for particular segment. There is callback, * "bus_dma_filter_t *filter", but the parrameters are not * sufficient for storvsc driver. * TODO: * Add page alignment check in BUS_DMA(9) callback. Once * this is complete, switch the following code to use * BUS_DMA(9) for storvsc bounce buffer support. */ /* check if we need to create bounce buffer */ ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, storvsc_sg_count, ¬_aligned_seg_bits); if (ret != -1) { reqp->bounce_sgl = storvsc_create_bounce_buffer(storvsc_sg_count, reqp->vstor_packet.u.vm_srb.data_in); if (NULL == reqp->bounce_sgl) { printf("Storvsc_error: " "create bounce buffer failed.\n"); return (ENOMEM); } reqp->bounce_sgl_count = storvsc_sg_count; reqp->not_aligned_seg_bits = not_aligned_seg_bits; /* * if it is write, we need copy the original data *to bounce buffer */ if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_sgl_to_bounce_buf( reqp->bounce_sgl, storvsc_sglist, storvsc_sg_count, reqp->not_aligned_seg_bits); } /* transfer virtual address to physical frame number */ if (reqp->not_aligned_seg_bits & 0x1){ phys_addr = vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); }else{ phys_addr = vtophys(storvsc_sglist[0].ds_addr); } reqp->data_buf.offset = phys_addr & PAGE_MASK; pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[0] = pfn; for (i = 1; i < storvsc_sg_count; i++) { if (reqp->not_aligned_seg_bits & (1 << i)) { phys_addr = vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); } else { phys_addr = vtophys(storvsc_sglist[i].ds_addr); } pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } } else { phys_addr = vtophys(storvsc_sglist[0].ds_addr); reqp->data_buf.offset = phys_addr & PAGE_MASK; for (i = 0; i < storvsc_sg_count; i++) { phys_addr = vtophys(storvsc_sglist[i].ds_addr); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } /* check the last segment cross boundary or not */ offset = phys_addr & PAGE_MASK; if (offset) { phys_addr = vtophys(storvsc_sglist[i-1].ds_addr + PAGE_SIZE - offset); pfn = phys_addr >> PAGE_SHIFT; reqp->data_buf.pfn_array[i] = pfn; } reqp->bounce_sgl_count = 0; } break; } default: printf("Unknow flags: %d\n", ccb->ccb_h.flags); return(EINVAL); } return(0); } /* * SCSI Inquiry checks qualifier and type. * If qualifier is 011b, means the device server is not capable * of supporting a peripheral device on this logical unit, and * the type should be set to 1Fh. * * Return 1 if it is valid, 0 otherwise. */ static inline int is_inquiry_valid(const struct scsi_inquiry_data *inq_data) { uint8_t type; if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) { return (0); } type = SID_TYPE(inq_data); if (type == T_NODEVICE) { return (0); } return (1); } /** * @brief completion function before returning to CAM * * I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. * * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; bus_dma_segment_t *ori_sglist = NULL; int ori_sg_count = 0; /* destroy bounce buffer if it is used */ if (reqp->bounce_sgl_count) { ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; ori_sg_count = ccb->csio.sglist_cnt; /* * If it is READ operation, we should copy back the data * to original SG list. */ if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, ori_sg_count, reqp->bounce_sgl, reqp->not_aligned_seg_bits); } storvsc_destroy_bounce_buffer(reqp->bounce_sgl); reqp->bounce_sgl_count = 0; } if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } #ifdef notyet /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed when timer * handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } #endif ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (vm_srb->scsi_status == SCSI_STATUS_OK) { const struct scsi_generic *cmd; /* * Check whether the data for INQUIRY cmd is valid or * not. Windows 10 and Windows 2016 send all zero * inquiry data to VM even for unpopulated slots. */ cmd = (const struct scsi_generic *) ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); if (cmd->opcode == INQUIRY) { /* * The host of Windows 10 or 2016 server will response * the inquiry request with invalid data for unexisted device: [0x7f 0x0 0x5 0x2 0x1f ... ] * But on windows 2012 R2, the response is: [0x7f 0x0 0x0 0x0 0x0 ] * That is why here wants to validate the inquiry response. * The validation will skip the INQUIRY whose response is short, * which is less than SHORT_INQUIRY_LENGTH (36). * * For more information about INQUIRY, please refer to: * ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf */ const struct scsi_inquiry_data *inq_data = (const struct scsi_inquiry_data *)csio->data_ptr; uint8_t* resp_buf = (uint8_t*)csio->data_ptr; /* Get the buffer length reported by host */ int resp_xfer_len = vm_srb->transfer_len; /* Get the available buffer length */ int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0; int data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len; if (data_len < SHORT_INQUIRY_LENGTH) { ccb->ccb_h.status |= CAM_REQ_CMP; if (bootverbose && data_len >= 5) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc skips the validation for short inquiry (%d)" " [%x %x %x %x %x]\n", data_len,resp_buf[0],resp_buf[1],resp_buf[2], resp_buf[3],resp_buf[4]); mtx_unlock(&sc->hs_lock); } } else if (is_inquiry_valid(inq_data) == 0) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; if (bootverbose && data_len >= 5) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc uninstalled invalid device" " [%x %x %x %x %x]\n", resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]); mtx_unlock(&sc->hs_lock); } } else { ccb->ccb_h.status |= CAM_REQ_CMP; if (bootverbose) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc has passed inquiry response (%d) validation\n", data_len); mtx_unlock(&sc->hs_lock); } } } else { ccb->ccb_h.status |= CAM_REQ_CMP; } } else { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc scsi_status = %d\n", vm_srb->scsi_status); mtx_unlock(&sc->hs_lock); ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); mtx_unlock(&sc->hs_lock); xpt_done_direct(ccb); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine type of storage device from GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI or BlkVSC (paravirtual IDE) device. * * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { device_t parent = device_get_parent(dev); if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0) return DRIVER_BLKVSC; if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0) return DRIVER_STORVSC; return DRIVER_UNKNOWN; } Index: head/sys/dev/hyperv/utilities/hv_heartbeat.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_heartbeat.c (revision 302801) +++ head/sys/dev/hyperv/utilities/hv_heartbeat.c (revision 302802) @@ -1,131 +1,131 @@ /*- * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include "hv_util.h" #include "vmbus_if.h" /* Heartbeat Service */ -static const hv_guid service_guid = { .data = +static const struct hyperv_guid service_guid = { .hv_guid = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d} }; /** * Process heartbeat message */ static void hv_heartbeat_cb(void *context) { uint8_t* buf; hv_vmbus_channel* channel; uint32_t recvlen; uint64_t requestid; int ret; struct hv_vmbus_heartbeat_msg_data* heartbeat_msg; struct hv_vmbus_icmsg_hdr* icmsghdrp; hv_util_sc *softc; softc = (hv_util_sc*)context; buf = softc->receive_buffer; channel = softc->channel; ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen, &requestid); if ((ret == 0) && recvlen > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, buf); } else { heartbeat_msg = (struct hv_vmbus_heartbeat_msg_data *) &buf[sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; heartbeat_msg->seq_num += 1; } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } } static int hv_heartbeat_probe(device_t dev) { if (resource_disabled("hvheartbeat", 0)) return ENXIO; if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &service_guid) == 0) { device_set_desc(dev, "Hyper-V Heartbeat Service"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hv_heartbeat_attach(device_t dev) { hv_util_sc *softc = (hv_util_sc*)device_get_softc(dev); softc->callback = hv_heartbeat_cb; return hv_util_attach(dev); } static device_method_t heartbeat_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_heartbeat_probe), DEVMETHOD(device_attach, hv_heartbeat_attach), DEVMETHOD(device_detach, hv_util_detach), { 0, 0 } }; static driver_t heartbeat_driver = { "hvheartbeat", heartbeat_methods, sizeof(hv_util_sc)}; static devclass_t heartbeat_devclass; DRIVER_MODULE(hv_heartbeat, vmbus, heartbeat_driver, heartbeat_devclass, NULL, NULL); MODULE_VERSION(hv_heartbeat, 1); MODULE_DEPEND(hv_heartbeat, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/utilities/hv_kvp.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_kvp.c (revision 302801) +++ head/sys/dev/hyperv/utilities/hv_kvp.c (revision 302802) @@ -1,949 +1,949 @@ /*- * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Author: Sainath Varanasi. * Date: 4/2012 * Email: bsdic@microsoft.com */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_util.h" #include "unicode.h" #include "hv_kvp.h" #include "vmbus_if.h" /* hv_kvp defines */ #define BUFFERSIZE sizeof(struct hv_kvp_msg) #define KVP_SUCCESS 0 #define KVP_ERROR 1 #define kvp_hdr hdr.kvp_hdr /* hv_kvp debug control */ static int hv_kvp_log = 0; #define hv_kvp_log_error(...) do { \ if (hv_kvp_log > 0) \ log(LOG_ERR, "hv_kvp: " __VA_ARGS__); \ } while (0) #define hv_kvp_log_info(...) do { \ if (hv_kvp_log > 1) \ log(LOG_INFO, "hv_kvp: " __VA_ARGS__); \ } while (0) -static const hv_guid service_guid = { .data = +static const struct hyperv_guid service_guid = { .hv_guid = {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6} }; /* character device prototypes */ static d_open_t hv_kvp_dev_open; static d_close_t hv_kvp_dev_close; static d_read_t hv_kvp_dev_daemon_read; static d_write_t hv_kvp_dev_daemon_write; static d_poll_t hv_kvp_dev_daemon_poll; /* hv_kvp character device structure */ static struct cdevsw hv_kvp_cdevsw = { .d_version = D_VERSION, .d_open = hv_kvp_dev_open, .d_close = hv_kvp_dev_close, .d_read = hv_kvp_dev_daemon_read, .d_write = hv_kvp_dev_daemon_write, .d_poll = hv_kvp_dev_daemon_poll, .d_name = "hv_kvp_dev", }; /* * Global state to track and synchronize multiple * KVP transaction requests from the host. */ typedef struct hv_kvp_sc { struct hv_util_sc util_sc; /* Unless specified the pending mutex should be * used to alter the values of the following parameters: * 1. req_in_progress * 2. req_timed_out */ struct mtx pending_mutex; struct task task; /* To track if transaction is active or not */ boolean_t req_in_progress; /* Tracks if daemon did not reply back in time */ boolean_t req_timed_out; /* Tracks if daemon is serving a request currently */ boolean_t daemon_busy; /* Length of host message */ uint32_t host_msg_len; /* Host message id */ uint64_t host_msg_id; /* Current kvp message from the host */ struct hv_kvp_msg *host_kvp_msg; /* Current kvp message for daemon */ struct hv_kvp_msg daemon_kvp_msg; /* Rcv buffer for communicating with the host*/ uint8_t *rcv_buf; /* Device semaphore to control communication */ struct sema dev_sema; /* Indicates if daemon registered with driver */ boolean_t register_done; /* Character device status */ boolean_t dev_accessed; struct cdev *hv_kvp_dev; struct proc *daemon_task; struct selinfo hv_kvp_selinfo; } hv_kvp_sc; /* hv_kvp prototypes */ static int hv_kvp_req_in_progress(hv_kvp_sc *sc); static void hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t, uint64_t, uint8_t *); static void hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc); static void hv_kvp_process_request(void *context, int pending); /* * hv_kvp low level functions */ /* * Check if kvp transaction is in progres */ static int hv_kvp_req_in_progress(hv_kvp_sc *sc) { return (sc->req_in_progress); } /* * This routine is called whenever a message is received from the host */ static void hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t rcv_len, uint64_t request_id, uint8_t *rcv_buf) { /* Store all the relevant message details in the global structure */ /* Do not need to use mutex for req_in_progress here */ sc->req_in_progress = true; sc->host_msg_len = rcv_len; sc->host_msg_id = request_id; sc->rcv_buf = rcv_buf; sc->host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; } /* * hv_kvp - version neogtiation function */ static void hv_kvp_negotiate_version(struct hv_vmbus_icmsg_hdr *icmsghdrp, struct hv_vmbus_icmsg_negotiate *negop, uint8_t *buf) { int icframe_vercnt; int icmsg_vercnt; icmsghdrp->icmsgsize = 0x10; negop = (struct hv_vmbus_icmsg_negotiate *)&buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; icframe_vercnt = negop->icframe_vercnt; icmsg_vercnt = negop->icmsg_vercnt; /* * Select the framework version number we will support */ if ((icframe_vercnt >= 2) && (negop->icversion_data[1].major == 3)) { icframe_vercnt = 3; if (icmsg_vercnt > 2) icmsg_vercnt = 4; else icmsg_vercnt = 3; } else { icframe_vercnt = 1; icmsg_vercnt = 1; } negop->icframe_vercnt = 1; negop->icmsg_vercnt = 1; negop->icversion_data[0].major = icframe_vercnt; negop->icversion_data[0].minor = 0; negop->icversion_data[1].major = icmsg_vercnt; negop->icversion_data[1].minor = 0; } /* * Convert ip related info in umsg from utf8 to utf16 and store in hmsg */ static int hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg, struct hv_kvp_ip_msg *host_ip_msg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.ip_addr, strlen((char *)umsg->body.kvp_ip_val.ip_addr), UNUSED_FLAG, &err_ip); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.sub_net, strlen((char *)umsg->body.kvp_ip_val.sub_net), UNUSED_FLAG, &err_subnet); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (char *)umsg->body.kvp_ip_val.gate_way, strlen((char *)umsg->body.kvp_ip_val.gate_way), UNUSED_FLAG, &err_gway); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.dns_addr, strlen((char *)umsg->body.kvp_ip_val.dns_addr), UNUSED_FLAG, &err_dns); utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.adapter_id, strlen((char *)umsg->body.kvp_ip_val.adapter_id), UNUSED_FLAG, &err_adap); host_ip_msg->kvp_ip_val.dhcp_enabled = umsg->body.kvp_ip_val.dhcp_enabled; host_ip_msg->kvp_ip_val.addr_family = umsg->body.kvp_ip_val.addr_family; return (err_ip | err_subnet | err_gway | err_dns | err_adap); } /* * Convert ip related info in hmsg from utf16 to utf8 and store in umsg */ static int hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg, struct hv_kvp_msg *umsg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; device_t *devs; int devcnt; /* IP Address */ utf16_to_utf8((char *)umsg->body.kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_ip); /* Adapter ID : GUID */ utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, UNUSED_FLAG, &err_adap); if (devclass_get_devices(devclass_find("hn"), &devs, &devcnt) == 0) { for (devcnt = devcnt - 1; devcnt >= 0; devcnt--) { /* XXX access other driver's softc? are you kidding? */ device_t dev = devs[devcnt]; struct hn_softc *sc = device_get_softc(dev); struct hv_vmbus_channel *chan; char buf[HYPERV_GUID_STRLEN]; /* * Trying to find GUID of Network Device * TODO: need vmbus interface. */ chan = vmbus_get_channel(dev); hyperv_guid2str(&chan->ch_guid_inst, buf, sizeof(buf)); if (strncmp(buf, (char *)umsg->body.kvp_ip_val.adapter_id, HYPERV_GUID_STRLEN - 1) == 0) { strlcpy((char *)umsg->body.kvp_ip_val.adapter_id, sc->hn_ifp->if_xname, MAX_ADAPTER_ID_SIZE); break; } } free(devs, M_TEMP); } /* Address Family , DHCP , SUBNET, Gateway, DNS */ umsg->kvp_hdr.operation = host_ip_msg->operation; umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family; umsg->body.kvp_ip_val.dhcp_enabled = host_ip_msg->kvp_ip_val.dhcp_enabled; utf16_to_utf8((char *)umsg->body.kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.sub_net, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_subnet); utf16_to_utf8((char *)umsg->body.kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, UNUSED_FLAG, &err_gway); utf16_to_utf8((char *)umsg->body.kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.dns_addr, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_dns); return (err_ip | err_subnet | err_gway | err_dns | err_adap); } /* * Prepare a user kvp msg based on host kvp msg (utf16 to utf8) * Ensure utf16_utf8 takes care of the additional string terminating char!! */ static void hv_kvp_convert_hostmsg_to_usermsg(struct hv_kvp_msg *hmsg, struct hv_kvp_msg *umsg) { int utf_err = 0; uint32_t value_type; struct hv_kvp_ip_msg *host_ip_msg; host_ip_msg = (struct hv_kvp_ip_msg*)hmsg; memset(umsg, 0, sizeof(struct hv_kvp_msg)); umsg->kvp_hdr.operation = hmsg->kvp_hdr.operation; umsg->kvp_hdr.pool = hmsg->kvp_hdr.pool; switch (umsg->kvp_hdr.operation) { case HV_KVP_OP_SET_IP_INFO: hv_kvp_convert_utf16_ipinfo_to_utf8(host_ip_msg, umsg); break; case HV_KVP_OP_GET_IP_INFO: utf16_to_utf8((char *)umsg->body.kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.adapter_id, MAX_ADAPTER_ID_SIZE, 1, &utf_err); umsg->body.kvp_ip_val.addr_family = host_ip_msg->kvp_ip_val.addr_family; break; case HV_KVP_OP_SET: value_type = hmsg->body.kvp_set.data.value_type; switch (value_type) { case HV_REG_SZ: umsg->body.kvp_set.data.value_size = utf16_to_utf8( (char *)umsg->body.kvp_set.data.msg_value.value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE - 1, (uint16_t *)hmsg->body.kvp_set.data.msg_value.value, hmsg->body.kvp_set.data.value_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_set.data.value_size = umsg->body.kvp_set.data.value_size / 2; break; case HV_REG_U32: umsg->body.kvp_set.data.value_size = sprintf(umsg->body.kvp_set.data.msg_value.value, "%d", hmsg->body.kvp_set.data.msg_value.value_u32) + 1; break; case HV_REG_U64: umsg->body.kvp_set.data.value_size = sprintf(umsg->body.kvp_set.data.msg_value.value, "%llu", (unsigned long long) hmsg->body.kvp_set.data.msg_value.value_u64) + 1; break; } umsg->body.kvp_set.data.key_size = utf16_to_utf8( umsg->body.kvp_set.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_set.data.key, hmsg->body.kvp_set.data.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_set.data.key_size = umsg->body.kvp_set.data.key_size / 2; break; case HV_KVP_OP_GET: umsg->body.kvp_get.data.key_size = utf16_to_utf8(umsg->body.kvp_get.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_get.data.key, hmsg->body.kvp_get.data.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_get.data.key_size = umsg->body.kvp_get.data.key_size / 2; break; case HV_KVP_OP_DELETE: umsg->body.kvp_delete.key_size = utf16_to_utf8(umsg->body.kvp_delete.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1, (uint16_t *)hmsg->body.kvp_delete.key, hmsg->body.kvp_delete.key_size, 1, &utf_err); /* utf8 encoding */ umsg->body.kvp_delete.key_size = umsg->body.kvp_delete.key_size / 2; break; case HV_KVP_OP_ENUMERATE: umsg->body.kvp_enum_data.index = hmsg->body.kvp_enum_data.index; break; default: hv_kvp_log_info("%s: daemon_kvp_msg: Invalid operation : %d\n", __func__, umsg->kvp_hdr.operation); } } /* * Prepare a host kvp msg based on user kvp msg (utf8 to utf16) */ static int hv_kvp_convert_usermsg_to_hostmsg(struct hv_kvp_msg *umsg, struct hv_kvp_msg *hmsg) { int hkey_len = 0, hvalue_len = 0, utf_err = 0; struct hv_kvp_exchg_msg_value *host_exchg_data; char *key_name, *value; struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *)hmsg; switch (hmsg->kvp_hdr.operation) { case HV_KVP_OP_GET_IP_INFO: return (hv_kvp_convert_utf8_ipinfo_to_utf16(umsg, host_ip_msg)); case HV_KVP_OP_SET_IP_INFO: case HV_KVP_OP_SET: case HV_KVP_OP_DELETE: return (KVP_SUCCESS); case HV_KVP_OP_ENUMERATE: host_exchg_data = &hmsg->body.kvp_enum_data.data; key_name = umsg->body.kvp_enum_data.data.key; hkey_len = utf8_to_utf16((uint16_t *)host_exchg_data->key, ((HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2) - 2), key_name, strlen(key_name), 1, &utf_err); /* utf16 encoding */ host_exchg_data->key_size = 2 * (hkey_len + 1); value = umsg->body.kvp_enum_data.data.msg_value.value; hvalue_len = utf8_to_utf16( (uint16_t *)host_exchg_data->msg_value.value, ((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2), value, strlen(value), 1, &utf_err); host_exchg_data->value_size = 2 * (hvalue_len + 1); host_exchg_data->value_type = HV_REG_SZ; if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); return (KVP_SUCCESS); case HV_KVP_OP_GET: host_exchg_data = &hmsg->body.kvp_get.data; value = umsg->body.kvp_get.data.msg_value.value; hvalue_len = utf8_to_utf16( (uint16_t *)host_exchg_data->msg_value.value, ((HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2) - 2), value, strlen(value), 1, &utf_err); /* Convert value size to uft16 */ host_exchg_data->value_size = 2 * (hvalue_len + 1); /* Use values by string */ host_exchg_data->value_type = HV_REG_SZ; if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); return (KVP_SUCCESS); default: return (HV_KVP_E_FAIL); } } /* * Send the response back to the host. */ static void hv_kvp_respond_host(hv_kvp_sc *sc, int error) { struct hv_vmbus_icmsg_hdr *hv_icmsg_hdrp; hv_icmsg_hdrp = (struct hv_vmbus_icmsg_hdr *) &sc->rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (error) error = HV_KVP_E_FAIL; hv_icmsg_hdrp->status = error; hv_icmsg_hdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; error = hv_vmbus_channel_send_packet(sc->util_sc.channel, sc->rcv_buf, sc->host_msg_len, sc->host_msg_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (error) hv_kvp_log_info("%s: hv_kvp_respond_host: sendpacket error:%d\n", __func__, error); } /* * This is the main kvp kernel process that interacts with both user daemon * and the host */ static void hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc) { struct hv_kvp_msg *hmsg = sc->host_kvp_msg; struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg; /* Prepare kvp_msg to be sent to user */ hv_kvp_convert_hostmsg_to_usermsg(hmsg, umsg); /* Send the msg to user via function deamon_read - setting sema */ sema_post(&sc->dev_sema); /* We should wake up the daemon, in case it's doing poll() */ selwakeup(&sc->hv_kvp_selinfo); } /* * Function to read the kvp request buffer from host * and interact with daemon */ static void hv_kvp_process_request(void *context, int pending) { uint8_t *kvp_buf; hv_vmbus_channel *channel; uint32_t recvlen = 0; uint64_t requestid; struct hv_vmbus_icmsg_hdr *icmsghdrp; int ret = 0; hv_kvp_sc *sc; hv_kvp_log_info("%s: entering hv_kvp_process_request\n", __func__); sc = (hv_kvp_sc*)context; kvp_buf = sc->util_sc.receive_buffer; channel = sc->util_sc.channel; ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); while ((ret == 0) && (recvlen > 0)) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)]; hv_kvp_transaction_init(sc, recvlen, requestid, kvp_buf); if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_kvp_negotiate_version(icmsghdrp, NULL, kvp_buf); hv_kvp_respond_host(sc, ret); /* * It is ok to not acquire the mutex before setting * req_in_progress here because negotiation is the * first thing that happens and hence there is no * chance of a race condition. */ sc->req_in_progress = false; hv_kvp_log_info("%s :version negotiated\n", __func__); } else { if (!sc->daemon_busy) { hv_kvp_log_info("%s: issuing qury to daemon\n", __func__); mtx_lock(&sc->pending_mutex); sc->req_timed_out = false; sc->daemon_busy = true; mtx_unlock(&sc->pending_mutex); hv_kvp_send_msg_to_daemon(sc); hv_kvp_log_info("%s: waiting for daemon\n", __func__); } /* Wait 5 seconds for daemon to respond back */ tsleep(sc, 0, "kvpworkitem", 5 * hz); hv_kvp_log_info("%s: came out of wait\n", __func__); } mtx_lock(&sc->pending_mutex); /* Notice that once req_timed_out is set to true * it will remain true until the next request is * sent to the daemon. The response from daemon * is forwarded to host only when this flag is * false. */ sc->req_timed_out = true; /* * Cancel request if so need be. */ if (hv_kvp_req_in_progress(sc)) { hv_kvp_log_info("%s: request was still active after wait so failing\n", __func__); hv_kvp_respond_host(sc, HV_KVP_E_FAIL); sc->req_in_progress = false; } mtx_unlock(&sc->pending_mutex); /* * Try reading next buffer */ recvlen = 0; ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); hv_kvp_log_info("%s: read: context %p, ret =%d, recvlen=%d\n", __func__, context, ret, recvlen); } } /* * Callback routine that gets called whenever there is a message from host */ static void hv_kvp_callback(void *context) { hv_kvp_sc *sc = (hv_kvp_sc*)context; /* The first request from host will not be handled until daemon is registered. when callback is triggered without a registered daemon, callback just return. When a new daemon gets regsitered, this callbcak is trigged from _write op. */ if (sc->register_done) { hv_kvp_log_info("%s: Queuing work item\n", __func__); taskqueue_enqueue(taskqueue_thread, &sc->task); } } static int hv_kvp_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; hv_kvp_log_info("%s: Opened device \"hv_kvp_device\" successfully.\n", __func__); if (sc->dev_accessed) return (-EBUSY); sc->daemon_task = curproc; sc->dev_accessed = true; sc->daemon_busy = false; return (0); } static int hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, struct thread *td __unused) { hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; hv_kvp_log_info("%s: Closing device \"hv_kvp_device\".\n", __func__); sc->dev_accessed = false; sc->register_done = false; return (0); } /* * hv_kvp_daemon read invokes this function * acts as a send to daemon */ static int hv_kvp_dev_daemon_read(struct cdev *dev, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; struct hv_kvp_msg *hv_kvp_dev_buf; hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; /* Check hv_kvp daemon registration status*/ if (!sc->register_done) return (KVP_ERROR); sema_wait(&sc->dev_sema); hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK); memcpy(hv_kvp_dev_buf, &sc->daemon_kvp_msg, sizeof(struct hv_kvp_msg)); amt = MIN(uio->uio_resid, uio->uio_offset >= BUFFERSIZE + 1 ? 0 : BUFFERSIZE + 1 - uio->uio_offset); if ((error = uiomove(hv_kvp_dev_buf, amt, uio)) != 0) hv_kvp_log_info("%s: hv_kvp uiomove read failed!\n", __func__); free(hv_kvp_dev_buf, M_TEMP); return (error); } /* * hv_kvp_daemon write invokes this function * acts as a receive from daemon */ static int hv_kvp_dev_daemon_write(struct cdev *dev, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; struct hv_kvp_msg *hv_kvp_dev_buf; hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; uio->uio_offset = 0; hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK); amt = MIN(uio->uio_resid, BUFFERSIZE); error = uiomove(hv_kvp_dev_buf, amt, uio); if (error != 0) { free(hv_kvp_dev_buf, M_TEMP); return (error); } memcpy(&sc->daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg)); free(hv_kvp_dev_buf, M_TEMP); if (sc->register_done == false) { if (sc->daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) { sc->register_done = true; hv_kvp_callback(dev->si_drv1); } else { hv_kvp_log_info("%s, KVP Registration Failed\n", __func__); return (KVP_ERROR); } } else { mtx_lock(&sc->pending_mutex); if(!sc->req_timed_out) { struct hv_kvp_msg *hmsg = sc->host_kvp_msg; struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg; hv_kvp_convert_usermsg_to_hostmsg(umsg, hmsg); hv_kvp_respond_host(sc, KVP_SUCCESS); wakeup(sc); sc->req_in_progress = false; } sc->daemon_busy = false; mtx_unlock(&sc->pending_mutex); } return (error); } /* * hv_kvp_daemon poll invokes this function to check if data is available * for daemon to read. */ static int hv_kvp_dev_daemon_poll(struct cdev *dev, int events, struct thread *td) { int revents = 0; hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; mtx_lock(&sc->pending_mutex); /* * We check global flag daemon_busy for the data availiability for * userland to read. Deamon_busy is set to true before driver has data * for daemon to read. It is set to false after daemon sends * then response back to driver. */ if (sc->daemon_busy == true) revents = POLLIN; else selrecord(td, &sc->hv_kvp_selinfo); mtx_unlock(&sc->pending_mutex); return (revents); } static int hv_kvp_probe(device_t dev) { if (resource_disabled("hvkvp", 0)) return ENXIO; if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &service_guid) == 0) { device_set_desc(dev, "Hyper-V KVP Service"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hv_kvp_attach(device_t dev) { int error; struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev); sc->util_sc.callback = hv_kvp_callback; sema_init(&sc->dev_sema, 0, "hv_kvp device semaphore"); mtx_init(&sc->pending_mutex, "hv-kvp pending mutex", NULL, MTX_DEF); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_kvp_log", CTLFLAG_RW, &hv_kvp_log, 0, "Hyperv KVP service log level"); TASK_INIT(&sc->task, 0, hv_kvp_process_request, sc); /* create character device */ error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &sc->hv_kvp_dev, &hv_kvp_cdevsw, 0, UID_ROOT, GID_WHEEL, 0640, "hv_kvp_dev"); if (error != 0) return (error); sc->hv_kvp_dev->si_drv1 = sc; return hv_util_attach(dev); } static int hv_kvp_detach(device_t dev) { hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev); if (sc->daemon_task != NULL) { PROC_LOCK(sc->daemon_task); kern_psignal(sc->daemon_task, SIGKILL); PROC_UNLOCK(sc->daemon_task); } destroy_dev(sc->hv_kvp_dev); return hv_util_detach(dev); } static device_method_t kvp_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_kvp_probe), DEVMETHOD(device_attach, hv_kvp_attach), DEVMETHOD(device_detach, hv_kvp_detach), { 0, 0 } }; static driver_t kvp_driver = { "hvkvp", kvp_methods, sizeof(hv_kvp_sc)}; static devclass_t kvp_devclass; DRIVER_MODULE(hv_kvp, vmbus, kvp_driver, kvp_devclass, NULL, NULL); MODULE_VERSION(hv_kvp, 1); MODULE_DEPEND(hv_kvp, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/utilities/hv_shutdown.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_shutdown.c (revision 302801) +++ head/sys/dev/hyperv/utilities/hv_shutdown.c (revision 302802) @@ -1,153 +1,153 @@ /*- * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * A common driver for all hyper-V util services. */ #include #include #include #include #include #include #include #include #include #include "hv_util.h" #include "vmbus_if.h" -static const hv_guid service_guid = { .data = +static const struct hyperv_guid service_guid = { .hv_guid = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49, 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB} }; /** * Shutdown */ static void hv_shutdown_cb(void *context) { uint8_t* buf; hv_vmbus_channel* channel; uint8_t execute_shutdown = 0; hv_vmbus_icmsg_hdr* icmsghdrp; uint32_t recv_len; uint64_t request_id; int ret; hv_vmbus_shutdown_msg_data* shutdown_msg; hv_util_sc *softc; softc = (hv_util_sc*)context; buf = softc->receive_buffer; channel = softc->channel; ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recv_len, &request_id); if ((ret == 0) && recv_len > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, buf); } else { shutdown_msg = (struct hv_vmbus_shutdown_msg_data *) &buf[sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; switch (shutdown_msg->flags) { case 0: case 1: icmsghdrp->status = HV_S_OK; execute_shutdown = 1; if(bootverbose) printf("Shutdown request received -" " graceful shutdown initiated\n"); break; default: icmsghdrp->status = HV_E_FAIL; execute_shutdown = 0; printf("Shutdown request received -" " Invalid request\n"); break; } } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, buf, recv_len, request_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } if (execute_shutdown) shutdown_nice(RB_POWEROFF); } static int hv_shutdown_probe(device_t dev) { if (resource_disabled("hvshutdown", 0)) return ENXIO; if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &service_guid) == 0) { device_set_desc(dev, "Hyper-V Shutdown Service"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hv_shutdown_attach(device_t dev) { hv_util_sc *softc = (hv_util_sc*)device_get_softc(dev); softc->callback = hv_shutdown_cb; return hv_util_attach(dev); } static device_method_t shutdown_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_shutdown_probe), DEVMETHOD(device_attach, hv_shutdown_attach), DEVMETHOD(device_detach, hv_util_detach), { 0, 0 } }; static driver_t shutdown_driver = { "hvshutdown", shutdown_methods, sizeof(hv_util_sc)}; static devclass_t shutdown_devclass; DRIVER_MODULE(hv_shutdown, vmbus, shutdown_driver, shutdown_devclass, NULL, NULL); MODULE_VERSION(hv_shutdown, 1); MODULE_DEPEND(hv_shutdown, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/utilities/hv_timesync.c =================================================================== --- head/sys/dev/hyperv/utilities/hv_timesync.c (revision 302801) +++ head/sys/dev/hyperv/utilities/hv_timesync.c (revision 302802) @@ -1,218 +1,218 @@ /*- * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * A common driver for all hyper-V util services. */ #include #include #include #include #include #include #include #include #include #include "hv_util.h" #include "vmbus_if.h" #define HV_WLTIMEDELTA 116444736000000000L /* in 100ns unit */ #define HV_ICTIMESYNCFLAG_PROBE 0 #define HV_ICTIMESYNCFLAG_SYNC 1 #define HV_ICTIMESYNCFLAG_SAMPLE 2 #define HV_NANO_SEC_PER_SEC 1000000000 /* Time Sync data */ typedef struct { uint64_t data; } time_sync_data; /* Time Synch Service */ -static const hv_guid service_guid = {.data = +static const struct hyperv_guid service_guid = {.hv_guid = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf } }; struct hv_ictimesync_data { uint64_t parenttime; uint64_t childtime; uint64_t roundtriptime; uint8_t flags; } __packed; typedef struct hv_timesync_sc { hv_util_sc util_sc; struct task task; time_sync_data time_msg; } hv_timesync_sc; /** * Set host time based on time sync message from host */ static void hv_set_host_time(void *context, int pending) { hv_timesync_sc *softc = (hv_timesync_sc*)context; uint64_t hosttime = softc->time_msg.data; struct timespec guest_ts, host_ts; uint64_t host_tns; int64_t diff; int error; host_tns = (hosttime - HV_WLTIMEDELTA) * 100; host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC); host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC); nanotime(&guest_ts); diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec; /* * If host differs by 5 seconds then make the guest catch up */ if (diff > 5 || diff < -5) { error = kern_clock_settime(curthread, CLOCK_REALTIME, &host_ts); } } /** * @brief Synchronize time with host after reboot, restore, etc. * * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM. * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time * message after the timesync channel is opened. Since the hv_utils module is * loaded after hv_vmbus, the first message is usually missed. The other * thing is, systime is automatically set to emulated hardware clock which may * not be UTC time or in the same time zone. So, to override these effects, we * use the first 50 time samples for initial system time setting. */ static inline void hv_adj_guesttime(hv_timesync_sc *sc, uint64_t hosttime, uint8_t flags) { sc->time_msg.data = hosttime; if (((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) || ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0)) { taskqueue_enqueue(taskqueue_thread, &sc->task); } } /** * Time Sync Channel message handler */ static void hv_timesync_cb(void *context) { hv_vmbus_channel* channel; hv_vmbus_icmsg_hdr* icmsghdrp; uint32_t recvlen; uint64_t requestId; int ret; uint8_t* time_buf; struct hv_ictimesync_data* timedatap; hv_timesync_sc *softc; softc = (hv_timesync_sc*)context; channel = softc->util_sc.channel; time_buf = softc->util_sc.receive_buffer; ret = hv_vmbus_channel_recv_packet(channel, time_buf, PAGE_SIZE, &recvlen, &requestId); if ((ret == 0) && recvlen > 0) { icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[ sizeof(struct hv_vmbus_pipe_hdr)]; if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { hv_negotiate_version(icmsghdrp, NULL, time_buf); } else { timedatap = (struct hv_ictimesync_data *) &time_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; hv_adj_guesttime(softc, timedatap->parenttime, timedatap->flags); } icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; hv_vmbus_channel_send_packet(channel, time_buf, recvlen, requestId, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); } } static int hv_timesync_probe(device_t dev) { if (resource_disabled("hvtimesync", 0)) return ENXIO; if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &service_guid) == 0) { device_set_desc(dev, "Hyper-V Time Synch Service"); return BUS_PROBE_DEFAULT; } return ENXIO; } static int hv_timesync_attach(device_t dev) { hv_timesync_sc *softc = device_get_softc(dev); softc->util_sc.callback = hv_timesync_cb; TASK_INIT(&softc->task, 1, hv_set_host_time, softc); return hv_util_attach(dev); } static int hv_timesync_detach(device_t dev) { hv_timesync_sc *softc = device_get_softc(dev); taskqueue_drain(taskqueue_thread, &softc->task); return hv_util_detach(dev); } static device_method_t timesync_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hv_timesync_probe), DEVMETHOD(device_attach, hv_timesync_attach), DEVMETHOD(device_detach, hv_timesync_detach), { 0, 0 } }; static driver_t timesync_driver = { "hvtimesync", timesync_methods, sizeof(hv_timesync_sc)}; static devclass_t timesync_devclass; DRIVER_MODULE(hv_timesync, vmbus, timesync_driver, timesync_devclass, NULL, NULL); MODULE_VERSION(hv_timesync, 1); MODULE_DEPEND(hv_timesync, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 302801) +++ head/sys/dev/hyperv/vmbus/hv_channel_mgmt.c (revision 302802) @@ -1,576 +1,576 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include typedef void (*vmbus_chanmsg_proc_t) (struct vmbus_softc *, const struct vmbus_message *); static struct hv_vmbus_channel *hv_vmbus_allocate_channel(struct vmbus_softc *); static void vmbus_channel_on_offer_internal(struct vmbus_softc *, const hv_vmbus_channel_offer_channel *offer); static void vmbus_chan_detach_task(void *, int); static void vmbus_channel_on_offer(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_channel_on_offers_delivered(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *, const struct vmbus_message *); /** * Channel message dispatch table */ static const vmbus_chanmsg_proc_t vmbus_chanmsg_process[HV_CHANNEL_MESSAGE_COUNT] = { [HV_CHANNEL_MESSAGE_OFFER_CHANNEL] = vmbus_channel_on_offer, [HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER] = vmbus_chan_msgproc_chrescind, [HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED] = vmbus_channel_on_offers_delivered, [HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT] = vmbus_msghc_wakeup, [HV_CHANNEL_MESSAGE_GPADL_CREATED] = vmbus_msghc_wakeup, [HV_CHANNEL_MESSAGE_GPADL_TORNDOWN] = vmbus_msghc_wakeup, [HV_CHANNEL_MESSAGE_VERSION_RESPONSE] = vmbus_msghc_wakeup }; /** * @brief Allocate and initialize a vmbus channel object */ static struct hv_vmbus_channel * hv_vmbus_allocate_channel(struct vmbus_softc *sc) { struct hv_vmbus_channel *channel; channel = malloc(sizeof(*channel), M_DEVBUF, M_WAITOK | M_ZERO); channel->vmbus_sc = sc; mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF); TAILQ_INIT(&channel->sc_list_anchor); TASK_INIT(&channel->ch_detach_task, 0, vmbus_chan_detach_task, channel); return (channel); } /** * @brief Release the resources used by the vmbus channel object */ void hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel) { mtx_destroy(&channel->sc_lock); free(channel, M_DEVBUF); } /** * @brief Process the offer by creating a channel/device * associated with this offer */ static void vmbus_channel_process_offer(hv_vmbus_channel *new_channel) { struct vmbus_softc *sc = new_channel->vmbus_sc; hv_vmbus_channel* channel; uint32_t relid; relid = new_channel->ch_id; /* * Make sure this is a new offer */ mtx_lock(&sc->vmbus_chlist_lock); if (relid == 0) { /* * XXX channel0 will not be processed; skip it. */ printf("VMBUS: got channel0 offer\n"); } else { sc->vmbus_chmap[relid] = new_channel; } TAILQ_FOREACH(channel, &sc->vmbus_chlist, ch_link) { if (memcmp(&channel->ch_guid_type, &new_channel->ch_guid_type, - sizeof(hv_guid)) == 0 && + sizeof(struct hyperv_guid)) == 0 && memcmp(&channel->ch_guid_inst, &new_channel->ch_guid_inst, - sizeof(hv_guid)) == 0) + sizeof(struct hyperv_guid)) == 0) break; } if (channel == NULL) { /* Install the new primary channel */ TAILQ_INSERT_TAIL(&sc->vmbus_chlist, new_channel, ch_link); } mtx_unlock(&sc->vmbus_chlist_lock); if (bootverbose) { char logstr[64]; logstr[0] = '\0'; if (channel != NULL) { snprintf(logstr, sizeof(logstr), ", primary chan%u", channel->ch_id); } device_printf(sc->vmbus_dev, "chan%u subchanid%u offer%s\n", new_channel->ch_id, new_channel->ch_subidx, logstr); } if (channel != NULL) { /* * Check if this is a sub channel. */ if (new_channel->ch_subidx != 0) { /* * It is a sub channel offer, process it. */ new_channel->primary_channel = channel; new_channel->ch_dev = channel->ch_dev; mtx_lock(&channel->sc_lock); TAILQ_INSERT_TAIL(&channel->sc_list_anchor, new_channel, sc_list_entry); mtx_unlock(&channel->sc_lock); /* * Insert the new channel to the end of the global * channel list. * * NOTE: * The new sub-channel MUST be inserted AFTER it's * primary channel, so that the primary channel will * be found in the above loop for its baby siblings. */ mtx_lock(&sc->vmbus_chlist_lock); TAILQ_INSERT_TAIL(&sc->vmbus_chlist, new_channel, ch_link); mtx_unlock(&sc->vmbus_chlist_lock); new_channel->state = HV_CHANNEL_OPEN_STATE; /* * Bump up sub-channel count and notify anyone that is * interested in this sub-channel, after this sub-channel * is setup. */ mtx_lock(&channel->sc_lock); channel->subchan_cnt++; mtx_unlock(&channel->sc_lock); wakeup(channel); return; } printf("VMBUS: duplicated primary channel%u\n", new_channel->ch_id); hv_vmbus_free_vmbus_channel(new_channel); return; } new_channel->state = HV_CHANNEL_OPEN_STATE; /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. * * NOTE: * Error is ignored here; don't have much to do if error really * happens. */ hv_vmbus_child_device_register(new_channel); } void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 || chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) { /* Only cpu0 is supported */ cpu = 0; } chan->target_cpu = cpu; chan->target_vcpu = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu); if (bootverbose) { printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n", chan->ch_id, chan->target_cpu, chan->target_vcpu); } } void vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan) { static uint32_t vmbus_chan_nextcpu; int cpu; cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus; vmbus_channel_cpu_set(chan, cpu); } static void vmbus_channel_select_defcpu(struct hv_vmbus_channel *chan) { /* * By default, pin the channel to cpu0. Devices having * special channel-cpu mapping requirement should call * vmbus_channel_cpu_{set,rr}(). */ vmbus_channel_cpu_set(chan, 0); } /** * @brief Handler for channel offers from Hyper-V/Azure * * Handler for channel offers from vmbus in parent partition. */ static void vmbus_channel_on_offer(struct vmbus_softc *sc, const struct vmbus_message *msg) { const hv_vmbus_channel_offer_channel *offer; /* New channel is offered by vmbus */ vmbus_scan_newchan(sc); offer = (const hv_vmbus_channel_offer_channel *)msg->msg_data; vmbus_channel_on_offer_internal(sc, offer); } static void vmbus_channel_on_offer_internal(struct vmbus_softc *sc, const hv_vmbus_channel_offer_channel *offer) { hv_vmbus_channel* new_channel; /* * Allocate the channel object and save this offer */ new_channel = hv_vmbus_allocate_channel(sc); new_channel->ch_id = offer->child_rel_id; new_channel->ch_subidx = offer->offer.sub_channel_index; new_channel->ch_guid_type = offer->offer.interface_type; new_channel->ch_guid_inst = offer->offer.interface_instance; /* Batch reading is on by default */ new_channel->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; if (offer->monitor_allocated) new_channel->ch_flags |= VMBUS_CHAN_FLAG_HASMNF; new_channel->ch_monprm = hyperv_dmamem_alloc( bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param), &new_channel->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (new_channel->ch_monprm == NULL) { device_printf(sc->vmbus_dev, "monprm alloc failed\n"); /* XXX */ mtx_destroy(&new_channel->sc_lock); free(new_channel, M_DEVBUF); return; } new_channel->ch_monprm->mp_connid = VMBUS_CONNID_EVENT; if (sc->vmbus_version != VMBUS_VERSION_WS2008) new_channel->ch_monprm->mp_connid = offer->connection_id; if (new_channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) { new_channel->ch_montrig_idx = offer->monitor_id / VMBUS_MONTRIG_LEN; if (new_channel->ch_montrig_idx >= VMBUS_MONTRIGS_MAX) panic("invalid monitor id %u", offer->monitor_id); new_channel->ch_montrig_mask = 1 << (offer->monitor_id % VMBUS_MONTRIG_LEN); } /* Select default cpu for this channel. */ vmbus_channel_select_defcpu(new_channel); vmbus_channel_process_offer(new_channel); } /* * XXX pretty broken; need rework. */ static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_chrescind *note; struct hv_vmbus_channel *chan; note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data; if (note->chm_chanid > VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n", note->chm_chanid); return; } if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u rescinded\n", note->chm_chanid); } chan = sc->vmbus_chmap[note->chm_chanid]; if (chan == NULL) return; sc->vmbus_chmap[note->chm_chanid] = NULL; taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task); } static void vmbus_chan_detach_task(void *xchan, int pending __unused) { struct hv_vmbus_channel *chan = xchan; if (HV_VMBUS_CHAN_ISPRIMARY(chan)) { /* Only primary channel owns the device */ hv_vmbus_child_device_unregister(chan); /* NOTE: DO NOT free primary channel for now */ } else { struct vmbus_softc *sc = chan->vmbus_sc; struct hv_vmbus_channel *pri_chan = chan->primary_channel; struct vmbus_chanmsg_chfree *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chfree(chan%u)\n", chan->ch_id); goto remove; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chfree(chan%u) failed: %d", chan->ch_id, error); /* NOTE: Move on! */ } else { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u freed\n", chan->ch_id); } } remove: mtx_lock(&sc->vmbus_chlist_lock); TAILQ_REMOVE(&sc->vmbus_chlist, chan, ch_link); mtx_unlock(&sc->vmbus_chlist_lock); mtx_lock(&pri_chan->sc_lock); TAILQ_REMOVE(&pri_chan->sc_list_anchor, chan, sc_list_entry); KASSERT(pri_chan->subchan_cnt > 0, ("invalid subchan_cnt %d", pri_chan->subchan_cnt)); pri_chan->subchan_cnt--; mtx_unlock(&pri_chan->sc_lock); wakeup(pri_chan); hv_vmbus_free_vmbus_channel(chan); } } /** * * @brief Invoked when all offers have been delivered. */ static void vmbus_channel_on_offers_delivered(struct vmbus_softc *sc, const struct vmbus_message *msg __unused) { /* No more new channels for the channel request. */ vmbus_scan_done(sc); } /** * @brief Release channels that are unattached/unconnected (i.e., no drivers associated) */ void hv_vmbus_release_unattached_channels(struct vmbus_softc *sc) { hv_vmbus_channel *channel; mtx_lock(&sc->vmbus_chlist_lock); while (!TAILQ_EMPTY(&sc->vmbus_chlist)) { channel = TAILQ_FIRST(&sc->vmbus_chlist); TAILQ_REMOVE(&sc->vmbus_chlist, channel, ch_link); if (HV_VMBUS_CHAN_ISPRIMARY(channel)) { /* Only primary channel owns the device */ hv_vmbus_child_device_unregister(channel); } hv_vmbus_free_vmbus_channel(channel); } bzero(sc->vmbus_chmap, sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX); mtx_unlock(&sc->vmbus_chlist_lock); } /** * @brief Select the best outgoing channel * * The channel whose vcpu binding is closest to the currect vcpu will * be selected. * If no multi-channel, always select primary channel * * @param primary - primary channel */ struct hv_vmbus_channel * vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) { hv_vmbus_channel *new_channel = NULL; hv_vmbus_channel *outgoing_channel = primary; int old_cpu_distance = 0; int new_cpu_distance = 0; int cur_vcpu = 0; int smp_pro_id = PCPU_GET(cpuid); if (TAILQ_EMPTY(&primary->sc_list_anchor)) { return outgoing_channel; } if (smp_pro_id >= MAXCPU) { return outgoing_channel; } cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id); TAILQ_FOREACH(new_channel, &primary->sc_list_anchor, sc_list_entry) { if (new_channel->state != HV_CHANNEL_OPENED_STATE){ continue; } if (new_channel->target_vcpu == cur_vcpu){ return new_channel; } old_cpu_distance = ((outgoing_channel->target_vcpu > cur_vcpu) ? (outgoing_channel->target_vcpu - cur_vcpu) : (cur_vcpu - outgoing_channel->target_vcpu)); new_cpu_distance = ((new_channel->target_vcpu > cur_vcpu) ? (new_channel->target_vcpu - cur_vcpu) : (cur_vcpu - new_channel->target_vcpu)); if (old_cpu_distance < new_cpu_distance) { continue; } outgoing_channel = new_channel; } return(outgoing_channel); } struct hv_vmbus_channel ** vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt) { struct hv_vmbus_channel **ret, *chan; int i; ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP, M_WAITOK); mtx_lock(&pri_chan->sc_lock); while (pri_chan->subchan_cnt < subchan_cnt) mtx_sleep(pri_chan, &pri_chan->sc_lock, 0, "subch", 0); i = 0; TAILQ_FOREACH(chan, &pri_chan->sc_list_anchor, sc_list_entry) { /* TODO: refcnt chan */ ret[i] = chan; ++i; if (i == subchan_cnt) break; } KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d", pri_chan->subchan_cnt, subchan_cnt)); mtx_unlock(&pri_chan->sc_lock); return ret; } void vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused) { free(subchan, M_TEMP); } void vmbus_drain_subchan(struct hv_vmbus_channel *pri_chan) { mtx_lock(&pri_chan->sc_lock); while (pri_chan->subchan_cnt > 0) mtx_sleep(pri_chan, &pri_chan->sc_lock, 0, "dsubch", 0); mtx_unlock(&pri_chan->sc_lock); } void vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) { device_printf(sc->vmbus_dev, "unknown message type 0x%x\n", msg_type); return; } msg_proc = vmbus_chanmsg_process[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); } Index: head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 302801) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_priv.h (revision 302802) @@ -1,159 +1,159 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HYPERV_PRIV_H__ #define __HYPERV_PRIV_H__ #include #include #include #include #include struct vmbus_softc; typedef struct { void* data; uint32_t length; } hv_vmbus_sg_buffer_list; typedef struct { uint32_t current_interrupt_mask; uint32_t current_read_index; uint32_t current_write_index; uint32_t bytes_avail_to_read; uint32_t bytes_avail_to_write; } hv_vmbus_ring_buffer_debug_info; typedef struct { uint32_t rel_id; hv_vmbus_channel_state state; - hv_guid interface_type; - hv_guid interface_instance; + struct hyperv_guid interface_type; + struct hyperv_guid interface_instance; uint32_t monitor_id; uint32_t server_monitor_pending; uint32_t server_monitor_latency; uint32_t server_monitor_connection_id; uint32_t client_monitor_pending; uint32_t client_monitor_latency; uint32_t client_monitor_connection_id; hv_vmbus_ring_buffer_debug_info inbound; hv_vmbus_ring_buffer_debug_info outbound; } hv_vmbus_channel_debug_info; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_page_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT]; } __packed hv_vmbus_channel_packet_page_buffer; /* * The format must be the same as hv_vm_data_gpa_direct */ typedef struct hv_vmbus_channel_packet_multipage_buffer { uint16_t type; uint16_t data_offset8; uint16_t length8; uint16_t flags; uint64_t transaction_id; uint32_t reserved; uint32_t range_count; /* Always 1 in this case */ hv_vmbus_multipage_buffer range; } __packed hv_vmbus_channel_packet_multipage_buffer; /* * Private, VM Bus functions */ struct sysctl_ctx_list; struct sysctl_oid_list; void hv_ring_buffer_stat( struct sysctl_ctx_list *ctx, struct sysctl_oid_list *tree_node, hv_vmbus_ring_buffer_info *rbi, const char *desc); int hv_vmbus_ring_buffer_init( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); void hv_ring_buffer_cleanup( hv_vmbus_ring_buffer_info *ring_info); int hv_ring_buffer_write( hv_vmbus_ring_buffer_info *ring_info, hv_vmbus_sg_buffer_list sg_buffers[], uint32_t sg_buff_count, boolean_t *need_sig); int hv_ring_buffer_peek( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len); int hv_ring_buffer_read( hv_vmbus_ring_buffer_info *ring_info, void *buffer, uint32_t buffer_len, uint32_t offset); uint32_t hv_vmbus_get_ring_buffer_interrupt_mask( hv_vmbus_ring_buffer_info *ring_info); void hv_vmbus_dump_ring_info( hv_vmbus_ring_buffer_info *ring_info, char *prefix); void hv_ring_buffer_read_begin( hv_vmbus_ring_buffer_info *ring_info); uint32_t hv_ring_buffer_read_end( hv_vmbus_ring_buffer_info *ring_info); void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel); void hv_vmbus_release_unattached_channels( struct vmbus_softc *); int hv_vmbus_child_device_register( struct hv_vmbus_channel *chan); int hv_vmbus_child_device_unregister( struct hv_vmbus_channel *chan); #endif /* __HYPERV_PRIV_H__ */ Index: head/sys/dev/hyperv/vmbus/hyperv.c =================================================================== --- head/sys/dev/hyperv/vmbus/hyperv.c (revision 302801) +++ head/sys/dev/hyperv/vmbus/hyperv.c (revision 302802) @@ -1,317 +1,317 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * Implements low-level interactions with Hypver-V/Azure */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HYPERV_FREEBSD_BUILD 0ULL #define HYPERV_FREEBSD_VERSION ((uint64_t)__FreeBSD_version) #define HYPERV_FREEBSD_OSID 0ULL #define MSR_HV_GUESTID_BUILD_FREEBSD \ (HYPERV_FREEBSD_BUILD & MSR_HV_GUESTID_BUILD_MASK) #define MSR_HV_GUESTID_VERSION_FREEBSD \ ((HYPERV_FREEBSD_VERSION << MSR_HV_GUESTID_VERSION_SHIFT) & \ MSR_HV_GUESTID_VERSION_MASK) #define MSR_HV_GUESTID_OSID_FREEBSD \ ((HYPERV_FREEBSD_OSID << MSR_HV_GUESTID_OSID_SHIFT) & \ MSR_HV_GUESTID_OSID_MASK) #define MSR_HV_GUESTID_FREEBSD \ (MSR_HV_GUESTID_BUILD_FREEBSD | \ MSR_HV_GUESTID_VERSION_FREEBSD | \ MSR_HV_GUESTID_OSID_FREEBSD | \ MSR_HV_GUESTID_OSTYPE_FREEBSD) struct hypercall_ctx { void *hc_addr; struct hyperv_dma hc_dma; }; static u_int hyperv_get_timecount(struct timecounter *tc); u_int hyperv_features; u_int hyperv_recommends; static u_int hyperv_pm_features; static u_int hyperv_features3; static struct timecounter hyperv_timecounter = { .tc_get_timecount = hyperv_get_timecount, .tc_poll_pps = NULL, .tc_counter_mask = 0xffffffff, .tc_frequency = HYPERV_TIMER_FREQ, .tc_name = "Hyper-V", .tc_quality = 2000, .tc_flags = 0, .tc_priv = NULL }; static struct hypercall_ctx hypercall_context; static u_int hyperv_get_timecount(struct timecounter *tc __unused) { return rdmsr(MSR_HV_TIME_REF_COUNT); } uint64_t hypercall_post_message(bus_addr_t msg_paddr) { return hypercall_md(hypercall_context.hc_addr, HYPERCALL_POST_MESSAGE, msg_paddr, 0); } uint64_t hypercall_signal_event(bus_addr_t monprm_paddr) { return hypercall_md(hypercall_context.hc_addr, HYPERCALL_SIGNAL_EVENT, monprm_paddr, 0); } int -hyperv_guid2str(const struct hv_guid *guid, char *buf, size_t sz) +hyperv_guid2str(const struct hyperv_guid *guid, char *buf, size_t sz) { - const uint8_t *d = guid->data; + const uint8_t *d = guid->hv_guid; return snprintf(buf, sz, "%02x%02x%02x%02x-" "%02x%02x-%02x%02x-%02x%02x-" "%02x%02x%02x%02x%02x%02x", d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); } static bool hyperv_identify(void) { u_int regs[4]; unsigned int maxleaf; if (vm_guest != VM_GUEST_HV) return (false); do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); maxleaf = regs[0]; if (maxleaf < CPUID_LEAF_HV_LIMITS) return (false); do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); if (regs[0] != CPUID_HV_IFACE_HYPERV) return (false); do_cpuid(CPUID_LEAF_HV_FEATURES, regs); if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) { /* * Hyper-V w/o Hypercall is impossible; someone * is faking Hyper-V. */ return (false); } hyperv_features = regs[0]; hyperv_pm_features = regs[2]; hyperv_features3 = regs[3]; do_cpuid(CPUID_LEAF_HV_IDENTITY, regs); printf("Hyper-V Version: %d.%d.%d [SP%d]\n", regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]); printf(" Features=0x%b\n", hyperv_features, "\020" "\001VPRUNTIME" /* MSR_HV_VP_RUNTIME */ "\002TMREFCNT" /* MSR_HV_TIME_REF_COUNT */ "\003SYNIC" /* MSRs for SynIC */ "\004SYNTM" /* MSRs for SynTimer */ "\005APIC" /* MSR_HV_{EOI,ICR,TPR} */ "\006HYPERCALL" /* MSR_HV_{GUEST_OS_ID,HYPERCALL} */ "\007VPINDEX" /* MSR_HV_VP_INDEX */ "\010RESET" /* MSR_HV_RESET */ "\011STATS" /* MSR_HV_STATS_ */ "\012REFTSC" /* MSR_HV_REFERENCE_TSC */ "\013IDLE" /* MSR_HV_GUEST_IDLE */ "\014TMFREQ" /* MSR_HV_{TSC,APIC}_FREQUENCY */ "\015DEBUG"); /* MSR_HV_SYNTH_DEBUG_ */ printf(" PM Features=0x%b [C%u]\n", (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK), "\020" "\005C3HPET", /* HPET is required for C3 state */ CPUPM_HV_CSTATE(hyperv_pm_features)); printf(" Features3=0x%b\n", hyperv_features3, "\020" "\001MWAIT" /* MWAIT */ "\002DEBUG" /* guest debug support */ "\003PERFMON" /* performance monitor */ "\004PCPUDPE" /* physical CPU dynamic partition event */ "\005XMMHC" /* hypercall input through XMM regs */ "\006IDLE" /* guest idle support */ "\007SLEEP" /* hypervisor sleep support */ "\010NUMA" /* NUMA distance query support */ "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */ "\012SYNCMC" /* inject synthetic machine checks */ "\013CRASH" /* MSRs for guest crash */ "\014DEBUGMSR" /* MSRs for guest debug */ "\015NPIEP" /* NPIEP */ "\016HVDIS"); /* disabling hypervisor */ do_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs); hyperv_recommends = regs[0]; if (bootverbose) printf(" Recommends: %08x %08x\n", regs[0], regs[1]); do_cpuid(CPUID_LEAF_HV_LIMITS, regs); if (bootverbose) { printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n", regs[0], regs[1], regs[2]); } if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) { do_cpuid(CPUID_LEAF_HV_HWFEATURES, regs); if (bootverbose) { printf(" HW Features: %08x, AMD: %08x\n", regs[0], regs[3]); } } return (true); } static void hyperv_init(void *dummy __unused) { if (!hyperv_identify()) { /* Not Hyper-V; reset guest id to the generic one. */ if (vm_guest == VM_GUEST_HV) vm_guest = VM_GUEST_VM; return; } /* Set guest id */ wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_FREEBSD); if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) { /* Register Hyper-V timecounter */ tc_init(&hyperv_timecounter); } } SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, NULL); static void hypercall_memfree(void) { hyperv_dmamem_free(&hypercall_context.hc_dma, hypercall_context.hc_addr); hypercall_context.hc_addr = NULL; } static void hypercall_create(void *arg __unused) { uint64_t hc, hc_orig; if (vm_guest != VM_GUEST_HV) return; hypercall_context.hc_addr = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0, PAGE_SIZE, &hypercall_context.hc_dma, BUS_DMA_WAITOK); if (hypercall_context.hc_addr == NULL) { printf("hyperv: Hypercall page allocation failed\n"); /* Can't perform any Hyper-V specific actions */ vm_guest = VM_GUEST_VM; return; } /* Get the 'reserved' bits, which requires preservation. */ hc_orig = rdmsr(MSR_HV_HYPERCALL); /* * Setup the Hypercall page. * * NOTE: 'reserved' bits MUST be preserved. */ hc = ((hypercall_context.hc_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_HYPERCALL_PGSHIFT) | (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) | MSR_HV_HYPERCALL_ENABLE; wrmsr(MSR_HV_HYPERCALL, hc); /* * Confirm that Hypercall page did get setup. */ hc = rdmsr(MSR_HV_HYPERCALL); if ((hc & MSR_HV_HYPERCALL_ENABLE) == 0) { printf("hyperv: Hypercall setup failed\n"); hypercall_memfree(); /* Can't perform any Hyper-V specific actions */ vm_guest = VM_GUEST_VM; return; } if (bootverbose) printf("hyperv: Hypercall created\n"); } SYSINIT(hypercall_ctor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_create, NULL); static void hypercall_destroy(void *arg __unused) { uint64_t hc; if (hypercall_context.hc_addr == NULL) return; /* Disable Hypercall */ hc = rdmsr(MSR_HV_HYPERCALL); wrmsr(MSR_HV_HYPERCALL, (hc & MSR_HV_HYPERCALL_RSVD_MASK)); hypercall_memfree(); if (bootverbose) printf("hyperv: Hypercall destroyed\n"); } SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy, NULL); Index: head/sys/dev/hyperv/vmbus/vmbus.c =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus.c (revision 302801) +++ head/sys/dev/hyperv/vmbus/vmbus.c (revision 302802) @@ -1,1303 +1,1304 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "vmbus_if.h" #define VMBUS_GPADL_START 0xe1e10 struct vmbus_msghc { struct hypercall_postmsg_in *mh_inprm; struct hypercall_postmsg_in mh_inprm_save; struct hyperv_dma mh_inprm_dma; struct vmbus_message *mh_resp; struct vmbus_message mh_resp0; }; struct vmbus_msghc_ctx { struct vmbus_msghc *mhc_free; struct mtx mhc_free_lock; uint32_t mhc_flags; struct vmbus_msghc *mhc_active; struct mtx mhc_active_lock; }; #define VMBUS_MSGHC_CTXF_DESTROY 0x0001 static int vmbus_init(struct vmbus_softc *); static int vmbus_connect(struct vmbus_softc *, uint32_t); static int vmbus_req_channels(struct vmbus_softc *sc); static void vmbus_disconnect(struct vmbus_softc *); static int vmbus_scan(struct vmbus_softc *); static void vmbus_scan_wait(struct vmbus_softc *); static void vmbus_scan_newdev(struct vmbus_softc *); static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS); static struct vmbus_msghc_ctx *vmbus_msghc_ctx_create(bus_dma_tag_t); static void vmbus_msghc_ctx_destroy( struct vmbus_msghc_ctx *); static void vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *); static struct vmbus_msghc *vmbus_msghc_alloc(bus_dma_tag_t); static void vmbus_msghc_free(struct vmbus_msghc *); static struct vmbus_msghc *vmbus_msghc_get1(struct vmbus_msghc_ctx *, uint32_t); struct vmbus_softc *vmbus_sc; extern inthand_t IDTVEC(vmbus_isr); static const uint32_t vmbus_version[] = { VMBUS_VERSION_WIN8_1, VMBUS_VERSION_WIN8, VMBUS_VERSION_WIN7, VMBUS_VERSION_WS2008 }; static struct vmbus_msghc * vmbus_msghc_alloc(bus_dma_tag_t parent_dtag) { struct vmbus_msghc *mh; mh = malloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO); mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag, HYPERCALL_PARAM_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE, &mh->mh_inprm_dma, BUS_DMA_WAITOK); if (mh->mh_inprm == NULL) { free(mh, M_DEVBUF); return NULL; } return mh; } static void vmbus_msghc_free(struct vmbus_msghc *mh) { hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm); free(mh, M_DEVBUF); } static void vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc) { KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall")); KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg")); mtx_destroy(&mhc->mhc_free_lock); mtx_destroy(&mhc->mhc_active_lock); free(mhc, M_DEVBUF); } static struct vmbus_msghc_ctx * vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag) { struct vmbus_msghc_ctx *mhc; mhc = malloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO); mtx_init(&mhc->mhc_free_lock, "vmbus msghc free", NULL, MTX_DEF); mtx_init(&mhc->mhc_active_lock, "vmbus msghc act", NULL, MTX_DEF); mhc->mhc_free = vmbus_msghc_alloc(parent_dtag); if (mhc->mhc_free == NULL) { vmbus_msghc_ctx_free(mhc); return NULL; } return mhc; } static struct vmbus_msghc * vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag) { struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_free_lock); while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL) { mtx_sleep(&mhc->mhc_free, &mhc->mhc_free_lock, 0, "gmsghc", 0); } if (mhc->mhc_flags & dtor_flag) { /* Being destroyed */ mh = NULL; } else { mh = mhc->mhc_free; KASSERT(mh != NULL, ("no free hypercall msg")); KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response")); mhc->mhc_free = NULL; } mtx_unlock(&mhc->mhc_free_lock); return mh; } void vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) { struct hypercall_postmsg_in *inprm; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); inprm = mh->mh_inprm; memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE); inprm->hc_connid = VMBUS_CONNID_MESSAGE; inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL; inprm->hc_dsize = dsize; } struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize) { struct vmbus_msghc *mh; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY); if (mh == NULL) return NULL; vmbus_msghc_reset(mh, dsize); return mh; } void vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active")); mh->mh_resp = NULL; mtx_lock(&mhc->mhc_free_lock); KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg")); mhc->mhc_free = mh; mtx_unlock(&mhc->mhc_free_lock); wakeup(&mhc->mhc_free); } void * vmbus_msghc_dataptr(struct vmbus_msghc *mh) { return mh->mh_inprm->hc_data; } static void vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc) { struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_free_lock); mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY; mtx_unlock(&mhc->mhc_free_lock); wakeup(&mhc->mhc_free); mh = vmbus_msghc_get1(mhc, 0); if (mh == NULL) panic("can't get msghc"); vmbus_msghc_free(mh); vmbus_msghc_ctx_free(mhc); } int vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) { sbintime_t time = SBT_1MS; int i; /* * Save the input parameter so that we could restore the input * parameter if the Hypercall failed. * * XXX * Is this really necessary?! i.e. Will the Hypercall ever * overwrite the input parameter? */ memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE); /* * In order to cope with transient failures, e.g. insufficient * resources on host side, we retry the post message Hypercall * several times. 20 retries seem sufficient. */ #define HC_RETRY_MAX 20 for (i = 0; i < HC_RETRY_MAX; ++i) { uint64_t status; status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr); if (status == HYPERCALL_STATUS_SUCCESS) return 0; pause_sbt("hcpmsg", time, 0, C_HARDCLOCK); if (time < SBT_1S * 2) time *= 2; /* Restore input parameter and try again */ memcpy(mh->mh_inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE); } #undef HC_RETRY_MAX return EIO; } int vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; int error; KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response")); mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall")); mhc->mhc_active = mh; mtx_unlock(&mhc->mhc_active_lock); error = vmbus_msghc_exec_noresult(mh); if (error) { mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); mhc->mhc_active = NULL; mtx_unlock(&mhc->mhc_active_lock); } return error; } const struct vmbus_message * vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); while (mh->mh_resp == NULL) { mtx_sleep(&mhc->mhc_active, &mhc->mhc_active_lock, 0, "wmsghc", 0); } mhc->mhc_active = NULL; mtx_unlock(&mhc->mhc_active_lock); return mh->mh_resp; } void vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_active_lock); mh = mhc->mhc_active; KASSERT(mh != NULL, ("no pending msg hypercall")); memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0)); mh->mh_resp = &mh->mh_resp0; mtx_unlock(&mhc->mhc_active_lock); wakeup(&mhc->mhc_active); } uint32_t vmbus_gpadl_alloc(struct vmbus_softc *sc) { return atomic_fetchadd_int(&sc->vmbus_gpadl, 1); } static int vmbus_connect(struct vmbus_softc *sc, uint32_t version) { struct vmbus_chanmsg_connect *req; const struct vmbus_message *msg; struct vmbus_msghc *mh; int error, done = 0; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT; req->chm_ver = version; req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr; req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr; req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_msghc_put(sc, mh); return error; } msg = vmbus_msghc_wait_result(sc, mh); done = ((const struct vmbus_chanmsg_connect_resp *) msg->msg_data)->chm_done; vmbus_msghc_put(sc, mh); return (done ? 0 : EOPNOTSUPP); } static int vmbus_init(struct vmbus_softc *sc) { int i; for (i = 0; i < nitems(vmbus_version); ++i) { int error; error = vmbus_connect(sc, vmbus_version[i]); if (!error) { sc->vmbus_version = vmbus_version[i]; device_printf(sc->vmbus_dev, "version %u.%u\n", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return 0; } } return ENXIO; } static void vmbus_disconnect(struct vmbus_softc *sc) { struct vmbus_chanmsg_disconnect *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for disconnect\n"); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "disconnect msg hypercall failed\n"); } } static int vmbus_req_channels(struct vmbus_softc *sc) { struct vmbus_chanmsg_chrequest *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); return error; } void vmbus_scan_newchan(struct vmbus_softc *sc) { mtx_lock(&sc->vmbus_scan_lock); if ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0) sc->vmbus_scan_chcnt++; mtx_unlock(&sc->vmbus_scan_lock); } void vmbus_scan_done(struct vmbus_softc *sc) { mtx_lock(&sc->vmbus_scan_lock); sc->vmbus_scan_chcnt |= VMBUS_SCAN_CHCNT_DONE; mtx_unlock(&sc->vmbus_scan_lock); wakeup(&sc->vmbus_scan_chcnt); } static void vmbus_scan_newdev(struct vmbus_softc *sc) { mtx_lock(&sc->vmbus_scan_lock); sc->vmbus_scan_devcnt++; mtx_unlock(&sc->vmbus_scan_lock); wakeup(&sc->vmbus_scan_devcnt); } static void vmbus_scan_wait(struct vmbus_softc *sc) { uint32_t chancnt; mtx_lock(&sc->vmbus_scan_lock); while ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0) { mtx_sleep(&sc->vmbus_scan_chcnt, &sc->vmbus_scan_lock, 0, "waitch", 0); } chancnt = sc->vmbus_scan_chcnt & ~VMBUS_SCAN_CHCNT_DONE; while (sc->vmbus_scan_devcnt != chancnt) { mtx_sleep(&sc->vmbus_scan_devcnt, &sc->vmbus_scan_lock, 0, "waitdev", 0); } mtx_unlock(&sc->vmbus_scan_lock); } static int vmbus_scan(struct vmbus_softc *sc) { int error; /* * Start vmbus scanning. */ error = vmbus_req_channels(sc); if (error) { device_printf(sc->vmbus_dev, "channel request failed: %d\n", error); return error; } /* * Wait for all devices are added to vmbus. */ vmbus_scan_wait(sc); /* * Identify, probe and attach. */ bus_generic_probe(sc->vmbus_dev); bus_generic_attach(sc->vmbus_dev); if (bootverbose) { device_printf(sc->vmbus_dev, "device scan, probe and attach " "done\n"); } return 0; } static void vmbus_msg_task(void *xsc, int pending __unused) { struct vmbus_softc *sc = xsc; volatile struct vmbus_message *msg; msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE; for (;;) { if (msg->msg_type == HYPERV_MSGTYPE_NONE) { /* No message */ break; } else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) { /* Channel message */ vmbus_chan_msgproc(sc, __DEVOLATILE(const struct vmbus_message *, msg)); } msg->msg_type = HYPERV_MSGTYPE_NONE; /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } } static __inline int vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu) { volatile struct vmbus_message *msg; struct vmbus_message *msg_base; msg_base = VMBUS_PCPU_GET(sc, message, cpu); /* * Check event timer. * * TODO: move this to independent IDT vector. */ msg = msg_base + VMBUS_SINT_TIMER; if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) { msg->msg_type = HYPERV_MSGTYPE_NONE; vmbus_et_intr(frame); /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } /* * Check events. Hot path for network and storage I/O data; high rate. * * NOTE: * As recommended by the Windows guest fellows, we check events before * checking messages. */ sc->vmbus_event_proc(sc, cpu); /* * Check messages. Mainly management stuffs; ultra low rate. */ msg = msg_base + VMBUS_SINT_MESSAGE; if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) { taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); } return (FILTER_HANDLED); } void vmbus_handle_intr(struct trapframe *trap_frame) { struct vmbus_softc *sc = vmbus_get_softc(); int cpu = curcpu; /* * Disable preemption. */ critical_enter(); /* * Do a little interrupt counting. */ (*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++; vmbus_handle_intr1(sc, trap_frame, cpu); /* * Enable preemption. */ critical_exit(); } static void vmbus_synic_setup(void *xsc) { struct vmbus_softc *sc = xsc; int cpu = curcpu; uint64_t val, orig; uint32_t sint; if (hyperv_features & CPUID_HV_MSR_VP_INDEX) { /* * Save virtual processor id. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX); } else { /* * XXX * Virtual processoor id is only used by a pretty broken * channel selection code from storvsc. It's nothing * critical even if CPUID_HV_MSR_VP_INDEX is not set; keep * moving on. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = cpu; } /* * Setup the SynIC message. */ orig = rdmsr(MSR_HV_SIMP); val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT); wrmsr(MSR_HV_SIMP, val); /* * Setup the SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT); wrmsr(MSR_HV_SIEFP, val); /* * Configure and unmask SINT for message and event flags. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * Configure and unmask SINT for timer. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * All done; enable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK); wrmsr(MSR_HV_SCONTROL, val); } static void vmbus_synic_teardown(void *arg) { uint64_t orig; uint32_t sint; /* * Disable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK)); /* * Mask message and event flags SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Mask timer SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Teardown SynIC message. */ orig = rdmsr(MSR_HV_SIMP); wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK)); /* * Teardown SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK)); } static int vmbus_dma_alloc(struct vmbus_softc *sc) { bus_dma_tag_t parent_dtag; uint8_t *evtflags; int cpu; parent_dtag = bus_get_dma_tag(sc->vmbus_dev); CPU_FOREACH(cpu) { void *ptr; /* * Per-cpu messages and event flags. */ ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, message, cpu) = ptr; ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr; } evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (evtflags == NULL) return ENOMEM; sc->vmbus_rx_evtflags = (u_long *)evtflags; sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2)); sc->vmbus_evtflags = evtflags; sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf1 == NULL) return ENOMEM; sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf2 == NULL) return ENOMEM; return 0; } static void vmbus_dma_free(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_evtflags != NULL) { hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags); sc->vmbus_evtflags = NULL; sc->vmbus_rx_evtflags = NULL; sc->vmbus_tx_evtflags = NULL; } if (sc->vmbus_mnf1 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1); sc->vmbus_mnf1 = NULL; } if (sc->vmbus_mnf2 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2); sc->vmbus_mnf2 = NULL; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, message_dma, cpu), VMBUS_PCPU_GET(sc, message, cpu)); VMBUS_PCPU_GET(sc, message, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), VMBUS_PCPU_GET(sc, event_flags, cpu)); VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL; } } } static int vmbus_intr_setup(struct vmbus_softc *sc) { int cpu; CPU_FOREACH(cpu) { char buf[MAXCOMLEN + 1]; cpuset_t cpu_mask; /* Allocate an interrupt counter for Hyper-V interrupt */ snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu); intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu)); /* * Setup taskqueue to handle events. Task will be per- * channel. */ VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast( "hyperv event", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, event_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask, "hvevent%d", cpu); /* * Setup tasks and taskqueues to handle messages. */ VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast( "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, message_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask, "hvmsg%d", cpu); TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0, vmbus_msg_task, sc); } /* * All Hyper-V ISR required resources are setup, now let's find a * free IDT vector for Hyper-V ISR and set it up. */ sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr)); if (sc->vmbus_idtvec < 0) { device_printf(sc->vmbus_dev, "cannot find free IDT vector\n"); return ENXIO; } if(bootverbose) { device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n", sc->vmbus_idtvec); } return 0; } static void vmbus_intr_teardown(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_idtvec >= 0) { lapic_ipi_free(sc->vmbus_idtvec); sc->vmbus_idtvec = -1; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) { taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu)); VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) { taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu)); VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL; } } } static int vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { return (ENOENT); } static int vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) { const struct hv_vmbus_channel *chan; char guidbuf[HYPERV_GUID_STRLEN]; chan = vmbus_get_channel(child); if (chan == NULL) { /* Event timer device, which does not belong to a channel */ return (0); } strlcat(buf, "classid=", buflen); hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); strlcat(buf, " deviceid=", buflen); hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); return (0); } int hv_vmbus_child_device_register(struct hv_vmbus_channel *chan) { struct vmbus_softc *sc = chan->vmbus_sc; device_t parent = sc->vmbus_dev; int error = 0; chan->ch_dev = device_add_child(parent, NULL, -1); if (chan->ch_dev == NULL) { device_printf(parent, "device_add_child for chan%u failed\n", chan->ch_id); error = ENXIO; goto done; } device_set_ivars(chan->ch_dev, chan); done: /* New device has been/should be added to vmbus. */ vmbus_scan_newdev(sc); return error; } int hv_vmbus_child_device_unregister(struct hv_vmbus_channel *chan) { int error; if (chan->ch_dev == NULL) { /* Failed to add a device. */ return 0; } /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); error = device_delete_child(chan->vmbus_sc->vmbus_dev, chan->ch_dev); mtx_unlock(&Giant); return error; } static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS) { struct vmbus_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } static uint32_t vmbus_get_version_method(device_t bus, device_t dev) { struct vmbus_softc *sc = device_get_softc(bus); return sc->vmbus_version; } static int -vmbus_probe_guid_method(device_t bus, device_t dev, const struct hv_guid *guid) +vmbus_probe_guid_method(device_t bus, device_t dev, + const struct hyperv_guid *guid) { const struct hv_vmbus_channel *chan = vmbus_get_channel(dev); - if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hv_guid)) == 0) + if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0) return 0; return ENXIO; } static int vmbus_probe(device_t dev) { char *id[] = { "VMBUS", NULL }; if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL || device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV || (hyperv_features & CPUID_HV_MSR_SYNIC) == 0) return (ENXIO); device_set_desc(dev, "Hyper-V Vmbus"); return (BUS_PROBE_DEFAULT); } /** * @brief Main vmbus driver initialization routine. * * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_doattach(struct vmbus_softc *sc) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int ret; if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED) return (0); sc->vmbus_flags |= VMBUS_FLAG_ATTACHED; mtx_init(&sc->vmbus_scan_lock, "vmbus scan", NULL, MTX_DEF); sc->vmbus_gpadl = VMBUS_GPADL_START; mtx_init(&sc->vmbus_chlist_lock, "vmbus chlist", NULL, MTX_DEF); TAILQ_INIT(&sc->vmbus_chlist); sc->vmbus_chmap = malloc( sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF, M_WAITOK | M_ZERO); /* * Create context for "post message" Hypercalls */ sc->vmbus_msg_hc = vmbus_msghc_ctx_create( bus_get_dma_tag(sc->vmbus_dev)); if (sc->vmbus_msg_hc == NULL) { ret = ENXIO; goto cleanup; } /* * Allocate DMA stuffs. */ ret = vmbus_dma_alloc(sc); if (ret != 0) goto cleanup; /* * Setup interrupt. */ ret = vmbus_intr_setup(sc); if (ret != 0) goto cleanup; /* * Setup SynIC. */ if (bootverbose) device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started); smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc); sc->vmbus_flags |= VMBUS_FLAG_SYNIC; /* * Initialize vmbus, e.g. connect to Hypervisor. */ ret = vmbus_init(sc); if (ret != 0) goto cleanup; if (sc->vmbus_version == VMBUS_VERSION_WS2008 || sc->vmbus_version == VMBUS_VERSION_WIN7) sc->vmbus_event_proc = vmbus_event_proc_compat; else sc->vmbus_event_proc = vmbus_event_proc; ret = vmbus_scan(sc); if (ret != 0) goto cleanup; ctx = device_get_sysctl_ctx(sc->vmbus_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev)); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, vmbus_sysctl_version, "A", "vmbus version"); return (ret); cleanup: vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_msg_hc != NULL) { vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); sc->vmbus_msg_hc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_scan_lock); mtx_destroy(&sc->vmbus_chlist_lock); return (ret); } static void vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused) { } static int vmbus_attach(device_t dev) { vmbus_sc = device_get_softc(dev); vmbus_sc->vmbus_dev = dev; vmbus_sc->vmbus_idtvec = -1; /* * Event processing logic will be configured: * - After the vmbus protocol version negotiation. * - Before we request channel offers. */ vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy; #ifndef EARLY_AP_STARTUP /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * initialization directly. */ if (!cold) #endif vmbus_doattach(vmbus_sc); return (0); } static void vmbus_sysinit(void *arg __unused) { struct vmbus_softc *sc = vmbus_get_softc(); if (vm_guest != VM_GUEST_HV || sc == NULL) return; #ifndef EARLY_AP_STARTUP /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) #endif vmbus_doattach(sc); } static int vmbus_detach(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); hv_vmbus_release_unattached_channels(sc); vmbus_disconnect(sc); if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) { sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC; smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL); } vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_msg_hc != NULL) { vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); sc->vmbus_msg_hc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_scan_lock); mtx_destroy(&sc->vmbus_chlist_lock); return (0); } static device_method_t vmbus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), /* Vmbus interface */ DEVMETHOD(vmbus_get_version, vmbus_get_version_method), DEVMETHOD(vmbus_probe_guid, vmbus_probe_guid_method), DEVMETHOD_END }; static driver_t vmbus_driver = { "vmbus", vmbus_methods, sizeof(struct vmbus_softc) }; static devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL); MODULE_DEPEND(vmbus, acpi, 1, 1, 1); MODULE_VERSION(vmbus, 1); #ifndef EARLY_AP_STARTUP /* * NOTE: * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is * initialized. */ SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL); #endif Index: head/sys/dev/hyperv/vmbus/vmbus_if.m =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus_if.m (revision 302801) +++ head/sys/dev/hyperv/vmbus/vmbus_if.m (revision 302802) @@ -1,47 +1,47 @@ #- # Copyright (c) 2016 Microsoft Corp. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice unmodified, this list of conditions, and the following # disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # $FreeBSD$ # #include #include INTERFACE vmbus; HEADER { - struct hv_guid; + struct hyperv_guid; }; METHOD uint32_t get_version { device_t bus; device_t dev; }; METHOD int probe_guid { device_t bus; device_t dev; - const struct hv_guid *guid; + const struct hyperv_guid *guid; };