Index: head/sys/dev/hyperv/include/hyperv.h =================================================================== --- head/sys/dev/hyperv/include/hyperv.h (revision 302886) +++ head/sys/dev/hyperv/include/hyperv.h (revision 302887) @@ -1,331 +1,323 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV definitions for messages that are sent between instances of the * Channel Management Library in separate partitions, or in some cases, * back to itself. */ #ifndef __HYPERV_H__ #define __HYPERV_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef uint8_t hv_bool_uint8_t; #define HV_S_OK 0x00000000 #define HV_E_FAIL 0x80004005 #define HV_ERROR_NOT_SUPPORTED 0x80070032 #define HV_ERROR_MACHINE_LOCKED 0x800704F7 /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define VMBUS_VERSION_MAJOR(ver) (((uint32_t)(ver)) >> 16) #define VMBUS_VERSION_MINOR(ver) (((uint32_t)(ver)) & 0xffff) struct hyperv_guid { uint8_t hv_guid[16]; } __packed; #define HYPERV_GUID_STRLEN 40 int hyperv_guid2str(const struct hyperv_guid *, char *, size_t); #define HW_MACADDR_LEN 6 /* * Common defines for Hyper-V ICs */ #define HV_ICMSGTYPE_NEGOTIATE 0 #define HV_ICMSGTYPE_HEARTBEAT 1 #define HV_ICMSGTYPE_KVPEXCHANGE 2 #define HV_ICMSGTYPE_SHUTDOWN 3 #define HV_ICMSGTYPE_TIMESYNC 4 #define HV_ICMSGTYPE_VSS 5 #define HV_ICMSGHDRFLAG_TRANSACTION 1 #define HV_ICMSGHDRFLAG_REQUEST 2 #define HV_ICMSGHDRFLAG_RESPONSE 4 typedef struct hv_vmbus_pipe_hdr { uint32_t flags; uint32_t msgsize; } __packed hv_vmbus_pipe_hdr; typedef struct hv_vmbus_ic_version { uint16_t major; uint16_t minor; } __packed hv_vmbus_ic_version; typedef struct hv_vmbus_icmsg_hdr { hv_vmbus_ic_version icverframe; uint16_t icmsgtype; hv_vmbus_ic_version icvermsg; uint16_t icmsgsize; uint32_t status; uint8_t ictransaction_id; uint8_t icflags; uint8_t reserved[2]; } __packed hv_vmbus_icmsg_hdr; typedef struct hv_vmbus_icmsg_negotiate { uint16_t icframe_vercnt; uint16_t icmsg_vercnt; uint32_t reserved; hv_vmbus_ic_version icversion_data[1]; /* any size array */ } __packed hv_vmbus_icmsg_negotiate; typedef struct hv_vmbus_shutdown_msg_data { uint32_t reason_code; uint32_t timeout_seconds; uint32_t flags; uint8_t display_message[2048]; } __packed hv_vmbus_shutdown_msg_data; typedef struct hv_vmbus_heartbeat_msg_data { uint64_t seq_num; uint32_t reserved[8]; } __packed hv_vmbus_heartbeat_msg_data; typedef struct { /* * offset in bytes from the start of ring data below */ volatile uint32_t write_index; /* * offset in bytes from the start of ring data below */ volatile uint32_t read_index; /* * NOTE: The interrupt_mask field is used only for channels, but * vmbus connection also uses this data structure */ volatile uint32_t interrupt_mask; /* pad it to PAGE_SIZE so that data starts on a page */ uint8_t reserved[4084]; /* * WARNING: Ring data starts here * !!! DO NOT place any fields below this !!! */ uint8_t buffer[0]; /* doubles as interrupt mask */ } __packed hv_vmbus_ring_buffer; typedef struct { hv_vmbus_ring_buffer* ring_buffer; struct mtx ring_lock; uint32_t ring_data_size; /* ring_size */ } hv_vmbus_ring_buffer_info; typedef void (*vmbus_chan_callback_t)(void *); typedef struct hv_vmbus_channel { device_t ch_dev; struct vmbus_softc *vmbus_sc; uint32_t ch_flags; /* VMBUS_CHAN_FLAG_ */ uint32_t ch_id; /* channel id */ /* * These are based on the offer_msg.monitor_id. * Save it here for easy access. */ int ch_montrig_idx; /* MNF trig index */ uint32_t ch_montrig_mask;/* MNF trig mask */ /* * send to parent */ hv_vmbus_ring_buffer_info outbound; /* * receive from parent */ hv_vmbus_ring_buffer_info inbound; struct taskqueue *ch_tq; struct task ch_task; vmbus_chan_callback_t ch_cb; void *ch_cbarg; struct hyperv_mon_param *ch_monprm; struct hyperv_dma ch_monprm_dma; int ch_cpuid; /* owner cpu */ /* * Virtual cpuid for ch_cpuid; it is used to communicate cpuid * related information w/ Hyper-V. If MSR_HV_VP_INDEX does not * exist, ch_vcpuid will always be 0 for compatibility. */ uint32_t ch_vcpuid; /* * If this is a primary channel, ch_subchan* fields * contain sub-channels belonging to this primary * channel. */ struct mtx ch_subchan_lock; TAILQ_HEAD(, hv_vmbus_channel) ch_subchans; int ch_subchan_cnt; /* If this is a sub-channel */ TAILQ_ENTRY(hv_vmbus_channel) ch_sublink; /* sub-channel link */ struct hv_vmbus_channel *ch_prichan; /* owner primary chan */ /* * Driver private data */ void *hv_chan_priv1; void *hv_chan_priv2; void *hv_chan_priv3; void *ch_bufring; /* TX+RX bufrings */ struct hyperv_dma ch_bufring_dma; uint32_t ch_bufring_gpadl; struct task ch_detach_task; TAILQ_ENTRY(hv_vmbus_channel) ch_prilink; /* primary chan link */ uint32_t ch_subidx; /* subchan index */ volatile uint32_t ch_stflags; /* atomic-op */ /* VMBUS_CHAN_ST_ */ struct hyperv_guid ch_guid_type; struct hyperv_guid ch_guid_inst; struct sysctl_ctx_list ch_sysctl_ctx; } hv_vmbus_channel; #define VMBUS_CHAN_ISPRIMARY(chan) ((chan)->ch_subidx == 0) #define VMBUS_CHAN_FLAG_HASMNF 0x0001 /* * If this flag is set, this channel's interrupt will be masked in ISR, * and the RX bufring will be drained before this channel's interrupt is * unmasked. * * This flag is turned on by default. Drivers can turn it off according * to their own requirement. */ #define VMBUS_CHAN_FLAG_BATCHREAD 0x0002 #define VMBUS_CHAN_ST_OPENED_SHIFT 0 #define VMBUS_CHAN_ST_OPENED (1 << VMBUS_CHAN_ST_OPENED_SHIFT) static inline void hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t on) { if (!on) channel->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD; else channel->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; } int hv_vmbus_channel_open( hv_vmbus_channel* channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, vmbus_chan_callback_t cb, void *cbarg); void hv_vmbus_channel_close(hv_vmbus_channel *channel); -int hv_vmbus_channel_establish_gpadl( - hv_vmbus_channel* channel, - /* must be phys and virt contiguous */ - void* contig_buffer, - /* page-size multiple */ - uint32_t size, - uint32_t* gpadl_handle); - int hv_vmbus_channel_teardown_gpdal( hv_vmbus_channel* channel, uint32_t gpadl_handle); int vmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl); struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu); void vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan); struct hv_vmbus_channel ** vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt); void vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt); void vmbus_drain_subchan(struct hv_vmbus_channel *pri_chan); /** * @brief Get physical address from virtual */ static inline unsigned long hv_get_phys_addr(void *virt) { unsigned long ret; ret = (vtophys(virt) | ((vm_offset_t) virt & PAGE_MASK)); return (ret); } static __inline struct hv_vmbus_channel * vmbus_get_channel(device_t dev) { return device_get_ivars(dev); } #endif /* __HYPERV_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 302886) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 302887) @@ -1,1031 +1,1044 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /** * HyperV vmbus network VSC (virtual services client) module * */ #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" /* priv1 and priv2 are consumed by the main driver */ #define hv_chan_rdbuf hv_chan_priv3 MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver"); /* * Forward declarations */ static void hv_nv_on_channel_callback(void *xchan); static int hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc); static int hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *); static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev); static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev); static int hv_nv_connect_to_vsp(struct hn_softc *sc); static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct hv_vmbus_channel *, const struct vmbus_chanpkt_hdr *pkt); static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan, uint64_t tid, uint32_t status); static void hv_nv_on_receive(netvsc_dev *net_dev, struct hn_softc *sc, struct hv_vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt); /* * */ static inline netvsc_dev * hv_nv_alloc_net_device(struct hn_softc *sc) { netvsc_dev *net_dev; net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_WAITOK | M_ZERO); net_dev->sc = sc; net_dev->destroy = FALSE; sc->net_dev = net_dev; return (net_dev); } /* * XXX unnecessary; nuke it. */ static inline netvsc_dev * hv_nv_get_outbound_net_device(struct hn_softc *sc) { return sc->net_dev; } /* * XXX unnecessary; nuke it. */ static inline netvsc_dev * hv_nv_get_inbound_net_device(struct hn_softc *sc) { return sc->net_dev; } int hv_nv_get_next_send_section(netvsc_dev *net_dev) { unsigned long bitsmap_words = net_dev->bitsmap_words; unsigned long *bitsmap = net_dev->send_section_bitsmap; unsigned long idx; int ret = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; int i; for (i = 0; i < bitsmap_words; i++) { idx = ffsl(~bitsmap[i]); if (0 == idx) continue; idx--; KASSERT(i * BITS_PER_LONG + idx < net_dev->send_section_count, ("invalid i %d and idx %lu", i, idx)); if (atomic_testandset_long(&bitsmap[i], idx)) continue; ret = i * BITS_PER_LONG + idx; break; } return (ret); } /* * Net VSC initialize receive buffer with net VSP * * Net VSP: Network virtual services client, also known as the * Hyper-V extensible switch and the synthetic data path. */ static int hv_nv_init_rx_buffer_with_net_vsp(struct hn_softc *sc) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; net_dev = hv_nv_get_outbound_net_device(sc); if (!net_dev) { return (ENODEV); } - net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_NETVSC, - M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); + net_dev->rx_buf = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), + PAGE_SIZE, 0, net_dev->rx_buf_size, &net_dev->rxbuf_dma, + BUS_DMA_WAITOK | BUS_DMA_ZERO); + if (net_dev->rx_buf == NULL) { + device_printf(sc->hn_dev, "allocate rxbuf failed\n"); + return ENOMEM; + } /* - * Establish the GPADL handle for this buffer on this channel. - * Note: This call uses the vmbus connection rather than the - * channel to establish the gpadl handle. - * GPADL: Guest physical address descriptor list. + * Connect the RXBUF GPADL to the primary channel. + * + * NOTE: + * Only primary channel has RXBUF connected to it. Sub-channels + * just share this RXBUF. */ - ret = hv_vmbus_channel_establish_gpadl( - sc->hn_prichan, net_dev->rx_buf, - net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle); + ret = vmbus_chan_gpadl_connect(sc->hn_prichan, + net_dev->rxbuf_dma.hv_paddr, net_dev->rx_buf_size, + &net_dev->rx_buf_gpadl_handle); if (ret != 0) { + device_printf(sc->hn_dev, "rxbuf gpadl connect failed: %d\n", + ret); goto cleanup; } /* sema_wait(&ext->channel_init_sema); KYS CHECK */ /* Notify the NetVsp of the gpadl handle */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf; init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = net_dev->rx_buf_gpadl_handle; init_pkt->msgs.vers_1_msgs.send_rx_buf.id = NETVSC_RECEIVE_BUFFER_ID; /* Send the gpadl notification request */ ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); if (ret != 0) { goto cleanup; } sema_wait(&net_dev->channel_init_sema); /* Check the response */ if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status != nvsp_status_success) { ret = EINVAL; goto cleanup; } net_dev->rx_section_count = init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; net_dev->rx_sections = malloc(net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK); memcpy(net_dev->rx_sections, init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections, net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section)); /* * For first release, there should only be 1 section that represents * the entire receive buffer */ if (net_dev->rx_section_count != 1 || net_dev->rx_sections->offset != 0) { ret = EINVAL; goto cleanup; } goto exit; cleanup: hv_nv_destroy_rx_buffer(net_dev); exit: return (ret); } /* * Net VSC initialize send buffer with net VSP */ static int hv_nv_init_send_buffer_with_net_vsp(struct hn_softc *sc) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret = 0; net_dev = hv_nv_get_outbound_net_device(sc); if (!net_dev) { return (ENODEV); } - net_dev->send_buf = contigmalloc(net_dev->send_buf_size, M_NETVSC, - M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); + net_dev->send_buf = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), + PAGE_SIZE, 0, net_dev->send_buf_size, &net_dev->txbuf_dma, + BUS_DMA_WAITOK | BUS_DMA_ZERO); if (net_dev->send_buf == NULL) { - ret = ENOMEM; - goto cleanup; + device_printf(sc->hn_dev, "allocate chimney txbuf failed\n"); + return ENOMEM; } /* - * Establish the gpadl handle for this buffer on this channel. - * Note: This call uses the vmbus connection rather than the - * channel to establish the gpadl handle. + * Connect chimney sending buffer GPADL to the primary channel. + * + * NOTE: + * Only primary channel has chimney sending buffer connected to it. + * Sub-channels just share this chimney sending buffer. */ - ret = hv_vmbus_channel_establish_gpadl(sc->hn_prichan, - net_dev->send_buf, net_dev->send_buf_size, + ret = vmbus_chan_gpadl_connect(sc->hn_prichan, + net_dev->txbuf_dma.hv_paddr, net_dev->send_buf_size, &net_dev->send_buf_gpadl_handle); if (ret != 0) { + device_printf(sc->hn_dev, "chimney sending buffer gpadl " + "connect failed: %d\n", ret); goto cleanup; } /* Notify the NetVsp of the gpadl handle */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf; init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = net_dev->send_buf_gpadl_handle; init_pkt->msgs.vers_1_msgs.send_rx_buf.id = NETVSC_SEND_BUFFER_ID; /* Send the gpadl notification request */ ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, init_pkt, sizeof(nvsp_msg), (uint64_t)init_pkt); if (ret != 0) { goto cleanup; } sema_wait(&net_dev->channel_init_sema); /* Check the response */ if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status != nvsp_status_success) { ret = EINVAL; goto cleanup; } net_dev->send_section_size = init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size; net_dev->send_section_count = net_dev->send_buf_size / net_dev->send_section_size; net_dev->bitsmap_words = howmany(net_dev->send_section_count, BITS_PER_LONG); net_dev->send_section_bitsmap = malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC, M_WAITOK | M_ZERO); goto exit; cleanup: hv_nv_destroy_send_buffer(net_dev); exit: return (ret); } /* * Net VSC destroy receive buffer */ static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) { nvsp_msg *revoke_pkt; int ret = 0; /* * If we got a section count, it means we received a * send_rx_buf_complete msg * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, * we need to send a revoke msg here */ if (net_dev->rx_section_count) { /* Send the revoke receive buffer */ revoke_pkt = &net_dev->revoke_packet; memset(revoke_pkt, 0, sizeof(nvsp_msg)); revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf; revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id = NETVSC_RECEIVE_BUFFER_ID; ret = vmbus_chan_send(net_dev->sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, revoke_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke_pkt); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } } /* Tear down the gpadl on the vsp end */ if (net_dev->rx_buf_gpadl_handle) { ret = hv_vmbus_channel_teardown_gpdal(net_dev->sc->hn_prichan, net_dev->rx_buf_gpadl_handle); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } net_dev->rx_buf_gpadl_handle = 0; } if (net_dev->rx_buf) { /* Free up the receive buffer */ - contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_NETVSC); + hyperv_dmamem_free(&net_dev->rxbuf_dma, net_dev->rx_buf); net_dev->rx_buf = NULL; } if (net_dev->rx_sections) { free(net_dev->rx_sections, M_NETVSC); net_dev->rx_sections = NULL; net_dev->rx_section_count = 0; } return (ret); } /* * Net VSC destroy send buffer */ static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev) { nvsp_msg *revoke_pkt; int ret = 0; /* * If we got a section count, it means we received a * send_rx_buf_complete msg * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, * we need to send a revoke msg here */ if (net_dev->send_section_size) { /* Send the revoke send buffer */ revoke_pkt = &net_dev->revoke_packet; memset(revoke_pkt, 0, sizeof(nvsp_msg)); revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_send_buf; revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id = NETVSC_SEND_BUFFER_ID; ret = vmbus_chan_send(net_dev->sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, revoke_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)revoke_pkt); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } } /* Tear down the gpadl on the vsp end */ if (net_dev->send_buf_gpadl_handle) { ret = hv_vmbus_channel_teardown_gpdal(net_dev->sc->hn_prichan, net_dev->send_buf_gpadl_handle); /* * If we failed here, we might as well return and have a leak * rather than continue and a bugchk */ if (ret != 0) { return (ret); } net_dev->send_buf_gpadl_handle = 0; } if (net_dev->send_buf) { /* Free up the receive buffer */ - contigfree(net_dev->send_buf, net_dev->send_buf_size, M_NETVSC); + hyperv_dmamem_free(&net_dev->txbuf_dma, net_dev->send_buf); net_dev->send_buf = NULL; } if (net_dev->send_section_bitsmap) { free(net_dev->send_section_bitsmap, M_NETVSC); } return (ret); } /* * Attempt to negotiate the caller-specified NVSP version * * For NVSP v2, Server 2008 R2 does not set * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers * to the negotiated version, so we cannot rely on that. */ static int hv_nv_negotiate_nvsp_protocol(struct hn_softc *sc, netvsc_dev *net_dev, uint32_t nvsp_ver) { nvsp_msg *init_pkt; int ret; init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_type_init; /* * Specify parameter as the only acceptable protocol version */ init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver; init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver; /* Send the init request */ ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); if (ret != 0) return (-1); sema_wait(&net_dev->channel_init_sema); if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success) return (EINVAL); return (0); } /* * Send NDIS version 2 config packet containing MTU. * * Not valid for NDIS version 1. */ static int hv_nv_send_ndis_config(struct hn_softc *sc, uint32_t mtu) { netvsc_dev *net_dev; nvsp_msg *init_pkt; int ret; net_dev = hv_nv_get_outbound_net_device(sc); if (!net_dev) return (-ENODEV); /* * Set up configuration packet, write MTU * Indicate we are capable of handling VLAN tags */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config; init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu; init_pkt-> msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q = 1; /* Send the configuration packet */ ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); if (ret != 0) return (-EINVAL); return (0); } /* * Net VSC connect to VSP */ static int hv_nv_connect_to_vsp(struct hn_softc *sc) { netvsc_dev *net_dev; nvsp_msg *init_pkt; uint32_t ndis_version; uint32_t protocol_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; int i; int protocol_number = nitems(protocol_list); int ret = 0; device_t dev = sc->hn_dev; struct ifnet *ifp = sc->hn_ifp; net_dev = hv_nv_get_outbound_net_device(sc); /* * Negotiate the NVSP version. Try the latest NVSP first. */ for (i = protocol_number - 1; i >= 0; i--) { if (hv_nv_negotiate_nvsp_protocol(sc, net_dev, protocol_list[i]) == 0) { net_dev->nvsp_version = protocol_list[i]; if (bootverbose) device_printf(dev, "Netvsc: got version 0x%x\n", net_dev->nvsp_version); break; } } if (i < 0) { if (bootverbose) device_printf(dev, "failed to negotiate a valid " "protocol.\n"); return (EPROTO); } /* * Set the MTU if supported by this NVSP protocol version * This needs to be right after the NVSP init message per Haiyang */ if (net_dev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) ret = hv_nv_send_ndis_config(sc, ifp->if_mtu); /* * Send the NDIS version */ init_pkt = &net_dev->channel_init_packet; memset(init_pkt, 0, sizeof(nvsp_msg)); if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_4) { ndis_version = NDIS_VERSION_6_1; } else { ndis_version = NDIS_VERSION_6_30; } init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers = (ndis_version & 0xFFFF0000) >> 16; init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers = ndis_version & 0xFFFF; /* Send the init request */ ret = vmbus_chan_send(sc->hn_prichan, VMBUS_CHANPKT_TYPE_INBAND, 0, init_pkt, sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt); if (ret != 0) { goto cleanup; } /* * TODO: BUGBUG - We have to wait for the above msg since the netvsp * uses KMCL which acknowledges packet (completion packet) * since our Vmbus always set the VMBUS_CHANPKT_FLAG_RC flag */ /* sema_wait(&NetVscChannel->channel_init_sema); */ /* Post the big receive buffer to NetVSP */ if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; else net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; ret = hv_nv_init_rx_buffer_with_net_vsp(sc); if (ret == 0) ret = hv_nv_init_send_buffer_with_net_vsp(sc); cleanup: return (ret); } /* * Net VSC disconnect from VSP */ static void hv_nv_disconnect_from_vsp(netvsc_dev *net_dev) { hv_nv_destroy_rx_buffer(net_dev); hv_nv_destroy_send_buffer(net_dev); } void hv_nv_subchan_attach(struct hv_vmbus_channel *chan) { chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0, hv_nv_on_channel_callback, chan); } /* * Net VSC on device add * * Callback when the device belonging to this driver is added */ netvsc_dev * hv_nv_on_device_add(struct hn_softc *sc, void *additional_info) { struct hv_vmbus_channel *chan = sc->hn_prichan; netvsc_dev *net_dev; int ret = 0; net_dev = hv_nv_alloc_net_device(sc); if (net_dev == NULL) return NULL; /* Initialize the NetVSC channel extension */ sema_init(&net_dev->channel_init_sema, 0, "netdev_sema"); chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); /* * Open the channel */ ret = hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0, hv_nv_on_channel_callback, chan); if (ret != 0) { free(chan->hv_chan_rdbuf, M_NETVSC); goto cleanup; } /* * Connect with the NetVsp */ ret = hv_nv_connect_to_vsp(sc); if (ret != 0) goto close; return (net_dev); close: /* Now, we can close the channel safely */ free(chan->hv_chan_rdbuf, M_NETVSC); hv_vmbus_channel_close(chan); cleanup: /* * Free the packet buffers on the netvsc device packet queue. * Release other resources. */ sema_destroy(&net_dev->channel_init_sema); free(net_dev, M_NETVSC); return (NULL); } /* * Net VSC on device remove */ int hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel) { netvsc_dev *net_dev = sc->net_dev;; /* Stop outbound traffic ie sends and receives completions */ net_dev->destroy = TRUE; hv_nv_disconnect_from_vsp(net_dev); /* At this point, no one should be accessing net_dev except in here */ /* Now, we can close the channel safely */ free(sc->hn_prichan->hv_chan_rdbuf, M_NETVSC); hv_vmbus_channel_close(sc->hn_prichan); sema_destroy(&net_dev->channel_init_sema); free(net_dev, M_NETVSC); return (0); } /* * Net VSC on send completion */ static void hv_nv_on_send_completion(netvsc_dev *net_dev, struct hv_vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkt) { const nvsp_msg *nvsp_msg_pkt; netvsc_packet *net_vsc_pkt; nvsp_msg_pkt = VMBUS_CHANPKT_CONST_DATA(pkt); if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_rx_buf_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_send_buf_complete || nvsp_msg_pkt->hdr.msg_type == nvsp_msg5_type_subchannel) { /* Copy the response back */ memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt, sizeof(nvsp_msg)); sema_post(&net_dev->channel_init_sema); } else if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_1_type_send_rndis_pkt_complete) { /* Get the send context */ net_vsc_pkt = (netvsc_packet *)(unsigned long)pkt->cph_xactid; if (NULL != net_vsc_pkt) { if (net_vsc_pkt->send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { u_long mask; int idx; idx = net_vsc_pkt->send_buf_section_idx / BITS_PER_LONG; KASSERT(idx < net_dev->bitsmap_words, ("invalid section index %u", net_vsc_pkt->send_buf_section_idx)); mask = 1UL << (net_vsc_pkt->send_buf_section_idx % BITS_PER_LONG); KASSERT(net_dev->send_section_bitsmap[idx] & mask, ("index bitmap 0x%lx, section index %u, " "bitmap idx %d, bitmask 0x%lx", net_dev->send_section_bitsmap[idx], net_vsc_pkt->send_buf_section_idx, idx, mask)); atomic_clear_long( &net_dev->send_section_bitsmap[idx], mask); } /* Notify the layer above us */ net_vsc_pkt->compl.send.on_send_completion(chan, net_vsc_pkt->compl.send.send_completion_context); } } } /* * Net VSC on send * Sends a packet on the specified Hyper-V device. * Returns 0 on success, non-zero on failure. */ int hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt) { nvsp_msg send_msg; int ret; send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt; if (pkt->is_data_pkt) { /* 0 is RMC_DATA */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0; } else { /* 1 is RMC_CONTROL */ send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1; } send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx = pkt->send_buf_section_idx; send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = pkt->send_buf_section_size; if (pkt->gpa_cnt) { ret = vmbus_chan_send_sglist(chan, pkt->gpa, pkt->gpa_cnt, &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); } else { ret = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, &send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt); } return (ret); } /* * Net VSC on receive * * In the FreeBSD Hyper-V virtual world, this function deals exclusively * with virtual addresses. */ static void hv_nv_on_receive(netvsc_dev *net_dev, struct hn_softc *sc, struct hv_vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkthdr) { const struct vmbus_chanpkt_rxbuf *pkt; const nvsp_msg *nvsp_msg_pkt; netvsc_packet vsc_pkt; netvsc_packet *net_vsc_pkt = &vsc_pkt; device_t dev = sc->hn_dev; int count = 0; int i = 0; int status = nvsp_status_success; nvsp_msg_pkt = VMBUS_CHANPKT_CONST_DATA(pkthdr); /* Make sure this is a valid nvsp packet */ if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt) { device_printf(dev, "packet hdr type %u is invalid!\n", nvsp_msg_pkt->hdr.msg_type); return; } pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr; if (pkt->cp_rxbuf_id != NETVSC_RECEIVE_BUFFER_ID) { device_printf(dev, "rxbuf_id %d is invalid!\n", pkt->cp_rxbuf_id); return; } count = pkt->cp_rxbuf_cnt; /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ for (i = 0; i < count; i++) { net_vsc_pkt->status = nvsp_status_success; net_vsc_pkt->data = ((uint8_t *)net_dev->rx_buf + pkt->cp_rxbuf[i].rb_ofs); net_vsc_pkt->tot_data_buf_len = pkt->cp_rxbuf[i].rb_len; hv_rf_on_receive(net_dev, chan, net_vsc_pkt); if (net_vsc_pkt->status != nvsp_status_success) { status = nvsp_status_failure; } } /* * Moved completion call back here so that all received * messages (not just data messages) will trigger a response * message back to the host. */ hv_nv_on_receive_completion(chan, pkt->cp_hdr.cph_xactid, status); } /* * Net VSC on receive completion * * Send a receive completion packet to RNDIS device (ie NetVsp) */ static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan, uint64_t tid, uint32_t status) { nvsp_msg rx_comp_msg; int retries = 0; int ret = 0; rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete; /* Pass in the status */ rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status = status; retry_send_cmplt: /* Send the completion */ ret = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, 0, &rx_comp_msg, sizeof(nvsp_msg), tid); if (ret == 0) { /* success */ /* no-op */ } else if (ret == EAGAIN) { /* no more room... wait a bit and attempt to retry 3 times */ retries++; if (retries < 4) { DELAY(100); goto retry_send_cmplt; } } } /* * Net VSC receiving vRSS send table from VSP */ static void hv_nv_send_table(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt) { netvsc_dev *net_dev; const nvsp_msg *nvsp_msg_pkt; int i; uint32_t count; const uint32_t *table; net_dev = hv_nv_get_inbound_net_device(sc); if (!net_dev) return; nvsp_msg_pkt = VMBUS_CHANPKT_CONST_DATA(pkt); if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg5_type_send_indirection_table) { printf("Netvsc: !Warning! receive msg type not " "send_indirection_table. type = %d\n", nvsp_msg_pkt->hdr.msg_type); return; } count = nvsp_msg_pkt->msgs.vers_5_msgs.send_table.count; if (count != VRSS_SEND_TABLE_SIZE) { printf("Netvsc: Received wrong send table size: %u\n", count); return; } table = (const uint32_t *) ((const uint8_t *)&nvsp_msg_pkt->msgs.vers_5_msgs.send_table + nvsp_msg_pkt->msgs.vers_5_msgs.send_table.offset); for (i = 0; i < count; i++) net_dev->vrss_send_table[i] = table[i]; } /* * Net VSC on channel callback */ static void hv_nv_on_channel_callback(void *xchan) { struct hv_vmbus_channel *chan = xchan; device_t dev = chan->ch_dev; struct hn_softc *sc = device_get_softc(dev); netvsc_dev *net_dev; void *buffer; int bufferlen = NETVSC_PACKET_SIZE; net_dev = hv_nv_get_inbound_net_device(sc); if (net_dev == NULL) return; buffer = chan->hv_chan_rdbuf; do { struct vmbus_chanpkt_hdr *pkt = buffer; uint32_t bytes_rxed; int ret; bytes_rxed = bufferlen; ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed); if (ret == 0) { if (bytes_rxed > 0) { switch (pkt->cph_type) { case VMBUS_CHANPKT_TYPE_COMP: hv_nv_on_send_completion(net_dev, chan, pkt); break; case VMBUS_CHANPKT_TYPE_RXBUF: hv_nv_on_receive(net_dev, sc, chan, pkt); break; case VMBUS_CHANPKT_TYPE_INBAND: hv_nv_send_table(sc, pkt); break; default: device_printf(dev, "unknown chan pkt %u\n", pkt->cph_type); break; } } } else if (ret == ENOBUFS) { /* Handle large packet */ if (bufferlen > NETVSC_PACKET_SIZE) { free(buffer, M_NETVSC); buffer = NULL; } /* alloc new buffer */ buffer = malloc(bytes_rxed, M_NETVSC, M_NOWAIT); if (buffer == NULL) { device_printf(dev, "hv_cb malloc buffer failed, len=%u\n", bytes_rxed); bufferlen = 0; break; } bufferlen = bytes_rxed; } else { /* No more packets */ break; } } while (1); if (bufferlen > NETVSC_PACKET_SIZE) free(buffer, M_NETVSC); hv_rf_channel_rollup(chan); } Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.h =================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 302886) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 302887) @@ -1,1273 +1,1276 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * HyperV vmbus (virtual machine bus) network VSC (virtual services client) * header file * * (Updated from unencumbered NvspProtocol.h) */ #ifndef __HV_NET_VSC_H__ #define __HV_NET_VSC_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #define HN_USE_TXDESC_BUFRING MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 #define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1) #define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2) #define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2 #define VERSION_4_OFFLOAD_SIZE 22 #define NVSP_OPERATIONAL_STATUS_OK (0x00000000) #define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001) #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002) #define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003) #define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004) /* * Maximun number of transfer pages (packets) the VSP will use on a receive */ #define NVSP_MAX_PACKETS_PER_RECEIVE 375 /* vRSS stuff */ #define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 #define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 #define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 #define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 struct rndis_obj_header { uint8_t type; uint8_t rev; uint16_t size; } __packed; /* rndis_recv_scale_cap/cap_flag */ #define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 #define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000 #define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 #define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 /* RNDIS_RECEIVE_SCALE_CAPABILITIES */ struct rndis_recv_scale_cap { struct rndis_obj_header hdr; uint32_t cap_flag; uint32_t num_int_msg; uint32_t num_recv_que; uint16_t num_indirect_tabent; } __packed; /* rndis_recv_scale_param flags */ #define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 #define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 #define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 #define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 #define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 /* Hash info bits */ #define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001 #define RNDIS_HASH_IPV4 0x00000100 #define RNDIS_HASH_TCP_IPV4 0x00000200 #define RNDIS_HASH_IPV6 0x00000400 #define RNDIS_HASH_IPV6_EX 0x00000800 #define RNDIS_HASH_TCP_IPV6 0x00001000 #define RNDIS_HASH_TCP_IPV6_EX 0x00002000 #define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) #define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 #define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 /* RNDIS_RECEIVE_SCALE_PARAMETERS */ typedef struct rndis_recv_scale_param_ { struct rndis_obj_header hdr; /* Qualifies the rest of the information */ uint16_t flag; /* The base CPU number to do receive processing. not used */ uint16_t base_cpu_number; /* This describes the hash function and type being enabled */ uint32_t hashinfo; /* The size of indirection table array */ uint16_t indirect_tabsize; /* The offset of the indirection table from the beginning of this * structure */ uint32_t indirect_taboffset; /* The size of the hash secret key */ uint16_t hashkey_size; /* The offset of the secret key from the beginning of this structure */ uint32_t hashkey_offset; uint32_t processor_masks_offset; uint32_t num_processor_masks; uint32_t processor_masks_entry_size; } rndis_recv_scale_param; typedef enum nvsp_msg_type_ { nvsp_msg_type_none = 0, /* * Init Messages */ nvsp_msg_type_init = 1, nvsp_msg_type_init_complete = 2, nvsp_version_msg_start = 100, /* * Version 1 Messages */ nvsp_msg_1_type_send_ndis_vers = nvsp_version_msg_start, nvsp_msg_1_type_send_rx_buf, nvsp_msg_1_type_send_rx_buf_complete, nvsp_msg_1_type_revoke_rx_buf, nvsp_msg_1_type_send_send_buf, nvsp_msg_1_type_send_send_buf_complete, nvsp_msg_1_type_revoke_send_buf, nvsp_msg_1_type_send_rndis_pkt, nvsp_msg_1_type_send_rndis_pkt_complete, /* * Version 2 Messages */ nvsp_msg_2_type_send_chimney_delegated_buf, nvsp_msg_2_type_send_chimney_delegated_buf_complete, nvsp_msg_2_type_revoke_chimney_delegated_buf, nvsp_msg_2_type_resume_chimney_rx_indication, nvsp_msg_2_type_terminate_chimney, nvsp_msg_2_type_terminate_chimney_complete, nvsp_msg_2_type_indicate_chimney_event, nvsp_msg_2_type_send_chimney_packet, nvsp_msg_2_type_send_chimney_packet_complete, nvsp_msg_2_type_post_chimney_rx_request, nvsp_msg_2_type_post_chimney_rx_request_complete, nvsp_msg_2_type_alloc_rx_buf, nvsp_msg_2_type_alloc_rx_buf_complete, nvsp_msg_2_type_free_rx_buf, nvsp_msg_2_send_vmq_rndis_pkt, nvsp_msg_2_send_vmq_rndis_pkt_complete, nvsp_msg_2_type_send_ndis_config, nvsp_msg_2_type_alloc_chimney_handle, nvsp_msg_2_type_alloc_chimney_handle_complete, nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete, /* * Version 4 Messages */ nvsp_msg4_type_send_vf_association, nvsp_msg4_type_switch_data_path, nvsp_msg4_type_uplink_connect_state_deprecated, nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated, /* * Version 5 Messages */ nvsp_msg5_type_oid_query_ex, nvsp_msg5_type_oid_query_ex_comp, nvsp_msg5_type_subchannel, nvsp_msg5_type_send_indirection_table, nvsp_msg5_max = nvsp_msg5_type_send_indirection_table, } nvsp_msg_type; typedef enum nvsp_status_ { nvsp_status_none = 0, nvsp_status_success, nvsp_status_failure, /* Deprecated */ nvsp_status_prot_vers_range_too_new, /* Deprecated */ nvsp_status_prot_vers_range_too_old, nvsp_status_invalid_rndis_pkt, nvsp_status_busy, nvsp_status_max, } nvsp_status; typedef struct nvsp_msg_hdr_ { uint32_t msg_type; } __packed nvsp_msg_hdr; /* * Init Messages */ /* * This message is used by the VSC to initialize the channel * after the channels has been opened. This message should * never include anything other then versioning (i.e. this * message will be the same for ever). * * Forever is a long time. The values have been redefined * in Win7 to indicate major and minor protocol version * number. */ typedef struct nvsp_msg_init_ { union { struct { uint16_t minor_protocol_version; uint16_t major_protocol_version; } s; /* Formerly min_protocol_version */ uint32_t protocol_version; } p1; /* Formerly max_protocol_version */ uint32_t protocol_version_2; } __packed nvsp_msg_init; /* * This message is used by the VSP to complete the initialization * of the channel. This message should never include anything other * then versioning (i.e. this message will be the same forever). */ typedef struct nvsp_msg_init_complete_ { /* Deprecated */ uint32_t negotiated_prot_vers; uint32_t max_mdl_chain_len; uint32_t status; } __packed nvsp_msg_init_complete; typedef union nvsp_msg_init_uber_ { nvsp_msg_init init; nvsp_msg_init_complete init_compl; } __packed nvsp_msg_init_uber; /* * Version 1 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_1_msg_send_ndis_version_ { uint32_t ndis_major_vers; /* Deprecated */ uint32_t ndis_minor_vers; } __packed nvsp_1_msg_send_ndis_version; /* * This message is used by the VSC to send a receive buffer * to the VSP. The VSP can then use the receive buffer to * send data to the VSC. */ typedef struct nvsp_1_msg_send_rx_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_rx_buf; typedef struct nvsp_1_rx_buf_section_ { uint32_t offset; uint32_t sub_allocation_size; uint32_t num_sub_allocations; uint32_t end_offset; } __packed nvsp_1_rx_buf_section; /* * This message is used by the VSP to acknowledge a receive * buffer send by the VSC. This message must be sent by the * VSP before the VSP uses the receive buffer. */ typedef struct nvsp_1_msg_send_rx_buf_complete_ { uint32_t status; uint32_t num_sections; /* * The receive buffer is split into two parts, a large * suballocation section and a small suballocation * section. These sections are then suballocated by a * certain size. * * For example, the following break up of the receive * buffer has 6 large suballocations and 10 small * suballocations. * * | Large Section | | Small Section | * ------------------------------------------------------------ * | | | | | | | | | | | | | | | | | | * | | * LargeOffset SmallOffset */ nvsp_1_rx_buf_section sections[1]; } __packed nvsp_1_msg_send_rx_buf_complete; /* * This message is sent by the VSC to revoke the receive buffer. * After the VSP completes this transaction, the VSP should never * use the receive buffer again. */ typedef struct nvsp_1_msg_revoke_rx_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_rx_buf; /* * This message is used by the VSC to send a send buffer * to the VSP. The VSC can then use the send buffer to * send data to the VSP. */ typedef struct nvsp_1_msg_send_send_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_send_buf; /* * This message is used by the VSP to acknowledge a send * buffer sent by the VSC. This message must be sent by the * VSP before the VSP uses the sent buffer. */ typedef struct nvsp_1_msg_send_send_buf_complete_ { uint32_t status; /* * The VSC gets to choose the size of the send buffer and * the VSP gets to choose the sections size of the buffer. * This was done to enable dynamic reconfigurations when * the cost of GPA-direct buffers decreases. */ uint32_t section_size; } __packed nvsp_1_msg_send_send_buf_complete; /* * This message is sent by the VSC to revoke the send buffer. * After the VSP completes this transaction, the vsp should never * use the send buffer again. */ typedef struct nvsp_1_msg_revoke_send_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_send_buf; /* * This message is used by both the VSP and the VSC to send * an RNDIS message to the opposite channel endpoint. */ typedef struct nvsp_1_msg_send_rndis_pkt_ { /* * This field is specified by RNIDS. They assume there's * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. */ uint32_t chan_type; /* * This field is used to send part or all of the data * through a send buffer. This values specifies an * index into the send buffer. If the index is * 0xFFFFFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint32_t send_buf_section_idx; uint32_t send_buf_section_size; } __packed nvsp_1_msg_send_rndis_pkt; /* * This message is used by both the VSP and the VSC to complete * a RNDIS message to the opposite channel endpoint. At this * point, the initiator of this message cannot use any resources * associated with the original RNDIS packet. */ typedef struct nvsp_1_msg_send_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_1_msg_send_rndis_pkt_complete; /* * Version 2 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_2_netvsc_capabilities_ { union { uint64_t as_uint64; struct { uint64_t vmq : 1; uint64_t chimney : 1; uint64_t sriov : 1; uint64_t ieee8021q : 1; uint64_t correlationid : 1; uint64_t teaming : 1; } u2; } u1; } __packed nvsp_2_netvsc_capabilities; typedef struct nvsp_2_msg_send_ndis_config_ { uint32_t mtu; uint32_t reserved; nvsp_2_netvsc_capabilities capabilities; } __packed nvsp_2_msg_send_ndis_config; /* * NvspMessage2TypeSendChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_send_chimney_buf_ { /* * On WIN7 beta, delegated_obj_max_size is defined as a uint32_t * Since WIN7 RC, it was split into two uint16_t. To have the same * struct layout, delegated_obj_max_size shall be the first field. */ uint16_t delegated_obj_max_size; /* * The revision # of chimney protocol used between NVSC and NVSP. * * This revision is NOT related to the chimney revision between * NDIS protocol and miniport drivers. */ uint16_t revision; uint32_t gpadl_handle; } __packed nvsp_2_msg_send_chimney_buf; /* Unsupported chimney revision 0 (only present in WIN7 beta) */ #define NVSP_CHIMNEY_REVISION_0 0 /* WIN7 Beta Chimney QFE */ #define NVSP_CHIMNEY_REVISION_1 1 /* The chimney revision since WIN7 RC */ #define NVSP_CHIMNEY_REVISION_2 2 /* * NvspMessage2TypeSendChimneyDelegatedBufferComplete */ typedef struct nvsp_2_msg_send_chimney_buf_complete_ { uint32_t status; /* * Maximum number outstanding sends and pre-posted receives. * * NVSC should not post more than SendQuota/ReceiveQuota packets. * Otherwise, it can block the non-chimney path for an indefinite * amount of time. * (since chimney sends/receives are affected by the remote peer). * * Note: NVSP enforces the quota restrictions on a per-VMBCHANNEL * basis. It doesn't enforce the restriction separately for chimney * send/receive. If NVSC doesn't voluntarily enforce "SendQuota", * it may kill its own network connectivity. */ uint32_t send_quota; uint32_t rx_quota; } __packed nvsp_2_msg_send_chimney_buf_complete; /* * NvspMessage2TypeRevokeChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_revoke_chimney_buf_ { uint32_t gpadl_handle; } __packed nvsp_2_msg_revoke_chimney_buf; #define NVSP_CHIMNEY_OBJECT_TYPE_NEIGHBOR 0 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH4 1 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH6 2 #define NVSP_CHIMNEY_OBJECT_TYPE_TCP 3 /* * NvspMessage2TypeAllocateChimneyHandle */ typedef struct nvsp_2_msg_alloc_chimney_handle_ { uint64_t vsc_context; uint32_t object_type; } __packed nvsp_2_msg_alloc_chimney_handle; /* * NvspMessage2TypeAllocateChimneyHandleComplete */ typedef struct nvsp_2_msg_alloc_chimney_handle_complete_ { uint32_t vsp_handle; } __packed nvsp_2_msg_alloc_chimney_handle_complete; /* * NvspMessage2TypeResumeChimneyRXIndication */ typedef struct nvsp_2_msg_resume_chimney_rx_indication { /* * Handle identifying the offloaded connection */ uint32_t vsp_tcp_handle; } __packed nvsp_2_msg_resume_chimney_rx_indication; #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE (0x01u) #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_RESERVED (~(0x01u)) /* * NvspMessage2TypeTerminateChimney */ typedef struct nvsp_2_msg_terminate_chimney_ { /* * Handle identifying the offloaded object */ uint32_t vsp_handle; /* * Terminate Offload Flags * Bit 0: * When set to 0, terminate the offload at the destination NIC * Bit 1-31: Reserved, shall be zero */ uint32_t flags; union { /* * This field is valid only when bit 0 of flags is clear. * It specifies the index into the premapped delegated * object buffer. The buffer was sent through the * NvspMessage2TypeSendChimneyDelegatedBuffer * message at initialization time. * * NVSP will write the delegated state into the delegated * buffer upon upload completion. */ uint32_t index; /* * This field is valid only when bit 0 of flags is set. * * The seqence number of the most recently accepted RX * indication when VSC sets its TCP context into * "terminating" state. * * This allows NVSP to determines if there are any in-flight * RX indications for which the acceptance state is still * undefined. */ uint64_t last_accepted_rx_seq_no; } f0; } __packed nvsp_2_msg_terminate_chimney; #define NVSP_TERMINATE_CHIMNEY_COMPLETE_FLAG_DATA_CORRUPTED 0x0000001u /* * NvspMessage2TypeTerminateChimneyComplete */ typedef struct nvsp_2_msg_terminate_chimney_complete_ { uint64_t vsc_context; uint32_t flags; } __packed nvsp_2_msg_terminate_chimney_complete; /* * NvspMessage2TypeIndicateChimneyEvent */ typedef struct nvsp_2_msg_indicate_chimney_event_ { /* * When VscTcpContext is 0, event_type is an NDIS_STATUS event code * Otherwise, EventType is an TCP connection event (defined in * NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_type; /* * When VscTcpContext is 0, EventType is an NDIS_STATUS event code * Otherwise, EventType is an TCP connection event specific information * (defined in NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_specific_info; /* * If not 0, the event is per-TCP connection event. This field * contains the VSC's TCP context. * If 0, the event indication is global. */ uint64_t vsc_tcp_context; } __packed nvsp_2_msg_indicate_chimney_event; #define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX 0xffffu #define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX 0xffffffff /* * NvspMessage2TypeSendChimneyPacket */ typedef struct nvsp_2_msg_send_chimney_pkt_ { /* * Identify the TCP connection for which this chimney send is */ uint32_t vsp_tcp_handle; /* * This field is used to send part or all of the data * through a send buffer. This values specifies an * index into the send buffer. If the index is * 0xFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint16_t send_buf_section_index; uint16_t send_buf_section_size; /* * OOB Data Index * This an index to the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall be always 0xFFFFFFFF for now. It is reserved for * the future. */ uint16_t oob_data_index; /* * DisconnectFlags = 0 * Normal chimney send. See MiniportTcpOffloadSend for details. * * DisconnectFlags = TCP_DISCONNECT_GRACEFUL_CLOSE (0x01) * Graceful disconnect. See MiniportTcpOffloadDisconnect for details. * * DisconnectFlags = TCP_DISCONNECT_ABORTIVE_CLOSE (0x02) * Abortive disconnect. See MiniportTcpOffloadDisconnect for details. */ uint16_t disconnect_flags; uint32_t seq_no; } __packed nvsp_2_msg_send_chimney_pkt; /* * NvspMessage2TypeSendChimneyPacketComplete */ typedef struct nvsp_2_msg_send_chimney_pkt_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by the peer). */ uint32_t bytes_transferred; } __packed nvsp_2_msg_send_chimney_pkt_complete; #define NVSP_1_CHIMNEY_RECV_FLAG_NO_PUSH 0x0001u #define NVSP_1_CHIMNEY_RECV_INVALID_OOB_INDEX 0xffffu /* * NvspMessage2TypePostChimneyRecvRequest */ typedef struct nvsp_2_msg_post_chimney_rx_request_ { /* * Identify the TCP connection which this chimney receive request * is for. */ uint32_t vsp_tcp_handle; /* * OOB Data Index * This an index to the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall be always 0xFFFFFFFF for now. It is reserved for * the future. */ uint32_t oob_data_index; /* * Bit 0 * When it is set, this is a "no-push" receive. * When it is clear, this is a "push" receive. * * Bit 1-15: Reserved and shall be zero */ uint16_t flags; /* * For debugging and diagnoses purpose. * The SeqNo is per TCP connection and starts from 0. */ uint32_t seq_no; } __packed nvsp_2_msg_post_chimney_rx_request; /* * NvspMessage2TypePostChimneyRecvRequestComplete */ typedef struct nvsp_2_msg_post_chimney_rx_request_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by * the peer). */ uint32_t bytes_xferred; } __packed nvsp_2_msg_post_chimney_rx_request_complete; /* * NvspMessage2TypeAllocateReceiveBuffer */ typedef struct nvsp_2_msg_alloc_rx_buf_ { /* * Allocation ID to match the allocation request and response */ uint32_t allocation_id; /* * Length of the VM shared memory receive buffer that needs to * be allocated */ uint32_t length; } __packed nvsp_2_msg_alloc_rx_buf; /* * NvspMessage2TypeAllocateReceiveBufferComplete */ typedef struct nvsp_2_msg_alloc_rx_buf_complete_ { /* * The NDIS_STATUS code for buffer allocation */ uint32_t status; /* * Allocation ID from NVSP_2_MESSAGE_ALLOCATE_RECEIVE_BUFFER */ uint32_t allocation_id; /* * GPADL handle for the allocated receive buffer */ uint32_t gpadl_handle; /* * Receive buffer ID that is further used in * NvspMessage2SendVmqRndisPacket */ uint64_t rx_buf_id; } __packed nvsp_2_msg_alloc_rx_buf_complete; /* * NvspMessage2TypeFreeReceiveBuffer */ typedef struct nvsp_2_msg_free_rx_buf_ { /* * Receive buffer ID previous returned in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t rx_buf_id; } __packed nvsp_2_msg_free_rx_buf; /* * This structure is used in defining the buffers in * NVSP_2_MESSAGE_SEND_VMQ_RNDIS_PACKET structure */ typedef struct nvsp_xfer_page_range_ { /* * Specifies the ID of the receive buffer that has the buffer. This * ID can be the general receive buffer ID specified in * NvspMessage1TypeSendReceiveBuffer or it can be the shared memory * receive buffer ID allocated by the VSC and specified in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t xfer_page_set_id; /* * Number of bytes */ uint32_t byte_count; /* * Offset in bytes from the beginning of the buffer */ uint32_t byte_offset; } __packed nvsp_xfer_page_range; /* * NvspMessage2SendVmqRndisPacket */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_ { /* * This field is specified by RNIDS. They assume there's * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. It must be RMC_DATA */ uint32_t channel_type; /* * Only the Range element corresponding to the RNDIS header of * the first RNDIS message in the multiple RNDIS messages sent * in one NVSP message. Information about the data portions as well * as the subsequent RNDIS messages in the same NVSP message are * embedded in the RNDIS header itself */ nvsp_xfer_page_range range; } __packed nvsp_2_msg_send_vmq_rndis_pkt; /* * This message is used by the VSC to complete * a RNDIS VMQ message to the VSP. At this point, * the initiator of this message can use any resources * associated with the original RNDIS VMQ packet. */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_2_msg_send_vmq_rndis_pkt_complete; /* * Version 5 messages */ enum nvsp_subchannel_operation { NVSP_SUBCHANNEL_NONE = 0, NVSP_SUBCHANNE_ALLOCATE, NVSP_SUBCHANNE_MAX }; typedef struct nvsp_5_subchannel_request_ { uint32_t op; uint32_t num_subchannels; } __packed nvsp_5_subchannel_request; typedef struct nvsp_5_subchannel_complete_ { uint32_t status; /* Actual number of subchannels allocated */ uint32_t num_subchannels; } __packed nvsp_5_subchannel_complete; typedef struct nvsp_5_send_indirect_table_ { /* The number of entries in the send indirection table */ uint32_t count; /* * The offset of the send indireciton table from top of * this struct. The send indirection table tells which channel * to put the send traffic on. Each entry is a channel number. */ uint32_t offset; } __packed nvsp_5_send_indirect_table; typedef union nvsp_1_msg_uber_ { nvsp_1_msg_send_ndis_version send_ndis_vers; nvsp_1_msg_send_rx_buf send_rx_buf; nvsp_1_msg_send_rx_buf_complete send_rx_buf_complete; nvsp_1_msg_revoke_rx_buf revoke_rx_buf; nvsp_1_msg_send_send_buf send_send_buf; nvsp_1_msg_send_send_buf_complete send_send_buf_complete; nvsp_1_msg_revoke_send_buf revoke_send_buf; nvsp_1_msg_send_rndis_pkt send_rndis_pkt; nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete; } __packed nvsp_1_msg_uber; typedef union nvsp_2_msg_uber_ { nvsp_2_msg_send_ndis_config send_ndis_config; nvsp_2_msg_send_chimney_buf send_chimney_buf; nvsp_2_msg_send_chimney_buf_complete send_chimney_buf_complete; nvsp_2_msg_revoke_chimney_buf revoke_chimney_buf; nvsp_2_msg_resume_chimney_rx_indication resume_chimney_rx_indication; nvsp_2_msg_terminate_chimney terminate_chimney; nvsp_2_msg_terminate_chimney_complete terminate_chimney_complete; nvsp_2_msg_indicate_chimney_event indicate_chimney_event; nvsp_2_msg_send_chimney_pkt send_chimney_packet; nvsp_2_msg_send_chimney_pkt_complete send_chimney_packet_complete; nvsp_2_msg_post_chimney_rx_request post_chimney_rx_request; nvsp_2_msg_post_chimney_rx_request_complete post_chimney_rx_request_complete; nvsp_2_msg_alloc_rx_buf alloc_rx_buffer; nvsp_2_msg_alloc_rx_buf_complete alloc_rx_buffer_complete; nvsp_2_msg_free_rx_buf free_rx_buffer; nvsp_2_msg_send_vmq_rndis_pkt send_vmq_rndis_pkt; nvsp_2_msg_send_vmq_rndis_pkt_complete send_vmq_rndis_pkt_complete; nvsp_2_msg_alloc_chimney_handle alloc_chimney_handle; nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete; } __packed nvsp_2_msg_uber; typedef union nvsp_5_msg_uber_ { nvsp_5_subchannel_request subchannel_request; nvsp_5_subchannel_complete subchn_complete; nvsp_5_send_indirect_table send_table; } __packed nvsp_5_msg_uber; typedef union nvsp_all_msgs_ { nvsp_msg_init_uber init_msgs; nvsp_1_msg_uber vers_1_msgs; nvsp_2_msg_uber vers_2_msgs; nvsp_5_msg_uber vers_5_msgs; } __packed nvsp_all_msgs; /* * ALL Messages */ typedef struct nvsp_msg_ { nvsp_msg_hdr hdr; nvsp_all_msgs msgs; } __packed nvsp_msg; /* * The following arguably belongs in a separate header file */ /* * Defines */ #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */ #define NETVSC_SEND_BUFFER_ID 0xface #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_RECEIVE_SG_COUNT 1 /* Preallocated receive packets */ #define NETVSC_RECEIVE_PACKETLIST_COUNT 256 /* * Maximum MTU we permit to be configured for a netvsc interface. * When the code was developed, a max MTU of 12232 was tested and * proven to work. 9K is a reasonable maximum for an Ethernet. */ #define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024) #define NETVSC_PACKET_SIZE PAGE_SIZE #define VRSS_SEND_TABLE_SIZE 16 /* * Data types */ /* * Per netvsc channel-specific */ typedef struct netvsc_dev_ { struct hn_softc *sc; /* Send buffer allocated by us but manages by NetVSP */ void *send_buf; uint32_t send_buf_size; uint32_t send_buf_gpadl_handle; uint32_t send_section_size; uint32_t send_section_count; unsigned long bitsmap_words; unsigned long *send_section_bitsmap; /* Receive buffer allocated by us but managed by NetVSP */ void *rx_buf; uint32_t rx_buf_size; uint32_t rx_buf_gpadl_handle; uint32_t rx_section_count; nvsp_1_rx_buf_section *rx_sections; /* Used for NetVSP initialization protocol */ struct sema channel_init_sema; nvsp_msg channel_init_packet; nvsp_msg revoke_packet; /*uint8_t hw_mac_addr[HW_MACADDR_LEN];*/ /* Holds rndis device info */ void *extension; hv_bool_uint8_t destroy; /* Negotiated NVSP version */ uint32_t nvsp_version; uint32_t num_channel; + struct hyperv_dma rxbuf_dma; + struct hyperv_dma txbuf_dma; uint32_t vrss_send_table[VRSS_SEND_TABLE_SIZE]; } netvsc_dev; struct hv_vmbus_channel; typedef void (*pfn_on_send_rx_completion)(struct hv_vmbus_channel *, void *); #define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE) #define NETVSC_VLAN_PRIO_MASK 0xe000 #define NETVSC_VLAN_PRIO_SHIFT 13 #define NETVSC_VLAN_VID_MASK 0x0fff #define TYPE_IPV4 2 #define TYPE_IPV6 4 #define TYPE_TCP 2 #define TYPE_UDP 4 #define TRANSPORT_TYPE_NOT_IP 0 #define TRANSPORT_TYPE_IPV4_TCP ((TYPE_IPV4 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV4_UDP ((TYPE_IPV4 << 16) | TYPE_UDP) #define TRANSPORT_TYPE_IPV6_TCP ((TYPE_IPV6 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV6_UDP ((TYPE_IPV6 << 16) | TYPE_UDP) #ifdef __LP64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif typedef struct netvsc_packet_ { hv_bool_uint8_t is_data_pkt; /* One byte */ uint16_t vlan_tci; uint32_t status; /* Completion */ union { struct { uint64_t rx_completion_tid; void *rx_completion_context; /* This is no longer used */ pfn_on_send_rx_completion on_rx_completion; } rx; struct { uint64_t send_completion_tid; void *send_completion_context; /* Still used in netvsc and filter code */ pfn_on_send_rx_completion on_send_completion; } send; } compl; uint32_t send_buf_section_idx; uint32_t send_buf_section_size; void *rndis_mesg; uint32_t tot_data_buf_len; void *data; uint32_t gpa_cnt; struct vmbus_gpa gpa[VMBUS_CHAN_SGLIST_MAX]; } netvsc_packet; typedef struct { uint8_t mac_addr[6]; /* Assumption unsigned long */ hv_bool_uint8_t link_state; } netvsc_device_info; #ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_rx_ring { struct ifnet *hn_ifp; int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; u_long hn_rss_pkts; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 #define HN_RX_FLAG_ATTACHED 0x1 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; #endif int hn_txdesc_cnt; int hn_txdesc_avail; u_short hn_has_txeof; u_short hn_txdone_cnt; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; struct mtx hn_tx_lock; struct hn_softc *hn_sc; struct hv_vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_tx_chimney_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney_tried; u_long hn_tx_chimney; u_long hn_pkts; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; int hn_tx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TX_FLAG_ATTACHED 0x1 /* * Device-specific softc structure */ typedef struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; int hn_if_flags; struct mtx hn_lock; int hn_initdone; /* See hv_netvsc_drv_freebsd.c for rules on how to use */ int temp_unusable; netvsc_dev *net_dev; struct hv_vmbus_channel *hn_prichan; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; int hn_cpu; int hn_tx_chimney_max; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; } hn_softc_t; /* * Externs */ extern int hv_promisc_mode; void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hn_softc *sc, void *additional_info); int hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel); int hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt); int hv_nv_get_next_send_section(netvsc_dev *net_dev); void hv_nv_subchan_attach(struct hv_vmbus_channel *chan); #endif /* __HV_NET_VSC_H__ */ Index: head/sys/dev/hyperv/vmbus/hv_channel.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_channel.c (revision 302886) +++ head/sys/dev/hyperv/vmbus/hv_channel.c (revision 302887) @@ -1,1382 +1,1371 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void vmbus_chan_send_event(hv_vmbus_channel* channel); static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *, const struct hv_vmbus_channel *); static void vmbus_chan_task(void *, int); static void vmbus_chan_task_nobatch(void *, int); static void vmbus_chan_detach_task(void *, int); static void vmbus_chan_msgproc_choffer(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *, const struct vmbus_message *); /* * Vmbus channel message processing. */ static const vmbus_chanmsg_proc_t vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER, vmbus_chan_msgproc_choffer), VMBUS_CHANMSG_PROC(CHRESCIND, vmbus_chan_msgproc_chrescind), VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP) }; /** * @brief Trigger an event notification on the specified channel */ static void vmbus_chan_send_event(hv_vmbus_channel *channel) { struct vmbus_softc *sc = channel->vmbus_sc; uint32_t chanid = channel->ch_id; atomic_set_long(&sc->vmbus_tx_evtflags[chanid >> VMBUS_EVTFLAG_SHIFT], 1UL << (chanid & VMBUS_EVTFLAG_MASK)); if (channel->ch_flags & VMBUS_CHAN_FLAG_HASMNF) { atomic_set_int( &sc->vmbus_mnf2->mnf_trigs[channel->ch_montrig_idx].mt_pending, channel->ch_montrig_mask); } else { hypercall_signal_event(channel->ch_monprm_dma.hv_paddr); } } static int vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS) { struct hv_vmbus_channel *chan = arg1; int alloc = 0; if (chan->ch_flags & VMBUS_CHAN_FLAG_HASMNF) alloc = 1; return sysctl_handle_int(oidp, &alloc, 0, req); } static void vmbus_channel_sysctl_create(hv_vmbus_channel* channel) { device_t dev; struct sysctl_oid *devch_sysctl; struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl; struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl; struct sysctl_ctx_list *ctx; uint32_t ch_id; uint16_t sub_ch_id; char name[16]; hv_vmbus_channel* primary_ch = channel->ch_prichan; if (primary_ch == NULL) { dev = channel->ch_dev; ch_id = channel->ch_id; } else { dev = primary_ch->ch_dev; ch_id = primary_ch->ch_id; sub_ch_id = channel->ch_subidx; } ctx = &channel->ch_sysctl_ctx; sysctl_ctx_init(ctx); /* This creates dev.DEVNAME.DEVUNIT.channel tree */ devch_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); /* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */ snprintf(name, sizeof(name), "%d", ch_id); devch_id_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_sysctl), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (primary_ch != NULL) { devch_sub_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); snprintf(name, sizeof(name), "%d", sub_ch_id); devch_id_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_sub_sysctl), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "chanid", CTLFLAG_RD, &channel->ch_id, 0, "channel id"); } SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "cpu", CTLFLAG_RD, &channel->ch_cpuid, 0, "owner CPU id"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, channel, 0, vmbus_channel_sysctl_monalloc, "I", "is monitor allocated to this channel"); devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "in", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO, "out", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(devch_id_in_sysctl), &(channel->inbound), "inbound ring buffer stats"); hv_ring_buffer_stat(ctx, SYSCTL_CHILDREN(devch_id_out_sysctl), &(channel->outbound), "outbound ring buffer stats"); } /** * @brief Open the specified channel */ int hv_vmbus_channel_open( hv_vmbus_channel* new_channel, uint32_t send_ring_buffer_size, uint32_t recv_ring_buffer_size, void* user_data, uint32_t user_data_len, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_softc *sc = new_channel->vmbus_sc; const struct vmbus_chanmsg_chopen_resp *resp; const struct vmbus_message *msg; struct vmbus_chanmsg_chopen *req; struct vmbus_msghc *mh; uint32_t status; int ret = 0; uint8_t *br; if (user_data_len > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) { device_printf(sc->vmbus_dev, "invalid udata len %u for chan%u\n", user_data_len, new_channel->ch_id); return EINVAL; } KASSERT((send_ring_buffer_size & PAGE_MASK) == 0, ("send bufring size is not multiple page")); KASSERT((recv_ring_buffer_size & PAGE_MASK) == 0, ("recv bufring size is not multiple page")); if (atomic_testandset_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED_SHIFT)) panic("double-open chan%u", new_channel->ch_id); new_channel->ch_cb = cb; new_channel->ch_cbarg = cbarg; vmbus_chan_update_evtflagcnt(sc, new_channel); new_channel->ch_tq = VMBUS_PCPU_GET(new_channel->vmbus_sc, event_tq, new_channel->ch_cpuid); if (new_channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) { TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task, new_channel); } else { TASK_INIT(&new_channel->ch_task, 0, vmbus_chan_task_nobatch, new_channel); } /* * Allocate the TX+RX bufrings. * XXX should use ch_dev dtag */ br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), PAGE_SIZE, 0, send_ring_buffer_size + recv_ring_buffer_size, &new_channel->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (br == NULL) { device_printf(sc->vmbus_dev, "bufring allocation failed\n"); ret = ENOMEM; goto failed; } new_channel->ch_bufring = br; /* TX bufring comes first */ hv_vmbus_ring_buffer_init(&new_channel->outbound, br, send_ring_buffer_size); /* RX bufring immediately follows TX bufring */ hv_vmbus_ring_buffer_init(&new_channel->inbound, br + send_ring_buffer_size, recv_ring_buffer_size); /* Create sysctl tree for this channel */ vmbus_channel_sysctl_create(new_channel); /* * Connect the bufrings, both RX and TX, to this channel. */ ret = vmbus_chan_gpadl_connect(new_channel, new_channel->ch_bufring_dma.hv_paddr, send_ring_buffer_size + recv_ring_buffer_size, &new_channel->ch_bufring_gpadl); if (ret != 0) { device_printf(sc->vmbus_dev, "failed to connect bufring GPADL to chan%u\n", new_channel->ch_id); goto failed; } /* * Open channel w/ the bufring GPADL on the target CPU. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chopen(chan%u)\n", new_channel->ch_id); ret = ENXIO; goto failed; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN; req->chm_chanid = new_channel->ch_id; req->chm_openid = new_channel->ch_id; req->chm_gpadl = new_channel->ch_bufring_gpadl; req->chm_vcpuid = new_channel->ch_vcpuid; req->chm_rxbr_pgofs = send_ring_buffer_size >> PAGE_SHIFT; if (user_data_len) memcpy(req->chm_udata, user_data, user_data_len); ret = vmbus_msghc_exec(sc, mh); if (ret != 0) { device_printf(sc->vmbus_dev, "chopen(chan%u) msg hypercall exec failed: %d\n", new_channel->ch_id, ret); vmbus_msghc_put(sc, mh); goto failed; } msg = vmbus_msghc_wait_result(sc, mh); resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data; status = resp->chm_status; vmbus_msghc_put(sc, mh); if (status == 0) { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u opened\n", new_channel->ch_id); } return 0; } device_printf(sc->vmbus_dev, "failed to open chan%u\n", new_channel->ch_id); ret = ENXIO; failed: if (new_channel->ch_bufring_gpadl) { hv_vmbus_channel_teardown_gpdal(new_channel, new_channel->ch_bufring_gpadl); new_channel->ch_bufring_gpadl = 0; } if (new_channel->ch_bufring != NULL) { hyperv_dmamem_free(&new_channel->ch_bufring_dma, new_channel->ch_bufring); new_channel->ch_bufring = NULL; } atomic_clear_int(&new_channel->ch_stflags, VMBUS_CHAN_ST_OPENED); return ret; } -/** - * @brief Establish a GPADL for the specified buffer - */ -int -hv_vmbus_channel_establish_gpadl(struct hv_vmbus_channel *channel, - void *contig_buffer, uint32_t size, uint32_t *gpadl) -{ - return vmbus_chan_gpadl_connect(channel, - hv_get_phys_addr(contig_buffer), size, gpadl); -} - int vmbus_chan_gpadl_connect(struct hv_vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl0) { struct vmbus_softc *sc = chan->vmbus_sc; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_conn *req; const struct vmbus_message *msg; size_t reqsz; uint32_t gpadl, status; int page_count, range_len, i, cnt, error; uint64_t page_id; /* * Preliminary checks. */ KASSERT((size & PAGE_MASK) == 0, ("invalid GPA size %d, not multiple page size", size)); page_count = size >> PAGE_SHIFT; KASSERT((paddr & PAGE_MASK) == 0, ("GPA is not page aligned %jx", (uintmax_t)paddr)); page_id = paddr >> PAGE_SHIFT; range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]); /* * We don't support multiple GPA ranges. */ if (range_len > UINT16_MAX) { device_printf(sc->vmbus_dev, "GPA too large, %d pages\n", page_count); return EOPNOTSUPP; } /* * Allocate GPADL id. */ gpadl = vmbus_gpadl_alloc(sc); *gpadl0 = gpadl; /* * Connect this GPADL to the target channel. * * NOTE: * Since each message can only hold small set of page * addresses, several messages may be required to * complete the connection. */ if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn, chm_range.gpa_page[cnt]); mh = vmbus_msghc_get(sc, reqsz); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpadl->chan%u\n", chan->ch_id); return EIO; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; req->chm_range_len = range_len; req->chm_range_cnt = 1; req->chm_range.gpa_len = size; req->chm_range.gpa_ofs = 0; for (i = 0; i < cnt; ++i) req->chm_range.gpa_page[i] = page_id++; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpadl->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } while (page_count > 0) { struct vmbus_chanmsg_gpadl_subconn *subreq; if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn, chm_gpa_page[cnt]); vmbus_msghc_reset(mh, reqsz); subreq = vmbus_msghc_dataptr(mh); subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN; subreq->chm_gpadl = gpadl; for (i = 0; i < cnt; ++i) subreq->chm_gpa_page[i] = page_id++; vmbus_msghc_exec_noresult(mh); } KASSERT(page_count == 0, ("invalid page count %d", page_count)); msg = vmbus_msghc_wait_result(sc, mh); status = ((const struct vmbus_chanmsg_gpadl_connresp *) msg->msg_data)->chm_status; vmbus_msghc_put(sc, mh); if (status != 0) { device_printf(sc->vmbus_dev, "gpadl->chan%u failed: " "status %u\n", chan->ch_id, status); return EIO; } else { if (bootverbose) { device_printf(sc->vmbus_dev, "gpadl->chan%u " "succeeded\n", chan->ch_id); } } return 0; } /* * Disconnect the GPA from the target channel */ int hv_vmbus_channel_teardown_gpdal(struct hv_vmbus_channel *chan, uint32_t gpadl) { struct vmbus_softc *sc = chan->vmbus_sc; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_disconn *req; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpa x->chan%u\n", chan->ch_id); return EBUSY; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpa x->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } vmbus_msghc_wait_result(sc, mh); /* Discard result; no useful information */ vmbus_msghc_put(sc, mh); return 0; } static void hv_vmbus_channel_close_internal(hv_vmbus_channel *channel) { struct vmbus_softc *sc = channel->vmbus_sc; struct vmbus_msghc *mh; struct vmbus_chanmsg_chclose *req; struct taskqueue *tq = channel->ch_tq; int error; /* TODO: stringent check */ atomic_clear_int(&channel->ch_stflags, VMBUS_CHAN_ST_OPENED); sysctl_ctx_free(&channel->ch_sysctl_ctx); /* * Set ch_tq to NULL to avoid more requests be scheduled */ channel->ch_tq = NULL; taskqueue_drain(tq, &channel->ch_task); channel->ch_cb = NULL; /** * Send a closing message */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chclose(chan%u)\n", channel->ch_id); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE; req->chm_chanid = channel->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chclose(chan%u) msg hypercall exec failed: %d\n", channel->ch_id, error); return; } else if (bootverbose) { device_printf(sc->vmbus_dev, "close chan%u\n", channel->ch_id); } /* Tear down the gpadl for the channel's ring buffer */ if (channel->ch_bufring_gpadl) { hv_vmbus_channel_teardown_gpdal(channel, channel->ch_bufring_gpadl); channel->ch_bufring_gpadl = 0; } /* TODO: Send a msg to release the childRelId */ /* cleanup the ring buffers for this channel */ hv_ring_buffer_cleanup(&channel->outbound); hv_ring_buffer_cleanup(&channel->inbound); if (channel->ch_bufring != NULL) { hyperv_dmamem_free(&channel->ch_bufring_dma, channel->ch_bufring); channel->ch_bufring = NULL; } } /* * Caller should make sure that all sub-channels have * been added to 'chan' and all to-be-closed channels * are not being opened. */ void hv_vmbus_channel_close(struct hv_vmbus_channel *chan) { int subchan_cnt; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Sub-channel is closed when its primary channel * is closed; done. */ return; } /* * Close all sub-channels, if any. */ subchan_cnt = chan->ch_subchan_cnt; if (subchan_cnt > 0) { struct hv_vmbus_channel **subchan; int i; subchan = vmbus_get_subchan(chan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) hv_vmbus_channel_close_internal(subchan[i]); vmbus_rel_subchan(subchan, subchan_cnt); } /* Then close the primary channel. */ hv_vmbus_channel_close_internal(chan); } int vmbus_chan_send(struct hv_vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt pkt; int pktlen, pad_pktlen, hlen, error; uint64_t pad = 0; struct iovec iov[3]; boolean_t send_evt; hlen = sizeof(pkt); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = type; pkt.cp_hdr.cph_flags = flags; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = hv_ring_buffer_write(&chan->outbound, iov, 3, &send_evt); if (!error && send_evt) vmbus_chan_send_event(chan); return error; } int vmbus_chan_send_sglist(struct hv_vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_sglist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX, ("invalid sglist len %d", sglen)); hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_gpa_cnt = sglen; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = sg; iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen; iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_send_event(chan); return error; } int vmbus_chan_send_prplist(struct hv_vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_prplist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX, ("invalid prplist entry count %d", prp_cnt)); hlen = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prp_cnt]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_range_cnt = 1; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = prp; iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]); iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = hv_ring_buffer_write(&chan->outbound, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_send_event(chan); return error; } int vmbus_chan_recv(struct hv_vmbus_channel *chan, void *data, int *dlen0, uint64_t *xactid) { struct vmbus_chanpkt_hdr pkt; int error, dlen, hlen; error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt)); if (error) return error; hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen); dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen; if (*dlen0 < dlen) { /* Return the size of this packet's data. */ *dlen0 = dlen; return ENOBUFS; } *xactid = pkt.cph_xactid; *dlen0 = dlen; /* Skip packet header */ error = hv_ring_buffer_read(&chan->inbound, data, dlen, hlen); KASSERT(!error, ("hv_ring_buffer_read failed")); return 0; } int vmbus_chan_recv_pkt(struct hv_vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt0, int *pktlen0) { struct vmbus_chanpkt_hdr pkt; int error, pktlen; error = hv_ring_buffer_peek(&chan->inbound, &pkt, sizeof(pkt)); if (error) return error; pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen); if (*pktlen0 < pktlen) { /* Return the size of this packet. */ *pktlen0 = pktlen; return ENOBUFS; } *pktlen0 = pktlen; /* Include packet header */ error = hv_ring_buffer_read(&chan->inbound, pkt0, pktlen, 0); KASSERT(!error, ("hv_ring_buffer_read failed")); return 0; } static void vmbus_chan_task(void *xchan, int pending __unused) { struct hv_vmbus_channel *chan = xchan; vmbus_chan_callback_t cb = chan->ch_cb; void *cbarg = chan->ch_cbarg; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. * * NOTE: Interrupt has been disabled in the ISR. */ for (;;) { uint32_t left; cb(cbarg); left = hv_ring_buffer_read_end(&chan->inbound); if (left == 0) { /* No more data in RX bufring; done */ break; } hv_ring_buffer_read_begin(&chan->inbound); } } static void vmbus_chan_task_nobatch(void *xchan, int pending __unused) { struct hv_vmbus_channel *chan = xchan; chan->ch_cb(chan->ch_cbarg); } static __inline void vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags, int flag_cnt) { int f; for (f = 0; f < flag_cnt; ++f) { uint32_t chid_base; u_long flags; int chid_ofs; if (event_flags[f] == 0) continue; flags = atomic_swap_long(&event_flags[f], 0); chid_base = f << VMBUS_EVTFLAG_SHIFT; while ((chid_ofs = ffsl(flags)) != 0) { struct hv_vmbus_channel *channel; --chid_ofs; /* NOTE: ffsl is 1-based */ flags &= ~(1UL << chid_ofs); channel = sc->vmbus_chmap[chid_base + chid_ofs]; /* if channel is closed or closing */ if (channel == NULL || channel->ch_tq == NULL) continue; if (channel->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) hv_ring_buffer_read_begin(&channel->inbound); taskqueue_enqueue(channel->ch_tq, &channel->ch_task); } } } void vmbus_event_proc(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; /* * On Host with Win8 or above, the event page can be checked directly * to get the id of the channel that has the pending interrupt. */ eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; vmbus_event_flags_proc(sc, eventf->evt_flags, VMBUS_PCPU_GET(sc, event_flags_cnt, cpu)); } void vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags, VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); } } static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc, const struct hv_vmbus_channel *chan) { volatile int *flag_cnt_ptr; int flag_cnt; flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1; flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid); for (;;) { int old_flag_cnt; old_flag_cnt = *flag_cnt_ptr; if (old_flag_cnt >= flag_cnt) break; if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { if (bootverbose) { device_printf(sc->vmbus_dev, "channel%u update cpu%d flag_cnt to %d\n", chan->ch_id, chan->ch_cpuid, flag_cnt); } break; } } } static struct hv_vmbus_channel * vmbus_chan_alloc(struct vmbus_softc *sc) { struct hv_vmbus_channel *chan; chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO); chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param), &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (chan->ch_monprm == NULL) { device_printf(sc->vmbus_dev, "monprm alloc failed\n"); free(chan, M_DEVBUF); return NULL; } chan->vmbus_sc = sc; mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF); TAILQ_INIT(&chan->ch_subchans); TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan); return chan; } static void vmbus_chan_free(struct hv_vmbus_channel *chan) { /* TODO: assert sub-channel list is empty */ /* TODO: asset no longer on the primary channel's sub-channel list */ /* TODO: asset no longer on the vmbus channel list */ hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm); mtx_destroy(&chan->ch_subchan_lock); free(chan, M_DEVBUF); } static int vmbus_chan_add(struct hv_vmbus_channel *newchan) { struct vmbus_softc *sc = newchan->vmbus_sc; struct hv_vmbus_channel *prichan; if (newchan->ch_id == 0) { /* * XXX * Chan0 will neither be processed nor should be offered; * skip it. */ device_printf(sc->vmbus_dev, "got chan0 offer, discard\n"); return EINVAL; } else if (newchan->ch_id >= VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid chan%u offer\n", newchan->ch_id); return EINVAL; } sc->vmbus_chmap[newchan->ch_id] = newchan; if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n", newchan->ch_id, newchan->ch_subidx); } mtx_lock(&sc->vmbus_prichan_lock); TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) { /* * Sub-channel will have the same type GUID and instance * GUID as its primary channel. */ if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type, sizeof(struct hyperv_guid)) == 0 && memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst, sizeof(struct hyperv_guid)) == 0) break; } if (VMBUS_CHAN_ISPRIMARY(newchan)) { if (prichan == NULL) { /* Install the new primary channel */ TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); return 0; } else { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "duplicated primary " "chan%u\n", newchan->ch_id); return EINVAL; } } else { /* Sub-channel */ if (prichan == NULL) { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "no primary chan for " "chan%u\n", newchan->ch_id); return EINVAL; } /* * Found the primary channel for this sub-channel and * move on. * * XXX refcnt prichan */ } mtx_unlock(&sc->vmbus_prichan_lock); /* * This is a sub-channel; link it with the primary channel. */ KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan), ("new channel is not sub-channel")); KASSERT(prichan != NULL, ("no primary channel")); newchan->ch_prichan = prichan; newchan->ch_dev = prichan->ch_dev; mtx_lock(&prichan->ch_subchan_lock); TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink); /* * Bump up sub-channel count and notify anyone that is * interested in this sub-channel, after this sub-channel * is setup. */ prichan->ch_subchan_cnt++; mtx_unlock(&prichan->ch_subchan_lock); wakeup(prichan); return 0; } void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); if (chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WS2008 || chan->vmbus_sc->vmbus_version == VMBUS_VERSION_WIN7) { /* Only cpu0 is supported */ cpu = 0; } chan->ch_cpuid = cpu; chan->ch_vcpuid = VMBUS_PCPU_GET(chan->vmbus_sc, vcpuid, cpu); if (bootverbose) { printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n", chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid); } } void vmbus_channel_cpu_rr(struct hv_vmbus_channel *chan) { static uint32_t vmbus_chan_nextcpu; int cpu; cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus; vmbus_channel_cpu_set(chan, cpu); } static void vmbus_chan_cpu_default(struct hv_vmbus_channel *chan) { /* * By default, pin the channel to cpu0. Devices having * special channel-cpu mapping requirement should call * vmbus_channel_cpu_{set,rr}(). */ vmbus_channel_cpu_set(chan, 0); } static void vmbus_chan_msgproc_choffer(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_choffer *offer; struct hv_vmbus_channel *chan; int error; offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data; chan = vmbus_chan_alloc(sc); if (chan == NULL) { device_printf(sc->vmbus_dev, "allocate chan%u failed\n", offer->chm_chanid); return; } chan->ch_id = offer->chm_chanid; chan->ch_subidx = offer->chm_subidx; chan->ch_guid_type = offer->chm_chtype; chan->ch_guid_inst = offer->chm_chinst; /* Batch reading is on by default */ chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT; if (sc->vmbus_version != VMBUS_VERSION_WS2008) chan->ch_monprm->mp_connid = offer->chm_connid; if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) { /* * Setup MNF stuffs. */ chan->ch_flags |= VMBUS_CHAN_FLAG_HASMNF; chan->ch_montrig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN; if (chan->ch_montrig_idx >= VMBUS_MONTRIGS_MAX) panic("invalid monitor trigger %u", offer->chm_montrig); chan->ch_montrig_mask = 1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN); } /* Select default cpu for this channel. */ vmbus_chan_cpu_default(chan); error = vmbus_chan_add(chan); if (error) { device_printf(sc->vmbus_dev, "add chan%u failed: %d\n", chan->ch_id, error); vmbus_chan_free(chan); return; } if (VMBUS_CHAN_ISPRIMARY(chan)) { /* * Add device for this primary channel. * * NOTE: * Error is ignored here; don't have much to do if error * really happens. */ vmbus_add_child(chan); } } /* * XXX pretty broken; need rework. */ static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_chrescind *note; struct hv_vmbus_channel *chan; note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data; if (note->chm_chanid > VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n", note->chm_chanid); return; } if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u rescinded\n", note->chm_chanid); } chan = sc->vmbus_chmap[note->chm_chanid]; if (chan == NULL) return; sc->vmbus_chmap[note->chm_chanid] = NULL; taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task); } static void vmbus_chan_detach_task(void *xchan, int pending __unused) { struct hv_vmbus_channel *chan = xchan; if (VMBUS_CHAN_ISPRIMARY(chan)) { /* Only primary channel owns the device */ vmbus_delete_child(chan); /* NOTE: DO NOT free primary channel for now */ } else { struct vmbus_softc *sc = chan->vmbus_sc; struct hv_vmbus_channel *pri_chan = chan->ch_prichan; struct vmbus_chanmsg_chfree *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chfree(chan%u)\n", chan->ch_id); goto remove; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chfree(chan%u) failed: %d", chan->ch_id, error); /* NOTE: Move on! */ } else { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u freed\n", chan->ch_id); } } remove: mtx_lock(&pri_chan->ch_subchan_lock); TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink); KASSERT(pri_chan->ch_subchan_cnt > 0, ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt)); pri_chan->ch_subchan_cnt--; mtx_unlock(&pri_chan->ch_subchan_lock); wakeup(pri_chan); vmbus_chan_free(chan); } } /* * Detach all devices and destroy the corresponding primary channels. */ void vmbus_chan_destroy_all(struct vmbus_softc *sc) { struct hv_vmbus_channel *chan; mtx_lock(&sc->vmbus_prichan_lock); while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) { KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel")); TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); vmbus_delete_child(chan); vmbus_chan_free(chan); mtx_lock(&sc->vmbus_prichan_lock); } bzero(sc->vmbus_chmap, sizeof(struct hv_vmbus_channel *) * VMBUS_CHAN_MAX); mtx_unlock(&sc->vmbus_prichan_lock); } /** * @brief Select the best outgoing channel * * The channel whose vcpu binding is closest to the currect vcpu will * be selected. * If no multi-channel, always select primary channel * * @param primary - primary channel */ struct hv_vmbus_channel * vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary) { hv_vmbus_channel *new_channel = NULL; hv_vmbus_channel *outgoing_channel = primary; int old_cpu_distance = 0; int new_cpu_distance = 0; int cur_vcpu = 0; int smp_pro_id = PCPU_GET(cpuid); if (TAILQ_EMPTY(&primary->ch_subchans)) { return outgoing_channel; } if (smp_pro_id >= MAXCPU) { return outgoing_channel; } cur_vcpu = VMBUS_PCPU_GET(primary->vmbus_sc, vcpuid, smp_pro_id); /* XXX need lock */ TAILQ_FOREACH(new_channel, &primary->ch_subchans, ch_sublink) { if ((new_channel->ch_stflags & VMBUS_CHAN_ST_OPENED) == 0) { continue; } if (new_channel->ch_vcpuid == cur_vcpu){ return new_channel; } old_cpu_distance = ((outgoing_channel->ch_vcpuid > cur_vcpu) ? (outgoing_channel->ch_vcpuid - cur_vcpu) : (cur_vcpu - outgoing_channel->ch_vcpuid)); new_cpu_distance = ((new_channel->ch_vcpuid > cur_vcpu) ? (new_channel->ch_vcpuid - cur_vcpu) : (cur_vcpu - new_channel->ch_vcpuid)); if (old_cpu_distance < new_cpu_distance) { continue; } outgoing_channel = new_channel; } return(outgoing_channel); } struct hv_vmbus_channel ** vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt) { struct hv_vmbus_channel **ret, *chan; int i; ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP, M_WAITOK); mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt < subchan_cnt) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0); i = 0; TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) { /* TODO: refcnt chan */ ret[i] = chan; ++i; if (i == subchan_cnt) break; } KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d", pri_chan->ch_subchan_cnt, subchan_cnt)); mtx_unlock(&pri_chan->ch_subchan_lock); return ret; } void vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused) { free(subchan, M_TEMP); } void vmbus_drain_subchan(struct hv_vmbus_channel *pri_chan) { mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt > 0) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0); mtx_unlock(&pri_chan->ch_subchan_lock); } void vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX, ("invalid message type %u", msg_type)); msg_proc = vmbus_chan_msgprocs[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); }