Page MenuHomeFreeBSD

D24061.diff
No OneTemporary

D24061.diff

Index: head/sys/conf/files.x86
===================================================================
--- head/sys/conf/files.x86
+++ head/sys/conf/files.x86
@@ -133,6 +133,7 @@
dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_tsc.c optional hwpmc
dev/hwpmc/hwpmc_x86.c optional hwpmc
+dev/hyperv/hvsock/hv_sock.c optional hyperv
dev/hyperv/input/hv_kbd.c optional hyperv
dev/hyperv/input/hv_kbdc.c optional hyperv
dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci
Index: head/sys/dev/hyperv/hvsock/hv_sock.h
===================================================================
--- head/sys/dev/hyperv/hvsock/hv_sock.h
+++ head/sys/dev/hyperv/hvsock/hv_sock.h
@@ -0,0 +1,122 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _HVSOCK_H
+#define _HVSOCK_H
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/queue.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+
+/*
+ * HyperV Socket Protocols
+ */
+#define HYPERV_SOCK_PROTO_TRANS 1 /* Transport protocol */
+
+#define HVADDR_PORT_ANY -1U
+#define HVADDR_PORT_UNKNOWN -1U
+
+#define HVS_LIST_BOUND 0x01
+#define HVS_LIST_CONNECTED 0x02
+#define HVS_LIST_ALL (HVS_LIST_BOUND | HVS_LIST_CONNECTED)
+
+struct sockaddr_hvs {
+ unsigned char sa_len;
+ sa_family_t sa_family;
+ unsigned int hvs_port;
+ unsigned char hvs_zero[sizeof(struct sockaddr) -
+ sizeof(sa_family_t) -
+ sizeof(unsigned char) -
+ sizeof(unsigned int)];
+};
+
+struct vmpipe_proto_header {
+ uint32_t vmpipe_pkt_type;
+ uint32_t vmpipe_data_size;
+} __packed;
+
+struct hvs_pkt_header {
+ struct vmbus_chanpkt_hdr chan_pkt_hdr;
+ struct vmpipe_proto_header vmpipe_pkt_hdr;
+} __packed;
+
+struct hvs_pcb {
+ struct socket *so; /* Pointer to socket */
+ struct sockaddr_hvs local_addr;
+ struct sockaddr_hvs remote_addr;
+
+ struct hyperv_guid vm_srv_id;
+ struct hyperv_guid host_srv_id;
+
+ struct vmbus_channel *chan;
+ /* Current packet header on rx ring */
+ struct hvs_pkt_header hvs_pkt;
+ /* Available data in receive br in current packet */
+ uint32_t recv_data_len;
+ /* offset in the packet */
+ uint32_t recv_data_off;
+ bool rb_init;
+ /* Link lists for global bound and connected sockets */
+ LIST_ENTRY(hvs_pcb) bound_next;
+ LIST_ENTRY(hvs_pcb) connected_next;
+};
+
+#define so2hvspcb(so) \
+ ((struct hvs_pcb *)((so)->so_pcb))
+#define hsvpcb2so(hvspcb) \
+ ((struct socket *)((hvspcb)->so))
+
+void hvs_addr_init(struct sockaddr_hvs *, const struct hyperv_guid *);
+void hvs_trans_init(void);
+void hvs_trans_close(struct socket *);
+void hvs_trans_detach(struct socket *);
+void hvs_trans_abort(struct socket *);
+int hvs_trans_attach(struct socket *, int, struct thread *);
+int hvs_trans_bind(struct socket *, struct sockaddr *, struct thread *);
+int hvs_trans_listen(struct socket *, int, struct thread *);
+int hvs_trans_accept(struct socket *, struct sockaddr **);
+int hvs_trans_connect(struct socket *,
+ struct sockaddr *, struct thread *);
+int hvs_trans_peeraddr(struct socket *, struct sockaddr **);
+int hvs_trans_sockaddr(struct socket *, struct sockaddr **);
+int hvs_trans_soreceive(struct socket *, struct sockaddr **,
+ struct uio *, struct mbuf **, struct mbuf **, int *);
+int hvs_trans_sosend(struct socket *, struct sockaddr *, struct uio *,
+ struct mbuf *, struct mbuf *, int, struct thread *);
+int hvs_trans_disconnect(struct socket *);
+int hvs_trans_shutdown(struct socket *);
+
+int hvs_trans_lock(void);
+void hvs_trans_unlock(void);
+
+void hvs_remove_socket_from_list(struct socket *, unsigned char);
+#endif /* _HVSOCK_H */
Index: head/sys/dev/hyperv/hvsock/hv_sock.c
===================================================================
--- head/sys/dev/hyperv/hvsock/hv_sock.c
+++ head/sys/dev/hyperv/hvsock/hv_sock.c
@@ -0,0 +1,1748 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/domain.h>
+#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/sockbuf.h>
+#include <sys/sx.h>
+#include <sys/uio.h>
+
+#include <net/vnet.h>
+
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+
+#include "hv_sock.h"
+
+#define HVSOCK_DBG_NONE 0x0
+#define HVSOCK_DBG_INFO 0x1
+#define HVSOCK_DBG_ERR 0x2
+#define HVSOCK_DBG_VERBOSE 0x3
+
+
+SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
+
+static int hvs_dbg_level;
+SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
+ 0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
+
+
+#define HVSOCK_DBG(level, ...) do { \
+ if (hvs_dbg_level >= (level)) \
+ printf(__VA_ARGS__); \
+ } while (0)
+
+MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
+
+/* The MTU is 16KB per host side's design */
+#define HVSOCK_MTU_SIZE (1024 * 16)
+#define HVSOCK_SEND_BUF_SZ (PAGE_SIZE - sizeof(struct vmpipe_proto_header))
+
+#define HVSOCK_HEADER_LEN (sizeof(struct hvs_pkt_header))
+
+#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \
+ roundup2(payload_len, 8) + \
+ sizeof(uint64_t))
+
+
+static struct domain hv_socket_domain;
+
+/*
+ * HyperV Transport sockets
+ */
+static struct pr_usrreqs hvs_trans_usrreqs = {
+ .pru_attach = hvs_trans_attach,
+ .pru_bind = hvs_trans_bind,
+ .pru_listen = hvs_trans_listen,
+ .pru_accept = hvs_trans_accept,
+ .pru_connect = hvs_trans_connect,
+ .pru_peeraddr = hvs_trans_peeraddr,
+ .pru_sockaddr = hvs_trans_sockaddr,
+ .pru_soreceive = hvs_trans_soreceive,
+ .pru_sosend = hvs_trans_sosend,
+ .pru_disconnect = hvs_trans_disconnect,
+ .pru_close = hvs_trans_close,
+ .pru_detach = hvs_trans_detach,
+ .pru_shutdown = hvs_trans_shutdown,
+ .pru_abort = hvs_trans_abort,
+};
+
+/*
+ * Definitions of protocols supported in HyperV socket domain
+ */
+static struct protosw hv_socket_protosw[] = {
+{
+ .pr_type = SOCK_STREAM,
+ .pr_domain = &hv_socket_domain,
+ .pr_protocol = HYPERV_SOCK_PROTO_TRANS,
+ .pr_flags = PR_CONNREQUIRED,
+ .pr_init = hvs_trans_init,
+ .pr_usrreqs = &hvs_trans_usrreqs,
+},
+};
+
+static struct domain hv_socket_domain = {
+ .dom_family = AF_HYPERV,
+ .dom_name = "hyperv",
+ .dom_protosw = hv_socket_protosw,
+ .dom_protoswNPROTOSW = &hv_socket_protosw[nitems(hv_socket_protosw)]
+};
+
+VNET_DOMAIN_SET(hv_socket_);
+
+#define MAX_PORT ((uint32_t)0xFFFFFFFF)
+#define MIN_PORT ((uint32_t)0x0)
+
+/* 00000000-facb-11e6-bd58-64006a7986d3 */
+static const struct hyperv_guid srv_id_template = {
+ .hv_guid = {
+ 0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
+ 0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
+};
+
+static int hvsock_br_callback(void *, int, void *);
+static uint32_t hvsock_canread_check(struct hvs_pcb *);
+static uint32_t hvsock_canwrite_check(struct hvs_pcb *);
+static int hvsock_send_data(struct vmbus_channel *chan,
+ struct uio *uio, uint32_t to_write, struct sockbuf *sb);
+
+
+
+/* Globals */
+static struct sx hvs_trans_socks_sx;
+static struct mtx hvs_trans_socks_mtx;
+static LIST_HEAD(, hvs_pcb) hvs_trans_bound_socks;
+static LIST_HEAD(, hvs_pcb) hvs_trans_connected_socks;
+static uint32_t previous_auto_bound_port;
+
+static void
+hvsock_print_guid(struct hyperv_guid *guid)
+{
+ unsigned char *p = (unsigned char *)guid;
+
+ HVSOCK_DBG(HVSOCK_DBG_INFO,
+ "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
+ *(unsigned int *)p,
+ *((unsigned short *) &p[4]),
+ *((unsigned short *) &p[6]),
+ p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
+}
+
+static bool
+is_valid_srv_id(const struct hyperv_guid *id)
+{
+ return !memcmp(&id->hv_guid[4],
+ &srv_id_template.hv_guid[4], sizeof(struct hyperv_guid) - 4);
+}
+
+static unsigned int
+get_port_by_srv_id(const struct hyperv_guid *srv_id)
+{
+ return *((const unsigned int *)srv_id);
+}
+
+static void
+set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
+{
+ *((unsigned int *)srv_id) = port;
+}
+
+
+static void
+__hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
+{
+ struct hvs_pcb *p = NULL;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
+
+ if (!pcb)
+ return;
+
+ if (list & HVS_LIST_BOUND) {
+ LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
+ if (p == pcb)
+ LIST_REMOVE(p, bound_next);
+ }
+
+ if (list & HVS_LIST_CONNECTED) {
+ LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
+ if (p == pcb)
+ LIST_REMOVE(pcb, connected_next);
+ }
+}
+
+static void
+__hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
+
+ __hvs_remove_pcb_from_list(pcb, list);
+}
+
+static void
+__hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+
+ if (list & HVS_LIST_BOUND)
+ LIST_INSERT_HEAD(&hvs_trans_bound_socks,
+ pcb, bound_next);
+
+ if (list & HVS_LIST_CONNECTED)
+ LIST_INSERT_HEAD(&hvs_trans_connected_socks,
+ pcb, connected_next);
+}
+
+void
+hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+ if (!so || !so->so_pcb) {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: socket or so_pcb is null\n", __func__);
+ return;
+ }
+
+ mtx_lock(&hvs_trans_socks_mtx);
+ __hvs_remove_socket_from_list(so, list);
+ mtx_unlock(&hvs_trans_socks_mtx);
+}
+
+static void
+hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+ if (!so || !so->so_pcb) {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: socket or so_pcb is null\n", __func__);
+ return;
+ }
+
+ mtx_lock(&hvs_trans_socks_mtx);
+ __hvs_insert_socket_on_list(so, list);
+ mtx_unlock(&hvs_trans_socks_mtx);
+}
+
+static struct socket *
+__hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+ struct hvs_pcb *p = NULL;
+
+ if (list & HVS_LIST_BOUND)
+ LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next)
+ if (p->so != NULL &&
+ addr->hvs_port == p->local_addr.hvs_port)
+ return p->so;
+
+ if (list & HVS_LIST_CONNECTED)
+ LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next)
+ if (p->so != NULL &&
+ addr->hvs_port == p->local_addr.hvs_port)
+ return p->so;
+
+ return NULL;
+}
+
+static struct socket *
+hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+ struct socket *s = NULL;
+
+ mtx_lock(&hvs_trans_socks_mtx);
+ s = __hvs_find_socket_on_list(addr, list);
+ mtx_unlock(&hvs_trans_socks_mtx);
+
+ return s;
+}
+
+static inline void
+hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
+{
+ memset(addr, 0, sizeof(*addr));
+ addr->sa_family = AF_HYPERV;
+ addr->hvs_port = port;
+}
+
+void
+hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
+{
+ hvs_addr_set(addr, get_port_by_srv_id(svr_id));
+}
+
+int
+hvs_trans_lock(void)
+{
+ sx_xlock(&hvs_trans_socks_sx);
+ return (0);
+}
+
+void
+hvs_trans_unlock(void)
+{
+ sx_xunlock(&hvs_trans_socks_sx);
+}
+
+void
+hvs_trans_init(void)
+{
+ /* Skip initialization of globals for non-default instances. */
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+
+ if (vm_guest != VM_GUEST_HV)
+ return;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_init called\n", __func__);
+
+ /* Initialize Globals */
+ previous_auto_bound_port = MAX_PORT;
+ sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
+ mtx_init(&hvs_trans_socks_mtx,
+ "hvs_trans_socks_mtx", NULL, MTX_DEF);
+ LIST_INIT(&hvs_trans_bound_socks);
+ LIST_INIT(&hvs_trans_connected_socks);
+}
+
+/*
+ * Called in two cases:
+ * 1) When user calls socket();
+ * 2) When we accept new incoming connection and call sonewconn().
+ */
+int
+hvs_trans_attach(struct socket *so, int proto, struct thread *td)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_attach called\n", __func__);
+
+ if (so->so_type != SOCK_STREAM)
+ return (ESOCKTNOSUPPORT);
+
+ if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS)
+ return (EPROTONOSUPPORT);
+
+ if (pcb != NULL)
+ return (EISCONN);
+ pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
+ if (pcb == NULL)
+ return (ENOMEM);
+
+ pcb->so = so;
+ so->so_pcb = (void *)pcb;
+
+ return (0);
+}
+
+void
+hvs_trans_detach(struct socket *so)
+{
+ struct hvs_pcb *pcb;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_detach called\n", __func__);
+
+ (void) hvs_trans_lock();
+ pcb = so2hvspcb(so);
+ if (pcb == NULL) {
+ hvs_trans_unlock();
+ return;
+ }
+
+ if (SOLISTENING(so)) {
+ bzero(pcb, sizeof(*pcb));
+ free(pcb, M_HVSOCK);
+ }
+
+ so->so_pcb = NULL;
+
+ hvs_trans_unlock();
+}
+
+int
+hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
+ int error = 0;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_bind called\n", __func__);
+
+ if (sa == NULL) {
+ return (EINVAL);
+ }
+
+ if (pcb == NULL) {
+ return (EINVAL);
+ }
+
+ if (sa->sa_family != AF_HYPERV) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: Not supported, sa_family is %u\n",
+ __func__, sa->sa_family);
+ return (EAFNOSUPPORT);
+ }
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
+
+ mtx_lock(&hvs_trans_socks_mtx);
+ if (__hvs_find_socket_on_list(sa,
+ HVS_LIST_BOUND | HVS_LIST_CONNECTED)) {
+ error = EADDRINUSE;
+ } else {
+ /*
+ * The address is available for us to bind.
+ * Add socket to the bound list.
+ */
+ hvs_addr_set(&pcb->local_addr, sa->hvs_port);
+ hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
+ __hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+ }
+ mtx_unlock(&hvs_trans_socks_mtx);
+
+ return (error);
+}
+
+int
+hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ struct socket *bound_so;
+ int error;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_listen called\n", __func__);
+
+ if (pcb == NULL)
+ return (EINVAL);
+
+ /* Check if the address is already bound and it was by us. */
+ bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
+ if (bound_so == NULL || bound_so != so) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: Address not bound or not by us.\n", __func__);
+ return (EADDRNOTAVAIL);
+ }
+
+ SOCK_LOCK(so);
+ error = solisten_proto_check(so);
+ if (error == 0)
+ solisten_proto(so, backlog);
+ SOCK_UNLOCK(so);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket listen error = %d\n", __func__, error);
+ return (error);
+}
+
+int
+hvs_trans_accept(struct socket *so, struct sockaddr **nam)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_accept called\n", __func__);
+
+ if (pcb == NULL)
+ return (EINVAL);
+
+ *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr,
+ M_NOWAIT);
+
+ return ((*nam == NULL) ? ENOMEM : 0);
+}
+
+int
+hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
+ bool found_auto_bound_port = false;
+ int i, error = 0;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
+ __func__, raddr->hvs_port);
+
+ if (pcb == NULL)
+ return (EINVAL);
+
+ /* Verify the remote address */
+ if (raddr == NULL)
+ return (EINVAL);
+ if (raddr->sa_family != AF_HYPERV)
+ return (EAFNOSUPPORT);
+
+ mtx_lock(&hvs_trans_socks_mtx);
+ if (so->so_state &
+ (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: socket connect in progress\n",
+ __func__);
+ error = EINPROGRESS;
+ goto out;
+ }
+
+ /*
+ * Find an available port for us to auto bind the local
+ * address.
+ */
+ hvs_addr_set(&pcb->local_addr, 0);
+
+ for (i = previous_auto_bound_port - 1;
+ i != previous_auto_bound_port; i --) {
+ if (i == MIN_PORT)
+ i = MAX_PORT;
+
+ pcb->local_addr.hvs_port = i;
+
+ if (__hvs_find_socket_on_list(&pcb->local_addr,
+ HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
+ found_auto_bound_port = true;
+ previous_auto_bound_port = i;
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: found local bound port is %x\n",
+ __func__, pcb->local_addr.hvs_port);
+ break;
+ }
+ }
+
+ if (found_auto_bound_port == true) {
+ /* Found available port for auto bound, put on list */
+ __hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+ /* Set VM service ID */
+ pcb->vm_srv_id = srv_id_template;
+ set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
+ /* Set host service ID and remote port */
+ pcb->host_srv_id = srv_id_template;
+ set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
+ hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
+
+ /* Change the socket state to SS_ISCONNECTING */
+ soisconnecting(so);
+ } else {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: No local port available for auto bound\n",
+ __func__);
+ error = EADDRINUSE;
+ }
+
+ HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
+ hvsock_print_guid(&pcb->vm_srv_id);
+ HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
+ hvsock_print_guid(&pcb->host_srv_id);
+
+out:
+ mtx_unlock(&hvs_trans_socks_mtx);
+
+ if (found_auto_bound_port == true)
+ vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
+
+ return (error);
+}
+
+int
+hvs_trans_disconnect(struct socket *so)
+{
+ struct hvs_pcb *pcb;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
+
+ (void) hvs_trans_lock();
+ pcb = so2hvspcb(so);
+ if (pcb == NULL) {
+ hvs_trans_unlock();
+ return (EINVAL);
+ }
+
+ /* If socket is already disconnected, skip this */
+ if ((so->so_state & SS_ISDISCONNECTED) == 0)
+ soisdisconnecting(so);
+
+ hvs_trans_unlock();
+
+ return (0);
+}
+
+#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
+struct hvs_callback_arg {
+ struct uio *uio;
+ struct sockbuf *sb;
+};
+
+int
+hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
+ struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ struct sockbuf *sb;
+ ssize_t orig_resid;
+ uint32_t canread, to_read;
+ int flags, error = 0;
+ struct hvs_callback_arg cbarg;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
+
+ if (so->so_type != SOCK_STREAM)
+ return (EINVAL);
+ if (pcb == NULL)
+ return (EINVAL);
+
+ if (flagsp != NULL)
+ flags = *flagsp &~ MSG_EOR;
+ else
+ flags = 0;
+
+ if (flags & MSG_PEEK)
+ return (EOPNOTSUPP);
+
+ /* If no space to copy out anything */
+ if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
+ return (EINVAL);
+
+ sb = &so->so_rcv;
+
+ orig_resid = uio->uio_resid;
+
+ /* Prevent other readers from entering the socket. */
+ error = sblock(sb, SBLOCKWAIT(flags));
+ if (error) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: sblock returned error = %d\n", __func__, error);
+ return (error);
+ }
+
+ SOCKBUF_LOCK(sb);
+
+ cbarg.uio = uio;
+ cbarg.sb = sb;
+ /*
+ * If the socket is closing, there might still be some data
+ * in rx br to read. However we need to make sure
+ * the channel is still open.
+ */
+ if ((sb->sb_state & SBS_CANTRCVMORE) &&
+ (so->so_state & SS_ISDISCONNECTED)) {
+ /* Other thread already closed the channel */
+ error = EPIPE;
+ goto out;
+ }
+
+ while (true) {
+ while (uio->uio_resid > 0 &&
+ (canread = hvsock_canread_check(pcb)) > 0) {
+ to_read = MIN(canread, uio->uio_resid);
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: to_read = %u, skip = %u\n", __func__, to_read,
+ (unsigned int)(sizeof(struct hvs_pkt_header) +
+ pcb->recv_data_off));
+
+ error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
+ sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
+ hvsock_br_callback, (void *)&cbarg);
+ /*
+ * It is possible socket is disconnected because
+ * we released lock in hvsock_br_callback. So we
+ * need to check the state to make sure it is not
+ * disconnected.
+ */
+ if (error || so->so_state & SS_ISDISCONNECTED) {
+ break;
+ }
+
+ pcb->recv_data_len -= to_read;
+ pcb->recv_data_off += to_read;
+ }
+
+ if (error)
+ break;
+
+ /* Abort if socket has reported problems. */
+ if (so->so_error) {
+ if (so->so_error == ESHUTDOWN &&
+ orig_resid > uio->uio_resid) {
+ /*
+ * Although we got a FIN, we also received
+ * some data in this round. Deliver it
+ * to user.
+ */
+ error = 0;
+ } else {
+ if (so->so_error != ESHUTDOWN)
+ error = so->so_error;
+ }
+
+ break;
+ }
+
+ /* Cannot received more. */
+ if (sb->sb_state & SBS_CANTRCVMORE)
+ break;
+
+ /* We are done if buffer has been filled */
+ if (uio->uio_resid == 0)
+ break;
+
+ if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
+ break;
+
+ /* Buffer ring is empty and we shall not block */
+ if ((so->so_state & SS_NBIO) ||
+ (flags & (MSG_DONTWAIT|MSG_NBIO))) {
+ if (orig_resid == uio->uio_resid) {
+ /* We have not read anything */
+ error = EAGAIN;
+ }
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: non blocked read return, error %d.\n",
+ __func__, error);
+ break;
+ }
+
+ /*
+ * Wait and block until (more) data comes in.
+ * Note: Drops the sockbuf lock during wait.
+ */
+ error = sbwait(sb);
+
+ if (error)
+ break;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: wake up from sbwait, read available is %u\n",
+ __func__, vmbus_chan_read_available(pcb->chan));
+ }
+
+out:
+ SOCKBUF_UNLOCK(sb);
+
+ sbunlock(sb);
+
+ /* We received a FIN in this call */
+ if (so->so_error == ESHUTDOWN) {
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+ /* Send has already closed */
+ soisdisconnecting(so);
+ } else {
+ /* Just close the receive side */
+ socantrcvmore(so);
+ }
+ }
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: returning error = %d, so_error = %d\n",
+ __func__, error, so->so_error);
+
+ return (error);
+}
+
+int
+hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
+ struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ struct sockbuf *sb;
+ ssize_t orig_resid;
+ uint32_t canwrite, to_write;
+ int error = 0;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %lu\n",
+ __func__, uio->uio_resid);
+
+ if (so->so_type != SOCK_STREAM)
+ return (EINVAL);
+ if (pcb == NULL)
+ return (EINVAL);
+
+ /* If nothing to send */
+ if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
+ return (EINVAL);
+
+ sb = &so->so_snd;
+
+ orig_resid = uio->uio_resid;
+
+ /* Prevent other writers from entering the socket. */
+ error = sblock(sb, SBLOCKWAIT(flags));
+ if (error) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: sblock returned error = %d\n", __func__, error);
+ return (error);
+ }
+
+ SOCKBUF_LOCK(sb);
+
+ if ((sb->sb_state & SBS_CANTSENDMORE) ||
+ so->so_error == ESHUTDOWN) {
+ error = EPIPE;
+ goto out;
+ }
+
+ while (uio->uio_resid > 0) {
+ canwrite = hvsock_canwrite_check(pcb);
+ if (canwrite == 0) {
+ /* We have sent some data */
+ if (orig_resid > uio->uio_resid)
+ break;
+ /*
+ * We have not sent any data and it is
+ * non-blocked io
+ */
+ if (so->so_state & SS_NBIO ||
+ (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
+ error = EWOULDBLOCK;
+ break;
+ } else {
+ /*
+ * We are here because there is no space on
+ * send buffer ring. Signal the other side
+ * to read and free more space.
+ * Sleep wait until space available to send
+ * Note: Drops the sockbuf lock during wait.
+ */
+ error = sbwait(sb);
+
+ if (error)
+ break;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: wake up from sbwait, space avail on "
+ "tx ring is %u\n",
+ __func__,
+ vmbus_chan_write_available(pcb->chan));
+
+ continue;
+ }
+ }
+ to_write = MIN(canwrite, uio->uio_resid);
+ to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: canwrite is %u, to_write = %u\n", __func__,
+ canwrite, to_write);
+ error = hvsock_send_data(pcb->chan, uio, to_write, sb);
+
+ if (error)
+ break;
+ }
+
+out:
+ SOCKBUF_UNLOCK(sb);
+ sbunlock(sb);
+
+ return (error);
+}
+
+int
+hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
+
+ if (pcb == NULL)
+ return (EINVAL);
+
+ *nam = sodupsockaddr((struct sockaddr *) &pcb->remote_addr, M_NOWAIT);
+
+ return ((*nam == NULL)? ENOMEM : 0);
+}
+
+int
+hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
+
+ if (pcb == NULL)
+ return (EINVAL);
+
+ *nam = sodupsockaddr((struct sockaddr *) &pcb->local_addr, M_NOWAIT);
+
+ return ((*nam == NULL)? ENOMEM : 0);
+}
+
+void
+hvs_trans_close(struct socket *so)
+{
+ struct hvs_pcb *pcb;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_close called\n", __func__);
+
+ (void) hvs_trans_lock();
+ pcb = so2hvspcb(so);
+ if (!pcb) {
+ hvs_trans_unlock();
+ return;
+ }
+
+ if (so->so_state & SS_ISCONNECTED) {
+ /* Send a FIN to peer */
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: hvs_trans_close sending a FIN to host\n", __func__);
+ (void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
+ }
+
+ if (so->so_state &
+ (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
+ soisdisconnected(so);
+
+ pcb->chan = NULL;
+ pcb->so = NULL;
+
+ if (SOLISTENING(so)) {
+ mtx_lock(&hvs_trans_socks_mtx);
+ /* Remove from bound list */
+ __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+ mtx_unlock(&hvs_trans_socks_mtx);
+ }
+
+ hvs_trans_unlock();
+
+ return;
+}
+
+void
+hvs_trans_abort(struct socket *so)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_abort called\n", __func__);
+
+ (void) hvs_trans_lock();
+ if (pcb == NULL) {
+ hvs_trans_unlock();
+ return;
+ }
+
+ if (SOLISTENING(so)) {
+ mtx_lock(&hvs_trans_socks_mtx);
+ /* Remove from bound list */
+ __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+ mtx_unlock(&hvs_trans_socks_mtx);
+ }
+
+ if (so->so_state & SS_ISCONNECTED) {
+ (void) sodisconnect(so);
+ }
+ hvs_trans_unlock();
+
+ return;
+}
+
+int
+hvs_trans_shutdown(struct socket *so)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ struct sockbuf *sb;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_shutdown called\n", __func__);
+
+ if (pcb == NULL)
+ return (EINVAL);
+
+ /*
+ * Only get called with the shutdown method is SHUT_WR or
+ * SHUT_RDWR.
+ * When the method is SHUT_RD or SHUT_RDWR, the caller
+ * already set the SBS_CANTRCVMORE on receive side socket
+ * buffer.
+ */
+ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
+ /*
+ * SHUT_WR only case.
+ * Receive side is still open. Just close
+ * the send side.
+ */
+ socantsendmore(so);
+ } else {
+ /* SHUT_RDWR case */
+ if (so->so_state & SS_ISCONNECTED) {
+ /* Send a FIN to peer */
+ sb = &so->so_snd;
+ SOCKBUF_LOCK(sb);
+ (void) hvsock_send_data(pcb->chan, NULL, 0, sb);
+ SOCKBUF_UNLOCK(sb);
+
+ soisdisconnecting(so);
+ }
+ }
+
+ return (0);
+}
+
+/* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is
+ * <port> (see struct sockaddr_hvs).
+ *
+ * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
+ * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
+ * the below sockaddr:
+ *
+ * struct SOCKADDR_HV
+ * {
+ * ADDRESS_FAMILY Family;
+ * USHORT Reserved;
+ * GUID VmId;
+ * GUID ServiceId;
+ * };
+ * Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via
+ * VMBus, because here it's obvious the host and the VM can easily identify
+ * each other. Though the VmID is useful on the host, especially in the case
+ * of Windows container, FreeBSD VM doesn't need it at all.
+ *
+ * To be compatible with similar infrastructure in Linux VMs, we have
+ * to limit the available GUID space of SOCKADDR_HV so that we can create
+ * a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID.
+ * The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is:
+ *
+ ****************************************************************************
+ * The only valid Service GUIDs, from the perspectives of both the host and *
+ * FreeBSD VM, that can be connected by the other end, must conform to this *
+ * format: <port>-facb-11e6-bd58-64006a7986d3. *
+ ****************************************************************************
+ *
+ * When we write apps on the host to connect(), the GUID ServiceID is used.
+ * When we write apps in FreeBSD VM to connect(), we only need to specify the
+ * port and the driver will form the GUID and use that to request the host.
+ *
+ * From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the
+ * auto-generated remote port for a connect request initiated by the host's
+ * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
+ * FreeBSD guest.
+ */
+
+/*
+ * Older HyperV hosts (vmbus version 'VMBUS_VERSION_WIN10' or before)
+ * restricts HyperV socket ring buffer size to six 4K pages. Newer
+ * HyperV hosts don't have this limit.
+ */
+#define HVS_RINGBUF_RCV_SIZE (PAGE_SIZE * 6)
+#define HVS_RINGBUF_SND_SIZE (PAGE_SIZE * 6)
+#define HVS_RINGBUF_MAX_SIZE (PAGE_SIZE * 64)
+
+/*
+ * Per-channel driver context: one hv_sock device instance is created
+ * for each vmbus channel that carries a Hyper-V socket connection.
+ */
+struct hvsock_sc {
+ device_t dev;
+ struct hvs_pcb *pcb;
+ struct vmbus_channel *channel;
+};
+
+/*
+ * A channel is considered readable once it holds at least one
+ * complete (possibly zero-payload) hvsock packet.
+ */
+static bool
+hvsock_chan_readable(struct vmbus_channel *chan)
+{
+ return (vmbus_chan_read_available(chan) >= HVSOCK_PKT_LEN(0));
+}
+
+/*
+ * Channel callback, invoked when the host signals this channel:
+ * wakes up a sleeping reader if a complete packet is available and
+ * a sleeping writer if enough TX ring space has opened up.
+ */
+static void
+hvsock_chan_cb(struct vmbus_channel *chan, void *context)
+{
+ struct hvs_pcb *pcb = (struct hvs_pcb *) context;
+ struct socket *so;
+ uint32_t canwrite;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: host send us a wakeup on rb data, pcb = %p\n",
+ __func__, pcb);
+
+ /*
+ * Check if the socket is still attached and valid.
+ * Here we know channel is still open. Need to make
+ * sure the socket has not been closed or freed.
+ */
+ (void) hvs_trans_lock();
+ so = hsvpcb2so(pcb);
+
+ if (pcb->chan != NULL && so != NULL) {
+ /*
+ * Wake up reader if there are data to read.
+ */
+ SOCKBUF_LOCK(&(so)->so_rcv);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: read available = %u\n", __func__,
+ vmbus_chan_read_available(pcb->chan));
+
+ /* sorwakeup_locked() consumes the sockbuf lock. */
+ if (hvsock_chan_readable(pcb->chan))
+ sorwakeup_locked(so);
+ else
+ SOCKBUF_UNLOCK(&(so)->so_rcv);
+
+ /*
+ * Wake up sender if space becomes available to write.
+ */
+ SOCKBUF_LOCK(&(so)->so_snd);
+ canwrite = hvsock_canwrite_check(pcb);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: canwrite = %u\n", __func__, canwrite);
+
+ /* sowwakeup_locked() likewise consumes the lock. */
+ if (canwrite > 0) {
+ sowwakeup_locked(so);
+ } else {
+ SOCKBUF_UNLOCK(&(so)->so_snd);
+ }
+ }
+
+ hvs_trans_unlock();
+
+ return;
+}
+
+/*
+ * Bufring copy callback: moves 'cplen' bytes between the ring buffer
+ * fragment 'datap' and the caller's uio.  The socket-buffer lock (if
+ * any) is dropped around uiomove(), which may fault/sleep while
+ * copying to or from user space.
+ *
+ * Fix: validate 'cbarg' BEFORE dereferencing it; the original code
+ * read arg->uio first and only then checked cbarg for NULL.
+ */
+static int
+hvsock_br_callback(void *datap, int cplen, void *cbarg)
+{
+ struct hvs_callback_arg *arg;
+ struct uio *uio;
+ struct sockbuf *sb;
+ int error = 0;
+
+ if (cbarg == NULL || datap == NULL)
+ return (EINVAL);
+
+ arg = (struct hvs_callback_arg *)cbarg;
+ uio = arg->uio;
+ sb = arg->sb;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: called, uio_rw = %s, uio_resid = %lu, cplen = %u, "
+ "datap = %p\n",
+ __func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br",
+ uio->uio_resid, cplen, datap);
+
+ if (sb)
+ SOCKBUF_UNLOCK(sb);
+
+ error = uiomove(datap, cplen, uio);
+
+ if (sb)
+ SOCKBUF_LOCK(sb);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: after uiomove, uio_resid = %lu, error = %d\n",
+ __func__, uio->uio_resid, error);
+
+ return (error);
+}
+
+/*
+ * Write up to 'to_write' bytes from 'uio' to the channel's TX bufring
+ * as one VMBUS in-band packet (vmpipe header + payload + padding up
+ * to the 8-byte-aligned total length).  A zero-length write sends the
+ * hvsock shutdown (FIN) packet.  'sb', if non-NULL, is the locked
+ * send sockbuf; hvsock_br_callback() unlocks it around uiomove().
+ */
+static int
+hvsock_send_data(struct vmbus_channel *chan, struct uio *uio,
+ uint32_t to_write, struct sockbuf *sb)
+{
+ struct hvs_pkt_header hvs_pkt;
+ int hvs_pkthlen, hvs_pktlen, pad_pktlen, hlen, error = 0;
+ uint64_t pad = 0;
+ struct iovec iov[3];
+ struct hvs_callback_arg cbarg;
+
+ if (chan == NULL)
+ return (ENOTCONN);
+
+ hlen = sizeof(struct vmbus_chanpkt_hdr);
+ hvs_pkthlen = sizeof(struct hvs_pkt_header);
+ hvs_pktlen = hvs_pkthlen + to_write;
+ pad_pktlen = VMBUS_CHANPKT_TOTLEN(hvs_pktlen);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, "
+ "pad_pktlen = %u, data_len = %u\n",
+ __func__, hlen, hvs_pkthlen, hvs_pktlen, pad_pktlen, to_write);
+
+ hvs_pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND;
+ hvs_pkt.chan_pkt_hdr.cph_flags = 0;
+ VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_hlen, hlen);
+ VMBUS_CHANPKT_SETLEN(hvs_pkt.chan_pkt_hdr.cph_tlen, pad_pktlen);
+ hvs_pkt.chan_pkt_hdr.cph_xactid = 0;
+
+ hvs_pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1;
+ hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write;
+
+ cbarg.uio = uio;
+ cbarg.sb = sb;
+
+ if (uio && to_write > 0) {
+ /*
+ * The NULL iov_base fragment tells vmbus_chan_iov_send()
+ * to fill that span via hvsock_br_callback() (uiomove).
+ */
+ iov[0].iov_base = &hvs_pkt;
+ iov[0].iov_len = hvs_pkthlen;
+ iov[1].iov_base = NULL;
+ iov[1].iov_len = to_write;
+ iov[2].iov_base = &pad;
+ iov[2].iov_len = pad_pktlen - hvs_pktlen;
+
+ error = vmbus_chan_iov_send(chan, iov, 3,
+ hvsock_br_callback, &cbarg);
+ } else {
+ if (to_write == 0) {
+ /* Zero-payload packet: header + padding only. */
+ iov[0].iov_base = &hvs_pkt;
+ iov[0].iov_len = hvs_pkthlen;
+ iov[1].iov_base = &pad;
+ iov[1].iov_len = pad_pktlen - hvs_pktlen;
+ error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL);
+ }
+ }
+
+ if (error) {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: error = %d\n", __func__, error);
+ }
+
+ return (error);
+}
+
+/*
+ * Check if we have data on the current ring buffer to read
+ * or not. If not, advance the ring buffer read index to the
+ * next packet. Update recv_data_len and recv_data_off to the
+ * new values.
+ * Return the number of bytes that can be read.
+ */
+static uint32_t
+hvsock_canread_check(struct hvs_pcb *pcb)
+{
+ uint32_t advance;
+ uint32_t tlen, hlen, dlen;
+ uint32_t bytes_canread = 0;
+ int error;
+
+ /*
+ * Fix: a NULL pcb must be handled before touching pcb->so;
+ * the original code dereferenced pcb->so even when pcb == NULL.
+ */
+ if (pcb == NULL)
+ return (0);
+
+ if (pcb->chan == NULL) {
+ pcb->so->so_error = EIO;
+ return (0);
+ }
+
+ /* Still have data not read yet on current packet */
+ if (pcb->recv_data_len > 0)
+ return (pcb->recv_data_len);
+
+ /*
+ * If a previous packet header was peeked (rb_init), the read
+ * index must be advanced past that whole packet first.
+ */
+ if (pcb->rb_init)
+ advance =
+ VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
+ else
+ advance = 0;
+
+ bytes_canread = vmbus_chan_read_available(pcb->chan);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: bytes_canread on br = %u, advance = %u\n",
+ __func__, bytes_canread, advance);
+
+ if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) {
+ /*
+ * Nothing to read. Need to advance the rindex before
+ * calling sbwait, so host knows to wake us up when data
+ * is available to read on rb.
+ */
+ error = vmbus_chan_recv_idxadv(pcb->chan, advance);
+ if (error) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: after calling vmbus_chan_recv_idxadv, "
+ "got error = %d\n", __func__, error);
+ return (0);
+ } else {
+ pcb->rb_init = false;
+ pcb->recv_data_len = 0;
+ pcb->recv_data_off = 0;
+ bytes_canread = vmbus_chan_read_available(pcb->chan);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: advanced %u bytes, "
+ " bytes_canread on br now = %u\n",
+ __func__, advance, bytes_canread);
+
+ if (bytes_canread == 0)
+ return (0);
+ else
+ advance = 0;
+ }
+ }
+
+ /*
+ * A complete header plus the trailing 64-bit write offset must
+ * be present before peeking.
+ */
+ if (bytes_canread <
+ advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t)))
+ return (0);
+
+ error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt,
+ sizeof(struct hvs_pkt_header), advance);
+
+ /* Don't have anything to read */
+ if (error) {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: after calling vmbus_chan_recv_peek, got error = %d\n",
+ __func__, error);
+ return (0);
+ }
+
+ /*
+ * We just read in a new packet header. Do some sanity checks.
+ */
+ tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
+ hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen);
+ dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size;
+ if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) ||
+ __predict_false(hlen > tlen) ||
+ __predict_false(tlen < dlen + sizeof(struct hvs_pkt_header))) {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "invalid tlen(%u), hlen(%u) or dlen(%u)\n",
+ tlen, hlen, dlen);
+ pcb->so->so_error = EIO;
+ return (0);
+ }
+ if (pcb->rb_init == false)
+ pcb->rb_init = true;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n",
+ tlen, hlen, dlen);
+
+ /* The other side has sent a close FIN */
+ if (dlen == 0) {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: Received FIN from other side\n", __func__);
+ /* inform the caller by setting so_error to ESHUTDOWN */
+ pcb->so->so_error = ESHUTDOWN;
+ }
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: canread on receive ring is %u \n", __func__, dlen);
+
+ pcb->recv_data_len = dlen;
+ pcb->recv_data_off = 0;
+
+ return (pcb->recv_data_len);
+}
+
+/*
+ * Return the number of payload bytes (rounded down to 8) that can
+ * currently be written to the TX ring, after reserving room for a
+ * final zero-payload FIN packet; 0 means the ring is full.
+ */
+static uint32_t
+hvsock_canwrite_check(struct hvs_pcb *pcb)
+{
+ uint32_t writeable;
+ uint32_t ret;
+
+ if (pcb == NULL || pcb->chan == NULL)
+ return (0);
+
+ writeable = vmbus_chan_write_available(pcb->chan);
+
+ /*
+ * We must always reserve a 0-length-payload packet for the FIN.
+ */
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: writeable is %u, should be greater than %lu\n",
+ __func__, writeable, HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0));
+
+ if (writeable < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0)) {
+ /*
+ * The Tx ring seems full.
+ */
+ return (0);
+ }
+
+ /*
+ * NOTE(review): the guard above reserves HVSOCK_PKT_LEN(1) +
+ * HVSOCK_PKT_LEN(0), but only two zero-payload packet lengths
+ * are subtracted here -- confirm the intended reservation size.
+ */
+ ret = writeable - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0);
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: available size is %u\n", __func__, rounddown2(ret, 8));
+
+ return (rounddown2(ret, 8));
+}
+
+/*
+ * Ask the host to signal us once HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ)
+ * bytes of TX ring space become available, so a blocked sender can
+ * be woken up (see the pending_snd_sz handling in vmbus_br.c).
+ */
+static void
+hvsock_set_chan_pending_send_size(struct vmbus_channel *chan)
+{
+ vmbus_chan_set_pending_send_size(chan,
+ HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ));
+}
+
+/*
+ * Open the vmbus channel backing a hvsock connection.  Hosts before
+ * VMBUS_VERSION_WIN10_V5 only support the fixed six-page ring size;
+ * newer hosts honor the socket buffer high-water marks, clamped to
+ * HVS_RINGBUF_MAX_SIZE and rounded down to a page multiple.
+ *
+ * Fix: the success debug message had a stray 'i' spliced into the
+ * format string ("sndbuf = %u, i" "rcvbuf = %u"), which printed
+ * "ircvbuf" in the log.
+ */
+static int
+hvsock_open_channel(struct vmbus_channel *chan, struct socket *so)
+{
+ unsigned int rcvbuf, sndbuf;
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ int ret;
+
+ if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) {
+ sndbuf = HVS_RINGBUF_SND_SIZE;
+ rcvbuf = HVS_RINGBUF_RCV_SIZE;
+ } else {
+ sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE);
+ sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE);
+ sndbuf = rounddown2(sndbuf, PAGE_SIZE);
+ rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE);
+ rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE);
+ rcvbuf = rounddown2(rcvbuf, PAGE_SIZE);
+ }
+
+ /*
+ * Can only read whatever user provided size of data
+ * from ring buffer. Turn off batched reading.
+ */
+ vmbus_chan_set_readbatch(chan, false);
+
+ ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0,
+ hvsock_chan_cb, pcb);
+
+ if (ret != 0) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: failed to open hvsock channel, sndbuf = %u, "
+ "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
+ } else {
+ HVSOCK_DBG(HVSOCK_DBG_INFO,
+ "%s: hvsock channel opened, sndbuf = %u, "
+ "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
+ /*
+ * Set the pending send size so we receive wakeup
+ * signals from host when there is enough space on
+ * rx buffer ring to write.
+ */
+ hvsock_set_chan_pending_send_size(chan);
+ }
+
+ return (ret);
+}
+
+/*
+ * Guest is listening passively on the socket. Open channel and
+ * create a new socket for the connection.
+ */
+static void
+hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so,
+ struct hvsock_sc *sc)
+{
+ struct socket *new_so;
+ struct hvs_pcb *new_pcb, *pcb;
+ int error;
+
+ /* Do nothing if socket is not listening */
+ if ((so->so_options & SO_ACCEPTCONN) == 0) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: socket is not a listening one\n", __func__);
+ return;
+ }
+
+ /*
+ * Create a new socket. This will call pru_attach to complete
+ * the socket initialization and put the new socket onto
+ * listening socket's sol_incomp list, waiting to be promoted
+ * to sol_comp list.
+ * The new socket created has ref count 0. There is no other
+ * thread that changes the state of this new one at the
+ * moment, so we don't need to hold its lock while opening
+ * channel and filling out its pcb information.
+ */
+ new_so = sonewconn(so, 0);
+ if (!new_so) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: creating new socket failed\n", __func__);
+ /*
+ * Fix: without a new socket there is nothing to attach
+ * the channel to; bail out instead of passing NULL to
+ * hvsock_open_channel() below.
+ */
+ return;
+ }
+
+ /*
+ * Now open the vmbus channel. If it fails, the socket will be
+ * on the listening socket's sol_incomp queue until it is
+ * replaced and aborted.
+ */
+ error = hvsock_open_channel(chan, new_so);
+ if (error) {
+ new_so->so_error = error;
+ return;
+ }
+
+ pcb = so->so_pcb;
+ new_pcb = new_so->so_pcb;
+
+ hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port);
+ /* Remote port is unknown to guest in this type of connection */
+ hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN);
+ new_pcb->chan = chan;
+ new_pcb->recv_data_len = 0;
+ new_pcb->recv_data_off = 0;
+ new_pcb->rb_init = false;
+
+ new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan);
+ new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan);
+
+ hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED);
+
+ sc->pcb = new_pcb;
+
+ /*
+ * Change the socket state to SS_ISCONNECTED. This will promote
+ * the socket to sol_comp queue and wake up the thread which
+ * is accepting connection.
+ */
+ soisconnected(new_so);
+}
+
+
+/*
+ * Guest is actively connecting to host: finish the connection on the
+ * channel the host offered in response to our connect request.
+ */
+static void
+hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so)
+{
+ struct hvs_pcb *pcb;
+ int error;
+
+ error = hvsock_open_channel(chan, so);
+ if (error) {
+ so->so_error = error;
+ return;
+ }
+
+ pcb = so->so_pcb;
+ pcb->chan = chan;
+ pcb->recv_data_len = 0;
+ pcb->recv_data_off = 0;
+ pcb->rb_init = false;
+
+ /* Move the socket from the bound list to the connected list. */
+ mtx_lock(&hvs_trans_socks_mtx);
+ __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+ __hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED);
+ mtx_unlock(&hvs_trans_socks_mtx);
+
+ /*
+ * Change the socket state to SS_ISCONNECTED. This will wake up
+ * the thread sleeping in connect call.
+ */
+ soisconnected(so);
+}
+
+/*
+ * Entry point for a newly offered hvsock channel: match it to a
+ * bound socket and finish either the passive (host-initiated) or
+ * active (guest-initiated) side of the connection.
+ */
+static void
+hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc)
+{
+ struct hyperv_guid *inst_guid, *type_guid;
+ bool conn_from_host;
+ struct sockaddr_hvs addr;
+ struct socket *so;
+ struct hvs_pcb *pcb;
+
+ type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan);
+ inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan);
+ conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan);
+
+ HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is ");
+ hvsock_print_guid(type_guid);
+ HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is ");
+ hvsock_print_guid(inst_guid);
+ HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n",
+ (conn_from_host == true ) ? "from" : "to");
+
+ /*
+ * The listening port should be in [0, MAX_LISTEN_PORT]
+ */
+ if (!is_valid_srv_id(type_guid))
+ return;
+
+ /*
+ * There should be a bound socket already created, whether
+ * this is a passive or an active connection.
+ * For a host-initiated connection (passive on the guest side),
+ * the type_guid contains the port the guest is bound to and
+ * listening on.
+ * For a guest-initiated connection (active on the guest side),
+ * the inst_guid contains the port that the guest has auto-bound
+ * to.
+ */
+ hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid);
+ so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND);
+ if (!so) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: no bound socket found for port %u\n",
+ __func__, addr.hvs_port);
+ return;
+ }
+
+ if (conn_from_host) {
+ hvsock_open_conn_passive(chan, so, sc);
+ } else {
+ /* Recheck under the lock that the pcb is still attached. */
+ (void) hvs_trans_lock();
+ pcb = so->so_pcb;
+ if (pcb && pcb->so) {
+ sc->pcb = so2hvspcb(so);
+ hvsock_open_conn_active(chan, so);
+ } else {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: channel detached before open\n", __func__);
+ }
+ hvs_trans_unlock();
+ }
+
+}
+
+/*
+ * Device probe: accept only vmbus channels that carry a Hyper-V
+ * socket connection.
+ *
+ * Fix: a NULL channel was previously passed straight to
+ * vmbus_chan_id() inside the debug message; handle NULL separately.
+ */
+static int
+hvsock_probe(device_t dev)
+{
+ struct vmbus_channel *channel = vmbus_get_channel(dev);
+
+ if (channel == NULL) {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "hvsock_probe called without a channel\n");
+
+ return ENXIO;
+ }
+
+ if (!vmbus_chan_is_hvs(channel)) {
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "hvsock_probe called but not a hvsock channel id %u\n",
+ vmbus_chan_id(channel));
+
+ return ENXIO;
+ }
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "hvsock_probe got a hvsock channel id %u\n",
+ vmbus_chan_id(channel));
+
+ return BUS_PROBE_DEFAULT;
+}
+
+/*
+ * Device attach: hand the new channel to the connection-setup logic.
+ * Always reports success; on error the host will rescind the device
+ * in 30 seconds and cleanup happens at that time in
+ * vmbus_chan_msgproc_chrescind().
+ */
+static int
+hvsock_attach(device_t dev)
+{
+ struct vmbus_channel *chan;
+ struct hvsock_sc *sc;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n");
+
+ chan = vmbus_get_channel(dev);
+ sc = device_get_softc(dev);
+ hvsock_open_connection(chan, sc);
+
+ return (0);
+}
+
+/*
+ * Device detach, run when the host rescinds the channel.  Marks the
+ * socket disconnected, unlinks the pcb from the bound/connected
+ * lists, waits for any in-flight reader/sender to release the
+ * sockbuf locks, then frees the pcb and closes the channel.
+ */
+static int
+hvsock_detach(device_t dev)
+{
+ struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
+ struct socket *so;
+ int error, retry;
+
+ if (bootverbose)
+ device_printf(dev, "hvsock_detach called.\n");
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n");
+
+ if (sc->pcb != NULL) {
+ (void) hvs_trans_lock();
+
+ so = hsvpcb2so(sc->pcb);
+ if (so) {
+ /* Close the connection */
+ if (so->so_state &
+ (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
+ soisdisconnected(so);
+ }
+
+ mtx_lock(&hvs_trans_socks_mtx);
+ __hvs_remove_pcb_from_list(sc->pcb,
+ HVS_LIST_BOUND | HVS_LIST_CONNECTED);
+ mtx_unlock(&hvs_trans_socks_mtx);
+
+ /*
+ * Close channel while no reader and sender are working
+ * on the buffer rings.
+ */
+ if (so) {
+ retry = 0;
+ while ((error = sblock(&so->so_rcv, 0)) ==
+ EWOULDBLOCK) {
+ /*
+ * Someone is reading, rx br is busy
+ */
+ soisdisconnected(so);
+ DELAY(500);
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "waiting for rx reader to exit, "
+ "retry = %d\n", retry++);
+ }
+ retry = 0;
+ while ((error = sblock(&so->so_snd, 0)) ==
+ EWOULDBLOCK) {
+ /*
+ * Someone is sending, tx br is busy
+ */
+ soisdisconnected(so);
+ DELAY(500);
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "waiting for tx sender to exit, "
+ "retry = %d\n", retry++);
+ }
+ }
+
+
+ /* Both sockbufs are locked: safe to free the pcb now. */
+ bzero(sc->pcb, sizeof(struct hvs_pcb));
+ free(sc->pcb, M_HVSOCK);
+ sc->pcb = NULL;
+
+ if (so) {
+ sbunlock(&so->so_rcv);
+ sbunlock(&so->so_snd);
+ /* Detach the socket from the freed pcb. */
+ so->so_pcb = NULL;
+ }
+
+ hvs_trans_unlock();
+ }
+
+ vmbus_chan_close(vmbus_get_channel(dev));
+
+ return (0);
+}
+
+/* newbus glue: hv_sock devices are per-channel children of vmbus. */
+static device_method_t hvsock_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, hvsock_probe),
+ DEVMETHOD(device_attach, hvsock_attach),
+ DEVMETHOD(device_detach, hvsock_detach),
+ DEVMETHOD_END
+};
+
+static driver_t hvsock_driver = {
+ "hv_sock",
+ hvsock_methods,
+ sizeof(struct hvsock_sc)
+};
+
+static devclass_t hvsock_devclass;
+
+DRIVER_MODULE(hvsock, vmbus, hvsock_driver, hvsock_devclass, NULL, NULL);
+MODULE_VERSION(hvsock, 1);
+MODULE_DEPEND(hvsock, vmbus, 1, 1, 1);
Index: head/sys/dev/hyperv/include/vmbus.h
===================================================================
--- head/sys/dev/hyperv/include/vmbus.h
+++ head/sys/dev/hyperv/include/vmbus.h
@@ -31,6 +31,7 @@
#include <sys/param.h>
#include <sys/bus.h>
+#include <sys/_iovec.h>
/*
* VMBUS version is 32 bit, upper 16 bit for major_number and lower
@@ -130,6 +131,7 @@
struct taskqueue;
typedef void (*vmbus_chan_callback_t)(struct vmbus_channel *, void *);
+typedef int (*vmbus_br_copy_callback_t)(void *, int, void *);
static __inline struct vmbus_channel *
vmbus_get_channel(device_t dev)
@@ -205,6 +207,14 @@
int vmbus_chan_recv_pkt(struct vmbus_channel *chan,
struct vmbus_chanpkt_hdr *pkt, int *pktlen);
+int vmbus_chan_recv_idxadv(struct vmbus_channel *chan,
+ uint32_t advance);
+int vmbus_chan_recv_peek(struct vmbus_channel *chan,
+ void *data, int data_len, uint32_t advance);
+int vmbus_chan_recv_peek_call(struct vmbus_channel *chan,
+ int data_len, uint32_t skip,
+ vmbus_br_copy_callback_t cb, void *cbarg);
+
int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
uint16_t flags, void *data, int dlen, uint64_t xactid);
int vmbus_chan_send_sglist(struct vmbus_channel *chan,
@@ -213,13 +223,30 @@
int vmbus_chan_send_prplist(struct vmbus_channel *chan,
struct vmbus_gpa_range *prp, int prp_cnt, void *data,
int dlen, uint64_t xactid);
+int vmbus_chan_iov_send(struct vmbus_channel *chan,
+ const struct iovec iov[], int iovlen,
+ vmbus_br_copy_callback_t cb, void *cbarg);
+uint32_t vmbus_chan_write_available(struct vmbus_channel *chan);
+uint32_t vmbus_chan_read_available(struct vmbus_channel *chan);
+bool vmbus_chan_write_signal(struct vmbus_channel *chan,
+ int32_t min_signal_size);
+void vmbus_chan_set_pending_send_size(struct vmbus_channel *chan,
+ uint32_t size);
uint32_t vmbus_chan_id(const struct vmbus_channel *chan);
uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan);
bool vmbus_chan_is_primary(const struct vmbus_channel *chan);
bool vmbus_chan_is_revoked(const struct vmbus_channel *chan);
-const struct hyperv_guid *
- vmbus_chan_guid_inst(const struct vmbus_channel *chan);
+bool vmbus_chan_is_hvs(const struct vmbus_channel *chan);
+bool vmbus_chan_is_hvs_conn_from_host(
+ const struct vmbus_channel *chan);
+int vmbus_req_tl_connect(struct hyperv_guid *,
+ struct hyperv_guid *);
+
+struct hyperv_guid *
+ vmbus_chan_guid_type(struct vmbus_channel *chan);
+struct hyperv_guid *
+ vmbus_chan_guid_inst(struct vmbus_channel *chan);
int vmbus_chan_prplist_nelem(int br_size, int prpcnt_max,
int dlen_max);
bool vmbus_chan_rx_empty(const struct vmbus_channel *chan);
Index: head/sys/dev/hyperv/vmbus/vmbus.c
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus.c
+++ head/sys/dev/hyperv/vmbus/vmbus.c
@@ -365,10 +365,46 @@
uint32_t gpadl;
again:
- gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
+ gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
if (gpadl == 0)
goto again;
return (gpadl);
+}
+
+/*
+ * Used for Hyper-V socket when a guest client connects to the host:
+ * send the VMBUS_CHANMSG_TYPE_TL_CONN hypercall message carrying the
+ * guest endpoint GUID and the host service GUID.  The message has no
+ * direct reply (vmbus_msghc_exec_noresult); the host responds by
+ * offering a new channel.  Returns 0 or ENXIO/hypercall error.
+ */
+int
+vmbus_req_tl_connect(struct hyperv_guid *guest_srv_id,
+ struct hyperv_guid *host_srv_id)
+{
+ struct vmbus_softc *sc = vmbus_get_softc();
+ struct vmbus_chanmsg_tl_connect *req;
+ struct vmbus_msghc *mh;
+ int error;
+
+ if (!sc)
+ return ENXIO;
+
+ mh = vmbus_msghc_get(sc, sizeof(*req));
+ if (mh == NULL) {
+ device_printf(sc->vmbus_dev,
+ "can not get msg hypercall for tl connect\n");
+ return ENXIO;
+ }
+
+ req = vmbus_msghc_dataptr(mh);
+ req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_TL_CONN;
+ req->guest_endpoint_id = *guest_srv_id;
+ req->host_service_id = *host_srv_id;
+
+ error = vmbus_msghc_exec_noresult(mh);
+ vmbus_msghc_put(sc, mh);
+
+ if (error) {
+ device_printf(sc->vmbus_dev,
+ "tl connect msg hypercall failed\n");
+ }
+
+ return error;
}
static int
Index: head/sys/dev/hyperv/vmbus/vmbus_br.c
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_br.c
+++ head/sys/dev/hyperv/vmbus/vmbus_br.c
@@ -52,18 +52,23 @@
vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS)
{
const struct vmbus_br *br = arg1;
- uint32_t rindex, windex, imask, ravail, wavail;
+ uint32_t rindex, windex, imask, psndsz, fvalue, ravail, wavail;
+ uint64_t intrcnt;
char state[256];
+ intrcnt = br->vbr_intrcnt;
rindex = br->vbr_rindex;
windex = br->vbr_windex;
imask = br->vbr_imask;
+ psndsz = br->vbr_psndsz;
+ fvalue = br->vbr_fvalue;
wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
ravail = br->vbr_dsize - wavail;
snprintf(state, sizeof(state),
- "rindex:%u windex:%u imask:%u ravail:%u wavail:%u",
- rindex, windex, imask, ravail, wavail);
+ "intrcnt:%lu rindex:%u windex:%u imask:%u psndsz:%u fvalue:%u "
+ "ravail:%u wavail:%u",
+ intrcnt, rindex, windex, imask, psndsz, fvalue, ravail, wavail);
return sysctl_handle_string(oidp, state, sizeof(state), req);
}
@@ -76,9 +81,11 @@
#define BR_STATE_RIDX 0
#define BR_STATE_WIDX 1
#define BR_STATE_IMSK 2
-#define BR_STATE_RSPC 3
-#define BR_STATE_WSPC 4
-#define BR_STATE_MAX 5
+#define BR_STATE_PSSZ 3
+#define BR_STATE_FVAL 4
+#define BR_STATE_RSPC 5
+#define BR_STATE_WSPC 6
+#define BR_STATE_MAX 7
const struct vmbus_br *br = arg1;
uint32_t rindex, windex, wavail, state[BR_STATE_MAX];
@@ -90,6 +97,8 @@
state[BR_STATE_RIDX] = rindex;
state[BR_STATE_WIDX] = windex;
state[BR_STATE_IMSK] = br->vbr_imask;
+ state[BR_STATE_PSSZ] = br->vbr_psndsz;
+ state[BR_STATE_FVAL] = br->vbr_fvalue;
state[BR_STATE_WSPC] = wavail;
state[BR_STATE_RSPC] = br->vbr_dsize - wavail;
@@ -140,6 +149,12 @@
}
uint32_t
+vmbus_rxbr_available(const struct vmbus_rxbr *rbr)
+{
+ return (vmbus_rxbr_avail(rbr));
+}
+
+uint32_t
vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr)
{
rbr->rxbr_imask = 0;
@@ -178,6 +193,40 @@
vmbus_br_setup(&rbr->rxbr, buf, blen);
}
+/*
+ * Decide whether the host must be signaled after the guest has read
+ * 'bytes_read' bytes from the RX bufring.  The host opts in to this
+ * flow-control signal by setting the pending-send-size feature bit
+ * and a non-zero pending_snd_sz.  Signal only when this read crossed
+ * the threshold: before the read there was not enough writable space
+ * for the host, and after it there is more than the host asked for.
+ */
+static __inline boolean_t
+vmbus_rxbr_need_signal(const struct vmbus_rxbr *rbr, uint32_t bytes_read)
+{
+ uint32_t pending_snd_sz, canwrite_size;
+
+ /* No need to signal if host doesn't want us to */
+ if (!rbr->rxbr_fpsndsz)
+ return false;
+
+ mb();
+
+ pending_snd_sz = rbr->rxbr_psndsz;
+ /* No need to signal if host sets pending_snd_sz to 0 */
+ if (!pending_snd_sz)
+ return false;
+
+ mb();
+
+ canwrite_size = rbr->rxbr_dsize - vmbus_rxbr_avail(rbr);
+
+ /* No need to signal if br already has enough space before read */
+ if (canwrite_size - bytes_read > pending_snd_sz)
+ return false;
+
+ /*
+ * No need to signal if still doesn't have enough space
+ * asked by host
+ */
+ if (canwrite_size <= pending_snd_sz)
+ return false;
+
+ return true;
+}
+
void
vmbus_txbr_init(struct vmbus_txbr *tbr)
{
@@ -194,8 +243,25 @@
vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen)
{
vmbus_br_setup(&tbr->txbr, buf, blen);
+
+ /* Set feature bit enabling flow control */
+ tbr->txbr_fpsndsz = 1;
}
+/*
+ * Return the TX bufring interrupt mask; the barrier orders this read
+ * against the caller's preceding ring-buffer updates.
+ */
+uint32_t
+vmbus_txbr_get_imask(const struct vmbus_txbr *tbr)
+{
+ mb();
+
+ return(tbr->txbr_imask);
+}
+
+/*
+ * Advertise how much TX ring space the guest wants before the host
+ * signals it (the pending-send-size flow-control handshake).
+ */
+void
+vmbus_txbr_set_pending_snd_sz(struct vmbus_txbr *tbr, uint32_t size)
+{
+ tbr->txbr_psndsz = size;
+}
+
/*
* When we write to the ring buffer, check if the host needs to be
* signaled.
@@ -260,7 +326,117 @@
return VMBUS_BR_IDXINC(windex, cplen, br_dsize);
}
+/*
+ * Like vmbus_txbr_copyto(), but the data is produced by the caller's
+ * callback rather than copied from a flat buffer; a ring wrap-around
+ * is handled by invoking the callback once per fragment.  The
+ * callback's error (if any) is reported through *ret.  Returns the
+ * advanced write index.
+ */
+static __inline uint32_t
+vmbus_txbr_copyto_call(const struct vmbus_txbr *tbr, uint32_t windex,
+ uint32_t cplen, vmbus_br_copy_callback_t cb, void *cbarg, int *ret)
+{
+ uint8_t *br_data = tbr->txbr_data;
+ uint32_t br_dsize = tbr->txbr_dsize;
+ int err = 0;
+
+ if (cplen > br_dsize - windex) {
+ uint32_t fraglen = br_dsize - windex;
+
+ /* Wrap-around detected */
+ err = cb((void *)(br_data + windex), fraglen, cbarg);
+ if (!err)
+ err = cb((void *)br_data, cplen - fraglen, cbarg);
+ } else {
+ err = cb((void *)(br_data + windex), cplen, cbarg);
+ }
+
+ *ret = err;
+
+ return VMBUS_BR_IDXINC(windex, cplen, br_dsize);
+}
+
+/* Return the number of bytes currently writable on the TX bufring. */
+uint32_t
+vmbus_txbr_available(const struct vmbus_txbr *tbr)
+{
+ return (vmbus_txbr_avail(tbr));
+}
+
/*
+ * Write a scattered channel packet to the TX bufring, with NULL
+ * iov_base entries filled by the caller's copy callback (used by
+ * hvsock to uiomove user data straight into the ring).
+ *
+ * NOTE:
+ * Not holding lock when calling user provided callback routine.
+ * Caller should hold lock to serialize ring buffer accesses.
+ */
+int
+vmbus_txbr_write_call(struct vmbus_txbr *tbr,
+ const struct iovec iov[], int iovlen,
+ vmbus_br_copy_callback_t cb, void *cbarg,
+ boolean_t *need_sig)
+{
+ uint32_t old_windex, windex, total;
+ uint64_t save_windex;
+ int i;
+ int cb_ret = 0;
+
+ total = 0;
+ for (i = 0; i < iovlen; i++)
+ total += iov[i].iov_len;
+ total += sizeof(save_windex);
+
+
+ /*
+ * NOTE:
+ * If this write is going to make br_windex same as br_rindex,
+ * i.e. the available space for write is same as the write size,
+ * we can't do it then, since br_windex == br_rindex means that
+ * the bufring is empty.
+ */
+ if (vmbus_txbr_avail(tbr) <= total) {
+ return (EAGAIN);
+ }
+
+ /* Save br_windex for later use */
+ old_windex = tbr->txbr_windex;
+
+ /*
+ * Copy the scattered channel packet to the TX bufring.
+ */
+ windex = old_windex;
+ for (i = 0; i < iovlen; i++) {
+ if (iov[i].iov_base != NULL) {
+ windex = vmbus_txbr_copyto(tbr, windex,
+ iov[i].iov_base, iov[i].iov_len);
+ } else if (cb != NULL) {
+ windex = vmbus_txbr_copyto_call(tbr, windex,
+ iov[i].iov_len, cb, cbarg, &cb_ret);
+ /*
+ * If callback fails, return without updating
+ * write index.
+ */
+ if (cb_ret)
+ return (cb_ret);
+ }
+ }
+
+ mtx_lock_spin(&tbr->txbr_lock);
+
+ /*
+ * Set the offset of the current channel packet.
+ */
+ save_windex = ((uint64_t)old_windex) << 32;
+ windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
+ sizeof(save_windex));
+
+ /*
+ * Update the write index _after_ the channel packet
+ * is copied.
+ */
+ __compiler_membar();
+ tbr->txbr_windex = windex;
+
+ mtx_unlock_spin(&tbr->txbr_lock);
+
+ if (need_sig)
+ *need_sig = vmbus_txbr_need_signal(tbr, old_windex);
+
+ return (0);
+}
+
+/*
* Write scattered channel packet to TX bufring.
*
* The offset of this channel packet is written as a 64bits value
@@ -346,6 +522,27 @@
return VMBUS_BR_IDXINC(rindex, cplen, br_dsize);
}
+/*
+ * Like vmbus_rxbr_copyfrom(), but the data is consumed by the
+ * caller's callback instead of copied to a flat buffer; a ring
+ * wrap-around is handled by invoking the callback once per fragment.
+ * Returns the callback's error, if any.
+ */
+static __inline uint32_t
+vmbus_rxbr_copyfrom_call(const struct vmbus_rxbr *rbr, uint32_t rindex,
+ int cplen, vmbus_br_copy_callback_t cb, void *cbarg)
+{
+ uint8_t *br_data = rbr->rxbr_data;
+ uint32_t br_dsize = rbr->rxbr_dsize;
+ int error = 0;
+
+ if (cplen > br_dsize - rindex) {
+ uint32_t fraglen = br_dsize - rindex;
+
+ /* Wrap-around detected. */
+ error = cb((void *)(br_data + rindex), fraglen, cbarg);
+ if (!error)
+ error = cb((void *)br_data, cplen - fraglen, cbarg);
+ } else {
+ error = cb((void *)(br_data + rindex), cplen, cbarg);
+ }
+ return (error);
+}
+
int
vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen)
{
@@ -362,6 +559,121 @@
vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
mtx_unlock_spin(&rbr->rxbr_lock);
+
+ return (0);
+}
+
+/*
+ * Peek 'dlen' bytes at offset 'skip' on the RX bufring, delivering
+ * the data through the caller's copy callback without advancing the
+ * read index.
+ *
+ * NOTE:
+ * We only hold spin lock to check the ring buffer space. It is
+ * released before calling user provided callback routine.
+ * Caller should hold lock to serialize ring buffer accesses.
+ */
+int
+vmbus_rxbr_peek_call(struct vmbus_rxbr *rbr, int dlen, uint32_t skip,
+ vmbus_br_copy_callback_t cb, void *cbarg)
+{
+ uint32_t rindex, br_dsize0 = rbr->rxbr_dsize;
+ int ret;
+
+ mtx_lock_spin(&rbr->rxbr_lock);
+ /*
+ * The requested data + skip and the 64bits channel packet
+ * offset should be there at least.
+ */
+ if (vmbus_rxbr_avail(rbr) < skip + dlen + sizeof(uint64_t)) {
+ mtx_unlock_spin(&rbr->rxbr_lock);
+ return (EAGAIN);
+ }
+
+ rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize0);
+ mtx_unlock_spin(&rbr->rxbr_lock);
+
+ ret = vmbus_rxbr_copyfrom_call(rbr, rindex, dlen, cb, cbarg);
+
+ return (ret);
+}
+
+/*
+ * Advance the read index past the previous packet ('idx_adv' bytes
+ * plus the trailing 64-bit write offset), then peek 'dlen' bytes of
+ * the next packet without consuming it.
+ *
+ * NOTE:
+ * We assume idx_adv == sizeof(channel packet).
+ */
+int
+vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data, int dlen,
+ uint32_t idx_adv, boolean_t *need_sig)
+{
+ uint32_t rindex, br_dsize = rbr->rxbr_dsize;
+
+ mtx_lock_spin(&rbr->rxbr_lock);
+ /*
+ * Make sure it has enough data to read.
+ */
+ if (vmbus_rxbr_avail(rbr) < idx_adv + sizeof(uint64_t) + dlen) {
+ mtx_unlock_spin(&rbr->rxbr_lock);
+ return (EAGAIN);
+ }
+
+ if (idx_adv > 0) {
+ /*
+ * Advance the read index first, including the channel's 64bit
+ * previous write offset.
+ */
+ rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
+ idx_adv + sizeof(uint64_t), br_dsize);
+ __compiler_membar();
+ rbr->rxbr_rindex = rindex;
+ }
+
+ vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
+
+ mtx_unlock_spin(&rbr->rxbr_lock);
+
+ /* Signal the host only if the index actually advanced. */
+ if (need_sig) {
+ if (idx_adv > 0)
+ *need_sig =
+ vmbus_rxbr_need_signal(rbr, idx_adv +
+ sizeof(uint64_t));
+ else
+ *need_sig = false;
+ }
+
+ return (0);
+}
+
+/*
+ * NOTE:
+ * Just update the RX rb index: consume 'idx_adv' bytes plus the
+ * trailing 64-bit write offset without copying any data out.
+ */
+int
+vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
+ boolean_t *need_sig)
+{
+ uint32_t rindex, br_dsize = rbr->rxbr_dsize;
+
+ mtx_lock_spin(&rbr->rxbr_lock);
+ /*
+ * Make sure it has enough space to advance.
+ */
+ if (vmbus_rxbr_avail(rbr) < idx_adv + sizeof(uint64_t)) {
+ mtx_unlock_spin(&rbr->rxbr_lock);
+ return (EAGAIN);
+ }
+
+ /*
+ * Advance the read index, including the channel's 64bit
+ * previous write offset.
+ */
+ rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
+ idx_adv + sizeof(uint64_t), br_dsize);
+ __compiler_membar();
+ rbr->rxbr_rindex = rindex;
+
+ mtx_unlock_spin(&rbr->rxbr_lock);
+
+ /* Freed space may satisfy the host's pending-send-size request. */
+ if (need_sig) {
+ *need_sig =
+ vmbus_rxbr_need_signal(rbr, idx_adv + sizeof(uint64_t));
+ }
return (0);
}
Index: head/sys/dev/hyperv/vmbus/vmbus_brvar.h
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_brvar.h
+++ head/sys/dev/hyperv/vmbus/vmbus_brvar.h
@@ -44,6 +44,10 @@
#define vbr_windex vbr->br_windex
#define vbr_rindex vbr->br_rindex
#define vbr_imask vbr->br_imask
+#define vbr_psndsz vbr->br_pending_snd_sz
+#define vbr_fpsndsz vbr->br_feature_bits.feat_pending_snd_sz
+#define vbr_fvalue vbr->br_feature_bits.value
+#define vbr_intrcnt vbr->br_g2h_intr_cnt
#define vbr_data vbr->br_data
struct vmbus_rxbr {
@@ -54,6 +58,10 @@
#define rxbr_windex rxbr.vbr_windex
#define rxbr_rindex rxbr.vbr_rindex
#define rxbr_imask rxbr.vbr_imask
+#define rxbr_psndsz rxbr.vbr_psndsz
+#define rxbr_fpsndsz rxbr.vbr_fpsndsz
+#define rxbr_fvalue rxbr.vbr_fvalue
+#define rxbr_intrcnt rxbr.vbr_intrcnt
#define rxbr_data rxbr.vbr_data
#define rxbr_dsize rxbr.vbr_dsize
@@ -65,6 +73,10 @@
#define txbr_windex txbr.vbr_windex
#define txbr_rindex txbr.vbr_rindex
#define txbr_imask txbr.vbr_imask
+#define txbr_psndsz txbr.vbr_psndsz
+#define txbr_fpsndsz txbr.vbr_fpsndsz
+#define txbr_fvalue txbr.vbr_fvalue
+#define txbr_intrcnt txbr.vbr_intrcnt
#define txbr_data txbr.vbr_data
#define txbr_dsize txbr.vbr_dsize
@@ -118,13 +130,28 @@
int vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen);
int vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen,
uint32_t skip);
+int vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
+ boolean_t *need_sig);
+int vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data,
+ int dlen, uint32_t idx_adv, boolean_t *need_sig);
+int vmbus_rxbr_peek_call(struct vmbus_rxbr *rbr, int dlen,
+ uint32_t skip, vmbus_br_copy_callback_t cb, void *cbarg);
void vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr);
uint32_t vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr);
+uint32_t vmbus_rxbr_available(const struct vmbus_rxbr *rbr);
void vmbus_txbr_init(struct vmbus_txbr *tbr);
void vmbus_txbr_deinit(struct vmbus_txbr *tbr);
void vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen);
int vmbus_txbr_write(struct vmbus_txbr *tbr,
const struct iovec iov[], int iovlen, boolean_t *need_sig);
+int vmbus_txbr_write_call(struct vmbus_txbr *tbr,
+ const struct iovec iov[], int iovlen,
+ vmbus_br_copy_callback_t cb, void *cbarg,
+ boolean_t *need_sig);
+uint32_t vmbus_txbr_available(const struct vmbus_txbr *tbr);
+uint32_t vmbus_txbr_get_imask(const struct vmbus_txbr *tbr);
+void vmbus_txbr_set_pending_snd_sz(struct vmbus_txbr *tbr,
+ uint32_t size);
#endif /* _VMBUS_BRVAR_H_ */
Index: head/sys/dev/hyperv/vmbus/vmbus_chan.c
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_chan.c
+++ head/sys/dev/hyperv/vmbus/vmbus_chan.c
@@ -127,10 +127,11 @@
};
/*
- * Notify host that there are data pending on our TX bufring.
+ * Notify the host that there is data pending on our TX bufring, or
+ * that we have put some data on the TX bufring.
*/
static __inline void
-vmbus_chan_signal_tx(const struct vmbus_channel *chan)
+vmbus_chan_signal(const struct vmbus_channel *chan)
{
atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask);
if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF)
@@ -139,6 +140,22 @@
hypercall_signal_event(chan->ch_monprm_dma.hv_paddr);
}
+static __inline void
+vmbus_chan_signal_tx(struct vmbus_channel *chan)
+{
+ chan->ch_txbr.txbr_intrcnt ++;
+
+ vmbus_chan_signal(chan);
+}
+
+static __inline void
+vmbus_chan_signal_rx(struct vmbus_channel *chan)
+{
+ chan->ch_rxbr.rxbr_intrcnt ++;
+
+ vmbus_chan_signal(chan);
+}
+
static void
vmbus_chan_ins_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan)
{
@@ -1012,7 +1029,60 @@
taskqueue_drain(chan->ch_tq, &chan->ch_task);
}
+uint32_t
+vmbus_chan_write_available(struct vmbus_channel *chan)
+{
+ return (vmbus_txbr_available(&chan->ch_txbr));
+}
+
+bool
+vmbus_chan_write_signal(struct vmbus_channel *chan,
+ int32_t min_signal_size)
+{
+ if (min_signal_size >= 0 &&
+ vmbus_chan_write_available(chan) > min_signal_size) {
+ return false;
+ }
+
+ if (!vmbus_txbr_get_imask(&chan->ch_txbr)) {
+ /* txbr imask is not set, signal the reader */
+ vmbus_chan_signal_tx(chan);
+ return true;
+ }
+
+ return false;
+}
+
+void
+vmbus_chan_set_pending_send_size(struct vmbus_channel *chan,
+ uint32_t size)
+{
+ if (chan)
+ vmbus_txbr_set_pending_snd_sz(&chan->ch_txbr, size);
+}
+
int
+vmbus_chan_iov_send(struct vmbus_channel *chan,
+ const struct iovec iov[], int iovlen,
+ vmbus_br_copy_callback_t cb, void *cbarg)
+{
+ int error;
+ boolean_t send_evt;
+
+ if (iovlen == 0)
+ return (0);
+
+ error = vmbus_txbr_write_call(&chan->ch_txbr, iov, iovlen,
+ cb, cbarg, &send_evt);
+
+ if (!error && send_evt) {
+ vmbus_chan_signal_tx(chan);
+ }
+
+ return error;
+}
+
+int
vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags,
void *data, int dlen, uint64_t xactid)
{
@@ -1211,6 +1281,78 @@
return (0);
}
+uint32_t
+vmbus_chan_read_available(struct vmbus_channel *chan)
+{
+ return (vmbus_rxbr_available(&chan->ch_rxbr));
+}
+
+/*
+ * This routine does:
+ * - Advance the channel read index for 'advance' bytes
+ * - Copy data_len bytes in to the buffer pointed by 'data'
+ * Return 0 if the operation succeeds, or EAGAIN if it fails.
+ * On failure, the buffer pointed to by 'data' is left intact, and
+ * the channel read index is not advanced at all.
+ */
+int
+vmbus_chan_recv_peek(struct vmbus_channel *chan,
+ void *data, int data_len, uint32_t advance)
+{
+ int error;
+ boolean_t sig_event;
+
+ if (data == NULL || data_len <= 0)
+ return (EINVAL);
+
+ error = vmbus_rxbr_idxadv_peek(&chan->ch_rxbr,
+ data, data_len, advance, &sig_event);
+
+ if (!error && sig_event) {
+ vmbus_chan_signal_rx(chan);
+ }
+
+ return (error);
+}
+
+/*
+ * This routine does:
+ * - Advance the channel read index for 'advance' bytes
+ */
+int
+vmbus_chan_recv_idxadv(struct vmbus_channel *chan, uint32_t advance)
+{
+ int error;
+ boolean_t sig_event;
+
+ if (advance == 0)
+ return (EINVAL);
+
+ error = vmbus_rxbr_idxadv(&chan->ch_rxbr, advance, &sig_event);
+
+ if (!error && sig_event) {
+ vmbus_chan_signal_rx(chan);
+ }
+
+ return (error);
+}
+
+
+/*
+ * Caller should hold its own lock to serialize the ring buffer
+ * copy.
+ */
+int
+vmbus_chan_recv_peek_call(struct vmbus_channel *chan, int data_len,
+ uint32_t skip, vmbus_br_copy_callback_t cb, void *cbarg)
+{
+ if (!chan || data_len <= 0 || cb == NULL)
+ return (EINVAL);
+
+ return (vmbus_rxbr_peek_call(&chan->ch_rxbr, data_len, skip,
+ cb, cbarg));
+}
+
static void
vmbus_chan_task(void *xchan, int pending __unused)
{
@@ -1732,6 +1874,25 @@
1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
}
+ if (offer->chm_chflags & VMBUS_CHAN_TLNPI_PROVIDER_OFFER) {
+ /* This is HyperV socket channel */
+ chan->ch_is_hvs = true;
+ /* The first byte != 0 means the host initiated connection. */
+ chan->ch_hvs_conn_from_host =
+ offer->chm_udata.pipe.user_def[0];
+
+ if (bootverbose) {
+ device_printf(sc->vmbus_dev,
+ "chan%u is hyperv socket channel "
+ "connected %s host\n",
+ chan->ch_id,
+ (chan->ch_hvs_conn_from_host != 0) ?
+ "from" : "to");
+ }
+ } else {
+ chan->ch_is_hvs = false;
+ }
+
/*
* Setup event flag.
*/
@@ -2047,8 +2208,31 @@
return false;
}
-const struct hyperv_guid *
-vmbus_chan_guid_inst(const struct vmbus_channel *chan)
+bool
+vmbus_chan_is_hvs(const struct vmbus_channel *chan)
+{
+ return chan->ch_is_hvs;
+}
+
+bool
+vmbus_chan_is_hvs_conn_from_host(const struct vmbus_channel *chan)
+{
+ KASSERT(vmbus_chan_is_hvs(chan) == true,
+ ("Not a HyperV Socket channel %u", chan->ch_id));
+ if (chan->ch_hvs_conn_from_host != 0)
+ return true;
+ else
+ return false;
+}
+
+struct hyperv_guid *
+vmbus_chan_guid_type(struct vmbus_channel *chan)
+{
+ return &chan->ch_guid_type;
+}
+
+struct hyperv_guid *
+vmbus_chan_guid_inst(struct vmbus_channel *chan)
{
return &chan->ch_guid_inst;
}
Index: head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
+++ head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
@@ -149,6 +149,12 @@
int ch_refs;
+ /*
+ * These are for HyperV socket channel only
+ */
+ bool ch_is_hvs;
+ uint8_t ch_hvs_conn_from_host;
+
struct sysctl_ctx_list ch_sysctl_ctx;
} __aligned(CACHE_LINE_SIZE);
Index: head/sys/dev/hyperv/vmbus/vmbus_reg.h
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_reg.h
+++ head/sys/dev/hyperv/vmbus/vmbus_reg.h
@@ -127,7 +127,54 @@
*/
volatile uint32_t br_imask;
- uint8_t br_rsvd[4084];
+ /*
+ * WS2012/Win8 and later versions of Hyper-V implement interrupt
+ * driven flow management. The feature bit feat_pending_snd_sz
+ * is set by the host on the host->guest buffer ring, and by the
+ * guest on the guest->host buffer ring.
+ *
+ * The meaning of the feature bit is a bit complex in that it has
+ * semantics that apply to both buffer rings. If the guest sets
+ * the feature bit in the guest->host buffer ring, the guest is
+ * telling the host that:
+ * 1) It will set the br_pending_snd_sz field in the guest->host buffer
+ * ring when it is waiting for space to become available, and
+ * 2) It will read the pending_send_sz field in the host->guest
+ * ring buffer and interrupt the host when it frees enough space
+ *
+ * Similarly, if the host sets the feature bit in the host->guest
+ * ring buffer, the host is telling the guest that:
+ * 1) It will set the pending_send_sz field in the host->guest ring
+ * buffer when it is waiting for space to become available, and
+ * 2) It will read the pending_send_sz field in the guest->host
+ * ring buffer and interrupt the guest when it frees enough space
+ *
+ * If either the guest or host does not set the feature bit that it
+ * owns, that guest or host must do polling if it encounters a full
+ * ring buffer, and not signal the other end with an interrupt.
+ */
+ volatile uint32_t br_pending_snd_sz;
+ uint32_t br_rsvd1[12];
+ union {
+ struct {
+ uint32_t feat_pending_snd_sz:1;
+ };
+ uint32_t value;
+ } br_feature_bits;
+
+ /* Padding to PAGE_SIZE */
+ uint8_t br_rsvd2[4020];
+
+ /*
+ * Total guest to host interrupt count
+ * - For the rx ring, this counts the guest signaling the host when
+ * this rx ring changes from full to not full.
+ *
+ * - For the tx ring, this counts the guest signaling the host when
+ * this tx ring changes from empty to non-empty.
+ */
+ uint64_t br_g2h_intr_cnt;
+
uint8_t br_data[];
} __packed;
CTASSERT(sizeof(struct vmbus_bufring) == PAGE_SIZE);
@@ -196,7 +243,14 @@
#define VMBUS_CHANMSG_TYPE_CONNECT 14 /* REQ */
#define VMBUS_CHANMSG_TYPE_CONNECT_RESP 15 /* RESP */
#define VMBUS_CHANMSG_TYPE_DISCONNECT 16 /* REQ */
-#define VMBUS_CHANMSG_TYPE_MAX 22
+#define VMBUS_CHANMSG_TYPE_17 17
+#define VMBUS_CHANMSG_TYPE_18 18
+#define VMBUS_CHANMSG_TYPE_19 19
+#define VMBUS_CHANMSG_TYPE_20 20
+#define VMBUS_CHANMSG_TYPE_TL_CONN 21 /* REQ */
+#define VMBUS_CHANMSG_TYPE_22 22
+#define VMBUS_CHANMSG_TYPE_TL_RESULT 23 /* RESP */
+#define VMBUS_CHANMSG_TYPE_MAX 24
struct vmbus_chanmsg_hdr {
uint32_t chm_type; /* VMBUS_CHANMSG_TYPE_ */
@@ -229,6 +283,15 @@
struct vmbus_chanmsg_hdr chm_hdr;
} __packed;
+/* VMBUS_CHANMSG_TYPE_TL_CONN */
+/* Hyper-V socket guest connect request */
+struct vmbus_chanmsg_tl_connect {
+ struct vmbus_chanmsg_hdr chm_hdr;
+ struct hyperv_guid guest_endpoint_id;
+ struct hyperv_guid host_service_id;
+} __packed;
+
+
/* VMBUS_CHANMSG_TYPE_CHOPEN */
struct vmbus_chanmsg_chopen {
struct vmbus_chanmsg_hdr chm_hdr;
@@ -310,6 +373,12 @@
uint32_t chm_chanid;
} __packed;
+/* Size of the user defined data buffer for non-pipe offers */
+#define VMBUS_CHANMSG_CHOFFER_UDATA_SIZE 120
+
+/* Size of the user defined data buffer for pipe offers. */
+#define VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE 116
+
/* VMBUS_CHANMSG_TYPE_CHOFFER */
struct vmbus_chanmsg_choffer {
struct vmbus_chanmsg_hdr chm_hdr;
@@ -320,7 +389,26 @@
uint32_t chm_svrctx_sz;
uint16_t chm_chflags;
uint16_t chm_mmio_sz; /* unit: MB */
- uint8_t chm_udata[120];
+
+ union {
+ /* Non-pipes */
+ struct {
+ uint8_t user_def[VMBUS_CHANMSG_CHOFFER_UDATA_SIZE];
+ } std;
+ /*
+ * Pipes:
+ * For integrated pipe protocol, which is implemented on
+ * top of standard user-defined data. Pipe clients have
+ * VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE bytes left for
+ * their own use.
+ */
+ struct {
+ uint32_t pipe_mode;
+ uint8_t
+ user_def[VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE];
+ } pipe;
+ } chm_udata;
+
uint16_t chm_subidx;
uint16_t chm_rsvd;
uint32_t chm_chanid;
@@ -330,6 +418,9 @@
uint32_t chm_connid;
} __packed;
CTASSERT(sizeof(struct vmbus_chanmsg_choffer) <= VMBUS_MSG_DSIZE_MAX);
+
+/* Server Flag */
+#define VMBUS_CHAN_TLNPI_PROVIDER_OFFER 0x2000
#define VMBUS_CHOFFER_FLAG1_HASMNF 0x01
Index: head/sys/modules/hyperv/Makefile
===================================================================
--- head/sys/modules/hyperv/Makefile
+++ head/sys/modules/hyperv/Makefile
@@ -1,5 +1,5 @@
# $FreeBSD$
-SUBDIR = vmbus netvsc storvsc utilities
+SUBDIR = vmbus netvsc storvsc utilities hvsock
.include <bsd.subdir.mk>
Index: head/sys/modules/hyperv/hvsock/Makefile
===================================================================
--- head/sys/modules/hyperv/hvsock/Makefile
+++ head/sys/modules/hyperv/hvsock/Makefile
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/dev/hyperv/hvsock
+
+KMOD= hv_sock
+SRCS= hv_sock.c
+SRCS+= hv_sock.h
+
+CFLAGS+= -I${SRCTOP}/sys/dev/hyperv/include \
+ -I${SRCTOP}/sys/dev/hyperv/vmbus \
+ -I${SRCTOP}/sys/dev/hyperv/hvsock
+
+.include <bsd.kmod.mk>
Index: head/sys/sys/socket.h
===================================================================
--- head/sys/sys/socket.h
+++ head/sys/sys/socket.h
@@ -265,7 +265,8 @@
#define AF_IEEE80211 37 /* IEEE 802.11 protocol */
#define AF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */
#define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */
-#define AF_MAX 42
+#define AF_HYPERV 43 /* HyperV sockets */
+#define AF_MAX 43
/*
* When allocating a new AF_ constant, please only allocate
* even numbered constants for FreeBSD until 134 as odd numbered AF_
@@ -273,7 +274,6 @@
*/
#define AF_VENDOR00 39
#define AF_VENDOR01 41
-#define AF_VENDOR02 43
#define AF_VENDOR03 45
#define AF_VENDOR04 47
#define AF_VENDOR05 49

File Metadata

Mime Type
text/plain
Expires
Sun, Feb 8, 3:39 AM (11 h, 20 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28463550
Default Alt Text
D24061.diff (77 KB)

Event Timeline