Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F144251440
D24061.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
77 KB
Referenced Files
None
Subscribers
None
D24061.diff
View Options
Index: head/sys/conf/files.x86
===================================================================
--- head/sys/conf/files.x86
+++ head/sys/conf/files.x86
@@ -133,6 +133,7 @@
dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_tsc.c optional hwpmc
dev/hwpmc/hwpmc_x86.c optional hwpmc
+dev/hyperv/hvsock/hv_sock.c optional hyperv
dev/hyperv/input/hv_kbd.c optional hyperv
dev/hyperv/input/hv_kbdc.c optional hyperv
dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci
Index: head/sys/dev/hyperv/hvsock/hv_sock.h
===================================================================
--- head/sys/dev/hyperv/hvsock/hv_sock.h
+++ head/sys/dev/hyperv/hvsock/hv_sock.h
@@ -0,0 +1,122 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _HVSOCK_H
+#define _HVSOCK_H
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/queue.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/include/vmbus.h>
+
+/*
+ * HyperV Socket Protocols
+ */
+#define HYPERV_SOCK_PROTO_TRANS 1 /* Transport protocol */
+
+/*
+ * Port values with all bits set. The expansion is parenthesized so the
+ * unary minus cannot bind to a neighbouring operator at the use site.
+ */
+#define HVADDR_PORT_ANY (-1U)
+#define HVADDR_PORT_UNKNOWN (-1U)
+
+/* Bit flags selecting which global pcb list(s) an operation applies to. */
+#define HVS_LIST_BOUND 0x01
+#define HVS_LIST_CONNECTED 0x02
+#define HVS_LIST_ALL (HVS_LIST_BOUND | HVS_LIST_CONNECTED)
+
+/*
+ * Socket address used with the AF_HYPERV domain: a length byte, an
+ * address family and a port number, followed by zero padding.
+ *
+ * hvs_zero is sized as sizeof(struct sockaddr) minus the sizes of the
+ * three preceding members.
+ * NOTE(review): alignment padding the compiler may insert before hvs_port
+ * is not accounted for, so sizeof(struct sockaddr_hvs) may exceed
+ * sizeof(struct sockaddr) -- confirm.
+ */
+struct sockaddr_hvs {
+ unsigned char sa_len;
+ sa_family_t sa_family;
+ unsigned int hvs_port;
+ unsigned char hvs_zero[sizeof(struct sockaddr) -
+ sizeof(sa_family_t) -
+ sizeof(unsigned char) -
+ sizeof(unsigned int)];
+};
+
+/*
+ * Packed two-word header: a packet type and a data size.
+ * It is embedded in struct hvs_pkt_header after the channel packet header.
+ */
+struct vmpipe_proto_header {
+ uint32_t vmpipe_pkt_type;
+ uint32_t vmpipe_data_size;
+} __packed;
+
+/*
+ * Packed full packet header: the VMBus channel packet header immediately
+ * followed by the vmpipe protocol header.
+ */
+struct hvs_pkt_header {
+ struct vmbus_chanpkt_hdr chan_pkt_hdr;
+ struct vmpipe_proto_header vmpipe_pkt_hdr;
+} __packed;
+
+/*
+ * Per-socket protocol control block. It is stored in so->so_pcb and
+ * points back to its socket through 'so'.
+ */
+struct hvs_pcb {
+ struct socket *so; /* Pointer to socket */
+ struct sockaddr_hvs local_addr;
+ struct sockaddr_hvs remote_addr;
+
+ /* Service GUIDs for the VM side and the host side of a connection */
+ struct hyperv_guid vm_srv_id;
+ struct hyperv_guid host_srv_id;
+
+ struct vmbus_channel *chan;
+ /* Current packet header on rx ring */
+ struct hvs_pkt_header hvs_pkt;
+ /* Available data in receive br in current packet */
+ uint32_t recv_data_len;
+ /* offset in the packet */
+ uint32_t recv_data_off;
+ /* NOTE(review): presumably "ring buffer initialized" -- confirm */
+ bool rb_init;
+ /* Link lists for global bound and connected sockets */
+ LIST_ENTRY(hvs_pcb) bound_next;
+ LIST_ENTRY(hvs_pcb) connected_next;
+};
+
+/*
+ * Conversions between a socket and its hvs_pcb:
+ * so2hvspcb() reads so->so_pcb, hsvpcb2so() reads hvspcb->so.
+ * Neither macro checks its argument for NULL.
+ */
+#define so2hvspcb(so) \
+ ((struct hvs_pcb *)((so)->so_pcb))
+#define hsvpcb2so(hvspcb) \
+ ((struct socket *)((hvspcb)->so))
+
+/* Fill a sockaddr_hvs using the port taken from a service GUID. */
+void hvs_addr_init(struct sockaddr_hvs *, const struct hyperv_guid *);
+
+/* Protocol initialization and socket user-request entry points. */
+void hvs_trans_init(void);
+void hvs_trans_close(struct socket *);
+void hvs_trans_detach(struct socket *);
+void hvs_trans_abort(struct socket *);
+int hvs_trans_attach(struct socket *, int, struct thread *);
+int hvs_trans_bind(struct socket *, struct sockaddr *, struct thread *);
+int hvs_trans_listen(struct socket *, int, struct thread *);
+int hvs_trans_accept(struct socket *, struct sockaddr **);
+int hvs_trans_connect(struct socket *,
+ struct sockaddr *, struct thread *);
+int hvs_trans_peeraddr(struct socket *, struct sockaddr **);
+int hvs_trans_sockaddr(struct socket *, struct sockaddr **);
+int hvs_trans_soreceive(struct socket *, struct sockaddr **,
+ struct uio *, struct mbuf **, struct mbuf **, int *);
+int hvs_trans_sosend(struct socket *, struct sockaddr *, struct uio *,
+ struct mbuf *, struct mbuf *, int, struct thread *);
+int hvs_trans_disconnect(struct socket *);
+int hvs_trans_shutdown(struct socket *);
+
+/* Acquire / release the global HyperV socket lock. */
+int hvs_trans_lock(void);
+void hvs_trans_unlock(void);
+
+/* Remove a socket from the lists selected by HVS_LIST_* flags. */
+void hvs_remove_socket_from_list(struct socket *, unsigned char);
+#endif /* _HVSOCK_H */
Index: head/sys/dev/hyperv/hvsock/hv_sock.c
===================================================================
--- head/sys/dev/hyperv/hvsock/hv_sock.c
+++ head/sys/dev/hyperv/hvsock/hv_sock.c
@@ -0,0 +1,1748 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/domain.h>
+#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/sockbuf.h>
+#include <sys/sx.h>
+#include <sys/uio.h>
+
+#include <net/vnet.h>
+
+#include <dev/hyperv/vmbus/vmbus_reg.h>
+
+#include "hv_sock.h"
+
+/*
+ * Debug levels. A message tagged with level L is printed when
+ * hvs_dbg_level >= L, so raising the level enables every lower level too.
+ */
+#define HVSOCK_DBG_NONE 0x0
+#define HVSOCK_DBG_INFO 0x1
+#define HVSOCK_DBG_ERR 0x2
+#define HVSOCK_DBG_VERBOSE 0x3
+
+
+SYSCTL_NODE(_net, OID_AUTO, hvsock, CTLFLAG_RD, 0, "HyperV socket");
+
+/* Current debug level, exposed as a read/write tunable under net.hvsock. */
+static int hvs_dbg_level;
+SYSCTL_INT(_net_hvsock, OID_AUTO, hvs_dbg_level, CTLFLAG_RWTUN, &hvs_dbg_level,
+ 0, "hyperv socket debug level: 0 = none, 1 = info, 2 = error, 3 = verbose");
+
+
+/*
+ * printf() the remaining arguments when the current debug level is at
+ * least 'level'. The arguments are only evaluated when the message is
+ * actually printed.
+ */
+#define HVSOCK_DBG(level, ...) do { \
+ if (hvs_dbg_level >= (level)) \
+ printf(__VA_ARGS__); \
+ } while (0)
+
+MALLOC_DEFINE(M_HVSOCK, "hyperv_socket", "hyperv socket control structures");
+
+/* The MTU is 16KB per host side's design */
+#define HVSOCK_MTU_SIZE (1024 * 16)
+/* Upper bound on the payload handed to one send: a page minus the vmpipe header */
+#define HVSOCK_SEND_BUF_SZ (PAGE_SIZE - sizeof(struct vmpipe_proto_header))
+
+/* Size of the combined channel + vmpipe packet header */
+#define HVSOCK_HEADER_LEN (sizeof(struct hvs_pkt_header))
+
+/*
+ * Length of a packet carrying payload_len bytes: the header, the payload
+ * rounded up to a multiple of 8, plus one trailing uint64_t.
+ * NOTE(review): the trailing uint64_t is presumably ring-buffer per-packet
+ * bookkeeping -- confirm against the vmbus ring buffer code.
+ */
+#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \
+ roundup2(payload_len, 8) + \
+ sizeof(uint64_t))
+
+
+static struct domain hv_socket_domain;
+
+/*
+ * User-request dispatch table for the HyperV transport protocol.
+ * Entries are grouped by purpose; designated initializers make the
+ * order irrelevant to the resulting object.
+ */
+static struct pr_usrreqs hvs_trans_usrreqs = {
+	/* Socket lifetime */
+	.pru_attach =		hvs_trans_attach,
+	.pru_detach =		hvs_trans_detach,
+	.pru_close =		hvs_trans_close,
+	.pru_abort =		hvs_trans_abort,
+
+	/* Naming and connection setup / teardown */
+	.pru_bind =		hvs_trans_bind,
+	.pru_listen =		hvs_trans_listen,
+	.pru_accept =		hvs_trans_accept,
+	.pru_connect =		hvs_trans_connect,
+	.pru_disconnect =	hvs_trans_disconnect,
+	.pru_shutdown =		hvs_trans_shutdown,
+
+	/* Address queries */
+	.pru_peeraddr =		hvs_trans_peeraddr,
+	.pru_sockaddr =		hvs_trans_sockaddr,
+
+	/* Data transfer */
+	.pru_soreceive =	hvs_trans_soreceive,
+	.pru_sosend =		hvs_trans_sosend,
+};
+
+/*
+ * Protocol switch table for the HyperV socket domain. It holds a single
+ * entry: the connection-required stream transport.
+ */
+static struct protosw hv_socket_protosw[] = {
+	{
+		.pr_domain =	&hv_socket_domain,
+		.pr_type =	SOCK_STREAM,
+		.pr_protocol =	HYPERV_SOCK_PROTO_TRANS,
+		.pr_flags =	PR_CONNREQUIRED,
+		.pr_usrreqs =	&hvs_trans_usrreqs,
+		.pr_init =	hvs_trans_init,
+	},
+};
+
+/*
+ * The AF_HYPERV domain. dom_protoswNPROTOSW points one past the last
+ * entry of hv_socket_protosw.
+ */
+static struct domain hv_socket_domain = {
+	.dom_name =		"hyperv",
+	.dom_family =		AF_HYPERV,
+	.dom_protosw =		hv_socket_protosw,
+	.dom_protoswNPROTOSW =	hv_socket_protosw + nitems(hv_socket_protosw)
+};
+
+VNET_DOMAIN_SET(hv_socket_);
+
+/* Bounds of the port space walked when auto-binding a local port. */
+#define MAX_PORT ((uint32_t)0xFFFFFFFF)
+#define MIN_PORT ((uint32_t)0x0)
+
+/*
+ * Service GUID template. The first four bytes are overwritten with a port
+ * number by set_port_by_srv_id(); the remaining twelve bytes are what
+ * is_valid_srv_id() compares against.
+ */
+/* 00000000-facb-11e6-bd58-64006a7986d3 */
+static const struct hyperv_guid srv_id_template = {
+ .hv_guid = {
+ 0x00, 0x00, 0x00, 0x00, 0xcb, 0xfa, 0xe6, 0x11,
+ 0xbd, 0x58, 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3 }
+};
+
+static int hvsock_br_callback(void *, int, void *);
+static uint32_t hvsock_canread_check(struct hvs_pcb *);
+static uint32_t hvsock_canwrite_check(struct hvs_pcb *);
+static int hvsock_send_data(struct vmbus_channel *chan,
+ struct uio *uio, uint32_t to_write, struct sockbuf *sb);
+
+
+
+/* Globals */
+/* sx lock taken by hvs_trans_lock() / released by hvs_trans_unlock() */
+static struct sx hvs_trans_socks_sx;
+/* Mutex held around accesses to the two pcb lists below */
+static struct mtx hvs_trans_socks_mtx;
+/* pcbs linked through bound_next / connected_next respectively */
+static LIST_HEAD(, hvs_pcb) hvs_trans_bound_socks;
+static LIST_HEAD(, hvs_pcb) hvs_trans_connected_socks;
+/* Last port handed out by the auto-bind search in hvs_trans_connect() */
+static uint32_t previous_auto_bound_port;
+
+/*
+ * Print a service GUID at INFO debug level: the first four bytes as one
+ * unsigned int, the next two byte pairs as unsigned shorts, and the last
+ * eight bytes one by one.
+ */
+static void
+hvsock_print_guid(struct hyperv_guid *guid)
+{
+	unsigned char *raw = (unsigned char *)guid;
+
+	HVSOCK_DBG(HVSOCK_DBG_INFO,
+	    "0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x-0x%x\n",
+	    *(unsigned int *)(raw + 0),
+	    *(unsigned short *)(raw + 4),
+	    *(unsigned short *)(raw + 6),
+	    raw[8], raw[9], raw[10], raw[11],
+	    raw[12], raw[13], raw[14], raw[15]);
+}
+
+/*
+ * Return true when every byte of 'id' after the first four matches the
+ * service GUID template; the first four bytes (the port) are ignored.
+ */
+static bool
+is_valid_srv_id(const struct hyperv_guid *id)
+{
+	const size_t tail_len = sizeof(struct hyperv_guid) - 4;
+	int diff;
+
+	diff = memcmp(&id->hv_guid[4], &srv_id_template.hv_guid[4], tail_len);
+
+	return (diff == 0);
+}
+
+/*
+ * Read the port number stored in the leading unsigned int of a service
+ * GUID.
+ */
+static unsigned int
+get_port_by_srv_id(const struct hyperv_guid *srv_id)
+{
+	const unsigned int *portp = (const unsigned int *)srv_id;
+
+	return (*portp);
+}
+
+/*
+ * Store 'port' in the leading unsigned int of a service GUID, leaving
+ * the rest of the GUID untouched.
+ */
+static void
+set_port_by_srv_id(struct hyperv_guid *srv_id, unsigned int port)
+{
+	unsigned int *portp = (unsigned int *)srv_id;
+
+	*portp = port;
+}
+
+
+/*
+ * Unlink 'pcb' from the global lists selected by 'list' (HVS_LIST_BOUND
+ * and/or HVS_LIST_CONNECTED). A list is only modified if the pcb is
+ * actually found on it; a NULL pcb is ignored.
+ *
+ * No locking is done here; the callers in this file hold
+ * hvs_trans_socks_mtx around the call.
+ */
+static void
+__hvs_remove_pcb_from_list(struct hvs_pcb *pcb, unsigned char list)
+{
+	struct hvs_pcb *p = NULL;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, pcb);
+
+	if (!pcb)
+		return;
+
+	if (list & HVS_LIST_BOUND) {
+		LIST_FOREACH(p, &hvs_trans_bound_socks, bound_next) {
+			if (p == pcb) {
+				LIST_REMOVE(p, bound_next);
+				/*
+				 * Stop here: LIST_FOREACH would otherwise
+				 * advance through the link of the element
+				 * that was just removed.
+				 */
+				break;
+			}
+		}
+	}
+
+	if (list & HVS_LIST_CONNECTED) {
+		LIST_FOREACH(p, &hvs_trans_connected_socks, connected_next) {
+			if (p == pcb) {
+				LIST_REMOVE(pcb, connected_next);
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Socket-level wrapper: look up the socket's pcb and unlink it from the
+ * lists selected by 'list'. Does no locking of its own.
+ */
+static void
+__hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+	struct hvs_pcb *sock_pcb;
+
+	sock_pcb = so2hvspcb(so);
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: pcb is %p\n", __func__, sock_pcb);
+	__hvs_remove_pcb_from_list(sock_pcb, list);
+}
+
+/*
+ * Insert the socket's pcb at the head of each global list selected by
+ * 'list'. Does no locking of its own.
+ */
+static void
+__hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+	struct hvs_pcb *sock_pcb = so2hvspcb(so);
+
+	if ((list & HVS_LIST_BOUND) != 0) {
+		LIST_INSERT_HEAD(&hvs_trans_bound_socks, sock_pcb,
+		    bound_next);
+	}
+
+	if ((list & HVS_LIST_CONNECTED) != 0) {
+		LIST_INSERT_HEAD(&hvs_trans_connected_socks, sock_pcb,
+		    connected_next);
+	}
+}
+
+/*
+ * Locked variant of __hvs_remove_socket_from_list(): takes
+ * hvs_trans_socks_mtx around the removal. A NULL socket or a socket
+ * without a pcb is logged at VERBOSE level and otherwise ignored.
+ */
+void
+hvs_remove_socket_from_list(struct socket *so, unsigned char list)
+{
+	if (so != NULL && so->so_pcb != NULL) {
+		mtx_lock(&hvs_trans_socks_mtx);
+		__hvs_remove_socket_from_list(so, list);
+		mtx_unlock(&hvs_trans_socks_mtx);
+		return;
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: socket or so_pcb is null\n", __func__);
+}
+
+/*
+ * Locked variant of __hvs_insert_socket_on_list(): takes
+ * hvs_trans_socks_mtx around the insertion. A NULL socket or a socket
+ * without a pcb is logged at VERBOSE level and otherwise ignored.
+ */
+static void
+hvs_insert_socket_on_list(struct socket *so, unsigned char list)
+{
+	if (so != NULL && so->so_pcb != NULL) {
+		mtx_lock(&hvs_trans_socks_mtx);
+		__hvs_insert_socket_on_list(so, list);
+		mtx_unlock(&hvs_trans_socks_mtx);
+		return;
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: socket or so_pcb is null\n", __func__);
+}
+
+/*
+ * Search the selected global lists (bound first, then connected) for a
+ * pcb that still has a socket attached and whose local port equals
+ * addr->hvs_port. Returns that socket, or NULL when none matches.
+ * Does no locking of its own.
+ */
+static struct socket *
+__hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+	struct hvs_pcb *cur = NULL;
+
+	if (list & HVS_LIST_BOUND) {
+		LIST_FOREACH(cur, &hvs_trans_bound_socks, bound_next) {
+			if (cur->so == NULL)
+				continue;
+			if (addr->hvs_port == cur->local_addr.hvs_port)
+				return cur->so;
+		}
+	}
+
+	if (list & HVS_LIST_CONNECTED) {
+		LIST_FOREACH(cur, &hvs_trans_connected_socks, connected_next) {
+			if (cur->so == NULL)
+				continue;
+			if (addr->hvs_port == cur->local_addr.hvs_port)
+				return cur->so;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked variant of __hvs_find_socket_on_list(). The mutex is dropped
+ * before returning, so the returned pointer is not protected by it once
+ * this function returns.
+ */
+static struct socket *
+hvs_find_socket_on_list(struct sockaddr_hvs *addr, unsigned char list)
+{
+	struct socket *found;
+
+	mtx_lock(&hvs_trans_socks_mtx);
+	found = __hvs_find_socket_on_list(addr, list);
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	return found;
+}
+
+/*
+ * Initialize 'addr' as an AF_HYPERV address with the given port. Every
+ * other byte is zeroed.
+ *
+ * sa_len must be filled in: the addresses built here are later handed to
+ * sodupsockaddr() (accept, peeraddr, sockaddr), which sizes its copy from
+ * sa_len. Leaving it zero would produce an empty address.
+ */
+static inline void
+hvs_addr_set(struct sockaddr_hvs *addr, unsigned int port)
+{
+	memset(addr, 0, sizeof(*addr));
+	addr->sa_len = sizeof(*addr);
+	addr->sa_family = AF_HYPERV;
+	addr->hvs_port = port;
+}
+
+/*
+ * Initialize 'addr' as an AF_HYPERV address whose port is taken from the
+ * leading bytes of the service GUID 'svr_id'.
+ */
+void
+hvs_addr_init(struct sockaddr_hvs *addr, const struct hyperv_guid *svr_id)
+{
+	unsigned int port;
+
+	port = get_port_by_srv_id(svr_id);
+	hvs_addr_set(addr, port);
+}
+
+/*
+ * Take the global HyperV socket sx lock exclusively.
+ * Always returns 0.
+ */
+int
+hvs_trans_lock(void)
+{
+	const int rc = 0;
+
+	sx_xlock(&hvs_trans_socks_sx);
+
+	return (rc);
+}
+
+/*
+ * Release the exclusive hold on the global HyperV socket sx lock taken
+ * by hvs_trans_lock().
+ */
+void
+hvs_trans_unlock(void)
+{
+
+	sx_xunlock(&hvs_trans_socks_sx);
+}
+
+/*
+ * Protocol init hook. Sets up the global locks, the bound/connected
+ * lists and the auto-bind cursor. Nothing is initialized for non-default
+ * vnet instances or when the guest is not running on Hyper-V.
+ */
+void
+hvs_trans_init(void)
+{
+	/*
+	 * Globals are set up once, for the default vnet only, and only
+	 * when running as a Hyper-V guest.
+	 */
+	if (!IS_DEFAULT_VNET(curvnet) || vm_guest != VM_GUEST_HV)
+		return;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_init called\n", __func__);
+
+	/* Locks first, then the data they protect. */
+	previous_auto_bound_port = MAX_PORT;
+	sx_init(&hvs_trans_socks_sx, "hvs_trans_sock_sx");
+	mtx_init(&hvs_trans_socks_mtx, "hvs_trans_socks_mtx", NULL,
+	    MTX_DEF);
+	LIST_INIT(&hvs_trans_bound_socks);
+	LIST_INIT(&hvs_trans_connected_socks);
+}
+
+/*
+ * Allocate a zeroed hvs_pcb and link it with the socket.
+ *
+ * Called in two cases:
+ * 1) When user calls socket();
+ * 2) When we accept new incoming connection and call sonewconn().
+ *
+ * Returns 0 on success, ESOCKTNOSUPPORT when not running on Hyper-V or
+ * for a non-stream socket, EPROTONOSUPPORT for an unknown protocol,
+ * EISCONN if the socket already has a pcb, and ENOMEM when the
+ * allocation fails. 'td' is unused.
+ */
+int
+hvs_trans_attach(struct socket *so, int proto, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_attach called\n", __func__);
+
+	/*
+	 * hvs_trans_init() leaves the global locks and lists
+	 * uninitialized when the guest is not on Hyper-V. Refuse to create
+	 * sockets in that case so later requests never touch them.
+	 */
+	if (vm_guest != VM_GUEST_HV)
+		return (ESOCKTNOSUPPORT);
+
+	if (so->so_type != SOCK_STREAM)
+		return (ESOCKTNOSUPPORT);
+
+	if (proto != 0 && proto != HYPERV_SOCK_PROTO_TRANS)
+		return (EPROTONOSUPPORT);
+
+	if (pcb != NULL)
+		return (EISCONN);
+	pcb = malloc(sizeof(struct hvs_pcb), M_HVSOCK, M_NOWAIT | M_ZERO);
+	if (pcb == NULL)
+		return (ENOMEM);
+
+	pcb->so = so;
+	so->so_pcb = (void *)pcb;
+
+	return (0);
+}
+
+/*
+ * Detach the pcb from the socket under the global lock. The pcb is
+ * zeroed and freed here only for listening sockets; in every case
+ * so->so_pcb is cleared.
+ * NOTE(review): for non-listening sockets the pcb is presumably released
+ * elsewhere (not visible in this function) -- confirm.
+ */
+void
+hvs_trans_detach(struct socket *so)
+{
+	struct hvs_pcb *sock_pcb;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_detach called\n", __func__);
+
+	(void) hvs_trans_lock();
+
+	sock_pcb = so2hvspcb(so);
+	if (sock_pcb != NULL) {
+		if (SOLISTENING(so)) {
+			bzero(sock_pcb, sizeof(*sock_pcb));
+			free(sock_pcb, M_HVSOCK);
+		}
+		so->so_pcb = NULL;
+	}
+
+	hvs_trans_unlock();
+}
+
+/*
+ * Bind the socket to the port in 'addr'.
+ *
+ * Returns EINVAL for a NULL address or a socket without a pcb,
+ * EAFNOSUPPORT for a non-AF_HYPERV address, EADDRINUSE when a socket on
+ * the bound or connected list already uses the port, and 0 after the
+ * socket has been put on the bound list. 'td' is unused.
+ */
+int
+hvs_trans_bind(struct socket *so, struct sockaddr *addr, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr_hvs *sa = (struct sockaddr_hvs *) addr;
+	struct socket *owner;
+	int error;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_bind called\n", __func__);
+
+	if (sa == NULL || pcb == NULL)
+		return (EINVAL);
+
+	if (sa->sa_family != AF_HYPERV) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: Not supported, sa_family is %u\n",
+		    __func__, sa->sa_family);
+		return (EAFNOSUPPORT);
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: binding port = 0x%x\n", __func__, sa->hvs_port);
+
+	/* Lookup and insertion happen under one hold of the list mutex. */
+	mtx_lock(&hvs_trans_socks_mtx);
+	owner = __hvs_find_socket_on_list(sa,
+	    HVS_LIST_BOUND | HVS_LIST_CONNECTED);
+	if (owner == NULL) {
+		/* Port is free: record the addresses and publish the pcb. */
+		hvs_addr_set(&pcb->local_addr, sa->hvs_port);
+		hvs_addr_set(&pcb->remote_addr, HVADDR_PORT_ANY);
+		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+		error = 0;
+	} else {
+		error = EADDRINUSE;
+	}
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	return (error);
+}
+
+/*
+ * Put a bound socket into the listening state.
+ *
+ * Returns EINVAL when the socket has no pcb, EADDRNOTAVAIL when the
+ * socket found on the bound list for our local port is not this socket,
+ * otherwise the result of solisten_proto_check(). 'td' is unused.
+ */
+int
+hvs_trans_listen(struct socket *so, int backlog, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct socket *owner;
+	int error;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_listen called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/*
+	 * The local port must be on the bound list and owned by this
+	 * socket. 'so' is non-NULL here, so a NULL lookup result also
+	 * fails the comparison.
+	 */
+	owner = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND);
+	if (owner != so) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: Address not bound or not by us.\n", __func__);
+		return (EADDRNOTAVAIL);
+	}
+
+	SOCK_LOCK(so);
+	error = solisten_proto_check(so);
+	if (error == 0)
+		solisten_proto(so, backlog);
+	SOCK_UNLOCK(so);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket listen error = %d\n", __func__, error);
+	return (error);
+}
+
+/*
+ * Return a freshly allocated copy of the socket's remote address in
+ * *nam. Returns EINVAL without a pcb and ENOMEM when the copy cannot be
+ * allocated.
+ */
+int
+hvs_trans_accept(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *peer;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_accept called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	peer = (struct sockaddr *) &pcb->remote_addr;
+	*nam = sodupsockaddr(peer, M_NOWAIT);
+	if (*nam == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Start a connection to the host service identified by the port in
+ * 'nam'.
+ *
+ * A free local port is chosen by walking downward from the previously
+ * auto-bound port, wrapping from MIN_PORT to MAX_PORT. On success the
+ * socket is put on the bound list, both service GUIDs are derived from
+ * the template, the socket is marked connecting and the connect request
+ * is handed to vmbus_req_tl_connect() after the list mutex is dropped.
+ *
+ * Returns EINVAL for a socket without a pcb or a NULL address,
+ * EAFNOSUPPORT for a non-AF_HYPERV address, EINPROGRESS when the socket
+ * is already connected/connecting/disconnecting, EADDRINUSE when no local
+ * port is free, and 0 otherwise. 'td' is unused.
+ */
+int
+hvs_trans_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr_hvs *raddr = (struct sockaddr_hvs *)nam;
+	bool found_auto_bound_port = false;
+	/* Unsigned like the ports it iterates over; wraps, never overflows */
+	uint32_t i;
+	int error = 0;
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/* Verify the remote address */
+	if (raddr == NULL)
+		return (EINVAL);
+	if (raddr->sa_family != AF_HYPERV)
+		return (EAFNOSUPPORT);
+
+	/* Logged only after raddr is known to be non-NULL. */
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_connect called, remote port is %x\n",
+	    __func__, raddr->hvs_port);
+
+	mtx_lock(&hvs_trans_socks_mtx);
+	if (so->so_state &
+	    (SS_ISCONNECTED|SS_ISDISCONNECTING|SS_ISCONNECTING)) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: socket connect in progress\n",
+		    __func__);
+		error = EINPROGRESS;
+		goto out;
+	}
+
+	/*
+	 * Find an available port for us to auto bind the local
+	 * address.
+	 */
+	hvs_addr_set(&pcb->local_addr, 0);
+
+	for (i = previous_auto_bound_port - 1;
+	    i != previous_auto_bound_port; i--) {
+		/* MIN_PORT itself is never handed out; wrap to the top. */
+		if (i == MIN_PORT)
+			i = MAX_PORT;
+
+		pcb->local_addr.hvs_port = i;
+
+		if (__hvs_find_socket_on_list(&pcb->local_addr,
+		    HVS_LIST_BOUND | HVS_LIST_CONNECTED) == NULL) {
+			found_auto_bound_port = true;
+			previous_auto_bound_port = i;
+			HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+			    "%s: found local bound port is %x\n",
+			    __func__, pcb->local_addr.hvs_port);
+			break;
+		}
+	}
+
+	if (found_auto_bound_port == true) {
+		/* Found available port for auto bound, put on list */
+		__hvs_insert_socket_on_list(so, HVS_LIST_BOUND);
+		/* Set VM service ID */
+		pcb->vm_srv_id = srv_id_template;
+		set_port_by_srv_id(&pcb->vm_srv_id, pcb->local_addr.hvs_port);
+		/* Set host service ID and remote port */
+		pcb->host_srv_id = srv_id_template;
+		set_port_by_srv_id(&pcb->host_srv_id, raddr->hvs_port);
+		hvs_addr_set(&pcb->remote_addr, raddr->hvs_port);
+
+		/* Change the socket state to SS_ISCONNECTING */
+		soisconnecting(so);
+	} else {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: No local port available for auto bound\n",
+		    __func__);
+		error = EADDRINUSE;
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect vm_srv_id is ");
+	hvsock_print_guid(&pcb->vm_srv_id);
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "Connect host_srv_id is ");
+	hvsock_print_guid(&pcb->host_srv_id);
+
+out:
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	/* The connect request is issued without the list mutex held. */
+	if (found_auto_bound_port == true)
+		vmbus_req_tl_connect(&pcb->vm_srv_id, &pcb->host_srv_id);
+
+	return (error);
+}
+
+/*
+ * Mark the socket as disconnecting unless it is already disconnected.
+ * Runs under the global lock. Returns EINVAL when the socket has no pcb,
+ * 0 otherwise.
+ */
+int
+hvs_trans_disconnect(struct socket *so)
+{
+	struct hvs_pcb *sock_pcb;
+	int error;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_disconnect called\n", __func__);
+
+	(void) hvs_trans_lock();
+
+	sock_pcb = so2hvspcb(so);
+	if (sock_pcb != NULL) {
+		/* Nothing to do for a socket that is already disconnected. */
+		if (!(so->so_state & SS_ISDISCONNECTED))
+			soisdisconnecting(so);
+		error = 0;
+	} else {
+		error = EINVAL;
+	}
+
+	hvs_trans_unlock();
+
+	return (error);
+}
+
+/* Map message flags to an sblock() flag: 0 for MSG_DONTWAIT, else SBL_WAIT. */
+#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
+/*
+ * Argument bundle passed to hvsock_br_callback() through
+ * vmbus_chan_recv_peek_call(): the destination uio and the socket buffer
+ * whose lock the caller holds.
+ */
+struct hvs_callback_arg {
+ struct uio *uio;
+ struct sockbuf *sb;
+};
+
+/*
+ * Receive data from the channel's rx ring directly into 'uio'.
+ *
+ * 'paddr', 'mp0' and 'controlp' are not used. MSG_EOR is masked out of
+ * *flagsp; MSG_PEEK is rejected with EOPNOTSUPP.
+ *
+ * Returns EINVAL for a non-stream socket, a socket without a pcb, a zero
+ * length request or a uio that is not a read; EPIPE when the receive side
+ * is closed and the socket is disconnected; EAGAIN for a non-blocking
+ * read that found nothing; otherwise an error from sblock(), sbwait(),
+ * the ring read, or so_error (ESHUTDOWN is not reported to the caller).
+ *
+ * The receive sockbuf is sblock()ed and locked for the whole loop; the
+ * lock is dropped while sleeping in sbwait().
+ */
+int
+hvs_trans_soreceive(struct socket *so, struct sockaddr **paddr,
+ struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
+{
+ struct hvs_pcb *pcb = so2hvspcb(so);
+ struct sockbuf *sb;
+ ssize_t orig_resid;
+ uint32_t canread, to_read;
+ int flags, error = 0;
+ struct hvs_callback_arg cbarg;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_soreceive called\n", __func__);
+
+ if (so->so_type != SOCK_STREAM)
+ return (EINVAL);
+ if (pcb == NULL)
+ return (EINVAL);
+
+ if (flagsp != NULL)
+ flags = *flagsp &~ MSG_EOR;
+ else
+ flags = 0;
+
+ if (flags & MSG_PEEK)
+ return (EOPNOTSUPP);
+
+ /* If no space to copy out anything */
+ if (uio->uio_resid == 0 || uio->uio_rw != UIO_READ)
+ return (EINVAL);
+
+ sb = &so->so_rcv;
+
+ /* Remembered to tell later whether this call copied out anything. */
+ orig_resid = uio->uio_resid;
+
+ /* Prevent other readers from entering the socket. */
+ error = sblock(sb, SBLOCKWAIT(flags));
+ if (error) {
+ HVSOCK_DBG(HVSOCK_DBG_ERR,
+ "%s: sblock returned error = %d\n", __func__, error);
+ return (error);
+ }
+
+ SOCKBUF_LOCK(sb);
+
+ cbarg.uio = uio;
+ cbarg.sb = sb;
+ /*
+ * If the socket is closing, there might still be some data
+ * in rx br to read. However we need to make sure
+ * the channel is still open.
+ */
+ if ((sb->sb_state & SBS_CANTRCVMORE) &&
+ (so->so_state & SS_ISDISCONNECTED)) {
+ /* Other thread already closed the channel */
+ error = EPIPE;
+ goto out;
+ }
+
+ while (true) {
+ /* Drain what the ring currently offers, up to the request size. */
+ while (uio->uio_resid > 0 &&
+ (canread = hvsock_canread_check(pcb)) > 0) {
+ to_read = MIN(canread, uio->uio_resid);
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: to_read = %u, skip = %u\n", __func__, to_read,
+ (unsigned int)(sizeof(struct hvs_pkt_header) +
+ pcb->recv_data_off));
+
+ /* Skip the packet header plus what was already consumed. */
+ error = vmbus_chan_recv_peek_call(pcb->chan, to_read,
+ sizeof(struct hvs_pkt_header) + pcb->recv_data_off,
+ hvsock_br_callback, (void *)&cbarg);
+ /*
+ * It is possible the socket is disconnected because
+ * we released the lock in hvsock_br_callback. So we
+ * need to check the state to make sure it is not
+ * disconnected.
+ */
+ if (error || so->so_state & SS_ISDISCONNECTED) {
+ break;
+ }
+
+ pcb->recv_data_len -= to_read;
+ pcb->recv_data_off += to_read;
+ }
+
+ if (error)
+ break;
+
+ /* Abort if socket has reported problems. */
+ if (so->so_error) {
+ if (so->so_error == ESHUTDOWN &&
+ orig_resid > uio->uio_resid) {
+ /*
+ * Although we got a FIN, we also received
+ * some data in this round. Deliver it
+ * to the user.
+ */
+ error = 0;
+ } else {
+ if (so->so_error != ESHUTDOWN)
+ error = so->so_error;
+ }
+
+ break;
+ }
+
+ /* Cannot receive more. */
+ if (sb->sb_state & SBS_CANTRCVMORE)
+ break;
+
+ /* We are done if buffer has been filled */
+ if (uio->uio_resid == 0)
+ break;
+
+ /* Without MSG_WAITALL a partial read is returned right away. */
+ if (!(flags & MSG_WAITALL) && orig_resid > uio->uio_resid)
+ break;
+
+ /* Buffer ring is empty and we shall not block */
+ if ((so->so_state & SS_NBIO) ||
+ (flags & (MSG_DONTWAIT|MSG_NBIO))) {
+ if (orig_resid == uio->uio_resid) {
+ /* We have not read anything */
+ error = EAGAIN;
+ }
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: non blocked read return, error %d.\n",
+ __func__, error);
+ break;
+ }
+
+ /*
+ * Wait and block until (more) data comes in.
+ * Note: Drops the sockbuf lock during wait.
+ */
+ error = sbwait(sb);
+
+ if (error)
+ break;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: wake up from sbwait, read available is %u\n",
+ __func__, vmbus_chan_read_available(pcb->chan));
+ }
+
+out:
+ SOCKBUF_UNLOCK(sb);
+
+ sbunlock(sb);
+
+ /* We received a FIN in this call */
+ if (so->so_error == ESHUTDOWN) {
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+ /* Send has already closed */
+ soisdisconnecting(so);
+ } else {
+ /* Just close the receive side */
+ socantrcvmore(so);
+ }
+ }
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: returning error = %d, so_error = %d\n",
+ __func__, error, so->so_error);
+
+ return (error);
+}
+
+/*
+ * Send the data described by 'uio' over the socket's channel.
+ *
+ * 'addr', 'top', 'controlp' and 'td' are not used. Data is written in
+ * chunks of at most HVSOCK_SEND_BUF_SZ bytes, limited by the space
+ * hvsock_canwrite_check() reports. When the ring is full, a partial send
+ * returns what was written so far; otherwise the call either fails with
+ * EWOULDBLOCK (non-blocking) or sleeps in sbwait().
+ *
+ * Returns EINVAL for a non-stream socket, a socket without a pcb, a zero
+ * length request or a uio that is not a write; EPIPE when the send side
+ * is closed or so_error is ESHUTDOWN; otherwise an error from sblock(),
+ * sbwait() or hvsock_send_data(), or 0.
+ */
+int
+hvs_trans_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
+    struct mbuf *top, struct mbuf *controlp, int flags, struct thread *td)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockbuf *sb;
+	ssize_t orig_resid;
+	uint32_t canwrite, to_write;
+	int error = 0;
+
+	/* uio_resid is an ssize_t, hence %zd rather than %lu. */
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_sosend called, uio_resid = %zd\n",
+	    __func__, uio->uio_resid);
+
+	if (so->so_type != SOCK_STREAM)
+		return (EINVAL);
+	if (pcb == NULL)
+		return (EINVAL);
+
+	/* If nothing to send */
+	if (uio->uio_resid == 0 || uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	sb = &so->so_snd;
+
+	/* Remembered to tell later whether this call sent anything. */
+	orig_resid = uio->uio_resid;
+
+	/* Prevent other writers from entering the socket. */
+	error = sblock(sb, SBLOCKWAIT(flags));
+	if (error) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: sblock returned error = %d\n", __func__, error);
+		return (error);
+	}
+
+	SOCKBUF_LOCK(sb);
+
+	if ((sb->sb_state & SBS_CANTSENDMORE) ||
+	    so->so_error == ESHUTDOWN) {
+		error = EPIPE;
+		goto out;
+	}
+
+	while (uio->uio_resid > 0) {
+		canwrite = hvsock_canwrite_check(pcb);
+		if (canwrite == 0) {
+			/* We have sent some data */
+			if (orig_resid > uio->uio_resid)
+				break;
+			/*
+			 * We have not sent any data and it is
+			 * non-blocked io
+			 */
+			if (so->so_state & SS_NBIO ||
+			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
+				error = EWOULDBLOCK;
+				break;
+			} else {
+				/*
+				 * We are here because there is no space on
+				 * send buffer ring. Signal the other side
+				 * to read and free more space.
+				 * Sleep wait until space available to send
+				 * Note: Drops the sockbuf lock during wait.
+				 */
+				error = sbwait(sb);
+
+				if (error)
+					break;
+
+				HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+				    "%s: wake up from sbwait, space avail on "
+				    "tx ring is %u\n",
+				    __func__,
+				    vmbus_chan_write_available(pcb->chan));
+
+				continue;
+			}
+		}
+		/* Clamp to ring space, remaining data and the chunk limit. */
+		to_write = MIN(canwrite, uio->uio_resid);
+		to_write = MIN(to_write, HVSOCK_SEND_BUF_SZ);
+
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: canwrite is %u, to_write = %u\n", __func__,
+		    canwrite, to_write);
+		error = hvsock_send_data(pcb->chan, uio, to_write, sb);
+
+		if (error)
+			break;
+	}
+
+out:
+	SOCKBUF_UNLOCK(sb);
+	sbunlock(sb);
+
+	return (error);
+}
+
+/*
+ * Return a freshly allocated copy of the socket's remote address in
+ * *nam. Returns EINVAL without a pcb and ENOMEM when the copy cannot be
+ * allocated.
+ */
+int
+hvs_trans_peeraddr(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *remote;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_peeraddr called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	remote = (struct sockaddr *) &pcb->remote_addr;
+	*nam = sodupsockaddr(remote, M_NOWAIT);
+	if (*nam == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Return a freshly allocated copy of the socket's local address in
+ * *nam. Returns EINVAL without a pcb and ENOMEM when the copy cannot be
+ * allocated.
+ */
+int
+hvs_trans_sockaddr(struct socket *so, struct sockaddr **nam)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockaddr *local;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_sockaddr called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	local = (struct sockaddr *) &pcb->local_addr;
+	*nam = sodupsockaddr(local, M_NOWAIT);
+	if (*nam == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
+
+/*
+ * Close hook, run under the global lock. For a connected socket a
+ * zero-length send is issued as a FIN; a socket in any connected,
+ * connecting or disconnecting state is marked disconnected. The pcb's
+ * channel and socket back-pointers are then cleared, and a listening
+ * socket is removed from the bound list. The pcb itself is not freed
+ * here.
+ */
+void
+hvs_trans_close(struct socket *so)
+{
+ struct hvs_pcb *pcb;
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: HyperV Socket hvs_trans_close called\n", __func__);
+
+ (void) hvs_trans_lock();
+ pcb = so2hvspcb(so);
+ if (!pcb) {
+ hvs_trans_unlock();
+ return;
+ }
+
+ if (so->so_state & SS_ISCONNECTED) {
+ /* Send a FIN to peer */
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "%s: hvs_trans_close sending a FIN to host\n", __func__);
+ /*
+ * NOTE(review): no sockbuf is passed here, unlike the FIN sent
+ * from hvs_trans_shutdown() -- confirm hvsock_send_data()
+ * accepts a NULL sockbuf.
+ */
+ (void) hvsock_send_data(pcb->chan, NULL, 0, NULL);
+ }
+
+ if (so->so_state &
+ (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
+ soisdisconnected(so);
+
+ /* hvsock_chan_cb() checks these two fields before touching the socket. */
+ pcb->chan = NULL;
+ pcb->so = NULL;
+
+ if (SOLISTENING(so)) {
+ mtx_lock(&hvs_trans_socks_mtx);
+ /* Remove from bound list */
+ __hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+ mtx_unlock(&hvs_trans_socks_mtx);
+ }
+
+ hvs_trans_unlock();
+
+ return;
+}
+
+/*
+ * Abort hook. A listening socket is removed from the bound list; a
+ * connected socket is disconnected through sodisconnect().
+ *
+ * The global lock covers only the pcb lookup and the list removal. It is
+ * released before sodisconnect() because that call dispatches to
+ * hvs_trans_disconnect(), which takes the same lock itself, and the lock
+ * is not initialized as recursable.
+ */
+void
+hvs_trans_abort(struct socket *so)
+{
+	struct hvs_pcb *pcb;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_abort called\n", __func__);
+
+	(void) hvs_trans_lock();
+	/* Read so_pcb under the lock, like detach and close do. */
+	pcb = so2hvspcb(so);
+	if (pcb == NULL) {
+		hvs_trans_unlock();
+		return;
+	}
+
+	if (SOLISTENING(so)) {
+		mtx_lock(&hvs_trans_socks_mtx);
+		/* Remove from bound list */
+		__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+		mtx_unlock(&hvs_trans_socks_mtx);
+	}
+	hvs_trans_unlock();
+
+	/* Must run unlocked: hvs_trans_disconnect() locks again. */
+	if (so->so_state & SS_ISCONNECTED) {
+		(void) sodisconnect(so);
+	}
+
+	return;
+}
+
+/*
+ * Shutdown hook. Returns EINVAL when the socket has no pcb, 0 otherwise.
+ *
+ * The original author notes that this is only reached for SHUT_WR and
+ * SHUT_RDWR, and that for SHUT_RDWR the caller has already set
+ * SBS_CANTRCVMORE on the receive buffer. That flag is therefore used to
+ * tell the two cases apart.
+ */
+int
+hvs_trans_shutdown(struct socket *so)
+{
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	struct sockbuf *snd;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: HyperV Socket hvs_trans_shutdown called\n", __func__);
+
+	if (pcb == NULL)
+		return (EINVAL);
+
+	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+		/*
+		 * SHUT_RDWR: if still connected, send a FIN (zero-length
+		 * send) with the send buffer locked, then start
+		 * disconnecting.
+		 */
+		if (so->so_state & SS_ISCONNECTED) {
+			snd = &so->so_snd;
+			SOCKBUF_LOCK(snd);
+			(void) hvsock_send_data(pcb->chan, NULL, 0, snd);
+			SOCKBUF_UNLOCK(snd);
+
+			soisdisconnecting(so);
+		}
+	} else {
+		/*
+		 * SHUT_WR only: the receive side stays open, just close
+		 * the send side.
+		 */
+		socantsendmore(so);
+	}
+
+	return (0);
+}
+
+/* In the VM, we support Hyper-V Sockets with AF_HYPERV, and the endpoint is
+ * <port> (see struct sockaddr_hvs).
+ *
+ * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
+ * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
+ * the below sockaddr:
+ *
+ * struct SOCKADDR_HV
+ * {
+ * ADDRESS_FAMILY Family;
+ * USHORT Reserved;
+ * GUID VmId;
+ * GUID ServiceId;
+ * };
+ * Note: VmID is not used by FreeBSD VM and actually it isn't transmitted via
+ * VMBus, because here it's obvious the host and the VM can easily identify
+ * each other. Though the VmID is useful on the host, especially in the case
+ * of Windows container, FreeBSD VM doesn't need it at all.
+ *
+ * To be compatible with similar infrastructure in Linux VMs, we have
+ * to limit the available GUID space of SOCKADDR_HV so that we can create
+ * a mapping between FreeBSD AF_HYPERV port and SOCKADDR_HV Service GUID.
+ * The rule of writing Hyper-V Sockets apps on the host and in FreeBSD VM is:
+ *
+ ****************************************************************************
+ * The only valid Service GUIDs, from the perspectives of both the host and *
+ * FreeBSD VM, that can be connected by the other end, must conform to this *
+ * format: <port>-facb-11e6-bd58-64006a7986d3. *
+ ****************************************************************************
+ *
+ * When we write apps on the host to connect(), the GUID ServiceID is used.
+ * When we write apps in FreeBSD VM to connect(), we only need to specify the
+ * port and the driver will form the GUID and use that to request the host.
+ *
+ * From the perspective of FreeBSD VM, the remote ephemeral port (i.e. the
+ * auto-generated remote port for a connect request initiated by the host's
+ * connect()) is set to HVADDR_PORT_UNKNOWN, which is not really used on the
+ * FreeBSD guest.
+ */
+
+/*
+ * Older HyperV hosts (vmbus version 'VMBUS_VERSION_WIN10' or before)
+ * restrict the HyperV socket ring buffer size to six 4K pages. Newer
+ * HyperV hosts don't have this limit.
+ */
+#define HVS_RINGBUF_RCV_SIZE (PAGE_SIZE * 6)
+#define HVS_RINGBUF_SND_SIZE (PAGE_SIZE * 6)
+#define HVS_RINGBUF_MAX_SIZE (PAGE_SIZE * 64)
+
+/*
+ * Per-device softc of the hv_sock driver; hvsock_driver registers
+ * sizeof(struct hvsock_sc) as its softc size.
+ */
+struct hvsock_sc {
+ device_t dev;
+ /*
+ * pcb of the connection carried by this device's channel. Set in
+ * hvsock_open_conn_passive() / hvsock_open_connection() and freed
+ * in hvsock_detach().
+ */
+ struct hvs_pcb *pcb;
+ struct vmbus_channel *channel;
+};
+
+/*
+ * Report whether the channel's receive ring currently holds at least
+ * HVSOCK_PKT_LEN(0) bytes, i.e. room for one packet with no payload.
+ */
+static bool
+hvsock_chan_readable(struct vmbus_channel *chan)
+{
+	return (vmbus_chan_read_available(chan) >= HVSOCK_PKT_LEN(0));
+}
+
+/*
+ * VMBus channel callback; hvsock_open_channel() registers it with the
+ * pcb as its context.  Wakes up the socket's reader when the receive
+ * ring holds a packet and the socket's writer when the send ring has
+ * usable space.
+ */
+static void
+hvsock_chan_cb(struct vmbus_channel *chan, void *context)
+{
+	struct hvs_pcb *pcb = context;
+	struct socket *so;
+	uint32_t wspace;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: host send us a wakeup on rb data, pcb = %p\n",
+	    __func__, pcb);
+
+	/*
+	 * The channel is known to be open at this point, but the socket
+	 * may already have been closed or freed, so look it up while
+	 * holding the transport lock and bail out if it is gone.
+	 */
+	(void) hvs_trans_lock();
+	so = hsvpcb2so(pcb);
+	if (pcb->chan == NULL || so == NULL)
+		goto done;
+
+	/* Receive side: wake up the reader if there are data to read. */
+	SOCKBUF_LOCK(&so->so_rcv);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: read available = %u\n", __func__,
+	    vmbus_chan_read_available(pcb->chan));
+
+	if (!hvsock_chan_readable(pcb->chan)) {
+		SOCKBUF_UNLOCK(&so->so_rcv);
+	} else {
+		sorwakeup_locked(so);
+	}
+
+	/* Send side: wake up the sender if space became available. */
+	SOCKBUF_LOCK(&so->so_snd);
+	wspace = hvsock_canwrite_check(pcb);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: canwrite = %u\n", __func__, wspace);
+
+	if (wspace == 0) {
+		SOCKBUF_UNLOCK(&so->so_snd);
+	} else {
+		sowwakeup_locked(so);
+	}
+
+done:
+	hvs_trans_unlock();
+}
+
+/*
+ * Ring buffer copy callback (passed to vmbus_chan_iov_send() by
+ * hvsock_send_data()): move cplen bytes between the ring buffer region
+ * at datap and the uio carried in cbarg, a struct hvs_callback_arg.
+ *
+ * If the argument also carries a sockbuf, its lock is dropped around
+ * uiomove() and re-taken afterwards (presumably because uiomove() may
+ * sleep -- TODO confirm).
+ *
+ * Returns EINVAL if cbarg, the uio it carries, or datap is NULL;
+ * otherwise the result of uiomove().
+ */
+static int
+hvsock_br_callback(void *datap, int cplen, void *cbarg)
+{
+	struct hvs_callback_arg *arg = cbarg;
+	struct sockbuf *sb;
+	struct uio *uio;
+	int error;
+
+	/* Validate the arguments before dereferencing any of them. */
+	if (arg == NULL || datap == NULL)
+		return (EINVAL);
+
+	uio = arg->uio;
+	sb = arg->sb;
+	if (uio == NULL)
+		return (EINVAL);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: called, uio_rw = %s, uio_resid = %lu, cplen = %u, "
+	    "datap = %p\n",
+	    __func__, (uio->uio_rw == UIO_READ) ? "read from br":"write to br",
+	    uio->uio_resid, cplen, datap);
+
+	if (sb != NULL)
+		SOCKBUF_UNLOCK(sb);
+
+	error = uiomove(datap, cplen, uio);
+
+	if (sb != NULL)
+		SOCKBUF_LOCK(sb);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: after uiomove, uio_resid = %lu, error = %d\n",
+	    __func__, uio->uio_resid, error);
+
+	return (error);
+}
+
+/*
+ * Queue one hvsock packet on the channel's send ring.
+ *
+ * With a uio and to_write > 0 the packet is: header, to_write payload
+ * bytes pulled from the uio through hvsock_br_callback(), then zero
+ * padding up to the padded packet length.  With to_write == 0 only the
+ * header and padding are sent.  When to_write > 0 but uio is NULL,
+ * nothing is sent and 0 is returned.
+ *
+ * Returns ENOTCONN when chan is NULL, otherwise the result of
+ * vmbus_chan_iov_send().
+ */
+static int
+hvsock_send_data(struct vmbus_channel *chan, struct uio *uio,
+    uint32_t to_write, struct sockbuf *sb)
+{
+	struct hvs_callback_arg cbarg;
+	struct hvs_pkt_header pkt;
+	struct iovec iov[3];
+	uint64_t pad = 0;
+	int chdr_len, hdr_len, pkt_len, tot_len;
+	int error = 0;
+
+	if (chan == NULL)
+		return (ENOTCONN);
+
+	chdr_len = sizeof(struct vmbus_chanpkt_hdr);
+	hdr_len = sizeof(struct hvs_pkt_header);
+	pkt_len = hdr_len + to_write;
+	tot_len = VMBUS_CHANPKT_TOTLEN(pkt_len);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: hlen = %u, hvs_pkthlen = %u, hvs_pktlen = %u, "
+	    "pad_pktlen = %u, data_len = %u\n",
+	    __func__, chdr_len, hdr_len, pkt_len, tot_len, to_write);
+
+	/* VMBus channel packet header. */
+	pkt.chan_pkt_hdr.cph_type = VMBUS_CHANPKT_TYPE_INBAND;
+	pkt.chan_pkt_hdr.cph_flags = 0;
+	VMBUS_CHANPKT_SETLEN(pkt.chan_pkt_hdr.cph_hlen, chdr_len);
+	VMBUS_CHANPKT_SETLEN(pkt.chan_pkt_hdr.cph_tlen, tot_len);
+	pkt.chan_pkt_hdr.cph_xactid = 0;
+
+	/* vmpipe header describing the payload that follows. */
+	pkt.vmpipe_pkt_hdr.vmpipe_pkt_type = 1;
+	pkt.vmpipe_pkt_hdr.vmpipe_data_size = to_write;
+
+	cbarg.uio = uio;
+	cbarg.sb = sb;
+
+	/* Every packet starts with the combined header. */
+	iov[0].iov_base = &pkt;
+	iov[0].iov_len = hdr_len;
+
+	if (uio != NULL && to_write > 0) {
+		/* A NULL base marks the part filled in via the callback. */
+		iov[1].iov_base = NULL;
+		iov[1].iov_len = to_write;
+		iov[2].iov_base = &pad;
+		iov[2].iov_len = tot_len - pkt_len;
+
+		error = vmbus_chan_iov_send(chan, iov, 3,
+		    hvsock_br_callback, &cbarg);
+	} else if (to_write == 0) {
+		iov[1].iov_base = &pad;
+		iov[1].iov_len = tot_len - pkt_len;
+
+		error = vmbus_chan_iov_send(chan, iov, 2, NULL, NULL);
+	}
+
+	if (error != 0) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: error = %d\n", __func__, error);
+	}
+
+	return (error);
+}
+
+/*
+ * Check whether the current ring buffer packet still has data to read.
+ * If not, advance the ring buffer read index past it, peek the next
+ * packet header and update pcb->recv_data_len and pcb->recv_data_off
+ * accordingly.
+ *
+ * Side effects on the socket: so_error is set to EIO when the channel is
+ * gone or the new packet header fails the length sanity checks, and to
+ * ESHUTDOWN when a zero-length (FIN) packet is received.
+ *
+ * Returns the number of bytes that can be read (0 if none, or if pcb is
+ * NULL).
+ */
+static uint32_t
+hvsock_canread_check(struct hvs_pcb *pcb)
+{
+	uint32_t advance;
+	uint32_t tlen, hlen, dlen;
+	uint32_t bytes_canread = 0;
+	int error;
+
+	/* Without a pcb there is no socket to report an error on. */
+	if (pcb == NULL)
+		return (0);
+
+	if (pcb->chan == NULL) {
+		pcb->so->so_error = EIO;
+		return (0);
+	}
+
+	/* Still have data not read yet on current packet */
+	if (pcb->recv_data_len > 0)
+		return (pcb->recv_data_len);
+
+	/*
+	 * Once a packet header has been peeked (rb_init), the packet it
+	 * describes must be skipped before the next header can be read.
+	 */
+	if (pcb->rb_init)
+		advance =
+		    VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
+	else
+		advance = 0;
+
+	bytes_canread = vmbus_chan_read_available(pcb->chan);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: bytes_canread on br = %u, advance = %u\n",
+	    __func__, bytes_canread, advance);
+
+	if (pcb->rb_init && bytes_canread == (advance + sizeof(uint64_t))) {
+		/*
+		 * Nothing to read. Need to advance the rindex before
+		 * calling sbwait, so host knows to wake us up when data
+		 * is available to read on rb.
+		 */
+		error = vmbus_chan_recv_idxadv(pcb->chan, advance);
+		if (error) {
+			HVSOCK_DBG(HVSOCK_DBG_ERR,
+			    "%s: after calling vmbus_chan_recv_idxadv, "
+			    "got error = %d\n", __func__, error);
+			return (0);
+		}
+
+		pcb->rb_init = false;
+		pcb->recv_data_len = 0;
+		pcb->recv_data_off = 0;
+		bytes_canread = vmbus_chan_read_available(pcb->chan);
+
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: advanced %u bytes, "
+		    " bytes_canread on br now = %u\n",
+		    __func__, advance, bytes_canread);
+
+		if (bytes_canread == 0)
+			return (0);
+		advance = 0;
+	}
+
+	/* A full header plus the 64-bit packet offset must be present. */
+	if (bytes_canread <
+	    advance + (sizeof(struct hvs_pkt_header) + sizeof(uint64_t)))
+		return (0);
+
+	error = vmbus_chan_recv_peek(pcb->chan, &pcb->hvs_pkt,
+	    sizeof(struct hvs_pkt_header), advance);
+
+	/* Don't have anything to read */
+	if (error) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: after calling vmbus_chan_recv_peek, got error = %d\n",
+		    __func__, error);
+		return (0);
+	}
+
+	/*
+	 * We just read in a new packet header. Do some sanity checks.
+	 * The payload check is written as a subtraction so that a huge
+	 * dlen cannot wrap the sum where size_t is 32 bits wide.
+	 */
+	tlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_tlen);
+	hlen = VMBUS_CHANPKT_GETLEN(pcb->hvs_pkt.chan_pkt_hdr.cph_hlen);
+	dlen = pcb->hvs_pkt.vmpipe_pkt_hdr.vmpipe_data_size;
+	if (__predict_false(hlen < sizeof(struct vmbus_chanpkt_hdr)) ||
+	    __predict_false(hlen > tlen) ||
+	    __predict_false(tlen < sizeof(struct hvs_pkt_header)) ||
+	    __predict_false(dlen > tlen - sizeof(struct hvs_pkt_header))) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "invalid tlen(%u), hlen(%u) or dlen(%u)\n",
+		    tlen, hlen, dlen);
+		pcb->so->so_error = EIO;
+		return (0);
+	}
+	pcb->rb_init = true;
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "Got new pkt tlen(%u), hlen(%u) or dlen(%u)\n",
+	    tlen, hlen, dlen);
+
+	/* The other side has sent a close FIN */
+	if (dlen == 0) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: Received FIN from other side\n", __func__);
+		/* inform the caller by setting so_error to ESHUTDOWN */
+		pcb->so->so_error = ESHUTDOWN;
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: canread on receive ring is %u \n", __func__, dlen);
+
+	pcb->recv_data_len = dlen;
+	pcb->recv_data_off = 0;
+
+	return (pcb->recv_data_len);
+}
+
+/*
+ * Compute how many payload bytes can currently be written to the send
+ * ring.  The result is rounded down to a multiple of 8; it is 0 when the
+ * pcb or its channel is missing, or when the ring cannot hold a one-byte
+ * packet plus the zero-payload packet reserved for the FIN.
+ */
+static uint32_t
+hvsock_canwrite_check(struct hvs_pcb *pcb)
+{
+	uint32_t payload, space;
+
+	if (pcb == NULL || pcb->chan == NULL)
+		return (0);
+
+	space = vmbus_chan_write_available(pcb->chan);
+
+	/*
+	 * We must always reserve a 0-length-payload packet for the FIN.
+	 */
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: writeable is %u, should be greater than %lu\n",
+	    __func__, space, HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0));
+
+	/* The Tx ring seems full. */
+	if (space < HVSOCK_PKT_LEN(1) + HVSOCK_PKT_LEN(0))
+		return (0);
+
+	payload = space - HVSOCK_PKT_LEN(0) - HVSOCK_PKT_LEN(0);
+	payload = rounddown2(payload, 8);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "%s: available size is %u\n", __func__, payload);
+
+	return (payload);
+}
+
+/*
+ * Set the channel's pending send size to the length of a packet that
+ * carries HVSOCK_SEND_BUF_SZ payload bytes.
+ */
+static void
+hvsock_set_chan_pending_send_size(struct vmbus_channel *chan)
+{
+	uint32_t pending = HVSOCK_PKT_LEN(HVSOCK_SEND_BUF_SZ);
+
+	vmbus_chan_set_pending_send_size(chan, pending);
+}
+
+/*
+ * Open the vmbus channel for the given socket, with hvsock_chan_cb() as
+ * the channel callback and the socket's pcb as its argument.
+ *
+ * Ring sizes: hosts older than VMBUS_VERSION_WIN10_V5 get the fixed
+ * HVS_RINGBUF_{SND,RCV}_SIZE; newer hosts get the socket buffer high
+ * water mark clamped to [HVS_RINGBUF_*_SIZE, HVS_RINGBUF_MAX_SIZE] and
+ * rounded down to a page multiple.
+ *
+ * Returns the result of vmbus_chan_open() (0 on success).
+ */
+static int
+hvsock_open_channel(struct vmbus_channel *chan, struct socket *so)
+{
+	unsigned int rcvbuf, sndbuf;
+	struct hvs_pcb *pcb = so2hvspcb(so);
+	int ret;
+
+	if (vmbus_current_version < VMBUS_VERSION_WIN10_V5) {
+		sndbuf = HVS_RINGBUF_SND_SIZE;
+		rcvbuf = HVS_RINGBUF_RCV_SIZE;
+	} else {
+		sndbuf = MAX(so->so_snd.sb_hiwat, HVS_RINGBUF_SND_SIZE);
+		sndbuf = MIN(sndbuf, HVS_RINGBUF_MAX_SIZE);
+		sndbuf = rounddown2(sndbuf, PAGE_SIZE);
+		rcvbuf = MAX(so->so_rcv.sb_hiwat, HVS_RINGBUF_RCV_SIZE);
+		rcvbuf = MIN(rcvbuf, HVS_RINGBUF_MAX_SIZE);
+		rcvbuf = rounddown2(rcvbuf, PAGE_SIZE);
+	}
+
+	/*
+	 * Can only read whatever user provided size of data
+	 * from ring buffer. Turn off batched reading.
+	 */
+	vmbus_chan_set_readbatch(chan, false);
+
+	ret = vmbus_chan_open(chan, sndbuf, rcvbuf, NULL, 0,
+	    hvsock_chan_cb, pcb);
+
+	if (ret != 0) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: failed to open hvsock channel, sndbuf = %u, "
+		    "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
+	} else {
+		HVSOCK_DBG(HVSOCK_DBG_INFO,
+		    "%s: hvsock channel opened, sndbuf = %u, "
+		    "rcvbuf = %u\n", __func__, sndbuf, rcvbuf);
+		/*
+		 * Set the pending send size so that we receive wakeup
+		 * signals from the host when there is enough space on
+		 * the buffer ring to write.
+		 */
+		hvsock_set_chan_pending_send_size(chan);
+	}
+
+	return (ret);
+}
+
+/*
+ * Guest is listening passively on the socket. Open channel and
+ * create a new socket for the connection.
+ *
+ * so is the bound, listening socket; sc is the softc of the device that
+ * owns chan and gets its pcb pointer set to the new connection's pcb.
+ * Nothing is done when so is not listening or when no new socket can be
+ * created.
+ */
+static void
+hvsock_open_conn_passive(struct vmbus_channel *chan, struct socket *so,
+    struct hvsock_sc *sc)
+{
+	struct socket *new_so;
+	struct hvs_pcb *new_pcb, *pcb;
+	int error;
+
+	/* Do nothing if socket is not listening */
+	if ((so->so_options & SO_ACCEPTCONN) == 0) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: socket is not a listening one\n", __func__);
+		return;
+	}
+
+	/*
+	 * Create a new socket. This will call pru_attach to complete
+	 * the socket initialization and put the new socket onto
+	 * listening socket's sol_incomp list, waiting to be promoted
+	 * to sol_comp list.
+	 * The new socket created has ref count 0. There is no other
+	 * thread that changes the state of this new one at the
+	 * moment, so we don't need to hold its lock while opening
+	 * channel and filling out its pcb information.
+	 */
+	new_so = sonewconn(so, 0);
+	if (new_so == NULL) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: creating new socket failed\n", __func__);
+		/* Without a socket there is nothing to attach chan to. */
+		return;
+	}
+
+	/*
+	 * Now open the vmbus channel. If it fails, the socket will be
+	 * on the listening socket's sol_incomp queue until it is
+	 * replaced and aborted.
+	 */
+	error = hvsock_open_channel(chan, new_so);
+	if (error) {
+		new_so->so_error = error;
+		return;
+	}
+
+	pcb = so->so_pcb;
+	new_pcb = new_so->so_pcb;
+
+	hvs_addr_set(&(new_pcb->local_addr), pcb->local_addr.hvs_port);
+	/* Remote port is unknown to guest in this type of connection */
+	hvs_addr_set(&(new_pcb->remote_addr), HVADDR_PORT_UNKNOWN);
+	new_pcb->chan = chan;
+	new_pcb->recv_data_len = 0;
+	new_pcb->recv_data_off = 0;
+	new_pcb->rb_init = false;
+
+	new_pcb->vm_srv_id = *vmbus_chan_guid_type(chan);
+	new_pcb->host_srv_id = *vmbus_chan_guid_inst(chan);
+
+	hvs_insert_socket_on_list(new_so, HVS_LIST_CONNECTED);
+
+	sc->pcb = new_pcb;
+
+	/*
+	 * Change the socket state to SS_ISCONNECTED. This will promote
+	 * the socket to sol_comp queue and wake up the thread which
+	 * is accepting connection.
+	 */
+	soisconnected(new_so);
+}
+
+
+/*
+ * Guest is actively connecting to host: open the channel for the
+ * connecting socket, reset the pcb's receive state, move the socket from
+ * the bound list to the connected list and mark it connected.  If the
+ * channel cannot be opened, the error is stored in so_error instead.
+ */
+static void
+hvsock_open_conn_active(struct vmbus_channel *chan, struct socket *so)
+{
+	struct hvs_pcb *pcb;
+	int error;
+
+	error = hvsock_open_channel(chan, so);
+	if (error != 0) {
+		so->so_error = error;
+		return;
+	}
+
+	/* Fresh connection: nothing has been received on the ring yet. */
+	pcb = so->so_pcb;
+	pcb->chan = chan;
+	pcb->rb_init = false;
+	pcb->recv_data_off = 0;
+	pcb->recv_data_len = 0;
+
+	/* Bound -> connected, both list updates under the list mutex. */
+	mtx_lock(&hvs_trans_socks_mtx);
+	__hvs_remove_socket_from_list(so, HVS_LIST_BOUND);
+	__hvs_insert_socket_on_list(so, HVS_LIST_CONNECTED);
+	mtx_unlock(&hvs_trans_socks_mtx);
+
+	/*
+	 * Switching the socket state to SS_ISCONNECTED wakes up the
+	 * thread sleeping in the connect call.
+	 */
+	soisconnected(so);
+}
+
+/*
+ * Handle a newly offered hvsock channel: find the bound socket that the
+ * channel's GUIDs refer to and finish the connection, either passively
+ * (host initiated) or actively (guest initiated).  Channels whose type
+ * GUID is not a valid service id, or for which no bound socket exists,
+ * are ignored.
+ */
+static void
+hvsock_open_connection(struct vmbus_channel *chan, struct hvsock_sc *sc)
+{
+	struct hyperv_guid *inst_guid, *type_guid;
+	struct sockaddr_hvs addr;
+	struct hvs_pcb *pcb;
+	struct socket *so;
+	bool conn_from_host;
+
+	type_guid = (struct hyperv_guid *) vmbus_chan_guid_type(chan);
+	inst_guid = (struct hyperv_guid *) vmbus_chan_guid_inst(chan);
+	conn_from_host = vmbus_chan_is_hvs_conn_from_host(chan);
+
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "type_guid is ");
+	hvsock_print_guid(type_guid);
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "inst_guid is ");
+	hvsock_print_guid(inst_guid);
+	HVSOCK_DBG(HVSOCK_DBG_INFO, "connection %s host\n",
+	    conn_from_host ? "from" : "to");
+
+	/*
+	 * The listening port should be in [0, MAX_LISTEN_PORT]
+	 */
+	if (!is_valid_srv_id(type_guid))
+		return;
+
+	/*
+	 * A bound socket must already exist for both kinds of
+	 * connection.  For a host initiated (passive) connection the
+	 * type_guid carries the port the guest is bound to and listening
+	 * on; for a guest initiated (active) connection the inst_guid
+	 * carries the port the guest has auto bound to.
+	 */
+	hvs_addr_init(&addr, conn_from_host ? type_guid : inst_guid);
+	so = hvs_find_socket_on_list(&addr, HVS_LIST_BOUND);
+	if (so == NULL) {
+		HVSOCK_DBG(HVSOCK_DBG_ERR,
+		    "%s: no bound socket found for port %u\n",
+		    __func__, addr.hvs_port);
+		return;
+	}
+
+	if (conn_from_host) {
+		hvsock_open_conn_passive(chan, so, sc);
+		return;
+	}
+
+	/* Active connection: the socket must still own a live pcb. */
+	(void) hvs_trans_lock();
+	pcb = so->so_pcb;
+	if (pcb == NULL || pcb->so == NULL) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "%s: channel detached before open\n", __func__);
+	} else {
+		sc->pcb = so2hvspcb(so);
+		hvsock_open_conn_active(chan, so);
+	}
+	hvs_trans_unlock();
+}
+
+/*
+ * Device probe: accept only vmbus channels that are Hyper-V socket
+ * channels.  Returns BUS_PROBE_DEFAULT for those and ENXIO otherwise,
+ * including when the device has no channel at all.
+ */
+static int
+hvsock_probe(device_t dev)
+{
+	struct vmbus_channel *channel = vmbus_get_channel(dev);
+
+	/* Never hand a NULL channel to vmbus_chan_id() below. */
+	if (channel == NULL) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "hvsock_probe called without a channel\n");
+		return (ENXIO);
+	}
+
+	if (!vmbus_chan_is_hvs(channel)) {
+		HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+		    "hvsock_probe called but not a hvsock channel id %u\n",
+		    vmbus_chan_id(channel));
+		return (ENXIO);
+	}
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+	    "hvsock_probe got a hvsock channel id %u\n",
+	    vmbus_chan_id(channel));
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Device attach: try to complete the hvsock connection that this
+ * channel was offered for.
+ *
+ * Success is reported unconditionally.  On error the host will rescind
+ * the device in 30 seconds and cleanup can be done at that time in
+ * vmbus_chan_msgproc_chrescind().
+ */
+static int
+hvsock_attach(device_t dev)
+{
+	struct vmbus_channel *chan = vmbus_get_channel(dev);
+	struct hvsock_sc *softc = device_get_softc(dev);
+
+	HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_attach called.\n");
+
+	hvsock_open_connection(chan, softc);
+
+	return (0);
+}
+
+/*
+ * Device detach: tear down the connection attached to this channel.
+ *
+ * If a pcb is attached to the softc: mark its socket (if any)
+ * disconnected, remove the pcb from the bound/connected lists, acquire
+ * both sockbuf I/O locks so that no reader or sender is using the
+ * buffer rings, free the pcb and clear the socket's so_pcb pointer.
+ * All of that runs under the transport lock.  The vmbus channel is
+ * closed last, in every case.  Always returns 0.
+ */
+static int
+hvsock_detach(device_t dev)
+{
+ struct hvsock_sc *sc = (struct hvsock_sc *)device_get_softc(dev);
+ struct socket *so;
+ int error, retry;
+
+ if (bootverbose)
+ device_printf(dev, "hvsock_detach called.\n");
+
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "hvsock_detach called.\n");
+
+ if (sc->pcb != NULL) {
+ (void) hvs_trans_lock();
+
+ so = hsvpcb2so(sc->pcb);
+ if (so) {
+ /* Close the connection */
+ if (so->so_state &
+ (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
+ soisdisconnected(so);
+ }
+
+ mtx_lock(&hvs_trans_socks_mtx);
+ __hvs_remove_pcb_from_list(sc->pcb,
+ HVS_LIST_BOUND | HVS_LIST_CONNECTED);
+ mtx_unlock(&hvs_trans_socks_mtx);
+
+ /*
+ * Close channel while no reader and sender are working
+ * on the buffer rings.
+ */
+ if (so) {
+ /*
+ * NOTE(review): both retry loops below are unbounded and
+ * busy-wait with DELAY() while the transport lock is held;
+ * "retry" is only used for the debug message.
+ */
+ retry = 0;
+ while ((error = sblock(&so->so_rcv, 0)) ==
+ EWOULDBLOCK) {
+ /*
+ * Someone is reading, rx br is busy
+ */
+ soisdisconnected(so);
+ DELAY(500);
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "waiting for rx reader to exit, "
+ "retry = %d\n", retry++);
+ }
+ retry = 0;
+ while ((error = sblock(&so->so_snd, 0)) ==
+ EWOULDBLOCK) {
+ /*
+ * Someone is sending, tx br is busy
+ */
+ soisdisconnected(so);
+ DELAY(500);
+ HVSOCK_DBG(HVSOCK_DBG_VERBOSE,
+ "waiting for tx sender to exit, "
+ "retry = %d\n", retry++);
+ }
+ }
+
+
+ /* Scrub the pcb before freeing it and drop the softc's reference. */
+ bzero(sc->pcb, sizeof(struct hvs_pcb));
+ free(sc->pcb, M_HVSOCK);
+ sc->pcb = NULL;
+
+ if (so) {
+ /* Release the I/O locks taken above and detach the freed pcb. */
+ sbunlock(&so->so_rcv);
+ sbunlock(&so->so_snd);
+ so->so_pcb = NULL;
+ }
+
+ hvs_trans_unlock();
+ }
+
+ vmbus_chan_close(vmbus_get_channel(dev));
+
+ return (0);
+}
+
+/* Device methods implemented by the hv_sock driver. */
+static device_method_t hvsock_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, hvsock_probe),
+ DEVMETHOD(device_attach, hvsock_attach),
+ DEVMETHOD(device_detach, hvsock_detach),
+ DEVMETHOD_END
+};
+
+/* Driver description: name, method table and per-device softc size. */
+static driver_t hvsock_driver = {
+ "hv_sock",
+ hvsock_methods,
+ sizeof(struct hvsock_sc)
+};
+
+static devclass_t hvsock_devclass;
+
+/* Register the driver on the vmbus bus; the module depends on vmbus. */
+DRIVER_MODULE(hvsock, vmbus, hvsock_driver, hvsock_devclass, NULL, NULL);
+MODULE_VERSION(hvsock, 1);
+MODULE_DEPEND(hvsock, vmbus, 1, 1, 1);
Index: head/sys/dev/hyperv/include/vmbus.h
===================================================================
--- head/sys/dev/hyperv/include/vmbus.h
+++ head/sys/dev/hyperv/include/vmbus.h
@@ -31,6 +31,7 @@
#include <sys/param.h>
#include <sys/bus.h>
+#include <sys/_iovec.h>
/*
* VMBUS version is 32 bit, upper 16 bit for major_number and lower
@@ -130,6 +131,7 @@
struct taskqueue;
typedef void (*vmbus_chan_callback_t)(struct vmbus_channel *, void *);
+typedef int (*vmbus_br_copy_callback_t)(void *, int, void *);
static __inline struct vmbus_channel *
vmbus_get_channel(device_t dev)
@@ -205,6 +207,14 @@
int vmbus_chan_recv_pkt(struct vmbus_channel *chan,
struct vmbus_chanpkt_hdr *pkt, int *pktlen);
+int vmbus_chan_recv_idxadv(struct vmbus_channel *chan,
+ uint32_t advance);
+int vmbus_chan_recv_peek(struct vmbus_channel *chan,
+ void *data, int data_len, uint32_t advance);
+int vmbus_chan_recv_peek_call(struct vmbus_channel *chan,
+ int data_len, uint32_t skip,
+ vmbus_br_copy_callback_t cb, void *cbarg);
+
int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
uint16_t flags, void *data, int dlen, uint64_t xactid);
int vmbus_chan_send_sglist(struct vmbus_channel *chan,
@@ -213,13 +223,30 @@
int vmbus_chan_send_prplist(struct vmbus_channel *chan,
struct vmbus_gpa_range *prp, int prp_cnt, void *data,
int dlen, uint64_t xactid);
+int vmbus_chan_iov_send(struct vmbus_channel *chan,
+ const struct iovec iov[], int iovlen,
+ vmbus_br_copy_callback_t cb, void *cbarg);
+uint32_t vmbus_chan_write_available(struct vmbus_channel *chan);
+uint32_t vmbus_chan_read_available(struct vmbus_channel *chan);
+bool vmbus_chan_write_signal(struct vmbus_channel *chan,
+ int32_t min_signal_size);
+void vmbus_chan_set_pending_send_size(struct vmbus_channel *chan,
+ uint32_t size);
uint32_t vmbus_chan_id(const struct vmbus_channel *chan);
uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan);
bool vmbus_chan_is_primary(const struct vmbus_channel *chan);
bool vmbus_chan_is_revoked(const struct vmbus_channel *chan);
-const struct hyperv_guid *
- vmbus_chan_guid_inst(const struct vmbus_channel *chan);
+bool vmbus_chan_is_hvs(const struct vmbus_channel *chan);
+bool vmbus_chan_is_hvs_conn_from_host(
+ const struct vmbus_channel *chan);
+int vmbus_req_tl_connect(struct hyperv_guid *,
+ struct hyperv_guid *);
+
+struct hyperv_guid *
+ vmbus_chan_guid_type(struct vmbus_channel *chan);
+struct hyperv_guid *
+ vmbus_chan_guid_inst(struct vmbus_channel *chan);
int vmbus_chan_prplist_nelem(int br_size, int prpcnt_max,
int dlen_max);
bool vmbus_chan_rx_empty(const struct vmbus_channel *chan);
Index: head/sys/dev/hyperv/vmbus/vmbus.c
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus.c
+++ head/sys/dev/hyperv/vmbus/vmbus.c
@@ -365,10 +365,46 @@
uint32_t gpadl;
again:
- gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
+ gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
if (gpadl == 0)
goto again;
return (gpadl);
+}
+
+/*
+ * Used for Hyper-V socket when guest client connects to host.
+ *
+ * Builds a VMBUS_CHANMSG_TYPE_TL_CONN channel message carrying the
+ * guest endpoint id and the host service id and issues it with
+ * vmbus_msghc_exec_noresult().
+ *
+ * Returns ENXIO when the vmbus softc or a message hypercall context is
+ * not available, otherwise the result of executing the hypercall.
+ */
+int
+vmbus_req_tl_connect(struct hyperv_guid *guest_srv_id,
+ struct hyperv_guid *host_srv_id)
+{
+ struct vmbus_softc *sc = vmbus_get_softc();
+ struct vmbus_chanmsg_tl_connect *req;
+ struct vmbus_msghc *mh;
+ int error;
+
+ if (!sc)
+ return ENXIO;
+
+ mh = vmbus_msghc_get(sc, sizeof(*req));
+ if (mh == NULL) {
+ device_printf(sc->vmbus_dev,
+ "can not get msg hypercall for tl connect\n");
+ return ENXIO;
+ }
+
+ req = vmbus_msghc_dataptr(mh);
+ req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_TL_CONN;
+ req->guest_endpoint_id = *guest_srv_id;
+ req->host_service_id = *host_srv_id;
+
+ error = vmbus_msghc_exec_noresult(mh);
+ /* The hypercall context is released whether or not the call failed. */
+ vmbus_msghc_put(sc, mh);
+
+ if (error) {
+ device_printf(sc->vmbus_dev,
+ "tl connect msg hypercall failed\n");
+ }
+
+ return error;
}
static int
Index: head/sys/dev/hyperv/vmbus/vmbus_br.c
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_br.c
+++ head/sys/dev/hyperv/vmbus/vmbus_br.c
@@ -52,18 +52,23 @@
vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS)
{
const struct vmbus_br *br = arg1;
- uint32_t rindex, windex, imask, ravail, wavail;
+ uint32_t rindex, windex, imask, psndsz, fvalue, ravail, wavail;
+ uint64_t intrcnt;
char state[256];
+ intrcnt = br->vbr_intrcnt;
rindex = br->vbr_rindex;
windex = br->vbr_windex;
imask = br->vbr_imask;
+ psndsz = br->vbr_psndsz;
+ fvalue = br->vbr_fvalue;
wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
ravail = br->vbr_dsize - wavail;
snprintf(state, sizeof(state),
- "rindex:%u windex:%u imask:%u ravail:%u wavail:%u",
- rindex, windex, imask, ravail, wavail);
+ "intrcnt:%lu rindex:%u windex:%u imask:%u psndsz:%u fvalue:%u "
+ "ravail:%u wavail:%u",
+ intrcnt, rindex, windex, imask, psndsz, fvalue, ravail, wavail);
return sysctl_handle_string(oidp, state, sizeof(state), req);
}
@@ -76,9 +81,11 @@
#define BR_STATE_RIDX 0
#define BR_STATE_WIDX 1
#define BR_STATE_IMSK 2
-#define BR_STATE_RSPC 3
-#define BR_STATE_WSPC 4
-#define BR_STATE_MAX 5
+#define BR_STATE_PSSZ 3
+#define BR_STATE_FVAL 4
+#define BR_STATE_RSPC 5
+#define BR_STATE_WSPC 6
+#define BR_STATE_MAX 7
const struct vmbus_br *br = arg1;
uint32_t rindex, windex, wavail, state[BR_STATE_MAX];
@@ -90,6 +97,8 @@
state[BR_STATE_RIDX] = rindex;
state[BR_STATE_WIDX] = windex;
state[BR_STATE_IMSK] = br->vbr_imask;
+ state[BR_STATE_PSSZ] = br->vbr_psndsz;
+ state[BR_STATE_FVAL] = br->vbr_fvalue;
state[BR_STATE_WSPC] = wavail;
state[BR_STATE_RSPC] = br->vbr_dsize - wavail;
@@ -140,6 +149,12 @@
}
+/* Non-inline wrapper returning vmbus_rxbr_avail() for the RX bufring. */
uint32_t
+vmbus_rxbr_available(const struct vmbus_rxbr *rbr)
+{
+ return (vmbus_rxbr_avail(rbr));
+}
+
+uint32_t
vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr)
{
rbr->rxbr_imask = 0;
@@ -178,6 +193,40 @@
vmbus_br_setup(&rbr->rxbr, buf, blen);
}
+/*
+ * Decide whether the host should be signaled after bytes_read bytes
+ * have been consumed from the RX bufring.
+ *
+ * Returns true only when the pending-send-size feature bit is set, the
+ * pending send size is non-zero, and this read moved the ring's
+ * writable space from at most the pending send size to above it.
+ */
+static __inline boolean_t
+vmbus_rxbr_need_signal(const struct vmbus_rxbr *rbr, uint32_t bytes_read)
+{
+ uint32_t pending_snd_sz, canwrite_size;
+
+ /* No need to signal if host doesn't want us to */
+ if (!rbr->rxbr_fpsndsz)
+ return false;
+
+ mb();
+
+ pending_snd_sz = rbr->rxbr_psndsz;
+ /* No need to signal if host sets pending_snd_sz to 0 */
+ if (!pending_snd_sz)
+ return false;
+
+ mb();
+
+ /* Writable space as of now, i.e. after the read being reported. */
+ canwrite_size = rbr->rxbr_dsize - vmbus_rxbr_avail(rbr);
+
+ /* No need to signal if br already has enough space before read */
+ if (canwrite_size - bytes_read > pending_snd_sz)
+ return false;
+
+ /*
+ * No need to signal if still doesn't have enough space
+ * asked by host
+ */
+ if (canwrite_size <= pending_snd_sz)
+ return false;
+
+ return true;
+}
+
void
vmbus_txbr_init(struct vmbus_txbr *tbr)
{
@@ -194,8 +243,25 @@
vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen)
{
vmbus_br_setup(&tbr->txbr, buf, blen);
+
+ /* Set feature bit enabling flow control */
+ tbr->txbr_fpsndsz = 1;
}
+/* Return the TX bufring's imask field, read after a memory barrier. */
+uint32_t
+vmbus_txbr_get_imask(const struct vmbus_txbr *tbr)
+{
+ mb();
+
+ return(tbr->txbr_imask);
+}
+
+/* Store size in the TX bufring's pending send size field. */
+void
+vmbus_txbr_set_pending_snd_sz(struct vmbus_txbr *tbr, uint32_t size)
+{
+ tbr->txbr_psndsz = size;
+}
+
/*
* When we write to the ring buffer, check if the host needs to be
* signaled.
@@ -260,7 +326,117 @@
return VMBUS_BR_IDXINC(windex, cplen, br_dsize);
}
+/*
+ * Let the callback fill cplen bytes of the TX bufring starting at
+ * windex.  When the region wraps past the end of the ring the callback
+ * is invoked twice, once per contiguous piece; the second call is
+ * skipped if the first one fails.
+ *
+ * The callback's status is stored in *ret.  The return value is windex
+ * advanced by cplen, regardless of that status.
+ */
+static __inline uint32_t
+vmbus_txbr_copyto_call(const struct vmbus_txbr *tbr, uint32_t windex,
+    uint32_t cplen, vmbus_br_copy_callback_t cb, void *cbarg, int *ret)
+{
+	uint8_t *base = tbr->txbr_data;
+	const uint32_t dsize = tbr->txbr_dsize;
+	const uint32_t tail = dsize - windex;
+	int error;
+
+	if (cplen <= tail) {
+		error = cb((void *)(base + windex), cplen, cbarg);
+	} else {
+		/* Wrap-around detected */
+		error = cb((void *)(base + windex), tail, cbarg);
+		if (error == 0)
+			error = cb((void *)base, cplen - tail, cbarg);
+	}
+
+	*ret = error;
+
+	return (VMBUS_BR_IDXINC(windex, cplen, dsize));
+}
+
+/* Non-inline wrapper returning vmbus_txbr_avail() for the TX bufring. */
+uint32_t
+vmbus_txbr_available(const struct vmbus_txbr *tbr)
+{
+ return (vmbus_txbr_avail(tbr));
+}
+
/*
+ * Write the scattered channel packet described by iov[] to the TX
+ * bufring.  Entries with a non-NULL iov_base are copied directly;
+ * entries with a NULL iov_base are filled by calling
+ * cb(ring address, length, cbarg), and are skipped when cb is NULL.
+ *
+ * Returns EAGAIN when the ring lacks room for the packet plus its
+ * 64-bit offset, the callback's error if it fails (the write index is
+ * then left untouched), and 0 on success.  On success *need_sig, when
+ * supplied, receives the result of vmbus_txbr_need_signal().
+ *
+ * NOTE:
+ * Not holding lock when calling user provided callback routine.
+ * Caller should hold lock to serialize ring buffer accesses.
+ */
+int
+vmbus_txbr_write_call(struct vmbus_txbr *tbr,
+ const struct iovec iov[], int iovlen,
+ vmbus_br_copy_callback_t cb, void *cbarg,
+ boolean_t *need_sig)
+{
+ uint32_t old_windex, windex, total;
+ uint64_t save_windex;
+ int i;
+ int cb_ret = 0;
+
+ /* Total bytes needed: all iov entries plus the trailing offset. */
+ total = 0;
+ for (i = 0; i < iovlen; i++)
+ total += iov[i].iov_len;
+ total += sizeof(save_windex);
+
+
+ /*
+ * NOTE:
+ * If this write is going to make br_windex same as br_rindex,
+ * i.e. the available space for write is same as the write size,
+ * we can't do it then, since br_windex == br_rindex means that
+ * the bufring is empty.
+ */
+ if (vmbus_txbr_avail(tbr) <= total) {
+ return (EAGAIN);
+ }
+
+ /* Save br_windex for later use */
+ old_windex = tbr->txbr_windex;
+
+ /*
+ * Copy the scattered channel packet to the TX bufring.
+ */
+ windex = old_windex;
+ for (i = 0; i < iovlen; i++) {
+ if (iov[i].iov_base != NULL) {
+ windex = vmbus_txbr_copyto(tbr, windex,
+ iov[i].iov_base, iov[i].iov_len);
+ } else if (cb != NULL) {
+ windex = vmbus_txbr_copyto_call(tbr, windex,
+ iov[i].iov_len, cb, cbarg, &cb_ret);
+ /*
+ * If callback fails, return without updating
+ * write index.
+ */
+ if (cb_ret)
+ return (cb_ret);
+ }
+ }
+
+ mtx_lock_spin(&tbr->txbr_lock);
+
+ /*
+ * Set the offset of the current channel packet.
+ */
+ save_windex = ((uint64_t)old_windex) << 32;
+ windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
+ sizeof(save_windex));
+
+ /*
+ * Update the write index _after_ the channel packet
+ * is copied.
+ */
+ __compiler_membar();
+ tbr->txbr_windex = windex;
+
+ mtx_unlock_spin(&tbr->txbr_lock);
+
+ if (need_sig)
+ *need_sig = vmbus_txbr_need_signal(tbr, old_windex);
+
+ return (0);
+}
+
+/*
* Write scattered channel packet to TX bufring.
*
* The offset of this channel packet is written as a 64bits value
@@ -346,6 +522,27 @@
return VMBUS_BR_IDXINC(rindex, cplen, br_dsize);
}
+/*
+ * Hand cplen bytes of the RX bufring, starting at rindex, to the
+ * callback.  When the region wraps past the end of the ring the callback
+ * is invoked twice, once per contiguous piece; the second call is
+ * skipped if the first one fails.
+ *
+ * Unlike vmbus_rxbr_copyfrom(), this returns the callback's status
+ * (0 on success) rather than an advanced ring index, hence the int
+ * return type.
+ */
+static __inline int
+vmbus_rxbr_copyfrom_call(const struct vmbus_rxbr *rbr, uint32_t rindex,
+    int cplen, vmbus_br_copy_callback_t cb, void *cbarg)
+{
+	uint8_t *br_data = rbr->rxbr_data;
+	uint32_t br_dsize = rbr->rxbr_dsize;
+	int error = 0;
+
+	if (cplen > br_dsize - rindex) {
+		uint32_t fraglen = br_dsize - rindex;
+
+		/* Wrap-around detected. */
+		error = cb((void *)(br_data + rindex), fraglen, cbarg);
+		if (!error)
+			error = cb((void *)br_data, cplen - fraglen, cbarg);
+	} else {
+		error = cb((void *)(br_data + rindex), cplen, cbarg);
+	}
+	return (error);
+}
+
int
vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen)
{
@@ -362,6 +559,121 @@
vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
mtx_unlock_spin(&rbr->rxbr_lock);
+
+ return (0);
+}
+
+/*
+ * Hand dlen bytes of the RX bufring, starting skip bytes past the
+ * current read index, to the caller's callback.  The read index itself
+ * is not moved.
+ *
+ * NOTE:
+ * The spin lock only covers the space check and the index computation;
+ * it is dropped before the user provided callback runs.  The caller
+ * should hold a lock of its own to serialize ring buffer accesses.
+ *
+ * Returns EAGAIN unless skip + dlen bytes plus the 64-bit channel packet
+ * offset are available, otherwise the callback's result.
+ */
+int
+vmbus_rxbr_peek_call(struct vmbus_rxbr *rbr, int dlen, uint32_t skip,
+    vmbus_br_copy_callback_t cb, void *cbarg)
+{
+	const uint32_t dsize = rbr->rxbr_dsize;
+	uint32_t start;
+
+	mtx_lock_spin(&rbr->rxbr_lock);
+	if (vmbus_rxbr_avail(rbr) < skip + dlen + sizeof(uint64_t)) {
+		mtx_unlock_spin(&rbr->rxbr_lock);
+		return (EAGAIN);
+	}
+	start = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, dsize);
+	mtx_unlock_spin(&rbr->rxbr_lock);
+
+	return (vmbus_rxbr_copyfrom_call(rbr, start, dlen, cb, cbarg));
+}
+
+/*
+ * Optionally advance the RX read index, then copy dlen bytes found at
+ * the (possibly new) read index into data without consuming them.
+ *
+ * When idx_adv > 0 the read index moves forward by idx_adv plus the
+ * 64-bit offset that follows a channel packet.  Returns EAGAIN when
+ * fewer than idx_adv + 8 + dlen bytes are available, else 0.  When
+ * need_sig is supplied it receives vmbus_rxbr_need_signal()'s verdict if
+ * the index was advanced, and false otherwise.
+ *
+ * NOTE:
+ * We assume idx_adv == sizeof(channel packet).
+ */
+int
+vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data, int dlen,
+ uint32_t idx_adv, boolean_t *need_sig)
+{
+ uint32_t rindex, br_dsize = rbr->rxbr_dsize;
+
+ mtx_lock_spin(&rbr->rxbr_lock);
+ /*
+ * Make sure it has enough data to read.
+ */
+ if (vmbus_rxbr_avail(rbr) < idx_adv + sizeof(uint64_t) + dlen) {
+ mtx_unlock_spin(&rbr->rxbr_lock);
+ return (EAGAIN);
+ }
+
+ if (idx_adv > 0) {
+ /*
+ * Advance the read index first, including the channel's 64bit
+ * previous write offset.
+ */
+ rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
+ idx_adv + sizeof(uint64_t), br_dsize);
+ __compiler_membar();
+ rbr->rxbr_rindex = rindex;
+ }
+
+ vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
+
+ mtx_unlock_spin(&rbr->rxbr_lock);
+
+ /* The signal decision is made after the spin lock is dropped. */
+ if (need_sig) {
+ if (idx_adv > 0)
+ *need_sig =
+ vmbus_rxbr_need_signal(rbr, idx_adv +
+ sizeof(uint64_t));
+ else
+ *need_sig = false;
+ }
+
+ return (0);
+}
+
+/*
+ * NOTE:
+ * Just update the RX rb index.
+ *
+ * The read index moves forward by idx_adv plus the 64-bit offset that
+ * follows a channel packet.  Returns EAGAIN when fewer than idx_adv + 8
+ * bytes are available, else 0.  When need_sig is supplied it receives
+ * vmbus_rxbr_need_signal()'s verdict for the bytes just released.
+ */
+int
+vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
+ boolean_t *need_sig)
+{
+ uint32_t rindex, br_dsize = rbr->rxbr_dsize;
+
+ mtx_lock_spin(&rbr->rxbr_lock);
+ /*
+ * Make sure it has enough space to advance.
+ */
+ if (vmbus_rxbr_avail(rbr) < idx_adv + sizeof(uint64_t)) {
+ mtx_unlock_spin(&rbr->rxbr_lock);
+ return (EAGAIN);
+ }
+
+ /*
+ * Advance the read index, including the channel's 64bit
+ * previous write offset.
+ */
+ rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
+ idx_adv + sizeof(uint64_t), br_dsize);
+ __compiler_membar();
+ rbr->rxbr_rindex = rindex;
+
+ mtx_unlock_spin(&rbr->rxbr_lock);
+
+ /* The signal decision is made after the spin lock is dropped. */
+ if (need_sig) {
+ *need_sig =
+ vmbus_rxbr_need_signal(rbr, idx_adv + sizeof(uint64_t));
+ }
return (0);
}
Index: head/sys/dev/hyperv/vmbus/vmbus_brvar.h
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_brvar.h
+++ head/sys/dev/hyperv/vmbus/vmbus_brvar.h
@@ -44,6 +44,10 @@
#define vbr_windex vbr->br_windex
#define vbr_rindex vbr->br_rindex
#define vbr_imask vbr->br_imask
+#define vbr_psndsz vbr->br_pending_snd_sz
+#define vbr_fpsndsz vbr->br_feature_bits.feat_pending_snd_sz
+#define vbr_fvalue vbr->br_feature_bits.value
+#define vbr_intrcnt vbr->br_g2h_intr_cnt
#define vbr_data vbr->br_data
struct vmbus_rxbr {
@@ -54,6 +58,10 @@
#define rxbr_windex rxbr.vbr_windex
#define rxbr_rindex rxbr.vbr_rindex
#define rxbr_imask rxbr.vbr_imask
+#define rxbr_psndsz rxbr.vbr_psndsz
+#define rxbr_fpsndsz rxbr.vbr_fpsndsz
+#define rxbr_fvalue rxbr.vbr_fvalue
+#define rxbr_intrcnt rxbr.vbr_intrcnt
#define rxbr_data rxbr.vbr_data
#define rxbr_dsize rxbr.vbr_dsize
@@ -65,6 +73,10 @@
#define txbr_windex txbr.vbr_windex
#define txbr_rindex txbr.vbr_rindex
#define txbr_imask txbr.vbr_imask
+#define txbr_psndsz txbr.vbr_psndsz
+#define txbr_fpsndsz txbr.vbr_fpsndsz
+#define txbr_fvalue txbr.vbr_fvalue
+#define txbr_intrcnt txbr.vbr_intrcnt
#define txbr_data txbr.vbr_data
#define txbr_dsize txbr.vbr_dsize
@@ -118,13 +130,28 @@
int vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen);
int vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen,
uint32_t skip);
+int vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
+ boolean_t *need_sig);
+int vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data,
+ int dlen, uint32_t idx_adv, boolean_t *need_sig);
+int vmbus_rxbr_peek_call(struct vmbus_rxbr *rbr, int dlen,
+ uint32_t skip, vmbus_br_copy_callback_t cb, void *cbarg);
void vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr);
uint32_t vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr);
+uint32_t vmbus_rxbr_available(const struct vmbus_rxbr *rbr);
void vmbus_txbr_init(struct vmbus_txbr *tbr);
void vmbus_txbr_deinit(struct vmbus_txbr *tbr);
void vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen);
int vmbus_txbr_write(struct vmbus_txbr *tbr,
const struct iovec iov[], int iovlen, boolean_t *need_sig);
+int vmbus_txbr_write_call(struct vmbus_txbr *tbr,
+ const struct iovec iov[], int iovlen,
+ vmbus_br_copy_callback_t cb, void *cbarg,
+ boolean_t *need_sig);
+uint32_t vmbus_txbr_available(const struct vmbus_txbr *tbr);
+uint32_t vmbus_txbr_get_imask(const struct vmbus_txbr *tbr);
+void vmbus_txbr_set_pending_snd_sz(struct vmbus_txbr *tbr,
+ uint32_t size);
#endif /* _VMBUS_BRVAR_H_ */
Index: head/sys/dev/hyperv/vmbus/vmbus_chan.c
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_chan.c
+++ head/sys/dev/hyperv/vmbus/vmbus_chan.c
@@ -127,10 +127,11 @@
};
/*
- * Notify host that there are data pending on our TX bufring.
+ * Notify host that there are data pending on our TX bufring or
+ * that we have consumed data from our RX bufring.
*/
static __inline void
-vmbus_chan_signal_tx(const struct vmbus_channel *chan)
+vmbus_chan_signal(const struct vmbus_channel *chan)
{
atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask);
if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF)
@@ -139,6 +140,22 @@
hypercall_signal_event(chan->ch_monprm_dma.hv_paddr);
}
+static __inline void
+vmbus_chan_signal_tx(struct vmbus_channel *chan)
+{
+ chan->ch_txbr.txbr_intrcnt ++;
+
+ vmbus_chan_signal(chan);
+}
+
+static __inline void
+vmbus_chan_signal_rx(struct vmbus_channel *chan)
+{
+ chan->ch_rxbr.rxbr_intrcnt ++;
+
+ vmbus_chan_signal(chan);
+}
+
static void
vmbus_chan_ins_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan)
{
@@ -1012,7 +1029,60 @@
taskqueue_drain(chan->ch_tq, &chan->ch_task);
}
+uint32_t
+vmbus_chan_write_available(struct vmbus_channel *chan)
+{
+ return (vmbus_txbr_available(&chan->ch_txbr));
+}
+
+bool
+vmbus_chan_write_signal(struct vmbus_channel *chan,
+ int32_t min_signal_size)
+{
+ if (min_signal_size >= 0 &&
+ vmbus_chan_write_available(chan) > min_signal_size) {
+ return false;
+ }
+
+ if (!vmbus_txbr_get_imask(&chan->ch_txbr)) {
+ /* txbr imask is not set, signal the reader */
+ vmbus_chan_signal_tx(chan);
+ return true;
+ }
+
+ return false;
+}
+
+void
+vmbus_chan_set_pending_send_size(struct vmbus_channel *chan,
+ uint32_t size)
+{
+ if (chan)
+ vmbus_txbr_set_pending_snd_sz(&chan->ch_txbr, size);
+}
+
int
+vmbus_chan_iov_send(struct vmbus_channel *chan,
+ const struct iovec iov[], int iovlen,
+ vmbus_br_copy_callback_t cb, void *cbarg)
+{
+ int error;
+ boolean_t send_evt;
+
+ if (iovlen == 0)
+ return (0);
+
+ error = vmbus_txbr_write_call(&chan->ch_txbr, iov, iovlen,
+ cb, cbarg, &send_evt);
+
+ if (!error && send_evt) {
+ vmbus_chan_signal_tx(chan);
+ }
+
+ return error;
+}
+
+int
vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags,
void *data, int dlen, uint64_t xactid)
{
@@ -1211,6 +1281,78 @@
return (0);
}
+uint32_t
+vmbus_chan_read_available(struct vmbus_channel *chan)
+{
+ return (vmbus_rxbr_available(&chan->ch_rxbr));
+}
+
+/*
+ * This routine does:
+ * - Advance the channel read index for 'advance' bytes
+ * - Copy data_len bytes into the buffer pointed to by 'data'
+ * Returns 0 if the operation succeeds, EAGAIN if it fails.
+ * On failure, the buffer pointed to by 'data' is left intact, and
+ * the channel read index is not advanced at all.
+ */
+int
+vmbus_chan_recv_peek(struct vmbus_channel *chan,
+ void *data, int data_len, uint32_t advance)
+{
+ int error;
+ boolean_t sig_event;
+
+ if (data == NULL || data_len <= 0)
+ return (EINVAL);
+
+ error = vmbus_rxbr_idxadv_peek(&chan->ch_rxbr,
+ data, data_len, advance, &sig_event);
+
+ if (!error && sig_event) {
+ vmbus_chan_signal_rx(chan);
+ }
+
+ return (error);
+}
+
+/*
+ * This routine does:
+ * - Advance the channel read index for 'advance' bytes
+ */
+int
+vmbus_chan_recv_idxadv(struct vmbus_channel *chan, uint32_t advance)
+{
+ int error;
+ boolean_t sig_event;
+
+ if (advance == 0)
+ return (EINVAL);
+
+ error = vmbus_rxbr_idxadv(&chan->ch_rxbr, advance, &sig_event);
+
+ if (!error && sig_event) {
+ vmbus_chan_signal_rx(chan);
+ }
+
+ return (error);
+}
+
+
+/*
+ * Caller should hold its own lock to serialize the ring buffer
+ * copy.
+ */
+int
+vmbus_chan_recv_peek_call(struct vmbus_channel *chan, int data_len,
+ uint32_t skip, vmbus_br_copy_callback_t cb, void *cbarg)
+{
+ if (!chan || data_len <= 0 || cb == NULL)
+ return (EINVAL);
+
+ return (vmbus_rxbr_peek_call(&chan->ch_rxbr, data_len, skip,
+ cb, cbarg));
+}
+
static void
vmbus_chan_task(void *xchan, int pending __unused)
{
@@ -1732,6 +1874,25 @@
1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
}
+ if (offer->chm_chflags & VMBUS_CHAN_TLNPI_PROVIDER_OFFER) {
+ /* This is HyperV socket channel */
+ chan->ch_is_hvs = true;
+ /* The first byte != 0 means the host initiated connection. */
+ chan->ch_hvs_conn_from_host =
+ offer->chm_udata.pipe.user_def[0];
+
+ if (bootverbose) {
+ device_printf(sc->vmbus_dev,
+ "chan%u is hyperv socket channel "
+ "connected %s host\n",
+ chan->ch_id,
+ (chan->ch_hvs_conn_from_host != 0) ?
+ "from" : "to");
+ }
+ } else {
+ chan->ch_is_hvs = false;
+ }
+
/*
* Setup event flag.
*/
@@ -2047,8 +2208,31 @@
return false;
}
-const struct hyperv_guid *
-vmbus_chan_guid_inst(const struct vmbus_channel *chan)
+bool
+vmbus_chan_is_hvs(const struct vmbus_channel *chan)
+{
+ return chan->ch_is_hvs;
+}
+
+bool
+vmbus_chan_is_hvs_conn_from_host(const struct vmbus_channel *chan)
+{
+ KASSERT(vmbus_chan_is_hvs(chan) == true,
+ ("Not a HyperV Socket channel %u", chan->ch_id));
+ if (chan->ch_hvs_conn_from_host != 0)
+ return true;
+ else
+ return false;
+}
+
+struct hyperv_guid *
+vmbus_chan_guid_type(struct vmbus_channel *chan)
+{
+ return &chan->ch_guid_type;
+}
+
+struct hyperv_guid *
+vmbus_chan_guid_inst(struct vmbus_channel *chan)
{
return &chan->ch_guid_inst;
}
Index: head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
+++ head/sys/dev/hyperv/vmbus/vmbus_chanvar.h
@@ -149,6 +149,12 @@
int ch_refs;
+ /*
+ * These are for HyperV socket channel only
+ */
+ bool ch_is_hvs;
+ uint8_t ch_hvs_conn_from_host;
+
struct sysctl_ctx_list ch_sysctl_ctx;
} __aligned(CACHE_LINE_SIZE);
Index: head/sys/dev/hyperv/vmbus/vmbus_reg.h
===================================================================
--- head/sys/dev/hyperv/vmbus/vmbus_reg.h
+++ head/sys/dev/hyperv/vmbus/vmbus_reg.h
@@ -127,7 +127,54 @@
*/
volatile uint32_t br_imask;
- uint8_t br_rsvd[4084];
+ /*
+ * WS2012/Win8 and later versions of Hyper-V implement interrupt
+ * driven flow management. The feature bit feat_pending_snd_sz
+ * is set by the host on the host->guest buffer ring, and by the
+ * guest on the guest->host buffer ring.
+ *
+ * The meaning of the feature bit is a bit complex in that it has
+ * semantics that apply to both buffer rings. If the guest sets
+ * the feature bit in the guest->host buffer ring, the guest is
+ * telling the host that:
+ * 1) It will set the br_pending_snd_sz field in the guest->host buffer
+ * ring when it is waiting for space to become available, and
+ * 2) It will read the br_pending_snd_sz field in the host->guest
+ * ring buffer and interrupt the host when it frees enough space
+ *
+ * Similarly, if the host sets the feature bit in the host->guest
+ * ring buffer, the host is telling the guest that:
+ * 1) It will set the br_pending_snd_sz field in the host->guest ring
+ * buffer when it is waiting for space to become available, and
+ * 2) It will read the br_pending_snd_sz field in the guest->host
+ * ring buffer and interrupt the guest when it frees enough space
+ *
+ * If either the guest or host does not set the feature bit that it
+ * owns, that guest or host must do polling if it encounters a full
+ * ring buffer, and not signal the other end with an interrupt.
+ */
+ volatile uint32_t br_pending_snd_sz;
+ uint32_t br_rsvd1[12];
+ union {
+ struct {
+ uint32_t feat_pending_snd_sz:1;
+ };
+ uint32_t value;
+ } br_feature_bits;
+
+ /* Padding to PAGE_SIZE */
+ uint8_t br_rsvd2[4020];
+
+ /*
+ * Total guest to host interrupt count
+ * - For the rx ring, this counts the guest signaling the host when
+ * this rx ring changes from full to not full.
+ *
+ * - For the tx ring, this counts the guest signaling the host when
+ * this tx ring changes from empty to non-empty.
+ */
+ uint64_t br_g2h_intr_cnt;
+
uint8_t br_data[];
} __packed;
CTASSERT(sizeof(struct vmbus_bufring) == PAGE_SIZE);
@@ -196,7 +243,14 @@
#define VMBUS_CHANMSG_TYPE_CONNECT 14 /* REQ */
#define VMBUS_CHANMSG_TYPE_CONNECT_RESP 15 /* RESP */
#define VMBUS_CHANMSG_TYPE_DISCONNECT 16 /* REQ */
-#define VMBUS_CHANMSG_TYPE_MAX 22
+#define VMBUS_CHANMSG_TYPE_17 17
+#define VMBUS_CHANMSG_TYPE_18 18
+#define VMBUS_CHANMSG_TYPE_19 19
+#define VMBUS_CHANMSG_TYPE_20 20
+#define VMBUS_CHANMSG_TYPE_TL_CONN 21 /* REQ */
+#define VMBUS_CHANMSG_TYPE_22 22
+#define VMBUS_CHANMSG_TYPE_TL_RESULT 23 /* RESP */
+#define VMBUS_CHANMSG_TYPE_MAX 24
struct vmbus_chanmsg_hdr {
uint32_t chm_type; /* VMBUS_CHANMSG_TYPE_ */
@@ -229,6 +283,15 @@
struct vmbus_chanmsg_hdr chm_hdr;
} __packed;
+/* VMBUS_CHANMSG_TYPE_TL_CONN */
+/* Hyper-V socket guest connect request */
+struct vmbus_chanmsg_tl_connect {
+ struct vmbus_chanmsg_hdr chm_hdr;
+ struct hyperv_guid guest_endpoint_id;
+ struct hyperv_guid host_service_id;
+} __packed;
+
+
/* VMBUS_CHANMSG_TYPE_CHOPEN */
struct vmbus_chanmsg_chopen {
struct vmbus_chanmsg_hdr chm_hdr;
@@ -310,6 +373,12 @@
uint32_t chm_chanid;
} __packed;
+/* Size of the user defined data buffer for non-pipe offers */
+#define VMBUS_CHANMSG_CHOFFER_UDATA_SIZE 120
+
+/* Size of the user defined data buffer for pipe offers. */
+#define VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE 116
+
/* VMBUS_CHANMSG_TYPE_CHOFFER */
struct vmbus_chanmsg_choffer {
struct vmbus_chanmsg_hdr chm_hdr;
@@ -320,7 +389,26 @@
uint32_t chm_svrctx_sz;
uint16_t chm_chflags;
uint16_t chm_mmio_sz; /* unit: MB */
- uint8_t chm_udata[120];
+
+ union {
+ /* Non-pipes */
+ struct {
+ uint8_t user_def[VMBUS_CHANMSG_CHOFFER_UDATA_SIZE];
+ } std;
+ /*
+ * Pipes:
+ * For integrated pipe protocol, which is implemented on
+ * top of standard user-defined data. Pipe clients have
+ * VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE bytes left for
+ * their own use.
+ */
+ struct {
+ uint32_t pipe_mode;
+ uint8_t
+ user_def[VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE];
+ } pipe;
+ } chm_udata;
+
uint16_t chm_subidx;
uint16_t chm_rsvd;
uint32_t chm_chanid;
@@ -330,6 +418,9 @@
uint32_t chm_connid;
} __packed;
CTASSERT(sizeof(struct vmbus_chanmsg_choffer) <= VMBUS_MSG_DSIZE_MAX);
+
+/* Server Flag */
+#define VMBUS_CHAN_TLNPI_PROVIDER_OFFER 0x2000
#define VMBUS_CHOFFER_FLAG1_HASMNF 0x01
Index: head/sys/modules/hyperv/Makefile
===================================================================
--- head/sys/modules/hyperv/Makefile
+++ head/sys/modules/hyperv/Makefile
@@ -1,5 +1,5 @@
# $FreeBSD$
-SUBDIR = vmbus netvsc storvsc utilities
+SUBDIR = vmbus netvsc storvsc utilities hvsock
.include <bsd.subdir.mk>
Index: head/sys/modules/hyperv/hvsock/Makefile
===================================================================
--- head/sys/modules/hyperv/hvsock/Makefile
+++ head/sys/modules/hyperv/hvsock/Makefile
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/dev/hyperv/hvsock
+
+KMOD= hv_sock
+SRCS= hv_sock.c
+SRCS+= hv_sock.h
+
+CFLAGS+= -I${SRCTOP}/sys/dev/hyperv/include \
+ -I${SRCTOP}/sys/dev/hyperv/vmbus \
+ -I${SRCTOP}/sys/dev/hyperv/hvsock
+
+.include <bsd.kmod.mk>
Index: head/sys/sys/socket.h
===================================================================
--- head/sys/sys/socket.h
+++ head/sys/sys/socket.h
@@ -265,7 +265,8 @@
#define AF_IEEE80211 37 /* IEEE 802.11 protocol */
#define AF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */
#define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */
-#define AF_MAX 42
+#define AF_HYPERV 43 /* HyperV sockets */
+#define AF_MAX 43
/*
* When allocating a new AF_ constant, please only allocate
* even numbered constants for FreeBSD until 134 as odd numbered AF_
@@ -273,7 +274,6 @@
*/
#define AF_VENDOR00 39
#define AF_VENDOR01 41
-#define AF_VENDOR02 43
#define AF_VENDOR03 45
#define AF_VENDOR04 47
#define AF_VENDOR05 49
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Feb 8, 3:39 AM (11 h, 20 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28463550
Default Alt Text
D24061.diff (77 KB)
Attached To
Mode
D24061: Hyper-V socket implementation for FreeBSD guest
Attached
Detach File
Event Timeline
Log In to Comment